#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task_stack.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/userfaultfd_k.h>
#include <linux/random.h>

#include <asm/sections.h>
#include <linux/uaccess.h>

#include "internal.h"

static inline int is_kernel_rodata(unsigned long addr)
{
	return addr >= (unsigned long)__start_rodata &&
		addr < (unsigned long)__end_rodata;
}

/**
 * kfree_const - conditionally free memory
 * @x: pointer to the memory
 *
 * The function calls kfree() only if @x is not in the .rodata section.
 */
void kfree_const(const void *x)
{
	if (!is_kernel_rodata((unsigned long)x))
		kfree(x);
}
EXPORT_SYMBOL(kfree_const);

/**
 * kstrdup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrdup(const char *s, gfp_t gfp)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;

	len = strlen(s) + 1;
	buf = kmalloc_track_caller(len, gfp);
	if (buf)
		memcpy(buf, s, len);
	return buf;
}
EXPORT_SYMBOL(kstrdup);

/**
 * kstrdup_const - conditionally duplicate an existing const string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * The function returns the source string if it is in the .rodata section;
 * otherwise it falls back to kstrdup().
 * Strings allocated by kstrdup_const should be freed by kfree_const.
 */
const char *kstrdup_const(const char *s, gfp_t gfp)
{
	if (is_kernel_rodata((unsigned long)s))
		return s;

	return kstrdup(s, gfp);
}
EXPORT_SYMBOL(kstrdup_const);
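
/*
 * Example (illustrative sketch, not part of this file): a caller that may
 * receive either a string literal or a dynamically built name can pair
 * kstrdup_const() with kfree_const() so that .rodata strings are never
 * copied or freed. The struct and function names below are hypothetical.
 *
 *	struct foo {
 *		const char *name;
 *	};
 *
 *	static int foo_set_name(struct foo *f, const char *name)
 *	{
 *		f->name = kstrdup_const(name, GFP_KERNEL);
 *		return f->name ? 0 : -ENOMEM;
 *	}
 *
 *	static void foo_release(struct foo *f)
 *	{
 *		kfree_const(f->name);
 *	}
 */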

/**
 * kstrndup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @max: read at most @max chars from @s
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Note: Use kmemdup_nul() instead if the size is known exactly.
 */
char *kstrndup(const char *s, size_t max, gfp_t gfp)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;

	len = strnlen(s, max);
	buf = kmalloc_track_caller(len+1, gfp);
	if (buf) {
		memcpy(buf, s, len);
		buf[len] = '\0';
	}
	return buf;
}
EXPORT_SYMBOL(kstrndup);

/**
 * kmemdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 * @gfp: GFP mask to use
 */
void *kmemdup(const void *src, size_t len, gfp_t gfp)
{
	void *p;

	p = kmalloc_track_caller(len, gfp);
	if (p)
		memcpy(p, src, len);
	return p;
}
EXPORT_SYMBOL(kmemdup);

/**
 * kmemdup_nul - Create a NUL-terminated string from unterminated data
 * @s: The data to stringify
 * @len: The size of the data
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
{
	char *buf;

	if (!s)
		return NULL;

	buf = kmalloc_track_caller(len + 1, gfp);
	if (buf) {
		memcpy(buf, s, len);
		buf[len] = '\0';
	}
	return buf;
}
EXPORT_SYMBOL(kmemdup_nul);
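
/*
 * Example (illustrative sketch): kstrndup() has to walk the source with
 * strnlen() first, so when the length is already known - say, a counted
 * attribute value that is not NUL-terminated - kmemdup_nul() is the
 * better fit. "data" and "len" below are hypothetical.
 *
 *	char *val = kmemdup_nul(data, len, GFP_KERNEL);
 *
 *	if (!val)
 *		return -ENOMEM;
 */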

/**
 * memdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Returns an ERR_PTR() on failure. The result is physically
 * contiguous and should be freed by kfree().
 */
void *memdup_user(const void __user *src, size_t len)
{
	void *p;

	p = kmalloc_track_caller(len, GFP_USER);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}

	return p;
}
EXPORT_SYMBOL(memdup_user);
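
/*
 * Example (sketch, hypothetical ioctl argument struct): unlike the
 * k*dup() helpers above, failures are reported as ERR_PTR() values, so
 * callers must test with IS_ERR() rather than comparing against NULL.
 *
 *	struct my_args *args = memdup_user(uarg, sizeof(*args));
 *
 *	if (IS_ERR(args))
 *		return PTR_ERR(args);
 *	...
 *	kfree(args);
 */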

/**
 * vmemdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Returns an ERR_PTR() on failure. The result may not be
 * physically contiguous. Use kvfree() to free it.
 */
void *vmemdup_user(const void __user *src, size_t len)
{
	void *p;

	p = kvmalloc(len, GFP_USER);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kvfree(p);
		return ERR_PTR(-EFAULT);
	}

	return p;
}
EXPORT_SYMBOL(vmemdup_user);

/**
 * strndup_user - duplicate an existing string from user space
 * @s: The string to duplicate
 * @n: Maximum number of bytes to copy, including the trailing NUL.
 */
char *strndup_user(const char __user *s, long n)
{
	char *p;
	long length;

	length = strnlen_user(s, n);

	if (!length)
		return ERR_PTR(-EFAULT);

	if (length > n)
		return ERR_PTR(-EINVAL);

	p = memdup_user(s, length);

	if (IS_ERR(p))
		return p;

	p[length - 1] = '\0';

	return p;
}
EXPORT_SYMBOL(strndup_user);
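
/*
 * Example (sketch): duplicating a NUL-terminated path from user space,
 * bounded at PATH_MAX. As with memdup_user(), errors arrive as ERR_PTR()
 * values and the result is released with kfree(). "upath" is hypothetical.
 *
 *	char *path = strndup_user(upath, PATH_MAX);
 *
 *	if (IS_ERR(path))
 *		return PTR_ERR(path);
 */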

/**
 * memdup_user_nul - duplicate memory region from user space and NUL-terminate
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Returns an ERR_PTR() on failure.
 */
void *memdup_user_nul(const void __user *src, size_t len)
{
	char *p;

	/*
	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
	 * cause pagefault, which makes it pointless to use GFP_NOFS
	 * or GFP_ATOMIC.
	 */
	p = kmalloc_track_caller(len + 1, GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	p[len] = '\0';

	return p;
}
EXPORT_SYMBOL(memdup_user_nul);

void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node *rb_parent)
{
	struct vm_area_struct *next;

	vma->vm_prev = prev;
	if (prev) {
		next = prev->vm_next;
		prev->vm_next = vma;
	} else {
		mm->mmap = vma;
		if (rb_parent)
			next = rb_entry(rb_parent,
					struct vm_area_struct, vm_rb);
		else
			next = NULL;
	}
	vma->vm_next = next;
	if (next)
		next->vm_prev = vma;
}

/* Check if the vma is being used as a stack by this task */
int vma_is_stack_for_current(struct vm_area_struct *vma)
{
	struct task_struct * __maybe_unused t = current;

	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}

/**
 * randomize_page - Generate a random, page aligned address
 * @start: The smallest acceptable address the caller will take.
 * @range: The size of the area, starting at @start, within which the
 *         random address must fall.
 *
 * If @start + @range would overflow, @range is capped.
 *
 * NOTE: Historical use of randomize_range, which this replaces, presumed that
 * @start was already page aligned. We now align it regardless.
 *
 * Return: A page aligned address within [start, start + range). On error,
 * @start is returned.
 */
unsigned long randomize_page(unsigned long start, unsigned long range)
{
	if (!PAGE_ALIGNED(start)) {
		range -= PAGE_ALIGN(start) - start;
		start = PAGE_ALIGN(start);
	}

	if (start > ULONG_MAX - range)
		range = ULONG_MAX - start;

	range >>= PAGE_SHIFT;

	if (range == 0)
		return start;

	return start + (get_random_long() % range << PAGE_SHIFT);
}
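
/*
 * Example (sketch with made-up numbers): an architecture placing a base
 * address somewhere in a 256MB window above a minimum could do:
 *
 *	unsigned long base = randomize_page(min_addr, SZ_256M);
 *
 * With 4K pages, range >> PAGE_SHIFT leaves 65536 candidate slots, so the
 * result is min_addr plus one of 65536 page aligned offsets (or min_addr
 * itself if the window collapses to zero after alignment).
 */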

#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
	mm->mmap_base = TASK_UNMAPPED_BASE;
	mm->get_unmapped_area = arch_get_unmapped_area;
}
#endif

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 * Note a difference with get_user_pages_fast: this always returns the
 * number of pages pinned, 0 if no pages were pinned.
 * If the architecture does not support this function, simply return with no
 * pages pinned.
 */
int __weak __get_user_pages_fast(unsigned long start,
				 int nr_pages, int write, struct page **pages)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__get_user_pages_fast);

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start: starting user address
 * @nr_pages: number of pages from start to pin
 * @write: whether pages will be written to
 * @pages: array that receives pointers to the pages pinned.
 *         Should be at least nr_pages long.
 *
 * Returns the number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 *
 * get_user_pages_fast provides equivalent functionality to get_user_pages,
 * operating on current and current->mm, with force=0 and vma=NULL. However
 * unlike get_user_pages, it must be called without mmap_sem held.
 *
 * get_user_pages_fast may take mmap_sem and page table locks, so no
 * assumptions can be made about lack of locking. get_user_pages_fast is to be
 * implemented in a way that is advantageous (vs get_user_pages()) when the
 * user memory area is already faulted in and present in ptes. However if the
 * pages have to be faulted in, it may turn out to be slightly slower, so
 * callers need to carefully consider what to use. On many architectures,
 * get_user_pages_fast simply falls back to get_user_pages.
 */
int __weak get_user_pages_fast(unsigned long start,
				int nr_pages, int write, struct page **pages)
{
	return get_user_pages_unlocked(start, nr_pages, pages,
				       write ? FOLL_WRITE : 0);
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);
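
/*
 * Example (illustrative sketch, hypothetical variable names): a caller
 * pins a handful of pages for write access, copes with a partial pin,
 * and drops every reference with put_page() when done.
 *
 *	nr_pinned = get_user_pages_fast(uaddr, nr_pages, 1, pages);
 *	if (nr_pinned < 0)
 *		return nr_pinned;
 *
 *	... use pages[0 .. nr_pinned - 1] ...
 *
 *	for (i = 0; i < nr_pinned; i++)
 *		put_page(pages[i]);
 */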

unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long pgoff)
{
	unsigned long ret;
	struct mm_struct *mm = current->mm;
	unsigned long populate;
	LIST_HEAD(uf);

	ret = security_mmap_file(file, prot, flag);
	if (!ret) {
		if (down_write_killable(&mm->mmap_sem))
			return -EINTR;
		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
				    &populate, &uf);
		up_write(&mm->mmap_sem);
		userfaultfd_unmap_complete(mm, &uf);
		if (populate)
			mm_populate(ret, populate);
	}
	return ret;
}

unsigned long vm_mmap(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long offset)
{
	if (unlikely(offset + PAGE_ALIGN(len) < offset))
		return -EINVAL;
	if (unlikely(offset_in_page(offset)))
		return -EINVAL;

	return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
EXPORT_SYMBOL(vm_mmap);

/**
 * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
 * failure, fall back to non-contiguous (vmalloc) allocation.
 * @size: size of the request.
 * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
 * @node: numa node to allocate from
 *
 * Uses kmalloc to get the memory but if the allocation fails then falls back
 * to the vmalloc allocator. Use kvfree for freeing the memory.
 *
 * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
 * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
 * preferable to the vmalloc fallback, due to visible performance drawbacks.
 *
 * Please note that any use of gfp flags outside of GFP_KERNEL is careful to not
 * fall back to vmalloc.
 */
void *kvmalloc_node(size_t size, gfp_t flags, int node)
{
	gfp_t kmalloc_flags = flags;
	void *ret;

	/*
	 * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
	 * so the given set of flags has to be compatible.
	 */
	if ((flags & GFP_KERNEL) != GFP_KERNEL)
		return kmalloc_node(size, flags, node);

	/*
	 * We want to attempt a large physically contiguous block first because
	 * it is less likely to fragment multiple larger blocks and therefore
	 * contribute to a long term fragmentation less than vmalloc fallback.
	 * However make sure that larger requests are not too disruptive - no
	 * OOM killer and no allocation failure warnings as we have a fallback.
	 */
	if (size > PAGE_SIZE) {
		kmalloc_flags |= __GFP_NOWARN;

		if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
			kmalloc_flags |= __GFP_NORETRY;
	}

	ret = kmalloc_node(size, kmalloc_flags, node);

	/*
	 * It doesn't really make sense to fall back to vmalloc for sub page
	 * requests
	 */
	if (ret || size <= PAGE_SIZE)
		return ret;

	return __vmalloc_node_flags_caller(size, node, flags,
			__builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node);
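
/*
 * Example (illustrative sketch): a table whose size depends on user input
 * is a typical kvmalloc() user - small requests stay physically
 * contiguous, large ones quietly fall back to vmalloc, and kvfree()
 * handles both. "nr" and "entries" below are hypothetical.
 *
 *	entries = kvmalloc(array_size(nr, sizeof(*entries)), GFP_KERNEL);
 *	if (!entries)
 *		return -ENOMEM;
 *	...
 *	kvfree(entries);
 */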

/**
 * kvfree() - Free memory.
 * @addr: Pointer to allocated memory.
 *
 * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc().
 * It is slightly more efficient to use kfree() or vfree() if you are certain
 * that you know which one to use.
 *
 * Context: Any context except NMI.
 */
void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}
EXPORT_SYMBOL(kvfree);

/**
 * kvfree_sensitive - Free a data object containing sensitive information.
 * @addr: address of the data object to be freed.
 * @len: length of the data object.
 *
 * Use the special memzero_explicit() function to clear the content of a
 * kvmalloc'ed object containing sensitive data to make sure that the
 * compiler won't optimize out the data clearing.
 */
void kvfree_sensitive(const void *addr, size_t len)
{
	if (likely(!ZERO_OR_NULL_PTR(addr))) {
		memzero_explicit((void *)addr, len);
		kvfree(addr);
	}
}
EXPORT_SYMBOL(kvfree_sensitive);
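
/*
 * Example (sketch, hypothetical key structure): key material should be
 * wiped before the allocation is returned to the allocator, which is
 * exactly what kvfree_sensitive() does on the free path.
 *
 *	key->data = kvmalloc(key->len, GFP_KERNEL);
 *	...
 *	kvfree_sensitive(key->data, key->len);
 */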

static inline void *__page_rmapping(struct page *page)
{
	unsigned long mapping;

	mapping = (unsigned long)page->mapping;
	mapping &= ~PAGE_MAPPING_FLAGS;

	return (void *)mapping;
}

/* Neutral page->mapping pointer to address_space or anon_vma or other */
void *page_rmapping(struct page *page)
{
	page = compound_head(page);
	return __page_rmapping(page);
}

/*
 * Return true if this page is mapped into pagetables.
 * For compound page it returns true if any subpage of compound page is mapped.
 */
bool page_mapped(struct page *page)
{
	int i;

	if (likely(!PageCompound(page)))
		return atomic_read(&page->_mapcount) >= 0;
	page = compound_head(page);
	if (atomic_read(compound_mapcount_ptr(page)) >= 0)
		return true;
	if (PageHuge(page))
		return false;
	for (i = 0; i < (1 << compound_order(page)); i++) {
		if (atomic_read(&page[i]._mapcount) >= 0)
			return true;
	}
	return false;
}
EXPORT_SYMBOL(page_mapped);

struct anon_vma *page_anon_vma(struct page *page)
{
	unsigned long mapping;

	page = compound_head(page);
	mapping = (unsigned long)page->mapping;
	if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		return NULL;
	return __page_rmapping(page);
}

struct address_space *page_mapping(struct page *page)
{
	struct address_space *mapping;

	page = compound_head(page);

	/* This happens if someone calls flush_dcache_page on slab page */
	if (unlikely(PageSlab(page)))
		return NULL;

	if (unlikely(PageSwapCache(page))) {
		swp_entry_t entry;

		entry.val = page_private(page);
		return swap_address_space(entry);
	}

	mapping = page->mapping;
	if ((unsigned long)mapping & PAGE_MAPPING_ANON)
		return NULL;

	return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
}
EXPORT_SYMBOL(page_mapping);

/*
 * For file cache pages, return the address_space, otherwise return NULL
 */
struct address_space *page_mapping_file(struct page *page)
{
	if (unlikely(PageSwapCache(page)))
		return NULL;
	return page_mapping(page);
}

/* Slow path of page_mapcount() for compound pages */
int __page_mapcount(struct page *page)
{
	int ret;

	ret = atomic_read(&page->_mapcount) + 1;
	/*
	 * For file THP page->_mapcount contains total number of mapping
	 * of the page: no need to look into compound_mapcount.
	 */
	if (!PageAnon(page) && !PageHuge(page))
		return ret;
	page = compound_head(page);
	ret += atomic_read(compound_mapcount_ptr(page)) + 1;
	if (PageDoubleMap(page))
		ret--;
	return ret;
}
EXPORT_SYMBOL_GPL(__page_mapcount);

int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */

int overcommit_ratio_handler(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_kbytes = 0;
	return ret;
}

int overcommit_kbytes_handler(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp,
			      loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_ratio = 0;
	return ret;
}

/*
 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
 */
unsigned long vm_commit_limit(void)
{
	unsigned long allowed;

	if (sysctl_overcommit_kbytes)
		allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
	else
		allowed = ((totalram_pages - hugetlb_total_pages())
			   * sysctl_overcommit_ratio / 100);
	allowed += total_swap_pages;

	return allowed;
}
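
/*
 * Worked example (illustrative numbers): with 8GB of RAM, no hugetlb
 * pages, 2GB of swap and the default sysctl_overcommit_ratio of 50,
 * OVERCOMMIT_NEVER permits roughly 8GB * 50% + 2GB = 6GB of committed
 * address space. Setting sysctl_overcommit_kbytes replaces the ratio
 * with an absolute figure instead.
 */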

/*
 * Make sure vm_committed_as is in its own cacheline and not shared with
 * other variables. It can be updated by several CPUs frequently.
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;

/*
 * The global memory commitment made in the system can be a metric
 * that can be used to drive ballooning decisions when Linux is hosted
 * as a guest. On Hyper-V, the host implements a policy engine for dynamically
 * balancing memory across competing virtual machines that are hosted.
 * Several metrics drive this policy engine including the guest reported
 * memory commitment.
 */
unsigned long vm_memory_committed(void)
{
	return percpu_counter_read_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting.rst
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	long free, allowed, reserve;

	VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
			-(s64)vm_committed_as_batch * num_online_cpus(),
			"memory commitment underflow");

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		free = global_zone_page_state(NR_FREE_PAGES);
		free += global_node_page_state(NR_FILE_PAGES);

		/*
		 * shmem pages shouldn't be counted as free in this
		 * case, they can't be purged, only swapped out, and
		 * that won't affect the overall amount of available
		 * memory in the system.
		 */
		free -= global_node_page_state(NR_SHMEM);

		free += get_nr_swap_pages();

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure. The dentry
		 * cache and most inode caches should fall into this.
		 */
		free += global_node_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Part of the kernel memory, which can be released
		 * under memory pressure.
		 */
		free += global_node_page_state(
			NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT;

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (free <= totalreserve_pages)
			goto error;
		else
			free -= totalreserve_pages;

		/*
		 * Reserve some for root
		 */
		if (!cap_sys_admin)
			free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = vm_commit_limit();
	/*
	 * Reserve some for root
	 */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

	/*
	 * Don't let a single process grow so big a user can't recover
	 */
	if (mm) {
		reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
		allowed -= min_t(long, mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}

/**
 * get_cmdline() - copy the cmdline value to a buffer.
 * @task: the task whose cmdline value to copy.
 * @buffer: the buffer to copy to.
 * @buflen: the length of the buffer. Larger cmdline values are truncated
 *          to this length.
 *
 * Returns the size of the cmdline field copied. Note that the copy does
 * not guarantee an ending NUL byte.
 */
int get_cmdline(struct task_struct *task, char *buffer, int buflen)
{
	int res = 0;
	unsigned int len;
	struct mm_struct *mm = get_task_mm(task);
	unsigned long arg_start, arg_end, env_start, env_end;
	if (!mm)
		goto out;
	if (!mm->arg_end)
		goto out_mm;	/* Shh! No looking before we're done */

	down_read(&mm->mmap_sem);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	up_read(&mm->mmap_sem);

	len = arg_end - arg_start;

	if (len > buflen)
		len = buflen;

	res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE);

	/*
	 * If the nul at the end of args has been overwritten, then
	 * assume application is using setproctitle(3).
	 */
	if (res > 0 && buffer[res-1] != '\0' && len < buflen) {
		len = strnlen(buffer, res);
		if (len < res) {
			res = len;
		} else {
			len = env_end - env_start;
			if (len > buflen - res)
				len = buflen - res;
			res += access_process_vm(task, env_start,
						 buffer+res, len,
						 FOLL_FORCE);
			res = strnlen(buffer, res);
		}
	}
out_mm:
	mmput(mm);
out:
	return res;
}
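
/*
 * Example (illustrative sketch): a debugging helper printing another
 * task's cmdline. The result is not guaranteed to be NUL-terminated,
 * hence the explicit precision in the format string.
 *
 *	char buf[256];
 *	int n = get_cmdline(task, buf, sizeof(buf));
 *
 *	pr_info("cmdline: %.*s\n", n, buf);
 */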