mm/slab_common.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Slab allocator functions that are independent of the allocator strategy
   4  *
   5  * (C) 2012 Christoph Lameter <cl@linux.com>
   6  */
   7 #include <linux/slab.h>
   8
   9 #include <linux/mm.h>
  10 #include <linux/poison.h>
  11 #include <linux/interrupt.h>
  12 #include <linux/memory.h>
  13 #include <linux/cache.h>
  14 #include <linux/compiler.h>
  15 #include <linux/module.h>
  16 #include <linux/cpu.h>
  17 #include <linux/uaccess.h>
  18 #include <linux/seq_file.h>
  19 #include <linux/proc_fs.h>
  20 #include <asm/cacheflush.h>
  21 #include <asm/tlbflush.h>
  22 #include <asm/page.h>
  23 #include <linux/memcontrol.h>
  24
  25 #define CREATE_TRACE_POINTS
  26 #include <trace/events/kmem.h>
  27
  28 #include "slab.h"
  29
  30 enum slab_state slab_state;
  31 LIST_HEAD(slab_caches);
  32 DEFINE_MUTEX(slab_mutex);
  33 struct kmem_cache *kmem_cache;
  34
  35 #ifdef CONFIG_HARDENED_USERCOPY
  36 bool usercopy_fallback __ro_after_init =
  37                 IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
  38 module_param(usercopy_fallback, bool, 0400);
  39 MODULE_PARM_DESC(usercopy_fallback,
  40                 "WARN instead of reject usercopy whitelist violations");
  41 #endif
  42
  43 static LIST_HEAD(slab_caches_to_rcu_destroy);
  44 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
  45 static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
  46                     slab_caches_to_rcu_destroy_workfn);
  47
  48 /*
  49  * Set of flags that will prevent slab merging
  50  */
  51 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
  52                 SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
  53                 SLAB_FAILSLAB | SLAB_KASAN)
  54
  55 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
  56                          SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
  57
  58 /*
  59  * Merge control. If this is set then no merging of slab caches will occur.
  60  */
  61 static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);
  62
  63 static int __init setup_slab_nomerge(char *str)
  64 {
  65         slab_nomerge = true;
  66         return 1;
  67 }
  68
  69 #ifdef CONFIG_SLUB
  70 __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
  71 #endif
  72
  73 __setup("slab_nomerge", setup_slab_nomerge);
  74
  75 /*
  76  * Determine the size of a slab object
  77  */
  78 unsigned int kmem_cache_size(struct kmem_cache *s)
  79 {
  80         return s->object_size;
  81 }
  82 EXPORT_SYMBOL(kmem_cache_size);
  83
  84 #ifdef CONFIG_DEBUG_VM
  85 static int kmem_cache_sanity_check(const char *name, unsigned int size)
  86 {
  87         if (!name || in_interrupt() || size < sizeof(void *) ||
  88                 size > KMALLOC_MAX_SIZE) {
  89                 pr_err("kmem_cache_create(%s) integrity check failed\n", name);
  90                 return -EINVAL;
  91         }
  92
  93         WARN_ON(strchr(name, ' '));     /* It confuses parsers */
  94         return 0;
  95 }
  96 #else
  97 static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
  98 {
  99         return 0;
 100 }
 101 #endif
 102
 103 void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
 104 {
 105         size_t i;
 106
 107         for (i = 0; i < nr; i++) {
 108                 if (s)
 109                         kmem_cache_free(s, p[i]);
 110                 else
 111                         kfree(p[i]);
 112         }
 113 }
 114
 115 int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 116                                                                 void **p)
 117 {
 118         size_t i;
 119
 120         for (i = 0; i < nr; i++) {
 121                 void *x = p[i] = kmem_cache_alloc(s, flags);
 122                 if (!x) {
 123                         __kmem_cache_free_bulk(s, i, p);
 124                         return 0;
 125                 }
 126         }
 127         return i;
 128 }
 129
 130 #ifdef CONFIG_MEMCG_KMEM
 131
 132 LIST_HEAD(slab_root_caches);
 133 static DEFINE_SPINLOCK(memcg_kmem_wq_lock);
 134
 135 void slab_init_memcg_params(struct kmem_cache *s)
 136 {
 137         s->memcg_params.root_cache = NULL;
 138         RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
 139         INIT_LIST_HEAD(&s->memcg_params.children);
 140         s->memcg_params.dying = false;
 141 }
 142
 143 static int init_memcg_params(struct kmem_cache *s,
 144                 struct mem_cgroup *memcg, struct kmem_cache *root_cache)
 145 {
 146         struct memcg_cache_array *arr;
 147
 148         if (root_cache) {
 149                 s->memcg_params.root_cache = root_cache;
 150                 s->memcg_params.memcg = memcg;
 151                 INIT_LIST_HEAD(&s->memcg_params.children_node);
 152                 INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
 153                 return 0;
 154         }
 155
 156         slab_init_memcg_params(s);
 157
 158         if (!memcg_nr_cache_ids)
 159                 return 0;
 160
 161         arr = kvzalloc(sizeof(struct memcg_cache_array) +
 162                        memcg_nr_cache_ids * sizeof(void *),
 163                        GFP_KERNEL);
 164         if (!arr)
 165                 return -ENOMEM;
 166
 167         RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
 168         return 0;
 169 }
 170
 171 static void destroy_memcg_params(struct kmem_cache *s)
 172 {
 173         if (is_root_cache(s))
 174                 kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
 175 }
 176
 177 static void free_memcg_params(struct rcu_head *rcu)
 178 {
 179         struct memcg_cache_array *old;
 180
 181         old = container_of(rcu, struct memcg_cache_array, rcu);
 182         kvfree(old);
 183 }
 184
 185 static int update_memcg_params(struct kmem_cache *s, int new_array_size)
 186 {
 187         struct memcg_cache_array *old, *new;
 188
 189         new = kvzalloc(sizeof(struct memcg_cache_array) +
 190                        new_array_size * sizeof(void *), GFP_KERNEL);
 191         if (!new)
 192                 return -ENOMEM;
 193
 194         old = rcu_dereference_protected(s->memcg_params.memcg_caches,
 195                                         lockdep_is_held(&slab_mutex));
 196         if (old)
 197                 memcpy(new->entries, old->entries,
 198                        memcg_nr_cache_ids * sizeof(void *));
 199
 200         rcu_assign_pointer(s->memcg_params.memcg_caches, new);
 201         if (old)
 202                 call_rcu(&old->rcu, free_memcg_params);
 203         return 0;
 204 }
 205
 206 int memcg_update_all_caches(int num_memcgs)
 207 {
 208         struct kmem_cache *s;
 209         int ret = 0;
 210
 211         mutex_lock(&slab_mutex);
 212         list_for_each_entry(s, &slab_root_caches, root_caches_node) {
 213                 ret = update_memcg_params(s, num_memcgs);
 214                 /*
 215                  * Instead of freeing the memory, we'll just leave the caches
 216                  * up to this point in an updated state.
 217                  */
 218                 if (ret)
 219                         break;
 220         }
 221         mutex_unlock(&slab_mutex);
 222         return ret;
 223 }
 224
 225 void memcg_link_cache(struct kmem_cache *s)
 226 {
 227         if (is_root_cache(s)) {
 228                 list_add(&s->root_caches_node, &slab_root_caches);
 229         } else {
 230                 list_add(&s->memcg_params.children_node,
 231                          &s->memcg_params.root_cache->memcg_params.children);
 232                 list_add(&s->memcg_params.kmem_caches_node,
 233                          &s->memcg_params.memcg->kmem_caches);
 234         }
 235 }
 236
 237 static void memcg_unlink_cache(struct kmem_cache *s)
 238 {
 239         if (is_root_cache(s)) {
 240                 list_del(&s->root_caches_node);
 241         } else {
 242                 list_del(&s->memcg_params.children_node);
 243                 list_del(&s->memcg_params.kmem_caches_node);
 244         }
 245 }
 246 #else
 247 static inline int init_memcg_params(struct kmem_cache *s,
 248                 struct mem_cgroup *memcg, struct kmem_cache *root_cache)
 249 {
 250         return 0;
 251 }
 252
 253 static inline void destroy_memcg_params(struct kmem_cache *s)
 254 {
 255 }
 256
 257 static inline void memcg_unlink_cache(struct kmem_cache *s)
 258 {
 259 }
 260 #endif /* CONFIG_MEMCG_KMEM */
 261
 262 /*
 263  * Figure out what the alignment of the objects will be given a set of
 264  * flags, a user specified alignment and the size of the objects.
 265  */
 266 static unsigned int calculate_alignment(slab_flags_t flags,
 267                 unsigned int align, unsigned int size)
 268 {
 269         /*
 270          * If the user wants hardware cache aligned objects then follow that
 271          * suggestion if the object is sufficiently large.
 272          *
 273          * The hardware cache alignment cannot override the specified
 274          * alignment though. If that is greater then use it.
 275          */
 276         if (flags & SLAB_HWCACHE_ALIGN) {
 277                 unsigned int ralign;
 278
 279                 ralign = cache_line_size();
 280                 while (size <= ralign / 2)
 281                         ralign /= 2;
 282                 align = max(align, ralign);
 283         }
 284
 285         if (align < ARCH_SLAB_MINALIGN)
 286                 align = ARCH_SLAB_MINALIGN;
 287
 288         return ALIGN(align, sizeof(void *));
 289 }
 290
 291 /*
 292  * Find a mergeable slab cache
 293  */
 294 int slab_unmergeable(struct kmem_cache *s)
 295 {
 296         if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
 297                 return 1;
 298
 299         if (!is_root_cache(s))
 300                 return 1;
 301
 302         if (s->ctor)
 303                 return 1;
 304
 305         if (s->usersize)
 306                 return 1;
 307
 308         /*
 309          * We may have set a slab to be unmergeable during bootstrap.
 310          */
 311         if (s->refcount < 0)
 312                 return 1;
 313
 314         return 0;
 315 }
 316
 317 struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
 318                 slab_flags_t flags, const char *name, void (*ctor)(void *))
 319 {
 320         struct kmem_cache *s;
 321
 322         if (slab_nomerge)
 323                 return NULL;
 324
 325         if (ctor)
 326                 return NULL;
 327
 328         size = ALIGN(size, sizeof(void *));
 329         align = calculate_alignment(flags, align, size);
 330         size = ALIGN(size, align);
 331         flags = kmem_cache_flags(size, flags, name, NULL);
 332
 333         if (flags & SLAB_NEVER_MERGE)
 334                 return NULL;
 335
 336         list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
 337                 if (slab_unmergeable(s))
 338                         continue;
 339
 340                 if (size > s->size)
 341                         continue;
 342
 343                 if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
 344                         continue;
 345                 /*
 346                  * Check if alignment is compatible.
 347                  * Courtesy of Adrian Drzewiecki
 348                  */
 349                 if ((s->size & ~(align - 1)) != s->size)
 350                         continue;
 351
 352                 if (s->size - size >= sizeof(void *))
 353                         continue;
 354
 355                 if (IS_ENABLED(CONFIG_SLAB) && align &&
 356                         (align > s->align || s->align % align))
 357                         continue;
 358
 359                 return s;
 360         }
 361         return NULL;
 362 }
 363
 364 static struct kmem_cache *create_cache(const char *name,
 365                 unsigned int object_size, unsigned int align,
 366                 slab_flags_t flags, unsigned int useroffset,
 367                 unsigned int usersize, void (*ctor)(void *),
 368                 struct mem_cgroup *memcg, struct kmem_cache *root_cache)
 369 {
 370         struct kmem_cache *s;
 371         int err;
 372
 373         if (WARN_ON(useroffset + usersize > object_size))
 374                 useroffset = usersize = 0;
 375
 376         err = -ENOMEM;
 377         s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
 378         if (!s)
 379                 goto out;
 380
 381         s->name = name;
 382         s->size = s->object_size = object_size;
 383         s->align = align;
 384         s->ctor = ctor;
 385         s->useroffset = useroffset;
 386         s->usersize = usersize;
 387
 388         err = init_memcg_params(s, memcg, root_cache);
 389         if (err)
 390                 goto out_free_cache;
 391
 392         err = __kmem_cache_create(s, flags);
 393         if (err)
 394                 goto out_free_cache;
 395
 396         s->refcount = 1;
 397         list_add(&s->list, &slab_caches);
 398         memcg_link_cache(s);
 399 out:
 400         if (err)
 401                 return ERR_PTR(err);
 402         return s;
 403
 404 out_free_cache:
 405         destroy_memcg_params(s);
 406         kmem_cache_free(kmem_cache, s);
 407         goto out;
 408 }
 409
 410 /*
 411  * kmem_cache_create_usercopy - Create a cache.
 412  * @name: A string which is used in /proc/slabinfo to identify this cache.
 413  * @size: The size of objects to be created in this cache.
 414  * @align: The required alignment for the objects.
 415  * @flags: SLAB flags
 416  * @useroffset: Usercopy region offset
 417  * @usersize: Usercopy region size
 418  * @ctor: A constructor for the objects.
 419  *
 420  * Returns a ptr to the cache on success, NULL on failure.
 421  * Cannot be called within a interrupt, but can be interrupted.
 422  * The @ctor is run when new pages are allocated by the cache.
 423  *
 424  * The flags are
 425  *
 426  * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 427  * to catch references to uninitialised memory.
 428  *
 429  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 430  * for buffer overruns.
 431  *
 432  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 433  * cacheline.  This can be beneficial if you're counting cycles as closely
 434  * as davem.
 435  */
 436 struct kmem_cache *
 437 kmem_cache_create_usercopy(const char *name,
 438                   unsigned int size, unsigned int align,
 439                   slab_flags_t flags,
 440                   unsigned int useroffset, unsigned int usersize,
 441                   void (*ctor)(void *))
 442 {
 443         struct kmem_cache *s = NULL;
 444         const char *cache_name;
 445         int err;
 446
 447         get_online_cpus();
 448         get_online_mems();
 449         memcg_get_cache_ids();
 450
 451         mutex_lock(&slab_mutex);
 452
 453         err = kmem_cache_sanity_check(name, size);
 454         if (err) {
 455                 goto out_unlock;
 456         }
 457
 458         /* Refuse requests with allocator specific flags */
 459         if (flags & ~SLAB_FLAGS_PERMITTED) {
 460                 err = -EINVAL;
 461                 goto out_unlock;
 462         }
 463
 464         /*
 465          * Some allocators will constraint the set of valid flags to a subset
 466          * of all flags. We expect them to define CACHE_CREATE_MASK in this
 467          * case, and we'll just provide them with a sanitized version of the
 468          * passed flags.
 469          */
 470         flags &= CACHE_CREATE_MASK;
 471
 472         /* Fail closed on bad usersize of useroffset values. */
 473         if (WARN_ON(!usersize && useroffset) ||
 474             WARN_ON(size < usersize || size - usersize < useroffset))
 475                 usersize = useroffset = 0;
 476
 477         if (!usersize)
 478                 s = __kmem_cache_alias(name, size, align, flags, ctor);
 479         if (s)
 480                 goto out_unlock;
 481
 482         cache_name = kstrdup_const(name, GFP_KERNEL);
 483         if (!cache_name) {
 484                 err = -ENOMEM;
 485                 goto out_unlock;
 486         }
 487
 488         s = create_cache(cache_name, size,
 489                          calculate_alignment(flags, align, size),
 490                          flags, useroffset, usersize, ctor, NULL, NULL);
 491         if (IS_ERR(s)) {
 492                 err = PTR_ERR(s);
 493                 kfree_const(cache_name);
 494         }
 495
 496 out_unlock:
 497         mutex_unlock(&slab_mutex);
 498
 499         memcg_put_cache_ids();
 500         put_online_mems();
 501         put_online_cpus();
 502
 503         if (err) {
 504                 if (flags & SLAB_PANIC)
 505                         panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
 506                                 name, err);
 507                 else {
 508                         pr_warn("kmem_cache_create(%s) failed with error %d\n",
 509                                 name, err);
 510                         dump_stack();
 511                 }
 512                 return NULL;
 513         }
 514         return s;
 515 }
 516 EXPORT_SYMBOL(kmem_cache_create_usercopy);
 517
 518 struct kmem_cache *
 519 kmem_cache_create(const char *name, unsigned int size, unsigned int align,
 520                 slab_flags_t flags, void (*ctor)(void *))
 521 {
 522         return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
 523                                           ctor);
 524 }
 525 EXPORT_SYMBOL(kmem_cache_create);
 526
 527 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 528 {
 529         LIST_HEAD(to_destroy);
 530         struct kmem_cache *s, *s2;
 531
 532         /*
 533          * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
 534          * @slab_caches_to_rcu_destroy list.  The slab pages are freed
 535          * through RCU and and the associated kmem_cache are dereferenced
 536          * while freeing the pages, so the kmem_caches should be freed only
 537          * after the pending RCU operations are finished.  As rcu_barrier()
 538          * is a pretty slow operation, we batch all pending destructions
 539          * asynchronously.
 540          */
 541         mutex_lock(&slab_mutex);
 542         list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
 543         mutex_unlock(&slab_mutex);
 544
 545         if (list_empty(&to_destroy))
 546                 return;
 547
 548         rcu_barrier();
 549
 550         list_for_each_entry_safe(s, s2, &to_destroy, list) {
 551 #ifdef SLAB_SUPPORTS_SYSFS
 552                 sysfs_slab_release(s);
 553 #else
 554                 slab_kmem_cache_release(s);
 555 #endif
 556         }
 557 }
 558
 559 static int shutdown_cache(struct kmem_cache *s)
 560 {
 561         /* free asan quarantined objects */
 562         kasan_cache_shutdown(s);
 563
 564         if (__kmem_cache_shutdown(s) != 0)
 565                 return -EBUSY;
 566
 567         memcg_unlink_cache(s);
 568         list_del(&s->list);
 569
 570         if (s->flags & SLAB_TYPESAFE_BY_RCU) {
 571 #ifdef SLAB_SUPPORTS_SYSFS
 572                 sysfs_slab_unlink(s);
 573 #endif
 574                 list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
 575                 schedule_work(&slab_caches_to_rcu_destroy_work);
 576         } else {
 577 #ifdef SLAB_SUPPORTS_SYSFS
 578                 sysfs_slab_unlink(s);
 579                 sysfs_slab_release(s);
 580 #else
 581                 slab_kmem_cache_release(s);
 582 #endif
 583         }
 584
 585         return 0;
 586 }
 587
 588 #ifdef CONFIG_MEMCG_KMEM
 589 /*
 590  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
 591  * @memcg: The memory cgroup the new cache is for.
 592  * @root_cache: The parent of the new cache.
 593  *
 594  * This function attempts to create a kmem cache that will serve allocation
 595  * requests going from @memcg to @root_cache. The new cache inherits properties
 596  * from its parent.
 597  */
 598 void memcg_create_kmem_cache(struct mem_cgroup *memcg,
 599                              struct kmem_cache *root_cache)
 600 {
 601         static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
 602         struct cgroup_subsys_state *css = &memcg->css;
 603         struct memcg_cache_array *arr;
 604         struct kmem_cache *s = NULL;
 605         char *cache_name;
 606         int idx;
 607
 608         get_online_cpus();
 609         get_online_mems();
 610
 611         mutex_lock(&slab_mutex);
 612
 613         /*
 614          * The memory cgroup could have been offlined while the cache
 615          * creation work was pending.
 616          */
 617         if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
 618                 goto out_unlock;
 619
 620         idx = memcg_cache_id(memcg);
 621         arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
 622                                         lockdep_is_held(&slab_mutex));
 623
 624         /*
 625          * Since per-memcg caches are created asynchronously on first
 626          * allocation (see memcg_kmem_get_cache()), several threads can try to
 627          * create the same cache, but only one of them may succeed.
 628          */
 629         if (arr->entries[idx])
 630                 goto out_unlock;
 631
 632         cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
 633         cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
 634                                css->serial_nr, memcg_name_buf);
 635         if (!cache_name)
 636                 goto out_unlock;
 637
 638         s = create_cache(cache_name, root_cache->object_size,
 639                          root_cache->align,
 640                          root_cache->flags & CACHE_CREATE_MASK,
 641                          root_cache->useroffset, root_cache->usersize,
 642                          root_cache->ctor, memcg, root_cache);
 643         /*
 644          * If we could not create a memcg cache, do not complain, because
 645          * that's not critical at all as we can always proceed with the root
 646          * cache.
 647          */
 648         if (IS_ERR(s)) {
 649                 kfree(cache_name);
 650                 goto out_unlock;
 651         }
 652
 653         /*
 654          * Since readers won't lock (see cache_from_memcg_idx()), we need a
 655          * barrier here to ensure nobody will see the kmem_cache partially
 656          * initialized.
 657          */
 658         smp_wmb();
 659         arr->entries[idx] = s;
 660
 661 out_unlock:
 662         mutex_unlock(&slab_mutex);
 663
 664         put_online_mems();
 665         put_online_cpus();
 666 }
 667
 668 static void kmemcg_deactivate_workfn(struct work_struct *work)
 669 {
 670         struct kmem_cache *s = container_of(work, struct kmem_cache,
 671                                             memcg_params.deact_work);
 672
 673         get_online_cpus();
 674         get_online_mems();
 675
 676         mutex_lock(&slab_mutex);
 677
 678         s->memcg_params.deact_fn(s);
 679
 680         mutex_unlock(&slab_mutex);
 681
 682         put_online_mems();
 683         put_online_cpus();
 684
 685         /* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
 686         css_put(&s->memcg_params.memcg->css);
 687 }
 688
 689 static void kmemcg_deactivate_rcufn(struct rcu_head *head)
 690 {
 691         struct kmem_cache *s = container_of(head, struct kmem_cache,
 692                                             memcg_params.deact_rcu_head);
 693
 694         /*
 695          * We need to grab blocking locks.  Bounce to ->deact_work.  The
 696          * work item shares the space with the RCU head and can't be
 697          * initialized eariler.
 698          */
 699         INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
 700         queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
 701 }
 702
 703 /**
 704  * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
 705  *                                         sched RCU grace period
 706  * @s: target kmem_cache
 707  * @deact_fn: deactivation function to call
 708  *
 709  * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
 710  * held after a sched RCU grace period.  The slab is guaranteed to stay
 711  * alive until @deact_fn is finished.  This is to be used from
 712  * __kmemcg_cache_deactivate().
 713  */
 714 void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
 715                                            void (*deact_fn)(struct kmem_cache *))
 716 {
 717         if (WARN_ON_ONCE(is_root_cache(s)) ||
 718             WARN_ON_ONCE(s->memcg_params.deact_fn))
 719                 return;
 720
 721         /*
 722          * memcg_kmem_wq_lock is used to synchronize memcg_params.dying
 723          * flag and make sure that no new kmem_cache deactivation tasks
 724          * are queued (see flush_memcg_workqueue() ).
 725          */
 726         spin_lock_irq(&memcg_kmem_wq_lock);
 727         if (s->memcg_params.root_cache->memcg_params.dying)
 728                 goto unlock;
 729
 730         /* pin memcg so that @s doesn't get destroyed in the middle */
 731         css_get(&s->memcg_params.memcg->css);
 732
 733         s->memcg_params.deact_fn = deact_fn;
 734         call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
 735 unlock:
 736         spin_unlock_irq(&memcg_kmem_wq_lock);
 737 }
 738
 739 void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 740 {
 741         int idx;
 742         struct memcg_cache_array *arr;
 743         struct kmem_cache *s, *c;
 744
 745         idx = memcg_cache_id(memcg);
 746
 747         get_online_cpus();
 748         get_online_mems();
 749
 750         mutex_lock(&slab_mutex);
 751         list_for_each_entry(s, &slab_root_caches, root_caches_node) {
 752                 arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
 753                                                 lockdep_is_held(&slab_mutex));
 754                 c = arr->entries[idx];
 755                 if (!c)
 756                         continue;
 757
 758                 __kmemcg_cache_deactivate(c);
 759                 arr->entries[idx] = NULL;
 760         }
 761         mutex_unlock(&slab_mutex);
 762
 763         put_online_mems();
 764         put_online_cpus();
 765 }
 766
 767 void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
 768 {
 769         struct kmem_cache *s, *s2;
 770
 771         get_online_cpus();
 772         get_online_mems();
 773
 774         mutex_lock(&slab_mutex);
 775         list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
 776                                  memcg_params.kmem_caches_node) {
 777                 /*
 778                  * The cgroup is about to be freed and therefore has no charges
 779                  * left. Hence, all its caches must be empty by now.
 780                  */
 781                 BUG_ON(shutdown_cache(s));
 782         }
 783         mutex_unlock(&slab_mutex);
 784
 785         put_online_mems();
 786         put_online_cpus();
 787 }
 788
 789 static int shutdown_memcg_caches(struct kmem_cache *s)
 790 {
 791         struct memcg_cache_array *arr;
 792         struct kmem_cache *c, *c2;
 793         LIST_HEAD(busy);
 794         int i;
 795
 796         BUG_ON(!is_root_cache(s));
 797
 798         /*
 799          * First, shutdown active caches, i.e. caches that belong to online
 800          * memory cgroups.
 801          */
 802         arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
 803                                         lockdep_is_held(&slab_mutex));
 804         for_each_memcg_cache_index(i) {
 805                 c = arr->entries[i];
 806                 if (!c)
 807                         continue;
 808                 if (shutdown_cache(c))
 809                         /*
 810                          * The cache still has objects. Move it to a temporary
 811                          * list so as not to try to destroy it for a second
 812                          * time while iterating over inactive caches below.
 813                          */
 814                         list_move(&c->memcg_params.children_node, &busy);
 815                 else
 816                         /*
 817                          * The cache is empty and will be destroyed soon. Clear
 818                          * the pointer to it in the memcg_caches array so that
 819                          * it will never be accessed even if the root cache
 820                          * stays alive.
 821                          */
 822                         arr->entries[i] = NULL;
 823         }
 824
 825         /*
 826          * Second, shutdown all caches left from memory cgroups that are now
 827          * offline.
 828          */
 829         list_for_each_entry_safe(c, c2, &s->memcg_params.children,
 830                                  memcg_params.children_node)
 831                 shutdown_cache(c);
 832
 833         list_splice(&busy, &s->memcg_params.children);
 834
 835         /*
 836          * A cache being destroyed must be empty. In particular, this means
 837          * that all per memcg caches attached to it must be empty too.
 838          */
 839         if (!list_empty(&s->memcg_params.children))
 840                 return -EBUSY;
 841         return 0;
 842 }
 843
 844 static void memcg_set_kmem_cache_dying(struct kmem_cache *s)
 845 {
 846         spin_lock_irq(&memcg_kmem_wq_lock);
 847         s->memcg_params.dying = true;
 848         spin_unlock_irq(&memcg_kmem_wq_lock);
 849 }
 850
 851 static void flush_memcg_workqueue(struct kmem_cache *s)
 852 {
 853         /*
 854          * SLUB deactivates the kmem_caches through call_rcu_sched. Make
 855          * sure all registered rcu callbacks have been invoked.
 856          */
 857         if (IS_ENABLED(CONFIG_SLUB))
 858                 rcu_barrier_sched();
 859
 860         /*
 861          * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
 862          * deactivates the memcg kmem_caches through workqueue. Make sure all
 863          * previous workitems on workqueue are processed.
 864          */
 865         if (likely(memcg_kmem_cache_wq))
 866                 flush_workqueue(memcg_kmem_cache_wq);
 867 }
 868 #else
 869 static inline int shutdown_memcg_caches(struct kmem_cache *s)
 870 {
 871         return 0;
 872 }
 873 #endif /* CONFIG_MEMCG_KMEM */
 874
 875 void slab_kmem_cache_release(struct kmem_cache *s)
 876 {
 877         __kmem_cache_release(s);
 878         destroy_memcg_params(s);
 879         kfree_const(s->name);
 880         kmem_cache_free(kmem_cache, s);
 881 }
 882
 883 void kmem_cache_destroy(struct kmem_cache *s)
 884 {
 885         int err;
 886
 887         if (unlikely(!s))
 888                 return;
 889
 890         get_online_cpus();
 891         get_online_mems();
 892
 893         mutex_lock(&slab_mutex);
 894
 895         s->refcount--;
 896         if (s->refcount)
 897                 goto out_unlock;
 898
 899 #ifdef CONFIG_MEMCG_KMEM
 900         memcg_set_kmem_cache_dying(s);
 901
 902         mutex_unlock(&slab_mutex);
 903
 904         put_online_mems();
 905         put_online_cpus();
 906
 907         flush_memcg_workqueue(s);
 908
 909         get_online_cpus();
 910         get_online_mems();
 911
 912         mutex_lock(&slab_mutex);
 913
 914         /*
 915          * Another thread referenced it again
 916          */
 917         if (READ_ONCE(s->refcount)) {
 918                 spin_lock_irq(&memcg_kmem_wq_lock);
 919                 s->memcg_params.dying = false;
 920                 spin_unlock_irq(&memcg_kmem_wq_lock);
 921                 goto out_unlock;
 922         }
 923 #endif
 924
 925         err = shutdown_memcg_caches(s);
 926         if (!err)
 927                 err = shutdown_cache(s);
 928
 929         if (err) {
 930                 pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
 931                        s->name);
 932                 dump_stack();
 933         }
 934 out_unlock:
 935         mutex_unlock(&slab_mutex);
 936
 937         put_online_mems();
 938         put_online_cpus();
 939 }
 940 EXPORT_SYMBOL(kmem_cache_destroy);
 941
 942 /**
 943  * kmem_cache_shrink - Shrink a cache.
 944  * @cachep: The cache to shrink.
 945  *
 946  * Releases as many slabs as possible for a cache.
 947  * To help debugging, a zero exit status indicates all slabs were released.
 948  */
 949 int kmem_cache_shrink(struct kmem_cache *cachep)
 950 {
 951         int ret;
 952
 953         get_online_cpus();
 954         get_online_mems();
 955         kasan_cache_shrink(cachep);
 956         ret = __kmem_cache_shrink(cachep);
 957         put_online_mems();
 958         put_online_cpus();
 959         return ret;
 960 }
 961 EXPORT_SYMBOL(kmem_cache_shrink);
 962
 963 bool slab_is_available(void)
 964 {
 965         return slab_state >= UP;
 966 }
 967
 968 #ifndef CONFIG_SLOB
 969 /* Create a cache during boot when no slab services are available yet */
 970 void __init create_boot_cache(struct kmem_cache *s, const char *name,
 971                 unsigned int size, slab_flags_t flags,
 972                 unsigned int useroffset, unsigned int usersize)
 973 {
 974         int err;
 975
 976         s->name = name;
 977         s->size = s->object_size = size;
 978         s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
 979         s->useroffset = useroffset;
 980         s->usersize = usersize;
 981
 982         slab_init_memcg_params(s);
 983
 984         err = __kmem_cache_create(s, flags);
 985
 986         if (err)
 987                 panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n",
 988                                         name, size, err);
 989
 990         s->refcount = -1;       /* Exempt from merging for now */
 991 }
 992
 993 struct kmem_cache *__init create_kmalloc_cache(const char *name,
 994                 unsigned int size, slab_flags_t flags,
 995                 unsigned int useroffset, unsigned int usersize)
 996 {
 997         struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
 998
 999         if (!s)
1000                 panic("Out of memory when creating slab %s\n", name);
1001
1002         create_boot_cache(s, name, size, flags, useroffset, usersize);
1003         list_add(&s->list, &slab_caches);
1004         memcg_link_cache(s);
1005         s->refcount = 1;
1006         return s;
1007 }
1008
1009 struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
1010 EXPORT_SYMBOL(kmalloc_caches);
1011
1012 #ifdef CONFIG_ZONE_DMA
1013 struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
1014 EXPORT_SYMBOL(kmalloc_dma_caches);
1015 #endif
1016
1017 /*
1018  * Conversion table for small slabs sizes / 8 to the index in the
1019  * kmalloc array. This is necessary for slabs < 192 since we have non power
1020  * of two cache sizes there. The size of larger slabs can be determined using
1021  * fls.
1022  */
1023 static u8 size_index[24] __ro_after_init = {
1024         3,      /* 8 */
1025         4,      /* 16 */
1026         5,      /* 24 */
1027         5,      /* 32 */
1028         6,      /* 40 */
1029         6,      /* 48 */
1030         6,      /* 56 */
1031         6,      /* 64 */
1032         1,      /* 72 */
1033         1,      /* 80 */
1034         1,      /* 88 */
1035         1,      /* 96 */
1036         7,      /* 104 */
1037         7,      /* 112 */
1038         7,      /* 120 */
1039         7,      /* 128 */
1040         2,      /* 136 */
1041         2,      /* 144 */
1042         2,      /* 152 */
1043         2,      /* 160 */
1044         2,      /* 168 */
1045         2,      /* 176 */
1046         2,      /* 184 */
1047         2       /* 192 */
1048 };
1049
/* Map a request size in bytes to its slot in size_index[]: (bytes - 1) / 8. */
static inline unsigned int size_index_elem(unsigned int bytes)
{
        return (bytes - 1) >> 3;
}
1054
1055 /*
1056  * Find the kmem_cache structure that serves a given size of
1057  * allocation
1058  */
1059 struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
1060 {
1061         unsigned int index;
1062
1063         if (size <= 192) {
1064                 if (!size)
1065                         return ZERO_SIZE_PTR;
1066
1067                 index = size_index[size_index_elem(size)];
1068         } else {
1069                 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
1070                         WARN_ON(1);
1071                         return NULL;
1072                 }
1073                 index = fls(size - 1);
1074         }
1075
1076 #ifdef CONFIG_ZONE_DMA
1077         if (unlikely((flags & GFP_DMA)))
1078                 return kmalloc_dma_caches[index];
1079
1080 #endif
1081         return kmalloc_caches[index];
1082 }
1083
1084 /*
1085  * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
1086  * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
1087  * kmalloc-67108864.
1088  */
1089 const struct kmalloc_info_struct kmalloc_info[] __initconst = {
1090         {NULL,                      0},         {"kmalloc-96",             96},
1091         {"kmalloc-192",           192},         {"kmalloc-8",               8},
1092         {"kmalloc-16",             16},         {"kmalloc-32",             32},
1093         {"kmalloc-64",             64},         {"kmalloc-128",           128},
1094         {"kmalloc-256",           256},         {"kmalloc-512",           512},
1095         {"kmalloc-1024",         1024},         {"kmalloc-2048",         2048},
1096         {"kmalloc-4096",         4096},         {"kmalloc-8192",         8192},
1097         {"kmalloc-16384",       16384},         {"kmalloc-32768",       32768},
1098         {"kmalloc-65536",       65536},         {"kmalloc-131072",     131072},
1099         {"kmalloc-262144",     262144},         {"kmalloc-524288",     524288},
1100         {"kmalloc-1048576",   1048576},         {"kmalloc-2097152",   2097152},
1101         {"kmalloc-4194304",   4194304},         {"kmalloc-8388608",   8388608},
1102         {"kmalloc-16777216", 16777216},         {"kmalloc-33554432", 33554432},
1103         {"kmalloc-67108864", 67108864}
1104 };
1105
1106 /*
1107  * Patch up the size_index table if we have strange large alignment
1108  * requirements for the kmalloc array. This is only the case for
1109  * MIPS it seems. The standard arches will not generate any code here.
1110  *
1111  * Largest permitted alignment is 256 bytes due to the way we
1112  * handle the index determination for the smaller caches.
1113  *
1114  * Make sure that nothing crazy happens if someone starts tinkering
1115  * around with ARCH_KMALLOC_MINALIGN
1116  */
1117 void __init setup_kmalloc_cache_index_table(void)
1118 {
1119         unsigned int i;
1120
1121         BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
1122                 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
1123
1124         for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
1125                 unsigned int elem = size_index_elem(i);
1126
1127                 if (elem >= ARRAY_SIZE(size_index))
1128                         break;
1129                 size_index[elem] = KMALLOC_SHIFT_LOW;
1130         }
1131
1132         if (KMALLOC_MIN_SIZE >= 64) {
1133                 /*
1134                  * The 96 byte size cache is not used if the alignment
1135                  * is 64 byte.
1136                  */
1137                 for (i = 64 + 8; i <= 96; i += 8)
1138                         size_index[size_index_elem(i)] = 7;
1139
1140         }
1141
1142         if (KMALLOC_MIN_SIZE >= 128) {
1143                 /*
1144                  * The 192 byte sized cache is not used if the alignment
1145                  * is 128 byte. Redirect kmalloc to use the 256 byte cache
1146                  * instead.
1147                  */
1148                 for (i = 128 + 8; i <= 192; i += 8)
1149                         size_index[size_index_elem(i)] = 8;
1150         }
1151 }
1152
1153 static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
1154 {
1155         kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
1156                                         kmalloc_info[idx].size, flags, 0,
1157                                         kmalloc_info[idx].size);
1158 }
1159
1160 /*
1161  * Create the kmalloc array. Some of the regular kmalloc arrays
1162  * may already have been created because they were needed to
1163  * enable allocations for slab creation.
1164  */
1165 void __init create_kmalloc_caches(slab_flags_t flags)
1166 {
1167         int i;
1168
1169         for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
1170                 if (!kmalloc_caches[i])
1171                         new_kmalloc_cache(i, flags);
1172
1173                 /*
1174                  * Caches that are not of the two-to-the-power-of size.
1175                  * These have to be created immediately after the
1176                  * earlier power of two caches
1177                  */
1178                 if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
1179                         new_kmalloc_cache(1, flags);
1180                 if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
1181                         new_kmalloc_cache(2, flags);
1182         }
1183
1184         /* Kmalloc array is now usable */
1185         slab_state = UP;
1186
1187 #ifdef CONFIG_ZONE_DMA
1188         for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
1189                 struct kmem_cache *s = kmalloc_caches[i];
1190
1191                 if (s) {
1192                         unsigned int size = kmalloc_size(i);
1193                         char *n = kasprintf(GFP_NOWAIT,
1194                                  "dma-kmalloc-%u", size);
1195
1196                         BUG_ON(!n);
1197                         kmalloc_dma_caches[i] = create_kmalloc_cache(n,
1198                                 size, SLAB_CACHE_DMA | flags, 0, 0);
1199                 }
1200         }
1201 #endif
1202 }
1203 #endif /* !CONFIG_SLOB */
1204
1205 /*
1206  * To avoid unnecessary overhead, we pass through large allocation requests
1207  * directly to the page allocator. We use __GFP_COMP, because we will need to
1208  * know the allocation order to free the pages properly in kfree.
1209  */
1210 void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
1211 {
1212         void *ret;
1213         struct page *page;
1214
1215         flags |= __GFP_COMP;
1216         page = alloc_pages(flags, order);
1217         ret = page ? page_address(page) : NULL;
1218         kmemleak_alloc(ret, size, 1, flags);
1219         kasan_kmalloc_large(ret, size, flags);
1220         return ret;
1221 }
1222 EXPORT_SYMBOL(kmalloc_order);
1223
1224 #ifdef CONFIG_TRACING
1225 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
1226 {
1227         void *ret = kmalloc_order(size, flags, order);
1228         trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
1229         return ret;
1230 }
1231 EXPORT_SYMBOL(kmalloc_order_trace);
1232 #endif
1233
1234 #ifdef CONFIG_SLAB_FREELIST_RANDOM
/*
 * Randomize a generic freelist.
 *
 * Fills @list[0..@count-1] with the identity permutation and then shuffles
 * it in place with numbers drawn from @state.
 *
 * The shuffle counts down from @count rather than from @count - 1, so a
 * @count of 0 cannot wrap the unsigned index around and walk off the array.
 * For every other @count the sequence of draws and swaps is unchanged.
 */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
                               unsigned int count)
{
        unsigned int rand;
        unsigned int i;

        for (i = 0; i < count; i++)
                list[i] = i;

        /* Fisher-Yates shuffle: slot i - 1 swaps with a random slot below i. */
        for (i = count; i > 1; i--) {
                rand = prandom_u32_state(state);
                rand %= i;
                swap(list[i - 1], list[rand]);
        }
}
1252
1253 /* Create a random sequence per cache */
1254 int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
1255                                     gfp_t gfp)
1256 {
1257         struct rnd_state state;
1258
1259         if (count < 2 || cachep->random_seq)
1260                 return 0;
1261
1262         cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
1263         if (!cachep->random_seq)
1264                 return -ENOMEM;
1265
1266         /* Get best entropy at this stage of boot */
1267         prandom_seed_state(&state, get_random_long());
1268
1269         freelist_randomize(&state, cachep->random_seq, count);
1270         return 0;
1271 }
1272
1273 /* Destroy the per-cache random freelist sequence */
1274 void cache_random_seq_destroy(struct kmem_cache *cachep)
1275 {
1276         kfree(cachep->random_seq);
1277         cachep->random_seq = NULL;
1278 }
1279 #endif /* CONFIG_SLAB_FREELIST_RANDOM */
1280
1281 #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
/*
 * Mode of /proc/slabinfo: owner read/write with CONFIG_SLAB, owner read-only
 * otherwise.
 */
#if defined(CONFIG_SLAB)
#define SLABINFO_RIGHTS (0600)
#else
#define SLABINFO_RIGHTS (0400)
#endif
1287
1288 static void print_slabinfo_header(struct seq_file *m)
1289 {
1290         /*
1291          * Output format version, so at least we can change it
1292          * without _too_ many complaints.
1293          */
1294 #ifdef CONFIG_DEBUG_SLAB
1295         seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
1296 #else
1297         seq_puts(m, "slabinfo - version: 2.1\n");
1298 #endif
1299         seq_puts(m, "# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
1300         seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
1301         seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
1302 #ifdef CONFIG_DEBUG_SLAB
1303         seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
1304         seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
1305 #endif
1306         seq_putc(m, '\n');
1307 }
1308
1309 void *slab_start(struct seq_file *m, loff_t *pos)
1310 {
1311         mutex_lock(&slab_mutex);
1312         return seq_list_start(&slab_root_caches, *pos);
1313 }
1314
1315 void *slab_next(struct seq_file *m, void *p, loff_t *pos)
1316 {
1317         return seq_list_next(p, &slab_root_caches, pos);
1318 }
1319
1320 void slab_stop(struct seq_file *m, void *p)
1321 {
1322         mutex_unlock(&slab_mutex);
1323 }
1324
1325 static void
1326 memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
1327 {
1328         struct kmem_cache *c;
1329         struct slabinfo sinfo;
1330
1331         if (!is_root_cache(s))
1332                 return;
1333
1334         for_each_memcg_cache(c, s) {
1335                 memset(&sinfo, 0, sizeof(sinfo));
1336                 get_slabinfo(c, &sinfo);
1337
1338                 info->active_slabs += sinfo.active_slabs;
1339                 info->num_slabs += sinfo.num_slabs;
1340                 info->shared_avail += sinfo.shared_avail;
1341                 info->active_objs += sinfo.active_objs;
1342                 info->num_objs += sinfo.num_objs;
1343         }
1344 }
1345
1346 static void cache_show(struct kmem_cache *s, struct seq_file *m)
1347 {
1348         struct slabinfo sinfo;
1349
1350         memset(&sinfo, 0, sizeof(sinfo));
1351         get_slabinfo(s, &sinfo);
1352
1353         memcg_accumulate_slabinfo(s, &sinfo);
1354
1355         seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
1356                    cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
1357                    sinfo.objects_per_slab, (1 << sinfo.cache_order));
1358
1359         seq_printf(m, " : tunables %4u %4u %4u",
1360                    sinfo.limit, sinfo.batchcount, sinfo.shared);
1361         seq_printf(m, " : slabdata %6lu %6lu %6lu",
1362                    sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
1363         slabinfo_show_stats(m, s);
1364         seq_putc(m, '\n');
1365 }
1366
1367 static int slab_show(struct seq_file *m, void *p)
1368 {
1369         struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);
1370
1371         if (p == slab_root_caches.next)
1372                 print_slabinfo_header(m);
1373         cache_show(s, m);
1374         return 0;
1375 }
1376
1377 void dump_unreclaimable_slab(void)
1378 {
1379         struct kmem_cache *s, *s2;
1380         struct slabinfo sinfo;
1381
1382         /*
1383          * Here acquiring slab_mutex is risky since we don't prefer to get
1384          * sleep in oom path. But, without mutex hold, it may introduce a
1385          * risk of crash.
1386          * Use mutex_trylock to protect the list traverse, dump nothing
1387          * without acquiring the mutex.
1388          */
1389         if (!mutex_trylock(&slab_mutex)) {
1390                 pr_warn("excessive unreclaimable slab but cannot dump stats\n");
1391                 return;
1392         }
1393
1394         pr_info("Unreclaimable slab info:\n");
1395         pr_info("Name                      Used          Total\n");
1396
1397         list_for_each_entry_safe(s, s2, &slab_caches, list) {
1398                 if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
1399                         continue;
1400
1401                 get_slabinfo(s, &sinfo);
1402
1403                 if (sinfo.num_objs > 0)
1404                         pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
1405                                 (sinfo.active_objs * s->size) / 1024,
1406                                 (sinfo.num_objs * s->size) / 1024);
1407         }
1408         mutex_unlock(&slab_mutex);
1409 }
1410
1411 #if defined(CONFIG_MEMCG)
1412 void *memcg_slab_start(struct seq_file *m, loff_t *pos)
1413 {
1414         struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1415
1416         mutex_lock(&slab_mutex);
1417         return seq_list_start(&memcg->kmem_caches, *pos);
1418 }
1419
1420 void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
1421 {
1422         struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1423
1424         return seq_list_next(p, &memcg->kmem_caches, pos);
1425 }
1426
1427 void memcg_slab_stop(struct seq_file *m, void *p)
1428 {
1429         mutex_unlock(&slab_mutex);
1430 }
1431
1432 int memcg_slab_show(struct seq_file *m, void *p)
1433 {
1434         struct kmem_cache *s = list_entry(p, struct kmem_cache,
1435                                           memcg_params.kmem_caches_node);
1436         struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1437
1438         if (p == memcg->kmem_caches.next)
1439                 print_slabinfo_header(m);
1440         cache_show(s, m);
1441         return 0;
1442 }
1443 #endif
1444
1445 /*
1446  * slabinfo_op - iterator that generates /proc/slabinfo
1447  *
1448  * Output layout:
1449  * cache-name
1450  * num-active-objs
1451  * total-objs
1452  * object size
1453  * num-active-slabs
1454  * total-slabs
1455  * num-pages-per-slab
1456  * + further values on SMP and with statistics enabled
1457  */
1458 static const struct seq_operations slabinfo_op = {
1459         .start = slab_start,
1460         .next = slab_next,
1461         .stop = slab_stop,
1462         .show = slab_show,
1463 };
1464
1465 static int slabinfo_open(struct inode *inode, struct file *file)
1466 {
1467         return seq_open(file, &slabinfo_op);
1468 }
1469
1470 static const struct file_operations proc_slabinfo_operations = {
1471         .open           = slabinfo_open,
1472         .read           = seq_read,
1473         .write          = slabinfo_write,
1474         .llseek         = seq_lseek,
1475         .release        = seq_release,
1476 };
1477
1478 static int __init slab_proc_init(void)
1479 {
1480         proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
1481                                                 &proc_slabinfo_operations);
1482         return 0;
1483 }
1484 module_init(slab_proc_init);
1485 #endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */
1486
1487 static __always_inline void *__do_krealloc(const void *p, size_t new_size,
1488                                            gfp_t flags)
1489 {
1490         void *ret;
1491         size_t ks = 0;
1492
1493         if (p)
1494                 ks = ksize(p);
1495
1496         if (ks >= new_size) {
1497                 kasan_krealloc((void *)p, new_size, flags);
1498                 return (void *)p;
1499         }
1500
1501         ret = kmalloc_track_caller(new_size, flags);
1502         if (ret && p)
1503                 memcpy(ret, p, ks);
1504
1505         return ret;
1506 }
1507
1508 /**
1509  * __krealloc - like krealloc() but don't free @p.
1510  * @p: object to reallocate memory for.
1511  * @new_size: how many bytes of memory are required.
1512  * @flags: the type of memory to allocate.
1513  *
1514  * This function is like krealloc() except it never frees the originally
1515  * allocated buffer. Use this if you don't want to free the buffer immediately
1516  * like, for example, with RCU.
1517  */
1518 void *__krealloc(const void *p, size_t new_size, gfp_t flags)
1519 {
1520         if (unlikely(!new_size))
1521                 return ZERO_SIZE_PTR;
1522
1523         return __do_krealloc(p, new_size, flags);
1524
1525 }
1526 EXPORT_SYMBOL(__krealloc);
1527
1528 /**
1529  * krealloc - reallocate memory. The contents will remain unchanged.
1530  * @p: object to reallocate memory for.
1531  * @new_size: how many bytes of memory are required.
1532  * @flags: the type of memory to allocate.
1533  *
1534  * The contents of the object pointed to are preserved up to the
1535  * lesser of the new and old sizes.  If @p is %NULL, krealloc()
1536  * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
1537  * %NULL pointer, the object pointed to is freed.
1538  */
1539 void *krealloc(const void *p, size_t new_size, gfp_t flags)
1540 {
1541         void *ret;
1542
1543         if (unlikely(!new_size)) {
1544                 kfree(p);
1545                 return ZERO_SIZE_PTR;
1546         }
1547
1548         ret = __do_krealloc(p, new_size, flags);
1549         if (ret && p != ret)
1550                 kfree(p);
1551
1552         return ret;
1553 }
1554 EXPORT_SYMBOL(krealloc);
1555
/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before freed.
 * If @p is %NULL (or the zero-size pointer), kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
        void *mem = (void *)p;

        if (unlikely(ZERO_OR_NULL_PTR(mem)))
                return;

        /* ksize() gives the full allocated size, not the requested one. */
        memzero_explicit(mem, ksize(mem));
        kfree(mem);
}
EXPORT_SYMBOL(kzfree);
1579
/* Export the kmem tracepoints: allocation side first, then the free side. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
1587
1588 int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
1589 {
1590         if (__should_failslab(s, gfpflags))
1591                 return -ENOMEM;
1592         return 0;
1593 }
1594 ALLOW_ERROR_INJECTION(should_failslab, ERRNO);