4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
19 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
21 * Copyright (c) 2012, 2015 Intel Corporation.
24 * This file is part of Lustre, http://www.lustre.org/
25 * Lustre is a trademark of Sun Microsystems, Inc.
27 * Author: liang@whamcloud.com
30 #define DEBUG_SUBSYSTEM S_LNET
32 #include <linux/cpu.h>
33 #include <linux/sched.h>
34 #include <linux/libcfs/libcfs.h>
39 * modparam for setting number of partitions
41 * 0 : estimate best value based on cores or NUMA nodes
42 * 1 : disable multiple partitions
43 * >1 : specify number of partitions
/* Read-only after load (perm 0444); ignored entirely when cpu_pattern
 * is set (see the NB on cpu_pattern below). */
45 static int cpu_npartitions;
46 module_param(cpu_npartitions, int, 0444);
47 MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
50 * modparam for setting CPU partitions patterns:
52 * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID,
53 * number in bracket is processor ID (core or HT)
55 * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket
56 * are NUMA node ID, number before bracket is CPU partition ID.
58 * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology
60 * NB: If user specified cpu_pattern, cpu_npartitions will be ignored
/* Default "N": one partition per online NUMA node, built by
 * cfs_cpt_table_create_pattern().  Read-only after load (0444). */
62 static char *cpu_pattern = "N";
63 module_param(cpu_pattern, charp, 0444);
64 MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
/* NOTE(review): the fields below are members of struct cfs_cpt_data;
 * the struct's opening/closing lines (and the spinlock field the first
 * comment refers to) are elided from this excerpt. */
67 /* serialize hotplug etc */
69 /* reserved for hotplug */
/* bumped by the hotplug callbacks; snapshotted into ctb_version at
 * table-alloc time and compared in cfs_cpu_init() to detect topology
 * changes during setup */
70 unsigned long cpt_version;
71 /* mutex to protect cpt_cpumask */
72 struct mutex cpt_mutex;
73 /* scratch buffer for set/unset_node */
74 cpumask_t *cpt_cpumask;
/* single module-global instance; zeroed and populated in cfs_cpu_init() */
77 static struct cfs_cpt_data cpt_data;
/* Copy the cpumask of NUMA node @node into the caller-supplied @mask.
 * NOTE(review): the branch handling a NULL result from
 * cpumask_of_node() sits on lines elided from this excerpt — confirm
 * against the full source before relying on @mask being cleared. */
80 cfs_node_to_cpumask(int node, cpumask_t *mask)
82 const cpumask_t *tmp = cpumask_of_node(node);
85 cpumask_copy(mask, tmp);
/* Free a CPT table and every buffer it owns.  Each member is
 * NULL-checked before freeing, so this is safe to call on a
 * partially-initialized table — it is the error path used by
 * cfs_cpt_table_alloc(). */
91 cfs_cpt_table_free(struct cfs_cpt_table *cptab)
95 if (cptab->ctb_cpu2cpt) {
96 LIBCFS_FREE(cptab->ctb_cpu2cpt,
98 sizeof(cptab->ctb_cpu2cpt[0]));
/* per-partition masks; ctb_parts check lives in the loop condition so
 * the loop is a no-op when ctb_parts was never allocated */
101 for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) {
102 struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
104 if (part->cpt_nodemask) {
105 LIBCFS_FREE(part->cpt_nodemask,
106 sizeof(*part->cpt_nodemask));
109 if (part->cpt_cpumask)
110 LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
113 if (cptab->ctb_parts) {
114 LIBCFS_FREE(cptab->ctb_parts,
115 cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
118 if (cptab->ctb_nodemask)
119 LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
120 if (cptab->ctb_cpumask)
121 LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
/* finally the table itself */
123 LIBCFS_FREE(cptab, sizeof(*cptab));
125 EXPORT_SYMBOL(cfs_cpt_table_free);
/* Allocate an empty CPT table with @ncpt partitions.
 * Returns the new table, or NULL on allocation failure (the failure
 * branches are elided here; they fall through to the
 * cfs_cpt_table_free() call at the bottom). */
127 struct cfs_cpt_table *
128 cfs_cpt_table_alloc(unsigned int ncpt)
130 struct cfs_cpt_table *cptab;
133 LIBCFS_ALLOC(cptab, sizeof(*cptab));
137 cptab->ctb_nparts = ncpt;
139 LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
140 LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
142 if (!cptab->ctb_cpumask || !cptab->ctb_nodemask)
/* ctb_cpu2cpt maps every possible CPU id to its partition */
145 LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
146 num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
147 if (!cptab->ctb_cpu2cpt)
/* -1 in every byte of an int == -1, i.e. "CPU not assigned yet" */
150 memset(cptab->ctb_cpu2cpt, -1,
151 num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
153 LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
154 if (!cptab->ctb_parts)
157 for (i = 0; i < ncpt; i++) {
158 struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
160 LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
161 LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
162 if (!part->cpt_cpumask || !part->cpt_nodemask)
/* snapshot the hotplug generation so callers can later detect that the
 * topology changed while the table was being populated */
166 spin_lock(&cpt_data.cpt_lock);
167 /* Reserved for hotplug */
168 cptab->ctb_version = cpt_data.cpt_version;
169 spin_unlock(&cpt_data.cpt_lock);
/* common error path: releases whatever was allocated above */
174 cfs_cpt_table_free(cptab);
177 EXPORT_SYMBOL(cfs_cpt_table_alloc);
/* Render the table into @buf (at most @len bytes) as one
 * "cpt-id\t: cpu cpu ..." line per partition.
 * NOTE(review): the buffer-advance and overflow/truncation checks
 * between the snprintf calls are elided from this excerpt. */
180 cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
187 for (i = 0; i < cptab->ctb_parts; i++) {
189 rc = snprintf(tmp, len, "%d\t: ", i);
199 for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
200 rc = snprintf(tmp, len, "%d ", j);
220 EXPORT_SYMBOL(cfs_cpt_table_print);
/* Number of partitions in @cptab. */
223 cfs_cpt_number(struct cfs_cpt_table *cptab)
225 return cptab->ctb_nparts;
227 EXPORT_SYMBOL(cfs_cpt_number);
/* Number of CPUs in partition @cpt, or in the whole table for
 * CFS_CPT_ANY. */
230 cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
232 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
234 return cpt == CFS_CPT_ANY ?
235 cpumask_weight(cptab->ctb_cpumask) :
236 cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
238 EXPORT_SYMBOL(cfs_cpt_weight);
/* Nonzero iff partition @cpt (or any partition, for CFS_CPT_ANY) still
 * contains at least one online CPU.  cpumask_any_and() returns
 * >= nr_cpu_ids when the intersection is empty. */
241 cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
243 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
245 return cpt == CFS_CPT_ANY ?
246 cpumask_any_and(cptab->ctb_cpumask,
247 cpu_online_mask) < nr_cpu_ids :
248 cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
249 cpu_online_mask) < nr_cpu_ids;
251 EXPORT_SYMBOL(cfs_cpt_online);
/* Borrowed pointer to the cpumask of @cpt (table-wide mask for
 * CFS_CPT_ANY); caller must not free it. */
254 cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
256 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
258 return cpt == CFS_CPT_ANY ?
259 cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
261 EXPORT_SYMBOL(cfs_cpt_cpumask);
/* Borrowed pointer to the nodemask of @cpt (table-wide mask for
 * CFS_CPT_ANY); caller must not free it. */
264 cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
266 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
268 return cpt == CFS_CPT_ANY ?
269 cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
271 EXPORT_SYMBOL(cfs_cpt_nodemask);
/* Add online CPU @cpu to partition @cpt.
 * Fails (the elided branches return 0) when @cpu is out of range,
 * offline, or already owned by a partition; a CPU belongs to at most
 * one partition per table. */
274 cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
278 LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
280 if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
281 CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
285 if (cptab->ctb_cpu2cpt[cpu] != -1) {
286 CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
287 cpu, cptab->ctb_cpu2cpt[cpu]);
/* record ownership, then mirror it into both cpumasks */
291 cptab->ctb_cpu2cpt[cpu] = cpt;
293 LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
294 LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
296 cpumask_set_cpu(cpu, cptab->ctb_cpumask);
297 cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
/* keep the per-table and per-partition NUMA nodemasks in sync with the
 * CPUs they now contain */
299 node = cpu_to_node(cpu);
301 /* first CPU of @node in this CPT table */
302 if (!node_isset(node, *cptab->ctb_nodemask))
303 node_set(node, *cptab->ctb_nodemask);
305 /* first CPU of @node in this partition */
306 if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
307 node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
311 EXPORT_SYMBOL(cfs_cpt_set_cpu);
/* Remove @cpu from partition @cpt (CFS_CPT_ANY looks up the owning
 * partition via ctb_cpu2cpt).  Also clears the node from the
 * partition/table nodemasks when this was the node's last CPU there. */
314 cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
319 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
321 if (cpu < 0 || cpu >= nr_cpu_ids) {
322 CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
326 if (cpt == CFS_CPT_ANY) {
327 /* caller doesn't know the partition ID */
328 cpt = cptab->ctb_cpu2cpt[cpu];
329 if (cpt < 0) { /* not set in this CPT-table */
330 CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
/* explicit @cpt must match the recorded owner */
335 } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
337 "CPU %d is not in cpu-partition %d\n", cpu, cpt);
341 LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
342 LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
344 cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
345 cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
346 cptab->ctb_cpu2cpt[cpu] = -1;
348 node = cpu_to_node(cpu);
350 LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
351 LASSERT(node_isset(node, *cptab->ctb_nodemask));
353 for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
354 /* this CPT has other CPU belonging to this node? */
355 if (cpu_to_node(i) == node)
/* no other CPU of @node left in this partition */
360 node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
362 for_each_cpu(i, cptab->ctb_cpumask) {
363 /* this CPT-table has other CPU belonging to this node? */
364 if (cpu_to_node(i) == node)
/* no other CPU of @node left in the whole table */
369 node_clear(node, *cptab->ctb_nodemask);
371 EXPORT_SYMBOL(cfs_cpt_unset_cpu);
/* Add every CPU in @mask to partition @cpt.  Rejects a mask with no
 * online CPU up front; the elided branch bails out when any individual
 * cfs_cpt_set_cpu() fails. */
374 cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
378 if (!cpumask_weight(mask) ||
379 cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
380 CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
385 for_each_cpu(i, mask) {
386 if (!cfs_cpt_set_cpu(cptab, cpt, i))
392 EXPORT_SYMBOL(cfs_cpt_set_cpumask);
/* Remove every CPU in @mask from partition @cpt (best effort; each
 * unset handles "not a member" itself). */
395 cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
399 for_each_cpu(i, mask)
400 cfs_cpt_unset_cpu(cptab, cpt, i);
402 EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
/* Add all CPUs of NUMA node @node to partition @cpt.  Uses the shared
 * scratch cpumask in cpt_data, serialized by cpt_mutex. */
405 cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
410 if (node < 0 || node >= MAX_NUMNODES) {
412 "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
416 mutex_lock(&cpt_data.cpt_mutex);
418 mask = cpt_data.cpt_cpumask;
419 cfs_node_to_cpumask(node, mask);
421 rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
423 mutex_unlock(&cpt_data.cpt_mutex);
427 EXPORT_SYMBOL(cfs_cpt_set_node);
/* Remove all CPUs of NUMA node @node from partition @cpt; same scratch
 * mask and locking as cfs_cpt_set_node(). */
430 cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
434 if (node < 0 || node >= MAX_NUMNODES) {
436 "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
440 mutex_lock(&cpt_data.cpt_mutex);
442 mask = cpt_data.cpt_cpumask;
443 cfs_node_to_cpumask(node, mask);
445 cfs_cpt_unset_cpumask(cptab, cpt, mask);
447 mutex_unlock(&cpt_data.cpt_mutex);
449 EXPORT_SYMBOL(cfs_cpt_unset_node);
/* Add all CPUs of every node in @mask to partition @cpt; the elided
 * branch stops on the first cfs_cpt_set_node() failure. */
452 cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
456 for_each_node_mask(i, *mask) {
457 if (!cfs_cpt_set_node(cptab, cpt, i))
463 EXPORT_SYMBOL(cfs_cpt_set_nodemask);
/* Remove all CPUs of every node in @mask from partition @cpt. */
466 cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
470 for_each_node_mask(i, *mask)
471 cfs_cpt_unset_node(cptab, cpt, i);
473 EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
/* Empty partition @cpt, or every partition for CFS_CPT_ANY, by
 * unsetting each member CPU (which also maintains the nodemasks). */
476 cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
481 if (cpt == CFS_CPT_ANY) {
482 last = cptab->ctb_nparts - 1;
/* for a single partition the elided else-branch sets last = cpt */
488 for (; cpt <= last; cpt++) {
489 for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
490 cfs_cpt_unset_cpu(cptab, cpt, i);
493 EXPORT_SYMBOL(cfs_cpt_clear);
/* Pick a NUMA node for partition @cpt (or table-wide for an
 * out-of-range @cpt), rotating through the nodemask via a per-table /
 * per-partition rotor so repeated calls spread across nodes.
 * NOTE(review): the rotor increment is not visibly locked here —
 * concurrent callers may observe duplicate rotor values; confirm
 * intended tolerance against the full source. */
496 cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
503 /* convert CPU partition ID to HW node id */
505 if (cpt < 0 || cpt >= cptab->ctb_nparts) {
506 mask = cptab->ctb_nodemask;
507 rotor = cptab->ctb_spread_rotor++;
509 mask = cptab->ctb_parts[cpt].cpt_nodemask;
510 rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
/* reduce rotor modulo the number of candidate nodes (elided), then
 * walk the mask to the selected entry */
513 weight = nodes_weight(*mask);
518 for_each_node_mask(node, *mask) {
526 EXPORT_SYMBOL(cfs_cpt_spread_node);
/* Partition ID of the CPU this code is currently running on.  With
 * @remap set, an unassigned CPU is mapped onto a valid partition by
 * modulo instead of returning a negative ID.
 * NOTE(review): smp_processor_id() normally requires preemption to be
 * disabled; the surrounding (elided) lines should be checked for that. */
529 cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
535 cpu = smp_processor_id();
536 cpt = cptab->ctb_cpu2cpt[cpu];
538 if (cpt < 0 && remap) {
539 /* don't return negative value for safety of upper layer,
540 * instead we shadow the unknown cpu to a valid partition ID
542 cpt = cpu % cptab->ctb_nparts;
547 EXPORT_SYMBOL(cfs_cpt_current);
/* Partition ID owning @cpu; -1 when the CPU is unassigned (from the
 * memset in cfs_cpt_table_alloc()). */
550 cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
552 LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
554 return cptab->ctb_cpu2cpt[cpu];
556 EXPORT_SYMBOL(cfs_cpt_of_cpu);
/* Bind the current task's CPU and memory affinity to partition @cpt
 * (or to the whole table for CFS_CPT_ANY).  Fails loudly if hotplug
 * has taken every CPU of the partition offline. */
559 cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
562 nodemask_t *nodemask;
566 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
568 if (cpt == CFS_CPT_ANY) {
569 cpumask = cptab->ctb_cpumask;
570 nodemask = cptab->ctb_nodemask;
572 cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
573 nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
576 if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) {
577 CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
/* only set affinity if some online CPU lies outside the mask */
582 for_each_online_cpu(i) {
583 if (cpumask_test_cpu(i, cpumask))
586 rc = set_cpus_allowed_ptr(current, cpumask);
587 set_mems_allowed(*nodemask);
589 schedule(); /* switch to allowed CPU */
594 /* don't need to set affinity because all online CPUs are covered */
597 EXPORT_SYMBOL(cfs_cpt_bind);
600 * Choose max to \a number CPUs from \a node and set them in \a cpt.
601 * We always prefer to choose CPU in the same core/socket.
/* CPUs taken are also cleared from @node so the caller can keep calling
 * with the remainder.  Fast path: when @number covers the whole mask,
 * just take everything. */
604 cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
605 cpumask_t *node, int number)
607 cpumask_t *socket = NULL;
608 cpumask_t *core = NULL;
614 if (number >= cpumask_weight(node)) {
615 while (!cpumask_empty(node)) {
616 cpu = cpumask_first(node);
618 rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
621 cpumask_clear_cpu(cpu, node);
626 /* allocate scratch buffer */
627 LIBCFS_ALLOC(socket, cpumask_size());
628 LIBCFS_ALLOC(core, cpumask_size());
629 if (!socket || !core) {
/* slow path: pick socket-by-socket, core-by-core, so siblings of the
 * same core/socket end up in the same partition */
634 while (!cpumask_empty(node)) {
635 cpu = cpumask_first(node);
637 /* get cpumask for cores in the same socket */
638 cpumask_copy(socket, topology_core_cpumask(cpu));
639 cpumask_and(socket, socket, node);
641 LASSERT(!cpumask_empty(socket));
643 while (!cpumask_empty(socket)) {
646 /* get cpumask for hts in the same core */
647 cpumask_copy(core, topology_sibling_cpumask(cpu));
648 cpumask_and(core, core, node);
650 LASSERT(!cpumask_empty(core));
652 for_each_cpu(i, core) {
653 cpumask_clear_cpu(i, socket);
654 cpumask_clear_cpu(i, node);
656 rc = cfs_cpt_set_cpu(cptab, cpt, i);
665 cpu = cpumask_first(socket);
/* common exit: release scratch masks */
671 LIBCFS_FREE(socket, cpumask_size());
673 LIBCFS_FREE(core, cpumask_size());
/* Minimum CPUs-per-partition worth having; below this, don't split. */
677 #define CPT_WEIGHT_MIN 4u
/* Heuristic for a sensible default partition count from the online
 * CPU/NUMA topology (used when cpu_npartitions == 0). */
680 cfs_cpt_num_estimate(void)
682 unsigned int nnode = num_online_nodes();
683 unsigned int ncpu = num_online_cpus();
686 if (ncpu <= CPT_WEIGHT_MIN) {
691 /* generate reasonable number of CPU partitions based on total number
692 * of CPUs, Preferred N should be power2 and match this condition:
693 * 2 * (N - 1)^2 < NCPUS <= 2 * N^2
695 for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
698 if (ncpt <= nnode) { /* fat numa system */
702 } else { /* ncpt > nnode */
/* round the estimate down to a multiple of the node count */
703 while ((nnode << 1) <= ncpt)
710 #if (BITS_PER_LONG == 32)
711 /* config many CPU partitions on 32-bit system could consume
714 ncpt = min(2U, ncpt);
717 ncpt--; /* worst case is 1 */
/* Build a CPT table with @ncpt partitions (0 = use the estimate),
 * distributing online CPUs evenly across partitions node by node.
 * Requires num_online_cpus() to be a multiple of ncpt.  Returns NULL
 * on failure (error paths elided; they converge on the CERROR +
 * cfs_cpt_table_free() tail below). */
722 static struct cfs_cpt_table *
723 cfs_cpt_table_create(int ncpt)
725 struct cfs_cpt_table *cptab = NULL;
726 cpumask_t *mask = NULL;
732 rc = cfs_cpt_num_estimate();
/* warn (but proceed) when the caller asks for far more partitions than
 * the heuristic suggests */
736 if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
737 CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
741 if (num_online_cpus() % ncpt) {
742 CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
743 (int)num_online_cpus(), ncpt);
747 cptab = cfs_cpt_table_alloc(ncpt);
749 CERROR("Failed to allocate CPU map(%d)\n", ncpt);
/* target CPUs per partition */
753 num = num_online_cpus() / ncpt;
755 CERROR("CPU changed while setting CPU partition\n");
759 LIBCFS_ALLOC(mask, cpumask_size());
761 CERROR("Failed to allocate scratch cpumask\n");
/* walk nodes, draining each node's CPUs into partitions until every
 * partition holds exactly @num CPUs */
765 for_each_online_node(i) {
766 cfs_node_to_cpumask(i, mask);
768 while (!cpumask_empty(mask)) {
769 struct cfs_cpu_partition *part;
773 * Each emulated NUMA node has all allowed CPUs in
775 * End loop when all partitions have assigned CPUs.
780 part = &cptab->ctb_parts[cpt];
782 n = num - cpumask_weight(part->cpt_cpumask);
785 rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
789 LASSERT(num >= cpumask_weight(part->cpt_cpumask));
790 if (num == cpumask_weight(part->cpt_cpumask))
/* sanity check: last partition must be full, else hotplug raced us */
796 num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
797 CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
798 cptab->ctb_nparts, num, cpt,
799 cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
803 LIBCFS_FREE(mask, cpumask_size());
/* error path */
808 CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
809 ncpt, num_online_nodes(), num_online_cpus());
812 LIBCFS_FREE(mask, cpumask_size());
815 cfs_cpt_table_free(cptab);
/* Build a CPT table from a user pattern string (see cpu_pattern docs
 * at the top of the file).  @pattern is parsed destructively by the
 * caller's copy; grammar: optional leading 'N'/'n' selects NUMA-node
 * IDs inside brackets, then "cpt-id[range,...]" groups.  A bare "N" is
 * a shortcut for one partition per online node.  Returns NULL on any
 * parse or setup error (error paths elided; they reach the
 * cfs_cpt_table_free() at the bottom). */
820 static struct cfs_cpt_table *
821 cfs_cpt_table_create_pattern(char *pattern)
823 struct cfs_cpt_table *cptab;
833 str = cfs_trimwhite(pattern);
834 if (*str == 'n' || *str == 'N') {
836 if (*pattern != '\0') {
838 } else { /* shortcut to create CPT from NUMA & CPU topology */
840 ncpt = num_online_nodes();
844 if (!ncpt) { /* scanning bracket which is mark of partition */
845 for (str = pattern;; str++, ncpt++) {
846 str = strchr(str, '[');
/* validate partition count against the available node/CPU count */
853 (node && ncpt > num_online_nodes()) ||
854 (!node && ncpt > num_online_cpus())) {
855 CERROR("Invalid pattern %s, or too many partitions %d\n",
860 cptab = cfs_cpt_table_alloc(ncpt);
862 CERROR("Failed to allocate cpu partition table\n");
866 if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */
869 for_each_online_node(i) {
871 CERROR("CPU changed while setting CPU partition table, %d/%d\n",
876 rc = cfs_cpt_set_node(cptab, cpt++, i);
/* explicit pattern: upper bound for IDs inside the brackets */
883 high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
885 for (str = cfs_trimwhite(pattern), c = 0;; c++) {
886 struct cfs_range_expr *range;
887 struct cfs_expr_list *el;
888 char *bracket = strchr(str, '[');
893 CERROR("Invalid pattern %s\n", str);
897 CERROR("expect %d partitions but found %d\n",
/* leading integer is the partition ID */
904 if (sscanf(str, "%d%n", &cpt, &n) < 1) {
905 CERROR("Invalid cpu pattern %s\n", str);
909 if (cpt < 0 || cpt >= ncpt) {
910 CERROR("Invalid partition id %d, total partitions %d\n",
/* each partition may appear only once in the pattern */
915 if (cfs_cpt_weight(cptab, cpt)) {
916 CERROR("Partition %d has already been set.\n", cpt);
920 str = cfs_trimwhite(str + n);
921 if (str != bracket) {
922 CERROR("Invalid pattern %s\n", str);
926 bracket = strchr(str, ']');
928 CERROR("missing right bracket for cpt %d, %s\n",
/* parse "[lo-hi/stride,...]" via the shared range-expression parser */
933 if (cfs_expr_list_parse(str, (bracket - str) + 1,
935 CERROR("Can't parse number range: %s\n", str);
939 list_for_each_entry(range, &el->el_exprs, re_link) {
940 for (i = range->re_lo; i <= range->re_hi; i++) {
941 if ((i - range->re_lo) % range->re_stride)
944 rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
945 cfs_cpt_set_cpu(cptab, cpt, i);
947 cfs_expr_list_free(el);
953 cfs_expr_list_free(el);
955 if (!cfs_cpt_online(cptab, cpt)) {
956 CERROR("No online CPU is found on partition %d\n", cpt);
960 str = cfs_trimwhite(bracket + 1);
/* error path */
966 cfs_cpt_table_free(cptab);
970 #ifdef CONFIG_HOTPLUG_CPU
/* dynamic cpuhp state returned by cpuhp_setup_state_nocalls() for the
 * online callback; needed to remove the state in cfs_cpu_fini() */
971 static enum cpuhp_state lustre_cpu_online;
/* Bump the hotplug generation counter so in-flight table setups can
 * detect that the topology changed under them. */
973 static void cfs_cpu_incr_cpt_version(void)
975 spin_lock(&cpt_data.cpt_lock);
976 cpt_data.cpt_version++;
977 spin_unlock(&cpt_data.cpt_lock);
/* cpuhp online callback: only records the event. */
980 static int cfs_cpu_online(unsigned int cpu)
982 cfs_cpu_incr_cpt_version();
/* cpuhp dead callback: records the event and warns when the dead CPU's
 * whole sibling (HT) group is now offline, since that can break the
 * affinity of existing partitions. */
986 static int cfs_cpu_dead(unsigned int cpu)
990 cfs_cpu_incr_cpt_version();
992 mutex_lock(&cpt_data.cpt_mutex);
993 /* if all HTs in a core are offline, it may break affinity */
994 cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
995 warn = cpumask_any_and(cpt_data.cpt_cpumask,
996 cpu_online_mask) >= nr_cpu_ids;
997 mutex_unlock(&cpt_data.cpt_mutex);
998 CDEBUG(warn ? D_WARNING : D_INFO,
999 "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
/* NOTE(review): these lines are the body of cfs_cpu_fini(); its header
 * is elided from this excerpt.  Tears down everything cfs_cpu_init()
 * set up: the global table, the hotplug callbacks, and the scratch
 * cpumask. */
1009 cfs_cpt_table_free(cfs_cpt_table);
1011 #ifdef CONFIG_HOTPLUG_CPU
/* lustre_cpu_online > 0 means the dynamic online state was installed */
1012 if (lustre_cpu_online > 0)
1013 cpuhp_remove_state_nocalls(lustre_cpu_online);
1014 cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
1016 if (cpt_data.cpt_cpumask)
1017 LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
/* NOTE(review): these lines are the body of cfs_cpu_init(); its header
 * is elided from this excerpt.  Module-init path: set up cpt_data,
 * register hotplug callbacks, then build the global cfs_cpt_table from
 * cpu_pattern (if set) or cpu_npartitions, and finally verify no
 * hotplug event raced the setup. */
1025 LASSERT(!cfs_cpt_table);
1027 memset(&cpt_data, 0, sizeof(cpt_data));
1029 LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
1030 if (!cpt_data.cpt_cpumask) {
1031 CERROR("Failed to allocate scratch buffer\n");
1035 spin_lock_init(&cpt_data.cpt_lock);
1036 mutex_init(&cpt_data.cpt_mutex);
1038 #ifdef CONFIG_HOTPLUG_CPU
1039 ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
1040 "staging/lustre/cfe:dead", NULL,
1044 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
1045 "staging/lustre/cfe:online",
1046 cfs_cpu_online, NULL);
/* remember the dynamic state id so cfs_cpu_fini() can remove it */
1049 lustre_cpu_online = ret;
/* pattern path: parse a private copy because the parser consumes it */
1054 char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
1056 if (!cpu_pattern_dup) {
1057 CERROR("Failed to duplicate cpu_pattern\n");
1061 cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern_dup);
1062 kfree(cpu_pattern_dup);
1063 if (!cfs_cpt_table) {
1064 CERROR("Failed to create cptab from pattern %s\n",
/* no pattern: build from cpu_npartitions (0 = auto-estimate) */
1070 cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
1071 if (!cfs_cpt_table) {
1072 CERROR("Failed to create ptable with npartitions %d\n",
/* reject the table if a hotplug event occurred during construction */
1078 spin_lock(&cpt_data.cpt_lock);
1079 if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
1080 spin_unlock(&cpt_data.cpt_lock);
1081 CERROR("CPU hotplug/unplug during setup\n");
1084 spin_unlock(&cpt_data.cpt_lock);
1086 LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: %d\n",
1087 num_online_nodes(), num_online_cpus(),
1088 cfs_cpt_number(cfs_cpt_table));