GNU Linux-libre 4.14.290-gnu1
drivers/block/null_blk.c
1 /*
2  * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
3  * Shaohua Li <shli@fb.com>
4  */
5 #include <linux/module.h>
6
7 #include <linux/moduleparam.h>
8 #include <linux/sched.h>
9 #include <linux/fs.h>
10 #include <linux/blkdev.h>
11 #include <linux/init.h>
12 #include <linux/slab.h>
13 #include <linux/blk-mq.h>
14 #include <linux/hrtimer.h>
15 #include <linux/lightnvm.h>
16 #include <linux/configfs.h>
17 #include <linux/badblocks.h>
18
19 #define PAGE_SECTORS_SHIFT      (PAGE_SHIFT - SECTOR_SHIFT)
20 #define PAGE_SECTORS            (1 << PAGE_SECTORS_SHIFT)
21 #define SECTOR_MASK             (PAGE_SECTORS - 1)
22
23 #define FREE_BATCH              16
24
25 #define TICKS_PER_SEC           50ULL
26 #define TIMER_INTERVAL          (NSEC_PER_SEC / TICKS_PER_SEC)
27
28 static inline u64 mb_per_tick(int mbps)
29 {
30         return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
31 }
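
/*
 * Worked example (derived from the constants above, not part of the original
 * source): with TICKS_PER_SEC = 50 the bandwidth timer fires every
 * TIMER_INTERVAL = 20 ms.  For an illustrative mbps = 100, mb_per_tick()
 * returns (1 << 20) / 50 * 100 = 2,097,100 bytes of budget per tick, which
 * works out to roughly 100 MB/s.
 */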
32
33 struct nullb_cmd {
34         struct list_head list;
35         struct llist_node ll_list;
36         call_single_data_t csd;
37         struct request *rq;
38         struct bio *bio;
39         unsigned int tag;
40         struct nullb_queue *nq;
41         struct hrtimer timer;
42         blk_status_t error;
43 };
44
45 struct nullb_queue {
46         unsigned long *tag_map;
47         wait_queue_head_t wait;
48         unsigned int queue_depth;
49         struct nullb_device *dev;
50
51         struct nullb_cmd *cmds;
52 };
53
54 /*
55  * Status flags for nullb_device.
56  *
57  * CONFIGURED:  Device has been configured and turned on. Cannot reconfigure.
58  * UP:          Device is currently on and visible in userspace.
59  * THROTTLED:   Device is being throttled.
60  * CACHE:       Device is using a write-back cache.
61  */
62 enum nullb_device_flags {
63         NULLB_DEV_FL_CONFIGURED = 0,
64         NULLB_DEV_FL_UP         = 1,
65         NULLB_DEV_FL_THROTTLED  = 2,
66         NULLB_DEV_FL_CACHE      = 3,
67 };
68
69 #define MAP_SZ          ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
70 /*
71  * nullb_page is a page in memory for nullb devices.
72  *
73  * @page:       The page holding the data.
74  * @bitmap:     The bitmap records which sectors in the page hold data.
75  *              Each bit represents one block-size chunk. For example, the
76  *              8th sector of a page uses bit 7.
77  * The highest 2 bits of the bitmap are reserved for special purposes. LOCK
78  * means the cache page is being flushed to storage. FREE means the cache
79  * page has been freed and should be skipped when flushing to storage. See
80  * null_make_cache_space().
81  */
82 struct nullb_page {
83         struct page *page;
84         DECLARE_BITMAP(bitmap, MAP_SZ);
85 };
86 #define NULLB_PAGE_LOCK (MAP_SZ - 1)
87 #define NULLB_PAGE_FREE (MAP_SZ - 2)
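
/*
 * Concrete layout, assuming 4KB pages and 512-byte sectors (an illustration,
 * not part of the original source): PAGE_SECTORS = 8, so MAP_SZ = 8 + 2 = 10.
 * Bits 0-7 track the eight sectors of the page (bit = sector & SECTOR_MASK),
 * bit 8 is NULLB_PAGE_FREE and bit 9 is NULLB_PAGE_LOCK.
 */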
88
89 struct nullb_device {
90         struct nullb *nullb;
91         struct config_item item;
92         struct radix_tree_root data; /* data stored in the disk */
93         struct radix_tree_root cache; /* disk cache data */
94         unsigned long flags; /* device flags */
95         unsigned int curr_cache;
96         struct badblocks badblocks;
97
98         unsigned long size; /* device size in MB */
99         unsigned long completion_nsec; /* time in ns to complete a request */
100         unsigned long cache_size; /* disk cache size in MB */
101         unsigned int submit_queues; /* number of submission queues */
102         unsigned int home_node; /* home node for the device */
103         unsigned int queue_mode; /* block interface */
104         unsigned int blocksize; /* block size */
105         unsigned int irqmode; /* IRQ completion handler */
106         unsigned int hw_queue_depth; /* queue depth */
107         unsigned int index; /* index of the disk, only valid with a disk */
108         unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
109         bool use_lightnvm; /* register as a LightNVM device */
110         bool blocking; /* blocking blk-mq device */
111         bool use_per_node_hctx; /* use per-node allocation for hardware context */
112         bool power; /* power on/off the device */
113         bool memory_backed; /* if data is stored in memory */
114         bool discard; /* if support discard */
115 };
116
117 struct nullb {
118         struct nullb_device *dev;
119         struct list_head list;
120         unsigned int index;
121         struct request_queue *q;
122         struct gendisk *disk;
123         struct nvm_dev *ndev;
124         struct blk_mq_tag_set *tag_set;
125         struct blk_mq_tag_set __tag_set;
126         unsigned int queue_depth;
127         atomic_long_t cur_bytes;
128         struct hrtimer bw_timer;
129         unsigned long cache_flush_pos;
130         spinlock_t lock;
131
132         struct nullb_queue *queues;
133         unsigned int nr_queues;
134         char disk_name[DISK_NAME_LEN];
135 };
136
137 static LIST_HEAD(nullb_list);
138 static struct mutex lock;
139 static int null_major;
140 static DEFINE_IDA(nullb_indexes);
141 static struct kmem_cache *ppa_cache;
142 static struct blk_mq_tag_set tag_set;
143
144 enum {
145         NULL_IRQ_NONE           = 0,
146         NULL_IRQ_SOFTIRQ        = 1,
147         NULL_IRQ_TIMER          = 2,
148 };
149
150 enum {
151         NULL_Q_BIO              = 0,
152         NULL_Q_RQ               = 1,
153         NULL_Q_MQ               = 2,
154 };
155
156 static int g_submit_queues = 1;
157 module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
158 MODULE_PARM_DESC(submit_queues, "Number of submission queues");
159
160 static int g_home_node = NUMA_NO_NODE;
161 module_param_named(home_node, g_home_node, int, S_IRUGO);
162 MODULE_PARM_DESC(home_node, "Home node for the device");
163
164 static int g_queue_mode = NULL_Q_MQ;
165
166 static int null_param_store_val(const char *str, int *val, int min, int max)
167 {
168         int ret, new_val;
169
170         ret = kstrtoint(str, 10, &new_val);
171         if (ret)
172                 return -EINVAL;
173
174         if (new_val < min || new_val > max)
175                 return -EINVAL;
176
177         *val = new_val;
178         return 0;
179 }
180
181 static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
182 {
183         return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
184 }
185
186 static const struct kernel_param_ops null_queue_mode_param_ops = {
187         .set    = null_set_queue_mode,
188         .get    = param_get_int,
189 };
190
191 device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
192 MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
193
194 static int g_gb = 250;
195 module_param_named(gb, g_gb, int, S_IRUGO);
196 MODULE_PARM_DESC(gb, "Size in GB");
197
198 static int g_bs = 512;
199 module_param_named(bs, g_bs, int, S_IRUGO);
200 MODULE_PARM_DESC(bs, "Block size (in bytes)");
201
202 static int nr_devices = 1;
203 module_param(nr_devices, int, S_IRUGO);
204 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
205
206 static bool g_use_lightnvm;
207 module_param_named(use_lightnvm, g_use_lightnvm, bool, S_IRUGO);
208 MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
209
210 static bool g_blocking;
211 module_param_named(blocking, g_blocking, bool, S_IRUGO);
212 MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
213
214 static bool shared_tags;
215 module_param(shared_tags, bool, S_IRUGO);
216 MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
217
218 static int g_irqmode = NULL_IRQ_SOFTIRQ;
219
220 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
221 {
222         return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
223                                         NULL_IRQ_TIMER);
224 }
225
226 static const struct kernel_param_ops null_irqmode_param_ops = {
227         .set    = null_set_irqmode,
228         .get    = param_get_int,
229 };
230
231 device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
232 MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
233
234 static unsigned long g_completion_nsec = 10000;
235 module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
236 MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
237
238 static int g_hw_queue_depth = 64;
239 module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
240 MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
241
242 static bool g_use_per_node_hctx;
243 module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
244 MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
245
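/*
 * Illustrative load-time configuration (example values, not from the original
 * file): the module parameters above can be set at load time, e.g.
 *
 *   modprobe null_blk nr_devices=2 queue_mode=2 gb=4 bs=4096 irqmode=1
 *
 * which creates two 4GB multiqueue devices with a 4096-byte block size and
 * softirq completions.
 */
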
246 static struct nullb_device *null_alloc_dev(void);
247 static void null_free_dev(struct nullb_device *dev);
248 static void null_del_dev(struct nullb *nullb);
249 static int null_add_dev(struct nullb_device *dev);
250 static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
251
252 static inline struct nullb_device *to_nullb_device(struct config_item *item)
253 {
254         return item ? container_of(item, struct nullb_device, item) : NULL;
255 }
256
257 static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
258 {
259         return snprintf(page, PAGE_SIZE, "%u\n", val);
260 }
261
262 static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
263         char *page)
264 {
265         return snprintf(page, PAGE_SIZE, "%lu\n", val);
266 }
267
268 static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
269 {
270         return snprintf(page, PAGE_SIZE, "%u\n", val);
271 }
272
273 static ssize_t nullb_device_uint_attr_store(unsigned int *val,
274         const char *page, size_t count)
275 {
276         unsigned int tmp;
277         int result;
278
279         result = kstrtouint(page, 0, &tmp);
280         if (result)
281                 return result;
282
283         *val = tmp;
284         return count;
285 }
286
287 static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
288         const char *page, size_t count)
289 {
290         int result;
291         unsigned long tmp;
292
293         result = kstrtoul(page, 0, &tmp);
294         if (result)
295                 return result;
296
297         *val = tmp;
298         return count;
299 }
300
301 static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
302         size_t count)
303 {
304         bool tmp;
305         int result;
306
307         result = kstrtobool(page,  &tmp);
308         if (result)
309                 return result;
310
311         *val = tmp;
312         return count;
313 }
314
315 /* The following macro should only be used with TYPE = {uint, ulong, bool}. */
316 #define NULLB_DEVICE_ATTR(NAME, TYPE)                                           \
317 static ssize_t                                                                  \
318 nullb_device_##NAME##_show(struct config_item *item, char *page)                \
319 {                                                                               \
320         return nullb_device_##TYPE##_attr_show(                                 \
321                                 to_nullb_device(item)->NAME, page);             \
322 }                                                                               \
323 static ssize_t                                                                  \
324 nullb_device_##NAME##_store(struct config_item *item, const char *page,         \
325                             size_t count)                                       \
326 {                                                                               \
327         if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags))   \
328                 return -EBUSY;                                                  \
329         return nullb_device_##TYPE##_attr_store(                                \
330                         &to_nullb_device(item)->NAME, page, count);             \
331 }                                                                               \
332 CONFIGFS_ATTR(nullb_device_, NAME);
333
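/*
 * For example, NULLB_DEVICE_ATTR(blocksize, uint) below expands to
 * nullb_device_blocksize_show()/_store() wrappers around
 * nullb_device_uint_attr_show()/_store(); the store side returns -EBUSY once
 * NULLB_DEV_FL_CONFIGURED is set, so attributes cannot be changed after the
 * device has been powered on.
 */
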
334 NULLB_DEVICE_ATTR(size, ulong);
335 NULLB_DEVICE_ATTR(completion_nsec, ulong);
336 NULLB_DEVICE_ATTR(submit_queues, uint);
337 NULLB_DEVICE_ATTR(home_node, uint);
338 NULLB_DEVICE_ATTR(queue_mode, uint);
339 NULLB_DEVICE_ATTR(blocksize, uint);
340 NULLB_DEVICE_ATTR(irqmode, uint);
341 NULLB_DEVICE_ATTR(hw_queue_depth, uint);
342 NULLB_DEVICE_ATTR(index, uint);
343 NULLB_DEVICE_ATTR(use_lightnvm, bool);
344 NULLB_DEVICE_ATTR(blocking, bool);
345 NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
346 NULLB_DEVICE_ATTR(memory_backed, bool);
347 NULLB_DEVICE_ATTR(discard, bool);
348 NULLB_DEVICE_ATTR(mbps, uint);
349 NULLB_DEVICE_ATTR(cache_size, ulong);
350
351 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
352 {
353         return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
354 }
355
356 static ssize_t nullb_device_power_store(struct config_item *item,
357                                      const char *page, size_t count)
358 {
359         struct nullb_device *dev = to_nullb_device(item);
360         bool newp = false;
361         ssize_t ret;
362
363         ret = nullb_device_bool_attr_store(&newp, page, count);
364         if (ret < 0)
365                 return ret;
366
367         if (!dev->power && newp) {
368                 if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
369                         return count;
370                 if (null_add_dev(dev)) {
371                         clear_bit(NULLB_DEV_FL_UP, &dev->flags);
372                         return -ENOMEM;
373                 }
374
375                 set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
376                 dev->power = newp;
377         } else if (dev->power && !newp) {
378                 mutex_lock(&lock);
379                 dev->power = newp;
380                 null_del_dev(dev->nullb);
381                 mutex_unlock(&lock);
382                 clear_bit(NULLB_DEV_FL_UP, &dev->flags);
383         }
384
385         return count;
386 }
387
388 CONFIGFS_ATTR(nullb_device_, power);
389
390 static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
391 {
392         struct nullb_device *t_dev = to_nullb_device(item);
393
394         return badblocks_show(&t_dev->badblocks, page, 0);
395 }
396
397 static ssize_t nullb_device_badblocks_store(struct config_item *item,
398                                      const char *page, size_t count)
399 {
400         struct nullb_device *t_dev = to_nullb_device(item);
401         char *orig, *buf, *tmp;
402         u64 start, end;
403         int ret;
404
405         orig = kstrndup(page, count, GFP_KERNEL);
406         if (!orig)
407                 return -ENOMEM;
408
409         buf = strstrip(orig);
410
411         ret = -EINVAL;
412         if (buf[0] != '+' && buf[0] != '-')
413                 goto out;
414         tmp = strchr(&buf[1], '-');
415         if (!tmp)
416                 goto out;
417         *tmp = '\0';
418         ret = kstrtoull(buf + 1, 0, &start);
419         if (ret)
420                 goto out;
421         ret = kstrtoull(tmp + 1, 0, &end);
422         if (ret)
423                 goto out;
424         ret = -EINVAL;
425         if (start > end)
426                 goto out;
427         /* enable badblocks */
428         cmpxchg(&t_dev->badblocks.shift, -1, 0);
429         if (buf[0] == '+')
430                 ret = badblocks_set(&t_dev->badblocks, start,
431                         end - start + 1, 1);
432         else
433                 ret = badblocks_clear(&t_dev->badblocks, start,
434                         end - start + 1);
435         if (ret == 0)
436                 ret = count;
437 out:
438         kfree(orig);
439         return ret;
440 }
441 CONFIGFS_ATTR(nullb_device_, badblocks);
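
/*
 * Format example (derived from the parser above; "nullb1" is just an example
 * item name): the attribute takes "+start-end" to mark a sector range bad and
 * "-start-end" to clear it again, e.g.
 *
 *   echo "+1024-1535" > /sys/kernel/config/nullb/nullb1/badblocks
 *
 * marks sectors 1024..1535 as bad.
 */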
442
443 static struct configfs_attribute *nullb_device_attrs[] = {
444         &nullb_device_attr_size,
445         &nullb_device_attr_completion_nsec,
446         &nullb_device_attr_submit_queues,
447         &nullb_device_attr_home_node,
448         &nullb_device_attr_queue_mode,
449         &nullb_device_attr_blocksize,
450         &nullb_device_attr_irqmode,
451         &nullb_device_attr_hw_queue_depth,
452         &nullb_device_attr_index,
453         &nullb_device_attr_use_lightnvm,
454         &nullb_device_attr_blocking,
455         &nullb_device_attr_use_per_node_hctx,
456         &nullb_device_attr_power,
457         &nullb_device_attr_memory_backed,
458         &nullb_device_attr_discard,
459         &nullb_device_attr_mbps,
460         &nullb_device_attr_cache_size,
461         &nullb_device_attr_badblocks,
462         NULL,
463 };
464
465 static void nullb_device_release(struct config_item *item)
466 {
467         struct nullb_device *dev = to_nullb_device(item);
468
469         null_free_device_storage(dev, false);
470         null_free_dev(dev);
471 }
472
473 static struct configfs_item_operations nullb_device_ops = {
474         .release        = nullb_device_release,
475 };
476
477 static struct config_item_type nullb_device_type = {
478         .ct_item_ops    = &nullb_device_ops,
479         .ct_attrs       = nullb_device_attrs,
480         .ct_owner       = THIS_MODULE,
481 };
482
483 static struct
484 config_item *nullb_group_make_item(struct config_group *group, const char *name)
485 {
486         struct nullb_device *dev;
487
488         dev = null_alloc_dev();
489         if (!dev)
490                 return ERR_PTR(-ENOMEM);
491
492         config_item_init_type_name(&dev->item, name, &nullb_device_type);
493
494         return &dev->item;
495 }
496
497 static void
498 nullb_group_drop_item(struct config_group *group, struct config_item *item)
499 {
500         struct nullb_device *dev = to_nullb_device(item);
501
502         if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
503                 mutex_lock(&lock);
504                 dev->power = false;
505                 null_del_dev(dev->nullb);
506                 mutex_unlock(&lock);
507         }
508
509         config_item_put(item);
510 }
511
512 static ssize_t memb_group_features_show(struct config_item *item, char *page)
513 {
514         return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks\n");
515 }
516
517 CONFIGFS_ATTR_RO(memb_group_, features);
518
519 static struct configfs_attribute *nullb_group_attrs[] = {
520         &memb_group_attr_features,
521         NULL,
522 };
523
524 static struct configfs_group_operations nullb_group_ops = {
525         .make_item      = nullb_group_make_item,
526         .drop_item      = nullb_group_drop_item,
527 };
528
529 static struct config_item_type nullb_group_type = {
530         .ct_group_ops   = &nullb_group_ops,
531         .ct_attrs       = nullb_group_attrs,
532         .ct_owner       = THIS_MODULE,
533 };
534
535 static struct configfs_subsystem nullb_subsys = {
536         .su_group = {
537                 .cg_item = {
538                         .ci_namebuf = "nullb",
539                         .ci_type = &nullb_group_type,
540                 },
541         },
542 };
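
/*
 * Runtime configuration sketch (assumed configfs mount point, example item
 * name): with configfs mounted at /sys/kernel/config, a device can be created
 * and powered on from userspace, e.g.
 *
 *   mkdir /sys/kernel/config/nullb/nullb1
 *   echo 4096 > /sys/kernel/config/nullb/nullb1/blocksize
 *   echo 1 > /sys/kernel/config/nullb/nullb1/memory_backed
 *   echo 1 > /sys/kernel/config/nullb/nullb1/power
 *
 * Writing 0 to "power" tears the device down again, and rmdir removes the
 * configfs item.
 */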
543
544 static inline int null_cache_active(struct nullb *nullb)
545 {
546         return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
547 }
548
549 static struct nullb_device *null_alloc_dev(void)
550 {
551         struct nullb_device *dev;
552
553         dev = kzalloc(sizeof(*dev), GFP_KERNEL);
554         if (!dev)
555                 return NULL;
556         INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
557         INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
558         if (badblocks_init(&dev->badblocks, 0)) {
559                 kfree(dev);
560                 return NULL;
561         }
562
563         dev->size = g_gb * 1024;
564         dev->completion_nsec = g_completion_nsec;
565         dev->submit_queues = g_submit_queues;
566         dev->home_node = g_home_node;
567         dev->queue_mode = g_queue_mode;
568         dev->blocksize = g_bs;
569         dev->irqmode = g_irqmode;
570         dev->hw_queue_depth = g_hw_queue_depth;
571         dev->use_lightnvm = g_use_lightnvm;
572         dev->blocking = g_blocking;
573         dev->use_per_node_hctx = g_use_per_node_hctx;
574         return dev;
575 }
576
577 static void null_free_dev(struct nullb_device *dev)
578 {
579         if (!dev)
580                 return;
581
582         badblocks_exit(&dev->badblocks);
583         kfree(dev);
584 }
585
586 static void put_tag(struct nullb_queue *nq, unsigned int tag)
587 {
588         clear_bit_unlock(tag, nq->tag_map);
589
590         if (waitqueue_active(&nq->wait))
591                 wake_up(&nq->wait);
592 }
593
594 static unsigned int get_tag(struct nullb_queue *nq)
595 {
596         unsigned int tag;
597
598         do {
599                 tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
600                 if (tag >= nq->queue_depth)
601                         return -1U;
602         } while (test_and_set_bit_lock(tag, nq->tag_map));
603
604         return tag;
605 }
606
607 static void free_cmd(struct nullb_cmd *cmd)
608 {
609         put_tag(cmd->nq, cmd->tag);
610 }
611
612 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
613
614 static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
615 {
616         struct nullb_cmd *cmd;
617         unsigned int tag;
618
619         tag = get_tag(nq);
620         if (tag != -1U) {
621                 cmd = &nq->cmds[tag];
622                 cmd->tag = tag;
623                 cmd->error = BLK_STS_OK;
624                 cmd->nq = nq;
625                 if (nq->dev->irqmode == NULL_IRQ_TIMER) {
626                         hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
627                                      HRTIMER_MODE_REL);
628                         cmd->timer.function = null_cmd_timer_expired;
629                 }
630                 return cmd;
631         }
632
633         return NULL;
634 }
635
636 static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
637 {
638         struct nullb_cmd *cmd;
639         DEFINE_WAIT(wait);
640
641         cmd = __alloc_cmd(nq);
642         if (cmd || !can_wait)
643                 return cmd;
644
645         do {
646                 prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
647                 cmd = __alloc_cmd(nq);
648                 if (cmd)
649                         break;
650
651                 io_schedule();
652         } while (1);
653
654         finish_wait(&nq->wait, &wait);
655         return cmd;
656 }
657
658 static void end_cmd(struct nullb_cmd *cmd)
659 {
660         struct request_queue *q = NULL;
661         int queue_mode = cmd->nq->dev->queue_mode;
662
663         if (cmd->rq)
664                 q = cmd->rq->q;
665
666         switch (queue_mode)  {
667         case NULL_Q_MQ:
668                 blk_mq_end_request(cmd->rq, cmd->error);
669                 return;
670         case NULL_Q_RQ:
671                 INIT_LIST_HEAD(&cmd->rq->queuelist);
672                 blk_end_request_all(cmd->rq, cmd->error);
673                 break;
674         case NULL_Q_BIO:
675                 cmd->bio->bi_status = cmd->error;
676                 bio_endio(cmd->bio);
677                 break;
678         }
679
680         free_cmd(cmd);
681
682         /* Restart queue if needed, as we are freeing a tag */
683         if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) {
684                 unsigned long flags;
685
686                 spin_lock_irqsave(q->queue_lock, flags);
687                 blk_start_queue_async(q);
688                 spin_unlock_irqrestore(q->queue_lock, flags);
689         }
690 }
691
692 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
693 {
694         end_cmd(container_of(timer, struct nullb_cmd, timer));
695
696         return HRTIMER_NORESTART;
697 }
698
699 static void null_cmd_end_timer(struct nullb_cmd *cmd)
700 {
701         ktime_t kt = cmd->nq->dev->completion_nsec;
702
703         hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
704 }
705
706 static void null_softirq_done_fn(struct request *rq)
707 {
708         struct nullb *nullb = rq->q->queuedata;
709
710         if (nullb->dev->queue_mode == NULL_Q_MQ)
711                 end_cmd(blk_mq_rq_to_pdu(rq));
712         else
713                 end_cmd(rq->special);
714 }
715
716 static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
717 {
718         struct nullb_page *t_page;
719
720         t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
721         if (!t_page)
722                 goto out;
723
724         t_page->page = alloc_pages(gfp_flags, 0);
725         if (!t_page->page)
726                 goto out_freepage;
727
728         memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
729         return t_page;
730 out_freepage:
731         kfree(t_page);
732 out:
733         return NULL;
734 }
735
736 static void null_free_page(struct nullb_page *t_page)
737 {
738         __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
739         if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
740                 return;
741         __free_page(t_page->page);
742         kfree(t_page);
743 }
744
745 static bool null_page_empty(struct nullb_page *page)
746 {
747         int size = MAP_SZ - 2;
748
749         return find_first_bit(page->bitmap, size) == size;
750 }
751
752 static void null_free_sector(struct nullb *nullb, sector_t sector,
753         bool is_cache)
754 {
755         unsigned int sector_bit;
756         u64 idx;
757         struct nullb_page *t_page, *ret;
758         struct radix_tree_root *root;
759
760         root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
761         idx = sector >> PAGE_SECTORS_SHIFT;
762         sector_bit = (sector & SECTOR_MASK);
763
764         t_page = radix_tree_lookup(root, idx);
765         if (t_page) {
766                 __clear_bit(sector_bit, t_page->bitmap);
767
768                 if (null_page_empty(t_page)) {
769                         ret = radix_tree_delete_item(root, idx, t_page);
770                         WARN_ON(ret != t_page);
771                         null_free_page(ret);
772                         if (is_cache)
773                                 nullb->dev->curr_cache -= PAGE_SIZE;
774                 }
775         }
776 }
777
778 static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
779         struct nullb_page *t_page, bool is_cache)
780 {
781         struct radix_tree_root *root;
782
783         root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
784
785         if (radix_tree_insert(root, idx, t_page)) {
786                 null_free_page(t_page);
787                 t_page = radix_tree_lookup(root, idx);
788                 WARN_ON(!t_page || t_page->page->index != idx);
789         } else if (is_cache)
790                 nullb->dev->curr_cache += PAGE_SIZE;
791
792         return t_page;
793 }
794
795 static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
796 {
797         unsigned long pos = 0;
798         int nr_pages;
799         struct nullb_page *ret, *t_pages[FREE_BATCH];
800         struct radix_tree_root *root;
801
802         root = is_cache ? &dev->cache : &dev->data;
803
804         do {
805                 int i;
806
807                 nr_pages = radix_tree_gang_lookup(root,
808                                 (void **)t_pages, pos, FREE_BATCH);
809
810                 for (i = 0; i < nr_pages; i++) {
811                         pos = t_pages[i]->page->index;
812                         ret = radix_tree_delete_item(root, pos, t_pages[i]);
813                         WARN_ON(ret != t_pages[i]);
814                         null_free_page(ret);
815                 }
816
817                 pos++;
818         } while (nr_pages == FREE_BATCH);
819
820         if (is_cache)
821                 dev->curr_cache = 0;
822 }
823
824 static struct nullb_page *__null_lookup_page(struct nullb *nullb,
825         sector_t sector, bool for_write, bool is_cache)
826 {
827         unsigned int sector_bit;
828         u64 idx;
829         struct nullb_page *t_page;
830         struct radix_tree_root *root;
831
832         idx = sector >> PAGE_SECTORS_SHIFT;
833         sector_bit = (sector & SECTOR_MASK);
834
835         root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
836         t_page = radix_tree_lookup(root, idx);
837         WARN_ON(t_page && t_page->page->index != idx);
838
839         if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
840                 return t_page;
841
842         return NULL;
843 }
844
845 static struct nullb_page *null_lookup_page(struct nullb *nullb,
846         sector_t sector, bool for_write, bool ignore_cache)
847 {
848         struct nullb_page *page = NULL;
849
850         if (!ignore_cache)
851                 page = __null_lookup_page(nullb, sector, for_write, true);
852         if (page)
853                 return page;
854         return __null_lookup_page(nullb, sector, for_write, false);
855 }
856
857 static struct nullb_page *null_insert_page(struct nullb *nullb,
858         sector_t sector, bool ignore_cache)
859 {
860         u64 idx;
861         struct nullb_page *t_page;
862
863         t_page = null_lookup_page(nullb, sector, true, ignore_cache);
864         if (t_page)
865                 return t_page;
866
867         spin_unlock_irq(&nullb->lock);
868
869         t_page = null_alloc_page(GFP_NOIO);
870         if (!t_page)
871                 goto out_lock;
872
873         if (radix_tree_preload(GFP_NOIO))
874                 goto out_freepage;
875
876         spin_lock_irq(&nullb->lock);
877         idx = sector >> PAGE_SECTORS_SHIFT;
878         t_page->page->index = idx;
879         t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
880         radix_tree_preload_end();
881
882         return t_page;
883 out_freepage:
884         null_free_page(t_page);
885 out_lock:
886         spin_lock_irq(&nullb->lock);
887         return null_lookup_page(nullb, sector, true, ignore_cache);
888 }
889
890 static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
891 {
892         int i;
893         unsigned int offset;
894         u64 idx;
895         struct nullb_page *t_page, *ret;
896         void *dst, *src;
897
898         idx = c_page->page->index;
899
900         t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
901
902         __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
903         if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
904                 null_free_page(c_page);
905                 if (t_page && null_page_empty(t_page)) {
906                         ret = radix_tree_delete_item(&nullb->dev->data,
907                                 idx, t_page);
908                         null_free_page(t_page);
909                 }
910                 return 0;
911         }
912
913         if (!t_page)
914                 return -ENOMEM;
915
916         src = kmap_atomic(c_page->page);
917         dst = kmap_atomic(t_page->page);
918
919         for (i = 0; i < PAGE_SECTORS;
920                         i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
921                 if (test_bit(i, c_page->bitmap)) {
922                         offset = (i << SECTOR_SHIFT);
923                         memcpy(dst + offset, src + offset,
924                                 nullb->dev->blocksize);
925                         __set_bit(i, t_page->bitmap);
926                 }
927         }
928
929         kunmap_atomic(dst);
930         kunmap_atomic(src);
931
932         ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
933         null_free_page(ret);
934         nullb->dev->curr_cache -= PAGE_SIZE;
935
936         return 0;
937 }
938
939 static int null_make_cache_space(struct nullb *nullb, unsigned long n)
940 {
941         int i, err, nr_pages;
942         struct nullb_page *c_pages[FREE_BATCH];
943         unsigned long flushed = 0, one_round;
944
945 again:
946         if ((nullb->dev->cache_size * 1024 * 1024) >
947              nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
948                 return 0;
949
950         nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
951                         (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
952         /*
953          * null_flush_cache_page() could unlock before it is done with the
954          * c_pages. To avoid a race, don't allow the pages to be freed here.
955          */
956         for (i = 0; i < nr_pages; i++) {
957                 nullb->cache_flush_pos = c_pages[i]->page->index;
958                 /*
959                  * The page is already being flushed to disk by another thread;
960                  * skip it.
961                  */
962                 if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
963                         c_pages[i] = NULL;
964                 else
965                         __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
966         }
967
968         one_round = 0;
969         for (i = 0; i < nr_pages; i++) {
970                 if (c_pages[i] == NULL)
971                         continue;
972                 err = null_flush_cache_page(nullb, c_pages[i]);
973                 if (err)
974                         return err;
975                 one_round++;
976         }
977         flushed += one_round << PAGE_SHIFT;
978
979         if (n > flushed) {
980                 if (nr_pages == 0)
981                         nullb->cache_flush_pos = 0;
982                 if (one_round == 0) {
983                         /* give other threads a chance */
984                         spin_unlock_irq(&nullb->lock);
985                         spin_lock_irq(&nullb->lock);
986                 }
987                 goto again;
988         }
989         return 0;
990 }
991
992 static int copy_to_nullb(struct nullb *nullb, struct page *source,
993         unsigned int off, sector_t sector, size_t n, bool is_fua)
994 {
995         size_t temp, count = 0;
996         unsigned int offset;
997         struct nullb_page *t_page;
998         void *dst, *src;
999
1000         while (count < n) {
1001                 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1002
1003                 if (null_cache_active(nullb) && !is_fua)
1004                         null_make_cache_space(nullb, PAGE_SIZE);
1005
1006                 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
1007                 t_page = null_insert_page(nullb, sector,
1008                         !null_cache_active(nullb) || is_fua);
1009                 if (!t_page)
1010                         return -ENOSPC;
1011
1012                 src = kmap_atomic(source);
1013                 dst = kmap_atomic(t_page->page);
1014                 memcpy(dst + offset, src + off + count, temp);
1015                 kunmap_atomic(dst);
1016                 kunmap_atomic(src);
1017
1018                 __set_bit(sector & SECTOR_MASK, t_page->bitmap);
1019
1020                 if (is_fua)
1021                         null_free_sector(nullb, sector, true);
1022
1023                 count += temp;
1024                 sector += temp >> SECTOR_SHIFT;
1025         }
1026         return 0;
1027 }
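
/*
 * Note on the write path above: when the write-back cache is active and the
 * request is not FUA, data lands in the cache radix tree and is flushed to
 * the data tree later by null_make_cache_space()/null_flush_cache_page().
 * FUA writes bypass the cache (ignore_cache is true) and invalidate any
 * cached copy of the sector.
 */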
1028
1029 static int copy_from_nullb(struct nullb *nullb, struct page *dest,
1030         unsigned int off, sector_t sector, size_t n)
1031 {
1032         size_t temp, count = 0;
1033         unsigned int offset;
1034         struct nullb_page *t_page;
1035         void *dst, *src;
1036
1037         while (count < n) {
1038                 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1039
1040                 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
1041                 t_page = null_lookup_page(nullb, sector, false,
1042                         !null_cache_active(nullb));
1043
1044                 dst = kmap_atomic(dest);
1045                 if (!t_page) {
1046                         memset(dst + off + count, 0, temp);
1047                         goto next;
1048                 }
1049                 src = kmap_atomic(t_page->page);
1050                 memcpy(dst + off + count, src + offset, temp);
1051                 kunmap_atomic(src);
1052 next:
1053                 kunmap_atomic(dst);
1054
1055                 count += temp;
1056                 sector += temp >> SECTOR_SHIFT;
1057         }
1058         return 0;
1059 }
1060
1061 static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
1062 {
1063         size_t temp;
1064
1065         spin_lock_irq(&nullb->lock);
1066         while (n > 0) {
1067                 temp = min_t(size_t, n, nullb->dev->blocksize);
1068                 null_free_sector(nullb, sector, false);
1069                 if (null_cache_active(nullb))
1070                         null_free_sector(nullb, sector, true);
1071                 sector += temp >> SECTOR_SHIFT;
1072                 n -= temp;
1073         }
1074         spin_unlock_irq(&nullb->lock);
1075 }
1076
1077 static int null_handle_flush(struct nullb *nullb)
1078 {
1079         int err;
1080
1081         if (!null_cache_active(nullb))
1082                 return 0;
1083
1084         spin_lock_irq(&nullb->lock);
1085         while (true) {
1086                 err = null_make_cache_space(nullb,
1087                         nullb->dev->cache_size * 1024 * 1024);
1088                 if (err || nullb->dev->curr_cache == 0)
1089                         break;
1090         }
1091
1092         WARN_ON(!radix_tree_empty(&nullb->dev->cache));
1093         spin_unlock_irq(&nullb->lock);
1094         return err;
1095 }
1096
1097 static int null_transfer(struct nullb *nullb, struct page *page,
1098         unsigned int len, unsigned int off, bool is_write, sector_t sector,
1099         bool is_fua)
1100 {
1101         int err = 0;
1102
1103         if (!is_write) {
1104                 err = copy_from_nullb(nullb, page, off, sector, len);
1105                 flush_dcache_page(page);
1106         } else {
1107                 flush_dcache_page(page);
1108                 err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
1109         }
1110
1111         return err;
1112 }
1113
1114 static int null_handle_rq(struct nullb_cmd *cmd)
1115 {
1116         struct request *rq = cmd->rq;
1117         struct nullb *nullb = cmd->nq->dev->nullb;
1118         int err;
1119         unsigned int len;
1120         sector_t sector;
1121         struct req_iterator iter;
1122         struct bio_vec bvec;
1123
1124         sector = blk_rq_pos(rq);
1125
1126         if (req_op(rq) == REQ_OP_DISCARD) {
1127                 null_handle_discard(nullb, sector, blk_rq_bytes(rq));
1128                 return 0;
1129         }
1130
1131         spin_lock_irq(&nullb->lock);
1132         rq_for_each_segment(bvec, rq, iter) {
1133                 len = bvec.bv_len;
1134                 err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
1135                                      op_is_write(req_op(rq)), sector,
1136                                      rq->cmd_flags & REQ_FUA);
1137                 if (err) {
1138                         spin_unlock_irq(&nullb->lock);
1139                         return err;
1140                 }
1141                 sector += len >> SECTOR_SHIFT;
1142         }
1143         spin_unlock_irq(&nullb->lock);
1144
1145         return 0;
1146 }
1147
1148 static int null_handle_bio(struct nullb_cmd *cmd)
1149 {
1150         struct bio *bio = cmd->bio;
1151         struct nullb *nullb = cmd->nq->dev->nullb;
1152         int err;
1153         unsigned int len;
1154         sector_t sector;
1155         struct bio_vec bvec;
1156         struct bvec_iter iter;
1157
1158         sector = bio->bi_iter.bi_sector;
1159
1160         if (bio_op(bio) == REQ_OP_DISCARD) {
1161                 null_handle_discard(nullb, sector,
1162                         bio_sectors(bio) << SECTOR_SHIFT);
1163                 return 0;
1164         }
1165
1166         spin_lock_irq(&nullb->lock);
1167         bio_for_each_segment(bvec, bio, iter) {
1168                 len = bvec.bv_len;
1169                 err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
1170                                      op_is_write(bio_op(bio)), sector,
1171                                      bio->bi_opf & REQ_FUA);
1172                 if (err) {
1173                         spin_unlock_irq(&nullb->lock);
1174                         return err;
1175                 }
1176                 sector += len >> SECTOR_SHIFT;
1177         }
1178         spin_unlock_irq(&nullb->lock);
1179         return 0;
1180 }
1181
1182 static void null_stop_queue(struct nullb *nullb)
1183 {
1184         struct request_queue *q = nullb->q;
1185
1186         if (nullb->dev->queue_mode == NULL_Q_MQ)
1187                 blk_mq_stop_hw_queues(q);
1188         else {
1189                 spin_lock_irq(q->queue_lock);
1190                 blk_stop_queue(q);
1191                 spin_unlock_irq(q->queue_lock);
1192         }
1193 }
1194
1195 static void null_restart_queue_async(struct nullb *nullb)
1196 {
1197         struct request_queue *q = nullb->q;
1198         unsigned long flags;
1199
1200         if (nullb->dev->queue_mode == NULL_Q_MQ)
1201                 blk_mq_start_stopped_hw_queues(q, true);
1202         else {
1203                 spin_lock_irqsave(q->queue_lock, flags);
1204                 blk_start_queue_async(q);
1205                 spin_unlock_irqrestore(q->queue_lock, flags);
1206         }
1207 }
1208
1209 static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
1210 {
1211         struct nullb_device *dev = cmd->nq->dev;
1212         struct nullb *nullb = dev->nullb;
1213         int err = 0;
1214
1215         if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
1216                 struct request *rq = cmd->rq;
1217
1218                 if (!hrtimer_active(&nullb->bw_timer))
1219                         hrtimer_restart(&nullb->bw_timer);
1220
1221                 if (atomic_long_sub_return(blk_rq_bytes(rq),
1222                                 &nullb->cur_bytes) < 0) {
1223                         null_stop_queue(nullb);
1224                         /* race with timer */
1225                         if (atomic_long_read(&nullb->cur_bytes) > 0)
1226                                 null_restart_queue_async(nullb);
1227                         if (dev->queue_mode == NULL_Q_RQ) {
1228                                 struct request_queue *q = nullb->q;
1229
1230                                 spin_lock_irq(q->queue_lock);
1231                                 rq->rq_flags |= RQF_DONTPREP;
1232                                 blk_requeue_request(q, rq);
1233                                 spin_unlock_irq(q->queue_lock);
1234                                 return BLK_STS_OK;
1235                         } else
1236                                 /* requeue request */
1237                                 return BLK_STS_RESOURCE;
1238                 }
1239         }
1240
1241         if (nullb->dev->badblocks.shift != -1) {
1242                 int bad_sectors;
1243                 sector_t sector, size, first_bad;
1244                 bool is_flush = true;
1245
1246                 if (dev->queue_mode == NULL_Q_BIO &&
1247                                 bio_op(cmd->bio) != REQ_OP_FLUSH) {
1248                         is_flush = false;
1249                         sector = cmd->bio->bi_iter.bi_sector;
1250                         size = bio_sectors(cmd->bio);
1251                 }
1252                 if (dev->queue_mode != NULL_Q_BIO &&
1253                                 req_op(cmd->rq) != REQ_OP_FLUSH) {
1254                         is_flush = false;
1255                         sector = blk_rq_pos(cmd->rq);
1256                         size = blk_rq_sectors(cmd->rq);
1257                 }
1258                 if (!is_flush && badblocks_check(&nullb->dev->badblocks, sector,
1259                                 size, &first_bad, &bad_sectors)) {
1260                         cmd->error = BLK_STS_IOERR;
1261                         goto out;
1262                 }
1263         }
1264
1265         if (dev->memory_backed) {
1266                 if (dev->queue_mode == NULL_Q_BIO) {
1267                         if (bio_op(cmd->bio) == REQ_OP_FLUSH)
1268                                 err = null_handle_flush(nullb);
1269                         else
1270                                 err = null_handle_bio(cmd);
1271                 } else {
1272                         if (req_op(cmd->rq) == REQ_OP_FLUSH)
1273                                 err = null_handle_flush(nullb);
1274                         else
1275                                 err = null_handle_rq(cmd);
1276                 }
1277         }
1278         cmd->error = errno_to_blk_status(err);
1279 out:
1280         /* Complete the IO inline, via softirq, or via timer */
1281         switch (dev->irqmode) {
1282         case NULL_IRQ_SOFTIRQ:
1283                 switch (dev->queue_mode)  {
1284                 case NULL_Q_MQ:
1285                         blk_mq_complete_request(cmd->rq);
1286                         break;
1287                 case NULL_Q_RQ:
1288                         blk_complete_request(cmd->rq);
1289                         break;
1290                 case NULL_Q_BIO:
1291                         /*
1292                          * XXX: no proper submitting cpu information available.
1293                          */
1294                         end_cmd(cmd);
1295                         break;
1296                 }
1297                 break;
1298         case NULL_IRQ_NONE:
1299                 end_cmd(cmd);
1300                 break;
1301         case NULL_IRQ_TIMER:
1302                 null_cmd_end_timer(cmd);
1303                 break;
1304         }
1305         return BLK_STS_OK;
1306 }
1307
1308 static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
1309 {
1310         struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
1311         ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1312         unsigned int mbps = nullb->dev->mbps;
1313
1314         if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
1315                 return HRTIMER_NORESTART;
1316
1317         atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
1318         null_restart_queue_async(nullb);
1319
1320         hrtimer_forward_now(&nullb->bw_timer, timer_interval);
1321
1322         return HRTIMER_RESTART;
1323 }
1324
1325 static void nullb_setup_bwtimer(struct nullb *nullb)
1326 {
1327         ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1328
1329         hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1330         nullb->bw_timer.function = nullb_bwtimer_fn;
1331         atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
1332         hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
1333 }
1334
1335 static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
1336 {
1337         int index = 0;
1338
1339         if (nullb->nr_queues != 1)
1340                 index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
1341
1342         return &nullb->queues[index];
1343 }
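
/*
 * Mapping example: the submitting CPU is spread evenly over the queues.  With
 * nr_cpu_ids = 8 and nr_queues = 2, for instance, CPUs 0-3 map to queues[0]
 * and CPUs 4-7 map to queues[1]; with a single queue the index is always 0.
 */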
1344
1345 static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
1346 {
1347         struct nullb *nullb = q->queuedata;
1348         struct nullb_queue *nq = nullb_to_queue(nullb);
1349         struct nullb_cmd *cmd;
1350
1351         cmd = alloc_cmd(nq, 1);
1352         cmd->bio = bio;
1353
1354         null_handle_cmd(cmd);
1355         return BLK_QC_T_NONE;
1356 }
1357
1358 static int null_rq_prep_fn(struct request_queue *q, struct request *req)
1359 {
1360         struct nullb *nullb = q->queuedata;
1361         struct nullb_queue *nq = nullb_to_queue(nullb);
1362         struct nullb_cmd *cmd;
1363
1364         cmd = alloc_cmd(nq, 0);
1365         if (cmd) {
1366                 cmd->rq = req;
1367                 req->special = cmd;
1368                 return BLKPREP_OK;
1369         }
1370         blk_stop_queue(q);
1371
1372         return BLKPREP_DEFER;
1373 }
1374
1375 static void null_request_fn(struct request_queue *q)
1376 {
1377         struct request *rq;
1378
1379         while ((rq = blk_fetch_request(q)) != NULL) {
1380                 struct nullb_cmd *cmd = rq->special;
1381
1382                 spin_unlock_irq(q->queue_lock);
1383                 null_handle_cmd(cmd);
1384                 spin_lock_irq(q->queue_lock);
1385         }
1386 }
1387
1388 static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
1389                          const struct blk_mq_queue_data *bd)
1390 {
1391         struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
1392         struct nullb_queue *nq = hctx->driver_data;
1393
1394         might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
1395
1396         if (nq->dev->irqmode == NULL_IRQ_TIMER) {
1397                 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1398                 cmd->timer.function = null_cmd_timer_expired;
1399         }
1400         cmd->rq = bd->rq;
1401         cmd->error = BLK_STS_OK;
1402         cmd->nq = nq;
1403
1404         blk_mq_start_request(bd->rq);
1405
1406         return null_handle_cmd(cmd);
1407 }
1408
1409 static const struct blk_mq_ops null_mq_ops = {
1410         .queue_rq       = null_queue_rq,
1411         .complete       = null_softirq_done_fn,
1412 };
1413
1414 static void cleanup_queue(struct nullb_queue *nq)
1415 {
1416         kfree(nq->tag_map);
1417         kfree(nq->cmds);
1418 }
1419
1420 static void cleanup_queues(struct nullb *nullb)
1421 {
1422         int i;
1423
1424         for (i = 0; i < nullb->nr_queues; i++)
1425                 cleanup_queue(&nullb->queues[i]);
1426
1427         kfree(nullb->queues);
1428 }
1429
1430 #ifdef CONFIG_NVM
1431
1432 static void null_lnvm_end_io(struct request *rq, blk_status_t status)
1433 {
1434         struct nvm_rq *rqd = rq->end_io_data;
1435
1436         /* XXX: lightnvm core seems to expect NVM_RSP_* values here... */
1437         rqd->error = status ? -EIO : 0;
1438         nvm_end_io(rqd);
1439
1440         blk_put_request(rq);
1441 }
1442
1443 static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
1444 {
1445         struct request_queue *q = dev->q;
1446         struct request *rq;
1447         struct bio *bio = rqd->bio;
1448
1449         rq = blk_mq_alloc_request(q,
1450                 op_is_write(bio_op(bio)) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
1451         if (IS_ERR(rq))
1452                 return -ENOMEM;
1453
1454         blk_init_request_from_bio(rq, bio);
1455
1456         rq->end_io_data = rqd;
1457
1458         blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);
1459
1460         return 0;
1461 }
1462
1463 static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
1464 {
1465         struct nullb *nullb = dev->q->queuedata;
1466         sector_t size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
1467         sector_t blksize;
1468         struct nvm_id_group *grp;
1469
1470         id->ver_id = 0x1;
1471         id->vmnt = 0;
1472         id->cap = 0x2;
1473         id->dom = 0x1;
1474
1475         id->ppaf.blk_offset = 0;
1476         id->ppaf.blk_len = 16;
1477         id->ppaf.pg_offset = 16;
1478         id->ppaf.pg_len = 16;
1479         id->ppaf.sect_offset = 32;
1480         id->ppaf.sect_len = 8;
1481         id->ppaf.pln_offset = 40;
1482         id->ppaf.pln_len = 8;
1483         id->ppaf.lun_offset = 48;
1484         id->ppaf.lun_len = 8;
1485         id->ppaf.ch_offset = 56;
1486         id->ppaf.ch_len = 8;
1487
1488         sector_div(size, nullb->dev->blocksize); /* convert size to pages */
1489         size >>= 8; /* convert size from pages to blocks (256 pages per block) */
1490         grp = &id->grp;
1491         grp->mtype = 0;
1492         grp->fmtype = 0;
1493         grp->num_ch = 1;
1494         grp->num_pg = 256;
1495         blksize = size;
1496         size >>= 16;
1497         grp->num_lun = size + 1;
1498         sector_div(blksize, grp->num_lun);
1499         grp->num_blk = blksize;
1500         grp->num_pln = 1;
1501
1502         grp->fpg_sz = nullb->dev->blocksize;
1503         grp->csecs = nullb->dev->blocksize;
1504         grp->trdt = 25000;
1505         grp->trdm = 25000;
1506         grp->tprt = 500000;
1507         grp->tprm = 500000;
1508         grp->tbet = 1500000;
1509         grp->tbem = 1500000;
1510         grp->mpos = 0x010101; /* single plane rwe */
1511         grp->cpar = nullb->dev->hw_queue_depth;
1512
1513         return 0;
1514 }
1515
1516 static void *null_lnvm_create_dma_pool(struct nvm_dev *dev, char *name)
1517 {
1518         mempool_t *virtmem_pool;
1519
1520         virtmem_pool = mempool_create_slab_pool(64, ppa_cache);
1521         if (!virtmem_pool) {
1522                 pr_err("null_blk: Unable to create virtual memory pool\n");
1523                 return NULL;
1524         }
1525
1526         return virtmem_pool;
1527 }
1528
1529 static void null_lnvm_destroy_dma_pool(void *pool)
1530 {
1531         mempool_destroy(pool);
1532 }
1533
1534 static void *null_lnvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
1535                                 gfp_t mem_flags, dma_addr_t *dma_handler)
1536 {
1537         return mempool_alloc(pool, mem_flags);
1538 }
1539
1540 static void null_lnvm_dev_dma_free(void *pool, void *entry,
1541                                                         dma_addr_t dma_handler)
1542 {
1543         mempool_free(entry, pool);
1544 }
1545
1546 static struct nvm_dev_ops null_lnvm_dev_ops = {
1547         .identity               = null_lnvm_id,
1548         .submit_io              = null_lnvm_submit_io,
1549
1550         .create_dma_pool        = null_lnvm_create_dma_pool,
1551         .destroy_dma_pool       = null_lnvm_destroy_dma_pool,
1552         .dev_dma_alloc          = null_lnvm_dev_dma_alloc,
1553         .dev_dma_free           = null_lnvm_dev_dma_free,
1554
1555         /* Simulate nvme protocol restriction */
1556         .max_phys_sect          = 64,
1557 };
1558
1559 static int null_nvm_register(struct nullb *nullb)
1560 {
1561         struct nvm_dev *dev;
1562         int rv;
1563
1564         dev = nvm_alloc_dev(0);
1565         if (!dev)
1566                 return -ENOMEM;
1567
1568         dev->q = nullb->q;
1569         memcpy(dev->name, nullb->disk_name, DISK_NAME_LEN);
1570         dev->ops = &null_lnvm_dev_ops;
1571
1572         rv = nvm_register(dev);
1573         if (rv) {
1574                 kfree(dev);
1575                 return rv;
1576         }
1577         nullb->ndev = dev;
1578         return 0;
1579 }
1580
1581 static void null_nvm_unregister(struct nullb *nullb)
1582 {
1583         nvm_unregister(nullb->ndev);
1584 }
1585 #else
1586 static int null_nvm_register(struct nullb *nullb)
1587 {
1588         pr_err("null_blk: CONFIG_NVM needs to be enabled for LightNVM\n");
1589         return -EINVAL;
1590 }
1591 static void null_nvm_unregister(struct nullb *nullb) {}
1592 #endif /* CONFIG_NVM */
1593
1594 static void null_del_dev(struct nullb *nullb)
1595 {
1596         struct nullb_device *dev;
1597
1598         if (!nullb)
1599                 return;
1600
1601         dev = nullb->dev;
1602
1603         ida_simple_remove(&nullb_indexes, nullb->index);
1604
1605         list_del_init(&nullb->list);
1606
1607         if (dev->use_lightnvm)
1608                 null_nvm_unregister(nullb);
1609         else
1610                 del_gendisk(nullb->disk);
1611
1612         if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
1613                 hrtimer_cancel(&nullb->bw_timer);
1614                 atomic_long_set(&nullb->cur_bytes, LONG_MAX);
1615                 null_restart_queue_async(nullb);
1616         }
1617
1618         blk_cleanup_queue(nullb->q);
1619         if (dev->queue_mode == NULL_Q_MQ &&
1620             nullb->tag_set == &nullb->__tag_set)
1621                 blk_mq_free_tag_set(nullb->tag_set);
1622         if (!dev->use_lightnvm)
1623                 put_disk(nullb->disk);
1624         cleanup_queues(nullb);
1625         if (null_cache_active(nullb))
1626                 null_free_device_storage(nullb->dev, true);
1627         kfree(nullb);
1628         dev->nullb = NULL;
1629 }
1630
1631 static void null_config_discard(struct nullb *nullb)
1632 {
1633         if (!nullb->dev->discard)
1634                 return;
1635         nullb->q->limits.discard_granularity = nullb->dev->blocksize;
1636         nullb->q->limits.discard_alignment = nullb->dev->blocksize;
1637         blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
1638         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q);
1639 }
1640
1641 static int null_open(struct block_device *bdev, fmode_t mode)
1642 {
1643         return 0;
1644 }
1645
1646 static void null_release(struct gendisk *disk, fmode_t mode)
1647 {
1648 }
1649
1650 static const struct block_device_operations null_fops = {
1651         .owner =        THIS_MODULE,
1652         .open =         null_open,
1653         .release =      null_release,
1654 };
1655
1656 static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
1657 {
1658         BUG_ON(!nullb);
1659         BUG_ON(!nq);
1660
1661         init_waitqueue_head(&nq->wait);
1662         nq->queue_depth = nullb->queue_depth;
1663         nq->dev = nullb->dev;
1664 }
1665
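/*
 * For blk-mq, walk the hardware contexts that actually received tags and
 * bind each one to a nullb_queue via hctx->driver_data.  nr_queues ends
 * up as the number of active hardware queues.
 */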
1666 static void null_init_queues(struct nullb *nullb)
1667 {
1668         struct request_queue *q = nullb->q;
1669         struct blk_mq_hw_ctx *hctx;
1670         struct nullb_queue *nq;
1671         int i;
1672
1673         queue_for_each_hw_ctx(q, hctx, i) {
1674                 if (!hctx->nr_ctx || !hctx->tags)
1675                         continue;
1676                 nq = &nullb->queues[i];
1677                 hctx->driver_data = nq;
1678                 null_init_queue(nullb, nq);
1679                 nullb->nr_queues++;
1680         }
1681 }
1682
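/*
 * In the bio and legacy request modes the driver manages its own tags:
 * each queue gets a pre-allocated array of nullb_cmds plus a bitmap that
 * the tag allocation helpers earlier in this file use to hand out and
 * reclaim tags.
 */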
1683 static int setup_commands(struct nullb_queue *nq)
1684 {
1685         struct nullb_cmd *cmd;
1686         int i, tag_size;
1687
1688         nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
1689         if (!nq->cmds)
1690                 return -ENOMEM;
1691
1692         tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
1693         nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
1694         if (!nq->tag_map) {
1695                 kfree(nq->cmds);
1696                 return -ENOMEM;
1697         }
1698
1699         for (i = 0; i < nq->queue_depth; i++) {
1700                 cmd = &nq->cmds[i];
1701                 INIT_LIST_HEAD(&cmd->list);
1702                 cmd->ll_list.next = NULL;
1703                 cmd->tag = -1U;
1704         }
1705
1706         return 0;
1707 }
1708
1709 static int setup_queues(struct nullb *nullb)
1710 {
1711         nullb->queues = kcalloc(nullb->dev->submit_queues,
1712                 sizeof(struct nullb_queue), GFP_KERNEL);
1713         if (!nullb->queues)
1714                 return -ENOMEM;
1715
1716         nullb->nr_queues = 0;
1717         nullb->queue_depth = nullb->dev->hw_queue_depth;
1718
1719         return 0;
1720 }
1721
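/*
 * Used by the bio and legacy request modes, which do not go through
 * blk-mq: initialize every configured submission queue and its command
 * pool up front.
 */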
1722 static int init_driver_queues(struct nullb *nullb)
1723 {
1724         struct nullb_queue *nq;
1725         int i, ret = 0;
1726
1727         for (i = 0; i < nullb->dev->submit_queues; i++) {
1728                 nq = &nullb->queues[i];
1729
1730                 null_init_queue(nullb, nq);
1731
1732                 ret = setup_commands(nq);
1733                 if (ret)
1734                         return ret;
1735                 nullb->nr_queues++;
1736         }
1737         return 0;
1738 }
1739
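/*
 * Create and publish the gendisk for a non-LightNVM device.  The
 * configured size is given in MB, so it is scaled to bytes and then to
 * 512-byte sectors for set_capacity().
 */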
1740 static int null_gendisk_register(struct nullb *nullb)
1741 {
1742         struct gendisk *disk;
1743         sector_t size;
1744
1745         disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
1746         if (!disk)
1747                 return -ENOMEM;
1748         size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
1749         set_capacity(disk, size >> 9);
1750
1751         disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
1752         disk->major             = null_major;
1753         disk->first_minor       = nullb->index;
1754         disk->fops              = &null_fops;
1755         disk->private_data      = nullb;
1756         disk->queue             = nullb->q;
1757         strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
1758
1759         add_disk(disk);
1760         return 0;
1761 }
1762
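/*
 * Fill in a blk_mq tag set either from a specific device's configuration
 * or, when nullb is NULL (the shared tag set allocated at module init),
 * from the module-wide defaults.
 */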
1763 static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
1764 {
1765         set->ops = &null_mq_ops;
1766         set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
1767                                                 g_submit_queues;
1768         set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
1769                                                 g_hw_queue_depth;
1770         set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
1771         set->cmd_size   = sizeof(struct nullb_cmd);
1772         set->flags = BLK_MQ_F_SHOULD_MERGE;
1773         set->driver_data = NULL;
1774
1775         if ((nullb && nullb->dev->blocking) || g_blocking)
1776                 set->flags |= BLK_MQ_F_BLOCKING;
1777
1778         return blk_mq_alloc_tag_set(set);
1779 }
1780
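/*
 * Clamp and normalize a user-supplied configuration before the device is
 * created: the block size is rounded into the 512..4096 range, LightNVM
 * forces 4k blocks and blk-mq, and the submit queue count is bounded by
 * the node/CPU topology.
 */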
1781 static void null_validate_conf(struct nullb_device *dev)
1782 {
1783         dev->blocksize = round_down(dev->blocksize, 512);
1784         dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
1785         if (dev->use_lightnvm && dev->blocksize != 4096)
1786                 dev->blocksize = 4096;
1787
1788         if (dev->use_lightnvm && dev->queue_mode != NULL_Q_MQ)
1789                 dev->queue_mode = NULL_Q_MQ;
1790
1791         if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
1792                 if (dev->submit_queues != nr_online_nodes)
1793                         dev->submit_queues = nr_online_nodes;
1794         } else if (dev->submit_queues > nr_cpu_ids)
1795                 dev->submit_queues = nr_cpu_ids;
1796         else if (dev->submit_queues == 0)
1797                 dev->submit_queues = 1;
1798
1799         dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
1800         dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
1801
1802         /* memory backing allocates in the I/O path, so force blocking */
1803         if (dev->memory_backed)
1804                 dev->blocking = true;
1805         else /* a cache is meaningless without memory backing */
1806                 dev->cache_size = 0;
1807         dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
1808                                                 dev->cache_size);
1809         dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
1810         /* a bio-based queue cannot be stopped, so throttling is disabled */
1811         if (dev->queue_mode == NULL_Q_BIO)
1812                 dev->mbps = 0;
1813 }
1814
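/*
 * Create one nullb instance from a validated configuration: set up the
 * per-queue data, allocate a request queue for the chosen queue_mode,
 * apply throttling/cache/discard settings, and finally register either a
 * LightNVM target or a gendisk.  The error labels unwind in the reverse
 * order of construction.
 */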
1815 static int null_add_dev(struct nullb_device *dev)
1816 {
1817         struct nullb *nullb;
1818         int rv;
1819
1820         null_validate_conf(dev);
1821
1822         nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
1823         if (!nullb) {
1824                 rv = -ENOMEM;
1825                 goto out;
1826         }
1827         nullb->dev = dev;
1828         dev->nullb = nullb;
1829
1830         spin_lock_init(&nullb->lock);
1831
1832         rv = setup_queues(nullb);
1833         if (rv)
1834                 goto out_free_nullb;
1835
1836         if (dev->queue_mode == NULL_Q_MQ) {
1837                 if (shared_tags) {
1838                         nullb->tag_set = &tag_set;
1839                         rv = 0;
1840                 } else {
1841                         nullb->tag_set = &nullb->__tag_set;
1842                         rv = null_init_tag_set(nullb, nullb->tag_set);
1843                 }
1844
1845                 if (rv)
1846                         goto out_cleanup_queues;
1847
1848                 nullb->q = blk_mq_init_queue(nullb->tag_set);
1849                 if (IS_ERR(nullb->q)) {
1850                         rv = PTR_ERR(nullb->q);
1851                         goto out_cleanup_tags;
1852                 }
1853                 null_init_queues(nullb);
1854         } else if (dev->queue_mode == NULL_Q_BIO) {
1855                 nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
1856                 if (!nullb->q) {
1857                         rv = -ENOMEM;
1858                         goto out_cleanup_queues;
1859                 }
1860                 blk_queue_make_request(nullb->q, null_queue_bio);
1861                 rv = init_driver_queues(nullb);
1862                 if (rv)
1863                         goto out_cleanup_blk_queue;
1864         } else {
1865                 nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock,
1866                                                 dev->home_node);
1867                 if (!nullb->q) {
1868                         rv = -ENOMEM;
1869                         goto out_cleanup_queues;
1870                 }
1871                 blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
1872                 blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
1873                 rv = init_driver_queues(nullb);
1874                 if (rv)
1875                         goto out_cleanup_blk_queue;
1876         }
1877
1878         if (dev->mbps) {
1879                 set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
1880                 nullb_setup_bwtimer(nullb);
1881         }
1882
1883         if (dev->cache_size > 0) {
1884                 set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
1885                 blk_queue_write_cache(nullb->q, true, true);
1886                 blk_queue_flush_queueable(nullb->q, true);
1887         }
1888
1889         nullb->q->queuedata = nullb;
1890         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
1891         queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
1892
1893         mutex_lock(&lock);
1894         nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
1895         dev->index = nullb->index;
1896         mutex_unlock(&lock);
1897
1898         blk_queue_logical_block_size(nullb->q, dev->blocksize);
1899         blk_queue_physical_block_size(nullb->q, dev->blocksize);
1900
1901         null_config_discard(nullb);
1902
1903         sprintf(nullb->disk_name, "nullb%d", nullb->index);
1904
1905         if (dev->use_lightnvm)
1906                 rv = null_nvm_register(nullb);
1907         else
1908                 rv = null_gendisk_register(nullb);
1909
1910         if (rv)
1911                 goto out_cleanup_blk_queue;
1912
1913         mutex_lock(&lock);
1914         list_add_tail(&nullb->list, &nullb_list);
1915         mutex_unlock(&lock);
1916
1917         return 0;
1918 out_cleanup_blk_queue:
1919         blk_cleanup_queue(nullb->q);
1920 out_cleanup_tags:
1921         if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
1922                 blk_mq_free_tag_set(nullb->tag_set);
1923 out_cleanup_queues:
1924         cleanup_queues(nullb);
1925 out_free_nullb:
1926         kfree(nullb);
1927         dev->nullb = NULL;
1928 out:
1929         return rv;
1930 }
1931
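/*
 * Module init: sanity-check the module parameters, register the configfs
 * subsystem and a block major, then create nr_devices devices with the
 * default configuration.  Additional devices can later be created through
 * configfs.
 *
 * A typical invocation, assuming the module parameter names declared
 * earlier in this file (nr_devices, queue_mode, bs, gb), might look like:
 *
 *   modprobe null_blk nr_devices=2 queue_mode=2 bs=4096 gb=4
 *
 * which would create /dev/nullb0 and /dev/nullb1 as 4 GB blk-mq devices
 * with a 4k block size.
 */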
1932 static int __init null_init(void)
1933 {
1934         int ret = 0;
1935         unsigned int i;
1936         struct nullb *nullb;
1937         struct nullb_device *dev;
1938
1939         if (g_bs > PAGE_SIZE) {
1940                 pr_warn("null_blk: invalid block size\n");
1941                 pr_warn("null_blk: defaulting block size to %lu\n", PAGE_SIZE);
1942                 g_bs = PAGE_SIZE;
1943         }
1944
1945         if (g_use_lightnvm && g_bs != 4096) {
1946                 pr_warn("null_blk: LightNVM only supports 4k block size\n");
1947                 pr_warn("null_blk: defaulting block size to 4k\n");
1948                 g_bs = 4096;
1949         }
1950
1951         if (g_use_lightnvm && g_queue_mode != NULL_Q_MQ) {
1952                 pr_warn("null_blk: LightNVM is only supported with blk-mq\n");
1953                 pr_warn("null_blk: defaulting queue mode to blk-mq\n");
1954                 g_queue_mode = NULL_Q_MQ;
1955         }
1956
1957         if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
1958                 if (g_submit_queues != nr_online_nodes) {
1959                         pr_warn("null_blk: submit_queues param is set to %u.\n",
1960                                                         nr_online_nodes);
1961                         g_submit_queues = nr_online_nodes;
1962                 }
1963         } else if (g_submit_queues > nr_cpu_ids)
1964                 g_submit_queues = nr_cpu_ids;
1965         else if (g_submit_queues <= 0)
1966                 g_submit_queues = 1;
1967
1968         if (g_queue_mode == NULL_Q_MQ && shared_tags) {
1969                 ret = null_init_tag_set(NULL, &tag_set);
1970                 if (ret)
1971                         return ret;
1972         }
1973
1974         config_group_init(&nullb_subsys.su_group);
1975         mutex_init(&nullb_subsys.su_mutex);
1976
1977         ret = configfs_register_subsystem(&nullb_subsys);
1978         if (ret)
1979                 goto err_tagset;
1980
1981         mutex_init(&lock);
1982
1983         null_major = register_blkdev(0, "nullb");
1984         if (null_major < 0) {
1985                 ret = null_major;
1986                 goto err_conf;
1987         }
1988
1989         if (g_use_lightnvm) {
1990                 ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
1991                                                                 0, 0, NULL);
1992                 if (!ppa_cache) {
1993                         pr_err("null_blk: unable to create ppa cache\n");
1994                         ret = -ENOMEM;
1995                         goto err_ppa;
1996                 }
1997         }
1998
1999         for (i = 0; i < nr_devices; i++) {
2000                 dev = null_alloc_dev();
2001                 if (!dev) {
2002                         ret = -ENOMEM;
2003                         goto err_dev;
2004                 }
2005                 ret = null_add_dev(dev);
2006                 if (ret) {
2007                         null_free_dev(dev);
2008                         goto err_dev;
2009                 }
2010         }
2011
2012         pr_info("null_blk: module loaded\n");
2013         return 0;
2014
2015 err_dev:
2016         while (!list_empty(&nullb_list)) {
2017                 nullb = list_entry(nullb_list.next, struct nullb, list);
2018                 dev = nullb->dev;
2019                 null_del_dev(nullb);
2020                 null_free_dev(dev);
2021         }
2022         kmem_cache_destroy(ppa_cache);
2023 err_ppa:
2024         unregister_blkdev(null_major, "nullb");
2025 err_conf:
2026         configfs_unregister_subsystem(&nullb_subsys);
2027 err_tagset:
2028         if (g_queue_mode == NULL_Q_MQ && shared_tags)
2029                 blk_mq_free_tag_set(&tag_set);
2030         return ret;
2031 }
2032
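/*
 * Module exit: unregister configfs and the block major first so no new
 * devices can appear, then delete and free every remaining device under
 * the global lock, and finally release the shared tag set and the
 * LightNVM ppa cache (kmem_cache_destroy() tolerates a NULL cache).
 */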
2033 static void __exit null_exit(void)
2034 {
2035         struct nullb *nullb;
2036
2037         configfs_unregister_subsystem(&nullb_subsys);
2038
2039         unregister_blkdev(null_major, "nullb");
2040
2041         mutex_lock(&lock);
2042         while (!list_empty(&nullb_list)) {
2043                 struct nullb_device *dev;
2044
2045                 nullb = list_entry(nullb_list.next, struct nullb, list);
2046                 dev = nullb->dev;
2047                 null_del_dev(nullb);
2048                 null_free_dev(dev);
2049         }
2050         mutex_unlock(&lock);
2051
2052         if (g_queue_mode == NULL_Q_MQ && shared_tags)
2053                 blk_mq_free_tag_set(&tag_set);
2054
2055         kmem_cache_destroy(ppa_cache);
2056 }
2057
2058 module_init(null_init);
2059 module_exit(null_exit);
2060
2061 MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
2062 MODULE_LICENSE("GPL");