GNU Linux-libre 4.19.264-gnu1
[releases.git] / arch / um / drivers / ubd_kern.c
1 /*
2  * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
3  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
4  * Licensed under the GPL
5  */
6
7 /* 2001-09-28...2002-04-17
8  * Partition stuff by James_McMechan@hotmail.com
9  * old style ubd by setting UBD_SHIFT to 0
10  * 2002-09-27...2002-10-18 massive tinkering for 2.5
11  * partitions have changed in 2.5
12  * 2003-01-29 more tinkering for 2.5.59-1
13  * This should now address the sysfs problems and has
14  * the symlink for devfs to allow for booting with
15  * the common /dev/ubd/discX/... names rather than
16  * only /dev/ubdN/discN this version also has lots of
17  * clean ups preparing for ubd-many.
18  * James McMechan
19  */
20
21 #define UBD_SHIFT 4
22
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/blkdev.h>
26 #include <linux/ata.h>
27 #include <linux/hdreg.h>
28 #include <linux/cdrom.h>
29 #include <linux/proc_fs.h>
30 #include <linux/seq_file.h>
31 #include <linux/ctype.h>
32 #include <linux/slab.h>
33 #include <linux/vmalloc.h>
34 #include <linux/platform_device.h>
35 #include <linux/scatterlist.h>
36 #include <asm/tlbflush.h>
37 #include <kern_util.h>
38 #include "mconsole_kern.h"
39 #include <init.h>
40 #include <irq_kern.h>
41 #include "ubd.h"
42 #include <os.h>
43 #include "cow.h"
44
/* Operation selector stored in io_thread_req.op. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE,
	UBD_FLUSH
};
46
47 struct io_thread_req {
48         struct request *req;
49         enum ubd_req op;
50         int fds[2];
51         unsigned long offsets[2];
52         unsigned long long offset;
53         unsigned long length;
54         char *buffer;
55         int sectorsize;
56         unsigned long sector_mask;
57         unsigned long long cow_offset;
58         unsigned long bitmap_words[2];
59         int error;
60 };
61
62
/*
 * Receive state in the shape bulk_req_safe_read() expects: a buffer of
 * request pointers, scratch storage for the bytes of a partially read
 * pointer, and the number of bytes currently held in that scratch.
 */

/* Set used by ubd_handler() when reading from thread_fd. */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/* Second set; NOTE(review): presumably for the I/O thread side - confirm. */
static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
70
71
72
/*
 * Report whether bit number @bit is set in the byte-addressed bitmap
 * @data.  Bits are numbered from the least significant bit of data[0].
 * Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_cell = sizeof(*data) * 8;
	__u64 cell = bit / bits_per_cell;
	int shift = bit % bits_per_cell;

	return ((data[cell] >> shift) & 1) ? 1 : 0;
}
83
/*
 * Set bit number @bit in the byte-addressed bitmap @data.  Bits are
 * numbered from the least significant bit of data[0]; other bits are
 * left untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_cell = sizeof(*data) * 8;
	unsigned char *cell = &data[bit / bits_per_cell];

	*cell |= 1 << (bit % bits_per_cell);
}
94 /*End stuff from ubd_user.h*/
95
96 #define DRIVER_NAME "uml-blkdev"
97
98 static DEFINE_MUTEX(ubd_lock);
99 static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
100
101 static int ubd_open(struct block_device *bdev, fmode_t mode);
102 static void ubd_release(struct gendisk *disk, fmode_t mode);
103 static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
104                      unsigned int cmd, unsigned long arg);
105 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
106
107 #define MAX_DEV (16)
108
109 static const struct block_device_operations ubd_blops = {
110         .owner          = THIS_MODULE,
111         .open           = ubd_open,
112         .release        = ubd_release,
113         .ioctl          = ubd_ioctl,
114         .getgeo         = ubd_getgeo,
115 };
116
117 /* Protected by ubd_lock */
118 static int fake_major = UBD_MAJOR;
119 static struct gendisk *ubd_gendisk[MAX_DEV];
120 static struct gendisk *fake_gendisk[MAX_DEV];
121
122 #ifdef CONFIG_BLK_DEV_UBD_SYNC
123 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
124                                          .cl = 1 })
125 #else
126 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
127                                          .cl = 1 })
128 #endif
129 static struct openflags global_openflags = OPEN_FLAGS;
130
/* Copy-on-write state attached to a struct ubd. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* bitmap buffer: vmalloc()ed in ubd_open_dev(), vfree()d on close */
	unsigned long *bitmap;
	/* size handed to vmalloc() and to read_cow_bitmap() for the bitmap */
	unsigned long bitmap_len;
	/* file offset the bitmap is read from */
	int bitmap_offset;
	/* filled in alongside the bitmap geometry when the device is opened */
	int data_offset;
};
141
142 #define MAX_SG 64
143
144 struct ubd {
145         struct list_head restart;
146         /* name (and fd, below) of the file opened for writing, either the
147          * backing or the cow file. */
148         char *file;
149         int count;
150         int fd;
151         __u64 size;
152         struct openflags boot_openflags;
153         struct openflags openflags;
154         unsigned shared:1;
155         unsigned no_cow:1;
156         struct cow cow;
157         struct platform_device pdev;
158         struct request_queue *queue;
159         spinlock_t lock;
160         struct scatterlist sg[MAX_SG];
161         struct request *request;
162         int start_sg, end_sg;
163         sector_t rq_pos;
164 };
165
/* Initialiser for an unconfigured struct cow (no file, no bitmap). */
#define DEFAULT_COW { \
	.file = NULL, \
	.fd = -1, \
	.bitmap = NULL, \
	.bitmap_offset = 0, \
	.data_offset = 0, \
}

/*
 * Initialiser for an unconfigured struct ubd.  Also used by
 * ubd_device_release() to reset a slot.
 */
#define DEFAULT_UBD { \
	.file = NULL, \
	.count = 0, \
	.fd = -1, \
	.size = -1, \
	.boot_openflags = OPEN_FLAGS, \
	.openflags = OPEN_FLAGS, \
	.no_cow = 0, \
	.shared = 0, \
	.cow = DEFAULT_COW, \
	.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
	.request = NULL, \
	.start_sg = 0, \
	.end_sg = 0, \
	.rq_pos = 0, \
}
190
191 /* Protected by ubd_lock */
192 static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
193
194 /* Only changed by fake_ide_setup which is a setup */
195 static int fake_ide = 0;
196 static struct proc_dir_entry *proc_ide_root = NULL;
197 static struct proc_dir_entry *proc_ide = NULL;
198
199 static void make_proc_ide(void)
200 {
201         proc_ide_root = proc_mkdir("ide", NULL);
202         proc_ide = proc_mkdir("ide0", proc_ide_root);
203 }
204
/* seq_file show callback for the fake "media" entry: always "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	static const char media[] = "disk\n";

	seq_puts(m, media);
	return 0;
}
210
211 static void make_ide_entries(const char *dev_name)
212 {
213         struct proc_dir_entry *dir, *ent;
214         char name[64];
215
216         if(proc_ide_root == NULL) make_proc_ide();
217
218         dir = proc_mkdir(dev_name, proc_ide);
219         if(!dir) return;
220
221         ent = proc_create_single("media", S_IRUGO, dir,
222                         fake_ide_media_proc_show);
223         if(!ent) return;
224         snprintf(name, sizeof(name), "ide0/%s", dev_name);
225         proc_symlink(dev_name, proc_ide_root, name);
226 }
227
228 static int fake_ide_setup(char *str)
229 {
230         fake_ide = 1;
231         return 1;
232 }
233
234 __setup("fake_ide", fake_ide_setup);
235
236 __uml_help(fake_ide_setup,
237 "fake_ide\n"
238 "    Create ide0 entries that map onto ubd devices.\n\n"
239 );
240
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single lower-case letter, where 'a'
 * is unit 0.  On success *ptr is advanced past the designator and the
 * unit number is returned; otherwise -1 is returned and *ptr is left
 * alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *after;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &after, 0);
		if (after == cur)
			return -1;
		*ptr = after;
		return unit;
	}

	if (*cur >= 'a' && *cur <= 'z') {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
259
260 /* If *index_out == -1 at exit, the passed option was a general one;
261  * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262  * should not be freed on exit.
263  */
264 static int ubd_setup_common(char *str, int *index_out, char **error_out)
265 {
266         struct ubd *ubd_dev;
267         struct openflags flags = global_openflags;
268         char *backing_file;
269         int n, err = 0, i;
270
271         if(index_out) *index_out = -1;
272         n = *str;
273         if(n == '='){
274                 char *end;
275                 int major;
276
277                 str++;
278                 if(!strcmp(str, "sync")){
279                         global_openflags = of_sync(global_openflags);
280                         goto out1;
281                 }
282
283                 err = -EINVAL;
284                 major = simple_strtoul(str, &end, 0);
285                 if((*end != '\0') || (end == str)){
286                         *error_out = "Didn't parse major number";
287                         goto out1;
288                 }
289
290                 mutex_lock(&ubd_lock);
291                 if (fake_major != UBD_MAJOR) {
292                         *error_out = "Can't assign a fake major twice";
293                         goto out1;
294                 }
295
296                 fake_major = major;
297
298                 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299                        major);
300                 err = 0;
301         out1:
302                 mutex_unlock(&ubd_lock);
303                 return err;
304         }
305
306         n = parse_unit(&str);
307         if(n < 0){
308                 *error_out = "Couldn't parse device number";
309                 return -EINVAL;
310         }
311         if(n >= MAX_DEV){
312                 *error_out = "Device number out of range";
313                 return 1;
314         }
315
316         err = -EBUSY;
317         mutex_lock(&ubd_lock);
318
319         ubd_dev = &ubd_devs[n];
320         if(ubd_dev->file != NULL){
321                 *error_out = "Device is already configured";
322                 goto out;
323         }
324
325         if (index_out)
326                 *index_out = n;
327
328         err = -EINVAL;
329         for (i = 0; i < sizeof("rscd="); i++) {
330                 switch (*str) {
331                 case 'r':
332                         flags.w = 0;
333                         break;
334                 case 's':
335                         flags.s = 1;
336                         break;
337                 case 'd':
338                         ubd_dev->no_cow = 1;
339                         break;
340                 case 'c':
341                         ubd_dev->shared = 1;
342                         break;
343                 case '=':
344                         str++;
345                         goto break_loop;
346                 default:
347                         *error_out = "Expected '=' or flag letter "
348                                 "(r, s, c, or d)";
349                         goto out;
350                 }
351                 str++;
352         }
353
354         if (*str == '=')
355                 *error_out = "Too many flags specified";
356         else
357                 *error_out = "Missing '='";
358         goto out;
359
360 break_loop:
361         backing_file = strchr(str, ',');
362
363         if (backing_file == NULL)
364                 backing_file = strchr(str, ':');
365
366         if(backing_file != NULL){
367                 if(ubd_dev->no_cow){
368                         *error_out = "Can't specify both 'd' and a cow file";
369                         goto out;
370                 }
371                 else {
372                         *backing_file = '\0';
373                         backing_file++;
374                 }
375         }
376         err = 0;
377         ubd_dev->file = str;
378         ubd_dev->cow.file = backing_file;
379         ubd_dev->boot_openflags = flags;
380 out:
381         mutex_unlock(&ubd_lock);
382         return err;
383 }
384
385 static int ubd_setup(char *str)
386 {
387         char *error;
388         int err;
389
390         err = ubd_setup_common(str, NULL, &error);
391         if(err)
392                 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
393                        "%s\n", str, error);
394         return 1;
395 }
396
397 __setup("ubd", ubd_setup);
398 __uml_help(ubd_setup,
399 "ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
400 "    This is used to associate a device with a file in the underlying\n"
401 "    filesystem. When specifying two filenames, the first one is the\n"
402 "    COW name and the second is the backing file name. As separator you can\n"
403 "    use either a ':' or a ',': the first one allows writing things like;\n"
404 "       ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
405 "    while with a ',' the shell would not expand the 2nd '~'.\n"
406 "    When using only one filename, UML will detect whether to treat it like\n"
407 "    a COW file or a backing file. To override this detection, add the 'd'\n"
408 "    flag:\n"
409 "       ubd0d=BackingFile\n"
410 "    Usually, there is a filesystem in the file, but \n"
411 "    that's not required. Swap devices containing swap files can be\n"
412 "    specified like this. Also, a file which doesn't contain a\n"
413 "    filesystem can have its contents read in the virtual \n"
414 "    machine by running 'dd' on the device. <n> must be in the range\n"
415 "    0 to 7. Appending an 'r' to the number will cause that device\n"
416 "    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
417 "    an 's' will cause data to be written to disk on the host immediately.\n"
418 "    'c' will cause the device to be treated as being shared between multiple\n"
419 "    UMLs and file locking will be turned off - this is appropriate for a\n"
420 "    cluster filesystem and inappropriate at almost all other times.\n\n"
421 );
422
423 static int udb_setup(char *str)
424 {
425         printk("udb%s specified on command line is almost certainly a ubd -> "
426                "udb TYPO\n", str);
427         return 1;
428 }
429
430 __setup("udb", udb_setup);
431 __uml_help(udb_setup,
432 "udb\n"
433 "    This option is here solely to catch ubd -> udb typos, which can be\n"
434 "    to impossible to catch visually unless you specifically look for\n"
435 "    them.  The only result of any option starting with 'udb' is an error\n"
436 "    in the boot output.\n\n"
437 );
438
/* Request function installed on every ubd queue by ubd_add(). */
static void do_ubd_request(struct request_queue * q);

/* Only changed by ubd_init, which is an initcall. */
static int thread_fd = -1;

/* Devices whose request function ubd_handler() must call again. */
static LIST_HEAD(restart);
444
/*
 * Read several request pointers at a time from @fd, handling fractional
 * reads if (and as) needed.
 *
 * @request_buffer: destination array, room for @max_recs pointers.
 * @remainder:      scratch storage (one pointer's worth of bytes) holding
 *                  the tail of a pointer that was only partly read.
 * @remainder_size: number of valid bytes in @remainder; updated on every
 *                  successful read.
 * @max_recs:       capacity of @request_buffer in pointers.
 *
 * Returns the number of BYTES of complete pointers now at the start of
 * @request_buffer (a multiple of the pointer size, possibly 0), or the
 * value returned by os_read_file() when that is <= 0.  In the latter case
 * any pending remainder is kept for the next call.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const size_t rec_size = sizeof(struct io_thread_req *);
	char *buf = (char *) request_buffer;
	int carried = *remainder_size;
	int n;
	int res;

	/* Bytes left over from the previous call go first. */
	if (carried > 0)
		memmove(buf, (char *) remainder, carried);

	res = os_read_file(fd, buf + carried, rec_size * max_recs - carried);
	if (res <= 0)
		return res;

	n = carried + res;

	/*
	 * Always recompute the remainder: when the carried bytes plus this
	 * read add up to whole pointers the remainder has been consumed and
	 * must drop back to zero, otherwise the next call would prepend the
	 * same stale bytes again.
	 */
	*remainder_size = n % rec_size;
	if (*remainder_size > 0) {
		/*
		* Read somehow returned not a multiple of dword
		* theoretically possible, but never observed in the
		* wild, so read routine must be able to handle it
		*/
		WARN(1, "UBD IPC read returned a partial result");
		memmove(remainder, buf + (n - *remainder_size),
			*remainder_size);
		n -= *remainder_size;
	}
	return n;
}
497
498 /* Called without dev->lock held, and only in interrupt context. */
499 static void ubd_handler(void)
500 {
501         struct ubd *ubd;
502         struct list_head *list, *next_ele;
503         unsigned long flags;
504         int n;
505         int count;
506
507         while(1){
508                 n = bulk_req_safe_read(
509                         thread_fd,
510                         irq_req_buffer,
511                         &irq_remainder,
512                         &irq_remainder_size,
513                         UBD_REQ_BUFFER_SIZE
514                 );
515                 if (n < 0) {
516                         if(n == -EAGAIN)
517                                 break;
518                         printk(KERN_ERR "spurious interrupt in ubd_handler, "
519                                "err = %d\n", -n);
520                         return;
521                 }
522                 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
523                         blk_end_request(
524                                 (*irq_req_buffer)[count]->req,
525                                 BLK_STS_OK,
526                                 (*irq_req_buffer)[count]->length
527                         );
528                         kfree((*irq_req_buffer)[count]);
529                 }
530         }
531         reactivate_fd(thread_fd, UBD_IRQ);
532
533         list_for_each_safe(list, next_ele, &restart){
534                 ubd = container_of(list, struct ubd, restart);
535                 list_del_init(&ubd->restart);
536                 spin_lock_irqsave(&ubd->lock, flags);
537                 do_ubd_request(ubd->queue);
538                 spin_unlock_irqrestore(&ubd->lock, flags);
539         }
540 }
541
542 static irqreturn_t ubd_intr(int irq, void *dev)
543 {
544         ubd_handler();
545         return IRQ_HANDLED;
546 }
547
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the process recorded in io_pid, if one was recorded. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
558
559 static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
560 {
561         char *file;
562         int fd;
563         int err;
564
565         __u32 version;
566         __u32 align;
567         char *backing_file;
568         time_t mtime;
569         unsigned long long size;
570         int sector_size;
571         int bitmap_offset;
572
573         if (ubd_dev->file && ubd_dev->cow.file) {
574                 file = ubd_dev->cow.file;
575
576                 goto out;
577         }
578
579         fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
580         if (fd < 0)
581                 return fd;
582
583         err = read_cow_header(file_reader, &fd, &version, &backing_file, \
584                 &mtime, &size, &sector_size, &align, &bitmap_offset);
585         os_close_file(fd);
586
587         if(err == -EINVAL)
588                 file = ubd_dev->file;
589         else
590                 file = backing_file;
591
592 out:
593         return os_file_size(file, size_out);
594 }
595
/*
 * Read @len bytes at file offset @offset of @fd into @buf.
 * Returns 0 on success or the negative value from os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return ret < 0 ? ret : 0;
}
606
607 static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
608 {
609         unsigned long modtime;
610         unsigned long long actual;
611         int err;
612
613         err = os_file_modtime(file, &modtime);
614         if (err < 0) {
615                 printk(KERN_ERR "Failed to get modification time of backing "
616                        "file \"%s\", err = %d\n", file, -err);
617                 return err;
618         }
619
620         err = os_file_size(file, &actual);
621         if (err < 0) {
622                 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
623                        "err = %d\n", file, -err);
624                 return err;
625         }
626
627         if (actual != size) {
628                 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
629                  * the typecast.*/
630                 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
631                        "vs backing file\n", (unsigned long long) size, actual);
632                 return -EINVAL;
633         }
634         if (modtime != mtime) {
635                 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
636                        "backing file\n", mtime, modtime);
637                 return -EINVAL;
638         }
639         return 0;
640 }
641
642 static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
643 {
644         struct uml_stat buf1, buf2;
645         int err;
646
647         if (from_cmdline == NULL)
648                 return 0;
649         if (!strcmp(from_cmdline, from_cow))
650                 return 0;
651
652         err = os_stat_file(from_cmdline, &buf1);
653         if (err < 0) {
654                 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
655                        -err);
656                 return 0;
657         }
658         err = os_stat_file(from_cow, &buf2);
659         if (err < 0) {
660                 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
661                        -err);
662                 return 1;
663         }
664         if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
665                 return 0;
666
667         printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
668                "\"%s\" specified in COW header of \"%s\"\n",
669                from_cmdline, from_cow, cow);
670         return 1;
671 }
672
673 static int open_ubd_file(char *file, struct openflags *openflags, int shared,
674                   char **backing_file_out, int *bitmap_offset_out,
675                   unsigned long *bitmap_len_out, int *data_offset_out,
676                   int *create_cow_out)
677 {
678         time_t mtime;
679         unsigned long long size;
680         __u32 version, align;
681         char *backing_file;
682         int fd, err, sectorsize, asked_switch, mode = 0644;
683
684         fd = os_open_file(file, *openflags, mode);
685         if (fd < 0) {
686                 if ((fd == -ENOENT) && (create_cow_out != NULL))
687                         *create_cow_out = 1;
688                 if (!openflags->w ||
689                     ((fd != -EROFS) && (fd != -EACCES)))
690                         return fd;
691                 openflags->w = 0;
692                 fd = os_open_file(file, *openflags, mode);
693                 if (fd < 0)
694                         return fd;
695         }
696
697         if (shared)
698                 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
699         else {
700                 err = os_lock_file(fd, openflags->w);
701                 if (err < 0) {
702                         printk(KERN_ERR "Failed to lock '%s', err = %d\n",
703                                file, -err);
704                         goto out_close;
705                 }
706         }
707
708         /* Successful return case! */
709         if (backing_file_out == NULL)
710                 return fd;
711
712         err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
713                               &size, &sectorsize, &align, bitmap_offset_out);
714         if (err && (*backing_file_out != NULL)) {
715                 printk(KERN_ERR "Failed to read COW header from COW file "
716                        "\"%s\", errno = %d\n", file, -err);
717                 goto out_close;
718         }
719         if (err)
720                 return fd;
721
722         asked_switch = path_requires_switch(*backing_file_out, backing_file,
723                                             file);
724
725         /* Allow switching only if no mismatch. */
726         if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
727                                                    mtime)) {
728                 printk(KERN_ERR "Switching backing file to '%s'\n",
729                        *backing_file_out);
730                 err = write_cow_header(file, fd, *backing_file_out,
731                                        sectorsize, align, &size);
732                 if (err) {
733                         printk(KERN_ERR "Switch failed, errno = %d\n", -err);
734                         goto out_close;
735                 }
736         } else {
737                 *backing_file_out = backing_file;
738                 err = backing_file_mismatch(*backing_file_out, size, mtime);
739                 if (err)
740                         goto out_close;
741         }
742
743         cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
744                   bitmap_len_out, data_offset_out);
745
746         return fd;
747  out_close:
748         os_close_file(fd);
749         return err;
750 }
751
752 static int create_cow_file(char *cow_file, char *backing_file,
753                     struct openflags flags,
754                     int sectorsize, int alignment, int *bitmap_offset_out,
755                     unsigned long *bitmap_len_out, int *data_offset_out)
756 {
757         int err, fd;
758
759         flags.c = 1;
760         fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
761         if (fd < 0) {
762                 err = fd;
763                 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
764                        cow_file, -err);
765                 goto out;
766         }
767
768         err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
769                             bitmap_offset_out, bitmap_len_out,
770                             data_offset_out);
771         if (!err)
772                 return fd;
773         os_close_file(fd);
774  out:
775         return err;
776 }
777
778 static void ubd_close_dev(struct ubd *ubd_dev)
779 {
780         os_close_file(ubd_dev->fd);
781         if(ubd_dev->cow.file == NULL)
782                 return;
783
784         os_close_file(ubd_dev->cow.fd);
785         vfree(ubd_dev->cow.bitmap);
786         ubd_dev->cow.bitmap = NULL;
787 }
788
789 static int ubd_open_dev(struct ubd *ubd_dev)
790 {
791         struct openflags flags;
792         char **back_ptr;
793         int err, create_cow, *create_ptr;
794         int fd;
795
796         ubd_dev->openflags = ubd_dev->boot_openflags;
797         create_cow = 0;
798         create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
799         back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
800
801         fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
802                                 back_ptr, &ubd_dev->cow.bitmap_offset,
803                                 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
804                                 create_ptr);
805
806         if((fd == -ENOENT) && create_cow){
807                 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
808                                           ubd_dev->openflags, 1 << 9, PAGE_SIZE,
809                                           &ubd_dev->cow.bitmap_offset,
810                                           &ubd_dev->cow.bitmap_len,
811                                           &ubd_dev->cow.data_offset);
812                 if(fd >= 0){
813                         printk(KERN_INFO "Creating \"%s\" as COW file for "
814                                "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
815                 }
816         }
817
818         if(fd < 0){
819                 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
820                        -fd);
821                 return fd;
822         }
823         ubd_dev->fd = fd;
824
825         if(ubd_dev->cow.file != NULL){
826                 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
827
828                 err = -ENOMEM;
829                 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
830                 if(ubd_dev->cow.bitmap == NULL){
831                         printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
832                         goto error;
833                 }
834                 flush_tlb_kernel_vm();
835
836                 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
837                                       ubd_dev->cow.bitmap_offset,
838                                       ubd_dev->cow.bitmap_len);
839                 if(err < 0)
840                         goto error;
841
842                 flags = ubd_dev->openflags;
843                 flags.w = 0;
844                 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
845                                     NULL, NULL, NULL, NULL);
846                 if(err < 0) goto error;
847                 ubd_dev->cow.fd = err;
848         }
849         return 0;
850  error:
851         os_close_file(ubd_dev->fd);
852         return err;
853 }
854
855 static void ubd_device_release(struct device *dev)
856 {
857         struct ubd *ubd_dev = dev_get_drvdata(dev);
858
859         blk_cleanup_queue(ubd_dev->queue);
860         *ubd_dev = ((struct ubd) DEFAULT_UBD);
861 }
862
863 static int ubd_disk_register(int major, u64 size, int unit,
864                              struct gendisk **disk_out)
865 {
866         struct device *parent = NULL;
867         struct gendisk *disk;
868
869         disk = alloc_disk(1 << UBD_SHIFT);
870         if(disk == NULL)
871                 return -ENOMEM;
872
873         disk->major = major;
874         disk->first_minor = unit << UBD_SHIFT;
875         disk->fops = &ubd_blops;
876         set_capacity(disk, size / 512);
877         if (major == UBD_MAJOR)
878                 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
879         else
880                 sprintf(disk->disk_name, "ubd_fake%d", unit);
881
882         /* sysfs register (not for ide fake devices) */
883         if (major == UBD_MAJOR) {
884                 ubd_devs[unit].pdev.id   = unit;
885                 ubd_devs[unit].pdev.name = DRIVER_NAME;
886                 ubd_devs[unit].pdev.dev.release = ubd_device_release;
887                 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
888                 platform_device_register(&ubd_devs[unit].pdev);
889                 parent = &ubd_devs[unit].pdev.dev;
890         }
891
892         disk->private_data = &ubd_devs[unit];
893         disk->queue = ubd_devs[unit].queue;
894         device_add_disk(parent, disk, NULL);
895
896         *disk_out = disk;
897         return 0;
898 }
899
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesised so that expressions can be passed safely, and the mask is
 * built by negating 512 rather than left-shifting -1, which is undefined
 * behaviour in C.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & -(1 << 9))
901
902 static int ubd_add(int n, char **error_out)
903 {
904         struct ubd *ubd_dev = &ubd_devs[n];
905         int err = 0;
906
907         if(ubd_dev->file == NULL)
908                 goto out;
909
910         err = ubd_file_size(ubd_dev, &ubd_dev->size);
911         if(err < 0){
912                 *error_out = "Couldn't determine size of device's file";
913                 goto out;
914         }
915
916         ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
917
918         INIT_LIST_HEAD(&ubd_dev->restart);
919         sg_init_table(ubd_dev->sg, MAX_SG);
920
921         err = -ENOMEM;
922         ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
923         if (ubd_dev->queue == NULL) {
924                 *error_out = "Failed to initialize device queue";
925                 goto out;
926         }
927         ubd_dev->queue->queuedata = ubd_dev;
928         blk_queue_write_cache(ubd_dev->queue, true, false);
929
930         blk_queue_max_segments(ubd_dev->queue, MAX_SG);
931         err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
932         if(err){
933                 *error_out = "Failed to register device";
934                 goto out_cleanup;
935         }
936
937         if (fake_major != UBD_MAJOR)
938                 ubd_disk_register(fake_major, ubd_dev->size, n,
939                                   &fake_gendisk[n]);
940
941         /*
942          * Perhaps this should also be under the "if (fake_major)" above
943          * using the fake_disk->disk_name
944          */
945         if (fake_ide)
946                 make_ide_entries(ubd_gendisk[n]->disk_name);
947
948         err = 0;
949 out:
950         return err;
951
952 out_cleanup:
953         blk_cleanup_queue(ubd_dev->queue);
954         goto out;
955 }
956
957 static int ubd_config(char *str, char **error_out)
958 {
959         int n, ret;
960
961         /* This string is possibly broken up and stored, so it's only
962          * freed if ubd_setup_common fails, or if only general options
963          * were set.
964          */
965         str = kstrdup(str, GFP_KERNEL);
966         if (str == NULL) {
967                 *error_out = "Failed to allocate memory";
968                 return -ENOMEM;
969         }
970
971         ret = ubd_setup_common(str, &n, error_out);
972         if (ret)
973                 goto err_free;
974
975         if (n == -1) {
976                 ret = 0;
977                 goto err_free;
978         }
979
980         mutex_lock(&ubd_lock);
981         ret = ubd_add(n, error_out);
982         if (ret)
983                 ubd_devs[n].file = NULL;
984         mutex_unlock(&ubd_lock);
985
986 out:
987         return ret;
988
989 err_free:
990         kfree(str);
991         goto out;
992 }
993
994 static int ubd_get_config(char *name, char *str, int size, char **error_out)
995 {
996         struct ubd *ubd_dev;
997         int n, len = 0;
998
999         n = parse_unit(&name);
1000         if((n >= MAX_DEV) || (n < 0)){
1001                 *error_out = "ubd_get_config : device number out of range";
1002                 return -1;
1003         }
1004
1005         ubd_dev = &ubd_devs[n];
1006         mutex_lock(&ubd_lock);
1007
1008         if(ubd_dev->file == NULL){
1009                 CONFIG_CHUNK(str, size, len, "", 1);
1010                 goto out;
1011         }
1012
1013         CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1014
1015         if(ubd_dev->cow.file != NULL){
1016                 CONFIG_CHUNK(str, size, len, ",", 0);
1017                 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1018         }
1019         else CONFIG_CHUNK(str, size, len, "", 1);
1020
1021  out:
1022         mutex_unlock(&ubd_lock);
1023         return len;
1024 }
1025
1026 static int ubd_id(char **str, int *start_out, int *end_out)
1027 {
1028         int n;
1029
1030         n = parse_unit(str);
1031         *start_out = 0;
1032         *end_out = MAX_DEV - 1;
1033         return n;
1034 }
1035
1036 static int ubd_remove(int n, char **error_out)
1037 {
1038         struct gendisk *disk = ubd_gendisk[n];
1039         struct ubd *ubd_dev;
1040         int err = -ENODEV;
1041
1042         mutex_lock(&ubd_lock);
1043
1044         ubd_dev = &ubd_devs[n];
1045
1046         if(ubd_dev->file == NULL)
1047                 goto out;
1048
1049         /* you cannot remove a open disk */
1050         err = -EBUSY;
1051         if(ubd_dev->count > 0)
1052                 goto out;
1053
1054         ubd_gendisk[n] = NULL;
1055         if(disk != NULL){
1056                 del_gendisk(disk);
1057                 put_disk(disk);
1058         }
1059
1060         if(fake_gendisk[n] != NULL){
1061                 del_gendisk(fake_gendisk[n]);
1062                 put_disk(fake_gendisk[n]);
1063                 fake_gendisk[n] = NULL;
1064         }
1065
1066         err = 0;
1067         platform_device_unregister(&ubd_dev->pdev);
1068 out:
1069         mutex_unlock(&ubd_lock);
1070         return err;
1071 }
1072
1073 /* All these are called by mconsole in process context and without
1074  * ubd-specific locks.  The structure itself is const except for .list.
1075  */
1076 static struct mc_device ubd_mc = {
1077         .list           = LIST_HEAD_INIT(ubd_mc.list),
1078         .name           = "ubd",
1079         .config         = ubd_config,
1080         .get_config     = ubd_get_config,
1081         .id             = ubd_id,
1082         .remove         = ubd_remove,
1083 };
1084
1085 static int __init ubd_mc_init(void)
1086 {
1087         mconsole_register_dev(&ubd_mc);
1088         return 0;
1089 }
1090
1091 __initcall(ubd_mc_init);
1092
1093 static int __init ubd0_init(void)
1094 {
1095         struct ubd *ubd_dev = &ubd_devs[0];
1096
1097         mutex_lock(&ubd_lock);
1098         if(ubd_dev->file == NULL)
1099                 ubd_dev->file = "root_fs";
1100         mutex_unlock(&ubd_lock);
1101
1102         return 0;
1103 }
1104
1105 __initcall(ubd0_init);
1106
1107 /* Used in ubd_init, which is an initcall */
1108 static struct platform_driver ubd_driver = {
1109         .driver = {
1110                 .name  = DRIVER_NAME,
1111         },
1112 };
1113
1114 static int __init ubd_init(void)
1115 {
1116         char *error;
1117         int i, err;
1118
1119         if (register_blkdev(UBD_MAJOR, "ubd"))
1120                 return -1;
1121
1122         if (fake_major != UBD_MAJOR) {
1123                 char name[sizeof("ubd_nnn\0")];
1124
1125                 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1126                 if (register_blkdev(fake_major, "ubd"))
1127                         return -1;
1128         }
1129
1130         irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1131                                        sizeof(struct io_thread_req *),
1132                                        GFP_KERNEL
1133                 );
1134         irq_remainder = 0;
1135
1136         if (irq_req_buffer == NULL) {
1137                 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1138                 return -1;
1139         }
1140         io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1141                                       sizeof(struct io_thread_req *),
1142                                       GFP_KERNEL
1143                 );
1144
1145         io_remainder = 0;
1146
1147         if (io_req_buffer == NULL) {
1148                 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1149                 return -1;
1150         }
1151         platform_driver_register(&ubd_driver);
1152         mutex_lock(&ubd_lock);
1153         for (i = 0; i < MAX_DEV; i++){
1154                 err = ubd_add(i, &error);
1155                 if(err)
1156                         printk(KERN_ERR "Failed to initialize ubd device %d :"
1157                                "%s\n", i, error);
1158         }
1159         mutex_unlock(&ubd_lock);
1160         return 0;
1161 }
1162
1163 late_initcall(ubd_init);
1164
1165 static int __init ubd_driver_init(void){
1166         unsigned long stack;
1167         int err;
1168
1169         /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1170         if(global_openflags.s){
1171                 printk(KERN_INFO "ubd: Synchronous mode\n");
1172                 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1173                  * enough. So use anyway the io thread. */
1174         }
1175         stack = alloc_stack(0, 0);
1176         io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1177                                  &thread_fd);
1178         if(io_pid < 0){
1179                 printk(KERN_ERR
1180                        "ubd : Failed to start I/O thread (errno = %d) - "
1181                        "falling back to synchronous I/O\n", -io_pid);
1182                 io_pid = -1;
1183                 return 0;
1184         }
1185         err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1186                              0, "ubd", ubd_devs);
1187         if(err != 0)
1188                 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1189         return 0;
1190 }
1191
1192 device_initcall(ubd_driver_init);
1193
1194 static int ubd_open(struct block_device *bdev, fmode_t mode)
1195 {
1196         struct gendisk *disk = bdev->bd_disk;
1197         struct ubd *ubd_dev = disk->private_data;
1198         int err = 0;
1199
1200         mutex_lock(&ubd_mutex);
1201         if(ubd_dev->count == 0){
1202                 err = ubd_open_dev(ubd_dev);
1203                 if(err){
1204                         printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1205                                disk->disk_name, ubd_dev->file, -err);
1206                         goto out;
1207                 }
1208         }
1209         ubd_dev->count++;
1210         set_disk_ro(disk, !ubd_dev->openflags.w);
1211
1212         /* This should no more be needed. And it didn't work anyway to exclude
1213          * read-write remounting of filesystems.*/
1214         /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1215                 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1216                 err = -EROFS;
1217         }*/
1218 out:
1219         mutex_unlock(&ubd_mutex);
1220         return err;
1221 }
1222
1223 static void ubd_release(struct gendisk *disk, fmode_t mode)
1224 {
1225         struct ubd *ubd_dev = disk->private_data;
1226
1227         mutex_lock(&ubd_mutex);
1228         if(--ubd_dev->count == 0)
1229                 ubd_close_dev(ubd_dev);
1230         mutex_unlock(&ubd_mutex);
1231 }
1232
1233 static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1234                           __u64 *cow_offset, unsigned long *bitmap,
1235                           __u64 bitmap_offset, unsigned long *bitmap_words,
1236                           __u64 bitmap_len)
1237 {
1238         __u64 sector = io_offset >> 9;
1239         int i, update_bitmap = 0;
1240
1241         for(i = 0; i < length >> 9; i++){
1242                 if(cow_mask != NULL)
1243                         ubd_set_bit(i, (unsigned char *) cow_mask);
1244                 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1245                         continue;
1246
1247                 update_bitmap = 1;
1248                 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1249         }
1250
1251         if(!update_bitmap)
1252                 return;
1253
1254         *cow_offset = sector / (sizeof(unsigned long) * 8);
1255
1256         /* This takes care of the case where we're exactly at the end of the
1257          * device, and *cow_offset + 1 is off the end.  So, just back it up
1258          * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1259          * for the original diagnosis.
1260          */
1261         if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1262                                          sizeof(unsigned long)) - 1))
1263                 (*cow_offset)--;
1264
1265         bitmap_words[0] = bitmap[*cow_offset];
1266         bitmap_words[1] = bitmap[*cow_offset + 1];
1267
1268         *cow_offset *= sizeof(unsigned long);
1269         *cow_offset += bitmap_offset;
1270 }
1271
1272 static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1273                        __u64 bitmap_offset, __u64 bitmap_len)
1274 {
1275         __u64 sector = req->offset >> 9;
1276         int i;
1277
1278         if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1279                 panic("Operation too long");
1280
1281         if(req->op == UBD_READ) {
1282                 for(i = 0; i < req->length >> 9; i++){
1283                         if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1284                                 ubd_set_bit(i, (unsigned char *)
1285                                             &req->sector_mask);
1286                 }
1287         }
1288         else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1289                            &req->cow_offset, bitmap, bitmap_offset,
1290                            req->bitmap_words, bitmap_len);
1291 }
1292
1293 /* Called with dev->lock held */
1294 static void prepare_request(struct request *req, struct io_thread_req *io_req,
1295                             unsigned long long offset, int page_offset,
1296                             int len, struct page *page)
1297 {
1298         struct gendisk *disk = req->rq_disk;
1299         struct ubd *ubd_dev = disk->private_data;
1300
1301         io_req->req = req;
1302         io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1303                 ubd_dev->fd;
1304         io_req->fds[1] = ubd_dev->fd;
1305         io_req->cow_offset = -1;
1306         io_req->offset = offset;
1307         io_req->length = len;
1308         io_req->error = 0;
1309         io_req->sector_mask = 0;
1310
1311         io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1312         io_req->offsets[0] = 0;
1313         io_req->offsets[1] = ubd_dev->cow.data_offset;
1314         io_req->buffer = page_address(page) + page_offset;
1315         io_req->sectorsize = 1 << 9;
1316
1317         if(ubd_dev->cow.file != NULL)
1318                 cowify_req(io_req, ubd_dev->cow.bitmap,
1319                            ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1320
1321 }
1322
1323 /* Called with dev->lock held */
1324 static void prepare_flush_request(struct request *req,
1325                                   struct io_thread_req *io_req)
1326 {
1327         struct gendisk *disk = req->rq_disk;
1328         struct ubd *ubd_dev = disk->private_data;
1329
1330         io_req->req = req;
1331         io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1332                 ubd_dev->fd;
1333         io_req->op = UBD_FLUSH;
1334 }
1335
1336 static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1337 {
1338         int n = os_write_file(thread_fd, &io_req,
1339                              sizeof(io_req));
1340         if (n != sizeof(io_req)) {
1341                 if (n != -EAGAIN)
1342                         printk("write to io thread failed, "
1343                                "errno = %d\n", -n);
1344                 else if (list_empty(&dev->restart))
1345                         list_add(&dev->restart, &restart);
1346
1347                 kfree(io_req);
1348                 return false;
1349         }
1350         return true;
1351 }
1352
1353 /* Called with dev->lock held */
1354 static void do_ubd_request(struct request_queue *q)
1355 {
1356         struct io_thread_req *io_req;
1357         struct request *req;
1358
1359         while(1){
1360                 struct ubd *dev = q->queuedata;
1361                 if(dev->request == NULL){
1362                         struct request *req = blk_fetch_request(q);
1363                         if(req == NULL)
1364                                 return;
1365
1366                         dev->request = req;
1367                         dev->rq_pos = blk_rq_pos(req);
1368                         dev->start_sg = 0;
1369                         dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1370                 }
1371
1372                 req = dev->request;
1373
1374                 if (req_op(req) == REQ_OP_FLUSH) {
1375                         io_req = kmalloc(sizeof(struct io_thread_req),
1376                                          GFP_ATOMIC);
1377                         if (io_req == NULL) {
1378                                 if (list_empty(&dev->restart))
1379                                         list_add(&dev->restart, &restart);
1380                                 return;
1381                         }
1382                         prepare_flush_request(req, io_req);
1383                         if (submit_request(io_req, dev) == false)
1384                                 return;
1385                 }
1386
1387                 while(dev->start_sg < dev->end_sg){
1388                         struct scatterlist *sg = &dev->sg[dev->start_sg];
1389
1390                         io_req = kmalloc(sizeof(struct io_thread_req),
1391                                          GFP_ATOMIC);
1392                         if(io_req == NULL){
1393                                 if(list_empty(&dev->restart))
1394                                         list_add(&dev->restart, &restart);
1395                                 return;
1396                         }
1397                         prepare_request(req, io_req,
1398                                         (unsigned long long)dev->rq_pos << 9,
1399                                         sg->offset, sg->length, sg_page(sg));
1400
1401                         if (submit_request(io_req, dev) == false)
1402                                 return;
1403
1404                         dev->rq_pos += sg->length >> 9;
1405                         dev->start_sg++;
1406                 }
1407                 dev->end_sg = 0;
1408                 dev->request = NULL;
1409         }
1410 }
1411
1412 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1413 {
1414         struct ubd *ubd_dev = bdev->bd_disk->private_data;
1415
1416         geo->heads = 128;
1417         geo->sectors = 32;
1418         geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1419         return 0;
1420 }
1421
1422 static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1423                      unsigned int cmd, unsigned long arg)
1424 {
1425         struct ubd *ubd_dev = bdev->bd_disk->private_data;
1426         u16 ubd_id[ATA_ID_WORDS];
1427
1428         switch (cmd) {
1429                 struct cdrom_volctrl volume;
1430         case HDIO_GET_IDENTITY:
1431                 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1432                 ubd_id[ATA_ID_CYLS]     = ubd_dev->size / (128 * 32 * 512);
1433                 ubd_id[ATA_ID_HEADS]    = 128;
1434                 ubd_id[ATA_ID_SECTORS]  = 32;
1435                 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1436                                  sizeof(ubd_id)))
1437                         return -EFAULT;
1438                 return 0;
1439
1440         case CDROMVOLREAD:
1441                 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1442                         return -EFAULT;
1443                 volume.channel0 = 255;
1444                 volume.channel1 = 255;
1445                 volume.channel2 = 255;
1446                 volume.channel3 = 255;
1447                 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1448                         return -EFAULT;
1449                 return 0;
1450         }
1451         return -EINVAL;
1452 }
1453
1454 static int update_bitmap(struct io_thread_req *req)
1455 {
1456         int n;
1457
1458         if(req->cow_offset == -1)
1459                 return 0;
1460
1461         n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1462                           sizeof(req->bitmap_words), req->cow_offset);
1463         if(n != sizeof(req->bitmap_words)){
1464                 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1465                        req->fds[1]);
1466                 return 1;
1467         }
1468
1469         return 0;
1470 }
1471
1472 static void do_io(struct io_thread_req *req)
1473 {
1474         char *buf;
1475         unsigned long len;
1476         int n, nsectors, start, end, bit;
1477         __u64 off;
1478
1479         if (req->op == UBD_FLUSH) {
1480                 /* fds[0] is always either the rw image or our cow file */
1481                 n = os_sync_file(req->fds[0]);
1482                 if (n != 0) {
1483                         printk("do_io - sync failed err = %d "
1484                                "fd = %d\n", -n, req->fds[0]);
1485                         req->error = 1;
1486                 }
1487                 return;
1488         }
1489
1490         nsectors = req->length / req->sectorsize;
1491         start = 0;
1492         do {
1493                 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1494                 end = start;
1495                 while((end < nsectors) &&
1496                       (ubd_test_bit(end, (unsigned char *)
1497                                     &req->sector_mask) == bit))
1498                         end++;
1499
1500                 off = req->offset + req->offsets[bit] +
1501                         start * req->sectorsize;
1502                 len = (end - start) * req->sectorsize;
1503                 buf = &req->buffer[start * req->sectorsize];
1504
1505                 if(req->op == UBD_READ){
1506                         n = 0;
1507                         do {
1508                                 buf = &buf[n];
1509                                 len -= n;
1510                                 n = os_pread_file(req->fds[bit], buf, len, off);
1511                                 if (n < 0) {
1512                                         printk("do_io - read failed, err = %d "
1513                                                "fd = %d\n", -n, req->fds[bit]);
1514                                         req->error = 1;
1515                                         return;
1516                                 }
1517                         } while((n < len) && (n != 0));
1518                         if (n < len) memset(&buf[n], 0, len - n);
1519                 } else {
1520                         n = os_pwrite_file(req->fds[bit], buf, len, off);
1521                         if(n != len){
1522                                 printk("do_io - write failed err = %d "
1523                                        "fd = %d\n", -n, req->fds[bit]);
1524                                 req->error = 1;
1525                                 return;
1526                         }
1527                 }
1528
1529                 start = end;
1530         } while(start < nsectors);
1531
1532         req->error = update_bitmap(req);
1533 }
1534
/*
 * Descriptor the I/O thread reads requests from and writes completions
 * to.  Changed in start_io_thread, which this file calls only from
 * ubd_driver_init(), an initcall, so the update is serialized.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused. */
static int io_count;
1542
1543 int io_thread(void *arg)
1544 {
1545         int n, count, written, res;
1546
1547         os_fix_helper_signals();
1548
1549         while(1){
1550                 n = bulk_req_safe_read(
1551                         kernel_fd,
1552                         io_req_buffer,
1553                         &io_remainder,
1554                         &io_remainder_size,
1555                         UBD_REQ_BUFFER_SIZE
1556                 );
1557                 if (n < 0) {
1558                         if (n == -EAGAIN) {
1559                                 ubd_read_poll(-1);
1560                                 continue;
1561                         } else {
1562                                 printk("io_thread - read failed, fd = %d, "
1563                                        "err = %d,"
1564                                        "reminder = %d\n",
1565                                        kernel_fd, -n, io_remainder_size);
1566                         }
1567                 }
1568
1569                 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1570                         io_count++;
1571                         do_io((*io_req_buffer)[count]);
1572                 }
1573
1574                 written = 0;
1575
1576                 do {
1577                         res = os_write_file(kernel_fd,
1578                                             ((char *) io_req_buffer) + written,
1579                                             n - written);
1580                         if (res >= 0) {
1581                                 written += res;
1582                         } else {
1583                                 if (res != -EAGAIN) {
1584                                         printk("io_thread - write failed, fd = %d, "
1585                                                "err = %d\n", kernel_fd, -n);
1586                                 }
1587                         }
1588                         if (written < n) {
1589                                 ubd_write_poll(-1);
1590                         }
1591                 } while (written < n);
1592         }
1593
1594         return 0;
1595 }