GNU Linux-libre 4.9.337-gnu1
[releases.git] / fs / ext4 / namei.c
1 /*
2  *  linux/fs/ext4/namei.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/namei.c
12  *
13  *  Copyright (C) 1991, 1992  Linus Torvalds
14  *
15  *  Big-endian to little-endian byte-swapping/bitmaps by
16  *        David S. Miller (davem@caip.rutgers.edu), 1995
17  *  Directory entry file type support and forward compatibility hooks
18  *      for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
19  *  Hash Tree Directory indexing (c)
20  *      Daniel Phillips, 2001
21  *  Hash Tree Directory indexing porting
22  *      Christopher Li, 2002
23  *  Hash Tree Directory indexing cleanup
24  *      Theodore Ts'o, 2002
25  */
26
27 #include <linux/fs.h>
28 #include <linux/pagemap.h>
29 #include <linux/time.h>
30 #include <linux/fcntl.h>
31 #include <linux/stat.h>
32 #include <linux/string.h>
33 #include <linux/quotaops.h>
34 #include <linux/buffer_head.h>
35 #include <linux/bio.h>
36 #include "ext4.h"
37 #include "ext4_jbd2.h"
38
39 #include "xattr.h"
40 #include "acl.h"
41
42 #include <trace/events/ext4.h>
/*
 * Read-ahead tuning for directory searches: how far ahead to read
 * directories while searching them.  NAMEI_RA_SIZE is the product of the
 * other two values.
 */
#define NAMEI_RA_CHUNKS	2
#define NAMEI_RA_BLOCKS	4
#define NAMEI_RA_SIZE	(NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
49
50 static struct buffer_head *ext4_append(handle_t *handle,
51                                         struct inode *inode,
52                                         ext4_lblk_t *block)
53 {
54         struct ext4_map_blocks map;
55         struct buffer_head *bh;
56         int err;
57
58         if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59                      ((inode->i_size >> 10) >=
60                       EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
61                 return ERR_PTR(-ENOSPC);
62
63         *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
64         map.m_lblk = *block;
65         map.m_len = 1;
66
67         /*
68          * We're appending new directory block. Make sure the block is not
69          * allocated yet, otherwise we will end up corrupting the
70          * directory.
71          */
72         err = ext4_map_blocks(NULL, inode, &map, 0);
73         if (err < 0)
74                 return ERR_PTR(err);
75         if (err) {
76                 EXT4_ERROR_INODE(inode, "Logical block already allocated");
77                 return ERR_PTR(-EFSCORRUPTED);
78         }
79
80         bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
81         if (IS_ERR(bh))
82                 return bh;
83         inode->i_size += inode->i_sb->s_blocksize;
84         EXT4_I(inode)->i_disksize = inode->i_size;
85         BUFFER_TRACE(bh, "get_write_access");
86         err = ext4_journal_get_write_access(handle, bh);
87         if (err) {
88                 brelse(bh);
89                 ext4_std_error(inode->i_sb, err);
90                 return ERR_PTR(err);
91         }
92         return bh;
93 }
94
95 static int ext4_dx_csum_verify(struct inode *inode,
96                                struct ext4_dir_entry *dirent);
97
98 /*
99  * Hints to ext4_read_dirblock regarding whether we expect a directory
100  * block being read to be an index block, or a block containing
101  * directory entries (and if the latter, whether it was found via a
102  * logical block in an htree index block).  This is used to control
103  * what sort of sanity checking ext4_read_dirblock() will do on the
104  * directory block read from the storage device.  EITHER means
105  * the caller doesn't know what kind of directory block will be read,
106  * so no specific verification will be done.
107  */
typedef enum {
	EITHER,		/* caller does not know what kind of block it is */
	INDEX,		/* caller expects an htree index block */
	DIRENT,		/* caller expects a block of directory entries */
	DIRENT_HTREE	/* directory entries located via an htree index */
} dirblock_type_t;
111
/*
 * Read a directory block through __ext4_read_dirblock(), passing along the
 * calling function's name and line number for use in its diagnostics.
 */
#define ext4_read_dirblock(dir, lblk, kind) \
	__ext4_read_dirblock((dir), (lblk), (kind), __func__, __LINE__)
114
115 static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
116                                                 ext4_lblk_t block,
117                                                 dirblock_type_t type,
118                                                 const char *func,
119                                                 unsigned int line)
120 {
121         struct buffer_head *bh;
122         struct ext4_dir_entry *dirent;
123         int is_dx_block = 0;
124
125         bh = ext4_bread(NULL, inode, block, 0);
126         if (IS_ERR(bh)) {
127                 __ext4_warning(inode->i_sb, func, line,
128                                "inode #%lu: lblock %lu: comm %s: "
129                                "error %ld reading directory block",
130                                inode->i_ino, (unsigned long)block,
131                                current->comm, PTR_ERR(bh));
132
133                 return bh;
134         }
135         if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
136                 ext4_error_inode(inode, func, line, block,
137                                  "Directory hole found for htree %s block",
138                                  (type == INDEX) ? "index" : "leaf");
139                 return ERR_PTR(-EFSCORRUPTED);
140         }
141         if (!bh)
142                 return NULL;
143         dirent = (struct ext4_dir_entry *) bh->b_data;
144         /* Determine whether or not we have an index block */
145         if (is_dx(inode)) {
146                 if (block == 0)
147                         is_dx_block = 1;
148                 else if (ext4_rec_len_from_disk(dirent->rec_len,
149                                                 inode->i_sb->s_blocksize) ==
150                          inode->i_sb->s_blocksize)
151                         is_dx_block = 1;
152         }
153         if (!is_dx_block && type == INDEX) {
154                 ext4_error_inode(inode, func, line, block,
155                        "directory leaf block found instead of index block");
156                 brelse(bh);
157                 return ERR_PTR(-EFSCORRUPTED);
158         }
159         if (!ext4_has_metadata_csum(inode->i_sb) ||
160             buffer_verified(bh))
161                 return bh;
162
163         /*
164          * An empty leaf block can get mistaken for a index block; for
165          * this reason, we can only check the index checksum when the
166          * caller is sure it should be an index block.
167          */
168         if (is_dx_block && type == INDEX) {
169                 if (ext4_dx_csum_verify(inode, dirent))
170                         set_buffer_verified(bh);
171                 else {
172                         ext4_error_inode(inode, func, line, block,
173                                          "Directory index failed checksum");
174                         brelse(bh);
175                         return ERR_PTR(-EFSBADCRC);
176                 }
177         }
178         if (!is_dx_block) {
179                 if (ext4_dirent_csum_verify(inode, dirent))
180                         set_buffer_verified(bh);
181                 else {
182                         ext4_error_inode(inode, func, line, block,
183                                          "Directory block failed checksum");
184                         brelse(bh);
185                         return ERR_PTR(-EFSBADCRC);
186                 }
187         }
188         return bh;
189 }
190
/* Provide assert() in terms of J_ASSERT() when no assert macro exists yet. */
#if !defined(assert)
#define assert(cond) J_ASSERT(cond)
#endif
194
/*
 * dxtrace(stmt) expands to its argument when DX_DEBUG is defined and to
 * nothing otherwise, so tracing statements vanish from normal builds.
 */
#if defined(DX_DEBUG)
#define dxtrace(stmt) stmt
#else
#define dxtrace(stmt)
#endif
200
201 struct fake_dirent
202 {
203         __le32 inode;
204         __le16 rec_len;
205         u8 name_len;
206         u8 file_type;
207 };
208
209 struct dx_countlimit
210 {
211         __le16 limit;
212         __le16 count;
213 };
214
215 struct dx_entry
216 {
217         __le32 hash;
218         __le32 block;
219 };
220
221 /*
222  * dx_root_info is laid out so that if it should somehow get overlaid by a
223  * dirent the two low bits of the hash version will be zero.  Therefore, the
224  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
225  */
226
227 struct dx_root
228 {
229         struct fake_dirent dot;
230         char dot_name[4];
231         struct fake_dirent dotdot;
232         char dotdot_name[4];
233         struct dx_root_info
234         {
235                 __le32 reserved_zero;
236                 u8 hash_version;
237                 u8 info_length; /* 8 */
238                 u8 indirect_levels;
239                 u8 unused_flags;
240         }
241         info;
242         struct dx_entry entries[0];
243 };
244
245 struct dx_node
246 {
247         struct fake_dirent fake;
248         struct dx_entry entries[0];
249 };
250
251
/*
 * One step of an index lookup as used by dx_probe(): the buffer that was
 * read, plus two pointers into its entry array.
 * NOTE(review): presumably 'entries' is the start of the array and 'at'
 * the slot chosen by the search - confirm against dx_probe().
 */
struct dx_frame {
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};
258
259 struct dx_map_entry
260 {
261         u32 hash;
262         u16 offs;
263         u16 size;
264 };
265
266 /*
267  * This goes at the end of each htree block.
268  */
269 struct dx_tail {
270         u32 dt_reserved;
271         __le32 dt_checksum;     /* crc32c(uuid+inum+dirblock) */
272 };
273
274 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
275 static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
276 static inline unsigned dx_get_hash(struct dx_entry *entry);
277 static void dx_set_hash(struct dx_entry *entry, unsigned value);
278 static unsigned dx_get_count(struct dx_entry *entries);
279 static unsigned dx_get_limit(struct dx_entry *entries);
280 static void dx_set_count(struct dx_entry *entries, unsigned value);
281 static void dx_set_limit(struct dx_entry *entries, unsigned value);
282 static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
283 static unsigned dx_node_limit(struct inode *dir);
284 static struct dx_frame *dx_probe(struct ext4_filename *fname,
285                                  struct inode *dir,
286                                  struct dx_hash_info *hinfo,
287                                  struct dx_frame *frame);
288 static void dx_release(struct dx_frame *frames);
289 static int dx_make_map(struct inode *dir, struct buffer_head *bh,
290                        struct dx_hash_info *hinfo,
291                        struct dx_map_entry *map_tail);
292 static void dx_sort_map(struct dx_map_entry *map, unsigned count);
293 static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
294                 struct dx_map_entry *offsets, int count, unsigned blocksize);
295 static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
296 static void dx_insert_block(struct dx_frame *frame,
297                                         u32 hash, ext4_lblk_t block);
298 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
299                                  struct dx_frame *frame,
300                                  struct dx_frame *frames,
301                                  __u32 *start_hash);
302 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
303                 struct ext4_filename *fname,
304                 struct ext4_dir_entry_2 **res_dir);
305 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
306                              struct inode *dir, struct inode *inode);
307
308 /* checksumming functions */
309 void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
310                             unsigned int blocksize)
311 {
312         memset(t, 0, sizeof(struct ext4_dir_entry_tail));
313         t->det_rec_len = ext4_rec_len_to_disk(
314                         sizeof(struct ext4_dir_entry_tail), blocksize);
315         t->det_reserved_ft = EXT4_FT_DIR_CSUM;
316 }
317
318 /* Walk through a dirent block to find a checksum "dirent" at the tail */
319 static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
320                                                    struct ext4_dir_entry *de)
321 {
322         struct ext4_dir_entry_tail *t;
323
324 #ifdef PARANOID
325         struct ext4_dir_entry *d, *top;
326
327         d = de;
328         top = (struct ext4_dir_entry *)(((void *)de) +
329                 (EXT4_BLOCK_SIZE(inode->i_sb) -
330                 sizeof(struct ext4_dir_entry_tail)));
331         while (d < top && d->rec_len)
332                 d = (struct ext4_dir_entry *)(((void *)d) +
333                     le16_to_cpu(d->rec_len));
334
335         if (d != top)
336                 return NULL;
337
338         t = (struct ext4_dir_entry_tail *)d;
339 #else
340         t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
341 #endif
342
343         if (t->det_reserved_zero1 ||
344             le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
345             t->det_reserved_zero2 ||
346             t->det_reserved_ft != EXT4_FT_DIR_CSUM)
347                 return NULL;
348
349         return t;
350 }
351
352 static __le32 ext4_dirent_csum(struct inode *inode,
353                                struct ext4_dir_entry *dirent, int size)
354 {
355         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
356         struct ext4_inode_info *ei = EXT4_I(inode);
357         __u32 csum;
358
359         csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
360         return cpu_to_le32(csum);
361 }
362
/*
 * Warn that a directory block has no room for its checksum.  The macro
 * supplies the calling function's name and line number to the helper,
 * which forwards them to __ext4_warning_inode().
 */
#define warn_no_space_for_csum(dir)					\
	__warn_no_space_for_csum((dir), __func__, __LINE__)

static void __warn_no_space_for_csum(struct inode *inode, const char *func,
				     unsigned int line)
{
	__ext4_warning_inode(inode, func, line,
		"No space for directory leaf checksum. Please run e2fsck -D.");
}
372
373 int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
374 {
375         struct ext4_dir_entry_tail *t;
376
377         if (!ext4_has_metadata_csum(inode->i_sb))
378                 return 1;
379
380         t = get_dirent_tail(inode, dirent);
381         if (!t) {
382                 warn_no_space_for_csum(inode);
383                 return 0;
384         }
385
386         if (t->det_checksum != ext4_dirent_csum(inode, dirent,
387                                                 (void *)t - (void *)dirent))
388                 return 0;
389
390         return 1;
391 }
392
393 static void ext4_dirent_csum_set(struct inode *inode,
394                                  struct ext4_dir_entry *dirent)
395 {
396         struct ext4_dir_entry_tail *t;
397
398         if (!ext4_has_metadata_csum(inode->i_sb))
399                 return;
400
401         t = get_dirent_tail(inode, dirent);
402         if (!t) {
403                 warn_no_space_for_csum(inode);
404                 return;
405         }
406
407         t->det_checksum = ext4_dirent_csum(inode, dirent,
408                                            (void *)t - (void *)dirent);
409 }
410
411 int ext4_handle_dirty_dirent_node(handle_t *handle,
412                                   struct inode *inode,
413                                   struct buffer_head *bh)
414 {
415         ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
416         return ext4_handle_dirty_metadata(handle, inode, bh);
417 }
418
419 static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
420                                                struct ext4_dir_entry *dirent,
421                                                int *offset)
422 {
423         struct ext4_dir_entry *dp;
424         struct dx_root_info *root;
425         int count_offset;
426
427         if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
428                 count_offset = 8;
429         else if (le16_to_cpu(dirent->rec_len) == 12) {
430                 dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
431                 if (le16_to_cpu(dp->rec_len) !=
432                     EXT4_BLOCK_SIZE(inode->i_sb) - 12)
433                         return NULL;
434                 root = (struct dx_root_info *)(((void *)dp + 12));
435                 if (root->reserved_zero ||
436                     root->info_length != sizeof(struct dx_root_info))
437                         return NULL;
438                 count_offset = 32;
439         } else
440                 return NULL;
441
442         if (offset)
443                 *offset = count_offset;
444         return (struct dx_countlimit *)(((void *)dirent) + count_offset);
445 }
446
447 static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
448                            int count_offset, int count, struct dx_tail *t)
449 {
450         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
451         struct ext4_inode_info *ei = EXT4_I(inode);
452         __u32 csum;
453         int size;
454         __u32 dummy_csum = 0;
455         int offset = offsetof(struct dx_tail, dt_checksum);
456
457         size = count_offset + (count * sizeof(struct dx_entry));
458         csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
459         csum = ext4_chksum(sbi, csum, (__u8 *)t, offset);
460         csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
461
462         return cpu_to_le32(csum);
463 }
464
465 static int ext4_dx_csum_verify(struct inode *inode,
466                                struct ext4_dir_entry *dirent)
467 {
468         struct dx_countlimit *c;
469         struct dx_tail *t;
470         int count_offset, limit, count;
471
472         if (!ext4_has_metadata_csum(inode->i_sb))
473                 return 1;
474
475         c = get_dx_countlimit(inode, dirent, &count_offset);
476         if (!c) {
477                 EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
478                 return 0;
479         }
480         limit = le16_to_cpu(c->limit);
481         count = le16_to_cpu(c->count);
482         if (count_offset + (limit * sizeof(struct dx_entry)) >
483             EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
484                 warn_no_space_for_csum(inode);
485                 return 0;
486         }
487         t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
488
489         if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
490                                             count, t))
491                 return 0;
492         return 1;
493 }
494
495 static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
496 {
497         struct dx_countlimit *c;
498         struct dx_tail *t;
499         int count_offset, limit, count;
500
501         if (!ext4_has_metadata_csum(inode->i_sb))
502                 return;
503
504         c = get_dx_countlimit(inode, dirent, &count_offset);
505         if (!c) {
506                 EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
507                 return;
508         }
509         limit = le16_to_cpu(c->limit);
510         count = le16_to_cpu(c->count);
511         if (count_offset + (limit * sizeof(struct dx_entry)) >
512             EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
513                 warn_no_space_for_csum(inode);
514                 return;
515         }
516         t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
517
518         t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
519 }
520
521 static inline int ext4_handle_dirty_dx_node(handle_t *handle,
522                                             struct inode *inode,
523                                             struct buffer_head *bh)
524 {
525         ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
526         return ext4_handle_dirty_metadata(handle, inode, bh);
527 }
528
529 /*
530  * p is at least 6 bytes before the end of page
531  */
532 static inline struct ext4_dir_entry_2 *
533 ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
534 {
535         return (struct ext4_dir_entry_2 *)((char *)p +
536                 ext4_rec_len_from_disk(p->rec_len, blocksize));
537 }
538
539 /*
540  * Future: use high four bits of block for coalesce-on-delete flags
541  * Mask them off for now.
542  */
543
544 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
545 {
546         return le32_to_cpu(entry->block) & 0x00ffffff;
547 }
548
549 static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
550 {
551         entry->block = cpu_to_le32(value);
552 }
553
554 static inline unsigned dx_get_hash(struct dx_entry *entry)
555 {
556         return le32_to_cpu(entry->hash);
557 }
558
559 static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
560 {
561         entry->hash = cpu_to_le32(value);
562 }
563
564 static inline unsigned dx_get_count(struct dx_entry *entries)
565 {
566         return le16_to_cpu(((struct dx_countlimit *) entries)->count);
567 }
568
569 static inline unsigned dx_get_limit(struct dx_entry *entries)
570 {
571         return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
572 }
573
574 static inline void dx_set_count(struct dx_entry *entries, unsigned value)
575 {
576         ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
577 }
578
579 static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
580 {
581         ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
582 }
583
584 static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
585 {
586         unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
587                 EXT4_DIR_REC_LEN(2) - infosize;
588
589         if (ext4_has_metadata_csum(dir->i_sb))
590                 entry_space -= sizeof(struct dx_tail);
591         return entry_space / sizeof(struct dx_entry);
592 }
593
594 static inline unsigned dx_node_limit(struct inode *dir)
595 {
596         unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
597
598         if (ext4_has_metadata_csum(dir->i_sb))
599                 entry_space -= sizeof(struct dx_tail);
600         return entry_space / sizeof(struct dx_entry);
601 }
602
603 /*
604  * Debug
605  */
606 #ifdef DX_DEBUG
607 static void dx_show_index(char * label, struct dx_entry *entries)
608 {
609         int i, n = dx_get_count (entries);
610         printk(KERN_DEBUG "%s index", label);
611         for (i = 0; i < n; i++) {
612                 printk(KERN_CONT " %x->%lu",
613                        i ? dx_get_hash(entries + i) : 0,
614                        (unsigned long)dx_get_block(entries + i));
615         }
616         printk(KERN_CONT "\n");
617 }
618
/* Totals gathered by the DX_DEBUG dump helpers. */
struct stats {
	unsigned names;		/* entries with a non-zero inode */
	unsigned space;		/* sum of EXT4_DIR_REC_LEN() over those entries */
	unsigned bcount;	/* leaf blocks visited */
};
625
626 static struct stats dx_show_leaf(struct inode *dir,
627                                 struct dx_hash_info *hinfo,
628                                 struct ext4_dir_entry_2 *de,
629                                 int size, int show_names)
630 {
631         unsigned names = 0, space = 0;
632         char *base = (char *) de;
633         struct dx_hash_info h = *hinfo;
634
635         printk("names: ");
636         while ((char *) de < base + size)
637         {
638                 if (de->inode)
639                 {
640                         if (show_names)
641                         {
642 #ifdef CONFIG_EXT4_FS_ENCRYPTION
643                                 int len;
644                                 char *name;
645                                 struct fscrypt_str fname_crypto_str =
646                                         FSTR_INIT(NULL, 0);
647                                 int res = 0;
648
649                                 name  = de->name;
650                                 len = de->name_len;
651                                 if (ext4_encrypted_inode(dir))
652                                         res = fscrypt_get_encryption_info(dir);
653                                 if (res) {
654                                         printk(KERN_WARNING "Error setting up"
655                                                " fname crypto: %d\n", res);
656                                 }
657                                 if (!fscrypt_has_encryption_key(dir)) {
658                                         /* Directory is not encrypted */
659                                         ext4fs_dirhash(de->name,
660                                                 de->name_len, &h);
661                                         printk("%*.s:(U)%x.%u ", len,
662                                                name, h.hash,
663                                                (unsigned) ((char *) de
664                                                            - base));
665                                 } else {
666                                         struct fscrypt_str de_name =
667                                                 FSTR_INIT(name, len);
668
669                                         /* Directory is encrypted */
670                                         res = fscrypt_fname_alloc_buffer(
671                                                 dir, len,
672                                                 &fname_crypto_str);
673                                         if (res)
674                                                 printk(KERN_WARNING "Error "
675                                                         "allocating crypto "
676                                                         "buffer--skipping "
677                                                         "crypto\n");
678                                         res = fscrypt_fname_disk_to_usr(dir,
679                                                 0, 0, &de_name,
680                                                 &fname_crypto_str);
681                                         if (res) {
682                                                 printk(KERN_WARNING "Error "
683                                                         "converting filename "
684                                                         "from disk to usr"
685                                                         "\n");
686                                                 name = "??";
687                                                 len = 2;
688                                         } else {
689                                                 name = fname_crypto_str.name;
690                                                 len = fname_crypto_str.len;
691                                         }
692                                         ext4fs_dirhash(de->name, de->name_len,
693                                                        &h);
694                                         printk("%*.s:(E)%x.%u ", len, name,
695                                                h.hash, (unsigned) ((char *) de
696                                                                    - base));
697                                         fscrypt_fname_free_buffer(
698                                                         &fname_crypto_str);
699                                 }
700 #else
701                                 int len = de->name_len;
702                                 char *name = de->name;
703                                 ext4fs_dirhash(de->name, de->name_len, &h);
704                                 printk("%*.s:%x.%u ", len, name, h.hash,
705                                        (unsigned) ((char *) de - base));
706 #endif
707                         }
708                         space += EXT4_DIR_REC_LEN(de->name_len);
709                         names++;
710                 }
711                 de = ext4_next_entry(de, size);
712         }
713         printk(KERN_CONT "(%i)\n", names);
714         return (struct stats) { names, space, 1 };
715 }
716
717 struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
718                              struct dx_entry *entries, int levels)
719 {
720         unsigned blocksize = dir->i_sb->s_blocksize;
721         unsigned count = dx_get_count(entries), names = 0, space = 0, i;
722         unsigned bcount = 0;
723         struct buffer_head *bh;
724         printk("%i indexed blocks...\n", count);
725         for (i = 0; i < count; i++, entries++)
726         {
727                 ext4_lblk_t block = dx_get_block(entries);
728                 ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
729                 u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
730                 struct stats stats;
731                 printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
732                 bh = ext4_bread(NULL,dir, block, 0);
733                 if (!bh || IS_ERR(bh))
734                         continue;
735                 stats = levels?
736                    dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
737                    dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
738                         bh->b_data, blocksize, 0);
739                 names += stats.names;
740                 space += stats.space;
741                 bcount += stats.bcount;
742                 brelse(bh);
743         }
744         if (bcount)
745                 printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
746                        levels ? "" : "   ", names, space/bcount,
747                        (space/bcount)*100/blocksize);
748         return (struct stats) { names, space, bcount};
749 }
750 #endif /* DX_DEBUG */
751
752 /*
753  * Probe for a directory leaf block to search.
754  *
755  * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
756  * error in the directory index, and the caller should fall back to
757  * searching the directory normally.  The callers of dx_probe **MUST**
758  * check for this error code, and make sure it never gets reflected
759  * back to userspace.
760  */
761 static struct dx_frame *
762 dx_probe(struct ext4_filename *fname, struct inode *dir,
763          struct dx_hash_info *hinfo, struct dx_frame *frame_in)
764 {
765         unsigned count, indirect;
766         struct dx_entry *at, *entries, *p, *q, *m;
767         struct dx_root *root;
768         struct dx_frame *frame = frame_in;
        /* Default failure value; only a failed lower-level read replaces it. */
769         struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
770         u32 hash;
771
        /*
         * The index root is read from logical block 0.  A failed read is
         * handed back unchanged (the ERR_PTR from ext4_read_dirblock), and
         * no buffer is held in that case.
         */
772         frame->bh = ext4_read_dirblock(dir, 0, INDEX);
773         if (IS_ERR(frame->bh))
774                 return (struct dx_frame *) frame->bh;
775
776         root = (struct dx_root *) frame->bh->b_data;
        /* Only these three on-disk hash versions are accepted. */
777         if (root->info.hash_version != DX_HASH_TEA &&
778             root->info.hash_version != DX_HASH_HALF_MD4 &&
779             root->info.hash_version != DX_HASH_LEGACY) {
780                 ext4_warning_inode(dir, "Unrecognised inode hash code %u",
781                                    root->info.hash_version);
782                 goto fail;
783         }
        /* A supplied fname carries its own hash info, overriding hinfo. */
784         if (fname)
785                 hinfo = &fname->hinfo;
786         hinfo->hash_version = root->info.hash_version;
        /* Versions up to DX_HASH_TEA are offset by the sb's s_hash_unsigned. */
787         if (hinfo->hash_version <= DX_HASH_TEA)
788                 hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
789         hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
        /*
         * With a usable name the hash is computed here; otherwise the
         * hash already stored in hinfo by the caller is the lookup key.
         */
790         if (fname && fname_name(fname))
791                 ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo);
792         hash = hinfo->hash;
793
794         if (root->info.unused_flags & 1) {
795                 ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
796                                    root->info.unused_flags);
797                 goto fail;
798         }
799
        /* At most one level of indirection below the root is accepted. */
800         indirect = root->info.indirect_levels;
801         if (indirect > 1) {
802                 ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
803                                    root->info.indirect_levels);
804                 goto fail;
805         }
806
        /* The root's entry array starts info_length bytes past root->info. */
807         entries = (struct dx_entry *)(((char *)&root->info) +
808                                       root->info.info_length);
809
810         if (dx_get_limit(entries) != dx_root_limit(dir,
811                                                    root->info.info_length)) {
812                 ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
813                                    dx_get_limit(entries),
814                                    dx_root_limit(dir, root->info.info_length));
815                 goto fail;
816         }
817
818         dxtrace(printk("Look up %x", hash));
        /* One iteration per index level: the root first, then one node. */
819         while (1) {
820                 count = dx_get_count(entries);
821                 if (!count || count > dx_get_limit(entries)) {
822                         ext4_warning_inode(dir,
823                                            "dx entry: count %u beyond limit %u",
824                                            count, dx_get_limit(entries));
825                         goto fail;
826                 }
827
                /*
                 * Binary search over entries[1..count-1].  On exit p is the
                 * first entry whose hash is greater than the target (or one
                 * past the last entry), so p - 1 is the entry to follow;
                 * that is entries[0] when every searched hash is greater.
                 */
828                 p = entries + 1;
829                 q = entries + count - 1;
830                 while (p <= q) {
831                         m = p + (q - p) / 2;
832                         dxtrace(printk(KERN_CONT "."));
833                         if (dx_get_hash(m) > hash)
834                                 q = m - 1;
835                         else
836                                 p = m + 1;
837                 }
838
                /* Never executed: the condition is the constant 0. */
839                 if (0) { // linear search cross check
840                         unsigned n = count - 1;
841                         at = entries;
842                         while (n--)
843                         {
844                                 dxtrace(printk(KERN_CONT ","));
845                                 if (dx_get_hash(++at) > hash)
846                                 {
847                                         at--;
848                                         break;
849                                 }
850                         }
851                         assert (at == p - 1);
852                 }
853
854                 at = p - 1;
855                 dxtrace(printk(KERN_CONT " %x->%u\n",
856                                at == entries ? 0 : dx_get_hash(at),
857                                dx_get_block(at)));
858                 frame->entries = entries;
859                 frame->at = at;
                /* No index level left below this one: this frame is the result. */
860                 if (!indirect--)
861                         return frame;
                /* Descend: read the index node that the chosen entry points to. */
862                 frame++;
863                 frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
864                 if (IS_ERR(frame->bh)) {
                        /*
                         * Report the read error itself, and clear bh so the
                         * cleanup loop below calls brelse() on NULL rather
                         * than on an error pointer.
                         */
865                         ret_err = (struct dx_frame *) frame->bh;
866                         frame->bh = NULL;
867                         goto fail;
868                 }
869                 entries = ((struct dx_node *) frame->bh->b_data)->entries;
870
871                 if (dx_get_limit(entries) != dx_node_limit(dir)) {
872                         ext4_warning_inode(dir,
873                                 "dx entry: limit %u != node limit %u",
874                                 dx_get_limit(entries), dx_node_limit(dir));
875                         goto fail;
876                 }
877         }
878 fail:
        /* Release every buffer taken so far, from the current frame back to frame_in. */
879         while (frame >= frame_in) {
880                 brelse(frame->bh);
881                 frame--;
882         }
883
        /* The fsck hint is printed only for format errors, not for read errors. */
884         if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
885                 ext4_warning_inode(dir,
886                         "Corrupt directory, running e2fsck is recommended");
887         return ret_err;
888 }
889
890 static void dx_release(struct dx_frame *frames)
891 {
892         if (frames[0].bh == NULL)
893                 return;
894
895         if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
896                 brelse(frames[1].bh);
897         brelse(frames[0].bh);
898 }
899
900 /*
901  * This function increments the frame pointer to search the next leaf
902  * block, and reads in the necessary intervening nodes if the search
903  * should be necessary.  Whether or not the search is necessary is
904  * controlled by the hash parameter.  If the hash value is even, then
905  * the search is only continued if the next block starts with that
906  * hash value.  This is used if we are searching for a specific file.
907  *
908  * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
909  *
910  * This function returns 1 if the caller should continue to search,
911  * or 0 if it should not.  If there is an error reading one of the
912  * index blocks, it will a negative error code.
913  *
914  * If start_hash is non-null, it will be filled in with the starting
915  * hash of the next page.
916  */
917 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
918                                  struct dx_frame *frame,
919                                  struct dx_frame *frames,
920                                  __u32 *start_hash)
921 {
922         struct dx_frame *p;
923         struct buffer_head *bh;
924         int num_frames = 0;
925         __u32 bhash;
926
927         p = frame;
928         /*
929          * Find the next leaf page by incrementing the frame pointer.
930          * If we run out of entries in the interior node, loop around and
931          * increment pointer in the parent node.  When we break out of
932          * this loop, num_frames indicates the number of interior
933          * nodes need to be read.
934          */
935         while (1) {
936                 if (++(p->at) < p->entries + dx_get_count(p->entries))
937                         break;
938                 if (p == frames)
939                         return 0;
940                 num_frames++;
941                 p--;
942         }
943
944         /*
945          * If the hash is 1, then continue only if the next page has a
946          * continuation hash of any value.  This is used for readdir
947          * handling.  Otherwise, check to see if the hash matches the
948          * desired contiuation hash.  If it doesn't, return since
949          * there's no point to read in the successive index pages.
950          */
951         bhash = dx_get_hash(p->at);
952         if (start_hash)
953                 *start_hash = bhash;
954         if ((hash & 1) == 0) {
955                 if ((bhash & ~1) != hash)
956                         return 0;
957         }
958         /*
959          * If the hash is HASH_NB_ALWAYS, we always go to the next
960          * block so no check is necessary
961          */
962         while (num_frames--) {
963                 bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
964                 if (IS_ERR(bh))
965                         return PTR_ERR(bh);
966                 p++;
967                 brelse(p->bh);
968                 p->bh = bh;
969                 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
970         }
971         return 1;
972 }
973
974
975 /*
976  * This function fills a red-black tree with information from a
977  * directory block.  It returns the number directory entries loaded
978  * into the tree.  If there is an error it is returned in err.
979  */
980 static int htree_dirblock_to_tree(struct file *dir_file,
981                                   struct inode *dir, ext4_lblk_t block,
982                                   struct dx_hash_info *hinfo,
983                                   __u32 start_hash, __u32 start_minor_hash)
984 {
985         struct buffer_head *bh;
986         struct ext4_dir_entry_2 *de, *top;
987         int err = 0, count = 0;
988         struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str;
989
990         dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
991                                                         (unsigned long)block));
992         bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
993         if (IS_ERR(bh))
994                 return PTR_ERR(bh);
995
996         de = (struct ext4_dir_entry_2 *) bh->b_data;
997         top = (struct ext4_dir_entry_2 *) ((char *) de +
998                                            dir->i_sb->s_blocksize -
999                                            EXT4_DIR_REC_LEN(0));
1000 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1001         /* Check if the directory is encrypted */
1002         if (ext4_encrypted_inode(dir)) {
1003                 err = fscrypt_get_encryption_info(dir);
1004                 if (err < 0) {
1005                         brelse(bh);
1006                         return err;
1007                 }
1008                 err = fscrypt_fname_alloc_buffer(dir, EXT4_NAME_LEN,
1009                                                      &fname_crypto_str);
1010                 if (err < 0) {
1011                         brelse(bh);
1012                         return err;
1013                 }
1014         }
1015 #endif
1016         for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
1017                 if (ext4_check_dir_entry(dir, NULL, de, bh,
1018                                 bh->b_data, bh->b_size,
1019                                 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
1020                                          + ((char *)de - bh->b_data))) {
1021                         /* silently ignore the rest of the block */
1022                         break;
1023                 }
1024                 ext4fs_dirhash(de->name, de->name_len, hinfo);
1025                 if ((hinfo->hash < start_hash) ||
1026                     ((hinfo->hash == start_hash) &&
1027                      (hinfo->minor_hash < start_minor_hash)))
1028                         continue;
1029                 if (de->inode == 0)
1030                         continue;
1031                 if (!ext4_encrypted_inode(dir)) {
1032                         tmp_str.name = de->name;
1033                         tmp_str.len = de->name_len;
1034                         err = ext4_htree_store_dirent(dir_file,
1035                                    hinfo->hash, hinfo->minor_hash, de,
1036                                    &tmp_str);
1037                 } else {
1038                         int save_len = fname_crypto_str.len;
1039                         struct fscrypt_str de_name = FSTR_INIT(de->name,
1040                                                                 de->name_len);
1041
1042                         /* Directory is encrypted */
1043                         err = fscrypt_fname_disk_to_usr(dir, hinfo->hash,
1044                                         hinfo->minor_hash, &de_name,
1045                                         &fname_crypto_str);
1046                         if (err) {
1047                                 count = err;
1048                                 goto errout;
1049                         }
1050                         err = ext4_htree_store_dirent(dir_file,
1051                                    hinfo->hash, hinfo->minor_hash, de,
1052                                         &fname_crypto_str);
1053                         fname_crypto_str.len = save_len;
1054                 }
1055                 if (err != 0) {
1056                         count = err;
1057                         goto errout;
1058                 }
1059                 count++;
1060         }
1061 errout:
1062         brelse(bh);
1063 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1064         fscrypt_fname_free_buffer(&fname_crypto_str);
1065 #endif
1066         return count;
1067 }
1068
1069
1070 /*
1071  * This function fills a red-black tree with information from a
1072  * directory.  We start scanning the directory in hash order, starting
1073  * at start_hash and start_minor_hash.
1074  *
1075  * This function returns the number of entries inserted into the tree,
1076  * or a negative error code.
1077  */
1078 int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1079                          __u32 start_minor_hash, __u32 *next_hash)
1080 {
1081         struct dx_hash_info hinfo;
1082         struct ext4_dir_entry_2 *de;
1083         struct dx_frame frames[2], *frame;
1084         struct inode *dir;
1085         ext4_lblk_t block;
1086         int count = 0;
1087         int ret, err;
1088         __u32 hashval;
1089         struct fscrypt_str tmp_str;
1090
1091         dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
1092                        start_hash, start_minor_hash));
1093         dir = file_inode(dir_file);
1094         if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
1095                 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
1096                 if (hinfo.hash_version <= DX_HASH_TEA)
1097                         hinfo.hash_version +=
1098                                 EXT4_SB(dir->i_sb)->s_hash_unsigned;
1099                 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1100                 if (ext4_has_inline_data(dir)) {
1101                         int has_inline_data = 1;
1102                         count = htree_inlinedir_to_tree(dir_file, dir, 0,
1103                                                         &hinfo, start_hash,
1104                                                         start_minor_hash,
1105                                                         &has_inline_data);
1106                         if (has_inline_data) {
1107                                 *next_hash = ~0;
1108                                 return count;
1109                         }
1110                 }
1111                 count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
1112                                                start_hash, start_minor_hash);
1113                 *next_hash = ~0;
1114                 return count;
1115         }
1116         hinfo.hash = start_hash;
1117         hinfo.minor_hash = 0;
1118         frame = dx_probe(NULL, dir, &hinfo, frames);
1119         if (IS_ERR(frame))
1120                 return PTR_ERR(frame);
1121
1122         /* Add '.' and '..' from the htree header */
1123         if (!start_hash && !start_minor_hash) {
1124                 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1125                 tmp_str.name = de->name;
1126                 tmp_str.len = de->name_len;
1127                 err = ext4_htree_store_dirent(dir_file, 0, 0,
1128                                               de, &tmp_str);
1129                 if (err != 0)
1130                         goto errout;
1131                 count++;
1132         }
1133         if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1134                 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1135                 de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1136                 tmp_str.name = de->name;
1137                 tmp_str.len = de->name_len;
1138                 err = ext4_htree_store_dirent(dir_file, 2, 0,
1139                                               de, &tmp_str);
1140                 if (err != 0)
1141                         goto errout;
1142                 count++;
1143         }
1144
1145         while (1) {
1146                 if (fatal_signal_pending(current)) {
1147                         err = -ERESTARTSYS;
1148                         goto errout;
1149                 }
1150                 cond_resched();
1151                 block = dx_get_block(frame->at);
1152                 ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
1153                                              start_hash, start_minor_hash);
1154                 if (ret < 0) {
1155                         err = ret;
1156                         goto errout;
1157                 }
1158                 count += ret;
1159                 hashval = ~0;
1160                 ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1161                                             frame, frames, &hashval);
1162                 *next_hash = hashval;
1163                 if (ret < 0) {
1164                         err = ret;
1165                         goto errout;
1166                 }
1167                 /*
1168                  * Stop if:  (a) there are no more entries, or
1169                  * (b) we have inserted at least one entry and the
1170                  * next hash value is not a continuation
1171                  */
1172                 if ((ret == 0) ||
1173                     (count && ((hashval & 1) == 0)))
1174                         break;
1175         }
1176         dx_release(frames);
1177         dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
1178                        "next hash: %x\n", count, *next_hash));
1179         return count;
1180 errout:
1181         dx_release(frames);
1182         return (err);
1183 }
1184
1185 static inline int search_dirblock(struct buffer_head *bh,
1186                                   struct inode *dir,
1187                                   struct ext4_filename *fname,
1188                                   const struct qstr *d_name,
1189                                   unsigned int offset,
1190                                   struct ext4_dir_entry_2 **res_dir)
1191 {
1192         return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
1193                                fname, d_name, offset, res_dir);
1194 }
1195
1196 /*
1197  * Directory block splitting, compacting
1198  */
1199
1200 /*
1201  * Create map of hash values, offsets, and sizes, stored at end of block.
1202  * Returns number of entries mapped.
1203  */
1204 static int dx_make_map(struct inode *dir, struct buffer_head *bh,
1205                        struct dx_hash_info *hinfo,
1206                        struct dx_map_entry *map_tail)
1207 {
1208         int count = 0;
1209         struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
1210         unsigned int buflen = bh->b_size;
1211         char *base = bh->b_data;
1212         struct dx_hash_info h = *hinfo;
1213
1214         if (ext4_has_metadata_csum(dir->i_sb))
1215                 buflen -= sizeof(struct ext4_dir_entry_tail);
1216
1217         while ((char *) de < base + buflen) {
1218                 if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen,
1219                                          ((char *)de) - base))
1220                         return -EFSCORRUPTED;
1221                 if (de->name_len && de->inode) {
1222                         ext4fs_dirhash(de->name, de->name_len, &h);
1223                         map_tail--;
1224                         map_tail->hash = h.hash;
1225                         map_tail->offs = ((char *) de - base)>>2;
1226                         map_tail->size = le16_to_cpu(de->rec_len);
1227                         count++;
1228                         cond_resched();
1229                 }
1230                 de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1231         }
1232         return count;
1233 }
1234
1235 /* Sort map by hash value */
1236 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1237 {
1238         struct dx_map_entry *p, *q, *top = map + count - 1;
1239         int more;
1240         /* Combsort until bubble sort doesn't suck */
1241         while (count > 2) {
1242                 count = count*10/13;
1243                 if (count - 9 < 2) /* 9, 10 -> 11 */
1244                         count = 11;
1245                 for (p = top, q = p - count; q >= map; p--, q--)
1246                         if (p->hash < q->hash)
1247                                 swap(*p, *q);
1248         }
1249         /* Garden variety bubble sort */
1250         do {
1251                 more = 0;
1252                 q = top;
1253                 while (q-- > map) {
1254                         if (q[1].hash >= q[0].hash)
1255                                 continue;
1256                         swap(*(q+1), *q);
1257                         more = 1;
1258                 }
1259         } while(more);
1260 }
1261
1262 static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1263 {
1264         struct dx_entry *entries = frame->entries;
1265         struct dx_entry *old = frame->at, *new = old + 1;
1266         int count = dx_get_count(entries);
1267
1268         assert(count < dx_get_limit(entries));
1269         assert(old < entries + count);
1270         memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1271         dx_set_hash(new, hash);
1272         dx_set_block(new, block);
1273         dx_set_count(entries, count + 1);
1274 }
1275
1276 /*
1277  * Test whether a directory entry matches the filename being searched for.
1278  *
1279  * Return: %true if the directory entry matches, otherwise %false.
1280  */
1281 static inline bool ext4_match(const struct ext4_filename *fname,
1282                               const struct ext4_dir_entry_2 *de)
1283 {
1284         const void *name = fname_name(fname);
1285         u32 len = fname_len(fname);
1286
1287         if (!de->inode)
1288                 return false;
1289
1290 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1291         if (unlikely(!name)) {
1292                 if (fname->usr_fname->name[0] == '_') {
1293                         int ret;
1294                         if (de->name_len <= 32)
1295                                 return 0;
1296                         ret = memcmp(de->name + ((de->name_len - 17) & ~15),
1297                                      fname->crypto_buf.name + 8, 16);
1298                         return (ret == 0) ? 1 : 0;
1299                 }
1300                 name = fname->crypto_buf.name;
1301                 len = fname->crypto_buf.len;
1302         }
1303 #endif
1304         if (de->name_len != len)
1305                 return 0;
1306         return (memcmp(de->name, name, len) == 0) ? 1 : 0;
1307 }
1308
1309 /*
1310  * Returns 0 if not found, -1 on failure, and 1 on success
1311  */
1312 int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
1313                     struct inode *dir, struct ext4_filename *fname,
1314                     const struct qstr *d_name,
1315                     unsigned int offset, struct ext4_dir_entry_2 **res_dir)
1316 {
1317         struct ext4_dir_entry_2 * de;
1318         char * dlimit;
1319         int de_len;
1320
1321         de = (struct ext4_dir_entry_2 *)search_buf;
1322         dlimit = search_buf + buf_size;
1323         while ((char *) de < dlimit) {
1324                 /* this code is executed quadratically often */
1325                 /* do minimal checking `by hand' */
1326                 if ((char *) de + de->name_len <= dlimit &&
1327                     ext4_match(fname, de)) {
1328                         /* found a match - just to be sure, do
1329                          * a full check */
1330                         if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
1331                                                  buf_size, offset))
1332                                 return -1;
1333                         *res_dir = de;
1334                         return 1;
1335                 }
1336                 /* prevent looping on a bad block */
1337                 de_len = ext4_rec_len_from_disk(de->rec_len,
1338                                                 dir->i_sb->s_blocksize);
1339                 if (de_len <= 0)
1340                         return -1;
1341                 offset += de_len;
1342                 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1343         }
1344         return 0;
1345 }
1346
1347 static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
1348                                struct ext4_dir_entry *de)
1349 {
1350         struct super_block *sb = dir->i_sb;
1351
1352         if (!is_dx(dir))
1353                 return 0;
1354         if (block == 0)
1355                 return 1;
1356         if (de->inode == 0 &&
1357             ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
1358                         sb->s_blocksize)
1359                 return 1;
1360         return 0;
1361 }
1362
1363 /*
1364  *      ext4_find_entry()
1365  *
1366  * finds an entry in the specified directory with the wanted name. It
1367  * returns the cache buffer in which the entry was found, and the entry
1368  * itself (as a parameter - res_dir). It does NOT read the inode of the
1369  * entry - you'll have to do that yourself if you want to.
1370  *
1371  * The returned buffer_head has ->b_count elevated.  The caller is expected
1372  * to brelse() it when appropriate.
 *
 * NULL is returned when the name is longer than EXT4_NAME_LEN or when
 * ext4_fname_setup_filename() reports -ENOENT; any other setup failure
 * is returned as ERR_PTR(retval).  If the lookup is answered from inline
 * data and inlined is non-NULL, *inlined is set to 1.  For an indexed
 * directory the result of ext4_dx_find_entry() is returned as-is unless
 * it is ERR_BAD_DX_DIR, in which case the blocks are scanned linearly.
1373  */
1374 static struct buffer_head * ext4_find_entry (struct inode *dir,
1375                                         const struct qstr *d_name,
1376                                         struct ext4_dir_entry_2 **res_dir,
1377                                         int *inlined)
1378 {
1379         struct super_block *sb;
1380         struct buffer_head *bh_use[NAMEI_RA_SIZE];
1381         struct buffer_head *bh, *ret = NULL;
1382         ext4_lblk_t start, block, b;
1383         const u8 *name = d_name->name;
1384         int ra_max = 0;         /* Number of bh's in the readahead
1385                                    buffer, bh_use[] */
1386         int ra_ptr = 0;         /* Current index into readahead
1387                                    buffer */
        /* Count of blocks requested from ext4_getblk() so far */
1388         int num = 0;
1389         ext4_lblk_t  nblocks;
1390         int i, namelen, retval;
1391         struct ext4_filename fname;
1392
1393         *res_dir = NULL;
1394         sb = dir->i_sb;
1395         namelen = d_name->len;
1396         if (namelen > EXT4_NAME_LEN)
1397                 return NULL;
1398
        /* fname is released at cleanup_and_exit on every later path */
1399         retval = ext4_fname_setup_filename(dir, d_name, 1, &fname);
1400         if (retval == -ENOENT)
1401                 return NULL;
1402         if (retval)
1403                 return ERR_PTR(retval);
1404
        /*
         * Inline-data directories are searched by the inline helper.  Its
         * answer is final unless it clears has_inline_data, in which case
         * the block-based paths below are used.
         */
1405         if (ext4_has_inline_data(dir)) {
1406                 int has_inline_data = 1;
1407                 ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir,
1408                                              &has_inline_data);
1409                 if (has_inline_data) {
1410                         if (inlined)
1411                                 *inlined = 1;
1412                         goto cleanup_and_exit;
1413                 }
1414         }
1415
1416         if ((namelen <= 2) && (name[0] == '.') &&
1417             (name[1] == '.' || name[1] == '\0')) {
1418                 /*
1419                  * "." or ".." will only be in the first block
1420                  * NFS may look up ".."; "." should be handled by the VFS
1421                  */
1422                 block = start = 0;
1423                 nblocks = 1;
1424                 goto restart;
1425         }
1426         if (is_dx(dir)) {
1427                 ret = ext4_dx_find_entry(dir, &fname, res_dir);
1428                 /*
1429                  * On success, or if the error was file not found,
1430                  * return.  Otherwise, fall back to doing a search the
1431                  * old fashioned way.
1432                  */
1433                 if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR)
1434                         goto cleanup_and_exit;
1435                 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1436                                "falling back\n"));
1437                 ret = NULL;
1438         }
        /* Linear scan over the whole blocks covered by i_size */
1439         nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1440         if (!nblocks) {
1441                 ret = NULL;
1442                 goto cleanup_and_exit;
1443         }
        /*
         * Begin at the block recorded by the last successful lookup in
         * this directory (stored below on a match), if still in range.
         */
1444         start = EXT4_I(dir)->i_dir_start_lookup;
1445         if (start >= nblocks)
1446                 start = 0;
1447         block = start;
1448 restart:
1449         do {
1450                 /*
1451                  * We deal with the read-ahead logic here.
1452                  */
1453                 cond_resched();
1454                 if (ra_ptr >= ra_max) {
1455                         /* Refill the readahead buffer */
1456                         ra_ptr = 0;
1457                         b = block;
1458                         for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
1459                                 /*
1460                                  * Terminate if we reach the end of the
1461                                  * directory and must wrap, or if our
1462                                  * search has finished at this block.
1463                                  */
1464                                 if (b >= nblocks || (num && block == start)) {
1465                                         bh_use[ra_max] = NULL;
1466                                         break;
1467                                 }
1468                                 num++;
1469                                 bh = ext4_getblk(NULL, dir, b++, 0);
1470                                 if (IS_ERR(bh)) {
                                        /*
                                         * An error on the first slot of a
                                         * batch is returned to the caller;
                                         * on a later slot it only shortens
                                         * the batch.
                                         */
1471                                         if (ra_max == 0) {
1472                                                 ret = bh;
1473                                                 goto cleanup_and_exit;
1474                                         }
1475                                         break;
1476                                 }
1477                                 bh_use[ra_max] = bh;
                                /* A NULL bh is stored but no read is issued for it */
1478                                 if (bh)
1479                                         ll_rw_block(REQ_OP_READ,
1480                                                     REQ_META | REQ_PRIO,
1481                                                     1, &bh);
1482                         }
1483                 }
                /* An empty slot has nothing to search; move to the next block */
1484                 if ((bh = bh_use[ra_ptr++]) == NULL)
1485                         goto next;
1486                 wait_on_buffer(bh);
1487                 if (!buffer_uptodate(bh)) {
1488                         /* read error, skip block & hope for the best */
1489                         EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
1490                                          (unsigned long) block);
1491                         brelse(bh);
1492                         goto next;
1493                 }
                /*
                 * The checksum is verified only for buffers not yet marked
                 * verified and not classified as dx index nodes; a block
                 * that fails the checksum is reported and skipped.
                 */
1494                 if (!buffer_verified(bh) &&
1495                     !is_dx_internal_node(dir, block,
1496                                          (struct ext4_dir_entry *)bh->b_data) &&
1497                     !ext4_dirent_csum_verify(dir,
1498                                 (struct ext4_dir_entry *)bh->b_data)) {
1499                         EXT4_ERROR_INODE(dir, "checksumming directory "
1500                                          "block %lu", (unsigned long)block);
1501                         brelse(bh);
1502                         goto next;
1503                 }
1504                 set_buffer_verified(bh);
1505                 i = search_dirblock(bh, dir, &fname, d_name,
1506                             block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1507                 if (i == 1) {
                        /* Found: remember the block and keep the reference on bh */
1508                         EXT4_I(dir)->i_dir_start_lookup = block;
1509                         ret = bh;
1510                         goto cleanup_and_exit;
1511                 } else {
1512                         brelse(bh);
                        /* The block search failed: stop, leaving ret as it stands */
1513                         if (i < 0)
1514                                 goto cleanup_and_exit;
1515                 }
1516         next:
                /* Advance, wrapping to block 0 past the last block */
1517                 if (++block >= nblocks)
1518                         block = 0;
1519         } while (block != start);
1520
1521         /*
1522          * If the directory has grown while we were searching, then
1523          * search the last part of the directory before giving up.
1524          */
1525         block = nblocks;
1526         nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1527         if (block < nblocks) {
1528                 start = 0;
1529                 goto restart;
1530         }
1531
1532 cleanup_and_exit:
1533         /* Clean up the read-ahead blocks */
        /* Only slots fetched but not yet consumed (ra_ptr..ra_max-1) are released */
1534         for (; ra_ptr < ra_max; ra_ptr++)
1535                 brelse(bh_use[ra_ptr]);
1536         ext4_fname_free_filename(&fname);
1537         return ret;
1538 }
1539
1540 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
1541                         struct ext4_filename *fname,
1542                         struct ext4_dir_entry_2 **res_dir)
1543 {
1544         struct super_block * sb = dir->i_sb;
1545         struct dx_frame frames[2], *frame;
1546         const struct qstr *d_name = fname->usr_fname;
1547         struct buffer_head *bh;
1548         ext4_lblk_t block;
1549         int retval;
1550
1551 #ifdef CONFIG_EXT4_FS_ENCRYPTION
1552         *res_dir = NULL;
1553 #endif
1554         frame = dx_probe(fname, dir, NULL, frames);
1555         if (IS_ERR(frame))
1556                 return (struct buffer_head *) frame;
1557         do {
1558                 block = dx_get_block(frame->at);
1559                 bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
1560                 if (IS_ERR(bh))
1561                         goto errout;
1562
1563                 retval = search_dirblock(bh, dir, fname, d_name,
1564                                          block << EXT4_BLOCK_SIZE_BITS(sb),
1565                                          res_dir);
1566                 if (retval == 1)
1567                         goto success;
1568                 brelse(bh);
1569                 if (retval == -1) {
1570                         bh = ERR_PTR(ERR_BAD_DX_DIR);
1571                         goto errout;
1572                 }
1573
1574                 /* Check to see if we should continue to search */
1575                 retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
1576                                                frames, NULL);
1577                 if (retval < 0) {
1578                         ext4_warning_inode(dir,
1579                                 "error %d reading directory index block",
1580                                 retval);
1581                         bh = ERR_PTR(retval);
1582                         goto errout;
1583                 }
1584         } while (retval == 1);
1585
1586         bh = NULL;
1587 errout:
1588         dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1589 success:
1590         dx_release(frames);
1591         return bh;
1592 }
1593
1594 static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1595 {
1596         struct inode *inode;
1597         struct ext4_dir_entry_2 *de;
1598         struct buffer_head *bh;
1599
1600         if (ext4_encrypted_inode(dir)) {
1601                 int res = fscrypt_get_encryption_info(dir);
1602
1603                 /*
1604                  * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
1605                  * created while the directory was encrypted and we
1606                  * have access to the key.
1607                  */
1608                 if (fscrypt_has_encryption_key(dir))
1609                         fscrypt_set_encrypted_dentry(dentry);
1610                 fscrypt_set_d_op(dentry);
1611                 if (res && res != -ENOKEY)
1612                         return ERR_PTR(res);
1613         }
1614
1615        if (dentry->d_name.len > EXT4_NAME_LEN)
1616                return ERR_PTR(-ENAMETOOLONG);
1617
1618         bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
1619         if (IS_ERR(bh))
1620                 return (struct dentry *) bh;
1621         inode = NULL;
1622         if (bh) {
1623                 __u32 ino = le32_to_cpu(de->inode);
1624                 brelse(bh);
1625                 if (!ext4_valid_inum(dir->i_sb, ino)) {
1626                         EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
1627                         return ERR_PTR(-EFSCORRUPTED);
1628                 }
1629                 if (unlikely(ino == dir->i_ino)) {
1630                         EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir",
1631                                          dentry);
1632                         return ERR_PTR(-EFSCORRUPTED);
1633                 }
1634                 inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
1635                 if (inode == ERR_PTR(-ESTALE)) {
1636                         EXT4_ERROR_INODE(dir,
1637                                          "deleted inode referenced: %u",
1638                                          ino);
1639                         return ERR_PTR(-EFSCORRUPTED);
1640                 }
1641                 if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
1642                     (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
1643                     !fscrypt_has_permitted_context(dir, inode)) {
1644                         int nokey = ext4_encrypted_inode(inode) &&
1645                                 !fscrypt_has_encryption_key(inode);
1646                         if (nokey) {
1647                                 iput(inode);
1648                                 return ERR_PTR(-ENOKEY);
1649                         }
1650                         ext4_warning(inode->i_sb,
1651                                      "Inconsistent encryption contexts: %lu/%lu",
1652                                      (unsigned long) dir->i_ino,
1653                                      (unsigned long) inode->i_ino);
1654                         iput(inode);
1655                         return ERR_PTR(-EPERM);
1656                 }
1657         }
1658         return d_splice_alias(inode, dentry);
1659 }
1660
1661
1662 struct dentry *ext4_get_parent(struct dentry *child)
1663 {
1664         __u32 ino;
1665         static const struct qstr dotdot = QSTR_INIT("..", 2);
1666         struct ext4_dir_entry_2 * de;
1667         struct buffer_head *bh;
1668
1669         bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
1670         if (IS_ERR(bh))
1671                 return (struct dentry *) bh;
1672         if (!bh)
1673                 return ERR_PTR(-ENOENT);
1674         ino = le32_to_cpu(de->inode);
1675         brelse(bh);
1676
1677         if (!ext4_valid_inum(child->d_sb, ino)) {
1678                 EXT4_ERROR_INODE(d_inode(child),
1679                                  "bad parent inode number: %u", ino);
1680                 return ERR_PTR(-EFSCORRUPTED);
1681         }
1682
1683         return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL));
1684 }
1685
1686 /*
1687  * Move count entries from end of map between two memory locations.
1688  * Returns pointer to last entry moved.
1689  */
1690 static struct ext4_dir_entry_2 *
1691 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1692                 unsigned blocksize)
1693 {
1694         unsigned rec_len = 0;
1695
1696         while (count--) {
1697                 struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1698                                                 (from + (map->offs<<2));
1699                 rec_len = EXT4_DIR_REC_LEN(de->name_len);
1700                 memcpy (to, de, rec_len);
1701                 ((struct ext4_dir_entry_2 *) to)->rec_len =
1702                                 ext4_rec_len_to_disk(rec_len, blocksize);
1703                 de->inode = 0;
1704                 map++;
1705                 to += rec_len;
1706         }
1707         return (struct ext4_dir_entry_2 *) (to - rec_len);
1708 }
1709
1710 /*
1711  * Compact each dir entry in the range to the minimal rec_len.
1712  * Returns pointer to last entry in range.
1713  */
1714 static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1715 {
1716         struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1717         unsigned rec_len = 0;
1718
1719         prev = to = de;
1720         while ((char*)de < base + blocksize) {
1721                 next = ext4_next_entry(de, blocksize);
1722                 if (de->inode && de->name_len) {
1723                         rec_len = EXT4_DIR_REC_LEN(de->name_len);
1724                         if (de > to)
1725                                 memmove(to, de, rec_len);
1726                         to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1727                         prev = to;
1728                         to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1729                 }
1730                 de = next;
1731         }
1732         return prev;
1733 }
1734
1735 /*
1736  * Split a full leaf block to make room for a new dir entry.
1737  * Allocate a new block, and move entries so that they are approx. equally full.
1738  * Returns pointer to de in block into which the new entry will be inserted.
1739  */
1740 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1741                         struct buffer_head **bh,struct dx_frame *frame,
1742                         struct dx_hash_info *hinfo)
1743 {
1744         unsigned blocksize = dir->i_sb->s_blocksize;
1745         unsigned continued;
1746         int count;
1747         struct buffer_head *bh2;
1748         ext4_lblk_t newblock;
1749         u32 hash2;
1750         struct dx_map_entry *map;
1751         char *data1 = (*bh)->b_data, *data2;
1752         unsigned split, move, size;
1753         struct ext4_dir_entry_2 *de = NULL, *de2;
1754         struct ext4_dir_entry_tail *t;
1755         int     csum_size = 0;
1756         int     err = 0, i;
1757
1758         if (ext4_has_metadata_csum(dir->i_sb))
1759                 csum_size = sizeof(struct ext4_dir_entry_tail);
1760
1761         bh2 = ext4_append(handle, dir, &newblock);
1762         if (IS_ERR(bh2)) {
1763                 brelse(*bh);
1764                 *bh = NULL;
1765                 return (struct ext4_dir_entry_2 *) bh2;
1766         }
1767
1768         BUFFER_TRACE(*bh, "get_write_access");
1769         err = ext4_journal_get_write_access(handle, *bh);
1770         if (err)
1771                 goto journal_error;
1772
1773         BUFFER_TRACE(frame->bh, "get_write_access");
1774         err = ext4_journal_get_write_access(handle, frame->bh);
1775         if (err)
1776                 goto journal_error;
1777
1778         data2 = bh2->b_data;
1779
1780         /* create map in the end of data2 block */
1781         map = (struct dx_map_entry *) (data2 + blocksize);
1782         count = dx_make_map(dir, *bh, hinfo, map);
1783         if (count < 0) {
1784                 err = count;
1785                 goto journal_error;
1786         }
1787         map -= count;
1788         dx_sort_map(map, count);
1789         /* Ensure that neither split block is over half full */
1790         size = 0;
1791         move = 0;
1792         for (i = count-1; i >= 0; i--) {
1793                 /* is more than half of this entry in 2nd half of the block? */
1794                 if (size + map[i].size/2 > blocksize/2)
1795                         break;
1796                 size += map[i].size;
1797                 move++;
1798         }
1799         /*
1800          * map index at which we will split
1801          *
1802          * If the sum of active entries didn't exceed half the block size, just
1803          * split it in half by count; each resulting block will have at least
1804          * half the space free.
1805          */
1806         if (i > 0)
1807                 split = count - move;
1808         else
1809                 split = count/2;
1810
1811         hash2 = map[split].hash;
1812         continued = hash2 == map[split - 1].hash;
1813         dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
1814                         (unsigned long)dx_get_block(frame->at),
1815                                         hash2, split, count-split));
1816
1817         /* Fancy dance to stay within two buffers */
1818         de2 = dx_move_dirents(data1, data2, map + split, count - split,
1819                               blocksize);
1820         de = dx_pack_dirents(data1, blocksize);
1821         de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1822                                            (char *) de,
1823                                            blocksize);
1824         de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
1825                                             (char *) de2,
1826                                             blocksize);
1827         if (csum_size) {
1828                 t = EXT4_DIRENT_TAIL(data2, blocksize);
1829                 initialize_dirent_tail(t, blocksize);
1830
1831                 t = EXT4_DIRENT_TAIL(data1, blocksize);
1832                 initialize_dirent_tail(t, blocksize);
1833         }
1834
1835         dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
1836                         blocksize, 1));
1837         dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
1838                         blocksize, 1));
1839
1840         /* Which block gets the new entry? */
1841         if (hinfo->hash >= hash2) {
1842                 swap(*bh, bh2);
1843                 de = de2;
1844         }
1845         dx_insert_block(frame, hash2 + continued, newblock);
1846         err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
1847         if (err)
1848                 goto journal_error;
1849         err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1850         if (err)
1851                 goto journal_error;
1852         brelse(bh2);
1853         dxtrace(dx_show_index("frame", frame->entries));
1854         return de;
1855
1856 journal_error:
1857         brelse(*bh);
1858         brelse(bh2);
1859         *bh = NULL;
1860         ext4_std_error(dir->i_sb, err);
1861         return ERR_PTR(err);
1862 }
1863
1864 int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1865                       struct buffer_head *bh,
1866                       void *buf, int buf_size,
1867                       struct ext4_filename *fname,
1868                       struct ext4_dir_entry_2 **dest_de)
1869 {
1870         struct ext4_dir_entry_2 *de;
1871         unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname));
1872         int nlen, rlen;
1873         unsigned int offset = 0;
1874         char *top;
1875
1876         de = (struct ext4_dir_entry_2 *)buf;
1877         top = buf + buf_size - reclen;
1878         while ((char *) de <= top) {
1879                 if (ext4_check_dir_entry(dir, NULL, de, bh,
1880                                          buf, buf_size, offset))
1881                         return -EFSCORRUPTED;
1882                 if (ext4_match(fname, de))
1883                         return -EEXIST;
1884                 nlen = EXT4_DIR_REC_LEN(de->name_len);
1885                 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1886                 if ((de->inode ? rlen - nlen : rlen) >= reclen)
1887                         break;
1888                 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1889                 offset += rlen;
1890         }
1891         if ((char *) de > top)
1892                 return -ENOSPC;
1893
1894         *dest_de = de;
1895         return 0;
1896 }
1897
1898 int ext4_insert_dentry(struct inode *dir,
1899                        struct inode *inode,
1900                        struct ext4_dir_entry_2 *de,
1901                        int buf_size,
1902                        struct ext4_filename *fname)
1903 {
1904
1905         int nlen, rlen;
1906
1907         nlen = EXT4_DIR_REC_LEN(de->name_len);
1908         rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1909         if (de->inode) {
1910                 struct ext4_dir_entry_2 *de1 =
1911                         (struct ext4_dir_entry_2 *)((char *)de + nlen);
1912                 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
1913                 de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
1914                 de = de1;
1915         }
1916         de->file_type = EXT4_FT_UNKNOWN;
1917         de->inode = cpu_to_le32(inode->i_ino);
1918         ext4_set_de_type(inode->i_sb, de, inode->i_mode);
1919         de->name_len = fname_len(fname);
1920         memcpy(de->name, fname_name(fname), fname_len(fname));
1921         return 0;
1922 }
1923
1924 /*
1925  * Add a new entry into a directory (leaf) block.  If de is non-NULL,
1926  * it points to a directory entry which is guaranteed to be large
1927  * enough for new directory entry.  If de is NULL, then
1928  * add_dirent_to_buf will attempt search the directory block for
1929  * space.  It will return -ENOSPC if no space is available, and -EIO
1930  * and -EEXIST if directory entry already exists.
1931  */
1932 static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
1933                              struct inode *dir,
1934                              struct inode *inode, struct ext4_dir_entry_2 *de,
1935                              struct buffer_head *bh)
1936 {
1937         unsigned int    blocksize = dir->i_sb->s_blocksize;
1938         int             csum_size = 0;
1939         int             err;
1940
1941         if (ext4_has_metadata_csum(inode->i_sb))
1942                 csum_size = sizeof(struct ext4_dir_entry_tail);
1943
1944         if (!de) {
1945                 err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
1946                                         blocksize - csum_size, fname, &de);
1947                 if (err)
1948                         return err;
1949         }
1950         BUFFER_TRACE(bh, "get_write_access");
1951         err = ext4_journal_get_write_access(handle, bh);
1952         if (err) {
1953                 ext4_std_error(dir->i_sb, err);
1954                 return err;
1955         }
1956
1957         /* By now the buffer is marked for journaling. Due to crypto operations,
1958          * the following function call may fail */
1959         err = ext4_insert_dentry(dir, inode, de, blocksize, fname);
1960         if (err < 0)
1961                 return err;
1962
1963         /*
1964          * XXX shouldn't update any times until successful
1965          * completion of syscall, but too many callers depend
1966          * on this.
1967          *
1968          * XXX similarly, too many callers depend on
1969          * ext4_new_inode() setting the times, but error
1970          * recovery deletes the inode, so the worst that can
1971          * happen is that the times are slightly out of date
1972          * and/or different from the directory change time.
1973          */
1974         dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
1975         ext4_update_dx_flag(dir);
1976         dir->i_version++;
1977         ext4_mark_inode_dirty(handle, dir);
1978         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1979         err = ext4_handle_dirty_dirent_node(handle, dir, bh);
1980         if (err)
1981                 ext4_std_error(dir->i_sb, err);
1982         return 0;
1983 }
1984
1985 /*
1986  * This converts a one block unindexed directory to a 3 block indexed
1987  * directory, and adds the dentry to the indexed directory.
1988  */
1989 static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
1990                             struct inode *dir,
1991                             struct inode *inode, struct buffer_head *bh)
1992 {
1993         struct buffer_head *bh2;
1994         struct dx_root  *root;
1995         struct dx_frame frames[2], *frame;
1996         struct dx_entry *entries;
1997         struct ext4_dir_entry_2 *de, *de2;
1998         struct ext4_dir_entry_tail *t;
1999         char            *data1, *top;
2000         unsigned        len;
2001         int             retval;
2002         unsigned        blocksize;
2003         ext4_lblk_t  block;
2004         struct fake_dirent *fde;
2005         int csum_size = 0;
2006
2007         if (ext4_has_metadata_csum(inode->i_sb))
2008                 csum_size = sizeof(struct ext4_dir_entry_tail);
2009
2010         blocksize =  dir->i_sb->s_blocksize;
2011         dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
2012         BUFFER_TRACE(bh, "get_write_access");
2013         retval = ext4_journal_get_write_access(handle, bh);
2014         if (retval) {
2015                 ext4_std_error(dir->i_sb, retval);
2016                 brelse(bh);
2017                 return retval;
2018         }
2019         root = (struct dx_root *) bh->b_data;
2020
2021         /* The 0th block becomes the root, move the dirents out */
2022         fde = &root->dotdot;
2023         de = (struct ext4_dir_entry_2 *)((char *)fde +
2024                 ext4_rec_len_from_disk(fde->rec_len, blocksize));
2025         if ((char *) de >= (((char *) root) + blocksize)) {
2026                 EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
2027                 brelse(bh);
2028                 return -EFSCORRUPTED;
2029         }
2030         len = ((char *) root) + (blocksize - csum_size) - (char *) de;
2031
2032         /* Allocate new block for the 0th block's dirents */
2033         bh2 = ext4_append(handle, dir, &block);
2034         if (IS_ERR(bh2)) {
2035                 brelse(bh);
2036                 return PTR_ERR(bh2);
2037         }
2038         ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
2039         data1 = bh2->b_data;
2040
2041         memcpy (data1, de, len);
2042         de = (struct ext4_dir_entry_2 *) data1;
2043         top = data1 + len;
2044         while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
2045                 de = de2;
2046         de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
2047                                            (char *) de,
2048                                            blocksize);
2049
2050         if (csum_size) {
2051                 t = EXT4_DIRENT_TAIL(data1, blocksize);
2052                 initialize_dirent_tail(t, blocksize);
2053         }
2054
2055         /* Initialize the root; the dot dirents already exist */
2056         de = (struct ext4_dir_entry_2 *) (&root->dotdot);
2057         de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
2058                                            blocksize);
2059         memset (&root->info, 0, sizeof(root->info));
2060         root->info.info_length = sizeof(root->info);
2061         root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
2062         entries = root->entries;
2063         dx_set_block(entries, 1);
2064         dx_set_count(entries, 1);
2065         dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
2066
2067         /* Initialize as for dx_probe */
2068         fname->hinfo.hash_version = root->info.hash_version;
2069         if (fname->hinfo.hash_version <= DX_HASH_TEA)
2070                 fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
2071         fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2072         ext4fs_dirhash(fname_name(fname), fname_len(fname), &fname->hinfo);
2073
2074         memset(frames, 0, sizeof(frames));
2075         frame = frames;
2076         frame->entries = entries;
2077         frame->at = entries;
2078         frame->bh = bh;
2079
2080         retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2081         if (retval)
2082                 goto out_frames;        
2083         retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
2084         if (retval)
2085                 goto out_frames;        
2086
2087         de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
2088         if (IS_ERR(de)) {
2089                 retval = PTR_ERR(de);
2090                 goto out_frames;
2091         }
2092
2093         retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
2094 out_frames:
2095         /*
2096          * Even if the block split failed, we have to properly write
2097          * out all the changes we did so far. Otherwise we can end up
2098          * with corrupted filesystem.
2099          */
2100         if (retval)
2101                 ext4_mark_inode_dirty(handle, dir);
2102         dx_release(frames);
2103         brelse(bh2);
2104         return retval;
2105 }
2106
2107 /*
2108  *      ext4_add_entry()
2109  *
2110  * adds a file entry to the specified directory, using the same
2111  * semantics as ext4_find_entry(). It returns NULL if it failed.
2112  *
2113  * NOTE!! The inode part of 'de' is left at 0 - which means you
2114  * may not sleep between calling this and putting something into
2115  * the entry, as someone else might have used it while you slept.
2116  */
2117 static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
2118                           struct inode *inode)
2119 {
2120         struct inode *dir = d_inode(dentry->d_parent);
2121         struct buffer_head *bh = NULL;
2122         struct ext4_dir_entry_2 *de;
2123         struct ext4_dir_entry_tail *t;
2124         struct super_block *sb;
2125         struct ext4_filename fname;
2126         int     retval;
2127         int     dx_fallback=0;
2128         unsigned blocksize;
2129         ext4_lblk_t block, blocks;
2130         int     csum_size = 0;
2131
2132         if (ext4_has_metadata_csum(inode->i_sb))
2133                 csum_size = sizeof(struct ext4_dir_entry_tail);
2134
2135         sb = dir->i_sb;
2136         blocksize = sb->s_blocksize;
2137         if (!dentry->d_name.len)
2138                 return -EINVAL;
2139
2140         retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
2141         if (retval)
2142                 return retval;
2143
2144         if (ext4_has_inline_data(dir)) {
2145                 retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
2146                 if (retval < 0)
2147                         goto out;
2148                 if (retval == 1) {
2149                         retval = 0;
2150                         goto out;
2151                 }
2152         }
2153
2154         if (is_dx(dir)) {
2155                 retval = ext4_dx_add_entry(handle, &fname, dir, inode);
2156                 if (!retval || (retval != ERR_BAD_DX_DIR))
2157                         goto out;
2158                 /* Can we just ignore htree data? */
2159                 if (ext4_has_metadata_csum(sb)) {
2160                         EXT4_ERROR_INODE(dir,
2161                                 "Directory has corrupted htree index.");
2162                         retval = -EFSCORRUPTED;
2163                         goto out;
2164                 }
2165                 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
2166                 dx_fallback++;
2167                 ext4_mark_inode_dirty(handle, dir);
2168         }
2169         blocks = dir->i_size >> sb->s_blocksize_bits;
2170         for (block = 0; block < blocks; block++) {
2171                 bh = ext4_read_dirblock(dir, block, DIRENT);
2172                 if (bh == NULL) {
2173                         bh = ext4_bread(handle, dir, block,
2174                                         EXT4_GET_BLOCKS_CREATE);
2175                         goto add_to_new_block;
2176                 }
2177                 if (IS_ERR(bh)) {
2178                         retval = PTR_ERR(bh);
2179                         bh = NULL;
2180                         goto out;
2181                 }
2182                 retval = add_dirent_to_buf(handle, &fname, dir, inode,
2183                                            NULL, bh);
2184                 if (retval != -ENOSPC)
2185                         goto out;
2186
2187                 if (blocks == 1 && !dx_fallback &&
2188                     ext4_has_feature_dir_index(sb)) {
2189                         retval = make_indexed_dir(handle, &fname, dir,
2190                                                   inode, bh);
2191                         bh = NULL; /* make_indexed_dir releases bh */
2192                         goto out;
2193                 }
2194                 brelse(bh);
2195         }
2196         bh = ext4_append(handle, dir, &block);
2197 add_to_new_block:
2198         if (IS_ERR(bh)) {
2199                 retval = PTR_ERR(bh);
2200                 bh = NULL;
2201                 goto out;
2202         }
2203         de = (struct ext4_dir_entry_2 *) bh->b_data;
2204         de->inode = 0;
2205         de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
2206
2207         if (csum_size) {
2208                 t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
2209                 initialize_dirent_tail(t, blocksize);
2210         }
2211
2212         retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
2213 out:
2214         ext4_fname_free_filename(&fname);
2215         brelse(bh);
2216         if (retval == 0)
2217                 ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
2218         return retval;
2219 }
2220
2221 /*
2222  * Returns 0 for success, or a negative error value
2223  */
2224 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2225                              struct inode *dir, struct inode *inode)
2226 {
2227         struct dx_frame frames[2], *frame;
2228         struct dx_entry *entries, *at;
2229         struct buffer_head *bh;
2230         struct super_block *sb = dir->i_sb;
2231         struct ext4_dir_entry_2 *de;
2232         int err;
2233
2234         frame = dx_probe(fname, dir, NULL, frames);
2235         if (IS_ERR(frame))
2236                 return PTR_ERR(frame);
2237         entries = frame->entries;
2238         at = frame->at;
2239         bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
2240         if (IS_ERR(bh)) {
2241                 err = PTR_ERR(bh);
2242                 bh = NULL;
2243                 goto cleanup;
2244         }
2245
2246         BUFFER_TRACE(bh, "get_write_access");
2247         err = ext4_journal_get_write_access(handle, bh);
2248         if (err)
2249                 goto journal_error;
2250
2251         err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
2252         if (err != -ENOSPC)
2253                 goto cleanup;
2254
2255         /* Block full, should compress but for now just split */
2256         dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
2257                        dx_get_count(entries), dx_get_limit(entries)));
2258         /* Need to split index? */
2259         if (dx_get_count(entries) == dx_get_limit(entries)) {
2260                 ext4_lblk_t newblock;
2261                 unsigned icount = dx_get_count(entries);
2262                 int levels = frame - frames;
2263                 struct dx_entry *entries2;
2264                 struct dx_node *node2;
2265                 struct buffer_head *bh2;
2266
2267                 if (levels && (dx_get_count(frames->entries) ==
2268                                dx_get_limit(frames->entries))) {
2269                         ext4_warning_inode(dir, "Directory index full!");
2270                         err = -ENOSPC;
2271                         goto cleanup;
2272                 }
2273                 bh2 = ext4_append(handle, dir, &newblock);
2274                 if (IS_ERR(bh2)) {
2275                         err = PTR_ERR(bh2);
2276                         goto cleanup;
2277                 }
2278                 node2 = (struct dx_node *)(bh2->b_data);
2279                 entries2 = node2->entries;
2280                 memset(&node2->fake, 0, sizeof(struct fake_dirent));
2281                 node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
2282                                                            sb->s_blocksize);
2283                 BUFFER_TRACE(frame->bh, "get_write_access");
2284                 err = ext4_journal_get_write_access(handle, frame->bh);
2285                 if (err)
2286                         goto journal_error;
2287                 if (levels) {
2288                         unsigned icount1 = icount/2, icount2 = icount - icount1;
2289                         unsigned hash2 = dx_get_hash(entries + icount1);
2290                         dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
2291                                        icount1, icount2));
2292
2293                         BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
2294                         err = ext4_journal_get_write_access(handle,
2295                                                              frames[0].bh);
2296                         if (err)
2297                                 goto journal_error;
2298
2299                         memcpy((char *) entries2, (char *) (entries + icount1),
2300                                icount2 * sizeof(struct dx_entry));
2301                         dx_set_count(entries, icount1);
2302                         dx_set_count(entries2, icount2);
2303                         dx_set_limit(entries2, dx_node_limit(dir));
2304
2305                         /* Which index block gets the new entry? */
2306                         if (at - entries >= icount1) {
2307                                 frame->at = at = at - entries - icount1 + entries2;
2308                                 frame->entries = entries = entries2;
2309                                 swap(frame->bh, bh2);
2310                         }
2311                         dx_insert_block(frames + 0, hash2, newblock);
2312                         dxtrace(dx_show_index("node", frames[1].entries));
2313                         dxtrace(dx_show_index("node",
2314                                ((struct dx_node *) bh2->b_data)->entries));
2315                         err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2316                         if (err)
2317                                 goto journal_error;
2318                         brelse (bh2);
2319                 } else {
2320                         dxtrace(printk(KERN_DEBUG
2321                                        "Creating second level index...\n"));
2322                         memcpy((char *) entries2, (char *) entries,
2323                                icount * sizeof(struct dx_entry));
2324                         dx_set_limit(entries2, dx_node_limit(dir));
2325
2326                         /* Set up root */
2327                         dx_set_count(entries, 1);
2328                         dx_set_block(entries + 0, newblock);
2329                         ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
2330
2331                         /* Add new access path frame */
2332                         frame = frames + 1;
2333                         frame->at = at = at - entries + entries2;
2334                         frame->entries = entries = entries2;
2335                         frame->bh = bh2;
2336                         err = ext4_journal_get_write_access(handle,
2337                                                              frame->bh);
2338                         if (err)
2339                                 goto journal_error;
2340                 }
2341                 err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
2342                 if (err) {
2343                         ext4_std_error(inode->i_sb, err);
2344                         goto cleanup;
2345                 }
2346         }
2347         de = do_split(handle, dir, &bh, frame, &fname->hinfo);
2348         if (IS_ERR(de)) {
2349                 err = PTR_ERR(de);
2350                 goto cleanup;
2351         }
2352         err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
2353         goto cleanup;
2354
2355 journal_error:
2356         ext4_std_error(dir->i_sb, err);
2357 cleanup:
2358         brelse(bh);
2359         dx_release(frames);
2360         return err;
2361 }
2362
2363 /*
2364  * ext4_generic_delete_entry deletes a directory entry by merging it
2365  * with the previous entry
2366  */
2367 int ext4_generic_delete_entry(handle_t *handle,
2368                               struct inode *dir,
2369                               struct ext4_dir_entry_2 *de_del,
2370                               struct buffer_head *bh,
2371                               void *entry_buf,
2372                               int buf_size,
2373                               int csum_size)
2374 {
2375         struct ext4_dir_entry_2 *de, *pde;
2376         unsigned int blocksize = dir->i_sb->s_blocksize;
2377         int i;
2378
2379         i = 0;
2380         pde = NULL;
2381         de = (struct ext4_dir_entry_2 *)entry_buf;
2382         while (i < buf_size - csum_size) {
2383                 if (ext4_check_dir_entry(dir, NULL, de, bh,
2384                                          entry_buf, buf_size, i))
2385                         return -EFSCORRUPTED;
2386                 if (de == de_del)  {
2387                         if (pde)
2388                                 pde->rec_len = ext4_rec_len_to_disk(
2389                                         ext4_rec_len_from_disk(pde->rec_len,
2390                                                                blocksize) +
2391                                         ext4_rec_len_from_disk(de->rec_len,
2392                                                                blocksize),
2393                                         blocksize);
2394                         else
2395                                 de->inode = 0;
2396                         dir->i_version++;
2397                         return 0;
2398                 }
2399                 i += ext4_rec_len_from_disk(de->rec_len, blocksize);
2400                 pde = de;
2401                 de = ext4_next_entry(de, blocksize);
2402         }
2403         return -ENOENT;
2404 }
2405
2406 static int ext4_delete_entry(handle_t *handle,
2407                              struct inode *dir,
2408                              struct ext4_dir_entry_2 *de_del,
2409                              struct buffer_head *bh)
2410 {
2411         int err, csum_size = 0;
2412
2413         if (ext4_has_inline_data(dir)) {
2414                 int has_inline_data = 1;
2415                 err = ext4_delete_inline_entry(handle, dir, de_del, bh,
2416                                                &has_inline_data);
2417                 if (has_inline_data)
2418                         return err;
2419         }
2420
2421         if (ext4_has_metadata_csum(dir->i_sb))
2422                 csum_size = sizeof(struct ext4_dir_entry_tail);
2423
2424         BUFFER_TRACE(bh, "get_write_access");
2425         err = ext4_journal_get_write_access(handle, bh);
2426         if (unlikely(err))
2427                 goto out;
2428
2429         err = ext4_generic_delete_entry(handle, dir, de_del,
2430                                         bh, bh->b_data,
2431                                         dir->i_sb->s_blocksize, csum_size);
2432         if (err)
2433                 goto out;
2434
2435         BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2436         err = ext4_handle_dirty_dirent_node(handle, dir, bh);
2437         if (unlikely(err))
2438                 goto out;
2439
2440         return 0;
2441 out:
2442         if (err != -ENOENT)
2443                 ext4_std_error(dir->i_sb, err);
2444         return err;
2445 }
2446
2447 /*
2448  * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
2449  * since this indicates that nlinks count was previously 1.
2450  */
2451 static void ext4_inc_count(handle_t *handle, struct inode *inode)
2452 {
2453         inc_nlink(inode);
2454         if (is_dx(inode) && inode->i_nlink > 1) {
2455                 /* limit is 16-bit i_links_count */
2456                 if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
2457                         set_nlink(inode, 1);
2458                         ext4_set_feature_dir_nlink(inode->i_sb);
2459                 }
2460         }
2461 }
2462
2463 /*
2464  * If a directory had nlink == 1, then we should let it be 1. This indicates
2465  * directory has >EXT4_LINK_MAX subdirs.
2466  */
2467 static void ext4_dec_count(handle_t *handle, struct inode *inode)
2468 {
2469         if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
2470                 drop_nlink(inode);
2471 }
2472
2473
2474 static int ext4_add_nondir(handle_t *handle,
2475                 struct dentry *dentry, struct inode *inode)
2476 {
2477         int err = ext4_add_entry(handle, dentry, inode);
2478         if (!err) {
2479                 ext4_mark_inode_dirty(handle, inode);
2480                 d_instantiate_new(dentry, inode);
2481                 return 0;
2482         }
2483         drop_nlink(inode);
2484         unlock_new_inode(inode);
2485         iput(inode);
2486         return err;
2487 }
2488
2489 /*
2490  * By the time this is called, we already have created
2491  * the directory cache entry for the new file, but it
2492  * is so far negative - it has no inode.
2493  *
2494  * If the create succeeds, we fill in the inode information
2495  * with d_instantiate().
2496  */
2497 static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2498                        bool excl)
2499 {
2500         handle_t *handle;
2501         struct inode *inode;
2502         int err, credits, retries = 0;
2503
2504         err = dquot_initialize(dir);
2505         if (err)
2506                 return err;
2507
2508         credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2509                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2510 retry:
2511         inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2512                                             NULL, EXT4_HT_DIR, credits);
2513         handle = ext4_journal_current_handle();
2514         err = PTR_ERR(inode);
2515         if (!IS_ERR(inode)) {
2516                 inode->i_op = &ext4_file_inode_operations;
2517                 inode->i_fop = &ext4_file_operations;
2518                 ext4_set_aops(inode);
2519                 err = ext4_add_nondir(handle, dentry, inode);
2520                 if (!err && IS_DIRSYNC(dir))
2521                         ext4_handle_sync(handle);
2522         }
2523         if (handle)
2524                 ext4_journal_stop(handle);
2525         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2526                 goto retry;
2527         return err;
2528 }
2529
2530 static int ext4_mknod(struct inode *dir, struct dentry *dentry,
2531                       umode_t mode, dev_t rdev)
2532 {
2533         handle_t *handle;
2534         struct inode *inode;
2535         int err, credits, retries = 0;
2536
2537         err = dquot_initialize(dir);
2538         if (err)
2539                 return err;
2540
2541         credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2542                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2543 retry:
2544         inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2545                                             NULL, EXT4_HT_DIR, credits);
2546         handle = ext4_journal_current_handle();
2547         err = PTR_ERR(inode);
2548         if (!IS_ERR(inode)) {
2549                 init_special_inode(inode, inode->i_mode, rdev);
2550                 inode->i_op = &ext4_special_inode_operations;
2551                 err = ext4_add_nondir(handle, dentry, inode);
2552                 if (!err && IS_DIRSYNC(dir))
2553                         ext4_handle_sync(handle);
2554         }
2555         if (handle)
2556                 ext4_journal_stop(handle);
2557         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2558                 goto retry;
2559         return err;
2560 }
2561
2562 static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
2563 {
2564         handle_t *handle;
2565         struct inode *inode;
2566         int err, retries = 0;
2567
2568         err = dquot_initialize(dir);
2569         if (err)
2570                 return err;
2571
2572 retry:
2573         inode = ext4_new_inode_start_handle(dir, mode,
2574                                             NULL, 0, NULL,
2575                                             EXT4_HT_DIR,
2576                         EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2577                           4 + EXT4_XATTR_TRANS_BLOCKS);
2578         handle = ext4_journal_current_handle();
2579         err = PTR_ERR(inode);
2580         if (!IS_ERR(inode)) {
2581                 inode->i_op = &ext4_file_inode_operations;
2582                 inode->i_fop = &ext4_file_operations;
2583                 ext4_set_aops(inode);
2584                 d_tmpfile(dentry, inode);
2585                 err = ext4_orphan_add(handle, inode);
2586                 if (err)
2587                         goto err_unlock_inode;
2588                 mark_inode_dirty(inode);
2589                 unlock_new_inode(inode);
2590         }
2591         if (handle)
2592                 ext4_journal_stop(handle);
2593         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2594                 goto retry;
2595         return err;
2596 err_unlock_inode:
2597         ext4_journal_stop(handle);
2598         unlock_new_inode(inode);
2599         return err;
2600 }
2601
2602 struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2603                           struct ext4_dir_entry_2 *de,
2604                           int blocksize, int csum_size,
2605                           unsigned int parent_ino, int dotdot_real_len)
2606 {
2607         de->inode = cpu_to_le32(inode->i_ino);
2608         de->name_len = 1;
2609         de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
2610                                            blocksize);
2611         strcpy(de->name, ".");
2612         ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2613
2614         de = ext4_next_entry(de, blocksize);
2615         de->inode = cpu_to_le32(parent_ino);
2616         de->name_len = 2;
2617         if (!dotdot_real_len)
2618                 de->rec_len = ext4_rec_len_to_disk(blocksize -
2619                                         (csum_size + EXT4_DIR_REC_LEN(1)),
2620                                         blocksize);
2621         else
2622                 de->rec_len = ext4_rec_len_to_disk(
2623                                 EXT4_DIR_REC_LEN(de->name_len), blocksize);
2624         strcpy(de->name, "..");
2625         ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2626
2627         return ext4_next_entry(de, blocksize);
2628 }
2629
2630 static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2631                              struct inode *inode)
2632 {
2633         struct buffer_head *dir_block = NULL;
2634         struct ext4_dir_entry_2 *de;
2635         struct ext4_dir_entry_tail *t;
2636         ext4_lblk_t block = 0;
2637         unsigned int blocksize = dir->i_sb->s_blocksize;
2638         int csum_size = 0;
2639         int err;
2640
2641         if (ext4_has_metadata_csum(dir->i_sb))
2642                 csum_size = sizeof(struct ext4_dir_entry_tail);
2643
2644         if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2645                 err = ext4_try_create_inline_dir(handle, dir, inode);
2646                 if (err < 0 && err != -ENOSPC)
2647                         goto out;
2648                 if (!err)
2649                         goto out;
2650         }
2651
2652         inode->i_size = 0;
2653         dir_block = ext4_append(handle, inode, &block);
2654         if (IS_ERR(dir_block))
2655                 return PTR_ERR(dir_block);
2656         de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2657         ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2658         set_nlink(inode, 2);
2659         if (csum_size) {
2660                 t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
2661                 initialize_dirent_tail(t, blocksize);
2662         }
2663
2664         BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2665         err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
2666         if (err)
2667                 goto out;
2668         set_buffer_verified(dir_block);
2669 out:
2670         brelse(dir_block);
2671         return err;
2672 }
2673
2674 static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2675 {
2676         handle_t *handle;
2677         struct inode *inode;
2678         int err, credits, retries = 0;
2679
2680         if (EXT4_DIR_LINK_MAX(dir))
2681                 return -EMLINK;
2682
2683         err = dquot_initialize(dir);
2684         if (err)
2685                 return err;
2686
2687         credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2688                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2689 retry:
2690         inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
2691                                             &dentry->d_name,
2692                                             0, NULL, EXT4_HT_DIR, credits);
2693         handle = ext4_journal_current_handle();
2694         err = PTR_ERR(inode);
2695         if (IS_ERR(inode))
2696                 goto out_stop;
2697
2698         inode->i_op = &ext4_dir_inode_operations;
2699         inode->i_fop = &ext4_dir_operations;
2700         err = ext4_init_new_dir(handle, dir, inode);
2701         if (err)
2702                 goto out_clear_inode;
2703         err = ext4_mark_inode_dirty(handle, inode);
2704         if (!err)
2705                 err = ext4_add_entry(handle, dentry, inode);
2706         if (err) {
2707 out_clear_inode:
2708                 clear_nlink(inode);
2709                 unlock_new_inode(inode);
2710                 ext4_mark_inode_dirty(handle, inode);
2711                 iput(inode);
2712                 goto out_stop;
2713         }
2714         ext4_inc_count(handle, dir);
2715         ext4_update_dx_flag(dir);
2716         err = ext4_mark_inode_dirty(handle, dir);
2717         if (err)
2718                 goto out_clear_inode;
2719         d_instantiate_new(dentry, inode);
2720         if (IS_DIRSYNC(dir))
2721                 ext4_handle_sync(handle);
2722
2723 out_stop:
2724         if (handle)
2725                 ext4_journal_stop(handle);
2726         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2727                 goto retry;
2728         return err;
2729 }
2730
2731 /*
2732  * routine to check that the specified directory is empty (for rmdir)
2733  */
2734 bool ext4_empty_dir(struct inode *inode)
2735 {
2736         unsigned int offset;
2737         struct buffer_head *bh;
2738         struct ext4_dir_entry_2 *de;
2739         struct super_block *sb;
2740
2741         if (ext4_has_inline_data(inode)) {
2742                 int has_inline_data = 1;
2743                 int ret;
2744
2745                 ret = empty_inline_dir(inode, &has_inline_data);
2746                 if (has_inline_data)
2747                         return ret;
2748         }
2749
2750         sb = inode->i_sb;
2751         if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
2752                 EXT4_ERROR_INODE(inode, "invalid size");
2753                 return true;
2754         }
2755         /* The first directory block must not be a hole,
2756          * so treat it as DIRENT_HTREE
2757          */
2758         bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
2759         if (IS_ERR(bh))
2760                 return true;
2761
2762         de = (struct ext4_dir_entry_2 *) bh->b_data;
2763         if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
2764                                  0) ||
2765             le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
2766                 ext4_warning_inode(inode, "directory missing '.'");
2767                 brelse(bh);
2768                 return true;
2769         }
2770         offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
2771         de = ext4_next_entry(de, sb->s_blocksize);
2772         if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
2773                                  offset) ||
2774             le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
2775                 ext4_warning_inode(inode, "directory missing '..'");
2776                 brelse(bh);
2777                 return true;
2778         }
2779         offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
2780         while (offset < inode->i_size) {
2781                 if (!(offset & (sb->s_blocksize - 1))) {
2782                         unsigned int lblock;
2783                         brelse(bh);
2784                         lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
2785                         bh = ext4_read_dirblock(inode, lblock, EITHER);
2786                         if (bh == NULL) {
2787                                 offset += sb->s_blocksize;
2788                                 continue;
2789                         }
2790                         if (IS_ERR(bh))
2791                                 return true;
2792                 }
2793                 de = (struct ext4_dir_entry_2 *) (bh->b_data +
2794                                         (offset & (sb->s_blocksize - 1)));
2795                 if (ext4_check_dir_entry(inode, NULL, de, bh,
2796                                          bh->b_data, bh->b_size, offset) ||
2797                     le32_to_cpu(de->inode)) {
2798                         brelse(bh);
2799                         return false;
2800                 }
2801                 offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
2802         }
2803         brelse(bh);
2804         return true;
2805 }
2806
2807 /*
2808  * ext4_orphan_add() links an unlinked or truncated inode into a list of
2809  * such inodes, starting at the superblock, in case we crash before the
2810  * file is closed/deleted, or in case the inode truncate spans multiple
2811  * transactions and the last transaction is not recovered after a crash.
2812  *
2813  * At filesystem recovery time, we walk this list deleting unlinked
2814  * inodes and truncating linked inodes in ext4_orphan_cleanup().
2815  *
2816  * Orphan list manipulation functions must be called under i_mutex unless
2817  * we are just creating the inode or deleting it.
2818  */
2819 int ext4_orphan_add(handle_t *handle, struct inode *inode)
2820 {
2821         struct super_block *sb = inode->i_sb;
2822         struct ext4_sb_info *sbi = EXT4_SB(sb);
2823         struct ext4_iloc iloc;
2824         int err = 0, rc;
2825         bool dirty = false;
2826
2827         if (!sbi->s_journal || is_bad_inode(inode))
2828                 return 0;
2829
2830         WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
2831                      !inode_is_locked(inode));
2832         /*
2833          * Exit early if inode already is on orphan list. This is a big speedup
2834          * since we don't have to contend on the global s_orphan_lock.
2835          */
2836         if (!list_empty(&EXT4_I(inode)->i_orphan))
2837                 return 0;
2838
2839         /*
2840          * Orphan handling is only valid for files with data blocks
2841          * being truncated, or files being unlinked. Note that we either
2842          * hold i_mutex, or the inode can not be referenced from outside,
2843          * so i_nlink should not be bumped due to race
2844          */
2845         J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2846                   S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
2847
2848         BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2849         err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2850         if (err)
2851                 goto out;
2852
2853         err = ext4_reserve_inode_write(handle, inode, &iloc);
2854         if (err)
2855                 goto out;
2856
2857         mutex_lock(&sbi->s_orphan_lock);
2858         /*
2859          * Due to previous errors inode may be already a part of on-disk
2860          * orphan list. If so skip on-disk list modification.
2861          */
2862         if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
2863             (le32_to_cpu(sbi->s_es->s_inodes_count))) {
2864                 /* Insert this inode at the head of the on-disk orphan list */
2865                 NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
2866                 sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2867                 dirty = true;
2868         }
2869         list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
2870         mutex_unlock(&sbi->s_orphan_lock);
2871
2872         if (dirty) {
2873                 err = ext4_handle_dirty_super(handle, sb);
2874                 rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2875                 if (!err)
2876                         err = rc;
2877                 if (err) {
2878                         /*
2879                          * We have to remove inode from in-memory list if
2880                          * addition to on disk orphan list failed. Stray orphan
2881                          * list entries can cause panics at unmount time.
2882                          */
2883                         mutex_lock(&sbi->s_orphan_lock);
2884                         list_del_init(&EXT4_I(inode)->i_orphan);
2885                         mutex_unlock(&sbi->s_orphan_lock);
2886                 }
2887         } else
2888                 brelse(iloc.bh);
2889
2890         jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
2891         jbd_debug(4, "orphan inode %lu will point to %d\n",
2892                         inode->i_ino, NEXT_ORPHAN(inode));
2893 out:
2894         ext4_std_error(sb, err);
2895         return err;
2896 }
2897
2898 /*
2899  * ext4_orphan_del() removes an unlinked or truncated inode from the list
2900  * of such inodes stored on disk, because it is finally being cleaned up.
2901  */
2902 int ext4_orphan_del(handle_t *handle, struct inode *inode)
2903 {
2904         struct list_head *prev;
2905         struct ext4_inode_info *ei = EXT4_I(inode);
2906         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2907         __u32 ino_next;
2908         struct ext4_iloc iloc;
2909         int err = 0;
2910
2911         if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
2912                 return 0;
2913
2914         WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
2915                      !inode_is_locked(inode));
2916         /* Do this quick check before taking global s_orphan_lock. */
2917         if (list_empty(&ei->i_orphan))
2918                 return 0;
2919
2920         if (handle) {
2921                 /* Grab inode buffer early before taking global s_orphan_lock */
2922                 err = ext4_reserve_inode_write(handle, inode, &iloc);
2923         }
2924
2925         mutex_lock(&sbi->s_orphan_lock);
2926         jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
2927
2928         prev = ei->i_orphan.prev;
2929         list_del_init(&ei->i_orphan);
2930
2931         /* If we're on an error path, we may not have a valid
2932          * transaction handle with which to update the orphan list on
2933          * disk, but we still need to remove the inode from the linked
2934          * list in memory. */
2935         if (!handle || err) {
2936                 mutex_unlock(&sbi->s_orphan_lock);
2937                 goto out_err;
2938         }
2939
2940         ino_next = NEXT_ORPHAN(inode);
2941         if (prev == &sbi->s_orphan) {
2942                 jbd_debug(4, "superblock will point to %u\n", ino_next);
2943                 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2944                 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2945                 if (err) {
2946                         mutex_unlock(&sbi->s_orphan_lock);
2947                         goto out_brelse;
2948                 }
2949                 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2950                 mutex_unlock(&sbi->s_orphan_lock);
2951                 err = ext4_handle_dirty_super(handle, inode->i_sb);
2952         } else {
2953                 struct ext4_iloc iloc2;
2954                 struct inode *i_prev =
2955                         &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
2956
2957                 jbd_debug(4, "orphan inode %lu will point to %u\n",
2958                           i_prev->i_ino, ino_next);
2959                 err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
2960                 if (err) {
2961                         mutex_unlock(&sbi->s_orphan_lock);
2962                         goto out_brelse;
2963                 }
2964                 NEXT_ORPHAN(i_prev) = ino_next;
2965                 err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
2966                 mutex_unlock(&sbi->s_orphan_lock);
2967         }
2968         if (err)
2969                 goto out_brelse;
2970         NEXT_ORPHAN(inode) = 0;
2971         err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2972 out_err:
2973         ext4_std_error(inode->i_sb, err);
2974         return err;
2975
2976 out_brelse:
2977         brelse(iloc.bh);
2978         goto out_err;
2979 }
2980
2981 static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2982 {
2983         int retval;
2984         struct inode *inode;
2985         struct buffer_head *bh;
2986         struct ext4_dir_entry_2 *de;
2987         handle_t *handle = NULL;
2988
2989         /* Initialize quotas before so that eventual writes go in
2990          * separate transaction */
2991         retval = dquot_initialize(dir);
2992         if (retval)
2993                 return retval;
2994         retval = dquot_initialize(d_inode(dentry));
2995         if (retval)
2996                 return retval;
2997
2998         retval = -ENOENT;
2999         bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
3000         if (IS_ERR(bh))
3001                 return PTR_ERR(bh);
3002         if (!bh)
3003                 goto end_rmdir;
3004
3005         inode = d_inode(dentry);
3006
3007         retval = -EFSCORRUPTED;
3008         if (le32_to_cpu(de->inode) != inode->i_ino)
3009                 goto end_rmdir;
3010
3011         retval = -ENOTEMPTY;
3012         if (!ext4_empty_dir(inode))
3013                 goto end_rmdir;
3014
3015         handle = ext4_journal_start(dir, EXT4_HT_DIR,
3016                                     EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3017         if (IS_ERR(handle)) {
3018                 retval = PTR_ERR(handle);
3019                 handle = NULL;
3020                 goto end_rmdir;
3021         }
3022
3023         if (IS_DIRSYNC(dir))
3024                 ext4_handle_sync(handle);
3025
3026         retval = ext4_delete_entry(handle, dir, de, bh);
3027         if (retval)
3028                 goto end_rmdir;
3029         if (!EXT4_DIR_LINK_EMPTY(inode))
3030                 ext4_warning_inode(inode,
3031                              "empty directory '%.*s' has too many links (%u)",
3032                              dentry->d_name.len, dentry->d_name.name,
3033                              inode->i_nlink);
3034         inode->i_version++;
3035         clear_nlink(inode);
3036         /* There's no need to set i_disksize: the fact that i_nlink is
3037          * zero will ensure that the right thing happens during any
3038          * recovery. */
3039         inode->i_size = 0;
3040         ext4_orphan_add(handle, inode);
3041         inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
3042         ext4_mark_inode_dirty(handle, inode);
3043         ext4_dec_count(handle, dir);
3044         ext4_update_dx_flag(dir);
3045         ext4_mark_inode_dirty(handle, dir);
3046
3047 end_rmdir:
3048         brelse(bh);
3049         if (handle)
3050                 ext4_journal_stop(handle);
3051         return retval;
3052 }
3053
3054 static int ext4_unlink(struct inode *dir, struct dentry *dentry)
3055 {
3056         int retval;
3057         struct inode *inode;
3058         struct buffer_head *bh;
3059         struct ext4_dir_entry_2 *de;
3060         handle_t *handle = NULL;
3061
3062         trace_ext4_unlink_enter(dir, dentry);
3063         /* Initialize quotas before so that eventual writes go
3064          * in separate transaction */
3065         retval = dquot_initialize(dir);
3066         if (retval)
3067                 return retval;
3068         retval = dquot_initialize(d_inode(dentry));
3069         if (retval)
3070                 return retval;
3071
3072         retval = -ENOENT;
3073         bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
3074         if (IS_ERR(bh))
3075                 return PTR_ERR(bh);
3076         if (!bh)
3077                 goto end_unlink;
3078
3079         inode = d_inode(dentry);
3080
3081         retval = -EFSCORRUPTED;
3082         if (le32_to_cpu(de->inode) != inode->i_ino)
3083                 goto end_unlink;
3084
3085         handle = ext4_journal_start(dir, EXT4_HT_DIR,
3086                                     EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3087         if (IS_ERR(handle)) {
3088                 retval = PTR_ERR(handle);
3089                 handle = NULL;
3090                 goto end_unlink;
3091         }
3092
3093         if (IS_DIRSYNC(dir))
3094                 ext4_handle_sync(handle);
3095
3096         retval = ext4_delete_entry(handle, dir, de, bh);
3097         if (retval)
3098                 goto end_unlink;
3099         dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
3100         ext4_update_dx_flag(dir);
3101         ext4_mark_inode_dirty(handle, dir);
3102         if (inode->i_nlink == 0)
3103                 ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
3104                                    dentry->d_name.len, dentry->d_name.name);
3105         else
3106                 drop_nlink(inode);
3107         if (!inode->i_nlink)
3108                 ext4_orphan_add(handle, inode);
3109         inode->i_ctime = ext4_current_time(inode);
3110         ext4_mark_inode_dirty(handle, inode);
3111
3112 end_unlink:
3113         brelse(bh);
3114         if (handle)
3115                 ext4_journal_stop(handle);
3116         trace_ext4_unlink_exit(dentry, retval);
3117         return retval;
3118 }
3119
3120 static int ext4_symlink(struct inode *dir,
3121                         struct dentry *dentry, const char *symname)
3122 {
3123         handle_t *handle;
3124         struct inode *inode;
3125         int err, len = strlen(symname);
3126         int credits;
3127         bool encryption_required;
3128         struct fscrypt_str disk_link;
3129         struct fscrypt_symlink_data *sd = NULL;
3130
3131         disk_link.len = len + 1;
3132         disk_link.name = (char *) symname;
3133
3134         encryption_required = (ext4_encrypted_inode(dir) ||
3135                                DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)));
3136         if (encryption_required) {
3137                 err = fscrypt_get_encryption_info(dir);
3138                 if (err)
3139                         return err;
3140                 if (!fscrypt_has_encryption_key(dir))
3141                         return -ENOKEY;
3142                 disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
3143                                  sizeof(struct fscrypt_symlink_data));
3144                 sd = kzalloc(disk_link.len, GFP_KERNEL);
3145                 if (!sd)
3146                         return -ENOMEM;
3147         }
3148
3149         if (disk_link.len > dir->i_sb->s_blocksize) {
3150                 err = -ENAMETOOLONG;
3151                 goto err_free_sd;
3152         }
3153
3154         err = dquot_initialize(dir);
3155         if (err)
3156                 goto err_free_sd;
3157
3158         if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3159                 /*
3160                  * For non-fast symlinks, we just allocate inode and put it on
3161                  * orphan list in the first transaction => we need bitmap,
3162                  * group descriptor, sb, inode block, quota blocks, and
3163                  * possibly selinux xattr blocks.
3164                  */
3165                 credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
3166                           EXT4_XATTR_TRANS_BLOCKS;
3167         } else {
3168                 /*
3169                  * Fast symlink. We have to add entry to directory
3170                  * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
3171                  * allocate new inode (bitmap, group descriptor, inode block,
3172                  * quota blocks, sb is already counted in previous macros).
3173                  */
3174                 credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3175                           EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
3176         }
3177
3178         inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
3179                                             &dentry->d_name, 0, NULL,
3180                                             EXT4_HT_DIR, credits);
3181         handle = ext4_journal_current_handle();
3182         if (IS_ERR(inode)) {
3183                 if (handle)
3184                         ext4_journal_stop(handle);
3185                 err = PTR_ERR(inode);
3186                 goto err_free_sd;
3187         }
3188
3189         if (encryption_required) {
3190                 struct qstr istr;
3191                 struct fscrypt_str ostr =
3192                         FSTR_INIT(sd->encrypted_path, disk_link.len);
3193
3194                 istr.name = (const unsigned char *) symname;
3195                 istr.len = len;
3196                 err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
3197                 if (err)
3198                         goto err_drop_inode;
3199                 sd->len = cpu_to_le16(ostr.len);
3200                 disk_link.name = (char *) sd;
3201                 inode->i_op = &ext4_encrypted_symlink_inode_operations;
3202         }
3203
3204         if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3205                 if (!encryption_required)
3206                         inode->i_op = &ext4_symlink_inode_operations;
3207                 inode_nohighmem(inode);
3208                 ext4_set_aops(inode);
3209                 /*
3210                  * We cannot call page_symlink() with transaction started
3211                  * because it calls into ext4_write_begin() which can wait
3212                  * for transaction commit if we are running out of space
3213                  * and thus we deadlock. So we have to stop transaction now
3214                  * and restart it when symlink contents is written.
3215                  * 
3216                  * To keep fs consistent in case of crash, we have to put inode
3217                  * to orphan list in the mean time.
3218                  */
3219                 drop_nlink(inode);
3220                 err = ext4_orphan_add(handle, inode);
3221                 ext4_journal_stop(handle);
3222                 handle = NULL;
3223                 if (err)
3224                         goto err_drop_inode;
3225                 err = __page_symlink(inode, disk_link.name, disk_link.len, 1);
3226                 if (err)
3227                         goto err_drop_inode;
3228                 /*
3229                  * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
3230                  * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
3231                  */
3232                 handle = ext4_journal_start(dir, EXT4_HT_DIR,
3233                                 EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3234                                 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
3235                 if (IS_ERR(handle)) {
3236                         err = PTR_ERR(handle);
3237                         handle = NULL;
3238                         goto err_drop_inode;
3239                 }
3240                 set_nlink(inode, 1);
3241                 err = ext4_orphan_del(handle, inode);
3242                 if (err)
3243                         goto err_drop_inode;
3244         } else {
3245                 /* clear the extent format for fast symlink */
3246                 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
3247                 if (!encryption_required) {
3248                         inode->i_op = &ext4_fast_symlink_inode_operations;
3249                         inode->i_link = (char *)&EXT4_I(inode)->i_data;
3250                 }
3251                 memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
3252                        disk_link.len);
3253                 inode->i_size = disk_link.len - 1;
3254         }
3255         EXT4_I(inode)->i_disksize = inode->i_size;
3256         err = ext4_add_nondir(handle, dentry, inode);
3257         if (!err && IS_DIRSYNC(dir))
3258                 ext4_handle_sync(handle);
3259
3260         if (handle)
3261                 ext4_journal_stop(handle);
3262         kfree(sd);
3263         return err;
3264 err_drop_inode:
3265         if (handle)
3266                 ext4_journal_stop(handle);
3267         clear_nlink(inode);
3268         unlock_new_inode(inode);
3269         iput(inode);
3270 err_free_sd:
3271         kfree(sd);
3272         return err;
3273 }
3274
3275 static int ext4_link(struct dentry *old_dentry,
3276                      struct inode *dir, struct dentry *dentry)
3277 {
3278         handle_t *handle;
3279         struct inode *inode = d_inode(old_dentry);
3280         int err, retries = 0;
3281
3282         if (inode->i_nlink >= EXT4_LINK_MAX)
3283                 return -EMLINK;
3284         if (ext4_encrypted_inode(dir) &&
3285                         !fscrypt_has_permitted_context(dir, inode))
3286                 return -EXDEV;
3287
3288        if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
3289            (!projid_eq(EXT4_I(dir)->i_projid,
3290                        EXT4_I(old_dentry->d_inode)->i_projid)))
3291                 return -EXDEV;
3292
3293         err = dquot_initialize(dir);
3294         if (err)
3295                 return err;
3296
3297 retry:
3298         handle = ext4_journal_start(dir, EXT4_HT_DIR,
3299                 (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3300                  EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1);
3301         if (IS_ERR(handle))
3302                 return PTR_ERR(handle);
3303
3304         if (IS_DIRSYNC(dir))
3305                 ext4_handle_sync(handle);
3306
3307         inode->i_ctime = ext4_current_time(inode);
3308         ext4_inc_count(handle, inode);
3309         ihold(inode);
3310
3311         err = ext4_add_entry(handle, dentry, inode);
3312         if (!err) {
3313                 ext4_mark_inode_dirty(handle, inode);
3314                 /* this can happen only for tmpfile being
3315                  * linked the first time
3316                  */
3317                 if (inode->i_nlink == 1)
3318                         ext4_orphan_del(handle, inode);
3319                 d_instantiate(dentry, inode);
3320         } else {
3321                 drop_nlink(inode);
3322                 iput(inode);
3323         }
3324         ext4_journal_stop(handle);
3325         if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3326                 goto retry;
3327         return err;
3328 }
3329
3330
3331 /*
3332  * Try to find buffer head where contains the parent block.
3333  * It should be the inode block if it is inlined or the 1st block
3334  * if it is a normal dir.
3335  */
3336 static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
3337                                         struct inode *inode,
3338                                         int *retval,
3339                                         struct ext4_dir_entry_2 **parent_de,
3340                                         int *inlined)
3341 {
3342         struct buffer_head *bh;
3343
3344         if (!ext4_has_inline_data(inode)) {
3345                 struct ext4_dir_entry_2 *de;
3346                 unsigned int offset;
3347
3348                 /* The first directory block must not be a hole, so
3349                  * treat it as DIRENT_HTREE
3350                  */
3351                 bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
3352                 if (IS_ERR(bh)) {
3353                         *retval = PTR_ERR(bh);
3354                         return NULL;
3355                 }
3356
3357                 de = (struct ext4_dir_entry_2 *) bh->b_data;
3358                 if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3359                                          bh->b_size, 0) ||
3360                     le32_to_cpu(de->inode) != inode->i_ino ||
3361                     strcmp(".", de->name)) {
3362                         EXT4_ERROR_INODE(inode, "directory missing '.'");
3363                         brelse(bh);
3364                         *retval = -EFSCORRUPTED;
3365                         return NULL;
3366                 }
3367                 offset = ext4_rec_len_from_disk(de->rec_len,
3368                                                 inode->i_sb->s_blocksize);
3369                 de = ext4_next_entry(de, inode->i_sb->s_blocksize);
3370                 if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3371                                          bh->b_size, offset) ||
3372                     le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3373                         EXT4_ERROR_INODE(inode, "directory missing '..'");
3374                         brelse(bh);
3375                         *retval = -EFSCORRUPTED;
3376                         return NULL;
3377                 }
3378                 *parent_de = de;
3379
3380                 return bh;
3381         }
3382
3383         *inlined = 1;
3384         return ext4_get_first_inline_block(inode, parent_de, retval);
3385 }
3386
/*
 * One side (source or target) of a rename operation, bundling the VFS
 * objects with the directory-entry lookup state the rename helpers share.
 */
struct ext4_renament {
	/* directory containing the entry */
	struct inode *dir;
	/* dentry naming the entry inside "dir" */
	struct dentry *dentry;
	/* inode the dentry refers to; NULL for a non-existent rename target */
	struct inode *inode;
	/* NOTE(review): not referenced in this part of the file; presumably
	 * records that "inode" is a directory -- confirm against users */
	bool is_dir;
	/* link-count change to apply to "dir": -1 decrements, any other
	 * non-zero value increments (see ext4_update_dir_count()) */
	int dir_nlink_delta;

	/* entry for "dentry" */
	struct buffer_head *bh;
	struct ext4_dir_entry_2 *de;
	/* non-zero: ext4_setent() does not dirty "bh" as a dirent block */
	int inlined;

	/* entry for ".." in inode if it's a directory */
	struct buffer_head *dir_bh;
	struct ext4_dir_entry_2 *parent_de;
	/* set to 1 by ext4_get_first_dir_block() for inline-data directories */
	int dir_inlined;
};
3404
3405 static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
3406 {
3407         int retval;
3408
3409         ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode,
3410                                               &retval, &ent->parent_de,
3411                                               &ent->dir_inlined);
3412         if (!ent->dir_bh)
3413                 return retval;
3414         if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
3415                 return -EFSCORRUPTED;
3416         BUFFER_TRACE(ent->dir_bh, "get_write_access");
3417         return ext4_journal_get_write_access(handle, ent->dir_bh);
3418 }
3419
3420 static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
3421                                   unsigned dir_ino)
3422 {
3423         int retval;
3424
3425         ent->parent_de->inode = cpu_to_le32(dir_ino);
3426         BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata");
3427         if (!ent->dir_inlined) {
3428                 if (is_dx(ent->inode)) {
3429                         retval = ext4_handle_dirty_dx_node(handle,
3430                                                            ent->inode,
3431                                                            ent->dir_bh);
3432                 } else {
3433                         retval = ext4_handle_dirty_dirent_node(handle,
3434                                                                ent->inode,
3435                                                                ent->dir_bh);
3436                 }
3437         } else {
3438                 retval = ext4_mark_inode_dirty(handle, ent->inode);
3439         }
3440         if (retval) {
3441                 ext4_std_error(ent->dir->i_sb, retval);
3442                 return retval;
3443         }
3444         return 0;
3445 }
3446
3447 static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
3448                        unsigned ino, unsigned file_type)
3449 {
3450         int retval;
3451
3452         BUFFER_TRACE(ent->bh, "get write access");
3453         retval = ext4_journal_get_write_access(handle, ent->bh);
3454         if (retval)
3455                 return retval;
3456         ent->de->inode = cpu_to_le32(ino);
3457         if (ext4_has_feature_filetype(ent->dir->i_sb))
3458                 ent->de->file_type = file_type;
3459         ent->dir->i_version++;
3460         ent->dir->i_ctime = ent->dir->i_mtime =
3461                 ext4_current_time(ent->dir);
3462         ext4_mark_inode_dirty(handle, ent->dir);
3463         BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
3464         if (!ent->inlined) {
3465                 retval = ext4_handle_dirty_dirent_node(handle,
3466                                                        ent->dir, ent->bh);
3467                 if (unlikely(retval)) {
3468                         ext4_std_error(ent->dir->i_sb, retval);
3469                         return retval;
3470                 }
3471         }
3472
3473         return 0;
3474 }
3475
3476 static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
3477                           unsigned ino, unsigned file_type)
3478 {
3479         struct ext4_renament old = *ent;
3480         int retval = 0;
3481
3482         /*
3483          * old->de could have moved from under us during make indexed dir,
3484          * so the old->de may no longer valid and need to find it again
3485          * before reset old inode info.
3486          */
3487         old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3488         if (IS_ERR(old.bh))
3489                 retval = PTR_ERR(old.bh);
3490         if (!old.bh)
3491                 retval = -ENOENT;
3492         if (retval) {
3493                 ext4_std_error(old.dir->i_sb, retval);
3494                 return;
3495         }
3496
3497         ext4_setent(handle, &old, ino, file_type);
3498         brelse(old.bh);
3499 }
3500
3501 static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3502                                   const struct qstr *d_name)
3503 {
3504         int retval = -ENOENT;
3505         struct buffer_head *bh;
3506         struct ext4_dir_entry_2 *de;
3507
3508         bh = ext4_find_entry(dir, d_name, &de, NULL);
3509         if (IS_ERR(bh))
3510                 return PTR_ERR(bh);
3511         if (bh) {
3512                 retval = ext4_delete_entry(handle, dir, de, bh);
3513                 brelse(bh);
3514         }
3515         return retval;
3516 }
3517
3518 static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
3519                                int force_reread)
3520 {
3521         int retval;
3522         /*
3523          * ent->de could have moved from under us during htree split, so make
3524          * sure that we are deleting the right entry.  We might also be pointing
3525          * to a stale entry in the unused part of ent->bh so just checking inum
3526          * and the name isn't enough.
3527          */
3528         if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3529             ent->de->name_len != ent->dentry->d_name.len ||
3530             strncmp(ent->de->name, ent->dentry->d_name.name,
3531                     ent->de->name_len) ||
3532             force_reread) {
3533                 retval = ext4_find_delete_entry(handle, ent->dir,
3534                                                 &ent->dentry->d_name);
3535         } else {
3536                 retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
3537                 if (retval == -ENOENT) {
3538                         retval = ext4_find_delete_entry(handle, ent->dir,
3539                                                         &ent->dentry->d_name);
3540                 }
3541         }
3542
3543         if (retval) {
3544                 ext4_warning_inode(ent->dir,
3545                                    "Deleting old file: nlink %d, error=%d",
3546                                    ent->dir->i_nlink, retval);
3547         }
3548 }
3549
3550 static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3551 {
3552         if (ent->dir_nlink_delta) {
3553                 if (ent->dir_nlink_delta == -1)
3554                         ext4_dec_count(handle, ent->dir);
3555                 else
3556                         ext4_inc_count(handle, ent->dir);
3557                 ext4_mark_inode_dirty(handle, ent->dir);
3558         }
3559 }
3560
3561 static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
3562                                               int credits, handle_t **h)
3563 {
3564         struct inode *wh;
3565         handle_t *handle;
3566         int retries = 0;
3567
3568         /*
3569          * for inode block, sb block, group summaries,
3570          * and inode bitmap
3571          */
3572         credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
3573                     EXT4_XATTR_TRANS_BLOCKS + 4);
3574 retry:
3575         wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE,
3576                                          &ent->dentry->d_name, 0, NULL,
3577                                          EXT4_HT_DIR, credits);
3578
3579         handle = ext4_journal_current_handle();
3580         if (IS_ERR(wh)) {
3581                 if (handle)
3582                         ext4_journal_stop(handle);
3583                 if (PTR_ERR(wh) == -ENOSPC &&
3584                     ext4_should_retry_alloc(ent->dir->i_sb, &retries))
3585                         goto retry;
3586         } else {
3587                 *h = handle;
3588                 init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
3589                 wh->i_op = &ext4_special_inode_operations;
3590         }
3591         return wh;
3592 }
3593
3594 /*
3595  * Anybody can rename anything with this: the permission checks are left to the
3596  * higher-level routines.
3597  *
3598  * n.b.  old_{dentry,inode) refers to the source dentry/inode
3599  * while new_{dentry,inode) refers to the destination dentry/inode
3600  * This comes from rename(const char *oldpath, const char *newpath)
3601  */
3602 static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3603                        struct inode *new_dir, struct dentry *new_dentry,
3604                        unsigned int flags)
3605 {
3606         handle_t *handle = NULL;
3607         struct ext4_renament old = {
3608                 .dir = old_dir,
3609                 .dentry = old_dentry,
3610                 .inode = d_inode(old_dentry),
3611         };
3612         struct ext4_renament new = {
3613                 .dir = new_dir,
3614                 .dentry = new_dentry,
3615                 .inode = d_inode(new_dentry),
3616         };
3617         int force_reread;
3618         int retval;
3619         struct inode *whiteout = NULL;
3620         int credits;
3621         u8 old_file_type;
3622
3623         if (new.inode && new.inode->i_nlink == 0) {
3624                 EXT4_ERROR_INODE(new.inode,
3625                                  "target of rename is already freed");
3626                 return -EFSCORRUPTED;
3627         }
3628
3629         if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
3630             (!projid_eq(EXT4_I(new_dir)->i_projid,
3631                         EXT4_I(old_dentry->d_inode)->i_projid)))
3632                 return -EXDEV;
3633
3634         if ((ext4_encrypted_inode(old_dir) &&
3635              !fscrypt_has_encryption_key(old_dir)) ||
3636             (ext4_encrypted_inode(new_dir) &&
3637              !fscrypt_has_encryption_key(new_dir)))
3638                 return -ENOKEY;
3639
3640         retval = dquot_initialize(old.dir);
3641         if (retval)
3642                 return retval;
3643         retval = dquot_initialize(new.dir);
3644         if (retval)
3645                 return retval;
3646
3647         /* Initialize quotas before so that eventual writes go
3648          * in separate transaction */
3649         if (new.inode) {
3650                 retval = dquot_initialize(new.inode);
3651                 if (retval)
3652                         return retval;
3653         }
3654
3655         old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3656         if (IS_ERR(old.bh))
3657                 return PTR_ERR(old.bh);
3658         /*
3659          *  Check for inode number is _not_ due to possible IO errors.
3660          *  We might rmdir the source, keep it as pwd of some process
3661          *  and merrily kill the link to whatever was created under the
3662          *  same name. Goodbye sticky bit ;-<
3663          */
3664         retval = -ENOENT;
3665         if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3666                 goto release_bh;
3667
3668         if ((old.dir != new.dir) &&
3669             ext4_encrypted_inode(new.dir) &&
3670             !fscrypt_has_permitted_context(new.dir, old.inode)) {
3671                 retval = -EXDEV;
3672                 goto release_bh;
3673         }
3674
3675         new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3676                                  &new.de, &new.inlined);
3677         if (IS_ERR(new.bh)) {
3678                 retval = PTR_ERR(new.bh);
3679                 new.bh = NULL;
3680                 goto release_bh;
3681         }
3682         if (new.bh) {
3683                 if (!new.inode) {
3684                         brelse(new.bh);
3685                         new.bh = NULL;
3686                 }
3687         }
3688         if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
3689                 ext4_alloc_da_blocks(old.inode);
3690
3691         credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3692                    EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3693         if (!(flags & RENAME_WHITEOUT)) {
3694                 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3695                 if (IS_ERR(handle)) {
3696                         retval = PTR_ERR(handle);
3697                         goto release_bh;
3698                 }
3699         } else {
3700                 whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
3701                 if (IS_ERR(whiteout)) {
3702                         retval = PTR_ERR(whiteout);
3703                         goto release_bh;
3704                 }
3705         }
3706
3707         old_file_type = old.de->file_type;
3708         if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3709                 ext4_handle_sync(handle);
3710
3711         if (S_ISDIR(old.inode->i_mode)) {
3712                 if (new.inode) {
3713                         retval = -ENOTEMPTY;
3714                         if (!ext4_empty_dir(new.inode))
3715                                 goto end_rename;
3716                 } else {
3717                         retval = -EMLINK;
3718                         if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
3719                                 goto end_rename;
3720                 }
3721                 retval = ext4_rename_dir_prepare(handle, &old);
3722                 if (retval)
3723                         goto end_rename;
3724         }
3725         /*
3726          * If we're renaming a file within an inline_data dir and adding or
3727          * setting the new dirent causes a conversion from inline_data to
3728          * extents/blockmap, we need to force the dirent delete code to
3729          * re-read the directory, or else we end up trying to delete a dirent
3730          * from what is now the extent tree root (or a block map).
3731          */
3732         force_reread = (new.dir->i_ino == old.dir->i_ino &&
3733                         ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3734
3735         if (whiteout) {
3736                 /*
3737                  * Do this before adding a new entry, so the old entry is sure
3738                  * to be still pointing to the valid old entry.
3739                  */
3740                 retval = ext4_setent(handle, &old, whiteout->i_ino,
3741                                      EXT4_FT_CHRDEV);
3742                 if (retval)
3743                         goto end_rename;
3744                 ext4_mark_inode_dirty(handle, whiteout);
3745         }
3746         if (!new.bh) {
3747                 retval = ext4_add_entry(handle, new.dentry, old.inode);
3748                 if (retval)
3749                         goto end_rename;
3750         } else {
3751                 retval = ext4_setent(handle, &new,
3752                                      old.inode->i_ino, old_file_type);
3753                 if (retval)
3754                         goto end_rename;
3755         }
3756         if (force_reread)
3757                 force_reread = !ext4_test_inode_flag(new.dir,
3758                                                      EXT4_INODE_INLINE_DATA);
3759
3760         /*
3761          * Like most other Unix systems, set the ctime for inodes on a
3762          * rename.
3763          */
3764         old.inode->i_ctime = ext4_current_time(old.inode);
3765         ext4_mark_inode_dirty(handle, old.inode);
3766
3767         if (!whiteout) {
3768                 /*
3769                  * ok, that's it
3770                  */
3771                 ext4_rename_delete(handle, &old, force_reread);
3772         }
3773
3774         if (new.inode) {
3775                 ext4_dec_count(handle, new.inode);
3776                 new.inode->i_ctime = ext4_current_time(new.inode);
3777         }
3778         old.dir->i_ctime = old.dir->i_mtime = ext4_current_time(old.dir);
3779         ext4_update_dx_flag(old.dir);
3780         if (old.dir_bh) {
3781                 retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
3782                 if (retval)
3783                         goto end_rename;
3784
3785                 ext4_dec_count(handle, old.dir);
3786                 if (new.inode) {
3787                         /* checked ext4_empty_dir above, can't have another
3788                          * parent, ext4_dec_count() won't work for many-linked
3789                          * dirs */
3790                         clear_nlink(new.inode);
3791                 } else {
3792                         ext4_inc_count(handle, new.dir);
3793                         ext4_update_dx_flag(new.dir);
3794                         ext4_mark_inode_dirty(handle, new.dir);
3795                 }
3796         }
3797         ext4_mark_inode_dirty(handle, old.dir);
3798         if (new.inode) {
3799                 ext4_mark_inode_dirty(handle, new.inode);
3800                 if (!new.inode->i_nlink)
3801                         ext4_orphan_add(handle, new.inode);
3802         }
3803         retval = 0;
3804
3805 end_rename:
3806         if (whiteout) {
3807                 if (retval) {
3808                         ext4_resetent(handle, &old,
3809                                       old.inode->i_ino, old_file_type);
3810                         drop_nlink(whiteout);
3811                         ext4_orphan_add(handle, whiteout);
3812                 }
3813                 unlock_new_inode(whiteout);
3814                 ext4_journal_stop(handle);
3815                 iput(whiteout);
3816         } else {
3817                 ext4_journal_stop(handle);
3818         }
3819 release_bh:
3820         brelse(old.dir_bh);
3821         brelse(old.bh);
3822         brelse(new.bh);
3823         return retval;
3824 }
3825
3826 static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3827                              struct inode *new_dir, struct dentry *new_dentry)
3828 {
3829         handle_t *handle = NULL;
3830         struct ext4_renament old = {
3831                 .dir = old_dir,
3832                 .dentry = old_dentry,
3833                 .inode = d_inode(old_dentry),
3834         };
3835         struct ext4_renament new = {
3836                 .dir = new_dir,
3837                 .dentry = new_dentry,
3838                 .inode = d_inode(new_dentry),
3839         };
3840         u8 new_file_type;
3841         int retval;
3842
3843         if ((ext4_encrypted_inode(old_dir) &&
3844              !fscrypt_has_encryption_key(old_dir)) ||
3845             (ext4_encrypted_inode(new_dir) &&
3846              !fscrypt_has_encryption_key(new_dir)))
3847                 return -ENOKEY;
3848
3849         if ((ext4_encrypted_inode(old_dir) ||
3850              ext4_encrypted_inode(new_dir)) &&
3851             (old_dir != new_dir) &&
3852             (!fscrypt_has_permitted_context(new_dir, old.inode) ||
3853              !fscrypt_has_permitted_context(old_dir, new.inode)))
3854                 return -EXDEV;
3855
3856         if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
3857              !projid_eq(EXT4_I(new_dir)->i_projid,
3858                         EXT4_I(old_dentry->d_inode)->i_projid)) ||
3859             (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
3860              !projid_eq(EXT4_I(old_dir)->i_projid,
3861                         EXT4_I(new_dentry->d_inode)->i_projid)))
3862                 return -EXDEV;
3863
3864         retval = dquot_initialize(old.dir);
3865         if (retval)
3866                 return retval;
3867         retval = dquot_initialize(old.inode);
3868         if (retval)
3869                 return retval;
3870         retval = dquot_initialize(new.dir);
3871         if (retval)
3872                 return retval;
3873
3874         old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
3875                                  &old.de, &old.inlined);
3876         if (IS_ERR(old.bh))
3877                 return PTR_ERR(old.bh);
3878         /*
3879          *  Check for inode number is _not_ due to possible IO errors.
3880          *  We might rmdir the source, keep it as pwd of some process
3881          *  and merrily kill the link to whatever was created under the
3882          *  same name. Goodbye sticky bit ;-<
3883          */
3884         retval = -ENOENT;
3885         if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3886                 goto end_rename;
3887
3888         new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3889                                  &new.de, &new.inlined);
3890         if (IS_ERR(new.bh)) {
3891                 retval = PTR_ERR(new.bh);
3892                 new.bh = NULL;
3893                 goto end_rename;
3894         }
3895
3896         /* RENAME_EXCHANGE case: old *and* new must both exist */
3897         if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
3898                 goto end_rename;
3899
3900         handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
3901                 (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3902                  2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3903         if (IS_ERR(handle)) {
3904                 retval = PTR_ERR(handle);
3905                 handle = NULL;
3906                 goto end_rename;
3907         }
3908
3909         if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3910                 ext4_handle_sync(handle);
3911
3912         if (S_ISDIR(old.inode->i_mode)) {
3913                 old.is_dir = true;
3914                 retval = ext4_rename_dir_prepare(handle, &old);
3915                 if (retval)
3916                         goto end_rename;
3917         }
3918         if (S_ISDIR(new.inode->i_mode)) {
3919                 new.is_dir = true;
3920                 retval = ext4_rename_dir_prepare(handle, &new);
3921                 if (retval)
3922                         goto end_rename;
3923         }
3924
3925         /*
3926          * Other than the special case of overwriting a directory, parents'
3927          * nlink only needs to be modified if this is a cross directory rename.
3928          */
3929         if (old.dir != new.dir && old.is_dir != new.is_dir) {
3930                 old.dir_nlink_delta = old.is_dir ? -1 : 1;
3931                 new.dir_nlink_delta = -old.dir_nlink_delta;
3932                 retval = -EMLINK;
3933                 if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) ||
3934                     (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir)))
3935                         goto end_rename;
3936         }
3937
3938         new_file_type = new.de->file_type;
3939         retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type);
3940         if (retval)
3941                 goto end_rename;
3942
3943         retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type);
3944         if (retval)
3945                 goto end_rename;
3946
3947         /*
3948          * Like most other Unix systems, set the ctime for inodes on a
3949          * rename.
3950          */
3951         old.inode->i_ctime = ext4_current_time(old.inode);
3952         new.inode->i_ctime = ext4_current_time(new.inode);
3953         ext4_mark_inode_dirty(handle, old.inode);
3954         ext4_mark_inode_dirty(handle, new.inode);
3955
3956         if (old.dir_bh) {
3957                 retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
3958                 if (retval)
3959                         goto end_rename;
3960         }
3961         if (new.dir_bh) {
3962                 retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino);
3963                 if (retval)
3964                         goto end_rename;
3965         }
3966         ext4_update_dir_count(handle, &old);
3967         ext4_update_dir_count(handle, &new);
3968         retval = 0;
3969
3970 end_rename:
3971         brelse(old.dir_bh);
3972         brelse(new.dir_bh);
3973         brelse(old.bh);
3974         brelse(new.bh);
3975         if (handle)
3976                 ext4_journal_stop(handle);
3977         return retval;
3978 }
3979
3980 static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
3981                         struct inode *new_dir, struct dentry *new_dentry,
3982                         unsigned int flags)
3983 {
3984         if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
3985                 return -EINVAL;
3986
3987         if (flags & RENAME_EXCHANGE) {
3988                 return ext4_cross_rename(old_dir, old_dentry,
3989                                          new_dir, new_dentry);
3990         }
3991
3992         return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
3993 }
3994
3995 /*
3996  * directories can handle most operations...
3997  */
3998 const struct inode_operations ext4_dir_inode_operations = {
3999         .create         = ext4_create,
4000         .lookup         = ext4_lookup,
4001         .link           = ext4_link,
4002         .unlink         = ext4_unlink,
4003         .symlink        = ext4_symlink,
4004         .mkdir          = ext4_mkdir,
4005         .rmdir          = ext4_rmdir,
4006         .mknod          = ext4_mknod,
4007         .tmpfile        = ext4_tmpfile,
4008         .rename         = ext4_rename2,
4009         .setattr        = ext4_setattr,
4010         .listxattr      = ext4_listxattr,
4011         .get_acl        = ext4_get_acl,
4012         .set_acl        = ext4_set_acl,
4013         .fiemap         = ext4_fiemap,
4014 };
4015
4016 const struct inode_operations ext4_special_inode_operations = {
4017         .setattr        = ext4_setattr,
4018         .listxattr      = ext4_listxattr,
4019         .get_acl        = ext4_get_acl,
4020         .set_acl        = ext4_set_acl,
4021 };