1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
18 #include "xfs_inode.h"
19 #include "xfs_inode_fork.h"
20 #include "xfs_alloc.h"
21 #include "xfs_rtalloc.h"
23 #include "xfs_bmap_util.h"
24 #include "xfs_bmap_btree.h"
26 #include "xfs_rmap_btree.h"
27 #include "xfs_refcount.h"
28 #include "scrub/xfs_scrub.h"
29 #include "scrub/scrub.h"
30 #include "scrub/common.h"
31 #include "scrub/btree.h"
32 #include "scrub/trace.h"
/*
 * xchk_setup_inode_bmap: get an inode ready for a block-mapping scrub.
 *
 * Steps visible in this excerpt, in order:
 *   1. xchk_get_inode(sc, ip) is called; every later step works on sc->ip,
 *      so presumably that call attaches the inode to the scrub context.
 *   2. XFS_IOLOCK_EXCL and XFS_MMAPLOCK_EXCL are recorded in
 *      sc->ilock_flags and taken with xfs_ilock().
 *   3. Only for a regular file scrubbed as XFS_SCRUB_TYPE_BMBTD: wait for
 *      direct I/O with inode_dio_wait(), start writeback with
 *      filemap_fdatawrite() and wait with filemap_fdatawait_keep_errors().
 *      The visible error test lets -ENOSPC and -EIO from the wait through;
 *      the existing comment in the body explains why.
 *   4. xchk_trans_alloc(sc, 0) is called.
 *   5. XFS_ILOCK_EXCL is added to sc->ilock_flags and taken.
 *
 * NOTE(review): the parameter list, local declarations, the statements
 * guarded by the error tests and the return statement are not visible in
 * this excerpt; confirm them against the full file.
 */
34 /* Set us up with an inode's bmap. */
36 xchk_setup_inode_bmap(
42 error = xchk_get_inode(sc, ip);
/*
 * Take the IOLOCK and MMAPLOCK now and remember them in sc->ilock_flags;
 * the ILOCK is only added at the end of this function.
 */
46 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
47 xfs_ilock(sc->ip, sc->ilock_flags);
50 * We don't want any ephemeral data fork updates sitting around
51 * while we inspect block mappings, so wait for directio to finish
52 * and flush dirty data if we have delalloc reservations.
54 if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
55 sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
56 struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
58 inode_dio_wait(VFS_I(sc->ip));
61 * Try to flush all incore state to disk before we examine the
62 * space mappings for the data fork. Leave accumulated errors
63 * in the mapping for the writer threads to consume.
65 * On ENOSPC or EIO writeback errors, we continue into the
66 * extent mapping checks because write failures do not
67 * necessarily imply anything about the correctness of the file
68 * metadata. The metadata and the file data could be on
69 * completely separate devices; a media failure might only
70 * affect a subset of the disk, etc. We can handle delalloc
71 * extents in the scrubber, so leaving them in memory is fine.
73 error = filemap_fdatawrite(mapping);
75 error = filemap_fdatawait_keep_errors(mapping);
76 if (error && (error != -ENOSPC && error != -EIO))
80 /* Got the inode, lock it and we're ready to go. */
81 error = xchk_trans_alloc(sc, 0);
/*
 * The ILOCK is recorded in ilock_flags as well, presumably so that the
 * teardown mentioned below knows to drop it together with the other locks.
 */
84 sc->ilock_flags |= XFS_ILOCK_EXCL;
85 xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
88 /* scrub teardown will unlock and release the inode */
/*
 * struct xchk_bmap_info: per-fork state handed to the extent-checking
 * helpers in this file.
 *
 * Members visible in this excerpt:
 *   sc      - the scrub context being serviced
 *   lastoff - set by the extent checker to br_startoff + br_blockcount of
 *             the extent it just examined, and compared against the next
 *             extent to detect out-of-order records
 *
 * Other code in this file also reads and writes whichfork, is_shared and
 * is_rt through this structure; those declarations are not visible here.
 */
93 * Inode fork block mapping (BMBT) scrubber.
94 * More complex than the others because we have to scrub
95 * all the extents regardless of whether or not the fork
99 struct xchk_bmap_info {
100 struct xfs_scrub *sc;
101 xfs_fileoff_t lastoff;
/*
 * Find the reverse-mapping record that should cover one bmap extent.
 * The name line is missing from this excerpt; the rmap cross-reference
 * code in this file invokes the helper as
 * xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap).
 *
 * Visible parameters:
 *   info - per-fork scrub state; supplies the scrub context, whichfork and
 *          is_shared
 *   irec - the incore bmap record being cross-referenced
 *   rmap - receives the rmap record that was found
 * agbno and owner are used by the lookups but their declarations are not
 * visible here.
 *
 * The lookup flags mirror the extent: XFS_RMAP_ATTR_FORK for attr fork
 * mappings, XFS_RMAP_UNWRITTEN for unwritten extents.  Shared extents use
 * xfs_rmap_lookup_le_range(); everything else uses xfs_rmap_lookup_le()
 * followed by xfs_rmap_get_rec().  The result of every btree call is
 * passed through xchk_should_check_xref().
 *
 * NOTE(review): the return type, the statements guarded by the
 * xchk_should_check_xref() tests and the return statements are not
 * visible; the caller uses the result as a boolean.
 */
107 /* Look for a corresponding rmap for this irec. */
110 struct xchk_bmap_info *info,
111 struct xfs_bmbt_irec *irec,
114 struct xfs_rmap_irec *rmap)
116 xfs_fileoff_t offset;
117 unsigned int rflags = 0;
/* Build rmap lookup flags that describe the same kind of extent as irec. */
121 if (info->whichfork == XFS_ATTR_FORK)
122 rflags |= XFS_RMAP_ATTR_FORK;
123 if (irec->br_state == XFS_EXT_UNWRITTEN)
124 rflags |= XFS_RMAP_UNWRITTEN;
127 * CoW staging extents are owned (on disk) by the refcountbt, so
128 * their rmaps do not have offsets.
/*
 * NOTE(review): the statement guarded by the CoW test is not visible;
 * only the non-CoW assignment offset = irec->br_startoff is shown.
 */
130 if (info->whichfork == XFS_COW_FORK)
133 offset = irec->br_startoff;
136 * If the caller thinks this could be a shared bmbt extent (IOWs,
137 * any data fork extent of a reflink inode) then we have to use the
138 * range rmap lookup to make sure we get the correct owner/offset.
140 if (info->is_shared) {
141 error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
142 owner, offset, rflags, rmap, &has_rmap);
143 if (!xchk_should_check_xref(info->sc, &error,
144 &info->sc->sa.rmap_cur))
150 * Otherwise, use the (faster) regular lookup.
152 error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
153 offset, rflags, &has_rmap);
154 if (!xchk_should_check_xref(info->sc, &error,
155 &info->sc->sa.rmap_cur))
/* The plain lookup only positions the cursor; fetch the record itself. */
160 error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
161 if (!xchk_should_check_xref(info->sc, &error,
162 &info->sc->sa.rmap_cur))
/*
 * NOTE(review): presumably reached only when has_rmap is false; the
 * guarding condition is not visible in this excerpt.
 */
167 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
/*
 * Cross-reference one bmap extent against the reverse-mapping btree.
 * The name line is missing from this excerpt; the datadev extent
 * cross-reference code invokes this helper as
 * xchk_bmap_xref_rmap(info, irec, agbno).
 *
 * Nothing is checked when there is no rmap cursor or when
 * xchk_skip_xref() returns true.  The expected owner is XFS_RMAP_OWN_COW
 * for the CoW fork and the inode number otherwise.  Once a record has
 * been found through xchk_bmap_get_rmap(), these properties are tested:
 *   - the rmap physically covers agbno .. agbno + br_blockcount
 *   - for non-CoW forks, the rmap logically covers the extent offsets
 *   - the rmap owner equals the expected owner
 *   - the unwritten flag agrees (skipped for CoW-owned records)
 *   - the attr-fork flag agrees with info->whichfork
 *   - the record is not flagged XFS_RMAP_BMBT_BLOCK
 * Every mismatch is reported with xchk_fblock_xref_set_corrupt().
 *
 * NOTE(review): the return type and the statements guarded by the two
 * early tests are not visible in this excerpt.
 */
172 /* Make sure that we have rmapbt records for this extent. */
175 struct xchk_bmap_info *info,
176 struct xfs_bmbt_irec *irec,
179 struct xfs_rmap_irec rmap;
180 unsigned long long rmap_end;
183 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
186 if (info->whichfork == XFS_COW_FORK)
187 owner = XFS_RMAP_OWN_COW;
189 owner = info->sc->ip->i_ino;
191 /* Find the rmap record for this irec. */
192 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
195 /* Check the rmap. */
/*
 * The start block is cast to unsigned long long before the addition,
 * presumably so that the computed end cannot wrap a narrower type.
 */
196 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
197 if (rmap.rm_startblock > agbno ||
198 agbno + irec->br_blockcount > rmap_end)
199 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
203 * Check the logical offsets if applicable. CoW staging extents
204 * don't track logical offsets since the mappings only exist in
207 if (info->whichfork != XFS_COW_FORK) {
208 rmap_end = (unsigned long long)rmap.rm_offset +
210 if (rmap.rm_offset > irec->br_startoff ||
211 irec->br_startoff + irec->br_blockcount > rmap_end)
212 xchk_fblock_xref_set_corrupt(info->sc,
213 info->whichfork, irec->br_startoff);
216 if (rmap.rm_owner != owner)
217 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
221 * Check for discrepancies between the unwritten flag in the irec and
222 * the rmap. Note that the (in-memory) CoW fork distinguishes between
223 * unwritten and written extents, but we don't track that in the rmap
224 * records because the blocks are owned (on-disk) by the refcountbt,
225 * which doesn't track unwritten state.
227 if (owner != XFS_RMAP_OWN_COW &&
228 !!(irec->br_state == XFS_EXT_UNWRITTEN) !=
229 !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
230 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
233 if (!!(info->whichfork == XFS_ATTR_FORK) !=
234 !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
235 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
237 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
238 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
/*
 * xchk_bmap_rt_extent_xref: cross-reference an extent that lives on the
 * realtime device.
 *
 *   info - per-fork scrub state; supplies the scrub context
 *   ip   - inode owning the extent (not used by the visible lines)
 *   cur  - bmap btree cursor (not used by the visible lines)
 *   irec - the extent to cross-reference
 *
 * XFS_SCRUB_OFLAG_CORRUPT is tested first; the statement guarded by that
 * test is not visible in this excerpt.  The extent range is then handed
 * to xchk_xref_is_used_rt_space().
 */
242 /* Cross-reference a single rtdev extent record. */
244 xchk_bmap_rt_extent_xref(
245 struct xchk_bmap_info *info,
246 struct xfs_inode *ip,
247 struct xfs_btree_cur *cur,
248 struct xfs_bmbt_irec *irec)
250 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
253 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
254 irec->br_blockcount);
/*
 * xchk_bmap_extent_xref: cross-reference an extent that lives on the data
 * device against the per-AG metadata.
 *
 *   info - per-fork scrub state; supplies the scrub context and whichfork
 *   ip   - inode owning the extent (not used by the visible lines)
 *   cur  - bmap btree cursor (not used by the visible lines)
 *   irec - the extent to cross-reference
 *
 * XFS_SCRUB_OFLAG_CORRUPT is tested first; the statement guarded by that
 * test is not visible here.  The start block is split into an AG number
 * and an AG block number, xchk_ag_init() sets up info->sc->sa for that AG,
 * the cross-reference helpers run, and xchk_ag_free() releases the AG
 * state again.
 *
 * NOTE(review): the case labels of the whichfork switch are not visible,
 * so which fork gets the not-shared check and which gets the CoW-staging
 * check cannot be confirmed from this excerpt.
 */
257 /* Cross-reference a single datadev extent record. */
259 xchk_bmap_extent_xref(
260 struct xchk_bmap_info *info,
261 struct xfs_inode *ip,
262 struct xfs_btree_cur *cur,
263 struct xfs_bmbt_irec *irec)
265 struct xfs_mount *mp = info->sc->mp;
271 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
/* Convert the filesystem block number into an (AG, AG block) pair. */
274 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
275 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
276 len = irec->br_blockcount;
278 error = xchk_ag_init(info->sc, agno, &info->sc->sa);
279 if (!xchk_fblock_process_error(info->sc, info->whichfork,
280 irec->br_startoff, &error))
/* Checks that run for every fork, using the AG state set up above. */
283 xchk_xref_is_used_space(info->sc, agbno, len);
284 xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
285 xchk_bmap_xref_rmap(info, irec, agbno);
/* Fork-specific checks follow. */
286 switch (info->whichfork) {
288 if (xfs_is_reflink_inode(info->sc->ip))
292 xchk_xref_is_not_shared(info->sc, agbno,
293 irec->br_blockcount);
296 xchk_xref_is_cow_staging(info->sc, agbno,
297 irec->br_blockcount);
/* Pairs with the xchk_ag_init() call above. */
301 xchk_ag_free(info->sc, &info->sc->sa);
/*
 * Check one extent record of an inode fork.  The name line is missing
 * from this excerpt; callers in this file invoke the helper as
 * xchk_bmap_extent(ip, cur, info, irec).
 *
 *   ip   - inode owning the fork
 *   cur  - bmap btree cursor; the incore extent scan in this file passes
 *          NULL here
 *   info - per-fork scrub state; lastoff is read and updated
 *   irec - the extent record to check
 *
 * Checks visible in this excerpt, each reported with
 * xchk_fblock_set_corrupt():
 *   - the extent starts before the end of the previous one (lastoff)
 *   - the start block is HOLESTARTBLOCK
 *   - the start block is a delalloc marker (isnullstartblock)
 *   - the length exceeds MAXEXTLEN
 *   - start + length does not exceed start, i.e. the sum wrapped or the
 *     length is zero
 *   - the first or last block fails xfs_verify_rtbno(), or fails
 *     xfs_verify_fsbno(), or the extent crosses an AG boundary
 *   - an unwritten extent appears in the attr fork
 * Afterwards one of the two cross-reference helpers runs and lastoff is
 * advanced past this extent.
 *
 * NOTE(review): the return type, several guarding conditions and the
 * return statement are not visible in this excerpt.
 */
304 /* Scrub a single extent record. */
307 struct xfs_inode *ip,
308 struct xfs_btree_cur *cur,
309 struct xchk_bmap_info *info,
310 struct xfs_bmbt_irec *irec)
312 struct xfs_mount *mp = info->sc->mp;
313 struct xfs_buf *bp = NULL;
/*
 * NOTE(review): presumably guarded by a test that cur is non-NULL, since
 * one caller passes NULL; the guard is not visible in this excerpt.
 */
318 xfs_btree_get_block(cur, 0, &bp);
321 * Check for out-of-order extents. This record could have come
322 * from the incore list, for which there is no ordering check.
324 if (irec->br_startoff < info->lastoff)
325 xchk_fblock_set_corrupt(info->sc, info->whichfork,
328 /* There should never be a "hole" extent in either extent list. */
329 if (irec->br_startblock == HOLESTARTBLOCK)
330 xchk_fblock_set_corrupt(info->sc, info->whichfork,
334 * Check for delalloc extents. We never iterate the ones in the
335 * in-core extent scan, and we should never see these in the bmbt.
337 if (isnullstartblock(irec->br_startblock))
338 xchk_fblock_set_corrupt(info->sc, info->whichfork,
341 /* Make sure the extent points to a valid place. */
342 if (irec->br_blockcount > MAXEXTLEN)
343 xchk_fblock_set_corrupt(info->sc, info->whichfork,
/* True when the addition wrapped around or the block count is zero. */
345 if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
346 xchk_fblock_set_corrupt(info->sc, info->whichfork,
/* Last block covered by the extent (inclusive). */
348 end = irec->br_startblock + irec->br_blockcount - 1;
/*
 * NOTE(review): the first halves of the next two conditions are not
 * visible; presumably they select the realtime check or the data device
 * check based on info->is_rt.
 */
350 (!xfs_verify_rtbno(mp, irec->br_startblock) ||
351 !xfs_verify_rtbno(mp, end)))
352 xchk_fblock_set_corrupt(info->sc, info->whichfork,
355 (!xfs_verify_fsbno(mp, irec->br_startblock) ||
356 !xfs_verify_fsbno(mp, end) ||
357 XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
358 XFS_FSB_TO_AGNO(mp, end)))
359 xchk_fblock_set_corrupt(info->sc, info->whichfork,
362 /* We don't allow unwritten extents on attr forks. */
363 if (irec->br_state == XFS_EXT_UNWRITTEN &&
364 info->whichfork == XFS_ATTR_FORK)
365 xchk_fblock_set_corrupt(info->sc, info->whichfork,
/*
 * NOTE(review): the condition choosing between the realtime and the data
 * device cross-reference is not visible in this excerpt.
 */
369 xchk_bmap_rt_extent_xref(info, ip, cur, irec);
371 xchk_bmap_extent_xref(info, ip, cur, irec);
/* Remember where this extent ends for the ordering check on the next one. */
373 info->lastoff = irec->br_startoff + irec->br_blockcount;
/*
 * Per-record callback used when walking the bmap btree.  The name line is
 * missing from this excerpt; the btree scan in this file passes the
 * callback to xchk_btree() as xchk_bmapbt_rec.
 *
 *   bs  - btree scrub state; bs->private carries the struct xchk_bmap_info
 *         and bs->cur is the bmap btree cursor
 *   rec - the on-disk bmbt record to check
 *
 * On filesystems with CRCs, and only when bc_ptrs[0] is 1 (presumably
 * the first record of a leaf block - TODO confirm), every block on the
 * cursor path below the root level is checked to be owned by this inode.
 * The record is then converted to incore form with
 * xfs_bmbt_disk_get_all() and handed to xchk_bmap_extent(), whose result
 * is returned.
 */
377 /* Scrub a bmbt record. */
380 struct xchk_btree *bs,
381 union xfs_btree_rec *rec)
383 struct xfs_bmbt_irec irec;
384 struct xchk_bmap_info *info = bs->private;
385 struct xfs_inode *ip = bs->cur->bc_private.b.ip;
386 struct xfs_buf *bp = NULL;
387 struct xfs_btree_block *block;
392 * Check the owners of the btree blocks up to the level below
393 * the root since the verifiers don't do that.
395 if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
396 bs->cur->bc_ptrs[0] == 1) {
397 for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
398 block = xfs_btree_get_block(bs->cur, i, &bp);
399 owner = be64_to_cpu(block->bb_u.l.bb_owner);
400 if (owner != ip->i_ino)
401 xchk_fblock_set_corrupt(bs->sc,
406 /* Set up the in-core record and scrub it. */
407 xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
408 return xchk_bmap_extent(ip, bs->cur, info, &irec);
/*
 * Walk every record of a fork that is in btree format.  The name line is
 * missing from this excerpt; the fork scrubber in this file invokes the
 * helper as xchk_bmap_btree(sc, whichfork, &info).
 *
 *   sc   - scrub context; supplies the mount, transaction and inode
 *   info - per-fork scrub state, passed through to the record callback
 * whichfork is used below but its declaration is not visible here.
 *
 * A bmap btree cursor is created, the owner info is set up with
 * xfs_rmap_ino_bmbt_owner(), xchk_btree() runs xchk_bmapbt_rec on every
 * record, and the cursor is deleted with the resulting error code.
 *
 * NOTE(review): the return type and return statement are not visible.
 */
411 /* Scan the btree records. */
414 struct xfs_scrub *sc,
416 struct xchk_bmap_info *info)
418 struct xfs_owner_info oinfo;
419 struct xfs_mount *mp = sc->mp;
420 struct xfs_inode *ip = sc->ip;
421 struct xfs_btree_cur *cur;
424 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
425 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
426 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
/* The cursor is torn down on both the success and the failure path. */
427 xfs_btree_del_cursor(cur, error);
/*
 * struct xchk_bmap_check_rmap_info: state shared between the per-AG rmap
 * scan and its per-record callback.
 *
 * Members visible in this excerpt:
 *   sc   - the scrub context being serviced
 *   icur - incore extent cursor that the callback positions with
 *          xfs_iext_lookup_extent() and advances with
 *          xfs_iext_next_extent()
 *
 * The callback also reads a whichfork member; that declaration is not
 * visible here.
 */
431 struct xchk_bmap_check_rmap_info {
432 struct xfs_scrub *sc;
434 struct xfs_iext_cursor icur;
/*
 * xchk_bmap_check_rmap: callback handed to xfs_rmap_query_all(); checks
 * that one rmap record is fully backed by bmap extents of the inode.
 *
 *   cur  - rmap btree cursor; supplies the AG number of the record
 *   rec  - the rmap record under inspection (modified in place, see below)
 *   priv - struct xchk_bmap_check_rmap_info set up by the per-AG scan
 *          (the declaration of priv is not visible in this excerpt)
 *
 * Records are tested against three filters first: a different owner, an
 * attr-fork flag that does not match the fork being checked, and the
 * XFS_RMAP_BMBT_BLOCK flag.  The statements guarded by those tests are
 * not visible here.
 *
 * For the remaining records the incore extent at rm_offset is looked up
 * and compared for file offset, physical block and length, moving on to
 * the next incore extent until the rmap length is used up.  Mismatches
 * are reported with xchk_fblock_set_corrupt().
 *
 * XFS_BTREE_QUERY_RANGE_ABORT is returned once the corrupt flag is set;
 * the return value on the other path is not visible.
 */
437 /* Can we find bmaps that fit this rmap? */
439 xchk_bmap_check_rmap(
440 struct xfs_btree_cur *cur,
441 struct xfs_rmap_irec *rec,
444 struct xfs_bmbt_irec irec;
445 struct xchk_bmap_check_rmap_info *sbcri = priv;
446 struct xfs_ifork *ifp;
447 struct xfs_scrub *sc = sbcri->sc;
450 /* Is this even the right fork? */
451 if (rec->rm_owner != sc->ip->i_ino)
453 if ((sbcri->whichfork == XFS_ATTR_FORK) ^
454 !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
456 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
459 /* Now look up the bmbt record. */
460 ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork);
/*
 * NOTE(review): the conditions guarding the next two corruption reports
 * are not visible; presumably a missing fork and a failed extent lookup.
 */
462 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
466 have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
467 &sbcri->icur, &irec);
469 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
472 * bmap extent record lengths are constrained to 2^21 blocks in length
473 * because of space constraints in the on-disk metadata structure.
474 * However, rmap extent record lengths are constrained only by AG
475 * length, so we have to loop through the bmbt to make sure that the
476 * entire rmap is covered by bmbt records.
479 if (irec.br_startoff != rec->rm_offset)
480 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
482 if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
483 cur->bc_private.a.agno, rec->rm_startblock))
484 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
486 if (irec.br_blockcount > rec->rm_blockcount)
487 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
489 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
/*
 * Consume the part of the rmap that this bmap extent accounts for.  This
 * writes to the record supplied by the caller rather than to a copy.
 */
491 rec->rm_startblock += irec.br_blockcount;
492 rec->rm_offset += irec.br_blockcount;
493 rec->rm_blockcount -= irec.br_blockcount;
494 if (rec->rm_blockcount == 0)
496 have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
498 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
/* Stop the rmap walk early once a corruption has been flagged. */
503 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
504 return XFS_BTREE_QUERY_RANGE_ABORT;
/*
 * xchk_bmap_check_ag_rmaps: run xchk_bmap_check_rmap over every rmap
 * record of one allocation group.
 *
 *   sc - scrub context; supplies the mount and transaction
 * whichfork and agno are used below but their declarations are not
 * visible in this excerpt.
 *
 * Sequence visible here: read the AGF buffer, create an rmap btree
 * cursor on it, store whichfork in the callback state, query all
 * records, delete the cursor and release the AGF buffer.
 *
 * NOTE(review): the handling of the AGF read error, the remaining
 * initialisation of sbcri, the statement guarded by the
 * XFS_BTREE_QUERY_RANGE_ABORT test and the return statement are not
 * visible.
 */
508 /* Make sure each rmap has a corresponding bmbt entry. */
510 xchk_bmap_check_ag_rmaps(
511 struct xfs_scrub *sc,
515 struct xchk_bmap_check_rmap_info sbcri;
516 struct xfs_btree_cur *cur;
520 error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf);
524 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno);
531 sbcri.whichfork = whichfork;
532 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
/* The callback returns this value to end the walk once corruption is flagged. */
533 if (error == XFS_BTREE_QUERY_RANGE_ABORT)
536 xfs_btree_del_cursor(cur, error);
538 xfs_trans_brelse(sc->tp, agf);
/*
 * xchk_bmap_check_rmaps: decide whether the reverse-mapping scan is
 * needed for this fork and, if so, run it over every allocation group.
 *
 *   sc - scrub context
 * whichfork is used below but its declaration is not visible here.
 *
 * Conditions tested before the scan (the guarded statements are not
 * visible in this excerpt):
 *   - the filesystem has no rmap btree, the fork is the CoW fork, or
 *     corruption has already been flagged
 *   - the data fork of a realtime inode
 *   - the fork is not in btree format and either its size value is zero
 *     or it has at least one extent
 * The size value comes from i_size_read() on one path and from
 * XFS_IFORK_Q() on another; the labels selecting between them are not
 * visible here.
 *
 * The AG loop calls xchk_bmap_check_ag_rmaps() for each AG and tests the
 * corrupt flag after each call.
 */
542 /* Make sure each rmap has a corresponding bmbt entry. */
544 xchk_bmap_check_rmaps(
545 struct xfs_scrub *sc,
552 if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) ||
553 whichfork == XFS_COW_FORK ||
554 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
557 /* Don't support realtime rmap checks yet. */
558 if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK)
562 * Only do this for complex maps that are in btree format, or for
563 * situations where we would seem to have a size but zero extents.
564 * The inode repair code can zap broken iforks, which means we have
565 * to flag this bmap as corrupt if there are rmaps that need to be
570 size = i_size_read(VFS_I(sc->ip));
573 size = XFS_IFORK_Q(sc->ip);
579 if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE &&
580 (size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0))
/* Every allocation group is visited in turn. */
583 for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
584 error = xchk_bmap_check_ag_rmaps(sc, whichfork, agno);
587 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
/*
 * Scrub the block mappings of one inode fork.  The name line is missing
 * from this excerpt; the per-fork entry points in this file invoke the
 * function as xchk_bmap(sc, whichfork).
 *
 *   sc - scrub context; supplies the mount, inode and transaction
 * whichfork is used throughout but its declaration is not visible here.
 *
 * Phases visible in this excerpt:
 *   1. Fill in the per-fork state: is_rt, whichfork and is_shared.
 *   2. Fork-specific sanity checks: a CoW fork on an inode that is not a
 *      reflink inode and an attr fork on a filesystem without either
 *      attr feature bit are flagged with xchk_ino_set_corrupt(); any
 *      other fork is asserted to be the data fork.
 *   3. Fork format switch: UUID, DEV and LOCAL formats have no mappings;
 *      EXTENTS format requires XFS_IFEXTENTS to be set; BTREE format is
 *      corrupt for the CoW fork and otherwise runs xchk_bmap_btree();
 *      one more xchk_fblock_set_corrupt() call follows, presumably under
 *      a default label that is not visible.
 *   4. Load the incore extent list with xfs_iread_extents() if it is not
 *      present yet, then find the end offset with xfs_bmap_last_offset().
 *   5. Walk the incore extents.  Termination requests, the corrupt flag
 *      and delalloc start blocks are tested inside the loop (the guarded
 *      statements are not visible), an extent starting at or beyond the
 *      end offset is flagged as corrupt, and the remaining extents are
 *      passed to xchk_bmap_extent() with a NULL cursor.
 *   6. Run xchk_bmap_check_rmaps() and pass its result through
 *      xchk_fblock_xref_process_error().
 *
 * NOTE(review): the return type, the case labels, the statements guarded
 * by most tests and the return statement are not visible in this excerpt.
 */
595 * Scrub an inode fork's block mappings.
597 * First we scan every record in every btree block, if applicable.
598 * Then we unconditionally scan the incore extent cache.
602 struct xfs_scrub *sc,
605 struct xfs_bmbt_irec irec;
606 struct xchk_bmap_info info = { NULL };
607 struct xfs_mount *mp = sc->mp;
608 struct xfs_inode *ip = sc->ip;
609 struct xfs_ifork *ifp;
610 xfs_fileoff_t endoff;
611 struct xfs_iext_cursor icur;
614 ifp = XFS_IFORK_PTR(ip, whichfork);
616 info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
617 info.whichfork = whichfork;
618 info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
623 /* Non-existent CoW forks are ignorable. */
626 /* No CoW forks on non-reflink inodes/filesystems. */
627 if (!xfs_is_reflink_inode(ip)) {
628 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
635 if (!xfs_sb_version_hasattr(&mp->m_sb) &&
636 !xfs_sb_version_hasattr2(&mp->m_sb))
637 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
640 ASSERT(whichfork == XFS_DATA_FORK);
644 /* Check the fork values */
645 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
646 case XFS_DINODE_FMT_UUID:
647 case XFS_DINODE_FMT_DEV:
648 case XFS_DINODE_FMT_LOCAL:
649 /* No mappings to check. */
651 case XFS_DINODE_FMT_EXTENTS:
652 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
653 xchk_fblock_set_corrupt(sc, whichfork, 0);
657 case XFS_DINODE_FMT_BTREE:
658 if (whichfork == XFS_COW_FORK) {
659 xchk_fblock_set_corrupt(sc, whichfork, 0);
663 error = xchk_bmap_btree(sc, whichfork, &info);
668 xchk_fblock_set_corrupt(sc, whichfork, 0);
672 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
675 /* Now try to scrub the in-memory extent list. */
676 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
677 error = xfs_iread_extents(sc->tp, ip, whichfork);
678 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
682 /* Find the offset of the last extent in the mapping. */
683 error = xfs_bmap_last_offset(ip, &endoff, whichfork);
684 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
687 /* Scrub extent records. */
689 ifp = XFS_IFORK_PTR(ip, whichfork);
690 for_each_xfs_iext(ifp, &icur, &irec) {
691 if (xchk_should_terminate(sc, &error) ||
692 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
694 if (isnullstartblock(irec.br_startblock))
696 if (irec.br_startoff >= endoff) {
697 xchk_fblock_set_corrupt(sc, whichfork,
701 error = xchk_bmap_extent(ip, NULL, &info, &irec);
707 error = xchk_bmap_check_rmaps(sc, whichfork);
708 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
/*
 * Entry point for scrubbing the data fork mappings of sc->ip: forwards to
 * xchk_bmap() with XFS_DATA_FORK and returns its result unchanged.
 * NOTE(review): the return type and the function name line are not
 * visible in this excerpt.
 */
714 /* Scrub an inode's data fork. */
717 struct xfs_scrub *sc)
719 return xchk_bmap(sc, XFS_DATA_FORK);
/*
 * Entry point for scrubbing the attr fork mappings of sc->ip: forwards to
 * xchk_bmap() with XFS_ATTR_FORK and returns its result unchanged.
 * NOTE(review): the return type and the function name line are not
 * visible in this excerpt.
 */
722 /* Scrub an inode's attr fork. */
725 struct xfs_scrub *sc)
727 return xchk_bmap(sc, XFS_ATTR_FORK);
/*
 * Entry point for scrubbing the CoW fork mappings of sc->ip.  The inode
 * is tested with xfs_is_reflink_inode() first; the statement executed for
 * inodes that fail the test is not visible in this excerpt.  Otherwise the
 * call is forwarded to xchk_bmap() with XFS_COW_FORK and its result is
 * returned unchanged.
 * NOTE(review): the return type and the function name line are not
 * visible in this excerpt.
 */
730 /* Scrub an inode's CoW fork. */
733 struct xfs_scrub *sc)
735 if (!xfs_is_reflink_inode(sc->ip))
738 return xchk_bmap(sc, XFS_COW_FORK);