Commit d723b99e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Improvements to ext4's block allocator performance for very large file
  systems, especially when the file system or files which are highly
  fragmented. There is a new mount option, prefetch_block_bitmaps which
  will pull in the block bitmaps and set up the in-memory buddy bitmaps
  when the file system is initially mounted.

  Beyond that, a lot of bug fixes and cleanups. In particular, a number
  of changes to make ext4 more robust in the face of write errors or
  file system corruptions"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (46 commits)
  ext4: limit the length of per-inode prealloc list
  ext4: reorganize if statement of ext4_mb_release_context()
  ext4: add mb_debug logging when there are lost chunks
  ext4: Fix comment typo "the the".
  jbd2: clean up checksum verification in do_one_pass()
  ext4: change to use fallthrough macro
  ext4: remove unused parameter of ext4_generic_delete_entry function
  mballoc: replace seq_printf with seq_puts
  ext4: optimize the implementation of ext4_mb_good_group()
  ext4: delete invalid comments near ext4_mb_check_limits()
  ext4: fix typos in ext4_mb_regular_allocator() comment
  ext4: fix checking of directory entry validity for inline directories
  fs: prevent BUG_ON in submit_bh_wbc()
  ext4: correctly restore system zone info when remount fails
  ext4: handle add_system_zone() failure in ext4_setup_system_zone()
  ext4: fold ext4_data_block_valid_rcu() into the caller
  ext4: check journal inode extents more carefully
  ext4: don't allow overlapping system zones
  ext4: handle error of ext4_setup_system_zone() on remount
  ext4: delete the invalid BUGON in ext4_mb_load_buddy_gfp()
  ...
parents 5e0b17b0 27bc446e
......@@ -489,6 +489,9 @@ Files in /sys/fs/ext4/<devname>:
multiple of this tuning parameter if the stripe size is not set in the
ext4 superblock
mb_max_inode_prealloc
The maximum length of per-inode ext4_prealloc_space list.
mb_max_to_scan
The maximum number of extents the multiblock allocator will search to
find the best extent.
......@@ -529,21 +532,21 @@ Files in /sys/fs/ext4/<devname>:
Ioctls
======
There is some Ext4 specific functionality which can be accessed by applications
through the system call interfaces. The list of all Ext4 specific ioctls are
shown in the table below.
Ext4 implements various ioctls which can be used by applications to access
ext4-specific functionality. An incomplete list of these ioctls is shown in the
table below. This list includes truly ext4-specific ioctls (``EXT4_IOC_*``) as
well as ioctls that may have been ext4-specific originally but are now supported
by some other filesystem(s) too (``FS_IOC_*``).
Table of Ext4 specific ioctls
Table of Ext4 ioctls
EXT4_IOC_GETFLAGS
FS_IOC_GETFLAGS
Get additional attributes associated with inode. The ioctl argument is
an integer bitfield, with bit values described in ext4.h. This ioctl is
an alias for FS_IOC_GETFLAGS.
an integer bitfield, with bit values described in ext4.h.
EXT4_IOC_SETFLAGS
FS_IOC_SETFLAGS
Set additional attributes associated with inode. The ioctl argument is
an integer bitfield, with bit values described in ext4.h. This ioctl is
an alias for FS_IOC_SETFLAGS.
an integer bitfield, with bit values described in ext4.h.
EXT4_IOC_GETVERSION, EXT4_IOC_GETVERSION_OLD
Get the inode i_generation number stored for each inode. The
......
......@@ -39,6 +39,6 @@ entry.
Other References
----------------
Also see http://www.nongnu.org/ext2-doc/ for quite a collection of
Also see https://www.nongnu.org/ext2-doc/ for quite a collection of
information about ext2/3. Here's another old reference:
http://wiki.osdev.org/Ext2
......@@ -3157,6 +3157,15 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
/*
* The bh should be mapped, but it might not be if the
* device was hot-removed. Not much we can do but fail the I/O.
*/
if (!buffer_mapped(bh)) {
unlock_buffer(bh);
return -EIO;
}
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
......
......@@ -110,7 +110,7 @@ config EXT4_KUNIT_TESTS
This builds the ext4 KUnit tests.
KUnit tests run during boot and output the results to the debug log
in TAP format (http://testanything.org/). Only useful for kernel devs
in TAP format (https://testanything.org/). Only useful for kernel devs
running KUnit test harness and are not for inclusion into a production
build.
......
......@@ -413,7 +413,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
* Return buffer_head on success or an ERR_PTR in case of failure.
*/
struct buffer_head *
ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
bool ignore_locked)
{
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
......@@ -441,6 +442,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
return ERR_PTR(-ENOMEM);
}
if (ignore_locked && buffer_locked(bh)) {
/* buffer under IO already, return if called for prefetching */
put_bh(bh);
return NULL;
}
if (bitmap_uptodate(bh))
goto verify;
......@@ -487,10 +494,11 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
* submit the buffer_head for reading
*/
set_buffer_new(bh);
trace_ext4_read_block_bitmap_load(sb, block_group);
trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
bh->b_end_io = ext4_end_bitmap_read;
get_bh(bh);
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO |
(ignore_locked ? REQ_RAHEAD : 0), bh);
return bh;
verify:
err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
......@@ -534,7 +542,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
struct buffer_head *bh;
int err;
bh = ext4_read_block_bitmap_nowait(sb, block_group);
bh = ext4_read_block_bitmap_nowait(sb, block_group, false);
if (IS_ERR(bh))
return bh;
err = ext4_wait_block_bitmap(sb, block_group, bh);
......
......@@ -24,6 +24,7 @@ struct ext4_system_zone {
struct rb_node node;
ext4_fsblk_t start_blk;
unsigned int count;
u32 ino;
};
static struct kmem_cache *ext4_system_zone_cachep;
......@@ -45,7 +46,8 @@ void ext4_exit_system_zone(void)
static inline int can_merge(struct ext4_system_zone *entry1,
struct ext4_system_zone *entry2)
{
if ((entry1->start_blk + entry1->count) == entry2->start_blk)
if ((entry1->start_blk + entry1->count) == entry2->start_blk &&
entry1->ino == entry2->ino)
return 1;
return 0;
}
......@@ -66,9 +68,9 @@ static void release_system_zone(struct ext4_system_blocks *system_blks)
*/
static int add_system_zone(struct ext4_system_blocks *system_blks,
ext4_fsblk_t start_blk,
unsigned int count)
unsigned int count, u32 ino)
{
struct ext4_system_zone *new_entry = NULL, *entry;
struct ext4_system_zone *new_entry, *entry;
struct rb_node **n = &system_blks->root.rb_node, *node;
struct rb_node *parent = NULL, *new_node = NULL;
......@@ -79,30 +81,21 @@ static int add_system_zone(struct ext4_system_blocks *system_blks,
n = &(*n)->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
if (start_blk + count > (entry->start_blk +
entry->count))
entry->count = (start_blk + count -
entry->start_blk);
new_node = *n;
new_entry = rb_entry(new_node, struct ext4_system_zone,
node);
break;
}
else /* Unexpected overlap of system zones. */
return -EFSCORRUPTED;
}
if (!new_entry) {
new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
GFP_KERNEL);
if (!new_entry)
return -ENOMEM;
new_entry->start_blk = start_blk;
new_entry->count = count;
new_node = &new_entry->node;
rb_link_node(new_node, parent, n);
rb_insert_color(new_node, &system_blks->root);
}
new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
GFP_KERNEL);
if (!new_entry)
return -ENOMEM;
new_entry->start_blk = start_blk;
new_entry->count = count;
new_entry->ino = ino;
new_node = &new_entry->node;
rb_link_node(new_node, parent, n);
rb_insert_color(new_node, &system_blks->root);
/* Can we merge to the left? */
node = rb_prev(new_node);
......@@ -151,40 +144,6 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
printk(KERN_CONT "\n");
}
/*
* Returns 1 if the passed-in block region (start_blk,
* start_blk+count) is valid; 0 if some part of the block region
* overlaps with filesystem metadata blocks.
*/
static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
struct ext4_system_blocks *system_blks,
ext4_fsblk_t start_blk,
unsigned int count)
{
struct ext4_system_zone *entry;
struct rb_node *n;
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count < start_blk) ||
(start_blk + count > ext4_blocks_count(sbi->s_es)))
return 0;
if (system_blks == NULL)
return 1;
n = system_blks->root.rb_node;
while (n) {
entry = rb_entry(n, struct ext4_system_zone, node);
if (start_blk + count - 1 < entry->start_blk)
n = n->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else
return 0;
}
return 1;
}
static int ext4_protect_reserved_inode(struct super_block *sb,
struct ext4_system_blocks *system_blks,
u32 ino)
......@@ -214,19 +173,18 @@ static int ext4_protect_reserved_inode(struct super_block *sb,
if (n == 0) {
i++;
} else {
if (!ext4_data_block_valid_rcu(sbi, system_blks,
map.m_pblk, n)) {
err = -EFSCORRUPTED;
__ext4_error(sb, __func__, __LINE__, -err,
map.m_pblk, "blocks %llu-%llu "
"from inode %u overlap system zone",
map.m_pblk,
map.m_pblk + map.m_len - 1, ino);
err = add_system_zone(system_blks, map.m_pblk, n, ino);
if (err < 0) {
if (err == -EFSCORRUPTED) {
__ext4_error(sb, __func__, __LINE__,
-err, map.m_pblk,
"blocks %llu-%llu from inode %u overlap system zone",
map.m_pblk,
map.m_pblk + map.m_len - 1,
ino);
}
break;
}
err = add_system_zone(system_blks, map.m_pblk, n);
if (err < 0)
break;
i += n;
}
}
......@@ -262,14 +220,6 @@ int ext4_setup_system_zone(struct super_block *sb)
int flex_size = ext4_flex_bg_size(sbi);
int ret;
if (!test_opt(sb, BLOCK_VALIDITY)) {
if (sbi->system_blks)
ext4_release_system_zone(sb);
return 0;
}
if (sbi->system_blks)
return 0;
system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
if (!system_blks)
return -ENOMEM;
......@@ -277,22 +227,25 @@ int ext4_setup_system_zone(struct super_block *sb)
for (i=0; i < ngroups; i++) {
cond_resched();
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0)))
add_system_zone(system_blks,
((i < 5) || ((i % flex_size) == 0))) {
ret = add_system_zone(system_blks,
ext4_group_first_block_no(sb, i),
ext4_bg_num_gdb(sb, i) + 1);
ext4_bg_num_gdb(sb, i) + 1, 0);
if (ret)
goto err;
}
gdp = ext4_get_group_desc(sb, i, NULL);
ret = add_system_zone(system_blks,
ext4_block_bitmap(sb, gdp), 1);
ext4_block_bitmap(sb, gdp), 1, 0);
if (ret)
goto err;
ret = add_system_zone(system_blks,
ext4_inode_bitmap(sb, gdp), 1);
ext4_inode_bitmap(sb, gdp), 1, 0);
if (ret)
goto err;
ret = add_system_zone(system_blks,
ext4_inode_table(sb, gdp),
sbi->s_itb_per_group);
sbi->s_itb_per_group, 0);
if (ret)
goto err;
}
......@@ -341,11 +294,24 @@ void ext4_release_system_zone(struct super_block *sb)
call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
}
int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
/*
* Returns 1 if the passed-in block region (start_blk,
* start_blk+count) is valid; 0 if some part of the block region
* overlaps with some other filesystem metadata blocks.
*/
int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
unsigned int count)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_system_blocks *system_blks;
int ret;
struct ext4_system_zone *entry;
struct rb_node *n;
int ret = 1;
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count < start_blk) ||
(start_blk + count > ext4_blocks_count(sbi->s_es)))
return 0;
/*
* Lock the system zone to prevent it being released concurrently
......@@ -354,8 +320,22 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
*/
rcu_read_lock();
system_blks = rcu_dereference(sbi->system_blks);
ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk,
count);
if (system_blks == NULL)
goto out_rcu;
n = system_blks->root.rb_node;
while (n) {
entry = rb_entry(n, struct ext4_system_zone, node);
if (start_blk + count - 1 < entry->start_blk)
n = n->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else {
ret = (entry->ino == inode->i_ino);
break;
}
}
out_rcu:
rcu_read_unlock();
return ret;
}
......@@ -374,8 +354,7 @@ int ext4_check_blockref(const char *function, unsigned int line,
while (bref < p+max) {
blk = le32_to_cpu(*bref++);
if (blk &&
unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
blk, 1))) {
unlikely(!ext4_inode_block_valid(inode, blk, 1))) {
ext4_error_inode(inode, function, line, blk,
"invalid block");
return -EFSCORRUPTED;
......
......@@ -434,10 +434,36 @@ struct flex_groups {
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
/* User modifiable flags */
#define EXT4_FL_USER_MODIFIABLE (EXT4_SECRM_FL | \
EXT4_UNRM_FL | \
EXT4_COMPR_FL | \
EXT4_SYNC_FL | \
EXT4_IMMUTABLE_FL | \
EXT4_APPEND_FL | \
EXT4_NODUMP_FL | \
EXT4_NOATIME_FL | \
EXT4_JOURNAL_DATA_FL | \
EXT4_NOTAIL_FL | \
EXT4_DIRSYNC_FL | \
EXT4_TOPDIR_FL | \
EXT4_EXTENTS_FL | \
0x00400000 /* EXT4_EOFBLOCKS_FL */ | \
EXT4_DAX_FL | \
EXT4_PROJINHERIT_FL | \
EXT4_CASEFOLD_FL)
/* User visible flags */
#define EXT4_FL_USER_VISIBLE (EXT4_FL_USER_MODIFIABLE | \
EXT4_DIRTY_FL | \
EXT4_COMPRBLK_FL | \
EXT4_NOCOMPR_FL | \
EXT4_ENCRYPT_FL | \
EXT4_INDEX_FL | \
EXT4_VERITY_FL | \
EXT4_INLINE_DATA_FL)
/* Flags we can manipulate with through FS_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
EXT4_IMMUTABLE_FL | \
EXT4_APPEND_FL | \
......@@ -669,8 +695,6 @@ enum {
/*
* ioctl commands
*/
#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
......@@ -687,17 +711,11 @@ enum {
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
#define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
/* ioctl codes 19--39 are reserved for fscrypt */
#define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40)
#define EXT4_IOC_GETSTATE _IOW('f', 41, __u32)
#define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap)
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32)
/*
......@@ -722,8 +740,6 @@ enum {
/*
* ioctl commands in 32 bit emulation
*/
#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
......@@ -1054,6 +1070,7 @@ struct ext4_inode_info {
struct timespec64 i_crtime;
/* mballoc */
atomic_t i_prealloc_active;
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;
......@@ -1172,6 +1189,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */
#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
......@@ -1501,10 +1519,13 @@ struct ext4_sb_info {
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
unsigned int s_mb_max_inode_prealloc;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
unsigned int s_mb_prefetch;
unsigned int s_mb_prefetch_limit;
/* stats for buddy allocator */
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
......@@ -1572,6 +1593,8 @@ struct ext4_sb_info {
struct ratelimit_state s_err_ratelimit_state;
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
atomic_t s_warning_count;
atomic_t s_msg_count;
/* Encryption context for '-o test_dummy_encryption' */
struct fscrypt_dummy_context s_dummy_enc_ctx;
......@@ -1585,6 +1608,9 @@ struct ext4_sb_info {
#ifdef CONFIG_EXT4_DEBUG
unsigned long s_simulate_fail;
#endif
/* Record the errseq of the backing block device */
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
......@@ -2313,9 +2339,15 @@ struct ext4_lazy_init {
struct mutex li_list_mtx;
};
enum ext4_li_mode {
EXT4_LI_MODE_PREFETCH_BBITMAP,
EXT4_LI_MODE_ITABLE,
};
struct ext4_li_request {
struct super_block *lr_super;
struct ext4_sb_info *lr_sbi;
enum ext4_li_mode lr_mode;
ext4_group_t lr_first_not_zeroed;
ext4_group_t lr_next_group;
struct list_head lr_request;
unsigned long lr_next_sched;
......@@ -2446,7 +2478,8 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
ext4_group_t block_group);
ext4_group_t block_group,
bool ignore_locked);
extern int ext4_wait_block_bitmap(struct super_block *sb,
ext4_group_t block_group,
struct buffer_head *bh);
......@@ -2651,9 +2684,15 @@ extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
ext4_group_t group,
unsigned int nr, int *cnt);
extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
unsigned int nr);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
......@@ -2765,8 +2804,7 @@ extern int ext4_search_dir(struct buffer_head *bh,
struct ext4_filename *fname,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir);
extern int ext4_generic_delete_entry(handle_t *handle,
struct inode *dir,
extern int ext4_generic_delete_entry(struct inode *dir,
struct ext4_dir_entry_2 *de_del,
struct buffer_head *bh,
void *entry_buf,
......@@ -2924,12 +2962,6 @@ do { \
#endif
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat);
extern int ext4_update_rocompat_feature(handle_t *handle,
struct super_block *sb, __u32 rocompat);
extern int ext4_update_incompat_feature(handle_t *handle,
struct super_block *sb, __u32 incompat);
extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg);
extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
......@@ -3145,6 +3177,7 @@ struct ext4_group_info {
(1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
#define EXT4_GROUP_INFO_IBITMAP_CORRUPT \
(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
#define EXT4_GROUP_INFO_BBITMAP_READ_BIT 4
#define EXT4_MB_GRP_NEED_INIT(grp) \
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
......@@ -3159,6 +3192,8 @@ struct ext4_group_info {
(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \
(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_TEST_AND_SET_READ(grp) \
(test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
#define EXT4_MAX_CONTENTION 8
#define EXT4_CONTENTION_THRESHOLD 2
......@@ -3363,9 +3398,9 @@ extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
extern int __init ext4_init_system_zone(void);
extern void ext4_exit_system_zone(void);
extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
unsigned int count);
extern int ext4_inode_block_valid(struct inode *inode,
ext4_fsblk_t start_blk,
unsigned int count);
extern int ext4_check_blockref(const char *, unsigned int,
struct inode *, __le32 *, unsigned int);
......
......@@ -195,6 +195,28 @@ static void ext4_journal_abort_handle(const char *caller, unsigned int line,
jbd2_journal_abort_handle(handle);
}
static void ext4_check_bdev_write_error(struct super_block *sb)
{
struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err;
/*
* If the block device has write error flag, it may have failed to
* async write out metadata buffers in the background. In this case,
* we could read old data from disk and write it out again, which
* may lead to on-disk filesystem inconsistency.
*/
if (errseq_check(&mapping->wb_err, READ_ONCE(sbi->s_bdev_wb_err))) {
spin_lock(&sbi->s_bdev_wb_lock);
err = errseq_check_and_advance(&mapping->wb_err, &sbi->s_bdev_wb_err);
spin_unlock(&sbi->s_bdev_wb_lock);
if (err)
ext4_error_err(sb, -err,
"Error while async write back metadata");
}
}
int __ext4_journal_get_write_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh)
{
......@@ -202,6 +224,9 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
might_sleep();
if (bh->b_bdev->bd_super)
ext4_check_bdev_write_error(bh->b_bdev->bd_super);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh);
if (err)
......
......@@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
* i_mutex. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
ext4_discard_preallocations(inode);