Import of kernel-4.18.0-553.69.1.el8_10
Parent: 8e6d029cd2
Commit: 249be1f988
@@ -12,7 +12,7 @@ RHEL_MINOR = 10
#
# Use this spot to avoid future merge conflicts.
# Do not trim this comment.
RHEL_RELEASE = 553.66.1
RHEL_RELEASE = 553.69.1

#
# ZSTREAM
@@ -1439,22 +1439,12 @@ __acquires(bitmap->lock)
&(bitmap->bp[page].map[pageoff]);
}

int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors)
{
if (!bitmap)
return 0;

if (behind) {
int bw;
atomic_inc(&bitmap->behind_writes);
bw = atomic_read(&bitmap->behind_writes);
if (bw > bitmap->behind_writes_used)
bitmap->behind_writes_used = bw;

pr_debug("inc write-behind count %d/%lu\n",
bw, bitmap->mddev->bitmap_info.max_write_behind);
}

while (sectors) {
sector_t blocks;
bitmap_counter_t *bmc;
@@ -1504,17 +1494,10 @@ int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long s
EXPORT_SYMBOL(md_bitmap_startwrite);

void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int success, int behind)
unsigned long sectors)
{
if (!bitmap)
return;
if (behind) {
if (atomic_dec_and_test(&bitmap->behind_writes))
wake_up(&bitmap->behind_wait);
pr_debug("dec write-behind count %d/%lu\n",
atomic_read(&bitmap->behind_writes),
bitmap->mddev->bitmap_info.max_write_behind);
}

while (sectors) {
sector_t blocks;
@@ -1528,15 +1511,16 @@ void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
return;
}

if (success && !bitmap->mddev->degraded &&
bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
}

if (!success && !NEEDED(*bmc))
if (!bitmap->mddev->degraded) {
if (bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
sysfs_notify_dirent_safe(
bitmap->sysfs_can_clear);
}
} else if (!NEEDED(*bmc)) {
*bmc |= NEEDED_MASK;
}

if (COUNTER(*bmc) == COUNTER_MAX)
wake_up(&bitmap->overflow_wait);
@@ -1816,6 +1800,39 @@ void md_bitmap_free(struct bitmap *bitmap)
}
EXPORT_SYMBOL(md_bitmap_free);

void md_bitmap_start_behind_write(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
int bw;

if (!bitmap)
return;

atomic_inc(&bitmap->behind_writes);
bw = atomic_read(&bitmap->behind_writes);
if (bw > bitmap->behind_writes_used)
bitmap->behind_writes_used = bw;

pr_debug("inc write-behind count %d/%lu\n",
bw, bitmap->mddev->bitmap_info.max_write_behind);
}
EXPORT_SYMBOL(md_bitmap_start_behind_write);

void md_bitmap_end_behind_write(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;

if (!bitmap)
return;

if (atomic_dec_and_test(&bitmap->behind_writes))
wake_up(&bitmap->behind_wait);
pr_debug("dec write-behind count %d/%lu\n",
atomic_read(&bitmap->behind_writes),
bitmap->mddev->bitmap_info.max_write_behind);
}
EXPORT_SYMBOL(md_bitmap_end_behind_write);

void md_bitmap_wait_behind_writes(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
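Note: the new md_bitmap_start_behind_write()/md_bitmap_end_behind_write() helpers carry the same accounting that previously lived inside md_bitmap_startwrite()/md_bitmap_endwrite(): bump an in-flight counter, remember its high-water mark, and wake any waiter when the count drops to zero. A minimal userspace sketch of that counter pattern, using C11 atomics instead of the kernel's atomic_t and wait queues (names below are illustrative, not kernel APIs):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Toy model of write-behind accounting: an in-flight counter plus a
     * recorded peak, mirroring behind_writes / behind_writes_used. */
    struct behind_stats {
        atomic_int in_flight;
        atomic_int peak;
    };

    static void start_behind_write(struct behind_stats *s)
    {
        int now = atomic_fetch_add(&s->in_flight, 1) + 1;
        int peak = atomic_load(&s->peak);

        /* Record a new high-water mark; concurrent updates only raise it. */
        while (now > peak &&
               !atomic_compare_exchange_weak(&s->peak, &peak, now))
            ;
    }

    static void end_behind_write(struct behind_stats *s)
    {
        /* fetch_sub returns the old value: 1 means we were the last writer. */
        if (atomic_fetch_sub(&s->in_flight, 1) == 1)
            printf("last write-behind completed, waiters may proceed\n");
    }

    int main(void)
    {
        struct behind_stats s = { 0 };

        start_behind_write(&s);
        start_behind_write(&s);
        end_behind_write(&s);
        end_behind_write(&s);
        printf("peak in-flight: %d\n", atomic_load(&s.peak));
        return 0;
    }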
@@ -252,9 +252,9 @@ void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long

/* these are exported */
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int behind);
unsigned long sectors);
void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int success, int behind);
unsigned long sectors);
int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void md_bitmap_close_sync(struct bitmap *bitmap);
@@ -274,6 +274,8 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
sector_t *lo, sector_t *hi, bool clear_bits);
void md_bitmap_free(struct bitmap *bitmap);
void md_bitmap_wait_behind_writes(struct mddev *mddev);
void md_bitmap_start_behind_write(struct mddev *mddev);
void md_bitmap_end_behind_write(struct mddev *mddev);

static inline bool md_bitmap_enabled(struct bitmap *bitmap)
{
@@ -279,6 +279,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
bio = split;
}

md_account_bio(mddev, &bio);
bio_set_dev(bio, tmp_dev->rdev->bdev);
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
start_sector + data_offset;
@@ -116,6 +116,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
&& md_flush_request(mddev, bio))
return true;

md_account_bio(mddev, &bio);
mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);

mp_bh->master_bio = bio;
@@ -2367,7 +2367,7 @@ int md_integrity_register(struct mddev *mddev)
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
(mddev->level != 1 && mddev->level != 10 &&
bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
/*
* No need to handle the failure of bioset_integrity_create,
* because the function is called by md_run() -> pers->run(),
@@ -5984,9 +5984,9 @@ int md_run(struct mddev *mddev)
goto exit_bio_set;
}

if (!bioset_initialized(&mddev->io_acct_set)) {
err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
offsetof(struct md_io_acct, bio_clone), 0);
if (!bioset_initialized(&mddev->io_clone_set)) {
err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
offsetof(struct md_io_clone, bio_clone), 0);
if (err)
goto exit_sync_set;
}
@@ -6172,7 +6172,7 @@ bitmap_abort:
module_put(pers->owner);
md_bitmap_destroy(mddev);
abort:
bioset_exit(&mddev->io_acct_set);
bioset_exit(&mddev->io_clone_set);
exit_sync_set:
bioset_exit(&mddev->sync_set);
exit_bio_set:
@@ -6398,7 +6398,7 @@ static void __md_stop(struct mddev *mddev)
percpu_ref_exit(&mddev->active_io);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
bioset_exit(&mddev->io_acct_set);
bioset_exit(&mddev->io_clone_set);
}

void md_stop(struct mddev *mddev)
@@ -8768,44 +8768,69 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);

static void md_end_io_acct(struct bio *bio)
static void md_bitmap_start(struct mddev *mddev,
struct md_io_clone *md_io_clone)
{
struct md_io_acct *md_io_acct = bio->bi_private;
struct bio *orig_bio = md_io_acct->orig_bio;
struct mddev *mddev = md_io_acct->mddev;
if (mddev->pers->bitmap_sector)
mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
&md_io_clone->sectors);

md_bitmap_startwrite(mddev->bitmap, md_io_clone->offset,
md_io_clone->sectors);
}

static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
{
md_bitmap_endwrite(mddev->bitmap, md_io_clone->offset,
md_io_clone->sectors);
}

static void md_end_clone_io(struct bio *bio)
{
struct md_io_clone *md_io_clone = bio->bi_private;
struct bio *orig_bio = md_io_clone->orig_bio;
struct mddev *mddev = md_io_clone->mddev;

if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
md_bitmap_end(mddev, md_io_clone);

orig_bio->bi_status = bio->bi_status;

bio_end_io_acct(orig_bio, md_io_acct->start_time);
if (md_io_clone->start_time)
bio_end_io_acct(orig_bio, md_io_clone->start_time);

bio_put(bio);
bio_endio(orig_bio);

percpu_ref_put(&mddev->active_io);
}

/*
* Used by personalities that don't already clone the bio and thus can't
* easily add the timestamp to their extended bio structure.
*/
static void md_clone_bio(struct mddev *mddev, struct bio **bio)
{
struct md_io_clone *md_io_clone;
struct bio *clone =
bio_clone_fast(*bio, GFP_NOIO, &mddev->io_clone_set);

md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
md_io_clone->orig_bio = *bio;
md_io_clone->mddev = mddev;
if (blk_queue_io_stat((*bio)->bi_disk->queue))
md_io_clone->start_time = bio_start_io_acct(*bio);

if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
md_io_clone->offset = (*bio)->bi_iter.bi_sector;
md_io_clone->sectors = bio_sectors(*bio);
md_bitmap_start(mddev, md_io_clone);
}

clone->bi_end_io = md_end_clone_io;
clone->bi_private = md_io_clone;
*bio = clone;
}

void md_account_bio(struct mddev *mddev, struct bio **bio)
{
struct md_io_acct *md_io_acct;
struct bio *clone;

if (!blk_queue_io_stat((*bio)->bi_disk->queue))
return;

percpu_ref_get(&mddev->active_io);

clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
md_io_acct->orig_bio = *bio;
md_io_acct->start_time = bio_start_io_acct(*bio);
md_io_acct->mddev = mddev;

clone->bi_end_io = md_end_io_acct;
clone->bi_private = md_io_acct;
*bio = clone;
md_clone_bio(mddev, bio);
}
EXPORT_SYMBOL_GPL(md_account_bio);
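Note: md_clone_bio() relies on the embedded-member trick: the cloned struct bio is the last field of struct md_io_clone, the bioset is initialized with offsetof(struct md_io_clone, bio_clone), and the completion handler recovers the outer structure from the bio pointer with container_of(). A standalone sketch of that recovery pattern in plain C, with a local container_of definition and stand-in struct names (not the kernel's):

    #include <stddef.h>
    #include <stdio.h>

    /* Minimal container_of: recover the outer struct from a member pointer. */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct fake_bio {                 /* stand-in for struct bio */
        int status;
    };

    struct io_clone {                 /* stand-in for struct md_io_clone */
        unsigned long start_time;
        unsigned long offset;
        unsigned long sectors;
        struct fake_bio bio_clone;    /* embedded clone, last member */
    };

    /* Completion callback: only the embedded bio pointer is handed in. */
    static void end_clone_io(struct fake_bio *bio)
    {
        struct io_clone *ic = container_of(bio, struct io_clone, bio_clone);

        printf("completed %lu sectors at offset %lu, status %d\n",
               ic->sectors, ic->offset, bio->status);
    }

    int main(void)
    {
        struct io_clone ic = { .offset = 2048, .sectors = 8 };

        ic.bio_clone.status = 0;
        end_clone_io(&ic.bio_clone);
        return 0;
    }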
@@ -517,7 +517,7 @@ struct mddev {
struct bio_set sync_set; /* for sync operations like
* metadata and bitmap writes
*/
struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */
struct bio_set io_clone_set;

/* Generic flush handling.
* The last to finish preflush schedules a worker to submit
@@ -662,6 +662,9 @@ struct md_personality
int (*congested)(struct mddev *mddev, int bits);
/* Changes the consistency policy of an active array. */
int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
/* convert io ranges from array to bitmap */
void (*bitmap_sector)(struct mddev *mddev, sector_t *offset,
unsigned long *sectors);
};

struct md_sysfs_entry {
@@ -740,10 +743,12 @@ struct md_thread {
void *private;
};

struct md_io_acct {
struct md_io_clone {
struct mddev *mddev;
struct bio *orig_bio;
unsigned long start_time;
sector_t offset;
unsigned long sectors;
struct bio bio_clone;
};
@ -308,8 +308,6 @@ static void call_bio_endio(struct r1bio *r1_bio)
|
||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_disk->queue))
|
||||
bio_end_io_acct(bio, r1_bio->start_time);
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
@ -426,11 +424,8 @@ static void close_write(struct r1bio *r1_bio)
|
||||
bio_put(r1_bio->behind_master_bio);
|
||||
r1_bio->behind_master_bio = NULL;
|
||||
}
|
||||
/* clear the bitmap if all writes complete successfully */
|
||||
md_bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
|
||||
r1_bio->sectors,
|
||||
!test_bit(R1BIO_Degraded, &r1_bio->state),
|
||||
test_bit(R1BIO_BehindIO, &r1_bio->state));
|
||||
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
|
||||
md_bitmap_end_behind_write(r1_bio->mddev);
|
||||
md_write_end(r1_bio->mddev);
|
||||
}
|
||||
|
||||
@ -487,8 +482,6 @@ static void raid1_end_write_request(struct bio *bio)
|
||||
if (!test_bit(Faulty, &rdev->flags))
|
||||
set_bit(R1BIO_WriteError, &r1_bio->state);
|
||||
else {
|
||||
/* Fail the request */
|
||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||
/* Finished with this branch */
|
||||
r1_bio->bios[mirror] = NULL;
|
||||
to_put = bio;
|
||||
@ -1342,10 +1335,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
|
||||
}
|
||||
|
||||
r1_bio->read_disk = rdisk;
|
||||
|
||||
if (!r1bio_existed && blk_queue_io_stat(bio->bi_disk->queue))
|
||||
r1_bio->start_time = bio_start_io_acct(bio);
|
||||
|
||||
if (!r1bio_existed) {
|
||||
md_account_bio(mddev, &bio);
|
||||
r1_bio->master_bio = bio;
|
||||
}
|
||||
read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
|
||||
|
||||
r1_bio->bios[rdisk] = read_bio;
|
||||
@ -1454,11 +1447,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
break;
|
||||
}
|
||||
r1_bio->bios[i] = NULL;
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags)) {
|
||||
if (i < conf->raid_disks)
|
||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||
if (!rdev || test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
}
|
||||
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
if (test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
@ -1484,16 +1474,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
*/
|
||||
max_sectors = bad_sectors;
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
/* We don't set R1BIO_Degraded as that
|
||||
* only applies if the disk is
|
||||
* missing, so it might be re-added,
|
||||
* and we want to know to recover this
|
||||
* chunk.
|
||||
* In this case the device is here,
|
||||
* and the fact that this chunk is not
|
||||
* in-sync is recorded in the bad
|
||||
* block log
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
if (is_bad) {
|
||||
@ -1545,8 +1525,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r1_bio->sectors = max_sectors;
|
||||
}
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_disk->queue))
|
||||
r1_bio->start_time = bio_start_io_acct(bio);
|
||||
md_account_bio(mddev, &bio);
|
||||
r1_bio->master_bio = bio;
|
||||
atomic_set(&r1_bio->remaining, 1);
|
||||
atomic_set(&r1_bio->behind_remaining, 0);
|
||||
|
||||
@ -1570,8 +1550,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
alloc_behind_master_bio(r1_bio, bio);
|
||||
}
|
||||
|
||||
md_bitmap_startwrite(bitmap, r1_bio->sector, r1_bio->sectors,
|
||||
test_bit(R1BIO_BehindIO, &r1_bio->state));
|
||||
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
|
||||
md_bitmap_start_behind_write(mddev);
|
||||
first_clone = 0;
|
||||
}
|
||||
|
||||
@ -2525,12 +2505,10 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
||||
* errors.
|
||||
*/
|
||||
fail = true;
|
||||
if (!narrow_write_error(r1_bio, m)) {
|
||||
if (!narrow_write_error(r1_bio, m))
|
||||
md_error(conf->mddev,
|
||||
conf->mirrors[m].rdev);
|
||||
/* an I/O failed, we can't clear the bitmap */
|
||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||
}
|
||||
rdev_dec_pending(conf->mirrors[m].rdev,
|
||||
conf->mddev);
|
||||
}
|
||||
@ -2622,8 +2600,6 @@ static void raid1d(struct md_thread *thread)
|
||||
list_del(&r1_bio->retry_list);
|
||||
idx = sector_to_idx(r1_bio->sector);
|
||||
atomic_dec(&conf->nr_queued[idx]);
|
||||
if (mddev->degraded)
|
||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||
if (test_bit(R1BIO_WriteError, &r1_bio->state))
|
||||
close_write(r1_bio);
|
||||
raid_end_bio_io(r1_bio);
|
||||
|
@@ -158,7 +158,6 @@ struct r1bio {
sector_t sector;
int sectors;
unsigned long state;
unsigned long start_time;
struct mddev *mddev;
/*
* original bio going to /dev/mdx
@@ -189,7 +188,6 @@ struct r1bio {
enum r1bio_state {
R1BIO_Uptodate,
R1BIO_IsSync,
R1BIO_Degraded,
R1BIO_BehindIO,
/* Set ReadError on bios that experience a readerror so that
* raid1d knows what to do with them.
@ -327,8 +327,6 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
|
||||
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_disk->queue))
|
||||
bio_end_io_acct(bio, r10_bio->start_time);
|
||||
bio_endio(bio);
|
||||
/*
|
||||
* Wake up any possible resync thread that waits for the device
|
||||
@ -432,11 +430,6 @@ static void raid10_end_read_request(struct bio *bio)
|
||||
|
||||
static void close_write(struct r10bio *r10_bio)
|
||||
{
|
||||
/* clear the bitmap if all writes complete successfully */
|
||||
md_bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
|
||||
r10_bio->sectors,
|
||||
!test_bit(R10BIO_Degraded, &r10_bio->state),
|
||||
0);
|
||||
md_write_end(r10_bio->mddev);
|
||||
}
|
||||
|
||||
@ -506,7 +499,6 @@ static void raid10_end_write_request(struct bio *bio)
|
||||
set_bit(R10BIO_WriteError, &r10_bio->state);
|
||||
else {
|
||||
/* Fail the request */
|
||||
set_bit(R10BIO_Degraded, &r10_bio->state);
|
||||
r10_bio->devs[slot].bio = NULL;
|
||||
to_put = bio;
|
||||
dec_rdev = 1;
|
||||
@ -1206,7 +1198,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
|
||||
}
|
||||
|
||||
static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
struct r10bio *r10_bio)
|
||||
struct r10bio *r10_bio, bool io_accounting)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct bio *read_bio;
|
||||
@ -1277,8 +1269,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
|
||||
}
|
||||
slot = r10_bio->read_slot;
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_disk->queue))
|
||||
r10_bio->start_time = bio_start_io_acct(bio);
|
||||
if (io_accounting) {
|
||||
md_account_bio(mddev, &bio);
|
||||
r10_bio->master_bio = bio;
|
||||
}
|
||||
read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
|
||||
|
||||
r10_bio->devs[slot].bio = read_bio;
|
||||
@ -1526,10 +1520,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r10_bio->devs[i].bio = NULL;
|
||||
r10_bio->devs[i].repl_bio = NULL;
|
||||
|
||||
if (!rdev && !rrdev) {
|
||||
set_bit(R10BIO_Degraded, &r10_bio->state);
|
||||
if (!rdev && !rrdev)
|
||||
continue;
|
||||
}
|
||||
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
sector_t first_bad;
|
||||
sector_t dev_sector = r10_bio->devs[i].addr;
|
||||
@ -1546,14 +1538,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
* to other devices yet
|
||||
*/
|
||||
max_sectors = bad_sectors;
|
||||
/* We don't set R10BIO_Degraded as that
|
||||
* only applies if the disk is missing,
|
||||
* so it might be re-added, and we want to
|
||||
* know to recover this chunk.
|
||||
* In this case the device is here, and the
|
||||
* fact that this chunk is not in-sync is
|
||||
* recorded in the bad block log.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
if (is_bad) {
|
||||
@ -1587,10 +1571,9 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
|
||||
r10_bio->master_bio = bio;
|
||||
}
|
||||
|
||||
if (blk_queue_io_stat(bio->bi_disk->queue))
|
||||
r10_bio->start_time = bio_start_io_acct(bio);
|
||||
md_account_bio(mddev, &bio);
|
||||
r10_bio->master_bio = bio;
|
||||
atomic_set(&r10_bio->remaining, 1);
|
||||
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
|
||||
|
||||
for (i = 0; i < conf->copies; i++) {
|
||||
if (r10_bio->devs[i].bio)
|
||||
@ -1619,7 +1602,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
|
||||
conf->geo.raid_disks);
|
||||
|
||||
if (bio_data_dir(bio) == READ)
|
||||
raid10_read_request(mddev, bio, r10_bio);
|
||||
raid10_read_request(mddev, bio, r10_bio, true);
|
||||
else
|
||||
raid10_write_request(mddev, bio, r10_bio);
|
||||
}
|
||||
@ -3040,7 +3023,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
r10_bio->state = 0;
|
||||
raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
|
||||
raid10_read_request(mddev, r10_bio->master_bio, r10_bio, false);
|
||||
/*
|
||||
* allow_barrier after re-submit to ensure no sync io
|
||||
* can be issued while regular io pending.
|
||||
@ -3112,11 +3095,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
} else if (bio != NULL && bio->bi_status) {
|
||||
fail = true;
|
||||
if (!narrow_write_error(r10_bio, m)) {
|
||||
if (!narrow_write_error(r10_bio, m))
|
||||
md_error(conf->mddev, rdev);
|
||||
set_bit(R10BIO_Degraded,
|
||||
&r10_bio->state);
|
||||
}
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
bio = r10_bio->devs[m].repl_bio;
|
||||
@ -3175,8 +3155,6 @@ static void raid10d(struct md_thread *thread)
|
||||
r10_bio = list_first_entry(&tmp, struct r10bio,
|
||||
retry_list);
|
||||
list_del(&r10_bio->retry_list);
|
||||
if (mddev->degraded)
|
||||
set_bit(R10BIO_Degraded, &r10_bio->state);
|
||||
|
||||
if (test_bit(R10BIO_WriteError,
|
||||
&r10_bio->state))
|
||||
|
@@ -124,7 +124,6 @@ struct r10bio {
sector_t sector; /* virtual sector number */
int sectors;
unsigned long state;
unsigned long start_time;
struct mddev *mddev;
/*
* original bio going to /dev/mdx
@@ -163,7 +162,6 @@ enum r10bio_state {
R10BIO_IsSync,
R10BIO_IsRecover,
R10BIO_IsReshape,
R10BIO_Degraded,
/* Set ReadError on bios that experience a read error
* so that raid10d knows what to do with them.
*/
@@ -322,10 +322,6 @@ void r5c_handle_cached_data_endio(struct r5conf *conf,
if (sh->dev[i].written) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
r5c_return_dev_pending_writes(conf, &sh->dev[i]);
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf),
!test_bit(STRIPE_DEGRADED, &sh->state),
0);
}
}
}
@ -915,8 +915,7 @@ static bool stripe_can_batch(struct stripe_head *sh)
|
||||
if (raid5_has_log(conf) || raid5_has_ppl(conf))
|
||||
return false;
|
||||
return test_bit(STRIPE_BATCH_READY, &sh->state) &&
|
||||
!test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
|
||||
is_full_stripe_write(sh);
|
||||
is_full_stripe_write(sh);
|
||||
}
|
||||
|
||||
/* we only do back search */
|
||||
@ -1372,8 +1371,6 @@ again:
|
||||
generic_make_request(rbi);
|
||||
}
|
||||
if (!rdev && !rrdev) {
|
||||
if (op_is_write(op))
|
||||
set_bit(STRIPE_DEGRADED, &sh->state);
|
||||
pr_debug("skip op %d on disc %d for sector %llu\n",
|
||||
bi->bi_opf, i, (unsigned long long)sh->sector);
|
||||
clear_bit(R5_LOCKED, &sh->dev[i].flags);
|
||||
@ -2949,7 +2946,6 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
|
||||
} else {
|
||||
if (bi->bi_status) {
|
||||
set_bit(STRIPE_DEGRADED, &sh->state);
|
||||
set_bit(WriteErrorSeen, &rdev->flags);
|
||||
set_bit(R5_WriteError, &sh->dev[i].flags);
|
||||
if (!test_and_set_bit(WantReplacement, &rdev->flags))
|
||||
@ -3616,29 +3612,9 @@ static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
|
||||
(*bip)->bi_iter.bi_sector, sh->sector, dd_idx,
|
||||
sh->dev[dd_idx].sector);
|
||||
|
||||
if (conf->mddev->bitmap && firstwrite) {
|
||||
/* Cannot hold spinlock over bitmap_startwrite,
|
||||
* but must ensure this isn't added to a batch until
|
||||
* we have added to the bitmap and set bm_seq.
|
||||
* So set STRIPE_BITMAP_PENDING to prevent
|
||||
* batching.
|
||||
* If multiple __add_stripe_bio() calls race here they
|
||||
* much all set STRIPE_BITMAP_PENDING. So only the first one
|
||||
* to complete "bitmap_startwrite" gets to set
|
||||
* STRIPE_BIT_DELAY. This is important as once a stripe
|
||||
* is added to a batch, STRIPE_BIT_DELAY cannot be changed
|
||||
* any more.
|
||||
*/
|
||||
set_bit(STRIPE_BITMAP_PENDING, &sh->state);
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
md_bitmap_startwrite(conf->mddev->bitmap, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
spin_lock_irq(&sh->stripe_lock);
|
||||
clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
|
||||
if (!sh->batch_head) {
|
||||
sh->bm_seq = conf->seq_flush+1;
|
||||
set_bit(STRIPE_BIT_DELAY, &sh->state);
|
||||
}
|
||||
if (conf->mddev->bitmap && firstwrite && !sh->batch_head) {
|
||||
sh->bm_seq = conf->seq_flush+1;
|
||||
set_bit(STRIPE_BIT_DELAY, &sh->state);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3689,7 +3665,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
BUG_ON(sh->batch_head);
|
||||
for (i = disks; i--; ) {
|
||||
struct bio *bi;
|
||||
int bitmap_end = 0;
|
||||
|
||||
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
|
||||
struct md_rdev *rdev;
|
||||
@ -3716,8 +3691,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
sh->dev[i].towrite = NULL;
|
||||
sh->overwrite_disks = 0;
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
if (bi)
|
||||
bitmap_end = 1;
|
||||
|
||||
log_stripe_write_finished(sh);
|
||||
|
||||
@ -3732,10 +3705,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
bio_io_error(bi);
|
||||
bi = nextbi;
|
||||
}
|
||||
if (bitmap_end)
|
||||
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0, 0);
|
||||
bitmap_end = 0;
|
||||
/* and fail all 'written' */
|
||||
bi = sh->dev[i].written;
|
||||
sh->dev[i].written = NULL;
|
||||
@ -3744,7 +3713,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
sh->dev[i].page = sh->dev[i].orig_page;
|
||||
}
|
||||
|
||||
if (bi) bitmap_end = 1;
|
||||
while (bi && bi->bi_iter.bi_sector <
|
||||
sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
|
||||
struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
|
||||
@ -3778,9 +3746,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
bi = nextbi;
|
||||
}
|
||||
}
|
||||
if (bitmap_end)
|
||||
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0, 0);
|
||||
/* If we were in the middle of a write the parity block might
|
||||
* still be locked - so just clear all R5_LOCKED flags
|
||||
*/
|
||||
@ -4131,10 +4096,6 @@ returnbi:
|
||||
bio_endio(wbi);
|
||||
wbi = wbi2;
|
||||
}
|
||||
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf),
|
||||
!test_bit(STRIPE_DEGRADED, &sh->state),
|
||||
0);
|
||||
if (head_sh->batch_head) {
|
||||
sh = list_first_entry(&sh->batch_list,
|
||||
struct stripe_head,
|
||||
@ -4411,7 +4372,6 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
|
||||
s->locked++;
|
||||
set_bit(R5_Wantwrite, &dev->flags);
|
||||
|
||||
clear_bit(STRIPE_DEGRADED, &sh->state);
|
||||
set_bit(STRIPE_INSYNC, &sh->state);
|
||||
break;
|
||||
case check_state_run:
|
||||
@ -4568,7 +4528,6 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
|
||||
clear_bit(R5_Wantwrite, &dev->flags);
|
||||
s->locked--;
|
||||
}
|
||||
clear_bit(STRIPE_DEGRADED, &sh->state);
|
||||
|
||||
set_bit(STRIPE_INSYNC, &sh->state);
|
||||
break;
|
||||
@ -4968,8 +4927,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
|
||||
(1 << STRIPE_COMPUTE_RUN) |
|
||||
(1 << STRIPE_DISCARD) |
|
||||
(1 << STRIPE_BATCH_READY) |
|
||||
(1 << STRIPE_BATCH_ERR) |
|
||||
(1 << STRIPE_BITMAP_PENDING)),
|
||||
(1 << STRIPE_BATCH_ERR)),
|
||||
"stripe state: %lx\n", sh->state);
|
||||
WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
|
||||
(1 << STRIPE_REPLACED)),
|
||||
@ -4977,7 +4935,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
|
||||
|
||||
set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
|
||||
(1 << STRIPE_PREREAD_ACTIVE) |
|
||||
(1 << STRIPE_DEGRADED) |
|
||||
(1 << STRIPE_ON_UNPLUG_LIST)),
|
||||
head_sh->state & (1 << STRIPE_INSYNC));
|
||||
|
||||
@ -5522,26 +5479,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
|
||||
*/
|
||||
static void raid5_align_endio(struct bio *bi)
|
||||
{
|
||||
struct md_io_acct *md_io_acct = bi->bi_private;
|
||||
struct bio *raid_bi = md_io_acct->orig_bio;
|
||||
struct mddev *mddev;
|
||||
struct r5conf *conf;
|
||||
struct md_rdev *rdev;
|
||||
struct bio *raid_bi = bi->bi_private;
|
||||
struct md_rdev *rdev = (void *)raid_bi->bi_next;
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
struct r5conf *conf = mddev->private;
|
||||
blk_status_t error = bi->bi_status;
|
||||
unsigned long start_time = md_io_acct->start_time;
|
||||
|
||||
bio_put(bi);
|
||||
|
||||
rdev = (void*)raid_bi->bi_next;
|
||||
raid_bi->bi_next = NULL;
|
||||
mddev = rdev->mddev;
|
||||
conf = mddev->private;
|
||||
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
|
||||
if (!error) {
|
||||
if (blk_queue_io_stat(raid_bi->bi_disk->queue))
|
||||
bio_end_io_acct(raid_bi, start_time);
|
||||
bio_endio(raid_bi);
|
||||
if (atomic_dec_and_test(&conf->active_aligned_reads))
|
||||
wake_up(&conf->wait_for_quiescent);
|
||||
@ -5560,7 +5508,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
struct md_rdev *rdev;
|
||||
sector_t sector, end_sector, first_bad;
|
||||
int bad_sectors, dd_idx;
|
||||
struct md_io_acct *md_io_acct;
|
||||
bool did_inc;
|
||||
|
||||
if (!in_chunk_boundary(mddev, raid_bio)) {
|
||||
@ -5597,17 +5544,14 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
|
||||
return 0;
|
||||
}
|
||||
|
||||
align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set);
|
||||
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
|
||||
md_account_bio(mddev, &raid_bio);
|
||||
raid_bio->bi_next = (void *)rdev;
|
||||
if (blk_queue_io_stat(raid_bio->bi_disk->queue))
|
||||
md_io_acct->start_time = bio_start_io_acct(raid_bio);
|
||||
md_io_acct->orig_bio = raid_bio;
|
||||
|
||||
bio_set_dev(align_bio, rdev->bdev);
|
||||
align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_clone_set);
|
||||
align_bio->bi_end_io = raid5_align_endio;
|
||||
align_bio->bi_private = md_io_acct;
|
||||
align_bio->bi_private = raid_bio;
|
||||
align_bio->bi_iter.bi_sector = sector;
|
||||
bio_set_dev(align_bio, rdev->bdev);
|
||||
|
||||
/* No reshape active, so we can trust rdev->data_offset */
|
||||
align_bio->bi_iter.bi_sector += rdev->data_offset;
|
||||
@ -5909,13 +5853,6 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
|
||||
}
|
||||
spin_unlock_irq(&sh->stripe_lock);
|
||||
if (conf->mddev->bitmap) {
|
||||
for (d = 0;
|
||||
d < conf->raid_disks - conf->max_degraded;
|
||||
d++)
|
||||
md_bitmap_startwrite(mddev->bitmap,
|
||||
sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf),
|
||||
0);
|
||||
sh->bm_seq = conf->seq_flush + 1;
|
||||
set_bit(STRIPE_BIT_DELAY, &sh->state);
|
||||
}
|
||||
@ -6033,6 +5970,87 @@ static bool reshape_disabled(struct mddev *mddev)
|
||||
return is_md_suspended(mddev) || !md_is_rdwr(mddev);
|
||||
}
|
||||
|
||||
enum reshape_loc {
|
||||
LOC_NO_RESHAPE,
|
||||
LOC_AHEAD_OF_RESHAPE,
|
||||
LOC_INSIDE_RESHAPE,
|
||||
LOC_BEHIND_RESHAPE,
|
||||
};
|
||||
|
||||
static enum reshape_loc get_reshape_loc(struct mddev *mddev,
|
||||
struct r5conf *conf, sector_t logical_sector)
|
||||
{
|
||||
sector_t reshape_progress, reshape_safe;
|
||||
/*
|
||||
* Spinlock is needed as reshape_progress may be
|
||||
* 64bit on a 32bit platform, and so it might be
|
||||
* possible to see a half-updated value
|
||||
* Of course reshape_progress could change after
|
||||
* the lock is dropped, so once we get a reference
|
||||
* to the stripe that we think it is, we will have
|
||||
* to check again.
|
||||
*/
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
reshape_progress = conf->reshape_progress;
|
||||
reshape_safe = conf->reshape_safe;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
if (reshape_progress == MaxSector)
|
||||
return LOC_NO_RESHAPE;
|
||||
if (ahead_of_reshape(mddev, logical_sector, reshape_progress))
|
||||
return LOC_AHEAD_OF_RESHAPE;
|
||||
if (ahead_of_reshape(mddev, logical_sector, reshape_safe))
|
||||
return LOC_INSIDE_RESHAPE;
|
||||
return LOC_BEHIND_RESHAPE;
|
||||
}
|
||||
|
||||
static void raid5_bitmap_sector(struct mddev *mddev, sector_t *offset,
|
||||
unsigned long *sectors)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
sector_t start = *offset;
|
||||
sector_t end = start + *sectors;
|
||||
sector_t prev_start = start;
|
||||
sector_t prev_end = end;
|
||||
int sectors_per_chunk;
|
||||
enum reshape_loc loc;
|
||||
int dd_idx;
|
||||
|
||||
sectors_per_chunk = conf->chunk_sectors *
|
||||
(conf->raid_disks - conf->max_degraded);
|
||||
start = round_down(start, sectors_per_chunk);
|
||||
end = round_up(end, sectors_per_chunk);
|
||||
|
||||
start = raid5_compute_sector(conf, start, 0, &dd_idx, NULL);
|
||||
end = raid5_compute_sector(conf, end, 0, &dd_idx, NULL);
|
||||
|
||||
/*
|
||||
* For LOC_INSIDE_RESHAPE, this IO will wait for reshape to make
|
||||
* progress, hence it's the same as LOC_BEHIND_RESHAPE.
|
||||
*/
|
||||
loc = get_reshape_loc(mddev, conf, prev_start);
|
||||
if (likely(loc != LOC_AHEAD_OF_RESHAPE)) {
|
||||
*offset = start;
|
||||
*sectors = end - start;
|
||||
return;
|
||||
}
|
||||
|
||||
sectors_per_chunk = conf->prev_chunk_sectors *
|
||||
(conf->previous_raid_disks - conf->max_degraded);
|
||||
prev_start = round_down(prev_start, sectors_per_chunk);
|
||||
prev_end = round_down(prev_end, sectors_per_chunk);
|
||||
|
||||
prev_start = raid5_compute_sector(conf, prev_start, 1, &dd_idx, NULL);
|
||||
prev_end = raid5_compute_sector(conf, prev_end, 1, &dd_idx, NULL);
|
||||
|
||||
/*
|
||||
* for LOC_AHEAD_OF_RESHAPE, reshape can make progress before this IO
|
||||
* is handled in make_stripe_request(), we can't know this here hence
|
||||
* we set bits for both.
|
||||
*/
|
||||
*offset = min(start, prev_start);
|
||||
*sectors = max(end, prev_end) - *offset;
|
||||
}
|
||||
|
||||
static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
struct r5conf *conf, struct stripe_request_ctx *ctx,
|
||||
sector_t logical_sector, struct bio *bi)
|
||||
@ -6047,28 +6065,14 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
seq = read_seqcount_begin(&conf->gen_lock);
|
||||
|
||||
if (unlikely(conf->reshape_progress != MaxSector)) {
|
||||
/*
|
||||
* Spinlock is needed as reshape_progress may be
|
||||
* 64bit on a 32bit platform, and so it might be
|
||||
* possible to see a half-updated value
|
||||
* Of course reshape_progress could change after
|
||||
* the lock is dropped, so once we get a reference
|
||||
* to the stripe that we think it is, we will have
|
||||
* to check again.
|
||||
*/
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
if (ahead_of_reshape(mddev, logical_sector,
|
||||
conf->reshape_progress)) {
|
||||
previous = 1;
|
||||
} else {
|
||||
if (ahead_of_reshape(mddev, logical_sector,
|
||||
conf->reshape_safe)) {
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
ret = STRIPE_SCHEDULE_AND_RETRY;
|
||||
goto out;
|
||||
}
|
||||
enum reshape_loc loc = get_reshape_loc(mddev, conf,
|
||||
logical_sector);
|
||||
if (loc == LOC_INSIDE_RESHAPE) {
|
||||
ret = STRIPE_SCHEDULE_AND_RETRY;
|
||||
goto out;
|
||||
}
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
if (loc == LOC_AHEAD_OF_RESHAPE)
|
||||
previous = 1;
|
||||
}
|
||||
|
||||
new_sector = raid5_compute_sector(conf, logical_sector, previous,
|
||||
@ -6250,8 +6254,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
||||
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
|
||||
if ((bi->bi_opf & REQ_NOWAIT) &&
|
||||
(conf->reshape_progress != MaxSector) &&
|
||||
!ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
|
||||
ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
|
||||
get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
|
||||
bio_wouldblock_error(bi);
|
||||
if (rw == WRITE)
|
||||
md_write_end(mddev);
|
||||
@ -9158,6 +9161,7 @@ static struct md_personality raid6_personality =
|
||||
.takeover = raid6_takeover,
|
||||
.congested = raid5_congested,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.bitmap_sector = raid5_bitmap_sector,
|
||||
};
|
||||
static struct md_personality raid5_personality =
|
||||
{
|
||||
@ -9184,6 +9188,7 @@ static struct md_personality raid5_personality =
|
||||
.takeover = raid5_takeover,
|
||||
.congested = raid5_congested,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.bitmap_sector = raid5_bitmap_sector,
|
||||
};
|
||||
|
||||
static struct md_personality raid4_personality =
|
||||
@ -9211,6 +9216,7 @@ static struct md_personality raid4_personality =
|
||||
.takeover = raid4_takeover,
|
||||
.congested = raid5_congested,
|
||||
.change_consistency_policy = raid5_change_consistency_policy,
|
||||
.bitmap_sector = raid5_bitmap_sector,
|
||||
};
|
||||
|
||||
static int __init raid5_init(void)
|
||||
|
@@ -358,7 +358,6 @@ enum {
STRIPE_REPLACED,
STRIPE_PREREAD_ACTIVE,
STRIPE_DELAYED,
STRIPE_DEGRADED,
STRIPE_BIT_DELAY,
STRIPE_EXPANDING,
STRIPE_EXPAND_SOURCE,
@@ -372,9 +371,6 @@ enum {
STRIPE_ON_RELEASE_LIST,
STRIPE_BATCH_READY,
STRIPE_BATCH_ERR,
STRIPE_BITMAP_PENDING, /* Being added to bitmap, don't add
* to batch yet.
*/
STRIPE_LOG_TRAPPED, /* trapped into log (see raid5-cache.c)
* this bit is used in two scenarios:
*
@@ -520,11 +520,11 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if,
else
memcpy(cfd->data, rm->d, cfd->len);

peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low));

netdev->stats.rx_packets++;
netdev->stats.rx_bytes += cfd->len;

peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low));

return 0;
}

@@ -586,11 +586,11 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if,
if (!skb)
return -ENOMEM;

peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low));

netdev->stats.rx_packets++;
netdev->stats.rx_bytes += cf->can_dlc;

peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low));

return 0;
}
@@ -309,7 +309,7 @@ static void rtw_coex_tdma_timer_base(struct rtw_dev *rtwdev, u8 type)
{
struct rtw_coex *coex = &rtwdev->coex;
struct rtw_coex_stat *coex_stat = &coex->stat;
u8 para[2] = {0};
u8 para[6] = {};
u8 times;
u16 tbtt_interval = coex_stat->wl_beacon_interval;
@@ -227,12 +227,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#define tlb_needs_table_invalidate() (true)
#endif

void tlb_remove_table_sync_one(void);

#else

#ifdef tlb_needs_table_invalidate
#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
#endif

static inline void tlb_remove_table_sync_one(void) { }

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
@@ -1213,12 +1213,16 @@ EXPORT_SYMBOL(padata_alloc_shell);
*/
void padata_free_shell(struct padata_shell *ps)
{
struct padata_instance *pinst = ps->pinst;
/*
* Wait for all _do_serial calls to finish to avoid touching
* freed pd's and ps's.
*/
synchronize_rcu();

mutex_lock(&pinst->lock);
mutex_lock(&ps->pinst->lock);
list_del(&ps->list);
padata_free_pd(rcu_dereference_protected(ps->pd, 1));
mutex_unlock(&pinst->lock);
mutex_unlock(&ps->pinst->lock);

kfree(ps);
}
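Note: the padata_free_shell() change is a "wait for in-flight readers before freeing" fix: synchronize_rcu() guarantees that any padata_do_serial() call still dereferencing the old pd/ps has finished before the memory is released. A rough userspace analogue of that ordering, using an explicit reader count as a stand-in for an RCU grace period (this illustrates the idea only; it is not how kernel RCU works):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    static atomic_int readers;   /* stand-in for "readers in flight" */

    static void reader_enter(void) { atomic_fetch_add(&readers, 1); }
    static void reader_exit(void)  { atomic_fetch_sub(&readers, 1); }

    /* Crude stand-in for synchronize_rcu(): wait until no reader is active. */
    static void wait_for_readers(void)
    {
        while (atomic_load(&readers) != 0)
            ;   /* real code would sleep instead of busy-waiting */
    }

    int main(void)
    {
        int *shared = malloc(sizeof(*shared));

        *shared = 42;

        reader_enter();
        printf("reader sees %d\n", *shared);
        reader_exit();

        /* Writer: wait for readers, then free -- never the other way
         * around, or a late reader touches freed memory. */
        wait_for_readers();
        free(shared);
        return 0;
    }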
@@ -4314,7 +4314,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
* @ip: The instruction pointer address to remove the data from
*
* Returns the data if it is found, otherwise NULL.
* Note, if the data pointer is used as the data itself, (see
* Note, if the data pointer is used as the data itself, (see
* ftrace_func_mapper_find_ip(), then the return value may be meaningless,
* if the data pointer was set to zero.
*/
@@ -5037,8 +5037,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
__add_hash_entry(direct_functions, entry);

ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
if (ret)
remove_hash_entry(direct_functions, entry);

if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
ret = register_ftrace_function(&direct_ops);
@@ -5047,6 +5045,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
}

if (ret) {
remove_hash_entry(direct_functions, entry);
kfree(entry);
if (!direct->count) {
list_del_rcu(&direct->next);
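Note: the register_ftrace_direct() hunk moves remove_hash_entry() out of the early ftrace_set_filter_ip() error check and into the common `if (ret)` unwind path, so every failure exit tears the entry down exactly once. The shape of the fix is ordinary single-exit error unwinding; a small self-contained C sketch of that structure (function names here are generic stand-ins, not the ftrace API):

    #include <stdio.h>
    #include <stdlib.h>

    struct entry { int key; };

    static int add_entry(struct entry **out)     { *out = malloc(sizeof(**out)); return *out ? 0 : -1; }
    static void remove_entry(struct entry *e)    { free(e); }
    static int set_filter(struct entry *e)       { (void)e; return 0; }  /* pretend this can fail */
    static int register_handler(struct entry *e) { (void)e; return 0; }  /* pretend this can fail */

    static int register_direct(void)
    {
        struct entry *e;
        int ret = add_entry(&e);

        if (ret)
            return ret;

        ret = set_filter(e);
        if (!ret)
            ret = register_handler(e);

        /* Single unwind point: whichever step failed, clean up once. */
        if (ret)
            remove_entry(e);

        return ret;
    }

    int main(void)
    {
        printf("register_direct() -> %d\n", register_direct());
        return 0;
    }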
@@ -5655,6 +5655,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;

pud_clear(pud);
/*
* Once our caller drops the rmap lock, some other process might be
* using this page table as a normal, non-hugetlb page table.
* Wait for pending gup_fast() in other threads to finish before letting
* that happen.
*/
tlb_remove_table_sync_one();
put_page(virt_to_page(ptep));
mm_dec_nr_pmds(mm);
/*
@@ -1118,6 +1118,7 @@ static void collapse_huge_page(struct mm_struct *mm,
_pmd = pmdp_collapse_flush(vma, address, pmd);
spin_unlock(pmd_ptl);
mmu_notifier_invalidate_range_end(&range);
tlb_remove_table_sync_one();

spin_lock(pte_ptl);
isolated = __collapse_huge_page_isolate(vma, address, pte,
@@ -1383,6 +1384,42 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
return 0;
}

/*
* A note about locking:
* Trying to take the page table spinlocks would be useless here because those
* are only used to synchronize:
*
* - modifying terminal entries (ones that point to a data page, not to another
*   page table)
* - installing *new* non-terminal entries
*
* Instead, we need roughly the same kind of protection as free_pgtables() or
* mm_take_all_locks() (but only for a single VMA):
* The mmap lock together with this VMA's rmap locks covers all paths towards
* the page table entries we're messing with here, except for hardware page
* table walks and lockless_pages_from_mm().
*/
static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t pmd;

mmap_assert_write_locked(mm);
if (vma->vm_file)
lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem);
/*
* All anon_vmas attached to the VMA have the same root and are
* therefore locked by the same lock.
*/
if (vma->anon_vma)
lockdep_assert_held_write(&vma->anon_vma->root->rwsem);

pmd = pmdp_collapse_flush(vma, addr, pmdp);
tlb_remove_table_sync_one();
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(pmd));
}

/**
* Try to collapse a pte-mapped THP for mm at address haddr.
*
@@ -1396,7 +1433,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
struct vm_area_struct *vma = find_vma(mm, haddr);
struct page *hpage;
pte_t *start_pte, *pte;
pmd_t *pmd, _pmd;
pmd_t *pmd;
spinlock_t *ptl;
int count = 0;
int i;
@@ -1426,6 +1463,20 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
if (!pmd)
goto drop_hpage;

/*
* We need to lock the mapping so that from here on, only GUP-fast and
* hardware page walks can access the parts of the page tables that
* we're operating on.
* See collapse_and_free_pmd().
*/
i_mmap_lock_write(vma->vm_file->f_mapping);

/*
* This spinlock should be unnecessary: Nobody else should be accessing
* the page tables under spinlock protection here, only
* lockless_pages_from_mm() and the hardware page walker can access page
* tables while all the high-level locks are held in write mode.
*/
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);

/* step 1: check all mapped PTEs are to the right huge page */
@@ -1472,11 +1523,15 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
}

/* step 4: collapse pmd */
ptl = pmd_lock(vma->vm_mm, pmd);
_pmd = pmdp_collapse_flush(vma, haddr, pmd);
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
/* we make no change to anon, but protect concurrent anon page lookup */
if (vma->anon_vma)
anon_vma_lock_write(vma->anon_vma);

collapse_and_free_pmd(mm, vma, haddr, pmd);

if (vma->anon_vma)
anon_vma_unlock_write(vma->anon_vma);
i_mmap_unlock_write(vma->vm_file->f_mapping);

drop_hpage:
unlock_page(hpage);
@@ -1485,6 +1540,7 @@ drop_hpage:

abort:
pte_unmap_unlock(start_pte, ptl);
i_mmap_unlock_write(vma->vm_file->f_mapping);
goto drop_hpage;
}

@@ -1516,7 +1572,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long addr;
pmd_t *pmd, _pmd;
pmd_t *pmd;

i_mmap_lock_write(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
@@ -1534,7 +1590,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* An alternative would be drop the check, but check that page
* table is clear before calling pmdp_collapse_flush() under
* ptl. It has higher chance to recover THP for the VMA, but
* has higher cost too.
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
if (vma->anon_vma)
continue;
@@ -1555,14 +1612,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* reverse order. Trylock is a way to avoid deadlock.
*/
if (mmap_write_trylock(mm)) {
if (!khugepaged_test_exit(mm)) {
spinlock_t *ptl = pmd_lock(mm, pmd);
/* assume page table is clear */
_pmd = pmdp_collapse_flush(vma, addr, pmd);
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
}
if (!khugepaged_test_exit(mm))
collapse_and_free_pmd(mm, vma, addr, pmd);
mmap_write_unlock(mm);
} else {
/* Try again later */
@@ -139,7 +139,7 @@ static void tlb_remove_table_smp_sync(void *arg)
/* Simply deliver the interrupt */
}

static void tlb_remove_table_sync_one(void)
void tlb_remove_table_sync_one(void)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
@@ -163,8 +163,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)

#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */

static void tlb_remove_table_sync_one(void) { }

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
__tlb_remove_table_free(batch);
@@ -1984,21 +1984,21 @@ static void mld_send_cr(struct inet6_dev *idev)

static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
{
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.igmp_sk;
const struct in6_addr *snd_addr, *saddr;
int err, len, payload_len, full_len;
struct in6_addr addr_buf;
struct inet6_dev *idev;
struct sk_buff *skb;
struct mld_msg *hdr;
const struct in6_addr *snd_addr, *saddr;
struct in6_addr addr_buf;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
struct flowi6 fl6;
struct dst_entry *dst;
struct flowi6 fl6;
struct net *net;
struct sock *sk;

if (type == ICMPV6_MGM_REDUCTION)
snd_addr = &in6addr_linklocal_allrouters;
@@ -2009,20 +2009,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
payload_len = len + sizeof(ra);
full_len = sizeof(struct ipv6hdr) + payload_len;

skb = alloc_skb(hlen + tlen + full_len, GFP_ATOMIC);

rcu_read_lock();
IP6_UPD_PO_STATS(net, __in6_dev_get(dev),
IPSTATS_MIB_OUT, full_len);
rcu_read_unlock();

skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);

net = dev_net_rcu(dev);
idev = __in6_dev_get(dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, full_len);
if (!skb) {
rcu_read_lock();
IP6_INC_STATS(net, __in6_dev_get(dev),
IPSTATS_MIB_OUTDISCARDS);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
rcu_read_unlock();
return;
}
sk = net->ipv6.igmp_sk;
skb_set_owner_w(skb, sk);

skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);

@@ -2047,9 +2048,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPPROTO_ICMPV6,
csum_partial(hdr, len, 0));

rcu_read_lock();
idev = __in6_dev_get(skb->dev);

icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
@@ -2408,10 +2408,10 @@ static void ip6_negative_advice(struct sock *sk,
if (rt->rt6i_flags & RTF_CACHE) {
rcu_read_lock();
if (rt6_check_expired(rt)) {
/* counteract the dst_release() in sk_dst_reset() */
dst_hold(dst);
/* rt/dst can not be destroyed yet,
* because of rcu_read_lock()
*/
sk_dst_reset(sk);

rt6_remove_exception_rt(rt);
}
rcu_read_unlock();