Import of kernel-4.18.0-553.69.1.el8_10

eabdullin 2025-09-05 13:00:16 +00:00
parent 8e6d029cd2
commit 249be1f988
24 changed files with 355 additions and 295 deletions

Makefile.rhelver View File

@ -12,7 +12,7 @@ RHEL_MINOR = 10
#
# Use this spot to avoid future merge conflicts.
# Do not trim this comment.
RHEL_RELEASE = 553.66.1
RHEL_RELEASE = 553.69.1
#
# ZSTREAM

drivers/md/md-bitmap.c View File

@ -1439,22 +1439,12 @@ __acquires(bitmap->lock)
&(bitmap->bp[page].map[pageoff]);
}
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors)
{
if (!bitmap)
return 0;
if (behind) {
int bw;
atomic_inc(&bitmap->behind_writes);
bw = atomic_read(&bitmap->behind_writes);
if (bw > bitmap->behind_writes_used)
bitmap->behind_writes_used = bw;
pr_debug("inc write-behind count %d/%lu\n",
bw, bitmap->mddev->bitmap_info.max_write_behind);
}
while (sectors) {
sector_t blocks;
bitmap_counter_t *bmc;
@ -1504,17 +1494,10 @@ int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long s
EXPORT_SYMBOL(md_bitmap_startwrite);
void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int success, int behind)
unsigned long sectors)
{
if (!bitmap)
return;
if (behind) {
if (atomic_dec_and_test(&bitmap->behind_writes))
wake_up(&bitmap->behind_wait);
pr_debug("dec write-behind count %d/%lu\n",
atomic_read(&bitmap->behind_writes),
bitmap->mddev->bitmap_info.max_write_behind);
}
while (sectors) {
sector_t blocks;
@ -1528,15 +1511,16 @@ void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
return;
}
if (success && !bitmap->mddev->degraded &&
bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
}
if (!success && !NEEDED(*bmc))
if (!bitmap->mddev->degraded) {
if (bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
sysfs_notify_dirent_safe(
bitmap->sysfs_can_clear);
}
} else if (!NEEDED(*bmc)) {
*bmc |= NEEDED_MASK;
}
if (COUNTER(*bmc) == COUNTER_MAX)
wake_up(&bitmap->overflow_wait);
@ -1816,6 +1800,39 @@ void md_bitmap_free(struct bitmap *bitmap)
}
EXPORT_SYMBOL(md_bitmap_free);
void md_bitmap_start_behind_write(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
int bw;
if (!bitmap)
return;
atomic_inc(&bitmap->behind_writes);
bw = atomic_read(&bitmap->behind_writes);
if (bw > bitmap->behind_writes_used)
bitmap->behind_writes_used = bw;
pr_debug("inc write-behind count %d/%lu\n",
bw, bitmap->mddev->bitmap_info.max_write_behind);
}
EXPORT_SYMBOL(md_bitmap_start_behind_write);
void md_bitmap_end_behind_write(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return;
if (atomic_dec_and_test(&bitmap->behind_writes))
wake_up(&bitmap->behind_wait);
pr_debug("dec write-behind count %d/%lu\n",
atomic_read(&bitmap->behind_writes),
bitmap->mddev->bitmap_info.max_write_behind);
}
EXPORT_SYMBOL(md_bitmap_end_behind_write);
void md_bitmap_wait_behind_writes(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
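
The write-behind bookkeeping that md_bitmap_startwrite()/md_bitmap_endwrite() used to do under their behind/success flags now lives in the dedicated md_bitmap_start_behind_write()/md_bitmap_end_behind_write() helpers above, so the core start/end paths only track the per-chunk counters. The helper pattern is an atomic in-flight counter with a recorded peak and a wake-up once the count drops to zero; a minimal userspace sketch of that pattern (C11 atomics, illustrative names, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int behind_writes;      /* writes currently in flight */
static int behind_writes_used;        /* high-water mark, like bitmap->behind_writes_used */

static void start_behind_write(void)
{
        int bw = atomic_fetch_add(&behind_writes, 1) + 1;

        if (bw > behind_writes_used)  /* record the peak for reporting */
                behind_writes_used = bw;
}

static void end_behind_write(void)
{
        /* the last writer out would wake waiters; here we just report it */
        if (atomic_fetch_sub(&behind_writes, 1) == 1)
                printf("all behind writes finished (peak %d)\n", behind_writes_used);
}

int main(void)
{
        start_behind_write();
        start_behind_write();
        end_behind_write();
        end_behind_write();           /* prints: all behind writes finished (peak 2) */
        return 0;
}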

drivers/md/md-bitmap.h View File

@ -252,9 +252,9 @@ void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long
/* these are exported */
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int behind);
unsigned long sectors);
void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
unsigned long sectors, int success, int behind);
unsigned long sectors);
int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void md_bitmap_close_sync(struct bitmap *bitmap);
@ -274,6 +274,8 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
sector_t *lo, sector_t *hi, bool clear_bits);
void md_bitmap_free(struct bitmap *bitmap);
void md_bitmap_wait_behind_writes(struct mddev *mddev);
void md_bitmap_start_behind_write(struct mddev *mddev);
void md_bitmap_end_behind_write(struct mddev *mddev);
static inline bool md_bitmap_enabled(struct bitmap *bitmap)
{

drivers/md/md-linear.c View File

@ -279,6 +279,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
bio = split;
}
md_account_bio(mddev, &bio);
bio_set_dev(bio, tmp_dev->rdev->bdev);
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
start_sector + data_offset;

drivers/md/md-multipath.c View File

@ -116,6 +116,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
&& md_flush_request(mddev, bio))
return true;
md_account_bio(mddev, &bio);
mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
mp_bh->master_bio = bio;

drivers/md/md.c View File

@ -2367,7 +2367,7 @@ int md_integrity_register(struct mddev *mddev)
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
(mddev->level != 1 && mddev->level != 10 &&
bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
/*
* No need to handle the failure of bioset_integrity_create,
* because the function is called by md_run() -> pers->run(),
@ -5984,9 +5984,9 @@ int md_run(struct mddev *mddev)
goto exit_bio_set;
}
if (!bioset_initialized(&mddev->io_acct_set)) {
err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
offsetof(struct md_io_acct, bio_clone), 0);
if (!bioset_initialized(&mddev->io_clone_set)) {
err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
offsetof(struct md_io_clone, bio_clone), 0);
if (err)
goto exit_sync_set;
}
@ -6172,7 +6172,7 @@ bitmap_abort:
module_put(pers->owner);
md_bitmap_destroy(mddev);
abort:
bioset_exit(&mddev->io_acct_set);
bioset_exit(&mddev->io_clone_set);
exit_sync_set:
bioset_exit(&mddev->sync_set);
exit_bio_set:
@ -6398,7 +6398,7 @@ static void __md_stop(struct mddev *mddev)
percpu_ref_exit(&mddev->active_io);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
bioset_exit(&mddev->io_acct_set);
bioset_exit(&mddev->io_clone_set);
}
void md_stop(struct mddev *mddev)
@ -8768,44 +8768,69 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
static void md_end_io_acct(struct bio *bio)
static void md_bitmap_start(struct mddev *mddev,
struct md_io_clone *md_io_clone)
{
struct md_io_acct *md_io_acct = bio->bi_private;
struct bio *orig_bio = md_io_acct->orig_bio;
struct mddev *mddev = md_io_acct->mddev;
if (mddev->pers->bitmap_sector)
mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
&md_io_clone->sectors);
md_bitmap_startwrite(mddev->bitmap, md_io_clone->offset,
md_io_clone->sectors);
}
static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
{
md_bitmap_endwrite(mddev->bitmap, md_io_clone->offset,
md_io_clone->sectors);
}
static void md_end_clone_io(struct bio *bio)
{
struct md_io_clone *md_io_clone = bio->bi_private;
struct bio *orig_bio = md_io_clone->orig_bio;
struct mddev *mddev = md_io_clone->mddev;
if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
md_bitmap_end(mddev, md_io_clone);
orig_bio->bi_status = bio->bi_status;
bio_end_io_acct(orig_bio, md_io_acct->start_time);
if (md_io_clone->start_time)
bio_end_io_acct(orig_bio, md_io_clone->start_time);
bio_put(bio);
bio_endio(orig_bio);
percpu_ref_put(&mddev->active_io);
}
/*
* Used by personalities that don't already clone the bio and thus can't
* easily add the timestamp to their extended bio structure.
*/
static void md_clone_bio(struct mddev *mddev, struct bio **bio)
{
struct md_io_clone *md_io_clone;
struct bio *clone =
bio_clone_fast(*bio, GFP_NOIO, &mddev->io_clone_set);
md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
md_io_clone->orig_bio = *bio;
md_io_clone->mddev = mddev;
if (blk_queue_io_stat((*bio)->bi_disk->queue))
md_io_clone->start_time = bio_start_io_acct(*bio);
if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
md_io_clone->offset = (*bio)->bi_iter.bi_sector;
md_io_clone->sectors = bio_sectors(*bio);
md_bitmap_start(mddev, md_io_clone);
}
clone->bi_end_io = md_end_clone_io;
clone->bi_private = md_io_clone;
*bio = clone;
}
void md_account_bio(struct mddev *mddev, struct bio **bio)
{
struct md_io_acct *md_io_acct;
struct bio *clone;
if (!blk_queue_io_stat((*bio)->bi_disk->queue))
return;
percpu_ref_get(&mddev->active_io);
clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
md_io_acct->orig_bio = *bio;
md_io_acct->start_time = bio_start_io_acct(*bio);
md_io_acct->mddev = mddev;
clone->bi_end_io = md_end_io_acct;
clone->bi_private = md_io_acct;
*bio = clone;
md_clone_bio(mddev, bio);
}
EXPORT_SYMBOL_GPL(md_account_bio);
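
md_account_bio() now always routes through md_clone_bio(), which allocates the clone from io_clone_set; the bio_set's front pad (offsetof(struct md_io_clone, bio_clone)) embeds the bio inside struct md_io_clone, and container_of() maps the clone back to its wrapper. A self-contained sketch of that embed-and-recover pattern (generic struct and field names, not the kernel types):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* the "inner" object handed to other code, e.g. a cloned bio */
struct inner {
        int id;
};

/* wrapper carrying per-I/O bookkeeping around the embedded inner object */
struct wrapper {
        unsigned long start_time;
        struct inner member;          /* must be the embedded field */
};

#define container_of(ptr, type, field) \
        ((type *)((char *)(ptr) - offsetof(type, field)))

static void completion(struct inner *in)
{
        /* recover the wrapper the way md_clone_bio() maps the clone to md_io_clone */
        struct wrapper *w = container_of(in, struct wrapper, member);

        printf("id=%d start_time=%lu\n", in->id, w->start_time);
}

int main(void)
{
        struct wrapper *w = malloc(sizeof(*w));

        w->start_time = 42;
        w->member.id = 7;
        completion(&w->member);       /* only the inner pointer is passed around */
        free(w);
        return 0;
}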

drivers/md/md.h View File

@ -517,7 +517,7 @@ struct mddev {
struct bio_set sync_set; /* for sync operations like
* metadata and bitmap writes
*/
struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */
struct bio_set io_clone_set;
/* Generic flush handling.
* The last to finish preflush schedules a worker to submit
@ -662,6 +662,9 @@ struct md_personality
int (*congested)(struct mddev *mddev, int bits);
/* Changes the consistency policy of an active array. */
int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
/* convert io ranges from array to bitmap */
void (*bitmap_sector)(struct mddev *mddev, sector_t *offset,
unsigned long *sectors);
};
struct md_sysfs_entry {
@ -740,10 +743,12 @@ struct md_thread {
void *private;
};
struct md_io_acct {
struct md_io_clone {
struct mddev *mddev;
struct bio *orig_bio;
unsigned long start_time;
sector_t offset;
unsigned long sectors;
struct bio bio_clone;
};

drivers/md/raid1.c View File

@ -308,8 +308,6 @@ static void call_bio_endio(struct r1bio *r1_bio)
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
bio->bi_status = BLK_STS_IOERR;
if (blk_queue_io_stat(bio->bi_disk->queue))
bio_end_io_acct(bio, r1_bio->start_time);
bio_endio(bio);
}
@ -426,11 +424,8 @@ static void close_write(struct r1bio *r1_bio)
bio_put(r1_bio->behind_master_bio);
r1_bio->behind_master_bio = NULL;
}
/* clear the bitmap if all writes complete successfully */
md_bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
r1_bio->sectors,
!test_bit(R1BIO_Degraded, &r1_bio->state),
test_bit(R1BIO_BehindIO, &r1_bio->state));
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
md_bitmap_end_behind_write(r1_bio->mddev);
md_write_end(r1_bio->mddev);
}
@ -487,8 +482,6 @@ static void raid1_end_write_request(struct bio *bio)
if (!test_bit(Faulty, &rdev->flags))
set_bit(R1BIO_WriteError, &r1_bio->state);
else {
/* Fail the request */
set_bit(R1BIO_Degraded, &r1_bio->state);
/* Finished with this branch */
r1_bio->bios[mirror] = NULL;
to_put = bio;
@ -1342,10 +1335,10 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
}
r1_bio->read_disk = rdisk;
if (!r1bio_existed && blk_queue_io_stat(bio->bi_disk->queue))
r1_bio->start_time = bio_start_io_acct(bio);
if (!r1bio_existed) {
md_account_bio(mddev, &bio);
r1_bio->master_bio = bio;
}
read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
r1_bio->bios[rdisk] = read_bio;
@ -1454,11 +1447,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
break;
}
r1_bio->bios[i] = NULL;
if (!rdev || test_bit(Faulty, &rdev->flags)) {
if (i < conf->raid_disks)
set_bit(R1BIO_Degraded, &r1_bio->state);
if (!rdev || test_bit(Faulty, &rdev->flags))
continue;
}
atomic_inc(&rdev->nr_pending);
if (test_bit(WriteErrorSeen, &rdev->flags)) {
@ -1484,16 +1474,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
*/
max_sectors = bad_sectors;
rdev_dec_pending(rdev, mddev);
/* We don't set R1BIO_Degraded as that
* only applies if the disk is
* missing, so it might be re-added,
* and we want to know to recover this
* chunk.
* In this case the device is here,
* and the fact that this chunk is not
* in-sync is recorded in the bad
* block log
*/
continue;
}
if (is_bad) {
@ -1545,8 +1525,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
r1_bio->sectors = max_sectors;
}
if (blk_queue_io_stat(bio->bi_disk->queue))
r1_bio->start_time = bio_start_io_acct(bio);
md_account_bio(mddev, &bio);
r1_bio->master_bio = bio;
atomic_set(&r1_bio->remaining, 1);
atomic_set(&r1_bio->behind_remaining, 0);
@ -1570,8 +1550,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
alloc_behind_master_bio(r1_bio, bio);
}
md_bitmap_startwrite(bitmap, r1_bio->sector, r1_bio->sectors,
test_bit(R1BIO_BehindIO, &r1_bio->state));
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
md_bitmap_start_behind_write(mddev);
first_clone = 0;
}
@ -2525,12 +2505,10 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
* errors.
*/
fail = true;
if (!narrow_write_error(r1_bio, m)) {
if (!narrow_write_error(r1_bio, m))
md_error(conf->mddev,
conf->mirrors[m].rdev);
/* an I/O failed, we can't clear the bitmap */
set_bit(R1BIO_Degraded, &r1_bio->state);
}
rdev_dec_pending(conf->mirrors[m].rdev,
conf->mddev);
}
@ -2622,8 +2600,6 @@ static void raid1d(struct md_thread *thread)
list_del(&r1_bio->retry_list);
idx = sector_to_idx(r1_bio->sector);
atomic_dec(&conf->nr_queued[idx]);
if (mddev->degraded)
set_bit(R1BIO_Degraded, &r1_bio->state);
if (test_bit(R1BIO_WriteError, &r1_bio->state))
close_write(r1_bio);
raid_end_bio_io(r1_bio);

drivers/md/raid1.h View File

@ -158,7 +158,6 @@ struct r1bio {
sector_t sector;
int sectors;
unsigned long state;
unsigned long start_time;
struct mddev *mddev;
/*
* original bio going to /dev/mdx
@ -189,7 +188,6 @@ struct r1bio {
enum r1bio_state {
R1BIO_Uptodate,
R1BIO_IsSync,
R1BIO_Degraded,
R1BIO_BehindIO,
/* Set ReadError on bios that experience a readerror so that
* raid1d knows what to do with them.

drivers/md/raid10.c View File

@ -327,8 +327,6 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
bio->bi_status = BLK_STS_IOERR;
if (blk_queue_io_stat(bio->bi_disk->queue))
bio_end_io_acct(bio, r10_bio->start_time);
bio_endio(bio);
/*
* Wake up any possible resync thread that waits for the device
@ -432,11 +430,6 @@ static void raid10_end_read_request(struct bio *bio)
static void close_write(struct r10bio *r10_bio)
{
/* clear the bitmap if all writes complete successfully */
md_bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
r10_bio->sectors,
!test_bit(R10BIO_Degraded, &r10_bio->state),
0);
md_write_end(r10_bio->mddev);
}
@ -506,7 +499,6 @@ static void raid10_end_write_request(struct bio *bio)
set_bit(R10BIO_WriteError, &r10_bio->state);
else {
/* Fail the request */
set_bit(R10BIO_Degraded, &r10_bio->state);
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
@ -1206,7 +1198,7 @@ static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
}
static void raid10_read_request(struct mddev *mddev, struct bio *bio,
struct r10bio *r10_bio)
struct r10bio *r10_bio, bool io_accounting)
{
struct r10conf *conf = mddev->private;
struct bio *read_bio;
@ -1277,8 +1269,10 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
}
slot = r10_bio->read_slot;
if (blk_queue_io_stat(bio->bi_disk->queue))
r10_bio->start_time = bio_start_io_acct(bio);
if (io_accounting) {
md_account_bio(mddev, &bio);
r10_bio->master_bio = bio;
}
read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
r10_bio->devs[slot].bio = read_bio;
@ -1526,10 +1520,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->devs[i].bio = NULL;
r10_bio->devs[i].repl_bio = NULL;
if (!rdev && !rrdev) {
set_bit(R10BIO_Degraded, &r10_bio->state);
if (!rdev && !rrdev)
continue;
}
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
@ -1546,14 +1538,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
* to other devices yet
*/
max_sectors = bad_sectors;
/* We don't set R10BIO_Degraded as that
* only applies if the disk is missing,
* so it might be re-added, and we want to
* know to recover this chunk.
* In this case the device is here, and the
* fact that this chunk is not in-sync is
* recorded in the bad block log.
*/
continue;
}
if (is_bad) {
@ -1587,10 +1571,9 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->master_bio = bio;
}
if (blk_queue_io_stat(bio->bi_disk->queue))
r10_bio->start_time = bio_start_io_acct(bio);
md_account_bio(mddev, &bio);
r10_bio->master_bio = bio;
atomic_set(&r10_bio->remaining, 1);
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
for (i = 0; i < conf->copies; i++) {
if (r10_bio->devs[i].bio)
@ -1619,7 +1602,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
conf->geo.raid_disks);
if (bio_data_dir(bio) == READ)
raid10_read_request(mddev, bio, r10_bio);
raid10_read_request(mddev, bio, r10_bio, true);
else
raid10_write_request(mddev, bio, r10_bio);
}
@ -3040,7 +3023,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
rdev_dec_pending(rdev, mddev);
r10_bio->state = 0;
raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
raid10_read_request(mddev, r10_bio->master_bio, r10_bio, false);
/*
* allow_barrier after re-submit to ensure no sync io
* can be issued while regular io pending.
@ -3112,11 +3095,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_dec_pending(rdev, conf->mddev);
} else if (bio != NULL && bio->bi_status) {
fail = true;
if (!narrow_write_error(r10_bio, m)) {
if (!narrow_write_error(r10_bio, m))
md_error(conf->mddev, rdev);
set_bit(R10BIO_Degraded,
&r10_bio->state);
}
rdev_dec_pending(rdev, conf->mddev);
}
bio = r10_bio->devs[m].repl_bio;
@ -3175,8 +3155,6 @@ static void raid10d(struct md_thread *thread)
r10_bio = list_first_entry(&tmp, struct r10bio,
retry_list);
list_del(&r10_bio->retry_list);
if (mddev->degraded)
set_bit(R10BIO_Degraded, &r10_bio->state);
if (test_bit(R10BIO_WriteError,
&r10_bio->state))

drivers/md/raid10.h View File

@ -124,7 +124,6 @@ struct r10bio {
sector_t sector; /* virtual sector number */
int sectors;
unsigned long state;
unsigned long start_time;
struct mddev *mddev;
/*
* original bio going to /dev/mdx
@ -163,7 +162,6 @@ enum r10bio_state {
R10BIO_IsSync,
R10BIO_IsRecover,
R10BIO_IsReshape,
R10BIO_Degraded,
/* Set ReadError on bios that experience a read error
* so that raid10d knows what to do with them.
*/

drivers/md/raid5-cache.c View File

@ -322,10 +322,6 @@ void r5c_handle_cached_data_endio(struct r5conf *conf,
if (sh->dev[i].written) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
r5c_return_dev_pending_writes(conf, &sh->dev[i]);
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf),
!test_bit(STRIPE_DEGRADED, &sh->state),
0);
}
}
}

drivers/md/raid5.c View File

@ -915,8 +915,7 @@ static bool stripe_can_batch(struct stripe_head *sh)
if (raid5_has_log(conf) || raid5_has_ppl(conf))
return false;
return test_bit(STRIPE_BATCH_READY, &sh->state) &&
!test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
is_full_stripe_write(sh);
is_full_stripe_write(sh);
}
/* we only do back search */
@ -1372,8 +1371,6 @@ again:
generic_make_request(rbi);
}
if (!rdev && !rrdev) {
if (op_is_write(op))
set_bit(STRIPE_DEGRADED, &sh->state);
pr_debug("skip op %d on disc %d for sector %llu\n",
bi->bi_opf, i, (unsigned long long)sh->sector);
clear_bit(R5_LOCKED, &sh->dev[i].flags);
@ -2949,7 +2946,6 @@ static void raid5_end_write_request(struct bio *bi)
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
} else {
if (bi->bi_status) {
set_bit(STRIPE_DEGRADED, &sh->state);
set_bit(WriteErrorSeen, &rdev->flags);
set_bit(R5_WriteError, &sh->dev[i].flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
@ -3616,29 +3612,9 @@ static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
(*bip)->bi_iter.bi_sector, sh->sector, dd_idx,
sh->dev[dd_idx].sector);
if (conf->mddev->bitmap && firstwrite) {
/* Cannot hold spinlock over bitmap_startwrite,
* but must ensure this isn't added to a batch until
* we have added to the bitmap and set bm_seq.
* So set STRIPE_BITMAP_PENDING to prevent
* batching.
* If multiple __add_stripe_bio() calls race here they
* much all set STRIPE_BITMAP_PENDING. So only the first one
* to complete "bitmap_startwrite" gets to set
* STRIPE_BIT_DELAY. This is important as once a stripe
* is added to a batch, STRIPE_BIT_DELAY cannot be changed
* any more.
*/
set_bit(STRIPE_BITMAP_PENDING, &sh->state);
spin_unlock_irq(&sh->stripe_lock);
md_bitmap_startwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0);
spin_lock_irq(&sh->stripe_lock);
clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
if (!sh->batch_head) {
sh->bm_seq = conf->seq_flush+1;
set_bit(STRIPE_BIT_DELAY, &sh->state);
}
if (conf->mddev->bitmap && firstwrite && !sh->batch_head) {
sh->bm_seq = conf->seq_flush+1;
set_bit(STRIPE_BIT_DELAY, &sh->state);
}
}
@ -3689,7 +3665,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
BUG_ON(sh->batch_head);
for (i = disks; i--; ) {
struct bio *bi;
int bitmap_end = 0;
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
struct md_rdev *rdev;
@ -3716,8 +3691,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].towrite = NULL;
sh->overwrite_disks = 0;
spin_unlock_irq(&sh->stripe_lock);
if (bi)
bitmap_end = 1;
log_stripe_write_finished(sh);
@ -3732,10 +3705,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
bio_io_error(bi);
bi = nextbi;
}
if (bitmap_end)
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0, 0);
bitmap_end = 0;
/* and fail all 'written' */
bi = sh->dev[i].written;
sh->dev[i].written = NULL;
@ -3744,7 +3713,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].page = sh->dev[i].orig_page;
}
if (bi) bitmap_end = 1;
while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
@ -3778,9 +3746,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
bi = nextbi;
}
}
if (bitmap_end)
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf), 0, 0);
/* If we were in the middle of a write the parity block might
* still be locked - so just clear all R5_LOCKED flags
*/
@ -4131,10 +4096,6 @@ returnbi:
bio_endio(wbi);
wbi = wbi2;
}
md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
RAID5_STRIPE_SECTORS(conf),
!test_bit(STRIPE_DEGRADED, &sh->state),
0);
if (head_sh->batch_head) {
sh = list_first_entry(&sh->batch_list,
struct stripe_head,
@ -4411,7 +4372,6 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
s->locked++;
set_bit(R5_Wantwrite, &dev->flags);
clear_bit(STRIPE_DEGRADED, &sh->state);
set_bit(STRIPE_INSYNC, &sh->state);
break;
case check_state_run:
@ -4568,7 +4528,6 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
clear_bit(R5_Wantwrite, &dev->flags);
s->locked--;
}
clear_bit(STRIPE_DEGRADED, &sh->state);
set_bit(STRIPE_INSYNC, &sh->state);
break;
@ -4968,8 +4927,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
(1 << STRIPE_COMPUTE_RUN) |
(1 << STRIPE_DISCARD) |
(1 << STRIPE_BATCH_READY) |
(1 << STRIPE_BATCH_ERR) |
(1 << STRIPE_BITMAP_PENDING)),
(1 << STRIPE_BATCH_ERR)),
"stripe state: %lx\n", sh->state);
WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
(1 << STRIPE_REPLACED)),
@ -4977,7 +4935,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
(1 << STRIPE_PREREAD_ACTIVE) |
(1 << STRIPE_DEGRADED) |
(1 << STRIPE_ON_UNPLUG_LIST)),
head_sh->state & (1 << STRIPE_INSYNC));
@ -5522,26 +5479,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
*/
static void raid5_align_endio(struct bio *bi)
{
struct md_io_acct *md_io_acct = bi->bi_private;
struct bio *raid_bi = md_io_acct->orig_bio;
struct mddev *mddev;
struct r5conf *conf;
struct md_rdev *rdev;
struct bio *raid_bi = bi->bi_private;
struct md_rdev *rdev = (void *)raid_bi->bi_next;
struct mddev *mddev = rdev->mddev;
struct r5conf *conf = mddev->private;
blk_status_t error = bi->bi_status;
unsigned long start_time = md_io_acct->start_time;
bio_put(bi);
rdev = (void*)raid_bi->bi_next;
raid_bi->bi_next = NULL;
mddev = rdev->mddev;
conf = mddev->private;
rdev_dec_pending(rdev, conf->mddev);
if (!error) {
if (blk_queue_io_stat(raid_bi->bi_disk->queue))
bio_end_io_acct(raid_bi, start_time);
bio_endio(raid_bi);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_quiescent);
@ -5560,7 +5508,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
struct md_rdev *rdev;
sector_t sector, end_sector, first_bad;
int bad_sectors, dd_idx;
struct md_io_acct *md_io_acct;
bool did_inc;
if (!in_chunk_boundary(mddev, raid_bio)) {
@ -5597,17 +5544,14 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
return 0;
}
align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set);
md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
md_account_bio(mddev, &raid_bio);
raid_bio->bi_next = (void *)rdev;
if (blk_queue_io_stat(raid_bio->bi_disk->queue))
md_io_acct->start_time = bio_start_io_acct(raid_bio);
md_io_acct->orig_bio = raid_bio;
bio_set_dev(align_bio, rdev->bdev);
align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_clone_set);
align_bio->bi_end_io = raid5_align_endio;
align_bio->bi_private = md_io_acct;
align_bio->bi_private = raid_bio;
align_bio->bi_iter.bi_sector = sector;
bio_set_dev(align_bio, rdev->bdev);
/* No reshape active, so we can trust rdev->data_offset */
align_bio->bi_iter.bi_sector += rdev->data_offset;
@ -5909,13 +5853,6 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
}
spin_unlock_irq(&sh->stripe_lock);
if (conf->mddev->bitmap) {
for (d = 0;
d < conf->raid_disks - conf->max_degraded;
d++)
md_bitmap_startwrite(mddev->bitmap,
sh->sector,
RAID5_STRIPE_SECTORS(conf),
0);
sh->bm_seq = conf->seq_flush + 1;
set_bit(STRIPE_BIT_DELAY, &sh->state);
}
@ -6033,6 +5970,87 @@ static bool reshape_disabled(struct mddev *mddev)
return is_md_suspended(mddev) || !md_is_rdwr(mddev);
}
enum reshape_loc {
LOC_NO_RESHAPE,
LOC_AHEAD_OF_RESHAPE,
LOC_INSIDE_RESHAPE,
LOC_BEHIND_RESHAPE,
};
static enum reshape_loc get_reshape_loc(struct mddev *mddev,
struct r5conf *conf, sector_t logical_sector)
{
sector_t reshape_progress, reshape_safe;
/*
* Spinlock is needed as reshape_progress may be
* 64bit on a 32bit platform, and so it might be
* possible to see a half-updated value
* Of course reshape_progress could change after
* the lock is dropped, so once we get a reference
* to the stripe that we think it is, we will have
* to check again.
*/
spin_lock_irq(&conf->device_lock);
reshape_progress = conf->reshape_progress;
reshape_safe = conf->reshape_safe;
spin_unlock_irq(&conf->device_lock);
if (reshape_progress == MaxSector)
return LOC_NO_RESHAPE;
if (ahead_of_reshape(mddev, logical_sector, reshape_progress))
return LOC_AHEAD_OF_RESHAPE;
if (ahead_of_reshape(mddev, logical_sector, reshape_safe))
return LOC_INSIDE_RESHAPE;
return LOC_BEHIND_RESHAPE;
}
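
get_reshape_loc() collapses the old open-coded checks into one classification: a sector at or beyond reshape_progress still uses the previous geometry, a sector between reshape_safe and reshape_progress has been reshaped but not yet checkpointed so the IO must be retried, and anything below reshape_safe can use the new geometry. A standalone sketch of that three-way test for a forward reshape (made-up sector values; the MaxSector check, the device_lock snapshot and the backwards-reshape case are omitted):

#include <stdio.h>

typedef unsigned long long sector_t;

enum reshape_loc { LOC_AHEAD_OF_RESHAPE, LOC_INSIDE_RESHAPE, LOC_BEHIND_RESHAPE };

/* forward (non-backwards) reshape assumed: "ahead" means at or above the mark */
static enum reshape_loc classify(sector_t sector, sector_t progress, sector_t safe)
{
        if (sector >= progress)
                return LOC_AHEAD_OF_RESHAPE;   /* old geometry still valid */
        if (sector >= safe)
                return LOC_INSIDE_RESHAPE;     /* reshaped but not checkpointed: retry */
        return LOC_BEHIND_RESHAPE;             /* new geometry committed */
}

int main(void)
{
        sector_t progress = 1000, safe = 800;
        sector_t samples[] = { 1500, 900, 100 };
        static const char *name[] = { "ahead", "inside", "behind" };

        for (int i = 0; i < 3; i++)
                printf("sector %llu: %s of reshape\n", samples[i],
                       name[classify(samples[i], progress, safe)]);
        return 0;
}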
static void raid5_bitmap_sector(struct mddev *mddev, sector_t *offset,
unsigned long *sectors)
{
struct r5conf *conf = mddev->private;
sector_t start = *offset;
sector_t end = start + *sectors;
sector_t prev_start = start;
sector_t prev_end = end;
int sectors_per_chunk;
enum reshape_loc loc;
int dd_idx;
sectors_per_chunk = conf->chunk_sectors *
(conf->raid_disks - conf->max_degraded);
start = round_down(start, sectors_per_chunk);
end = round_up(end, sectors_per_chunk);
start = raid5_compute_sector(conf, start, 0, &dd_idx, NULL);
end = raid5_compute_sector(conf, end, 0, &dd_idx, NULL);
/*
* For LOC_INSIDE_RESHAPE, this IO will wait for reshape to make
* progress, hence it's the same as LOC_BEHIND_RESHAPE.
*/
loc = get_reshape_loc(mddev, conf, prev_start);
if (likely(loc != LOC_AHEAD_OF_RESHAPE)) {
*offset = start;
*sectors = end - start;
return;
}
sectors_per_chunk = conf->prev_chunk_sectors *
(conf->previous_raid_disks - conf->max_degraded);
prev_start = round_down(prev_start, sectors_per_chunk);
prev_end = round_down(prev_end, sectors_per_chunk);
prev_start = raid5_compute_sector(conf, prev_start, 1, &dd_idx, NULL);
prev_end = raid5_compute_sector(conf, prev_end, 1, &dd_idx, NULL);
/*
* for LOC_AHEAD_OF_RESHAPE, reshape can make progress before this IO
* is handled in make_stripe_request(), we can't know this here hence
* we set bits for both.
*/
*offset = min(start, prev_start);
*sectors = max(end, prev_end) - *offset;
}
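
Before translating to bitmap sectors, raid5_bitmap_sector() widens the array-relative range to whole multiples of a data stripe (chunk_sectors times the number of data disks), so the bitmap covers every stripe the write touches. A standalone sketch of just that rounding step with made-up geometry (chunk of 128 sectors, 3 data disks); the raid5_compute_sector() translation and the reshape cases are left out:

#include <stdio.h>

typedef unsigned long long sector_t;

static sector_t round_down_to(sector_t x, sector_t step)
{
        return x - (x % step);
}

static sector_t round_up_to(sector_t x, sector_t step)
{
        return ((x + step - 1) / step) * step;
}

int main(void)
{
        sector_t chunk_sectors = 128, data_disks = 3;
        sector_t sectors_per_chunk = chunk_sectors * data_disks;   /* one full stripe of data */
        sector_t offset = 1000, sectors = 50;                      /* incoming write range */

        sector_t start = round_down_to(offset, sectors_per_chunk);
        sector_t end = round_up_to(offset + sectors, sectors_per_chunk);

        /* prints: bitmap range: 768 + 384 sectors */
        printf("bitmap range: %llu + %llu sectors\n", start, end - start);
        return 0;
}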
static enum stripe_result make_stripe_request(struct mddev *mddev,
struct r5conf *conf, struct stripe_request_ctx *ctx,
sector_t logical_sector, struct bio *bi)
@ -6047,28 +6065,14 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
seq = read_seqcount_begin(&conf->gen_lock);
if (unlikely(conf->reshape_progress != MaxSector)) {
/*
* Spinlock is needed as reshape_progress may be
* 64bit on a 32bit platform, and so it might be
* possible to see a half-updated value
* Of course reshape_progress could change after
* the lock is dropped, so once we get a reference
* to the stripe that we think it is, we will have
* to check again.
*/
spin_lock_irq(&conf->device_lock);
if (ahead_of_reshape(mddev, logical_sector,
conf->reshape_progress)) {
previous = 1;
} else {
if (ahead_of_reshape(mddev, logical_sector,
conf->reshape_safe)) {
spin_unlock_irq(&conf->device_lock);
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out;
}
enum reshape_loc loc = get_reshape_loc(mddev, conf,
logical_sector);
if (loc == LOC_INSIDE_RESHAPE) {
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out;
}
spin_unlock_irq(&conf->device_lock);
if (loc == LOC_AHEAD_OF_RESHAPE)
previous = 1;
}
new_sector = raid5_compute_sector(conf, logical_sector, previous,
@ -6250,8 +6254,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
if ((bi->bi_opf & REQ_NOWAIT) &&
(conf->reshape_progress != MaxSector) &&
!ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
bio_wouldblock_error(bi);
if (rw == WRITE)
md_write_end(mddev);
@ -9158,6 +9161,7 @@ static struct md_personality raid6_personality =
.takeover = raid6_takeover,
.congested = raid5_congested,
.change_consistency_policy = raid5_change_consistency_policy,
.bitmap_sector = raid5_bitmap_sector,
};
static struct md_personality raid5_personality =
{
@ -9184,6 +9188,7 @@ static struct md_personality raid5_personality =
.takeover = raid5_takeover,
.congested = raid5_congested,
.change_consistency_policy = raid5_change_consistency_policy,
.bitmap_sector = raid5_bitmap_sector,
};
static struct md_personality raid4_personality =
@ -9211,6 +9216,7 @@ static struct md_personality raid4_personality =
.takeover = raid4_takeover,
.congested = raid5_congested,
.change_consistency_policy = raid5_change_consistency_policy,
.bitmap_sector = raid5_bitmap_sector,
};
static int __init raid5_init(void)

drivers/md/raid5.h View File

@ -358,7 +358,6 @@ enum {
STRIPE_REPLACED,
STRIPE_PREREAD_ACTIVE,
STRIPE_DELAYED,
STRIPE_DEGRADED,
STRIPE_BIT_DELAY,
STRIPE_EXPANDING,
STRIPE_EXPAND_SOURCE,
@ -372,9 +371,6 @@ enum {
STRIPE_ON_RELEASE_LIST,
STRIPE_BATCH_READY,
STRIPE_BATCH_ERR,
STRIPE_BITMAP_PENDING, /* Being added to bitmap, don't add
* to batch yet.
*/
STRIPE_LOG_TRAPPED, /* trapped into log (see raid5-cache.c)
* this bit is used in two scenarios:
*

drivers/net/can/usb/peak_usb/pcan_usb_fd.c View File

@ -520,11 +520,11 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if,
else
memcpy(cfd->data, rm->d, cfd->len);
peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low));
netdev->stats.rx_packets++;
netdev->stats.rx_bytes += cfd->len;
peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low));
return 0;
}
@ -586,11 +586,11 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if,
if (!skb)
return -ENOMEM;
peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low));
netdev->stats.rx_packets++;
netdev->stats.rx_bytes += cf->can_dlc;
peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low));
return 0;
}

drivers/net/wireless/realtek/rtw88/coex.c View File

@ -309,7 +309,7 @@ static void rtw_coex_tdma_timer_base(struct rtw_dev *rtwdev, u8 type)
{
struct rtw_coex *coex = &rtwdev->coex;
struct rtw_coex_stat *coex_stat = &coex->stat;
u8 para[2] = {0};
u8 para[6] = {};
u8 times;
u16 tbtt_interval = coex_stat->wl_beacon_interval;

include/asm-generic/tlb.h View File

@ -227,12 +227,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#define tlb_needs_table_invalidate() (true)
#endif
void tlb_remove_table_sync_one(void);
#else
#ifdef tlb_needs_table_invalidate
#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
#endif
static inline void tlb_remove_table_sync_one(void) { }
#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */

kernel/padata.c View File

@ -1213,12 +1213,16 @@ EXPORT_SYMBOL(padata_alloc_shell);
*/
void padata_free_shell(struct padata_shell *ps)
{
struct padata_instance *pinst = ps->pinst;
/*
* Wait for all _do_serial calls to finish to avoid touching
* freed pd's and ps's.
*/
synchronize_rcu();
mutex_lock(&pinst->lock);
mutex_lock(&ps->pinst->lock);
list_del(&ps->list);
padata_free_pd(rcu_dereference_protected(ps->pd, 1));
mutex_unlock(&pinst->lock);
mutex_unlock(&ps->pinst->lock);
kfree(ps);
}
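
The synchronize_rcu() added here makes padata_free_shell() wait out any still-running padata_do_serial() users of the pd/ps (see the comment in the hunk) before those structures are torn down. A userspace sketch of the same unpublish-then-wait-then-free pattern, assuming liburcu is available (build with -lurcu -lpthread; all names are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>

struct cfg { int value; };

static struct cfg *shared;             /* RCU-protected pointer, like ps->pd */

static void *reader(void *arg)
{
        rcu_register_thread();
        rcu_read_lock();
        struct cfg *c = rcu_dereference(shared);
        if (c)
                printf("reader sees %d\n", c->value);
        rcu_read_unlock();
        rcu_unregister_thread();
        return NULL;
}

int main(void)
{
        pthread_t t;
        struct cfg *old = malloc(sizeof(*old));

        rcu_register_thread();
        old->value = 1;
        rcu_assign_pointer(shared, old);

        pthread_create(&t, NULL, reader, NULL);

        rcu_assign_pointer(shared, NULL);  /* unpublish, like list_del(&ps->list) */
        synchronize_rcu();                 /* wait for readers still using 'old' */
        free(old);                         /* now safe: no reader can hold it */

        pthread_join(t, NULL);
        rcu_unregister_thread();
        return 0;
}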

kernel/trace/ftrace.c View File

@ -4314,7 +4314,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper,
* @ip: The instruction pointer address to remove the data from
*
* Returns the data if it is found, otherwise NULL.
* Note, if the data pointer is used as the data itself, (see
* Note, if the data pointer is used as the data itself, (see
* ftrace_func_mapper_find_ip(), then the return value may be meaningless,
* if the data pointer was set to zero.
*/
@ -5037,8 +5037,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
__add_hash_entry(direct_functions, entry);
ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
if (ret)
remove_hash_entry(direct_functions, entry);
if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
ret = register_ftrace_function(&direct_ops);
@ -5047,6 +5045,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
}
if (ret) {
remove_hash_entry(direct_functions, entry);
kfree(entry);
if (!direct->count) {
list_del_rcu(&direct->next);

mm/hugetlb.c View File

@ -5655,6 +5655,13 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
pud_clear(pud);
/*
* Once our caller drops the rmap lock, some other process might be
* using this page table as a normal, non-hugetlb page table.
* Wait for pending gup_fast() in other threads to finish before letting
* that happen.
*/
tlb_remove_table_sync_one();
put_page(virt_to_page(ptep));
mm_dec_nr_pmds(mm);
/*

mm/khugepaged.c View File

@ -1118,6 +1118,7 @@ static void collapse_huge_page(struct mm_struct *mm,
_pmd = pmdp_collapse_flush(vma, address, pmd);
spin_unlock(pmd_ptl);
mmu_notifier_invalidate_range_end(&range);
tlb_remove_table_sync_one();
spin_lock(pte_ptl);
isolated = __collapse_huge_page_isolate(vma, address, pte,
@ -1383,6 +1384,42 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
return 0;
}
/*
* A note about locking:
* Trying to take the page table spinlocks would be useless here because those
* are only used to synchronize:
*
* - modifying terminal entries (ones that point to a data page, not to another
* page table)
* - installing *new* non-terminal entries
*
* Instead, we need roughly the same kind of protection as free_pgtables() or
* mm_take_all_locks() (but only for a single VMA):
* The mmap lock together with this VMA's rmap locks covers all paths towards
* the page table entries we're messing with here, except for hardware page
* table walks and lockless_pages_from_mm().
*/
static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
pmd_t pmd;
mmap_assert_write_locked(mm);
if (vma->vm_file)
lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem);
/*
* All anon_vmas attached to the VMA have the same root and are
* therefore locked by the same lock.
*/
if (vma->anon_vma)
lockdep_assert_held_write(&vma->anon_vma->root->rwsem);
pmd = pmdp_collapse_flush(vma, addr, pmdp);
tlb_remove_table_sync_one();
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(pmd));
}
/**
* Try to collapse a pte-mapped THP for mm at address haddr.
*
@ -1396,7 +1433,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
struct vm_area_struct *vma = find_vma(mm, haddr);
struct page *hpage;
pte_t *start_pte, *pte;
pmd_t *pmd, _pmd;
pmd_t *pmd;
spinlock_t *ptl;
int count = 0;
int i;
@ -1426,6 +1463,20 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
if (!pmd)
goto drop_hpage;
/*
* We need to lock the mapping so that from here on, only GUP-fast and
* hardware page walks can access the parts of the page tables that
* we're operating on.
* See collapse_and_free_pmd().
*/
i_mmap_lock_write(vma->vm_file->f_mapping);
/*
* This spinlock should be unnecessary: Nobody else should be accessing
* the page tables under spinlock protection here, only
* lockless_pages_from_mm() and the hardware page walker can access page
* tables while all the high-level locks are held in write mode.
*/
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
/* step 1: check all mapped PTEs are to the right huge page */
@ -1472,11 +1523,15 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
}
/* step 4: collapse pmd */
ptl = pmd_lock(vma->vm_mm, pmd);
_pmd = pmdp_collapse_flush(vma, haddr, pmd);
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
/* we make no change to anon, but protect concurrent anon page lookup */
if (vma->anon_vma)
anon_vma_lock_write(vma->anon_vma);
collapse_and_free_pmd(mm, vma, haddr, pmd);
if (vma->anon_vma)
anon_vma_unlock_write(vma->anon_vma);
i_mmap_unlock_write(vma->vm_file->f_mapping);
drop_hpage:
unlock_page(hpage);
@ -1485,6 +1540,7 @@ drop_hpage:
abort:
pte_unmap_unlock(start_pte, ptl);
i_mmap_unlock_write(vma->vm_file->f_mapping);
goto drop_hpage;
}
@ -1516,7 +1572,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long addr;
pmd_t *pmd, _pmd;
pmd_t *pmd;
i_mmap_lock_write(mapping);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
@ -1534,7 +1590,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* An alternative would be drop the check, but check that page
* table is clear before calling pmdp_collapse_flush() under
* ptl. It has higher chance to recover THP for the VMA, but
* has higher cost too.
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
if (vma->anon_vma)
continue;
@ -1555,14 +1612,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* reverse order. Trylock is a way to avoid deadlock.
*/
if (mmap_write_trylock(mm)) {
if (!khugepaged_test_exit(mm)) {
spinlock_t *ptl = pmd_lock(mm, pmd);
/* assume page table is clear */
_pmd = pmdp_collapse_flush(vma, addr, pmd);
spin_unlock(ptl);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
}
if (!khugepaged_test_exit(mm))
collapse_and_free_pmd(mm, vma, addr, pmd);
mmap_write_unlock(mm);
} else {
/* Try again later */

mm/mmu_gather.c View File

@ -139,7 +139,7 @@ static void tlb_remove_table_smp_sync(void *arg)
/* Simply deliver the interrupt */
}
static void tlb_remove_table_sync_one(void)
void tlb_remove_table_sync_one(void)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
@ -163,8 +163,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
static void tlb_remove_table_sync_one(void) { }
static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
__tlb_remove_table_free(batch);

net/ipv6/mcast.c View File

@ -1984,21 +1984,21 @@ static void mld_send_cr(struct inet6_dev *idev)
static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
{
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.igmp_sk;
const struct in6_addr *snd_addr, *saddr;
int err, len, payload_len, full_len;
struct in6_addr addr_buf;
struct inet6_dev *idev;
struct sk_buff *skb;
struct mld_msg *hdr;
const struct in6_addr *snd_addr, *saddr;
struct in6_addr addr_buf;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
struct flowi6 fl6;
struct dst_entry *dst;
struct flowi6 fl6;
struct net *net;
struct sock *sk;
if (type == ICMPV6_MGM_REDUCTION)
snd_addr = &in6addr_linklocal_allrouters;
@ -2009,20 +2009,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
payload_len = len + sizeof(ra);
full_len = sizeof(struct ipv6hdr) + payload_len;
skb = alloc_skb(hlen + tlen + full_len, GFP_ATOMIC);
rcu_read_lock();
IP6_UPD_PO_STATS(net, __in6_dev_get(dev),
IPSTATS_MIB_OUT, full_len);
rcu_read_unlock();
skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
net = dev_net_rcu(dev);
idev = __in6_dev_get(dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, full_len);
if (!skb) {
rcu_read_lock();
IP6_INC_STATS(net, __in6_dev_get(dev),
IPSTATS_MIB_OUTDISCARDS);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
rcu_read_unlock();
return;
}
sk = net->ipv6.igmp_sk;
skb_set_owner_w(skb, sk);
skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
@ -2047,9 +2048,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPPROTO_ICMPV6,
csum_partial(hdr, len, 0));
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);

net/ipv6/route.c View File

@ -2408,10 +2408,10 @@ static void ip6_negative_advice(struct sock *sk,
if (rt->rt6i_flags & RTF_CACHE) {
rcu_read_lock();
if (rt6_check_expired(rt)) {
/* counteract the dst_release() in sk_dst_reset() */
dst_hold(dst);
/* rt/dst can not be destroyed yet,
* because of rcu_read_lock()
*/
sk_dst_reset(sk);
rt6_remove_exception_rt(rt);
}
rcu_read_unlock();