* Mon Oct 16 2023 Miroslav Rezanina <mrezanin@redhat.com> - 8.1.0-3
- kvm-migration-Fix-race-that-dest-preempt-thread-close-to.patch [RHEL-11219] - kvm-migration-Fix-possible-race-when-setting-rp_state.er.patch [RHEL-11219] - kvm-migration-Fix-possible-races-when-shutting-down-the-.patch [RHEL-11219] - kvm-migration-Fix-possible-race-when-shutting-down-to_ds.patch [RHEL-11219] - kvm-migration-Remove-redundant-cleanup-of-postcopy_qemuf.patch [RHEL-11219] - kvm-migration-Consolidate-return-path-closing-code.patch [RHEL-11219] - kvm-migration-Replace-the-return-path-retry-logic.patch [RHEL-11219] - kvm-migration-Move-return-path-cleanup-to-main-migration.patch [RHEL-11219] - kvm-file-posix-Clear-bs-bl.zoned-on-error.patch [RHEL-7360] - kvm-file-posix-Check-bs-bl.zoned-for-zone-info.patch [RHEL-7360] - kvm-file-posix-Fix-zone-update-in-I-O-error-path.patch [RHEL-7360] - kvm-file-posix-Simplify-raw_co_prw-s-out-zone-code.patch [RHEL-7360] - kvm-tests-file-io-error-New-test.patch [RHEL-7360] - Resolves: RHEL-11219 (migration tests failing for RHEL 9.4 sometimes) - Resolves: RHEL-7360 (Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk)
This commit is contained in:
		
							parent
							
								
									19e7552cf4
								
							
						
					
					
						commit
						5f95659303
					
				
							
								
								
									
										73
									
								
								kvm-file-posix-Check-bs-bl.zoned-for-zone-info.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								kvm-file-posix-Check-bs-bl.zoned-for-zone-info.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,73 @@ | ||||
| From 42dac3d063e0f44e92211a4fbdaf1f0d64843562 Mon Sep 17 00:00:00 2001 | ||||
| From: Hanna Czenczek <hreitz@redhat.com> | ||||
| Date: Thu, 24 Aug 2023 17:53:41 +0200 | ||||
| Subject: [PATCH 10/13] file-posix: Check bs->bl.zoned for zone info | ||||
| 
 | ||||
| RH-Author: Hanna Czenczek <hreitz@redhat.com> | ||||
| RH-MergeRequest: 202: file-posix: Fix zone update in I/O error path | ||||
| RH-Jira: RHEL-7360 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Commit: [2/5] d853b88d6b71b6f4fe1bb70ae2e3b6108cbdb8dc (hreitz/qemu-kvm-c-9-s) | ||||
| 
 | ||||
| Instead of checking bs->wps or bs->bl.zone_size for whether zone | ||||
| information is present, check bs->bl.zoned.  That is the flag that | ||||
| raw_refresh_zoned_limits() reliably sets to indicate zone support.  If | ||||
| it is set to something other than BLK_Z_NONE, other values and objects | ||||
| like bs->wps and bs->bl.zone_size must be non-null/zero and valid; if it | ||||
| is not, we cannot rely on their validity. | ||||
| 
 | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| Message-Id: <20230824155345.109765-3-hreitz@redhat.com> | ||||
| Reviewed-by: Sam Li <faithilikerun@gmail.com> | ||||
| (cherry picked from commit 4b5d80f3d02096a9bb1f651f6b3401ba40877159) | ||||
| 
 | ||||
| Downstream: Fixed typo in the last hunk ("bs->blk" changed to | ||||
|             "bs->blk").  This code is removed in the commit after the | ||||
|             next, still, should be fixed now that I noticed. | ||||
| 
 | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| ---
 | ||||
|  block/file-posix.c | 12 +++++++----- | ||||
|  1 file changed, 7 insertions(+), 5 deletions(-) | ||||
| 
 | ||||
| diff --git a/block/file-posix.c b/block/file-posix.c
 | ||||
| index 2b88b9eefa..9f14850e24 100644
 | ||||
| --- a/block/file-posix.c
 | ||||
| +++ b/block/file-posix.c
 | ||||
| @@ -2455,9 +2455,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
 | ||||
|      if (fd_open(bs) < 0) | ||||
|          return -EIO; | ||||
|  #if defined(CONFIG_BLKZONED) | ||||
| -    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) {
 | ||||
| +    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
 | ||||
| +        bs->bl.zoned != BLK_Z_NONE) {
 | ||||
|          qemu_co_mutex_lock(&bs->wps->colock); | ||||
| -        if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) {
 | ||||
| +        if (type & QEMU_AIO_ZONE_APPEND) {
 | ||||
|              int index = offset / bs->bl.zone_size; | ||||
|              offset = bs->wps->wp[index]; | ||||
|          } | ||||
| @@ -2508,8 +2509,8 @@ out:
 | ||||
|  { | ||||
|      BlockZoneWps *wps = bs->wps; | ||||
|      if (ret == 0) { | ||||
| -        if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
 | ||||
| -            && wps && bs->bl.zone_size) {
 | ||||
| +        if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
 | ||||
| +            bs->bl.zoned != BLK_Z_NONE) {
 | ||||
|              uint64_t *wp = &wps->wp[offset / bs->bl.zone_size]; | ||||
|              if (!BDRV_ZT_IS_CONV(*wp)) { | ||||
|                  if (type & QEMU_AIO_ZONE_APPEND) { | ||||
| @@ -2529,7 +2530,8 @@ out:
 | ||||
|          } | ||||
|      } | ||||
|   | ||||
| -    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) {
 | ||||
| +    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
 | ||||
| +        bs->bl.zoned != BLK_Z_NONE) {
 | ||||
|          qemu_co_mutex_unlock(&wps->colock); | ||||
|      } | ||||
|  } | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
							
								
								
									
										96
									
								
								kvm-file-posix-Clear-bs-bl.zoned-on-error.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								kvm-file-posix-Clear-bs-bl.zoned-on-error.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,96 @@ | ||||
| From 1eba7b686a41c71f24f60f8adcd51ddf1882686d Mon Sep 17 00:00:00 2001 | ||||
| From: Hanna Czenczek <hreitz@redhat.com> | ||||
| Date: Thu, 24 Aug 2023 17:53:40 +0200 | ||||
| Subject: [PATCH 09/13] file-posix: Clear bs->bl.zoned on error | ||||
| 
 | ||||
| RH-Author: Hanna Czenczek <hreitz@redhat.com> | ||||
| RH-MergeRequest: 202: file-posix: Fix zone update in I/O error path | ||||
| RH-Jira: RHEL-7360 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Commit: [1/5] 3e2419b64b35afdfa45f70c5eb61478321de5233 (hreitz/qemu-kvm-c-9-s) | ||||
| 
 | ||||
| bs->bl.zoned is what indicates whether the zone information is present | ||||
| and valid; it is the only thing that raw_refresh_zoned_limits() sets if | ||||
| CONFIG_BLKZONED is not defined, and it is also the only thing that it | ||||
| sets if CONFIG_BLKZONED is defined, but there are no zones. | ||||
| 
 | ||||
| Make sure that it is always set to BLK_Z_NONE if there is an error | ||||
| anywhere in raw_refresh_zoned_limits() so that we do not accidentally | ||||
| announce zones while our information is incomplete or invalid. | ||||
| 
 | ||||
| This also fixes a memory leak in the last error path in | ||||
| raw_refresh_zoned_limits(). | ||||
| 
 | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| Message-Id: <20230824155345.109765-2-hreitz@redhat.com> | ||||
| Reviewed-by: Sam Li <faithilikerun@gmail.com> | ||||
| (cherry picked from commit 56d1a022a77ea2125564913665eeadf3e303a671) | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| ---
 | ||||
|  block/file-posix.c | 21 ++++++++++++--------- | ||||
|  1 file changed, 12 insertions(+), 9 deletions(-) | ||||
| 
 | ||||
| diff --git a/block/file-posix.c b/block/file-posix.c
 | ||||
| index b16e9c21a1..2b88b9eefa 100644
 | ||||
| --- a/block/file-posix.c
 | ||||
| +++ b/block/file-posix.c
 | ||||
| @@ -1412,11 +1412,9 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
 | ||||
|      BlockZoneModel zoned; | ||||
|      int ret; | ||||
|   | ||||
| -    bs->bl.zoned = BLK_Z_NONE;
 | ||||
| -
 | ||||
|      ret = get_sysfs_zoned_model(st, &zoned); | ||||
|      if (ret < 0 || zoned == BLK_Z_NONE) { | ||||
| -        return;
 | ||||
| +        goto no_zoned;
 | ||||
|      } | ||||
|      bs->bl.zoned = zoned; | ||||
|   | ||||
| @@ -1437,10 +1435,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
 | ||||
|      if (ret < 0) { | ||||
|          error_setg_errno(errp, -ret, "Unable to read chunk_sectors " | ||||
|                                       "sysfs attribute"); | ||||
| -        return;
 | ||||
| +        goto no_zoned;
 | ||||
|      } else if (!ret) { | ||||
|          error_setg(errp, "Read 0 from chunk_sectors sysfs attribute"); | ||||
| -        return;
 | ||||
| +        goto no_zoned;
 | ||||
|      } | ||||
|      bs->bl.zone_size = ret << BDRV_SECTOR_BITS; | ||||
|   | ||||
| @@ -1448,10 +1446,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
 | ||||
|      if (ret < 0) { | ||||
|          error_setg_errno(errp, -ret, "Unable to read nr_zones " | ||||
|                                       "sysfs attribute"); | ||||
| -        return;
 | ||||
| +        goto no_zoned;
 | ||||
|      } else if (!ret) { | ||||
|          error_setg(errp, "Read 0 from nr_zones sysfs attribute"); | ||||
| -        return;
 | ||||
| +        goto no_zoned;
 | ||||
|      } | ||||
|      bs->bl.nr_zones = ret; | ||||
|   | ||||
| @@ -1472,10 +1470,15 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
 | ||||
|      ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0); | ||||
|      if (ret < 0) { | ||||
|          error_setg_errno(errp, -ret, "report wps failed"); | ||||
| -        bs->wps = NULL;
 | ||||
| -        return;
 | ||||
| +        goto no_zoned;
 | ||||
|      } | ||||
|      qemu_co_mutex_init(&bs->wps->colock); | ||||
| +    return;
 | ||||
| +
 | ||||
| +no_zoned:
 | ||||
| +    bs->bl.zoned = BLK_Z_NONE;
 | ||||
| +    g_free(bs->wps);
 | ||||
| +    bs->wps = NULL;
 | ||||
|  } | ||||
|  #else /* !defined(CONFIG_BLKZONED) */ | ||||
|  static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st, | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
							
								
								
									
										42
									
								
								kvm-file-posix-Fix-zone-update-in-I-O-error-path.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								kvm-file-posix-Fix-zone-update-in-I-O-error-path.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,42 @@ | ||||
| From fc9ca44d7df182a0a483d8376054850ba46e633d Mon Sep 17 00:00:00 2001 | ||||
| From: Hanna Czenczek <hreitz@redhat.com> | ||||
| Date: Thu, 24 Aug 2023 17:53:42 +0200 | ||||
| Subject: [PATCH 11/13] file-posix: Fix zone update in I/O error path | ||||
| 
 | ||||
| RH-Author: Hanna Czenczek <hreitz@redhat.com> | ||||
| RH-MergeRequest: 202: file-posix: Fix zone update in I/O error path | ||||
| RH-Jira: RHEL-7360 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Commit: [3/5] 28a8540b2ac0c437f8afb90fd5566fc83bfcfdc4 (hreitz/qemu-kvm-c-9-s) | ||||
| 
 | ||||
| We must check that zone information is present before running | ||||
| update_zones_wp(). | ||||
| 
 | ||||
| Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2234374 | ||||
| Fixes: Coverity CID 1512459 | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| Message-Id: <20230824155345.109765-4-hreitz@redhat.com> | ||||
| Reviewed-by: Sam Li <faithilikerun@gmail.com> | ||||
| (cherry picked from commit deab5c9a4ed74f76a713008a42527762b30a7e84) | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| ---
 | ||||
|  block/file-posix.c | 3 ++- | ||||
|  1 file changed, 2 insertions(+), 1 deletion(-) | ||||
| 
 | ||||
| diff --git a/block/file-posix.c b/block/file-posix.c
 | ||||
| index 9f14850e24..b9d5e4741b 100644
 | ||||
| --- a/block/file-posix.c
 | ||||
| +++ b/block/file-posix.c
 | ||||
| @@ -2525,7 +2525,8 @@ out:
 | ||||
|              } | ||||
|          } | ||||
|      } else { | ||||
| -        if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
 | ||||
| +        if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
 | ||||
| +            bs->bl.zoned != BLK_Z_NONE) {
 | ||||
|              update_zones_wp(bs, s->fd, 0, 1); | ||||
|          } | ||||
|      } | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
							
								
								
									
										71
									
								
								kvm-file-posix-Simplify-raw_co_prw-s-out-zone-code.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								kvm-file-posix-Simplify-raw_co_prw-s-out-zone-code.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,71 @@ | ||||
| From fbf511a9f7272815fc6d7414735049689186c750 Mon Sep 17 00:00:00 2001 | ||||
| From: Hanna Czenczek <hreitz@redhat.com> | ||||
| Date: Thu, 24 Aug 2023 17:53:43 +0200 | ||||
| Subject: [PATCH 12/13] file-posix: Simplify raw_co_prw's 'out' zone code | ||||
| 
 | ||||
| RH-Author: Hanna Czenczek <hreitz@redhat.com> | ||||
| RH-MergeRequest: 202: file-posix: Fix zone update in I/O error path | ||||
| RH-Jira: RHEL-7360 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Commit: [4/5] c0b37cc9ff308de300e45d1a10942d11fbabea26 (hreitz/qemu-kvm-c-9-s) | ||||
| 
 | ||||
| We duplicate the same condition three times here, pull it out to the top | ||||
| level. | ||||
| 
 | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| Message-Id: <20230824155345.109765-5-hreitz@redhat.com> | ||||
| Reviewed-by: Sam Li <faithilikerun@gmail.com> | ||||
| (cherry picked from commit d31b50a15dd25a560749b25fc40b6484fd1a57b7) | ||||
| 
 | ||||
| Downstream conflict with the downstream change in HEAD^^ (conflicting | ||||
| code is removed). | ||||
| 
 | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| ---
 | ||||
|  block/file-posix.c | 18 +++++------------- | ||||
|  1 file changed, 5 insertions(+), 13 deletions(-) | ||||
| 
 | ||||
| diff --git a/block/file-posix.c b/block/file-posix.c
 | ||||
| index b9d5e4741b..aa89789737 100644
 | ||||
| --- a/block/file-posix.c
 | ||||
| +++ b/block/file-posix.c
 | ||||
| @@ -2506,11 +2506,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
 | ||||
|   | ||||
|  out: | ||||
|  #if defined(CONFIG_BLKZONED) | ||||
| -{
 | ||||
| -    BlockZoneWps *wps = bs->wps;
 | ||||
| -    if (ret == 0) {
 | ||||
| -        if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
 | ||||
| -            bs->bl.zoned != BLK_Z_NONE) {
 | ||||
| +    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
 | ||||
| +        bs->bl.zoned != BLK_Z_NONE) {
 | ||||
| +        BlockZoneWps *wps = bs->wps;
 | ||||
| +        if (ret == 0) {
 | ||||
|              uint64_t *wp = &wps->wp[offset / bs->bl.zone_size]; | ||||
|              if (!BDRV_ZT_IS_CONV(*wp)) { | ||||
|                  if (type & QEMU_AIO_ZONE_APPEND) { | ||||
| @@ -2523,19 +2522,12 @@ out:
 | ||||
|                      *wp = offset + bytes; | ||||
|                  } | ||||
|              } | ||||
| -        }
 | ||||
| -    } else {
 | ||||
| -        if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
 | ||||
| -            bs->bl.zoned != BLK_Z_NONE) {
 | ||||
| +        } else {
 | ||||
|              update_zones_wp(bs, s->fd, 0, 1); | ||||
|          } | ||||
| -    }
 | ||||
|   | ||||
| -    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
 | ||||
| -        bs->bl.zoned != BLK_Z_NONE) {
 | ||||
|          qemu_co_mutex_unlock(&wps->colock); | ||||
|      } | ||||
| -}
 | ||||
|  #endif | ||||
|      return ret; | ||||
|  } | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
							
								
								
									
										86
									
								
								kvm-migration-Consolidate-return-path-closing-code.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								kvm-migration-Consolidate-return-path-closing-code.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,86 @@ | ||||
| From 33f0f63da753fff2a3d2bf14542abe0665878e3d Mon Sep 17 00:00:00 2001 | ||||
| From: Fabiano Rosas <farosas@suse.de> | ||||
| Date: Mon, 18 Sep 2023 14:28:20 -0300 | ||||
| Subject: [PATCH 06/13] migration: Consolidate return path closing code | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| RH-Author: Peter Xu <peterx@redhat.com> | ||||
| RH-MergeRequest: 203: migration: Fix race that dest preempt thread close too early | ||||
| RH-Jira: RHEL-11219 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Acked-by: Leonardo Brás <leobras@redhat.com> | ||||
| RH-Commit: [6/8] d3a4e08e1f1afe92afac12a3404a84d1c23e0f02 (peterx/qemu-kvm) | ||||
| 
 | ||||
| We'll start calling the await_return_path_close_on_source() function | ||||
| from other parts of the code, so move all of the related checks and | ||||
| tracepoints into it. | ||||
| 
 | ||||
| Reviewed-by: Peter Xu <peterx@redhat.com> | ||||
| Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-ID: <20230918172822.19052-7-farosas@suse.de> | ||||
| (cherry picked from commit d50f5dc075cbb891bfe4a9378600a4871264468a) | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| ---
 | ||||
|  migration/migration.c | 29 ++++++++++++++--------------- | ||||
|  1 file changed, 14 insertions(+), 15 deletions(-) | ||||
| 
 | ||||
| diff --git a/migration/migration.c b/migration/migration.c
 | ||||
| index e2e4a7d8ae..ac4541b971 100644
 | ||||
| --- a/migration/migration.c
 | ||||
| +++ b/migration/migration.c
 | ||||
| @@ -2050,6 +2050,14 @@ static int open_return_path_on_source(MigrationState *ms,
 | ||||
|  /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ | ||||
|  static int await_return_path_close_on_source(MigrationState *ms) | ||||
|  { | ||||
| +    int ret;
 | ||||
| +
 | ||||
| +    if (!ms->rp_state.rp_thread_created) {
 | ||||
| +        return 0;
 | ||||
| +    }
 | ||||
| +
 | ||||
| +    trace_migration_return_path_end_before();
 | ||||
| +
 | ||||
|      /* | ||||
|       * If this is a normal exit then the destination will send a SHUT | ||||
|       * and the rp_thread will exit, however if there's an error we | ||||
| @@ -2067,7 +2075,10 @@ static int await_return_path_close_on_source(MigrationState *ms)
 | ||||
|      qemu_thread_join(&ms->rp_state.rp_thread); | ||||
|      ms->rp_state.rp_thread_created = false; | ||||
|      trace_await_return_path_close_on_source_close(); | ||||
| -    return ms->rp_state.error;
 | ||||
| +
 | ||||
| +    ret = ms->rp_state.error;
 | ||||
| +    trace_migration_return_path_end_after(ret);
 | ||||
| +    return ret;
 | ||||
|  } | ||||
|   | ||||
|  static inline void | ||||
| @@ -2363,20 +2374,8 @@ static void migration_completion(MigrationState *s)
 | ||||
|          goto fail; | ||||
|      } | ||||
|   | ||||
| -    /*
 | ||||
| -     * If rp was opened we must clean up the thread before
 | ||||
| -     * cleaning everything else up (since if there are no failures
 | ||||
| -     * it will wait for the destination to send it's status in
 | ||||
| -     * a SHUT command).
 | ||||
| -     */
 | ||||
| -    if (s->rp_state.rp_thread_created) {
 | ||||
| -        int rp_error;
 | ||||
| -        trace_migration_return_path_end_before();
 | ||||
| -        rp_error = await_return_path_close_on_source(s);
 | ||||
| -        trace_migration_return_path_end_after(rp_error);
 | ||||
| -        if (rp_error) {
 | ||||
| -            goto fail;
 | ||||
| -        }
 | ||||
| +    if (await_return_path_close_on_source(s)) {
 | ||||
| +        goto fail;
 | ||||
|      } | ||||
|   | ||||
|      if (qemu_file_get_error(s->to_dst_file)) { | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
| @ -0,0 +1,48 @@ | ||||
| From 93aa615c5e8de7422d7162a0731739b2bd82a582 Mon Sep 17 00:00:00 2001 | ||||
| From: Fabiano Rosas <farosas@suse.de> | ||||
| Date: Mon, 18 Sep 2023 14:28:16 -0300 | ||||
| Subject: [PATCH 02/13] migration: Fix possible race when setting | ||||
|  rp_state.error | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| RH-Author: Peter Xu <peterx@redhat.com> | ||||
| RH-MergeRequest: 203: migration: Fix race that dest preempt thread close too early | ||||
| RH-Jira: RHEL-11219 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Acked-by: Leonardo Brás <leobras@redhat.com> | ||||
| RH-Commit: [2/8] 97305847fc18b6b18cb6f3ff32c82918c2562664 (peterx/qemu-kvm) | ||||
| 
 | ||||
| We don't need to set the rp_state.error right after a shutdown because | ||||
| qemu_file_shutdown() always sets the QEMUFile error, so the return | ||||
| path thread would have seen it and set the rp error itself. | ||||
| 
 | ||||
| Setting the error outside of the thread is also racy because the | ||||
| thread could clear it after we set it. | ||||
| 
 | ||||
| Reviewed-by: Peter Xu <peterx@redhat.com> | ||||
| Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-ID: <20230918172822.19052-3-farosas@suse.de> | ||||
| (cherry picked from commit 28a8347281e24c2e7bba6d3301472eda41d4c096) | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| ---
 | ||||
|  migration/migration.c | 1 - | ||||
|  1 file changed, 1 deletion(-) | ||||
| 
 | ||||
| diff --git a/migration/migration.c b/migration/migration.c
 | ||||
| index 6a7122694a..b92c6ae436 100644
 | ||||
| --- a/migration/migration.c
 | ||||
| +++ b/migration/migration.c
 | ||||
| @@ -2063,7 +2063,6 @@ static int await_return_path_close_on_source(MigrationState *ms)
 | ||||
|           * waiting for the destination. | ||||
|           */ | ||||
|          qemu_file_shutdown(ms->rp_state.from_dst_file); | ||||
| -        mark_source_rp_bad(ms);
 | ||||
|      } | ||||
|      trace_await_return_path_close_on_source_joining(); | ||||
|      qemu_thread_join(&ms->rp_state.rp_thread); | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
| @ -0,0 +1,82 @@ | ||||
| From 52e2456a41678f29a67e4b7cd4d7ee20e5298bc0 Mon Sep 17 00:00:00 2001 | ||||
| From: Fabiano Rosas <farosas@suse.de> | ||||
| Date: Mon, 18 Sep 2023 14:28:18 -0300 | ||||
| Subject: [PATCH 04/13] migration: Fix possible race when shutting down | ||||
|  to_dst_file | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| RH-Author: Peter Xu <peterx@redhat.com> | ||||
| RH-MergeRequest: 203: migration: Fix race that dest preempt thread close too early | ||||
| RH-Jira: RHEL-11219 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Acked-by: Leonardo Brás <leobras@redhat.com> | ||||
| RH-Commit: [4/8] 99b9ce27c4477b9630cedfb02a534f8c490ab1eb (peterx/qemu-kvm) | ||||
| 
 | ||||
| It's not safe to call qemu_file_shutdown() on the to_dst_file without | ||||
| first checking for the file's presence under the lock. The cleanup of | ||||
| this file happens at postcopy_pause() and migrate_fd_cleanup() which | ||||
| are not necessarily running in the same thread as migrate_fd_cancel(). | ||||
| 
 | ||||
| Reviewed-by: Peter Xu <peterx@redhat.com> | ||||
| Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-ID: <20230918172822.19052-5-farosas@suse.de> | ||||
| (cherry picked from commit 7478fb0df914f0a5ab551ff74b1df62dd250500e) | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| ---
 | ||||
|  migration/migration.c | 18 +++++++++++------- | ||||
|  1 file changed, 11 insertions(+), 7 deletions(-) | ||||
| 
 | ||||
| diff --git a/migration/migration.c b/migration/migration.c
 | ||||
| index 517b3e04d2..169e6bdce8 100644
 | ||||
| --- a/migration/migration.c
 | ||||
| +++ b/migration/migration.c
 | ||||
| @@ -1246,7 +1246,7 @@ static void migrate_fd_error(MigrationState *s, const Error *error)
 | ||||
|  static void migrate_fd_cancel(MigrationState *s) | ||||
|  { | ||||
|      int old_state ; | ||||
| -    QEMUFile *f = migrate_get_current()->to_dst_file;
 | ||||
| +
 | ||||
|      trace_migrate_fd_cancel(); | ||||
|   | ||||
|      WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { | ||||
| @@ -1272,11 +1272,13 @@ static void migrate_fd_cancel(MigrationState *s)
 | ||||
|       * If we're unlucky the migration code might be stuck somewhere in a | ||||
|       * send/write while the network has failed and is waiting to timeout; | ||||
|       * if we've got shutdown(2) available then we can force it to quit. | ||||
| -     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
 | ||||
| -     * called in a bh, so there is no race against this cancel.
 | ||||
|       */ | ||||
| -    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
 | ||||
| -        qemu_file_shutdown(f);
 | ||||
| +    if (s->state == MIGRATION_STATUS_CANCELLING) {
 | ||||
| +        WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
 | ||||
| +            if (s->to_dst_file) {
 | ||||
| +                qemu_file_shutdown(s->to_dst_file);
 | ||||
| +            }
 | ||||
| +        }
 | ||||
|      } | ||||
|      if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { | ||||
|          Error *local_err = NULL; | ||||
| @@ -1520,12 +1522,14 @@ void qmp_migrate_pause(Error **errp)
 | ||||
|  { | ||||
|      MigrationState *ms = migrate_get_current(); | ||||
|      MigrationIncomingState *mis = migration_incoming_get_current(); | ||||
| -    int ret;
 | ||||
| +    int ret = 0;
 | ||||
|   | ||||
|      if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { | ||||
|          /* Source side, during postcopy */ | ||||
|          qemu_mutex_lock(&ms->qemu_file_lock); | ||||
| -        ret = qemu_file_shutdown(ms->to_dst_file);
 | ||||
| +        if (ms->to_dst_file) {
 | ||||
| +            ret = qemu_file_shutdown(ms->to_dst_file);
 | ||||
| +        }
 | ||||
|          qemu_mutex_unlock(&ms->qemu_file_lock); | ||||
|          if (ret) { | ||||
|              error_setg(errp, "Failed to pause source migration"); | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
| @ -0,0 +1,74 @@ | ||||
| From 1cc26aac30b883244e300d872fa3a19f39afbb66 Mon Sep 17 00:00:00 2001 | ||||
| From: Fabiano Rosas <farosas@suse.de> | ||||
| Date: Mon, 18 Sep 2023 14:28:17 -0300 | ||||
| Subject: [PATCH 03/13] migration: Fix possible races when shutting down the | ||||
|  return path | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| RH-Author: Peter Xu <peterx@redhat.com> | ||||
| RH-MergeRequest: 203: migration: Fix race that dest preempt thread close too early | ||||
| RH-Jira: RHEL-11219 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Acked-by: Leonardo Brás <leobras@redhat.com> | ||||
| RH-Commit: [3/8] d734387cf07e956a1f1e817c200e5b36cf0a00b1 (peterx/qemu-kvm) | ||||
| 
 | ||||
| We cannot call qemu_file_shutdown() on the return path file without | ||||
| taking the file lock. The return path thread could be running it's | ||||
| cleanup code and have just cleared the from_dst_file pointer. | ||||
| 
 | ||||
| Checking ms->to_dst_file for errors could also race with | ||||
| migrate_fd_cleanup() which clears the to_dst_file pointer. | ||||
| 
 | ||||
| Protect both accesses by taking the file lock. | ||||
| 
 | ||||
| This was caught by inspection, it should be rare, but the next patches | ||||
| will start calling this code from other places, so let's do the | ||||
| correct thing. | ||||
| 
 | ||||
| Reviewed-by: Peter Xu <peterx@redhat.com> | ||||
| Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-ID: <20230918172822.19052-4-farosas@suse.de> | ||||
| (cherry picked from commit 639decf529793fc544c8055b82be8abe77fa48fa) | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| ---
 | ||||
|  migration/migration.c | 19 ++++++++++--------- | ||||
|  1 file changed, 10 insertions(+), 9 deletions(-) | ||||
| 
 | ||||
| diff --git a/migration/migration.c b/migration/migration.c
 | ||||
| index b92c6ae436..517b3e04d2 100644
 | ||||
| --- a/migration/migration.c
 | ||||
| +++ b/migration/migration.c
 | ||||
| @@ -2053,17 +2053,18 @@ static int open_return_path_on_source(MigrationState *ms,
 | ||||
|  static int await_return_path_close_on_source(MigrationState *ms) | ||||
|  { | ||||
|      /* | ||||
| -     * If this is a normal exit then the destination will send a SHUT and the
 | ||||
| -     * rp_thread will exit, however if there's an error we need to cause
 | ||||
| -     * it to exit.
 | ||||
| +     * If this is a normal exit then the destination will send a SHUT
 | ||||
| +     * and the rp_thread will exit, however if there's an error we
 | ||||
| +     * need to cause it to exit. shutdown(2), if we have it, will
 | ||||
| +     * cause it to unblock if it's stuck waiting for the destination.
 | ||||
|       */ | ||||
| -    if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
 | ||||
| -        /*
 | ||||
| -         * shutdown(2), if we have it, will cause it to unblock if it's stuck
 | ||||
| -         * waiting for the destination.
 | ||||
| -         */
 | ||||
| -        qemu_file_shutdown(ms->rp_state.from_dst_file);
 | ||||
| +    WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
 | ||||
| +        if (ms->to_dst_file && ms->rp_state.from_dst_file &&
 | ||||
| +            qemu_file_get_error(ms->to_dst_file)) {
 | ||||
| +            qemu_file_shutdown(ms->rp_state.from_dst_file);
 | ||||
| +        }
 | ||||
|      } | ||||
| +
 | ||||
|      trace_await_return_path_close_on_source_joining(); | ||||
|      qemu_thread_join(&ms->rp_state.rp_thread); | ||||
|      ms->rp_state.rp_thread_created = false; | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
							
								
								
									
										172
									
								
								kvm-migration-Fix-race-that-dest-preempt-thread-close-to.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								kvm-migration-Fix-race-that-dest-preempt-thread-close-to.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,172 @@ | ||||
| From c98eb88ce49927f612753b53c890135fde14c3a4 Mon Sep 17 00:00:00 2001 | ||||
| From: Peter Xu <peterx@redhat.com> | ||||
| Date: Mon, 18 Sep 2023 14:28:15 -0300 | ||||
| Subject: [PATCH 01/13] migration: Fix race that dest preempt thread close too | ||||
|  early | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| RH-Author: Peter Xu <peterx@redhat.com> | ||||
| RH-MergeRequest: 203: migration: Fix race that dest preempt thread close too early | ||||
| RH-Jira: RHEL-11219 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Acked-by: Leonardo Brás <leobras@redhat.com> | ||||
| RH-Commit: [1/8] a1197ca99878d5d8bbb271c6941aa36d91c66122 (peterx/qemu-kvm) | ||||
| 
 | ||||
| We hit intermit CI issue on failing at migration-test over the unit test | ||||
| preempt/plain: | ||||
| 
 | ||||
| qemu-system-x86_64: Unable to read from socket: Connection reset by peer | ||||
| Memory content inconsistency at 5b43000 first_byte = bd last_byte = bc current = 4f hit_edge = 1 | ||||
| ** | ||||
| ERROR:../tests/qtest/migration-test.c:300:check_guests_ram: assertion failed: (bad == 0) | ||||
| (test program exited with status code -6) | ||||
| 
 | ||||
| Fabiano debugged into it and found that the preempt thread can quit even | ||||
| without receiving all the pages, which can cause guest not receiving all | ||||
| the pages and corrupt the guest memory. | ||||
| 
 | ||||
| To make sure preempt thread finished receiving all the pages, we can rely | ||||
| on the page_requested_count being zero because preempt channel will only | ||||
| receive requested page faults. Note, not all the faulted pages are required | ||||
| to be sent via the preempt channel/thread; imagine the case when a | ||||
| requested page is just queued into the background main channel for | ||||
| migration, the src qemu will just still send it via the background channel. | ||||
| 
 | ||||
| Here instead of spinning over reading the count, we add a condvar so the | ||||
| main thread can wait on it if that unusual case happened, without burning | ||||
| the cpu for no good reason, even if the duration is short; so even if we | ||||
| spin in this rare case is probably fine.  It's just better to not do so. | ||||
| 
 | ||||
| The condvar is only used when that special case is triggered.  Some memory | ||||
| ordering trick is needed to guarantee it from happening (against the | ||||
| preempt thread status field), so the main thread will always get a kick | ||||
| when that triggers correctly. | ||||
| 
 | ||||
| Closes: https://gitlab.com/qemu-project/qemu/-/issues/1886 | ||||
| Debugged-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-ID: <20230918172822.19052-2-farosas@suse.de> | ||||
| (cherry picked from commit cf02f29e1e3843784630d04783e372fa541a77e5) | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| ---
 | ||||
|  migration/migration.c    |  3 ++- | ||||
|  migration/migration.h    | 13 ++++++++++++- | ||||
|  migration/postcopy-ram.c | 38 +++++++++++++++++++++++++++++++++++++- | ||||
|  3 files changed, 51 insertions(+), 3 deletions(-) | ||||
| 
 | ||||
| diff --git a/migration/migration.c b/migration/migration.c
 | ||||
| index 5528acb65e..6a7122694a 100644
 | ||||
| --- a/migration/migration.c
 | ||||
| +++ b/migration/migration.c
 | ||||
| @@ -153,6 +153,7 @@ void migration_object_init(void)
 | ||||
|      qemu_sem_init(¤t_incoming->postcopy_qemufile_dst_done, 0); | ||||
|   | ||||
|      qemu_mutex_init(¤t_incoming->page_request_mutex); | ||||
| +    qemu_cond_init(¤t_incoming->page_request_cond);
 | ||||
|      current_incoming->page_requested = g_tree_new(page_request_addr_cmp); | ||||
|   | ||||
|      migration_object_check(current_migration, &error_fatal); | ||||
| @@ -367,7 +368,7 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis,
 | ||||
|               * things like g_tree_lookup() will return TRUE (1) when found. | ||||
|               */ | ||||
|              g_tree_insert(mis->page_requested, aligned, (gpointer)1); | ||||
| -            mis->page_requested_count++;
 | ||||
| +            qatomic_inc(&mis->page_requested_count);
 | ||||
|              trace_postcopy_page_req_add(aligned, mis->page_requested_count); | ||||
|          } | ||||
|      } | ||||
| diff --git a/migration/migration.h b/migration/migration.h
 | ||||
| index 6eea18db36..9a30216895 100644
 | ||||
| --- a/migration/migration.h
 | ||||
| +++ b/migration/migration.h
 | ||||
| @@ -196,7 +196,10 @@ struct MigrationIncomingState {
 | ||||
|   | ||||
|      /* A tree of pages that we requested to the source VM */ | ||||
|      GTree *page_requested; | ||||
| -    /* For debugging purpose only, but would be nice to keep */
 | ||||
| +    /*
 | ||||
| +     * For postcopy only, count the number of requested page faults that
 | ||||
| +     * still haven't been resolved.
 | ||||
| +     */
 | ||||
|      int page_requested_count; | ||||
|      /* | ||||
|       * The mutex helps to maintain the requested pages that we sent to the | ||||
| @@ -210,6 +213,14 @@ struct MigrationIncomingState {
 | ||||
|       * contains valid information. | ||||
|       */ | ||||
|      QemuMutex page_request_mutex; | ||||
| +    /*
 | ||||
| +     * If postcopy preempt is enabled, there is a chance that the main
 | ||||
| +     * thread finished loading its data before the preempt channel has
 | ||||
| +     * finished loading the urgent pages.  If that happens, the two threads
 | ||||
| +     * will use this condvar to synchronize, so the main thread will always
 | ||||
| +     * wait until all pages received.
 | ||||
| +     */
 | ||||
| +    QemuCond page_request_cond;
 | ||||
|   | ||||
|      /* | ||||
|       * Number of devices that have yet to approve switchover. When this reaches | ||||
| diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
 | ||||
| index 29aea9456d..5408e028c6 100644
 | ||||
| --- a/migration/postcopy-ram.c
 | ||||
| +++ b/migration/postcopy-ram.c
 | ||||
| @@ -599,6 +599,30 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 | ||||
|      if (mis->preempt_thread_status == PREEMPT_THREAD_CREATED) { | ||||
|          /* Notify the fast load thread to quit */ | ||||
|          mis->preempt_thread_status = PREEMPT_THREAD_QUIT; | ||||
| +        /*
 | ||||
| +         * Update preempt_thread_status before reading count.  Note: mutex
 | ||||
| +         * lock only provide ACQUIRE semantic, and it doesn't stops this
 | ||||
| +         * write to be reordered after reading the count.
 | ||||
| +         */
 | ||||
| +        smp_mb();
 | ||||
| +        /*
 | ||||
| +         * It's possible that the preempt thread is still handling the last
 | ||||
| +         * pages to arrive which were requested by guest page faults.
 | ||||
| +         * Making sure nothing is left behind by waiting on the condvar if
 | ||||
| +         * that unlikely case happened.
 | ||||
| +         */
 | ||||
| +        WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
 | ||||
| +            if (qatomic_read(&mis->page_requested_count)) {
 | ||||
| +                /*
 | ||||
| +                 * It is guaranteed to receive a signal later, because the
 | ||||
| +                 * count>0 now, so it's destined to be decreased to zero
 | ||||
| +                 * very soon by the preempt thread.
 | ||||
| +                 */
 | ||||
| +                qemu_cond_wait(&mis->page_request_cond,
 | ||||
| +                               &mis->page_request_mutex);
 | ||||
| +            }
 | ||||
| +        }
 | ||||
| +        /* Notify the fast load thread to quit */
 | ||||
|          if (mis->postcopy_qemufile_dst) { | ||||
|              qemu_file_shutdown(mis->postcopy_qemufile_dst); | ||||
|          } | ||||
| @@ -1277,8 +1301,20 @@ static int qemu_ufd_copy_ioctl(MigrationIncomingState *mis, void *host_addr,
 | ||||
|           */ | ||||
|          if (g_tree_lookup(mis->page_requested, host_addr)) { | ||||
|              g_tree_remove(mis->page_requested, host_addr); | ||||
| -            mis->page_requested_count--;
 | ||||
| +            int left_pages = qatomic_dec_fetch(&mis->page_requested_count);
 | ||||
| +
 | ||||
|              trace_postcopy_page_req_del(host_addr, mis->page_requested_count); | ||||
| +            /* Order the update of count and read of preempt status */
 | ||||
| +            smp_mb();
 | ||||
| +            if (mis->preempt_thread_status == PREEMPT_THREAD_QUIT &&
 | ||||
| +                left_pages == 0) {
 | ||||
| +                /*
 | ||||
| +                 * This probably means the main thread is waiting for us.
 | ||||
| +                 * Notify that we've finished receiving the last requested
 | ||||
| +                 * page.
 | ||||
| +                 */
 | ||||
| +                qemu_cond_signal(&mis->page_request_cond);
 | ||||
| +            }
 | ||||
|          } | ||||
|          qemu_mutex_unlock(&mis->page_request_mutex); | ||||
|          mark_postcopy_blocktime_end((uintptr_t)host_addr); | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
| @ -0,0 +1,76 @@ | ||||
| From 117f325d4983bb7e40a50e51c2fe056938d5add4 Mon Sep 17 00:00:00 2001 | ||||
| From: Fabiano Rosas <farosas@suse.de> | ||||
| Date: Mon, 18 Sep 2023 14:28:22 -0300 | ||||
| Subject: [PATCH 08/13] migration: Move return path cleanup to main migration | ||||
|  thread | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| RH-Author: Peter Xu <peterx@redhat.com> | ||||
| RH-MergeRequest: 203: migration: Fix race that dest preempt thread close too early | ||||
| RH-Jira: RHEL-11219 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Acked-by: Leonardo Brás <leobras@redhat.com> | ||||
| RH-Commit: [8/8] 3f085c4ac6c0835d35346bf0cad778c17a248daf (peterx/qemu-kvm) | ||||
| 
 | ||||
| Now that the return path thread is allowed to finish during a paused | ||||
| migration, we can move the cleanup of the QEMUFiles to the main | ||||
| migration thread. | ||||
| 
 | ||||
| Reviewed-by: Peter Xu <peterx@redhat.com> | ||||
| Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-ID: <20230918172822.19052-9-farosas@suse.de> | ||||
| (cherry picked from commit 36e9aab3c569d4c9ad780473596e18479838d1aa) | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| ---
 | ||||
|  migration/migration.c | 11 ++++++++++- | ||||
|  1 file changed, 10 insertions(+), 1 deletion(-) | ||||
| 
 | ||||
| diff --git a/migration/migration.c b/migration/migration.c
 | ||||
| index a0782c64a1..7a4c8beb5d 100644
 | ||||
| --- a/migration/migration.c
 | ||||
| +++ b/migration/migration.c
 | ||||
| @@ -98,6 +98,7 @@ static int migration_maybe_pause(MigrationState *s,
 | ||||
|                                   int *current_active_state, | ||||
|                                   int new_state); | ||||
|  static void migrate_fd_cancel(MigrationState *s); | ||||
| +static int await_return_path_close_on_source(MigrationState *s);
 | ||||
|   | ||||
|  static bool migration_needs_multiple_sockets(void) | ||||
|  { | ||||
| @@ -1178,6 +1179,12 @@ static void migrate_fd_cleanup(MigrationState *s)
 | ||||
|          qemu_fclose(tmp); | ||||
|      } | ||||
|   | ||||
| +    /*
 | ||||
| +     * We already cleaned up to_dst_file, so errors from the return
 | ||||
| +     * path might be due to that, ignore them.
 | ||||
| +     */
 | ||||
| +    await_return_path_close_on_source(s);
 | ||||
| +
 | ||||
|      assert(!migration_is_active(s)); | ||||
|   | ||||
|      if (s->state == MIGRATION_STATUS_CANCELLING) { | ||||
| @@ -1986,7 +1993,6 @@ out:
 | ||||
|      } | ||||
|   | ||||
|      trace_source_return_path_thread_end(); | ||||
| -    migration_release_dst_files(ms);
 | ||||
|      rcu_unregister_thread(); | ||||
|      return NULL; | ||||
|  } | ||||
| @@ -2040,6 +2046,9 @@ static int await_return_path_close_on_source(MigrationState *ms)
 | ||||
|   | ||||
|      ret = ms->rp_state.error; | ||||
|      ms->rp_state.error = false; | ||||
| +
 | ||||
| +    migration_release_dst_files(ms);
 | ||||
| +
 | ||||
|      trace_migration_return_path_end_after(ret); | ||||
|      return ret; | ||||
|  } | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
| @ -0,0 +1,49 @@ | ||||
| From be84138fc49f3e6998d6af8fdfa9cc1054ae1549 Mon Sep 17 00:00:00 2001 | ||||
| From: Fabiano Rosas <farosas@suse.de> | ||||
| Date: Mon, 18 Sep 2023 14:28:19 -0300 | ||||
| Subject: [PATCH 05/13] migration: Remove redundant cleanup of | ||||
|  postcopy_qemufile_src | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| RH-Author: Peter Xu <peterx@redhat.com> | ||||
| RH-MergeRequest: 203: migration: Fix race that dest preempt thread close too early | ||||
| RH-Jira: RHEL-11219 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Acked-by: Leonardo Brás <leobras@redhat.com> | ||||
| RH-Commit: [5/8] 2a63a021e923e6460d57f1c0850651f4ddcd5b4f (peterx/qemu-kvm) | ||||
| 
 | ||||
| This file is owned by the return path thread which is already doing | ||||
| cleanup. | ||||
| 
 | ||||
| Reviewed-by: Peter Xu <peterx@redhat.com> | ||||
| Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-ID: <20230918172822.19052-6-farosas@suse.de> | ||||
| (cherry picked from commit b3b101157d4651f12e6b3361af2de6bace7f9b4a) | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| ---
 | ||||
|  migration/migration.c | 6 ------ | ||||
|  1 file changed, 6 deletions(-) | ||||
| 
 | ||||
| diff --git a/migration/migration.c b/migration/migration.c
 | ||||
| index 169e6bdce8..e2e4a7d8ae 100644
 | ||||
| --- a/migration/migration.c
 | ||||
| +++ b/migration/migration.c
 | ||||
| @@ -1178,12 +1178,6 @@ static void migrate_fd_cleanup(MigrationState *s)
 | ||||
|          qemu_fclose(tmp); | ||||
|      } | ||||
|   | ||||
| -    if (s->postcopy_qemufile_src) {
 | ||||
| -        migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src);
 | ||||
| -        qemu_fclose(s->postcopy_qemufile_src);
 | ||||
| -        s->postcopy_qemufile_src = NULL;
 | ||||
| -    }
 | ||||
| -
 | ||||
|      assert(!migration_is_active(s)); | ||||
|   | ||||
|      if (s->state == MIGRATION_STATUS_CANCELLING) { | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
							
								
								
									
										251
									
								
								kvm-migration-Replace-the-return-path-retry-logic.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										251
									
								
								kvm-migration-Replace-the-return-path-retry-logic.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,251 @@ | ||||
| From 74093a6ac7b5290965a2869512b998eac018a630 Mon Sep 17 00:00:00 2001 | ||||
| From: Fabiano Rosas <farosas@suse.de> | ||||
| Date: Mon, 18 Sep 2023 14:28:21 -0300 | ||||
| Subject: [PATCH 07/13] migration: Replace the return path retry logic | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| RH-Author: Peter Xu <peterx@redhat.com> | ||||
| RH-MergeRequest: 203: migration: Fix race that dest preempt thread close too early | ||||
| RH-Jira: RHEL-11219 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Acked-by: Leonardo Brás <leobras@redhat.com> | ||||
| RH-Commit: [7/8] 6b9d9b955f95ff14b47491865e7e45bfdcfc8a97 (peterx/qemu-kvm) | ||||
| 
 | ||||
| Replace the return path retry logic with finishing and restarting the | ||||
| thread. This fixes a race when resuming the migration that leads to a | ||||
| segfault. | ||||
| 
 | ||||
| Currently when doing postcopy we consider that an IO error on the | ||||
| return path file could be due to a network intermittency. We then keep | ||||
| the thread alive but have it do cleanup of the 'from_dst_file' and | ||||
| wait on the 'postcopy_pause_rp' semaphore. When the user issues a | ||||
| migrate resume, a new return path is opened and the thread is allowed | ||||
| to continue. | ||||
| 
 | ||||
| There's a race condition in the above mechanism. It is possible for | ||||
| the new return path file to be setup *before* the cleanup code in the | ||||
| return path thread has had a chance to run, leading to the *new* file | ||||
| being closed and the pointer set to NULL. When the thread is released | ||||
| after the resume, it tries to dereference 'from_dst_file' and crashes: | ||||
| 
 | ||||
| Thread 7 "return path" received signal SIGSEGV, Segmentation fault. | ||||
| [Switching to Thread 0x7fffd1dbf700 (LWP 9611)] | ||||
| 0x00005555560e4893 in qemu_file_get_error_obj (f=0x0, errp=0x0) at ../migration/qemu-file.c:154 | ||||
| 154         return f->last_error; | ||||
| 
 | ||||
| (gdb) bt | ||||
|  #0  0x00005555560e4893 in qemu_file_get_error_obj (f=0x0, errp=0x0) at ../migration/qemu-file.c:154 | ||||
|  #1  0x00005555560e4983 in qemu_file_get_error (f=0x0) at ../migration/qemu-file.c:206 | ||||
|  #2  0x0000555555b9a1df in source_return_path_thread (opaque=0x555556e06000) at ../migration/migration.c:1876 | ||||
|  #3  0x000055555602e14f in qemu_thread_start (args=0x55555782e780) at ../util/qemu-thread-posix.c:541 | ||||
|  #4  0x00007ffff38d76ea in start_thread (arg=0x7fffd1dbf700) at pthread_create.c:477 | ||||
|  #5  0x00007ffff35efa6f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 | ||||
| 
 | ||||
| Here's the race (important bit is open_return_path happening before | ||||
| migration_release_dst_files): | ||||
| 
 | ||||
| migration                 | qmp                         | return path | ||||
| --------------------------+-----------------------------+---------------------------------
 | ||||
| 			    qmp_migrate_pause() | ||||
| 			     shutdown(ms->to_dst_file) | ||||
| 			      f->last_error = -EIO | ||||
| migrate_detect_error() | ||||
|  postcopy_pause() | ||||
|   set_state(PAUSED) | ||||
|   wait(postcopy_pause_sem) | ||||
| 			    qmp_migrate(resume) | ||||
| 			    migrate_fd_connect() | ||||
| 			     resume = state == PAUSED | ||||
| 			     open_return_path <-- TOO SOON! | ||||
| 			     set_state(RECOVER) | ||||
| 			     post(postcopy_pause_sem) | ||||
| 							(incoming closes to_src_file) | ||||
| 							res = qemu_file_get_error(rp) | ||||
| 							migration_release_dst_files() | ||||
| 							ms->rp_state.from_dst_file = NULL | ||||
|   post(postcopy_pause_rp_sem) | ||||
| 							postcopy_pause_return_path_thread() | ||||
| 							  wait(postcopy_pause_rp_sem) | ||||
| 							rp = ms->rp_state.from_dst_file | ||||
| 							goto retry | ||||
| 							qemu_file_get_error(rp) | ||||
| 							SIGSEGV | ||||
| -------------------------------------------------------------------------------------------
 | ||||
| 
 | ||||
| We can keep the retry logic without having the thread alive and | ||||
| waiting. The only piece of data used by it is the 'from_dst_file' and | ||||
| it is only allowed to proceed after a migrate resume is issued and the | ||||
| semaphore released at migrate_fd_connect(). | ||||
| 
 | ||||
| Move the retry logic to outside the thread by waiting for the thread | ||||
| to finish before pausing the migration. | ||||
| 
 | ||||
| Reviewed-by: Peter Xu <peterx@redhat.com> | ||||
| Signed-off-by: Fabiano Rosas <farosas@suse.de> | ||||
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||||
| Message-ID: <20230918172822.19052-8-farosas@suse.de> | ||||
| (cherry picked from commit ef796ee93b313ed2f0b427ef30320417387d2ad5) | ||||
| Signed-off-by: Peter Xu <peterx@redhat.com> | ||||
| ---
 | ||||
|  migration/migration.c | 60 ++++++++----------------------------------- | ||||
|  migration/migration.h |  1 - | ||||
|  2 files changed, 11 insertions(+), 50 deletions(-) | ||||
| 
 | ||||
| diff --git a/migration/migration.c b/migration/migration.c
 | ||||
| index ac4541b971..a0782c64a1 100644
 | ||||
| --- a/migration/migration.c
 | ||||
| +++ b/migration/migration.c
 | ||||
| @@ -1776,18 +1776,6 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
 | ||||
|      } | ||||
|  } | ||||
|   | ||||
| -/* Return true to retry, false to quit */
 | ||||
| -static bool postcopy_pause_return_path_thread(MigrationState *s)
 | ||||
| -{
 | ||||
| -    trace_postcopy_pause_return_path();
 | ||||
| -
 | ||||
| -    qemu_sem_wait(&s->postcopy_pause_rp_sem);
 | ||||
| -
 | ||||
| -    trace_postcopy_pause_return_path_continued();
 | ||||
| -
 | ||||
| -    return true;
 | ||||
| -}
 | ||||
| -
 | ||||
|  static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) | ||||
|  { | ||||
|      RAMBlock *block = qemu_ram_block_by_name(block_name); | ||||
| @@ -1871,7 +1859,6 @@ static void *source_return_path_thread(void *opaque)
 | ||||
|      trace_source_return_path_thread_entry(); | ||||
|      rcu_register_thread(); | ||||
|   | ||||
| -retry:
 | ||||
|      while (!ms->rp_state.error && !qemu_file_get_error(rp) && | ||||
|             migration_is_setup_or_active(ms->state)) { | ||||
|          trace_source_return_path_thread_loop_top(); | ||||
| @@ -1993,26 +1980,7 @@ retry:
 | ||||
|      } | ||||
|   | ||||
|  out: | ||||
| -    res = qemu_file_get_error(rp);
 | ||||
| -    if (res) {
 | ||||
| -        if (res && migration_in_postcopy()) {
 | ||||
| -            /*
 | ||||
| -             * Maybe there is something we can do: it looks like a
 | ||||
| -             * network down issue, and we pause for a recovery.
 | ||||
| -             */
 | ||||
| -            migration_release_dst_files(ms);
 | ||||
| -            rp = NULL;
 | ||||
| -            if (postcopy_pause_return_path_thread(ms)) {
 | ||||
| -                /*
 | ||||
| -                 * Reload rp, reset the rest.  Referencing it is safe since
 | ||||
| -                 * it's reset only by us above, or when migration completes
 | ||||
| -                 */
 | ||||
| -                rp = ms->rp_state.from_dst_file;
 | ||||
| -                ms->rp_state.error = false;
 | ||||
| -                goto retry;
 | ||||
| -            }
 | ||||
| -        }
 | ||||
| -
 | ||||
| +    if (qemu_file_get_error(rp)) {
 | ||||
|          trace_source_return_path_thread_bad_end(); | ||||
|          mark_source_rp_bad(ms); | ||||
|      } | ||||
| @@ -2023,8 +1991,7 @@ out:
 | ||||
|      return NULL; | ||||
|  } | ||||
|   | ||||
| -static int open_return_path_on_source(MigrationState *ms,
 | ||||
| -                                      bool create_thread)
 | ||||
| +static int open_return_path_on_source(MigrationState *ms)
 | ||||
|  { | ||||
|      ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); | ||||
|      if (!ms->rp_state.from_dst_file) { | ||||
| @@ -2033,11 +2000,6 @@ static int open_return_path_on_source(MigrationState *ms,
 | ||||
|   | ||||
|      trace_open_return_path_on_source(); | ||||
|   | ||||
| -    if (!create_thread) {
 | ||||
| -        /* We're done */
 | ||||
| -        return 0;
 | ||||
| -    }
 | ||||
| -
 | ||||
|      qemu_thread_create(&ms->rp_state.rp_thread, "return path", | ||||
|                         source_return_path_thread, ms, QEMU_THREAD_JOINABLE); | ||||
|      ms->rp_state.rp_thread_created = true; | ||||
| @@ -2077,6 +2039,7 @@ static int await_return_path_close_on_source(MigrationState *ms)
 | ||||
|      trace_await_return_path_close_on_source_close(); | ||||
|   | ||||
|      ret = ms->rp_state.error; | ||||
| +    ms->rp_state.error = false;
 | ||||
|      trace_migration_return_path_end_after(ret); | ||||
|      return ret; | ||||
|  } | ||||
| @@ -2552,6 +2515,13 @@ static MigThrError postcopy_pause(MigrationState *s)
 | ||||
|          qemu_file_shutdown(file); | ||||
|          qemu_fclose(file); | ||||
|   | ||||
| +        /*
 | ||||
| +         * We're already pausing, so ignore any errors on the return
 | ||||
| +         * path and just wait for the thread to finish. It will be
 | ||||
| +         * re-created when we resume.
 | ||||
| +         */
 | ||||
| +        await_return_path_close_on_source(s);
 | ||||
| +
 | ||||
|          migrate_set_state(&s->state, s->state, | ||||
|                            MIGRATION_STATUS_POSTCOPY_PAUSED); | ||||
|   | ||||
| @@ -2569,12 +2539,6 @@ static MigThrError postcopy_pause(MigrationState *s)
 | ||||
|          if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { | ||||
|              /* Woken up by a recover procedure. Give it a shot */ | ||||
|   | ||||
| -            /*
 | ||||
| -             * Firstly, let's wake up the return path now, with a new
 | ||||
| -             * return path channel.
 | ||||
| -             */
 | ||||
| -            qemu_sem_post(&s->postcopy_pause_rp_sem);
 | ||||
| -
 | ||||
|              /* Do the resume logic */ | ||||
|              if (postcopy_do_resume(s) == 0) { | ||||
|                  /* Let's continue! */ | ||||
| @@ -3264,7 +3228,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
 | ||||
|       * QEMU uses the return path. | ||||
|       */ | ||||
|      if (migrate_postcopy_ram() || migrate_return_path()) { | ||||
| -        if (open_return_path_on_source(s, !resume)) {
 | ||||
| +        if (open_return_path_on_source(s)) {
 | ||||
|              error_setg(&local_err, "Unable to open return-path for postcopy"); | ||||
|              migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); | ||||
|              migrate_set_error(s, local_err); | ||||
| @@ -3328,7 +3292,6 @@ static void migration_instance_finalize(Object *obj)
 | ||||
|      qemu_sem_destroy(&ms->rate_limit_sem); | ||||
|      qemu_sem_destroy(&ms->pause_sem); | ||||
|      qemu_sem_destroy(&ms->postcopy_pause_sem); | ||||
| -    qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
 | ||||
|      qemu_sem_destroy(&ms->rp_state.rp_sem); | ||||
|      qemu_sem_destroy(&ms->rp_state.rp_pong_acks); | ||||
|      qemu_sem_destroy(&ms->postcopy_qemufile_src_sem); | ||||
| @@ -3348,7 +3311,6 @@ static void migration_instance_init(Object *obj)
 | ||||
|      migrate_params_init(&ms->parameters); | ||||
|   | ||||
|      qemu_sem_init(&ms->postcopy_pause_sem, 0); | ||||
| -    qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
 | ||||
|      qemu_sem_init(&ms->rp_state.rp_sem, 0); | ||||
|      qemu_sem_init(&ms->rp_state.rp_pong_acks, 0); | ||||
|      qemu_sem_init(&ms->rate_limit_sem, 0); | ||||
| diff --git a/migration/migration.h b/migration/migration.h
 | ||||
| index 9a30216895..1034d617bf 100644
 | ||||
| --- a/migration/migration.h
 | ||||
| +++ b/migration/migration.h
 | ||||
| @@ -393,7 +393,6 @@ struct MigrationState {
 | ||||
|   | ||||
|      /* Needed by postcopy-pause state */ | ||||
|      QemuSemaphore postcopy_pause_sem; | ||||
| -    QemuSemaphore postcopy_pause_rp_sem;
 | ||||
|      /* | ||||
|       * Whether we abort the migration if decompression errors are | ||||
|       * detected at the destination. It is left at false for qemu | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
							
								
								
									
										201
									
								
								kvm-tests-file-io-error-New-test.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										201
									
								
								kvm-tests-file-io-error-New-test.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,201 @@ | ||||
| From dbf091dd32daae0999fb62b74f7c834b445f6bf1 Mon Sep 17 00:00:00 2001 | ||||
| From: Hanna Czenczek <hreitz@redhat.com> | ||||
| Date: Thu, 24 Aug 2023 17:53:44 +0200 | ||||
| Subject: [PATCH 13/13] tests/file-io-error: New test | ||||
| 
 | ||||
| RH-Author: Hanna Czenczek <hreitz@redhat.com> | ||||
| RH-MergeRequest: 202: file-posix: Fix zone update in I/O error path | ||||
| RH-Jira: RHEL-7360 | ||||
| RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com> | ||||
| RH-Commit: [5/5] a07662c948f10bea7972a7d525b5941264651a4f (hreitz/qemu-kvm-c-9-s) | ||||
| 
 | ||||
| This is a regression test for | ||||
| https://bugzilla.redhat.com/show_bug.cgi?id=2234374. | ||||
| 
 | ||||
| All this test needs to do is trigger an I/O error inside of file-posix | ||||
| (specifically raw_co_prw()).  One reliable way to do this without | ||||
| requiring special privileges is to use a FUSE export, which allows us to | ||||
| inject any error that we want, e.g. via blkdebug. | ||||
| 
 | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| Message-Id: <20230824155345.109765-6-hreitz@redhat.com> | ||||
| [hreitz: Fixed test to be skipped when there is no FUSE support, to | ||||
|          suppress fusermount's allow_other warning, and to be skipped | ||||
|          with $IMGOPTSSYNTAX enabled] | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| (cherry picked from commit 380448464dd89291cf7fd7434be6c225482a334d) | ||||
| Signed-off-by: Hanna Czenczek <hreitz@redhat.com> | ||||
| ---
 | ||||
|  tests/qemu-iotests/tests/file-io-error     | 119 +++++++++++++++++++++ | ||||
|  tests/qemu-iotests/tests/file-io-error.out |  33 ++++++ | ||||
|  2 files changed, 152 insertions(+) | ||||
|  create mode 100755 tests/qemu-iotests/tests/file-io-error | ||||
|  create mode 100644 tests/qemu-iotests/tests/file-io-error.out | ||||
| 
 | ||||
| diff --git a/tests/qemu-iotests/tests/file-io-error b/tests/qemu-iotests/tests/file-io-error
 | ||||
| new file mode 100755 | ||||
| index 0000000000..88ee5f670c
 | ||||
| --- /dev/null
 | ||||
| +++ b/tests/qemu-iotests/tests/file-io-error
 | ||||
| @@ -0,0 +1,119 @@
 | ||||
| +#!/usr/bin/env bash
 | ||||
| +# group: rw
 | ||||
| +#
 | ||||
| +# Produce an I/O error in file-posix, and hope that it is not catastrophic.
 | ||||
| +# Regression test for: https://bugzilla.redhat.com/show_bug.cgi?id=2234374
 | ||||
| +#
 | ||||
| +# Copyright (C) 2023 Red Hat, Inc.
 | ||||
| +#
 | ||||
| +# This program is free software; you can redistribute it and/or modify
 | ||||
| +# it under the terms of the GNU General Public License as published by
 | ||||
| +# the Free Software Foundation; either version 2 of the License, or
 | ||||
| +# (at your option) any later version.
 | ||||
| +#
 | ||||
| +# This program is distributed in the hope that it will be useful,
 | ||||
| +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||||
| +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | ||||
| +# GNU General Public License for more details.
 | ||||
| +#
 | ||||
| +# You should have received a copy of the GNU General Public License
 | ||||
| +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| +#
 | ||||
| +
 | ||||
| +seq=$(basename "$0")
 | ||||
| +echo "QA output created by $seq"
 | ||||
| +
 | ||||
| +status=1	# failure is the default!
 | ||||
| +
 | ||||
| +_cleanup()
 | ||||
| +{
 | ||||
| +    _cleanup_qemu
 | ||||
| +    rm -f "$TEST_DIR/fuse-export"
 | ||||
| +}
 | ||||
| +trap "_cleanup; exit \$status" 0 1 2 3 15
 | ||||
| +
 | ||||
| +# get standard environment, filters and checks
 | ||||
| +. ../common.rc
 | ||||
| +. ../common.filter
 | ||||
| +. ../common.qemu
 | ||||
| +
 | ||||
| +# Format-agnostic (we do not use any), but we do test the file protocol
 | ||||
| +_supported_proto file
 | ||||
| +_require_drivers blkdebug null-co
 | ||||
| +
 | ||||
| +if [ "$IMGOPTSSYNTAX" = "true" ]; then
 | ||||
| +    # We need `$QEMU_IO -f file` to work; IMGOPTSSYNTAX uses --image-opts,
 | ||||
| +    # breaking -f.
 | ||||
| +    _unsupported_fmt $IMGFMT
 | ||||
| +fi
 | ||||
| +
 | ||||
| +# This is a regression test of a bug in which flie-posix would access zone
 | ||||
| +# information in case of an I/O error even when there is no zone information,
 | ||||
| +# resulting in a division by zero.
 | ||||
| +# To reproduce the problem, we need to trigger an I/O error inside of
 | ||||
| +# file-posix, which can be done (rootless) by providing a FUSE export that
 | ||||
| +# presents only errors when accessed.
 | ||||
| +
 | ||||
| +_launch_qemu
 | ||||
| +_send_qemu_cmd $QEMU_HANDLE \
 | ||||
| +    "{'execute': 'qmp_capabilities'}" \
 | ||||
| +    'return'
 | ||||
| +
 | ||||
| +_send_qemu_cmd $QEMU_HANDLE \
 | ||||
| +    "{'execute': 'blockdev-add',
 | ||||
| +      'arguments': {
 | ||||
| +          'driver': 'blkdebug',
 | ||||
| +          'node-name': 'node0',
 | ||||
| +          'inject-error': [{'event': 'none'}],
 | ||||
| +          'image': {
 | ||||
| +              'driver': 'null-co'
 | ||||
| +          }
 | ||||
| +      }}" \
 | ||||
| +    'return'
 | ||||
| +
 | ||||
| +# FUSE mountpoint must exist and be a regular file
 | ||||
| +touch "$TEST_DIR/fuse-export"
 | ||||
| +
 | ||||
| +# The grep -v to filter fusermount's (benign) error when /etc/fuse.conf does
 | ||||
| +# not contain user_allow_other and the subsequent check for missing FUSE support
 | ||||
| +# have both been taken from iotest 308.
 | ||||
| +output=$(_send_qemu_cmd $QEMU_HANDLE \
 | ||||
| +    "{'execute': 'block-export-add',
 | ||||
| +      'arguments': {
 | ||||
| +          'id': 'exp0',
 | ||||
| +          'type': 'fuse',
 | ||||
| +          'node-name': 'node0',
 | ||||
| +          'mountpoint': '$TEST_DIR/fuse-export',
 | ||||
| +          'writable': true
 | ||||
| +      }}" \
 | ||||
| +    'return' \
 | ||||
| +    | grep -v 'option allow_other only allowed if')
 | ||||
| +
 | ||||
| +if echo "$output" | grep -q "Parameter 'type' does not accept value 'fuse'"; then
 | ||||
| +    _notrun 'No FUSE support'
 | ||||
| +fi
 | ||||
| +echo "$output"
 | ||||
| +
 | ||||
| +echo
 | ||||
| +# This should fail, but gracefully, i.e. just print an I/O error, not crash.
 | ||||
| +$QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io
 | ||||
| +echo
 | ||||
| +
 | ||||
| +_send_qemu_cmd $QEMU_HANDLE \
 | ||||
| +    "{'execute': 'block-export-del',
 | ||||
| +      'arguments': {'id': 'exp0'}}" \
 | ||||
| +    'return'
 | ||||
| +
 | ||||
| +_send_qemu_cmd $QEMU_HANDLE \
 | ||||
| +    '' \
 | ||||
| +    'BLOCK_EXPORT_DELETED'
 | ||||
| +
 | ||||
| +_send_qemu_cmd $QEMU_HANDLE \
 | ||||
| +    "{'execute': 'blockdev-del',
 | ||||
| +      'arguments': {'node-name': 'node0'}}" \
 | ||||
| +    'return'
 | ||||
| +
 | ||||
| +# success, all done
 | ||||
| +echo "*** done"
 | ||||
| +rm -f $seq.full
 | ||||
| +status=0
 | ||||
| diff --git a/tests/qemu-iotests/tests/file-io-error.out b/tests/qemu-iotests/tests/file-io-error.out
 | ||||
| new file mode 100644 | ||||
| index 0000000000..0f46455a94
 | ||||
| --- /dev/null
 | ||||
| +++ b/tests/qemu-iotests/tests/file-io-error.out
 | ||||
| @@ -0,0 +1,33 @@
 | ||||
| +QA output created by file-io-error
 | ||||
| +{'execute': 'qmp_capabilities'}
 | ||||
| +{"return": {}}
 | ||||
| +{'execute': 'blockdev-add',
 | ||||
| +      'arguments': {
 | ||||
| +          'driver': 'blkdebug',
 | ||||
| +          'node-name': 'node0',
 | ||||
| +          'inject-error': [{'event': 'none'}],
 | ||||
| +          'image': {
 | ||||
| +              'driver': 'null-co'
 | ||||
| +          }
 | ||||
| +      }}
 | ||||
| +{"return": {}}
 | ||||
| +{'execute': 'block-export-add',
 | ||||
| +      'arguments': {
 | ||||
| +          'id': 'exp0',
 | ||||
| +          'type': 'fuse',
 | ||||
| +          'node-name': 'node0',
 | ||||
| +          'mountpoint': 'TEST_DIR/fuse-export',
 | ||||
| +          'writable': true
 | ||||
| +      }}
 | ||||
| +{"return": {}}
 | ||||
| +
 | ||||
| +write failed: Input/output error
 | ||||
| +
 | ||||
| +{'execute': 'block-export-del',
 | ||||
| +      'arguments': {'id': 'exp0'}}
 | ||||
| +{"return": {}}
 | ||||
| +{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "exp0"}}
 | ||||
| +{'execute': 'blockdev-del',
 | ||||
| +      'arguments': {'node-name': 'node0'}}
 | ||||
| +{"return": {}}
 | ||||
| +*** done
 | ||||
| -- 
 | ||||
| 2.39.3 | ||||
| 
 | ||||
| @ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version}                    \ | ||||
| Summary: QEMU is a machine emulator and virtualizer | ||||
| Name: qemu-kvm | ||||
| Version: 8.1.0 | ||||
| Release: 2%{?rcrel}%{?dist}%{?cc_suffix} | ||||
| Release: 3%{?rcrel}%{?dist}%{?cc_suffix} | ||||
| # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped | ||||
| # Epoch 15 used for RHEL 8 | ||||
| # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) | ||||
| @ -195,6 +195,32 @@ Patch0021: 0021-Add-machine-types-compat-bits.patch | ||||
| Patch0022: 0022-Fix-x86-machine-type-compatibility-for-qemu-kvm-8.1..patch | ||||
| # For RHEL-832 - qemu-kvm crashed when migrating guest with failover vf | ||||
| Patch23: kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch | ||||
| # For RHEL-11219 - migration tests failing for RHEL 9.4 sometimes | ||||
| Patch24: kvm-migration-Fix-race-that-dest-preempt-thread-close-to.patch | ||||
| # For RHEL-11219 - migration tests failing for RHEL 9.4 sometimes | ||||
| Patch25: kvm-migration-Fix-possible-race-when-setting-rp_state.er.patch | ||||
| # For RHEL-11219 - migration tests failing for RHEL 9.4 sometimes | ||||
| Patch26: kvm-migration-Fix-possible-races-when-shutting-down-the-.patch | ||||
| # For RHEL-11219 - migration tests failing for RHEL 9.4 sometimes | ||||
| Patch27: kvm-migration-Fix-possible-race-when-shutting-down-to_ds.patch | ||||
| # For RHEL-11219 - migration tests failing for RHEL 9.4 sometimes | ||||
| Patch28: kvm-migration-Remove-redundant-cleanup-of-postcopy_qemuf.patch | ||||
| # For RHEL-11219 - migration tests failing for RHEL 9.4 sometimes | ||||
| Patch29: kvm-migration-Consolidate-return-path-closing-code.patch | ||||
| # For RHEL-11219 - migration tests failing for RHEL 9.4 sometimes | ||||
| Patch30: kvm-migration-Replace-the-return-path-retry-logic.patch | ||||
| # For RHEL-11219 - migration tests failing for RHEL 9.4 sometimes | ||||
| Patch31: kvm-migration-Move-return-path-cleanup-to-main-migration.patch | ||||
| # For RHEL-7360 - Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk | ||||
| Patch32: kvm-file-posix-Clear-bs-bl.zoned-on-error.patch | ||||
| # For RHEL-7360 - Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk | ||||
| Patch33: kvm-file-posix-Check-bs-bl.zoned-for-zone-info.patch | ||||
| # For RHEL-7360 - Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk | ||||
| Patch34: kvm-file-posix-Fix-zone-update-in-I-O-error-path.patch | ||||
| # For RHEL-7360 - Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk | ||||
| Patch35: kvm-file-posix-Simplify-raw_co_prw-s-out-zone-code.patch | ||||
| # For RHEL-7360 - Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk | ||||
| Patch36: kvm-tests-file-io-error-New-test.patch | ||||
| 
 | ||||
| %if %{have_clang} | ||||
| BuildRequires: clang | ||||
| @ -1254,6 +1280,25 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ | ||||
| %endif | ||||
| 
 | ||||
| %changelog | ||||
| * Mon Oct 16 2023 Miroslav Rezanina <mrezanin@redhat.com> - 8.1.0-3 | ||||
| - kvm-migration-Fix-race-that-dest-preempt-thread-close-to.patch [RHEL-11219] | ||||
| - kvm-migration-Fix-possible-race-when-setting-rp_state.er.patch [RHEL-11219] | ||||
| - kvm-migration-Fix-possible-races-when-shutting-down-the-.patch [RHEL-11219] | ||||
| - kvm-migration-Fix-possible-race-when-shutting-down-to_ds.patch [RHEL-11219] | ||||
| - kvm-migration-Remove-redundant-cleanup-of-postcopy_qemuf.patch [RHEL-11219] | ||||
| - kvm-migration-Consolidate-return-path-closing-code.patch [RHEL-11219] | ||||
| - kvm-migration-Replace-the-return-path-retry-logic.patch [RHEL-11219] | ||||
| - kvm-migration-Move-return-path-cleanup-to-main-migration.patch [RHEL-11219] | ||||
| - kvm-file-posix-Clear-bs-bl.zoned-on-error.patch [RHEL-7360] | ||||
| - kvm-file-posix-Check-bs-bl.zoned-for-zone-info.patch [RHEL-7360] | ||||
| - kvm-file-posix-Fix-zone-update-in-I-O-error-path.patch [RHEL-7360] | ||||
| - kvm-file-posix-Simplify-raw_co_prw-s-out-zone-code.patch [RHEL-7360] | ||||
| - kvm-tests-file-io-error-New-test.patch [RHEL-7360] | ||||
| - Resolves: RHEL-11219 | ||||
|   (migration tests failing for RHEL 9.4 sometimes) | ||||
| - Resolves: RHEL-7360 | ||||
|   (Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk) | ||||
| 
 | ||||
| * Mon Oct 02 2023 Miroslav Rezanina <mrezanin@redhat.com> - 8.1.0-2 | ||||
| - kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch [RHEL-832] | ||||
| - Resolves: RHEL-832 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user