- Apply aio-posix-fix-race-between-epoll-upgrade.patch

- Apply io-remove-io-watch-if-TLS-channel-is-closed.patch
- Apply vdpa-map-shadow-vrings-with-MAP_SHARED.patch
- Apply vdpa-net-move-iova-tree-creation.patch
- Apply vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-function.patch
- Apply vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net.patch
This commit is contained in:
eabdullin 2023-09-13 10:53:28 +03:00
parent 6ffea859dd
commit 6ec858b9bc
7 changed files with 651 additions and 2 deletions

View File

@ -0,0 +1,71 @@
From cc04f4c32b475ef0bf1485c5b008921ba18ad89c Mon Sep 17 00:00:00 2001
From: eabdullin <ed.abdullin.1@gmail.com>
Date: Wed, 13 Sep 2023 11:43:16 +0300
Subject: [PATCH] [PATCH] aio-posix: fix race between epoll upgrade and
aio_set_fd_handler()
If another thread calls aio_set_fd_handler() while the IOThread event
loop is upgrading from ppoll(2) to epoll(7) then we might miss new
AioHandlers. The epollfd will not monitor the new AioHandler's fd,
resulting in hangs.
Take the AioHandler list lock while upgrading to epoll. This prevents
AioHandlers from changing while epoll is being set up. If we cannot lock
because we're in a nested event loop, then don't upgrade to epoll (it
will happen next time we're not in a nested call).
The downside to taking the lock is that the aio_set_fd_handler() thread
has to wait until the epoll upgrade is finished, which involves many
epoll_ctl(2) system calls. However, this scenario is rare and I couldn't
think of another solution that is still simple.
---
util/fdmon-epoll.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
index e11a8a022..1683aa110 100644
--- a/util/fdmon-epoll.c
+++ b/util/fdmon-epoll.c
@@ -127,6 +127,8 @@ static bool fdmon_epoll_try_enable(AioContext *ctx)
bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
{
+ bool ok;
+
if (ctx->epollfd < 0) {
return false;
}
@@ -136,14 +138,23 @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
return false;
}
- if (npfd >= EPOLL_ENABLE_THRESHOLD) {
- if (fdmon_epoll_try_enable(ctx)) {
- return true;
- } else {
- fdmon_epoll_disable(ctx);
- }
+ if (npfd < EPOLL_ENABLE_THRESHOLD) {
+ return false;
+ }
+
+ /* The list must not change while we add fds to epoll */
+ if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
+ return false;
+ }
+
+ ok = fdmon_epoll_try_enable(ctx);
+
+ qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
+
+ if (!ok) {
+ fdmon_epoll_disable(ctx);
}
- return false;
+ return ok;
}
void fdmon_epoll_setup(AioContext *ctx)
--
2.39.2 (Apple Git-143)

View File

@ -0,0 +1,80 @@
From 10be627d2b5ec2d6b3dce045144aa739eef678b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
Date: Tue, 20 Jun 2023 09:45:34 +0100
Subject: [PATCH] io: remove io watch if TLS channel is closed during handshake
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The TLS handshake make take some time to complete, during which time an
I/O watch might be registered with the main loop. If the owner of the
I/O channel invokes qio_channel_close() while the handshake is waiting
to continue the I/O watch must be removed. Failing to remove it will
later trigger the completion callback which the owner is not expecting
to receive. In the case of the VNC server, this results in a SEGV as
vnc_disconnect_start() tries to shutdown a client connection that is
already gone / NULL.
CVE-2023-3354
Reported-by: jiangyegen <jiangyegen@huawei.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
include/io/channel-tls.h | 1 +
io/channel-tls.c | 18 ++++++++++++------
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/include/io/channel-tls.h b/include/io/channel-tls.h
index 5672479e9eb6..26c67f17e2d3 100644
--- a/include/io/channel-tls.h
+++ b/include/io/channel-tls.h
@@ -48,6 +48,7 @@ struct QIOChannelTLS {
QIOChannel *master;
QCryptoTLSSession *session;
QIOChannelShutdown shutdown;
+ guint hs_ioc_tag;
};
/**
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 9805dd0a3f64..847d5297c339 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -198,12 +198,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc,
}
trace_qio_channel_tls_handshake_pending(ioc, status);
- qio_channel_add_watch_full(ioc->master,
- condition,
- qio_channel_tls_handshake_io,
- data,
- NULL,
- context);
+ ioc->hs_ioc_tag =
+ qio_channel_add_watch_full(ioc->master,
+ condition,
+ qio_channel_tls_handshake_io,
+ data,
+ NULL,
+ context);
}
}
@@ -218,6 +219,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc,
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(
qio_task_get_source(task));
+ tioc->hs_ioc_tag = 0;
g_free(data);
qio_channel_tls_handshake_task(tioc, task, context);
@@ -378,6 +380,10 @@ static int qio_channel_tls_close(QIOChannel *ioc,
{
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
+ if (tioc->hs_ioc_tag) {
+ g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove);
+ }
+
return qio_channel_close(tioc->master, errp);
}

View File

@ -0,0 +1,105 @@
From babf8b87127ae809b31b3c0a117dcbc91aaf9aba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Fri, 2 Jun 2023 16:38:54 +0200
Subject: [PATCH] vdpa: map shadow vrings with MAP_SHARED
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The vdpa devices that use va addresses neeeds these maps shared.
Otherwise, vhost_vdpa checks will refuse to accept the maps.
The mmap call will always return a page aligned address, so removing the
qemu_memalign call. Keeping the ROUND_UP for the size as we still need
to DMA-map them in full.
Not applying fixes tag as it never worked with va devices.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20230602143854.1879091-4-eperezma@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
hw/virtio/vhost-shadow-virtqueue.c | 18 +++++++++---------
net/vhost-vdpa.c | 16 ++++++++--------
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index bd7c12b6d37a..1b1d85306cf7 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -647,7 +647,7 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
VirtQueue *vq, VhostIOVATree *iova_tree)
{
- size_t desc_size, driver_size, device_size;
+ size_t desc_size;
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
svq->next_guest_avail_elem = NULL;
@@ -659,14 +659,14 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
svq->iova_tree = iova_tree;
svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
- driver_size = vhost_svq_driver_area_size(svq);
- device_size = vhost_svq_device_area_size(svq);
- svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size);
+ svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
+ -1, 0);
desc_size = sizeof(vring_desc_t) * svq->vring.num;
svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
- memset(svq->vring.desc, 0, driver_size);
- svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
- memset(svq->vring.used, 0, device_size);
+ svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
+ -1, 0);
svq->desc_state = g_new0(SVQDescState, svq->vring.num);
svq->desc_next = g_new0(uint16_t, svq->vring.num);
for (unsigned i = 0; i < svq->vring.num - 1; i++) {
@@ -705,8 +705,8 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
svq->vq = NULL;
g_free(svq->desc_next);
g_free(svq->desc_state);
- qemu_vfree(svq->vring.desc);
- qemu_vfree(svq->vring.used);
+ munmap(svq->vring.desc, vhost_svq_driver_area_size(svq));
+ munmap(svq->vring.used, vhost_svq_device_area_size(svq));
event_notifier_set_handler(&svq->hdev_call, NULL);
}
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index e425fabc3489..8840ca2ea447 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -203,8 +203,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
return;
}
- qemu_vfree(s->cvq_cmd_out_buffer);
- qemu_vfree(s->status);
+ munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
+ munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
g_free(s->vhost_net);
@@ -761,12 +761,12 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
s->vhost_vdpa.iova_range = iova_range;
s->vhost_vdpa.shadow_data = svq;
if (!is_datapath) {
- s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
- vhost_vdpa_net_cvq_cmd_page_len());
- memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
- s->status = qemu_memalign(qemu_real_host_page_size(),
- vhost_vdpa_net_cvq_cmd_page_len());
- memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());
+ s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
+ -1, 0);
s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
s->vhost_vdpa.shadow_vq_ops_opaque = s;

View File

@ -0,0 +1,247 @@
From 00ef422e9fbfef1fb40447b08826db0951d788dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Fri, 3 Mar 2023 18:24:32 +0100
Subject: [PATCH] vdpa net: move iova tree creation from init to start
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Only create iova_tree if and when it is needed.
The cleanup keeps being responsible for the last VQ but this change
allows it to merge both cleanup functions.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20230303172445.1089785-2-eperezma@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
net/vhost-vdpa.c | 113 ++++++++++++++++++++++++++++++++++-------------
1 file changed, 83 insertions(+), 30 deletions(-)
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index de5ed8ff22ca..d195f48776fe 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -178,7 +178,6 @@ err_init:
static void vhost_vdpa_cleanup(NetClientState *nc)
{
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
- struct vhost_dev *dev = &s->vhost_net->dev;
/*
* If a peer NIC is attached, do not cleanup anything.
@@ -190,9 +189,6 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
}
qemu_vfree(s->cvq_cmd_out_buffer);
qemu_vfree(s->status);
- if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
- g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
- }
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
g_free(s->vhost_net);
@@ -242,10 +238,64 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
return size;
}
+/** From any vdpa net client, get the netclient of the first queue pair */
+static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
+{
+ NICState *nic = qemu_get_nic(s->nc.peer);
+ NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
+
+ return DO_UPCAST(VhostVDPAState, nc, nc0);
+}
+
+static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
+{
+ struct vhost_vdpa *v = &s->vhost_vdpa;
+
+ if (v->shadow_vqs_enabled) {
+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+ v->iova_range.last);
+ }
+}
+
+static int vhost_vdpa_net_data_start(NetClientState *nc)
+{
+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+ struct vhost_vdpa *v = &s->vhost_vdpa;
+
+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+ if (v->index == 0) {
+ vhost_vdpa_net_data_start_first(s);
+ return 0;
+ }
+
+ if (v->shadow_vqs_enabled) {
+ VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
+ v->iova_tree = s0->vhost_vdpa.iova_tree;
+ }
+
+ return 0;
+}
+
+static void vhost_vdpa_net_client_stop(NetClientState *nc)
+{
+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+ struct vhost_dev *dev;
+
+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
+
+ dev = s->vhost_vdpa.dev;
+ if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+ }
+}
+
static NetClientInfo net_vhost_vdpa_info = {
.type = NET_CLIENT_DRIVER_VHOST_VDPA,
.size = sizeof(VhostVDPAState),
.receive = vhost_vdpa_receive,
+ .start = vhost_vdpa_net_data_start,
+ .stop = vhost_vdpa_net_client_stop,
.cleanup = vhost_vdpa_cleanup,
.has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
.has_ufo = vhost_vdpa_has_ufo,
@@ -359,7 +409,7 @@ dma_map_err:
static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
- VhostVDPAState *s;
+ VhostVDPAState *s, *s0;
struct vhost_vdpa *v;
uint64_t backend_features;
int64_t cvq_group;
@@ -423,8 +473,6 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
return r;
}
- v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
- v->iova_range.last);
v->shadow_vqs_enabled = true;
s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
@@ -433,6 +481,27 @@ out:
return 0;
}
+ s0 = vhost_vdpa_net_first_nc_vdpa(s);
+ if (s0->vhost_vdpa.iova_tree) {
+ /*
+ * SVQ is already configured for all virtqueues. Reuse IOVA tree for
+ * simplicity, whether CVQ shares ASID with guest or not, because:
+ * - Memory listener need access to guest's memory addresses allocated
+ * in the IOVA tree.
+ * - There should be plenty of IOVA address space for both ASID not to
+ * worry about collisions between them. Guest's translations are
+ * still validated with virtio virtqueue_pop so there is no risk for
+ * the guest to access memory that it shouldn't.
+ *
+ * To allocate a iova tree per ASID is doable but it complicates the
+ * code and it is not worth it for the moment.
+ */
+ v->iova_tree = s0->vhost_vdpa.iova_tree;
+ } else {
+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
+ v->iova_range.last);
+ }
+
r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
vhost_vdpa_net_cvq_cmd_page_len(), false);
if (unlikely(r < 0)) {
@@ -457,15 +526,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
if (s->vhost_vdpa.shadow_vqs_enabled) {
vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
- if (!s->always_svq) {
- /*
- * If only the CVQ is shadowed we can delete this safely.
- * If all the VQs are shadows this will be needed by the time the
- * device is started again to register SVQ vrings and similar.
- */
- g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
- }
}
+
+ vhost_vdpa_net_client_stop(nc);
}
static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
@@ -675,8 +738,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
int nvqs,
bool is_datapath,
bool svq,
- struct vhost_vdpa_iova_range iova_range,
- VhostIOVATree *iova_tree)
+ struct vhost_vdpa_iova_range iova_range)
{
NetClientState *nc = NULL;
VhostVDPAState *s;
@@ -698,7 +760,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
s->vhost_vdpa.shadow_vqs_enabled = svq;
s->vhost_vdpa.iova_range = iova_range;
s->vhost_vdpa.shadow_data = svq;
- s->vhost_vdpa.iova_tree = iova_tree;
if (!is_datapath) {
s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
vhost_vdpa_net_cvq_cmd_page_len());
@@ -776,7 +837,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
uint64_t features;
int vdpa_device_fd;
g_autofree NetClientState **ncs = NULL;
- g_autoptr(VhostIOVATree) iova_tree = NULL;
struct vhost_vdpa_iova_range iova_range;
NetClientState *nc;
int queue_pairs, r, i = 0, has_cvq = 0;
@@ -822,12 +882,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
}
vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
- if (opts->x_svq) {
- if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
- goto err_svq;
- }
-
- iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
+ if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
+ goto err;
}
ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
@@ -835,7 +891,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
for (i = 0; i < queue_pairs; i++) {
ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
vdpa_device_fd, i, 2, true, opts->x_svq,
- iova_range, iova_tree);
+ iova_range);
if (!ncs[i])
goto err;
}
@@ -843,13 +899,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
if (has_cvq) {
nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
vdpa_device_fd, i, 1, false,
- opts->x_svq, iova_range, iova_tree);
+ opts->x_svq, iova_range);
if (!nc)
goto err;
}
- /* iova_tree ownership belongs to last NetClientState */
- g_steal_pointer(&iova_tree);
return 0;
err:
@@ -859,7 +913,6 @@ err:
}
}
-err_svq:
qemu_close(vdpa_device_fd);
return -1;

View File

@ -0,0 +1,70 @@
From 915bf6ccd7a5c9b6cbea7a72f153597d1b98834f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Fri, 2 Jun 2023 16:38:53 +0200
Subject: [PATCH] vdpa: reorder vhost_vdpa_net_cvq_cmd_page_len function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We need to call it from resource cleanup context, as munmap needs the
size of the mappings.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230602143854.1879091-3-eperezma@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
net/vhost-vdpa.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 4345f1e6de83..e425fabc3489 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -110,6 +110,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
return s->vhost_net;
}
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+ /*
+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
+ * In buffer is always 1 byte, so it should fit here
+ */
+ return sizeof(struct virtio_net_ctrl_hdr) +
+ 2 * sizeof(struct virtio_net_ctrl_mac) +
+ MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
{
uint64_t invalid_dev_features =
@@ -362,22 +378,6 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
vhost_iova_tree_remove(tree, *map);
}
-static size_t vhost_vdpa_net_cvq_cmd_len(void)
-{
- /*
- * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
- * In buffer is always 1 byte, so it should fit here
- */
- return sizeof(struct virtio_net_ctrl_hdr) +
- 2 * sizeof(struct virtio_net_ctrl_mac) +
- MAC_TABLE_ENTRIES * ETH_ALEN;
-}
-
-static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
-{
- return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
-}
-
/** Map CVQ buffer. */
static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
bool write)

View File

@ -0,0 +1,56 @@
From a0d7215e339b61c7d7a7b3fcf754954d80d93eb8 Mon Sep 17 00:00:00 2001
From: Ani Sinha <anisinha@redhat.com>
Date: Mon, 19 Jun 2023 12:22:09 +0530
Subject: [PATCH] vhost-vdpa: do not cleanup the vdpa/vhost-net structures if
peer nic is present
When a peer nic is still attached to the vdpa backend, it is too early to free
up the vhost-net and vdpa structures. If these structures are freed here, then
QEMU crashes when the guest is being shut down. The following call chain
would result in an assertion failure since the pointer returned from
vhost_vdpa_get_vhost_net() would be NULL:
do_vm_stop() -> vm_state_notify() -> virtio_set_status() ->
virtio_net_vhost_status() -> get_vhost_net().
Therefore, we defer freeing up the structures until at guest shutdown
time when qemu_cleanup() calls net_cleanup() which then calls
qemu_del_net_client() which would eventually call vhost_vdpa_cleanup()
again to free up the structures. This time, the loop in net_cleanup()
ensures that vhost_vdpa_cleanup() will be called one last time when
all the peer nics are detached and freed.
All unit tests pass with this change.
CC: imammedo@redhat.com
CC: jusual@redhat.com
CC: mst@redhat.com
Fixes: CVE-2023-3301
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Message-Id: <20230619065209.442185-1-anisinha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
net/vhost-vdpa.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 9e92b3558c75..e19ab063fa11 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -180,6 +180,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
struct vhost_dev *dev = &s->vhost_net->dev;
+ /*
+ * If a peer NIC is attached, do not cleanup anything.
+ * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
+ * when the guest is shutting down.
+ */
+ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+ return;
+ }
qemu_vfree(s->cvq_cmd_out_buffer);
qemu_vfree(s->status);
if (dev->vq_index + dev->nvqs == dev->vq_index_end) {

View File

@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \
Summary: QEMU is a machine emulator and virtualizer Summary: QEMU is a machine emulator and virtualizer
Name: qemu-kvm Name: qemu-kvm
Version: 7.2.0 Version: 7.2.0
Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.3.alma Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.5.alma.1
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
# Epoch 15 used for RHEL 8 # Epoch 15 used for RHEL 8
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@ -419,11 +419,23 @@ Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch
# For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z] # For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z]
Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch
# Patches were taken from upstream: # Patches were taken from upstream and backported to apply cleanly:
# https://github.com/qemu/qemu/commit/3b6f485275ae95a81eec589d2773b86ca9ddec4d # https://github.com/qemu/qemu/commit/3b6f485275ae95a81eec589d2773b86ca9ddec4d
Patch1001: 0001-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interruption.patch Patch1001: 0001-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interruption.patch
# https://github.com/qemu/qemu/commit/2e1a9de96b487cf818a22d681cad8d3f5d18dcca # https://github.com/qemu/qemu/commit/2e1a9de96b487cf818a22d681cad8d3f5d18dcca
Patch1002: 0002-vdpa-stop-all-svq-on-device-deletion.patch Patch1002: 0002-vdpa-stop-all-svq-on-device-deletion.patch
# https://github.com/qemu/qemu/commit/e62da98527fa35fe5f532cded01a33edf9fbe7b2.patch
Patch1003: aio-posix-fix-race-between-epoll-upgrade.patch
# https://github.com/qemu/qemu/commit/a0d7215e339b61c7d7a7b3fcf754954d80d93eb8.patch
Patch1008: vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net.patch
# https://github.com/qemu/qemu/commit/00ef422e9fbfef1fb40447b08826db0951d788dd.patch
Patch1006: vdpa-net-move-iova-tree-creation.patch
# https://github.com/qemu/qemu/commit/915bf6ccd7a5c9b6cbea7a72f153597d1b98834f.patch
Patch1007: vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-function.patch
# https://github.com/qemu/qemu/commit/babf8b87127ae809b31b3c0a117dcbc91aaf9aba.patch
Patch1005: vdpa-map-shadow-vrings-with-MAP_SHARED.patch
# https://github.com/qemu/qemu/commit/10be627d2b5ec2d6b3dce045144aa739eef678b4.patch
Patch1004: io-remove-io-watch-if-TLS-channel-is-closed.patch
%if %{have_clang} %if %{have_clang}
BuildRequires: clang BuildRequires: clang
@ -1454,6 +1466,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
%endif %endif
%changelog %changelog
* Wed Sep 13 2023 Eduard Abdullin <eabdullin@almalinux.org> - 7.2.0-14.el9_2.5.alma.1
- Apply aio-posix-fix-race-between-epoll-upgrade.patch
- Apply io-remove-io-watch-if-TLS-channel-is-closed.patch
- Apply vdpa-map-shadow-vrings-with-MAP_SHARED.patch
- Apply vdpa-net-move-iova-tree-creation.patch
- Apply vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-function.patch
- Apply vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net.patch
* Thu Aug 03 2023 Eduard Abdullin <eabdullin@almalinux.org> - 7.2.0-14.el9_2.3.alma * Thu Aug 03 2023 Eduard Abdullin <eabdullin@almalinux.org> - 7.2.0-14.el9_2.3.alma
- Apply 0001-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interruption.patch - Apply 0001-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interruption.patch
and 0002-vdpa-stop-all-svq-on-device-deletion.patch and 0002-vdpa-stop-all-svq-on-device-deletion.patch