From d24efdca6de76d355de467c3a210495187df305c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 21 Mar 2024 02:32:26 -0400 Subject: [PATCH] * Thu Mar 21 2024 Miroslav Rezanina - 8.2.0-10 - kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch [RHEL-24614] - kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch [RHEL-24614] - kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch [RHEL-24614] - Resolves: RHEL-24614 ([RHEL9][chardev] qemu hit core dump while using TLS server from host to guest) --- ...har-socket-Fix-TLS-io-channels-sendi.patch | 60 +++++ ...se-a-child-source-for-qio-input-sour.patch | 216 ++++++++++++++++++ ...iority-of-the-HUP-GSource-in-socket-.patch | 78 +++++++ qemu-kvm.spec | 15 +- 4 files changed, 368 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch create mode 100644 kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch create mode 100644 kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch diff --git a/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch b/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch new file mode 100644 index 0000000..bc0f15a --- /dev/null +++ b/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch @@ -0,0 +1,60 @@ +From 2e0e4355b2d4edb66b7d8c198339e17940abd682 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 13:03:19 +0000 +Subject: [PATCH 2/3] Revert "chardev/char-socket: Fix TLS io channels sending + too much data to the backend" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [2/3] 1cb3d72b86ced0f70a09dfa0d325ae8a85db1b2b (berrange/centos-src-qemu) + +This commit results in unexpected termination of the TLS connection. +When 'fd_can_read' returns 0, the code goes on to pass a zero length +buffer to qio_channel_read. The TLS impl calls into gnutls_recv() +with this zero length buffer, at which point GNUTLS returns an error +GNUTLS_E_INVALID_REQUEST. This is treated as fatal by QEMU's TLS code +resulting in the connection being torn down by the chardev. + +Simply skipping the qio_channel_read when the buffer length is zero +is also not satisfactory, as it results in a high CPU burn busy loop +massively slowing QEMU's functionality. + +The proper solution is to avoid tcp_chr_read being called at all +unless the frontend is able to accept more data. This will be done +in a followup commit. + +This reverts commit 462945cd22d2bcd233401ed3aa167d83a8e35b05 + +Reviewed-by: Thomas Huth +Signed-off-by: Daniel P. Berrangé +(cherry picked from commit e8ee827ffdb86ebbd5f5213a1f78123c25a90864) +--- + chardev/char-socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index f48d341ebc..51d0943fce 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) + s->max_size <= 0) { + return TRUE; + } +- len = tcp_chr_read_poll(opaque); +- if (len > sizeof(buf)) { +- len = sizeof(buf); ++ len = sizeof(buf); ++ if (len > s->max_size) { ++ len = s->max_size; + } + size = tcp_chr_recv(chr, (void *)buf, len); + if (size == 0 || (size == -1 && errno != EAGAIN)) { +-- +2.39.3 + diff --git a/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch b/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch new file mode 100644 index 0000000..135afbe --- /dev/null +++ b/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch @@ -0,0 +1,216 @@ +From ab5a33d57b48e35388928e388bb6e6479bc77651 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 17:08:30 +0000 +Subject: [PATCH 3/3] Revert "chardev: use a child source for qio input source" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [3/3] b58e6c19c2b11d5d28db31cf1386226fb01d195e (berrange/centos-src-qemu) + +This reverts commit a7077b8e354d90fec26c2921aa2dea85b90dff90, +and add comments to explain why child sources cannot be used. + +When a GSource is added as a child of another GSource, if its +'prepare' function indicates readiness, then the parent's +'prepare' function will never be run. The io_watch_poll_prepare +absolutely *must* be run on every iteration of the main loop, +to ensure that the chardev backend doesn't feed data to the +frontend that it is unable to consume. + +At the time a7077b8e354d90fec26c2921aa2dea85b90dff90 was made, +all the child GSource impls were relying on poll'ing an FD, +so their 'prepare' functions would never indicate readiness +ahead of poll() being invoked. So the buggy behaviour was +not noticed and lay dormant. + +Relatively recently the QIOChannelTLS impl introduced a +level 2 child GSource, which checks with GNUTLS whether it +has cached any data that was decoded but not yet consumed: + + commit ffda5db65aef42266a5053a4be34515106c4c7ee + Author: Antoine Damhet + Date: Tue Nov 15 15:23:29 2022 +0100 + + io/channel-tls: fix handling of bigger read buffers + + Since the TLS backend can read more data from the underlying QIOChannel + we introduce a minimal child GSource to notify if we still have more + data available to be read. + + Signed-off-by: Antoine Damhet + Signed-off-by: Charles Frey + Signed-off-by: Daniel P. Berrangé + +With this, it is now quite common for the 'prepare' function +on a QIOChannelTLS GSource to indicate immediate readiness, +bypassing the parent GSource 'prepare' function. IOW, the +critical 'io_watch_poll_prepare' is being skipped on some +iterations of the main loop. As a result chardev frontend +asserts are now being triggered as they are fed data they +are not ready to consume. + +A reproducer is as follows: + + * In terminal 1 run a GNUTLS *echo* server + + $ gnutls-serv --echo \ + --x509cafile ca-cert.pem \ + --x509keyfile server-key.pem \ + --x509certfile server-cert.pem \ + -p 9000 + + * In terminal 2 run a QEMU guest + + $ qemu-system-s390x \ + -nodefaults \ + -display none \ + -object tls-creds-x509,id=tls0,dir=$PWD,endpoint=client \ + -chardev socket,id=con0,host=localhost,port=9000,tls-creds=tls0 \ + -device sclpconsole,chardev=con0 \ + -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 + +After the previous patch revert, but before this patch revert, +this scenario will crash: + + qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion + `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. + +This assert indicates that 'tcp_chr_read' was called without +'tcp_chr_read_poll' having first been checked for ability to +receive more data + +QEMU's use of a 'prepare' function to create/delete another +GSource is rather a hack and not normally the kind of thing that +is expected to be done by a GSource. There is no mechanism to +force GLib to always run the 'prepare' function of a parent +GSource. The best option is to simply not use the child source +concept, and go back to the functional approach previously +relied on. + +Reviewed-by: Marc-André Lureau +Reviewed-by: Thomas Huth +Tested-by: Thomas Huth +Signed-off-by: Daniel P. Berrangé +(cherry picked from commit 038b4217884c6f297278bb1ec6f0463c6c8221de) +--- + chardev/char-io.c | 56 ++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 51 insertions(+), 5 deletions(-) + +diff --git a/chardev/char-io.c b/chardev/char-io.c +index 4451128cba..dab77b112e 100644 +--- a/chardev/char-io.c ++++ b/chardev/char-io.c +@@ -33,6 +33,7 @@ typedef struct IOWatchPoll { + IOCanReadHandler *fd_can_read; + GSourceFunc fd_read; + void *opaque; ++ GMainContext *context; + } IOWatchPoll; + + static IOWatchPoll *io_watch_poll_from_source(GSource *source) +@@ -50,28 +51,59 @@ static gboolean io_watch_poll_prepare(GSource *source, + return FALSE; + } + ++ /* ++ * We do not register the QIOChannel watch as a child GSource. ++ * The 'prepare' function on the parent GSource will be ++ * skipped if a child GSource's 'prepare' function indicates ++ * readiness. We need this prepare function be guaranteed ++ * to run on *every* iteration of the main loop, because ++ * it is critical to ensure we remove the QIOChannel watch ++ * if 'fd_can_read' indicates the frontend cannot receive ++ * more data. ++ */ + if (now_active) { + iwp->src = qio_channel_create_watch( + iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); + g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); +- g_source_add_child_source(source, iwp->src); +- g_source_unref(iwp->src); ++ g_source_attach(iwp->src, iwp->context); + } else { +- g_source_remove_child_source(source, iwp->src); ++ g_source_destroy(iwp->src); ++ g_source_unref(iwp->src); + iwp->src = NULL; + } + return FALSE; + } + ++static gboolean io_watch_poll_check(GSource *source) ++{ ++ return FALSE; ++} ++ + static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback, + gpointer user_data) + { +- return G_SOURCE_CONTINUE; ++ abort(); ++} ++ ++static void io_watch_poll_finalize(GSource *source) ++{ ++ /* ++ * Due to a glib bug, removing the last reference to a source ++ * inside a finalize callback causes recursive locking (and a ++ * deadlock). This is not a problem inside other callbacks, ++ * including dispatch callbacks, so we call io_remove_watch_poll ++ * to remove this source. At this point, iwp->src must ++ * be NULL, or we would leak it. ++ */ ++ IOWatchPoll *iwp = io_watch_poll_from_source(source); ++ assert(iwp->src == NULL); + } + + static GSourceFuncs io_watch_poll_funcs = { + .prepare = io_watch_poll_prepare, ++ .check = io_watch_poll_check, + .dispatch = io_watch_poll_dispatch, ++ .finalize = io_watch_poll_finalize, + }; + + GSource *io_add_watch_poll(Chardev *chr, +@@ -91,6 +123,7 @@ GSource *io_add_watch_poll(Chardev *chr, + iwp->ioc = ioc; + iwp->fd_read = (GSourceFunc) fd_read; + iwp->src = NULL; ++ iwp->context = context; + + name = g_strdup_printf("chardev-iowatch-%s", chr->label); + g_source_set_name((GSource *)iwp, name); +@@ -101,10 +134,23 @@ GSource *io_add_watch_poll(Chardev *chr, + return (GSource *)iwp; + } + ++static void io_remove_watch_poll(GSource *source) ++{ ++ IOWatchPoll *iwp; ++ ++ iwp = io_watch_poll_from_source(source); ++ if (iwp->src) { ++ g_source_destroy(iwp->src); ++ g_source_unref(iwp->src); ++ iwp->src = NULL; ++ } ++ g_source_destroy(&iwp->parent); ++} ++ + void remove_fd_in_watch(Chardev *chr) + { + if (chr->gsource) { +- g_source_destroy(chr->gsource); ++ io_remove_watch_poll(chr->gsource); + chr->gsource = NULL; + } + } +-- +2.39.3 + diff --git a/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch b/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch new file mode 100644 index 0000000..30e055f --- /dev/null +++ b/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch @@ -0,0 +1,78 @@ +From 4d4102f6e2f9afd6182888787ae8b570347df87d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 18:06:59 +0000 +Subject: [PATCH 1/3] chardev: lower priority of the HUP GSource in socket + chardev +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [1/3] 842f54349191b0206e68f35a7a80155f5a584942 (berrange/centos-src-qemu) + +The socket chardev often has 2 GSource object registered against the +same FD. One is registered all the time and is just intended to handle +POLLHUP events, while the other gets registered & unregistered on the +fly as the frontend is ready to receive more data or not. + +It is very common for poll() to signal a POLLHUP event at the same time +as there is pending incoming data from the disconnected client. It is +therefore essential to process incoming data prior to processing HUP. +The problem with having 2 GSource on the same FD is that there is no +guaranteed ordering of execution between them, so the chardev code may +process HUP first and thus discard data. + +This failure scenario is non-deterministic but can be seen fairly +reliably by reverting a7077b8e354d90fec26c2921aa2dea85b90dff90, and +then running 'tests/unit/test-char', which will sometimes fail with +missing data. + +Ideally QEMU would only have 1 GSource, but that's a complex code +refactoring job. The next best solution is to try to ensure ordering +between the 2 GSource objects. This can be achieved by lowering the +priority of the HUP GSource, so that it is never dispatched if the +main GSource is also ready to dispatch. Counter-intuitively, lowering +the priority of a GSource is done by raising its priority number. + +Reviewed-by: Marc-André Lureau +Reviewed-by: Thomas Huth +Signed-off-by: Daniel P. Berrangé +(cherry picked from commit 8bd8b04adc9f18904f323dff085f8b4ec77915c6) +--- + chardev/char-socket.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 034840593d..f48d341ebc 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -597,6 +597,22 @@ static void update_ioc_handlers(SocketChardev *s) + + remove_hup_source(s); + s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP); ++ /* ++ * poll() is liable to return POLLHUP even when there is ++ * still incoming data available to read on the FD. If ++ * we have the hup_source at the same priority as the ++ * main io_add_watch_poll GSource, then we might end up ++ * processing the POLLHUP event first, closing the FD, ++ * and as a result silently discard data we should have ++ * read. ++ * ++ * By setting the hup_source to G_PRIORITY_DEFAULT + 1, ++ * we ensure that io_add_watch_poll GSource will always ++ * be dispatched first, thus guaranteeing we will be ++ * able to process all incoming data before closing the ++ * FD ++ */ ++ g_source_set_priority(s->hup_source, G_PRIORITY_DEFAULT + 1); + g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup, + chr, NULL); + g_source_attach(s->hup_source, chr->gcontext); +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e614d12..120bf45 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.2.0 -Release: 9%{?rcrel}%{?dist}%{?cc_suffix} +Release: 10%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -600,6 +600,12 @@ Patch170: kvm-nbd-server-Fix-race-in-draining-the-export.patch Patch171: kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch # For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information Patch172: kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch173: kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch174: kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch175: kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch %if %{have_clang} BuildRequires: clang @@ -1661,6 +1667,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Mar 21 2024 Miroslav Rezanina - 8.2.0-10 +- kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch [RHEL-24614] +- kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch [RHEL-24614] +- kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch [RHEL-24614] +- Resolves: RHEL-24614 + ([RHEL9][chardev] qemu hit core dump while using TLS server from host to guest) + * Wed Mar 20 2024 Miroslav Rezanina - 8.2.0-9 - kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch [RHEL-28125] - kvm-nbd-server-Fix-race-in-draining-the-export.patch [RHEL-28125]