From c5e28e5aba7bb801d3ebd1a410201eb155ea777c Mon Sep 17 00:00:00 2001 From: David Rheinsberg Date: Wed, 17 Feb 2021 12:40:57 +0100 Subject: [PATCH] dbus-broker: apply activation-tracking fixes Pull in 2 activation-tracking fixes from upstream dbus-broker. Signed-off-by: David Rheinsberg --- ...ab38eae2300e3d6f4e3eac0962ebe6e60ade.patch | 226 ++++++++++++++++++ ...227ec6ca25ad7d352c4e2d48a9937b19e2d0.patch | 207 ++++++++++++++++ dbus-broker.spec | 7 +- 3 files changed, 439 insertions(+), 1 deletion(-) create mode 100644 41ccab38eae2300e3d6f4e3eac0962ebe6e60ade.patch create mode 100644 bd5a227ec6ca25ad7d352c4e2d48a9937b19e2d0.patch diff --git a/41ccab38eae2300e3d6f4e3eac0962ebe6e60ade.patch b/41ccab38eae2300e3d6f4e3eac0962ebe6e60ade.patch new file mode 100644 index 0000000..0dff697 --- /dev/null +++ b/41ccab38eae2300e3d6f4e3eac0962ebe6e60ade.patch @@ -0,0 +1,226 @@ +From 41ccab38eae2300e3d6f4e3eac0962ebe6e60ade Mon Sep 17 00:00:00 2001 +From: David Rheinsberg +Date: Wed, 17 Feb 2021 12:01:49 +0100 +Subject: [PATCH] launch/service: track systemd jobs to close a race condition + +Start tracking the `JobRemoved` signal of systemd to know the point at +which systemd fully handled our submitted job. There is no other way to +reliably know when we can start handling PropertiesChanged signals. +systemd does not guarantee any other barrier on the bus but the +JobRemoved signal. + +This is unfortunate, since it requires us to watch *all* JobRemoved +signals of systemd. However, according to systemd developers, it is the +only supported way to track Job submission. Hence, we have to comply. + +Signed-off-by: David Rheinsberg +--- + src/launch/service.c | 119 ++++++++++++++++++++++++++++++++++++++++++- + src/launch/service.h | 2 + + 2 files changed, 119 insertions(+), 2 deletions(-) + +diff --git a/src/launch/service.c b/src/launch/service.c +index ab1a7b8..f23fa2d 100644 +--- a/src/launch/service.c ++++ b/src/launch/service.c +@@ -80,6 +80,7 @@ Service *service_free(Service *service) { + + c_rbnode_unlink(&service->rb_by_name); + c_rbnode_unlink(&service->rb); ++ free(service->job); + free(service->user); + for (size_t i = 0; i < service->argc; ++i) + free(service->argv[i]); +@@ -89,6 +90,7 @@ Service *service_free(Service *service) { + free(service->path); + service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit); + service->slot_watch_unit = sd_bus_slot_unref(service->slot_watch_unit); ++ service->slot_watch_jobs = sd_bus_slot_unref(service->slot_watch_jobs); + free(service); + + return NULL; +@@ -180,8 +182,10 @@ int service_new(Service **servicep, + } + + static void service_discard_activation(Service *service) { ++ service->job = c_free(service->job); + service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit); + service->slot_watch_unit = sd_bus_slot_unref(service->slot_watch_unit); ++ service->slot_watch_jobs = sd_bus_slot_unref(service->slot_watch_jobs); + } + + static int service_reset_activation(Service *service) { +@@ -216,11 +220,97 @@ static int service_reset_activation(Service *service) { + return 0; + } + ++static int service_watch_jobs_handler(sd_bus_message *message, void *userdata, sd_bus_error *errorp) { ++ Service *service = userdata; ++ const char *path = NULL, *unit = NULL, *result = NULL; ++ uint32_t id; ++ int r; ++ ++ /* ++ * Whenever we have an activation job queued, we want to know when it ++ * is done. We get a `JobRemoved` signal from systemd, which includes ++ * the reason why the job is done. ++ * We get those signals for all jobs, since we cannot know a job-id to ++ * match for before we create job. This is quite unfortunate, but ++ * little we can do about it now. ++ * ++ * We require this signal merely to know when systemd finished handling ++ * a job. For properly configured services, this should already tell us ++ * whether the startup was successful or failed. But for basic services ++ * that use no notify or dbus systemd-startup handling, we have to ++ * continue tracking their `ActiveState` to see whether they failed. ++ */ ++ ++ r = sd_bus_message_read(message, "uoss", &id, &path, &unit, &result); ++ if (r < 0) ++ return error_origin(r); ++ ++ if (!service->job || strcmp(path, service->job)) ++ return 0; ++ ++ if (!strcmp(result, "done") || !strcmp(result, "skipped")) { ++ /* ++ * Our job completed successfully. Make sure to stop watching ++ * it so the `ActiveState` handling will take effect. ++ */ ++ service->job = c_free(service->job); ++ service->slot_watch_jobs = sd_bus_slot_unref(service->slot_watch_jobs); ++ } else { ++ /* ++ * Our job failed. Forward this information to the broker so it ++ * can fail pending activations. ++ */ ++ r = service_reset_activation(service); ++ if (r) ++ return error_trace(r); ++ } ++ ++ return 0; ++} ++ ++static int service_watch_jobs(Service *service) { ++ int r; ++ ++ assert(!service->slot_watch_jobs); ++ assert(!service->job); ++ ++ r = sd_bus_match_signal_async( ++ service->launcher->bus_regular, ++ &service->slot_watch_jobs, ++ "org.freedesktop.systemd1", ++ "/org/freedesktop/systemd1", ++ "org.freedesktop.systemd1.Manager", ++ "JobRemoved", ++ service_watch_jobs_handler, ++ NULL, ++ service ++ ); ++ if (r < 0) ++ return error_origin(r); ++ ++ return 0; ++} ++ + static int service_watch_unit_handler(sd_bus_message *message, void *userdata, sd_bus_error *errorp) { + Service *service = userdata; + const char *interface = NULL, *property = NULL, *value = NULL; + int r; + ++ /* ++ * If we still watch the job-signals it means systemd has not yet fully ++ * finished our job. In this case, any errors will be caught by the ++ * `JobRemoved` handler and we do not have to track the unit, yet. ++ * Moreover, we must not track the unit, since it might still return ++ * failures before our job was actually handled by systemd. ++ * ++ * Hence, simply ignore any PropertiesChanged signals until we got ++ * confirmation by systemd that our job finished. Bus ordering will ++ * guarantee that we catch unit-failures both before and after the job ++ * completion. ++ */ ++ if (service->slot_watch_jobs) ++ return 0; ++ + /* + * The properties of the bus unit changed. We are only interested in + * the "ActiveState" property. We check whether it is included in the +@@ -343,14 +433,31 @@ static int service_start_unit_handler(sd_bus_message *message, void *userdata, s + Service *service = userdata; + Launcher *launcher = service->launcher; + const sd_bus_error *error; ++ const char *job; + int r; + + service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit); + + error = sd_bus_message_get_error(message); +- if (!error) +- /* unit queued successfully */ ++ if (!error) { ++ /* ++ * We successfully queued the job. Now remember the job path, ++ * so we can properly track when it finishes (via the ++ * JobRemoved signal). ++ */ ++ ++ assert(!service->job); ++ ++ r = sd_bus_message_read(message, "o", &job); ++ if (r < 0) ++ return error_origin(r); ++ ++ service->job = strdup(job); ++ if (!service->job) ++ return error_origin(-ENOMEM); ++ + return 1; ++ } + + /* + * We always forward activation failure to the broker, which then +@@ -437,6 +544,10 @@ static int service_start_unit(Service *service) { + _c_cleanup_(sd_bus_message_unrefp) sd_bus_message *method_call = NULL; + int r; + ++ r = service_watch_jobs(service); ++ if (r) ++ return error_trace(r); ++ + r = service_watch_unit(service, service->unit); + if (r) + return error_trace(r); +@@ -467,6 +578,10 @@ static int service_start_transient_unit(Service *service) { + const char *unique_name; + int r; + ++ r = service_watch_jobs(service); ++ if (r) ++ return error_trace(r); ++ + r = sd_bus_get_unique_name(launcher->bus_regular, &unique_name); + if (r < 0) + return error_origin(r); +diff --git a/src/launch/service.h b/src/launch/service.h +index fa5bee2..2d3f870 100644 +--- a/src/launch/service.h ++++ b/src/launch/service.h +@@ -17,6 +17,7 @@ struct Service { + bool not_found; + bool running; + bool reload_tag; ++ sd_bus_slot *slot_watch_jobs; + sd_bus_slot *slot_watch_unit; + sd_bus_slot *slot_start_unit; + CRBNode rb; +@@ -32,6 +33,7 @@ struct Service { + uint64_t n_missing_unit; + uint64_t n_masked_unit; + uint64_t last_serial; ++ char *job; + char id[]; + }; + diff --git a/bd5a227ec6ca25ad7d352c4e2d48a9937b19e2d0.patch b/bd5a227ec6ca25ad7d352c4e2d48a9937b19e2d0.patch new file mode 100644 index 0000000..39f2610 --- /dev/null +++ b/bd5a227ec6ca25ad7d352c4e2d48a9937b19e2d0.patch @@ -0,0 +1,207 @@ +From bd5a227ec6ca25ad7d352c4e2d48a9937b19e2d0 Mon Sep 17 00:00:00 2001 +From: David Rheinsberg +Date: Wed, 17 Feb 2021 11:33:27 +0100 +Subject: [PATCH] launch/service: rely on systemd PropertiesChanged behavior + +The systemd API guarantees that basic properties are always included +with value in the PropertiesChanged signals. They are never invalidated. +If that was the case, there would be no way to reliably track all state +changes of a unit. + +Drop the explicit query behavior and simplify the property handling. We +can safely rely on this API behavior. + +Signed-off-by: David Rheinsberg +--- + src/launch/service.c | 127 +++++++------------------------------------ + src/launch/service.h | 1 - + 2 files changed, 21 insertions(+), 107 deletions(-) + +diff --git a/src/launch/service.c b/src/launch/service.c +index 3f9d971..ab1a7b8 100644 +--- a/src/launch/service.c ++++ b/src/launch/service.c +@@ -88,7 +88,6 @@ Service *service_free(Service *service) { + free(service->name); + free(service->path); + service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit); +- service->slot_query_unit = sd_bus_slot_unref(service->slot_query_unit); + service->slot_watch_unit = sd_bus_slot_unref(service->slot_watch_unit); + free(service); + +@@ -182,7 +181,6 @@ int service_new(Service **servicep, + + static void service_discard_activation(Service *service) { + service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit); +- service->slot_query_unit = sd_bus_slot_unref(service->slot_query_unit); + service->slot_watch_unit = sd_bus_slot_unref(service->slot_watch_unit); + } + +@@ -218,71 +216,19 @@ static int service_reset_activation(Service *service) { + return 0; + } + +-static int service_handle_active_state(Service *service, const char *value) { +- int r; +- +- /* +- * The possible values of "ActiveState" are: +- * +- * active, reloading, inactive, failed, activating, deactivating +- * +- * We are never interested in positive results, because the broker +- * already gets those by tracking the name to be acquired. Therefore, +- * we only ever track negative results. This means we only ever react +- * to "failed". +- * We could also react to units entering "inactive", but we cannot know +- * upfront whether the unit is just a oneshot unit and thus is expected +- * to enter "inactive" when it finished. Hence, we simply require +- * anything to explicitly fail if they want to reset the activation. +- */ +- +- if (!strcmp(value, "failed")) { +- r = service_reset_activation(service); +- if (r) +- return error_trace(r); +- } +- +- return 0; +-} +- +-static int service_query_unit_handler(sd_bus_message *message, void *userdata, sd_bus_error *errorp) { +- Service *service = userdata; +- const char *v; +- int r; +- +- service->slot_query_unit = sd_bus_slot_unref(service->slot_query_unit); +- +- r = sd_bus_message_enter_container(message, 'v', "s"); +- if (r < 0) +- return error_origin(r); +- +- { +- r = sd_bus_message_read(message, "s", &v); +- if (r < 0) +- return error_origin(r); +- } +- +- r = sd_bus_message_exit_container(message); +- if (r < 0) +- return error_origin(r); +- +- r = service_handle_active_state(service, v); +- if (r) +- return error_trace(r); +- +- return 0; +-} +- + static int service_watch_unit_handler(sd_bus_message *message, void *userdata, sd_bus_error *errorp) { + Service *service = userdata; + const char *interface = NULL, *property = NULL, *value = NULL; +- bool invalidated = false; + int r; + + /* + * The properties of the bus unit changed. We are only interested in + * the "ActiveState" property. We check whether it is included in the +- * payload. If not, we query it, in case it was invalidated. ++ * payload. If not, we ignore the signal. ++ * ++ * Note that we rely on systemd including it with value in the signal. ++ * We will not query it, if it was merely invalidated. This is a ++ * systemd API guarantee, and we rely on it. + */ + + /* Parse: "s" */ +@@ -339,54 +285,24 @@ static int service_watch_unit_handler(sd_bus_message *message, void *userdata, s + return error_origin(r); + } + +- /* Parse: "as" */ +- { +- r = sd_bus_message_enter_container(message, 'a', "s"); +- if (r < 0) +- return error_origin(r); +- +- while (!sd_bus_message_at_end(message, false)) { +- r = sd_bus_message_read(message, "s", &property); +- if (r < 0) +- return error_origin(r); +- +- if (!strcmp(property, "ActiveState")) +- invalidated = true; +- } +- +- r = sd_bus_message_exit_container(message); +- if (r < 0) +- return error_origin(r); +- } +- +- /* If the value neither changed or was invalidated, discard it. */ +- if (!value && !invalidated) +- return 0; +- +- service->slot_query_unit = sd_bus_slot_unref(service->slot_query_unit); +- +- if (value) { +- /* We got a new property value, so handle it. */ +- r = service_handle_active_state(service, value); ++ /* ++ * The possible values of "ActiveState" are: ++ * ++ * active, reloading, inactive, failed, activating, deactivating ++ * ++ * We are never interested in positive results, because the broker ++ * already gets those by tracking the name to be acquired. Therefore, ++ * we only ever track negative results. This means we only ever react ++ * to "failed". ++ * We could also react to units entering "inactive", but we cannot know ++ * upfront whether the unit is just a oneshot unit and thus is expected ++ * to enter "inactive" when it finished. Hence, we simply require ++ * anything to explicitly fail if they want to reset the activation. ++ */ ++ if (value && !strcmp(value, "failed")) { ++ r = service_reset_activation(service); + if (r) + return error_trace(r); +- } else { +- /* The property was invalidated, so query it. */ +- r = sd_bus_call_method_async( +- service->launcher->bus_regular, +- &service->slot_query_unit, +- "org.freedesktop.systemd1", +- sd_bus_message_get_path(message), +- "org.freedesktop.DBus.Properties", +- "Get", +- service_query_unit_handler, +- service, +- "ss", +- "org.freedesktop.systemd1.Unit", +- "ActiveState" +- ); +- if (r < 0) +- return error_origin(r); + } + + return 0; +@@ -397,7 +313,6 @@ static int service_watch_unit(Service *service, const char *unit) { + int r; + + assert(!service->slot_watch_unit); +- assert(!service->slot_query_unit); + + r = sd_bus_path_encode( + "/org/freedesktop/systemd1/unit", +diff --git a/src/launch/service.h b/src/launch/service.h +index ede29ab..fa5bee2 100644 +--- a/src/launch/service.h ++++ b/src/launch/service.h +@@ -18,7 +18,6 @@ struct Service { + bool running; + bool reload_tag; + sd_bus_slot *slot_watch_unit; +- sd_bus_slot *slot_query_unit; + sd_bus_slot *slot_start_unit; + CRBNode rb; + CRBNode rb_by_name; diff --git a/dbus-broker.spec b/dbus-broker.spec index 1748693..adb947c 100644 --- a/dbus-broker.spec +++ b/dbus-broker.spec @@ -2,11 +2,13 @@ Name: dbus-broker Version: 27 -Release: 1%{?dist} +Release: 2%{?dist} Summary: Linux D-Bus Message Broker License: ASL 2.0 URL: https://github.com/bus1/dbus-broker Source0: https://github.com/bus1/dbus-broker/releases/download/v%{version}/dbus-broker-%{version}.tar.xz +Patch0000: https://github.com/bus1/dbus-broker/commit/bd5a227ec6ca25ad7d352c4e2d48a9937b19e2d0.patch +Patch0001: https://github.com/bus1/dbus-broker/commit/41ccab38eae2300e3d6f4e3eac0962ebe6e60ade.patch Provides: bundled(c-dvar) = 1 Provides: bundled(c-ini) = 1 Provides: bundled(c-list) = 3 @@ -98,6 +100,9 @@ fi %{_userunitdir}/dbus-broker.service %changelog +* Wed Feb 17 2021 David Rheinsberg - 27-2 +- Apply activation-tracking bugfixes from upstream. + * Mon Feb 15 2021 David Rheinsberg - 27-1 - Update to upstream v27.