dbus-broker: apply activation-tracking fixes

Pull in 2 activation-tracking fixes from upstream dbus-broker.

Signed-off-by: David Rheinsberg <david.rheinsberg@gmail.com>
This commit is contained in:
David Rheinsberg 2021-02-17 12:40:57 +01:00
parent 4798e5c18b
commit c5e28e5aba
3 changed files with 439 additions and 1 deletions

View File

@ -0,0 +1,226 @@
From 41ccab38eae2300e3d6f4e3eac0962ebe6e60ade Mon Sep 17 00:00:00 2001
From: David Rheinsberg <david.rheinsberg@gmail.com>
Date: Wed, 17 Feb 2021 12:01:49 +0100
Subject: [PATCH] launch/service: track systemd jobs to close a race condition
Start tracking the `JobRemoved` signal of systemd to know the point at
which systemd fully handled our submitted job. There is no other way to
reliably know when we can start handling PropertiesChanged signals.
systemd does not guarantee any other barrier on the bus but the
JobRemoved signal.
This is unfortunate, since it requires us to watch *all* JobRemoved
signals of systemd. However, according to systemd developers, it is the
only supported way to track Job submission. Hence, we have to comply.
Signed-off-by: David Rheinsberg <david.rheinsberg@gmail.com>
---
src/launch/service.c | 119 ++++++++++++++++++++++++++++++++++++++++++-
src/launch/service.h | 2 +
2 files changed, 119 insertions(+), 2 deletions(-)
diff --git a/src/launch/service.c b/src/launch/service.c
index ab1a7b8..f23fa2d 100644
--- a/src/launch/service.c
+++ b/src/launch/service.c
@@ -80,6 +80,7 @@ Service *service_free(Service *service) {
c_rbnode_unlink(&service->rb_by_name);
c_rbnode_unlink(&service->rb);
+ free(service->job);
free(service->user);
for (size_t i = 0; i < service->argc; ++i)
free(service->argv[i]);
@@ -89,6 +90,7 @@ Service *service_free(Service *service) {
free(service->path);
service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit);
service->slot_watch_unit = sd_bus_slot_unref(service->slot_watch_unit);
+ service->slot_watch_jobs = sd_bus_slot_unref(service->slot_watch_jobs);
free(service);
return NULL;
@@ -180,8 +182,10 @@ int service_new(Service **servicep,
}
static void service_discard_activation(Service *service) {
+ service->job = c_free(service->job);
service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit);
service->slot_watch_unit = sd_bus_slot_unref(service->slot_watch_unit);
+ service->slot_watch_jobs = sd_bus_slot_unref(service->slot_watch_jobs);
}
static int service_reset_activation(Service *service) {
@@ -216,11 +220,97 @@ static int service_reset_activation(Service *service) {
return 0;
}
+static int service_watch_jobs_handler(sd_bus_message *message, void *userdata, sd_bus_error *errorp) {
+ Service *service = userdata;
+ const char *path = NULL, *unit = NULL, *result = NULL;
+ uint32_t id;
+ int r;
+
+ /*
+ * Whenever we have an activation job queued, we want to know when it
+ * is done. We get a `JobRemoved` signal from systemd, which includes
+ * the reason why the job is done.
+ * We get those signals for all jobs, since we cannot know a job-id to
+ * match for before we create job. This is quite unfortunate, but
+ * little we can do about it now.
+ *
+ * We require this signal merely to know when systemd finished handling
+ * a job. For properly configured services, this should already tell us
+ * whether the startup was successful or failed. But for basic services
+ * that use no notify or dbus systemd-startup handling, we have to
+ * continue tracking their `ActiveState` to see whether they failed.
+ */
+
+ r = sd_bus_message_read(message, "uoss", &id, &path, &unit, &result);
+ if (r < 0)
+ return error_origin(r);
+
+ if (!service->job || strcmp(path, service->job))
+ return 0;
+
+ if (!strcmp(result, "done") || !strcmp(result, "skipped")) {
+ /*
+ * Our job completed successfully. Make sure to stop watching
+ * it so the `ActiveState` handling will take effect.
+ */
+ service->job = c_free(service->job);
+ service->slot_watch_jobs = sd_bus_slot_unref(service->slot_watch_jobs);
+ } else {
+ /*
+ * Our job failed. Forward this information to the broker so it
+ * can fail pending activations.
+ */
+ r = service_reset_activation(service);
+ if (r)
+ return error_trace(r);
+ }
+
+ return 0;
+}
+
+static int service_watch_jobs(Service *service) {
+ int r;
+
+ assert(!service->slot_watch_jobs);
+ assert(!service->job);
+
+ r = sd_bus_match_signal_async(
+ service->launcher->bus_regular,
+ &service->slot_watch_jobs,
+ "org.freedesktop.systemd1",
+ "/org/freedesktop/systemd1",
+ "org.freedesktop.systemd1.Manager",
+ "JobRemoved",
+ service_watch_jobs_handler,
+ NULL,
+ service
+ );
+ if (r < 0)
+ return error_origin(r);
+
+ return 0;
+}
+
static int service_watch_unit_handler(sd_bus_message *message, void *userdata, sd_bus_error *errorp) {
Service *service = userdata;
const char *interface = NULL, *property = NULL, *value = NULL;
int r;
+ /*
+ * If we still watch the job-signals it means systemd has not yet fully
+ * finished our job. In this case, any errors will be caught by the
+ * `JobRemoved` handler and we do not have to track the unit, yet.
+ * Moreover, we must not track the unit, since it might still return
+ * failures before our job was actually handled by systemd.
+ *
+ * Hence, simply ignore any PropertiesChanged signals until we got
+ * confirmation by systemd that our job finished. Bus ordering will
+ * guarantee that we catch unit-failures both before and after the job
+ * completion.
+ */
+ if (service->slot_watch_jobs)
+ return 0;
+
/*
* The properties of the bus unit changed. We are only interested in
* the "ActiveState" property. We check whether it is included in the
@@ -343,14 +433,31 @@ static int service_start_unit_handler(sd_bus_message *message, void *userdata, s
Service *service = userdata;
Launcher *launcher = service->launcher;
const sd_bus_error *error;
+ const char *job;
int r;
service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit);
error = sd_bus_message_get_error(message);
- if (!error)
- /* unit queued successfully */
+ if (!error) {
+ /*
+ * We successfully queued the job. Now remember the job path,
+ * so we can properly track when it finishes (via the
+ * JobRemoved signal).
+ */
+
+ assert(!service->job);
+
+ r = sd_bus_message_read(message, "o", &job);
+ if (r < 0)
+ return error_origin(r);
+
+ service->job = strdup(job);
+ if (!service->job)
+ return error_origin(-ENOMEM);
+
return 1;
+ }
/*
* We always forward activation failure to the broker, which then
@@ -437,6 +544,10 @@ static int service_start_unit(Service *service) {
_c_cleanup_(sd_bus_message_unrefp) sd_bus_message *method_call = NULL;
int r;
+ r = service_watch_jobs(service);
+ if (r)
+ return error_trace(r);
+
r = service_watch_unit(service, service->unit);
if (r)
return error_trace(r);
@@ -467,6 +578,10 @@ static int service_start_transient_unit(Service *service) {
const char *unique_name;
int r;
+ r = service_watch_jobs(service);
+ if (r)
+ return error_trace(r);
+
r = sd_bus_get_unique_name(launcher->bus_regular, &unique_name);
if (r < 0)
return error_origin(r);
diff --git a/src/launch/service.h b/src/launch/service.h
index fa5bee2..2d3f870 100644
--- a/src/launch/service.h
+++ b/src/launch/service.h
@@ -17,6 +17,7 @@ struct Service {
bool not_found;
bool running;
bool reload_tag;
+ sd_bus_slot *slot_watch_jobs;
sd_bus_slot *slot_watch_unit;
sd_bus_slot *slot_start_unit;
CRBNode rb;
@@ -32,6 +33,7 @@ struct Service {
uint64_t n_missing_unit;
uint64_t n_masked_unit;
uint64_t last_serial;
+ char *job;
char id[];
};

View File

@ -0,0 +1,207 @@
From bd5a227ec6ca25ad7d352c4e2d48a9937b19e2d0 Mon Sep 17 00:00:00 2001
From: David Rheinsberg <david.rheinsberg@gmail.com>
Date: Wed, 17 Feb 2021 11:33:27 +0100
Subject: [PATCH] launch/service: rely on systemd PropertiesChanged behavior
The systemd API guarantees that basic properties are always included
with value in the PropertiesChanged signals. They are never invalidated.
If that was the case, there would be no way to reliably track all state
changes of a unit.
Drop the explicit query behavior and simplify the property handling. We
can safely rely on this API behavior.
Signed-off-by: David Rheinsberg <david.rheinsberg@gmail.com>
---
src/launch/service.c | 127 +++++++------------------------------------
src/launch/service.h | 1 -
2 files changed, 21 insertions(+), 107 deletions(-)
diff --git a/src/launch/service.c b/src/launch/service.c
index 3f9d971..ab1a7b8 100644
--- a/src/launch/service.c
+++ b/src/launch/service.c
@@ -88,7 +88,6 @@ Service *service_free(Service *service) {
free(service->name);
free(service->path);
service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit);
- service->slot_query_unit = sd_bus_slot_unref(service->slot_query_unit);
service->slot_watch_unit = sd_bus_slot_unref(service->slot_watch_unit);
free(service);
@@ -182,7 +181,6 @@ int service_new(Service **servicep,
static void service_discard_activation(Service *service) {
service->slot_start_unit = sd_bus_slot_unref(service->slot_start_unit);
- service->slot_query_unit = sd_bus_slot_unref(service->slot_query_unit);
service->slot_watch_unit = sd_bus_slot_unref(service->slot_watch_unit);
}
@@ -218,71 +216,19 @@ static int service_reset_activation(Service *service) {
return 0;
}
-static int service_handle_active_state(Service *service, const char *value) {
- int r;
-
- /*
- * The possible values of "ActiveState" are:
- *
- * active, reloading, inactive, failed, activating, deactivating
- *
- * We are never interested in positive results, because the broker
- * already gets those by tracking the name to be acquired. Therefore,
- * we only ever track negative results. This means we only ever react
- * to "failed".
- * We could also react to units entering "inactive", but we cannot know
- * upfront whether the unit is just a oneshot unit and thus is expected
- * to enter "inactive" when it finished. Hence, we simply require
- * anything to explicitly fail if they want to reset the activation.
- */
-
- if (!strcmp(value, "failed")) {
- r = service_reset_activation(service);
- if (r)
- return error_trace(r);
- }
-
- return 0;
-}
-
-static int service_query_unit_handler(sd_bus_message *message, void *userdata, sd_bus_error *errorp) {
- Service *service = userdata;
- const char *v;
- int r;
-
- service->slot_query_unit = sd_bus_slot_unref(service->slot_query_unit);
-
- r = sd_bus_message_enter_container(message, 'v', "s");
- if (r < 0)
- return error_origin(r);
-
- {
- r = sd_bus_message_read(message, "s", &v);
- if (r < 0)
- return error_origin(r);
- }
-
- r = sd_bus_message_exit_container(message);
- if (r < 0)
- return error_origin(r);
-
- r = service_handle_active_state(service, v);
- if (r)
- return error_trace(r);
-
- return 0;
-}
-
static int service_watch_unit_handler(sd_bus_message *message, void *userdata, sd_bus_error *errorp) {
Service *service = userdata;
const char *interface = NULL, *property = NULL, *value = NULL;
- bool invalidated = false;
int r;
/*
* The properties of the bus unit changed. We are only interested in
* the "ActiveState" property. We check whether it is included in the
- * payload. If not, we query it, in case it was invalidated.
+ * payload. If not, we ignore the signal.
+ *
+ * Note that we rely on systemd including it with value in the signal.
+ * We will not query it, if it was merely invalidated. This is a
+ * systemd API guarantee, and we rely on it.
*/
/* Parse: "s" */
@@ -339,54 +285,24 @@ static int service_watch_unit_handler(sd_bus_message *message, void *userdata, s
return error_origin(r);
}
- /* Parse: "as" */
- {
- r = sd_bus_message_enter_container(message, 'a', "s");
- if (r < 0)
- return error_origin(r);
-
- while (!sd_bus_message_at_end(message, false)) {
- r = sd_bus_message_read(message, "s", &property);
- if (r < 0)
- return error_origin(r);
-
- if (!strcmp(property, "ActiveState"))
- invalidated = true;
- }
-
- r = sd_bus_message_exit_container(message);
- if (r < 0)
- return error_origin(r);
- }
-
- /* If the value neither changed or was invalidated, discard it. */
- if (!value && !invalidated)
- return 0;
-
- service->slot_query_unit = sd_bus_slot_unref(service->slot_query_unit);
-
- if (value) {
- /* We got a new property value, so handle it. */
- r = service_handle_active_state(service, value);
+ /*
+ * The possible values of "ActiveState" are:
+ *
+ * active, reloading, inactive, failed, activating, deactivating
+ *
+ * We are never interested in positive results, because the broker
+ * already gets those by tracking the name to be acquired. Therefore,
+ * we only ever track negative results. This means we only ever react
+ * to "failed".
+ * We could also react to units entering "inactive", but we cannot know
+ * upfront whether the unit is just a oneshot unit and thus is expected
+ * to enter "inactive" when it finished. Hence, we simply require
+ * anything to explicitly fail if they want to reset the activation.
+ */
+ if (value && !strcmp(value, "failed")) {
+ r = service_reset_activation(service);
if (r)
return error_trace(r);
- } else {
- /* The property was invalidated, so query it. */
- r = sd_bus_call_method_async(
- service->launcher->bus_regular,
- &service->slot_query_unit,
- "org.freedesktop.systemd1",
- sd_bus_message_get_path(message),
- "org.freedesktop.DBus.Properties",
- "Get",
- service_query_unit_handler,
- service,
- "ss",
- "org.freedesktop.systemd1.Unit",
- "ActiveState"
- );
- if (r < 0)
- return error_origin(r);
}
return 0;
@@ -397,7 +313,6 @@ static int service_watch_unit(Service *service, const char *unit) {
int r;
assert(!service->slot_watch_unit);
- assert(!service->slot_query_unit);
r = sd_bus_path_encode(
"/org/freedesktop/systemd1/unit",
diff --git a/src/launch/service.h b/src/launch/service.h
index ede29ab..fa5bee2 100644
--- a/src/launch/service.h
+++ b/src/launch/service.h
@@ -18,7 +18,6 @@ struct Service {
bool running;
bool reload_tag;
sd_bus_slot *slot_watch_unit;
- sd_bus_slot *slot_query_unit;
sd_bus_slot *slot_start_unit;
CRBNode rb;
CRBNode rb_by_name;

View File

@ -2,11 +2,13 @@
Name: dbus-broker
Version: 27
Release: 1%{?dist}
Release: 2%{?dist}
Summary: Linux D-Bus Message Broker
License: ASL 2.0
URL: https://github.com/bus1/dbus-broker
Source0: https://github.com/bus1/dbus-broker/releases/download/v%{version}/dbus-broker-%{version}.tar.xz
Patch0000: https://github.com/bus1/dbus-broker/commit/bd5a227ec6ca25ad7d352c4e2d48a9937b19e2d0.patch
Patch0001: https://github.com/bus1/dbus-broker/commit/41ccab38eae2300e3d6f4e3eac0962ebe6e60ade.patch
Provides: bundled(c-dvar) = 1
Provides: bundled(c-ini) = 1
Provides: bundled(c-list) = 3
@ -98,6 +100,9 @@ fi
%{_userunitdir}/dbus-broker.service
%changelog
* Wed Feb 17 2021 David Rheinsberg <david.rheinsberg@gmail.com> - 27-2
- Apply activation-tracking bugfixes from upstream.
* Mon Feb 15 2021 David Rheinsberg <david.rheinsberg@gmail.com> - 27-1
- Update to upstream v27.