Record exceptions for top level OTel span

If there is an exception in the code, the cli_main function captures it,
saves the traceback and exits the process.

With the original tracing span, the instrumentation never saw the actual
exception, only SystemExit. This meant the span was not recorded as
failed. (Technically python-opentelemetry 1.31.0 does record it, but
that change was reverted in 1.32.0.)

It is somewhat tricky to structure the code so that the exception is
recorded implicitly. The status update to DOOMED must happen inside the
span (in order to propagate it to the trace). Thus a new function is
exported from the tracing module to record the exception explicitly
before it gets discarded and replaced with the exit.

Signed-off-by: Lubomír Sedlář <lsedlar@redhat.com>
(cherry picked from commit d3630bfa6f8dc5ccf3dcef9cb4a947d82d7f09b8)
This commit is contained in:
Lubomír Sedlář 2025-05-12 14:02:56 +02:00 committed by Stepan Oksanichenko
parent 5cf13491df
commit 37479bbc6a
2 changed files with 15 additions and 0 deletions

View File

@ -31,6 +31,9 @@ class DummyTracing:
def set_context(self, traceparent): def set_context(self, traceparent):
pass pass
def record_exception(self, exc, set_error_status=True):
# No-op implementation on DummyTracing: it takes the same arguments as
# OtelTracing.record_exception and ignores them, so callers can invoke
# record_exception() without checking which tracing class is in use.
pass
class OtelTracing: class OtelTracing:
"""This class implements the actual integration with opentelemetry.""" """This class implements the actual integration with opentelemetry."""
@ -114,6 +117,17 @@ class OtelTracing:
) )
context.attach(ctx) context.attach(ctx)
def record_exception(self, exc, set_error_status=True):
"""Records an exception for the current span and optionally marks the
span as failed."""
# exc: the exception instance to attach to the span.
# set_error_status: when true (the default), the span status is also set
# to ERROR; pass False to record the exception without failing the span.
#
# NOTE(review): opentelemetry is imported inside the method rather than at
# module level -- presumably so the package stays optional when
# DummyTracing is used; confirm against the rest of the module.
from opentelemetry import trace
# Operate on whichever span is current at call time; the caller must
# invoke this while the span of interest is still active.
span = trace.get_current_span()
span.record_exception(exc)
# Mark the span as failed unless the caller opted out.
if set_error_status:
span.set_status(trace.status.StatusCode.ERROR)
class InstrumentedClientSession: class InstrumentedClientSession:
"""Wrapper around koji.ClientSession that creates spans for each API call. """Wrapper around koji.ClientSession that creates spans for each API call.

View File

@ -657,6 +657,7 @@ def cli_main():
try: try:
main() main()
except (Exception, KeyboardInterrupt) as ex: except (Exception, KeyboardInterrupt) as ex:
tracing.record_exception(ex)
if COMPOSE: if COMPOSE:
COMPOSE.log_error("Compose run failed: %s" % ex) COMPOSE.log_error("Compose run failed: %s" % ex)
COMPOSE.traceback(show_locals=getattr(ex, "show_locals", True)) COMPOSE.traceback(show_locals=getattr(ex, "show_locals", True))