import CS keylime-7.14.1-5.el10

This commit is contained in:
AlmaLinux RelEng Bot 2026-05-19 15:12:29 -04:00
parent 9a162b54d2
commit 64744e5570
37 changed files with 8566 additions and 7178 deletions

4
.gitignore vendored
View File

@@ -1,2 +1,2 @@
-keylime-selinux-42.1.2.tar.gz
-v7.12.1.tar.gz
+keylime-selinux-43.2.1.tar.gz
+v7.14.1.tar.gz

View File

@ -0,0 +1,40 @@
From 7cf07986522fda7691d9135ad4f8d31d030e8b59 Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Fri, 13 Feb 2026 04:46:20 -0500
Subject: [PATCH 1/2] Fix timestamp conversion to use UTC timezone
Ensure Unix timestamps are converted to UTC datetimes by passing
tz=timezone.utc to datetime.fromtimestamp(). Previously, timestamps
were converted using the local timezone, causing test failures when
epoch (0) was incorrectly converted to 1969 instead of 1970.
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
keylime/models/base/types/timestamp.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/keylime/models/base/types/timestamp.py b/keylime/models/base/types/timestamp.py
index 8f6782f..22c1fcb 100644
--- a/keylime/models/base/types/timestamp.py
+++ b/keylime/models/base/types/timestamp.py
@@ -36,7 +36,7 @@ class Timestamp(ModelType):
if not ts:
try:
- ts = datetime.fromtimestamp(float(value))
+ ts = datetime.fromtimestamp(float(value), tz=timezone.utc)
except ValueError:
pass
@@ -49,7 +49,7 @@ class Timestamp(ModelType):
return self._load_datetime(ts)
def _load_float(self, value: float) -> datetime:
- ts = datetime.fromtimestamp(value)
+ ts = datetime.fromtimestamp(value, tz=timezone.utc)
return self._load_datetime(ts)
def _load_int(self, value: int) -> datetime:
--
2.53.0

View File

@ -0,0 +1,36 @@
From be3243b5f4f3423b8e8e29245a2401e52dd52baf Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Fri, 13 Feb 2026 07:22:46 -0500
Subject: [PATCH 2/2] Fix efivar availability check in test_create_mb_policy
Import tpm_bootlog_enrich instead of the elparsing package so the
CDLL("libefivar.so.1") load is actually triggered, allowing tests
to skip gracefully when the library is absent.
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
test/test_create_mb_policy.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/test/test_create_mb_policy.py b/test/test_create_mb_policy.py
index 04ed779..fc79c3b 100644
--- a/test/test_create_mb_policy.py
+++ b/test/test_create_mb_policy.py
@@ -10,9 +10,12 @@ import unittest
from keylime.policy import create_mb_policy
-# Check if efivarlibs is available for measured boot parsing
+# Check if efivarlibs is available for measured boot parsing.
+# We need to import tpm_bootlog_enrich (not just the elparsing package)
+# because the CDLL("libefivar.so.1") load happens at module level in
+# tpm_bootlog_enrich, and importing just the package won't trigger it.
try:
- from keylime.mba import elparsing # pylint: disable=unused-import
+ from keylime.mba.elparsing import tpm_bootlog_enrich # pylint: disable=unused-import
EFIVAR_AVAILABLE = True
except Exception:
--
2.53.0

View File

@ -1,29 +0,0 @@
From 52944972182639a625599e29ebe65b91714a3a41 Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Fri, 8 Aug 2025 16:40:01 +0100
Subject: [PATCH 2/3] mb: support EV_EFI_HANDOFF_TABLES events on PCR1
In the example policy, allow EV_EFI_HANDOFF_TABLES events on PCR1
alongside the existing EV_EFI_HANDOFF_TABLES2 support, so that
different firmware implementations are handled.
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
keylime/mba/elchecking/example.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/keylime/mba/elchecking/example.py b/keylime/mba/elchecking/example.py
index 2c6f699..a3d918a 100644
--- a/keylime/mba/elchecking/example.py
+++ b/keylime/mba/elchecking/example.py
@@ -185,6 +185,7 @@ class Example(policies.Policy):
# We only expect one EV_NO_ACTION event at the start.
dispatcher.set((0, "EV_NO_ACTION"), tests.OnceTest(tests.AcceptAll()))
dispatcher.set((1, "EV_CPU_MICROCODE"), tests.OnceTest(tests.AcceptAll()))
+ dispatcher.set((1, "EV_EFI_HANDOFF_TABLES"), tests.OnceTest(tests.AcceptAll()))
dispatcher.set((1, "EV_EFI_HANDOFF_TABLES2"), tests.OnceTest(tests.AcceptAll()))
dispatcher.set((0, "EV_S_CRTM_VERSION"), events_final.get("s_crtms"))
dispatcher.set((0, "EV_EFI_PLATFORM_FIRMWARE_BLOB"), events_final.get("platform_firmware_blobs"))
--
2.47.3

View File

@ -0,0 +1,343 @@
From 5b622eae9244b5a820263609cae6bd4681d3fbb2 Mon Sep 17 00:00:00 2001
From: Sergio Arroutbi <sarroutb@redhat.com>
Date: Tue, 10 Mar 2026 11:26:49 +0100
Subject: [PATCH 3/6] Close DB sessions to prevent connection exhaustion
Resolves: #1861
The get_session() functions in session_controller.py and auth_session.py
returned SQLAlchemy sessions that were never closed, so their connections
were never returned to the pool. Under load (e.g., multi-host push
attestation with multiple agents), this exhausted the QueuePool (size 5,
overflow 10), causing a 30-second timeout and HTTP 500 errors.
Replace get_session() with a get_session_context() context manager that
guarantees session.close() via try/finally. Define it once in
auth_session.py and import it in session_controller.py.
Resolves: connection pool exhaustion (QueuePool limit of size 5
overflow 10 reached) during push attestation multi-host tests.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Sergio Arroutbi <sarroutb@redhat.com>
---
keylime/models/verifier/auth_session.py | 25 +++++++++-----
keylime/web/verifier/session_controller.py | 27 ++++-----------
test/test_auth_session.py | 38 ++++++++++++++++++++--
test/test_session_controller.py | 32 ++++++++++--------
4 files changed, 77 insertions(+), 45 deletions(-)
diff --git a/keylime/models/verifier/auth_session.py b/keylime/models/verifier/auth_session.py
index df01668..545995f 100644
--- a/keylime/models/verifier/auth_session.py
+++ b/keylime/models/verifier/auth_session.py
@@ -1,8 +1,9 @@
import base64
import hmac
import uuid
+from contextlib import contextmanager
from datetime import timedelta
-from typing import Any, Dict, Optional, Sequence
+from typing import Any, Dict, Iterator, Optional, Sequence
from sqlalchemy.orm import Session
@@ -32,11 +33,17 @@ logger = keylime_logging.init_logging("verifier")
_engine = None
-def get_session() -> Session:
+@contextmanager
+def get_session_context() -> Iterator[Session]:
global _engine
if _engine is None:
_engine = make_engine("cloud_verifier")
- return SessionManager().make_session(_engine)
+ session_manager = SessionManager()
+ session = session_manager.make_session(_engine)
+ try:
+ yield session
+ finally:
+ session.close()
class AuthSession(PersistableModel):
@@ -270,12 +277,12 @@ class AuthSession(PersistableModel):
return False
# Use old engine to query VerfierMain (legacy model)
- session = get_session()
- agent = (
- session.query(VerfierMain)
- .filter(VerfierMain.agent_id == auth_session.agent_id) # type: ignore[attr-defined]
- .one_or_none()
- )
+ with get_session_context() as session:
+ agent = (
+ session.query(VerfierMain)
+ .filter(VerfierMain.agent_id == auth_session.agent_id) # type: ignore[attr-defined]
+ .one_or_none()
+ )
return agent
diff --git a/keylime/web/verifier/session_controller.py b/keylime/web/verifier/session_controller.py
index 9fc3bb5..49cd758 100644
--- a/keylime/web/verifier/session_controller.py
+++ b/keylime/web/verifier/session_controller.py
@@ -1,27 +1,14 @@
import base64
-from sqlalchemy.orm import Session
-
from keylime import config, keylime_logging
-from keylime.db.keylime_db import SessionManager, make_engine
from keylime.db.verifier_db import VerfierMain
from keylime.models.base import Timestamp
from keylime.models.verifier import AuthSession
+from keylime.models.verifier.auth_session import get_session_context
from keylime.web.base import Controller
logger = keylime_logging.init_logging("verifier")
-# GLOBAL_POLICY_CACHE: Dict[str, Dict[str, str]] = {}
-
-_engine = None
-
-
-def get_session() -> Session:
- global _engine
- if _engine is None:
- _engine = make_engine("cloud_verifier")
- return SessionManager().make_session(_engine)
-
class SessionController(Controller):
# POST /v3[.:minor]/sessions
@@ -198,8 +185,8 @@ class SessionController(Controller):
return
# Check if agent exists - this is where we validate enrollment
- session = get_session()
- agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
+ with get_session_context() as session:
+ agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
if not agent:
# Agent not enrolled - return 200 with evaluation:fail
@@ -393,8 +380,8 @@ class SessionController(Controller):
# POST /v3[.:minor]/agents/:agent_id/session
def create(self, agent_id, **params):
- session = get_session()
- agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
+ with get_session_context() as session:
+ agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
if not agent:
self.respond(404, "here")
@@ -416,8 +403,8 @@ class SessionController(Controller):
self.respond(200, "Success", auth_session.render())
def update(self, agent_id, token, **params):
- session = get_session()
- agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
+ with get_session_context() as session:
+ agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
# Look up session by token hash (tokens are never stored in plaintext)
auth_session = AuthSession.get_by_token(token)
diff --git a/test/test_auth_session.py b/test/test_auth_session.py
index 62b4244..8e9ec98 100644
--- a/test/test_auth_session.py
+++ b/test/test_auth_session.py
@@ -7,10 +7,41 @@ from unittest.mock import MagicMock, PropertyMock, patch
from keylime.crypto import generate_session_token, generate_token_salt, hash_token_for_storage
from keylime.models.base.types import Timestamp
-from keylime.models.verifier.auth_session import AuthSession
+from keylime.models.verifier.auth_session import AuthSession, get_session_context
from keylime.shared_data import cleanup_global_shared_memory, get_shared_memory
+class TestGetSessionContext(unittest.TestCase):
+ """Test cases for get_session_context context manager."""
+
+ @patch("keylime.models.verifier.auth_session.make_engine")
+ @patch("keylime.models.verifier.auth_session.SessionManager")
+ def test_session_closed_on_normal_exit(self, mock_session_manager_cls, _mock_make_engine):
+ """Test that session.close() is called when context manager exits normally."""
+ mock_session = MagicMock()
+ mock_session_manager_cls.return_value.make_session.return_value = mock_session
+
+ with patch("keylime.models.verifier.auth_session._engine", None):
+ with get_session_context() as session:
+ self.assertIs(session, mock_session)
+
+ mock_session.close.assert_called_once()
+
+ @patch("keylime.models.verifier.auth_session.make_engine")
+ @patch("keylime.models.verifier.auth_session.SessionManager")
+ def test_session_closed_on_exception(self, mock_session_manager_cls, _mock_make_engine):
+ """Test that session.close() is called even when an exception occurs."""
+ mock_session = MagicMock()
+ mock_session_manager_cls.return_value.make_session.return_value = mock_session
+
+ with patch("keylime.models.verifier.auth_session._engine", None):
+ with self.assertRaises(RuntimeError):
+ with get_session_context():
+ raise RuntimeError("simulated error")
+
+ mock_session.close.assert_called_once()
+
+
class TestAuthSessionHelpers(unittest.TestCase):
"""Test cases for AuthSession helper methods."""
@@ -398,7 +429,7 @@ class TestAuthSessionCore(unittest.TestCase):
self.assertIn("errors", result)
self.assertIn("authentication_supported", result["errors"])
- @patch("keylime.models.verifier.auth_session.get_session")
+ @patch("keylime.models.verifier.auth_session.get_session_context")
@patch.object(AuthSession, "get_by_token")
def test_authenticate_agent_success(self, mock_get_by_token, mock_get_session):
"""Test successful agent authentication with valid token."""
@@ -409,7 +440,8 @@ class TestAuthSessionCore(unittest.TestCase):
# Mock session query
mock_db_session = MagicMock()
mock_db_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value = mock_db_session
+ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_db_session)
+ mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Mock AuthSession.get_by_token to return an active session
mock_auth_session = MagicMock()
diff --git a/test/test_session_controller.py b/test/test_session_controller.py
index d807119..eec7fef 100644
--- a/test/test_session_controller.py
+++ b/test/test_session_controller.py
@@ -272,7 +272,7 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
# Verify session was deleted from cache
self.assertNotIn(self.test_session_id, self.sessions_cache)
- @patch("keylime.web.verifier.session_controller.get_session")
+ @patch("keylime.web.verifier.session_controller.get_session_context")
def test_update_session_agent_not_enrolled(self, mock_get_session):
"""Test update_session with unenrolled agent."""
# Create session in cache
@@ -290,7 +290,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
# Mock database query to return no agent
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = None
- mock_get_session.return_value = mock_session
+ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Call update_session
params = {
@@ -318,7 +319,7 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
body = call_args[1]["body"]
self.assertEqual(body["data"]["attributes"]["evaluation"], "fail")
- @patch("keylime.web.verifier.session_controller.get_session")
+ @patch("keylime.web.verifier.session_controller.get_session_context")
@patch("keylime.models.verifier.auth_session.AuthSession.create_from_memory")
def test_update_session_authentication_failed(self, mock_create_from_memory, mock_get_session):
"""Test update_session with failed authentication."""
@@ -339,7 +340,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
mock_agent.agent_id = self.test_agent_id
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value = mock_session
+ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Mock AuthSession.create_from_memory to return errors
mock_auth_session = MagicMock()
@@ -377,7 +379,7 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
call_args = self.controller.send_response.call_args # type: ignore[attr-defined]
self.assertEqual(call_args[1]["code"], 401)
- @patch("keylime.web.verifier.session_controller.get_session")
+ @patch("keylime.web.verifier.session_controller.get_session_context")
@patch("keylime.models.verifier.auth_session.AuthSession.create_from_memory")
@patch("keylime.models.verifier.auth_session.AuthSession.delete_active_session_for_agent")
@patch("keylime.web.verifier.session_controller.config")
@@ -403,7 +405,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
mock_agent.agent_id = self.test_agent_id
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value = mock_session
+ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Mock config
mock_config.getboolean.return_value = False # Don't keep in memory
@@ -522,7 +525,7 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
self.assertEqual(call_args[0][0], 404)
@patch("keylime.models.verifier.auth_session.AuthSession.delete_stale")
- @patch("keylime.web.verifier.session_controller.get_session")
+ @patch("keylime.web.verifier.session_controller.get_session_context")
@patch("keylime.models.verifier.auth_session.AuthSession.create")
def test_create_success(self, mock_create, mock_get_session, _mock_delete_stale):
"""Test successful create endpoint."""
@@ -531,7 +534,8 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
mock_agent.agent_id = self.test_agent_id
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value = mock_session
+ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Mock AuthSession.create
mock_auth_session = MagicMock()
@@ -549,13 +553,14 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
call_args = self.controller.respond.call_args # type: ignore[attr-defined]
self.assertEqual(call_args[0][0], 200)
- @patch("keylime.web.verifier.session_controller.get_session")
+ @patch("keylime.web.verifier.session_controller.get_session_context")
def test_create_agent_not_found(self, mock_get_session):
"""Test create endpoint with non-existent agent."""
# Mock database query to return None
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = None
- mock_get_session.return_value = mock_session
+ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Call create
params = {"data": {}}
@@ -566,7 +571,7 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
call_args = self.controller.respond.call_args # type: ignore[attr-defined]
self.assertEqual(call_args[0][0], 404)
- @patch("keylime.web.verifier.session_controller.get_session")
+ @patch("keylime.web.verifier.session_controller.get_session_context")
@patch("keylime.models.verifier.auth_session.AuthSession.get_by_token")
def test_update_success(self, mock_get, mock_get_session):
"""Test successful update endpoint."""
@@ -575,7 +580,8 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
mock_agent.agent_id = self.test_agent_id
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value = mock_session
+ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Mock AuthSession.get_by_token
mock_auth_session = MagicMock()
@@ -595,7 +601,7 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
call_args = self.controller.respond.call_args # type: ignore[attr-defined]
self.assertEqual(call_args[0][0], 200)
- @patch("keylime.web.verifier.session_controller.get_session")
+ @patch("keylime.web.verifier.session_controller.get_session_context")
@patch("keylime.models.verifier.auth_session.AuthSession.get_by_token")
def test_update_not_found(self, mock_get, _mock_get_session):
"""Test update endpoint with non-existent session."""
--
2.53.0

View File

@ -1,356 +0,0 @@
From 34bd283113f13c251114507315c647975beede2f Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Fri, 8 Aug 2025 16:41:54 +0100
Subject: [PATCH 3/3] mb: support vendor_db as logged by newer shim versions
- Updated example policy to properly handle different event structures
for vendor_db validation:
- KeySubsetMulti for EV_EFI_VARIABLE_DRIVER_CONFIG (has SignatureType field)
- SignatureSetMember for EV_EFI_VARIABLE_AUTHORITY (direct signature format)
- Added method to extract vendor_db from EV_EFI_VARIABLE_AUTHORITY events
in reference state generation (keylime-policy create measured-boot and
the legacy create_mb_refstate script)
- Made vendor_db optional for backward compatibility
This fixes attestation failures when vendor_db variables are present but
missing from reference states or validated with incorrect test types.
See: https://github.com/rhboot/shim/pull/728
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
keylime/mba/elchecking/example.py | 45 +++++++++
keylime/policy/create_mb_policy.py | 30 ++++++
scripts/create_mb_refstate | 30 ++++++
test/test_create_mb_policy.py | 142 +++++++++++++++++++++++++++++
4 files changed, 247 insertions(+)
diff --git a/keylime/mba/elchecking/example.py b/keylime/mba/elchecking/example.py
index a3d918a..5a933ac 100644
--- a/keylime/mba/elchecking/example.py
+++ b/keylime/mba/elchecking/example.py
@@ -21,6 +21,7 @@ from . import policies, tests
# kek - list of allowed KEK keys
# db - list of allowed db keys
# dbx - list of required dbx keys
+# vendor_db - list of allowed vendor_db keys (optional, for newer shim versions)
# mokdig - list of allowed digests of MoKList (PCR 14 EV_IPL)
# mokxdig - list of allowed digests of MoKListX (PCR 14 EV_IPL)
# kernels - list of allowed {
@@ -121,6 +122,10 @@ class Example(policies.Policy):
if req not in refstate:
raise Exception(f"refstate lacks {req}")
+ # vendor_db is optional for backward compatibility
+ if "vendor_db" not in refstate:
+ refstate["vendor_db"] = []
+
dispatcher = tests.Dispatcher(("PCRIndex", "EventType"))
vd_driver_config = tests.VariableDispatch()
vd_authority = tests.VariableDispatch()
@@ -268,6 +273,34 @@ class Example(policies.Policy):
"db",
db_test,
)
+ # Support vendor_db as logged by newer shim versions
+ # See: https://github.com/rhboot/shim/pull/728
+ if not has_secureboot and not refstate["vendor_db"]:
+ vendor_db_test = tests.OnceTest(tests.AcceptAll())
+ else:
+ vendor_db_test = tests.OnceTest(
+ tests.Or(
+ tests.KeySubsetMulti(
+ ["a159c0a5-e494-a74a-87b5-ab155c2bf072", "2616c4c1-4c50-9240-aca9-41f936934328"],
+ sigs_strip0x(refstate["vendor_db"]),
+ ),
+ tests.KeySubsetMulti(
+ ["a5c059a1-94e4-4aa7-87b5-ab155c2bf072", "c1c41626-504c-4092-aca9-41f936934328"],
+ sigs_strip0x(refstate["vendor_db"]),
+ ),
+ )
+ )
+
+ vd_driver_config.set(
+ "cbb219d7-3a3d-9645-a3bc-dad00e67656f",
+ "vendor_db",
+ vendor_db_test,
+ )
+ vd_driver_config.set(
+ "d719b2cb-3d3a-4596-a3bc-dad00e67656f",
+ "vendor_db",
+ vendor_db_test,
+ )
if not has_secureboot and not refstate["dbx"]:
dbx_test = tests.OnceTest(tests.AcceptAll())
@@ -295,6 +328,18 @@ class Example(policies.Policy):
vd_db_test = tests.OnceTest(tests.AcceptAll())
vd_authority.set("cbb219d7-3a3d-9645-a3bc-dad00e67656f", "db", vd_db_test)
vd_authority.set("d719b2cb-3d3a-4596-a3bc-dad00e67656f", "db", vd_db_test)
+ # Support vendor_db as logged by newer shim versions in EV_EFI_VARIABLE_AUTHORITY events
+ # See: https://github.com/rhboot/shim/pull/728
+ # EV_EFI_VARIABLE_AUTHORITY events have different structure than EV_EFI_VARIABLE_DRIVER_CONFIG
+ # They contain direct signature data without SignatureType field
+ if not has_secureboot and not refstate["vendor_db"]:
+ vendor_db_authority_test = tests.OnceTest(tests.AcceptAll())
+ else:
+ vendor_db_authority_test = tests.OnceTest(
+ tests.IterateTest(tests.SignatureSetMember(sigs_strip0x(refstate["vendor_db"])))
+ )
+ vd_authority.set("cbb219d7-3a3d-9645-a3bc-dad00e67656f", "vendor_db", vendor_db_authority_test)
+ vd_authority.set("d719b2cb-3d3a-4596-a3bc-dad00e67656f", "vendor_db", vendor_db_authority_test)
# Accept all SbatLevels of the Shim, because we already checked the hash of the Shim itself.
vd_sbat_level_test = tests.OnceTest(tests.AcceptAll())
vd_authority.set("50ab5d60-46e0-0043-abb6-3dd810dd8b23", "SbatLevel", vd_sbat_level_test)
diff --git a/keylime/policy/create_mb_policy.py b/keylime/policy/create_mb_policy.py
index 859e652..b2b48f7 100644
--- a/keylime/policy/create_mb_policy.py
+++ b/keylime/policy/create_mb_policy.py
@@ -93,6 +93,35 @@ def get_keys(events: List[Dict[str, Any]]) -> Dict[str, List[Any]]:
return out
+def get_vendor_db(events: List[Dict[str, Any]]) -> Dict[str, List[Any]]:
+ """Get vendor_db signatures from EV_EFI_VARIABLE_AUTHORITY events."""
+ out: Dict[str, List[Any]] = {"vendor_db": []}
+
+ for event in events:
+ if "EventType" not in event:
+ continue
+ if event["EventType"] != "EV_EFI_VARIABLE_AUTHORITY":
+ continue
+ if "Event" not in event or "UnicodeName" not in event["Event"]:
+ continue
+
+ event_name = event["Event"]["UnicodeName"].lower()
+ if event_name == "vendor_db":
+ data = None
+ if "VariableData" in event["Event"]:
+ data = event["Event"]["VariableData"]
+
+ if data is not None:
+ # VariableData for EV_EFI_VARIABLE_AUTHORITY is a list of signatures
+ for entry in data:
+ if "SignatureOwner" in entry and "SignatureData" in entry:
+ out["vendor_db"].append(
+ {"SignatureOwner": entry["SignatureOwner"], "SignatureData": f"0x{entry['SignatureData']}"}
+ )
+
+ return out
+
+
def get_kernel(events: List[Dict[str, Any]], secure_boot: bool) -> Dict[str, List[Dict[str, Any]]]:
"""Extract digest for Shim, Grub, Linux Kernel and initrd."""
out = []
@@ -259,6 +288,7 @@ def create_mb_refstate(args: argparse.Namespace) -> Optional[Dict[str, object]]:
}
],
**get_keys(events),
+ **get_vendor_db(events),
**get_mok(events),
**get_kernel(events, has_secureboot),
}
diff --git a/scripts/create_mb_refstate b/scripts/create_mb_refstate
index 23cafb9..c98e61d 100755
--- a/scripts/create_mb_refstate
+++ b/scripts/create_mb_refstate
@@ -78,6 +78,35 @@ def get_keys(events):
return out
+def get_vendor_db(events):
+ """Get vendor_db signatures from EV_EFI_VARIABLE_AUTHORITY events."""
+ out = {"vendor_db": []}
+
+ for event in events:
+ if "EventType" not in event:
+ continue
+ if event["EventType"] != "EV_EFI_VARIABLE_AUTHORITY":
+ continue
+ if "Event" not in event or "UnicodeName" not in event["Event"]:
+ continue
+
+ event_name = event["Event"]["UnicodeName"].lower()
+ if event_name == "vendor_db":
+ data = None
+ if "VariableData" in event["Event"]:
+ data = event["Event"]["VariableData"]
+
+ if data is not None:
+ # VariableData for EV_EFI_VARIABLE_AUTHORITY is a list of signatures
+ for entry in data:
+ if "SignatureOwner" in entry and "SignatureData" in entry:
+ out["vendor_db"].append(
+ {"SignatureOwner": entry["SignatureOwner"], "SignatureData": f"0x{entry['SignatureData']}"}
+ )
+
+ return out
+
+
def get_kernel(events, secure_boot):
"""
Extract digest for Shim, Grub, Linux Kernel and initrd.
@@ -197,6 +226,7 @@ def main():
}
],
**get_keys(events),
+ **get_vendor_db(events),
**get_mok(events),
**get_kernel(events, has_secureboot),
}
diff --git a/test/test_create_mb_policy.py b/test/test_create_mb_policy.py
index eaed0e3..aa7a4b9 100644
--- a/test/test_create_mb_policy.py
+++ b/test/test_create_mb_policy.py
@@ -362,6 +362,148 @@ class CreateMeasuredBootPolicy_Test(unittest.TestCase):
for c in test_cases:
self.assertDictEqual(create_mb_policy.get_mok(c["events"]), c["expected"])
+ def test_get_vendor_db(self):
+ test_cases = [
+ {"events": [], "expected": {"vendor_db": []}},
+ # No EV_EFI_VARIABLE_AUTHORITY events.
+ {
+ "events": [
+ {
+ "EventType": "EV_EFI_VARIABLE_DRIVER_CONFIG",
+ "Event": {"UnicodeName": "vendor_db", "VariableData": []},
+ }
+ ],
+ "expected": {"vendor_db": []},
+ },
+ # Good vendor_db event with EV_EFI_VARIABLE_AUTHORITY.
+ {
+ "events": [
+ {
+ "EventType": "EV_EFI_VARIABLE_AUTHORITY",
+ "Event": {
+ "UnicodeName": "vendor_db",
+ "VariableData": [
+ {
+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b",
+ "SignatureData": "sig-data-1",
+ }
+ ],
+ },
+ }
+ ],
+ "expected": {
+ "vendor_db": [
+ {"SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", "SignatureData": "0xsig-data-1"}
+ ]
+ },
+ },
+ # Multiple vendor_db signatures.
+ {
+ "events": [
+ {
+ "EventType": "EV_EFI_VARIABLE_AUTHORITY",
+ "Event": {
+ "UnicodeName": "vendor_db",
+ "VariableData": [
+ {
+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b",
+ "SignatureData": "sig-data-1",
+ },
+ {
+ "SignatureOwner": "77fa9abd-0359-4d32-bd60-28f4e78f784b",
+ "SignatureData": "sig-data-2",
+ },
+ ],
+ },
+ }
+ ],
+ "expected": {
+ "vendor_db": [
+ {"SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", "SignatureData": "0xsig-data-1"},
+ {"SignatureOwner": "77fa9abd-0359-4d32-bd60-28f4e78f784b", "SignatureData": "0xsig-data-2"},
+ ]
+ },
+ },
+ # Missing EventType.
+ {
+ "events": [
+ {
+ "Event": {
+ "UnicodeName": "vendor_db",
+ "VariableData": [
+ {
+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b",
+ "SignatureData": "sig-data-1",
+ }
+ ],
+ }
+ }
+ ],
+ "expected": {"vendor_db": []},
+ },
+ # Wrong EventType.
+ {
+ "events": [
+ {
+ "EventType": "EV_EFI_VARIABLE_DRIVER_CONFIG",
+ "Event": {
+ "UnicodeName": "vendor_db",
+ "VariableData": [
+ {
+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b",
+ "SignatureData": "sig-data-1",
+ }
+ ],
+ },
+ }
+ ],
+ "expected": {"vendor_db": []},
+ },
+ # Missing Event.
+ {
+ "events": [{"EventType": "EV_EFI_VARIABLE_AUTHORITY"}],
+ "expected": {"vendor_db": []},
+ },
+ # Missing UnicodeName.
+ {
+ "events": [
+ {
+ "EventType": "EV_EFI_VARIABLE_AUTHORITY",
+ "Event": {
+ "VariableData": [
+ {
+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b",
+ "SignatureData": "sig-data-1",
+ }
+ ]
+ },
+ }
+ ],
+ "expected": {"vendor_db": []},
+ },
+ # Wrong UnicodeName.
+ {
+ "events": [
+ {
+ "EventType": "EV_EFI_VARIABLE_AUTHORITY",
+ "Event": {
+ "UnicodeName": "db",
+ "VariableData": [
+ {
+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b",
+ "SignatureData": "sig-data-1",
+ }
+ ],
+ },
+ }
+ ],
+ "expected": {"vendor_db": []},
+ },
+ ]
+
+ for c in test_cases:
+ self.assertDictEqual(create_mb_policy.get_vendor_db(c["events"]), c["expected"])
+
def test_get_kernel(self):
test_cases = [
{"events": [], "secureboot": False, "expected": {}},
--
2.47.3

View File

@ -0,0 +1,198 @@
From bc28d5d228d005702f72e98646c8cad73196ccfb Mon Sep 17 00:00:00 2001
From: Sergio Arroutbi <sarroutb@redhat.com>
Date: Tue, 10 Mar 2026 13:22:04 +0100
Subject: [PATCH 4/6] Include thread-safe session management
Replace open-ended SQLAlchemy sessions with a context manager that
guarantees connection release, preventing QueuePool exhaustion under
multi-host push attestation load.
Key changes:
- Add double-checked locking for lazy engine initialization to prevent
race conditions in multi-threaded verifier
- Delegate session lifecycle to SessionManager.session_context() which
provides proper rollback on exception and scoped_session.remove()
cleanup, eliminating thread-local connection leaks
- Use session.expunge(agent) before exiting context manager scope so
VerfierMain instances safely cross session boundaries without
DetachedInstanceError
- Scope with-blocks narrowly: connection is returned to pool before
any subsequent DB calls (e.g. AuthSession.get_by_token) to prevent
connection hoarding across separate pool boundaries
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Sergio Arroutbi <sarroutb@redhat.com>
---
keylime/models/verifier/auth_session.py | 15 +++---
keylime/web/verifier/session_controller.py | 6 +++
test/test_auth_session.py | 60 ++++++++++++++++------
3 files changed, 59 insertions(+), 22 deletions(-)
diff --git a/keylime/models/verifier/auth_session.py b/keylime/models/verifier/auth_session.py
index 545995f..918dfb4 100644
--- a/keylime/models/verifier/auth_session.py
+++ b/keylime/models/verifier/auth_session.py
@@ -1,5 +1,6 @@
import base64
import hmac
+import threading
import uuid
from contextlib import contextmanager
from datetime import timedelta
@@ -31,19 +32,19 @@ from keylime.tpm.tpm_main import Tpm
logger = keylime_logging.init_logging("verifier")
_engine = None
+_engine_lock = threading.Lock()
+_session_manager = SessionManager()
@contextmanager
def get_session_context() -> Iterator[Session]:
global _engine
if _engine is None:
- _engine = make_engine("cloud_verifier")
- session_manager = SessionManager()
- session = session_manager.make_session(_engine)
- try:
+ with _engine_lock:
+ if _engine is None:
+ _engine = make_engine("cloud_verifier")
+ with _session_manager.session_context(_engine) as session:
yield session
- finally:
- session.close()
class AuthSession(PersistableModel):
@@ -283,6 +284,8 @@ class AuthSession(PersistableModel):
.filter(VerfierMain.agent_id == auth_session.agent_id) # type: ignore[attr-defined]
.one_or_none()
)
+ if agent:
+ session.expunge(agent) # type: ignore[no-untyped-call]
return agent
diff --git a/keylime/web/verifier/session_controller.py b/keylime/web/verifier/session_controller.py
index 49cd758..3faa310 100644
--- a/keylime/web/verifier/session_controller.py
+++ b/keylime/web/verifier/session_controller.py
@@ -187,6 +187,8 @@ class SessionController(Controller):
# Check if agent exists - this is where we validate enrollment
with get_session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
+ if agent:
+ session.expunge(agent) # type: ignore[no-untyped-call]
if not agent:
# Agent not enrolled - return 200 with evaluation:fail
@@ -382,6 +384,8 @@ class SessionController(Controller):
def create(self, agent_id, **params):
with get_session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
+ if agent:
+ session.expunge(agent) # type: ignore[no-untyped-call]
if not agent:
self.respond(404, "here")
@@ -405,6 +409,8 @@ class SessionController(Controller):
def update(self, agent_id, token, **params):
with get_session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
+ if agent:
+ session.expunge(agent) # type: ignore[no-untyped-call]
# Look up session by token hash (tokens are never stored in plaintext)
auth_session = AuthSession.get_by_token(token)
diff --git a/test/test_auth_session.py b/test/test_auth_session.py
index 8e9ec98..2c78547 100644
--- a/test/test_auth_session.py
+++ b/test/test_auth_session.py
@@ -2,6 +2,7 @@
import base64
import unittest
+from contextlib import contextmanager
from datetime import timedelta
from unittest.mock import MagicMock, PropertyMock, patch
@@ -14,32 +15,59 @@ from keylime.shared_data import cleanup_global_shared_memory, get_shared_memory
class TestGetSessionContext(unittest.TestCase):
"""Test cases for get_session_context context manager."""
+ def _make_mock_session_manager(self, mock_session):
+ """Create a mock SessionManager whose session_context() mirrors real lifecycle."""
+ mock_scoped = MagicMock()
+ mock_session_manager = MagicMock()
+ mock_session_manager.make_session.return_value = mock_session
+ mock_session_manager._scoped_session = mock_scoped # pylint: disable=protected-access
+
+ @contextmanager
+ def fake_session_context(engine): # pylint: disable=unused-argument
+ session = mock_session_manager.make_session(engine)
+ try:
+ yield session
+ session.commit()
+ except Exception:
+ session.rollback()
+ raise
+ finally:
+ scoped = mock_session_manager._scoped_session # pylint: disable=protected-access
+ if scoped is not None:
+ scoped.remove()
+
+ mock_session_manager.session_context = fake_session_context
+ return mock_session_manager, mock_scoped
+
@patch("keylime.models.verifier.auth_session.make_engine")
- @patch("keylime.models.verifier.auth_session.SessionManager")
- def test_session_closed_on_normal_exit(self, mock_session_manager_cls, _mock_make_engine):
- """Test that session.close() is called when context manager exits normally."""
+ def test_session_cleanup_on_normal_exit(self, _mock_make_engine):
+ """Test that session is committed and cleaned up when context manager exits normally."""
mock_session = MagicMock()
- mock_session_manager_cls.return_value.make_session.return_value = mock_session
+ mock_session_manager, mock_scoped = self._make_mock_session_manager(mock_session)
with patch("keylime.models.verifier.auth_session._engine", None):
- with get_session_context() as session:
- self.assertIs(session, mock_session)
+ with patch("keylime.models.verifier.auth_session._session_manager", mock_session_manager):
+ with get_session_context() as session:
+ self.assertIs(session, mock_session)
- mock_session.close.assert_called_once()
+ mock_session.commit.assert_called_once()
+ mock_scoped.remove.assert_called_once()
@patch("keylime.models.verifier.auth_session.make_engine")
- @patch("keylime.models.verifier.auth_session.SessionManager")
- def test_session_closed_on_exception(self, mock_session_manager_cls, _mock_make_engine):
- """Test that session.close() is called even when an exception occurs."""
+ def test_session_rollback_on_exception(self, _mock_make_engine):
+ """Test that session is rolled back and cleaned up when an exception occurs."""
mock_session = MagicMock()
- mock_session_manager_cls.return_value.make_session.return_value = mock_session
+ mock_session_manager, mock_scoped = self._make_mock_session_manager(mock_session)
with patch("keylime.models.verifier.auth_session._engine", None):
- with self.assertRaises(RuntimeError):
- with get_session_context():
- raise RuntimeError("simulated error")
-
- mock_session.close.assert_called_once()
+ with patch("keylime.models.verifier.auth_session._session_manager", mock_session_manager):
+ with self.assertRaises(RuntimeError):
+ with get_session_context():
+ raise RuntimeError("simulated error")
+
+ mock_session.rollback.assert_called_once()
+ mock_session.commit.assert_not_called()
+ mock_scoped.remove.assert_called_once()
class TestAuthSessionHelpers(unittest.TestCase):
--
2.53.0

View File

@ -1,42 +0,0 @@
From c530c332321c1daffa5bfcd08754179012dd21cc Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Mon, 18 Aug 2025 12:12:16 +0000
Subject: [PATCH 4/7] verifier: Gracefully shutdown on signal
Wait for the processes to finish when interrupted by a signal. Do not
call exit(0) in the signal handler.
Assisted-by: Claude 4 Sonnet
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/cloud_verifier_tornado.py | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py
index 7553ac8..7065661 100644
--- a/keylime/cloud_verifier_tornado.py
+++ b/keylime/cloud_verifier_tornado.py
@@ -2138,7 +2138,7 @@ def main() -> None:
revocation_notifier.stop_broker()
for p in processes:
p.join()
- sys.exit(0)
+ # Do not call sys.exit(0) here as it interferes with multiprocessing cleanup
signal.signal(signal.SIGINT, sig_handler)
signal.signal(signal.SIGTERM, sig_handler)
@@ -2159,3 +2159,11 @@ def main() -> None:
process = Process(target=server_process, args=(task_id, active_agents))
process.start()
processes.append(process)
+
+ # Wait for all worker processes to complete
+ try:
+ for p in processes:
+ p.join()
+ except KeyboardInterrupt:
+ # Signal handler will take care of cleanup
+ pass
--
2.47.3

View File

@ -0,0 +1,79 @@
From 4f5f09a69e01c0116f1977aa3a741f3678bb8e67 Mon Sep 17 00:00:00 2001
From: Sergio Arroutbi <sarroutb@redhat.com>
Date: Thu, 12 Mar 2026 15:18:56 +0100
Subject: [PATCH 5/6] Address some improvements from code review
Initialize the agent variable to None before entering the session context,
and address thread safety of the lazy scoped_session creation in SessionManager
Signed-off-by: Sergio Arroutbi <sarroutb@redhat.com>
---
keylime/db/keylime_db.py | 7 ++++++-
keylime/web/verifier/session_controller.py | 3 +++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/keylime/db/keylime_db.py b/keylime/db/keylime_db.py
index 6fd3f08..cf608fa 100644
--- a/keylime/db/keylime_db.py
+++ b/keylime/db/keylime_db.py
@@ -1,4 +1,5 @@
import os
+import threading
from configparser import NoOptionError
from contextlib import contextmanager
from sqlite3 import Connection as SQLite3Connection
@@ -89,10 +90,12 @@ def make_engine(service: str, **engine_args: Any) -> Engine:
class SessionManager:
engine: Optional[Engine]
_scoped_session: Optional[scoped_session]
+ _lock: threading.Lock
def __init__(self) -> None:
self.engine = None
self._scoped_session = None
+ self._lock = threading.Lock()
def make_session(self, engine: Engine) -> Session:
"""
@@ -100,7 +103,9 @@ class SessionManager:
"""
self.engine = engine
if self._scoped_session is None:
- self._scoped_session = scoped_session(sessionmaker())
+ with self._lock:
+ if self._scoped_session is None:
+ self._scoped_session = scoped_session(sessionmaker())
try:
self._scoped_session.configure(bind=self.engine) # type: ignore
self._scoped_session.configure(expire_on_commit=False) # type: ignore
diff --git a/keylime/web/verifier/session_controller.py b/keylime/web/verifier/session_controller.py
index 3faa310..c8664e2 100644
--- a/keylime/web/verifier/session_controller.py
+++ b/keylime/web/verifier/session_controller.py
@@ -185,6 +185,7 @@ class SessionController(Controller):
return
# Check if agent exists - this is where we validate enrollment
+ agent = None
with get_session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
if agent:
@@ -382,6 +383,7 @@ class SessionController(Controller):
# POST /v3[.:minor]/agents/:agent_id/session
def create(self, agent_id, **params):
+ agent = None
with get_session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
if agent:
@@ -407,6 +409,7 @@ class SessionController(Controller):
self.respond(200, "Success", auth_session.render())
def update(self, agent_id, token, **params):
+ agent = None
with get_session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
if agent:
--
2.53.0

View File

@ -1,308 +0,0 @@
From 565889ab6c90823a5096e39a58e9599fa49072f6 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Wed, 23 Jul 2025 15:39:49 +0200
Subject: [PATCH 5/7] revocations: Try to send notifications on shutdown
During verifier shutdown, try to send any pending revocation
notification in a best-effort manner. In the future, the pending revocation
notifications should be persisted to be processed during the next startup.
Assisted-by: Claude 4 Sonnet
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/cloud_verifier_tornado.py | 7 +
keylime/revocation_notifier.py | 239 ++++++++++++++++++++++--------
2 files changed, 184 insertions(+), 62 deletions(-)
diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py
index 7065661..89aa703 100644
--- a/keylime/cloud_verifier_tornado.py
+++ b/keylime/cloud_verifier_tornado.py
@@ -2109,6 +2109,10 @@ def main() -> None:
# Stop server to not accept new incoming connections
server.stop()
+ # Gracefully shutdown webhook workers to prevent connection errors
+ if "webhook" in revocation_notifier.get_notifiers():
+ revocation_notifier.shutdown_webhook_workers()
+
# Wait for all connections to be closed and then stop ioloop
async def stop() -> None:
await server.close_all_connections()
@@ -2136,6 +2140,9 @@ def main() -> None:
def sig_handler(*_: Any) -> None:
if run_revocation_notifier:
revocation_notifier.stop_broker()
+ # Gracefully shutdown webhook workers to prevent connection errors
+ if "webhook" in revocation_notifier.get_notifiers():
+ revocation_notifier.shutdown_webhook_workers()
for p in processes:
p.join()
# Do not call sys.exit(0) here as it interferes with multiprocessing cleanup
diff --git a/keylime/revocation_notifier.py b/keylime/revocation_notifier.py
index 5a7cc4b..c154028 100644
--- a/keylime/revocation_notifier.py
+++ b/keylime/revocation_notifier.py
@@ -18,6 +18,174 @@ broker_proc: Optional[Process] = None
_SOCKET_PATH = "/var/run/keylime/keylime.verifier.ipc"
+# Global webhook manager instance (initialized when needed)
+_webhook_manager: Optional["WebhookNotificationManager"] = None
+
+
+class WebhookNotificationManager:
+ """Manages webhook worker threads and graceful shutdown for revocation notifications."""
+
+ def __init__(self) -> None:
+ self._shutdown_event = threading.Event()
+ self._workers: Set[threading.Thread] = set()
+ self._workers_lock = threading.Lock()
+
+ def notify_webhook(self, tosend: Dict[str, Any]) -> None:
+ """Send webhook notification with worker thread management."""
+ url = config.get("verifier", "webhook_url", section="revocations", fallback="")
+ # Check if a url was specified
+ if url == "":
+ return
+
+ # Similarly to notify(), let's convert `tosend' to str to prevent
+ # possible issues with json handling by python-requests.
+ tosend = json.bytes_to_str(tosend)
+
+ def worker_webhook(tosend: Dict[str, Any], url: str) -> None:
+ is_shutdown_mode = False
+ try:
+ interval = config.getfloat("verifier", "retry_interval")
+ exponential_backoff = config.getboolean("verifier", "exponential_backoff")
+
+ max_retries = config.getint("verifier", "max_retries")
+ if max_retries <= 0:
+ logger.info("Invalid value found in 'max_retries' option for verifier, using default value")
+ max_retries = 5
+
+ # During shutdown, use fewer retries but still make best effort
+ if self._shutdown_event.is_set():
+ is_shutdown_mode = True
+ max_retries = min(max_retries, 3) # Reduce retries during shutdown but still try
+ logger.info(
+ "Shutdown mode: attempting to send critical revocation notification with %d retries",
+ max_retries,
+ )
+
+ # Get TLS options from the configuration
+ (cert, key, trusted_ca, key_password), verify_server_cert = web_util.get_tls_options(
+ "verifier", is_client=True, logger=logger
+ )
+
+ # Generate the TLS context using the obtained options
+ tls_context = web_util.generate_tls_context(
+ cert, key, trusted_ca, key_password, is_client=True, logger=logger
+ )
+
+ logger.info("Sending revocation event via webhook to %s ...", url)
+ for i in range(max_retries):
+ next_retry = retry.retry_time(exponential_backoff, interval, i, logger)
+
+ with RequestsClient(
+ url,
+ verify_server_cert,
+ tls_context,
+ ) as client:
+ try:
+ res = client.post("", json=tosend, timeout=5)
+ except requests.exceptions.SSLError as ssl_error:
+ if "TLSV1_ALERT_UNKNOWN_CA" in str(ssl_error):
+ logger.warning(
+ "Keylime does not recognize certificate from peer. Check if verifier 'trusted_server_ca' is configured correctly"
+ )
+
+ raise ssl_error from ssl_error
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
+ # During shutdown, only suppress errors on the final attempt after all retries exhausted
+ if is_shutdown_mode and i == max_retries - 1:
+ logger.warning(
+ "Final attempt to send revocation notification failed during shutdown: %s", e
+ )
+ return
+ # Otherwise, let the retry logic handle it
+ raise e
+
+ if res and res.status_code in [200, 202]:
+ if is_shutdown_mode:
+ logger.info("Successfully sent revocation notification during shutdown")
+ break
+
+ logger.debug(
+ "Unable to publish revocation message %d times via webhook, "
+ "trying again in %d seconds. "
+ "Server returned status code: %s",
+ i + 1,
+ next_retry,
+ res.status_code,
+ )
+
+ # During shutdown, use shorter retry intervals to complete faster
+ if is_shutdown_mode:
+ next_retry = min(next_retry, 2.0) # Cap retry interval during shutdown
+
+ time.sleep(next_retry)
+
+ except Exception as e:
+ # Only suppress errors during final shutdown phase and log appropriately
+ if is_shutdown_mode:
+ logger.warning("Failed to send revocation notification during shutdown: %s", e)
+ else:
+ logger.error("Error in webhook worker: %s", e)
+ finally:
+ # Remove this worker from the active set
+ current_thread = threading.current_thread()
+ with self._workers_lock:
+ self._workers.discard(current_thread)
+
+ w = functools.partial(worker_webhook, tosend, url)
+ t = threading.Thread(target=w, daemon=True)
+
+ # Add this worker to the active set
+ with self._workers_lock:
+ self._workers.add(t)
+
+ t.start()
+
+ def shutdown_workers(self) -> None:
+ """Signal webhook workers to shut down gracefully and wait for them to complete.
+
+ This gives workers time to complete their critical revocation notifications
+ before the service shuts down completely.
+ """
+ logger.info("Shutting down webhook workers gracefully...")
+ self._shutdown_event.set()
+
+ # Give workers generous time to complete critical revocation notifications
+ timeout = 30.0 # Increased timeout for critical security notifications
+ end_time = time.time() + timeout
+
+ with self._workers_lock:
+ workers_to_wait = list(self._workers)
+
+ if workers_to_wait:
+ logger.info("Waiting for %d webhook workers to complete revocation notifications...", len(workers_to_wait))
+
+ for worker in workers_to_wait:
+ remaining_time = max(0, end_time - time.time())
+ if remaining_time > 0:
+ logger.debug(
+ "Waiting for webhook worker %s to complete (timeout: %.1f seconds)", worker.name, remaining_time
+ )
+ worker.join(timeout=remaining_time)
+ if worker.is_alive():
+ logger.warning("Webhook worker %s did not complete within timeout", worker.name)
+ else:
+ logger.warning("Timeout exceeded while waiting for webhook workers")
+ break
+
+ # Clean up completed workers
+ with self._workers_lock:
+ self._workers.clear()
+
+ logger.info("Webhook workers shutdown complete")
+
+
+def _get_webhook_manager() -> WebhookNotificationManager:
+ """Get the global webhook manager instance, creating it if needed."""
+ global _webhook_manager
+ if _webhook_manager is None:
+ _webhook_manager = WebhookNotificationManager()
+ return _webhook_manager
+
# return the revocation notification methods for cloud verifier
def get_notifiers() -> Set[str]:
@@ -83,6 +251,12 @@ def stop_broker() -> None:
broker_proc.kill() # pylint: disable=E1101
+def shutdown_webhook_workers() -> None:
+ """Convenience function to shutdown webhook workers using the global manager."""
+ manager = _get_webhook_manager()
+ manager.shutdown_workers()
+
+
def notify(tosend: Dict[str, Any]) -> None:
assert "zeromq" in get_notifiers()
try:
@@ -127,68 +301,9 @@ def notify(tosend: Dict[str, Any]) -> None:
def notify_webhook(tosend: Dict[str, Any]) -> None:
- url = config.get("verifier", "webhook_url", section="revocations", fallback="")
- # Check if a url was specified
- if url == "":
- return
-
- # Similarly to notify(), let's convert `tosend' to str to prevent
- # possible issues with json handling by python-requests.
- tosend = json.bytes_to_str(tosend)
-
- def worker_webhook(tosend: Dict[str, Any], url: str) -> None:
- interval = config.getfloat("verifier", "retry_interval")
- exponential_backoff = config.getboolean("verifier", "exponential_backoff")
-
- max_retries = config.getint("verifier", "max_retries")
- if max_retries <= 0:
- logger.info("Invalid value found in 'max_retries' option for verifier, using default value")
- max_retries = 5
-
- # Get TLS options from the configuration
- (cert, key, trusted_ca, key_password), verify_server_cert = web_util.get_tls_options(
- "verifier", is_client=True, logger=logger
- )
-
- # Generate the TLS context using the obtained options
- tls_context = web_util.generate_tls_context(cert, key, trusted_ca, key_password, is_client=True, logger=logger)
-
- logger.info("Sending revocation event via webhook to %s ...", url)
- for i in range(max_retries):
- next_retry = retry.retry_time(exponential_backoff, interval, i, logger)
-
- with RequestsClient(
- url,
- verify_server_cert,
- tls_context,
- ) as client:
- try:
- res = client.post("", json=tosend, timeout=5)
- except requests.exceptions.SSLError as ssl_error:
- if "TLSV1_ALERT_UNKNOWN_CA" in str(ssl_error):
- logger.warning(
- "Keylime does not recognize certificate from peer. Check if verifier 'trusted_server_ca' is configured correctly"
- )
-
- raise ssl_error from ssl_error
-
- if res and res.status_code in [200, 202]:
- break
-
- logger.debug(
- "Unable to publish revocation message %d times via webhook, "
- "trying again in %d seconds. "
- "Server returned status code: %s",
- i + 1,
- next_retry,
- res.status_code,
- )
-
- time.sleep(next_retry)
-
- w = functools.partial(worker_webhook, tosend, url)
- t = threading.Thread(target=w, daemon=True)
- t.start()
+ """Send webhook notification using the global webhook manager."""
+ manager = _get_webhook_manager()
+ manager.notify_webhook(tosend)
cert_key = None
--
2.47.3

View File

@ -0,0 +1,42 @@
From 309a0ef0fe1d0917ad9d4fd7ab4327570a59cf34 Mon Sep 17 00:00:00 2001
From: Sergio Arroutbi <sarroutb@redhat.com>
Date: Thu, 12 Mar 2026 19:18:56 +0100
Subject: [PATCH 6/6] Fix race condition in SessionManager
Move self.engine assignment inside the lock so it is set atomically
with _scoped_session creation. Without this, concurrent threads calling
make_session() with different engines could race on the assignment,
causing _scoped_session to be configured with a stale engine reference.
Also log a warning if make_session() is called with a different engine
after initialization, since the scoped_session is cached and bound to
the original engine.
Suggested-by: coderabbitai
Signed-off-by: Sergio Arroutbi <sarroutb@redhat.com>
---
keylime/db/keylime_db.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/keylime/db/keylime_db.py b/keylime/db/keylime_db.py
index cf608fa..a622b09 100644
--- a/keylime/db/keylime_db.py
+++ b/keylime/db/keylime_db.py
@@ -101,11 +101,13 @@ class SessionManager:
"""
To use: session = self.make_session(engine)
"""
- self.engine = engine
if self._scoped_session is None:
with self._lock:
if self._scoped_session is None:
+ self.engine = engine
self._scoped_session = scoped_session(sessionmaker())
+ elif self.engine is not engine:
+ logger.warning("SessionManager called with different engine than originally configured")
try:
self._scoped_session.configure(bind=self.engine) # type: ignore
self._scoped_session.configure(expire_on_commit=False) # type: ignore
--
2.53.0

View File

@ -1,45 +0,0 @@
From e6fb5090df3e35c7d44bc8f7f37d420d7ee8a05c Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Wed, 4 Jun 2025 19:52:37 +0100
Subject: [PATCH 6/7] requests_client: close the session at the end of the
resource manager
We had an issue in the past in which the webhook worker would not
properly close the opened session. This was fixed in #1456 (Close
session in worker_webhook function).
At some later point, in #1566 (revocation_notifier: Take into account CA
certificates added via configuration), some refactoring around the
webhook_worker() in revocation_notifier happened and it started using
the RequestsClient resource manager.
However, the RequestsClient does not close the session on exit, which
in turn means that the old issue of not properly closing the session
in the webhook_worker() has returned.
We now issue a session.close() at the end of the RequestsClient.
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
keylime/requests_client.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/keylime/requests_client.py b/keylime/requests_client.py
index 16615f7..b7da484 100644
--- a/keylime/requests_client.py
+++ b/keylime/requests_client.py
@@ -40,7 +40,10 @@ class RequestsClient:
return self
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
- pass
+ try:
+ self.session.close()
+ except Exception:
+ pass
def request(self, method: str, url: str, **kwargs: Any) -> requests.Response:
return self.session.request(method, self.base_url + url, **kwargs)
--
2.47.3

View File

@ -0,0 +1,160 @@
From e75921f02393277e8bc5ba3d058131376516a099 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Thu, 5 Mar 2026 17:27:41 +0100
Subject: [PATCH] Fix linter errors in PersistableModel.get() and .all()
PersistableModel.get() and .all() returned Optional[PersistableModel]
and Sequence[PersistableModel] respectively, which caused pyright errors
when subclasses like IMAPolicy or MBPolicy called cls.get() and expected
the return type to match their own class.
Use a TypeVar bound to PersistableModel so cls.get() on a subclass
correctly returns Optional[SubclassType].
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/models/base/persistable_model.py | 8 +++++---
keylime/web/registrar/agents_controller.py | 6 +++---
keylime/web/verifier/attestation_controller.py | 17 ++++++++---------
3 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/keylime/models/base/persistable_model.py b/keylime/models/base/persistable_model.py
index 3380eb6..4aa596e 100644
--- a/keylime/models/base/persistable_model.py
+++ b/keylime/models/base/persistable_model.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional, Sequence
+from typing import Any, Optional, Sequence, TypeVar
from sqlalchemy import asc, desc, or_
from sqlalchemy.sql.expression import ClauseElement
@@ -18,6 +18,8 @@ from keylime.models.base.persistable_model_meta import PersistableModelMeta
from keylime.models.base.types.dictionary import Dictionary
from keylime.models.base.types.list import List
+_PM = TypeVar("_PM", bound="PersistableModel")
+
class PersistableModel(BasicModel, metaclass=PersistableModelMeta):
"""PersistableModel extends the BasicModel class to provide additional functionality for saving and retrieving
@@ -181,7 +183,7 @@ class PersistableModel(BasicModel, metaclass=PersistableModelMeta):
return session.query(subject).filter(*filter_criteria).order_by(*sort_criteria)
@classmethod
- def get(cls, *args: Any, **kwargs: Any) -> Optional["PersistableModel"]:
+ def get(cls: type[_PM], *args: Any, **kwargs: Any) -> Optional[_PM]:
# pylint: disable=no-else-return
if cls.schema_awaiting_processing:
@@ -203,7 +205,7 @@ class PersistableModel(BasicModel, metaclass=PersistableModelMeta):
return None
@classmethod
- def all(cls, *args: Any, **kwargs: Any) -> Sequence["PersistableModel"]:
+ def all(cls: type[_PM], *args: Any, **kwargs: Any) -> Sequence[_PM]:
if cls.schema_awaiting_processing:
cls.process_schema()
diff --git a/keylime/web/registrar/agents_controller.py b/keylime/web/registrar/agents_controller.py
index 290317f..c918f95 100644
--- a/keylime/web/registrar/agents_controller.py
+++ b/keylime/web/registrar/agents_controller.py
@@ -27,7 +27,7 @@ class AgentsController(Controller):
self.respond(404, f"Agent with ID '{agent_id}' has not been activated")
return
- self.respond(200, "Success", agent.render())
+ self.respond(200, "Success", agent.render()) # type: ignore[no-untyped-call]
# POST /v2[.:minor]/agents/[:agent_id]
def create(self, agent_id, **params):
@@ -143,10 +143,10 @@ class AgentsController(Controller):
self.respond(404, f"Agent with ID '{agent_id}' not found")
return
- accepted = agent.verify_ak_response(auth_tag) # type: ignore[attr-defined]
+ accepted = agent.verify_ak_response(auth_tag) # type: ignore[attr-defined,no-untyped-call]
if accepted:
- agent.commit_changes()
+ agent.commit_changes() # type: ignore[no-untyped-call]
self.respond(200, "Success")
else:
agent.delete()
diff --git a/keylime/web/verifier/attestation_controller.py b/keylime/web/verifier/attestation_controller.py
index 0e50b8a..59f280c 100755
--- a/keylime/web/verifier/attestation_controller.py
+++ b/keylime/web/verifier/attestation_controller.py
@@ -1,6 +1,5 @@
# pyright: reportAttributeAccessIssue=false
# Uses ORM models with dynamically-created attributes from metaclasses
-from typing import cast
from keylime import agent_util, config, keylime_logging
from keylime.common import retry
@@ -158,12 +157,12 @@ class AttestationController(Controller):
# GET /v3[.:minor]/agents/:agent_id/attestations
def index(self, agent_id, **_params): # type: ignore[no-untyped-def]
- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id))
+ agent = VerifierAgent.get(agent_id)
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
- results = cast(list[Attestation], Attestation.all(agent_id=agent_id))
+ results = Attestation.all(agent_id=agent_id)
resources = [
APIResource("attestation", attestation.render_state()).include( # type: ignore[no-untyped-call]
@@ -184,8 +183,8 @@ class AttestationController(Controller):
# GET /v3[.:minor]/agents/:agent_id/attestations/:index
def show(self, agent_id, index, **_params): # type: ignore[no-untyped-def]
- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id))
- attestation = cast(Attestation | None, Attestation.get(agent_id=agent_id, index=index))
+ agent = VerifierAgent.get(agent_id)
+ attestation = Attestation.get(agent_id=agent_id, index=index)
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
@@ -201,7 +200,7 @@ class AttestationController(Controller):
# GET /v3[.:minor]/agents/:agent_id/attestations/latest
def show_latest(self, agent_id, **_params): # type: ignore[no-untyped-def]
- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id))
+ agent = VerifierAgent.get(agent_id)
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
@@ -214,7 +213,7 @@ class AttestationController(Controller):
# POST /v3[.:minor]/agents/:agent_id/attestations
@Controller.require_json_api
def create(self, agent_id, attestation, **params): # type: ignore[no-untyped-def] # pylint: disable=unused-argument
- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id))
+ agent = VerifierAgent.get(agent_id)
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
@@ -310,7 +309,7 @@ class AttestationController(Controller):
# Extract attestation from params - it should be provided by the API request
attestation = params.get("attestation")
- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id))
+ agent = VerifierAgent.get(agent_id)
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
@@ -368,7 +367,7 @@ class AttestationController(Controller):
# PATCH /v3[.:minor]/agents/:agent_id/attestations/latest
@Controller.require_json_api
def update_latest(self, agent_id, **params): # type: ignore[no-untyped-def]
- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id))
+ agent = VerifierAgent.get(agent_id)
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
--
2.53.0

View File

@ -1,91 +0,0 @@
From 39ea2efb72b383f729474a1583d4b8c097cf848a Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Thu, 6 Feb 2025 21:29:56 +0000
Subject: [PATCH 07/10] tests: change test_mba_parsing to not need keylime
installed
This test needs the verifier configuration file available, and on
systems that do not have keylime installed (hence, no config file),
it would fail.
This commit changes the test so that it creates a verifier conf file
in a temporary directory with default values, so that it can use it.
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
test/test_mba_parsing.py | 52 +++++++++++++++++++++++++++++-----------
1 file changed, 38 insertions(+), 14 deletions(-)
diff --git a/test/test_mba_parsing.py b/test/test_mba_parsing.py
index 670a602..4ee4e3b 100644
--- a/test/test_mba_parsing.py
+++ b/test/test_mba_parsing.py
@@ -1,27 +1,51 @@
import os
+import tempfile
import unittest
+from configparser import RawConfigParser
+from keylime import config
+from keylime.cmd import convert_config
from keylime.common.algorithms import Hash
from keylime.mba import mba
+TEMPLATES_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "templates"))
+
class TestMBAParsing(unittest.TestCase):
def test_parse_bootlog(self):
"""Test parsing binary measured boot event log"""
- mba.load_imports()
- # Use the file that triggered https://github.com/keylime/keylime/issues/1153
- mb_log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data/mb_log.b64"))
- with open(mb_log_path, encoding="utf-8") as f:
- # Read the base64 input and remove the newlines
- b64 = "".join(f.read().splitlines())
- pcr_hashes, boot_aggregates, measurement_data, failure = mba.bootlog_parse(b64, Hash.SHA256)
-
- self.assertFalse(
- failure, f"Parsing of measured boot log failed with: {list(map(lambda x: x.context, failure.events))}"
- )
- self.assertTrue(isinstance(pcr_hashes, dict))
- self.assertTrue(isinstance(boot_aggregates, dict))
- self.assertTrue(isinstance(measurement_data, dict))
+ # This test requires the verifier configuration file, so let's create
+ # one with the default values to use, so that we do not depend on the
+ # configuration files existing in the test system.
+ with tempfile.TemporaryDirectory() as config_dir:
+ # Let's write the config file for the verifier.
+ verifier_config = convert_config.process_versions(["verifier"], TEMPLATES_DIR, RawConfigParser(), True)
+ convert_config.output(["verifier"], verifier_config, TEMPLATES_DIR, config_dir)
+
+ # As we want to use a config file from a different location, the
+ # proper way would be to define an environment variable for the
+ # module of interest, e.g. in our case it would be the
+ # KEYLIME_VERIFIER_CONFIG variable. However, the config module
+ # reads such env vars at first load, and there is no clean way
+ # to have it re-read them, so for this test we will override it
+ # manually.
+ config.CONFIG_ENV["verifier"] = os.path.abspath(os.path.join(config_dir, "verifier.conf"))
+
+ mba.load_imports()
+ # Use the file that triggered https://github.com/keylime/keylime/issues/1153
+ mb_log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data/mb_log.b64"))
+ with open(mb_log_path, encoding="utf-8") as f:
+ # Read the base64 input and remove the newlines
+ b64 = "".join(f.read().splitlines())
+ pcr_hashes, boot_aggregates, measurement_data, failure = mba.bootlog_parse(b64, Hash.SHA256)
+
+ self.assertFalse(
+ failure,
+ f"Parsing of measured boot log failed with: {list(map(lambda x: x.context, failure.events))}",
+ )
+ self.assertTrue(isinstance(pcr_hashes, dict))
+ self.assertTrue(isinstance(boot_aggregates, dict))
+ self.assertTrue(isinstance(measurement_data, dict))
if __name__ == "__main__":
--
2.47.3

View File

@ -0,0 +1,457 @@
From 2d809d8b537c0d9faab05ee5fe7efb85f48918f3 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Fri, 13 Mar 2026 10:53:54 +0100
Subject: [PATCH] refactor: Remove dead code AuthSession.authenticate_agent()
authenticate_agent() was superseded by _extract_identity() in
action_handler.py, which performs token-based agent authentication
directly via AuthSession.get_by_token(). The method, its helper
get_session_context(), the module-level _engine global, and the associated
unused imports (Session, SessionManager, make_engine) are all removed.
The corresponding tests (test_authenticate_agent_success,
test_authenticate_agent_inactive_session,
test_authenticate_agent_no_session) are also removed.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/models/verifier/auth_session.py | 67 +-----------
keylime/web/verifier/session_controller.py | 9 +-
test/test_auth_session.py | 113 +--------------------
test/test_session_controller.py | 52 +++++-----
4 files changed, 32 insertions(+), 209 deletions(-)
diff --git a/keylime/models/verifier/auth_session.py b/keylime/models/verifier/auth_session.py
index 918dfb4..b0b40b0 100644
--- a/keylime/models/verifier/auth_session.py
+++ b/keylime/models/verifier/auth_session.py
@@ -1,12 +1,8 @@
import base64
import hmac
-import threading
import uuid
-from contextlib import contextmanager
from datetime import timedelta
-from typing import Any, Dict, Iterator, Optional, Sequence
-
-from sqlalchemy.orm import Session
+from typing import Any, Dict, Optional, Sequence
from keylime import config, keylime_logging
from keylime.crypto import (
@@ -16,7 +12,6 @@ from keylime.crypto import (
parse_session_token,
verify_token_hash,
)
-from keylime.db.keylime_db import SessionManager, make_engine
from keylime.db.verifier_db import VerfierMain
from keylime.models.base import *
from keylime.shared_data import get_shared_memory
@@ -31,21 +26,6 @@ from keylime.tpm.tpm_main import Tpm
logger = keylime_logging.init_logging("verifier")
-_engine = None
-_engine_lock = threading.Lock()
-_session_manager = SessionManager()
-
-
-@contextmanager
-def get_session_context() -> Iterator[Session]:
- global _engine
- if _engine is None:
- with _engine_lock:
- if _engine is None:
- _engine = make_engine("cloud_verifier")
- with _session_manager.session_context(_engine) as session:
- yield session
-
class AuthSession(PersistableModel):
# Explicit attribute declarations for type checkers
@@ -244,51 +224,6 @@ class AuthSession(PersistableModel):
# Slow path: query database by primary key
return cls.get(session_id) # type: ignore[return-value]
- @classmethod
- def authenticate_agent(cls, token: str): # type: ignore[no-untyped-def]
- """Authenticate an agent using their session token.
-
- Uses indexed database lookup by token hash for performance (O(1) instead of O(n)).
- Tokens are hashed before lookup since only hashes are stored in the database.
-
- Args:
- token: The session token to verify
-
- Returns:
- VerfierMain object if authenticated, False otherwise
- """
- # Use indexed lookup by token hash (much faster than scanning all sessions)
- auth_session = cls.get_by_token(token)
-
- if not auth_session:
- return False
-
- # Validate session is active
- if not getattr(auth_session, "active", False):
- return False
-
- # Validate session hasn't expired
- token_expires_at = getattr(auth_session, "token_expires_at", None)
- if token_expires_at and token_expires_at < Timestamp.now():
- logger.debug(
- "Authentication attempted with expired token for agent '%s' (expired at %s)",
- getattr(auth_session, "agent_id", "unknown"),
- token_expires_at,
- )
- return False
-
- # Use old engine to query VerfierMain (legacy model)
- with get_session_context() as session:
- agent = (
- session.query(VerfierMain)
- .filter(VerfierMain.agent_id == auth_session.agent_id) # type: ignore[attr-defined]
- .one_or_none()
- )
- if agent:
- session.expunge(agent) # type: ignore[no-untyped-call]
-
- return agent
-
@classmethod
def create(
cls, agent: Optional[VerfierMain], data: Dict[str, Any], agent_id: Optional[str] = None
diff --git a/keylime/web/verifier/session_controller.py b/keylime/web/verifier/session_controller.py
index c8664e2..9a314f2 100644
--- a/keylime/web/verifier/session_controller.py
+++ b/keylime/web/verifier/session_controller.py
@@ -2,9 +2,8 @@ import base64
from keylime import config, keylime_logging
from keylime.db.verifier_db import VerfierMain
-from keylime.models.base import Timestamp
+from keylime.models.base import Timestamp, db_manager
from keylime.models.verifier import AuthSession
-from keylime.models.verifier.auth_session import get_session_context
from keylime.web.base import Controller
logger = keylime_logging.init_logging("verifier")
@@ -186,7 +185,7 @@ class SessionController(Controller):
# Check if agent exists - this is where we validate enrollment
agent = None
- with get_session_context() as session:
+ with db_manager.session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
if agent:
session.expunge(agent) # type: ignore[no-untyped-call]
@@ -384,7 +383,7 @@ class SessionController(Controller):
# POST /v3[.:minor]/agents/:agent_id/session
def create(self, agent_id, **params):
agent = None
- with get_session_context() as session:
+ with db_manager.session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
if agent:
session.expunge(agent) # type: ignore[no-untyped-call]
@@ -410,7 +409,7 @@ class SessionController(Controller):
def update(self, agent_id, token, **params):
agent = None
- with get_session_context() as session:
+ with db_manager.session_context() as session:
agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none()
if agent:
session.expunge(agent) # type: ignore[no-untyped-call]
diff --git a/test/test_auth_session.py b/test/test_auth_session.py
index 2c78547..dd554b6 100644
--- a/test/test_auth_session.py
+++ b/test/test_auth_session.py
@@ -2,74 +2,15 @@
import base64
import unittest
-from contextlib import contextmanager
from datetime import timedelta
from unittest.mock import MagicMock, PropertyMock, patch
from keylime.crypto import generate_session_token, generate_token_salt, hash_token_for_storage
from keylime.models.base.types import Timestamp
-from keylime.models.verifier.auth_session import AuthSession, get_session_context
+from keylime.models.verifier.auth_session import AuthSession
from keylime.shared_data import cleanup_global_shared_memory, get_shared_memory
-class TestGetSessionContext(unittest.TestCase):
- """Test cases for get_session_context context manager."""
-
- def _make_mock_session_manager(self, mock_session):
- """Create a mock SessionManager whose session_context() mirrors real lifecycle."""
- mock_scoped = MagicMock()
- mock_session_manager = MagicMock()
- mock_session_manager.make_session.return_value = mock_session
- mock_session_manager._scoped_session = mock_scoped # pylint: disable=protected-access
-
- @contextmanager
- def fake_session_context(engine): # pylint: disable=unused-argument
- session = mock_session_manager.make_session(engine)
- try:
- yield session
- session.commit()
- except Exception:
- session.rollback()
- raise
- finally:
- scoped = mock_session_manager._scoped_session # pylint: disable=protected-access
- if scoped is not None:
- scoped.remove()
-
- mock_session_manager.session_context = fake_session_context
- return mock_session_manager, mock_scoped
-
- @patch("keylime.models.verifier.auth_session.make_engine")
- def test_session_cleanup_on_normal_exit(self, _mock_make_engine):
- """Test that session is committed and cleaned up when context manager exits normally."""
- mock_session = MagicMock()
- mock_session_manager, mock_scoped = self._make_mock_session_manager(mock_session)
-
- with patch("keylime.models.verifier.auth_session._engine", None):
- with patch("keylime.models.verifier.auth_session._session_manager", mock_session_manager):
- with get_session_context() as session:
- self.assertIs(session, mock_session)
-
- mock_session.commit.assert_called_once()
- mock_scoped.remove.assert_called_once()
-
- @patch("keylime.models.verifier.auth_session.make_engine")
- def test_session_rollback_on_exception(self, _mock_make_engine):
- """Test that session is rolled back and cleaned up when an exception occurs."""
- mock_session = MagicMock()
- mock_session_manager, mock_scoped = self._make_mock_session_manager(mock_session)
-
- with patch("keylime.models.verifier.auth_session._engine", None):
- with patch("keylime.models.verifier.auth_session._session_manager", mock_session_manager):
- with self.assertRaises(RuntimeError):
- with get_session_context():
- raise RuntimeError("simulated error")
-
- mock_session.rollback.assert_called_once()
- mock_session.commit.assert_not_called()
- mock_scoped.remove.assert_called_once()
-
-
class TestAuthSessionHelpers(unittest.TestCase):
"""Test cases for AuthSession helper methods."""
@@ -457,58 +398,6 @@ class TestAuthSessionCore(unittest.TestCase):
self.assertIn("errors", result)
self.assertIn("authentication_supported", result["errors"])
- @patch("keylime.models.verifier.auth_session.get_session_context")
- @patch.object(AuthSession, "get_by_token")
- def test_authenticate_agent_success(self, mock_get_by_token, mock_get_session):
- """Test successful agent authentication with valid token."""
- # Create a mock agent
- mock_agent = MagicMock()
- mock_agent.agent_id = self.test_agent_id
-
- # Mock session query
- mock_db_session = MagicMock()
- mock_db_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_db_session)
- mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
-
- # Mock AuthSession.get_by_token to return an active session
- mock_auth_session = MagicMock()
- mock_auth_session.session_id = "550e8400-e29b-41d4-a716-446655440000"
- mock_auth_session.active = True
- mock_auth_session.agent_id = self.test_agent_id
- mock_auth_session.token_expires_at = Timestamp.now() + timedelta(hours=1)
- mock_get_by_token.return_value = mock_auth_session
-
- result = AuthSession.authenticate_agent("test-token")
-
- # Should return the agent
- self.assertIsNotNone(result)
- self.assertEqual(result.agent_id, self.test_agent_id) # type: ignore[union-attr]
-
- @patch.object(AuthSession, "get_by_token")
- def test_authenticate_agent_inactive_session(self, mock_get_by_token):
- """Test that inactive sessions cannot authenticate."""
- # Mock AuthSession.get_by_token to return an inactive session
- mock_auth_session = MagicMock()
- mock_auth_session.active = False
- mock_get_by_token.return_value = mock_auth_session
-
- result = AuthSession.authenticate_agent("test-token")
-
- # Should return False
- self.assertFalse(result)
-
- @patch.object(AuthSession, "get_by_token")
- def test_authenticate_agent_no_session(self, mock_get_by_token):
- """Test that authentication fails when session doesn't exist."""
- # Mock AuthSession.get_by_token to return None (no session found)
- mock_get_by_token.return_value = None
-
- result = AuthSession.authenticate_agent("test-token")
-
- # Should return False
- self.assertFalse(result)
-
@patch.object(AuthSession, "empty")
def test_create_with_agent(self, mock_empty):
"""Test AuthSession.create() with an enrolled agent."""
diff --git a/test/test_session_controller.py b/test/test_session_controller.py
index eec7fef..f8db8db 100644
--- a/test/test_session_controller.py
+++ b/test/test_session_controller.py
@@ -272,8 +272,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
# Verify session was deleted from cache
self.assertNotIn(self.test_session_id, self.sessions_cache)
- @patch("keylime.web.verifier.session_controller.get_session_context")
- def test_update_session_agent_not_enrolled(self, mock_get_session):
+ @patch("keylime.web.verifier.session_controller.db_manager")
+ def test_update_session_agent_not_enrolled(self, mock_db_manager):
"""Test update_session with unenrolled agent."""
# Create session in cache
now = Timestamp.now()
@@ -290,8 +290,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
# Mock database query to return no agent
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = None
- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
- mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
+ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False)
# Call update_session
params = {
@@ -319,9 +319,9 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
body = call_args[1]["body"]
self.assertEqual(body["data"]["attributes"]["evaluation"], "fail")
- @patch("keylime.web.verifier.session_controller.get_session_context")
+ @patch("keylime.web.verifier.session_controller.db_manager")
@patch("keylime.models.verifier.auth_session.AuthSession.create_from_memory")
- def test_update_session_authentication_failed(self, mock_create_from_memory, mock_get_session):
+ def test_update_session_authentication_failed(self, mock_create_from_memory, mock_db_manager):
"""Test update_session with failed authentication."""
# Create session in cache
now = Timestamp.now()
@@ -340,8 +340,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
mock_agent.agent_id = self.test_agent_id
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
- mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
+ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False)
# Mock AuthSession.create_from_memory to return errors
mock_auth_session = MagicMock()
@@ -379,11 +379,11 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
call_args = self.controller.send_response.call_args # type: ignore[attr-defined]
self.assertEqual(call_args[1]["code"], 401)
- @patch("keylime.web.verifier.session_controller.get_session_context")
+ @patch("keylime.web.verifier.session_controller.db_manager")
@patch("keylime.models.verifier.auth_session.AuthSession.create_from_memory")
@patch("keylime.models.verifier.auth_session.AuthSession.delete_active_session_for_agent")
@patch("keylime.web.verifier.session_controller.config")
- def test_update_session_success(self, mock_config, _mock_delete_active, mock_create_from_memory, mock_get_session):
+ def test_update_session_success(self, mock_config, _mock_delete_active, mock_create_from_memory, mock_db_manager):
"""Test successful session update."""
# Create session in cache
now = Timestamp.now()
@@ -405,8 +405,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase):
mock_agent.agent_id = self.test_agent_id
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
- mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
+ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False)
# Mock config
mock_config.getboolean.return_value = False # Don't keep in memory
@@ -525,17 +525,17 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
self.assertEqual(call_args[0][0], 404)
@patch("keylime.models.verifier.auth_session.AuthSession.delete_stale")
- @patch("keylime.web.verifier.session_controller.get_session_context")
+ @patch("keylime.web.verifier.session_controller.db_manager")
@patch("keylime.models.verifier.auth_session.AuthSession.create")
- def test_create_success(self, mock_create, mock_get_session, _mock_delete_stale):
+ def test_create_success(self, mock_create, mock_db_manager, _mock_delete_stale):
"""Test successful create endpoint."""
# Mock database query
mock_agent = MagicMock()
mock_agent.agent_id = self.test_agent_id
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
- mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
+ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False)
# Mock AuthSession.create
mock_auth_session = MagicMock()
@@ -553,14 +553,14 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
call_args = self.controller.respond.call_args # type: ignore[attr-defined]
self.assertEqual(call_args[0][0], 200)
- @patch("keylime.web.verifier.session_controller.get_session_context")
- def test_create_agent_not_found(self, mock_get_session):
+ @patch("keylime.web.verifier.session_controller.db_manager")
+ def test_create_agent_not_found(self, mock_db_manager):
"""Test create endpoint with non-existent agent."""
# Mock database query to return None
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = None
- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
- mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
+ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False)
# Call create
params = {"data": {}}
@@ -571,17 +571,17 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
call_args = self.controller.respond.call_args # type: ignore[attr-defined]
self.assertEqual(call_args[0][0], 404)
- @patch("keylime.web.verifier.session_controller.get_session_context")
+ @patch("keylime.web.verifier.session_controller.db_manager")
@patch("keylime.models.verifier.auth_session.AuthSession.get_by_token")
- def test_update_success(self, mock_get, mock_get_session):
+ def test_update_success(self, mock_get, mock_db_manager):
"""Test successful update endpoint."""
# Mock database query
mock_agent = MagicMock()
mock_agent.agent_id = self.test_agent_id
mock_session = MagicMock()
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent
- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
- mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
+ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False)
# Mock AuthSession.get_by_token
mock_auth_session = MagicMock()
@@ -601,9 +601,9 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase):
call_args = self.controller.respond.call_args # type: ignore[attr-defined]
self.assertEqual(call_args[0][0], 200)
- @patch("keylime.web.verifier.session_controller.get_session_context")
+ @patch("keylime.web.verifier.session_controller.db_manager")
@patch("keylime.models.verifier.auth_session.AuthSession.get_by_token")
- def test_update_not_found(self, mock_get, _mock_get_session):
+ def test_update_not_found(self, mock_get, _mock_db_manager):
"""Test update endpoint with non-existent session."""
# Mock AuthSession.get_by_token to return None
mock_get.return_value = None
--
2.53.0

View File

@ -1,53 +0,0 @@
From 1496567e4b06f7a8eff9f758ea2e4e00ffa89f9b Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Wed, 4 Jun 2025 07:28:54 +0100
Subject: [PATCH 08/10] tests: skip measured-boot related tests for s390x and
ppc64le
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
test/test_create_mb_policy.py | 2 ++
test/test_mba_parsing.py | 2 ++
2 files changed, 4 insertions(+)
diff --git a/test/test_create_mb_policy.py b/test/test_create_mb_policy.py
index aa7a4b9..cd32bda 100644
--- a/test/test_create_mb_policy.py
+++ b/test/test_create_mb_policy.py
@@ -5,6 +5,7 @@ Copyright 2024 Red Hat, Inc.
import argparse
import os
+import platform
import unittest
from keylime.policy import create_mb_policy
@@ -12,6 +13,7 @@ from keylime.policy import create_mb_policy
DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data", "create-mb-policy"))
+@unittest.skipIf(platform.machine() in ["ppc64le", "s390x"], "ppc64le and s390x are not supported")
class CreateMeasuredBootPolicy_Test(unittest.TestCase):
def test_event_to_sha256(self):
test_cases = [
diff --git a/test/test_mba_parsing.py b/test/test_mba_parsing.py
index 4ee4e3b..82e6086 100644
--- a/test/test_mba_parsing.py
+++ b/test/test_mba_parsing.py
@@ -1,4 +1,5 @@
import os
+import platform
import tempfile
import unittest
from configparser import RawConfigParser
@@ -11,6 +12,7 @@ from keylime.mba import mba
TEMPLATES_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "templates"))
+@unittest.skipIf(platform.machine() in ["ppc64le", "s390x"], "ppc64le and s390x are not supported")
class TestMBAParsing(unittest.TestCase):
def test_parse_bootlog(self):
"""Test parsing binary measured boot event log"""
--
2.47.3

View File

@ -0,0 +1,205 @@
From e935df8fb9ad36daa41e079d19964678b28be246 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Fri, 6 Mar 2026 11:47:04 +0100
Subject: [PATCH] db: Clean up scoped session after each request
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The scoped_session was never cleaned up between requests, causing its
identity map to accumulate objects indefinitely. When subsequent
requests loaded objects with PKs already present in the identity map,
SQLAlchemy emitted SAWarning about identity map conflicts during flush.
Add DBManager.remove_session() and call it from two places:
1. ActionHandler.process_request() finally block — the primary cleanup
point, runs after all action code completes (including work done
after the response is sent via stop_action=False).
2. ActionHandler.on_finish() — guarded by _entered_process_request
flag, only runs when prepare() returned early (e.g., auth/authz
failure) without entering process_request(). Cannot be called
unconditionally because on_finish() is triggered by finish(), which
may fire mid-action when stop_action=False is used.
This also prevents unbounded memory growth from the identity map over
the verifier's lifetime.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/models/base/db.py | 9 +++
keylime/web/base/action_handler.py | 115 +++++++++++++++++------------
2 files changed, 78 insertions(+), 46 deletions(-)
diff --git a/keylime/models/base/db.py b/keylime/models/base/db.py
index c758fc1..9678098 100644
--- a/keylime/models/base/db.py
+++ b/keylime/models/base/db.py
@@ -138,6 +138,15 @@ class DBManager:
return cast(Session, self._scoped_session())
+ def remove_session(self) -> None:
+ """Remove the current scoped session, releasing its connection back to the pool and clearing the identity map.
+
+ Should be called at the end of each request to prevent stale objects from accumulating in the session across
+ request boundaries.
+ """
+ if self._scoped_session:
+ self._scoped_session.remove()
+
@contextmanager
def session_context(self, session: Session | None = None) -> Iterator[Session]:
if session:
diff --git a/keylime/web/base/action_handler.py b/keylime/web/base/action_handler.py
index 8410b40..d14c9ee 100644
--- a/keylime/web/base/action_handler.py
+++ b/keylime/web/base/action_handler.py
@@ -9,6 +9,7 @@ from tornado.web import RequestHandler
from keylime import keylime_logging
from keylime.authorization.manager import get_authorization_manager
from keylime.authorization.provider import Action, AuthorizationRequest
+from keylime.models.base.db import db_manager
from keylime.models.base.types import Timestamp # type: ignore[attr-defined]
from keylime.models.verifier.auth_session import AuthSession
from keylime.web.base.default_controller import DefaultController
@@ -549,6 +550,7 @@ class ActionHandler(RequestHandler):
self._action_call_stack: list[tuple["Controller", str]] = []
self._received_at: int = time.time_ns()
self._finished: bool = False
+ self._entered_process_request: bool = False
async def prepare(self) -> None:
# Tornado allows the prepare method to be overridden as async in subclasses of RequestHandler
@@ -598,59 +600,80 @@ class ActionHandler(RequestHandler):
return
async def process_request(self) -> None:
+ self._entered_process_request = True # pylint: disable=attribute-defined-outside-init
# If a route matches the request, invoke action determined by the matching route
- if self.matching_route and self.controller:
- try:
- await self._invoke_action()
- except StopAction:
- # If the action is terminated early, continue
- pass
- except ParamDecodeError:
- # If the query, form or JSON parameters are malformed, respond using error-handling action
- await self._invoke_action("malformed_params", ignore_param_errors=True)
- except ActionDispatchError:
- # If the union of path, query, form and JSON parameters and do not match the method signature
- # of the action, respond using error-handling action
- await self._invoke_action("action_dispatch_error", ignore_param_errors=True)
- except RequiredContentMissing:
- # If a decorator from the Controller class has been used to mark a certain content format as required
- # for the action and the request body or Content-Type do not adhere, respond using error-handling action
- await self._invoke_action("format_not_allowed", ignore_param_errors=True)
- except Exception as err:
- # Any other exception which is not caught within the action body should be logged as an unexpected
- # internal error before responding using error-handling action
- self._log_exception(err)
- await self._invoke_action("action_exception", ignore_param_errors=True)
-
- # Handle situation in which no invoked action produces a response
- self._handle_incomplete_action()
+ try:
+ if self.matching_route and self.controller:
+ try:
+ await self._invoke_action()
+ except StopAction:
+ # If the action is terminated early, continue
+ pass
+ except ParamDecodeError:
+ # If the query, form or JSON parameters are malformed, respond using error-handling action
+ await self._invoke_action("malformed_params", ignore_param_errors=True)
+ except ActionDispatchError:
+ # If the union of path, query, form and JSON parameters and do not match the method signature
+ # of the action, respond using error-handling action
+ await self._invoke_action("action_dispatch_error", ignore_param_errors=True)
+ except RequiredContentMissing:
+ # If a decorator from the Controller class has been used to mark a certain content format as
+ # required for the action and the request body or Content-Type do not adhere, respond using
+ # error-handling action
+ await self._invoke_action("format_not_allowed", ignore_param_errors=True)
+ except Exception as err:
+ # Any other exception which is not caught within the action body should be logged as an
+ # unexpected internal error before responding using error-handling action
+ self._log_exception(err)
+ await self._invoke_action("action_exception", ignore_param_errors=True)
+
+ # Handle situation in which no invoked action produces a response
+ self._handle_incomplete_action()
+ finally:
+ # Clean up the scoped session after all action code completes (including any work done after the
+ # response is sent via stop_action=False). This prevents stale objects from accumulating in the
+ # identity map across request boundaries. Must be here rather than on_finish(), because on_finish()
+ # is called by Tornado's finish() when the response is sent, which may be before action code completes.
+ db_manager.remove_session()
def write_error(self, status_code: int, **kwargs: Any) -> None:
- if status_code == 405 and kwargs.get("exc_info"):
- # Handle situation in which the HTTP method given in the request is not supported by the server (Tornado
- # produces a 405 error by default in this case)
-
- # self.prepare() is not triggered in this case, so perform request reporting tasks
- self._process_request_id()
- logger.info("%s %s", self.request.method, self.request.path)
- # Produce a response using the appropriate error-handling action
- self._invoke_action_sync("unsupported_method", ignore_param_errors=True)
-
- elif kwargs.get("exc_info"):
- # For any other exception produced by this class and not caught elsewhere, log the exception and invoke
- # the appropriate error-handling action
- _, err, _ = kwargs["exc_info"]
- self._log_exception(err)
- self._invoke_action_sync("handler_exception", ignore_param_errors=True)
+ try:
+ if status_code == 405 and kwargs.get("exc_info"):
+ # Handle situation in which the HTTP method given in the request is not supported by the server
+ # (Tornado produces a 405 error by default in this case)
+
+ # self.prepare() is not triggered in this case, so perform request reporting tasks
+ self._process_request_id()
+ logger.info("%s %s", self.request.method, self.request.path)
+ # Produce a response using the appropriate error-handling action
+ self._invoke_action_sync("unsupported_method", ignore_param_errors=True)
+
+ elif kwargs.get("exc_info"):
+ # For any other exception produced by this class and not caught elsewhere, log the exception and
+ # invoke the appropriate error-handling action
+ _, err, _ = kwargs["exc_info"]
+ self._log_exception(err)
+ self._invoke_action_sync("handler_exception", ignore_param_errors=True)
- else:
- # Catch-all for all other errors (typically those produced by calling Tornado's send_error method)
- self.default_controller.send_response(status_code)
+ else:
+ # Catch-all for all other errors (typically those produced by calling Tornado's send_error method)
+ self.default_controller.send_response(status_code)
- # Handle situation in which none of the above-invoked error-handling actions produce a response
- self._handle_incomplete_action()
+ # Handle situation in which none of the above-invoked error-handling actions produce a response
+ self._handle_incomplete_action()
+ finally:
+ db_manager.remove_session()
def on_finish(self) -> None:
+ # Clean up the scoped session only if process_request() was never
+ # entered (e.g., prepare() returned early due to auth/authz failure).
+ # When process_request() runs, its finally block handles cleanup —
+ # calling remove_session() here would be premature because on_finish()
+ # is triggered by finish() which may be called mid-action when
+ # stop_action=False is used (the action continues after the response).
+ if not self._entered_process_request:
+ db_manager.remove_session()
+
message = f"Sent {self.get_status()} in {self.elapsed_time}"
if self.get_status() < 400:
--
2.53.0

View File

@ -1,58 +0,0 @@
From be968fd54198042d2014ad63368b78e9d4609169 Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Thu, 22 May 2025 11:25:15 -0400
Subject: [PATCH 09/10] tests: fix rpm repo tests from create-runtime-policy
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
.../create-runtime-policy/setup-rpm-tests | 28 +++++++++++++------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/test/data/create-runtime-policy/setup-rpm-tests b/test/data/create-runtime-policy/setup-rpm-tests
index 708438c..b62729b 100755
--- a/test/data/create-runtime-policy/setup-rpm-tests
+++ b/test/data/create-runtime-policy/setup-rpm-tests
@@ -217,20 +217,32 @@ create_rpm() {
# https://github.com/rpm-software-management/rpm/commit/96467dce18f264b278e17ffe1859c88d9b5aa4b6
_pkgname="DUMMY-${_name}-${_version}-${_rel}.noarch.rpm"
- _expected_pkg="${RPMSDIR}/noarch/${_pkgname}"
- [ -e "${_expected_pkg}" ] && return 0
+ # For some reason, it may not store the built package within the
+ # noarch directory, but directly in RPMS, so let's check both
+ # locations.
+ _expected_pkg="${RPMSDIR}/noarch/${_pkgname} ${RPMSDIR}/${_pkgname}"
+ for _expected in ${_expected_pkg}; do
+ if [ -e "${_expected}" ]; then
+ echo "(create_rpm) CREATED RPM: ${_expected}" >&2
+ return 0
+ fi
+ done
# OK, the package was not built where it should. Let us see if
# it was built in ~/rpmbuild instead, and if that is the case,
# copy it to the expected location.
- _bad_location_pkg="${HOME}/rpmbuild/RPMS/noarch/${_pkgname}"
- if [ -e "${_bad_location_pkg}" ]; then
- echo "WARNING: the package ${_pkgname} was built into ~/rpmbuild despite rpmbuild being instructed to build it at a different location. Probably a fallout from https://github.com/rpm-software-management/rpm/commit/96467dce" >&2
- install -D -m644 "${_bad_location_pkg}" "${_expected_pkg}"
- return 0
- fi
+ _bad_location_pkg="${HOME}/rpmbuild/RPMS/noarch/${_pkgname} ${HOME}/rpmbuild/RPMS/${_pkgname}"
+ for _bad_l in ${_bad_location_pkg}; do
+ if [ -e "${_bad_l}" ]; then
+ echo "WARNING: the package ${_pkgname} was built into ~/rpmbuild despite rpmbuild being instructed to build it at a different location. Probably a fallout from https://github.com/rpm-software-management/rpm/commit/96467dce" >&2
+ install -D -m644 "${_bad_l}" "${RPMSDIR}/noarch/${_pkgname}"
+ echo "(create_rpm) CREATED RPM: ${RPMSDIR}/noarch/${_pkgname}" >&2
+ return 0
+ fi
+ done
# Should not be here.
+ echo "create_rpm() ended with error; probably an issue with the location where the RPMs were built" >&2
return 1
}
--
2.47.3

View File

@ -0,0 +1,108 @@
From 08c0c67c492ef27df53fa9bff899597c46ae6fc8 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Fri, 13 Mar 2026 13:59:23 +0100
Subject: [PATCH] fix: Check active flag in _extract_identity and guard
receive_pop
receive_pop() was unconditionally setting active=True and
token_expires_at even when TPM verification failed. Use `any(errs for
errs in self.errors.values())` to check for non-empty error lists,
matching the pattern already used in session_controller.py.
This did not affect security because on failure the state was not
persisted in the database. Now both fields are only set when no errors
occurred.
_extract_identity() was not checking the session active flag, which
could allow authentication with an inactive session if the state was
persisted. Add the active check as defense-in-depth.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/models/verifier/auth_session.py | 9 ++++---
keylime/web/base/action_handler.py | 36 +++++++++++++++++++------
2 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/keylime/models/verifier/auth_session.py b/keylime/models/verifier/auth_session.py
index b0b40b0..fc5c8df 100644
--- a/keylime/models/verifier/auth_session.py
+++ b/keylime/models/verifier/auth_session.py
@@ -630,10 +630,11 @@ class AuthSession(PersistableModel):
logger.error("Unexpected error during TPM verification: %s: %s", type(e).__name__, e)
self._add_error("verification", f"TPM verification failed: {str(e)}")
- # Set token expiration (only on successful validation)
- session_lifetime = config.getint("verifier", "session_lifetime", fallback=config.DEFAULT_SESSION_LIFETIME)
- self.token_expires_at = Timestamp.now() + timedelta(seconds=session_lifetime)
- self.active = True
+ # Set token expiration and activate only on successful validation
+ if not any(errs for errs in self.errors.values()):
+ session_lifetime = config.getint("verifier", "session_lifetime", fallback=config.DEFAULT_SESSION_LIFETIME)
+ self.token_expires_at = Timestamp.now() + timedelta(seconds=session_lifetime)
+ self.active = True
def _set_nonce(self):
if "nonce" not in self.values:
diff --git a/keylime/web/base/action_handler.py b/keylime/web/base/action_handler.py
index d14c9ee..68dd30d 100644
--- a/keylime/web/base/action_handler.py
+++ b/keylime/web/base/action_handler.py
@@ -265,12 +265,18 @@ class ActionHandler(RequestHandler):
# Look up by token hash (tokens are never stored in plaintext)
auth_session = AuthSession.get_by_token(token)
if auth_session and auth_session.agent_id: # type: ignore[attr-defined]
- # Check if token is still valid
- now = Timestamp.now()
- if auth_session.token_expires_at >= now: # type: ignore[attr-defined]
- logger.debug("Extracted agent identity from bearer token: %s", auth_session.agent_id) # type: ignore[attr-defined]
- return (auth_session.agent_id, "agent") # type: ignore[attr-defined]
- logger.debug("Bearer token expired for agent: %s", auth_session.agent_id) # type: ignore[attr-defined]
+ # Check if session is active and token is still valid
+ if not getattr(auth_session, "active", False):
+ logger.debug("Session not active for agent: %s", auth_session.agent_id) # type: ignore[attr-defined]
+ else:
+ token_expires_at = getattr(auth_session, "token_expires_at", None)
+ if token_expires_at is None:
+ logger.debug("Session has no expiry for agent: %s", auth_session.agent_id) # type: ignore[attr-defined]
+ elif token_expires_at >= Timestamp.now():
+ logger.debug("Extracted agent identity from bearer token: %s", auth_session.agent_id) # type: ignore[attr-defined]
+ return (auth_session.agent_id, "agent") # type: ignore[attr-defined]
+ else:
+ logger.debug("Bearer token expired for agent: %s", auth_session.agent_id) # type: ignore[attr-defined]
else:
logger.debug("Invalid bearer token provided")
else:
@@ -520,13 +526,27 @@ class ActionHandler(RequestHandler):
self.finish()
return False
+ # Check if session is active
+ if not getattr(auth_session, "active", False):
+ logger.info(
+ "Authentication session not active for agent '%s'",
+ auth_session.agent_id, # type: ignore[attr-defined]
+ )
+ self.set_status(401)
+ self.write(
+ {"errors": [{"status": "401", "title": "Unauthorized", "detail": "Authentication session not active"}]}
+ )
+ self.finish()
+ return False
+
# Check if token has expired
+ token_expires_at = getattr(auth_session, "token_expires_at", None)
now = Timestamp.now()
- if auth_session.token_expires_at < now: # type: ignore[attr-defined]
+ if token_expires_at is None or token_expires_at < now:
logger.info(
"Authentication token expired for agent '%s' (expired at %s)",
auth_session.agent_id, # type: ignore[attr-defined]
- auth_session.token_expires_at, # type: ignore[attr-defined]
+ token_expires_at,
)
self.set_status(401)
self.write(
--
2.53.0

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,92 @@
From d74e7499746917fa7b9fbba02972eed82bc7ece9 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Fri, 13 Mar 2026 16:04:45 +0100
Subject: [PATCH] fix: Add fork-safety to DBManager via dispose()
After forking worker processes, child processes inherited the parent's
db_manager engine and connection pool. Sharing SQLAlchemy connection
pools across fork boundaries is unsafe and can lead to corruption.
Add DBManager.dispose() to clear engine, scoped session, and registry
state. Call it in verifier_server.py after fork (alongside the existing
reset_verifier_config()), then immediately re-create the engine with
make_engine() so the child has its own fresh connection pool.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/models/base/db.py | 22 +++++++++++++++++++---
keylime/web/base/server.py | 10 ++++++++++
2 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/keylime/models/base/db.py b/keylime/models/base/db.py
index 9678098..bb218cf 100644
--- a/keylime/models/base/db.py
+++ b/keylime/models/base/db.py
@@ -101,9 +101,6 @@ class DBManager:
@property
def service(self) -> Optional[str]:
- if not self._service:
- raise BackendMissing("cannot access the service for a DBManager before a call to db_manager.make_engine()")
-
return self._service
@property
@@ -138,6 +135,25 @@ class DBManager:
return cast(Session, self._scoped_session())
+ def dispose(self) -> None:
+ """Dispose the engine and clear all state.
+
+ Must be called after fork to avoid sharing the parent's connection pool
+ across child processes. The next call to make_engine() will create fresh
+ connections for the child process.
+ """
+ if self._scoped_session:
+ self._scoped_session.remove()
+ if self._engine:
+ # Use close=False so the child discards the inherited pool
+ # without closing the parent's underlying connections. Per
+ # SQLAlchemy docs, this is the recommended approach after fork.
+ self._engine.dispose(close=False) # type: ignore[call-arg]
+ self._engine = None
+ self._scoped_session = None
+ self._registry = None
+ self._service = None
+
def remove_session(self) -> None:
"""Remove the current scoped session, releasing its connection back to the pool and clearing the identity map.
diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py
index e053bbb..913a498 100644
--- a/keylime/web/base/server.py
+++ b/keylime/web/base/server.py
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional
import tornado
from keylime import api_version, config, keylime_logging, web_util
+from keylime.models.base.db import db_manager
from keylime.web.base.action_handler import ActionHandler
from keylime.web.base.route import Route
@@ -299,6 +300,15 @@ class Server(ABC):
tornado.process.fork_processes(self.worker_count)
# num.value = num.value + 1
# print(num.value)
+
+ # Dispose inherited db_manager engine after fork to avoid sharing the
+ # parent's connection pool, then re-create with a fresh pool for this
+ # child process.
+ service = db_manager.service
+ db_manager.dispose()
+ if service:
+ db_manager.make_engine(service)
+
asyncio.run(self.start_single())
def _setup(self) -> None:
--
2.53.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,274 @@
From fb06907b383512a6942dc489a62eee0da92fbac6 Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Wed, 18 Mar 2026 05:34:30 +0000
Subject: [PATCH 12/12] fix(mem leak) - remove unbounded functools.cache from
latest_attestation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@cache (= lru_cache(maxsize=None)) on the latest_attestation property
creates a class-level cache keyed by self. Since VerifierAgent.get()
creates a new instance per call, and the push-mode controller calls it
2-3x per attestation cycle, the cache permanently holds strong references
to every instance (and its eagerly-loaded IMAPolicy data), preventing
garbage collection.
The property is accessed a few times per attestation cycle — a simple DB
query each time is negligible compared to the cost of permanently
retaining every VerifierAgent instance in memory.
Additionally, cache `agent.latest_attestation` in a local variable in
each controller method to avoid redundant DB queries per request, and
add an inline warning comment to prevent re-introduction of the cache.
Assisted-by: Claude Sonnet 4.6
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
keylime/models/verifier/verifier_agent.py | 5 +-
.../web/verifier/attestation_controller.py | 58 +++++++++++--------
test/test_attestation_controller.py | 8 +--
test/test_attestation_model.py | 26 +++++++++
4 files changed, 65 insertions(+), 32 deletions(-)
diff --git a/keylime/models/verifier/verifier_agent.py b/keylime/models/verifier/verifier_agent.py
index 0373e87..515df07 100644
--- a/keylime/models/verifier/verifier_agent.py
+++ b/keylime/models/verifier/verifier_agent.py
@@ -1,7 +1,5 @@
# pyright: reportAttributeAccessIssue=false
# ORM model with dynamically-created attributes from metaclasses
-from functools import cache
-
from keylime.models.base import *
@@ -97,8 +95,9 @@ class VerifierAgent(PersistableModel):
# TODO: remove above, based on feedback
@property
- @cache # pylint: disable=method-cache-max-size-none # Intentional unbounded cache for ORM property
def latest_attestation(self):
+ # NOTE: Do not cache this property. Caching causes a memory leak because
+ # the cache holds strong references to every VerifierAgent instance.
# Lazy import to avoid circular dependency
import keylime.models.verifier as verifier_models # pylint: disable=import-outside-toplevel
diff --git a/keylime/web/verifier/attestation_controller.py b/keylime/web/verifier/attestation_controller.py
index d660c0f..5951e4c 100755
--- a/keylime/web/verifier/attestation_controller.py
+++ b/keylime/web/verifier/attestation_controller.py
@@ -205,10 +205,12 @@ class AttestationController(Controller):
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
- if not agent.latest_attestation: # type: ignore[union-attr]
+ latest = agent.latest_attestation # type: ignore[union-attr]
+
+ if not latest:
APIError("not_found", f"No attestation exists for agent '{agent_id}'.").send_via(self)
- self.show(agent_id, agent.latest_attestation.index, **_params) # type: ignore[union-attr, no-untyped-call]
+ self.show(agent_id, latest.index, **_params) # type: ignore[union-attr, no-untyped-call]
# POST /v3[.:minor]/agents/:agent_id/attestations
@Controller.require_json_api
@@ -231,13 +233,15 @@ class AttestationController(Controller):
f"attestation not passing verification."
).send_via(self)
+ latest = agent.latest_attestation # type: ignore[union-attr]
+
# Per enhancement #103, section "Error Conditions for Attestation Protocol":
# If last attestation failed AND policy hasn't changed, return 503 with exponential backoff
# Skip this for PUSH mode agents to allow immediate recovery from timeout-induced failures
if (
- agent.latest_attestation # type: ignore[union-attr]
- and agent.latest_attestation.evaluation == "fail" # type: ignore[union-attr]
- and agent.latest_attestation.stage == "verification_complete" # type: ignore[union-attr]
+ latest
+ and latest.evaluation == "fail"
+ and latest.stage == "verification_complete"
and not agent_util.is_push_mode_agent(agent) # type: ignore[arg-type]
):
# Calculate retry-after using exponential backoff (same formula as rest of codebase)
@@ -257,19 +261,19 @@ class AttestationController(Controller):
f"If the failure was due to policy violation, update the policy or fix the agent before retrying."
).send_via(self)
- if agent.latest_attestation and agent.latest_attestation.verification_in_progress: # type: ignore[union-attr]
- self.set_header("Retry-After", str(agent.latest_attestation.seconds_to_decision)) # type: ignore[no-untyped-call, union-attr]
+ if latest and latest.verification_in_progress:
+ self.set_header("Retry-After", str(latest.seconds_to_decision)) # type: ignore[no-untyped-call]
APIError("verification_in_progress", 503).set_detail(
f"Cannot create attestation for agent '{agent_id}' while the last attestation is still being "
f"verified. The active verification task is expected to complete or time out within "
- f"{agent.latest_attestation.seconds_to_decision} seconds." # type: ignore[union-attr]
+ f"{latest.seconds_to_decision} seconds."
).send_via(self)
- if agent.latest_attestation and not agent.latest_attestation.ready_for_next_attestation: # type: ignore[union-attr]
- self.set_header("Retry-After", str(agent.latest_attestation.seconds_to_next_attestation)) # type: ignore[no-untyped-call, union-attr]
+ if latest and not latest.ready_for_next_attestation:
+ self.set_header("Retry-After", str(latest.seconds_to_next_attestation)) # type: ignore[no-untyped-call]
APIError("premature_attestation", 429).set_detail(
f"Cannot create attestation for agent '{agent_id}' before the configured interval has elapsed. "
- f"Wait {agent.latest_attestation.seconds_to_next_attestation} seconds before trying again." # type: ignore[union-attr]
+ f"Wait {latest.seconds_to_next_attestation} seconds before trying again."
).send_via(self)
attestation_record = Attestation.create(agent) # type: ignore[no-untyped-call]
@@ -314,25 +318,27 @@ class AttestationController(Controller):
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
+ latest = agent.latest_attestation # type: ignore[union-attr]
+
# If there are no attestations for the agent, the attestation at 'index' does not exist
- if not agent.latest_attestation: # type: ignore[union-attr]
+ if not latest:
APIError("not_found", f"No attestation {index} exists for agent '{agent_id}'.").send_via(self)
# Only allow the attestation at 'index' to be updated if it is the latest attestation
- if str(agent.latest_attestation.index) != index: # type: ignore[union-attr]
+ if str(latest.index) != str(index): # type: ignore[union-attr]
APIError("old_attestation", 403).set_detail(
f"Attestation {index} is not the latest for agent '{agent_id}'. Only evidence for the most recent "
f"attestation may be updated."
).send_via(self)
- if agent.latest_attestation.stage != "awaiting_evidence": # type: ignore[union-attr]
+ if latest.stage != "awaiting_evidence": # type: ignore[union-attr]
APIError("evidence_immutable", 403).set_detail(
f"Cannot alter evidence for attestation {index} which has already been received and accepted."
).send_via(self)
- if not agent.latest_attestation.challenges_valid: # type: ignore[union-attr]
+ if not latest.challenges_valid: # type: ignore[union-attr]
APIError("challenges_expired", 403).set_detail(
- f"Challenges for attestation {index} expired at {agent.latest_attestation.challenges_expire_at}. " # type: ignore[union-attr]
+ f"Challenges for attestation {index} expired at {latest.challenges_expire_at}. " # type: ignore[union-attr]
f"Create a new attestation and try again."
).send_via(self)
@@ -341,21 +347,21 @@ class AttestationController(Controller):
"Request body must include attestation evidence data."
).send_via(self)
- agent.latest_attestation.receive_evidence(attestation) # type: ignore[no-untyped-call, union-attr]
- driver = EngineDriver(agent.latest_attestation).process_evidence() # type: ignore[no-untyped-call, union-attr]
+ latest.receive_evidence(attestation) # type: ignore[no-untyped-call, union-attr]
+ driver = EngineDriver(latest).process_evidence() # type: ignore[no-untyped-call, union-attr]
# Send error if the received evidence appears invalid
- if not agent.latest_attestation.changes_valid: # type: ignore[union-attr]
- APIMessageBody.from_record_errors(agent.latest_attestation).send_via(self) # type: ignore[no-untyped-call, union-attr]
+ if not latest.changes_valid: # type: ignore[union-attr]
+ APIMessageBody.from_record_errors(latest).send_via(self) # type: ignore[no-untyped-call, union-attr]
- agent.latest_attestation.commit_changes() # type: ignore[no-untyped-call, union-attr]
+ latest.commit_changes() # type: ignore[no-untyped-call, union-attr]
# Send acknowledgement of received evidence, but continue executing
APIMessageBody(
- APIResource("attestation", agent.latest_attestation.render_evidence_acknowledged()).include( # type: ignore[no-untyped-call, union-attr]
+ APIResource("attestation", latest.render_evidence_acknowledged()).include( # type: ignore[no-untyped-call, union-attr]
APILink("self", f"/{self.version}/agents/{agent_id}/attestations/{index}")
),
- APIMeta("seconds_to_next_attestation", agent.latest_attestation.seconds_to_next_attestation), # type: ignore[union-attr]
+ APIMeta("seconds_to_next_attestation", latest.seconds_to_next_attestation), # type: ignore[union-attr]
).send_via(
self, code=202, stop_action=False
) # type: ignore[no-untyped-call]
@@ -372,8 +378,10 @@ class AttestationController(Controller):
if not agent:
APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self)
- if not agent.latest_attestation: # type: ignore[union-attr]
+ latest = agent.latest_attestation # type: ignore[union-attr]
+
+ if not latest:
APIError("not_found", f"No attestation exists for agent '{agent_id}'.").send_via(self)
# Call update with the same params, which includes attestation
- self.update(agent_id, agent.latest_attestation.index, **params) # type: ignore[union-attr]
+ self.update(agent_id, latest.index, **params) # type: ignore[union-attr]
diff --git a/test/test_attestation_controller.py b/test/test_attestation_controller.py
index e644e10..37e059c 100644
--- a/test/test_attestation_controller.py
+++ b/test/test_attestation_controller.py
@@ -43,7 +43,7 @@ class TestAttestationControllerParameterHandling(unittest.TestCase):
self.controller._api_request_body = Mock() # pylint: disable=protected-access
self.agent_id = "test-agent-123"
- self.attestation_index = "1" # String, as it comes from URL route
+ self.attestation_index = 1 # Integer, as it comes from the ORM Integer column
# Mock attestation evidence data
self.attestation_data = {
@@ -270,7 +270,7 @@ class TestAttestationControllerErrorMessages(unittest.TestCase):
self.controller._api_request_body = Mock() # pylint: disable=protected-access
self.agent_id = "test-agent-123"
- self.attestation_index = "1" # String, as it comes from URL route
+ self.attestation_index = 1 # Integer, as it comes from the ORM Integer column
@patch("keylime.web.verifier.attestation_controller.APIError")
@patch("keylime.web.verifier.attestation_controller.VerifierAgent")
@@ -772,7 +772,7 @@ class TestAttestationControllerGetMethods(unittest.TestCase):
# Setup mock agent with latest attestation
mock_agent = Mock(spec=VerifierAgent)
mock_attestation = Mock()
- mock_attestation.index = "5"
+ mock_attestation.index = 5
mock_attestation.render_state = Mock(return_value={})
mock_agent.latest_attestation = mock_attestation
mock_agent_class.get.return_value = mock_agent
@@ -792,7 +792,7 @@ class TestAttestationControllerGetMethods(unittest.TestCase):
self.controller.show_latest(self.agent_id)
# Verify it called show() with the latest attestation index
- mock_attestation_class.get.assert_called_once_with(agent_id=self.agent_id, index="5")
+ mock_attestation_class.get.assert_called_once_with(agent_id=self.agent_id, index=5)
if __name__ == "__main__":
diff --git a/test/test_attestation_model.py b/test/test_attestation_model.py
index 9bc1abb..2651bc2 100644
--- a/test/test_attestation_model.py
+++ b/test/test_attestation_model.py
@@ -964,5 +964,31 @@ class TestAttestationModel(unittest.TestCase):
self.assertFalse(attestation.ready_for_next_attestation)
+class TestVerifierAgentLatestAttestation(unittest.TestCase):
+ """Test that VerifierAgent.latest_attestation is not cached (memory leak fix)"""
+
+ def test_latest_attestation_not_cached(self):
+ """Verify the property has no functools.cache wrapper"""
+ prop_fget = VerifierAgent.latest_attestation.fget
+ # @cache adds cache_info and __wrapped__ attributes
+ self.assertFalse(hasattr(prop_fget, "cache_info"))
+ self.assertFalse(hasattr(prop_fget, "__wrapped__"))
+
+ def test_latest_attestation_calls_db_each_time(self):
+ """Verify each access queries the DB (no stale cache)"""
+ with patch("keylime.models.verifier.Attestation.get_latest") as mock_get:
+ mock_get.return_value = None
+
+ # Call the underlying function directly to avoid needing db_manager setup
+ prop_fget = VerifierAgent.latest_attestation.fget
+ assert prop_fget is not None
+ fake_agent = MagicMock()
+ fake_agent.agent_id = "test-agent"
+
+ prop_fget(fake_agent)
+ prop_fget(fake_agent)
+ self.assertEqual(mock_get.call_count, 2)
+
+
if __name__ == "__main__":
unittest.main()
--
2.52.0

View File

@ -1,37 +0,0 @@
From e9a6615ea3ab60b9248377071ea2f5cc7b45dfda Mon Sep 17 00:00:00 2001
From: Sergio Correia <scorreia@redhat.com>
Date: Thu, 28 Aug 2025 14:33:59 +0100
Subject: [PATCH] policy/sign: use print() when writing to /dev/stdout
Signed-off-by: Sergio Correia <scorreia@redhat.com>
---
keylime/policy/sign_runtime_policy.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/keylime/policy/sign_runtime_policy.py b/keylime/policy/sign_runtime_policy.py
index 87529065d..316ee15aa 100644
--- a/keylime/policy/sign_runtime_policy.py
+++ b/keylime/policy/sign_runtime_policy.py
@@ -2,6 +2,7 @@
import argparse
import json
+import sys
from json.decoder import JSONDecodeError
from typing import TYPE_CHECKING, Any, Optional
@@ -191,8 +192,12 @@ def sign_runtime_policy(args: argparse.Namespace) -> Optional[str]:
return None
try:
- with open(args.output_file, "wb") as f:
- f.write(signed_policy.encode("UTF-8"))
+ if args.output_file == "/dev/stdout":
+ # Let's simply print to stdout the regular way.
+ print(signed_policy, file=sys.stdout)
+ else:
+ with open(args.output_file, "wb") as f:
+ f.write(signed_policy.encode("UTF-8"))
except Exception as exc:
logger.error("Unable to write signed policy to destination file '%s': %s", args.output_file, exc)
return None

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,449 @@
From 739a8a97357f1b52c3944706479740fb7b71fb33 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Thu, 26 Mar 2026 11:28:52 +0100
Subject: [PATCH 13/13] fix: verifier race condition on agent delete
Fix a race condition in the pull-mode DELETE handler where an agent
could be deleted from the database while an in-flight attestation cycle
was still running, causing 'tenant -c update' to intermittently fail
with "Agent was not deleted from Verifier after 5 tries".
The race had two interacting causes:
1. TERMINATED was in the immediate-deletion states list. When a second
DELETE arrived and found the agent in TERMINATED state, it deleted
immediately (200), even though an invoke_get_quote() coroutine from
the first DELETE's cycle was still in-flight. The orphaned coroutine
then crashed in store_attestation_state() with AssertionError.
2. The DELETE handler did not cancel the scheduled IOLoop poll timer, so
new attestation cycles could start even after deletion was requested.
Changes:
- Add _pending_events registry and _register_pending_event /
_cancel_pending_event helpers to track IOLoop timer handles (from
upstream commit 59ac386).
- Replace assert statements in store_attestation_state() and the DELETE
handler with proper error handling (graceful log+return and 404).
- Remove TERMINATED from the immediate-deletion states list so that a
second DELETE returns 202 instead of deleting while in-flight work
exists.
- Cancel the pending poll timer via _pending_events on DELETE to prevent
new attestation cycles from starting.
- Fix the tenant's do_cvdelete() to handle 200/202/404 response codes
properly and fix a typo (reponse_json -> response_json).
- Cancel the pending poll timer in the PUT "stop" handler to prevent
new attestation cycles after agent stop.
- Suppress a mypy false positive in keylime/json.py.
- Add unit tests for pending-event management and store_attestation_state
graceful handling when agent is deleted.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/cloud_verifier_tornado.py | 180 ++++++++++++++++++++++++++++++------
keylime/json.py | 2 +-
keylime/tenant.py | 26 +++---
test/test_cloud_verifier_tornado.py | 114 +++++++++++++++++++++++
4 files changed, 282 insertions(+), 40 deletions(-)
diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py
index 17bec75..75f117b 100644
--- a/keylime/cloud_verifier_tornado.py
+++ b/keylime/cloud_verifier_tornado.py
@@ -171,6 +171,35 @@ exclude_db: Dict[str, Any] = {
"ssl_context": None,
}
+# Registry of agent_id -> IOLoop timeout handle for all scheduled pending
+# events (quote polls, retries). Used to cancel them all on shutdown.
+_pending_events: Dict[str, object] = {}
+
+
+def _register_pending_event(agent: Dict[str, Any], handle: object) -> None:
+ """Track a pending IOLoop timeout in both the agent dict and the global registry.
+
+ The agent dict field ``pending_event`` is the per-agent reference used during
+ normal operation (e.g. cancelling on state change). The module-level
+ ``_pending_events`` dict mirrors it so that *all* handles can be
+ bulk-cancelled on shutdown without iterating over every agent.
+ """
+ agent["pending_event"] = handle
+ _pending_events[agent["agent_id"]] = handle
+
+
+def _cancel_pending_event(agent: Dict[str, Any]) -> None:
+ """Cancel and unregister the pending IOLoop timeout for *agent*, if any."""
+ handle = agent.get("pending_event")
+ if handle is None:
+ return
+ agent["pending_event"] = None
+ _pending_events.pop(agent["agent_id"], None)
+ try:
+ tornado.ioloop.IOLoop.current().remove_timeout(handle)
+ except Exception as e:
+ logger.debug("Could not remove pending event for agent %s: %s", agent["agent_id"], e)
+
def _from_db_obj(agent_db_obj: VerfierMain) -> Dict[str, Any]:
fields = [
@@ -286,7 +315,12 @@ def store_attestation_state(agentAttestState: AgentAttestState) -> None:
try:
with session_context() as session:
update_agent = session.get(VerfierMain, agentAttestState.get_agent_id()) # type: ignore[attr-defined]
- assert update_agent
+ if update_agent is None:
+ logger.warning(
+ "Agent %s no longer in database, skipping attestation state storage",
+ agent_id,
+ )
+ return
update_agent.boottime = agentAttestState.get_boottime() # pyright: ignore
update_agent.next_ima_ml_entry = agentAttestState.get_next_ima_ml_entry() # pyright: ignore
ima_pcrs_dict = agentAttestState.get_ima_pcrs()
@@ -605,34 +639,115 @@ class AgentsHandler(BaseHandler):
except SQLAlchemyError as e:
logger.error("SQLAlchemy Error deleting agent in push mode: %s", e)
web_util.echo_json_response(self.req_handler, 500, "Internal Server Error")
- else:
- # Pull mode: Use operational_state to determine deletion behavior
- op_state = agent.operational_state
- if op_state in (
- states.SAVED,
- states.FAILED,
- states.TERMINATED,
- states.TENANT_FAILED,
- states.INVALID_QUOTE,
- ):
- try:
- verifier_db_delete_agent(session, agent_id)
- web_util.echo_json_response(self.req_handler, 200, "Success")
- logger.info("DELETE (pull mode) returning 200 response for agent id: %s", agent_id)
- except SQLAlchemyError as e:
- logger.error("SQLAlchemy Error deleting agent in pull mode: %s", e)
- web_util.echo_json_response(self.req_handler, 500, "Internal Server Error")
+ return
+
+ # Pull mode: Use operational_state to determine deletion behavior.
+ #
+ # Terminal states with no in-flight work can be deleted
+ # immediately (200). Note that TERMINATED is intentionally
+ # excluded: it means a previous DELETE was accepted but the
+ # attestation cycle has not yet finished. Deleting immediately
+ # while in-flight work exists causes store_attestation_state()
+ # to fail when it tries to persist results for the now-gone
+ # agent.
+ op_state = agent.operational_state
+ if op_state in (
+ states.SAVED,
+ states.FAILED,
+ states.TENANT_FAILED,
+ states.INVALID_QUOTE,
+ ):
+ # Agent is in a terminal state with no in-flight work — delete immediately.
+ # Cancel any local pending poll timer first (same-worker
+ # defensive cleanup). This matters when a cross-worker
+ # PUT /stop sets TENANT_FAILED in the DB but cannot cancel
+ # the timer in this worker's _pending_events.
+ pending_handle = _pending_events.pop(agent_id, None)
+ if pending_handle is not None:
+ tornado.ioloop.IOLoop.current().remove_timeout(pending_handle)
+ try:
+ verifier_db_delete_agent(session, agent_id)
+ web_util.echo_json_response(self.req_handler, 200, "Success")
+ logger.info("DELETE (pull mode) returning 200 response for agent id: %s", agent_id)
+ except SQLAlchemyError as e:
+ logger.error("SQLAlchemy Error deleting agent in pull mode: %s", e)
+ web_util.echo_json_response(self.req_handler, 500, "Internal Server Error")
+ return
+
+ # Agent is in an active state or already TERMINATED from a
+ # previous DELETE.
+ #
+ # Multi-worker note: _pending_events is process-local. Each
+ # agent's attestation cycle runs in the worker process it was
+ # assigned to at startup (round-robin), but this DELETE
+ # request may arrive at any worker.
+ #
+ # - Same worker: pending_handle is accurate — if found, the
+ # agent was idle (timer pending) and we can delete
+ # immediately since no coroutine is in-flight.
+ # - Different worker: pending_handle is always None, so we
+ # fall through to the 202/TERMINATED path. The managing
+ # worker's timer fires normally, process_agent() detects
+ # TERMINATED, and completes the deletion.
+ #
+ # Important: when the agent is already TERMINATED, do NOT
+ # cancel the pending poll timer — it is the only mechanism
+ # that will trigger process_agent() to detect TERMINATED and
+ # complete the deletion.
+ if op_state == states.TERMINATED: # pyright: ignore
+ # Agent is already TERMINATED from a previous DELETE.
+ # Leave the pending poll timer alone so process_agent()
+ # can detect TERMINATED and complete the deletion.
+ web_util.echo_json_response(self.req_handler, 202, "Accepted")
+ logger.info(
+ "DELETE (pull mode) returning 202 response for agent id: %s "
+ "(already TERMINATED, waiting for deletion to complete)",
+ agent_id,
+ )
+ return
+
+ # First DELETE for this agent. Try to cancel the pending
+ # poll timer (same-worker optimization).
+ #
+ # Pop the handle first but do NOT cancel the timer yet —
+ # if the DB operation fails we restore the handle so the
+ # attestation cycle can continue.
+ pending_handle = _pending_events.pop(agent_id, None)
+ try:
+ if pending_handle is not None:
+ # Same-worker optimization: the agent was idle
+ # (waiting for the next poll timer) — no in-flight
+ # coroutine will come along to detect TERMINATED and
+ # complete the deletion, so delete immediately.
+ verifier_db_delete_agent(session, agent_id)
+ # DB succeeded — now safe to cancel the timer.
+ tornado.ioloop.IOLoop.current().remove_timeout(pending_handle)
+ web_util.echo_json_response(self.req_handler, 200, "Success")
+ logger.info("DELETE (pull mode) returning 200 response for agent id: %s", agent_id)
else:
- try:
- update_agent = session.get(VerfierMain, agent_id) # type: ignore[attr-defined]
- assert update_agent
- update_agent.operational_state = states.TERMINATED # pyright: ignore
- session.add(update_agent)
- # session.commit() is automatically called by context manager
- web_util.echo_json_response(self.req_handler, 202, "Accepted")
- logger.info("DELETE (pull mode) returning 202 response for agent id: %s", agent_id)
- except SQLAlchemyError as e:
- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e)
+ # Either an invoke_get_quote() / invoke_provide_v()
+ # coroutine is in-flight (no pending_handle — the
+ # timer already fired), or this DELETE arrived at a
+ # different worker process. Mark as TERMINATED and
+ # let process_agent() perform the actual deletion
+ # when the in-flight work finishes (or the timer
+ # fires in the managing worker).
+ update_agent = session.get(VerfierMain, agent_id) # type: ignore[attr-defined]
+ if update_agent is None:
+ web_util.echo_json_response(self.req_handler, 404, "agent id not found")
+ return
+ update_agent.operational_state = states.TERMINATED # pyright: ignore
+ session.add(update_agent)
+ web_util.echo_json_response(self.req_handler, 202, "Accepted")
+ logger.info("DELETE (pull mode) returning 202 response for agent id: %s", agent_id)
+ except SQLAlchemyError as e:
+ logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e)
+ if pending_handle is not None:
+ # Restore the timer so the attestation cycle can
+ # continue — the DB operation failed so the agent
+ # is still there.
+ _pending_events[agent_id] = pending_handle
+ web_util.echo_json_response(self.req_handler, 500, "Internal server error")
def post(self) -> None:
"""This method handles the POST requests to add agents to the Cloud Verifier.
@@ -1045,6 +1160,15 @@ class AgentsHandler(BaseHandler):
# session.commit() is automatically called by context manager
except SQLAlchemyError as e:
logger.error("SQLAlchemy Error: %s", e)
+ web_util.echo_json_response(self.req_handler, 500, "Internal server error")
+ return
+
+ # DB succeeded — now safe to cancel the pending poll
+ # timer to prevent new attestation cycles.
+ if agent_id is not None:
+ pending_handle = _pending_events.pop(agent_id, None)
+ if pending_handle is not None:
+ tornado.ioloop.IOLoop.current().remove_timeout(pending_handle)
web_util.echo_json_response(self.req_handler, 200, "Success")
logger.info("PUT returning 200 response for agent id: %s", agent_id)
diff --git a/keylime/json.py b/keylime/json.py
index 82292a1..4c4b897 100644
--- a/keylime/json.py
+++ b/keylime/json.py
@@ -24,7 +24,7 @@ def bytes_to_str(data: Any) -> Any:
for _k, _v in data.items():
data[_k] = bytes_to_str(_v)
elif isinstance(data, tuple(_list_types)):
- _l = list(data)
+ _l = list(data) # type: ignore[call-overload]
for _k, _v in enumerate(_l):
_l[_k] = bytes_to_str(_v)
data = _l
diff --git a/keylime/tenant.py b/keylime/tenant.py
index 0cdeada..4cb3698 100644
--- a/keylime/tenant.py
+++ b/keylime/tenant.py
@@ -1074,22 +1074,17 @@ class Tenant:
# keylime_logging.log_http_response(logger, logging.ERROR, response_json)
raise UserError(f"{self.verifier_fid_str} timed out while deleting {self.agent_fid_str}.")
- if response_json["code"] == 202:
+ if response_json["code"] == 200:
+ logger.info("Agent %s deleted from the CV", self.agent_uuid)
+ elif response_json["code"] == 202:
numtries = 0
deleted = False
while not deleted:
- reponse_json = self.do_cvstatus(not_found_fail=False)
- if reponse_json["code"] != 404:
+ response_json = self.do_cvstatus(not_found_fail=False)
+ if response_json["code"] != 404:
numtries += 1
if numtries >= self.maxr:
- # EVALUATE DELETION
- # logger.error(
- # "%s was not deleted from %s after %d tries",
- # self.agent_fid_str,
- # self.verifier_fid_str,
- # numtries,
- # )
raise UserError(
f"{self.agent_fid_str} was not deleted from {self.verifier_fid_str} after {numtries} tries"
)
@@ -1114,8 +1109,17 @@ class Tenant:
self.verifier_fid_str,
numtries,
)
- # Marked for deletion (need to modify the code on CI tests)
logger.info("Agent %s deleted from the CV", self.agent_uuid)
+ elif response_json["code"] == 404:
+ # The agent was already deleted (e.g. by another caller
+ # between the do_cvstatus check and the DELETE request).
+ # The desired end state is reached — treat as success.
+ logger.info("Agent %s is already absent from the CV", self.agent_uuid)
+ else:
+ raise UserError(
+ f"Unexpected response code {response_json['code']} from "
+ f"{self.verifier_fid_str} while deleting {self.agent_fid_str}"
+ )
def do_regstatus(self) -> Dict[str, Any]:
if not self.registrar_ip or not self.registrar_port:
diff --git a/test/test_cloud_verifier_tornado.py b/test/test_cloud_verifier_tornado.py
new file mode 100644
index 0000000..7515b2e
--- /dev/null
+++ b/test/test_cloud_verifier_tornado.py
@@ -0,0 +1,114 @@
+"""Unit tests for cloud_verifier_tornado deletion and pending-event management.
+
+Tests cover:
+1. _register_pending_event / _cancel_pending_event helpers
+2. store_attestation_state graceful handling when agent is deleted
+"""
+
+# pylint: disable=protected-access
+
+import unittest
+from unittest.mock import MagicMock, patch
+
+from keylime import cloud_verifier_tornado
+
+
+class TestPendingEventRegistry(unittest.TestCase):
+ """Test the _pending_events registry helpers."""
+
+ def setUp(self):
+ cloud_verifier_tornado._pending_events.clear()
+
+ def tearDown(self):
+ cloud_verifier_tornado._pending_events.clear()
+
+ def test_register_pending_event(self):
+ """_register_pending_event stores handle in agent dict and global registry."""
+ agent = {"agent_id": "test-agent-1", "pending_event": None}
+ handle = object()
+
+ cloud_verifier_tornado._register_pending_event(agent, handle)
+
+ self.assertIs(agent["pending_event"], handle)
+ self.assertIs(cloud_verifier_tornado._pending_events["test-agent-1"], handle)
+
+ def test_cancel_pending_event_removes_from_both(self):
+ """_cancel_pending_event clears agent dict and global registry."""
+ agent = {"agent_id": "test-agent-1", "pending_event": None}
+ handle = object()
+ cloud_verifier_tornado._register_pending_event(agent, handle)
+
+ with patch("tornado.ioloop.IOLoop") as mock_ioloop_cls:
+ mock_ioloop = MagicMock()
+ mock_ioloop_cls.current.return_value = mock_ioloop
+
+ cloud_verifier_tornado._cancel_pending_event(agent)
+
+ self.assertIsNone(agent["pending_event"])
+ self.assertNotIn("test-agent-1", cloud_verifier_tornado._pending_events)
+ mock_ioloop.remove_timeout.assert_called_once_with(handle)
+
+ def test_cancel_pending_event_noop_when_none(self):
+ """_cancel_pending_event is a no-op when no pending event exists."""
+ agent = {"agent_id": "test-agent-1", "pending_event": None}
+
+ # Should not raise
+ cloud_verifier_tornado._cancel_pending_event(agent)
+
+ self.assertIsNone(agent["pending_event"])
+
+ def test_cancel_pending_event_handles_remove_timeout_error(self):
+ """_cancel_pending_event logs but doesn't raise on remove_timeout failure."""
+ agent = {"agent_id": "test-agent-1", "pending_event": None}
+ handle = object()
+ cloud_verifier_tornado._register_pending_event(agent, handle)
+
+ with patch("tornado.ioloop.IOLoop") as mock_ioloop_cls:
+ mock_ioloop = MagicMock()
+ mock_ioloop_cls.current.return_value = mock_ioloop
+ mock_ioloop.remove_timeout.side_effect = RuntimeError("IOLoop stopped")
+
+ # Should not raise
+ cloud_verifier_tornado._cancel_pending_event(agent)
+
+ self.assertIsNone(agent["pending_event"])
+ self.assertNotIn("test-agent-1", cloud_verifier_tornado._pending_events)
+
+ def test_register_replaces_previous_handle(self):
+ """_register_pending_event replaces a previously registered handle."""
+ agent = {"agent_id": "test-agent-1", "pending_event": None}
+ handle1 = object()
+ handle2 = object()
+
+ cloud_verifier_tornado._register_pending_event(agent, handle1)
+ cloud_verifier_tornado._register_pending_event(agent, handle2)
+
+ self.assertIs(agent["pending_event"], handle2)
+ self.assertIs(cloud_verifier_tornado._pending_events["test-agent-1"], handle2)
+
+
+class TestStoreAttestationState(unittest.TestCase):
+ """Test store_attestation_state graceful handling of deleted agents."""
+
+ @patch("keylime.cloud_verifier_tornado.session_context")
+ def test_skips_when_agent_not_in_db(self, mock_session_ctx):
+ """store_attestation_state returns gracefully when agent is deleted from DB."""
+ mock_session = MagicMock()
+ mock_session.get.return_value = None
+ mock_session_ctx.return_value.__enter__ = MagicMock(return_value=mock_session)
+ mock_session_ctx.return_value.__exit__ = MagicMock(return_value=False)
+
+ mock_attest_state = MagicMock()
+ mock_attest_state.get_ima_pcrs.return_value = {"10": "some_value"}
+ mock_attest_state.agent_id = "deleted-agent"
+ mock_attest_state.get_agent_id.return_value = "deleted-agent"
+
+ # Should not raise (previously would AssertionError)
+ cloud_verifier_tornado.store_attestation_state(mock_attest_state)
+
+ # Verify no attempt to set attributes on None
+ mock_session.add.assert_not_called()
+
+
+if __name__ == "__main__":
+ unittest.main()
--
2.49.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,20 +0,0 @@
diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py
index 1d9a9c2..859b23a 100644
--- a/keylime/web/base/server.py
+++ b/keylime/web/base/server.py
@@ -2,7 +2,6 @@ import asyncio
import multiprocessing
from abc import ABC, abstractmethod
from functools import wraps
-from ssl import CERT_OPTIONAL
from typing import TYPE_CHECKING, Any, Callable, Optional
import tornado
@@ -252,7 +251,6 @@ class Server(ABC):
self._https_port = config.getint(component, "tls_port", fallback=0)
self._max_upload_size = config.getint(component, "max_upload_size", fallback=104857600)
self._ssl_ctx = web_util.init_mtls(component)
- self._ssl_ctx.verify_mode = CERT_OPTIONAL
def _get(self, pattern: str, controller: type["Controller"], action: str, allow_insecure: bool = False) -> None:
"""Creates a new route to handle incoming GET requests issued for paths which match the given

View File

@@ -0,0 +1,46 @@
From 416d3906fe4071132d5cdc494f828ce3a909f336 Mon Sep 17 00:00:00 2001
From: Sergio Arroutbi <sarroutb@redhat.com>
Date: Fri, 20 Mar 2026 10:57:23 +0100
Subject: [PATCH] Remove enable_authentication agent config option
The Rust agent does not parse the enable_authentication
configuration option and always performs authentication.
Remove the option from both the agent.j2 template and the
2.5 mapping.json to avoid exposing a non-functional setting
to users.
Signed-off-by: Sergio Arroutbi <sarroutb@redhat.com>
---
templates/2.5/agent.j2 | 7 -------
templates/2.5/mapping.json | 3 +--
2 files changed, 1 insertion(+), 9 deletions(-)
diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2
index d5eec733d..5e9a1a706 100644
--- a/templates/2.5/agent.j2
+++ b/templates/2.5/agent.j2
@@ -274,10 +274,3 @@ ima_ml_path = "{{ agent.ima_ml_path }}"
# If set as a relative path, it will be considered from the root path "/".
# If set as an absolute path, it will use it without changes
measuredboot_ml_path = "{{ agent.measuredboot_ml_path }}"
-
-# Enable challenge-response authentication for push model attestation.
-# When enabled, the agent will authenticate with the verifier using TPM-based
-# proof of possession before sending attestation evidence.
-# This option is specific to the push attestation model.
-# The default is False (disabled).
-enable_authentication = {{ agent.enable_authentication }}
diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json
index 522aa4ce9..4b198e768 100644
--- a/templates/2.5/mapping.json
+++ b/templates/2.5/mapping.json
@@ -13,8 +13,7 @@
"ima_ml_count_file": "/tmp/ima_ml_count",
"uefi_logs_evidence_version": "1.0",
"tls_accept_invalid_certs": "false",
- "tls_accept_invalid_hostnames": "false",
- "enable_authentication": "true"
+ "tls_accept_invalid_hostnames": "false"
}
},
"verifier": {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,151 @@
From 15f20d2dd2e63cc621295befef46bc4161a1f636 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Fri, 10 Apr 2026 13:22:44 +0200
Subject: [PATCH] shared_data: Ignore SIGTERM and SIGINT on Manager and parent
processes
When systemd stops the verifier (or registrar), SIGTERM is delivered to
the entire process group, including the multiprocessing Manager's server
process that hosts the shared policy cache. The Manager dies
immediately, but worker processes still have in-flight process_agent()
coroutines that need the cache, causing ConnectionResetError.
The same race occurs with SIGINT (Ctrl+C) when running the daemon in
the foreground.
Fix this in two parts:
1. Use SyncManager.start(initializer=...) to install SIG_IGN for both
SIGTERM and SIGINT in the Manager's server process, so it survives
process-group signals and stays available while workers drain.
2. Ignore SIGTERM and SIGINT in the new architecture's parent process
(start_multi) so it stays in tornado's monitor loop until all
children have drained and exited. Once all children exit, tornado
calls sys.exit(0), triggering atexit handlers which shut down the
Manager via IPC. Without this, the default signal disposition kills
the parent immediately (no atexit), leaving the Manager orphaned.
Resolves: #1882
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/shared_data.py | 28 +++++++++++++++++++++++++++-
keylime/web/base/server.py | 18 ++++++++++++++++++
test/test_verifier_server.py | 11 ++++++++---
3 files changed, 53 insertions(+), 4 deletions(-)
diff --git a/keylime/shared_data.py b/keylime/shared_data.py
index 09cbb97bb..494f2f53b 100644
--- a/keylime/shared_data.py
+++ b/keylime/shared_data.py
@@ -8,8 +8,10 @@
import multiprocessing as mp
import multiprocessing.process
import os
+import signal
import threading
import time
+from multiprocessing.managers import SyncManager
from typing import Any, Dict, List, Optional
from keylime import keylime_logging
@@ -17,6 +19,17 @@
logger = keylime_logging.init_logging("shared_data")
+def _manager_ignore_signals() -> None:
+ """Ignore SIGTERM and SIGINT in the Manager's server process.
+
+ Called as the ``initializer`` for ``SyncManager.start()`` so that
+ the Manager survives process-group signals (systemd SIGTERM, Ctrl+C)
+ and stays available while workers drain in-flight work.
+ """
+ signal.signal(signal.SIGTERM, signal.SIG_IGN)
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
+
+
class FlatDictView:
"""A dictionary-like view over a flat key-value store.
@@ -127,7 +140,20 @@ def __init__(self) -> None:
# Use explicit context to ensure fork compatibility
# The Manager must be started BEFORE any fork() calls
ctx = mp.get_context("fork")
- self._manager = ctx.Manager()
+ # Use SyncManager directly (instead of the ctx.Manager() shortcut)
+ # so we can pass an initializer that makes the Manager's server
+ # process ignore SIGTERM and SIGINT. Without this, systemd's
+ # cgroup-wide SIGTERM (or Ctrl+C SIGINT in foreground) kills the
+ # Manager before workers finish draining, causing
+ # ConnectionResetError in proxy objects. The Manager is still
+ # cleanable via IPC shutdown message, process.kill(), or systemd
+ # SIGKILL escalation.
+ # Cannot use 'with' context manager here: the Manager must outlive
+ # __init__ and persist for the lifetime of SharedDataManager.
+ self._manager = SyncManager(ctx=ctx)
+ self._manager.start( # pylint: disable=consider-using-with
+ initializer=_manager_ignore_signals,
+ )
# CRITICAL FIX: Use a SINGLE flat dict instead of nested dicts
# Nested DictProxy objects have synchronization issues
diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py
index 4dd02b79e..8e9cce69d 100644
--- a/keylime/web/base/server.py
+++ b/keylime/web/base/server.py
@@ -376,12 +376,30 @@ def start_multi(self) -> None:
self._pre_fork()
+ # Ignore SIGTERM/SIGINT in the parent so it stays in tornado's
+ # monitor loop (os.wait) until all children have drained and
+ # exited cleanly. Once all children exit, tornado calls
+ # sys.exit(0) which triggers atexit → SharedDataManager.cleanup()
+ # → Manager shutdown via IPC. Without this, the default signal
+ # disposition kills the parent immediately (no atexit), leaving
+ # the Manager process orphaned.
+ # Children inherit SIG_IGN but override it in
+ # _install_signal_handlers() before entering the event loop.
+ signal.signal(signal.SIGTERM, signal.SIG_IGN)
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
+
# with StatsCollector():
# num = manager.Value('i', 0)
task_id = tornado.process.fork_processes(self.worker_count)
# num.value = num.value + 1
# print(num.value)
+ # Restore default signal disposition in children so they don't
+ # silently ignore SIGTERM/SIGINT before _install_signal_handlers()
+ # replaces these with asyncio-based handlers in start_single().
+ signal.signal(signal.SIGTERM, signal.SIG_DFL)
+ signal.signal(signal.SIGINT, signal.SIG_DFL)
+
# Remove the Manager's server process from multiprocessing's child
# tracking so Python's atexit handler does not try to join() it in
# child workers (the Manager was spawned by the parent).
diff --git a/test/test_verifier_server.py b/test/test_verifier_server.py
index e9a47ef70..7601b9cb0 100644
--- a/test/test_verifier_server.py
+++ b/test/test_verifier_server.py
@@ -300,10 +300,15 @@ def test_base_server_calls_post_fork_before_start_single(self):
assert match is not None
method_body = match.group(0)
+ # Strip comment lines to avoid false matches from mentions
+ # in comments (e.g. "# ... before start_single()").
+ code_lines = [line for line in method_body.splitlines() if not line.lstrip().startswith("#")]
+ code_body = "\n".join(code_lines)
+
# Extract the order of operations
- fork_index = method_body.find("fork_processes")
- post_fork_index = method_body.find("_post_fork")
- start_single_index = method_body.find("start_single()")
+ fork_index = code_body.find("fork_processes")
+ post_fork_index = code_body.find("_post_fork")
+ start_single_index = code_body.find("start_single()")
# All should be present
self.assertNotEqual(fork_index, -1, "fork_processes call not found")

View File

@@ -0,0 +1,348 @@
From a50c7e50171d8f5999bdd927b6306f6d14974c57 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Thu, 16 Apr 2026 14:14:06 +0200
Subject: [PATCH 1/2] shared_data: Move SyncManager socket to /var/run/keylime/
The SyncManager's server process creates a Unix domain socket for IPC
with worker processes. By default, this socket was placed in /tmp with
a random name (listener-*).
Move the socket to /var/run/keylime/, following standard daemon
practice. Keylime already uses this directory for its ZeroMQ revocation
notification socket.
Changes:
- Pass explicit address to SyncManager so the socket is created at
/var/run/keylime/shared_data.<pid>.sock instead of /tmp/listener-*
- Add _ensure_runtime_dir() to create or validate the directory
- Add test conftest.py to redirect sockets to a temp directory
- Add pytest to test-requirements.txt for pylint to resolve imports
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
keylime/shared_data.py | 54 +++++++++++++++++++++++++++++++++++++-----
test-requirements.txt | 1 +
test/conftest.py | 30 +++++++++++++++++++++++
3 files changed, 79 insertions(+), 6 deletions(-)
create mode 100644 test/conftest.py
diff --git a/keylime/shared_data.py b/keylime/shared_data.py
index 494f2f53b..aef39bcc4 100644
--- a/keylime/shared_data.py
+++ b/keylime/shared_data.py
@@ -18,6 +18,23 @@
logger = keylime_logging.init_logging("shared_data")
+_RUNTIME_DIR = "/var/run/keylime"
+
+
+def _ensure_runtime_dir() -> None:
+ """Ensure the runtime directory exists with correct permissions.
+
+ Under systemd, ``tmpfiles.d`` creates ``/var/run/keylime/`` at boot.
+ This function provides a fallback for non-systemd execution and
+ validates permissions in either case.
+ """
+ os.makedirs(_RUNTIME_DIR, mode=0o700, exist_ok=True)
+ perms = os.stat(_RUNTIME_DIR).st_mode & 0o777
+ if perms != 0o700 or not os.access(_RUNTIME_DIR, os.W_OK | os.X_OK):
+ msg = f"{_RUNTIME_DIR} is not usable by the current process"
+ logger.error(msg)
+ raise PermissionError(msg)
+
def _manager_ignore_signals() -> None:
"""Ignore SIGTERM and SIGINT in the Manager's server process.
@@ -137,8 +154,20 @@ def __init__(self) -> None:
"""
logger.debug("Initializing SharedDataManager")
- # Use explicit context to ensure fork compatibility
- # The Manager must be started BEFORE any fork() calls
+ # Ensure /var/run/keylime/ exists with correct permissions
+ # before forking the Manager server process.
+ _ensure_runtime_dir()
+ self._socket_path = os.path.join(_RUNTIME_DIR, f"shared_data.{os.getpid()}.sock")
+
+ # Remove stale socket from a previous run (e.g. after a crash).
+ # CPython's SocketListener does not pre-unlink before bind().
+ try:
+ os.unlink(self._socket_path)
+ except (FileNotFoundError, PermissionError):
+ pass
+
+ # Use explicit context to ensure fork compatibility.
+ # The Manager must be started BEFORE any fork() calls.
ctx = mp.get_context("fork")
# Use SyncManager directly (instead of the ctx.Manager() shortcut)
# so we can pass an initializer that makes the Manager's server
@@ -150,7 +179,7 @@ def __init__(self) -> None:
# SIGKILL escalation.
# Cannot use 'with' context manager here: the Manager must outlive
# __init__ and persist for the lifetime of SharedDataManager.
- self._manager = SyncManager(ctx=ctx)
+ self._manager = SyncManager(address=self._socket_path, ctx=ctx)
self._manager.start( # pylint: disable=consider-using-with
initializer=_manager_ignore_signals,
)
@@ -162,8 +191,6 @@ def __init__(self) -> None:
self._lock = self._manager.Lock()
self._initialized_at = time.time()
- # Register handler to reinitialize manager connection after fork
- # This is needed because Manager uses network connections that don't survive fork
try:
self._parent_pid = os.getpid()
logger.debug("SharedDataManager initialized in process %d", self._parent_pid)
@@ -173,7 +200,10 @@ def __init__(self) -> None:
# Ensure cleanup on exit
atexit.register(self.cleanup)
- logger.info("SharedDataManager initialized successfully")
+ logger.info(
+ "SharedDataManager initialized successfully (socket: %s)",
+ self._socket_path,
+ )
def set_data(self, key: str, value: Any) -> None:
"""Store arbitrary pickleable data by key.
@@ -333,6 +363,18 @@ def cleanup(self) -> None:
except Exception:
logger.exception("Error during SharedDataManager shutdown")
+ # Remove socket file if it still exists. The Manager server
+ # process normally unlinks it on exit, but if it was killed
+ # (SIGKILL) the file may be left behind.
+ socket_path = getattr(self, "_socket_path", None)
+ if socket_path:
+ try:
+ os.unlink(socket_path)
+ except FileNotFoundError:
+ pass
+ except OSError as e:
+ logger.debug("Could not remove socket file %s: %s", socket_path, e)
+
def deregister_child(self) -> None:
"""Remove the Manager's server process from multiprocessing's child tracking.
diff --git a/test-requirements.txt b/test-requirements.txt
index bdd44e3e9..bf74580a9 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1,6 +1,7 @@
dbus-python
# modules required for pylint
setuptools
+pytest
# packages required for mypy
sqlalchemy-stubs
types-python-dateutil
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 000000000..da2843922
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,30 @@
+"""Shared pytest fixtures for keylime tests."""
+
+import shutil
+import tempfile
+from unittest.mock import patch
+
+import pytest
+
+from keylime.shared_data import cleanup_global_shared_memory
+
+
+@pytest.fixture(autouse=True)
+def _shared_data_runtime_dir():
+ """Redirect SharedDataManager sockets to a temporary directory.
+
+ The SyncManager creates Unix domain sockets in /var/run/keylime/,
+ which may not be writable by the test user. This fixture patches
+ the runtime directory to a per-test temp directory so that tests
+ work in any environment.
+
+ After each test, any global SharedDataManager is shut down to
+ prevent stale managers from referencing deleted temp directories.
+ """
+ tmpdir = tempfile.mkdtemp()
+ with patch("keylime.shared_data._RUNTIME_DIR", tmpdir):
+ yield
+ # Shut down any global SharedDataManager left alive by the test
+ # so the next test starts fresh with a new temp directory.
+ cleanup_global_shared_memory()
+ shutil.rmtree(tmpdir, ignore_errors=True)
From 712ab6c841e258e463f858904bfc0991f704a3b9 Mon Sep 17 00:00:00 2001
From: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
Date: Thu, 16 Apr 2026 14:14:45 +0200
Subject: [PATCH 2/2] installer: Add tmpfiles.d config for all keylime
directories
Add keylime-tmpfiles.conf to manage all keylime directories.
This includes:
- /var/run/keylime (runtime IPC sockets)
- /var/lib/keylime (persistent state)
- /etc/keylime and config snippet directories (configuration)
- TPM certificate store copy from /usr/share to /var/lib
Simplify installer.sh to avoid redundant directory creation and
ownership setting. The installer only needs to install the tmpfiles.d
config to /usr/lib/tmpfiles.d/keylime.conf and apply it immediately with
systemd-tmpfiles --create so the directories exist before the services
start.
The installer validates the TPM cert store source exists before copying
and includes a non-systemd fallback for manual directory creation.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: Anderson Toshiyuki Sasaki <ansasaki@redhat.com>
---
services/installer.sh | 61 ++++++++++++++++++++++++++--------
services/keylime-tmpfiles.conf | 40 ++++++++++++++++++++++
2 files changed, 87 insertions(+), 14 deletions(-)
create mode 100644 services/keylime-tmpfiles.conf
diff --git a/services/installer.sh b/services/installer.sh
index f34027c61..f462f136b 100755
--- a/services/installer.sh
+++ b/services/installer.sh
@@ -11,7 +11,7 @@ fi
BASEDIR=$(dirname "$0")
# check keylime scripts directory (same for verifier, agent, registrar)
-KEYLIMEDIR=$(dirname $(whereis keylime_verifier | cut -d " " -f 2))
+KEYLIMEDIR=$(dirname "$(whereis keylime_verifier | cut -d " " -f 2)")
if [[ $KEYLIMEDIR == "." ]]; then
echo "Unable to find keylime scripts" 1>&2
exit 1
@@ -20,8 +20,8 @@ fi
echo "Using keylime scripts directory: ${KEYLIMEDIR}"
# prepare keylime service files and store them in systemd path
-sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" $BASEDIR/keylime_registrar.service.template > /etc/systemd/system/keylime_registrar.service
-sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" $BASEDIR/keylime_verifier.service.template > /etc/systemd/system/keylime_verifier.service
+sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" "$BASEDIR/keylime_registrar.service.template" > /etc/systemd/system/keylime_registrar.service
+sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" "$BASEDIR/keylime_verifier.service.template" > /etc/systemd/system/keylime_verifier.service
echo "Creating keylime user if it not exists"
if ! getent passwd keylime >/dev/null; then
@@ -30,23 +30,56 @@ if ! getent passwd keylime >/dev/null; then
keylime
fi
-echo "Changing files to be owned by the keylime user"
-# Create all directories required if not there
-mkdir -p /var/lib/keylime
-mkdir -p /var/log/keylime
-mkdir -p /var/run/keylime
+# install TPM certificate store to /usr/share/keylime/
+# tmpfiles.d will copy this to /var/lib/keylime/tpm_cert_store
+TPM_CERT_STORE_SRC="$BASEDIR/../tpm_cert_store"
+if [[ ! -d "$TPM_CERT_STORE_SRC" ]]; then
+ echo "Missing TPM certificate store: $TPM_CERT_STORE_SRC" 1>&2
+ exit 1
+fi
+
+mkdir -p /usr/share/keylime
+cp -a "$TPM_CERT_STORE_SRC" /usr/share/keylime/ || exit 1
-chown keylime:keylime -R /etc/keylime
-chown keylime:keylime -R /var/lib/keylime
-chown keylime:keylime -R /var/log/keylime
-chown keylime:keylime -R /var/run/keylime
+# install tmpfiles.d config for keylime directories
+mkdir -p /usr/lib/tmpfiles.d
+cp "$BASEDIR/keylime-tmpfiles.conf" /usr/lib/tmpfiles.d/keylime.conf
+
+# apply the tmpfiles.d config immediately to create directories with correct ownership
+if command -v systemd-tmpfiles >/dev/null 2>&1; then
+ systemd-tmpfiles --create keylime.conf
+else
+ echo "Warning: systemd-tmpfiles not found, creating directories manually"
+ # Create essential directories as fallback for non-systemd systems
+ mkdir -p /var/run/keylime /var/lib/keylime \
+ /etc/keylime/ca.conf.d \
+ /etc/keylime/logging.conf.d \
+ /etc/keylime/verifier.conf.d \
+ /etc/keylime/registrar.conf.d \
+ /etc/keylime/tenant.conf.d \
+ /etc/keylime/agent.conf.d
+ chown keylime:keylime /var/run/keylime /var/lib/keylime
+ chmod 700 /var/run/keylime /var/lib/keylime
+ # Mirror tmpfiles.d Z/z semantics: recursively set ownership and
+ # file permissions under /etc/keylime, then fix directories to 0500.
+ chown -R keylime:keylime /etc/keylime
+ find /etc/keylime -type f -exec chmod 400 {} \;
+ find /etc/keylime -type d -exec chmod 500 {} \;
+ # Copy TPM cert store from /usr/share to /var/lib only if the
+ # target does not exist yet (mirrors the tmpfiles.d C directive).
+ # This preserves operator-added EK certificates.
+ if [ -d /usr/share/keylime/tpm_cert_store ] && [ ! -d /var/lib/keylime/tpm_cert_store ]; then
+ cp -r /usr/share/keylime/tpm_cert_store /var/lib/keylime/
+ chown -R keylime:keylime /var/lib/keylime/tpm_cert_store
+ find /var/lib/keylime/tpm_cert_store -type f -exec chmod 400 {} \;
+ chmod 500 /var/lib/keylime/tpm_cert_store
+ fi
+fi
# set permissions
chmod 664 /etc/systemd/system/keylime_registrar.service
chmod 664 /etc/systemd/system/keylime_verifier.service
-chmod 700 /var/run/keylime
-
# enable at startup
systemctl enable keylime_registrar.service
systemctl enable keylime_verifier.service
diff --git a/services/keylime-tmpfiles.conf b/services/keylime-tmpfiles.conf
new file mode 100644
index 000000000..f3c0b43d6
--- /dev/null
+++ b/services/keylime-tmpfiles.conf
@@ -0,0 +1,40 @@
+d /run/keylime 0700 keylime keylime -
+
+d /var/lib/keylime 0700 keylime keylime -
+
+d /etc/keylime 0500 keylime keylime -
+d /etc/keylime/ca.conf.d 0500 keylime keylime -
+d /etc/keylime/logging.conf.d 0500 keylime keylime -
+d /etc/keylime/verifier.conf.d 0500 keylime keylime -
+d /etc/keylime/registrar.conf.d 0500 keylime keylime -
+d /etc/keylime/tenant.conf.d 0500 keylime keylime -
+d /etc/keylime/agent.conf.d 0500 keylime keylime -
+
+# TPM certificate store.
+# Copy the cert store from /usr/share/keylime/tpm_cert_store
+# to /var/lib/keylime/tpm_cert_store.
+# Files inside /var/lib/keylime/tpm_cert_store/ have
+# 0400 permission and are owned by keylime/keylime,
+# while /var/lib/keylime/tpm_cert_store/ itself has
+# permission 0500, also owned by keylime/keylime.
+C /var/lib/keylime/tpm_cert_store 0500 keylime keylime - /usr/share/keylime/tpm_cert_store
+Z /var/lib/keylime/tpm_cert_store 0400 keylime keylime -
+z /var/lib/keylime/tpm_cert_store 0500 keylime keylime -
+# Finally, /var/lib/keylime itself has 0700 permission,
+# and is owned by keylime/keylime.
+z /var/lib/keylime 0700 keylime keylime -
+
+# Keylime configuration in /etc/keylime has permission 0400
+# owned by keylime/keylime, while snippet directories and
+# the actual /etc/keylime directory have permission 0500,
+# also owned by keylime/keylime.
+Z /etc/keylime 0400 keylime keylime -
+# Now fix the directories:
+z /etc/keylime/ca.conf.d 0500 keylime keylime -
+z /etc/keylime/logging.conf.d 0500 keylime keylime -
+z /etc/keylime/verifier.conf.d 0500 keylime keylime -
+z /etc/keylime/registrar.conf.d 0500 keylime keylime -
+z /etc/keylime/tenant.conf.d 0500 keylime keylime -
+z /etc/keylime/agent.conf.d 0500 keylime keylime -
+# And finally, /etc/keylime itself.
+z /etc/keylime 0500 keylime keylime -

File diff suppressed because it is too large Load Diff

View File

@ -4,7 +4,7 @@
## END: Set by rpmautospec
%global srcname keylime
%global policy_version 42.1.2
%global policy_version 43.2.1
# Package is actually noarch, but it has an optional dependency that is
# arch-specific.
@ -13,8 +13,8 @@
%global selinuxtype targeted
Name: keylime
Version: 7.12.1
Release: 11%{?dist}.4
Version: 7.14.1
Release: 5%{?dist}
Summary: Open source TPM software for Bootstrapping and Maintaining Trust
URL: https://github.com/keylime/keylime
@ -24,44 +24,40 @@ Source1: https://github.com/RedHat-SP-Security/%{name}-selinux/archive/v%
Source2: %{srcname}.sysusers
Source3: %{srcname}.tmpfiles
# Backported from https://github.com/keylime/keylime/pull/1782
# Fixes DB connections leaks (https://issues.redhat.com/browse/RHEL-102995)
Patch: keylime-fix-db-connection-leaks.patch
Patch: 0001-Fix-timestamp-conversion-to-use-UTC-timezone.patch
Patch: 0002-Fix-efivar-availability-check-in-test_create_mb_poli.patch
Patch: 0003-Close-DB-sessions-to-prevent-connection-exhaustion.patch
Patch: 0004-Include-thread-safe-session-management.patch
Patch: 0005-Address-some-improvements-from-code-review.patch
Patch: 0006-Fix-race-condition-on-in-SessionManager.patch
Patch: 0007-Fix-linter-errors-in-PersistableModel.get-and-.all.patch
Patch: 0008-refactor-Remove-dead-code-AuthSession.authenticate_a.patch
Patch: 0009-db-Clean-up-scoped-session-after-each-request.patch
Patch: 0010-fix-Check-active-flag-in-_extract_identity-and-guard.patch
Patch: 0011-fix-Add-fork-safety-to-DBManager-via-dispose.patch
# Backported from https://github.com/keylime/keylime/pull/1791
Patch: 0002-mb-support-EV_EFI_HANDOFF_TABLES-events-on-PCR1.patch
Patch: 0003-mb-support-vendor_db-as-logged-by-newer-shim-version.patch
# RHEL-154295 - memleaks in verifier push-mode.
# Backport https://github.com/keylime/keylime/pull/1866
Patch: 0012-fix-mem-leak-remove-unbounded-functools.cache-from-l.patch
# Backported from https://github.com/keylime/keylime/pull/1784
# and https://github.com/keylime/keylime/pull/1785
Patch: 0004-verifier-Gracefully-shutdown-on-signal.patch
Patch: 0005-revocations-Try-to-send-notifications-on-shutdown.patch
Patch: 0006-requests_client-close-the-session-at-the-end-of-the-.patch
# RHEL-153121 - fix verifier race condition on agent delete.
# Backport https://github.com/keylime/keylime/pull/1874
Patch: 0013-fix-verifier-race-condition-on-agent-delete.patch
# Backported from https://github.com/keylime/keylime/pull/1736,
# https://github.com/keylime/keylime/commit/11c6b7f and
# https://github.com/keylime/keylime/commit/dd63459
Patch: 0007-tests-change-test_mba_parsing-to-not-need-keylime-in.patch
Patch: 0008-tests-skip-measured-boot-related-tests-for-s390x-and.patch
Patch: 0009-tests-fix-rpm-repo-tests-from-create-runtime-policy.patch
# Backported from https://github.com/keylime/keylime/pull/1793
Patch: 0010-mba-normalize-vendor_db-in-EV_EFI_VARIABLE_AUTHORITY.patch
# Backported from https://github.com/keylime/keylime/pull/1794
Patch: 0011-fix-malformed-certs-workaround.patch
# Backported from https://github.com/keylime/keylime/pull/1795
Patch: 0012-keylime-policy-avoid-opening-dev-stdout.patch
# CVE-2025-13609
# Backports from:
# - https://github.com/keylime/keylime/pull/1817/commits/1024e19d
# - https://github.com/keylime/keylime/pull/1825
Patch: 0013-Add-shared-memory-infrastructure-for-multiprocess-co.patch
Patch: 0014-Fix-registrar-duplicate-UUID-vulnerability.patch
# CVE-2026-1709
Patch: 0015-CVE-2026-1709.patch
# RHEL-151493 - verifier graceful shutdown.
# Backport:
# - https://github.com/keylime/keylime/pull/1809
# - https://github.com/keylime/keylime/pull/1868
# - https://github.com/keylime/keylime/pull/1855
# - https://github.com/keylime/keylime/pull/1869
# - https://github.com/keylime/keylime/pull/1883
# - https://github.com/keylime/keylime/pull/1886
Patch: 0014-push-attestation-documentation.patch
Patch: 0015-remove-enable-authentication-config-option.patch
Patch: 0016-docs-push-attestation-config-tables.patch
Patch: 0017-verifier-graceful-shutdown.patch
Patch: 0018-ignore-sigterm-sigint-manager-parent-processes.patch
Patch: 0019-move-socket-var-run.patch
# Main program: Apache-2.0
# Icons: MIT
@ -74,13 +70,16 @@ BuildRequires: python3-devel
BuildRequires: python3-dbus
BuildRequires: python3-jinja2
BuildRequires: python3-cryptography
BuildRequires: python3-docutils
BuildRequires: python3-gpg
BuildRequires: python3-pyasn1
BuildRequires: python3-pyasn1-modules
BuildRequires: python3-requests
BuildRequires: python3-tornado
BuildRequires: python3-sqlalchemy
BuildRequires: python3-lark
BuildRequires: python3-psutil
BuildRequires: python3-pytest
BuildRequires: python3-pyyaml
BuildRequires: python3-jsonschema
BuildRequires: python3-setuptools
@ -256,6 +255,12 @@ bzip2 -9 %{srcname}.pp
%build
%py3_build
mkdir -p manpages
rst2man --syntax-highlight=none docs/man/keylime_tenant.1.rst manpages/keylime_tenant.1
rst2man --syntax-highlight=none docs/man/keylime-policy.1.rst manpages/keylime-policy.1
rst2man --syntax-highlight=none docs/man/keylime_registrar.8.rst manpages/keylime_registrar.8
rst2man --syntax-highlight=none docs/man/keylime_verifier.8.rst manpages/keylime_verifier.8
%install
%py3_install
mkdir -p %{buildroot}/%{_sharedstatedir}/%{srcname}
@ -277,8 +282,10 @@ done
# Ship the ek-openssl-verify script.
mkdir -p %{buildroot}/%{_datadir}/%{srcname}/scripts
install -Dpm 755 scripts/ek-openssl-verify \
%{buildroot}/%{_datadir}/%{srcname}/scripts/ek-openssl-verify
for s in ek-openssl-verify keylime_oneshot_attestation; do
install -Dpm 755 scripts/"${s}" \
%{buildroot}/%{_datadir}/%{srcname}/scripts/"${s}"
done
# Ship configuration templates.
cp -r ./templates %{buildroot}%{_datadir}/%{srcname}/templates/
@ -308,6 +315,14 @@ done
install -p -D -m 0644 %{SOURCE2} %{buildroot}/%{_sysusersdir}/%{srcname}.conf
install -p -D -m 0644 %{SOURCE3} %{buildroot}/%{_tmpfilesdir}/%{name}.conf
# Install manpages
install -d %{buildroot}%{_mandir}/man1
install -d %{buildroot}%{_mandir}/man8
install -m 644 manpages/keylime_tenant.1 %{buildroot}%{_mandir}/man1/
install -m 644 manpages/keylime-policy.1 %{buildroot}%{_mandir}/man1/
install -m 644 manpages/keylime_registrar.8 %{buildroot}%{_mandir}/man8/
install -m 644 manpages/keylime_verifier.8 %{buildroot}%{_mandir}/man8/
%check
# Create the default configuration files to be used by the tests.
# Also set the associated environment variables so that the tests
@ -322,7 +337,7 @@ export KEYLIME_CA_CONFIG="${CONF_TEMP_DIR}/ca.conf"
export KEYLIME_LOGGING_CONFIG="${CONF_TEMP_DIR}/logging.conf"
# Run the tests.
%{python3} -m unittest
%pytest
# Cleanup.
[ "${CONF_TEMP_DIR}" ] && rm -rf "${CONF_TEMP_DIR}"
@ -423,6 +438,7 @@ fi
%{_bindir}/%{srcname}_verifier
%{_bindir}/%{srcname}_ca
%{_unitdir}/keylime_verifier.service
%{_mandir}/man8/keylime_verifier.8*
%files registrar
%license LICENSE
@ -430,6 +446,7 @@ fi
%config(noreplace) %verify(not md5 size mode mtime) %attr(400,%{srcname},%{srcname}) %{_sysconfdir}/%{srcname}/registrar.conf
%{_bindir}/%{srcname}_registrar
%{_unitdir}/keylime_registrar.service
%{_mandir}/man8/keylime_registrar.8*
%if 0%{?with_selinux}
%files selinux
@ -443,6 +460,7 @@ fi
%attr(500,%{srcname},%{srcname}) %dir %{_sysconfdir}/%{srcname}/tenant.conf.d
%config(noreplace) %verify(not md5 size mode mtime) %attr(400,%{srcname},%{srcname}) %{_sysconfdir}/%{srcname}/tenant.conf
%{_bindir}/%{srcname}_tenant
%{_mandir}/man1/keylime_tenant.1*
%files -n python3-%{srcname}
%license LICENSE
@ -450,6 +468,7 @@ fi
%{python3_sitelib}/%{srcname}
%{_bindir}/keylime_attest
%{_bindir}/keylime-policy
%{_mandir}/man1/keylime-policy.1*
%files tools
@ -465,13 +484,14 @@ fi
%config(noreplace) %verify(not md5 size mode mtime) %attr(400,%{srcname},%{srcname}) %{_sysconfdir}/%{srcname}/logging.conf
%attr(700,%{srcname},%{srcname}) %dir %{_rundir}/%{srcname}
%attr(700,%{srcname},%{srcname}) %dir %{_sharedstatedir}/%{srcname}
%attr(500,%{srcname},%{srcname}) %dir %{_datadir}/%{srcname}/tpm_cert_store
%attr(400,%{srcname},%{srcname}) %{_datadir}/%{srcname}/tpm_cert_store/*.pem
%attr(755,root,root) %dir %{_datadir}/%{srcname}/tpm_cert_store
%attr(644,root,root) %{_datadir}/%{srcname}/tpm_cert_store/*.pem
%attr(500,%{srcname},%{srcname}) %dir %{_sharedstatedir}/%{srcname}/tpm_cert_store
%attr(400,%{srcname},%{srcname}) %{_sharedstatedir}/%{srcname}/tpm_cert_store/*.pem
%{_tmpfilesdir}/%{srcname}.conf
%{_sysusersdir}/%{srcname}.conf
%{_datadir}/%{srcname}/scripts/ek-openssl-verify
%{_datadir}/%{srcname}/scripts/keylime_oneshot_attestation
%{_datadir}/%{srcname}/templates
%{_bindir}/keylime_upgrade_config
@ -480,11 +500,31 @@ fi
%changelog
## START: Generated by rpmautospec
* Tue Feb 03 2026 Anderson Toshiyuki Sasaki <ansasaki@redhat.com> - 7.12.1-16
- CVE-2026-1709: Registrar authentication bypass
* Fri Apr 17 2026 Anderson Toshiyuki Sasaki <ansasaki@redhat.com> - 7.14.1-5
- Implement verifier graceful shutdown
* Thu Dec 11 2025 Sergio Correia <scorreia@redhat.com> - 7.12.1-15
- Registrar allows identity takeover via duplicate UUID registration
* Tue Apr 14 2026 Sergio Arroutbi <sarroutb@redhat.com> - 7.14.1-4
- Fix verifier race condition on agent delete
* Wed Apr 01 2026 Sergio Correia <scorreia@redhat.com> - 7.14.1-3
- Remove unbounded functools.cache from latest_attestation
* Mon Mar 23 2026 Sergio Arroutbi <sarroutb@redhat.com> - 7.14.1-2
- Add patches to fix DB connection leaks
* Fri Feb 13 2026 Sergio Correia <scorreia@redhat.com> - 7.14.1-1
- Updating for Keylime release v7.14.1
* Mon Feb 02 2026 Sergio Correia <scorreia@redhat.com> - 7.12.1-17
- Change ownership of /usr/share/keylime/tpm_cert_store to root
* Wed Oct 15 2025 Marek Safarik <msafarik@redhat.com> - 7.12.1-16
- Added manpages for keylime services and the tenant
- Added support for ECC attestation
- Fixed man page RST formatting for rst2man compatibility
* Mon Oct 06 2025 Sergio Correia <scorreia@redhat.com> - 7.12.1-15
- Add support for ECC attestation
* Mon Sep 15 2025 Anderson Toshiyuki Sasaki <ansasaki@redhat.com> - 7.12.1-14
- Properly fix malformed TPM certificates workaround

View File

@ -1,2 +1,2 @@
SHA512 (keylime-selinux-42.1.2.tar.gz) = cb7b7b10d1d81af628a7ffdadc1be5af6d75851a44f58cff04edc575cbba1613447e56bfa1fb86660ec7c15e5fcf16ba51f2984094550ba3e08f8095b800b741
SHA512 (v7.12.1.tar.gz) = c1297ebfc659102d73283255cfda4a977dfbff9bdd3748e05de405dadb70f752ad39aa5848edda9143d8ec620d07c21f1551fa4a914c99397620ab1682e58458
SHA512 (keylime-selinux-43.2.1.tar.gz) = 8cb8b032819d3b87e1dceaa7094385b4468c0d6be1e5dfc6d8b6758e6281def5255120ff34d71b5d4bc7fe9b9e960f1a98011e5bf7149df5704d0bbf6afbfad3
SHA512 (v7.14.1.tar.gz) = d94cd1e25ec31e43fea05d0c404dd25c05b6b28435db2f8ca34546f6ff8bfd5da12d2dcd3b5cf4772c44688ae8968468dc2470da23596714e7615dbf6dfbe841