From 64744e55700852dd9452e6fd2a6eb86b5b811469 Mon Sep 17 00:00:00 2001 From: AlmaLinux RelEng Bot Date: Tue, 19 May 2026 15:12:29 -0400 Subject: [PATCH] import CS keylime-7.14.1-5.el10 --- .gitignore | 4 +- ...stamp-conversion-to-use-UTC-timezone.patch | 40 + ...ability-check-in-test_create_mb_poli.patch | 36 + ...EV_EFI_HANDOFF_TABLES-events-on-PCR1.patch | 29 - ...ons-to-prevent-connection-exhaustion.patch | 343 +++ ...r_db-as-logged-by-newer-shim-version.patch | 356 --- ...clude-thread-safe-session-management.patch | 198 ++ ...rifier-Gracefully-shutdown-on-signal.patch | 42 - ...s-some-improvements-from-code-review.patch | 79 + ...ry-to-send-notifications-on-shutdown.patch | 308 --- ...-race-condition-on-in-SessionManager.patch | 42 + ...close-the-session-at-the-end-of-the-.patch | 45 - ...ors-in-PersistableModel.get-and-.all.patch | 160 ++ ...t_mba_parsing-to-not-need-keylime-in.patch | 91 - ...dead-code-AuthSession.authenticate_a.patch | 457 ++++ ...red-boot-related-tests-for-s390x-and.patch | 53 - ...up-scoped-session-after-each-request.patch | 205 ++ ...epo-tests-from-create-runtime-policy.patch | 58 - ...-flag-in-_extract_identity-and-guard.patch | 108 + ...ndor_db-in-EV_EFI_VARIABLE_AUTHORITY.patch | 281 -- ...fork-safety-to-DBManager-via-dispose.patch | 92 + 0011-fix-malformed-certs-workaround.patch | 1304 --------- ...ove-unbounded-functools.cache-from-l.patch | 274 ++ ...lime-policy-avoid-opening-dev-stdout.patch | 37 - ...y-infrastructure-for-multiprocess-co.patch | 1107 -------- ...ifier-race-condition-on-agent-delete.patch | 449 ++++ ...gistrar-duplicate-UUID-vulnerability.patch | 1188 --------- 0014-push-attestation-documentation.patch | 1910 +++++++++++++ 0015-CVE-2026-1709.patch | 20 - ...-enable-authentication-config-option.patch | 46 + ...-docs-push-attestation-config-tables.patch | 1164 ++++++++ 0017-verifier-graceful-shutdown.patch | 2373 +++++++++++++++++ ...term-sigint-manager-parent-processes.patch | 151 ++ 0019-move-socket-var-run.patch | 
348 +++ keylime-fix-db-connection-leaks.patch | 2208 --------------- keylime.spec | 134 +- sources | 4 +- 37 files changed, 8566 insertions(+), 7178 deletions(-) create mode 100644 0001-Fix-timestamp-conversion-to-use-UTC-timezone.patch create mode 100644 0002-Fix-efivar-availability-check-in-test_create_mb_poli.patch delete mode 100644 0002-mb-support-EV_EFI_HANDOFF_TABLES-events-on-PCR1.patch create mode 100644 0003-Close-DB-sessions-to-prevent-connection-exhaustion.patch delete mode 100644 0003-mb-support-vendor_db-as-logged-by-newer-shim-version.patch create mode 100644 0004-Include-thread-safe-session-management.patch delete mode 100644 0004-verifier-Gracefully-shutdown-on-signal.patch create mode 100644 0005-Address-some-improvements-from-code-review.patch delete mode 100644 0005-revocations-Try-to-send-notifications-on-shutdown.patch create mode 100644 0006-Fix-race-condition-on-in-SessionManager.patch delete mode 100644 0006-requests_client-close-the-session-at-the-end-of-the-.patch create mode 100644 0007-Fix-linter-errors-in-PersistableModel.get-and-.all.patch delete mode 100644 0007-tests-change-test_mba_parsing-to-not-need-keylime-in.patch create mode 100644 0008-refactor-Remove-dead-code-AuthSession.authenticate_a.patch delete mode 100644 0008-tests-skip-measured-boot-related-tests-for-s390x-and.patch create mode 100644 0009-db-Clean-up-scoped-session-after-each-request.patch delete mode 100644 0009-tests-fix-rpm-repo-tests-from-create-runtime-policy.patch create mode 100644 0010-fix-Check-active-flag-in-_extract_identity-and-guard.patch delete mode 100644 0010-mba-normalize-vendor_db-in-EV_EFI_VARIABLE_AUTHORITY.patch create mode 100644 0011-fix-Add-fork-safety-to-DBManager-via-dispose.patch delete mode 100644 0011-fix-malformed-certs-workaround.patch create mode 100644 0012-fix-mem-leak-remove-unbounded-functools.cache-from-l.patch delete mode 100644 0012-keylime-policy-avoid-opening-dev-stdout.patch delete mode 100644 
0013-Add-shared-memory-infrastructure-for-multiprocess-co.patch create mode 100644 0013-fix-verifier-race-condition-on-agent-delete.patch delete mode 100644 0014-Fix-registrar-duplicate-UUID-vulnerability.patch create mode 100644 0014-push-attestation-documentation.patch delete mode 100644 0015-CVE-2026-1709.patch create mode 100644 0015-remove-enable-authentication-config-option.patch create mode 100644 0016-docs-push-attestation-config-tables.patch create mode 100644 0017-verifier-graceful-shutdown.patch create mode 100644 0018-ignore-sigterm-sigint-manager-parent-processes.patch create mode 100644 0019-move-socket-var-run.patch delete mode 100644 keylime-fix-db-connection-leaks.patch diff --git a/.gitignore b/.gitignore index 50bf933..b38fcf0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ -keylime-selinux-42.1.2.tar.gz -v7.12.1.tar.gz +keylime-selinux-43.2.1.tar.gz +v7.14.1.tar.gz diff --git a/0001-Fix-timestamp-conversion-to-use-UTC-timezone.patch b/0001-Fix-timestamp-conversion-to-use-UTC-timezone.patch new file mode 100644 index 0000000..2d7d76d --- /dev/null +++ b/0001-Fix-timestamp-conversion-to-use-UTC-timezone.patch @@ -0,0 +1,40 @@ +From 7cf07986522fda7691d9135ad4f8d31d030e8b59 Mon Sep 17 00:00:00 2001 +From: Sergio Correia +Date: Fri, 13 Feb 2026 04:46:20 -0500 +Subject: [PATCH 1/2] Fix timestamp conversion to use UTC timezone + +Ensure Unix timestamps are converted to UTC datetimes by passing +tz=timezone.utc to datetime.fromtimestamp(). Previously, timestamps +were converted using the local timezone, causing test failures when +epoch (0) was incorrectly converted to 1969 instead of 1970. 
+ +Signed-off-by: Sergio Correia +--- + keylime/models/base/types/timestamp.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/keylime/models/base/types/timestamp.py b/keylime/models/base/types/timestamp.py +index 8f6782f..22c1fcb 100644 +--- a/keylime/models/base/types/timestamp.py ++++ b/keylime/models/base/types/timestamp.py +@@ -36,7 +36,7 @@ class Timestamp(ModelType): + + if not ts: + try: +- ts = datetime.fromtimestamp(float(value)) ++ ts = datetime.fromtimestamp(float(value), tz=timezone.utc) + except ValueError: + pass + +@@ -49,7 +49,7 @@ class Timestamp(ModelType): + return self._load_datetime(ts) + + def _load_float(self, value: float) -> datetime: +- ts = datetime.fromtimestamp(value) ++ ts = datetime.fromtimestamp(value, tz=timezone.utc) + return self._load_datetime(ts) + + def _load_int(self, value: int) -> datetime: +-- +2.53.0 + diff --git a/0002-Fix-efivar-availability-check-in-test_create_mb_poli.patch b/0002-Fix-efivar-availability-check-in-test_create_mb_poli.patch new file mode 100644 index 0000000..76601af --- /dev/null +++ b/0002-Fix-efivar-availability-check-in-test_create_mb_poli.patch @@ -0,0 +1,36 @@ +From be3243b5f4f3423b8e8e29245a2401e52dd52baf Mon Sep 17 00:00:00 2001 +From: Sergio Correia +Date: Fri, 13 Feb 2026 07:22:46 -0500 +Subject: [PATCH 2/2] Fix efivar availability check in test_create_mb_policy + +Import tpm_bootlog_enrich instead of the elparsing package so the +CDLL("libefivar.so.1") load is actually triggered, allowing tests +to skip gracefully when the library is absent. 
+ +Signed-off-by: Sergio Correia +--- + test/test_create_mb_policy.py | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/test/test_create_mb_policy.py b/test/test_create_mb_policy.py +index 04ed779..fc79c3b 100644 +--- a/test/test_create_mb_policy.py ++++ b/test/test_create_mb_policy.py +@@ -10,9 +10,12 @@ import unittest + + from keylime.policy import create_mb_policy + +-# Check if efivarlibs is available for measured boot parsing ++# Check if efivarlibs is available for measured boot parsing. ++# We need to import tpm_bootlog_enrich (not just the elparsing package) ++# because the CDLL("libefivar.so.1") load happens at module level in ++# tpm_bootlog_enrich, and importing just the package won't trigger it. + try: +- from keylime.mba import elparsing # pylint: disable=unused-import ++ from keylime.mba.elparsing import tpm_bootlog_enrich # pylint: disable=unused-import + + EFIVAR_AVAILABLE = True + except Exception: +-- +2.53.0 + diff --git a/0002-mb-support-EV_EFI_HANDOFF_TABLES-events-on-PCR1.patch b/0002-mb-support-EV_EFI_HANDOFF_TABLES-events-on-PCR1.patch deleted file mode 100644 index e6f5bef..0000000 --- a/0002-mb-support-EV_EFI_HANDOFF_TABLES-events-on-PCR1.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 52944972182639a625599e29ebe65b91714a3a41 Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Fri, 8 Aug 2025 16:40:01 +0100 -Subject: [PATCH 2/3] mb: support EV_EFI_HANDOFF_TABLES events on PCR1 - -Allow EV_EFI_HANDOFF_TABLES events on PCR1 alongside the existing -EV_EFI_HANDOFF_TABLES2 support to handle different firmware -implementations, in the example policy. 
- -Signed-off-by: Sergio Correia ---- - keylime/mba/elchecking/example.py | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/keylime/mba/elchecking/example.py b/keylime/mba/elchecking/example.py -index 2c6f699..a3d918a 100644 ---- a/keylime/mba/elchecking/example.py -+++ b/keylime/mba/elchecking/example.py -@@ -185,6 +185,7 @@ class Example(policies.Policy): - # We only expect one EV_NO_ACTION event at the start. - dispatcher.set((0, "EV_NO_ACTION"), tests.OnceTest(tests.AcceptAll())) - dispatcher.set((1, "EV_CPU_MICROCODE"), tests.OnceTest(tests.AcceptAll())) -+ dispatcher.set((1, "EV_EFI_HANDOFF_TABLES"), tests.OnceTest(tests.AcceptAll())) - dispatcher.set((1, "EV_EFI_HANDOFF_TABLES2"), tests.OnceTest(tests.AcceptAll())) - dispatcher.set((0, "EV_S_CRTM_VERSION"), events_final.get("s_crtms")) - dispatcher.set((0, "EV_EFI_PLATFORM_FIRMWARE_BLOB"), events_final.get("platform_firmware_blobs")) --- -2.47.3 - diff --git a/0003-Close-DB-sessions-to-prevent-connection-exhaustion.patch b/0003-Close-DB-sessions-to-prevent-connection-exhaustion.patch new file mode 100644 index 0000000..732fccd --- /dev/null +++ b/0003-Close-DB-sessions-to-prevent-connection-exhaustion.patch @@ -0,0 +1,343 @@ +From 5b622eae9244b5a820263609cae6bd4681d3fbb2 Mon Sep 17 00:00:00 2001 +From: Sergio Arroutbi +Date: Tue, 10 Mar 2026 11:26:49 +0100 +Subject: [PATCH 3/6] Close DB sessions to prevent connection exhaustion + +Resolves: #1861 + +The get_session() function in session_controller.py and auth_session.py +returned SQLAlchemy sessions that were never closed, leaking connections +back to the pool. Under load (e.g., multi-host push attestation with +multiple agents), this exhausted the QueuePool (size 5, overflow 10), +causing a 30-second timeout and HTTP 500 errors. + +Replace get_session() with a get_session_context() context manager that +guarantees session.close() via try/finally. Define it once in +auth_session.py and import it in session_controller.py. 
+ +Resolves: connection pool exhaustion (QueuePool limit of size 5 +overflow 10 reached) during push attestation multi-host tests. + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Sergio Arroutbi +--- + keylime/models/verifier/auth_session.py | 25 +++++++++----- + keylime/web/verifier/session_controller.py | 27 ++++----------- + test/test_auth_session.py | 38 ++++++++++++++++++++-- + test/test_session_controller.py | 32 ++++++++++-------- + 4 files changed, 77 insertions(+), 45 deletions(-) + +diff --git a/keylime/models/verifier/auth_session.py b/keylime/models/verifier/auth_session.py +index df01668..545995f 100644 +--- a/keylime/models/verifier/auth_session.py ++++ b/keylime/models/verifier/auth_session.py +@@ -1,8 +1,9 @@ + import base64 + import hmac + import uuid ++from contextlib import contextmanager + from datetime import timedelta +-from typing import Any, Dict, Optional, Sequence ++from typing import Any, Dict, Iterator, Optional, Sequence + + from sqlalchemy.orm import Session + +@@ -32,11 +33,17 @@ logger = keylime_logging.init_logging("verifier") + _engine = None + + +-def get_session() -> Session: ++@contextmanager ++def get_session_context() -> Iterator[Session]: + global _engine + if _engine is None: + _engine = make_engine("cloud_verifier") +- return SessionManager().make_session(_engine) ++ session_manager = SessionManager() ++ session = session_manager.make_session(_engine) ++ try: ++ yield session ++ finally: ++ session.close() + + + class AuthSession(PersistableModel): +@@ -270,12 +277,12 @@ class AuthSession(PersistableModel): + return False + + # Use old engine to query VerfierMain (legacy model) +- session = get_session() +- agent = ( +- session.query(VerfierMain) +- .filter(VerfierMain.agent_id == auth_session.agent_id) # type: ignore[attr-defined] +- .one_or_none() +- ) ++ with get_session_context() as session: ++ agent = ( ++ session.query(VerfierMain) ++ .filter(VerfierMain.agent_id == auth_session.agent_id) # type: 
ignore[attr-defined] ++ .one_or_none() ++ ) + + return agent + +diff --git a/keylime/web/verifier/session_controller.py b/keylime/web/verifier/session_controller.py +index 9fc3bb5..49cd758 100644 +--- a/keylime/web/verifier/session_controller.py ++++ b/keylime/web/verifier/session_controller.py +@@ -1,27 +1,14 @@ + import base64 + +-from sqlalchemy.orm import Session +- + from keylime import config, keylime_logging +-from keylime.db.keylime_db import SessionManager, make_engine + from keylime.db.verifier_db import VerfierMain + from keylime.models.base import Timestamp + from keylime.models.verifier import AuthSession ++from keylime.models.verifier.auth_session import get_session_context + from keylime.web.base import Controller + + logger = keylime_logging.init_logging("verifier") + +-# GLOBAL_POLICY_CACHE: Dict[str, Dict[str, str]] = {} +- +-_engine = None +- +- +-def get_session() -> Session: +- global _engine +- if _engine is None: +- _engine = make_engine("cloud_verifier") +- return SessionManager().make_session(_engine) +- + + class SessionController(Controller): + # POST /v3[.:minor]/sessions +@@ -198,8 +185,8 @@ class SessionController(Controller): + return + + # Check if agent exists - this is where we validate enrollment +- session = get_session() +- agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() ++ with get_session_context() as session: ++ agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + + if not agent: + # Agent not enrolled - return 200 with evaluation:fail +@@ -393,8 +380,8 @@ class SessionController(Controller): + + # POST /v3[.:minor]/agents/:agent_id/session + def create(self, agent_id, **params): +- session = get_session() +- agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() ++ with get_session_context() as session: ++ agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + + if not 
agent: + self.respond(404, "here") +@@ -416,8 +403,8 @@ class SessionController(Controller): + self.respond(200, "Success", auth_session.render()) + + def update(self, agent_id, token, **params): +- session = get_session() +- agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() ++ with get_session_context() as session: ++ agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + + # Look up session by token hash (tokens are never stored in plaintext) + auth_session = AuthSession.get_by_token(token) +diff --git a/test/test_auth_session.py b/test/test_auth_session.py +index 62b4244..8e9ec98 100644 +--- a/test/test_auth_session.py ++++ b/test/test_auth_session.py +@@ -7,10 +7,41 @@ from unittest.mock import MagicMock, PropertyMock, patch + + from keylime.crypto import generate_session_token, generate_token_salt, hash_token_for_storage + from keylime.models.base.types import Timestamp +-from keylime.models.verifier.auth_session import AuthSession ++from keylime.models.verifier.auth_session import AuthSession, get_session_context + from keylime.shared_data import cleanup_global_shared_memory, get_shared_memory + + ++class TestGetSessionContext(unittest.TestCase): ++ """Test cases for get_session_context context manager.""" ++ ++ @patch("keylime.models.verifier.auth_session.make_engine") ++ @patch("keylime.models.verifier.auth_session.SessionManager") ++ def test_session_closed_on_normal_exit(self, mock_session_manager_cls, _mock_make_engine): ++ """Test that session.close() is called when context manager exits normally.""" ++ mock_session = MagicMock() ++ mock_session_manager_cls.return_value.make_session.return_value = mock_session ++ ++ with patch("keylime.models.verifier.auth_session._engine", None): ++ with get_session_context() as session: ++ self.assertIs(session, mock_session) ++ ++ mock_session.close.assert_called_once() ++ ++ @patch("keylime.models.verifier.auth_session.make_engine") ++ 
@patch("keylime.models.verifier.auth_session.SessionManager") ++ def test_session_closed_on_exception(self, mock_session_manager_cls, _mock_make_engine): ++ """Test that session.close() is called even when an exception occurs.""" ++ mock_session = MagicMock() ++ mock_session_manager_cls.return_value.make_session.return_value = mock_session ++ ++ with patch("keylime.models.verifier.auth_session._engine", None): ++ with self.assertRaises(RuntimeError): ++ with get_session_context(): ++ raise RuntimeError("simulated error") ++ ++ mock_session.close.assert_called_once() ++ ++ + class TestAuthSessionHelpers(unittest.TestCase): + """Test cases for AuthSession helper methods.""" + +@@ -398,7 +429,7 @@ class TestAuthSessionCore(unittest.TestCase): + self.assertIn("errors", result) + self.assertIn("authentication_supported", result["errors"]) + +- @patch("keylime.models.verifier.auth_session.get_session") ++ @patch("keylime.models.verifier.auth_session.get_session_context") + @patch.object(AuthSession, "get_by_token") + def test_authenticate_agent_success(self, mock_get_by_token, mock_get_session): + """Test successful agent authentication with valid token.""" +@@ -409,7 +440,8 @@ class TestAuthSessionCore(unittest.TestCase): + # Mock session query + mock_db_session = MagicMock() + mock_db_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value = mock_db_session ++ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_db_session) ++ mock_get_session.return_value.__exit__ = MagicMock(return_value=False) + + # Mock AuthSession.get_by_token to return an active session + mock_auth_session = MagicMock() +diff --git a/test/test_session_controller.py b/test/test_session_controller.py +index d807119..eec7fef 100644 +--- a/test/test_session_controller.py ++++ b/test/test_session_controller.py +@@ -272,7 +272,7 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + # Verify session was deleted 
from cache + self.assertNotIn(self.test_session_id, self.sessions_cache) + +- @patch("keylime.web.verifier.session_controller.get_session") ++ @patch("keylime.web.verifier.session_controller.get_session_context") + def test_update_session_agent_not_enrolled(self, mock_get_session): + """Test update_session with unenrolled agent.""" + # Create session in cache +@@ -290,7 +290,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + # Mock database query to return no agent + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = None +- mock_get_session.return_value = mock_session ++ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_get_session.return_value.__exit__ = MagicMock(return_value=False) + + # Call update_session + params = { +@@ -318,7 +319,7 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + body = call_args[1]["body"] + self.assertEqual(body["data"]["attributes"]["evaluation"], "fail") + +- @patch("keylime.web.verifier.session_controller.get_session") ++ @patch("keylime.web.verifier.session_controller.get_session_context") + @patch("keylime.models.verifier.auth_session.AuthSession.create_from_memory") + def test_update_session_authentication_failed(self, mock_create_from_memory, mock_get_session): + """Test update_session with failed authentication.""" +@@ -339,7 +340,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + mock_agent.agent_id = self.test_agent_id + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value = mock_session ++ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_get_session.return_value.__exit__ = MagicMock(return_value=False) + + # Mock AuthSession.create_from_memory to return errors + mock_auth_session = MagicMock() +@@ -377,7 +379,7 @@ class 
TestSessionControllerUpdateSession(unittest.TestCase): + call_args = self.controller.send_response.call_args # type: ignore[attr-defined] + self.assertEqual(call_args[1]["code"], 401) + +- @patch("keylime.web.verifier.session_controller.get_session") ++ @patch("keylime.web.verifier.session_controller.get_session_context") + @patch("keylime.models.verifier.auth_session.AuthSession.create_from_memory") + @patch("keylime.models.verifier.auth_session.AuthSession.delete_active_session_for_agent") + @patch("keylime.web.verifier.session_controller.config") +@@ -403,7 +405,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + mock_agent.agent_id = self.test_agent_id + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value = mock_session ++ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_get_session.return_value.__exit__ = MagicMock(return_value=False) + + # Mock config + mock_config.getboolean.return_value = False # Don't keep in memory +@@ -522,7 +525,7 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + self.assertEqual(call_args[0][0], 404) + + @patch("keylime.models.verifier.auth_session.AuthSession.delete_stale") +- @patch("keylime.web.verifier.session_controller.get_session") ++ @patch("keylime.web.verifier.session_controller.get_session_context") + @patch("keylime.models.verifier.auth_session.AuthSession.create") + def test_create_success(self, mock_create, mock_get_session, _mock_delete_stale): + """Test successful create endpoint.""" +@@ -531,7 +534,8 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + mock_agent.agent_id = self.test_agent_id + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value = mock_session ++ mock_get_session.return_value.__enter__ = 
MagicMock(return_value=mock_session) ++ mock_get_session.return_value.__exit__ = MagicMock(return_value=False) + + # Mock AuthSession.create + mock_auth_session = MagicMock() +@@ -549,13 +553,14 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + call_args = self.controller.respond.call_args # type: ignore[attr-defined] + self.assertEqual(call_args[0][0], 200) + +- @patch("keylime.web.verifier.session_controller.get_session") ++ @patch("keylime.web.verifier.session_controller.get_session_context") + def test_create_agent_not_found(self, mock_get_session): + """Test create endpoint with non-existent agent.""" + # Mock database query to return None + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = None +- mock_get_session.return_value = mock_session ++ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_get_session.return_value.__exit__ = MagicMock(return_value=False) + + # Call create + params = {"data": {}} +@@ -566,7 +571,7 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + call_args = self.controller.respond.call_args # type: ignore[attr-defined] + self.assertEqual(call_args[0][0], 404) + +- @patch("keylime.web.verifier.session_controller.get_session") ++ @patch("keylime.web.verifier.session_controller.get_session_context") + @patch("keylime.models.verifier.auth_session.AuthSession.get_by_token") + def test_update_success(self, mock_get, mock_get_session): + """Test successful update endpoint.""" +@@ -575,7 +580,8 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + mock_agent.agent_id = self.test_agent_id + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value = mock_session ++ mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_get_session.return_value.__exit__ = 
MagicMock(return_value=False) + + # Mock AuthSession.get_by_token + mock_auth_session = MagicMock() +@@ -595,7 +601,7 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + call_args = self.controller.respond.call_args # type: ignore[attr-defined] + self.assertEqual(call_args[0][0], 200) + +- @patch("keylime.web.verifier.session_controller.get_session") ++ @patch("keylime.web.verifier.session_controller.get_session_context") + @patch("keylime.models.verifier.auth_session.AuthSession.get_by_token") + def test_update_not_found(self, mock_get, _mock_get_session): + """Test update endpoint with non-existent session.""" +-- +2.53.0 + diff --git a/0003-mb-support-vendor_db-as-logged-by-newer-shim-version.patch b/0003-mb-support-vendor_db-as-logged-by-newer-shim-version.patch deleted file mode 100644 index c079994..0000000 --- a/0003-mb-support-vendor_db-as-logged-by-newer-shim-version.patch +++ /dev/null @@ -1,356 +0,0 @@ -From 34bd283113f13c251114507315c647975beede2f Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Fri, 8 Aug 2025 16:41:54 +0100 -Subject: [PATCH 3/3] mb: support vendor_db as logged by newer shim versions - -- Updated example policy to properly handle different event structures - for vendor_db validation: - - KeySubsetMulti for EV_EFI_VARIABLE_DRIVER_CONFIG (has SignatureType field) - - SignatureSetMember for EV_EFI_VARIABLE_AUTHORITY (direct signature format) - -- Added method to extract vendor_db from EV_EFI_VARIABLE_AUTHORITY events - in reference state generation (keylime-policy create measured-boot and - the legacy create_mb_refstate script) -- Made vendor_db optional for backward compatibility - -This fixes attestation failures when vendor_db variables are present but -missing from reference states or validated with incorrect test types. 
- -See: https://github.com/rhboot/shim/pull/728 -Signed-off-by: Sergio Correia ---- - keylime/mba/elchecking/example.py | 45 +++++++++ - keylime/policy/create_mb_policy.py | 30 ++++++ - scripts/create_mb_refstate | 30 ++++++ - test/test_create_mb_policy.py | 142 +++++++++++++++++++++++++++++ - 4 files changed, 247 insertions(+) - -diff --git a/keylime/mba/elchecking/example.py b/keylime/mba/elchecking/example.py -index a3d918a..5a933ac 100644 ---- a/keylime/mba/elchecking/example.py -+++ b/keylime/mba/elchecking/example.py -@@ -21,6 +21,7 @@ from . import policies, tests - # kek - list of allowed KEK keys - # db - list of allowed db keys - # dbx - list of required dbx keys -+# vendor_db - list of allowed vendor_db keys (optional, for newer shim versions) - # mokdig - list of allowed digests of MoKList (PCR 14 EV_IPL) - # mokxdig - list of allowed digests of MoKListX (PCR 14 EV_IPL) - # kernels - list of allowed { -@@ -121,6 +122,10 @@ class Example(policies.Policy): - if req not in refstate: - raise Exception(f"refstate lacks {req}") - -+ # vendor_db is optional for backward compatibility -+ if "vendor_db" not in refstate: -+ refstate["vendor_db"] = [] -+ - dispatcher = tests.Dispatcher(("PCRIndex", "EventType")) - vd_driver_config = tests.VariableDispatch() - vd_authority = tests.VariableDispatch() -@@ -268,6 +273,34 @@ class Example(policies.Policy): - "db", - db_test, - ) -+ # Support vendor_db as logged by newer shim versions -+ # See: https://github.com/rhboot/shim/pull/728 -+ if not has_secureboot and not refstate["vendor_db"]: -+ vendor_db_test = tests.OnceTest(tests.AcceptAll()) -+ else: -+ vendor_db_test = tests.OnceTest( -+ tests.Or( -+ tests.KeySubsetMulti( -+ ["a159c0a5-e494-a74a-87b5-ab155c2bf072", "2616c4c1-4c50-9240-aca9-41f936934328"], -+ sigs_strip0x(refstate["vendor_db"]), -+ ), -+ tests.KeySubsetMulti( -+ ["a5c059a1-94e4-4aa7-87b5-ab155c2bf072", "c1c41626-504c-4092-aca9-41f936934328"], -+ sigs_strip0x(refstate["vendor_db"]), -+ ), -+ ) -+ ) -+ -+ 
vd_driver_config.set( -+ "cbb219d7-3a3d-9645-a3bc-dad00e67656f", -+ "vendor_db", -+ vendor_db_test, -+ ) -+ vd_driver_config.set( -+ "d719b2cb-3d3a-4596-a3bc-dad00e67656f", -+ "vendor_db", -+ vendor_db_test, -+ ) - - if not has_secureboot and not refstate["dbx"]: - dbx_test = tests.OnceTest(tests.AcceptAll()) -@@ -295,6 +328,18 @@ class Example(policies.Policy): - vd_db_test = tests.OnceTest(tests.AcceptAll()) - vd_authority.set("cbb219d7-3a3d-9645-a3bc-dad00e67656f", "db", vd_db_test) - vd_authority.set("d719b2cb-3d3a-4596-a3bc-dad00e67656f", "db", vd_db_test) -+ # Support vendor_db as logged by newer shim versions in EV_EFI_VARIABLE_AUTHORITY events -+ # See: https://github.com/rhboot/shim/pull/728 -+ # EV_EFI_VARIABLE_AUTHORITY events have different structure than EV_EFI_VARIABLE_DRIVER_CONFIG -+ # They contain direct signature data without SignatureType field -+ if not has_secureboot and not refstate["vendor_db"]: -+ vendor_db_authority_test = tests.OnceTest(tests.AcceptAll()) -+ else: -+ vendor_db_authority_test = tests.OnceTest( -+ tests.IterateTest(tests.SignatureSetMember(sigs_strip0x(refstate["vendor_db"]))) -+ ) -+ vd_authority.set("cbb219d7-3a3d-9645-a3bc-dad00e67656f", "vendor_db", vendor_db_authority_test) -+ vd_authority.set("d719b2cb-3d3a-4596-a3bc-dad00e67656f", "vendor_db", vendor_db_authority_test) - # Accept all SbatLevels of the Shim, because we already checked the hash of the Shim itself. 
- vd_sbat_level_test = tests.OnceTest(tests.AcceptAll()) - vd_authority.set("50ab5d60-46e0-0043-abb6-3dd810dd8b23", "SbatLevel", vd_sbat_level_test) -diff --git a/keylime/policy/create_mb_policy.py b/keylime/policy/create_mb_policy.py -index 859e652..b2b48f7 100644 ---- a/keylime/policy/create_mb_policy.py -+++ b/keylime/policy/create_mb_policy.py -@@ -93,6 +93,35 @@ def get_keys(events: List[Dict[str, Any]]) -> Dict[str, List[Any]]: - return out - - -+def get_vendor_db(events: List[Dict[str, Any]]) -> Dict[str, List[Any]]: -+ """Get vendor_db signatures from EV_EFI_VARIABLE_AUTHORITY events.""" -+ out: Dict[str, List[Any]] = {"vendor_db": []} -+ -+ for event in events: -+ if "EventType" not in event: -+ continue -+ if event["EventType"] != "EV_EFI_VARIABLE_AUTHORITY": -+ continue -+ if "Event" not in event or "UnicodeName" not in event["Event"]: -+ continue -+ -+ event_name = event["Event"]["UnicodeName"].lower() -+ if event_name == "vendor_db": -+ data = None -+ if "VariableData" in event["Event"]: -+ data = event["Event"]["VariableData"] -+ -+ if data is not None: -+ # VariableData for EV_EFI_VARIABLE_AUTHORITY is a list of signatures -+ for entry in data: -+ if "SignatureOwner" in entry and "SignatureData" in entry: -+ out["vendor_db"].append( -+ {"SignatureOwner": entry["SignatureOwner"], "SignatureData": f"0x{entry['SignatureData']}"} -+ ) -+ -+ return out -+ -+ - def get_kernel(events: List[Dict[str, Any]], secure_boot: bool) -> Dict[str, List[Dict[str, Any]]]: - """Extract digest for Shim, Grub, Linux Kernel and initrd.""" - out = [] -@@ -259,6 +288,7 @@ def create_mb_refstate(args: argparse.Namespace) -> Optional[Dict[str, object]]: - } - ], - **get_keys(events), -+ **get_vendor_db(events), - **get_mok(events), - **get_kernel(events, has_secureboot), - } -diff --git a/scripts/create_mb_refstate b/scripts/create_mb_refstate -index 23cafb9..c98e61d 100755 ---- a/scripts/create_mb_refstate -+++ b/scripts/create_mb_refstate -@@ -78,6 +78,35 @@ def 
get_keys(events): - return out - - -+def get_vendor_db(events): -+ """Get vendor_db signatures from EV_EFI_VARIABLE_AUTHORITY events.""" -+ out = {"vendor_db": []} -+ -+ for event in events: -+ if "EventType" not in event: -+ continue -+ if event["EventType"] != "EV_EFI_VARIABLE_AUTHORITY": -+ continue -+ if "Event" not in event or "UnicodeName" not in event["Event"]: -+ continue -+ -+ event_name = event["Event"]["UnicodeName"].lower() -+ if event_name == "vendor_db": -+ data = None -+ if "VariableData" in event["Event"]: -+ data = event["Event"]["VariableData"] -+ -+ if data is not None: -+ # VariableData for EV_EFI_VARIABLE_AUTHORITY is a list of signatures -+ for entry in data: -+ if "SignatureOwner" in entry and "SignatureData" in entry: -+ out["vendor_db"].append( -+ {"SignatureOwner": entry["SignatureOwner"], "SignatureData": f"0x{entry['SignatureData']}"} -+ ) -+ -+ return out -+ -+ - def get_kernel(events, secure_boot): - """ - Extract digest for Shim, Grub, Linux Kernel and initrd. -@@ -197,6 +226,7 @@ def main(): - } - ], - **get_keys(events), -+ **get_vendor_db(events), - **get_mok(events), - **get_kernel(events, has_secureboot), - } -diff --git a/test/test_create_mb_policy.py b/test/test_create_mb_policy.py -index eaed0e3..aa7a4b9 100644 ---- a/test/test_create_mb_policy.py -+++ b/test/test_create_mb_policy.py -@@ -362,6 +362,148 @@ class CreateMeasuredBootPolicy_Test(unittest.TestCase): - for c in test_cases: - self.assertDictEqual(create_mb_policy.get_mok(c["events"]), c["expected"]) - -+ def test_get_vendor_db(self): -+ test_cases = [ -+ {"events": [], "expected": {"vendor_db": []}}, -+ # No EV_EFI_VARIABLE_AUTHORITY events. -+ { -+ "events": [ -+ { -+ "EventType": "EV_EFI_VARIABLE_DRIVER_CONFIG", -+ "Event": {"UnicodeName": "vendor_db", "VariableData": []}, -+ } -+ ], -+ "expected": {"vendor_db": []}, -+ }, -+ # Good vendor_db event with EV_EFI_VARIABLE_AUTHORITY. 
-+ { -+ "events": [ -+ { -+ "EventType": "EV_EFI_VARIABLE_AUTHORITY", -+ "Event": { -+ "UnicodeName": "vendor_db", -+ "VariableData": [ -+ { -+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", -+ "SignatureData": "sig-data-1", -+ } -+ ], -+ }, -+ } -+ ], -+ "expected": { -+ "vendor_db": [ -+ {"SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", "SignatureData": "0xsig-data-1"} -+ ] -+ }, -+ }, -+ # Multiple vendor_db signatures. -+ { -+ "events": [ -+ { -+ "EventType": "EV_EFI_VARIABLE_AUTHORITY", -+ "Event": { -+ "UnicodeName": "vendor_db", -+ "VariableData": [ -+ { -+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", -+ "SignatureData": "sig-data-1", -+ }, -+ { -+ "SignatureOwner": "77fa9abd-0359-4d32-bd60-28f4e78f784b", -+ "SignatureData": "sig-data-2", -+ }, -+ ], -+ }, -+ } -+ ], -+ "expected": { -+ "vendor_db": [ -+ {"SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", "SignatureData": "0xsig-data-1"}, -+ {"SignatureOwner": "77fa9abd-0359-4d32-bd60-28f4e78f784b", "SignatureData": "0xsig-data-2"}, -+ ] -+ }, -+ }, -+ # Missing EventType. -+ { -+ "events": [ -+ { -+ "Event": { -+ "UnicodeName": "vendor_db", -+ "VariableData": [ -+ { -+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", -+ "SignatureData": "sig-data-1", -+ } -+ ], -+ } -+ } -+ ], -+ "expected": {"vendor_db": []}, -+ }, -+ # Wrong EventType. -+ { -+ "events": [ -+ { -+ "EventType": "EV_EFI_VARIABLE_DRIVER_CONFIG", -+ "Event": { -+ "UnicodeName": "vendor_db", -+ "VariableData": [ -+ { -+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", -+ "SignatureData": "sig-data-1", -+ } -+ ], -+ }, -+ } -+ ], -+ "expected": {"vendor_db": []}, -+ }, -+ # Missing Event. -+ { -+ "events": [{"EventType": "EV_EFI_VARIABLE_AUTHORITY"}], -+ "expected": {"vendor_db": []}, -+ }, -+ # Missing UnicodeName. 
-+ { -+ "events": [ -+ { -+ "EventType": "EV_EFI_VARIABLE_AUTHORITY", -+ "Event": { -+ "VariableData": [ -+ { -+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", -+ "SignatureData": "sig-data-1", -+ } -+ ] -+ }, -+ } -+ ], -+ "expected": {"vendor_db": []}, -+ }, -+ # Wrong UnicodeName. -+ { -+ "events": [ -+ { -+ "EventType": "EV_EFI_VARIABLE_AUTHORITY", -+ "Event": { -+ "UnicodeName": "db", -+ "VariableData": [ -+ { -+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", -+ "SignatureData": "sig-data-1", -+ } -+ ], -+ }, -+ } -+ ], -+ "expected": {"vendor_db": []}, -+ }, -+ ] -+ -+ for c in test_cases: -+ self.assertDictEqual(create_mb_policy.get_vendor_db(c["events"]), c["expected"]) -+ - def test_get_kernel(self): - test_cases = [ - {"events": [], "secureboot": False, "expected": {}}, --- -2.47.3 - diff --git a/0004-Include-thread-safe-session-management.patch b/0004-Include-thread-safe-session-management.patch new file mode 100644 index 0000000..f9fb4d1 --- /dev/null +++ b/0004-Include-thread-safe-session-management.patch @@ -0,0 +1,198 @@ +From bc28d5d228d005702f72e98646c8cad73196ccfb Mon Sep 17 00:00:00 2001 +From: Sergio Arroutbi +Date: Tue, 10 Mar 2026 13:22:04 +0100 +Subject: [PATCH 4/6] Include thread-safe session management + +Replace open-ended SQLAlchemy sessions with a context manager that +guarantees connection release, preventing QueuePool exhaustion under +multi-host push attestation load. 
+ +Key changes: +- Add double-checked locking for lazy engine initialization to prevent + race conditions in multi-threaded verifier +- Delegate session lifecycle to SessionManager.session_context() which + provides proper rollback on exception and scoped_session.remove() + cleanup, eliminating thread-local connection leaks +- Use session.expunge(agent) before exiting context manager scope so + VerfierMain instances safely cross session boundaries without + DetachedInstanceError +- Scope with-blocks narrowly: connection is returned to pool before + any subsequent DB calls (e.g. AuthSession.get_by_token) to prevent + connection hoarding across separate pool boundaries + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Sergio Arroutbi +--- + keylime/models/verifier/auth_session.py | 15 +++--- + keylime/web/verifier/session_controller.py | 6 +++ + test/test_auth_session.py | 60 ++++++++++++++++------ + 3 files changed, 59 insertions(+), 22 deletions(-) + +diff --git a/keylime/models/verifier/auth_session.py b/keylime/models/verifier/auth_session.py +index 545995f..918dfb4 100644 +--- a/keylime/models/verifier/auth_session.py ++++ b/keylime/models/verifier/auth_session.py +@@ -1,5 +1,6 @@ + import base64 + import hmac ++import threading + import uuid + from contextlib import contextmanager + from datetime import timedelta +@@ -31,19 +32,19 @@ from keylime.tpm.tpm_main import Tpm + logger = keylime_logging.init_logging("verifier") + + _engine = None ++_engine_lock = threading.Lock() ++_session_manager = SessionManager() + + + @contextmanager + def get_session_context() -> Iterator[Session]: + global _engine + if _engine is None: +- _engine = make_engine("cloud_verifier") +- session_manager = SessionManager() +- session = session_manager.make_session(_engine) +- try: ++ with _engine_lock: ++ if _engine is None: ++ _engine = make_engine("cloud_verifier") ++ with _session_manager.session_context(_engine) as session: + yield session +- finally: +- session.close() + + + 
class AuthSession(PersistableModel): +@@ -283,6 +284,8 @@ class AuthSession(PersistableModel): + .filter(VerfierMain.agent_id == auth_session.agent_id) # type: ignore[attr-defined] + .one_or_none() + ) ++ if agent: ++ session.expunge(agent) # type: ignore[no-untyped-call] + + return agent + +diff --git a/keylime/web/verifier/session_controller.py b/keylime/web/verifier/session_controller.py +index 49cd758..3faa310 100644 +--- a/keylime/web/verifier/session_controller.py ++++ b/keylime/web/verifier/session_controller.py +@@ -187,6 +187,8 @@ class SessionController(Controller): + # Check if agent exists - this is where we validate enrollment + with get_session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() ++ if agent: ++ session.expunge(agent) # type: ignore[no-untyped-call] + + if not agent: + # Agent not enrolled - return 200 with evaluation:fail +@@ -382,6 +384,8 @@ class SessionController(Controller): + def create(self, agent_id, **params): + with get_session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() ++ if agent: ++ session.expunge(agent) # type: ignore[no-untyped-call] + + if not agent: + self.respond(404, "here") +@@ -405,6 +409,8 @@ class SessionController(Controller): + def update(self, agent_id, token, **params): + with get_session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() ++ if agent: ++ session.expunge(agent) # type: ignore[no-untyped-call] + + # Look up session by token hash (tokens are never stored in plaintext) + auth_session = AuthSession.get_by_token(token) +diff --git a/test/test_auth_session.py b/test/test_auth_session.py +index 8e9ec98..2c78547 100644 +--- a/test/test_auth_session.py ++++ b/test/test_auth_session.py +@@ -2,6 +2,7 @@ + + import base64 + import unittest ++from contextlib import contextmanager + from datetime import timedelta 
+ from unittest.mock import MagicMock, PropertyMock, patch + +@@ -14,32 +15,59 @@ from keylime.shared_data import cleanup_global_shared_memory, get_shared_memory + class TestGetSessionContext(unittest.TestCase): + """Test cases for get_session_context context manager.""" + ++ def _make_mock_session_manager(self, mock_session): ++ """Create a mock SessionManager whose session_context() mirrors real lifecycle.""" ++ mock_scoped = MagicMock() ++ mock_session_manager = MagicMock() ++ mock_session_manager.make_session.return_value = mock_session ++ mock_session_manager._scoped_session = mock_scoped # pylint: disable=protected-access ++ ++ @contextmanager ++ def fake_session_context(engine): # pylint: disable=unused-argument ++ session = mock_session_manager.make_session(engine) ++ try: ++ yield session ++ session.commit() ++ except Exception: ++ session.rollback() ++ raise ++ finally: ++ scoped = mock_session_manager._scoped_session # pylint: disable=protected-access ++ if scoped is not None: ++ scoped.remove() ++ ++ mock_session_manager.session_context = fake_session_context ++ return mock_session_manager, mock_scoped ++ + @patch("keylime.models.verifier.auth_session.make_engine") +- @patch("keylime.models.verifier.auth_session.SessionManager") +- def test_session_closed_on_normal_exit(self, mock_session_manager_cls, _mock_make_engine): +- """Test that session.close() is called when context manager exits normally.""" ++ def test_session_cleanup_on_normal_exit(self, _mock_make_engine): ++ """Test that session is committed and cleaned up when context manager exits normally.""" + mock_session = MagicMock() +- mock_session_manager_cls.return_value.make_session.return_value = mock_session ++ mock_session_manager, mock_scoped = self._make_mock_session_manager(mock_session) + + with patch("keylime.models.verifier.auth_session._engine", None): +- with get_session_context() as session: +- self.assertIs(session, mock_session) ++ with 
patch("keylime.models.verifier.auth_session._session_manager", mock_session_manager): ++ with get_session_context() as session: ++ self.assertIs(session, mock_session) + +- mock_session.close.assert_called_once() ++ mock_session.commit.assert_called_once() ++ mock_scoped.remove.assert_called_once() + + @patch("keylime.models.verifier.auth_session.make_engine") +- @patch("keylime.models.verifier.auth_session.SessionManager") +- def test_session_closed_on_exception(self, mock_session_manager_cls, _mock_make_engine): +- """Test that session.close() is called even when an exception occurs.""" ++ def test_session_rollback_on_exception(self, _mock_make_engine): ++ """Test that session is rolled back and cleaned up when an exception occurs.""" + mock_session = MagicMock() +- mock_session_manager_cls.return_value.make_session.return_value = mock_session ++ mock_session_manager, mock_scoped = self._make_mock_session_manager(mock_session) + + with patch("keylime.models.verifier.auth_session._engine", None): +- with self.assertRaises(RuntimeError): +- with get_session_context(): +- raise RuntimeError("simulated error") +- +- mock_session.close.assert_called_once() ++ with patch("keylime.models.verifier.auth_session._session_manager", mock_session_manager): ++ with self.assertRaises(RuntimeError): ++ with get_session_context(): ++ raise RuntimeError("simulated error") ++ ++ mock_session.rollback.assert_called_once() ++ mock_session.commit.assert_not_called() ++ mock_scoped.remove.assert_called_once() + + + class TestAuthSessionHelpers(unittest.TestCase): +-- +2.53.0 + diff --git a/0004-verifier-Gracefully-shutdown-on-signal.patch b/0004-verifier-Gracefully-shutdown-on-signal.patch deleted file mode 100644 index 39f6327..0000000 --- a/0004-verifier-Gracefully-shutdown-on-signal.patch +++ /dev/null @@ -1,42 +0,0 @@ -From c530c332321c1daffa5bfcd08754179012dd21cc Mon Sep 17 00:00:00 2001 -From: Anderson Toshiyuki Sasaki -Date: Mon, 18 Aug 2025 12:12:16 +0000 -Subject: [PATCH 4/7] 
verifier: Gracefully shutdown on signal - -Wait for the processes to finish when interrupted by a signal. Do not -call exit(0) in the signal handler. - -Assisted-by: Claude 4 Sonnet -Signed-off-by: Anderson Toshiyuki Sasaki ---- - keylime/cloud_verifier_tornado.py | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py -index 7553ac8..7065661 100644 ---- a/keylime/cloud_verifier_tornado.py -+++ b/keylime/cloud_verifier_tornado.py -@@ -2138,7 +2138,7 @@ def main() -> None: - revocation_notifier.stop_broker() - for p in processes: - p.join() -- sys.exit(0) -+ # Do not call sys.exit(0) here as it interferes with multiprocessing cleanup - - signal.signal(signal.SIGINT, sig_handler) - signal.signal(signal.SIGTERM, sig_handler) -@@ -2159,3 +2159,11 @@ def main() -> None: - process = Process(target=server_process, args=(task_id, active_agents)) - process.start() - processes.append(process) -+ -+ # Wait for all worker processes to complete -+ try: -+ for p in processes: -+ p.join() -+ except KeyboardInterrupt: -+ # Signal handler will take care of cleanup -+ pass --- -2.47.3 - diff --git a/0005-Address-some-improvements-from-code-review.patch b/0005-Address-some-improvements-from-code-review.patch new file mode 100644 index 0000000..b463214 --- /dev/null +++ b/0005-Address-some-improvements-from-code-review.patch @@ -0,0 +1,79 @@ +From 4f5f09a69e01c0116f1977aa3a741f3678bb8e67 Mon Sep 17 00:00:00 2001 +From: Sergio Arroutbi +Date: Thu, 12 Mar 2026 15:18:56 +0100 +Subject: [PATCH 5/6] Address some improvements from code review + +Include agent variable None initialization +and address thread safety for ContextManager + +Signed-off-by: Sergio Arroutbi +--- + keylime/db/keylime_db.py | 7 ++++++- + keylime/web/verifier/session_controller.py | 3 +++ + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/keylime/db/keylime_db.py b/keylime/db/keylime_db.py +index 
6fd3f08..cf608fa 100644 +--- a/keylime/db/keylime_db.py ++++ b/keylime/db/keylime_db.py +@@ -1,4 +1,5 @@ + import os ++import threading + from configparser import NoOptionError + from contextlib import contextmanager + from sqlite3 import Connection as SQLite3Connection +@@ -89,10 +90,12 @@ def make_engine(service: str, **engine_args: Any) -> Engine: + class SessionManager: + engine: Optional[Engine] + _scoped_session: Optional[scoped_session] ++ _lock: threading.Lock + + def __init__(self) -> None: + self.engine = None + self._scoped_session = None ++ self._lock = threading.Lock() + + def make_session(self, engine: Engine) -> Session: + """ +@@ -100,7 +103,9 @@ class SessionManager: + """ + self.engine = engine + if self._scoped_session is None: +- self._scoped_session = scoped_session(sessionmaker()) ++ with self._lock: ++ if self._scoped_session is None: ++ self._scoped_session = scoped_session(sessionmaker()) + try: + self._scoped_session.configure(bind=self.engine) # type: ignore + self._scoped_session.configure(expire_on_commit=False) # type: ignore +diff --git a/keylime/web/verifier/session_controller.py b/keylime/web/verifier/session_controller.py +index 3faa310..c8664e2 100644 +--- a/keylime/web/verifier/session_controller.py ++++ b/keylime/web/verifier/session_controller.py +@@ -185,6 +185,7 @@ class SessionController(Controller): + return + + # Check if agent exists - this is where we validate enrollment ++ agent = None + with get_session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + if agent: +@@ -382,6 +383,7 @@ class SessionController(Controller): + + # POST /v3[.:minor]/agents/:agent_id/session + def create(self, agent_id, **params): ++ agent = None + with get_session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + if agent: +@@ -407,6 +409,7 @@ class SessionController(Controller): + self.respond(200, "Success", 
auth_session.render()) + + def update(self, agent_id, token, **params): ++ agent = None + with get_session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + if agent: +-- +2.53.0 + diff --git a/0005-revocations-Try-to-send-notifications-on-shutdown.patch b/0005-revocations-Try-to-send-notifications-on-shutdown.patch deleted file mode 100644 index dea95ca..0000000 --- a/0005-revocations-Try-to-send-notifications-on-shutdown.patch +++ /dev/null @@ -1,308 +0,0 @@ -From 565889ab6c90823a5096e39a58e9599fa49072f6 Mon Sep 17 00:00:00 2001 -From: Anderson Toshiyuki Sasaki -Date: Wed, 23 Jul 2025 15:39:49 +0200 -Subject: [PATCH 5/7] revocations: Try to send notifications on shutdown - -During verifier shutdown, try to send any pending revocation -notification in a best-effort manner. In future, the pending revocation -notifications should be persisted to be processed during next startup. - -Assisted-by: Claude 4 Sonnet -Signed-off-by: Anderson Toshiyuki Sasaki ---- - keylime/cloud_verifier_tornado.py | 7 + - keylime/revocation_notifier.py | 239 ++++++++++++++++++++++-------- - 2 files changed, 184 insertions(+), 62 deletions(-) - -diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py -index 7065661..89aa703 100644 ---- a/keylime/cloud_verifier_tornado.py -+++ b/keylime/cloud_verifier_tornado.py -@@ -2109,6 +2109,10 @@ def main() -> None: - # Stop server to not accept new incoming connections - server.stop() - -+ # Gracefully shutdown webhook workers to prevent connection errors -+ if "webhook" in revocation_notifier.get_notifiers(): -+ revocation_notifier.shutdown_webhook_workers() -+ - # Wait for all connections to be closed and then stop ioloop - async def stop() -> None: - await server.close_all_connections() -@@ -2136,6 +2140,9 @@ def main() -> None: - def sig_handler(*_: Any) -> None: - if run_revocation_notifier: - revocation_notifier.stop_broker() -+ # Gracefully shutdown 
webhook workers to prevent connection errors -+ if "webhook" in revocation_notifier.get_notifiers(): -+ revocation_notifier.shutdown_webhook_workers() - for p in processes: - p.join() - # Do not call sys.exit(0) here as it interferes with multiprocessing cleanup -diff --git a/keylime/revocation_notifier.py b/keylime/revocation_notifier.py -index 5a7cc4b..c154028 100644 ---- a/keylime/revocation_notifier.py -+++ b/keylime/revocation_notifier.py -@@ -18,6 +18,174 @@ broker_proc: Optional[Process] = None - - _SOCKET_PATH = "/var/run/keylime/keylime.verifier.ipc" - -+# Global webhook manager instance (initialized when needed) -+_webhook_manager: Optional["WebhookNotificationManager"] = None -+ -+ -+class WebhookNotificationManager: -+ """Manages webhook worker threads and graceful shutdown for revocation notifications.""" -+ -+ def __init__(self) -> None: -+ self._shutdown_event = threading.Event() -+ self._workers: Set[threading.Thread] = set() -+ self._workers_lock = threading.Lock() -+ -+ def notify_webhook(self, tosend: Dict[str, Any]) -> None: -+ """Send webhook notification with worker thread management.""" -+ url = config.get("verifier", "webhook_url", section="revocations", fallback="") -+ # Check if a url was specified -+ if url == "": -+ return -+ -+ # Similarly to notify(), let's convert `tosend' to str to prevent -+ # possible issues with json handling by python-requests. 
-+ tosend = json.bytes_to_str(tosend) -+ -+ def worker_webhook(tosend: Dict[str, Any], url: str) -> None: -+ is_shutdown_mode = False -+ try: -+ interval = config.getfloat("verifier", "retry_interval") -+ exponential_backoff = config.getboolean("verifier", "exponential_backoff") -+ -+ max_retries = config.getint("verifier", "max_retries") -+ if max_retries <= 0: -+ logger.info("Invalid value found in 'max_retries' option for verifier, using default value") -+ max_retries = 5 -+ -+ # During shutdown, use fewer retries but still make best effort -+ if self._shutdown_event.is_set(): -+ is_shutdown_mode = True -+ max_retries = min(max_retries, 3) # Reduce retries during shutdown but still try -+ logger.info( -+ "Shutdown mode: attempting to send critical revocation notification with %d retries", -+ max_retries, -+ ) -+ -+ # Get TLS options from the configuration -+ (cert, key, trusted_ca, key_password), verify_server_cert = web_util.get_tls_options( -+ "verifier", is_client=True, logger=logger -+ ) -+ -+ # Generate the TLS context using the obtained options -+ tls_context = web_util.generate_tls_context( -+ cert, key, trusted_ca, key_password, is_client=True, logger=logger -+ ) -+ -+ logger.info("Sending revocation event via webhook to %s ...", url) -+ for i in range(max_retries): -+ next_retry = retry.retry_time(exponential_backoff, interval, i, logger) -+ -+ with RequestsClient( -+ url, -+ verify_server_cert, -+ tls_context, -+ ) as client: -+ try: -+ res = client.post("", json=tosend, timeout=5) -+ except requests.exceptions.SSLError as ssl_error: -+ if "TLSV1_ALERT_UNKNOWN_CA" in str(ssl_error): -+ logger.warning( -+ "Keylime does not recognize certificate from peer. 
Check if verifier 'trusted_server_ca' is configured correctly" -+ ) -+ -+ raise ssl_error from ssl_error -+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e: -+ # During shutdown, only suppress errors on the final attempt after all retries exhausted -+ if is_shutdown_mode and i == max_retries - 1: -+ logger.warning( -+ "Final attempt to send revocation notification failed during shutdown: %s", e -+ ) -+ return -+ # Otherwise, let the retry logic handle it -+ raise e -+ -+ if res and res.status_code in [200, 202]: -+ if is_shutdown_mode: -+ logger.info("Successfully sent revocation notification during shutdown") -+ break -+ -+ logger.debug( -+ "Unable to publish revocation message %d times via webhook, " -+ "trying again in %d seconds. " -+ "Server returned status code: %s", -+ i + 1, -+ next_retry, -+ res.status_code, -+ ) -+ -+ # During shutdown, use shorter retry intervals to complete faster -+ if is_shutdown_mode: -+ next_retry = min(next_retry, 2.0) # Cap retry interval during shutdown -+ -+ time.sleep(next_retry) -+ -+ except Exception as e: -+ # Only suppress errors during final shutdown phase and log appropriately -+ if is_shutdown_mode: -+ logger.warning("Failed to send revocation notification during shutdown: %s", e) -+ else: -+ logger.error("Error in webhook worker: %s", e) -+ finally: -+ # Remove this worker from the active set -+ current_thread = threading.current_thread() -+ with self._workers_lock: -+ self._workers.discard(current_thread) -+ -+ w = functools.partial(worker_webhook, tosend, url) -+ t = threading.Thread(target=w, daemon=True) -+ -+ # Add this worker to the active set -+ with self._workers_lock: -+ self._workers.add(t) -+ -+ t.start() -+ -+ def shutdown_workers(self) -> None: -+ """Signal webhook workers to shut down gracefully and wait for them to complete. -+ -+ This gives workers time to complete their critical revocation notifications -+ before the service shuts down completely. 
-+ """ -+ logger.info("Shutting down webhook workers gracefully...") -+ self._shutdown_event.set() -+ -+ # Give workers generous time to complete critical revocation notifications -+ timeout = 30.0 # Increased timeout for critical security notifications -+ end_time = time.time() + timeout -+ -+ with self._workers_lock: -+ workers_to_wait = list(self._workers) -+ -+ if workers_to_wait: -+ logger.info("Waiting for %d webhook workers to complete revocation notifications...", len(workers_to_wait)) -+ -+ for worker in workers_to_wait: -+ remaining_time = max(0, end_time - time.time()) -+ if remaining_time > 0: -+ logger.debug( -+ "Waiting for webhook worker %s to complete (timeout: %.1f seconds)", worker.name, remaining_time -+ ) -+ worker.join(timeout=remaining_time) -+ if worker.is_alive(): -+ logger.warning("Webhook worker %s did not complete within timeout", worker.name) -+ else: -+ logger.warning("Timeout exceeded while waiting for webhook workers") -+ break -+ -+ # Clean up completed workers -+ with self._workers_lock: -+ self._workers.clear() -+ -+ logger.info("Webhook workers shutdown complete") -+ -+ -+def _get_webhook_manager() -> WebhookNotificationManager: -+ """Get the global webhook manager instance, creating it if needed.""" -+ global _webhook_manager -+ if _webhook_manager is None: -+ _webhook_manager = WebhookNotificationManager() -+ return _webhook_manager -+ - - # return the revocation notification methods for cloud verifier - def get_notifiers() -> Set[str]: -@@ -83,6 +251,12 @@ def stop_broker() -> None: - broker_proc.kill() # pylint: disable=E1101 - - -+def shutdown_webhook_workers() -> None: -+ """Convenience function to shutdown webhook workers using the global manager.""" -+ manager = _get_webhook_manager() -+ manager.shutdown_workers() -+ -+ - def notify(tosend: Dict[str, Any]) -> None: - assert "zeromq" in get_notifiers() - try: -@@ -127,68 +301,9 @@ def notify(tosend: Dict[str, Any]) -> None: - - - def notify_webhook(tosend: Dict[str, Any]) 
-> None: -- url = config.get("verifier", "webhook_url", section="revocations", fallback="") -- # Check if a url was specified -- if url == "": -- return -- -- # Similarly to notify(), let's convert `tosend' to str to prevent -- # possible issues with json handling by python-requests. -- tosend = json.bytes_to_str(tosend) -- -- def worker_webhook(tosend: Dict[str, Any], url: str) -> None: -- interval = config.getfloat("verifier", "retry_interval") -- exponential_backoff = config.getboolean("verifier", "exponential_backoff") -- -- max_retries = config.getint("verifier", "max_retries") -- if max_retries <= 0: -- logger.info("Invalid value found in 'max_retries' option for verifier, using default value") -- max_retries = 5 -- -- # Get TLS options from the configuration -- (cert, key, trusted_ca, key_password), verify_server_cert = web_util.get_tls_options( -- "verifier", is_client=True, logger=logger -- ) -- -- # Generate the TLS context using the obtained options -- tls_context = web_util.generate_tls_context(cert, key, trusted_ca, key_password, is_client=True, logger=logger) -- -- logger.info("Sending revocation event via webhook to %s ...", url) -- for i in range(max_retries): -- next_retry = retry.retry_time(exponential_backoff, interval, i, logger) -- -- with RequestsClient( -- url, -- verify_server_cert, -- tls_context, -- ) as client: -- try: -- res = client.post("", json=tosend, timeout=5) -- except requests.exceptions.SSLError as ssl_error: -- if "TLSV1_ALERT_UNKNOWN_CA" in str(ssl_error): -- logger.warning( -- "Keylime does not recognize certificate from peer. Check if verifier 'trusted_server_ca' is configured correctly" -- ) -- -- raise ssl_error from ssl_error -- -- if res and res.status_code in [200, 202]: -- break -- -- logger.debug( -- "Unable to publish revocation message %d times via webhook, " -- "trying again in %d seconds. 
" -- "Server returned status code: %s", -- i + 1, -- next_retry, -- res.status_code, -- ) -- -- time.sleep(next_retry) -- -- w = functools.partial(worker_webhook, tosend, url) -- t = threading.Thread(target=w, daemon=True) -- t.start() -+ """Send webhook notification using the global webhook manager.""" -+ manager = _get_webhook_manager() -+ manager.notify_webhook(tosend) - - - cert_key = None --- -2.47.3 - diff --git a/0006-Fix-race-condition-on-in-SessionManager.patch b/0006-Fix-race-condition-on-in-SessionManager.patch new file mode 100644 index 0000000..36008e5 --- /dev/null +++ b/0006-Fix-race-condition-on-in-SessionManager.patch @@ -0,0 +1,42 @@ +From 309a0ef0fe1d0917ad9d4fd7ab4327570a59cf34 Mon Sep 17 00:00:00 2001 +From: Sergio Arroutbi +Date: Thu, 12 Mar 2026 19:18:56 +0100 +Subject: [PATCH 6/6] Fix race condition on in SessionManager + +Move self.engine assignment inside the lock so it is set atomically +with _scoped_session creation. Without this, concurrent threads calling +make_session() with different engines could race on the assignment, +causing _scoped_session to be configured with a stale engine reference. + +Also log a warning if make_session() is called with a different engine +after initialization, since the scoped_session is cached and bound to +the original engine. 
+ +Suggested-by: coderabbitai +Signed-off-by: Sergio Arroutbi +--- + keylime/db/keylime_db.py | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/keylime/db/keylime_db.py b/keylime/db/keylime_db.py +index cf608fa..a622b09 100644 +--- a/keylime/db/keylime_db.py ++++ b/keylime/db/keylime_db.py +@@ -101,11 +101,13 @@ class SessionManager: + """ + To use: session = self.make_session(engine) + """ +- self.engine = engine + if self._scoped_session is None: + with self._lock: + if self._scoped_session is None: ++ self.engine = engine + self._scoped_session = scoped_session(sessionmaker()) ++ elif self.engine is not engine: ++ logger.warning("SessionManager called with different engine than originally configured") + try: + self._scoped_session.configure(bind=self.engine) # type: ignore + self._scoped_session.configure(expire_on_commit=False) # type: ignore +-- +2.53.0 + diff --git a/0006-requests_client-close-the-session-at-the-end-of-the-.patch b/0006-requests_client-close-the-session-at-the-end-of-the-.patch deleted file mode 100644 index 7fb869a..0000000 --- a/0006-requests_client-close-the-session-at-the-end-of-the-.patch +++ /dev/null @@ -1,45 +0,0 @@ -From e6fb5090df3e35c7d44bc8f7f37d420d7ee8a05c Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Wed, 4 Jun 2025 19:52:37 +0100 -Subject: [PATCH 6/7] requests_client: close the session at the end of the - resource manager - -We had an issue in the past in which the webhook worker would not -properly close the opened session. This was fixed in #1456 (Close -session in worker_webhook function). - -At some later point, in #1566 (revocation_notifier: Take into account CA -certificates added via configuration), some refactoring around the -webhook_worker() in revocation_notifier happened and it started using -the RequestsClient resource manager. 
- -However, the RequestsClient does not close the session at its end, which -in turns makes that the old issue of not closing properly the session -in the webhook_worker() returned. - -We now issue a session.close() at the end of the RequestsClient. - -Signed-off-by: Sergio Correia ---- - keylime/requests_client.py | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/keylime/requests_client.py b/keylime/requests_client.py -index 16615f7..b7da484 100644 ---- a/keylime/requests_client.py -+++ b/keylime/requests_client.py -@@ -40,7 +40,10 @@ class RequestsClient: - return self - - def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: -- pass -+ try: -+ self.session.close() -+ except Exception: -+ pass - - def request(self, method: str, url: str, **kwargs: Any) -> requests.Response: - return self.session.request(method, self.base_url + url, **kwargs) --- -2.47.3 - diff --git a/0007-Fix-linter-errors-in-PersistableModel.get-and-.all.patch b/0007-Fix-linter-errors-in-PersistableModel.get-and-.all.patch new file mode 100644 index 0000000..c385856 --- /dev/null +++ b/0007-Fix-linter-errors-in-PersistableModel.get-and-.all.patch @@ -0,0 +1,160 @@ +From e75921f02393277e8bc5ba3d058131376516a099 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Thu, 5 Mar 2026 17:27:41 +0100 +Subject: [PATCH] Fix linter errors in PersistableModel.get() and .all() + +PersistableModel.get() and .all() returned Optional[PersistableModel] +and Sequence[PersistableModel] respectively, which caused pyright errors +when subclasses like IMAPolicy or MBPolicy called cls.get() and expected +the return type to match their own class. + +Use a TypeVar bound to PersistableModel so cls.get() on a subclass +correctly returns Optional[SubclassType]. 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/models/base/persistable_model.py | 8 +++++--- + keylime/web/registrar/agents_controller.py | 6 +++--- + keylime/web/verifier/attestation_controller.py | 17 ++++++++--------- + 3 files changed, 16 insertions(+), 15 deletions(-) + +diff --git a/keylime/models/base/persistable_model.py b/keylime/models/base/persistable_model.py +index 3380eb6..4aa596e 100644 +--- a/keylime/models/base/persistable_model.py ++++ b/keylime/models/base/persistable_model.py +@@ -1,4 +1,4 @@ +-from typing import Any, Optional, Sequence ++from typing import Any, Optional, Sequence, TypeVar + + from sqlalchemy import asc, desc, or_ + from sqlalchemy.sql.expression import ClauseElement +@@ -18,6 +18,8 @@ from keylime.models.base.persistable_model_meta import PersistableModelMeta + from keylime.models.base.types.dictionary import Dictionary + from keylime.models.base.types.list import List + ++_PM = TypeVar("_PM", bound="PersistableModel") ++ + + class PersistableModel(BasicModel, metaclass=PersistableModelMeta): + """PersistableModel extends the BasicModel class to provide additional functionality for saving and retrieving +@@ -181,7 +183,7 @@ class PersistableModel(BasicModel, metaclass=PersistableModelMeta): + return session.query(subject).filter(*filter_criteria).order_by(*sort_criteria) + + @classmethod +- def get(cls, *args: Any, **kwargs: Any) -> Optional["PersistableModel"]: ++ def get(cls: type[_PM], *args: Any, **kwargs: Any) -> Optional[_PM]: + # pylint: disable=no-else-return + + if cls.schema_awaiting_processing: +@@ -203,7 +205,7 @@ class PersistableModel(BasicModel, metaclass=PersistableModelMeta): + return None + + @classmethod +- def all(cls, *args: Any, **kwargs: Any) -> Sequence["PersistableModel"]: ++ def all(cls: type[_PM], *args: Any, **kwargs: Any) -> Sequence[_PM]: + if cls.schema_awaiting_processing: + cls.process_schema() + +diff --git 
a/keylime/web/registrar/agents_controller.py b/keylime/web/registrar/agents_controller.py +index 290317f..c918f95 100644 +--- a/keylime/web/registrar/agents_controller.py ++++ b/keylime/web/registrar/agents_controller.py +@@ -27,7 +27,7 @@ class AgentsController(Controller): + self.respond(404, f"Agent with ID '{agent_id}' has not been activated") + return + +- self.respond(200, "Success", agent.render()) ++ self.respond(200, "Success", agent.render()) # type: ignore[no-untyped-call] + + # POST /v2[.:minor]/agents/[:agent_id] + def create(self, agent_id, **params): +@@ -143,10 +143,10 @@ class AgentsController(Controller): + self.respond(404, f"Agent with ID '{agent_id}' not found") + return + +- accepted = agent.verify_ak_response(auth_tag) # type: ignore[attr-defined] ++ accepted = agent.verify_ak_response(auth_tag) # type: ignore[attr-defined,no-untyped-call] + + if accepted: +- agent.commit_changes() ++ agent.commit_changes() # type: ignore[no-untyped-call] + self.respond(200, "Success") + else: + agent.delete() +diff --git a/keylime/web/verifier/attestation_controller.py b/keylime/web/verifier/attestation_controller.py +index 0e50b8a..59f280c 100755 +--- a/keylime/web/verifier/attestation_controller.py ++++ b/keylime/web/verifier/attestation_controller.py +@@ -1,6 +1,5 @@ + # pyright: reportAttributeAccessIssue=false + # Uses ORM models with dynamically-created attributes from metaclasses +-from typing import cast + + from keylime import agent_util, config, keylime_logging + from keylime.common import retry +@@ -158,12 +157,12 @@ class AttestationController(Controller): + + # GET /v3[.:minor]/agents/:agent_id/attestations + def index(self, agent_id, **_params): # type: ignore[no-untyped-def] +- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id)) ++ agent = VerifierAgent.get(agent_id) + + if not agent: + APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self) + +- results = cast(list[Attestation], 
Attestation.all(agent_id=agent_id)) ++ results = Attestation.all(agent_id=agent_id) + + resources = [ + APIResource("attestation", attestation.render_state()).include( # type: ignore[no-untyped-call] +@@ -184,8 +183,8 @@ class AttestationController(Controller): + + # GET /v3[.:minor]/agents/:agent_id/attestations/:index + def show(self, agent_id, index, **_params): # type: ignore[no-untyped-def] +- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id)) +- attestation = cast(Attestation | None, Attestation.get(agent_id=agent_id, index=index)) ++ agent = VerifierAgent.get(agent_id) ++ attestation = Attestation.get(agent_id=agent_id, index=index) + + if not agent: + APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self) +@@ -201,7 +200,7 @@ class AttestationController(Controller): + + # GET /v3[.:minor]/agents/:agent_id/attestations/latest + def show_latest(self, agent_id, **_params): # type: ignore[no-untyped-def] +- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id)) ++ agent = VerifierAgent.get(agent_id) + + if not agent: + APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self) +@@ -214,7 +213,7 @@ class AttestationController(Controller): + # POST /v3[.:minor]/agents/:agent_id/attestations + @Controller.require_json_api + def create(self, agent_id, attestation, **params): # type: ignore[no-untyped-def] # pylint: disable=unused-argument +- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id)) ++ agent = VerifierAgent.get(agent_id) + + if not agent: + APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self) +@@ -310,7 +309,7 @@ class AttestationController(Controller): + # Extract attestation from params - it should be provided by the API request + attestation = params.get("attestation") + +- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id)) ++ agent = VerifierAgent.get(agent_id) + + if not agent: + APIError("not_found", f"No enrolled agent with ID 
'{agent_id}'.").send_via(self) +@@ -368,7 +367,7 @@ class AttestationController(Controller): + # PATCH /v3[.:minor]/agents/:agent_id/attestations/latest + @Controller.require_json_api + def update_latest(self, agent_id, **params): # type: ignore[no-untyped-def] +- agent = cast(VerifierAgent | None, VerifierAgent.get(agent_id)) ++ agent = VerifierAgent.get(agent_id) + + if not agent: + APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self) +-- +2.53.0 + diff --git a/0007-tests-change-test_mba_parsing-to-not-need-keylime-in.patch b/0007-tests-change-test_mba_parsing-to-not-need-keylime-in.patch deleted file mode 100644 index 3937579..0000000 --- a/0007-tests-change-test_mba_parsing-to-not-need-keylime-in.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 39ea2efb72b383f729474a1583d4b8c097cf848a Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Thu, 6 Feb 2025 21:29:56 +0000 -Subject: [PATCH 07/10] tests: change test_mba_parsing to not need keylime - installed - -This test needs the verifier configuration file available, and on -systems that do not have keylime installed (hence, no config file), -it would fail. - -This commit changes the test so that it creates a verifier conf file -in a temporary directory with default values, so that it can use it. 
- -Signed-off-by: Sergio Correia ---- - test/test_mba_parsing.py | 52 +++++++++++++++++++++++++++++----------- - 1 file changed, 38 insertions(+), 14 deletions(-) - -diff --git a/test/test_mba_parsing.py b/test/test_mba_parsing.py -index 670a602..4ee4e3b 100644 ---- a/test/test_mba_parsing.py -+++ b/test/test_mba_parsing.py -@@ -1,27 +1,51 @@ - import os -+import tempfile - import unittest -+from configparser import RawConfigParser - -+from keylime import config -+from keylime.cmd import convert_config - from keylime.common.algorithms import Hash - from keylime.mba import mba - -+TEMPLATES_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "templates")) -+ - - class TestMBAParsing(unittest.TestCase): - def test_parse_bootlog(self): - """Test parsing binary measured boot event log""" -- mba.load_imports() -- # Use the file that triggered https://github.com/keylime/keylime/issues/1153 -- mb_log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data/mb_log.b64")) -- with open(mb_log_path, encoding="utf-8") as f: -- # Read the base64 input and remove the newlines -- b64 = "".join(f.read().splitlines()) -- pcr_hashes, boot_aggregates, measurement_data, failure = mba.bootlog_parse(b64, Hash.SHA256) -- -- self.assertFalse( -- failure, f"Parsing of measured boot log failed with: {list(map(lambda x: x.context, failure.events))}" -- ) -- self.assertTrue(isinstance(pcr_hashes, dict)) -- self.assertTrue(isinstance(boot_aggregates, dict)) -- self.assertTrue(isinstance(measurement_data, dict)) -+ # This test requires the verifier configuration file, so let's create -+ # one with the default values to use, so that we do not depend on the -+ # configuration files existing in the test system. -+ with tempfile.TemporaryDirectory() as config_dir: -+ # Let's write the config file for the verifier. 
-+ verifier_config = convert_config.process_versions(["verifier"], TEMPLATES_DIR, RawConfigParser(), True) -+ convert_config.output(["verifier"], verifier_config, TEMPLATES_DIR, config_dir) -+ -+ # As we want to use a config file from a different location, the -+ # proper way would be to define an environment variable for the -+ # module of interest, e.g. in our case it would be the -+ # KEYLIME_VERIFIER_CONFIG variable. However, the config module -+ # reads such env vars at first load, and there is no clean way -+ # to have it re-read them, so for this test we will override it -+ # manually. -+ config.CONFIG_ENV["verifier"] = os.path.abspath(os.path.join(config_dir, "verifier.conf")) -+ -+ mba.load_imports() -+ # Use the file that triggered https://github.com/keylime/keylime/issues/1153 -+ mb_log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "data/mb_log.b64")) -+ with open(mb_log_path, encoding="utf-8") as f: -+ # Read the base64 input and remove the newlines -+ b64 = "".join(f.read().splitlines()) -+ pcr_hashes, boot_aggregates, measurement_data, failure = mba.bootlog_parse(b64, Hash.SHA256) -+ -+ self.assertFalse( -+ failure, -+ f"Parsing of measured boot log failed with: {list(map(lambda x: x.context, failure.events))}", -+ ) -+ self.assertTrue(isinstance(pcr_hashes, dict)) -+ self.assertTrue(isinstance(boot_aggregates, dict)) -+ self.assertTrue(isinstance(measurement_data, dict)) - - - if __name__ == "__main__": --- -2.47.3 - diff --git a/0008-refactor-Remove-dead-code-AuthSession.authenticate_a.patch b/0008-refactor-Remove-dead-code-AuthSession.authenticate_a.patch new file mode 100644 index 0000000..224d429 --- /dev/null +++ b/0008-refactor-Remove-dead-code-AuthSession.authenticate_a.patch @@ -0,0 +1,457 @@ +From 2d809d8b537c0d9faab05ee5fe7efb85f48918f3 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 13 Mar 2026 10:53:54 +0100 +Subject: [PATCH] refactor: Remove dead code AuthSession.authenticate_agent() + 
+authenticate_agent() was superseded by _extract_identity() in +action_handler.py, which performs token-based agent authentication +directly via AuthSession.get_by_token(). The method, its helper +get_session(), the module-level _engine global, and the associated +unused imports (Session, SessionManager, make_engine) are all removed. + +The corresponding tests (test_authenticate_agent_success, +test_authenticate_agent_inactive_session, +test_authenticate_agent_no_session) are also removed. + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/models/verifier/auth_session.py | 67 +----------- + keylime/web/verifier/session_controller.py | 9 +- + test/test_auth_session.py | 113 +-------------------- + test/test_session_controller.py | 52 +++++----- + 4 files changed, 32 insertions(+), 209 deletions(-) + +diff --git a/keylime/models/verifier/auth_session.py b/keylime/models/verifier/auth_session.py +index 918dfb4..b0b40b0 100644 +--- a/keylime/models/verifier/auth_session.py ++++ b/keylime/models/verifier/auth_session.py +@@ -1,12 +1,8 @@ + import base64 + import hmac +-import threading + import uuid +-from contextlib import contextmanager + from datetime import timedelta +-from typing import Any, Dict, Iterator, Optional, Sequence +- +-from sqlalchemy.orm import Session ++from typing import Any, Dict, Optional, Sequence + + from keylime import config, keylime_logging + from keylime.crypto import ( +@@ -16,7 +12,6 @@ from keylime.crypto import ( + parse_session_token, + verify_token_hash, + ) +-from keylime.db.keylime_db import SessionManager, make_engine + from keylime.db.verifier_db import VerfierMain + from keylime.models.base import * + from keylime.shared_data import get_shared_memory +@@ -31,21 +26,6 @@ from keylime.tpm.tpm_main import Tpm + + logger = keylime_logging.init_logging("verifier") + +-_engine = None +-_engine_lock = threading.Lock() +-_session_manager = SessionManager() +- +- +-@contextmanager +-def 
get_session_context() -> Iterator[Session]: +- global _engine +- if _engine is None: +- with _engine_lock: +- if _engine is None: +- _engine = make_engine("cloud_verifier") +- with _session_manager.session_context(_engine) as session: +- yield session +- + + class AuthSession(PersistableModel): + # Explicit attribute declarations for type checkers +@@ -244,51 +224,6 @@ class AuthSession(PersistableModel): + # Slow path: query database by primary key + return cls.get(session_id) # type: ignore[return-value] + +- @classmethod +- def authenticate_agent(cls, token: str): # type: ignore[no-untyped-def] +- """Authenticate an agent using their session token. +- +- Uses indexed database lookup by token hash for performance (O(1) instead of O(n)). +- Tokens are hashed before lookup since only hashes are stored in the database. +- +- Args: +- token: The session token to verify +- +- Returns: +- VerfierMain object if authenticated, False otherwise +- """ +- # Use indexed lookup by token hash (much faster than scanning all sessions) +- auth_session = cls.get_by_token(token) +- +- if not auth_session: +- return False +- +- # Validate session is active +- if not getattr(auth_session, "active", False): +- return False +- +- # Validate session hasn't expired +- token_expires_at = getattr(auth_session, "token_expires_at", None) +- if token_expires_at and token_expires_at < Timestamp.now(): +- logger.debug( +- "Authentication attempted with expired token for agent '%s' (expired at %s)", +- getattr(auth_session, "agent_id", "unknown"), +- token_expires_at, +- ) +- return False +- +- # Use old engine to query VerfierMain (legacy model) +- with get_session_context() as session: +- agent = ( +- session.query(VerfierMain) +- .filter(VerfierMain.agent_id == auth_session.agent_id) # type: ignore[attr-defined] +- .one_or_none() +- ) +- if agent: +- session.expunge(agent) # type: ignore[no-untyped-call] +- +- return agent +- + @classmethod + def create( + cls, agent: Optional[VerfierMain], 
data: Dict[str, Any], agent_id: Optional[str] = None +diff --git a/keylime/web/verifier/session_controller.py b/keylime/web/verifier/session_controller.py +index c8664e2..9a314f2 100644 +--- a/keylime/web/verifier/session_controller.py ++++ b/keylime/web/verifier/session_controller.py +@@ -2,9 +2,8 @@ import base64 + + from keylime import config, keylime_logging + from keylime.db.verifier_db import VerfierMain +-from keylime.models.base import Timestamp ++from keylime.models.base import Timestamp, db_manager + from keylime.models.verifier import AuthSession +-from keylime.models.verifier.auth_session import get_session_context + from keylime.web.base import Controller + + logger = keylime_logging.init_logging("verifier") +@@ -186,7 +185,7 @@ class SessionController(Controller): + + # Check if agent exists - this is where we validate enrollment + agent = None +- with get_session_context() as session: ++ with db_manager.session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + if agent: + session.expunge(agent) # type: ignore[no-untyped-call] +@@ -384,7 +383,7 @@ class SessionController(Controller): + # POST /v3[.:minor]/agents/:agent_id/session + def create(self, agent_id, **params): + agent = None +- with get_session_context() as session: ++ with db_manager.session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + if agent: + session.expunge(agent) # type: ignore[no-untyped-call] +@@ -410,7 +409,7 @@ class SessionController(Controller): + + def update(self, agent_id, token, **params): + agent = None +- with get_session_context() as session: ++ with db_manager.session_context() as session: + agent = session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).one_or_none() + if agent: + session.expunge(agent) # type: ignore[no-untyped-call] +diff --git a/test/test_auth_session.py b/test/test_auth_session.py +index 
2c78547..dd554b6 100644 +--- a/test/test_auth_session.py ++++ b/test/test_auth_session.py +@@ -2,74 +2,15 @@ + + import base64 + import unittest +-from contextlib import contextmanager + from datetime import timedelta + from unittest.mock import MagicMock, PropertyMock, patch + + from keylime.crypto import generate_session_token, generate_token_salt, hash_token_for_storage + from keylime.models.base.types import Timestamp +-from keylime.models.verifier.auth_session import AuthSession, get_session_context ++from keylime.models.verifier.auth_session import AuthSession + from keylime.shared_data import cleanup_global_shared_memory, get_shared_memory + + +-class TestGetSessionContext(unittest.TestCase): +- """Test cases for get_session_context context manager.""" +- +- def _make_mock_session_manager(self, mock_session): +- """Create a mock SessionManager whose session_context() mirrors real lifecycle.""" +- mock_scoped = MagicMock() +- mock_session_manager = MagicMock() +- mock_session_manager.make_session.return_value = mock_session +- mock_session_manager._scoped_session = mock_scoped # pylint: disable=protected-access +- +- @contextmanager +- def fake_session_context(engine): # pylint: disable=unused-argument +- session = mock_session_manager.make_session(engine) +- try: +- yield session +- session.commit() +- except Exception: +- session.rollback() +- raise +- finally: +- scoped = mock_session_manager._scoped_session # pylint: disable=protected-access +- if scoped is not None: +- scoped.remove() +- +- mock_session_manager.session_context = fake_session_context +- return mock_session_manager, mock_scoped +- +- @patch("keylime.models.verifier.auth_session.make_engine") +- def test_session_cleanup_on_normal_exit(self, _mock_make_engine): +- """Test that session is committed and cleaned up when context manager exits normally.""" +- mock_session = MagicMock() +- mock_session_manager, mock_scoped = self._make_mock_session_manager(mock_session) +- +- with 
patch("keylime.models.verifier.auth_session._engine", None): +- with patch("keylime.models.verifier.auth_session._session_manager", mock_session_manager): +- with get_session_context() as session: +- self.assertIs(session, mock_session) +- +- mock_session.commit.assert_called_once() +- mock_scoped.remove.assert_called_once() +- +- @patch("keylime.models.verifier.auth_session.make_engine") +- def test_session_rollback_on_exception(self, _mock_make_engine): +- """Test that session is rolled back and cleaned up when an exception occurs.""" +- mock_session = MagicMock() +- mock_session_manager, mock_scoped = self._make_mock_session_manager(mock_session) +- +- with patch("keylime.models.verifier.auth_session._engine", None): +- with patch("keylime.models.verifier.auth_session._session_manager", mock_session_manager): +- with self.assertRaises(RuntimeError): +- with get_session_context(): +- raise RuntimeError("simulated error") +- +- mock_session.rollback.assert_called_once() +- mock_session.commit.assert_not_called() +- mock_scoped.remove.assert_called_once() +- +- + class TestAuthSessionHelpers(unittest.TestCase): + """Test cases for AuthSession helper methods.""" + +@@ -457,58 +398,6 @@ class TestAuthSessionCore(unittest.TestCase): + self.assertIn("errors", result) + self.assertIn("authentication_supported", result["errors"]) + +- @patch("keylime.models.verifier.auth_session.get_session_context") +- @patch.object(AuthSession, "get_by_token") +- def test_authenticate_agent_success(self, mock_get_by_token, mock_get_session): +- """Test successful agent authentication with valid token.""" +- # Create a mock agent +- mock_agent = MagicMock() +- mock_agent.agent_id = self.test_agent_id +- +- # Mock session query +- mock_db_session = MagicMock() +- mock_db_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_db_session) +- mock_get_session.return_value.__exit__ = 
MagicMock(return_value=False) +- +- # Mock AuthSession.get_by_token to return an active session +- mock_auth_session = MagicMock() +- mock_auth_session.session_id = "550e8400-e29b-41d4-a716-446655440000" +- mock_auth_session.active = True +- mock_auth_session.agent_id = self.test_agent_id +- mock_auth_session.token_expires_at = Timestamp.now() + timedelta(hours=1) +- mock_get_by_token.return_value = mock_auth_session +- +- result = AuthSession.authenticate_agent("test-token") +- +- # Should return the agent +- self.assertIsNotNone(result) +- self.assertEqual(result.agent_id, self.test_agent_id) # type: ignore[union-attr] +- +- @patch.object(AuthSession, "get_by_token") +- def test_authenticate_agent_inactive_session(self, mock_get_by_token): +- """Test that inactive sessions cannot authenticate.""" +- # Mock AuthSession.get_by_token to return an inactive session +- mock_auth_session = MagicMock() +- mock_auth_session.active = False +- mock_get_by_token.return_value = mock_auth_session +- +- result = AuthSession.authenticate_agent("test-token") +- +- # Should return False +- self.assertFalse(result) +- +- @patch.object(AuthSession, "get_by_token") +- def test_authenticate_agent_no_session(self, mock_get_by_token): +- """Test that authentication fails when session doesn't exist.""" +- # Mock AuthSession.get_by_token to return None (no session found) +- mock_get_by_token.return_value = None +- +- result = AuthSession.authenticate_agent("test-token") +- +- # Should return False +- self.assertFalse(result) +- + @patch.object(AuthSession, "empty") + def test_create_with_agent(self, mock_empty): + """Test AuthSession.create() with an enrolled agent.""" +diff --git a/test/test_session_controller.py b/test/test_session_controller.py +index eec7fef..f8db8db 100644 +--- a/test/test_session_controller.py ++++ b/test/test_session_controller.py +@@ -272,8 +272,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + # Verify session was deleted from cache + 
self.assertNotIn(self.test_session_id, self.sessions_cache) + +- @patch("keylime.web.verifier.session_controller.get_session_context") +- def test_update_session_agent_not_enrolled(self, mock_get_session): ++ @patch("keylime.web.verifier.session_controller.db_manager") ++ def test_update_session_agent_not_enrolled(self, mock_db_manager): + """Test update_session with unenrolled agent.""" + # Create session in cache + now = Timestamp.now() +@@ -290,8 +290,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + # Mock database query to return no agent + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = None +- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) +- mock_get_session.return_value.__exit__ = MagicMock(return_value=False) ++ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False) + + # Call update_session + params = { +@@ -319,9 +319,9 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + body = call_args[1]["body"] + self.assertEqual(body["data"]["attributes"]["evaluation"], "fail") + +- @patch("keylime.web.verifier.session_controller.get_session_context") ++ @patch("keylime.web.verifier.session_controller.db_manager") + @patch("keylime.models.verifier.auth_session.AuthSession.create_from_memory") +- def test_update_session_authentication_failed(self, mock_create_from_memory, mock_get_session): ++ def test_update_session_authentication_failed(self, mock_create_from_memory, mock_db_manager): + """Test update_session with failed authentication.""" + # Create session in cache + now = Timestamp.now() +@@ -340,8 +340,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + mock_agent.agent_id = self.test_agent_id + mock_session = MagicMock() + 
mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) +- mock_get_session.return_value.__exit__ = MagicMock(return_value=False) ++ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False) + + # Mock AuthSession.create_from_memory to return errors + mock_auth_session = MagicMock() +@@ -379,11 +379,11 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + call_args = self.controller.send_response.call_args # type: ignore[attr-defined] + self.assertEqual(call_args[1]["code"], 401) + +- @patch("keylime.web.verifier.session_controller.get_session_context") ++ @patch("keylime.web.verifier.session_controller.db_manager") + @patch("keylime.models.verifier.auth_session.AuthSession.create_from_memory") + @patch("keylime.models.verifier.auth_session.AuthSession.delete_active_session_for_agent") + @patch("keylime.web.verifier.session_controller.config") +- def test_update_session_success(self, mock_config, _mock_delete_active, mock_create_from_memory, mock_get_session): ++ def test_update_session_success(self, mock_config, _mock_delete_active, mock_create_from_memory, mock_db_manager): + """Test successful session update.""" + # Create session in cache + now = Timestamp.now() +@@ -405,8 +405,8 @@ class TestSessionControllerUpdateSession(unittest.TestCase): + mock_agent.agent_id = self.test_agent_id + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) +- mock_get_session.return_value.__exit__ = MagicMock(return_value=False) ++ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_db_manager.session_context.return_value.__exit__ = 
MagicMock(return_value=False) + + # Mock config + mock_config.getboolean.return_value = False # Don't keep in memory +@@ -525,17 +525,17 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + self.assertEqual(call_args[0][0], 404) + + @patch("keylime.models.verifier.auth_session.AuthSession.delete_stale") +- @patch("keylime.web.verifier.session_controller.get_session_context") ++ @patch("keylime.web.verifier.session_controller.db_manager") + @patch("keylime.models.verifier.auth_session.AuthSession.create") +- def test_create_success(self, mock_create, mock_get_session, _mock_delete_stale): ++ def test_create_success(self, mock_create, mock_db_manager, _mock_delete_stale): + """Test successful create endpoint.""" + # Mock database query + mock_agent = MagicMock() + mock_agent.agent_id = self.test_agent_id + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) +- mock_get_session.return_value.__exit__ = MagicMock(return_value=False) ++ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False) + + # Mock AuthSession.create + mock_auth_session = MagicMock() +@@ -553,14 +553,14 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + call_args = self.controller.respond.call_args # type: ignore[attr-defined] + self.assertEqual(call_args[0][0], 200) + +- @patch("keylime.web.verifier.session_controller.get_session_context") +- def test_create_agent_not_found(self, mock_get_session): ++ @patch("keylime.web.verifier.session_controller.db_manager") ++ def test_create_agent_not_found(self, mock_db_manager): + """Test create endpoint with non-existent agent.""" + # Mock database query to return None + mock_session = MagicMock() + 
mock_session.query.return_value.filter.return_value.one_or_none.return_value = None +- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) +- mock_get_session.return_value.__exit__ = MagicMock(return_value=False) ++ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False) + + # Call create + params = {"data": {}} +@@ -571,17 +571,17 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + call_args = self.controller.respond.call_args # type: ignore[attr-defined] + self.assertEqual(call_args[0][0], 404) + +- @patch("keylime.web.verifier.session_controller.get_session_context") ++ @patch("keylime.web.verifier.session_controller.db_manager") + @patch("keylime.models.verifier.auth_session.AuthSession.get_by_token") +- def test_update_success(self, mock_get, mock_get_session): ++ def test_update_success(self, mock_get, mock_db_manager): + """Test successful update endpoint.""" + # Mock database query + mock_agent = MagicMock() + mock_agent.agent_id = self.test_agent_id + mock_session = MagicMock() + mock_session.query.return_value.filter.return_value.one_or_none.return_value = mock_agent +- mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session) +- mock_get_session.return_value.__exit__ = MagicMock(return_value=False) ++ mock_db_manager.session_context.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_db_manager.session_context.return_value.__exit__ = MagicMock(return_value=False) + + # Mock AuthSession.get_by_token + mock_auth_session = MagicMock() +@@ -601,9 +601,9 @@ class TestSessionControllerLegacyEndpoints(unittest.TestCase): + call_args = self.controller.respond.call_args # type: ignore[attr-defined] + self.assertEqual(call_args[0][0], 200) + +- @patch("keylime.web.verifier.session_controller.get_session_context") ++ 
@patch("keylime.web.verifier.session_controller.db_manager") + @patch("keylime.models.verifier.auth_session.AuthSession.get_by_token") +- def test_update_not_found(self, mock_get, _mock_get_session): ++ def test_update_not_found(self, mock_get, _mock_db_manager): + """Test update endpoint with non-existent session.""" + # Mock AuthSession.get_by_token to return None + mock_get.return_value = None +-- +2.53.0 + diff --git a/0008-tests-skip-measured-boot-related-tests-for-s390x-and.patch b/0008-tests-skip-measured-boot-related-tests-for-s390x-and.patch deleted file mode 100644 index 14e7247..0000000 --- a/0008-tests-skip-measured-boot-related-tests-for-s390x-and.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 1496567e4b06f7a8eff9f758ea2e4e00ffa89f9b Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Wed, 4 Jun 2025 07:28:54 +0100 -Subject: [PATCH 08/10] tests: skip measured-boot related tests for s390x and - ppc64le - -Signed-off-by: Sergio Correia ---- - test/test_create_mb_policy.py | 2 ++ - test/test_mba_parsing.py | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/test/test_create_mb_policy.py b/test/test_create_mb_policy.py -index aa7a4b9..cd32bda 100644 ---- a/test/test_create_mb_policy.py -+++ b/test/test_create_mb_policy.py -@@ -5,6 +5,7 @@ Copyright 2024 Red Hat, Inc. 
- - import argparse - import os -+import platform - import unittest - - from keylime.policy import create_mb_policy -@@ -12,6 +13,7 @@ from keylime.policy import create_mb_policy - DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "data", "create-mb-policy")) - - -+@unittest.skipIf(platform.machine() in ["ppc64le", "s390x"], "ppc64le and s390x are not supported") - class CreateMeasuredBootPolicy_Test(unittest.TestCase): - def test_event_to_sha256(self): - test_cases = [ -diff --git a/test/test_mba_parsing.py b/test/test_mba_parsing.py -index 4ee4e3b..82e6086 100644 ---- a/test/test_mba_parsing.py -+++ b/test/test_mba_parsing.py -@@ -1,4 +1,5 @@ - import os -+import platform - import tempfile - import unittest - from configparser import RawConfigParser -@@ -11,6 +12,7 @@ from keylime.mba import mba - TEMPLATES_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "templates")) - - -+@unittest.skipIf(platform.machine() in ["ppc64le", "s390x"], "ppc64le and s390x are not supported") - class TestMBAParsing(unittest.TestCase): - def test_parse_bootlog(self): - """Test parsing binary measured boot event log""" --- -2.47.3 - diff --git a/0009-db-Clean-up-scoped-session-after-each-request.patch b/0009-db-Clean-up-scoped-session-after-each-request.patch new file mode 100644 index 0000000..2a59d0d --- /dev/null +++ b/0009-db-Clean-up-scoped-session-after-each-request.patch @@ -0,0 +1,205 @@ +From e935df8fb9ad36daa41e079d19964678b28be246 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 6 Mar 2026 11:47:04 +0100 +Subject: [PATCH] db: Clean up scoped session after each request +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The scoped_session was never cleaned up between requests, causing its +identity map to accumulate objects indefinitely. 
When subsequent +requests loaded objects with PKs already present in the identity map, +SQLAlchemy emitted SAWarning about identity map conflicts during flush. + +Add DBManager.remove_session() and call it from two places: + +1. ActionHandler.process_request() finally block — the primary cleanup + point, runs after all action code completes (including work done + after the response is sent via stop_action=False). + +2. ActionHandler.on_finish() — guarded by _entered_process_request + flag, only runs when prepare() returned early (e.g., auth/authz + failure) without entering process_request(). Cannot be called + unconditionally because on_finish() is triggered by finish(), which + may fire mid-action when stop_action=False is used. + +This also prevents unbounded memory growth from the identity map over +the verifier's lifetime. + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/models/base/db.py | 9 +++ + keylime/web/base/action_handler.py | 115 +++++++++++++++++------------ + 2 files changed, 78 insertions(+), 46 deletions(-) + +diff --git a/keylime/models/base/db.py b/keylime/models/base/db.py +index c758fc1..9678098 100644 +--- a/keylime/models/base/db.py ++++ b/keylime/models/base/db.py +@@ -138,6 +138,15 @@ class DBManager: + + return cast(Session, self._scoped_session()) + ++ def remove_session(self) -> None: ++ """Remove the current scoped session, releasing its connection back to the pool and clearing the identity map. ++ ++ Should be called at the end of each request to prevent stale objects from accumulating in the session across ++ request boundaries. 
++ """ ++ if self._scoped_session: ++ self._scoped_session.remove() ++ + @contextmanager + def session_context(self, session: Session | None = None) -> Iterator[Session]: + if session: +diff --git a/keylime/web/base/action_handler.py b/keylime/web/base/action_handler.py +index 8410b40..d14c9ee 100644 +--- a/keylime/web/base/action_handler.py ++++ b/keylime/web/base/action_handler.py +@@ -9,6 +9,7 @@ from tornado.web import RequestHandler + from keylime import keylime_logging + from keylime.authorization.manager import get_authorization_manager + from keylime.authorization.provider import Action, AuthorizationRequest ++from keylime.models.base.db import db_manager + from keylime.models.base.types import Timestamp # type: ignore[attr-defined] + from keylime.models.verifier.auth_session import AuthSession + from keylime.web.base.default_controller import DefaultController +@@ -549,6 +550,7 @@ class ActionHandler(RequestHandler): + self._action_call_stack: list[tuple["Controller", str]] = [] + self._received_at: int = time.time_ns() + self._finished: bool = False ++ self._entered_process_request: bool = False + + async def prepare(self) -> None: + # Tornado allows the prepare method to be overridden as async in subclasses of RequestHandler +@@ -598,59 +600,80 @@ class ActionHandler(RequestHandler): + return + + async def process_request(self) -> None: ++ self._entered_process_request = True # pylint: disable=attribute-defined-outside-init + # If a route matches the request, invoke action determined by the matching route +- if self.matching_route and self.controller: +- try: +- await self._invoke_action() +- except StopAction: +- # If the action is terminated early, continue +- pass +- except ParamDecodeError: +- # If the query, form or JSON parameters are malformed, respond using error-handling action +- await self._invoke_action("malformed_params", ignore_param_errors=True) +- except ActionDispatchError: +- # If the union of path, query, form and JSON parameters and 
do not match the method signature +- # of the action, respond using error-handling action +- await self._invoke_action("action_dispatch_error", ignore_param_errors=True) +- except RequiredContentMissing: +- # If a decorator from the Controller class has been used to mark a certain content format as required +- # for the action and the request body or Content-Type do not adhere, respond using error-handling action +- await self._invoke_action("format_not_allowed", ignore_param_errors=True) +- except Exception as err: +- # Any other exception which is not caught within the action body should be logged as an unexpected +- # internal error before responding using error-handling action +- self._log_exception(err) +- await self._invoke_action("action_exception", ignore_param_errors=True) +- +- # Handle situation in which no invoked action produces a response +- self._handle_incomplete_action() ++ try: ++ if self.matching_route and self.controller: ++ try: ++ await self._invoke_action() ++ except StopAction: ++ # If the action is terminated early, continue ++ pass ++ except ParamDecodeError: ++ # If the query, form or JSON parameters are malformed, respond using error-handling action ++ await self._invoke_action("malformed_params", ignore_param_errors=True) ++ except ActionDispatchError: ++ # If the union of path, query, form and JSON parameters and do not match the method signature ++ # of the action, respond using error-handling action ++ await self._invoke_action("action_dispatch_error", ignore_param_errors=True) ++ except RequiredContentMissing: ++ # If a decorator from the Controller class has been used to mark a certain content format as ++ # required for the action and the request body or Content-Type do not adhere, respond using ++ # error-handling action ++ await self._invoke_action("format_not_allowed", ignore_param_errors=True) ++ except Exception as err: ++ # Any other exception which is not caught within the action body should be logged as an ++ # unexpected 
internal error before responding using error-handling action ++ self._log_exception(err) ++ await self._invoke_action("action_exception", ignore_param_errors=True) ++ ++ # Handle situation in which no invoked action produces a response ++ self._handle_incomplete_action() ++ finally: ++ # Clean up the scoped session after all action code completes (including any work done after the ++ # response is sent via stop_action=False). This prevents stale objects from accumulating in the ++ # identity map across request boundaries. Must be here rather than on_finish(), because on_finish() ++ # is called by Tornado's finish() when the response is sent, which may be before action code completes. ++ db_manager.remove_session() + + def write_error(self, status_code: int, **kwargs: Any) -> None: +- if status_code == 405 and kwargs.get("exc_info"): +- # Handle situation in which the HTTP method given in the request is not supported by the server (Tornado +- # produces a 405 error by default in this case) +- +- # self.prepare() is not triggered in this case, so perform request reporting tasks +- self._process_request_id() +- logger.info("%s %s", self.request.method, self.request.path) +- # Produce a response using the appropriate error-handling action +- self._invoke_action_sync("unsupported_method", ignore_param_errors=True) +- +- elif kwargs.get("exc_info"): +- # For any other exception produced by this class and not caught elsewhere, log the exception and invoke +- # the appropriate error-handling action +- _, err, _ = kwargs["exc_info"] +- self._log_exception(err) +- self._invoke_action_sync("handler_exception", ignore_param_errors=True) ++ try: ++ if status_code == 405 and kwargs.get("exc_info"): ++ # Handle situation in which the HTTP method given in the request is not supported by the server ++ # (Tornado produces a 405 error by default in this case) ++ ++ # self.prepare() is not triggered in this case, so perform request reporting tasks ++ self._process_request_id() ++ 
logger.info("%s %s", self.request.method, self.request.path) ++ # Produce a response using the appropriate error-handling action ++ self._invoke_action_sync("unsupported_method", ignore_param_errors=True) ++ ++ elif kwargs.get("exc_info"): ++ # For any other exception produced by this class and not caught elsewhere, log the exception and ++ # invoke the appropriate error-handling action ++ _, err, _ = kwargs["exc_info"] ++ self._log_exception(err) ++ self._invoke_action_sync("handler_exception", ignore_param_errors=True) + +- else: +- # Catch-all for all other errors (typically those produced by calling Tornado's send_error method) +- self.default_controller.send_response(status_code) ++ else: ++ # Catch-all for all other errors (typically those produced by calling Tornado's send_error method) ++ self.default_controller.send_response(status_code) + +- # Handle situation in which none of the above-invoked error-handling actions produce a response +- self._handle_incomplete_action() ++ # Handle situation in which none of the above-invoked error-handling actions produce a response ++ self._handle_incomplete_action() ++ finally: ++ db_manager.remove_session() + + def on_finish(self) -> None: ++ # Clean up the scoped session only if process_request() was never ++ # entered (e.g., prepare() returned early due to auth/authz failure). ++ # When process_request() runs, its finally block handles cleanup — ++ # calling remove_session() here would be premature because on_finish() ++ # is triggered by finish() which may be called mid-action when ++ # stop_action=False is used (the action continues after the response). 
++ if not self._entered_process_request: ++ db_manager.remove_session() ++ + message = f"Sent {self.get_status()} in {self.elapsed_time}" + + if self.get_status() < 400: +-- +2.53.0 + diff --git a/0009-tests-fix-rpm-repo-tests-from-create-runtime-policy.patch b/0009-tests-fix-rpm-repo-tests-from-create-runtime-policy.patch deleted file mode 100644 index 9643ec5..0000000 --- a/0009-tests-fix-rpm-repo-tests-from-create-runtime-policy.patch +++ /dev/null @@ -1,58 +0,0 @@ -From be968fd54198042d2014ad63368b78e9d4609169 Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Thu, 22 May 2025 11:25:15 -0400 -Subject: [PATCH 09/10] tests: fix rpm repo tests from create-runtime-policy - -Signed-off-by: Sergio Correia ---- - .../create-runtime-policy/setup-rpm-tests | 28 +++++++++++++------ - 1 file changed, 20 insertions(+), 8 deletions(-) - -diff --git a/test/data/create-runtime-policy/setup-rpm-tests b/test/data/create-runtime-policy/setup-rpm-tests -index 708438c..b62729b 100755 ---- a/test/data/create-runtime-policy/setup-rpm-tests -+++ b/test/data/create-runtime-policy/setup-rpm-tests -@@ -217,20 +217,32 @@ create_rpm() { - # https://github.com/rpm-software-management/rpm/commit/96467dce18f264b278e17ffe1859c88d9b5aa4b6 - _pkgname="DUMMY-${_name}-${_version}-${_rel}.noarch.rpm" - -- _expected_pkg="${RPMSDIR}/noarch/${_pkgname}" -- [ -e "${_expected_pkg}" ] && return 0 -+ # For some reason, it may not store the built package within the -+ # noarch directory, but directly in RPMS, so let's check both -+ # locations. -+ _expected_pkg="${RPMSDIR}/noarch/${_pkgname} ${RPMSDIR}/${_pkgname}" -+ for _expected in ${_expected_pkg}; do -+ if [ -e "${_expected}" ]; then -+ echo "(create_rpm) CREATED RPM: ${_expected}" >&2 -+ return 0 -+ fi -+ done - - # OK, the package was not built where it should. Let us see if - # it was built in ~/rpmbuild instead, and if that is the case, - # copy it to the expected location. 
-- _bad_location_pkg="${HOME}/rpmbuild/RPMS/noarch/${_pkgname}" -- if [ -e "${_bad_location_pkg}" ]; then -- echo "WARNING: the package ${_pkgname} was built into ~/rpmbuild despite rpmbuild being instructed to build it at a different location. Probably a fallout from https://github.com/rpm-software-management/rpm/commit/96467dce" >&2 -- install -D -m644 "${_bad_location_pkg}" "${_expected_pkg}" -- return 0 -- fi -+ _bad_location_pkg="${HOME}/rpmbuild/RPMS/noarch/${_pkgname} ${HOME}/rpmbuild/RPMS/${_pkgname}" -+ for _bad_l in ${_bad_location_pkg}; do -+ if [ -e "${_bad_l}" ]; then -+ echo "WARNING: the package ${_pkgname} was built into ~/rpmbuild despite rpmbuild being instructed to build it at a different location. Probably a fallout from https://github.com/rpm-software-management/rpm/commit/96467dce" >&2 -+ install -D -m644 "${_bad_l}" "${RPMSDIR}/noarch/${_pkgname}" -+ echo "(create_rpm) CREATED RPM: ${RPMSDIR}/noarch/${_pkgname}" >&2 -+ return 0 -+ fi -+ done - - # Should not be here. -+ echo "create_rpm() ended with error; probably an issue with the location where the RPMs were built" >&2 - return 1 - } - --- -2.47.3 - diff --git a/0010-fix-Check-active-flag-in-_extract_identity-and-guard.patch b/0010-fix-Check-active-flag-in-_extract_identity-and-guard.patch new file mode 100644 index 0000000..c160dff --- /dev/null +++ b/0010-fix-Check-active-flag-in-_extract_identity-and-guard.patch @@ -0,0 +1,108 @@ +From 08c0c67c492ef27df53fa9bff899597c46ae6fc8 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 13 Mar 2026 13:59:23 +0100 +Subject: [PATCH] fix: Check active flag in _extract_identity and guard + receive_pop + +receive_pop() was unconditionally setting active=True and +token_expires_at even when TPM verification failed. Use `any(errs for +errs in self.errors.values())` to check for non-empty error lists, +matching the pattern already used in session_controller.py. 
+ +This didn't affect the security because on failure the state was not +persisted in the database. Now these are only set when no errors +occurred. + +_extract_identity() was not checking the session active flag, which +could allow authentication with an inactive session if the state was +persisted. Add the active check as defense-in-depth. + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/models/verifier/auth_session.py | 9 ++++--- + keylime/web/base/action_handler.py | 36 +++++++++++++++++++------ + 2 files changed, 33 insertions(+), 12 deletions(-) + +diff --git a/keylime/models/verifier/auth_session.py b/keylime/models/verifier/auth_session.py +index b0b40b0..fc5c8df 100644 +--- a/keylime/models/verifier/auth_session.py ++++ b/keylime/models/verifier/auth_session.py +@@ -630,10 +630,11 @@ class AuthSession(PersistableModel): + logger.error("Unexpected error during TPM verification: %s: %s", type(e).__name__, e) + self._add_error("verification", f"TPM verification failed: {str(e)}") + +- # Set token expiration (only on successful validation) +- session_lifetime = config.getint("verifier", "session_lifetime", fallback=config.DEFAULT_SESSION_LIFETIME) +- self.token_expires_at = Timestamp.now() + timedelta(seconds=session_lifetime) +- self.active = True ++ # Set token expiration and activate only on successful validation ++ if not any(errs for errs in self.errors.values()): ++ session_lifetime = config.getint("verifier", "session_lifetime", fallback=config.DEFAULT_SESSION_LIFETIME) ++ self.token_expires_at = Timestamp.now() + timedelta(seconds=session_lifetime) ++ self.active = True + + def _set_nonce(self): + if "nonce" not in self.values: +diff --git a/keylime/web/base/action_handler.py b/keylime/web/base/action_handler.py +index d14c9ee..68dd30d 100644 +--- a/keylime/web/base/action_handler.py ++++ b/keylime/web/base/action_handler.py +@@ -265,12 +265,18 @@ class ActionHandler(RequestHandler): + # Look up by token hash 
(tokens are never stored in plaintext) + auth_session = AuthSession.get_by_token(token) + if auth_session and auth_session.agent_id: # type: ignore[attr-defined] +- # Check if token is still valid +- now = Timestamp.now() +- if auth_session.token_expires_at >= now: # type: ignore[attr-defined] +- logger.debug("Extracted agent identity from bearer token: %s", auth_session.agent_id) # type: ignore[attr-defined] +- return (auth_session.agent_id, "agent") # type: ignore[attr-defined] +- logger.debug("Bearer token expired for agent: %s", auth_session.agent_id) # type: ignore[attr-defined] ++ # Check if session is active and token is still valid ++ if not getattr(auth_session, "active", False): ++ logger.debug("Session not active for agent: %s", auth_session.agent_id) # type: ignore[attr-defined] ++ else: ++ token_expires_at = getattr(auth_session, "token_expires_at", None) ++ if token_expires_at is None: ++ logger.debug("Session has no expiry for agent: %s", auth_session.agent_id) # type: ignore[attr-defined] ++ elif token_expires_at >= Timestamp.now(): ++ logger.debug("Extracted agent identity from bearer token: %s", auth_session.agent_id) # type: ignore[attr-defined] ++ return (auth_session.agent_id, "agent") # type: ignore[attr-defined] ++ else: ++ logger.debug("Bearer token expired for agent: %s", auth_session.agent_id) # type: ignore[attr-defined] + else: + logger.debug("Invalid bearer token provided") + else: +@@ -520,13 +526,27 @@ class ActionHandler(RequestHandler): + self.finish() + return False + ++ # Check if session is active ++ if not getattr(auth_session, "active", False): ++ logger.info( ++ "Authentication session not active for agent '%s'", ++ auth_session.agent_id, # type: ignore[attr-defined] ++ ) ++ self.set_status(401) ++ self.write( ++ {"errors": [{"status": "401", "title": "Unauthorized", "detail": "Authentication session not active"}]} ++ ) ++ self.finish() ++ return False ++ + # Check if token has expired ++ token_expires_at = 
getattr(auth_session, "token_expires_at", None) + now = Timestamp.now() +- if auth_session.token_expires_at < now: # type: ignore[attr-defined] ++ if token_expires_at is None or token_expires_at < now: + logger.info( + "Authentication token expired for agent '%s' (expired at %s)", + auth_session.agent_id, # type: ignore[attr-defined] +- auth_session.token_expires_at, # type: ignore[attr-defined] ++ token_expires_at, + ) + self.set_status(401) + self.write( +-- +2.53.0 + diff --git a/0010-mba-normalize-vendor_db-in-EV_EFI_VARIABLE_AUTHORITY.patch b/0010-mba-normalize-vendor_db-in-EV_EFI_VARIABLE_AUTHORITY.patch deleted file mode 100644 index 59cf28c..0000000 --- a/0010-mba-normalize-vendor_db-in-EV_EFI_VARIABLE_AUTHORITY.patch +++ /dev/null @@ -1,281 +0,0 @@ -From 05b694b83ecd62680b64f4a27a95562b87352a46 Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Tue, 19 Aug 2025 20:35:50 +0100 -Subject: [PATCH 10/10] mba: normalize vendor_db in EV_EFI_VARIABLE_AUTHORITY - events - -tpm2_eventlog may provide the vendor_db data as either a parsed signature -list or raw hex bytes, depending on the version used. - -In this commit we add a enrich_vendor_db_authority_variable() function to -make sure we end up with a signature list independent on the format of -the data obtained from tpm2_eventlog. - -Signed-off-by: Sergio Correia ---- - keylime/mba/elparsing/tpm_bootlog_enrich.py | 87 +++++++++++++- - test/test_mba_parsing.py | 120 ++++++++++++++++++++ - 2 files changed, 205 insertions(+), 2 deletions(-) - -diff --git a/keylime/mba/elparsing/tpm_bootlog_enrich.py b/keylime/mba/elparsing/tpm_bootlog_enrich.py -index 4551995..d2df533 100644 ---- a/keylime/mba/elparsing/tpm_bootlog_enrich.py -+++ b/keylime/mba/elparsing/tpm_bootlog_enrich.py -@@ -88,6 +88,18 @@ def getGUID(b: bytes) -> str: - # - ################################################################################## - -+EFI_SIGNATURE_OWNER_SIZE = 16 # Size of SignatureOwner field (GUID). 
-+ -+# DER (Distinguished Encoding Rules) ASN.1 constants for X.509 certificate parsing. -+# X.509 certificates start with: 0x30 0x82 [length-high] [length-low] [certificate-data...] -+# where 0x30 = SEQUENCE tag, 0x82 = long form length encoding (next 2 bytes = length). -+DER_SEQUENCE_TAG = 0x30 # ASN.1 SEQUENCE tag. -+DER_LONG_LENGTH_FORM = 0x82 # Long form length encoding (2 bytes follow). -+DER_TAG_BYTES = 2 # Bytes needed to check tag + length form (0x30 0x82). -+DER_LENGTH_BYTES = 2 # Length field size in long form encoding. -+DER_HEADER_SIZE = 4 # Total DER header size (tag + length-form + 2-byte length). -+MAX_HEADER_SEARCH_BYTES = 100 # Maximum bytes to search for DER certificate start after GUID. -+ - ################################################################################## - # Parse EFI_SIGNATURE_DATA - ################################################################################## -@@ -95,10 +107,10 @@ def getGUID(b: bytes) -> str: - - def getKey(b: bytes, start: int, size: int) -> Dict[str, Any]: - key = {} -- signatureOwner = getGUID(b[start : start + 16]) -+ signatureOwner = getGUID(b[start : start + EFI_SIGNATURE_OWNER_SIZE]) - key["SignatureOwner"] = signatureOwner - -- signatureData = b[start + 16 : start + size] -+ signatureData = b[start + EFI_SIGNATURE_OWNER_SIZE : start + size] - key["SignatureData"] = signatureData.hex() - return key - -@@ -200,6 +212,73 @@ def enrich_boot_variable(d: Dict[str, Any]) -> None: - d["VariableData"] = k - - -+def enrich_vendor_db_authority_variable(d: Dict[str, Any]) -> None: -+ """Normalize vendor_db in EV_EFI_VARIABLE_AUTHORITY events to signature list format. 
-+ -+ Different versions of tmp2_eventlog may provide vendor_db data in different formats: -+ - Some versions output hex strings containing raw signature data (GUID + certificate data) -+ - Other versions output parsed signature lists -+ -+ This function ensures we always end up with a list of signatures, regardless of -+ how tpm2_eventlog provided the data. -+ """ -+ # We are only interested in the vendor_db variable, and when it is an hex string. -+ if d.get("UnicodeName") != "vendor_db": -+ return -+ -+ if not isinstance(d.get("VariableData"), str): -+ return -+ -+ try: -+ b = bytes.fromhex(d["VariableData"]) -+ signatures = [] -+ -+ offset = 0 -+ while offset < len(b): -+ if offset + EFI_SIGNATURE_OWNER_SIZE >= len(b): -+ break -+ -+ # Extract GUID at current offset. -+ guid_bytes = b[offset : offset + EFI_SIGNATURE_OWNER_SIZE] -+ guid = getGUID(guid_bytes) -+ -+ # Look for DER certificate signature (SEQUENCE + long form length) after some header data. -+ cert_start = None -+ search_end = min(offset + EFI_SIGNATURE_OWNER_SIZE + MAX_HEADER_SEARCH_BYTES, len(b) - DER_TAG_BYTES) -+ for i in range(offset + EFI_SIGNATURE_OWNER_SIZE, search_end): -+ if b[i] == DER_SEQUENCE_TAG and b[i + 1] == DER_LONG_LENGTH_FORM: -+ cert_start = i -+ break -+ -+ if cert_start is None: -+ break -+ -+ # Parse DER certificate length. -+ if cert_start + DER_HEADER_SIZE > len(b): -+ break -+ -+ cert_length_bytes = b[cert_start + DER_TAG_BYTES : cert_start + DER_HEADER_SIZE] -+ cert_length = (cert_length_bytes[0] << 8) | cert_length_bytes[1] -+ cert_end = cert_start + DER_HEADER_SIZE + cert_length -+ -+ if cert_end > len(b): -+ break -+ -+ # Extract certificate data (from GUID start to end of certificate). -+ sig_data = b[offset + EFI_SIGNATURE_OWNER_SIZE : cert_end] -+ -+ signatures.append({"SignatureOwner": guid, "SignatureData": sig_data.hex()}) -+ -+ # Move to next signature. 
-+ offset = cert_end -+ -+ if signatures: -+ d["VariableData"] = signatures -+ except Exception: -+ # If parsing fails, leave the hex string unchanged. -+ pass -+ -+ - def enrich(log: Dict[str, Any]) -> None: - """Make the given BIOS boot log easier to understand and process""" - if "events" in log: -@@ -220,6 +299,10 @@ def enrich(log: Dict[str, Any]) -> None: - if "Event" in event: - d = event["Event"] - enrich_boot_variable(d) -+ elif t == "EV_EFI_VARIABLE_AUTHORITY": -+ if "Event" in event: -+ d = event["Event"] -+ enrich_vendor_db_authority_variable(d) - - - def main() -> None: -diff --git a/test/test_mba_parsing.py b/test/test_mba_parsing.py -index 82e6086..04d7afb 100644 ---- a/test/test_mba_parsing.py -+++ b/test/test_mba_parsing.py -@@ -9,6 +9,11 @@ from keylime.cmd import convert_config - from keylime.common.algorithms import Hash - from keylime.mba import mba - -+try: -+ from keylime.mba.elparsing import tpm_bootlog_enrich -+except Exception: -+ unittest.skip(f"tpm_bootlog_enrich not available, architecture ({platform.machine()}) not supported") -+ - TEMPLATES_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "templates")) - - -@@ -49,6 +54,121 @@ class TestMBAParsing(unittest.TestCase): - self.assertTrue(isinstance(boot_aggregates, dict)) - self.assertTrue(isinstance(measurement_data, dict)) - -+ def test_vendor_db_enrichment_actual_hex(self): -+ """Test vendor_db enrichment with actual hex string from real vendor_db event. -+ -+ Different versions of tpm2_eventlog may provide vendor_db data in different formats. -+ This test uses actual hex data and verifies it gets normalized to signature list format. -+ """ -+ # Actual vendor_db hex string from real EV_EFI_VARIABLE_AUTHORITY event. 
-+ actual_vendor_db_hex = "dbed230279908843af772d65b1c35d3b308203943082027ca00302010202090083730d2b7280d15a300d06092a864886f70d01010b0500305f31163014060355040a0c0d526564204861742c20496e632e3121301f06035504030c18526564204861742053656375726520426f6f7420434120353122302006092a864886f70d0109011613736563616c657274407265646861742e636f6d301e170d3230303630393038313533365a170d3338303131383038313533365a305f31163014060355040a0c0d526564204861742c20496e432e3121301f06035504030c18526564204861742053656375726520426f6f7420434120353122302006092a864886f70d0109011613736563616c657274407265646861742e636f6d30820122300d06092a864886f70d01010105000382010f003082010a0282010100cebaea41171c81a18809bfa1d4a9fa532e9d9ebcfc3b289c3052a00bf4000f36c88341f6a9c915496564d5b2769e58c12e1eeacf93386b47d6ba92c5f800e777a55769df41b1c4905b2d20c174aa038680b6a459efa988445e5240d47715a104859ceff3c69ff30f0fd68446e466dc266ad6d88a6e474acae34c431574997a06328ce033bfe5f846673dea0e943bbf3ddd8bf67f308c45540ba4de23355a997305d880e765141a07302c7386b02da3a636a64d815d91a767bbea3b5b828a9ccf83da31d1543416bc1907172a944ef0cecf0dbaf4fbe4d44889238b8cdc8e4513d77aa8d5e5840313520206c2d590763ab5d7b89d7ab0c9d09869fb8e0d01f5850203010001a3533051301d0603551d0e04160414cc6fa5e72868ba494e939bbd680b9144769a9f8f301f0603551d23041830168014cc6fa5e72868ba494e939bbd680b9144769a9f8f300f0603551d130101ff040530030101ff300d06092a864886f70d01010b050003820101001de75e426a66cc723e9b5cc9afa3ca542eed64abc0b917be27a91e58b1593c4d1174d1971a520584058ad9f085c8f5ec8f9ce9e7086dbb3acbfa6f3c33e6784d75bddfc095729f0350d2752a7cb481e08762945cefcf6bda3ae3bf6e18743455500c22518eaa5830bebd3e304db697b5131b6daf6c183b714a09a18917a7e718f56d51b1d310c80ed6e43219024b1ab2d2dc29a326951d0106e452697806d3304444b07577cc54ade46e2222ff5dff93060cf9983a9c39b70c81d0f3f807a7098b6f9c8ae1adfc419850a65f0bbaa57f1cfc838d06592e9e6ebff43ec31a746625948a5dbf21b6139b9f67f87edc421f4c0edd88737d8c95d03f77c190b864f1" -+ -+ # Expected parsed format - certificate data without the GUID prefix. 
-+ expected_cert_data = actual_vendor_db_hex[32:] # Skip first 32 chars (16 bytes GUID). -+ expected_parsed_format = [ -+ {"SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", "SignatureData": expected_cert_data} -+ ] -+ -+ # Test event matching the structure from the debug logs. -+ test_event = { -+ "VariableName": "d719b2cb-3d3a-4596-a3bc-dad00e67656f", -+ "UnicodeNameLength": 9, -+ "VariableDataLength": len(actual_vendor_db_hex) // 2, -+ "UnicodeName": "vendor_db", -+ "VariableData": actual_vendor_db_hex, -+ } -+ -+ # Apply vendor_db enrichment. -+ tpm_bootlog_enrich.enrich_vendor_db_authority_variable(test_event) # type: ignore[reportPossiblyUnboundVariable] -+ -+ # Verify that VariableData gets normalized to signature list format. -+ self.assertIsInstance(test_event["VariableData"], list) -+ self.assertEqual(len(test_event["VariableData"]), 1) -+ -+ signature = test_event["VariableData"][0] -+ self.assertIn("SignatureOwner", signature) -+ self.assertIn("SignatureData", signature) -+ # pylint: disable=invalid-sequence-index -+ self.assertEqual(signature["SignatureOwner"], expected_parsed_format[0]["SignatureOwner"]) -+ self.assertEqual(signature["SignatureData"], expected_parsed_format[0]["SignatureData"]) -+ # pylint: enable=invalid-sequence-index -+ -+ def test_vendor_db_enrichment_multiple_certificates_real_data(self): -+ """Test vendor_db enrichment with real data containing multiple certificates. -+ -+ This test uses actual hex data from a real secureboot db variable containing -+ multiple Microsoft certificates to verify correct parsing of complex vendor_db data. -+ """ -+ # Real hex string from secureboot db variable with multiple certificates. -+ # The format of db is similar to vendor_db, so we use it here to test tpm_bootlog_enrich with multiple certificates. 
-+ real_vendor_db_hex = "a159c0a5e494a74a87b5ab155c2bf0720706000000000000eb050000bd9afa775903324dbd6028f4e78f784b308205d7308203bfa003020102020a61077656000000000008300d06092a864886f70d01010b0500308188310b3009060355040613025553311330110603550408130a57617368696e67746f6e3110300e060355040713075265646d6f6e64311e301c060355040a13154d6963726f736f667420436f72706f726174696f6e31323030060355040313294d6963726f736f667420526f6f7420436572746966696361746520417574686f726974792032303130301e170d3131313031393138343134325a170d3236313031393138353134325a308184310b3009060355040613025553311330110603550408130a57617368696e67746f6e3110300e060355040713075265646d6f6e64311e301c060355040a13154d6963726f736f667420436f72706f726174696f6e312e302c060355040313254d6963726f736f66742057696e646f77732050726f64756374696f6e20504341203230313130820122300d06092a864886f70d01010105000382010f003082010a0282010100dd0cbba2e42e09e3e7c5f79669bc0021bd693333efad04cb5480ee0683bbc52084d9f7d28bf338b0aba4ad2d7c627905ffe34a3f04352070e3c4e76be09cc03675e98a31dd8d70e5dc37b5744696285b8760232cbfdc47a567f751279e72eb07a6c9b91e3b53357ce5d3ec27b9871cfeb9c923096fa84691c16e963c41d3cba33f5d026a4dec691f25285c36fffd43150a94e019b4cfdfc212e2c25b27ee2778308b5b2a096b22895360162cc0681d53baec49f39d618c85680973445d7da2542bdd79f715cf355d6c1c2b5ccebc9c238b6f6eb526d93613c34fd627aeb9323b41922ce1c7cd77e8aa544ef75c0b048765b44318a8b2e06d1977ec5a24fa48030203010001a38201433082013f301006092b06010401823715010403020100301d0603551d0e04160414a92902398e16c49778cd90f99e4f9ae17c55af53301906092b0601040182371402040c1e0a00530075006200430041300b0603551d0f040403020186300f0603551d130101ff040530030101ff301f0603551d23041830168014d5f656cb8fe8a25c6268d13d94905bd7ce9a18c430560603551d1f044f304d304ba049a0478645687474703a2f2f63726c2e6d6963726f736f66742e636f6d2f706b692f63726c2f70726f64756374732f4d6963526f6f4365724175745f323031302d30362d32332e63726c305a06082b06010505070101044e304c304a06082b06010505073002863e687474703a2f2f7777772e6d6963726f736f66742e636f6d2f706b692f63657274732f4d69635
26f6f4365724175745f323031302d30362d32332e637274300d06092a864886f70d01010b0500038202010014fc7c7151a579c26eb2ef393ebc3c520f6e2b3f101373fea868d048a6344d8a960526ee3146906179d6ff382e456bf4c0e528b8da1d8f8adb09d71ac74c0a36666a8cec1bd70490a81817a49bb9e240323676c4c15ac6bfe404c0ea16d3acc368ef62acdd546c503058a6eb7cfe94a74e8ef4ec7c867357c2522173345af3a38a56c804da0709edf88be3cef47e8eaef0f60b8a08fb3fc91d727f53b8ebbe63e0e33d3165b081e5f2accd16a49f3da8b19bc242d090845f541dff89eaba1d47906fb0734e419f409f5fe5a12ab21191738a2128f0cede73395f3eab5c60ecdf0310a8d309e9f4f69685b67f51886647198da2b0123d812a680577bb914c627bb6c107c7ba7a8734030e4b627a99e9cafcce4a37c92da4577c1cfe3ddcb80f5afad6c4b30285023aeab3d96ee4692137de81d1f675190567d393575e291b39c8ee2de1cde445735bd0d2ce7aab1619824658d05e9d81b367af6c35f2bce53f24e235a20a7506f6185699d4782cd1051bebd088019daa10f105dfba7e2c63b7069b2321c4f9786ce2581706362b911203cca4d9f22dbaf9949d40ed1845f1ce8a5c6b3eab03d370182a0a6ae05f47d1d5630a32f2afd7361f2a705ae5425908714b57ba7e8381f0213cf41cc1c5b990930e88459386e9b12099be98cbc595a45d62d6a0630820bd7510777d3df345b99f979fcb57806f33a904cf77a4621c597ea159c0a5e494a74a87b5ab155c2bf072da05000000000000be050000bd9afa775903324dbd6028f4e78f784b308205aa30820392a0030201020213330000001a888b9800562284c100000000001a300d06092a864886f70d01010b0500308188310b3009060355040613025553311330110603550408130a57617368696e67746f6e3110300e060355040713075265646d6f6e64311e301c060355040a13154d6963726f736f667420436f72706f726174696f6e31323030060355040313294d6963726f736f667420526f6f7420436572746966696361746520417574686f726974792032303130301e170d3233303631333138353832395a170d3335303631333139303832395a304c310b3009060355040613025553311e301c060355040a13154d6963726f736f667420436f72706f726174696f6e311d301b0603550403131457696e646f77732055454649204341203230323330820122300d06092a864886f70d01010105000382010f003082010a0282010100bcb235d15479b48fcc812a6eb312d69397307c385cbf7992190a0f2d0afebfe0a8d8323fd2ab6f6f81c14d176945cf858027a37cb331cca5a74df943d05a2fd7181bd258960
539a395b7bcdd79c1a0cf8fe2531e2b2662a81cae361e4fa1dfb913ba0c25bb24656701aa1d4110b736c16b2eb56c10d34e96d09f2aa1f1eda1150b8295c5ff638a13b592341e315e6111ae5dccf110e64c79c972b2348a82562dab0f7cc04f938e59754186ac091009f2516550b5f521b326398daac491b3dcac642306cd355f0d42499c4f0dce80838259fedf4b44e140c83d63b6cfb4420d395cd242100c08c274eb1cdc6ebc0aac98bbccfa1e3ca78316c5db02dad996df6b0203010001a382014630820142300e0603551d0f0101ff040403020186301006092b06010401823715010403020100301d0603551d0e04160414aefc5fbbbe055d8f8daa585473499417ab5a5272301906092b0601040182371402040c1e0a00530075006200430041300f0603551d130101ff040530030101ff301f0603551d23041830168014d5f656cb8fe8a25c6268d13d94905bd7ce9a18c430560603551d1f044f304d304ba049a0478645687474703a2f2f63726c2e6d6963726f736f66742e636f6d2f706b692f63726c2f70726f64756374732f4d6963526f6f4365724175745f323031302d30362d32332e63726c305a06082b06010505070101044e304c304a06082b06010505073002863e687474703a2f2f7777772e6d6963726f736f66742e636f6d2f706b692f63657274732f4d6963526f6f4365724175745f323031302d30362d32332e637274300d06092a864886f70d01010b050003820201009fc9b6ff6ee19c3b55f6fe8b39dd61046fd0ad63cd17764aa843898df8c6f28c5e90e1e468a515ecb8d3600c40571ffb5e357261de97316c79a0f516ae4b1ced010ceff7570f42301869f8a1a32e9792b8be1bfe2b865e4242118f8e704d90a7fd0163f264bf9be27b0881cf49f23717dff1f972d3c31dc390454de68006bdfde56a69ceb37e4e315b8473a8e8723f2735c97c20ce009b4fe04cb43669cbf734111174127aa88c2e816ca650ad19faa846456fb16773c36be340e82a698f2410e1296e8d1688ee8e7f6693026f5b9e048ccc811cad9754f1182e7e5290bc51de2a0eae66eabc646ea09164e42f12a8bce76bbac71b9b791a6466f143b4d1c346213881794cfaf0310dd379ff7a12a51dd9ddaca20f7182f793ff5ca161ae65f21481ed795a9a87ea607bcbb34f7534cabaa1efa2f6a28045a18b2781cdd577383eca4edd28ea58bac5a029de868c88fc952751ddabd3d05b0d77c76c8f55d7d4a20e5be4344614161de31cd66d99ad4cec71732fabceb2b429de553053393a328bf0ea9c88123b056819bfcf875210fbd61360f34164f4085781cb9d11a58ef4e527f5a33aece43d4ab7cef9880d9fbdca6dd24abc58768e3204946eddf4cf6d476dc2d76adc8771eaa4b
fef67979cb8c780362a2a59c9c00ca744a073b58ccf385aaef8bb8695f044ad667a33ed71e4458783e5a7cea240d072d24800faf91aa159c0a5e494a74a87b5ab155c2bf072400600000000000024060000bd9afa775903324dbd6028f4e78f784b308205aa30820392a0030201020213330000001a888b9800562284c100000000001a300d06092a864886f70d01010b0500308188310b3009060355040613025553311330110603550408130a57617368696e67746f6e3110300e060355040713075265646d6f6e64311e301c060355040a13154d6963726f736f667420436f72706f726174696f6e31323030060355040313294d6963726f736f667420526f6f7420436572746966696361746520417574686f726974792032303130301e170d3233303631333138353832395a170d3335303631333139303832395a304c310b3009060355040613025553311e301c060355040a13154d6963726f736f667420436f72706f726174696f6e311d301b0603550403131457696e646f77732055454649204341203230323330820122300d06092a864886f70d01010105000382010f003082010a0282010100bcb235d15479b48fcc812a6eb312d69397307c385cbf7992190a0f2d0afebfe0a8d8323fd2ab6f6f81c14d176945cf858027a37cb331cca5a74df943d05a2fd7181bd258960539a395b7bcdd79c1a0cf8fe2531e2b2662a81cae361e4fa1dfb913ba0c25bb24656701aa1d4110b736c16b2eb56c10d34e96d09f2aa1f1eda1150b8295c5ff638a13b592341e315e6111ae5dccf110e64c79c972b2348a82562dab0f7cc04f938e59754186ac091009f2516550b5f521b326398daac491b3dcac642306cd355f0d42499c4f0dce80838259fedf4b44e140c83d63b6cfb4420d395cd242100c08c274eb1cdc6ebc0aac98bbccfa1e3ca78316c5db02dad996df6b0203010001a382014630820142300e0603551d0f0101ff040403020186301006092b06010401823715010403020100301d0603551d0e04160414aefc5fbbbe055d8f8daa585473499417ab5a5272301906092b0601040182371402040c1e0a00530075006200430041300f0603551d130101ff040530030101ff301f0603551d23041830168014d5f656cb8fe8a25c6268d13d94905bd7ce9a18c430560603551d1f044f304d304ba049a0478645687474703a2f2f63726c2e6d6963726f736f66742e636f6d2f706b692f63726c2f70726f64756374732f4d6963526f6f4365724175745f323031302d30362d32332e63726c305a06082b06010505070101044e304c304a06082b06010505073002863e687474703a2f2f7777772e6d6963726f736f66742e636f6d2f706b692f63657274732f4d6963526f6f436
5724175745f323031302d30362d32332e637274300d06092a864886f70d01010b050003820201009fc9b6ff6ee19c3b55f6fe8b39dd61046fd0ad63cd17764aa843898df8c6f28c5e90e1e468a515ecb8d3600c40571ffb5e357261de97316c79a0f516ae4b1ced010ceff7570f42301869f8a1a32e9792b8be1bfe2b865e4242118f8e704d90a7fd0163f264bf9be27b0881cf49f23717dff1f972d3c31dc390454de68006bdfde56a69ceb37e4e315b8473a8e8723f2735c97c20ce009b4fe04cb43669cbf734111174127aa88c2e816ca650ad19faa846456fb16773c36be340e82a698f2410e1296e8d1688ee8e7f6693026f5b9e048ccc811cad9754f1182e7e5290bc51de2a0eae66eabc646ea09164e42f12a8bce76bbac71b9b791a6466f143b4d1c346213881794cfaf0310dd379ff7a12a51dd9ddaca20f7182f793ff5ca161ae65f21481ed795a9a87ea607bcbb34f7534cabaa1efa2f6a28045a18b2781cdd577383eca4edd28ea58bac5a029de868c88fc952751ddabd3d05b0d77c76c8f55d7d4a20e5be4344614161de31cd66d99ad4cec71732fabceb2b429de553053393a328bf0ea9c88123b056819bfcf875210fbd61360f34164f4085781cb9d11a58ef4e527f5a33aece43d4ab7cef9880d9fbdca6dd24abc58768e3204946eddf4cf6d476dc2d76adc8771eaa4bfef67979cb8c780362a2a59c9c00ca744a073b58ccf385aaef8bb8695f044ad667a33ed71e4458783e5a7cea240d072d24800faf91aa159c0a5e494a74a87b5ab155c2bf072d405000000000000b8050000bd9afa775903324dbd6028f4e78f784b308205a43082038ca0030201020213330000001636bf36899f1575cc000000000016300d06092a864886f70d01010b0500305a310b3009060355040613025553311e301c060355040a13154d6963726f736f667420436f72706f726174696f6e312b3029060355040313224d6963726f736f667420525341204465766963657320526f6f742043412032303231301e170d3233303631333139323134375a170d3338303631333139333134375a304e310b3009060355040613025553311e301c060355040a13154d6963726f736f667420436f72706f726174696f6e311f301d060355040313164d6963726f736f66742055454649204341203230323330820122300d06092a864886f70d01010105000382010f003082010a0282010100bd222aaeef1a3185137851a79bfdfc78d163b81a9b63f51206db4b41356a6fabf56a04cc97cfbbd408091a613a0de6b3a046ff09adde8024dc1280f25fd916ede2429dcd2f4d6102618a1c4b1d186239869771ad3e7f5d71134be92a00c1bed5b7009f5e65b22c1aff74edea83d239893335737da0a2f
a40e4665058aafc87e85c208334ecabe20bc55f3eff482b119126ef186e57c59f187399efe16a742bbb2f7f508e1dda3d76b604e5cc2e10c7831b83a3e4a51313716e3378a3a83cec48265ec7c65e0d879aaacc553481ad9d90f5e69663a6e8072017c8931ed2aea4dcae7d59bf885e620cae5bf22940561d2640de85a6ad56d1cf5547765f9c39db030203010001a382016d30820169300e0603551d0f0101ff040403020186301006092b06010401823715010403020100301d0603551d0e0416041481aa6b3244c935bce0d6628af39827421e32497d301906092b0601040182371402040c1e0a00530075006200430041300f0603551d130101ff040530030101ff301f0603551d230418301680148444860600983f2caab3c589f3ac2ec9e69d090330650603551d1f045e305c305aa058a0568654687474703a2f2f7777772e6d6963726f736f66742e636f6d2f706b696f70732f63726c2f4d6963726f736f667425323052534125323044657669636573253230526f6f742532304341253230323032312e63726c307206082b0601050507010104663064306206082b060105050730028656687474703a2f2f7777772e6d6963726f736f66742e636f6d2f706b696f70732f63657274732f4d6963726f736f667425323052534125323044657669636573253230526f6f742532304341253230323032312e637274300d06092a864886f70d01010b050003820201000760132a5387120f1af35a149517e5d8d795549b8b0edd91a5edc75d47509345b795885f1719416376b582b0a8c59d9915368949be12c266fb830cb081cee5a4abc2a09aebf5073cfe21f89adc19210c9e242cd15ca2160a4bebec489cb15b74db0164c2e3806aab1acd771b6a399ab7ba7044ff6794c58106f0cb810493272199bd8788149c22710e0b2f5cbeb890547cc01ebc2b9ba356174b97e7e37f1334fab0346b9bf6b22df7d87bd820d35ca7954c4f2af9e71e68affc6c8fc8863d9fc8d1ef4d1ac8d1f6fd2d7ce3e841c1ea27c1fb8e25865a89a610becee38fa57bc41aa0e87590fd21b0c1a3c516235e3cce2ffe8c98bf085cf6b9c5b23cb6ccc8ec7fd27774cbedf396c98b8d1c2a890fa38fbdce2a85469a23a28f42c099d6ea851f6119be1635b775a09580650687d40b35c8c4aa0ecea20a6360ca4b2b5c270482af3e58837a5ad8673f1053f50c16f7264b8a80b9c51fa0ded8d361441445a7f5ab9a8817fdb79454028be4b753a13e8d9e5082a800e078941bbeb3c4301fb20edbf04690c1e657fe7cc170b21c4b64d910031b34fb66cf826e9e40a81137f2658b2109af3c93623df3bc83dd3f559015d231af11e7f8caa082e1b9cfb35793c75537ac7f41bf1f963cf32694f9d8d255248a8
ab641f0e016c023928c710a4c6a0d1955f73a9c922196a1d5f80a8c9dbfc9ebca8842fc4bb4efff27302161" -+ -+ # Test event structure matching EV_EFI_VARIABLE_AUTHORITY format. -+ test_event = { -+ "VariableName": "d719b2cb-3d3a-4596-a3bc-dad00e67656f", -+ "UnicodeNameLength": 9, -+ "VariableDataLength": len(real_vendor_db_hex) // 2, -+ "UnicodeName": "vendor_db", -+ "VariableData": real_vendor_db_hex, -+ } -+ -+ # Apply vendor_db enrichment -+ tpm_bootlog_enrich.enrich_vendor_db_authority_variable(test_event) # type: ignore[reportPossiblyUnboundVariable] -+ -+ # Verify enrichment results. -+ self.assertIsInstance(test_event["VariableData"], list) -+ # Real data should contain multiple certificates (4 in this case based on the structure). -+ self.assertGreater(len(test_event["VariableData"]), 1, "Real vendor_db should contain multiple certificates") -+ -+ # Verify each certificate has the required structure. -+ for i, signature in enumerate(test_event["VariableData"]): -+ with self.subTest(certificate=i): -+ self.assertIn("SignatureOwner", signature, f"Certificate {i} missing SignatureOwner") -+ self.assertIn("SignatureData", signature, f"Certificate {i} missing SignatureData") -+ -+ # Verify SignatureOwner is a valid GUID format. -+ guid = signature["SignatureOwner"] -+ self.assertRegex( -+ guid, -+ r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", -+ f"Certificate {i} has invalid GUID format: {guid}", -+ ) -+ -+ # Verify SignatureData is hex string and not empty. -+ cert_data = signature["SignatureData"] -+ self.assertIsInstance(cert_data, str, f"Certificate {i} SignatureData should be string") -+ self.assertGreater(len(cert_data), 0, f"Certificate {i} SignatureData should not be empty") -+ # Verify it's valid hex. 
-+ try: -+ bytes.fromhex(cert_data) -+ except ValueError: -+ self.fail(f"Certificate {i} SignatureData is not valid hex: {cert_data[:100]}...") -+ -+ def test_vendor_db_enrichment_preserves_signature_lists(self): -+ """Test that enrichment preserves VariableData that's already in signature list format""" -+ # VariableData that is already in the expected signature list format. -+ signature_list_format = [ -+ { -+ "SignatureOwner": "0223eddb-9079-4388-af77-2d65b1c35d3b", -+ "SignatureData": "308203943082027ca00302010202090083730d2b7280d15a300d06092a864886f70d01010b0500305f31163014060355040a0c0d526564204861742c20496e632e3121301f06035504030c18526564204861742053656375726520426f6f7420434120353122302006092a864886f70d0109011613736563616c657274407265646861742e636f6d301e170d3230303630393038313533365a170d3338303131383038313533365a305f31163014060355040a0c0d526564204861742c20496e632e3121301f06035504030c18526564204861742053656375726520426f6f7420434120353122302006092a864886f70d0109011613736563616c657274407265646861742e636f6d30820122300d06092a864886f70d01010105000382010f003082010a0282010100cebaea41171c81a18809bfa1d4a9fa532e9d9ebcfc3b289c3052a00bf4000f36c88341f6a9c915496564d5b2769e58c12e1eeacf93386b47d6ba92c5f800e777a55769df41b1c4905b2d20c174aa038680b6a459efa988445e5240d47715a104859ceff3c69ff30f0fd68446e466dc266ad6d88a6e474acae34c431574997a06328ce033bfe5f846673dea0e943bbf3ddd8bf67f308c45540ba4de23355a997305d880e765141a07302c7386b02da3a636a64d815d91a767bbea3b5b828a9ccf83da31d1543416bc1907172a944ef0cecf0dbaf4fbe4d44889238b8cdc8e4513d77aa8d5e5840313520206c2d590763ab5d7b89d7ab0c9d09869fb8e0d01f5850203010001a3533051301d0603551d0e04160414cc6fa5e72868ba494e939bbd680b9144769a9f8f301f0603551d23041830168014cc6fa5e72868ba494e939bbd680b9144769a9f8f300f0603551d130101ff040530030101ff300d06092a864886f70d01010b050003820101001de75e426a66cc723e9b5cc9afa3ca542eed64abc0b917be27a91e58b1593c4d1174d1971a520584058ad9f085c8f5ec8f9ce9e7086dbb3acbfa6f3c33e6784d75bddfc095729f0350d2752a7cb481e08762945cefcf6bda3
ae3bf6e18743455500c22518eaa5830bebd3e304db697b5131b6daf6c183b714a09a18917a7e718f56d51b1d310c80ed6e43219024b1ab2d2dc29a326951d0106e452697806d3304444b07577cc54ade46e2222ff5dff93060cf9983a9c39b70c81d0f3f807a7098b6f9c8ae1adfc419850a65f0bbaa57f1cfc838d06592e9e6ebff43ec31a746625948a5dbf21b6139b9f67f87edc421f4c0edd88737d8c95d03f77c190b864f1", -+ } -+ ] -+ -+ # Test event with VariableData already in signature list format. -+ test_event = { -+ "VariableName": "d719b2cb-3d3a-4596-a3bc-dad00e67656f", -+ "UnicodeName": "vendor_db", -+ "VariableData": signature_list_format.copy(), -+ } -+ -+ original_data = test_event["VariableData"].copy() -+ -+ # Apply enrichment -+ tpm_bootlog_enrich.enrich_vendor_db_authority_variable(test_event) # type: ignore[reportPossiblyUnboundVariable] -+ -+ # Verify that VariableData in signature list format remains unchanged -+ self.assertEqual(test_event["VariableData"], original_data) -+ - - if __name__ == "__main__": - unittest.main() --- -2.47.3 - diff --git a/0011-fix-Add-fork-safety-to-DBManager-via-dispose.patch b/0011-fix-Add-fork-safety-to-DBManager-via-dispose.patch new file mode 100644 index 0000000..b29a111 --- /dev/null +++ b/0011-fix-Add-fork-safety-to-DBManager-via-dispose.patch @@ -0,0 +1,92 @@ +From d74e7499746917fa7b9fbba02972eed82bc7ece9 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 13 Mar 2026 16:04:45 +0100 +Subject: [PATCH] fix: Add fork-safety to DBManager via dispose() + +After forking worker processes, child processes inherited the parent's +db_manager engine and connection pool. Sharing SQLAlchemy connection +pools across fork boundaries is unsafe and can lead to corruption. + +Add DBManager.dispose() to clear engine, scoped session, and registry +state. Call it in verifier_server.py after fork (alongside the existing +reset_verifier_config()), then immediately re-create the engine with +make_engine() so the child has its own fresh connection pool. 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/models/base/db.py | 22 +++++++++++++++++++--- + keylime/web/base/server.py | 10 ++++++++++ + 2 files changed, 29 insertions(+), 3 deletions(-) + +diff --git a/keylime/models/base/db.py b/keylime/models/base/db.py +index 9678098..bb218cf 100644 +--- a/keylime/models/base/db.py ++++ b/keylime/models/base/db.py +@@ -101,9 +101,6 @@ class DBManager: + + @property + def service(self) -> Optional[str]: +- if not self._service: +- raise BackendMissing("cannot access the service for a DBManager before a call to db_manager.make_engine()") +- + return self._service + + @property +@@ -138,6 +135,25 @@ class DBManager: + + return cast(Session, self._scoped_session()) + ++ def dispose(self) -> None: ++ """Dispose the engine and clear all state. ++ ++ Must be called after fork to avoid sharing the parent's connection pool ++ across child processes. The next call to make_engine() will create fresh ++ connections for the child process. ++ """ ++ if self._scoped_session: ++ self._scoped_session.remove() ++ if self._engine: ++ # Use close=False so the child discards the inherited pool ++ # without closing the parent's underlying connections. Per ++ # SQLAlchemy docs, this is the recommended approach after fork. ++ self._engine.dispose(close=False) # type: ignore[call-arg] ++ self._engine = None ++ self._scoped_session = None ++ self._registry = None ++ self._service = None ++ + def remove_session(self) -> None: + """Remove the current scoped session, releasing its connection back to the pool and clearing the identity map. 
+ +diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py +index e053bbb..913a498 100644 +--- a/keylime/web/base/server.py ++++ b/keylime/web/base/server.py +@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional + import tornado + + from keylime import api_version, config, keylime_logging, web_util ++from keylime.models.base.db import db_manager + from keylime.web.base.action_handler import ActionHandler + from keylime.web.base.route import Route + +@@ -299,6 +300,15 @@ class Server(ABC): + tornado.process.fork_processes(self.worker_count) + # num.value = num.value + 1 + # print(num.value) ++ ++ # Dispose inherited db_manager engine after fork to avoid sharing the ++ # parent's connection pool, then re-create with a fresh pool for this ++ # child process. ++ service = db_manager.service ++ db_manager.dispose() ++ if service: ++ db_manager.make_engine(service) ++ + asyncio.run(self.start_single()) + + def _setup(self) -> None: +-- +2.53.0 + diff --git a/0011-fix-malformed-certs-workaround.patch b/0011-fix-malformed-certs-workaround.patch deleted file mode 100644 index 1891260..0000000 --- a/0011-fix-malformed-certs-workaround.patch +++ /dev/null @@ -1,1304 +0,0 @@ -From 531a07ce041426efc762afb48abcd0e1abaa90ec Mon Sep 17 00:00:00 2001 -From: Anderson Toshiyuki Sasaki -Date: Mon, 8 Sep 2025 13:55:24 +0200 -Subject: [PATCH] Avoid re-encoding certificate stored in DB - -The previous attempt (see 85b2bf6eeb2326b1a8b28848c32d442c0f464d6f) to -fix the re-encoding of malformed certificates that do not strictly -follow ASN.1 encoding rules, but has a valid certificate signature, was -incorrect because the cached original certificate would affect all -certificate objects. In that case, the cache was stored in the model -definition, which is shared between all certificate object instances. - -This changes the approach by creating a new class `CertificateWrapper` -that holds the original bytes in a cache in the object itself. 
This -makes the cache from a certificate to be independent from the cache of -another certificate. - -The python-cryptography ASN.1 parser is strict and does not accept -malformed certificates. This makes it impossible to use some affected -devices, notably TPM certificates from Nuvoton. To workaround this, the -certificate is re-encoded using pyasn1, but this effectively modify the -certificate making its signature invalid. To avoid storing invalid -certificates to the database, the original bits of the certificate (as -received from the agent) are cached and later used when storing the -certificate into the database. - -Assisted-by: Claude -Signed-off-by: Anderson Toshiyuki Sasaki ---- - keylime/certificate_wrapper.py | 99 +++++ - keylime/models/base/types/certificate.py | 70 ++-- - keylime/models/registrar/registrar_agent.py | 25 +- - test/test_certificate_modeltype.py | 197 ++++++++++ - test/test_certificate_wrapper.py | 385 +++++++++++++++++++ - test/test_registrar_agent_cert_compliance.py | 289 ++++++++++++++ - 6 files changed, 1029 insertions(+), 36 deletions(-) - create mode 100644 keylime/certificate_wrapper.py - create mode 100644 test/test_certificate_modeltype.py - create mode 100644 test/test_certificate_wrapper.py - create mode 100644 test/test_registrar_agent_cert_compliance.py - -diff --git a/keylime/certificate_wrapper.py b/keylime/certificate_wrapper.py -new file mode 100644 -index 000000000..899a19a8d ---- /dev/null -+++ b/keylime/certificate_wrapper.py -@@ -0,0 +1,99 @@ -+""" -+X.509 Certificate wrapper that preserves original bytes for malformed certificates. -+ -+This module provides a wrapper around cryptography.x509.Certificate that preserves -+the original certificate bytes when the certificate required pyasn1 re-encoding -+due to ASN.1 DER non-compliance. This ensures signature validity is maintained -+throughout the database lifecycle. 
-+""" -+ -+import base64 -+from typing import Any, Dict, Optional -+ -+import cryptography.x509 -+from cryptography.hazmat.primitives.serialization import Encoding -+ -+ -+class CertificateWrapper: -+ """ -+ A wrapper around cryptography.x509.Certificate that preserves original bytes -+ when malformed certificates require pyasn1 re-encoding. -+ -+ This class wraps a cryptography.x509.Certificate and adds the ability -+ to store the original certificate bytes when the certificate was malformed -+ and required re-encoding using pyasn1. This ensures that signature validation -+ works correctly even for certificates that don't strictly follow ASN.1 DER. -+ """ -+ -+ def __init__(self, cert: cryptography.x509.Certificate, original_bytes: Optional[bytes] = None): -+ """ -+ Initialize the wrapper certificate. -+ -+ :param cert: The cryptography.x509.Certificate object -+ :param original_bytes: The original DER bytes if certificate was re-encoded, None otherwise -+ """ -+ self._cert = cert -+ self._original_bytes = original_bytes -+ -+ def __getattr__(self, name: str) -> Any: -+ """Delegate attribute access to the wrapped certificate.""" -+ return getattr(self._cert, name) -+ -+ def __setstate__(self, state: Dict[str, Any]) -> None: -+ """Support for pickling.""" -+ self.__dict__.update(state) -+ -+ def __getstate__(self) -> Dict[str, Any]: -+ """Support for pickling.""" -+ return self.__dict__ -+ -+ @property -+ def has_original_bytes(self) -> bool: -+ """Check if this certificate has preserved original bytes.""" -+ return self._original_bytes is not None -+ -+ @property -+ def original_bytes(self) -> Optional[bytes]: -+ """Return the preserved original bytes if available.""" -+ return self._original_bytes -+ -+ def public_bytes(self, encoding: Encoding) -> bytes: -+ """ -+ Return certificate bytes, using original bytes when available. -+ -+ For certificates with preserved original bytes, this method always uses -+ the original DER bytes to maintain signature validity. 
For PEM encoding, -+ it converts the original DER bytes to PEM format. -+ """ -+ if self.has_original_bytes: -+ if encoding == Encoding.DER: -+ return self._original_bytes # type: ignore[return-value] -+ if encoding == Encoding.PEM: -+ # Convert original DER bytes to PEM format -+ der_b64 = base64.b64encode(self._original_bytes).decode("utf-8") # type: ignore[arg-type] -+ # Split into 64-character lines per PEM specification (RFC 1421) -+ lines = [der_b64[i : i + 64] for i in range(0, len(der_b64), 64)] -+ # Create PEM format with proper headers -+ pem_content = "\n".join(["-----BEGIN CERTIFICATE-----"] + lines + ["-----END CERTIFICATE-----"]) + "\n" -+ return pem_content.encode("utf-8") -+ -+ # For certificates without original bytes, use standard method -+ return self._cert.public_bytes(encoding) -+ -+ # Delegate common certificate methods to maintain full compatibility -+ def __str__(self) -> str: -+ return f"CertificateWrapper(subject={self._cert.subject})" -+ -+ def __repr__(self) -> str: -+ return f"CertificateWrapper(subject={self._cert.subject}, has_original_bytes={self.has_original_bytes})" -+ -+ -+def wrap_certificate(cert: cryptography.x509.Certificate, original_bytes: Optional[bytes] = None) -> CertificateWrapper: -+ """ -+ Factory function to create a wrapped certificate. 
-+ -+ :param cert: The cryptography.x509.Certificate object -+ :param original_bytes: The original DER bytes if certificate was re-encoded -+ :returns: Wrapped certificate that preserves original bytes -+ """ -+ return CertificateWrapper(cert, original_bytes) -diff --git a/keylime/models/base/types/certificate.py b/keylime/models/base/types/certificate.py -index 2c27603ba..bf2d62f9d 100644 ---- a/keylime/models/base/types/certificate.py -+++ b/keylime/models/base/types/certificate.py -@@ -12,6 +12,7 @@ - from pyasn1_modules import rfc2459 as pyasn1_rfc2459 - from sqlalchemy.types import Text - -+from keylime.certificate_wrapper import CertificateWrapper, wrap_certificate - from keylime.models.base.type import ModelType - - -@@ -78,19 +79,20 @@ def _schema(self): - cert = Certificate().cast("-----BEGIN CERTIFICATE-----\nMIIE...") - """ - -- IncomingValue: TypeAlias = Union[cryptography.x509.Certificate, bytes, str, None] -+ IncomingValue: TypeAlias = Union[cryptography.x509.Certificate, CertificateWrapper, bytes, str, None] - - def __init__(self) -> None: - super().__init__(Text) - -- def _load_der_cert(self, der_cert_data: bytes) -> cryptography.x509.Certificate: -- """Loads a binary x509 certificate encoded using ASN.1 DER as a ``cryptography.x509.Certificate`` object. This -+ def _load_der_cert(self, der_cert_data: bytes) -> CertificateWrapper: -+ """Loads a binary x509 certificate encoded using ASN.1 DER as a ``CertificateWrapper`` object. This - method does not require strict adherence to ASN.1 DER thereby making it possible to accept certificates which do - not follow every detail of the spec (this is the case for a number of TPM certs) [1,2]. - - It achieves this by first using the strict parser provided by python-cryptography. If that fails, it decodes the - certificate and re-encodes it using the more-forgiving pyasn1 library. The re-encoded certificate is then -- re-parsed by python-cryptography. -+ re-parsed by python-cryptography. 
For malformed certificates requiring re-encoding, the original bytes are -+ preserved in the wrapper to maintain signature validity. - - This method is equivalent to the ``cert_utils.x509_der_cert`` function but does not produce a warning when the - backup parser is used, allowing this condition to be optionally detected and handled by the model where -@@ -106,24 +108,28 @@ def _load_der_cert(self, der_cert_data: bytes) -> cryptography.x509.Certificate: - - :raises: :class:`SubstrateUnderrunError`: cert could not be deserialized even using the fallback pyasn1 parser - -- :returns: A ``cryptography.x509.Certificate`` object -+ :returns: A ``CertificateWrapper`` object - """ - - try: -- return cryptography.x509.load_der_x509_certificate(der_cert_data) -+ cert = cryptography.x509.load_der_x509_certificate(der_cert_data) -+ return wrap_certificate(cert, None) - except Exception: - pyasn1_cert = pyasn1_decoder.decode(der_cert_data, asn1Spec=pyasn1_rfc2459.Certificate())[0] -- return cryptography.x509.load_der_x509_certificate(pyasn1_encoder.encode(pyasn1_cert)) -+ cert = cryptography.x509.load_der_x509_certificate(pyasn1_encoder.encode(pyasn1_cert)) -+ # Preserve the original bytes when re-encoding is necessary -+ return wrap_certificate(cert, der_cert_data) - -- def _load_pem_cert(self, pem_cert_data: str) -> cryptography.x509.Certificate: -+ def _load_pem_cert(self, pem_cert_data: str) -> CertificateWrapper: - """Loads a text x509 certificate encoded using PEM (Base64ed DER with header and footer) as a -- ``cryptography.x509.Certificate`` object. This method does not require strict adherence to ASN.1 DER thereby -+ ``CertificateWrapper`` object. This method does not require strict adherence to ASN.1 DER thereby - making it possible to accept certificates which do not follow every detail of the spec (this is the case for - a number of TPM certs) [1,2]. - - It achieves this by first using the strict parser provided by python-cryptography. 
If that fails, it decodes the - certificate and re-encodes it using the more-forgiving pyasn1 library. The re-encoded certificate is then -- re-parsed by python-cryptography. -+ re-parsed by python-cryptography. For malformed certificates requiring re-encoding, the original DER bytes are -+ preserved in the wrapper to maintain signature validity. - - This method is equivalent to the ``cert_utils.x509_der_cert`` function but does not produce a warning when the - backup parser is used, allowing this condition to be optionally detected and handled by the model where -@@ -135,19 +141,24 @@ def _load_pem_cert(self, pem_cert_data: str) -> cryptography.x509.Certificate: - [2] https://github.com/pyca/cryptography/issues/7189 - [3] https://github.com/keylime/keylime/issues/1559 - -- :param der_cert_data: the DER bytes of the certificate -+ :param pem_cert_data: the PEM text of the certificate - - :raises: :class:`SubstrateUnderrunError`: cert could not be deserialized even using the fallback pyasn1 parser - -- :returns: A ``cryptography.x509.Certificate`` object -+ :returns: A ``CertificateWrapper`` object - """ - - try: -- return cryptography.x509.load_pem_x509_certificate(pem_cert_data.encode("utf-8")) -+ cert = cryptography.x509.load_pem_x509_certificate(pem_cert_data.encode("utf-8")) -+ return wrap_certificate(cert, None) - except Exception: - der_data = pyasn1_pem.readPemFromFile(io.StringIO(pem_cert_data)) - pyasn1_cert = pyasn1_decoder.decode(der_data, asn1Spec=pyasn1_rfc2459.Certificate())[0] -- return cryptography.x509.load_der_x509_certificate(pyasn1_encoder.encode(pyasn1_cert)) -+ cert = cryptography.x509.load_der_x509_certificate(pyasn1_encoder.encode(pyasn1_cert)) -+ # Only preserve original bytes if we have valid DER data -+ original_bytes = der_data if isinstance(der_data, bytes) and der_data else None -+ # Preserve the original bytes when re-encoding is necessary -+ return wrap_certificate(cert, original_bytes) - - def infer_encoding(self, value: 
IncomingValue) -> Optional[str]: - """Tries to infer the certificate encoding from the given value based on the data type and other surface-level -@@ -159,15 +170,21 @@ def infer_encoding(self, value: IncomingValue) -> Optional[str]: - :returns: ``"der"`` when the value appears to be DER encoded - :returns: ``"pem"`` when the value appears to be PEM encoded - :returns: ``"base64"`` when the value appears to be Base64(DER) encoded (without PEM headers) -+ :returns: ``"wrapped"`` when the value is already a ``CertificateWrapper`` object - :returns: ``"decoded"`` when the value is already a ``cryptography.x509.Certificate`` object -+ :returns: ``"disabled"`` when the value is the string "disabled" - :returns: ``None`` when the encoding cannot be inferred - """ - # pylint: disable=no-else-return - -- if isinstance(value, cryptography.x509.Certificate): -+ if isinstance(value, CertificateWrapper): -+ return "wrapped" -+ elif isinstance(value, cryptography.x509.Certificate): - return "decoded" - elif isinstance(value, bytes): - return "der" -+ elif isinstance(value, str) and value == "disabled": -+ return "disabled" - elif isinstance(value, str) and value.startswith("-----BEGIN CERTIFICATE-----"): - return "pem" - elif isinstance(value, str): -@@ -190,18 +207,24 @@ def asn1_compliant(self, value: IncomingValue) -> Optional[bool]: - :param value: The value in DER, Base64(DER), or PEM format (or an already deserialized certificate object) - - :returns: ``"True"`` if the value can be deserialized by python-cryptography and is ASN.1 DER compliant -+ :returns: ``"True"`` if the value is the string "disabled" (considered compliant as it's a valid field value) - :returns: ``"False"`` if the value cannot be deserialized by python-cryptography - :returns: ``None`` if the value is already a deserialized certificate of type ``cryptography.x509.Certificate`` - """ - - try: - match self.infer_encoding(value): -+ case "wrapped": -+ # For CertificateWrapper objects, check if they have 
original bytes (indicating re-encoding was needed) -+ return not value.has_original_bytes # type: ignore[union-attr] - case "decoded": - return None -+ case "disabled": -+ return True - case "der": - cryptography.x509.load_der_x509_certificate(value) # type: ignore[reportArgumentType, arg-type] - case "pem": -- cryptography.x509.load_pem_x509_certificate(value) # type: ignore[reportArgumentType, arg-type] -+ cryptography.x509.load_pem_x509_certificate(value.encode("utf-8")) # type: ignore[reportArgumentType, arg-type, union-attr] - case "base64": - der_value = base64.b64decode(value, validate=True) # type: ignore[reportArgumentType, arg-type] - cryptography.x509.load_der_x509_certificate(der_value) -@@ -212,24 +235,27 @@ def asn1_compliant(self, value: IncomingValue) -> Optional[bool]: - - return True - -- def cast(self, value: IncomingValue) -> Optional[cryptography.x509.Certificate]: -+ def cast(self, value: IncomingValue) -> Optional[CertificateWrapper]: - """Tries to interpret the given value as an X.509 certificate and convert it to a -- ``cryptography.x509.Certificate`` object. Values which do not require conversion are returned unchanged. -+ ``CertificateWrapper`` object. Values which do not require conversion are returned unchanged. 
- - :param value: The value to convert (may be in DER, Base64(DER), or PEM format) - - :raises: :class:`TypeError`: ``value`` is of an unexpected data type - :raises: :class:`ValueError`: ``value`` does not contain data which is interpretable as a certificate - -- :returns: A ``cryptography.x509.Certificate`` object or None if an empty value is given -+ :returns: A ``CertificateWrapper`` object or None if an empty value is given - """ - - if not value: - return None - - match self.infer_encoding(value): -+ case "wrapped": -+ return value # type: ignore[return-value] - case "decoded": -- return value # type: ignore[reportReturnType, return-value] -+ # Wrap raw cryptography certificate without original bytes -+ return wrap_certificate(value, None) # type: ignore[arg-type] - case "der": - try: - return self._load_der_cert(value) # type: ignore[reportArgumentType, arg-type] -@@ -269,7 +295,6 @@ def _dump(self, value: IncomingValue) -> Optional[str]: - if not cert: - return None - -- # Save as Base64-encoded value (without the PEM "BEGIN" and "END" header/footer for efficiency) - return base64.b64encode(cert.public_bytes(Encoding.DER)).decode("utf-8") - - def render(self, value: IncomingValue) -> Optional[str]: -@@ -279,9 +304,8 @@ def render(self, value: IncomingValue) -> Optional[str]: - if not cert: - return None - -- # Render certificate in PEM format - return cert.public_bytes(Encoding.PEM).decode("utf-8") # type: ignore[no-any-return] - - @property - def native_type(self) -> type: -- return cryptography.x509.Certificate -+ return CertificateWrapper -diff --git a/keylime/models/registrar/registrar_agent.py b/keylime/models/registrar/registrar_agent.py -index 560c18838..fc7e1be87 100644 ---- a/keylime/models/registrar/registrar_agent.py -+++ b/keylime/models/registrar/registrar_agent.py -@@ -1,7 +1,6 @@ - import base64 - import hmac - --import cryptography.x509 - from cryptography.hazmat.primitives.asymmetric import ec, rsa - from 
cryptography.hazmat.primitives.serialization import Encoding, PublicFormat - -@@ -116,35 +115,35 @@ def _check_cert_trust_status(self, cert_field, cert_type=""): - if not cert_utils.verify_cert(cert, trust_store, cert_type): - self._add_error(cert_field, "must contain a certificate issued by a CA present in the trust store") - -- def _check_cert_compliance(self, cert_field, raw_cert): -+ def _check_cert_compliance(self, cert_field): - new_cert = self.changes.get(cert_field) - old_cert = self.values.get(cert_field) - - # If the certificate field has not been changed, no need to perform check -- if not raw_cert or not new_cert: -+ if not new_cert: -+ return True -+ -+ # If the certificate field is set as "disabled" (for mtls_cert) -+ if new_cert == "disabled": - return True - - # If the new certificate value is the same as the old certificate value, no need to perform check -- if ( -- isinstance(new_cert, cryptography.x509.Certificate) -- and isinstance(old_cert, cryptography.x509.Certificate) -- and new_cert.public_bytes(Encoding.DER) == old_cert.public_bytes(Encoding.DER) -- ): -+ if old_cert and new_cert.public_bytes(Encoding.DER) == old_cert.public_bytes(Encoding.DER): - return True - -- compliant = Certificate().asn1_compliant(raw_cert) -+ compliant = Certificate().asn1_compliant(new_cert) - - if not compliant: - if config.get("registrar", "malformed_cert_action") == "reject": -- self._add_error(cert_field, Certificate().generate_error_msg(raw_cert)) -+ self._add_error(cert_field, Certificate().generate_error_msg(new_cert)) - - return compliant - -- def _check_all_cert_compliance(self, data): -+ def _check_all_cert_compliance(self): - non_compliant_certs = [] - - for field_name in ("ekcert", "iak_cert", "idevid_cert", "mtls_cert"): -- if not self._check_cert_compliance(field_name, data.get(field_name)): -+ if not self._check_cert_compliance(field_name): - non_compliant_certs.append(f"'{field_name}'") - - if not non_compliant_certs: -@@ -290,7 +289,7 @@ def 
update(self, data): - # Ensure either an EK or IAK/IDevID is present, depending on configuration - self._check_root_identity_presence() - # Handle certificates which are not fully compliant with ASN.1 DER -- self._check_all_cert_compliance(data) -+ self._check_all_cert_compliance() - - # Basic validation of values - self.validate_required(["aik_tpm"]) -diff --git a/test/test_certificate_modeltype.py b/test/test_certificate_modeltype.py -new file mode 100644 -index 000000000..335ae0fc8 ---- /dev/null -+++ b/test/test_certificate_modeltype.py -@@ -0,0 +1,197 @@ -+""" -+Unit tests for the Certificate ModelType class. -+ -+This module tests the certificate model type functionality including -+encoding inference and ASN.1 compliance checking. -+""" -+ -+import base64 -+import unittest -+ -+import cryptography.x509 -+from cryptography.hazmat.primitives.serialization import Encoding -+ -+from keylime.certificate_wrapper import CertificateWrapper, wrap_certificate -+from keylime.models.base.types.certificate import Certificate -+ -+ -+class TestCertificateModelType(unittest.TestCase): -+ """Test cases for Certificate ModelType class.""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ self.cert_type = Certificate() -+ -+ # Compliant certificate for testing (loads fine with python-cryptography) -+ self.compliant_cert_pem = """-----BEGIN CERTIFICATE----- -+MIIClzCCAX+gAwIBAgIBATANBgkqhkiG9w0BAQsFADAPMQ0wCwYDVQQDDARUZXN0 -+MB4XDTI1MDkxMTEyNDU1MVoXDTI2MDkxMTEyNDU1MVowDzENMAsGA1UEAwwEVGVz -+dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAO2V27HsMnKczHCaLgf9 -+FtxuorvkA5OMkz6KsW1eyryHr0TJ801prLpeNnMZ3U4pqLMqocMc7T2KO6nPZJxO -+7zRzehyo9pBBVO4pUR1QMGoTWuJQbqNieDQ4V9dW67N5wp/UWEkK6CNNd6aXjswb -+dVaDbIfDL8hMX6Lil3+pTysRWGqjRvBGJxS9r/mYRAvbz1JHPjfegSc0uxnUE+qZ -+SrbWa3TN82LX6jw6tKk0Z3CcPJC6QN+ijCxxAoHyLRYUIgZbAKe/FGRbjO0fuW11 -+L7TcE1k3eaC7RkvotIaCOW/RMOkwKu1MbCzFEA2YRYf9covEwdItzI4FE++ZJrsz -+LhUCAwEAaTANBgkqhkiG9w0BAQsFAAOCAQEAeqqJT0LnmAluAjrsCSK/eYYjwjhZ 
-+aKMi/iBO10zfb+GvT4yqEL5gnuWxJEx4TTcDww1clvOC1EcPUZFaKR3GIBGy0ZgJ -+zGCfg+sC6liyZ+4PSWSJHD2dT5N3IGp4/hPsrhKnVb9fYbRc0Bc5VHeS9QQoSJDH -+f9EbxCcwdErVllRter29OZCb4XnEEbTqLIKRYVrbsu/t4C+vzi0tmKg5HZXf9PMo -+D28zJGsCAr8sKW/iUKObqDOHEn56lk12NTJmJmi+g6rEikk/0czJlRjSGnJQLjUg -+d4wslruibXBsLPtJw2c6vTC2SV2F1PXwy5j1OKU+D6nxaaItQvWADEjcTg== -+-----END CERTIFICATE-----""" -+ -+ # Malformed certificate that requires pyasn1 re-encoding (fails with python-cryptography) -+ self.malformed_cert_b64 = ( -+ "MIIDUjCCAvegAwIBAgILAI5xYHQ14nH5hdYwCgYIKoZIzj0EAwIwVTFTMB8GA1UEAxMYTnV2b3Rv" -+ "biBUUE0gUm9vdCBDQSAyMTExMCUGA1UEChMeTnV2b3RvbiBUZWNobm9sb2d5IENvcnBvcmF0aW9u" -+ "MAkGA1UEBhMCVFcwHhcNMTkwNzIzMTcxNTEzWhcNMzkwNzE5MTcxNTEzWjAAMIIBIjANBgkqhkiG" -+ "9w0BAQEFAAOCAQ8AMIIBCgKCAQEAk8kCj7srY/Zlvm1795fVXdyX44w5qsd1m5VywMDgSOavzPKO" -+ "kgbHgQNx6Ak5+4Q43EJ/5qsaDBv59F8W7K69maUwcMNq1xpuq0V/LiwgJVAtc3CdvlxtwQrn7+Uq" -+ "ieIGf+i8sGxpeUCSmYHJPTHNHqjQnvUtdGoy/+WO0i7WsAvX3k/gHHr4p58a8urjJ1RG2Lk1g48D" -+ "ESwl+D7atQEPWzgjr6vK/s5KpLrn7M+dh97TUbG1510AOWBPP35MtT8IZbqC4hs2Ol16gT1M3a9e" -+ "+GaMZkItLUwV76vKDNEgTZG8M1C9OItA/xwzlfXbPepzpxWb4kzHS4qZoQtl4vBZrQIDAQABo4IB" -+ "NjCCATIwUAYDVR0RAQH/BEYwRKRCMEAxPjAUBgVngQUCARMLaWQ6NEU1NDQzMDAwEAYFZ4EFAgIT" -+ "B05QQ1Q3NXgwFAYFZ4EFAgMTC2lkOjAwMDcwMDAyMAwGA1UdEwEB/wQCMAAwEAYDVR0lBAkwBwYF" -+ "Z4EFCAEwHwYDVR0jBBgwFoAUI/TiKtO+N0pEl3KVSqKDrtdSVy4wDgYDVR0PAQH/BAQDAgUgMCIG" -+ "A1UdCQQbMBkwFwYFZ4EFAhAxDjAMDAMyLjACAQACAgCKMGkGCCsGAQUFBwEBBF0wWzBZBggrBgEF" -+ "BQcwAoZNaHR0cHM6Ly93d3cubnV2b3Rvbi5jb20vc2VjdXJpdHkvTlRDLVRQTS1FSy1DZXJ0L051" -+ "dm90b24gVFBNIFJvb3QgQ0EgMjExMS5jZXIwCgYIKoZIzj0EAwIDSQAwRgIhAPHOFiBDZd0dfml2" -+ "a/KlPFhmX7Ahpd0Wq11ZUW1/ixviAiEAlex8BB5nsR6w8QrANwCxc7fH/YnbjXfMCFiWzeZH7ps=" -+ ) -+ -+ # Load certificates for testing -+ self.compliant_cert = cryptography.x509.load_pem_x509_certificate(self.compliant_cert_pem.encode()) -+ self.malformed_cert_der = base64.b64decode(self.malformed_cert_b64) -+ -+ def test_infer_encoding_wrapped_certificate(self): -+ 
"""Test that CertificateWrapper objects are identified as 'wrapped'.""" -+ wrapped_cert = wrap_certificate(self.compliant_cert, None) -+ encoding = self.cert_type.infer_encoding(wrapped_cert) -+ self.assertEqual(encoding, "wrapped") -+ -+ def test_infer_encoding_raw_certificate(self): -+ """Test that raw cryptography.x509.Certificate objects are identified as 'decoded'.""" -+ encoding = self.cert_type.infer_encoding(self.compliant_cert) -+ self.assertEqual(encoding, "decoded") -+ -+ def test_infer_encoding_der_bytes(self): -+ """Test that DER bytes are identified as 'der'.""" -+ der_bytes = self.compliant_cert.public_bytes(Encoding.DER) -+ encoding = self.cert_type.infer_encoding(der_bytes) -+ self.assertEqual(encoding, "der") -+ -+ def test_infer_encoding_pem_string(self): -+ """Test that PEM strings are identified as 'pem'.""" -+ encoding = self.cert_type.infer_encoding(self.compliant_cert_pem) -+ self.assertEqual(encoding, "pem") -+ -+ def test_infer_encoding_base64_string(self): -+ """Test that Base64 strings are identified as 'base64'.""" -+ encoding = self.cert_type.infer_encoding(self.malformed_cert_b64) -+ self.assertEqual(encoding, "base64") -+ -+ def test_infer_encoding_none_for_invalid(self): -+ """Test that invalid types return None.""" -+ encoding = self.cert_type.infer_encoding(12345) # type: ignore[arg-type] # Testing invalid type -+ self.assertIsNone(encoding) -+ -+ def test_asn1_compliant_wrapped_without_original_bytes(self): -+ """Test that CertificateWrapper without original bytes is ASN.1 compliant.""" -+ wrapped_cert = wrap_certificate(self.compliant_cert, None) -+ compliant = self.cert_type.asn1_compliant(wrapped_cert) -+ self.assertTrue(compliant) -+ -+ def test_asn1_compliant_wrapped_with_original_bytes(self): -+ """Test that CertificateWrapper with original bytes is not ASN.1 compliant.""" -+ wrapped_cert = wrap_certificate(self.compliant_cert, b"fake_original_bytes") -+ compliant = self.cert_type.asn1_compliant(wrapped_cert) -+ 
self.assertFalse(compliant) -+ -+ def test_asn1_compliant_raw_certificate(self): -+ """Test that raw cryptography.x509.Certificate returns None (already decoded).""" -+ compliant = self.cert_type.asn1_compliant(self.compliant_cert) -+ self.assertIsNone(compliant) -+ -+ def test_asn1_compliant_pem_strings(self): -+ """Test ASN.1 compliance checking on PEM strings.""" -+ # The regular certificate and TPM certificate from test_registrar_db.py are actually ASN.1 compliant -+ # and can be loaded directly by python-cryptography without requiring pyasn1 re-encoding -+ compliant_regular = self.cert_type.asn1_compliant(self.compliant_cert_pem) -+ # Only test one certificate since both are the same type (ASN.1 compliant) -+ -+ # Should be ASN.1 compliant (True) since it loads fine with python-cryptography -+ self.assertTrue(compliant_regular) -+ -+ def test_asn1_compliant_der_and_base64(self): -+ """Test ASN.1 compliance checking on DER and Base64 formats.""" -+ # Test DER bytes - regular certificate should be compliant -+ der_bytes = self.compliant_cert.public_bytes(Encoding.DER) -+ compliant_der = self.cert_type.asn1_compliant(der_bytes) -+ self.assertTrue(compliant_der) -+ -+ # Test Base64 string - regular certificate should be compliant -+ b64_string = base64.b64encode(der_bytes).decode("utf-8") -+ compliant_b64 = self.cert_type.asn1_compliant(b64_string) -+ self.assertTrue(compliant_b64) -+ -+ def test_asn1_compliant_malformed_certificate(self): -+ """Test ASN.1 compliance checking on a truly malformed certificate.""" -+ # Test the malformed certificate that requires pyasn1 re-encoding -+ compliant = self.cert_type.asn1_compliant(self.malformed_cert_b64) -+ self.assertFalse(compliant) # Should be non-compliant since it needs pyasn1 fallback -+ -+ def test_asn1_compliant_invalid_data(self): -+ """Test that invalid certificate data is not ASN.1 compliant.""" -+ compliant = self.cert_type.asn1_compliant("invalid_certificate_data") -+ self.assertFalse(compliant) -+ -+ def 
test_cast_wrapped_certificate(self): -+ """Test that CertificateWrapper objects are returned unchanged.""" -+ wrapped_cert = wrap_certificate(self.compliant_cert, None) -+ result = self.cert_type.cast(wrapped_cert) -+ self.assertIs(result, wrapped_cert) -+ -+ def test_cast_raw_certificate_to_wrapped(self): -+ """Test that raw certificates are wrapped without original bytes.""" -+ result = self.cert_type.cast(self.compliant_cert) -+ self.assertIsInstance(result, CertificateWrapper) -+ assert result is not None # For type checker -+ self.assertFalse(result.has_original_bytes) -+ -+ def test_cast_pem_strings(self): -+ """Test casting PEM strings to CertificateWrapper.""" -+ # Test regular certificate - should be ASN.1 compliant, no original bytes needed -+ result_regular = self.cert_type.cast(self.compliant_cert_pem) -+ self.assertIsInstance(result_regular, CertificateWrapper) -+ assert result_regular is not None # For type checker -+ self.assertFalse(result_regular.has_original_bytes) -+ -+ # Note: Only testing compliant certificate since we now use one consistent certificate for all compliant scenarios -+ -+ def test_cast_malformed_certificate(self): -+ """Test casting the malformed certificate that requires pyasn1 re-encoding.""" -+ result = self.cert_type.cast(self.malformed_cert_b64) -+ self.assertIsInstance(result, CertificateWrapper) -+ assert result is not None # For type checker -+ # Malformed certificate should have original bytes since it needs re-encoding -+ self.assertTrue(result.has_original_bytes) -+ -+ def test_cast_der_bytes(self): -+ """Test casting DER bytes to CertificateWrapper.""" -+ der_bytes = self.compliant_cert.public_bytes(Encoding.DER) -+ result = self.cert_type.cast(der_bytes) -+ self.assertIsInstance(result, CertificateWrapper) -+ -+ def test_cast_none_value(self): -+ """Test that None values return None.""" -+ result = self.cert_type.cast(None) -+ self.assertIsNone(result) -+ -+ def test_cast_empty_string(self): -+ """Test that empty 
strings return None.""" -+ result = self.cert_type.cast("") -+ self.assertIsNone(result) -+ -+ -+if __name__ == "__main__": -+ unittest.main() -diff --git a/test/test_certificate_wrapper.py b/test/test_certificate_wrapper.py -new file mode 100644 -index 000000000..6b47260d9 ---- /dev/null -+++ b/test/test_certificate_wrapper.py -@@ -0,0 +1,385 @@ -+""" -+Unit tests for the CertificateWrapper class. -+ -+This module tests the certificate wrapper functionality that preserves original bytes -+for malformed certificates requiring pyasn1 re-encoding. -+""" -+ -+import base64 -+import subprocess -+import tempfile -+import unittest -+from unittest.mock import Mock -+ -+import cryptography.x509 -+from cryptography.hazmat.primitives.serialization import Encoding -+from pyasn1.codec.der import decoder as pyasn1_decoder -+from pyasn1.codec.der import encoder as pyasn1_encoder -+from pyasn1_modules import rfc2459 as pyasn1_rfc2459 -+ -+from keylime.certificate_wrapper import CertificateWrapper, wrap_certificate -+ -+ -+class TestCertificateWrapper(unittest.TestCase): -+ """Test cases for CertificateWrapper class.""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ # Malformed certificate (Base64 encoded) that requires pyasn1 re-encoding -+ # This is a real TPM certificate that doesn't strictly follow ASN.1 DER rules -+ self.malformed_cert_b64 = ( -+ "MIIDUjCCAvegAwIBAgILAI5xYHQ14nH5hdYwCgYIKoZIzj0EAwIwVTFTMB8GA1UEAxMYTnV2b3Rv" -+ "biBUUE0gUm9vdCBDQSAyMTExMCUGA1UEChMeTnV2b3RvbiBUZWNobm9sb2d5IENvcnBvcmF0aW9u" -+ "MAkGA1UEBhMCVFcwHhcNMTkwNzIzMTcxNTEzWhcNMzkwNzE5MTcxNTEzWjAAMIIBIjANBgkqhkiG" -+ "9w0BAQEFAAOCAQ8AMIIBCgKCAQEAk8kCj7srY/Zlvm1795fVXdyX44w5qsd1m5VywMDgSOavzPKO" -+ "kgbHgQNx6Ak5+4Q43EJ/5qsaDBv59F8W7K69maUwcMNq1xpuq0V/LiwgJVAtc3CdvlxtwQrn7+Uq" -+ "ieIGf+i8sGxpeUCSmYHJPTHNHqjQnvUtdGoy/+WO0i7WsAvX3k/gHHr4p58a8urjJ1RG2Lk1g48D" -+ "ESwl+D7atQEPWzgjr6vK/s5KpLrn7M+dh97TUbG1510AOWBPP35MtT8IZbqC4hs2Ol16gT1M3a9e" -+ 
"+GaMZkItLUwV76vKDNEgTZG8M1C9OItA/xwzlfXbPepzpxWb4kzHS4qZoQtl4vBZrQIDAQABo4IB" -+ "NjCCATIwUAYDVR0RAQH/BEYwRKRCMEAxPjAUBgVngQUCARMLaWQ6NEU1NDQzMDAwEAYFZ4EFAgIT" -+ "B05QQ1Q3NXgwFAYFZ4EFAgMTC2lkOjAwMDcwMDAyMAwGA1UdEwEB/wQCMAAwEAYDVR0lBAkwBwYF" -+ "Z4EFCAEwHwYDVR0jBBgwFoAUI/TiKtO+N0pEl3KVSqKDrtdSVy4wDgYDVR0PAQH/BAQDAgUgMCIG" -+ "A1UdCQQbMBkwFwYFZ4EFAhAxDjAMDAMyLjACAQACAgCKMGkGCCsGAQUFBwEBBF0wWzBZBggrBgEF" -+ "BQcwAoZNaHR0cHM6Ly93d3cubnV2b3Rvbi5jb20vc2VjdXJpdHkvTlRDLVRQTS1FSy1DZXJ0L051" -+ "dm90b24gVFBNIFJvb3QgQ0EgMjExMS5jZXIwCgYIKoZIzj0EAwIDSQAwRgIhAPHOFiBDZd0dfml2" -+ "a/KlPFhmX7Ahpd0Wq11ZUW1/ixviAiEAlex8BB5nsR6w8QrANwCxc7fH/YnbjXfMCFiWzeZH7ps=" -+ ) -+ self.malformed_cert_der = base64.b64decode(self.malformed_cert_b64) -+ -+ # Create a mock certificate for testing -+ self.mock_cert = Mock(spec=cryptography.x509.Certificate) -+ self.mock_cert.subject = Mock() -+ self.mock_cert.subject.__str__ = Mock(return_value="CN=Test Certificate") -+ self.mock_cert.public_bytes.return_value = b"mock_der_data" -+ -+ def test_init_without_original_bytes(self): -+ """Test wrapper initialization without original bytes.""" -+ wrapper = CertificateWrapper(self.mock_cert) -+ -+ # Test through public interface -+ self.assertFalse(wrapper.has_original_bytes) -+ self.assertIsNone(wrapper.original_bytes) -+ # Test delegation works -+ self.assertEqual(wrapper.subject, self.mock_cert.subject) -+ -+ def test_init_with_original_bytes(self): -+ """Test wrapper initialization with original bytes.""" -+ original_data = b"original_certificate_data" -+ wrapper = CertificateWrapper(self.mock_cert, original_data) -+ -+ # Test through public interface -+ self.assertTrue(wrapper.has_original_bytes) -+ self.assertEqual(wrapper.original_bytes, original_data) -+ # Test delegation works -+ self.assertEqual(wrapper.subject, self.mock_cert.subject) -+ -+ def test_getattr_delegation(self): -+ """Test that attributes are properly delegated to the wrapped certificate.""" -+ wrapper = 
CertificateWrapper(self.mock_cert) -+ -+ # Access an attribute that should be delegated -+ result = wrapper.subject -+ self.assertEqual(result, self.mock_cert.subject) -+ -+ def test_public_bytes_der_without_original(self): -+ """Test public_bytes DER encoding without original bytes.""" -+ wrapper = CertificateWrapper(self.mock_cert) -+ -+ result = wrapper.public_bytes(Encoding.DER) -+ -+ self.mock_cert.public_bytes.assert_called_once_with(Encoding.DER) -+ self.assertEqual(result, b"mock_der_data") -+ -+ def test_public_bytes_der_with_original(self): -+ """Test public_bytes DER encoding with original bytes.""" -+ original_data = b"original_certificate_data" -+ wrapper = CertificateWrapper(self.mock_cert, original_data) -+ -+ result = wrapper.public_bytes(Encoding.DER) -+ -+ # Should return original bytes, not call the wrapped certificate -+ self.mock_cert.public_bytes.assert_not_called() -+ self.assertEqual(result, original_data) -+ -+ def test_public_bytes_pem_without_original(self): -+ """Test public_bytes PEM encoding without original bytes.""" -+ self.mock_cert.public_bytes.return_value = b"-----BEGIN CERTIFICATE-----\nMIIB...\n-----END CERTIFICATE-----\n" -+ wrapper = CertificateWrapper(self.mock_cert) -+ -+ result = wrapper.public_bytes(Encoding.PEM) -+ -+ self.mock_cert.public_bytes.assert_called_once_with(Encoding.PEM) -+ self.assertEqual(result, b"-----BEGIN CERTIFICATE-----\nMIIB...\n-----END CERTIFICATE-----\n") -+ -+ def test_public_bytes_pem_with_original(self): -+ """Test public_bytes PEM encoding with original bytes.""" -+ original_data = self.malformed_cert_der -+ wrapper = CertificateWrapper(self.mock_cert, original_data) -+ -+ result = wrapper.public_bytes(Encoding.PEM) -+ -+ # Should not call the wrapped certificate's method -+ self.mock_cert.public_bytes.assert_not_called() -+ -+ # Result should be PEM format derived from original bytes -+ self.assertIsInstance(result, bytes) -+ result_str = result.decode("utf-8") -+ 
self.assertTrue(result_str.startswith("-----BEGIN CERTIFICATE-----")) -+ self.assertTrue(result_str.endswith("-----END CERTIFICATE-----\n")) -+ -+ # Verify that the PEM content can be converted back to the original DER -+ pem_lines = result_str.strip().split("\n") -+ pem_content = "".join(pem_lines[1:-1]) # Remove headers and join -+ recovered_der = base64.b64decode(pem_content) -+ self.assertEqual(recovered_der, original_data) -+ -+ def test_pem_line_length_compliance(self): -+ """Test that PEM output follows RFC 1421 line length requirements (64 chars).""" -+ original_data = self.malformed_cert_der -+ wrapper = CertificateWrapper(self.mock_cert, original_data) -+ -+ result = wrapper.public_bytes(Encoding.PEM) -+ result_str = result.decode("utf-8") -+ -+ lines = result_str.strip().split("\n") -+ # Check that content lines (excluding headers) are max 64 chars -+ for line in lines[1:-1]: # Skip header and footer -+ self.assertLessEqual(len(line), 64) -+ -+ def test_str_representation(self): -+ """Test string representation of the wrapper.""" -+ wrapper = CertificateWrapper(self.mock_cert) -+ -+ result = str(wrapper) -+ -+ expected = f"CertificateWrapper(subject={self.mock_cert.subject})" -+ self.assertEqual(result, expected) -+ -+ def test_repr_representation_without_original(self): -+ """Test repr representation without original bytes.""" -+ wrapper = CertificateWrapper(self.mock_cert) -+ -+ result = repr(wrapper) -+ -+ expected = f"CertificateWrapper(subject={self.mock_cert.subject}, has_original_bytes=False)" -+ self.assertEqual(result, expected) -+ -+ def test_repr_representation_with_original(self): -+ """Test repr representation with original bytes.""" -+ original_data = b"original_data" -+ wrapper = CertificateWrapper(self.mock_cert, original_data) -+ -+ result = repr(wrapper) -+ -+ expected = f"CertificateWrapper(subject={self.mock_cert.subject}, has_original_bytes=True)" -+ self.assertEqual(result, expected) -+ -+ def test_pickling_support(self): -+ """Test 
that the wrapper supports pickling operations.""" -+ original_data = b"test_data" -+ wrapper = CertificateWrapper(self.mock_cert, original_data) -+ -+ # Test getstate -+ state = wrapper.__getstate__() -+ self.assertIsInstance(state, dict) -+ self.assertIn("_cert", state) -+ self.assertIn("_original_bytes", state) -+ -+ # Test setstate -+ new_wrapper = CertificateWrapper(Mock(), None) -+ new_wrapper.__setstate__(state) -+ # Verify state was restored correctly through public interface -+ self.assertTrue(new_wrapper.has_original_bytes) -+ self.assertEqual(new_wrapper.original_bytes, original_data) -+ -+ def test_wrap_certificate_function_without_original(self): -+ """Test the wrap_certificate factory function without original bytes.""" -+ wrapper = wrap_certificate(self.mock_cert) -+ -+ self.assertIsInstance(wrapper, CertificateWrapper) -+ self.assertFalse(wrapper.has_original_bytes) -+ self.assertIsNone(wrapper.original_bytes) -+ -+ def test_wrap_certificate_function_with_original(self): -+ """Test the wrap_certificate factory function with original bytes.""" -+ original_data = b"original_certificate_data" -+ wrapper = wrap_certificate(self.mock_cert, original_data) -+ -+ self.assertIsInstance(wrapper, CertificateWrapper) -+ self.assertTrue(wrapper.has_original_bytes) -+ self.assertEqual(wrapper.original_bytes, original_data) -+ -+ def test_real_malformed_certificate_handling(self): -+ """Test with a real malformed certificate that requires pyasn1 re-encoding.""" -+ # This test simulates the scenario where a malformed certificate is processed -+ -+ # Mock the scenario where cryptography fails but pyasn1 succeeds -+ mock_reencoded_cert = Mock(spec=cryptography.x509.Certificate) -+ mock_reencoded_cert.subject = Mock() -+ mock_reencoded_cert.subject.__str__ = Mock(return_value="CN=Nuvoton TPM") -+ -+ # Create wrapper as if it came from the certificate loading process -+ wrapper = wrap_certificate(mock_reencoded_cert, self.malformed_cert_der) -+ -+ # Test that original 
bytes are preserved -+ self.assertTrue(wrapper.has_original_bytes) -+ self.assertEqual(wrapper.original_bytes, self.malformed_cert_der) -+ -+ # Test DER output uses original bytes -+ der_output = wrapper.public_bytes(Encoding.DER) -+ self.assertEqual(der_output, self.malformed_cert_der) -+ -+ # Test PEM output is derived from original bytes -+ pem_output = wrapper.public_bytes(Encoding.PEM) -+ self.assertIsInstance(pem_output, bytes) -+ -+ # Verify PEM can be converted back to original DER -+ pem_str = pem_output.decode("utf-8") -+ lines = pem_str.strip().split("\n") -+ content = "".join(lines[1:-1]) -+ recovered_der = base64.b64decode(content) -+ self.assertEqual(recovered_der, self.malformed_cert_der) -+ -+ def test_unsupported_encoding_fallback(self): -+ """Test that unsupported encoding types fall back to wrapped certificate.""" -+ # Create a custom encoding that's not DER or PEM -+ custom_encoding = Mock() -+ custom_encoding.name = "CUSTOM" -+ -+ original_data = b"original_data" -+ wrapper = CertificateWrapper(self.mock_cert, original_data) -+ -+ # Should fall back to wrapped certificate for unknown encoding -+ wrapper.public_bytes(custom_encoding) -+ self.mock_cert.public_bytes.assert_called_once_with(custom_encoding) -+ -+ def test_malformed_certificate_cryptography_failure_and_verification(self): -+ """ -+ Comprehensive test demonstrating that the malformed certificate: -+ 1. Fails to load with python-cryptography -+ 2. Can be verified with OpenSSL -+ 3. 
Is successfully handled by our wrapper after pyasn1 re-encoding -+ """ -+ # Test 1: Demonstrate that python-cryptography fails to load the malformed certificate -+ with self.assertRaises(Exception) as context: -+ cryptography.x509.load_der_x509_certificate(self.malformed_cert_der) -+ -+ # The specific exception type may vary, but it should fail -+ self.assertIsInstance(context.exception, Exception) -+ -+ # Test 2: Demonstrate that pyasn1 can handle the malformed certificate -+ try: -+ # Decode and re-encode using pyasn1 (simulating what the Certificate type does) -+ pyasn1_cert = pyasn1_decoder.decode(self.malformed_cert_der, asn1Spec=pyasn1_rfc2459.Certificate())[0] -+ reencoded_der = pyasn1_encoder.encode(pyasn1_cert) -+ -+ # Now cryptography should be able to load the re-encoded certificate -+ reencoded_cert = cryptography.x509.load_der_x509_certificate(reencoded_der) -+ self.assertIsNotNone(reencoded_cert) -+ -+ except Exception as e: -+ self.fail(f"pyasn1 should handle the malformed certificate, but got: {e}") -+ -+ # Test 3: Verify that our wrapper preserves the original bytes correctly -+ wrapper = wrap_certificate(reencoded_cert, self.malformed_cert_der) -+ -+ # The wrapper should preserve original bytes -+ self.assertTrue(wrapper.has_original_bytes) -+ self.assertEqual(wrapper.original_bytes, self.malformed_cert_der) -+ -+ # DER output should use original bytes -+ der_output = wrapper.public_bytes(Encoding.DER) -+ self.assertEqual(der_output, self.malformed_cert_der) -+ -+ # PEM output should be derived from original bytes -+ pem_output = wrapper.public_bytes(Encoding.PEM) -+ pem_str = pem_output.decode("utf-8") -+ -+ # Verify PEM format is correct -+ self.assertTrue(pem_str.startswith("-----BEGIN CERTIFICATE-----")) -+ self.assertTrue(pem_str.endswith("-----END CERTIFICATE-----\n")) -+ -+ # Test 4: Demonstrate OpenSSL can verify the certificate structure -+ # (Even without the root CA, OpenSSL should be able to parse the certificate) -+ try: -+ with 
tempfile.NamedTemporaryFile(mode="wb", suffix=".der", delete=False) as temp_file: -+ temp_file.write(self.malformed_cert_der) -+ temp_file.flush() -+ -+ # Use OpenSSL to parse the certificate (should succeed) -+ result = subprocess.run( -+ ["openssl", "x509", "-in", temp_file.name, "-inform", "DER", "-text", "-noout"], -+ capture_output=True, -+ text=True, -+ check=False, -+ ) -+ -+ # OpenSSL should successfully parse the certificate -+ self.assertEqual(result.returncode, 0) -+ self.assertIn("Nuvoton TPM Root CA 2111", result.stdout) -+ self.assertIn("Certificate:", result.stdout) -+ -+ except (subprocess.CalledProcessError, FileNotFoundError) as e: -+ # Skip if OpenSSL is not available, but don't fail the test -+ self.skipTest(f"OpenSSL not available for verification test: {e}") -+ -+ # Test 5: Verify certificate details are accessible through wrapper -+ # The subject should be empty (as shown in the OpenSSL output) -+ self.assertEqual(len(reencoded_cert.subject), 0) -+ -+ # The issuer should contain Nuvoton information -+ issuer_attrs = {} -+ for attr in reencoded_cert.issuer: -+ # Use dotted string representation to avoid accessing private _name -+ oid_name = attr.oid.dotted_string -+ if oid_name == "2.5.4.3": # Common Name OID -+ issuer_attrs["commonName"] = attr.value -+ self.assertIn("commonName", issuer_attrs) -+ self.assertEqual(issuer_attrs["commonName"], "Nuvoton TPM Root CA 2111") -+ -+ # Test 6: Demonstrate that even re-encoded certificates may have parsing issues -+ # This shows why preserving original bytes is crucial -+ try: -+ # Try to access extensions - this may fail due to malformed ASN.1 -+ extensions = list(reencoded_cert.extensions) -+ # If it succeeds, verify it has the expected Subject Alternative Name -+ # Subject Alternative Name OID is 2.5.29.17 -+ has_subject_alt_name = any(ext.oid.dotted_string == "2.5.29.17" for ext in extensions) -+ self.assertTrue(has_subject_alt_name, "EK certificate should have Subject Alternative Name extension") 
-+ except (ValueError, Exception) as e: -+ # This is actually expected for malformed certificates! -+ # Even after pyasn1 re-encoding, some parsing issues may remain -+ self.assertIn("parsing asn1", str(e).lower(), f"Expected ASN.1 parsing error, got: {e}") -+ # This demonstrates why our wrapper preserves original bytes - -+ # they maintain signature validity even when parsing has issues -+ -+ def test_certificate_chain_verification_simulation(self): -+ """ -+ Test that simulates certificate chain verification where original bytes matter. -+ This demonstrates why preserving original bytes is crucial for signature validation. -+ """ -+ # Create a wrapper with the malformed certificate -+ mock_reencoded_cert = Mock(spec=cryptography.x509.Certificate) -+ mock_reencoded_cert.subject = Mock() -+ mock_reencoded_cert.public_key.return_value = Mock() -+ -+ wrapper = wrap_certificate(mock_reencoded_cert, self.malformed_cert_der) -+ -+ # Simulate signature verification scenario -+ # In real verification, the signature is computed over the exact DER bytes -+ original_bytes_for_verification = wrapper.public_bytes(Encoding.DER) -+ -+ # Should get the original malformed bytes (preserving signature validity) -+ self.assertEqual(original_bytes_for_verification, self.malformed_cert_der) -+ -+ # If we didn't preserve original bytes, we'd get re-encoded bytes which would -+ # invalidate the signature even though the certificate content is the same -+ mock_reencoded_cert.public_bytes.return_value = b"reencoded_different_bytes" -+ -+ # Verify that using the wrapper gets original bytes, not re-encoded bytes -+ self.assertNotEqual(original_bytes_for_verification, b"reencoded_different_bytes") -+ self.assertEqual(original_bytes_for_verification, self.malformed_cert_der) -+ -+ -+if __name__ == "__main__": -+ unittest.main() -diff --git a/test/test_registrar_agent_cert_compliance.py b/test/test_registrar_agent_cert_compliance.py -new file mode 100644 -index 000000000..ede9b9f26 ---- 
/dev/null -+++ b/test/test_registrar_agent_cert_compliance.py -@@ -0,0 +1,289 @@ -+""" -+Integration tests for RegistrarAgent certificate compliance functionality. -+ -+This module tests the simplified certificate compliance checking methods -+to ensure they work correctly with the new CertificateWrapper-based approach. -+""" -+ -+import types -+import unittest -+from unittest.mock import Mock, patch -+ -+import cryptography.x509 -+ -+from keylime.certificate_wrapper import wrap_certificate -+from keylime.models.base.types.certificate import Certificate -+from keylime.models.registrar.registrar_agent import RegistrarAgent -+ -+ -+class TestRegistrarAgentCertCompliance(unittest.TestCase): -+ """Test cases for RegistrarAgent certificate compliance methods.""" -+ -+ # pylint: disable=protected-access,not-callable # Testing protected methods and dynamic method binding -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ # Create a test certificate -+ self.valid_cert_pem = """-----BEGIN CERTIFICATE----- -+MIIEnzCCA4egAwIBAgIEMV64bDANBgkqhkiG9w0BAQUFADBtMQswCQYDVQQGEwJE -+RTEQMA4GA1UECBMHQmF2YXJpYTEhMB8GA1UEChMYSW5maW5lb24gVGVjaG5vbG9n -+aWVzIEFHMQwwCgYDVQQLEwNBSU0xGzAZBgNVBAMTEklGWCBUUE0gRUsgUm9vdCBD -+QTAeFw0wNTEwMjAxMzQ3NDNaFw0yNTEwMjAxMzQ3NDNaMHcxCzAJBgNVBAYTAkRF -+MQ8wDQYDVQQIEwZTYXhvbnkxITAfBgNVBAoTGEluZmluZW9uIFRlY2hub2xvZ2ll -+cyBBRzEMMAoGA1UECxMDQUlNMSYwJAYDVQQDEx1JRlggVFBNIEVLIEludGVybWVk -+aWF0ZSBDQSAwMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALftPhYN -+t4rE+JnU/XOPICbOBLvfo6iA7nuq7zf4DzsAWBdsZEdFJQfaK331ihG3IpQnlQ2i -+YtDim289265f0J4OkPFpKeFU27CsfozVaNUm6UR/uzwA8ncxFc3iZLRMRNLru/Al -+VG053ULVDQMVx2iwwbBSAYO9pGiGbk1iMmuZaSErMdb9v0KRUyZM7yABiyDlM3cz -+UQX5vLWV0uWqxdGoHwNva5u3ynP9UxPTZWHZOHE6+14rMzpobs6Ww2RR8BgF96rh -+4rRAZEl8BXhwiQq4STvUXkfvdpWH4lzsGcDDtrB6Nt3KvVNvsKz+b07Dk+Xzt+EH -+NTf3Byk2HlvX+scCAwEAAaOCATswggE3MB0GA1UdDgQWBBQ4k8292HPEIzMV4bE7 -+qWoNI8wQxzAOBgNVHQ8BAf8EBAMCAgQwEgYDVR0TAQH/BAgwBgEB/wIBADBYBgNV 
-+HSABAf8ETjBMMEoGC2CGSAGG+EUBBy8BMDswOQYIKwYBBQUHAgEWLWh0dHA6Ly93 -+d3cudmVyaXNpZ24uY29tL3JlcG9zaXRvcnkvaW5kZXguaHRtbDCBlwYDVR0jBIGP -+MIGMgBRW65FEhWPWcrOu1EWWC/eUDlRCpqFxpG8wbTELMAkGA1UEBhMCREUxEDAO -+BgNVBAgTB0JhdmFyaWExITAfBgNVBAoTGEluZmluZW9uIFRlY2hub2xvZ2llcyBB -+RzEMMAoGA1UECxMDQUlNMRswGQYDVQQDExJJRlggVFBNIEVLIFJvb3QgQ0GCAQMw -+DQYJKoZIhvcNAQEFBQADggEBABJ1+Ap3rNlxZ0FW0aIgdzktbNHlvXWNxFdYIBbM -+OKjmbOos0Y4O60eKPu259XmMItCUmtbzF3oKYXq6ybARUT2Lm+JsseMF5VgikSlU -+BJALqpKVjwAds81OtmnIQe2LSu4xcTSavpsL4f52cUAu/maMhtSgN9mq5roYptq9 -+DnSSDZrX4uYiMPl//rBaNDBflhJ727j8xo9CCohF3yQUoQm7coUgbRMzyO64yMIO -+3fhb+Vuc7sNwrMOz3VJN14C3JMoGgXy0c57IP/kD5zGRvljKEvrRC2I147+fPeLS -+DueRMS6lblvRKiZgmGAg7YaKOkOaEmVDMQ+fTo2Po7hI5wc= -+-----END CERTIFICATE-----""" -+ -+ self.valid_cert = cryptography.x509.load_pem_x509_certificate(self.valid_cert_pem.encode()) -+ -+ # Malformed certificate that actually requires pyasn1 re-encoding -+ self.malformed_cert_b64 = ( -+ "MIIDUjCCAvegAwIBAgILAI5xYHQ14nH5hdYwCgYIKoZIzj0EAwIwVTFTMB8GA1UEAxMYTnV2b3Rv" -+ "biBUUE0gUm9vdCBDQSAyMTExMCUGA1UEChMeTnV2b3RvbiBUZWNobm9sb2d5IENvcnBvcmF0aW9u" -+ "MAkGA1UEBhMCVFcwHhcNMTkwNzIzMTcxNTEzWhcNMzkwNzE5MTcxNTEzWjAAMIIBIjANBgkqhkiG" -+ "9w0BAQEFAAOCAQ8AMIIBCgKCAQEAk8kCj7srY/Zlvm1795fVXdyX44w5qsd1m5VywMDgSOavzPKO" -+ "kgbHgQNx6Ak5+4Q43EJ/5qsaDBv59F8W7K69maUwcMNq1xpuq0V/LiwgJVAtc3CdvlxtwQrn7+Uq" -+ "ieIGf+i8sGxpeUCSmYHJPTHNHqjQnvUtdGoy/+WO0i7WsAvX3k/gHHr4p58a8urjJ1RG2Lk1g48D" -+ "ESwl+D7atQEPWzgjr6vK/s5KpLrn7M+dh97TUbG1510AOWBPP35MtT8IZbqC4hs2Ol16gT1M3a9e" -+ "+GaMZkItLUwV76vKDNEgTZG8M1C9OItA/xwzlfXbPepzpxWb4kzHS4qZoQtl4vBZrQIDAQABo4IB" -+ "NjCCATIwUAYDVR0RAQH/BEYwRKRCMEAxPjAUBgVngQUCARMLaWQ6NEU1NDQzMDAwEAYFZ4EFAgIT" -+ "B05QQ1Q3NXgwFAYFZ4EFAgMTC2lkOjAwMDcwMDAyMAwGA1UdEwEB/wQCMAAwEAYDVR0lBAkwBwYF" -+ "Z4EFCAEwHwYDVR0jBBgwFoAUI/TiKtO+N0pEl3KVSqKDrtdSVy4wDgYDVR0PAQH/BAQDAgUgMCIG" -+ "A1UdCQQbMBkwFwYFZ4EFAhAxDjAMDAMyLjACAQACAgCKMGkGCCsGAQUFBwEBBF0wWzBZBggrBgEF" -+ 
"BQcwAoZNaHR0cHM6Ly93d3cubnV2b3Rvbi5jb20vc2VjdXJpdHkvTlRDLVRQTS1FSy1DZXJ0L051" -+ "dm90b24gVFBNIFJvb3QgQ0EgMjExMS5jZXIwCgYIKoZIzj0EAwIDSQAwRgIhAPHOFiBDZd0dfml2" -+ "a/KlPFhmX7Ahpd0Wq11ZUW1/ixviAiEAlex8BB5nsR6w8QrANwCxc7fH/YnbjXfMCFiWzeZH7ps=" -+ ) -+ -+ # Create wrapped certificates for testing using Certificate type to ensure proper behavior -+ cert_type = Certificate() -+ -+ # Create compliant certificate (no original bytes needed) -+ self.compliant_wrapped_cert = wrap_certificate(self.valid_cert, None) -+ -+ # Create non-compliant certificate using the malformed cert data -+ self.non_compliant_wrapped_cert = cert_type.cast(self.malformed_cert_b64) -+ -+ def create_mock_registrar_agent(self): -+ """Create a mock RegistrarAgent with necessary attributes.""" -+ agent = Mock() -+ agent.changes = {} -+ agent.values = {} -+ agent._add_error = Mock() -+ -+ # Bind the actual methods to the mock instance -+ agent._check_cert_compliance = types.MethodType(RegistrarAgent._check_cert_compliance, agent) -+ agent._check_all_cert_compliance = types.MethodType(RegistrarAgent._check_all_cert_compliance, agent) -+ -+ return agent -+ -+ def test_check_cert_compliance_no_new_cert(self): -+ """Test _check_cert_compliance when no new certificate is provided.""" -+ agent = self.create_mock_registrar_agent() -+ agent.changes = {} # No new certificate -+ -+ result = agent._check_cert_compliance("ekcert") -+ self.assertTrue(result) -+ agent._add_error.assert_not_called() -+ -+ def test_check_cert_compliance_same_cert(self): -+ """Test _check_cert_compliance when new cert is same as old cert.""" -+ agent = self.create_mock_registrar_agent() -+ agent.changes = {"ekcert": self.compliant_wrapped_cert} -+ agent.values = {"ekcert": self.compliant_wrapped_cert} -+ -+ result = agent._check_cert_compliance("ekcert") -+ self.assertTrue(result) -+ agent._add_error.assert_not_called() -+ -+ def test_check_cert_compliance_different_cert_same_der(self): -+ """Test _check_cert_compliance when 
certificates have same DER bytes.""" -+ agent = self.create_mock_registrar_agent() -+ # Create two different wrapper objects but with same underlying certificate -+ cert1 = wrap_certificate(self.valid_cert, None) -+ cert2 = wrap_certificate(self.valid_cert, None) -+ -+ agent.changes = {"ekcert": cert1} -+ agent.values = {"ekcert": cert2} -+ -+ result = agent._check_cert_compliance("ekcert") -+ self.assertTrue(result) -+ agent._add_error.assert_not_called() -+ -+ @patch("keylime.config.get") -+ def test_check_cert_compliance_compliant_cert(self, mock_config): -+ """Test _check_cert_compliance with ASN.1 compliant certificate.""" -+ mock_config.return_value = "warn" # Default action -+ -+ agent = self.create_mock_registrar_agent() -+ agent.changes = {"ekcert": self.compliant_wrapped_cert} -+ agent.values = {} # No old certificate -+ -+ result = agent._check_cert_compliance("ekcert") -+ self.assertTrue(result) -+ agent._add_error.assert_not_called() -+ -+ @patch("keylime.config.get") -+ def test_check_cert_compliance_non_compliant_cert_warn(self, mock_config): -+ """Test _check_cert_compliance with non-compliant certificate (warn mode).""" -+ mock_config.return_value = "warn" # Warn action -+ -+ agent = self.create_mock_registrar_agent() -+ agent.changes = {"ekcert": self.non_compliant_wrapped_cert} -+ agent.values = {} # No old certificate -+ -+ result = agent._check_cert_compliance("ekcert") -+ self.assertFalse(result) -+ agent._add_error.assert_not_called() # Should not add error in warn mode -+ -+ @patch("keylime.config.get") -+ def test_check_cert_compliance_non_compliant_cert_reject(self, mock_config): -+ """Test _check_cert_compliance with non-compliant certificate (reject mode).""" -+ mock_config.return_value = "reject" # Reject action -+ -+ agent = self.create_mock_registrar_agent() -+ agent.changes = {"ekcert": self.non_compliant_wrapped_cert} -+ agent.values = {} # No old certificate -+ -+ result = agent._check_cert_compliance("ekcert") -+ 
self.assertFalse(result) -+ agent._add_error.assert_called_once() # Should add error in reject mode -+ -+ @patch("keylime.config.get") -+ def test_check_all_cert_compliance_no_non_compliant(self, mock_config): -+ """Test _check_all_cert_compliance when all certificates are compliant.""" -+ mock_config.return_value = "warn" -+ -+ agent = self.create_mock_registrar_agent() -+ agent.changes = { -+ "ekcert": self.compliant_wrapped_cert, -+ "iak_cert": self.compliant_wrapped_cert, -+ } -+ agent.values = {} -+ -+ # Should not raise any exceptions or log warnings -+ with patch("keylime.models.registrar.registrar_agent.logger") as mock_logger: -+ agent._check_all_cert_compliance() -+ mock_logger.warning.assert_not_called() -+ mock_logger.error.assert_not_called() -+ -+ @patch("keylime.config.get") -+ def test_check_all_cert_compliance_with_non_compliant_warn(self, mock_config): -+ """Test _check_all_cert_compliance with non-compliant certificates (warn mode).""" -+ mock_config.return_value = "warn" -+ -+ agent = self.create_mock_registrar_agent() -+ agent.changes = { -+ "ekcert": self.non_compliant_wrapped_cert, -+ "iak_cert": self.compliant_wrapped_cert, -+ "idevid_cert": self.non_compliant_wrapped_cert, -+ } -+ agent.values = {} -+ -+ with patch("keylime.models.registrar.registrar_agent.logger") as mock_logger: -+ agent._check_all_cert_compliance() -+ # Should log warning for non-compliant certificates -+ mock_logger.warning.assert_called_once() -+ format_string = mock_logger.warning.call_args[0][0] -+ cert_names = mock_logger.warning.call_args[0][1] -+ self.assertIn("Certificate(s) %s may not conform", format_string) -+ self.assertEqual("'ekcert' and 'idevid_cert'", cert_names) -+ -+ @patch("keylime.config.get") -+ def test_check_all_cert_compliance_with_non_compliant_reject(self, mock_config): -+ """Test _check_all_cert_compliance with non-compliant certificates (reject mode).""" -+ mock_config.return_value = "reject" -+ -+ agent = self.create_mock_registrar_agent() -+ 
agent.changes = { -+ "ekcert": self.non_compliant_wrapped_cert, -+ "mtls_cert": self.non_compliant_wrapped_cert, -+ } -+ agent.values = {} -+ -+ with patch("keylime.models.registrar.registrar_agent.logger") as mock_logger: -+ agent._check_all_cert_compliance() -+ # Should log error for non-compliant certificates -+ mock_logger.error.assert_called_once() -+ format_string = mock_logger.error.call_args[0][0] -+ cert_names = mock_logger.error.call_args[0][1] -+ self.assertIn("Certificate(s) %s may not conform", format_string) -+ self.assertIn("were rejected due to config", format_string) -+ self.assertEqual("'ekcert' and 'mtls_cert'", cert_names) -+ -+ @patch("keylime.config.get") -+ def test_check_all_cert_compliance_ignore_mode(self, mock_config): -+ """Test _check_all_cert_compliance with ignore mode.""" -+ mock_config.return_value = "ignore" -+ -+ agent = self.create_mock_registrar_agent() -+ agent.changes = { -+ "ekcert": self.non_compliant_wrapped_cert, -+ "iak_cert": self.non_compliant_wrapped_cert, -+ } -+ agent.values = {} -+ -+ with patch("keylime.models.registrar.registrar_agent.logger") as mock_logger: -+ agent._check_all_cert_compliance() -+ # Should not log anything in ignore mode -+ mock_logger.warning.assert_not_called() -+ mock_logger.error.assert_not_called() -+ -+ def test_check_all_cert_compliance_single_non_compliant(self): -+ """Test _check_all_cert_compliance message formatting for single certificate.""" -+ agent = self.create_mock_registrar_agent() -+ agent.changes = {"ekcert": self.non_compliant_wrapped_cert} -+ agent.values = {} -+ -+ with patch("keylime.config.get", return_value="warn"): -+ with patch("keylime.models.registrar.registrar_agent.logger") as mock_logger: -+ agent._check_all_cert_compliance() -+ # Should format message correctly for single certificate -+ format_string = mock_logger.warning.call_args[0][0] -+ cert_names = mock_logger.warning.call_args[0][1] -+ self.assertIn("Certificate(s) %s may not conform", format_string) -+ 
self.assertEqual("'ekcert'", cert_names) -+ self.assertNotIn(" and", cert_names) # Should not have "and" for single cert -+ -+ def test_field_names_coverage(self): -+ """Test that all expected certificate field names are checked.""" -+ agent = self.create_mock_registrar_agent() -+ agent.changes = { -+ "ekcert": self.non_compliant_wrapped_cert, -+ "iak_cert": self.non_compliant_wrapped_cert, -+ "idevid_cert": self.non_compliant_wrapped_cert, -+ "mtls_cert": self.non_compliant_wrapped_cert, -+ } -+ agent.values = {} -+ -+ with patch("keylime.config.get", return_value="warn"): -+ with patch("keylime.models.registrar.registrar_agent.logger") as mock_logger: -+ agent._check_all_cert_compliance() -+ # Should check all four certificate fields -+ format_string = mock_logger.warning.call_args[0][0] -+ cert_names = mock_logger.warning.call_args[0][1] -+ self.assertIn("Certificate(s) %s may not conform", format_string) -+ expected_names = "'ekcert', 'iak_cert', 'idevid_cert' and 'mtls_cert'" -+ self.assertEqual(expected_names, cert_names) -+ -+ -+if __name__ == "__main__": -+ unittest.main() diff --git a/0012-fix-mem-leak-remove-unbounded-functools.cache-from-l.patch b/0012-fix-mem-leak-remove-unbounded-functools.cache-from-l.patch new file mode 100644 index 0000000..ee1e746 --- /dev/null +++ b/0012-fix-mem-leak-remove-unbounded-functools.cache-from-l.patch @@ -0,0 +1,274 @@ +From fb06907b383512a6942dc489a62eee0da92fbac6 Mon Sep 17 00:00:00 2001 +From: Sergio Correia +Date: Wed, 18 Mar 2026 05:34:30 +0000 +Subject: [PATCH 12/12] fix(mem leak) - remove unbounded functools.cache from + latest_attestation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +@cache (= lru_cache(maxsize=None)) on the latest_attestation property +creates a class-level cache keyed by self. 
Since VerifierAgent.get() +creates a new instance per call, and the push-mode controller calls it +2-3x per attestation cycle, the cache permanently holds strong references +to every instance (and its eagerly-loaded IMAPolicy data), preventing +garbage collection. + +The property is accessed a few times per attestation cycle — a simple DB +query each time is negligible compared to the cost of permanently +retaining every VerifierAgent instance in memory. + +Additionally, cache `agent.latest_attestation` in a local variable in +each controller method to avoid redundant DB queries per request, and +add an inline warning comment to prevent re-introduction of the cache. + +Assisted-by: Claude Sonnet 4.6 +Signed-off-by: Sergio Correia +--- + keylime/models/verifier/verifier_agent.py | 5 +- + .../web/verifier/attestation_controller.py | 58 +++++++++++-------- + test/test_attestation_controller.py | 8 +-- + test/test_attestation_model.py | 26 +++++++++ + 4 files changed, 65 insertions(+), 32 deletions(-) + +diff --git a/keylime/models/verifier/verifier_agent.py b/keylime/models/verifier/verifier_agent.py +index 0373e87..515df07 100644 +--- a/keylime/models/verifier/verifier_agent.py ++++ b/keylime/models/verifier/verifier_agent.py +@@ -1,7 +1,5 @@ + # pyright: reportAttributeAccessIssue=false + # ORM model with dynamically-created attributes from metaclasses +-from functools import cache +- + from keylime.models.base import * + + +@@ -97,8 +95,9 @@ class VerifierAgent(PersistableModel): + # TODO: remove above, based on feedback + + @property +- @cache # pylint: disable=method-cache-max-size-none # Intentional unbounded cache for ORM property + def latest_attestation(self): ++ # NOTE: Do not cache this property. Caching causes a memory leak because ++ # the cache holds strong references to every VerifierAgent instance. 
+ # Lazy import to avoid circular dependency + import keylime.models.verifier as verifier_models # pylint: disable=import-outside-toplevel + +diff --git a/keylime/web/verifier/attestation_controller.py b/keylime/web/verifier/attestation_controller.py +index d660c0f..5951e4c 100755 +--- a/keylime/web/verifier/attestation_controller.py ++++ b/keylime/web/verifier/attestation_controller.py +@@ -205,10 +205,12 @@ class AttestationController(Controller): + if not agent: + APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self) + +- if not agent.latest_attestation: # type: ignore[union-attr] ++ latest = agent.latest_attestation # type: ignore[union-attr] ++ ++ if not latest: + APIError("not_found", f"No attestation exists for agent '{agent_id}'.").send_via(self) + +- self.show(agent_id, agent.latest_attestation.index, **_params) # type: ignore[union-attr, no-untyped-call] ++ self.show(agent_id, latest.index, **_params) # type: ignore[union-attr, no-untyped-call] + + # POST /v3[.:minor]/agents/:agent_id/attestations + @Controller.require_json_api +@@ -231,13 +233,15 @@ class AttestationController(Controller): + f"attestation not passing verification." 
+ ).send_via(self) + ++ latest = agent.latest_attestation # type: ignore[union-attr] ++ + # Per enhancement #103, section "Error Conditions for Attestation Protocol": + # If last attestation failed AND policy hasn't changed, return 503 with exponential backoff + # Skip this for PUSH mode agents to allow immediate recovery from timeout-induced failures + if ( +- agent.latest_attestation # type: ignore[union-attr] +- and agent.latest_attestation.evaluation == "fail" # type: ignore[union-attr] +- and agent.latest_attestation.stage == "verification_complete" # type: ignore[union-attr] ++ latest ++ and latest.evaluation == "fail" ++ and latest.stage == "verification_complete" + and not agent_util.is_push_mode_agent(agent) # type: ignore[arg-type] + ): + # Calculate retry-after using exponential backoff (same formula as rest of codebase) +@@ -257,19 +261,19 @@ class AttestationController(Controller): + f"If the failure was due to policy violation, update the policy or fix the agent before retrying." + ).send_via(self) + +- if agent.latest_attestation and agent.latest_attestation.verification_in_progress: # type: ignore[union-attr] +- self.set_header("Retry-After", str(agent.latest_attestation.seconds_to_decision)) # type: ignore[no-untyped-call, union-attr] ++ if latest and latest.verification_in_progress: ++ self.set_header("Retry-After", str(latest.seconds_to_decision)) # type: ignore[no-untyped-call] + APIError("verification_in_progress", 503).set_detail( + f"Cannot create attestation for agent '{agent_id}' while the last attestation is still being " + f"verified. The active verification task is expected to complete or time out within " +- f"{agent.latest_attestation.seconds_to_decision} seconds." # type: ignore[union-attr] ++ f"{latest.seconds_to_decision} seconds." 
+ ).send_via(self) + +- if agent.latest_attestation and not agent.latest_attestation.ready_for_next_attestation: # type: ignore[union-attr] +- self.set_header("Retry-After", str(agent.latest_attestation.seconds_to_next_attestation)) # type: ignore[no-untyped-call, union-attr] ++ if latest and not latest.ready_for_next_attestation: ++ self.set_header("Retry-After", str(latest.seconds_to_next_attestation)) # type: ignore[no-untyped-call] + APIError("premature_attestation", 429).set_detail( + f"Cannot create attestation for agent '{agent_id}' before the configured interval has elapsed. " +- f"Wait {agent.latest_attestation.seconds_to_next_attestation} seconds before trying again." # type: ignore[union-attr] ++ f"Wait {latest.seconds_to_next_attestation} seconds before trying again." + ).send_via(self) + + attestation_record = Attestation.create(agent) # type: ignore[no-untyped-call] +@@ -314,25 +318,27 @@ class AttestationController(Controller): + if not agent: + APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self) + ++ latest = agent.latest_attestation # type: ignore[union-attr] ++ + # If there are no attestations for the agent, the attestation at 'index' does not exist +- if not agent.latest_attestation: # type: ignore[union-attr] ++ if not latest: + APIError("not_found", f"No attestation {index} exists for agent '{agent_id}'.").send_via(self) + + # Only allow the attestation at 'index' to be updated if it is the latest attestation +- if str(agent.latest_attestation.index) != index: # type: ignore[union-attr] ++ if str(latest.index) != str(index): # type: ignore[union-attr] + APIError("old_attestation", 403).set_detail( + f"Attestation {index} is not the latest for agent '{agent_id}'. Only evidence for the most recent " + f"attestation may be updated." 
+ ).send_via(self) + +- if agent.latest_attestation.stage != "awaiting_evidence": # type: ignore[union-attr] ++ if latest.stage != "awaiting_evidence": # type: ignore[union-attr] + APIError("evidence_immutable", 403).set_detail( + f"Cannot alter evidence for attestation {index} which has already been received and accepted." + ).send_via(self) + +- if not agent.latest_attestation.challenges_valid: # type: ignore[union-attr] ++ if not latest.challenges_valid: # type: ignore[union-attr] + APIError("challenges_expired", 403).set_detail( +- f"Challenges for attestation {index} expired at {agent.latest_attestation.challenges_expire_at}. " # type: ignore[union-attr] ++ f"Challenges for attestation {index} expired at {latest.challenges_expire_at}. " # type: ignore[union-attr] + f"Create a new attestation and try again." + ).send_via(self) + +@@ -341,21 +347,21 @@ class AttestationController(Controller): + "Request body must include attestation evidence data." + ).send_via(self) + +- agent.latest_attestation.receive_evidence(attestation) # type: ignore[no-untyped-call, union-attr] +- driver = EngineDriver(agent.latest_attestation).process_evidence() # type: ignore[no-untyped-call, union-attr] ++ latest.receive_evidence(attestation) # type: ignore[no-untyped-call, union-attr] ++ driver = EngineDriver(latest).process_evidence() # type: ignore[no-untyped-call, union-attr] + + # Send error if the received evidence appears invalid +- if not agent.latest_attestation.changes_valid: # type: ignore[union-attr] +- APIMessageBody.from_record_errors(agent.latest_attestation).send_via(self) # type: ignore[no-untyped-call, union-attr] ++ if not latest.changes_valid: # type: ignore[union-attr] ++ APIMessageBody.from_record_errors(latest).send_via(self) # type: ignore[no-untyped-call, union-attr] + +- agent.latest_attestation.commit_changes() # type: ignore[no-untyped-call, union-attr] ++ latest.commit_changes() # type: ignore[no-untyped-call, union-attr] + + # Send acknowledgement of 
received evidence, but continue executing + APIMessageBody( +- APIResource("attestation", agent.latest_attestation.render_evidence_acknowledged()).include( # type: ignore[no-untyped-call, union-attr] ++ APIResource("attestation", latest.render_evidence_acknowledged()).include( # type: ignore[no-untyped-call, union-attr] + APILink("self", f"/{self.version}/agents/{agent_id}/attestations/{index}") + ), +- APIMeta("seconds_to_next_attestation", agent.latest_attestation.seconds_to_next_attestation), # type: ignore[union-attr] ++ APIMeta("seconds_to_next_attestation", latest.seconds_to_next_attestation), # type: ignore[union-attr] + ).send_via( + self, code=202, stop_action=False + ) # type: ignore[no-untyped-call] +@@ -372,8 +378,10 @@ class AttestationController(Controller): + if not agent: + APIError("not_found", f"No enrolled agent with ID '{agent_id}'.").send_via(self) + +- if not agent.latest_attestation: # type: ignore[union-attr] ++ latest = agent.latest_attestation # type: ignore[union-attr] ++ ++ if not latest: + APIError("not_found", f"No attestation exists for agent '{agent_id}'.").send_via(self) + + # Call update with the same params, which includes attestation +- self.update(agent_id, agent.latest_attestation.index, **params) # type: ignore[union-attr] ++ self.update(agent_id, latest.index, **params) # type: ignore[union-attr] +diff --git a/test/test_attestation_controller.py b/test/test_attestation_controller.py +index e644e10..37e059c 100644 +--- a/test/test_attestation_controller.py ++++ b/test/test_attestation_controller.py +@@ -43,7 +43,7 @@ class TestAttestationControllerParameterHandling(unittest.TestCase): + self.controller._api_request_body = Mock() # pylint: disable=protected-access + + self.agent_id = "test-agent-123" +- self.attestation_index = "1" # String, as it comes from URL route ++ self.attestation_index = 1 # Integer, as it comes from the ORM Integer column + + # Mock attestation evidence data + self.attestation_data = { +@@ -270,7 
+270,7 @@ class TestAttestationControllerErrorMessages(unittest.TestCase): + self.controller._api_request_body = Mock() # pylint: disable=protected-access + + self.agent_id = "test-agent-123" +- self.attestation_index = "1" # String, as it comes from URL route ++ self.attestation_index = 1 # Integer, as it comes from the ORM Integer column + + @patch("keylime.web.verifier.attestation_controller.APIError") + @patch("keylime.web.verifier.attestation_controller.VerifierAgent") +@@ -772,7 +772,7 @@ class TestAttestationControllerGetMethods(unittest.TestCase): + # Setup mock agent with latest attestation + mock_agent = Mock(spec=VerifierAgent) + mock_attestation = Mock() +- mock_attestation.index = "5" ++ mock_attestation.index = 5 + mock_attestation.render_state = Mock(return_value={}) + mock_agent.latest_attestation = mock_attestation + mock_agent_class.get.return_value = mock_agent +@@ -792,7 +792,7 @@ class TestAttestationControllerGetMethods(unittest.TestCase): + self.controller.show_latest(self.agent_id) + + # Verify it called show() with the latest attestation index +- mock_attestation_class.get.assert_called_once_with(agent_id=self.agent_id, index="5") ++ mock_attestation_class.get.assert_called_once_with(agent_id=self.agent_id, index=5) + + + if __name__ == "__main__": +diff --git a/test/test_attestation_model.py b/test/test_attestation_model.py +index 9bc1abb..2651bc2 100644 +--- a/test/test_attestation_model.py ++++ b/test/test_attestation_model.py +@@ -964,5 +964,31 @@ class TestAttestationModel(unittest.TestCase): + self.assertFalse(attestation.ready_for_next_attestation) + + ++class TestVerifierAgentLatestAttestation(unittest.TestCase): ++ """Test that VerifierAgent.latest_attestation is not cached (memory leak fix)""" ++ ++ def test_latest_attestation_not_cached(self): ++ """Verify the property has no functools.cache wrapper""" ++ prop_fget = VerifierAgent.latest_attestation.fget ++ # @cache adds cache_info and __wrapped__ attributes ++ 
self.assertFalse(hasattr(prop_fget, "cache_info")) ++ self.assertFalse(hasattr(prop_fget, "__wrapped__")) ++ ++ def test_latest_attestation_calls_db_each_time(self): ++ """Verify each access queries the DB (no stale cache)""" ++ with patch("keylime.models.verifier.Attestation.get_latest") as mock_get: ++ mock_get.return_value = None ++ ++ # Call the underlying function directly to avoid needing db_manager setup ++ prop_fget = VerifierAgent.latest_attestation.fget ++ assert prop_fget is not None ++ fake_agent = MagicMock() ++ fake_agent.agent_id = "test-agent" ++ ++ prop_fget(fake_agent) ++ prop_fget(fake_agent) ++ self.assertEqual(mock_get.call_count, 2) ++ ++ + if __name__ == "__main__": + unittest.main() +-- +2.52.0 + diff --git a/0012-keylime-policy-avoid-opening-dev-stdout.patch b/0012-keylime-policy-avoid-opening-dev-stdout.patch deleted file mode 100644 index eea629d..0000000 --- a/0012-keylime-policy-avoid-opening-dev-stdout.patch +++ /dev/null @@ -1,37 +0,0 @@ -From e9a6615ea3ab60b9248377071ea2f5cc7b45dfda Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Thu, 28 Aug 2025 14:33:59 +0100 -Subject: [PATCH] policy/sign: use print() when writing to /dev/stdout - -Signed-off-by: Sergio Correia ---- - keylime/policy/sign_runtime_policy.py | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/keylime/policy/sign_runtime_policy.py b/keylime/policy/sign_runtime_policy.py -index 87529065d..316ee15aa 100644 ---- a/keylime/policy/sign_runtime_policy.py -+++ b/keylime/policy/sign_runtime_policy.py -@@ -2,6 +2,7 @@ - - import argparse - import json -+import sys - from json.decoder import JSONDecodeError - from typing import TYPE_CHECKING, Any, Optional - -@@ -191,8 +192,12 @@ def sign_runtime_policy(args: argparse.Namespace) -> Optional[str]: - return None - - try: -- with open(args.output_file, "wb") as f: -- f.write(signed_policy.encode("UTF-8")) -+ if args.output_file == "/dev/stdout": -+ # Let's simply print to stdout the regular way. 
-+ print(signed_policy, file=sys.stdout) -+ else: -+ with open(args.output_file, "wb") as f: -+ f.write(signed_policy.encode("UTF-8")) - except Exception as exc: - logger.error("Unable to write signed policy to destination file '%s': %s", args.output_file, exc) - return None diff --git a/0013-Add-shared-memory-infrastructure-for-multiprocess-co.patch b/0013-Add-shared-memory-infrastructure-for-multiprocess-co.patch deleted file mode 100644 index 34bca9f..0000000 --- a/0013-Add-shared-memory-infrastructure-for-multiprocess-co.patch +++ /dev/null @@ -1,1107 +0,0 @@ -From 1eaad216e290d5935f59e9137a233ac8516a8afb Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Tue, 9 Dec 2025 11:11:43 +0000 -Subject: [PATCH 13/14] Add shared memory infrastructure for multiprocess - communication - -Backport of upstream https://github.com/keylime/keylime/pull/1817/commits/1024e19d - -Signed-off-by: Sergio Correia ---- - keylime-selinux-42.1.2/keylime.te | 2 + - keylime/cloud_verifier_tornado.py | 89 ++--- - keylime/cmd/verifier.py | 6 + - keylime/config.py | 87 +++++ - keylime/shared_data.py | 513 +++++++++++++++++++++++++ - keylime/tpm/tpm_main.py | 17 +- - keylime/web/base/default_controller.py | 6 + - test/test_shared_data.py | 199 ++++++++++ - 8 files changed, 868 insertions(+), 51 deletions(-) - create mode 100644 keylime/shared_data.py - create mode 100644 test/test_shared_data.py - -diff --git a/keylime-selinux-42.1.2/keylime.te b/keylime-selinux-42.1.2/keylime.te -index 2c6a59e..8b8a615 100644 ---- a/keylime-selinux-42.1.2/keylime.te -+++ b/keylime-selinux-42.1.2/keylime.te -@@ -77,6 +77,8 @@ optional_policy(` - allow keylime_server_t self:key { create read setattr view write }; - allow keylime_server_t self:netlink_route_socket { create_stream_socket_perms nlmsg_read }; - allow keylime_server_t self:udp_socket create_stream_socket_perms; -+allow keylime_server_t keylime_tmp_t:sock_file { create write }; -+allow keylime_server_t self:unix_stream_socket connectto; - - 
fs_dontaudit_search_cgroup_dirs(keylime_server_t) - -diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py -index 89aa703..67ba8af 100644 ---- a/keylime/cloud_verifier_tornado.py -+++ b/keylime/cloud_verifier_tornado.py -@@ -6,8 +6,8 @@ import signal - import sys - import traceback - from concurrent.futures import ThreadPoolExecutor --from multiprocessing import Process - from contextlib import contextmanager -+from multiprocessing import Process - from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast - - import tornado.httpserver -@@ -27,6 +27,7 @@ from keylime import ( - json, - keylime_logging, - revocation_notifier, -+ shared_data, - signing, - tornado_requests, - web_util, -@@ -43,7 +44,6 @@ from keylime.mba import mba - - logger = keylime_logging.init_logging("verifier") - --GLOBAL_POLICY_CACHE: Dict[str, Dict[str, str]] = {} - - set_severity_config(config.getlist("verifier", "severity_labels"), config.getlist("verifier", "severity_policy")) - -@@ -140,44 +140,41 @@ def _from_db_obj(agent_db_obj: VerfierMain) -> Dict[str, Any]: - return agent_dict - - --def verifier_read_policy_from_cache(ima_policy_data: Dict[str, str]) -> str: -- checksum = ima_policy_data.get("checksum", "") -- name = ima_policy_data.get("name", "empty") -- agent_id = ima_policy_data.get("agent_id", "") -+def verifier_read_policy_from_cache(stored_agent: VerfierMain) -> str: -+ checksum = "" -+ name = "empty" -+ agent_id = str(stored_agent.agent_id) - -- if not agent_id: -- return "" -+ # Initialize agent policy cache if it doesn't exist -+ shared_data.initialize_agent_policy_cache(agent_id) - -- if agent_id not in GLOBAL_POLICY_CACHE: -- GLOBAL_POLICY_CACHE[agent_id] = {} -- GLOBAL_POLICY_CACHE[agent_id][""] = "" -+ if stored_agent.ima_policy: -+ checksum = str(stored_agent.ima_policy.checksum) -+ name = stored_agent.ima_policy.name - -- if checksum not in GLOBAL_POLICY_CACHE[agent_id]: -- if len(GLOBAL_POLICY_CACHE[agent_id]) > 1: -- # 
Perform a cleanup of the contents, IMA policy checksum changed -- logger.debug( -- "Cleaning up policy cache for policy named %s, with checksum %s, used by agent %s", -- name, -- checksum, -- agent_id, -- ) -+ # Check if policy is already cached -+ cached_policy = shared_data.get_cached_policy(agent_id, checksum) -+ if cached_policy is not None: -+ return cached_policy - -- GLOBAL_POLICY_CACHE[agent_id] = {} -- GLOBAL_POLICY_CACHE[agent_id][""] = "" -+ # Policy not cached, need to clean up and load from database -+ shared_data.cleanup_agent_policy_cache(agent_id, checksum) - -- logger.debug( -- "IMA policy named %s, with checksum %s, used by agent %s is not present on policy cache on this verifier, performing SQLAlchemy load", -- name, -- checksum, -- agent_id, -- ) -+ logger.debug( -+ "IMA policy named %s, with checksum %s, used by agent %s is not present on policy cache on this verifier, performing SQLAlchemy load", -+ name, -+ checksum, -+ agent_id, -+ ) - -- # Get the large ima_policy content - it's already loaded in ima_policy_data -- ima_policy = ima_policy_data.get("ima_policy", "") -- assert isinstance(ima_policy, str) -- GLOBAL_POLICY_CACHE[agent_id][checksum] = ima_policy -+ # Actually contacts the database and load the (large) ima_policy column for "allowlists" table -+ ima_policy = stored_agent.ima_policy.ima_policy -+ assert isinstance(ima_policy, str) - -- return GLOBAL_POLICY_CACHE[agent_id][checksum] -+ # Cache the policy for future use -+ shared_data.cache_policy(agent_id, checksum, ima_policy) -+ -+ return ima_policy - - - def verifier_db_delete_agent(session: Session, agent_id: str) -> None: -@@ -475,12 +472,11 @@ class AgentsHandler(BaseHandler): - return - - # Cleanup the cache when the agent is deleted. Do it early. 
-- if agent_id in GLOBAL_POLICY_CACHE: -- del GLOBAL_POLICY_CACHE[agent_id] -- logger.debug( -- "Cleaned up policy cache from all entries used by agent %s", -- agent_id, -- ) -+ shared_data.clear_agent_policy_cache(agent_id) -+ logger.debug( -+ "Cleaned up policy cache from all entries used by agent %s", -+ agent_id, -+ ) - - op_state = agent.operational_state - if op_state in (states.SAVED, states.FAILED, states.TERMINATED, states.TENANT_FAILED, states.INVALID_QUOTE): -@@ -1763,7 +1759,6 @@ async def process_agent( - stored_agent = None - - # First database operation - read agent data and extract all needed data within session context -- ima_policy_data = {} - mb_policy_data = None - with session_context() as session: - try: -@@ -1779,15 +1774,6 @@ async def process_agent( - .first() - ) - -- # Extract IMA policy data within session context to avoid DetachedInstanceError -- if stored_agent and stored_agent.ima_policy: -- ima_policy_data = { -- "checksum": str(stored_agent.ima_policy.checksum), -- "name": stored_agent.ima_policy.name, -- "agent_id": str(stored_agent.agent_id), -- "ima_policy": stored_agent.ima_policy.ima_policy, # Extract the large content too -- } -- - # Extract MB policy data within session context - if stored_agent and stored_agent.mb_policy: - mb_policy_data = stored_agent.mb_policy.mb_policy -@@ -1869,7 +1855,10 @@ async def process_agent( - logger.error("SQLAlchemy Error for agent ID %s: %s", agent["agent_id"], e) - - # Load agent's IMA policy -- runtime_policy = verifier_read_policy_from_cache(ima_policy_data) -+ if stored_agent: -+ runtime_policy = verifier_read_policy_from_cache(stored_agent) -+ else: -+ runtime_policy = "" - - # Get agent's measured boot policy - mb_policy = mb_policy_data -diff --git a/keylime/cmd/verifier.py b/keylime/cmd/verifier.py -index f3e1a86..1f9f4e5 100644 ---- a/keylime/cmd/verifier.py -+++ b/keylime/cmd/verifier.py -@@ -1,6 +1,7 @@ - from keylime import cloud_verifier_tornado, config, keylime_logging - from 
keylime.common.migrations import apply - from keylime.mba import mba -+from keylime.shared_data import initialize_shared_memory - - logger = keylime_logging.init_logging("verifier") - -@@ -10,6 +11,11 @@ def main() -> None: - if config.has_option("verifier", "auto_migrate_db") and config.getboolean("verifier", "auto_migrate_db"): - apply("cloud_verifier") - -+ # Initialize shared memory BEFORE creating server instance -+ # This MUST happen before verifier instantiation and worker forking -+ logger.info("Initializing shared memory manager in main process before server creation") -+ initialize_shared_memory() -+ - # Explicitly load and initialize measured boot components - mba.load_imports() - cloud_verifier_tornado.main() -diff --git a/keylime/config.py b/keylime/config.py -index e7ac634..b5cd546 100644 ---- a/keylime/config.py -+++ b/keylime/config.py -@@ -114,6 +114,85 @@ if "KEYLIME_LOGGING_CONFIG" in os.environ: - _config: Optional[Dict[str, RawConfigParser]] = None - - -+def _check_file_permissions(component: str, file_path: str) -> bool: -+ """Check if a config file has correct permissions and is readable. 
-+ -+ Args: -+ component: The component name (e.g., 'verifier', 'agent') -+ file_path: Path to the config file -+ -+ Returns: -+ True if file is readable, False otherwise -+ """ -+ if not os.path.exists(file_path): -+ return False -+ -+ if not os.access(file_path, os.R_OK): -+ import grp # pylint: disable=import-outside-toplevel -+ import pwd # pylint: disable=import-outside-toplevel -+ import stat # pylint: disable=import-outside-toplevel -+ -+ try: -+ file_stat = os.stat(file_path) -+ owner = pwd.getpwuid(file_stat.st_uid).pw_name -+ group = grp.getgrgid(file_stat.st_gid).gr_name -+ mode = stat.filemode(file_stat.st_mode) -+ except Exception: -+ owner = group = mode = "unknown" -+ -+ base_logger.error( # pylint: disable=logging-not-lazy -+ "=" * 80 -+ + "\n" -+ + "CRITICAL CONFIG ERROR: Config file %s exists but is not readable!\n" -+ + "File permissions: %s (owner: %s, group: %s)\n" -+ + "The keylime_%s service needs read access to this file.\n" -+ + "Fix with: chown keylime:keylime %s && chmod 440 %s\n" -+ + "=" * 80, -+ file_path, -+ mode, -+ owner, -+ group, -+ component, -+ file_path, -+ file_path, -+ ) -+ return False -+ -+ return True -+ -+ -+def _validate_config_files(component: str, file_paths: List[str], files_read: List[str]) -> None: -+ """Validate that config files were successfully parsed. 
-+ -+ Args: -+ component: The component name (e.g., 'verifier', 'agent') -+ file_paths: List of file paths that were attempted to be read -+ files_read: List of files that ConfigParser successfully read -+ """ -+ for file_path in file_paths: -+ # Check file permissions first -+ if not _check_file_permissions(component, file_path): -+ continue -+ -+ if file_path not in files_read: -+ base_logger.error( # pylint: disable=logging-not-lazy -+ "=" * 80 -+ + "\n" -+ + "CRITICAL CONFIG ERROR: Config file %s exists but failed to parse!\n" -+ + "This usually indicates duplicate keys within the same file.\n" -+ + "Common issues:\n" -+ + " - Same option appears multiple times in the same [%s] section\n" -+ + " - Empty values (key = ) conflicting with defined values\n" -+ + " - Invalid INI file syntax\n" -+ + "Please check the file for duplicate entries.\n" -+ + "You can validate the file with: python3 -c \"import configparser; c = configparser.RawConfigParser(); print(c.read('%s'))\"\n" -+ + "=" * 80, -+ file_path, -+ component, -+ file_path, -+ ) -+ -+ - def get_config(component: str) -> RawConfigParser: - """Find the configuration file to use for the given component and apply the - overrides defined by configuration snippets. 
-@@ -216,6 +295,10 @@ def get_config(component: str) -> RawConfigParser: - - # Validate that at least one config file is present - config_file = _config[component].read(c) -+ -+ # Validate the config file was parsed successfully -+ _validate_config_files(component, [c], config_file) -+ - if config_file: - base_logger.info("Reading configuration from %s", config_file) - -@@ -230,6 +313,10 @@ def get_config(component: str) -> RawConfigParser: - [os.path.join(d, f) for f in os.listdir(d) if f and os.path.isfile(os.path.join(d, f))] - ) - applied_snippets = _config[component].read(snippets) -+ -+ # Validate all snippet files were parsed successfully -+ _validate_config_files(component, snippets, applied_snippets) -+ - if applied_snippets: - base_logger.info("Applied configuration snippets from %s", d) - -diff --git a/keylime/shared_data.py b/keylime/shared_data.py -new file mode 100644 -index 0000000..23a3d81 ---- /dev/null -+++ b/keylime/shared_data.py -@@ -0,0 +1,513 @@ -+"""Shared memory management for keylime multiprocess applications. -+ -+This module provides thread-safe shared data management between processes -+using multiprocessing.Manager(). -+""" -+ -+import atexit -+import multiprocessing as mp -+import threading -+import time -+from typing import Any, Dict, List, Optional -+ -+from keylime import keylime_logging -+ -+logger = keylime_logging.init_logging("shared_data") -+ -+ -+class FlatDictView: -+ """A dictionary-like view over a flat key-value store. -+ -+ This class provides dict-like access to a subset of keys in a flat store, -+ identified by a namespace prefix. This avoids the nested DictProxy issues. 
-+ -+ Example: -+ store = manager.dict() # Flat store -+ view = FlatDictView(store, lock, "sessions") -+ view["123"] = "data" # Stores as "dict:sessions:123" in flat store -+ val = view["123"] # Retrieves from "dict:sessions:123" -+ """ -+ -+ def __init__(self, store: Any, lock: Any, namespace: str) -> None: -+ self._store = store -+ self._lock = lock -+ self._namespace = namespace -+ -+ def _make_key(self, key: Any) -> str: -+ """Convert user key to internal flat key with namespace prefix.""" -+ return f"dict:{self._namespace}:{key}" -+ -+ def __getitem__(self, key: Any) -> Any: -+ with self._lock: -+ return self._store[self._make_key(key)] -+ -+ def __setitem__(self, key: Any, value: Any) -> None: -+ flat_key = self._make_key(key) -+ with self._lock: -+ self._store[flat_key] = value -+ -+ def __delitem__(self, key: Any) -> None: -+ flat_key = self._make_key(key) -+ with self._lock: -+ del self._store[flat_key] -+ -+ def __contains__(self, key: Any) -> bool: -+ return self._make_key(key) in self._store -+ -+ def get(self, key: Any, default: Any = None) -> Any: -+ with self._lock: -+ return self._store.get(self._make_key(key), default) -+ -+ def keys(self) -> List[Any]: -+ """Return keys in this namespace.""" -+ prefix = f"dict:{self._namespace}:" -+ all_store_keys = list(self._store.keys()) -+ matching_keys = [k[len(prefix) :] for k in all_store_keys if k.startswith(prefix)] -+ return matching_keys -+ -+ def values(self) -> List[Any]: -+ """Return values in this namespace.""" -+ prefix = f"dict:{self._namespace}:" -+ with self._lock: -+ return [v for k, v in self._store.items() if k.startswith(prefix)] -+ -+ def items(self) -> List[tuple[Any, Any]]: -+ """Return (key, value) pairs in this namespace.""" -+ prefix = f"dict:{self._namespace}:" -+ with self._lock: -+ result = [(k[len(prefix) :], v) for k, v in self._store.items() if k.startswith(prefix)] -+ return result -+ -+ def __len__(self) -> int: -+ """Return number of items in this namespace.""" -+ return 
len(self.keys()) -+ -+ def __repr__(self) -> str: -+ return f"FlatDictView({self._namespace}, {len(self)} items)" -+ -+ -+class SharedDataManager: -+ """Thread-safe shared data manager for multiprocess applications. -+ -+ This class uses multiprocessing.Manager() to create proxy objects that can -+ be safely accessed from multiple processes. All data stored must be pickleable. -+ -+ Example: -+ manager = SharedDataManager() -+ -+ # Store simple data -+ manager.set_data("config_value", "some_config") -+ value = manager.get_data("config_value") -+ -+ # Work with shared dictionaries -+ agent_cache = manager.get_or_create_dict("agent_cache") -+ agent_cache["agent_123"] = {"last_seen": time.time()} -+ -+ # Work with shared lists -+ event_log = manager.get_or_create_list("events") -+ event_log.append({"type": "attestation", "agent": "agent_123"}) -+ """ -+ -+ def __init__(self) -> None: -+ """Initialize the shared data manager. -+ -+ This must be called before any process forking occurs to ensure -+ all child processes inherit access to the shared data. 
-+ """ -+ logger.debug("Initializing SharedDataManager") -+ -+ # Use explicit context to ensure fork compatibility -+ # The Manager must be started BEFORE any fork() calls -+ ctx = mp.get_context("fork") -+ self._manager = ctx.Manager() -+ -+ # CRITICAL FIX: Use a SINGLE flat dict instead of nested dicts -+ # Nested DictProxy objects have synchronization issues -+ # We'll use key prefixes like "dict:auth_sessions:session_id" instead -+ self._store = self._manager.dict() # Single flat store for all data -+ self._lock = self._manager.Lock() -+ self._initialized_at = time.time() -+ -+ # Register handler to reinitialize manager connection after fork -+ # This is needed because Manager uses network connections that don't survive fork -+ try: -+ import os # pylint: disable=import-outside-toplevel -+ -+ self._parent_pid = os.getpid() -+ logger.debug("SharedDataManager initialized in process %d", self._parent_pid) -+ except Exception as e: -+ logger.warning("Could not register PID tracking: %s", e) -+ -+ # Ensure cleanup on exit -+ atexit.register(self.cleanup) -+ -+ logger.info("SharedDataManager initialized successfully") -+ -+ def set_data(self, key: str, value: Any) -> None: -+ """Store arbitrary pickleable data by key. -+ -+ Args: -+ key: Unique identifier for the data -+ value: Any pickleable Python object -+ -+ Raises: -+ TypeError: If value is not pickleable -+ """ -+ with self._lock: -+ try: -+ self._store[key] = value -+ logger.debug("Stored data for key: %s", key) -+ except Exception as e: -+ logger.error("Failed to store data for key '%s': %s", key, e) -+ raise -+ -+ def get_data(self, key: str, default: Any = None) -> Any: -+ """Retrieve data by key. 
-+ -+ Args: -+ key: The key to retrieve -+ default: Value to return if key doesn't exist -+ -+ Returns: -+ The stored value or default if key doesn't exist -+ """ -+ with self._lock: -+ value = self._store.get(key, default) -+ logger.debug("Retrieved data for key: %s (found: %s)", key, value is not default) -+ return value -+ -+ def get_or_create_dict(self, key: str) -> Dict[str, Any]: -+ """Get or create a shared dictionary. -+ -+ Args: -+ key: Unique identifier for the dictionary -+ -+ Returns: -+ A shared dictionary-like object that syncs across processes -+ -+ Note: -+ Returns a FlatDictView that uses key prefixes in the flat store -+ instead of actual nested dicts, to avoid DictProxy nesting issues. -+ """ -+ # Mark that this namespace exists -+ namespace_key = f"__namespace__{key}" -+ if namespace_key not in self._store: -+ with self._lock: -+ self._store[namespace_key] = True -+ -+ # Return a view that operates on the flat store with key prefix -+ return FlatDictView(self._store, self._lock, key) # type: ignore[return-value,no-untyped-call] -+ -+ def get_or_create_list(self, key: str) -> List[Any]: -+ """Get or create a shared list. -+ -+ Args: -+ key: Unique identifier for the list -+ -+ Returns: -+ A shared list (proxy object) that syncs across processes -+ """ -+ with self._lock: -+ if key not in self._store: -+ self._store[key] = self._manager.list() -+ logger.debug("Created new shared list for key: %s", key) -+ else: -+ logger.debug("Retrieved existing shared list for key: %s", key) -+ return self._store[key] # type: ignore[no-any-return] -+ -+ def delete_data(self, key: str) -> bool: -+ """Delete data by key. 
-+ -+ Args: -+ key: The key to delete -+ -+ Returns: -+ True if the key existed and was deleted, False otherwise -+ """ -+ with self._lock: -+ if key in self._store: -+ del self._store[key] -+ logger.debug("Deleted data for key: %s", key) -+ return True -+ logger.debug("Key not found for deletion: %s", key) -+ return False -+ -+ def has_key(self, key: str) -> bool: -+ """Check if a key exists. -+ -+ Args: -+ key: The key to check -+ -+ Returns: -+ True if key exists, False otherwise -+ """ -+ with self._lock: -+ return key in self._store -+ -+ def get_keys(self) -> List[str]: -+ """Get all stored keys. -+ -+ Returns: -+ List of all keys in the store -+ """ -+ with self._lock: -+ return list(self._store.keys()) -+ -+ def clear_all(self) -> None: -+ """Clear all stored data. Use with caution!""" -+ with self._lock: -+ key_count = len(self._store) -+ self._store.clear() -+ logger.warning("Cleared all shared data (%d keys)", key_count) -+ -+ def get_stats(self) -> Dict[str, Any]: -+ """Get statistics about stored data. -+ -+ Returns: -+ Dictionary containing storage statistics -+ """ -+ with self._lock: -+ return { -+ "total_keys": len(self._store), -+ "initialized_at": self._initialized_at, -+ "uptime_seconds": time.time() - self._initialized_at, -+ } -+ -+ def cleanup(self) -> None: -+ """Cleanup shared resources. -+ -+ This is automatically called on exit but can be called manually -+ for explicit cleanup. 
-+ """ -+ if hasattr(self, "_manager"): -+ logger.debug("Shutting down SharedDataManager") -+ try: -+ self._manager.shutdown() -+ logger.info("SharedDataManager shutdown complete") -+ except Exception as e: -+ logger.error("Error during SharedDataManager shutdown: %s", e) -+ -+ def __repr__(self) -> str: -+ stats = self.get_stats() -+ return f"SharedDataManager(keys={stats['total_keys']}, " f"uptime={stats['uptime_seconds']:.1f}s)" -+ -+ @property -+ def manager(self) -> Any: # type: ignore[misc] -+ """Access to the underlying multiprocessing Manager for advanced usage.""" -+ return self._manager -+ -+ -+# Global shared memory manager instance -+_global_shared_manager: Optional[SharedDataManager] = None -+_manager_lock = threading.Lock() -+ -+ -+def initialize_shared_memory() -> SharedDataManager: -+ """Initialize the global shared memory manager. -+ -+ This function MUST be called before any process forking occurs to ensure -+ all child processes share the same manager instance. -+ -+ For tornado/multiprocess servers, call this before starting workers. -+ -+ Returns: -+ SharedDataManager: The global shared memory manager instance -+ -+ Raises: -+ RuntimeError: If called after manager is already initialized -+ """ -+ global _global_shared_manager -+ -+ with _manager_lock: -+ if _global_shared_manager is not None: -+ logger.warning("Shared memory manager already initialized, returning existing instance") -+ return _global_shared_manager -+ -+ logger.info("Initializing global shared memory manager") -+ _global_shared_manager = SharedDataManager() -+ logger.info("Global shared memory manager initialized") -+ -+ return _global_shared_manager -+ -+ -+def get_shared_memory() -> SharedDataManager: -+ """Get the global shared memory manager instance. -+ -+ This function returns a singleton SharedDataManager that can be used -+ throughout keylime for caching and inter-process communication. 
-+ -+ The manager is automatically initialized on first access and cleaned up -+ on process exit. -+ -+ IMPORTANT: In multiprocess applications (like tornado with workers), -+ you MUST call initialize_shared_memory() BEFORE forking workers. -+ Otherwise each worker will get its own separate manager. -+ -+ Returns: -+ SharedDataManager: The global shared memory manager instance -+ """ -+ global _global_shared_manager -+ -+ if _global_shared_manager is None: -+ with _manager_lock: -+ if _global_shared_manager is None: -+ logger.info("Initializing global shared memory manager") -+ _global_shared_manager = SharedDataManager() # type: ignore[no-untyped-call] -+ logger.info("Global shared memory manager initialized") -+ -+ return _global_shared_manager -+ -+ -+def cleanup_global_shared_memory() -> None: -+ """Cleanup the global shared memory manager. -+ -+ This is automatically called on exit but can be called manually. -+ """ -+ global _global_shared_manager -+ -+ if _global_shared_manager is not None: -+ logger.info("Cleaning up global shared memory manager") -+ _global_shared_manager.cleanup() -+ _global_shared_manager = None -+ -+ -+# Convenience functions for common keylime patterns -+ -+ -+def cache_policy(agent_id: str, checksum: str, policy: str) -> None: -+ """Cache a policy in shared memory. -+ -+ Args: -+ agent_id: The agent identifier -+ checksum: The policy checksum -+ policy: The policy content to cache -+ """ -+ manager = get_shared_memory() -+ policy_cache = manager.get_or_create_dict("policy_cache") -+ -+ if agent_id not in policy_cache: -+ policy_cache[agent_id] = manager.manager.dict() # type: ignore[attr-defined] -+ -+ policy_cache[agent_id][checksum] = policy -+ logger.debug("Cached policy for agent %s with checksum %s", agent_id, checksum) -+ -+ -+def get_cached_policy(agent_id: str, checksum: str) -> Optional[str]: -+ """Retrieve cached policy. 
-+ -+ Args: -+ agent_id: The agent identifier -+ checksum: The policy checksum -+ -+ Returns: -+ The cached policy content or None if not found -+ """ -+ manager = get_shared_memory() -+ policy_cache = manager.get_or_create_dict("policy_cache") -+ agent_policies = policy_cache.get(agent_id, {}) -+ -+ result = agent_policies.get(checksum) -+ if result: -+ logger.debug("Found cached policy for agent %s with checksum %s", agent_id, checksum) -+ else: -+ logger.debug("No cached policy found for agent %s with checksum %s", agent_id, checksum) -+ -+ return result # type: ignore[no-any-return] -+ -+ -+def clear_agent_policy_cache(agent_id: str) -> None: -+ """Clear all cached policies for an agent. -+ -+ Args: -+ agent_id: The agent identifier -+ """ -+ manager = get_shared_memory() -+ policy_cache = manager.get_or_create_dict("policy_cache") -+ -+ if agent_id in policy_cache: -+ del policy_cache[agent_id] -+ logger.debug("Cleared policy cache for agent %s", agent_id) -+ -+ -+def cleanup_agent_policy_cache(agent_id: str, keep_checksum: str = "") -> None: -+ """Clean up agent policy cache, keeping only the specified checksum. -+ -+ This mimics the cleanup behavior from GLOBAL_POLICY_CACHE where when -+ a new policy checksum is encountered, old cached policies are removed. 
-+ -+ Args: -+ agent_id: The agent identifier -+ keep_checksum: The checksum to keep in the cache (empty string by default) -+ """ -+ manager = get_shared_memory() -+ policy_cache = manager.get_or_create_dict("policy_cache") -+ -+ if agent_id in policy_cache and len(policy_cache[agent_id]) > 1: -+ # Keep only the empty entry and the specified checksum -+ old_policies = dict(policy_cache[agent_id]) -+ policy_cache[agent_id] = manager.manager.dict() -+ -+ # Always keep the empty entry -+ policy_cache[agent_id][""] = old_policies.get("", "") -+ -+ # Keep the specified checksum if it exists and is not empty -+ if keep_checksum and keep_checksum in old_policies: -+ policy_cache[agent_id][keep_checksum] = old_policies[keep_checksum] -+ -+ logger.debug("Cleaned up policy cache for agent %s, keeping checksum %s", agent_id, keep_checksum) -+ -+ -+def initialize_agent_policy_cache(agent_id: str) -> Dict[str, Any]: -+ """Initialize policy cache for an agent if it doesn't exist. -+ -+ Args: -+ agent_id: The agent identifier -+ -+ Returns: -+ The agent's policy cache dictionary -+ """ -+ manager = get_shared_memory() -+ policy_cache = manager.get_or_create_dict("policy_cache") -+ -+ if agent_id not in policy_cache: -+ policy_cache[agent_id] = manager.manager.dict() # type: ignore[attr-defined] -+ policy_cache[agent_id][""] = "" -+ logger.debug("Initialized policy cache for agent %s", agent_id) -+ -+ return policy_cache[agent_id] # type: ignore[no-any-return] -+ -+ -+def get_agent_cache(agent_id: str) -> Dict[str, Any]: -+ """Get shared cache for a specific agent. -+ -+ Args: -+ agent_id: The agent identifier -+ -+ Returns: -+ A shared dictionary for caching agent-specific data -+ """ -+ manager = get_shared_memory() -+ return manager.get_or_create_dict(f"agent_cache:{agent_id}") -+ -+ -+def get_verification_queue(agent_id: str) -> List[Any]: -+ """Get verification queue for batching database operations. 
-+ -+ Args: -+ agent_id: The agent identifier -+ -+ Returns: -+ A shared list for queuing verification operations -+ """ -+ manager = get_shared_memory() -+ return manager.get_or_create_list(f"verification_queue:{agent_id}") -+ -+ -+def get_shared_stats() -> Dict[str, Any]: -+ """Get statistics about shared memory usage. -+ -+ Returns: -+ Dictionary containing storage statistics -+ """ -+ manager = get_shared_memory() -+ return manager.get_stats() -diff --git a/keylime/tpm/tpm_main.py b/keylime/tpm/tpm_main.py -index 6f2e89f..9b54fc3 100644 ---- a/keylime/tpm/tpm_main.py -+++ b/keylime/tpm/tpm_main.py -@@ -10,7 +10,7 @@ from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey - - from keylime import cert_utils, config, json, keylime_logging - from keylime.agentstates import AgentAttestState, TPMClockInfo --from keylime.common.algorithms import Hash -+from keylime.common.algorithms import Hash, Sign - from keylime.failure import Component, Failure - from keylime.ima import ima - from keylime.ima.file_signatures import ImaKeyrings -@@ -50,6 +50,21 @@ class Tpm: - - return (keyblob, key) - -+ # Mapping from keylime.common.algorithms enums to TPM algorithm constants -+ # Used for validating that TPM attestations use expected cryptographic algorithms -+ HASH_ALG_TO_TPM = { -+ Hash.SHA1: tpm2_objects.TPM_ALG_SHA1, -+ Hash.SHA256: tpm2_objects.TPM_ALG_SHA256, -+ Hash.SHA384: tpm2_objects.TPM_ALG_SHA384, -+ Hash.SHA512: tpm2_objects.TPM_ALG_SHA512, -+ } -+ -+ SIGN_ALG_TO_TPM = { -+ Sign.RSASSA: tpm2_objects.TPM_ALG_RSASSA, -+ Sign.RSAPSS: tpm2_objects.TPM_ALG_RSAPSS, -+ Sign.ECDSA: tpm2_objects.TPM_ALG_ECDSA, -+ } -+ - @staticmethod - def verify_aik_with_iak(uuid: str, aik_tpm: bytes, iak_tpm: bytes, iak_attest: bytes, iak_sign: bytes) -> bool: - attest_body = iak_attest.split(b"\x00$")[1] -diff --git a/keylime/web/base/default_controller.py b/keylime/web/base/default_controller.py -index 971ed06..ba0782e 100644 ---- a/keylime/web/base/default_controller.py 
-+++ b/keylime/web/base/default_controller.py -@@ -19,6 +19,12 @@ class DefaultController(Controller): - self.send_response(400, "Bad Request") - - def malformed_params(self, **_params: Any) -> None: -+ import traceback # pylint: disable=import-outside-toplevel -+ -+ from keylime import keylime_logging # pylint: disable=import-outside-toplevel -+ -+ logger = keylime_logging.init_logging("web") -+ logger.error("Malformed params error. Traceback: %s", traceback.format_exc()) - self.send_response(400, "Malformed Request Parameter") - - def action_dispatch_error(self, **_param: Any) -> None: -diff --git a/test/test_shared_data.py b/test/test_shared_data.py -new file mode 100644 -index 0000000..8de7e64 ---- /dev/null -+++ b/test/test_shared_data.py -@@ -0,0 +1,199 @@ -+"""Unit tests for shared memory infrastructure.""" -+ -+import unittest -+ -+from keylime.shared_data import ( -+ SharedDataManager, -+ cache_policy, -+ cleanup_agent_policy_cache, -+ cleanup_global_shared_memory, -+ clear_agent_policy_cache, -+ get_cached_policy, -+ get_shared_memory, -+ initialize_agent_policy_cache, -+) -+ -+ -+class TestSharedDataManager(unittest.TestCase): -+ """Test cases for SharedDataManager class.""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ self.manager = SharedDataManager() -+ -+ def tearDown(self): -+ """Clean up after tests.""" -+ if self.manager: -+ self.manager.cleanup() -+ -+ def test_set_and_get_data(self): -+ """Test basic set and get operations.""" -+ self.manager.set_data("test_key", "test_value") -+ result = self.manager.get_data("test_key") -+ self.assertEqual(result, "test_value") -+ -+ def test_get_nonexistent_data(self): -+ """Test getting data that doesn't exist returns None.""" -+ result = self.manager.get_data("nonexistent_key") -+ self.assertIsNone(result) -+ -+ def test_get_data_with_default(self): -+ """Test getting data with default value.""" -+ result = self.manager.get_data("nonexistent_key", default="default_value") -+ 
self.assertEqual(result, "default_value") -+ -+ def test_delete_data(self): -+ """Test deleting data.""" -+ self.manager.set_data("test_key", "test_value") -+ result = self.manager.delete_data("test_key") -+ self.assertTrue(result) -+ -+ # Verify it's actually deleted -+ self.assertIsNone(self.manager.get_data("test_key")) -+ -+ def test_delete_nonexistent_data(self): -+ """Test deleting data that doesn't exist returns False.""" -+ result = self.manager.delete_data("nonexistent_key") -+ self.assertFalse(result) -+ -+ def test_has_key(self): -+ """Test checking if key exists.""" -+ self.manager.set_data("test_key", "test_value") -+ self.assertTrue(self.manager.has_key("test_key")) -+ self.assertFalse(self.manager.has_key("nonexistent_key")) -+ -+ def test_get_or_create_dict(self): -+ """Test getting or creating a shared dictionary.""" -+ shared_dict = self.manager.get_or_create_dict("test_dict") -+ shared_dict["key1"] = "value1" -+ shared_dict["key2"] = "value2" -+ -+ # Retrieve the same dict -+ retrieved_dict = self.manager.get_or_create_dict("test_dict") -+ self.assertEqual(retrieved_dict["key1"], "value1") -+ self.assertEqual(retrieved_dict["key2"], "value2") -+ -+ def test_get_or_create_list(self): -+ """Test getting or creating a shared list.""" -+ shared_list = self.manager.get_or_create_list("test_list") -+ shared_list.append("item1") -+ shared_list.append("item2") -+ -+ # Retrieve the same list -+ retrieved_list = self.manager.get_or_create_list("test_list") -+ self.assertEqual(len(retrieved_list), 2) -+ self.assertEqual(retrieved_list[0], "item1") -+ self.assertEqual(retrieved_list[1], "item2") -+ -+ def test_get_stats(self): -+ """Test getting manager statistics.""" -+ self.manager.set_data("key1", "value1") -+ self.manager.set_data("key2", "value2") -+ -+ stats = self.manager.get_stats() -+ self.assertIn("total_keys", stats) -+ self.assertIn("uptime_seconds", stats) -+ self.assertEqual(stats["total_keys"], 2) -+ 
self.assertGreaterEqual(stats["uptime_seconds"], 0) -+ -+ -+class TestPolicyCacheFunctions(unittest.TestCase): -+ """Test cases for policy cache functions.""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ # Get the global shared memory manager -+ self.manager = get_shared_memory() -+ -+ def tearDown(self): -+ """Clean up after tests.""" -+ # Clean up global shared memory -+ cleanup_global_shared_memory() -+ -+ def test_initialize_agent_policy_cache(self): -+ """Test initializing agent policy cache.""" -+ agent_id = "test_agent_123" -+ initialize_agent_policy_cache(agent_id) -+ -+ # Verify the cache was initialized -+ policy_cache = self.manager.get_or_create_dict("policy_cache") -+ self.assertIn(agent_id, policy_cache) -+ -+ def test_cache_and_get_policy(self): -+ """Test caching and retrieving a policy.""" -+ agent_id = "test_agent_123" -+ checksum = "abc123def456" -+ policy_content = '{"policy": "test_policy_content"}' -+ -+ # Initialize and cache policy -+ initialize_agent_policy_cache(agent_id) -+ cache_policy(agent_id, checksum, policy_content) -+ -+ # Retrieve cached policy -+ cached = get_cached_policy(agent_id, checksum) -+ self.assertEqual(cached, policy_content) -+ -+ def test_get_nonexistent_cached_policy(self): -+ """Test getting a policy that hasn't been cached.""" -+ agent_id = "test_agent_123" -+ checksum = "nonexistent_checksum" -+ -+ initialize_agent_policy_cache(agent_id) -+ cached = get_cached_policy(agent_id, checksum) -+ self.assertIsNone(cached) -+ -+ def test_clear_agent_policy_cache(self): -+ """Test clearing an agent's policy cache.""" -+ agent_id = "test_agent_123" -+ checksum = "abc123def456" -+ policy_content = '{"policy": "test_policy_content"}' -+ -+ # Initialize, cache, and then clear -+ initialize_agent_policy_cache(agent_id) -+ cache_policy(agent_id, checksum, policy_content) -+ clear_agent_policy_cache(agent_id) -+ -+ # Verify it's cleared -+ cached = get_cached_policy(agent_id, checksum) -+ self.assertIsNone(cached) -+ 
-+ def test_cleanup_agent_policy_cache(self): -+ """Test cleaning up old policy checksums.""" -+ agent_id = "test_agent_123" -+ old_checksum = "old_checksum" -+ new_checksum = "new_checksum" -+ policy_content = '{"policy": "test"}' -+ -+ # Initialize and cache multiple policies -+ initialize_agent_policy_cache(agent_id) -+ cache_policy(agent_id, old_checksum, policy_content) -+ cache_policy(agent_id, new_checksum, policy_content) -+ -+ # Cleanup old checksums (keeping only new_checksum) -+ cleanup_agent_policy_cache(agent_id, new_checksum) -+ -+ # Verify old checksum is removed but new one remains -+ self.assertIsNone(get_cached_policy(agent_id, old_checksum)) -+ self.assertEqual(get_cached_policy(agent_id, new_checksum), policy_content) -+ -+ def test_cache_multiple_agents(self): -+ """Test caching policies for multiple agents.""" -+ agent1 = "agent_1" -+ agent2 = "agent_2" -+ checksum = "same_checksum" -+ policy1 = '{"policy": "agent1_policy"}' -+ policy2 = '{"policy": "agent2_policy"}' -+ -+ # Cache policies for different agents -+ initialize_agent_policy_cache(agent1) -+ initialize_agent_policy_cache(agent2) -+ cache_policy(agent1, checksum, policy1) -+ cache_policy(agent2, checksum, policy2) -+ -+ # Verify each agent has its own policy -+ self.assertEqual(get_cached_policy(agent1, checksum), policy1) -+ self.assertEqual(get_cached_policy(agent2, checksum), policy2) -+ -+ -+if __name__ == "__main__": -+ unittest.main() --- -2.47.3 - diff --git a/0013-fix-verifier-race-condition-on-agent-delete.patch b/0013-fix-verifier-race-condition-on-agent-delete.patch new file mode 100644 index 0000000..e6d6d33 --- /dev/null +++ b/0013-fix-verifier-race-condition-on-agent-delete.patch @@ -0,0 +1,449 @@ +From 739a8a97357f1b52c3944706479740fb7b71fb33 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Thu, 26 Mar 2026 11:28:52 +0100 +Subject: [PATCH 13/13] fix: verifier race condition on agent delete + +Fix a race condition in the pull-mode DELETE handler where 
an agent +could be deleted from the database while an in-flight attestation cycle +was still running, causing 'tenant -c update' to intermittently fail +with "Agent was not deleted from Verifier after 5 tries". + +The race had two interacting causes: + +1. TERMINATED was in the immediate-deletion states list. When a second + DELETE arrived and found the agent in TERMINATED state, it deleted + immediately (200), even though an invoke_get_quote() coroutine from + the first DELETE's cycle was still in-flight. The orphaned coroutine + then crashed in store_attestation_state() with AssertionError. + +2. The DELETE handler did not cancel the scheduled IOLoop poll timer, so + new attestation cycles could start even after deletion was requested. + +Changes: +- Add _pending_events registry and _register_pending_event / + _cancel_pending_event helpers to track IOLoop timer handles (from + upstream commit 59ac386). +- Replace assert statements in store_attestation_state() and the DELETE + handler with proper error handling (graceful log+return and 404). +- Remove TERMINATED from the immediate-deletion states list so that a + second DELETE returns 202 instead of deleting while in-flight work + exists. +- Cancel the pending poll timer via _pending_events on DELETE to prevent + new attestation cycles from starting. +- Fix the tenant's do_cvdelete() to handle 200/202/404 response codes + properly and fix a typo (reponse_json -> response_json). +- Cancel the pending poll timer in the PUT "stop" handler to prevent + new attestation cycles after agent stop. +- Suppress a mypy false positive in keylime/json.py. +- Add unit tests for pending-event management and store_attestation_state + graceful handling when agent is deleted. 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/cloud_verifier_tornado.py | 180 ++++++++++++++++++++++++++++++------ + keylime/json.py | 2 +- + keylime/tenant.py | 26 +++--- + test/test_cloud_verifier_tornado.py | 114 +++++++++++++++++++++++ + 4 files changed, 282 insertions(+), 40 deletions(-) + +diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py +index 17bec75..75f117b 100644 +--- a/keylime/cloud_verifier_tornado.py ++++ b/keylime/cloud_verifier_tornado.py +@@ -171,6 +171,35 @@ exclude_db: Dict[str, Any] = { + "ssl_context": None, + } + ++# Registry of agent_id -> IOLoop timeout handle for all scheduled pending ++# events (quote polls, retries). Used to cancel them all on shutdown. ++_pending_events: Dict[str, object] = {} ++ ++ ++def _register_pending_event(agent: Dict[str, Any], handle: object) -> None: ++ """Track a pending IOLoop timeout in both the agent dict and the global registry. ++ ++ The agent dict field ``pending_event`` is the per-agent reference used during ++ normal operation (e.g. cancelling on state change). The module-level ++ ``_pending_events`` dict mirrors it so that *all* handles can be ++ bulk-cancelled on shutdown without iterating over every agent. 
++ """ ++ agent["pending_event"] = handle ++ _pending_events[agent["agent_id"]] = handle ++ ++ ++def _cancel_pending_event(agent: Dict[str, Any]) -> None: ++ """Cancel and unregister the pending IOLoop timeout for *agent*, if any.""" ++ handle = agent.get("pending_event") ++ if handle is None: ++ return ++ agent["pending_event"] = None ++ _pending_events.pop(agent["agent_id"], None) ++ try: ++ tornado.ioloop.IOLoop.current().remove_timeout(handle) ++ except Exception as e: ++ logger.debug("Could not remove pending event for agent %s: %s", agent["agent_id"], e) ++ + + def _from_db_obj(agent_db_obj: VerfierMain) -> Dict[str, Any]: + fields = [ +@@ -286,7 +315,12 @@ def store_attestation_state(agentAttestState: AgentAttestState) -> None: + try: + with session_context() as session: + update_agent = session.get(VerfierMain, agentAttestState.get_agent_id()) # type: ignore[attr-defined] +- assert update_agent ++ if update_agent is None: ++ logger.warning( ++ "Agent %s no longer in database, skipping attestation state storage", ++ agent_id, ++ ) ++ return + update_agent.boottime = agentAttestState.get_boottime() # pyright: ignore + update_agent.next_ima_ml_entry = agentAttestState.get_next_ima_ml_entry() # pyright: ignore + ima_pcrs_dict = agentAttestState.get_ima_pcrs() +@@ -605,34 +639,115 @@ class AgentsHandler(BaseHandler): + except SQLAlchemyError as e: + logger.error("SQLAlchemy Error deleting agent in push mode: %s", e) + web_util.echo_json_response(self.req_handler, 500, "Internal Server Error") +- else: +- # Pull mode: Use operational_state to determine deletion behavior +- op_state = agent.operational_state +- if op_state in ( +- states.SAVED, +- states.FAILED, +- states.TERMINATED, +- states.TENANT_FAILED, +- states.INVALID_QUOTE, +- ): +- try: +- verifier_db_delete_agent(session, agent_id) +- web_util.echo_json_response(self.req_handler, 200, "Success") +- logger.info("DELETE (pull mode) returning 200 response for agent id: %s", agent_id) +- except 
SQLAlchemyError as e: +- logger.error("SQLAlchemy Error deleting agent in pull mode: %s", e) +- web_util.echo_json_response(self.req_handler, 500, "Internal Server Error") ++ return ++ ++ # Pull mode: Use operational_state to determine deletion behavior. ++ # ++ # Terminal states with no in-flight work can be deleted ++ # immediately (200). Note that TERMINATED is intentionally ++ # excluded: it means a previous DELETE was accepted but the ++ # attestation cycle has not yet finished. Deleting immediately ++ # while in-flight work exists causes store_attestation_state() ++ # to fail when it tries to persist results for the now-gone ++ # agent. ++ op_state = agent.operational_state ++ if op_state in ( ++ states.SAVED, ++ states.FAILED, ++ states.TENANT_FAILED, ++ states.INVALID_QUOTE, ++ ): ++ # Agent is in a terminal state with no in-flight work — delete immediately. ++ # Cancel any local pending poll timer first (same-worker ++ # defensive cleanup). This matters when a cross-worker ++ # PUT /stop sets TENANT_FAILED in the DB but cannot cancel ++ # the timer in this worker's _pending_events. ++ pending_handle = _pending_events.pop(agent_id, None) ++ if pending_handle is not None: ++ tornado.ioloop.IOLoop.current().remove_timeout(pending_handle) ++ try: ++ verifier_db_delete_agent(session, agent_id) ++ web_util.echo_json_response(self.req_handler, 200, "Success") ++ logger.info("DELETE (pull mode) returning 200 response for agent id: %s", agent_id) ++ except SQLAlchemyError as e: ++ logger.error("SQLAlchemy Error deleting agent in pull mode: %s", e) ++ web_util.echo_json_response(self.req_handler, 500, "Internal Server Error") ++ return ++ ++ # Agent is in an active state or already TERMINATED from a ++ # previous DELETE. ++ # ++ # Multi-worker note: _pending_events is process-local. Each ++ # agent's attestation cycle runs in the worker process it was ++ # assigned to at startup (round-robin), but this DELETE ++ # request may arrive at any worker. 
++ # ++ # - Same worker: pending_handle is accurate — if found, the ++ # agent was idle (timer pending) and we can delete ++ # immediately since no coroutine is in-flight. ++ # - Different worker: pending_handle is always None, so we ++ # fall through to the 202/TERMINATED path. The managing ++ # worker's timer fires normally, process_agent() detects ++ # TERMINATED, and completes the deletion. ++ # ++ # Important: when the agent is already TERMINATED, do NOT ++ # cancel the pending poll timer — it is the only mechanism ++ # that will trigger process_agent() to detect TERMINATED and ++ # complete the deletion. ++ if op_state == states.TERMINATED: # pyright: ignore ++ # Agent is already TERMINATED from a previous DELETE. ++ # Leave the pending poll timer alone so process_agent() ++ # can detect TERMINATED and complete the deletion. ++ web_util.echo_json_response(self.req_handler, 202, "Accepted") ++ logger.info( ++ "DELETE (pull mode) returning 202 response for agent id: %s " ++ "(already TERMINATED, waiting for deletion to complete)", ++ agent_id, ++ ) ++ return ++ ++ # First DELETE for this agent. Try to cancel the pending ++ # poll timer (same-worker optimization). ++ # ++ # Pop the handle first but do NOT cancel the timer yet — ++ # if the DB operation fails we restore the handle so the ++ # attestation cycle can continue. ++ pending_handle = _pending_events.pop(agent_id, None) ++ try: ++ if pending_handle is not None: ++ # Same-worker optimization: the agent was idle ++ # (waiting for the next poll timer) — no in-flight ++ # coroutine will come along to detect TERMINATED and ++ # complete the deletion, so delete immediately. ++ verifier_db_delete_agent(session, agent_id) ++ # DB succeeded — now safe to cancel the timer. 
++ tornado.ioloop.IOLoop.current().remove_timeout(pending_handle) ++ web_util.echo_json_response(self.req_handler, 200, "Success") ++ logger.info("DELETE (pull mode) returning 200 response for agent id: %s", agent_id) + else: +- try: +- update_agent = session.get(VerfierMain, agent_id) # type: ignore[attr-defined] +- assert update_agent +- update_agent.operational_state = states.TERMINATED # pyright: ignore +- session.add(update_agent) +- # session.commit() is automatically called by context manager +- web_util.echo_json_response(self.req_handler, 202, "Accepted") +- logger.info("DELETE (pull mode) returning 202 response for agent id: %s", agent_id) +- except SQLAlchemyError as e: +- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) ++ # Either an invoke_get_quote() / invoke_provide_v() ++ # coroutine is in-flight (no pending_handle — the ++ # timer already fired), or this DELETE arrived at a ++ # different worker process. Mark as TERMINATED and ++ # let process_agent() perform the actual deletion ++ # when the in-flight work finishes (or the timer ++ # fires in the managing worker). ++ update_agent = session.get(VerfierMain, agent_id) # type: ignore[attr-defined] ++ if update_agent is None: ++ web_util.echo_json_response(self.req_handler, 404, "agent id not found") ++ return ++ update_agent.operational_state = states.TERMINATED # pyright: ignore ++ session.add(update_agent) ++ web_util.echo_json_response(self.req_handler, 202, "Accepted") ++ logger.info("DELETE (pull mode) returning 202 response for agent id: %s", agent_id) ++ except SQLAlchemyError as e: ++ logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) ++ if pending_handle is not None: ++ # Restore the timer so the attestation cycle can ++ # continue — the DB operation failed so the agent ++ # is still there. 
++ _pending_events[agent_id] = pending_handle ++ web_util.echo_json_response(self.req_handler, 500, "Internal server error") + + def post(self) -> None: + """This method handles the POST requests to add agents to the Cloud Verifier. +@@ -1045,6 +1160,15 @@ class AgentsHandler(BaseHandler): + # session.commit() is automatically called by context manager + except SQLAlchemyError as e: + logger.error("SQLAlchemy Error: %s", e) ++ web_util.echo_json_response(self.req_handler, 500, "Internal server error") ++ return ++ ++ # DB succeeded — now safe to cancel the pending poll ++ # timer to prevent new attestation cycles. ++ if agent_id is not None: ++ pending_handle = _pending_events.pop(agent_id, None) ++ if pending_handle is not None: ++ tornado.ioloop.IOLoop.current().remove_timeout(pending_handle) + + web_util.echo_json_response(self.req_handler, 200, "Success") + logger.info("PUT returning 200 response for agent id: %s", agent_id) +diff --git a/keylime/json.py b/keylime/json.py +index 82292a1..4c4b897 100644 +--- a/keylime/json.py ++++ b/keylime/json.py +@@ -24,7 +24,7 @@ def bytes_to_str(data: Any) -> Any: + for _k, _v in data.items(): + data[_k] = bytes_to_str(_v) + elif isinstance(data, tuple(_list_types)): +- _l = list(data) ++ _l = list(data) # type: ignore[call-overload] + for _k, _v in enumerate(_l): + _l[_k] = bytes_to_str(_v) + data = _l +diff --git a/keylime/tenant.py b/keylime/tenant.py +index 0cdeada..4cb3698 100644 +--- a/keylime/tenant.py ++++ b/keylime/tenant.py +@@ -1074,22 +1074,17 @@ class Tenant: + # keylime_logging.log_http_response(logger, logging.ERROR, response_json) + raise UserError(f"{self.verifier_fid_str} timed out while deleting {self.agent_fid_str}.") + +- if response_json["code"] == 202: ++ if response_json["code"] == 200: ++ logger.info("Agent %s deleted from the CV", self.agent_uuid) ++ elif response_json["code"] == 202: + numtries = 0 + deleted = False + + while not deleted: +- reponse_json = self.do_cvstatus(not_found_fail=False) +- 
if reponse_json["code"] != 404: ++ response_json = self.do_cvstatus(not_found_fail=False) ++ if response_json["code"] != 404: + numtries += 1 + if numtries >= self.maxr: +- # EVALUATE DELETION +- # logger.error( +- # "%s was not deleted from %s after %d tries", +- # self.agent_fid_str, +- # self.verifier_fid_str, +- # numtries, +- # ) + raise UserError( + f"{self.agent_fid_str} was not deleted from {self.verifier_fid_str} after {numtries} tries" + ) +@@ -1114,8 +1109,17 @@ class Tenant: + self.verifier_fid_str, + numtries, + ) +- # Marked for deletion (need to modify the code on CI tests) + logger.info("Agent %s deleted from the CV", self.agent_uuid) ++ elif response_json["code"] == 404: ++ # The agent was already deleted (e.g. by another caller ++ # between the do_cvstatus check and the DELETE request). ++ # The desired end state is reached — treat as success. ++ logger.info("Agent %s is already absent from the CV", self.agent_uuid) ++ else: ++ raise UserError( ++ f"Unexpected response code {response_json['code']} from " ++ f"{self.verifier_fid_str} while deleting {self.agent_fid_str}" ++ ) + + def do_regstatus(self) -> Dict[str, Any]: + if not self.registrar_ip or not self.registrar_port: +diff --git a/test/test_cloud_verifier_tornado.py b/test/test_cloud_verifier_tornado.py +new file mode 100644 +index 0000000..7515b2e +--- /dev/null ++++ b/test/test_cloud_verifier_tornado.py +@@ -0,0 +1,114 @@ ++"""Unit tests for cloud_verifier_tornado deletion and pending-event management. ++ ++Tests cover: ++1. _register_pending_event / _cancel_pending_event helpers ++2. 
store_attestation_state graceful handling when agent is deleted ++""" ++ ++# pylint: disable=protected-access ++ ++import unittest ++from unittest.mock import MagicMock, patch ++ ++from keylime import cloud_verifier_tornado ++ ++ ++class TestPendingEventRegistry(unittest.TestCase): ++ """Test the _pending_events registry helpers.""" ++ ++ def setUp(self): ++ cloud_verifier_tornado._pending_events.clear() ++ ++ def tearDown(self): ++ cloud_verifier_tornado._pending_events.clear() ++ ++ def test_register_pending_event(self): ++ """_register_pending_event stores handle in agent dict and global registry.""" ++ agent = {"agent_id": "test-agent-1", "pending_event": None} ++ handle = object() ++ ++ cloud_verifier_tornado._register_pending_event(agent, handle) ++ ++ self.assertIs(agent["pending_event"], handle) ++ self.assertIs(cloud_verifier_tornado._pending_events["test-agent-1"], handle) ++ ++ def test_cancel_pending_event_removes_from_both(self): ++ """_cancel_pending_event clears agent dict and global registry.""" ++ agent = {"agent_id": "test-agent-1", "pending_event": None} ++ handle = object() ++ cloud_verifier_tornado._register_pending_event(agent, handle) ++ ++ with patch("tornado.ioloop.IOLoop") as mock_ioloop_cls: ++ mock_ioloop = MagicMock() ++ mock_ioloop_cls.current.return_value = mock_ioloop ++ ++ cloud_verifier_tornado._cancel_pending_event(agent) ++ ++ self.assertIsNone(agent["pending_event"]) ++ self.assertNotIn("test-agent-1", cloud_verifier_tornado._pending_events) ++ mock_ioloop.remove_timeout.assert_called_once_with(handle) ++ ++ def test_cancel_pending_event_noop_when_none(self): ++ """_cancel_pending_event is a no-op when no pending event exists.""" ++ agent = {"agent_id": "test-agent-1", "pending_event": None} ++ ++ # Should not raise ++ cloud_verifier_tornado._cancel_pending_event(agent) ++ ++ self.assertIsNone(agent["pending_event"]) ++ ++ def test_cancel_pending_event_handles_remove_timeout_error(self): ++ """_cancel_pending_event logs but 
doesn't raise on remove_timeout failure.""" ++ agent = {"agent_id": "test-agent-1", "pending_event": None} ++ handle = object() ++ cloud_verifier_tornado._register_pending_event(agent, handle) ++ ++ with patch("tornado.ioloop.IOLoop") as mock_ioloop_cls: ++ mock_ioloop = MagicMock() ++ mock_ioloop_cls.current.return_value = mock_ioloop ++ mock_ioloop.remove_timeout.side_effect = RuntimeError("IOLoop stopped") ++ ++ # Should not raise ++ cloud_verifier_tornado._cancel_pending_event(agent) ++ ++ self.assertIsNone(agent["pending_event"]) ++ self.assertNotIn("test-agent-1", cloud_verifier_tornado._pending_events) ++ ++ def test_register_replaces_previous_handle(self): ++ """_register_pending_event replaces a previously registered handle.""" ++ agent = {"agent_id": "test-agent-1", "pending_event": None} ++ handle1 = object() ++ handle2 = object() ++ ++ cloud_verifier_tornado._register_pending_event(agent, handle1) ++ cloud_verifier_tornado._register_pending_event(agent, handle2) ++ ++ self.assertIs(agent["pending_event"], handle2) ++ self.assertIs(cloud_verifier_tornado._pending_events["test-agent-1"], handle2) ++ ++ ++class TestStoreAttestationState(unittest.TestCase): ++ """Test store_attestation_state graceful handling of deleted agents.""" ++ ++ @patch("keylime.cloud_verifier_tornado.session_context") ++ def test_skips_when_agent_not_in_db(self, mock_session_ctx): ++ """store_attestation_state returns gracefully when agent is deleted from DB.""" ++ mock_session = MagicMock() ++ mock_session.get.return_value = None ++ mock_session_ctx.return_value.__enter__ = MagicMock(return_value=mock_session) ++ mock_session_ctx.return_value.__exit__ = MagicMock(return_value=False) ++ ++ mock_attest_state = MagicMock() ++ mock_attest_state.get_ima_pcrs.return_value = {"10": "some_value"} ++ mock_attest_state.agent_id = "deleted-agent" ++ mock_attest_state.get_agent_id.return_value = "deleted-agent" ++ ++ # Should not raise (previously would AssertionError) ++ 
cloud_verifier_tornado.store_attestation_state(mock_attest_state) ++ ++ # Verify no attempt to set attributes on None ++ mock_session.add.assert_not_called() ++ ++ ++if __name__ == "__main__": ++ unittest.main() +-- +2.49.0 + diff --git a/0014-Fix-registrar-duplicate-UUID-vulnerability.patch b/0014-Fix-registrar-duplicate-UUID-vulnerability.patch deleted file mode 100644 index 1fe3f80..0000000 --- a/0014-Fix-registrar-duplicate-UUID-vulnerability.patch +++ /dev/null @@ -1,1188 +0,0 @@ -From 2da614d212f58071f54c883ee368ffac4bc5e6b4 Mon Sep 17 00:00:00 2001 -From: Sergio Correia -Date: Tue, 9 Dec 2025 12:12:22 +0000 -Subject: [PATCH 14/14] Fix registrar duplicate UUID vulnerability - -Backport upstream PR#1825 - -Signed-off-by: Sergio Correia ---- - keylime/cmd/registrar.py | 6 + - keylime/models/registrar/registrar_agent.py | 116 +++++ - keylime/shared_data.py | 6 + - keylime/web/registrar/agents_controller.py | 98 +++- - test/test_agents_controller.py | 513 ++++++++++++++++++++ - test/test_registrar_tpm_identity.py | 342 +++++++++++++ - 6 files changed, 1071 insertions(+), 10 deletions(-) - create mode 100644 test/test_agents_controller.py - create mode 100644 test/test_registrar_tpm_identity.py - -diff --git a/keylime/cmd/registrar.py b/keylime/cmd/registrar.py -index 584275a..2e2b25e 100644 ---- a/keylime/cmd/registrar.py -+++ b/keylime/cmd/registrar.py -@@ -5,6 +5,7 @@ import cryptography - from keylime import config, keylime_logging - from keylime.common.migrations import apply - from keylime.models import da_manager, db_manager -+from keylime.shared_data import initialize_shared_memory - from keylime.web import RegistrarServer - - logger = keylime_logging.init_logging("registrar") -@@ -47,6 +48,11 @@ def main() -> None: - # Prepare backend for durable attestation, if configured - da_manager.make_backend("registrar") - -+ # Initialize shared memory for cross-process synchronization -+ # CRITICAL: Must be called before server.start_multi() to ensure all forked 
-+ # worker processes share the same manager instance for agent registration locks -+ initialize_shared_memory() -+ - # Start HTTP server - server = RegistrarServer() - server.start_multi() -diff --git a/keylime/models/registrar/registrar_agent.py b/keylime/models/registrar/registrar_agent.py -index fc7e1be..e26ae41 100644 ---- a/keylime/models/registrar/registrar_agent.py -+++ b/keylime/models/registrar/registrar_agent.py -@@ -65,8 +65,14 @@ class RegistrarAgent(PersistableModel): - def empty(cls): - agent = super().empty() - agent.provider_keys = {} -+ object.__setattr__(agent, "_tpm_identity_violation", False) - return agent - -+ @property -+ def has_tpm_identity_violation(self): -+ """Returns True if a TPM identity violation was detected during validation.""" -+ return getattr(self, "_tpm_identity_violation", False) -+ - def _check_key_against_cert(self, tpm_key_field, cert_field): - # If neither key nor certificate is being updated, no need to check - if tpm_key_field not in self.changes and cert_field not in self.changes: -@@ -139,6 +145,111 @@ class RegistrarAgent(PersistableModel): - - return compliant - -+ def _check_tpm_identity_immutable(self): -+ """ -+ Checks that TPM identity fields are not being changed during re-registration. -+ -+ This prevents an attacker from registering with the same UUID but a different TPM, -+ which would allow them to impersonate the original agent and bypass attestation. -+ -+ Checked fields (EK-based identity only): -+ - ek_tpm: Endorsement Key (primary TPM identity) -+ - ekcert: EK Certificate (binds EK to TPM manufacturer) -+ - aik_tpm: Attestation Key (bound to EK via MakeCredential/ActivateCredential) -+ -+ Note: IAK/IDevID fields are NOT checked and can change on re-registration. -+ -+ This check only applies to existing agents (those loaded from the database). -+ New agents created via RegistrarAgent.empty() have no committed values and are -+ allowed to set identity fields during initial registration. 
-+ -+ If the agent needs to be registered with a new TPM (e.g., hardware replacement), -+ the old agent record must be explicitly deleted first. -+ """ -+ # Define TPM identity fields that must remain immutable once set -+ # Only checking EK-based identity (ek_tpm, ekcert, aik_tpm) -+ # IAK/IDevID fields (iak_tpm, iak_cert, idevid_tpm, idevid_cert) are not checked -+ identity_fields = ["ek_tpm", "ekcert", "aik_tpm"] -+ -+ # Only check for existing agents (those loaded from database) -+ # New agents created via empty() will have no committed values -+ if not self.committed: -+ return -+ -+ # Track which fields have been changed -+ changed_fields = [] -+ -+ for field_name in identity_fields: -+ # Skip fields that are not being changed in this update -+ if field_name not in self.changes: -+ continue -+ -+ # Get the old (committed/database) and new (proposed) values -+ old_value = self.committed.get(field_name) -+ new_value = self.changes.get(field_name) -+ -+ # Allow setting a previously unset field (e.g., adding EK cert later) -+ if old_value is None: -+ continue -+ -+ # Reject attempts to remove an already-set identity field -+ if new_value is None: -+ changed_fields.append(field_name) -+ continue -+ -+ # Compare values based on field type -+ if field_name == "ekcert": -+ # For certificates, compare the actual certificate bytes -+ # Note: We compare full certificate, not just public key, because: -+ # 1. User requirement: reject if certificate changed even if same public key -+ # 2. Certificate contains more than just key (issuer, validity period, etc.) -+ # 3. 
Different cert for same key could indicate compromise or unauthorized replacement -+ try: -+ old_cert_bytes = old_value.public_bytes(Encoding.DER) -+ new_cert_bytes = new_value.public_bytes(Encoding.DER) -+ -+ if old_cert_bytes != new_cert_bytes: -+ changed_fields.append(field_name) -+ except Exception: -+ # If we can't extract certificate bytes, treat as changed to be safe -+ changed_fields.append(field_name) -+ else: -+ # For TPM keys (ek_tpm, aik_tpm), compare as binary data -+ # These are Binary(persist_as=String) fields, so they could be bytes or base64 strings -+ try: -+ old_bytes = old_value if isinstance(old_value, bytes) else base64.b64decode(old_value) -+ new_bytes = new_value if isinstance(new_value, bytes) else base64.b64decode(new_value) -+ -+ if old_bytes != new_bytes: -+ changed_fields.append(field_name) -+ except Exception: -+ # If comparison fails (e.g., invalid base64), treat as changed to be safe -+ changed_fields.append(field_name) -+ -+ # If any TPM identity fields were changed, this is a security violation -+ if changed_fields: -+ # Set flag to indicate TPM identity violation occurred -+ object.__setattr__(self, "_tpm_identity_violation", True) -+ -+ # Log security warning for audit trail -+ # Include agent_id and changed fields, but NOT the actual TPM values (sensitive data) -+ logger.warning( -+ "SECURITY: Rejected attempt to re-register agent '%s' with different TPM identity. " -+ "Changed fields: %s. This indicates a potential UUID spoofing attack. " -+ "The existing agent must be deleted before registering with a new TPM. " -+ "If this is unexpected, investigate for compromise.", -+ self.agent_id, -+ ", ".join(changed_fields), -+ ) -+ -+ # Add validation error to prevent registration -+ # Using "agent_id" field for the error because it's the UUID that's being improperly reused -+ self._add_error( -+ "agent_id", -+ f"cannot re-register with different TPM identity. Changed fields: {', '.join(changed_fields)}. 
" -+ "To register this UUID with a new TPM, delete the existing agent record first.", -+ ) -+ - def _check_all_cert_compliance(self): - non_compliant_certs = [] - -@@ -280,6 +391,11 @@ class RegistrarAgent(PersistableModel): - + ["port", "mtls_cert"], - ) - -+ # SECURITY CHECK: Verify TPM identity is not being changed on re-registration -+ # This must happen after cast_changes() (so we have new values to compare) -+ # but before other validation (so we reject immediately without processing further) -+ self._check_tpm_identity_immutable() -+ - # Log info about received EK or IAK/IDevID - self._log_root_identity() - # Verify EK as valid -diff --git a/keylime/shared_data.py b/keylime/shared_data.py -index 23a3d81..a415496 100644 ---- a/keylime/shared_data.py -+++ b/keylime/shared_data.py -@@ -58,6 +58,12 @@ class FlatDictView: - with self._lock: - return self._store.get(self._make_key(key), default) - -+ def pop(self, key: Any, default: Any = None) -> Any: -+ """Remove and return value for key, or default if key not present.""" -+ flat_key = self._make_key(key) -+ with self._lock: -+ return self._store.pop(flat_key, default) -+ - def keys(self) -> List[Any]: - """Return keys in this namespace.""" - prefix = f"dict:{self._namespace}:" -diff --git a/keylime/web/registrar/agents_controller.py b/keylime/web/registrar/agents_controller.py -index 9be2ef9..f2246de 100644 ---- a/keylime/web/registrar/agents_controller.py -+++ b/keylime/web/registrar/agents_controller.py -@@ -1,5 +1,8 @@ -+from sqlalchemy.exc import IntegrityError -+ - from keylime import keylime_logging - from keylime.models import RegistrarAgent -+from keylime.shared_data import get_shared_memory - from keylime.web.base import Controller - - logger = keylime_logging.init_logging("registrar") -@@ -28,16 +31,91 @@ class AgentsController(Controller): - - # POST /v2[.:minor]/agents/[:agent_id] - def create(self, agent_id, **params): -- agent = RegistrarAgent.get(agent_id) or RegistrarAgent.empty() # type: 
ignore[no-untyped-call] -- agent.update({"agent_id": agent_id, **params}) -- challenge = agent.produce_ak_challenge() -- -- if not challenge or not agent.changes_valid: -- self.log_model_errors(agent, logger) -- self.respond(400, "Could not register agent with invalid data") -- return -- -- agent.commit_changes() -+ """Register a new agent or re-register an existing agent. -+ -+ For new agents, this: -+ 1. Validates TPM identity (EK/AIK or IAK/IDevID) -+ 2. Generates an AK challenge encrypted with the EK -+ 3. Stores agent record in pending state -+ 4. Returns challenge blob to agent -+ -+ For existing agents (re-registration with same UUID): -+ 1. Verifies TPM identity has not changed (security check) -+ 2. If identity changed: rejects with 403 Forbidden -+ 3. If identity same: allows re-registration (e.g., after agent restart) -+ -+ Security: Re-registration with a different TPM is forbidden to prevent -+ UUID spoofing attacks where an attacker could impersonate a legitimate -+ agent by reusing its UUID. -+ -+ Race condition protection: Uses per-agent locks from SharedDataManager to prevent -+ race conditions between concurrent registration requests for the same agent_id. -+ This ensures the check-validate-commit sequence is atomic. Additionally, database -+ constraint violations (e.g., duplicate UUIDs from concurrent requests) are caught -+ and returned as 403 Forbidden. 
-+ """ -+ # Get shared memory manager and per-agent lock storage -+ shared_mem = get_shared_memory() -+ agent_locks = shared_mem.get_or_create_dict("agent_registration_locks") -+ -+ # Get or create a lock specific to this agent_id -+ if agent_id not in agent_locks: -+ agent_locks[agent_id] = shared_mem.manager.Lock() -+ -+ agent_lock = agent_locks[agent_id] -+ -+ # CRITICAL SECTION: Acquire lock to make check-validate-commit atomic -+ with agent_lock: -+ # Step 1: Load existing agent or create new one (inside lock) -+ agent = RegistrarAgent.get(agent_id) or RegistrarAgent.empty() # type: ignore[no-untyped-call] -+ -+ # Step 2: Update agent with new data and validate (inside lock) -+ agent.update({"agent_id": agent_id, **params}) -+ -+ # Step 3: Check for TPM identity change security violation -+ # Use explicit flag instead of fragile string matching for security check -+ if not agent.changes_valid and agent.has_tpm_identity_violation: -+ # Log the validation errors (includes security warning) -+ self.log_model_errors(agent, logger) -+ -+ # Return 403 Forbidden -+ # 403 indicates a policy violation, not a malformed request -+ self.respond(403, "Agent re-registration with different TPM identity is forbidden for security reasons") -+ return -+ -+ # Step 4: Generate AK challenge (inside lock) -+ challenge = agent.produce_ak_challenge() -+ -+ # Step 5: Check for any validation errors or challenge generation failure -+ if not challenge or not agent.changes_valid: -+ self.log_model_errors(agent, logger) -+ self.respond(400, "Could not register agent with invalid data") -+ return -+ -+ # Step 6: Commit to database (inside lock) -+ # This ensures no other request can modify the agent between validation and commit -+ try: -+ agent.commit_changes() -+ except IntegrityError as e: -+ # Database constraint violation - most likely duplicate agent_id -+ # This can happen if two requests try to register the same new UUID simultaneously -+ # and both pass validation before either 
commits (database race condition) -+ logger.warning( -+ "SECURITY: Agent registration failed due to database constraint violation for agent_id '%s'. " -+ "This UUID may already be registered by a concurrent request or the agent already exists. " -+ "Database error: %s", -+ agent_id, -+ str(e), -+ ) -+ self.respond( -+ 403, -+ f"Agent with UUID '{agent_id}' cannot be registered. " -+ "This UUID is already in use or a concurrent registration is in progress.", -+ ) -+ return -+ -+ # Lock released - safe to respond to client -+ # Return challenge blob for agent to decrypt - self.respond(200, "Success", {"blob": challenge}) - - # DELETE /v2[.:minor]/agents/:agent_id/ -diff --git a/test/test_agents_controller.py b/test/test_agents_controller.py -new file mode 100644 -index 0000000..898d8f0 ---- /dev/null -+++ b/test/test_agents_controller.py -@@ -0,0 +1,513 @@ -+"""Unit tests for AgentsController (registrar). -+ -+Tests the registrar's agent registration endpoints, including the -+security fix that prevents UUID spoofing via re-registration with -+a different TPM identity. 
-+""" -+ -+# type: ignore - Controller methods are dynamically bound -+ -+import unittest -+from typing import cast -+from unittest.mock import MagicMock, patch -+ -+from sqlalchemy.exc import IntegrityError -+ -+from keylime.web.registrar.agents_controller import AgentsController -+ -+ -+class TestAgentsControllerIndex(unittest.TestCase): -+ """Test cases for AgentsController.index().""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ mock_action_handler = MagicMock() -+ self.controller = cast(AgentsController, AgentsController(mock_action_handler)) -+ self.controller.respond = MagicMock() -+ -+ @patch("keylime.models.RegistrarAgent.all_ids") -+ def test_index_success(self, mock_all_ids): -+ """Test successful retrieval of all agent IDs.""" -+ mock_all_ids.return_value = ["agent-1", "agent-2", "agent-3"] -+ -+ self.controller.index() -+ -+ self.controller.respond.assert_called_once_with(200, "Success", {"uuids": ["agent-1", "agent-2", "agent-3"]}) # type: ignore[attr-defined] -+ -+ -+class TestAgentsControllerShow(unittest.TestCase): -+ """Test cases for AgentsController.show().""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ mock_action_handler = MagicMock() -+ self.controller = cast(AgentsController, AgentsController(mock_action_handler)) -+ self.controller.respond = MagicMock() -+ self.test_agent_id = "test-agent-123" -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_show_not_found(self, mock_get): -+ """Test show with non-existent agent.""" -+ mock_get.return_value = None -+ -+ self.controller.show(self.test_agent_id) -+ -+ self.controller.respond.assert_called_once_with(404, f"Agent with ID '{self.test_agent_id}' not found") # type: ignore[attr-defined] -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_show_not_active(self, mock_get): -+ """Test show with inactive agent.""" -+ mock_agent = MagicMock() -+ mock_agent.active = False -+ mock_get.return_value = mock_agent -+ -+ self.controller.show(self.test_agent_id) 
-+ -+ self.controller.respond.assert_called_once_with( # type: ignore[attr-defined] -+ 404, f"Agent with ID '{self.test_agent_id}' has not been activated" -+ ) -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_show_success(self, mock_get): -+ """Test successful show of active agent.""" -+ mock_agent = MagicMock() -+ mock_agent.active = True -+ mock_agent.render.return_value = {"agent_id": self.test_agent_id, "active": True} -+ mock_get.return_value = mock_agent -+ -+ self.controller.show(self.test_agent_id) -+ -+ self.controller.respond.assert_called_once_with( # type: ignore[attr-defined] -+ 200, "Success", {"agent_id": self.test_agent_id, "active": True} -+ ) -+ -+ -+class TestAgentsControllerCreate(unittest.TestCase): -+ """Test cases for AgentsController.create() - the main registration endpoint.""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ mock_action_handler = MagicMock() -+ self.controller = cast(AgentsController, AgentsController(mock_action_handler)) -+ self.controller.respond = MagicMock() -+ self.controller.log_model_errors = MagicMock() -+ self.test_agent_id = "test-agent-123" -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_create_new_agent_success(self, mock_get): -+ """Test successful registration of a new agent.""" -+ # Mock that agent doesn't exist yet -+ mock_get.return_value = None -+ -+ # Create mock agent that will be returned by empty() -+ mock_agent = MagicMock() -+ mock_agent.changes_valid = True -+ mock_agent.errors = {} -+ mock_agent.produce_ak_challenge.return_value = "challenge_blob_data" -+ -+ # Patch RegistrarAgent.empty to return our mock -+ with patch("keylime.models.RegistrarAgent.empty", return_value=mock_agent): -+ params = {"ek_tpm": "ek_key", "aik_tpm": "aik_key"} -+ self.controller.create(self.test_agent_id, **params) -+ -+ # Verify agent was updated with params -+ mock_agent.update.assert_called_once_with({"agent_id": self.test_agent_id, **params}) -+ -+ # Verify challenge was generated 
-+ mock_agent.produce_ak_challenge.assert_called_once() -+ -+ # Verify agent was saved -+ mock_agent.commit_changes.assert_called_once() -+ -+ # Verify 200 response with challenge -+ self.controller.respond.assert_called_once_with(200, "Success", {"blob": "challenge_blob_data"}) # type: ignore[attr-defined] -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_create_reregistration_same_tpm_identity(self, mock_get): -+ """Test successful re-registration with same TPM identity.""" -+ # Mock existing agent -+ mock_existing_agent = MagicMock() -+ mock_existing_agent.changes_valid = True -+ mock_existing_agent.errors = {} -+ mock_existing_agent.produce_ak_challenge.return_value = "challenge_blob_data" -+ mock_get.return_value = mock_existing_agent -+ -+ params = {"ek_tpm": "same_ek_key", "aik_tpm": "same_aik_key"} -+ self.controller.create(self.test_agent_id, **params) -+ -+ # Verify agent was updated -+ mock_existing_agent.update.assert_called_once_with({"agent_id": self.test_agent_id, **params}) -+ -+ # Verify challenge was generated -+ mock_existing_agent.produce_ak_challenge.assert_called_once() -+ -+ # Verify agent was saved -+ mock_existing_agent.commit_changes.assert_called_once() -+ -+ # Verify 200 response -+ self.controller.respond.assert_called_once_with(200, "Success", {"blob": "challenge_blob_data"}) # type: ignore[attr-defined] -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_create_reregistration_different_tpm_identity_forbidden(self, mock_get): -+ """Test re-registration with different TPM identity is rejected with 403. -+ -+ This is the key security fix: preventing UUID spoofing by rejecting -+ attempts to re-register an agent with a different TPM identity. 
-+ """ -+ # Mock existing agent -+ mock_existing_agent = MagicMock() -+ mock_existing_agent.changes_valid = False # Validation failed -+ # Simulate the error added by _check_tpm_identity_immutable -+ mock_existing_agent.errors = { -+ "agent_id": [ -+ "Agent re-registration attempted with different TPM identity (changed fields: ek_tpm). " -+ "This is a security violation - the same agent UUID cannot be reused with a different TPM." -+ ] -+ } -+ mock_get.return_value = mock_existing_agent -+ -+ params = {"ek_tpm": "different_ek_key", "aik_tpm": "same_aik_key"} -+ self.controller.create(self.test_agent_id, **params) -+ -+ # Verify agent was updated (which triggers validation) -+ mock_existing_agent.update.assert_called_once_with({"agent_id": self.test_agent_id, **params}) -+ -+ # Verify errors were logged -+ self.controller.log_model_errors.assert_called_once() # type: ignore[attr-defined] -+ -+ # Verify 403 Forbidden response (not 400!) -+ self.controller.respond.assert_called_once_with( # type: ignore[attr-defined] -+ 403, "Agent re-registration with different TPM identity is forbidden for security reasons" -+ ) -+ -+ # Verify agent was NOT saved -+ mock_existing_agent.commit_changes.assert_not_called() -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_create_invalid_data_other_validation_error(self, mock_get): -+ """Test registration with other validation errors returns 400.""" -+ # Mock agent with validation errors (not TPM identity related) -+ mock_agent = MagicMock() -+ mock_agent.changes_valid = False -+ # Error not related to TPM identity -+ mock_agent.errors = {"ek_tpm": ["must be a valid TPM2B_PUBLIC structure"]} -+ mock_agent.has_tpm_identity_violation = False # Not a TPM identity violation -+ mock_agent.produce_ak_challenge.return_value = None -+ mock_get.return_value = None -+ -+ with patch("keylime.models.RegistrarAgent.empty", return_value=mock_agent): -+ params = {"ek_tpm": "invalid_ek_format"} -+ self.controller.create(self.test_agent_id, 
**params) -+ -+ # Verify errors were logged -+ self.controller.log_model_errors.assert_called_once() # type: ignore[attr-defined] -+ -+ # Verify 400 Bad Request (not 403) -+ self.controller.respond.assert_called_once_with(400, "Could not register agent with invalid data") # type: ignore[attr-defined] -+ -+ # Verify agent was NOT saved -+ mock_agent.commit_changes.assert_not_called() -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_create_challenge_generation_failure(self, mock_get): -+ """Test registration fails if challenge generation fails.""" -+ # Mock agent where challenge generation fails -+ mock_agent = MagicMock() -+ mock_agent.changes_valid = True -+ mock_agent.errors = {} -+ mock_agent.produce_ak_challenge.return_value = None # Challenge generation failed -+ mock_get.return_value = None -+ -+ with patch("keylime.models.RegistrarAgent.empty", return_value=mock_agent): -+ params = {"ek_tpm": "ek_key", "aik_tpm": "aik_key"} -+ self.controller.create(self.test_agent_id, **params) -+ -+ # Verify errors were logged -+ self.controller.log_model_errors.assert_called_once() # type: ignore[attr-defined] -+ -+ # Verify 400 response -+ self.controller.respond.assert_called_once_with(400, "Could not register agent with invalid data") # type: ignore[attr-defined] -+ -+ # Verify agent was NOT saved -+ mock_agent.commit_changes.assert_not_called() -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_create_validation_error_with_agent_id_but_not_tpm_identity(self, mock_get): -+ """Test that agent_id errors unrelated to TPM identity get 400, not 403.""" -+ # Mock agent with agent_id error, but not about TPM identity -+ mock_agent = MagicMock() -+ mock_agent.changes_valid = False -+ mock_agent.errors = {"agent_id": ["must be a valid UUID format"]} # Not about TPM identity -+ mock_agent.has_tpm_identity_violation = False # Not a TPM identity violation -+ mock_agent.produce_ak_challenge.return_value = None -+ mock_get.return_value = None -+ -+ with 
patch("keylime.models.RegistrarAgent.empty", return_value=mock_agent): -+ params = {"ek_tpm": "ek_key", "aik_tpm": "aik_key"} -+ self.controller.create(self.test_agent_id, **params) -+ -+ # Verify 400 Bad Request (not 403) because it's not a TPM identity violation -+ self.controller.respond.assert_called_once_with(400, "Could not register agent with invalid data") # type: ignore[attr-defined] -+ -+ -+class TestAgentsControllerDelete(unittest.TestCase): -+ """Test cases for AgentsController.delete().""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ mock_action_handler = MagicMock() -+ self.controller = cast(AgentsController, AgentsController(mock_action_handler)) -+ self.controller.respond = MagicMock() -+ self.test_agent_id = "test-agent-123" -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_delete_not_found(self, mock_get): -+ """Test delete with non-existent agent.""" -+ mock_get.return_value = None -+ -+ self.controller.delete(self.test_agent_id) -+ -+ self.controller.respond.assert_called_once_with(404, f"Agent with ID '{self.test_agent_id}' not found") # type: ignore[attr-defined] -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_delete_success(self, mock_get): -+ """Test successful agent deletion.""" -+ mock_agent = MagicMock() -+ mock_get.return_value = mock_agent -+ -+ self.controller.delete(self.test_agent_id) -+ -+ # Verify agent was deleted -+ mock_agent.delete.assert_called_once() -+ -+ # Verify 200 response -+ self.controller.respond.assert_called_once_with(200, "Success") # type: ignore[attr-defined] -+ -+ -+class TestAgentsControllerActivate(unittest.TestCase): -+ """Test cases for AgentsController.activate().""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ mock_action_handler = MagicMock() -+ self.controller = cast(AgentsController, AgentsController(mock_action_handler)) -+ self.controller.respond = MagicMock() -+ self.test_agent_id = "test-agent-123" -+ self.test_auth_tag = "valid_auth_tag" -+ -+ 
@patch("keylime.models.RegistrarAgent.get") -+ def test_activate_not_found(self, mock_get): -+ """Test activate with non-existent agent.""" -+ mock_get.return_value = None -+ -+ self.controller.activate(self.test_agent_id, self.test_auth_tag) -+ -+ self.controller.respond.assert_called_once_with(404, f"Agent with ID '{self.test_agent_id}' not found") # type: ignore[attr-defined] -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_activate_success(self, mock_get): -+ """Test successful agent activation.""" -+ mock_agent = MagicMock() -+ mock_agent.verify_ak_response.return_value = True # Auth tag is valid -+ mock_get.return_value = mock_agent -+ -+ self.controller.activate(self.test_agent_id, self.test_auth_tag) -+ -+ # Verify auth tag was verified -+ mock_agent.verify_ak_response.assert_called_once_with(self.test_auth_tag) -+ -+ # Verify agent was saved -+ mock_agent.commit_changes.assert_called_once() -+ -+ # Verify 200 response -+ self.controller.respond.assert_called_once_with(200, "Success") # type: ignore[attr-defined] -+ -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_activate_invalid_auth_tag(self, mock_get): -+ """Test activation with invalid auth tag.""" -+ mock_agent = MagicMock() -+ mock_agent.verify_ak_response.return_value = False # Auth tag is invalid -+ mock_get.return_value = mock_agent -+ -+ self.controller.activate(self.test_agent_id, self.test_auth_tag) -+ -+ # Verify auth tag was verified -+ mock_agent.verify_ak_response.assert_called_once_with(self.test_auth_tag) -+ -+ # Verify agent was deleted (due to failed activation) -+ mock_agent.delete.assert_called_once() -+ -+ # Verify agent was NOT saved -+ mock_agent.commit_changes.assert_not_called() -+ -+ # Verify 400 response with detailed error message -+ self.controller.respond.assert_called_once() # type: ignore[attr-defined] -+ call_args = self.controller.respond.call_args # type: ignore[attr-defined] -+ self.assertEqual(call_args[0][0], 400) -+ 
self.assertIn(self.test_auth_tag, call_args[0][1]) -+ self.assertIn(self.test_agent_id, call_args[0][1]) -+ self.assertIn("deleted", call_args[0][1]) -+ -+ -+class TestAgentsControllerConcurrency(unittest.TestCase): -+ """Test cases for concurrent registration TOCTOU race condition protection.""" -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ mock_action_handler = MagicMock() -+ self.controller = cast(AgentsController, AgentsController(mock_action_handler)) -+ self.controller.respond = MagicMock() -+ self.controller.log_model_errors = MagicMock() -+ self.test_agent_id = "concurrent-test-agent" -+ -+ @patch("keylime.web.registrar.agents_controller.get_shared_memory") -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_concurrent_registration_uses_locking(self, mock_get, mock_shared_mem): -+ """Test that concurrent registration attempts use per-agent locking. -+ -+ This test verifies that the locking mechanism is invoked to prevent -+ TOCTOU race conditions during concurrent registration. 
-+ """ -+ # Mock shared memory manager with lock support -+ mock_manager = MagicMock() -+ mock_lock = MagicMock() -+ mock_lock.__enter__ = MagicMock(return_value=None) -+ mock_lock.__exit__ = MagicMock(return_value=None) -+ mock_manager.Lock.return_value = mock_lock -+ -+ mock_agent_locks = MagicMock() -+ mock_agent_locks.__contains__ = MagicMock(return_value=False) -+ mock_agent_locks.__setitem__ = MagicMock() -+ mock_agent_locks.__getitem__ = MagicMock(return_value=mock_lock) -+ -+ mock_shared_mem.return_value.get_or_create_dict.return_value = mock_agent_locks -+ mock_shared_mem.return_value.manager = mock_manager -+ -+ # Mock agent that doesn't exist yet (new registration) -+ mock_get.return_value = None -+ -+ mock_agent = MagicMock() -+ mock_agent.changes_valid = True -+ mock_agent.errors = {} -+ mock_agent.produce_ak_challenge.return_value = "challenge_blob" -+ -+ with patch("keylime.models.RegistrarAgent.empty", return_value=mock_agent): -+ params = {"ek_tpm": "ek_key", "aik_tpm": "aik_key"} -+ self.controller.create(self.test_agent_id, **params) -+ -+ # Verify lock was acquired and released -+ mock_lock.__enter__.assert_called_once() -+ mock_lock.__exit__.assert_called_once() -+ -+ # Verify successful registration -+ mock_agent.commit_changes.assert_called_once() -+ self.controller.respond.assert_called_once_with(200, "Success", {"blob": "challenge_blob"}) # type: ignore[attr-defined] -+ -+ @patch("keylime.web.registrar.agents_controller.get_shared_memory") -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_different_agents_use_different_locks(self, mock_get, mock_shared_mem): -+ """Test that different agent_ids use different locks for parallel registration. -+ -+ This ensures that registrations for different agents don't block each other, -+ only concurrent registrations for the same agent_id are serialized. 
-+ """ -+ # Mock shared memory manager -+ mock_manager = MagicMock() -+ -+ def mock_lock_factory(): -+ """Return a new lock each time.""" -+ return MagicMock() -+ -+ mock_manager.Lock.side_effect = mock_lock_factory -+ -+ mock_agent_locks = {} -+ -+ def mock_getitem(_self, key): # pylint: disable=unused-argument -+ return mock_agent_locks.get(key) -+ -+ def mock_setitem(_self, key, value): # pylint: disable=unused-argument -+ mock_agent_locks[key] = value -+ -+ def mock_contains(_self, key): # pylint: disable=unused-argument -+ return key in mock_agent_locks -+ -+ mock_locks_dict = MagicMock() -+ mock_locks_dict.__contains__ = mock_contains -+ mock_locks_dict.__setitem__ = mock_setitem -+ mock_locks_dict.__getitem__ = mock_getitem -+ -+ mock_shared_mem.return_value.get_or_create_dict.return_value = mock_locks_dict -+ mock_shared_mem.return_value.manager = mock_manager -+ -+ # Register two different agents -+ mock_get.return_value = None -+ -+ for agent_id in ["agent-a", "agent-b"]: -+ mock_agent = MagicMock() -+ mock_agent.changes_valid = True -+ mock_agent.errors = {} -+ mock_agent.produce_ak_challenge.return_value = f"challenge_{agent_id}" -+ -+ with patch("keylime.models.RegistrarAgent.empty", return_value=mock_agent): -+ self.controller.respond = MagicMock() # Reset for each call -+ params = {"ek_tpm": f"ek_{agent_id}", "aik_tpm": f"aik_{agent_id}"} -+ self.controller.create(agent_id, **params) -+ -+ # Verify that two different locks were created (one per agent) -+ self.assertEqual(len(mock_agent_locks), 2) -+ self.assertIn("agent-a", mock_agent_locks) -+ self.assertIn("agent-b", mock_agent_locks) -+ # Verify they are different lock objects -+ self.assertIsNot(mock_agent_locks["agent-a"], mock_agent_locks["agent-b"]) -+ -+ @patch("keylime.web.registrar.agents_controller.get_shared_memory") -+ @patch("keylime.models.RegistrarAgent.get") -+ def test_concurrent_new_registration_database_constraint_violation(self, mock_get, mock_shared_mem): -+ """Test that 
database constraint violations during concurrent new agent registration return 403. -+ -+ This handles the edge case where two requests both create empty agents for the same UUID, -+ both pass validation, but the second commit fails with IntegrityError due to duplicate -+ primary key. This should return 403 Forbidden, not 500 Internal Server Error. -+ """ -+ # Mock shared memory manager with lock support -+ mock_manager = MagicMock() -+ mock_lock = MagicMock() -+ mock_lock.__enter__ = MagicMock(return_value=None) -+ mock_lock.__exit__ = MagicMock(return_value=None) -+ mock_manager.Lock.return_value = mock_lock -+ -+ mock_agent_locks = MagicMock() -+ mock_agent_locks.__contains__ = MagicMock(return_value=False) -+ mock_agent_locks.__setitem__ = MagicMock() -+ mock_agent_locks.__getitem__ = MagicMock(return_value=mock_lock) -+ -+ mock_shared_mem.return_value.get_or_create_dict.return_value = mock_agent_locks -+ mock_shared_mem.return_value.manager = mock_manager -+ -+ # Mock agent that doesn't exist yet (new registration) -+ mock_get.return_value = None -+ -+ mock_agent = MagicMock() -+ mock_agent.changes_valid = True -+ mock_agent.errors = {} -+ mock_agent.produce_ak_challenge.return_value = "challenge_blob" -+ -+ # Simulate IntegrityError during commit (duplicate primary key) -+ # IntegrityError(statement, params, orig) where orig is the original exception -+ orig_exception = Exception("UNIQUE constraint failed: registrarmain.agent_id") -+ mock_agent.commit_changes.side_effect = IntegrityError("INSERT INTO registrarmain ...", None, orig_exception) -+ -+ with patch("keylime.models.RegistrarAgent.empty", return_value=mock_agent): -+ params = {"ek_tpm": "ek_key", "aik_tpm": "aik_key"} -+ self.controller.create(self.test_agent_id, **params) -+ -+ # Verify 403 Forbidden response (not 500) -+ self.controller.respond.assert_called_once() # type: ignore[attr-defined] -+ call_args = self.controller.respond.call_args # type: ignore[attr-defined] -+ 
self.assertEqual(call_args[0][0], 403) -+ self.assertIn(self.test_agent_id, call_args[0][1]) -+ self.assertIn("already in use", call_args[0][1]) -+ -+ -+if __name__ == "__main__": -+ unittest.main() -diff --git a/test/test_registrar_tpm_identity.py b/test/test_registrar_tpm_identity.py -new file mode 100644 -index 0000000..2fc69b2 ---- /dev/null -+++ b/test/test_registrar_tpm_identity.py -@@ -0,0 +1,342 @@ -+""" -+Unit tests for RegistrarAgent TPM identity immutability security check. -+ -+This module tests the _check_tpm_identity_immutable() method which prevents -+UUID spoofing attacks by rejecting re-registration attempts with different TPM identities. -+""" -+ -+import base64 -+import types -+import unittest -+from unittest.mock import Mock -+ -+import cryptography.x509 -+ -+from keylime.certificate_wrapper import wrap_certificate -+from keylime.models.registrar.registrar_agent import RegistrarAgent -+ -+ -+class TestRegistrarAgentTPMIdentity(unittest.TestCase): -+ """Test cases for RegistrarAgent TPM identity immutability.""" -+ -+ # pylint: disable=protected-access # Testing protected methods -+ # pylint: disable=not-callable # False positive: methods bound via types.MethodType are callable -+ -+ def setUp(self): -+ """Set up test fixtures.""" -+ # EK certificate (used for testing certificate comparison) -+ self.ek_cert_pem = """-----BEGIN CERTIFICATE----- -+MIIEnzCCA4egAwIBAgIEMV64bDANBgkqhkiG9w0BAQUFADBtMQswCQYDVQQGEwJE -+RTEQMA4GA1UECBMHQmF2YXJpYTEhMB8GA1UEChMYSW5maW5lb24gVGVjaG5vbG9n -+aWVzIEFHMQwwCgYDVQQLEwNBSU0xGzAZBgNVBAMTEklGWCBUUE0gRUsgUm9vdCBD -+QTAeFw0wNTEwMjAxMzQ3NDNaFw0yNTEwMjAxMzQ3NDNaMHcxCzAJBgNVBAYTAkRF -+MQ8wDQYDVQQIEwZTYXhvbnkxITAfBgNVBAoTGEluZmluZW9uIFRlY2hub2xvZ2ll -+cyBBRzEMMAoGA1UECxMDQUlNMSYwJAYDVQQDEx1JRlggVFBNIEVLIEludGVybWVk -+aWF0ZSBDQSAwMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALftPhYN -+t4rE+JnU/XOPICbOBLvfo6iA7nuq7zf4DzsAWBdsZEdFJQfaK331ihG3IpQnlQ2i -+YtDim289265f0J4OkPFpKeFU27CsfozVaNUm6UR/uzwA8ncxFc3iZLRMRNLru/Al 
-+VG053ULVDQMVx2iwwbBSAYO9pGiGbk1iMmuZaSErMdb9v0KRUyZM7yABiyDlM3cz -+UQX5vLWV0uWqxdGoHwNva5u3ynP9UxPTZWHZOHE6+14rMzpobs6Ww2RR8BgF96rh -+4rRAZEl8BXhwiQq4STvUXkfvdpWH4lzsGcDDtrB6Nt3KvVNvsKz+b07Dk+Xzt+EH -+NTf3Byk2HlvX+scCAwEAAaOCATswggE3MB0GA1UdDgQWBBQ4k8292HPEIzMV4bE7 -+qWoNI8wQxzAOBgNVHQ8BAf8EBAMCAgQwEgYDVR0TAQH/BAgwBgEB/wIBADBYBgNV -+HSABAf8ETjBMMEoGC2CGSAGG+EUBBy8BMDswOQYIKwYBBQUHAgEWLWh0dHA6Ly93 -+d3cudmVyaXNpZ24uY29tL3JlcG9zaXRvcnkvaW5kZXguaHRtbDCBlwYDVR0jBIGP -+MIGMgBRW65FEhWPWcrOu1EWWC/eUDlRCpqFxpG8wbTELMAkGA1UEBhMCREUxEDAO -+BgNVBAgTB0JhdmFyaWExITAfBgNVBAoTGEluZmluZW9uIFRlY2hub2xvZ2llcyBB -+RzEMMAoGA1UECxMDQUlNMRswGQYDVQQDExJJRlggVFBNIEVLIFJvb3QgQ0GCAQMw -+DQYJKoZIhvcNAQEFBQADggEBABJ1+Ap3rNlxZ0FW0aIgdzktbNHlvXWNxFdYIBbM -+OKjmbOos0Y4O60eKPu259XmMItCUmtbzF3oKYXq6ybARUT2Lm+JsseMF5VgikSlU -+BJALqpKVjwAds81OtmnIQe2LSu4xcTSavpsL4f52cUAu/maMhtSgN9mq5roYptq9 -+DnSSDZrX4uYiMPl//rBaNDBflhJ727j8xo9CCohF3yQUoQm7coUgbRMzyO64yMIO -+3fhb+Vuc7sNwrMOz3VJN14C3JMoGgXy0c57IP/kD5zGRvljKEvrRC2I147+fPeLS -+DueRMS6lblvRKiZgmGAg7YaKOkOaEmVDMQ+fTo2Po7hI5wc= -+-----END CERTIFICATE-----""" -+ -+ # Create wrapped cert from real certificate -+ self.ek_cert = cryptography.x509.load_pem_x509_certificate(self.ek_cert_pem.encode()) -+ self.ek_cert_wrapped = wrap_certificate(self.ek_cert, None) -+ -+ # Create a different cert mock that returns different DER bytes -+ self.different_ek_cert_wrapped = Mock() -+ self.different_ek_cert_wrapped.public_bytes = Mock(return_value=b"DIFFERENT_CERTIFICATE_DER_BYTES_FOR_TESTING") -+ -+ # Sample TPM keys (base64 encoded for simplicity in tests) -+ self.ek_tpm_1 = b"EK_TPM_KEY_NUMBER_ONE_SAMPLE_DATA" -+ self.ek_tpm_2 = b"EK_TPM_KEY_NUMBER_TWO_DIFFERENT_" -+ self.aik_tpm_1 = b"AIK_TPM_KEY_NUMBER_ONE_SAMPLE_DATA" -+ self.aik_tpm_2 = b"AIK_TPM_KEY_NUMBER_TWO_DIFFERENT_" -+ -+ # IAK/IDevID keys for testing that they are not checked -+ self.iak_tpm_1 = b"IAK_TPM_KEY_NUMBER_ONE" -+ self.iak_tpm_2 = b"IAK_TPM_KEY_NUMBER_TWO" -+ -+ def 
create_mock_registrar_agent(self, agent_id="test-agent-uuid"): -+ """Create a mock RegistrarAgent with necessary attributes.""" -+ agent = Mock() -+ agent.agent_id = agent_id -+ agent.changes = {} -+ agent.values = {} -+ agent.committed = {} -+ agent._add_error = Mock() -+ agent.errors = {} -+ -+ # Bind the actual method to the mock instance -+ agent._check_tpm_identity_immutable = types.MethodType(RegistrarAgent._check_tpm_identity_immutable, agent) -+ -+ return agent -+ -+ def test_new_agent_no_committed_values(self): -+ """Test that new agents (no committed values) are not checked.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = {} # New agent, no previous values -+ agent.changes = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should not add any errors for new agents -+ agent._add_error.assert_not_called() -+ -+ def test_reregistration_same_tpm_all_fields_identical(self): -+ """Test re-registration with identical TPM identity passes.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ agent.changes = { -+ "ek_tpm": self.ek_tpm_1, # Same -+ "ekcert": self.ek_cert_wrapped, # Same -+ "aik_tpm": self.aik_tpm_1, # Same -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should not add any errors -+ agent._add_error.assert_not_called() -+ -+ def test_reregistration_different_ek_tpm(self): -+ """Test re-registration with different EK TPM is rejected.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ agent.changes = { -+ "ek_tpm": self.ek_tpm_2, # DIFFERENT -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should add error for 
agent_id field -+ agent._add_error.assert_called_once() -+ call_args = agent._add_error.call_args -+ self.assertEqual(call_args[0][0], "agent_id") -+ self.assertIn("different TPM identity", call_args[0][1]) -+ self.assertIn("ek_tpm", call_args[0][1]) -+ -+ def test_reregistration_different_aik_tpm(self): -+ """Test re-registration with different AIK TPM is rejected.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ agent.changes = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_2, # DIFFERENT -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should add error for agent_id field -+ agent._add_error.assert_called_once() -+ call_args = agent._add_error.call_args -+ self.assertEqual(call_args[0][0], "agent_id") -+ self.assertIn("different TPM identity", call_args[0][1]) -+ self.assertIn("aik_tpm", call_args[0][1]) -+ -+ def test_reregistration_different_ekcert(self): -+ """Test re-registration with different EK certificate is rejected.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ agent.changes = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.different_ek_cert_wrapped, # DIFFERENT -+ "aik_tpm": self.aik_tpm_1, -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should add error for agent_id field -+ agent._add_error.assert_called_once() -+ call_args = agent._add_error.call_args -+ self.assertEqual(call_args[0][0], "agent_id") -+ self.assertIn("different TPM identity", call_args[0][1]) -+ self.assertIn("ekcert", call_args[0][1]) -+ -+ def test_reregistration_multiple_fields_changed(self): -+ """Test re-registration with multiple fields changed lists all of them.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ 
"ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ agent.changes = { -+ "ek_tpm": self.ek_tpm_2, # DIFFERENT -+ "ekcert": self.different_ek_cert_wrapped, # DIFFERENT -+ "aik_tpm": self.aik_tpm_2, # DIFFERENT -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should add error listing all changed fields -+ agent._add_error.assert_called_once() -+ call_args = agent._add_error.call_args -+ self.assertEqual(call_args[0][0], "agent_id") -+ error_message = call_args[0][1] -+ self.assertIn("ek_tpm", error_message) -+ self.assertIn("ekcert", error_message) -+ self.assertIn("aik_tpm", error_message) -+ -+ def test_adding_ekcert_to_existing_agent(self): -+ """Test that adding EK cert to existing agent (without cert) is allowed.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": None, # Previously no cert -+ "aik_tpm": self.aik_tpm_1, -+ } -+ agent.changes = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, # NOW adding cert -+ "aik_tpm": self.aik_tpm_1, -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should not add any errors - adding cert is allowed -+ agent._add_error.assert_not_called() -+ -+ def test_removing_ek_tpm_rejected(self): -+ """Test that removing an existing EK TPM is rejected.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ agent.changes = { -+ "ek_tpm": None, # Trying to remove -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should add error -+ agent._add_error.assert_called_once() -+ call_args = agent._add_error.call_args -+ self.assertIn("ek_tpm", call_args[0][1]) -+ -+ def test_iak_idevid_changes_not_checked(self): -+ """Test that IAK/IDevID field changes are NOT checked (allowed).""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { 
-+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ "iak_tpm": self.iak_tpm_1, -+ "idevid_tpm": b"IDEVID_OLD", -+ } -+ agent.changes = { -+ "ek_tpm": self.ek_tpm_1, # Same -+ "ekcert": self.ek_cert_wrapped, # Same -+ "aik_tpm": self.aik_tpm_1, # Same -+ "iak_tpm": self.iak_tpm_2, # DIFFERENT - but not checked -+ "idevid_tpm": b"IDEVID_NEW", # DIFFERENT - but not checked -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should not add any errors - IAK/IDevID are not checked -+ agent._add_error.assert_not_called() -+ -+ def test_only_changed_fields_are_checked(self): -+ """Test that only fields in changes dict are checked.""" -+ agent = self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ # Only updating IP, not touching TPM identity fields -+ agent.changes = { -+ "ip": "192.168.1.100", -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should not add any errors - no identity fields changed -+ agent._add_error.assert_not_called() -+ -+ def test_base64_encoded_tpm_keys(self): -+ """Test that base64-encoded TPM keys are properly compared.""" -+ agent = self.create_mock_registrar_agent() -+ -+ # Simulate keys stored as base64 strings (as they might be from database) -+ ek_b64 = base64.b64encode(self.ek_tpm_1).decode("utf-8") -+ aik_b64 = base64.b64encode(self.aik_tpm_1).decode("utf-8") -+ -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, # As bytes -+ "aik_tpm": self.aik_tpm_1, # As bytes -+ } -+ agent.changes = { -+ "ek_tpm": ek_b64, # As base64 string -+ "aik_tpm": aik_b64, # As base64 string -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should not add any errors - should handle both formats -+ agent._add_error.assert_not_called() -+ -+ def test_partial_update_only_one_field(self): -+ """Test updating only one TPM field while others remain unchanged.""" -+ agent = 
self.create_mock_registrar_agent() -+ agent.committed = { -+ "ek_tpm": self.ek_tpm_1, -+ "ekcert": self.ek_cert_wrapped, -+ "aik_tpm": self.aik_tpm_1, -+ } -+ # Only changing AIK in this update -+ agent.changes = { -+ "aik_tpm": self.aik_tpm_2, # DIFFERENT -+ } -+ -+ agent._check_tpm_identity_immutable() -+ -+ # Should add error for the changed field -+ agent._add_error.assert_called_once() -+ call_args = agent._add_error.call_args -+ self.assertIn("aik_tpm", call_args[0][1]) -+ -+ -+if __name__ == "__main__": -+ unittest.main() --- -2.47.3 - diff --git a/0014-push-attestation-documentation.patch b/0014-push-attestation-documentation.patch new file mode 100644 index 0000000..ae9bf4b --- /dev/null +++ b/0014-push-attestation-documentation.patch @@ -0,0 +1,1910 @@ +From 077762aa335de0cf99e190bd5afb5b77f5403a89 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Tue, 17 Feb 2026 16:43:04 +0100 +Subject: [PATCH] Document agent-driven (push) attestation + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/assets/push-model-architecture.svg | 86 ++++ + docs/assets/push-model-sequence.svg | 122 +++++ + docs/conf.py | 1 + + docs/design.rst | 1 + + docs/design/overview.rst | 11 +- + docs/design/push_model.rst | 226 +++++++++ + docs/index.rst | 1 + + docs/installation.rst | 11 + + docs/man/keylime_push_model_agent.8.rst | 226 +++++++++ + docs/man/keylime_verifier.8.rst | 3 +- + docs/rest_apis.rst | 30 ++ + docs/rest_apis/3_0/3_0.rst | 21 + + docs/rest_apis/3_0/verifier.rst | 608 ++++++++++++++++++++++++ + docs/user_guide.rst | 1 + + docs/user_guide/configuration.rst | 7 + + docs/user_guide/push_model.rst | 370 ++++++++++++++ + 16 files changed, 1721 insertions(+), 4 deletions(-) + create mode 100644 docs/assets/push-model-architecture.svg + create mode 100644 docs/assets/push-model-sequence.svg + create mode 100644 docs/design/push_model.rst + create mode 100644 docs/man/keylime_push_model_agent.8.rst + create mode 100644 
docs/rest_apis/3_0/3_0.rst + create mode 100644 docs/rest_apis/3_0/verifier.rst + create mode 100644 docs/user_guide/push_model.rst + +diff --git a/docs/assets/push-model-architecture.svg b/docs/assets/push-model-architecture.svg +new file mode 100644 +index 000000000..82a5672f4 +--- /dev/null ++++ b/docs/assets/push-model-architecture.svg +@@ -0,0 +1,86 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Push-Model Architecture ++ ++ ++ Pull Model (traditional) ++ ++ ++ ++ Agent ++ (server, port 9002) ++ ++ ++ ++ Registrar ++ ++ ++ ++ Verifier ++ ++ ++ ++ register ++ ++ ++ ++ poll quotes ++ ++ ++ ++ ++ ++ Push Model (new) ++ ++ ++ ++ Agent ++ (client, no ports) ++ ++ ++ ++ Registrar ++ ++ ++ ++ Verifier ++ ++ ++ ++ register ++ ++ ++ ++ push evidence ++ ++ ++ ++ Protocol Flow (Push Model) ++ ++ 1. Agent registers with Registrar (same as pull model) ++ 2. Agent authenticates with Verifier via PoP (POST /v3/sessions) ++ 3. Agent sends capabilities to Verifier (POST /v3/agents/{agent_id}/attestations) — receives challenge nonce ++ 4. Agent sends evidence to Verifier (PATCH /v3/agents/{agent_id}/attestations/latest) — receives 202 Accepted ++ 5. Agent waits for configured interval, then repeats from step 3 ++ +diff --git a/docs/assets/push-model-sequence.svg b/docs/assets/push-model-sequence.svg +new file mode 100644 +index 000000000..d9affe1c9 +--- /dev/null ++++ b/docs/assets/push-model-sequence.svg +@@ -0,0 +1,122 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Push-Model Agent State Machine ++ ++ ++ ++ Unregistered ++ Initial state ++ ++ ++ ++ Registered ++ Ready for attestation ++ ++ ++ ++ Negotiating ++ Phase 1: capabilities ++ ++ ++ ++ Attesting ++ Phase 2: evidence ++ ++ ++ ++ Reg. Failed ++ Will retry ++ ++ ++ ++ Attest. 
Failed ++ Will retry ++ ++ ++ ++ registration OK ++ ++ ++ ++ failed ++ ++ ++ ++ retry ++ ++ ++ ++ start negotiation ++ ++ ++ ++ 201 Created ++ ++ ++ ++ error ++ ++ ++ ++ 202 Accepted ++ (wait interval) ++ ++ ++ ++ rejected ++ ++ ++ ++ retry ++ ++ ++ ++ Legend ++ ++ ++ Success transition ++ ++ ++ Error transition ++ ++ ++ Retry (with exponential backoff) ++ ++ Phase 1: Agent POSTs capabilities, receives challenge nonce ++ Phase 2: Agent PATCHes evidence, receives 202 Accepted ++ The Negotiating/Attesting cycle repeats continuously ++ +diff --git a/docs/conf.py b/docs/conf.py +index 5543afa86..00d9735de 100644 +--- a/docs/conf.py ++++ b/docs/conf.py +@@ -154,6 +154,7 @@ + ("man/keylime_registrar.8", "keylime_registrar", "Keylime registrar service", [author], 8), + ("man/keylime_verifier.8", "keylime_verifier", "Keylime verifier service", [author], 8), + ("man/keylime_agent.8", "keylime_agent", "Keylime agent service", [author], 8), ++ ("man/keylime_push_model_agent.8", "keylime_push_model_agent", "Keylime push-model agent service", [author], 8), + ] + + +diff --git a/docs/design.rst b/docs/design.rst +index 522ade113..dd72fd4e7 100644 +--- a/docs/design.rst ++++ b/docs/design.rst +@@ -7,6 +7,7 @@ Design of Keylime + :caption: Contents: + + design/overview.rst ++ design/push_model.rst + design/security.rst + + +diff --git a/docs/design/overview.rst b/docs/design/overview.rst +index 4c7b52227..985cbc94b 100644 +--- a/docs/design/overview.rst ++++ b/docs/design/overview.rst +@@ -51,9 +51,14 @@ Verifier + The verifier implements the actual attestation of an agent and sends revocation messages if an agent leaves the trusted + state. + +-Once an agent is registered for attestation (using the tenant or the API directly) the verifier continuously pulls +-the required attestation data from the agent. This can include: a quote over the PCRs, the PCR values, NK public key, +-IMA log and UEFI event log. 
After that the quote is validated additional validation of the data can be configured. ++In the default **pull model**, once an agent is registered for attestation (using the tenant or the API directly) ++the verifier continuously pulls the required attestation data from the agent. This can include: a quote over the ++PCRs, the PCR values, NK public key, IMA log and UEFI event log. After the quote is validated, additional ++validation of the data can be configured. ++ ++Keylime also supports a **push model** where the agent initiates connections to the verifier and proactively ++submits attestation evidence. This is useful for environments where the verifier cannot directly reach the ++agent (e.g. behind firewalls or NAT). See :doc:`push_model` for details. + + Static PCR values + """"""""""""""""" +diff --git a/docs/design/push_model.rst b/docs/design/push_model.rst +new file mode 100644 +index 000000000..29f9061e0 +--- /dev/null ++++ b/docs/design/push_model.rst +@@ -0,0 +1,226 @@ ++======================== ++Push-Model Attestation ++======================== ++ ++.. warning:: ++ Push-model attestation is currently experimental. The feature is functional ++ but the API and configuration options may change in future releases. ++ Please report issues at https://github.com/keylime/keylime/issues/?q=label:push-mode ++ ++Introduction ++------------ ++ ++Traditional Keylime attestation uses a **pull model** where the verifier continuously ++polls agents for attestation data. The agent acts as a server and the verifier initiates ++connections to it. This model requires that the verifier can reach the agent over the ++network. ++ ++The **push model** reverses this communication direction: the agent initiates connections ++to the verifier and proactively sends attestation data. The verifier never connects to ++the agent.
This makes push-model attestation suitable for environments where the ++verifier cannot directly reach the agent, such as: ++ ++* **Edge and IoT devices** behind firewalls or NAT ++* **Hybrid cloud environments** with restricted network policies ++* **Air-gapped networks** where inbound connections to agents are not permitted ++* **Dynamic environments** where agent IP addresses change frequently ++ ++In push mode, the agent is a separate binary (``keylime-push-model-agent``) that ++implements the push attestation protocol using API version 3.0. ++ ++Architectural Overview ++---------------------- ++ ++In pull-model attestation, the verifier runs a polling loop that periodically contacts ++each registered agent to request a TPM quote and associated evidence. The agent exposes ++an HTTPS server that responds to these requests. ++ ++In push-model attestation, this relationship is inverted: ++ ++* The **agent initiates** all connections to the verifier ++* The agent does **not expose any HTTP endpoints** (no listening ports) ++* The verifier accepts incoming attestation data from agents ++* Verification is performed **asynchronously** after evidence is received ++* An **event-driven timeout** system replaces the polling loop for monitoring agent ++ liveness ++ ++The registrar interaction is unchanged: in both models, the agent registers itself ++with the registrar during startup. ++ ++.. figure:: ../assets/push-model-architecture.svg ++ :width: 600 ++ :align: center ++ :alt: Diagram showing the push-model architecture where the agent initiates ++ connections to both the registrar and the verifier, contrasted with the pull ++ model where the verifier connects to the agent. ++ ++ **Figure 1:** Push-Model Architecture ++ ++The Two-Phase Attestation Protocol ++----------------------------------- ++ ++Push-model attestation uses a two-phase protocol for each attestation cycle. 
++ ++Phase 1: Capabilities Negotiation ++"""""""""""""""""""""""""""""""""" ++ ++The agent begins an attestation cycle by sending its capabilities to the verifier. ++This tells the verifier what types of evidence the agent can produce and what ++cryptographic algorithms it supports. ++ ++1. The agent sends a ``POST /v3/agents/{agent_id}/attestations`` request to the ++ verifier containing its supported evidence types (TPM quote parameters, IMA log ++ capabilities, UEFI log capabilities) and the public attestation key (AK). ++ ++2. The verifier creates an attestation resource, selects cryptographic parameters ++ (signature scheme, hash algorithm, PCRs to quote), generates a random challenge ++ nonce, and returns a ``201 Created`` response with: ++ ++ * The challenge nonce for TPM quote generation ++ * The chosen cryptographic parameters ++ * The evidence types requested ++ * A deadline (``challenges_expire_at``) by which evidence must be submitted ++ ++Phase 2: Evidence Submission ++""""""""""""""""""""""""""""" ++ ++The agent collects the requested evidence and submits it to the verifier. ++ ++1. The agent generates a TPM quote using the challenge nonce from Phase 1, ++ collects IMA and/or UEFI event logs as requested, and sends a ++ ``PATCH /v3/agents/{agent_id}/attestations/latest`` request with the evidence. ++ ++2. The verifier returns a ``202 Accepted`` response immediately. The evidence is ++ then verified asynchronously in a background worker process. ++ ++3. If verification succeeds, the attestation is marked as ``pass``. If it fails, ++ the attestation is marked as ``fail`` with a failure reason ++ (``broken_evidence_chain`` or ``policy_violation``). ++ ++4. The response includes a ``seconds_to_next_attestation`` value in the ``meta`` ++ field, indicating when the agent should start its next attestation cycle. ++ ++After a configurable interval, the agent begins a new cycle from Phase 1. 
++ ++Agent State Machine ++""""""""""""""""""" ++ ++The push-model agent operates as a state machine with the following states: ++ ++.. figure:: ../assets/push-model-sequence.svg ++ :width: 600 ++ :align: center ++ :alt: Sequence diagram showing the push-model agent state machine transitions ++ from Unregistered through Registered, Negotiating, and Attesting states. ++ ++ **Figure 2:** Push-Model Agent State Machine ++ ++* **Unregistered**: Initial state. The agent registers with the registrar. ++* **Registered**: Registration succeeded. The agent begins negotiation with the ++ verifier. ++* **Negotiating**: The agent sends capabilities to the verifier (Phase 1) and waits ++ for the challenge response. ++* **Attesting**: The agent generates and sends evidence to the verifier (Phase 2). ++ On success, the agent waits for the configured interval and transitions back to ++ Negotiating. ++* **RegistrationFailed**: Registration with the registrar failed. The agent waits ++ and retries. ++* **AttestationFailed**: An attestation attempt failed (network error or verifier ++ rejection). The agent waits and retries from Negotiating. ++ ++The agent uses exponential backoff when retrying failed operations. ++ ++Authentication ++-------------- ++ ++Push-model attestation uses **Proof of Possession (PoP)** authentication instead of ++the mTLS client certificates used in pull mode. This is necessary because the agent ++acts as a client (not a server) and does not have certificates signed by the verifier's ++trusted CA. ++ ++The PoP authentication flow: ++ ++1. The agent creates a session by sending ``POST /v3/sessions`` with its agent ID ++ and supported authentication methods. ++2. The verifier responds with a challenge nonce. ++3. The agent proves possession of its AK by signing the challenge using the TPM ++ (``TPM2_Certify``) and sends the result via ``PATCH /v3/sessions/{session_id}``. ++4. If the signature is valid, the verifier issues a bearer token. ++5. 
The agent includes this token in the ``Authorization`` header of all subsequent ++ requests. ++6. Tokens have a configurable expiration time and can be refreshed. ++ ++The TLS connection uses **server verification only**: the agent verifies the verifier's ++server certificate but does not present a client certificate. The agent needs the ++verifier's CA certificate for this verification. ++ ++For full details on the authorization framework, including the separation between ++agent and admin authentication, see :doc:`../user_guide/authentication`. ++ ++Timeout Monitoring ++------------------ ++ ++In pull mode, the verifier detects unresponsive agents through its polling loop. In ++push mode, an event-driven timeout system serves this purpose. ++ ++The verifier monitors push-mode agents as follows: ++ ++1. When the verifier receives an attestation from an agent, it schedules a timeout ++ for that agent. The timeout duration is ``quote_interval * 5`` seconds (where ++ ``quote_interval`` is the verifier's configured quote interval). ++ ++2. If the agent does not submit a new attestation before the timeout fires, the ++ verifier sets the agent's ``accept_attestations`` flag to ``False``. ++ ++3. Once ``accept_attestations`` is ``False``, the verifier rejects new attestation ++ requests from that agent with a ``403 Forbidden`` response. ++ ++4. The agent can recover by re-registering or by administrator intervention ++ (reactivation). ++ ++Comparison with Pull Model ++--------------------------- ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 30 35 35 ++ ++ * - Aspect ++ - Pull Model ++ - Push Model ++ * - Connection direction ++ - Verifier connects to agent ++ - Agent connects to verifier ++ * - Agent binary ++ - ``keylime_agent`` ++ - ``keylime_push_model_agent`` ++ * - Agent network requirements ++ - Must expose HTTP port (default 9002) ++ - No listening ports required ++ * - Firewall requirements ++ - Inbound to agent from verifier ++ - Outbound from agent to verifier ++ * - Authentication method ++ - mTLS (agent as server) ++ - PoP bearer tokens (agent as client) ++ * - API version ++ - v2.x ++ - v3.0 ++ * - Verification trigger ++ - Verifier polls on ``quote_interval`` ++ - Agent pushes on ``attestation_interval_seconds`` ++ * - Liveness detection ++ - Polling loop state machine ++ - Event-driven timeout (``quote_interval * 5``) ++ * - Verifier configuration ++ - ``mode = pull`` (default) ++ - ``mode = push`` ++ * - Suitable for ++ - Controlled networks, data centers ++ - Edge, IoT, NAT, firewalled environments ++ * - Maturity ++ - Stable ++ - Experimental ++ ++For deployment and configuration instructions, see :doc:`../user_guide/push_model`. ++For the v3.0 API reference, see :doc:`../rest_apis/3_0/3_0`. +diff --git a/docs/index.rst b/docs/index.rst +index 8234217fd..fd5f08bed 100644 +--- a/docs/index.rst ++++ b/docs/index.rst +@@ -43,6 +43,7 @@ what the goals of Keylime are and how they are implemented. + man/keylime_verifier.8 + man/keylime_registrar.8 + man/keylime_agent.8 ++ man/keylime_push_model_agent.8 + man/keylime_policy.1 + + Indices and tables +diff --git a/docs/installation.rst b/docs/installation.rst +index 21d35a793..b96574137 100644 +--- a/docs/installation.rst ++++ b/docs/installation.rst +@@ -62,6 +62,17 @@ Rust agent + + Installation instructions can be found in the `README.md `_ for the Rust agent. + ++Push-model agent ++~~~~~~~~~~~~~~~~ ++.. 
note:: ++ The push-model agent (``keylime-push-model-agent``) is a separate binary from ++ the standard Rust agent. It implements the push attestation protocol where the ++ agent initiates connections to the verifier. This feature is currently experimental. ++ ++ Installation instructions are the same as for the Rust agent. The push-model ++ agent binary is built from the same repository. For configuration and deployment ++ details, see the :doc:`user_guide/push_model` user guide. ++ + Keylime Bash installer + ---------------------- + +diff --git a/docs/man/keylime_push_model_agent.8.rst b/docs/man/keylime_push_model_agent.8.rst +new file mode 100644 +index 000000000..b033db801 +--- /dev/null ++++ b/docs/man/keylime_push_model_agent.8.rst +@@ -0,0 +1,226 @@ ++========================== ++keylime_push_model_agent ++========================== ++ ++------------------------------------------------------------ ++Keylime push-model agent for TPM-based remote attestation ++------------------------------------------------------------ ++ ++:Manual section: 8 ++:Author: Keylime Developers ++:Date: February 2026 ++ ++SYNOPSIS ++======== ++ ++**keylime_push_model_agent** [*OPTIONS*] ++ ++(Most operations require root privileges, use with sudo) ++ ++DESCRIPTION ++=========== ++ ++The push-model agent is a long-running service that runs on systems to be attested. ++Unlike the standard Keylime agent which acts as a server and waits for the verifier ++to poll it, the push-model agent initiates connections to the verifier and proactively ++submits attestation evidence. ++ ++The agent registers with the registrar, authenticates with the verifier using Proof of ++Possession (PoP), and performs periodic attestation cycles consisting of capabilities ++negotiation and evidence submission. ++ ++This agent uses API version 3.0 and requires the verifier to be configured in push ++mode (``mode = push``). 
++ ++OPTIONS ++======= ++ ++**--verifier-url** *URL* ++ URL of the verifier (must use HTTPS). Default: ``https://localhost:8881`` ++ ++**--registrar-url** *URL* ++ URL of the registrar. Default: ``http://127.0.0.1:8888`` ++ ++**--agent-identifier** *ID* ++ Agent UUID. Overrides the ``uuid`` configuration option. ++ ++**--attestation-interval-seconds** *SECONDS* ++ Interval between attestation cycles. Default: ``60`` ++ ++**--ca-certificate** *PATH* ++ CA certificate file for verifying the verifier's TLS certificate. Overrides ++ ``verifier_tls_ca_cert``. ++ ++**--api-version** *VERSION* ++ API version to use. Default: ``v3.0`` ++ ++**--timeout** *MILLISECONDS* ++ HTTP request timeout. Default: ``5000`` ++ ++**--insecure** ++ Accept invalid TLS certificates. For testing only. ++ ++**--avoid-tpm** ++ Use a mock TPM instead of hardware TPM. For testing only. ++ ++**--json-file** *FILE* ++ JSON file for payload data. ++ ++**--attestation-index** *INDEX* ++ Attestation index value. Default: ``1`` ++ ++**--session-index** *INDEX* ++ Session index value. Default: ``1`` ++ ++**--message-type** *TYPE* ++ Message type (Attestation, EvidenceHandling, Session). Default: ``Attestation`` ++ ++**--method** *METHOD* ++ HTTP method. Default: ``POST`` ++ ++CONFIGURATION ++============= ++ ++Primary configuration is read from ``/etc/keylime/agent.conf`` (TOML format). ++All options are under the ``[agent]`` section. Command-line arguments override ++configuration file values. ++ ++Drop-in overrides: files in ``/etc/keylime/agent.conf.d/`` are applied in ++lexicographic order. ++ ++Push-model specific options: ++ ++**verifier_url** ++ URL of the verifier. Must use HTTPS. Default: ``https://localhost:8881`` ++ ++**verifier_tls_ca_cert** ++ Path to CA certificate for verifying the verifier's TLS certificate. ++ Relative paths are resolved from ``keylime_dir``. Default: ``cv_ca/cacert.crt`` ++ ++**attestation_interval_seconds** ++ Interval in seconds between attestation cycles. 
Default: ``60`` ++ ++**api_versions** ++ API versions to use. Default: ``3.0`` ++ ++**certification_keys_server_identifier** ++ Server identifier for attestation key certification. Default: ``ak`` ++ ++**uefi_logs_evidence_version** ++ UEFI logs evidence format version. Default: ``2.1`` ++ ++**exponential_backoff_initial_delay** ++ Initial retry delay in milliseconds. Default: ``10000`` ++ ++**exponential_backoff_max_retries** ++ Maximum number of retry attempts. Default: ``5`` ++ ++**exponential_backoff_max_delay** ++ Maximum retry delay in milliseconds. Default: ``300000`` ++ ++Shared options (same as standard agent): ++ ++**uuid** ++ Agent identifier. Default: auto-generated UUID. ++ ++**registrar_ip**, **registrar_port** ++ Registrar endpoint. Default: ``127.0.0.1:8890`` ++ ++**registrar_tls_enabled** ++ Enable TLS for registrar communication. Default: ``false`` ++ ++**registrar_tls_ca_cert** ++ CA certificate for registrar TLS verification. Default: ``cv_ca/cacert.crt`` ++ ++**tpm_hash_alg**, **tpm_encryption_alg**, **tpm_signing_alg** ++ TPM algorithms. Defaults: ``sha256``, ``rsa``, ``rsassa`` ++ ++**keylime_dir** ++ Working directory. Default: ``/var/lib/keylime`` ++ ++**run_as** ++ User:group to drop privileges to. Default: ``keylime:tss`` ++ ++**enable_iak_idevid** ++ Enable IAK/IDevID usage. Default: ``false`` ++ ++ENVIRONMENT ++=========== ++ ++**KEYLIME_AGENT_CONFIG** ++ Path to agent.conf (highest priority) ++ ++**KEYLIME_DIR** ++ Working directory (default: ``/var/lib/keylime``) ++ ++**RUST_LOG** ++ Log level configuration. Default in systemd service: ++ ``keylime_push_model_agent=info,keylime=info`` ++ ++All configuration options can be overridden via environment variables in the form ++``KEYLIME_AGENT_<OPTION>`` (e.g. ``KEYLIME_AGENT_VERIFIER_URL``).
++ ++FILES ++===== ++ ++``/etc/keylime/agent.conf`` ++ TOML format configuration file (shared with standard agent) ++ ++``/etc/keylime/agent.conf.d/`` ++ Drop-in configuration snippets ++ ++``/var/lib/keylime/cv_ca/cacert.crt`` ++ Default CA certificate for verifier TLS verification ++ ++``/var/lib/keylime/agent_data.json`` ++ Persisted agent TPM data ++ ++RUNTIME ++======= ++ ++Start directly: ++ ++.. code-block:: bash ++ ++ sudo keylime_push_model_agent --verifier-url https://verifier.example.com:8881 ++ ++Start as a systemd service: ++ ++.. code-block:: bash ++ ++ sudo systemctl enable --now keylime_push_model_agent ++ ++Check service status: ++ ++.. code-block:: bash ++ ++ sudo systemctl status keylime_push_model_agent ++ sudo journalctl -u keylime_push_model_agent -f ++ ++PREREQUISITES ++============= ++ ++- Root privileges (use sudo) ++- TPM 2.0 available (verify with ``tpm2_pcrread``) ++- Verifier configured with ``mode = push`` ++- Network connectivity from agent to verifier and registrar ++- Verifier CA certificate available on agent machine ++ ++NOTES ++===== ++ ++- This service conflicts with ``keylime_agent.service``. Only one agent type can ++ run on a machine at a time. ++- The push-model agent does not expose any listening ports. ++- Push-model attestation is currently experimental. ++- Authentication uses PoP bearer tokens, not mTLS client certificates. ++ ++SEE ALSO ++======== ++ ++**keylime_agent**\(8), **keylime_verifier**\(8), **keylime_registrar**\(8), **keylime_tenant**\(1) ++ ++BUGS ++==== ++ ++Report bugs at https://github.com/keylime/rust-keylime/issues +diff --git a/docs/man/keylime_verifier.8.rst b/docs/man/keylime_verifier.8.rst +index fd7cfb941..5303a5f06 100644 +--- a/docs/man/keylime_verifier.8.rst ++++ b/docs/man/keylime_verifier.8.rst +@@ -32,6 +32,7 @@ Primary configuration is read from ``/etc/keylime/verifier.conf`` (or an overrid + All options are under the ``[verifier]`` section. 
+ + Essentials: ++- **mode**: Attestation mode (``pull`` or ``push``). Default: ``pull`` + - **uuid**: Unique identifier for this verifier instance + - **ip**, **port**: Bind address and HTTP port + - **registrar_ip**, **registrar_port**: Registrar endpoint +@@ -108,7 +109,7 @@ NOTES + SEE ALSO + ======== + +-**keylime_registrar**\(8), **keylime_tenant**\(1), **keylime_agent**\(8) ++**keylime_registrar**\(8), **keylime_tenant**\(1), **keylime_agent**\(8), **keylime_push_model_agent**\(8) + + BUGS + ==== +diff --git a/docs/rest_apis.rst b/docs/rest_apis.rst +index edfe8be1c..aba64c338 100644 +--- a/docs/rest_apis.rst ++++ b/docs/rest_apis.rst +@@ -14,10 +14,40 @@ Check the :ref:`Changelog` section for the differences between versions + rest_apis/2_3/2_3.rst + rest_apis/2_4/2_4.rst + rest_apis/2_5/2_5.rst ++ rest_apis/3_0/3_0.rst + + Changelog + _________ + ++Changes from v2.5 to v3.0 ++~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++API version 3.0 introduces push-model attestation. Unlike previous versions where ++the verifier polls agents, in v3.0 agents initiate connections and submit ++attestation evidence to the verifier. The v3.0 endpoints are served by the ++verifier only; the push-model agent does not expose HTTP endpoints. 
++ ++* Added `POST /v3/agents/{agent_id}/attestations` endpoint to the verifier: ++ * Allows agents to submit attestation capabilities (Phase 1 of push protocol) ++ * Returns challenge nonce for TPM quote generation ++* Added `PATCH /v3/agents/{agent_id}/attestations/latest` endpoint: ++ * Allows agents to submit attestation evidence (Phase 2 of push protocol) ++ * Returns `202 Accepted` for asynchronous verification ++* Added `PATCH /v3/agents/{agent_id}/attestations/{index}` endpoint: ++ * Submit evidence for a specific attestation by index ++* Added `GET /v3/agents/{agent_id}/attestations` endpoint: ++ * Lists all attestation records for an agent ++* Added `GET /v3/agents/{agent_id}/attestations/latest` endpoint: ++ * Returns the most recent attestation for an agent, including verification status ++* Added `GET /v3/agents/{agent_id}/attestations/{index}` endpoint: ++ * Returns a specific attestation by its index ++* Added `POST /v3/sessions` endpoint: ++ * Creates a PoP authentication session and returns a challenge nonce for the agent ++* Added `PATCH /v3/sessions/{session_id}` endpoint: ++ * Completes PoP authentication by submitting the TPM-signed challenge response ++* Introduced PoP (Proof of Possession) bearer token authentication for ++ agent-to-verifier communication ++ + Changes from v2.4 to v2.5 + ~~~~~~~~~~~~~~~~~~~~~~~~~ + API version 2.5 was first implemented in Keylime 7.14.0. +diff --git a/docs/rest_apis/3_0/3_0.rst b/docs/rest_apis/3_0/3_0.rst +new file mode 100644 +index 000000000..d6cac705d +--- /dev/null ++++ b/docs/rest_apis/3_0/3_0.rst +@@ -0,0 +1,21 @@ ++RESTful API for Keylime (v3.0) ++------------------------------ ++ ++API version 3.0 introduces push-model attestation, where agents initiate ++connections to the verifier and proactively submit attestation evidence. ++ ++Unlike previous API versions where the agent exposed HTTP endpoints for the ++verifier to poll, in v3.0 the agent acts as a client. 
The v3.0 endpoints are ++served by the **verifier only**. The push-model agent does not expose an API. ++ ++For a conceptual overview of push-model attestation, see ++:doc:`../../design/push_model`. ++ ++.. warning:: ++ Push-model attestation is currently experimental. The API may change in ++ future releases. ++ ++.. toctree:: ++ :maxdepth: 2 ++ ++ verifier.rst +diff --git a/docs/rest_apis/3_0/verifier.rst b/docs/rest_apis/3_0/verifier.rst +new file mode 100644 +index 000000000..3476cc7a3 +--- /dev/null ++++ b/docs/rest_apis/3_0/verifier.rst +@@ -0,0 +1,608 @@ ++Verifier ++~~~~~~~~ ++ ++Push-Model Attestation Endpoints ++""""""""""""""""""""""""""""""""" ++ ++These endpoints implement the two-phase push-model attestation protocol. Agents ++use these endpoints to submit attestation capabilities and evidence. Administrators ++can use the GET endpoints to view attestation results. ++ ++For details on authentication requirements, see :doc:`../../user_guide/authentication`. ++ ++.. http:post:: /v3/agents/{agent_id}/attestations ++ ++ Phase 1: Submit attestation capabilities and receive a challenge. ++ ++ The agent sends its supported evidence types, cryptographic algorithms, and ++ attestation key. The verifier selects parameters and returns a challenge nonce ++ for TPM quote generation. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ **Example request**: ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "attributes": { ++ "evidence_supported": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": { ++ "signature_schemes": ["rsassa"], ++ "hash_algorithms": ["sha256", "sha384", "sha512"], ++ "available_subjects": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], ++ "certification_keys": [ ++ { ++ "key_class": "asymmetric", ++ "key_algorithm": "rsa", ++ "key_size": 2048, ++ "server_identifier": "ak", ++ "allowable_signature_schemes": ["rsassa"], ++ "allowable_hash_algorithms": ["sha256", "sha384", "sha512"], ++ "public": "" ++ } ++ ], ++ "component_version": "2.0", ++ "evidence_version": "1.0" ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "capabilities": { ++ "entry_count": 1024, ++ "supports_partial_access": true, ++ "appendable": true, ++ "formats": ["text/plain"], ++ "component_version": "1.0", ++ "evidence_version": "1.0" ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ } ++ } ++ } ++ } ++ ++ **Example response** (201 Created): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "awaiting_evidence", ++ "evidence_requested": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "chosen_parameters": { ++ "challenge": "", ++ "signature_scheme": "rsassa", ++ "hash_algorithm": "sha256", ++ "selected_subjects": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], ++ "certification_key": { ++ "key_class": "asymmetric", ++ "key_algorithm": "rsa", ++ "key_size": 2048, ++ "server_identifier": "ak" ++ } ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "chosen_parameters": { ++ "starting_offset": 0, ++ "entry_count": 1024, ++ "format": "text/plain" ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ } ++ } ++ ++ :>json string data.id: Attestation index (auto-incremented per agent) ++ :>json string data.attributes.stage: ``"awaiting_evidence"`` ++ :>json array data.attributes.evidence_requested: Evidence the verifier wants the agent to provide ++ :>json string evidence_requested[].chosen_parameters.challenge: Base64-encoded challenge nonce for TPM quote ++ :>json string data.attributes.capabilities_received_at: ISO 8601 timestamp ++ :>json string data.attributes.challenges_expire_at: Deadline for evidence submission ++ :>json string data.links.self: URL to this attestation resource ++ ++ :statuscode 201: Attestation created, challenge issued ++ :statuscode 400: Invalid request body ++ :statuscode 403: Attestations disabled for this agent (timeout or previous failure) ++ :statuscode 404: Agent not found ++ :statuscode 409: Concurrent attestation creation attempt ++ :statuscode 422: Invalid capabilities data ++ :statuscode 429: Rate
limited (attestation interval not elapsed). Includes ``Retry-After`` header ++ :statuscode 503: Previous attestation still being verified. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/agents/{agent_id}/attestations/latest ++ ++ Phase 2: Submit attestation evidence for the latest attestation. ++ ++ The agent sends the TPM quote, PCR values, and event logs generated using the ++ challenge nonce from Phase 1. The verifier accepts the evidence and verifies it ++ asynchronously. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "attributes": { ++ "evidence_collected": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "data": { ++ "subject_data": { ++ "0": "", ++ "1": "" ++ }, ++ "message": "", ++ "signature": "" ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "data": { ++ "entry_count": 512, ++ "entries": "" ++ } ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (202 Accepted): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "evaluating_evidence", ++ "evidence": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": {}, ++ "chosen_parameters": {}, ++ "data": { ++ "message": "", ++ "signature": "", ++ "subject_data": {} ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ }, ++ "meta": { ++ "seconds_to_next_attestation": 45 ++ } ++ } ++ ++ :>json string data.attributes.stage: ``"evaluating_evidence"`` (verification in progress) ++ :>json array data.attributes.evidence: Evidence items with capabilities, parameters, and data ++ :>json string data.attributes.evidence_received_at: ISO 8601 timestamp when evidence was received ++ :>json int meta.seconds_to_next_attestation: Suggested wait before starting the next attestation cycle ++ ++ :statuscode 202: Evidence accepted, verification in progress ++ :statuscode 400: Invalid evidence format ++ :statuscode 403: Evidence already submitted, attestation is not the latest, or challenges expired ++ :statuscode 404: Agent or attestation not found ++ :statuscode 410: Attestation no longer exists ++ :statuscode 503: No available worker processes. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/agents/{agent_id}/attestations/{index} ++ ++ Submit attestation evidence for a specific attestation by index. ++ ++ Behaves identically to ``PATCH /v3/agents/{agent_id}/attestations/latest`` ++ but targets a specific attestation index. Evidence can only be submitted for ++ the latest attestation. 
++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ :param index: Attestation index ++ :type index: integer ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ :statuscode 202: Evidence accepted ++ :statuscode 403: Not the latest attestation, evidence already submitted, or challenges expired ++ :statuscode 404: Agent or attestation not found ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations ++ ++ List all attestations for an agent. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ **Example response**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": [ ++ { ++ "type": "attestation", ++ "id": "1", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "evidence": [], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:32:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/1" ++ } ++ }, ++ { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "evidence": [], ++ "system_info": {}, ++ "capabilities_received_at": "2024-01-15T10:25:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:30:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:26:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:27:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ } ++ ] ++ } ++ ++ :>json array data: List of attestation resources ++ :>json string data[].id: Attestation index ++ :>json string data[].attributes.stage: ``"awaiting_evidence"``, ``"evaluating_evidence"``, or ``"verification_complete"`` ++ :>json string data[].attributes.evaluation: 
``"pending"``, ``"pass"``, or ``"fail"`` ++ :>json string data[].attributes.failure_reason: ``"broken_evidence_chain"`` or ``"policy_violation"`` (only when evaluation is ``"fail"``) ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent not found ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations/latest ++ ++ Get the latest attestation for an agent. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ **Example response**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "1", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "failure_reason": null, ++ "evidence": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": {}, ++ "chosen_parameters": {}, ++ "data": { ++ "message": "", ++ "signature": "", ++ "subject_data": {} ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:32:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/1" ++ } ++ } ++ } ++ ++ :>json string data.attributes.stage: Current stage of the attestation ++ :>json string data.attributes.evaluation: ``"pending"``, ``"pass"``, or ``"fail"`` ++ :>json string data.attributes.failure_reason: ``null``, ``"broken_evidence_chain"``, or ``"policy_violation"`` ++ :>json array data.attributes.evidence: Evidence items with full data ++ :>json string data.attributes.capabilities_received_at: When capabilities were received ++ :>json string data.attributes.challenges_expire_at: When challenges expire ++ :>json string data.attributes.evidence_received_at: When evidence was received (``null`` if still awaiting) ++ :>json string 
data.attributes.verification_completed_at: When verification completed (``null`` if still in progress) ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent not found or no attestations exist ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations/{index} ++ ++ Get a specific attestation by index. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ :param index: Attestation index ++ :type index: integer ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ Response format is identical to ``GET /v3/agents/{agent_id}/attestations/latest``. ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent or attestation not found ++ ++ ++Session Endpoints ++""""""""""""""""" ++ ++These endpoints manage PoP (Proof of Possession) authentication sessions for ++push-model agents. Sessions are required before an agent can submit attestations. ++ ++.. http:post:: /v3/sessions ++ ++ Create a new authentication session. ++ ++ The verifier generates a challenge nonce that the agent must sign using its ++ TPM attestation key to prove possession. ++ ++ **Authentication**: None (public endpoint) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_supported": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop" ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (200 OK): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "id": "550e8400-e29b-41d4-a716-446655440000", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_requested": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "chosen_parameters": { ++ "challenge": "" ++ } ++ } ++ ], ++ "created_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:31:00.123456Z" ++ } ++ } ++ } ++ ++ :>json string data.id: Session UUID ++ :>json string data.attributes.challenges_expire_at: Deadline for submitting the PoP response ++ ++ :statuscode 200: Session created ++ :statuscode 400: Missing or invalid agent_id ++ :statuscode 429: Rate limited. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/sessions/{session_id} ++ ++ Submit Proof of Possession response to complete authentication. ++ ++ The agent signs the challenge nonce from the session creation response using ++ ``TPM2_Certify`` and submits the result. If valid, the verifier issues a bearer ++ token for subsequent API calls. ++ ++ :param session_id: UUID of the session ++ :type session_id: string ++ ++ **Authentication**: None (public endpoint; validates PoP internally) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_provided": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "data": { ++ "message": "", ++ "signature": "" ++ } ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (200 OK, authentication passed): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "id": "550e8400-e29b-41d4-a716-446655440000", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "evaluation": "pass", ++ "token": "550e8400-e29b-41d4-a716-446655440000.", ++ "authentication": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "chosen_parameters": { ++ "challenge": "" ++ }, ++ "data": { ++ "message": "", ++ "signature": "" ++ } ++ } ++ ], ++ "created_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:31:00.123456Z", ++ "response_received_at": "2024-01-15T10:30:30.123456Z", ++ "token_expires_at": "2024-01-15T11:30:00.123456Z" ++ } ++ } ++ } ++ ++ :>json string data.attributes.evaluation: ``"pass"`` or ``"fail"`` ++ :>json string data.attributes.token: Bearer token for subsequent requests (only on ``"pass"``) ++ :>json string data.attributes.token_expires_at: Token expiration time (only on ``"pass"``) ++ ++ :statuscode 200: PoP response processed (check ``evaluation`` field for result) ++ :statuscode 400: Missing or invalid request body ++ :statuscode 401: PoP verification failed ++ :statuscode 404: Session not found ++ ++ ++Attestation Stages and Evaluations ++""""""""""""""""""""""""""""""""""" ++ ++Each attestation progresses through the following stages: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 75 ++ ++ * - Stage ++ - Description ++ * - ``awaiting_evidence`` ++ - Capabilities received, challenge issued, waiting for evidence ++ * - ``evaluating_evidence`` ++ - Evidence received, verification in progress ++ * - ``verification_complete`` ++ - Verification finished, see ``evaluation`` for result ++ ++The ``evaluation`` field indicates the verification result: ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 20 80 ++ ++ * - Evaluation ++ - Description ++ * - ``pending`` ++ - Verification not yet complete ++ * - ``pass`` ++ - Evidence verified successfully ++ * - ``fail`` ++ - Evidence verification failed (see ``failure_reason``) ++ ++When an attestation fails, the ``failure_reason`` field provides the cause: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 70 ++ ++ * - Failure Reason ++ - Description ++ * - ``broken_evidence_chain`` ++ - TPM quote signature invalid or evidence integrity check failed ++ * - ``policy_violation`` ++ - Evidence is valid but violates the configured attestation policy +diff --git a/docs/user_guide.rst b/docs/user_guide.rst +index 9bd44c512..ed052c175 100644 +--- a/docs/user_guide.rst ++++ b/docs/user_guide.rst +@@ -8,6 +8,7 @@ User Guide + + user_guide/authentication.rst + user_guide/configuration.rst ++ user_guide/push_model.rst + user_guide/runtime_ima.rst + user_guide/user_selected_pcr_monitoring.rst + user_guide/use_measured_boot.rst +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index 6d8f35c88..2e50757df 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -40,6 +40,13 @@ The following components can be configured: + - ``/etc/keylime/logging.conf`` + - ``/etc/keylime/logging.conf.d`` + ++.. note:: ++ For push-model attestation, the verifier must be configured with ``mode = push`` ++ in the ``[verifier]`` section. The push-model agent uses the same ++ ``/etc/keylime/agent.conf`` file (TOML format) but with additional options such ++ as ``verifier_url`` and ``attestation_interval_seconds``. See ++ :doc:`push_model` for details. 
++ + The next sections contain details of the configuration files + + Configuration file processing order +diff --git a/docs/user_guide/push_model.rst b/docs/user_guide/push_model.rst +new file mode 100644 +index 000000000..773d2aaaa +--- /dev/null ++++ b/docs/user_guide/push_model.rst +@@ -0,0 +1,370 @@ ++======================== ++Push-Model Attestation ++======================== ++ ++.. warning:: ++ Push-model attestation is currently experimental. The feature is functional ++ but the API and configuration options may change in future releases. ++ ++Introduction ++------------ ++ ++In the default pull model, the Keylime verifier continuously polls agents for ++attestation data. This requires the verifier to reach the agent over the network. ++ ++The push model reverses this: the agent initiates connections to the verifier and ++proactively sends attestation evidence. This is useful when the verifier cannot ++directly reach the agent, for example behind firewalls, NAT, or in edge/IoT ++deployments. ++ ++For a detailed description of how push-model attestation works, see ++:doc:`../design/push_model`. ++ ++Prerequisites ++------------- ++ ++* Keylime verifier and registrar installed and running ++* The ``keylime-push-model-agent`` binary installed on the target machine ++* A TPM 2.0 device (hardware or emulated for development) ++* Network connectivity **from the agent to the verifier and registrar** (the ++ reverse is not required) ++* The verifier's CA certificate available on the agent machine ++ ++Configuring the Verifier for Push Mode ++-------------------------------------- ++ ++Set the verifier's attestation mode to ``push`` in ``/etc/keylime/verifier.conf``: ++ ++.. code-block:: ini ++ ++ [verifier] ++ mode = push ++ ++Or use a configuration snippet in ``/etc/keylime/verifier.conf.d/``: ++ ++.. 
code-block:: ini ++ ++ # /etc/keylime/verifier.conf.d/001-push-mode.conf ++ [verifier] ++ mode = push ++ ++The verifier can also be configured via environment variable: ++ ++.. code-block:: bash ++ ++ export KEYLIME_VERIFIER_MODE=push ++ ++.. note:: ++ The ``mode`` setting affects all agents on this verifier. A verifier in push ++ mode expects agents to submit attestation data; it does not poll agents. A ++ single verifier cannot operate in both modes simultaneously. ++ ++Additional verifier settings relevant to push mode: ++ ++* ``quote_interval``: Used to calculate the agent timeout threshold ++ (``quote_interval * 5``). Default: ``2`` seconds. ++* ``challenge_lifetime``: How long a challenge nonce remains valid for evidence ++ submission. ++* ``verification_timeout``: Maximum time allowed for evidence verification. ++ ++After changing the configuration, restart the verifier: ++ ++.. code-block:: bash ++ ++ sudo systemctl restart keylime_verifier ++ ++Configuring the Push-Model Agent ++--------------------------------- ++ ++The push-model agent is a separate binary from the standard Keylime agent. It is ++installed as ``keylime_push_model_agent`` (or ``keylime-push-model-agent``). ++ ++The agent is configured through ``/etc/keylime/agent.conf`` (TOML format), command-line ++arguments, or environment variables. ++ ++Key Configuration Options ++""""""""""""""""""""""""" ++ ++The following options are specific to or particularly important for push-model ++operation: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 15 55 ++ ++ * - Option ++ - Default ++ - Description ++ * - ``verifier_url`` ++ - ``https://localhost:8881`` ++ - URL of the verifier. Must use HTTPS. ++ * - ``verifier_tls_ca_cert`` ++ - ``cv_ca/cacert.crt`` ++ - Path to the CA certificate for verifying the verifier's TLS certificate. ++ Relative paths are resolved from ``keylime_dir``. ++ * - ``attestation_interval_seconds`` ++ - ``60`` ++ - Interval in seconds between attestation cycles. 
++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - IP address of the registrar. ++ * - ``registrar_port`` ++ - ``8890`` ++ - Port of the registrar. ++ * - ``registrar_tls_enabled`` ++ - ``false`` ++ - Enable TLS for registrar communication. ++ * - ``registrar_tls_ca_cert`` ++ - ``cv_ca/cacert.crt`` ++ - CA certificate for registrar TLS verification. ++ * - ``uuid`` ++ - (generated) ++ - Agent UUID. Can be a specific UUID, ``generate`` (random), or ++ ``hash_ek`` (derived from the EK). ++ * - ``api_versions`` ++ - ``3.0`` ++ - API versions supported by the agent. Defaults to ``3.0`` for push model. ++ * - ``tpm_hash_alg`` ++ - ``sha256`` ++ - TPM hash algorithm (``sha256``, ``sha384``, ``sha512``). ++ * - ``tpm_signing_alg`` ++ - ``rsassa`` ++ - TPM signing algorithm (``rsassa``, ``ecdsa``). ++ * - ``keylime_dir`` ++ - ``/var/lib/keylime`` ++ - Working directory for certificates and data files. ++ ++Example Minimal Configuration ++"""""""""""""""""""""""""""""" ++ ++.. code-block:: toml ++ ++ # /etc/keylime/agent.conf (push-model agent) ++ [agent] ++ uuid = "d432fbb3-d2f1-4a97-9ef7-75bd81c00000" ++ verifier_url = "https://verifier.example.com:8881" ++ verifier_tls_ca_cert = "/var/lib/keylime/cv_ca/cacert.crt" ++ attestation_interval_seconds = 60 ++ registrar_ip = "registrar.example.com" ++ registrar_port = 8890 ++ tpm_hash_alg = "sha256" ++ tpm_signing_alg = "rsassa" ++ ++Command-Line Arguments ++"""""""""""""""""""""" ++ ++The push-model agent accepts the following command-line arguments, which override ++configuration file values: ++ ++.. 
code-block:: text ++ ++ --verifier-url Verifier URL (required) ++ --registrar-url Registrar URL (default: http://127.0.0.1:8888) ++ --agent-identifier Agent UUID ++ --attestation-interval-seconds Attestation interval (default: 60) ++ --ca-certificate CA certificate for TLS verification ++ --api-version API version (default: v3.0) ++ --timeout Request timeout in milliseconds (default: 5000) ++ --insecure Accept invalid TLS certificates (testing only) ++ --avoid-tpm Use mock TPM (testing only) ++ ++Exponential Backoff ++""""""""""""""""""" ++ ++When the agent encounters errors (network failures, verifier unavailable), it uses ++exponential backoff for retries: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 35 15 50 ++ ++ * - Option ++ - Default ++ - Description ++ * - ``exponential_backoff_initial_delay`` ++ - ``10000`` ++ - Initial delay in milliseconds (10 seconds) ++ * - ``exponential_backoff_max_retries`` ++ - ``5`` ++ - Maximum number of retry attempts ++ * - ``exponential_backoff_max_delay`` ++ - ``300000`` ++ - Maximum delay in milliseconds (5 minutes) ++ ++Systemd Service Management ++--------------------------- ++ ++The push-model agent is managed as a systemd service: ++ ++.. code-block:: bash ++ ++ # Enable the service to start on boot ++ sudo systemctl enable keylime_push_model_agent ++ ++ # Start the service ++ sudo systemctl start keylime_push_model_agent ++ ++ # Check service status ++ sudo systemctl status keylime_push_model_agent ++ ++ # View logs ++ sudo journalctl -u keylime_push_model_agent -f ++ ++.. warning:: ++ The push-model agent service (``keylime_push_model_agent.service``) conflicts ++ with the standard pull-model agent service (``keylime_agent.service``). Only one ++ can run at a time on the same machine. Starting one will stop the other. ++ ++The service is configured to restart on failure with a 120-second delay between ++restart attempts. 
++ ++Enrolling an Agent for Push-Model Attestation ++--------------------------------------------- ++ ++Use the ``keylime_tenant`` tool with the ``--push-model`` flag to enroll an agent ++for push-model attestation: ++ ++.. code-block:: bash ++ ++ # Add an agent in push mode ++ sudo keylime_tenant -c add --push-model -u <agent_uuid> ++ ++ # Add with a runtime IMA policy ++ sudo keylime_tenant -c add --push-model -u <agent_uuid> \ ++ --runtime-policy-name <policy_name> ++ ++ # Add with a measured boot policy ++ sudo keylime_tenant -c add --push-model -u <agent_uuid> \ ++ --mb-policy-name <policy_name> ++ ++.. note:: ++ In push mode, the ``-t`` / ``--targethost`` option is not required because the ++ verifier does not need to connect to the agent. The agent's IP and port are set ++ to ``None`` in the verifier's database. ++ ++To check the status of a push-model agent: ++ ++.. code-block:: bash ++ ++ sudo keylime_tenant -c cvstatus -u <agent_uuid> ++ ++To remove an agent: ++ ++.. code-block:: bash ++ ++ sudo keylime_tenant -c delete -u <agent_uuid> ++ ++TLS Configuration for Push Model ++--------------------------------- ++ ++The push model uses TLS differently from the pull model: ++ ++**Agent-to-verifier connection:** ++ ++* The agent connects to the verifier over HTTPS ++* The agent verifies the verifier's server certificate using the configured CA ++ certificate (``verifier_tls_ca_cert``) ++* The agent does **not** present a client certificate (no mTLS) ++* Authentication is done via PoP bearer tokens (see :doc:`authentication`) ++ ++**Agent-to-registrar connection:** ++ ++* The agent connects to the registrar to register itself ++* TLS can be enabled with ``registrar_tls_enabled = true`` ++* The registrar CA certificate is configured with ``registrar_tls_ca_cert`` ++ ++**Firewall considerations:** ++ ++* No inbound ports need to be opened on the agent machine ++* The agent needs outbound access to the verifier port (default: 8881) ++* The agent needs outbound access to the registrar port (default: 8890) ++ ++To set up TLS, copy the verifier's CA 
certificate to the agent machine: ++ ++.. code-block:: bash ++ ++ # On the verifier machine, the CA cert is typically at: ++ # /var/lib/keylime/cv_ca/cacert.crt ++ ++ # Copy to the agent machine: ++ scp verifier:/var/lib/keylime/cv_ca/cacert.crt /var/lib/keylime/cv_ca/cacert.crt ++ ++Verifying the Deployment ++------------------------- ++ ++After starting both the verifier (in push mode) and the push-model agent: ++ ++1. **Check agent registration** in the registrar: ++ ++ .. code-block:: bash ++ ++ sudo keylime_tenant -c regstatus -u <agent_uuid> ++ ++2. **Check attestation status** in the verifier: ++ ++ .. code-block:: bash ++ ++ sudo keylime_tenant -c cvstatus -u <agent_uuid> ++ ++3. **View verifier logs** for attestation activity: ++ ++ .. code-block:: bash ++ ++ sudo journalctl -u keylime_verifier -f ++ ++ Successful attestations will show evidence receipt and verification completion ++ messages. ++ ++4. **View agent logs** for attestation cycles: ++ ++ .. code-block:: bash ++ ++ sudo journalctl -u keylime_push_model_agent -f ++ ++ The agent logs will show transitions through the state machine: ++ registration, negotiation, and attestation phases. ++ ++Troubleshooting ++---------------- ++ ++Agent cannot connect to verifier ++""""""""""""""""""""""""""""""""" ++ ++* Verify the ``verifier_url`` is correct and uses HTTPS ++* Check that the verifier is running and listening on the configured port ++* Verify network connectivity from the agent to the verifier ++* Check that the CA certificate (``verifier_tls_ca_cert``) matches the verifier's ++ server certificate ++ ++Agent shows timeout failures ++""""""""""""""""""""""""""""" ++ ++The verifier marks an agent as failed if it does not receive an attestation within ++``quote_interval * 5`` seconds. 
++ ++* Verify the ``attestation_interval_seconds`` on the agent is less than the ++ verifier's timeout threshold ++* Check for network instability between agent and verifier ++* Review agent logs for errors during attestation cycles ++ ++PoP authentication errors ++"""""""""""""""""""""""""" ++ ++* Ensure the agent is properly registered in the registrar (the AK must be known) ++* Check that the TPM is accessible and functioning ++* Verify the agent UUID matches between agent configuration and verifier enrollment ++ ++Agent state stuck in Negotiating ++""""""""""""""""""""""""""""""""" ++ ++* The verifier may be rejecting capabilities. Check verifier logs for error details ++* Ensure the TPM algorithms configured on the agent are accepted by the verifier ++* Check that the ``api_versions`` setting includes ``3.0`` ++ ++Service fails to start ++"""""""""""""""""""""" ++ ++* Check that the pull-model agent service is not running ++ (``systemctl status keylime_agent``) ++* Verify the configuration file syntax (TOML format) ++* Check file permissions on TLS certificates and TPM device diff --git a/0015-CVE-2026-1709.patch b/0015-CVE-2026-1709.patch deleted file mode 100644 index b494256..0000000 --- a/0015-CVE-2026-1709.patch +++ /dev/null @@ -1,20 +0,0 @@ -diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py -index 1d9a9c2..859b23a 100644 ---- a/keylime/web/base/server.py -+++ b/keylime/web/base/server.py -@@ -2,7 +2,6 @@ import asyncio - import multiprocessing - from abc import ABC, abstractmethod - from functools import wraps --from ssl import CERT_OPTIONAL - from typing import TYPE_CHECKING, Any, Callable, Optional - - import tornado -@@ -252,7 +251,6 @@ class Server(ABC): - self._https_port = config.getint(component, "tls_port", fallback=0) - self._max_upload_size = config.getint(component, "max_upload_size", fallback=104857600) - self._ssl_ctx = web_util.init_mtls(component) -- self._ssl_ctx.verify_mode = CERT_OPTIONAL - - def _get(self, pattern: 
str, controller: type["Controller"], action: str, allow_insecure: bool = False) -> None: - """Creates a new route to handle incoming GET requests issued for paths which match the given diff --git a/0015-remove-enable-authentication-config-option.patch b/0015-remove-enable-authentication-config-option.patch new file mode 100644 index 0000000..d40247b --- /dev/null +++ b/0015-remove-enable-authentication-config-option.patch @@ -0,0 +1,46 @@ +From 416d3906fe4071132d5cdc494f828ce3a909f336 Mon Sep 17 00:00:00 2001 +From: Sergio Arroutbi +Date: Fri, 20 Mar 2026 10:57:23 +0100 +Subject: [PATCH] Remove enable_authentication agent config option + +The Rust agent does not parse the enable_authentication +configuration option and always performs authentication. +Remove the option from both the agent.j2 template and the +2.5 mapping.json to avoid exposing a non-functional setting +to users. + +Signed-off-by: Sergio Arroutbi +--- + templates/2.5/agent.j2 | 7 ------- + templates/2.5/mapping.json | 3 +-- + 2 files changed, 1 insertion(+), 9 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index d5eec733d..5e9a1a706 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -274,10 +274,3 @@ ima_ml_path = "{{ agent.ima_ml_path }}" + # If set as a relative path, it will be considered from the root path "/". + # If set as an absolute path, it will use it without changes + measuredboot_ml_path = "{{ agent.measuredboot_ml_path }}" +- +-# Enable challenge-response authentication for push model attestation. +-# When enabled, the agent will authenticate with the verifier using TPM-based +-# proof of possession before sending attestation evidence. +-# This option is specific to the push attestation model. +-# The default is False (disabled). 
+-enable_authentication = {{ agent.enable_authentication }} +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 522aa4ce9..4b198e768 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -13,8 +13,7 @@ + "ima_ml_count_file": "/tmp/ima_ml_count", + "uefi_logs_evidence_version": "1.0", + "tls_accept_invalid_certs": "false", +- "tls_accept_invalid_hostnames": "false", +- "enable_authentication": "true" ++ "tls_accept_invalid_hostnames": "false" + } + }, + "verifier": { diff --git a/0016-docs-push-attestation-config-tables.patch b/0016-docs-push-attestation-config-tables.patch new file mode 100644 index 0000000..0cd863e --- /dev/null +++ b/0016-docs-push-attestation-config-tables.patch @@ -0,0 +1,1164 @@ +From 4a36422caa40bf914b1b9f7ed86efc802e183ef1 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Tue, 24 Mar 2026 18:04:55 +0100 +Subject: [PATCH 1/3] templates: Remove unused ima_ml_count_file option + +This option was defined in the 2.5 config template and mapping but +never used. Remove it to avoid confusion. 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + templates/2.5/agent.j2 | 3 --- + templates/2.5/mapping.json | 1 - + 2 files changed, 4 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index 5e9a1a706..f56010e87 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -47,9 +47,6 @@ verifier_url = "{{ agent_verifier_url }}" + # Server identifier for certification keys + certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" + +-# File to store the IMA measurement list count +-ima_ml_count_file = "{{ agent_ima_ml_count_file }}" +- + # Evidence version for UEFI logs + uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" + +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 4b198e768..04f89e77a 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -10,7 +10,6 @@ + "exponential_backoff_initial_delay": "10000", + "exponential_backoff_max_delay": "360000", + "certification_keys_server_identifier": "ak", +- "ima_ml_count_file": "/tmp/ima_ml_count", + "uefi_logs_evidence_version": "1.0", + "tls_accept_invalid_certs": "false", + "tls_accept_invalid_hostnames": "false" + +From baf182680ffd60ab0b4ef8bf42bba3d02208b392 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 27 Mar 2026 10:55:35 +0100 +Subject: [PATCH 2/3] templates: Sync agent config options with + keylime-agent.conf + +Add missing agent options to the 2.5 upgrade templates and mapping +that are present in keylime-agent.conf and used in the agent code: + +Common options: +- keylime_dir: working directory path +- payload_key: payload encryption private key +- payload_key_password: password for payload key +- revocation_actions_dir: path to pre-installed revocation scripts +- allow_payload_revocation_actions: control payload revocation actions + +Push model options: +- verifier_tls_ca_cert: CA cert for verifier TLS 
verification +- registrar_tls_port: TLS port for registrar communication +- registrar_tls_enabled: enable TLS with registrar +- registrar_tls_ca_cert: CA cert for registrar TLS verification +- registrar_api_versions: API version negotiation with registrar + +Fix default values to match keylime-agent.conf: +- exponential_backoff_max_delay: 360000 -> 300000 +- uefi_logs_evidence_version: "1.0" -> "2.1" + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + templates/2.5/agent.j2 | 40 ++++++++++++++++++++++++++++++++++++++ + templates/2.5/mapping.json | 14 +++++++++++-- + 2 files changed, 52 insertions(+), 2 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index f56010e87..9f85f8411 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -44,6 +44,11 @@ agent_data_path = "{{ agent_data_path }}" + # Verifier URL + verifier_url = "{{ agent_verifier_url }}" + ++# Verifier TLS CA certificate (Push Model specific). ++# Used to verify the verifier's server certificate. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++verifier_tls_ca_cert = "{{ agent.verifier_tls_ca_cert }}" ++ + # Server identifier for certification keys + certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" + +@@ -54,11 +59,31 @@ uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" + registrar_ip = "{{ agent.registrar_ip }}" + registrar_port = {{ agent.registrar_port }} + ++# The TLS port of the registrar server (Push Model specific). ++# Used when registrar_tls_enabled is set to true. ++registrar_tls_port = {{ agent.registrar_tls_port }} ++ ++# Enable TLS communication between agent and registrar (Push Model specific). ++# When enabled, the agent uses TLS (server verification only) with the registrar. 
++registrar_tls_enabled = {{ agent.registrar_tls_enabled }} ++ ++# TLS CA certificate for verifying the registrar's server certificate (Push Model specific). ++# Only used when registrar_tls_enabled is true. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++registrar_tls_ca_cert = "{{ agent.registrar_tls_ca_cert }}" ++ ++# The API versions to use when communicating with the registrar (Push Model specific). ++# Supports "default" (all supported), "latest", or a comma-separated list. ++registrar_api_versions = "{{ agent.registrar_api_versions }}" ++ + # Enable mTLS communication between agent, verifier and tenant. + # Details on why setting it to "False" is generally considered insecure can be found + # on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r + enable_agent_mtls = {{ agent.enable_agent_mtls }} + ++# The keylime working directory. ++keylime_dir = "{{ agent.keylime_dir }}" ++ + # Accept invalid TLS certificates (INSECURE - for testing only) + # When enabled, the agent will accept self-signed or invalid certificates + # This option is specific to the push attestation model. +@@ -100,6 +125,14 @@ server_key = "{{ agent.server_key }}" + # If left empty, the private key will not be encrypted. + server_key_password = "{{ agent.server_key_password }}" + ++# The name of the file containing the payload encryption private key. ++# If set as "default", the "payload-private.pem" value is used. ++payload_key = "{{ agent.payload_key }}" ++ ++# Set the password used to encrypt the payload private key file. ++# If left empty, the private key will not be encrypted. ++payload_key_password = "{{ agent.payload_key_password }}" ++ + # The name of the file containing the X509 certificate used as the Keylime agent + # server TLS certificate. + # This certificate must be self signed. +@@ -159,6 +192,9 @@ revocation_cert = "{{ agent.revocation_cert }}" + # action_list in the unzipped payload content. 
+ revocation_actions = "{{ agent.revocation_actions }}" + ++# The path to the directory containing pre-installed revocation action scripts. ++revocation_actions_dir = "{{ agent.revocation_actions_dir }}" ++ + # A script to execute after unzipping the tenant payload. This is like + # cloud-init lite =) Keylime will run it with a /bin/sh environment and + # with a working directory of /var/lib/keylime/secure/unzipped. +@@ -171,6 +207,10 @@ payload_script = "{{ agent.payload_script }}" + # https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r + enable_insecure_payload = {{ agent.enable_insecure_payload }} + ++# Whether to allow running revocation actions sent as part of the payload. ++# Setting to false limits revocation actions to pre-installed ones. ++allow_payload_revocation_actions = {{ agent.allow_payload_revocation_actions }} ++ + # Maximum number of retries for exponential backoff + exponential_backoff_max_retries = {{ agent.exponential_backoff_max_retries }} + # Initial delay in milliseconds for exponential backoff +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 04f89e77a..f3eaf8dbb 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -4,13 +4,23 @@ + "components": { + "agent": { + "add": { ++ "keylime_dir": "/var/lib/keylime", ++ "payload_key": "default", ++ "payload_key_password": "", ++ "revocation_actions_dir": "/usr/libexec/keylime", ++ "allow_payload_revocation_actions": "true", + "agent_data_path": "/var/lib/keylime/agent_data.json", + "verifier_url": "https://localhost:8881", ++ "verifier_tls_ca_cert": "default", ++ "registrar_tls_port": "8891", ++ "registrar_tls_enabled": "false", ++ "registrar_tls_ca_cert": "default", ++ "registrar_api_versions": "default", + "exponential_backoff_max_retries": "5", + "exponential_backoff_initial_delay": "10000", +- "exponential_backoff_max_delay": "360000", ++ "exponential_backoff_max_delay": "300000", + 
"certification_keys_server_identifier": "ak", +- "uefi_logs_evidence_version": "1.0", ++ "uefi_logs_evidence_version": "2.1", + "tls_accept_invalid_certs": "false", + "tls_accept_invalid_hostnames": "false" + } + +From bd392633a36839dfa51f86a1568370a87b3ecd37 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 27 Mar 2026 10:57:04 +0100 +Subject: [PATCH 3/3] docs: Add tables with push-attestation configuration + options + +Add comprehensive configuration reference tables for all Keylime +components documenting option names, defaults, config versions, and +environment variable overrides. Tables are organized by component and +separated into common, pull-model, and push-model sections. + +Mark removed agent options with "(removed in 2.5)" and version range +2.0-2.4: +- measure_payload_pcr +- exponential_backoff +- retry_interval +- max_retries + +Add missing agent common options: +- keylime_dir +- payload_key +- payload_key_password +- revocation_actions_dir +- allow_payload_revocation_actions + +Add missing agent push-model options: +- attestation_interval_seconds +- verifier_tls_ca_cert +- registrar_tls_port +- registrar_tls_enabled +- registrar_tls_ca_cert +- registrar_api_versions + +Fix default values to match keylime-agent.conf: +- exponential_backoff_max_delay: 360000 -> 300000 +- uefi_logs_evidence_version: "1.0" -> "2.1" + +Use consistent formatting for default values (unquoted for INI +components, quoted strings and unquoted booleans/integers for TOML). 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/user_guide/configuration.rst | 897 +++++++++++++++++++++++++++++- + 1 file changed, 893 insertions(+), 4 deletions(-) + +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index 2e50757df..aae534423 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -117,7 +117,7 @@ The environment variables are defined as + The section can be omitted if the option to set is located in the main section + (the section named after the component). Otherwise the section is required. + +-For example, to set the ``webhook_url` option from the `[revocations]`` section in ++For example, to set the ``webhook_url`` option from the ``[revocations]`` section in + the ``verifier.conf`` file, the environment variable to set is + ``KEYLIME_VERIFIER_REVOCATIONS_WEBHOOK_URL``. + +@@ -126,7 +126,7 @@ option from the ``[verifier]`` section in the ``verifier.conf``, the environment + variable to set is ``KEYLIME_VERIFIER_SERVER_KEY`` (note that the section can be + omitted). + +-Configuraton upgrades ++Configuration upgrades + --------------------- + + When updating keylime, it is also recommended to upgrade the configuration to +@@ -183,9 +183,9 @@ configuration files are kept intact as backup and renamed with the ``.bkp`` exte + appended to the file names. + + In case the ``--output`` option is provided to the ``keylime_upgrade_config`` +-script, the configuration files are written even when they were alredy ++script, the configuration files are written even when they were already + up-to-date using the available templates. It can be seen as a way to force the +-creation of the configuration fiels, fitting the options read into the new ++creation of the configuration files, fitting the options read into the new + templates. 
+ + Passing the ``--debug`` option to the ``keylime_upgrade_config``, the logging level +@@ -211,3 +211,892 @@ To ignore the input files and use the default value for all options, the + + Finally, to process a single mapping file, the mapping file path can be passed + via the ``--mapping`` option ++ ++Attestation Models: Pull vs Push ++--------------------------------- ++ ++Keylime supports two attestation models that determine how the verifier obtains ++attestation evidence from agents: ++ ++Pull Model (Traditional) ++~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++In the pull model, the verifier actively polls agents at regular intervals to ++retrieve attestation evidence. This is the default and traditional mode of ++operation. ++ ++**Use Cases:** ++ ++* Traditional deployments where the verifier can directly connect to agents ++* Environments with stable network connectivity ++* When you need fine-grained control over attestation frequency ++ ++Push Model (Agent-Driven) ++~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++In the push model, agents periodically push their attestation evidence to the ++verifier. This mode is useful when the verifier cannot directly connect to ++agents (e.g., agents behind firewalls or NAT). ++ ++**Use Cases:** ++ ++* Agents deployed behind firewalls or NAT ++* Cloud or edge deployments where direct connectivity is limited ++* When agents need to control their own attestation schedule ++ ++.. note:: ++ The push model options were introduced in configuration version 2.5 and ++ require the push attestation agent. ++ ++Configuration Options Reference ++-------------------------------- ++ ++This section provides comprehensive tables of all configuration options for each ++Keylime component, including default values, environment variable overrides, and ++applicability to pull/push attestation models. 
++ ++Verifier Configuration (``/etc/keylime/verifier.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Common Options (Both Models) ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_VERSION`` ++ * - ``uuid`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_UUID`` ++ * - ``ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_IP`` ++ * - ``port`` ++ - ``8881`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PORT`` ++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REGISTRAR_PORT`` ++ * - ``enable_agent_mtls`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_ENABLE_AGENT_MTLS`` ++ * - ``tls_dir`` ++ - ``generate`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TLS_DIR`` ++ * - ``server_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_KEY`` ++ * - ``server_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRUSTED_CLIENT_CA`` ++ * - ``client_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_KEY`` ++ * - ``client_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_KEY_PASSWORD`` ++ * - ``client_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_CERT`` ++ * - ``trusted_server_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRUSTED_SERVER_CA`` ++ * - ``database_url`` ++ - ``sqlite`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DATABASE_URL`` ++ * - ``database_pool_sz_ovfl`` ++ - ``5,10`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DATABASE_POOL_SZ_OVFL`` ++ * - ``auto_migrate_db`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_AUTO_MIGRATE_DB`` ++ * - 
``num_workers`` ++ - ``0`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_NUM_WORKERS`` ++ * - ``max_upload_size`` ++ - ``104857600`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MAX_UPLOAD_SIZE`` ++ * - ``measured_boot_policy_name`` ++ - ``accept-all`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_POLICY_NAME`` ++ * - ``measured_boot_imports`` ++ - ``[]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_IMPORTS`` ++ * - ``measured_boot_evaluate`` ++ - ``once`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_EVALUATE`` ++ * - ``severity_labels`` ++ - ``["info", "notice", ...]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SEVERITY_LABELS`` ++ * - ``severity_policy`` ++ - ``[{"event_id": ".*", ...}]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SEVERITY_POLICY`` ++ * - ``ignore_tomtou_errors`` ++ - ``False`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_IGNORE_TOMTOU_ERRORS`` ++ * - ``durable_attestation_import`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DURABLE_ATTESTATION_IMPORT`` ++ * - ``persistent_store_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_URL`` ++ * - ``transparency_log_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRANSPARENCY_LOG_URL`` ++ * - ``time_stamp_authority_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TIME_STAMP_AUTHORITY_URL`` ++ * - ``time_stamp_authority_certs_path`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TIME_STAMP_AUTHORITY_CERTS_PATH`` ++ * - ``persistent_store_format`` ++ - ``json`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_FORMAT`` ++ * - ``persistent_store_encoding`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_ENCODING`` ++ * - ``transparency_log_sign_algo`` ++ - ``sha256`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRANSPARENCY_LOG_SIGN_ALGO`` ++ * - ``signed_attributes`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SIGNED_ATTRIBUTES`` ++ * - ``require_allow_list_signatures`` ++ - ``False`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REQUIRE_ALLOW_LIST_SIGNATURES`` ++ * - ``authorization_provider`` ++ - ``simple`` ++ - 2.5 ++ - 
``KEYLIME_VERIFIER_AUTHORIZATION_PROVIDER`` ++ * - ``cert_subject_alternative_names`` ++ - (empty) ++ - 2.5 ++ - ``KEYLIME_VERIFIER_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ ++Pull Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``quote_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_QUOTE_INTERVAL`` ++ * - ``retry_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_RETRY_INTERVAL`` ++ * - ``max_retries`` ++ - ``5`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MAX_RETRIES`` ++ * - ``exponential_backoff`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_EXPONENTIAL_BACKOFF`` ++ * - ``request_timeout`` ++ - ``60.0`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REQUEST_TIMEOUT`` ++ ++Push Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``mode`` ++ - ``pull`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_MODE`` ++ * - ``challenge_lifetime`` ++ - ``1800`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_CHALLENGE_LIFETIME`` ++ * - ``verification_timeout`` ++ - ``0`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_VERIFICATION_TIMEOUT`` ++ * - ``session_create_rate_limit_per_ip`` ++ - ``50`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_PER_IP`` ++ * - ``session_create_rate_limit_window_ip`` ++ - ``60`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_WINDOW_IP`` ++ * - ``session_create_rate_limit_per_agent`` ++ - ``15`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_PER_AGENT`` ++ * - ``session_create_rate_limit_window_agent`` ++ - ``60`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_WINDOW_AGENT`` ++ * - ``session_lifetime`` ++ - ``180`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_LIFETIME`` ++ * - ``extend_token_on_attestation`` ++ - ``True`` ++ - 2.5 ++ - 
``KEYLIME_VERIFIER_EXTEND_TOKEN_ON_ATTESTATION`` ++ ++Revocations Section ++^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``enabled_revocation_notifications`` ++ - ``['agent']`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ENABLED_REVOCATION_NOTIFICATIONS`` ++ * - ``zmq_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ZMQ_IP`` ++ * - ``zmq_port`` ++ - ``8992`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ZMQ_PORT`` ++ * - ``webhook_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_WEBHOOK_URL`` ++ ++Registrar Configuration (``/etc/keylime/registrar.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_VERSION`` ++ * - ``ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_IP`` ++ * - ``port`` ++ - ``8890`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PORT`` ++ * - ``tls_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TLS_PORT`` ++ * - ``tls_dir`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TLS_DIR`` ++ * - ``server_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_KEY`` ++ * - ``server_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRUSTED_CLIENT_CA`` ++ * - ``database_url`` ++ - ``sqlite`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_DATABASE_URL`` ++ * - ``database_pool_sz_ovfl`` ++ - ``5,10`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_DATABASE_POOL_SZ_OVFL`` ++ * - ``auto_migrate_db`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_AUTO_MIGRATE_DB`` ++ * - ``durable_attestation_import`` ++ - (empty) ++ - 
2.0 ++ - ``KEYLIME_REGISTRAR_DURABLE_ATTESTATION_IMPORT`` ++ * - ``persistent_store_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_URL`` ++ * - ``transparency_log_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRANSPARENCY_LOG_URL`` ++ * - ``time_stamp_authority_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TIME_STAMP_AUTHORITY_URL`` ++ * - ``time_stamp_authority_certs_path`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TIME_STAMP_AUTHORITY_CERTS_PATH`` ++ * - ``persistent_store_format`` ++ - ``json`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_FORMAT`` ++ * - ``persistent_store_encoding`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_ENCODING`` ++ * - ``transparency_log_sign_algo`` ++ - ``sha256`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRANSPARENCY_LOG_SIGN_ALGO`` ++ * - ``signed_attributes`` ++ - ``ek_tpm,aik_tpm,ekcert`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SIGNED_ATTRIBUTES`` ++ * - ``tpm_identity`` ++ - ``default`` ++ - 2.1 ++ - ``KEYLIME_REGISTRAR_TPM_IDENTITY`` ++ * - ``malformed_cert_action`` ++ - ``warn`` ++ - 2.4 ++ - ``KEYLIME_REGISTRAR_MALFORMED_CERT_ACTION`` ++ * - ``authorization_provider`` ++ - ``simple`` ++ - 2.5 ++ - ``KEYLIME_REGISTRAR_AUTHORIZATION_PROVIDER`` ++ * - ``cert_subject_alternative_names`` ++ - (empty) ++ - 2.5 ++ - ``KEYLIME_REGISTRAR_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ ++Tenant Configuration (``/etc/keylime/tenant.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERSION`` ++ * - ``verifier_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERIFIER_IP`` ++ * - ``verifier_port`` ++ - ``8881`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERIFIER_PORT`` ++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REGISTRAR_PORT`` ++ * - ``tls_dir`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TLS_DIR`` ++ * - ``enable_agent_mtls`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ENABLE_AGENT_MTLS`` ++ * - ``client_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_KEY`` ++ * - ``client_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_KEY_PASSWORD`` ++ * - ``client_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_CERT`` ++ * - ``trusted_server_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TRUSTED_SERVER_CA`` ++ * - ``tpm_cert_store`` ++ - ``/var/lib/keylime/tpm_cert_store`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TPM_CERT_STORE`` ++ * - ``max_payload_size`` ++ - ``1048576`` ++ - 2.0 ++ - ``KEYLIME_TENANT_MAX_PAYLOAD_SIZE`` ++ * - ``accept_tpm_hash_algs`` ++ - ``['sha512', 'sha384', 'sha256']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_HASH_ALGS`` ++ * - ``accept_tpm_encryption_algs`` ++ - ``['ecc', 'rsa']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_ENCRYPTION_ALGS`` ++ * - ``accept_tpm_signing_algs`` ++ - ``['ecschnorr', 'rsassa']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_SIGNING_ALGS`` ++ * - ``exponential_backoff`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_EXPONENTIAL_BACKOFF`` ++ * - ``retry_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_TENANT_RETRY_INTERVAL`` ++ * - ``max_retries`` ++ - ``5`` ++ - 2.0 ++ - ``KEYLIME_TENANT_MAX_RETRIES`` ++ * - ``request_timeout`` ++ - ``60`` ++ - 2.0 ++ - 
``KEYLIME_TENANT_REQUEST_TIMEOUT`` ++ * - ``require_ek_cert`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REQUIRE_EK_CERT`` ++ * - ``ek_check_script`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_EK_CHECK_SCRIPT`` ++ * - ``mb_refstate`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_MB_REFSTATE`` ++ ++CA Configuration (``/etc/keylime/ca.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 15 15 40 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_CA_VERSION`` ++ * - ``password`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_CA_PASSWORD`` ++ * - ``cert_country`` ++ - ``US`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_COUNTRY`` ++ * - ``cert_ca_name`` ++ - ``Keylime Certificate Authority`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CA_NAME`` ++ * - ``cert_state`` ++ - ``MA`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_STATE`` ++ * - ``cert_locality`` ++ - ``Lexington`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_LOCALITY`` ++ * - ``cert_organization`` ++ - ``MITLL`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_ORGANIZATION`` ++ * - ``cert_org_unit`` ++ - ``53`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_ORG_UNIT`` ++ * - ``cert_ca_lifetime`` ++ - ``3650`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CA_LIFETIME`` ++ * - ``cert_lifetime`` ++ - ``365`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_LIFETIME`` ++ * - ``cert_bits`` ++ - ``2048`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_BITS`` ++ * - ``cert_crl_dist`` ++ - ``http://localhost:38080/crl`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CRL_DIST`` ++ ++Agent Configuration (``/etc/keylime/agent.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. warning:: ++ The Python agent is deprecated and will be removed in version 7.0.0! ++ Please migrate to the Rust-based agent from https://github.com/keylime/rust-keylime/ ++ ++Common Options (Both Models) ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 28 12 12 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``"2.5"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_VERSION`` ++ * - ``api_versions`` ++ - ``"default"`` ++ - 2.4 ++ - ``KEYLIME_AGENT_API_VERSIONS`` ++ * - ``uuid`` ++ - ``"d432fbb3-d2f1-4a97-9ef7-75bd81c00000"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_UUID`` ++ * - ``ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_IP`` ++ * - ``port`` ++ - ``9002`` ++ - 2.0 ++ - ``KEYLIME_AGENT_PORT`` ++ * - ``contact_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_CONTACT_IP`` ++ * - ``contact_port`` ++ - ``9002`` ++ - 2.0 ++ - ``KEYLIME_AGENT_CONTACT_PORT`` ++ * - ``registrar_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8890`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REGISTRAR_PORT`` ++ * - ``enable_agent_mtls`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_AGENT_MTLS`` ++ * - ``tls_dir`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TLS_DIR`` ++ * - ``server_key`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_KEY`` ++ * - ``server_key_password`` ++ - ``""`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TRUSTED_CLIENT_CA`` ++ * - ``enc_keyname`` ++ - ``"derived_tci_key"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENC_KEYNAME`` ++ * - ``dec_payload_file`` ++ - ``"decrypted_payload"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_DEC_PAYLOAD_FILE`` ++ * - ``secure_size`` ++ - ``"1m"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SECURE_SIZE`` ++ * - ``tpm_ownerpassword`` ++ - ``""`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_OWNERPASSWORD`` ++ * - ``extract_payload_zip`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_EXTRACT_PAYLOAD_ZIP`` ++ * - ``enable_revocation_notifications`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_REVOCATION_NOTIFICATIONS`` ++ * 
- ``revocation_notification_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_NOTIFICATION_IP`` ++ * - ``revocation_notification_port`` ++ - ``8992`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_NOTIFICATION_PORT`` ++ * - ``revocation_cert`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_CERT`` ++ * - ``revocation_actions`` ++ - ``"[]"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_ACTIONS`` ++ * - ``payload_script`` ++ - ``"autorun.sh"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_PAYLOAD_SCRIPT`` ++ * - ``enable_insecure_payload`` ++ - ``false`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_INSECURE_PAYLOAD`` ++ * - ``measure_payload_pcr`` ++ - ``-1`` ++ - 2.0 ++ - ``KEYLIME_AGENT_MEASURE_PAYLOAD_PCR`` ++ * - ``exponential_backoff`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF`` ++ * - ``retry_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_AGENT_RETRY_INTERVAL`` ++ * - ``max_retries`` ++ - ``4`` ++ - 2.0 ++ - ``KEYLIME_AGENT_MAX_RETRIES`` ++ * - ``tpm_hash_alg`` ++ - ``"sha256"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_HASH_ALG`` ++ * - ``tpm_encryption_alg`` ++ - ``"rsa"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_ENCRYPTION_ALG`` ++ * - ``tpm_signing_alg`` ++ - ``"rsassa"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_SIGNING_ALG`` ++ * - ``ek_handle`` ++ - ``"generate"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_EK_HANDLE`` ++ * - ``enable_iak_idevid`` ++ - ``false`` ++ - 2.1 ++ - ``KEYLIME_AGENT_ENABLE_IAK_IDEVID`` ++ * - ``iak_idevid_template`` ++ - ``"detect"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_TEMPLATE`` ++ * - ``iak_idevid_asymmetric_alg`` ++ - ``"rsa"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_ASYMMETRIC_ALG`` ++ * - ``iak_idevid_name_alg`` ++ - ``"sha256"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_NAME_ALG`` ++ * - ``idevid_password`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IDEVID_PASSWORD`` ++ * - ``idevid_handle`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IDEVID_HANDLE`` ++ * - ``iak_password`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IAK_PASSWORD`` ++ * - 
``iak_handle`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IAK_HANDLE`` ++ * - ``iak_cert`` ++ - ``"default"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_CERT`` ++ * - ``idevid_cert`` ++ - ``"default"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IDEVID_CERT`` ++ * - ``run_as`` ++ - ``"keylime:tss"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_RUN_AS`` ++ * - ``ima_ml_path`` ++ - ``"default"`` ++ - 2.2 ++ - ``KEYLIME_AGENT_IMA_ML_PATH`` ++ * - ``measuredboot_ml_path`` ++ - ``"default"`` ++ - 2.2 ++ - ``KEYLIME_AGENT_MEASUREDBOOT_ML_PATH`` ++ ++Push Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 35 12 12 41 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``agent_data_path`` ++ - ``"/var/lib/keylime/agent_data.json"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_AGENT_DATA_PATH`` ++ * - ``verifier_url`` ++ - ``"https://localhost:8881"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_VERIFIER_URL`` ++ * - ``certification_keys_server_identifier`` ++ - ``"ak"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_CERTIFICATION_KEYS_SERVER_IDENTIFIER`` ++ * - ``uefi_logs_evidence_version`` ++ - ``"2.1"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_UEFI_LOGS_EVIDENCE_VERSION`` ++ * - ``tls_accept_invalid_certs`` ++ - ``false`` ++ - 2.5 ++ - ``KEYLIME_AGENT_TLS_ACCEPT_INVALID_CERTS`` ++ * - ``tls_accept_invalid_hostnames`` ++ - ``false`` ++ - 2.5 ++ - ``KEYLIME_AGENT_TLS_ACCEPT_INVALID_HOSTNAMES`` ++ * - ``exponential_backoff_max_retries`` ++ - ``5`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_MAX_RETRIES`` ++ * - ``exponential_backoff_initial_delay`` ++ - ``10000`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_INITIAL_DELAY`` ++ * - ``exponential_backoff_max_delay`` ++ - ``300000`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_MAX_DELAY`` ++ ++Logging Configuration (``/etc/keylime/logging.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++The logging configuration follows Python's standard logging configuration format. 
++See the Python logging documentation for details on configuring handlers, formatters, ++and loggers. The version option can be overridden with ``KEYLIME_LOGGING_VERSION``. ++ ++Configuration Version History ++------------------------------ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 15 70 ++ ++ * - Version ++ - Changes ++ * - 2.0 ++ - Base configuration structure, pull model support ++ * - 2.1 ++ - Added IAK/IDevID support, ``tpm_identity`` for registrar ++ * - 2.2 ++ - Added ``ima_ml_path`` and ``measuredboot_ml_path`` configuration ++ * - 2.3 ++ - Added persisted key handles for IAK/IDevID (``iak_handle``, ``idevid_handle``) ++ * - 2.4 ++ - Added ``api_versions`` for agent, ``malformed_cert_action`` for registrar ++ * - 2.5 ++ - **Push model support**: Added ``mode``, ``challenge_lifetime``, ``verification_timeout``, session rate limiting and lifetime options for verifier; ``verifier_url``, ``agent_data_path``, TLS validation, exponential backoff options for agent. Added ``authorization_provider`` and ``cert_subject_alternative_names`` for verifier and registrar ++ ++For detailed information on all configuration options for each component, refer ++to the configuration files in ``/etc/keylime/`` and their inline documentation. diff --git a/0017-verifier-graceful-shutdown.patch b/0017-verifier-graceful-shutdown.patch new file mode 100644 index 0000000..a90637c --- /dev/null +++ b/0017-verifier-graceful-shutdown.patch @@ -0,0 +1,2373 @@ +From cb944ee9c178f7a717e904ddbf85aac5b27a2eac Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Fri, 17 Apr 2026 14:52:35 +0200 +Subject: [PATCH] verifier: Implement graceful shutdown + +Implement graceful shutdown, cancel pending retries, and drain in-flight +work. 
+ +Backported from https://github.com/keylime/keylime/pull/1869 + +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/man/keylime_verifier.8.rst | 1 + + docs/user_guide/configuration.rst | 6 + + keylime/cloud_verifier_tornado.py | 180 ++++++++++++--- + keylime/push_agent_monitor.py | 23 ++ + keylime/revocation_notifier.py | 10 +- + keylime/shared_data.py | 65 +++++- + keylime/shutdown.py | 21 ++ + keylime/web/base/server.py | 111 +++++++++- + keylime/web/verifier_server.py | 122 +++++------ + templates/2.6/agent.j2 | 313 ++++++++++++++++++++++++++ + templates/2.6/ca.j2 | 39 ++++ + templates/2.6/logging.j2 | 33 +++ + templates/2.6/mapping.json | 11 + + templates/2.6/registrar.j2 | 168 ++++++++++++++ + templates/2.6/tenant.j2 | 130 +++++++++++ + templates/2.6/verifier.j2 | 350 ++++++++++++++++++++++++++++++ + test/test_shutdown.py | 210 ++++++++++++++++++ + test/test_verifier_server.py | 82 +++---- + 18 files changed, 1722 insertions(+), 153 deletions(-) + create mode 100644 keylime/shutdown.py + create mode 100644 templates/2.6/agent.j2 + create mode 100644 templates/2.6/ca.j2 + create mode 100644 templates/2.6/logging.j2 + create mode 100644 templates/2.6/mapping.json + create mode 100644 templates/2.6/registrar.j2 + create mode 100644 templates/2.6/tenant.j2 + create mode 100644 templates/2.6/verifier.j2 + create mode 100644 test/test_shutdown.py + +diff --git a/docs/man/keylime_verifier.8.rst b/docs/man/keylime_verifier.8.rst +index 5303a5f..d22d211 100644 +--- a/docs/man/keylime_verifier.8.rst ++++ b/docs/man/keylime_verifier.8.rst +@@ -53,6 +53,7 @@ Essentials: + - **quote_interval**: Time between integrity checks (seconds) + - **max_upload_size**: Upload size limit (bytes) + - **request_timeout**: Agent request timeout (seconds) ++- **shutdown_drain_timeout**: Max time (seconds) to wait for in-flight operations during shutdown + - **measured_boot_policy_name**, **measured_boot_imports**, **measured_boot_evaluate**: measured boot policy settings + - 
**severity_labels**, **severity_policy**: revocation severity config + - **ignore_tomtou_errors**: handle ToMToU IMA entries (bool) +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index aae5344..327c370 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -425,6 +425,10 @@ Common Options (Both Models) + - (empty) + - 2.5 + - ``KEYLIME_VERIFIER_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ * - ``shutdown_drain_timeout`` ++ - ``10`` ++ - 2.6 ++ - ``KEYLIME_VERIFIER_SHUTDOWN_DRAIN_TIMEOUT`` + + Pull Model Specific Options + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -1097,6 +1101,8 @@ Configuration Version History + - Added ``api_versions`` for agent, ``malformed_cert_action`` for registrar + * - 2.5 + - **Push model support**: Added ``mode``, ``challenge_lifetime``, ``verification_timeout``, session rate limiting and lifetime options for verifier; ``verifier_url``, ``agent_data_path``, TLS validation, exponential backoff options for agent. Added ``authorization_provider`` and ``cert_subject_alternative_names`` for verifier and registrar ++ * - 2.6 ++ - Added ``shutdown_drain_timeout`` for verifier graceful shutdown + + For detailed information on all configuration options for each component, refer + to the configuration files in ``/etc/keylime/`` and their inline documentation. +diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py +index 75f117b..eb57de5 100644 +--- a/keylime/cloud_verifier_tornado.py ++++ b/keylime/cloud_verifier_tornado.py +@@ -29,6 +29,7 @@ from keylime import ( + keylime_logging, + push_agent_monitor, + revocation_notifier, ++ shutdown, + signing, + tornado_requests, + web_util, +@@ -175,6 +176,29 @@ exclude_db: Dict[str, Any] = { + # events (quote polls, retries). Used to cancel them all on shutdown. + _pending_events: Dict[str, object] = {} + ++# Counter of currently executing process_agent() coroutines. 
The shutdown ++# handler waits for this to reach zero before stopping the IOLoop so that ++# in-flight DB writes can finish. ++_active_operations = 0 ++# Event signalled when _active_operations drops to zero during shutdown. ++_operations_drained = asyncio.Event() ++_operations_drained.set() # initially no operations are active ++ ++ ++def _enter_operation() -> None: ++ """Increment the active operations counter.""" ++ global _active_operations ++ _active_operations += 1 ++ _operations_drained.clear() ++ ++ ++def _exit_operation() -> None: ++ """Decrement the active operations counter; signal if drained.""" ++ global _active_operations ++ _active_operations -= 1 ++ if _active_operations <= 0: ++ _operations_drained.set() ++ + + def _register_pending_event(agent: Dict[str, Any], handle: object) -> None: + """Track a pending IOLoop timeout in both the agent dict and the global registry. +@@ -201,6 +225,38 @@ def _cancel_pending_event(agent: Dict[str, Any]) -> None: + logger.debug("Could not remove pending event for agent %s: %s", agent["agent_id"], e) + + ++def get_active_operations() -> int: ++ """Return the number of currently executing process_agent() coroutines.""" ++ return _active_operations ++ ++ ++async def wait_for_drain(timeout: float) -> bool: ++ """Wait up to *timeout* seconds for all active operations to finish. ++ ++ Returns True if all operations drained, False if the timeout expired. ++ """ ++ try: ++ await asyncio.wait_for(_operations_drained.wait(), timeout=timeout) ++ return True ++ except asyncio.TimeoutError: ++ return False ++ ++ ++def cancel_all_pending_events() -> None: ++ """Cancel every tracked pending IOLoop timeout. 
Called on shutdown.""" ++ if not _pending_events: ++ return ++ io_loop = tornado.ioloop.IOLoop.current() ++ for agent_id, handle in _pending_events.items(): ++ try: ++ io_loop.remove_timeout(handle) ++ except Exception as e: ++ logger.debug("Could not remove pending event for agent %s: %s", agent_id, e) ++ count = len(_pending_events) ++ _pending_events.clear() ++ logger.info("Cancelled %d pending attestation event(s) for shutdown", count) ++ ++ + def _from_db_obj(agent_db_obj: VerfierMain) -> Dict[str, Any]: + fields = [ + "agent_id", +@@ -2159,6 +2215,17 @@ async def invoke_get_quote( + need_pubkey: bool, + timeout: float = DEFAULT_TIMEOUT, + ) -> None: ++ # Clear tracking only — the timeout already fired (this *is* the callback), ++ # so there is no handle to cancel via remove_timeout(). Done before the ++ # shutdown check so tracking state is cleaned up even on early return. ++ if agent.get("pending_event") is not None: ++ agent["pending_event"] = None ++ _pending_events.pop(agent["agent_id"], None) ++ ++ if shutdown.is_shutting_down(): ++ logger.debug("Skipping get_quote for agent %s — shutting down", agent["agent_id"]) ++ return ++ + failure = Failure(Component.INTERNAL, ["verifier"]) + + params = cloud_verifier_common.prepare_get_quote(agent) +@@ -2262,10 +2329,17 @@ async def invoke_get_quote( + + + async def invoke_provide_v(agent: Dict[str, Any], timeout: float = DEFAULT_TIMEOUT) -> None: +- failure = Failure(Component.INTERNAL, ["verifier"]) +- ++ # Clear tracking only — the timeout already fired (this *is* the callback), ++ # so there is no handle to cancel via remove_timeout(). Done before the ++ # shutdown check so tracking state is cleaned up even on early return. 
+ if agent.get("pending_event") is not None: + agent["pending_event"] = None ++ _pending_events.pop(agent["agent_id"], None) ++ ++ if shutdown.is_shutting_down(): ++ logger.debug("Skipping provide_v for agent %s — shutting down", agent["agent_id"]) ++ return ++ failure = Failure(Component.INTERNAL, ["verifier"]) + + v_json_message = cloud_verifier_common.prepare_v(agent) + +@@ -2422,6 +2496,14 @@ async def notify_error( + async def process_agent( + agent: Dict[str, Any], new_operational_state: int, failure: Failure = Failure(Component.INTERNAL, ["verifier"]) + ) -> None: ++ # During shutdown, allow terminal-state transitions (FAILED, INVALID_QUOTE) ++ # through so that final DB writes and revocation notifications complete. ++ # Only skip non-terminal transitions that would schedule new polls/retries. ++ if shutdown.is_shutting_down() and new_operational_state not in (states.FAILED, states.INVALID_QUOTE): ++ logger.debug("Skipping process_agent for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ _enter_operation() + try: # pylint: disable=R1702 + main_agent_operational_state = agent["operational_state"] + stored_agent = None +@@ -2452,15 +2534,13 @@ async def process_agent( + # if the stored agent could not be recovered from the database, stop polling + if not stored_agent: + logger.warning("Unable to retrieve agent %s from database. 
Stopping polling", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + return + + # if the user did terminated this agent + if stored_agent.operational_state == states.TERMINATED: # pyright: ignore + logger.warning("Agent %s terminated by user.", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + + # Second database operation - delete agent + with session_context() as session: +@@ -2470,8 +2550,7 @@ async def process_agent( + # if the user tells us to stop polling because the tenant quote check failed + if stored_agent.operational_state == states.TENANT_FAILED: # pyright: ignore + logger.warning("Agent %s has failed tenant quote. Stopping polling", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + return + + # Use the request timeout stored in the agent dict (read from the +@@ -2498,8 +2577,7 @@ async def process_agent( + + # When the failure is irrecoverable we stop polling the agent + if not failure.recoverable or failure.highest_severity == MAX_SEVERITY_LABEL: +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + + # Third database operation - update agent with failure state + with session_context() as session: +@@ -2575,6 +2653,10 @@ async def process_agent( + "Setting up callback to check agent ID %s again in %f seconds", agent["agent_id"], interval + ) + ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling next poll for agent %s — shutting down", agent["agent_id"]) ++ return ++ + pending = tornado.ioloop.IOLoop.current().call_later( + # type: ignore # due to python <3.9 + interval, +@@ -2585,7 +2667,7 @@ 
async def process_agent( + False, + timeout=timeout, + ) +- agent["pending_event"] = pending ++ _register_pending_event(agent, pending) + return + + maxr = config.getint("verifier", "max_retries") +@@ -2617,7 +2699,11 @@ async def process_agent( + maxr, + next_retry, + ) +- tornado.ioloop.IOLoop.current().call_later( ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling retry for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ pending = tornado.ioloop.IOLoop.current().call_later( + # type: ignore # due to python <3.9 + next_retry, + invoke_get_quote, +@@ -2627,6 +2713,7 @@ async def process_agent( + True, + timeout=timeout, + ) ++ _register_pending_event(agent, pending) + return + + if main_agent_operational_state == states.PROVIDE_V and new_operational_state == states.PROVIDE_V_RETRY: +@@ -2651,9 +2738,17 @@ async def process_agent( + maxr, + next_retry, + ) +- tornado.ioloop.IOLoop.current().call_later( +- next_retry, invoke_provide_v, agent # type: ignore # due to python <3.9 ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling retry for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ pending = tornado.ioloop.IOLoop.current().call_later( ++ next_retry, # type: ignore # due to python <3.9 ++ invoke_provide_v, ++ agent, ++ timeout, + ) ++ _register_pending_event(agent, pending) + return + raise Exception("nothing should ever fall out of this!") + +@@ -2663,6 +2758,8 @@ async def process_agent( + "exception", {"context": "Agent caused the verifier to throw an exception", "data": str(e)}, False + ) + await process_agent(agent, states.FAILED, failure) ++ finally: ++ _exit_operation() + + + async def activate_agents(agents: List[VerfierMain], verifier_ip: str, verifier_port: int) -> None: +@@ -2769,31 +2866,62 @@ def main() -> None: + server = tornado.httpserver.HTTPServer(app, ssl_options=ssl_ctx, max_buffer_size=max_upload_size) + server.add_sockets(sockets) + +- def server_sig_handler(*_: Any) -> None: +- 
logger.info("Shutting down server %s..", task_id) ++ # Hold strong references to async tasks to prevent GC from collecting them mid-run ++ _background_tasks: List[asyncio.Task[None]] = [] ++ ++ def server_sig_handler(signame: str = "signal") -> None: ++ if shutdown.is_shutting_down(): ++ logger.warning("Shutdown already in progress, ignoring %s (server %s)", signame, task_id) ++ return ++ logger.info("Received %s, shutting down server %s..", signame, task_id) ++ ++ # Signal all attestation loops to stop scheduling new work ++ shutdown.request_shutdown() ++ + # Stop server to not accept new incoming connections + server.stop() + +- # Gracefully shutdown webhook workers to prevent connection errors +- if "webhook" in revocation_notifier.get_notifiers(): +- revocation_notifier.shutdown_webhook_workers() ++ # Cancel all pending attestation timeouts (retries, polls) ++ cancel_all_pending_events() ++ push_agent_monitor.cancel_all_timeouts() + +- # Wait for all connections to be closed and then stop ioloop ++ # Wait for in-flight operations, then close connections and stop + async def stop() -> None: +- await server.close_all_connections() +- tornado.ioloop.IOLoop.current().stop() ++ try: ++ # Give in-flight process_agent() coroutines time to finish ++ # DB writes and revocation notifications before tearing ++ # down webhook workers. ++ drain_timeout = config.getfloat("verifier", "shutdown_drain_timeout", fallback=10.0) ++ drained = await wait_for_drain(drain_timeout) ++ if not drained: ++ logger.warning( ++ "Shutting down with %d operation(s) still active after %.1fs", ++ get_active_operations(), ++ drain_timeout, ++ ) ++ ++ # Shutdown webhook workers after draining so revocation ++ # notifications from in-flight attestations are delivered. 
++ if "webhook" in revocation_notifier.get_notifiers(): ++ revocation_notifier.shutdown_webhook_workers() ++ ++ await server.close_all_connections() ++ except Exception: ++ logger.exception("Error during shutdown cleanup") ++ finally: ++ tornado.ioloop.IOLoop.current().stop() + +- asyncio.ensure_future(stop()) ++ _background_tasks.append(asyncio.ensure_future(stop())) + + # Attach signal handler to ioloop. + # Do not use signal.signal(..) for that because it does not work! + loop = asyncio.get_event_loop() +- loop.add_signal_handler(signal.SIGINT, server_sig_handler) +- loop.add_signal_handler(signal.SIGTERM, server_sig_handler) ++ loop.add_signal_handler(signal.SIGINT, lambda: server_sig_handler("SIGINT")) ++ loop.add_signal_handler(signal.SIGTERM, lambda: server_sig_handler("SIGTERM")) + + server.start() + # Reactivate agents +- asyncio.ensure_future(activate_agents(agents, verifier_host, int(verifier_port))) ++ _background_tasks.append(asyncio.ensure_future(activate_agents(agents, verifier_host, int(verifier_port)))) + tornado.ioloop.IOLoop.current().start() + logger.debug("Server %s stopped.", task_id) + sys.exit(0) +diff --git a/keylime/push_agent_monitor.py b/keylime/push_agent_monitor.py +index f41befc..6537a31 100644 +--- a/keylime/push_agent_monitor.py ++++ b/keylime/push_agent_monitor.py +@@ -171,6 +171,29 @@ def cancel_agent_timeout(agent_id: str) -> None: + logger.error("Error cancelling timeout for agent %s: %s", agent_id, e) + + ++def cancel_all_timeouts() -> None: ++ """Cancel all scheduled PUSH mode agent timeouts. ++ ++ Called during shutdown to prevent timeout callbacks from firing ++ against a stopping event loop. 
++ """ ++ with _agent_timeout_handles_lock: ++ handles = dict(_agent_timeout_handles) ++ _agent_timeout_handles.clear() ++ ++ if not handles: ++ return ++ ++ io_loop = tornado.ioloop.IOLoop.current() ++ for agent_id, handle in handles.items(): ++ try: ++ io_loop.remove_timeout(handle) ++ except Exception as e: ++ logger.debug("Could not remove timeout for agent %s during shutdown: %s", agent_id, e) ++ ++ logger.info("Cancelled %d PUSH mode agent timeout(s) for shutdown", len(handles)) ++ ++ + def check_push_agent_timeouts() -> None: + """Check all PUSH mode agents for timeouts and mark failed ones. + +diff --git a/keylime/revocation_notifier.py b/keylime/revocation_notifier.py +index abab08b..f7efece 100644 +--- a/keylime/revocation_notifier.py ++++ b/keylime/revocation_notifier.py +@@ -259,9 +259,13 @@ def stop_broker() -> None: + + + def shutdown_webhook_workers() -> None: +- """Convenience function to shutdown webhook workers using the global manager.""" +- manager = _get_webhook_manager() +- manager.shutdown_workers() ++ """Shutdown webhook workers if the manager was ever initialized. ++ ++ If no revocation notifications were sent in this process, the manager ++ is still None and there is nothing to shut down. ++ """ ++ if _webhook_manager is not None: ++ _webhook_manager.shutdown_workers() + + + def notify(tosend: Dict[str, Any]) -> None: +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index a415496..09cbb97 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -6,6 +6,8 @@ using multiprocessing.Manager(). 
+ + import atexit + import multiprocessing as mp ++import multiprocessing.process ++import os + import threading + import time + from typing import Any, Dict, List, Optional +@@ -137,8 +139,6 @@ class SharedDataManager: + # Register handler to reinitialize manager connection after fork + # This is needed because Manager uses network connections that don't survive fork + try: +- import os # pylint: disable=import-outside-toplevel +- + self._parent_pid = os.getpid() + logger.debug("SharedDataManager initialized in process %d", self._parent_pid) + except Exception as e: +@@ -283,15 +283,48 @@ class SharedDataManager: + """Cleanup shared resources. + + This is automatically called on exit but can be called manually +- for explicit cleanup. ++ for explicit cleanup. Only the parent process (the one that ++ created the Manager) is allowed to shut it down; child workers ++ forked from the parent skip the call to avoid the ++ ``AssertionError: can only join a child process`` raised by ++ ``multiprocessing`` when a non-parent tries to join. + """ +- if hasattr(self, "_manager"): +- logger.debug("Shutting down SharedDataManager") +- try: +- self._manager.shutdown() +- logger.info("SharedDataManager shutdown complete") +- except Exception as e: +- logger.error("Error during SharedDataManager shutdown: %s", e) ++ if not hasattr(self, "_manager"): ++ return ++ ++ if hasattr(self, "_parent_pid") and os.getpid() != self._parent_pid: ++ logger.debug( ++ "Skipping SharedDataManager shutdown in child process %d (parent is %d)", ++ os.getpid(), ++ self._parent_pid, ++ ) ++ return ++ ++ logger.debug("Shutting down SharedDataManager") ++ try: ++ self._manager.shutdown() ++ logger.info("SharedDataManager shutdown complete") ++ except Exception: ++ logger.exception("Error during SharedDataManager shutdown") ++ ++ def deregister_child(self) -> None: ++ """Remove the Manager's server process from multiprocessing's child tracking. 
++ ++ Must be called in each forked worker **after** ``fork()``. Without ++ this, Python's ``multiprocessing.util._exit_function`` atexit handler ++ tries to ``join()`` the Manager server process in every child worker, ++ causing ``AssertionError: can only join a child process`` because the ++ Manager was spawned by the parent, not the child. ++ """ ++ # The Manager's server process is stored in _manager._process ++ server_process = getattr(self._manager, "_process", None) ++ if server_process is not None: ++ multiprocessing.process._children.discard(server_process) # type: ignore[attr-defined] # pylint: disable=protected-access ++ logger.debug( ++ "Deregistered Manager server process (pid %s) from child tracking in worker %d", ++ getattr(server_process, "pid", "?"), ++ os.getpid(), ++ ) + + def __repr__(self) -> str: + stats = self.get_stats() +@@ -364,6 +397,18 @@ def get_shared_memory() -> SharedDataManager: + return _global_shared_manager + + ++def deregister_shared_memory_child() -> None: ++ """Deregister the Manager's server process in a forked child worker. ++ ++ Call this after ``tornado.process.fork_processes()`` (or any ``fork()``) ++ to prevent Python's atexit handler from trying to ``join()`` the Manager ++ server process in the child, which would raise ++ ``AssertionError: can only join a child process``. ++ """ ++ if _global_shared_manager is not None: ++ _global_shared_manager.deregister_child() ++ ++ + def cleanup_global_shared_memory() -> None: + """Cleanup the global shared memory manager. + +diff --git a/keylime/shutdown.py b/keylime/shutdown.py +new file mode 100644 +index 0000000..72f1c76 +--- /dev/null ++++ b/keylime/shutdown.py +@@ -0,0 +1,21 @@ ++"""Shutdown coordination for graceful server termination. ++ ++Provides a process-wide shutdown flag that attestation loops and retry ++schedulers check before starting new work. 
Setting the flag prevents ++new IOLoop callbacks from being scheduled and allows in-flight ++operations to drain before the event loop stops. ++""" ++ ++import asyncio ++ ++_shutdown_event = asyncio.Event() ++ ++ ++def request_shutdown() -> None: ++ """Signal that the process is shutting down.""" ++ _shutdown_event.set() ++ ++ ++def is_shutting_down() -> bool: ++ """Return True if shutdown has been requested.""" ++ return _shutdown_event.is_set() +diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py +index b62debd..7c8a71b 100644 +--- a/keylime/web/base/server.py ++++ b/keylime/web/base/server.py +@@ -1,5 +1,6 @@ + import asyncio + import multiprocessing ++import signal + from abc import ABC, abstractmethod + from functools import wraps + from ssl import CERT_OPTIONAL +@@ -7,8 +8,9 @@ from typing import TYPE_CHECKING, Any, Callable, Optional + + import tornado + +-from keylime import api_version, config, keylime_logging, web_util ++from keylime import api_version, config, keylime_logging, shutdown, web_util + from keylime.models.base.db import db_manager ++from keylime.shared_data import deregister_shared_memory_child + from keylime.web.base.action_handler import ActionHandler + from keylime.web.base.route import Route + +@@ -251,6 +253,8 @@ class Server(ABC): + # Tornado servers are instantiated by calling start_single() or start_multi(), so set to None initially + self.__tornado_http_server: Optional[tornado.httpserver.HTTPServer] = None + self.__tornado_https_server: Optional[tornado.httpserver.HTTPServer] = None ++ self._server_stopped: Optional[asyncio.Event] = None ++ self._shutdown_task: Optional[asyncio.Task[None]] = None + + async def start_single(self) -> None: + """Instantiates and starts the server (with one Tornado HTTPServer instance to handle HTTP connections +@@ -273,7 +277,82 @@ class Server(ABC): + https_server.add_sockets(self.__tornado_https_sockets) + self.__tornado_https_server = https_server + +- await asyncio.Event().wait() 
++ # Create the stop event before installing signal handlers so that ++ # _graceful_shutdown() can always set it, even if a signal arrives ++ # before we reach the wait(). ++ self._server_stopped = asyncio.Event() ++ ++ # Install signal handlers for graceful shutdown ++ self._install_signal_handlers() ++ ++ try: ++ # Hook for subclasses to perform work after servers are listening ++ # but before blocking (e.g. activate agents). ++ await self._on_server_started() ++ await self._server_stopped.wait() ++ finally: ++ # Remove signal handlers before returning to asyncio.run()'s ++ # teardown, which closes the wakeup fd and replaces remaining ++ # handlers with _sighandler_noop. Any signal arriving after ++ # that would write to the closed fd, causing ++ # "OSError: Bad file descriptor". ++ self._remove_signal_handlers() ++ ++ async def _on_server_started(self) -> None: ++ """Called after servers are listening but before blocking. ++ ++ Override in subclasses to perform post-startup work such as ++ activating agents. The default implementation does nothing. 
++ """ ++ ++ def _install_signal_handlers(self) -> None: ++ """Install SIGINT/SIGTERM handlers for graceful shutdown.""" ++ loop = asyncio.get_event_loop() ++ ++ async def _run_graceful_shutdown() -> None: ++ try: ++ await self._graceful_shutdown() ++ except Exception: ++ logger.exception("Graceful shutdown failed") ++ finally: ++ if self._server_stopped is not None: ++ self._server_stopped.set() ++ ++ def _make_handler(signame: str) -> Callable[[], None]: ++ def _handler() -> None: ++ if shutdown.is_shutting_down(): ++ logger.warning("Shutdown already in progress, ignoring %s", signame) ++ return ++ logger.info("Received %s, shutting down", signame) ++ shutdown.request_shutdown() ++ self._shutdown_task = asyncio.ensure_future(_run_graceful_shutdown()) ++ ++ return _handler ++ ++ loop.add_signal_handler(signal.SIGINT, _make_handler("SIGINT")) ++ loop.add_signal_handler(signal.SIGTERM, _make_handler("SIGTERM")) ++ ++ def _remove_signal_handlers(self) -> None: ++ """Remove SIGINT/SIGTERM handlers from the event loop.""" ++ loop = asyncio.get_event_loop() ++ loop.remove_signal_handler(signal.SIGINT) ++ loop.remove_signal_handler(signal.SIGTERM) ++ ++ async def _graceful_shutdown(self) -> None: ++ """Stop servers and close connections gracefully. ++ ++ Subclasses can override this to cancel component-specific pending work ++ before calling super(). 
++ """ ++ if self.__tornado_http_server: ++ self.__tornado_http_server.stop() ++ if self.__tornado_https_server: ++ self.__tornado_https_server.stop() ++ ++ if self.__tornado_http_server: ++ await self.__tornado_http_server.close_all_connections() ++ if self.__tornado_https_server: ++ await self.__tornado_https_server.close_all_connections() + + def start_multi(self) -> None: + ports = "" +@@ -295,12 +374,19 @@ class Server(ABC): + self.worker_count, + ) + ++ self._pre_fork() ++ + # with StatsCollector(): + # num = manager.Value('i', 0) +- tornado.process.fork_processes(self.worker_count) ++ task_id = tornado.process.fork_processes(self.worker_count) + # num.value = num.value + 1 + # print(num.value) + ++ # Remove the Manager's server process from multiprocessing's child ++ # tracking so Python's atexit handler does not try to join() it in ++ # child workers (the Manager was spawned by the parent). ++ deregister_shared_memory_child() ++ + # Dispose inherited db_manager engine after fork to avoid sharing the + # parent's connection pool, then re-create with a fresh pool for this + # child process. +@@ -309,8 +395,27 @@ class Server(ABC): + if service: + db_manager.make_engine(service) + ++ self._post_fork(task_id) ++ + asyncio.run(self.start_single()) + ++ def _pre_fork(self) -> None: ++ """Called before ``fork_processes()`` in ``start_multi()``. ++ ++ Override in subclasses to perform work that must happen in the ++ parent process before forking (e.g. querying the database for ++ agent lists to distribute across workers). ++ """ ++ ++ def _post_fork(self, task_id: int) -> None: ++ """Called after ``fork_processes()`` in each child worker. ++ ++ *task_id* is the worker index returned by Tornado's ++ ``fork_processes()``. Override to perform per-worker ++ initialization (e.g. resetting inherited DB state, distributing ++ agents). ++ """ ++ + def _setup(self) -> None: + """Defines values to use in place of the defaults for the various server options. 
It is suggested that this is + overriden by the implementing class.""" +diff --git a/keylime/web/verifier_server.py b/keylime/web/verifier_server.py +index 6a0261c..1ada86e 100755 +--- a/keylime/web/verifier_server.py ++++ b/keylime/web/verifier_server.py +@@ -1,12 +1,16 @@ + import asyncio + from typing import List, Optional + +-import tornado.httpserver +-import tornado.ioloop +-import tornado.process + from sqlalchemy.exc import SQLAlchemyError + +-from keylime import cloud_verifier_common, cloud_verifier_tornado, config, keylime_logging ++from keylime import ( ++ cloud_verifier_common, ++ cloud_verifier_tornado, ++ config, ++ keylime_logging, ++ push_agent_monitor, ++ revocation_notifier, ++) + from keylime.authorization.provider import Action + from keylime.common import states + from keylime.db.keylime_db import SessionManager, make_engine +@@ -30,82 +34,76 @@ class VerifierServer(Server): + super().__init__() + self._prepare_agents_on_startup() + self._clear_stale_sessions_on_startup() ++ self._all_agents: List[VerfierMain] = [] + self._worker_agents: Optional[List[VerfierMain]] = None ++ self._activate_task: Optional[asyncio.Task[None]] = None + +- def start_multi(self) -> None: # pylint: disable=no-member +- """Override to support PULL mode agent activation across multiple workers.""" +- # Get all agents from database before forking (only needed for PULL mode) ++ def _pre_fork(self) -> None: ++ """Query agents from database before forking (only needed for PULL mode).""" + logger.info("start_multi() called with operating_mode: %s", self.operating_mode) +- all_agents: List[VerfierMain] = [] ++ self._all_agents = [] + if self.operating_mode == "pull": + verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) + logger.info("Querying agents for verifier_id: %s", verifier_id) +- all_agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) +- logger.info("Found %d agents in database before forking", 
len(all_agents)) +- +- # Log server startup (copied from base class) +- ports = "" +- protocols = "" +- if self._Server__tornado_http_sockets: # type: ignore # pylint: disable=no-member +- ports = str(self.http_port) +- protocols = "HTTP" +- if self._Server__tornado_https_sockets and self.ssl_ctx: # type: ignore # pylint: disable=no-member +- ports = f"{ports}/{self.https_port}" if ports else f"{self.https_port}" +- protocols = f"{protocols}/S" if protocols else "HTTPS" +- logger.info( +- "Listening on %s:%s (%s) with %s worker processes...", +- self.bind_interface, +- ports, +- protocols, +- self.worker_count, +- ) +- +- # Fork worker processes - returns task_id in each child process +- task_id = tornado.process.fork_processes(self.worker_count) ++ self._all_agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) ++ logger.info("Found %d agents in database before forking", len(self._all_agents)) + ++ def _post_fork(self, task_id: int) -> None: ++ """Reset inherited DB state and distribute agents to this worker.""" + # CRITICAL: Reset any database state inherited from parent process. +- # The parent initializes globals when querying agents (line 39), so children +- # inherit initialized state. We must reset to trigger lazy re-initialization. ++ # The parent initializes globals when querying agents in _pre_fork(), ++ # so children inherit initialized state. We must reset to trigger ++ # lazy re-initialization. 
+ cloud_verifier_tornado.reset_verifier_config() + + # Distribute agents to this worker using round-robin (task_id is the worker index) +- if self.operating_mode == "pull" and all_agents: +- self._worker_agents = [all_agents[i] for i in range(task_id, len(all_agents), self.worker_count)] ++ if self.operating_mode == "pull" and self._all_agents: ++ self._worker_agents = [ ++ self._all_agents[i] for i in range(task_id, len(self._all_agents), self.worker_count) ++ ] + logger.info("Worker %d assigned %d agent(s)", task_id, len(self._worker_agents)) + +- # Start this worker's HTTP/HTTPS servers and activate agents +- self.start_single() +- +- def start_single(self) -> None: # type: ignore[override] # pylint: disable=attribute-defined-outside-init,invalid-overridden-method +- """Override to support PULL mode agent activation after server startup.""" +- # Start HTTP/HTTPS servers (logic copied from parent to allow agent activation before blocking) +- # pylint: disable=no-member +- if self._Server__tornado_http_sockets: # type: ignore +- http_server = tornado.httpserver.HTTPServer( +- self._Server__tornado_app, ssl_options=None, max_buffer_size=self.max_upload_size # type: ignore ++ async def _on_server_started(self) -> None: ++ """Activate agents for PULL mode after servers are listening.""" ++ # In start_single() mode (single-process), _pre_fork/_post_fork ++ # are never called so _worker_agents is None and _all_agents is ++ # empty. Query agents directly in that case. 
++ agents = self._worker_agents if self._worker_agents is not None else self._all_agents ++ if self.operating_mode == "pull" and not agents and self._worker_agents is None: ++ verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) ++ agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) ++ if self.operating_mode == "pull" and agents: ++ verifier_host = config.get("verifier", "ip") ++ verifier_port = config.get("verifier", "port") ++ logger.info("Activating %d agent(s) for PULL mode", len(agents)) ++ self._activate_task = asyncio.ensure_future( ++ cloud_verifier_tornado.activate_agents(agents, verifier_host, int(verifier_port)) + ) +- http_server.add_sockets(self._Server__tornado_http_sockets) # type: ignore +- self._Server__tornado_http_server = http_server # type: ignore # pylint: disable=attribute-defined-outside-init + +- if self._Server__tornado_https_sockets and self.ssl_ctx: # type: ignore +- https_server = tornado.httpserver.HTTPServer( +- self._Server__tornado_app, ssl_options=self.ssl_ctx, max_buffer_size=self.max_upload_size # type: ignore ++ async def _graceful_shutdown(self) -> None: ++ """Cancel attestation-specific pending work and drain in-flight operations before stopping servers.""" ++ # Cancel all pending attestation timeouts (retries, polls) ++ cloud_verifier_tornado.cancel_all_pending_events() ++ push_agent_monitor.cancel_all_timeouts() ++ ++ # Wait for in-flight attestation operations to complete before ++ # tearing down webhook workers — in-flight process_agent() calls ++ # may still need to send revocation notifications. 
++ drain_timeout = config.getfloat("verifier", "shutdown_drain_timeout", fallback=10.0) ++ drained = await cloud_verifier_tornado.wait_for_drain(drain_timeout) ++ if not drained: ++ logger.warning( ++ "Shutting down with %d attestation operation(s) still active after %.1fs", ++ cloud_verifier_tornado.get_active_operations(), ++ drain_timeout, + ) +- https_server.add_sockets(self._Server__tornado_https_sockets) # type: ignore +- self._Server__tornado_https_server = https_server # type: ignore # pylint: disable=attribute-defined-outside-init +- # pylint: enable=no-member + +- # Activate agents for PULL mode +- if self.operating_mode == "pull" and self._worker_agents: +- verifier_host = config.get("verifier", "ip") +- verifier_port = config.get("verifier", "port") +- logger.info("Activating %d agent(s) for PULL mode", len(self._worker_agents)) +- asyncio.ensure_future( +- cloud_verifier_tornado.activate_agents(self._worker_agents, verifier_host, int(verifier_port)) +- ) ++ # Shutdown webhook workers after draining so revocation ++ # notifications from in-flight attestations are delivered. ++ if "webhook" in revocation_notifier.get_notifiers(): ++ revocation_notifier.shutdown_webhook_workers() + +- # Wait forever (until event loop is stopped) +- tornado.ioloop.IOLoop.current().start() ++ await super()._graceful_shutdown() + + def _prepare_agents_on_startup(self) -> None: + """Prepare agents in database for verifier startup. +diff --git a/templates/2.6/agent.j2 b/templates/2.6/agent.j2 +new file mode 100644 +index 0000000..26d5b7c +--- /dev/null ++++ b/templates/2.6/agent.j2 +@@ -0,0 +1,313 @@ ++# Keylime agent configuration ++# The Python agent is deprecated and will be removed with the next major release (7.0.0)! 
++# Please migrate to the Rust based agent: https://github.com/keylime/rust-keylime/ ++[agent] ++ ++# The configuration file version number ++version = "{{ agent.version }}" ++ ++# The enabled API versions ++# This sets which of the supported API versions to enable. ++# Only supported versions can be set, which are defined by ++# api::SUPPORTED_API_VERSIONS ++# A list of versions to enable can be provided (e.g. "2.1, 2.2") ++# The following keywords are also supported: ++# - "default": Enables all supported API versions ++# - "latest": Enables only the latest supported API version ++api_versions = "{{ agent.api_versions }}" ++ ++# The agent's UUID. ++# If you set this to "generate", Keylime will create a random UUID. ++# If you set this to "hash_ek", Keylime will set the UUID to the result ++# of 'SHA256(public EK in PEM format)'. ++# If you set this to "environment", Keylime will use the value of the ++# environment variable "KEYLIME_AGENT_UUID" as UUID. ++# If you set this to "dmidecode", Keylime will use the UUID from ++# 'dmidecode -s system-uuid'. ++# If you set this to "hostname", Keylime will use the fully qualified domain ++# name of the current host as the agent id. ++uuid = "{{ agent.uuid }}" ++ ++# The binding address and port for the agent server ++ip = "{{ agent.ip }}" ++port = {{ agent.port }} ++ ++# Address and port where the verifier and tenant can connect to reach the agent. ++# These keys are optional. ++contact_ip = "{{ agent.contact_ip }}" ++contact_port = {{ agent.contact_port }} ++ ++# Path to store agent persistent data ++agent_data_path = "{{ agent_data_path }}" ++ ++# Verifier Information (Push Model specific). ++# Verifier URL ++verifier_url = "{{ agent_verifier_url }}" ++ ++# Verifier TLS CA certificate (Push Model specific). ++# Used to verify the verifier's server certificate. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. 
++verifier_tls_ca_cert = "{{ agent.verifier_tls_ca_cert }}" ++ ++# Server identifier for certification keys ++certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" ++ ++# Evidence version for UEFI logs ++uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" ++ ++# The address and port of the registrar server which the agent communicates with ++registrar_ip = "{{ agent.registrar_ip }}" ++registrar_port = {{ agent.registrar_port }} ++ ++# The TLS port of the registrar server (Push Model specific). ++# Used when registrar_tls_enabled is set to true. ++registrar_tls_port = {{ agent.registrar_tls_port }} ++ ++# Enable TLS communication between agent and registrar (Push Model specific). ++# When enabled, the agent uses TLS (server verification only) with the registrar. ++registrar_tls_enabled = {{ agent.registrar_tls_enabled }} ++ ++# TLS CA certificate for verifying the registrar's server certificate (Push Model specific). ++# Only used when registrar_tls_enabled is true. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++registrar_tls_ca_cert = "{{ agent.registrar_tls_ca_cert }}" ++ ++# The API versions to use when communicating with the registrar (Push Model specific). ++# Supports "default" (all supported), "latest", or a comma-separated list. ++registrar_api_versions = "{{ agent.registrar_api_versions }}" ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ agent.enable_agent_mtls }} ++ ++# The keylime working directory. ++keylime_dir = "{{ agent.keylime_dir }}" ++ ++# Accept invalid TLS certificates (INSECURE - for testing only) ++# When enabled, the agent will accept self-signed or invalid certificates ++# This option is specific to the push attestation model. 
++# This should ONLY be used for testing or development environments ++# Default: False (secure) ++tls_accept_invalid_certs = {{ agent.tls_accept_invalid_certs }} ++ ++# Accept invalid TLS hostnames (INSECURE - for testing only) ++# When enabled, the agent will accept certificates with mismatched hostnames ++# This option is specific to the push attestation model. ++# This should ONLY be used for testing or development environments ++# Default: False (secure) ++tls_accept_invalid_hostnames = {{ agent.tls_accept_invalid_hostnames }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the client and the server in the /var/lib/keylime/cv_ca directory, if not ++# present. ++# ++# The 'server_key', 'server_cert', and 'trusted_client_ca' options should all be ++# set with the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', the '/var/lib/keylime/secure' directory is used, which ++# should contain the files indicated by the 'server_key', 'server_cert', ++# and 'trusted_client_ca' options. ++tls_dir = "{{ agent.tls_dir }}" ++ ++# The name of the file containing the Keylime agent TLS server private key. ++# This private key is used to serve the Keylime agent REST API ++# A new private key is generated in case it is not found. ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = "{{ agent.server_key }}" ++ ++# Set the password used to decrypt the private key file. ++# This password will also be used to protect the generated private key used for ++# mTLS authentication ++# If left empty, the private key will not be encrypted. ++server_key_password = "{{ agent.server_key_password }}" ++ ++# The name of the file containing the payload encryption private key. ++# If set as "default", the "payload-private.pem" value is used. 
++payload_key = "{{ agent.payload_key }}" ++ ++# Set the password used to encrypt the payload private key file. ++# If left empty, the private key will not be encrypted. ++payload_key_password = "{{ agent.payload_key_password }}" ++ ++# The name of the file containing the X509 certificate used as the Keylime agent ++# server TLS certificate. ++# This certificate must be self signed. ++server_cert = "{{ agent.server_cert }}" ++ ++# A list of trusted client CA certificates ++trusted_client_ca = "{{ agent.trusted_client_ca }}" ++ ++# The name of the file used to store the payload encryption key, derived from ++# the U and V parts. ++# This file is stored in the /var/lib/keylime/secure/ directory. ++enc_keyname = "{{ agent.enc_keyname }}" ++ ++# The name of the file used to store the optional decrypted payload. ++# This file is stored in the /var/lib/keylime/secure/ directory. ++dec_payload_file = "{{ agent.dec_payload_file }}" ++ ++# The size of the memory-backed tmpfs partition where Keylime stores keys and ++# the decrypted payload. ++# Use syntax that the 'mount' command would accept as a size parameter for tmpfs. ++# The default below sets it to 1 megabyte. ++secure_size = "{{ agent.secure_size }}" ++ ++# Use this option to set the TPM ownerpassword to something you want to use. ++# Set it to "generate" if you want Keylime to choose a random owner password ++# for you. ++tpm_ownerpassword = "{{ agent.tpm_ownerpassword }}" ++ ++# Whether to allow the agent to automatically extract a zip file in ++# the delivered payload after it has been decrypted, or not. Defaults to "True". ++# After decryption, the archive will be unzipped to a directory in /var/lib/keylime/secure. ++# Note: the limits on the size of the tmpfs partition set above with the 'secure_size' ++# option will affect this. 
++extract_payload_zip = {{ agent.extract_payload_zip }} ++ ++# Whether to listen for revocation notifications from the verifier via ZeroMQ ++enable_revocation_notifications = {{ agent.enable_revocation_notifications }} ++ ++# The IP to listen for revocation notifications via ZeroMQ ++revocation_notification_ip = "{{ agent.revocation_notification_ip }}" ++ ++# The port to listen for revocation notifications via ZeroMQ ++revocation_notification_port = {{ agent.revocation_notification_port }} ++ ++# The path to the certificate to verify revocation messages received from the ++# verifier. The path is relative to /var/lib/keylime. ++# If set to "default", Keylime will use the file RevocationNotifier-cert.crt ++# from the unzipped contents provided by the tenant. ++revocation_cert = "{{ agent.revocation_cert }}" ++ ++# A comma-separated list of Python scripts to run upon receiving a revocation ++# message. Keylime will verify the signature first, then call these Python ++# scripts with the json revocation message passed as argument. The scripts must ++# be located in the directory set via 'revocation_actions_dir' ++# ++# Keylime will also get the list of revocation actions from the file ++# action_list in the unzipped payload content. ++revocation_actions = "{{ agent.revocation_actions }}" ++ ++# The path to the directory containing pre-installed revocation action scripts. ++revocation_actions_dir = "{{ agent.revocation_actions_dir }}" ++ ++# A script to execute after unzipping the tenant payload. This is like ++# cloud-init lite =) Keylime will run it with a /bin/sh environment and ++# with a working directory of /var/lib/keylime/secure/unzipped. ++payload_script = "{{ agent.payload_script }}" ++ ++# In case mTLS for the agent is disabled and the use of payloads is still ++# required, this option has to be set to "True" in order to allow the agent ++# to start. 
Details on why this configuration (mTLS disabled and payload enabled) ++# is generally considered insecure can be found on ++# https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_insecure_payload = {{ agent.enable_insecure_payload }} ++ ++# Whether to allow running revocation actions sent as part of the payload. ++# Setting to false limits revocation actions to pre-installed ones. ++allow_payload_revocation_actions = {{ agent.allow_payload_revocation_actions }} ++ ++# Maximum number of retries for exponential backoff ++exponential_backoff_max_retries = {{ agent.exponential_backoff_max_retries }} ++# Initial delay in milliseconds for exponential backoff ++exponential_backoff_initial_delay = {{ agent.exponential_backoff_initial_delay }} ++# Maximum delay in milliseconds for exponential backoff ++exponential_backoff_max_delay = {{ agent.exponential_backoff_max_delay }} ++ ++# List of hash algorithms used for PCRs ++# Accepted values: sha512, sha384, sha256, sha1 ++tpm_hash_alg = "{{ agent.tpm_hash_alg }}" ++ ++# List of encryption algorithms to use with the TPM ++# Accepted values: ecc, rsa ++tpm_encryption_alg = "{{ agent.tpm_encryption_alg }}" ++ ++# List of signature algorithms to use ++# Accepted values: rsassa, rsapss, ecdsa, ecdaa, ecschnorr ++tpm_signing_alg = "{{ agent.tpm_signing_alg }}" ++ ++# If an EK is already present on the TPM (e.g., with "tpm2_createek") and ++# you require Keylime to use this EK, change "generate" to the actual EK ++# handle (e.g. "0x81000000"). The Keylime agent will then not attempt to ++# create a new EK upon startup, and neither will it flush the EK upon exit. ++ek_handle = "{{ agent.ek_handle }}" ++ ++# Enable IDevID and IAK usage ++enable_iak_idevid = {{ agent.enable_iak_idevid }} ++ ++# Select IDevID and IAK templates or algorithms for regenerating the keys. ++# By default the template will be detected automatically from the certificates. 
This will happen if iak_idevid_template is left empty or set as "default" or "detect". ++# Choosing a template will override the name and asymmetric algorithm choices. To use these choices, set iak_idevid_template to "manual" ++# Templates are specified in the TCG document found here, section 7.3.4: ++# https://trustedcomputinggroup.org/wp-content/uploads/TPM-2p0-Keys-for-Device-Identity-and-Attestation_v1_r12_pub10082021.pdf ++# ++# Accepted values: ++# iak_idevid_template: default, detect, H-1, H-2, H-3, H-4, H-5, manual ++# iak_idevid_asymmetric_alg: rsa, ecc ++# iak_idevid_name_alg: sha256, sm3_256, sha384, sha512 ++iak_idevid_template = "{{ agent.iak_idevid_template }}" ++# In order for these values to be used, set the iak_idevid_template option to manual ++iak_idevid_asymmetric_alg = "{{ agent.iak_idevid_asymmetric_alg }}" ++iak_idevid_name_alg = "{{ agent.iak_idevid_name_alg }}" ++ ++# Alternatively if the keys are persisted, provide the handles for their location below, and optionally their passwords. ++# If handles are provided, they will take priority over templates/algorithms selected above. ++# To use a hex password, use the prefix "hex:" at the start of the password. ++idevid_password = "{{ agent.idevid_password }}" ++idevid_handle = "{{ agent.idevid_handle }}" ++ ++iak_password = "{{ agent.iak_password }}" ++iak_handle = "{{ agent.iak_handle }}" ++ ++# The name of the file containing the X509 IAK certificate. ++# If set as "default", the "iak-cert.crt" value is used ++# If a relative path is set, it will be considered relative from the keylime_dir. ++# If an absolute path is set, it is used without change. ++# ++# To override iak_cert, set KEYLIME_AGENT_IAK_CERT environment variable. ++iak_cert = "{{ agent.iak_cert }}" ++ ++# The name of the file containing the X509 IDevID certificate. ++# If set as "default", the "idevid-cert.crt" value is used ++# If a relative path is set, it will be considered relative from the keylime_dir. 
++# If an absolute path is set, it is used without change. ++# ++# To override idevid_cert, set KEYLIME_AGENT_IDEVID_CERT environment variable. ++idevid_cert = "{{ agent.idevid_cert }}" ++ ++# The user account to switch to to drop privileges when started as root ++# If left empty, the agent will keep running with high privileges. ++# The user and group specified here must allow the user to access the ++# WORK_DIR (typically /var/lib/keylime) and /dev/tpmrm0. Therefore, ++# suggested value for the run_as parameter is keylime:tss. ++# The following commands should be used to set ownership before running the ++# agent: ++# chown keylime /var/lib/keylime ++# ++# If tpmdata.yml already exists: ++# chown keylime /var/lib/keylime/tpmdata.yml ++# ++# If cv_ca directory exists: ++# chown keylime /var/lib/keylime/cv_ca ++# chown keylime /var/lib/keylime/cv_ca/cacert.crt ++run_as = "{{ agent.run_as }}" ++ ++# Path from where the agent will read the IMA measurement log. ++# ++# If set as "default", Keylime will use the default path: ++# The default path is /sys/kernel/security/ima/ascii_runtime_measurements ++# If set as a relative path, it will be considered from the root path "/". ++# If set as an absolute path, it will use it without changes ++ima_ml_path = "{{ agent.ima_ml_path }}" ++ ++# Path from where the agent will read the measured boot event log. ++# ++# If set as "default", Keylime will use the default path: ++# The default path is /sys/kernel/security/tpm0/binary_bios_measurements ++# If set as a relative path, it will be considered from the root path "/". ++# If set as an absolute path, it will use it without changes ++measuredboot_ml_path = "{{ agent.measuredboot_ml_path }}" +diff --git a/templates/2.6/ca.j2 b/templates/2.6/ca.j2 +new file mode 100644 +index 0000000..03ebe3b +--- /dev/null ++++ b/templates/2.6/ca.j2 +@@ -0,0 +1,39 @@ ++# Keylime CA configuration ++[ca] ++ ++# The keystore password ++# This password is used to protect the generated CA private key. 
++password = {{ ca.password }} ++ ++# The configuration file version number ++version = {{ ca.version }} ++ ++# CountryName argument (C) of the Issuer when generating certificates ++cert_country = {{ ca.cert_country }} ++ ++# CommonName argument (CN) of the Issuer when generating certificates ++cert_ca_name = {{ ca.cert_ca_name }} ++ ++# StateOrProvinceName argument (S) of the Issuer when generating certificates ++cert_state = {{ ca.cert_state }} ++ ++# Locality argument (L) of the Issuer when generating certificates ++cert_locality = {{ ca.cert_locality }} ++ ++# Organization argument (O) of the Issuer when generating certificates ++cert_organization = {{ ca.cert_organization }} ++ ++# OrganizationalUnit argument (OU) of the Issuer when generating certificates ++cert_org_unit = {{ ca.cert_org_unit }} ++ ++# CA certificate validity time in days ++cert_ca_lifetime = {{ ca.cert_ca_lifetime }} ++ ++# Default generated certificate validity time in days ++cert_lifetime = {{ ca.cert_lifetime }} ++ ++# Key length in bits ++cert_bits = {{ ca.cert_bits }} ++ ++# Certificate Revocation List (CRL) distribution address (URL) ++cert_crl_dist = {{ ca.cert_crl_dist }} +diff --git a/templates/2.6/logging.j2 b/templates/2.6/logging.j2 +new file mode 100644 +index 0000000..9bd8deb +--- /dev/null ++++ b/templates/2.6/logging.j2 +@@ -0,0 +1,33 @@ ++# Keylime logging configuration ++ ++# The configuration file version number ++[logging] ++version = {{ logging.version }} ++ ++[loggers] ++keys = {{ loggers.get('keys') }} ++ ++[handlers] ++keys = {{ handlers.get('keys') }} ++ ++[formatters] ++keys = {{ formatters.get('keys') }} ++ ++[formatter_formatter] ++format = {{ formatter_formatter.format }} ++datefmt = {{ formatter_formatter.datefmt }} ++ ++[logger_root] ++level = {{ logger_root.level }} ++handlers = {{ logger_root.handlers }} ++ ++[handler_consoleHandler] ++class = {{ handler_consoleHandler.class }} ++level = {{ handler_consoleHandler.level }} ++formatter = {{ 
handler_consoleHandler.formatter }} ++args = {{ handler_consoleHandler.args }} ++ ++[logger_keylime] ++level = {{ logger_keylime.level }} ++qualname = {{ logger_keylime.qualname }} ++handlers = {{ logger_keylime.handlers }} +diff --git a/templates/2.6/mapping.json b/templates/2.6/mapping.json +new file mode 100644 +index 0000000..ce10d12 +--- /dev/null ++++ b/templates/2.6/mapping.json +@@ -0,0 +1,11 @@ ++{ ++ "version": "2.6", ++ "type": "update", ++ "components": { ++ "verifier": { ++ "add": { ++ "shutdown_drain_timeout": "10" ++ } ++ } ++ } ++} +diff --git a/templates/2.6/registrar.j2 b/templates/2.6/registrar.j2 +new file mode 100644 +index 0000000..06f026e +--- /dev/null ++++ b/templates/2.6/registrar.j2 +@@ -0,0 +1,168 @@ ++# Keylime registrar configuration ++[registrar] ++ ++# The configuration file version number ++version = {{ registrar.version }} ++ ++# The binding address and port for the registrar server ++ip = "{{ registrar.ip }}" ++port = {{ registrar.port }} ++tls_port = {{ registrar.tls_port }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the registrar server in the /var/lib/keylime/reg_ca directory, if not present. ++# ++# The 'server_key', 'server_cert', and 'trusted_client_ca' options should all be ++# set with the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', share the files with the verifier by using the ++# '/var/lib/keylime/cv_ca' directory, which should contain the files indicated by ++# the 'server_key', 'server_cert', and 'trusted_client_ca' options. ++tls_dir = {{ registrar.tls_dir }} ++ ++# The name of the file containing the Keylime registrar server private key. ++# The file should be stored in the directory set in the 'tls_dir' option. 
++# This private key is used to serve the Keylime registrar REST API ++# ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = {{ registrar.server_key }} ++ ++# Set the password used to decrypt the private key file. ++# If 'tls_dir = generate', this password will also be used to protect the ++# generated server private key. ++# If left empty, the private key will not be encrypted. ++server_key_password = {{ registrar.server_key_password }} ++ ++# The name of the file containing the Keylime registrar server certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# ++# If set as 'default', the 'server-cert.crt' value is used. ++server_cert = {{ registrar.server_cert }} ++ ++# Additional Subject Alternative Names (SANs) to include in auto-generated ++# server certificates when 'tls_dir = generate'. ++# ++# This is a comma-separated list of hostnames and/or IP addresses that will be ++# added to the server certificate's SAN extension. This allows clients to verify ++# the server's hostname when connecting. ++# ++# By default, the certificate will automatically include: ++# - localhost, 127.0.0.1, and ::1 ++# - The system's hostname and FQDN ++# - The IP address from the 'ip' option (if not 0.0.0.0 or ::) ++# ++# Use this option to add additional names, such as: ++# - External DNS names (e.g., registrar.example.com) ++# - Load balancer addresses ++# - Additional IP addresses ++# ++# Example: cert_subject_alternative_names = registrar.example.com,10.0.0.5,registrar-internal ++# Leave empty to use only the automatically detected names. ++cert_subject_alternative_names = {{ registrar.cert_subject_alternative_names }} ++ ++# The list of trusted client CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. 
++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_client_ca = {{ registrar.trusted_client_ca }} ++ ++# Authorization provider to use for access control decisions. ++# ++# Available providers: ++# simple - Default provider implementing 2-category access control: ++# - PUBLIC: No authentication (agent registration, activation, version) ++# - ADMIN: mTLS client certificate (listing, viewing, deleting registrations) ++# ++# The 'simple' provider enforces strict separation: ++# - Agent registration endpoints are public (no authentication required) ++# - Management endpoints require mTLS client certificate authentication ++# ++# SECURITY NOTE: Never distribute client certificates signed by the registrar's ++# trusted CA to agents. Agents should register via public endpoints only. ++# ++# Certificate requirements: ++# - Agents: No client certs needed (public registration endpoints) ++# - Admins: Client certs signed by trusted CA with Client Authentication EKU ++authorization_provider = {{ registrar.authorization_provider }} ++ ++# Database URL Configuration ++# See this document https://keylime.readthedocs.io/en/latest/installation.html#database-support ++# for instructions on using different database configurations. ++# ++# An example of database_url value for using sqlite: ++# sqlite:////var/lib/keylime/reg_data.sqlite ++# An example of database_url value for using mysql: ++# mysql+pymysql://keylime:keylime@keylime_db:[port]/registrar?charset=utf8 ++# ++# If set as 'sqlite' keyword, will use the configuration set by the file located ++# at "/var/lib/keylime/reg_data.sqlite". 
++database_url = {{ registrar.database_url }} ++ ++# Limits for DB connection pool size in sqlalchemy ++# (https://docs.sqlalchemy.org/en/14/core/pooling.html#api-documentation-available-pool-implementations) ++database_pool_sz_ovfl = {{ registrar.database_pool_sz_ovfl }} ++ ++# Whether to automatically update the DB schema using alembic ++auto_migrate_db = {{ registrar.auto_migrate_db }} ++ ++# Durable Attestation is currently marked as an experimental feature ++# In order to enable Durable Attestation, an "adapter" for a Persistent data Store ++# (time-series like database) needs to be specified. Some example adapters can be ++# found under "da/examples" so, for instance ++# "durable_attestation_import = keylime.da.examples.redis.py" ++# could be used to interact with a Redis (Persistent data Store) ++durable_attestation_import = {{ registrar.durable_attestation_import }} ++ ++# If an adapter for Durable Attestation was specified, then the URL for a Persistent Store ++# needs to be specified here. A second optional URL could be specified, for a ++# Rekor Transparency Log. A third additional URL could be specified, pointing to a ++# Time Stamp Authority (TSA), compatible with RFC3161. Additionally, one might need to ++# specify a path containing certificates required by the stores or TSA. 
Continuing with ++# the above example, the following values could be assigned to the parameters: ++# "persistent_store_url=redis://127.0.0.1:6379?db=10&password=/root/redis.auth&prefix=myda" ++# "transparency_log_url=http://127.0.0.1:3000" ++# "time_stamp_authority_url=http://127.0.0.1:2020" ++# "time_stamp_authority_certs_path=~/mycerts/tsa_cert1.pem" ++persistent_store_url = {{ registrar.persistent_store_url }} ++transparency_log_url = {{ registrar.transparency_log_url }} ++time_stamp_authority_url = {{ registrar.time_stamp_authority_url }} ++time_stamp_authority_certs_path = {{ registrar.time_stamp_authority_certs_path }} ++ ++# If Durable Attestation was enabled, which requires a Persistent Store URL ++# to be specified, the two following parameters control the format and encoding ++# of the stored attestation artifacts (defaults "json" for format and "" for encoding) ++persistent_store_format = {{ registrar.persistent_store_format }} ++persistent_store_encoding = {{ registrar.persistent_store_encoding }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# the digest algorithm for signatures is controlled by this parameter (default "sha256") ++transparency_log_sign_algo = {{ registrar.transparency_log_sign_algo }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# a keylime administrator can specify some agent attributes (including attestation ++# artifacts, such as quotes and logs) to be signed by the registrar. The use of "all" ++# will result in the whole "package" (agent + artifacts) being signed and leaving it empty ++# will mean no signing should be done. ++signed_attributes = {{ registrar.signed_attributes }} ++ ++# What TPM-based identity is allowed to be used to register agents. ++# The options "default" and "iak_idevid" will only allow registration with IAK and IDevID if python cryptography is version 38.0.0 or higher. 
++# The following options are accepted: ++# "default": either an EK or IAK and IDevID may be used. In the case that cryptography version is <38.0.0 only EK will be used ++# "ek_cert_or_iak_idevid": this is equivalent to default ++# "ek_cert": only allow agents to use an EK to register ++# "iak_idevid": only allow agents with an IAK and IDevID to register ++tpm_identity = {{ registrar.tpm_identity }} ++ ++# The below option controls what Keylime does when it encounters a certificate which is not parse-able when strict ++# ASN.1 Distinguished Encoding Rules (DER) are enforced. The default behaviour ("warn") is to log a warning but still ++# accept the certificate, so long as it can be interpreted by a fallback parser. ++# The following values are accepted: ++# "warn": log a warning and re-encode the certificate with the more-forgiving fallback parser (the default) ++# "reject": log an error and refuse to accept the certificate ++# "ignore": silently re-encode the certificate without logging a message ++malformed_cert_action = {{ registrar.malformed_cert_action }} +\ No newline at end of file +diff --git a/templates/2.6/tenant.j2 b/templates/2.6/tenant.j2 +new file mode 100644 +index 0000000..79934bf +--- /dev/null ++++ b/templates/2.6/tenant.j2 +@@ -0,0 +1,130 @@ ++# Keylime tenant configuration ++[tenant] ++ ++# The configuration file version number ++version = {{ tenant.version }} ++ ++# The verifier IP address and port ++verifier_ip = {{ tenant.verifier_ip }} ++verifier_port = {{ tenant.verifier_port }} ++ ++# The registrar IP address and port ++registrar_ip = {{ tenant.registrar_ip }} ++registrar_port = {{ tenant.registrar_port }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'default', share the files with the verifier by using the ++# '/var/lib/keylime/cv_ca', which should contain the files indicated by the ++# 'client_key', 'client_cert', and 'trusted_server_ca' options. 
++tls_dir = {{ tenant.tls_dir }} ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ tenant.enable_agent_mtls }} ++ ++# The name of the file containing the Keylime tenant client private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used by the Keylime tenant to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-private.pem' value is used. ++client_key = {{ tenant.client_key }} ++ ++# Set the password used to encrypt the private key file. ++# If client_key is set as 'default', should match the password set in the ++# 'client_key_password' option in the verifier configuration file ++client_key_password = {{ tenant.client_key_password }} ++ ++# The name of the file containing the Keylime tenant client certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This certificate is used by the Keylime tenant to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-cert.crt' value is used. ++client_cert = {{ tenant.client_cert }} ++ ++# The list of trusted server CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_server_ca = {{ tenant.trusted_server_ca }} ++ ++# Directory containing the EK CA certificates. ++# The EK certificate provided by the agent will be validated against the CAs ++# located in this directory. ++tpm_cert_store = {{ tenant.tpm_cert_store }} ++ ++# Maximum size of the payload in bytes. 
The value should match the 'secure_size' ++# option in the agent configuration ++max_payload_size = {{ tenant.max_payload_size }} ++ ++# List of hash algorithms used for PCRs ++# Accepted values: sha512, sha384, sha256, sha1 ++accept_tpm_hash_algs = {{ tenant.accept_tpm_hash_algs }} ++ ++# List of encryption algorithms to use with the TPM ++# Accepted values: ecc, rsa ++accept_tpm_encryption_algs = {{ tenant.accept_tpm_encryption_algs }} ++ ++# List of signature algorithms to use ++# Accepted values: rsassa, rsapss, ecdsa, ecdaa, ecschnorr ++accept_tpm_signing_algs = {{ tenant.accept_tpm_signing_algs }} ++ ++# Whether or not to use an exponential backoff algorithm for retries. ++exponential_backoff = {{ tenant.exponential_backoff }} ++ ++# Either how long to wait between failed attempts to communicate with the TPM ++# in seconds, or the base for the exponential backoff algorithm if enabled via ++# "exponential_backoff" option. ++# Floating point values are accepted. ++retry_interval = {{ tenant.retry_interval }} ++ ++# Integer number of retries to communicate with the TPM before giving up. ++max_retries = {{ tenant.max_retries }} ++ ++# Request timeout in seconds. ++request_timeout = {{ tenant.request_timeout }} ++ ++# Tell the tenant whether to require an EK certificate from the TPM. ++# If set to False the tenant will ignore EK certificates entirely. ++# ++# WARNING: SETTING THIS OPTION TO FALSE IS VERY DANGEROUS!!! ++# ++# If you disable this check, then you may not be talking to a real TPM. ++# All the security guarantees of Keylime rely upon the security of the EK ++# and the assumption that you are talking to a spec-compliant and honest TPM. ++ ++# Some physical TPMs do not have EK certificates, so you may need to set ++# this to "False" for some deployments. 
If you do set it to "False", you ++# MUST use the 'ek_check_script' option below to specify a script that will ++# check the provided EK against an allowlist for the environment that has ++# been collected in a trustworthy way. For example, the cloud provider ++# might provide a signed list of EK public key hashes. Then you could write ++# an ek_check_script that checks the signature of the allowlist and then ++# compares the hash of the given EK with the allowlist. ++require_ek_cert = {{ tenant.require_ek_cert }} ++ ++# Optional script to execute to check the EK and/or EK certificate against an ++# allowlist or any other additional EK processing you want to do. Runs in ++# /var/lib/keylime. You can also specify an absolute path to the script. ++# Script should return 0 if the EK or EK certificate are valid. Any other ++# return value will invalidate the tenant quote check and prevent ++# bootstrapping a key. ++# ++# The various keys are passed to the script via environment variables: ++# EK - contains a PEM encoded version of the public EK ++# EK_CERT - contains a DER encoded EK certificate if one is available. ++# PROVKEYS - contains a json document containing EK, EKcert, and AIK from the ++# provider. EK and AIK are in PEM format. The EKcert is in base64 encoded ++# DER format. ++# ++# Set to blank to disable this check. See warning above if require_ek_cert ++# is "False". ++ek_check_script = {{ tenant.ek_check_script }} ++ ++# Path to file containing the measured boot reference state ++mb_refstate = {{ tenant.mb_refstate }} +diff --git a/templates/2.6/verifier.j2 b/templates/2.6/verifier.j2 +new file mode 100644 +index 0000000..43c8e54 +--- /dev/null ++++ b/templates/2.6/verifier.j2 +@@ -0,0 +1,350 @@ ++# Keylime verifier configuration ++[verifier] ++ ++# The configuration file version number ++version = {{ verifier.version }} ++ ++# Unique identifier for each verifier instance.
++uuid = {{ verifier.uuid }} ++ ++# The binding address and port for the verifier server ++ip = "{{ verifier.ip }}" ++port = {{ verifier.port }} ++ ++# The address and port of registrar server that the verifier communicates with ++registrar_ip = {{ verifier.registrar_ip }} ++registrar_port = {{ verifier.registrar_port }} ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ verifier.enable_agent_mtls }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the client and the server in the /var/lib/keylime/cv_ca directory, if not ++# present. ++# ++# The 'server_key', 'server_cert', 'client_key', 'client_cert', ++# 'trusted_client_ca', and 'trusted_server_ca' options should all be set with ++# the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', the '/var/lib/keylime/cv_ca' directory is used, which ++# should contain the files indicated by the 'server_key', 'server_cert', ++# 'client_key', 'client_cert', 'trusted_client_ca', and 'trusted_server_ca' ++# options. ++tls_dir = {{ verifier.tls_dir }} ++ ++# The name of the file containing the Keylime verifier server private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used to serve the Keylime verifier REST API ++# ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = {{ verifier.server_key }} ++ ++# Set the password used to decrypt the server private key file. ++# If 'tls_dir = generate', this password will also be used to protect the ++# generated server private key. ++# If left empty, the private key will not be encrypted. 
++server_key_password = {{ verifier.server_key_password }} ++ ++# The name of the file containing the Keylime verifier server certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# ++# If set as 'default', the 'server-cert.crt' value is used. ++server_cert = {{ verifier.server_cert }} ++ ++# Additional Subject Alternative Names (SANs) to include in auto-generated ++# server certificates when 'tls_dir = generate'. ++# ++# This is a comma-separated list of hostnames and/or IP addresses that will be ++# added to the server certificate's SAN extension. This allows clients to verify ++# the server's hostname when connecting. ++# ++# By default, the certificate will automatically include: ++# - localhost, 127.0.0.1, and ::1 ++# - The system's hostname and FQDN ++# - The IP address from the 'ip' option (if not 0.0.0.0 or ::) ++# ++# Use this option to add additional names, such as: ++# - External DNS names (e.g., verifier.example.com) ++# - Load balancer addresses ++# - Additional IP addresses ++# ++# Example: cert_subject_alternative_names = verifier.example.com,10.0.0.5,verifier-internal ++# Leave empty to use only the automatically detected names. ++cert_subject_alternative_names = {{ verifier.cert_subject_alternative_names }} ++ ++# The list of trusted client CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_client_ca = {{ verifier.trusted_client_ca }} ++ ++# The name of the file containing the Keylime verifier client private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used by the Keylime verifier to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-private.pem' value is used. ++client_key = {{ verifier.client_key }} ++ ++# Set the password used to decrypt the client private key file. 
++# If 'tls_dir = generate', this password will also be used to protect the ++# generated client private key. ++# If left empty, the private key will not be encrypted. ++client_key_password = {{ verifier.client_key_password }} ++ ++# The name of the file containing the Keylime verifier client certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This certificate is used by the Keylime verifier to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-cert.crt' value is used. ++client_cert = {{ verifier.client_cert }} ++ ++# The list of trusted server CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_server_ca = {{ verifier.trusted_server_ca }} ++ ++# Authorization provider to use for access control decisions. ++# ++# Available providers: ++# simple - Default provider implementing 4-category access control: ++# - PUBLIC: No authentication (version info, evidence verification) ++# - AGENT_ONLY: PoP bearer token + resource ownership (attestations) ++# - AGENT_OR_ADMIN: PoP token or mTLS certificate (read own agent) ++# - ADMIN: mTLS client certificate (all management operations) ++# ++# The 'simple' provider enforces strict separation between agent and admin ++# authentication methods: ++# - Agents authenticate via PoP (Proof-of-Possession) bearer tokens only ++# - Admins authenticate via mTLS client certificates only ++# - If an Authorization header is present, mTLS is never used (prevents ++# privilege escalation) ++# ++# SECURITY NOTE: Never distribute client certificates signed by the verifier's ++# trusted CA to agents. Agents should only have PoP tokens for authentication. ++# ++# Certificate requirements: ++# - Pull mode agents: Self-signed server certs are acceptable. If CA-issued, ++# must have Server Authentication EKU only. 
++# - Push mode agents: No client certs from trusted CA. Use PoP tokens only. ++# - Admins: Client certs signed by trusted CA with Client Authentication EKU. ++authorization_provider = {{ verifier.authorization_provider }} ++ ++# Database URL Configuration ++# See this document https://keylime.readthedocs.io/en/latest/installation.html#database-support ++# for instructions on using different database configurations. ++# ++# An example of database_url value for using sqlite: ++# sqlite:////var/lib/keylime/cv_data.sqlite ++# An example of database_url value for using mysql: ++# mysql+pymysql://keylime:keylime@keylime_db:[port]/verifier?charset=utf8 ++# ++# If set as 'sqlite' keyword, will use the configuration set by the file located ++# at "/var/lib/keylime/cv_data.sqlite". ++database_url = {{ verifier.database_url }} ++ ++# Limits for DB connection pool size in sqlalchemy ++# (https://docs.sqlalchemy.org/en/14/core/pooling.html#api-documentation-available-pool-implementations) ++database_pool_sz_ovfl = {{ verifier.database_pool_sz_ovfl }} ++ ++# Whether to automatically update the DB schema using alembic ++auto_migrate_db = {{ verifier.auto_migrate_db }} ++ ++# The number of worker processes to use for the cloud verifier. ++# Set to "0" to create one worker per processor. ++num_workers = {{ verifier.num_workers }} ++ ++# Whether or not to use an exponential backoff algorithm for retries. ++exponential_backoff = {{ verifier.exponential_backoff }} ++ ++# Either how long to wait between failed attempts to connect to a cloud agent ++# in seconds, or the base for the exponential backoff algorithm. ++# Floating point values accepted here. ++retry_interval = {{ verifier.retry_interval }} ++ ++# Number of retries to connect to an agent before giving up. Must be an integer. ++max_retries = {{ verifier.max_retries }} ++ ++# Time between integrity measurement checks, in seconds. If set to "0", checks ++# will be done as fast as possible. Floating point values accepted here. 
++quote_interval = {{ verifier.quote_interval }} ++ ++# The verifier limits the size of upload payloads (allowlists) which defaults to ++# 100MB (104857600 bytes). This setting can be raised (or lowered) based on the ++# size of the actual payloads ++max_upload_size = {{ verifier.max_upload_size }} ++ ++# Timeout in seconds for HTTP requests ++request_timeout = {{ verifier.request_timeout }} ++ ++# The name of the boot attestation policy to use in comparing a measured boot event log ++# with a measured boot reference state. ++# A policy is a Python object that is an instance of `keylime.elchecking.policies.Policy` ++# and was registered by calling `keylime.elchecking.policies.register`. ++# The keylime agent extracts the measured boot event log. ++# The verifier client specifies the measured boot reference state to use; ++# this is specified independently for each agent. ++# Depending on the policy, the same reference state may be usable with multiple agents. ++# The `accept-all` policy ignores the reference state and approves every log. ++measured_boot_policy_name = {{ verifier.measured_boot_policy_name }} ++ ++# This is a list of Python modules to dynamically load, for example to register ++# additional boot attestation policies. ++# Empty strings in the list are ignored. ++# A module here may be relative, in which case it is interpreted ++# relative to the keylime.elchecking package. ++# The default value for this config item is the empty list. ++measured_boot_imports = {{ verifier.measured_boot_imports }} ++ ++# This is used to manage the number of times measured boot attestation ++# is done. In other words, it controls the number of times the call ++# to the measured boot policy engine is made to evaluate the boot log ++# against the policy specified. ++# Here are its possible values and number of bootlog evaluations. ++# once (default) : Bootlog evaluation will be done only once. ++# always : Bootlog evaluation will always be done (i.e.
for unlimited times). ++measured_boot_evaluate = {{ verifier.measured_boot_evaluate }} ++ ++# Severity labels for revocation events strictly ordered from least severity to ++# highest severity. ++severity_labels = {{ verifier.severity_labels }} ++ ++# Severity policy that matches different event_ids to the severity label. ++# The rules are evaluated from the beginning of the list and the first match is ++# used. The event_id can also be a regex. Default policy assigns the highest ++# severity to all events. ++severity_policy = {{ verifier.severity_policy }} ++ ++# If files are already opened when IMA tries to measure them this causes ++# a time of measure, time of use (ToMToU) error entry. ++# By default we ignore those entries and only print a warning. ++# Set to False to treat ToMToU entries as errors. ++ignore_tomtou_errors = {{ verifier.ignore_tomtou_errors }} ++ ++# Durable Attestation is currently marked as an experimental feature ++# In order to enable Durable Attestation, an "adapter" for a Persistent data Store ++# (time-series like database) needs to be specified. Some example adapters can be ++# found under "da/examples" so, for instance ++# "durable_attestation_import = keylime.da.examples.redis.py" ++# could be used to interact with a Redis (Persistent data Store) ++durable_attestation_import = {{ verifier.durable_attestation_import }} ++ ++# If an adapter for Durable Attestation was specified, then the URL for a Persistent Store ++# needs to be specified here. A second optional URL could be specified, for a ++# Rekor Transparency Log. A third additional URL could be specified, pointing to a ++# Time Stamp Authority (TSA), compatible with RFC3161. Additionally, one might need to ++# specify a path containing certificates required by the stores or TSA. 
Continuing with ++# the above example, the following values could be assigned to the parameters: ++# "persistent_store_url=redis://127.0.0.1:6379?db=10&password=/root/redis.auth&prefix=myda" ++# "transparency_log_url=http://127.0.0.1:3000" ++# "time_stamp_authority_url=http://127.0.0.1:2020" ++# "time_stamp_authority_certs_path=~/mycerts/tsa_cert1.pem" ++persistent_store_url = {{ verifier.persistent_store_url }} ++transparency_log_url = {{ verifier.transparency_log_url }} ++time_stamp_authority_url = {{ verifier.time_stamp_authority_url }} ++time_stamp_authority_certs_path = {{ verifier.time_stamp_authority_certs_path }} ++ ++# If Durable Attestation was enabled, which requires a Persistent Store URL ++# to be specified, the two following parameters control the format and encoding ++# of the stored attestation artifacts (defaults "json" for format and "" for encoding) ++persistent_store_format = {{ verifier.persistent_store_format }} ++persistent_store_encoding = {{ verifier.persistent_store_encoding }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# the digest algorithm for signatures is controlled by this parameter (default "sha256") ++transparency_log_sign_algo = {{ verifier.transparency_log_sign_algo }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# a keylime administrator can specify some agent attributes (including attestation ++# artifacts, such as quotes and logs) to be signed by the verifier. The use of "all" ++# will result in the whole "package" (agent + artifacts) being signed and leaving it empty ++# will mean no signing should be done. ++signed_attributes = {{ verifier.signed_attributes }} ++ ++# Require that allowlists are signed with a key passed via the tenant tool ++require_allow_list_signatures = {{ verifier.require_allow_list_signatures }} ++ ++# Attestation mode. Can be 'pull' (traditional) or 'push' (agent-driven). 
++# Default: pull ++mode = {{ verifier.mode }} ++# ++# Lifetime in seconds for challenges sent to agents in push mode. ++challenge_lifetime = {{ verifier.challenge_lifetime }} ++# ++# Timeout in seconds for a single evidence verification task (0 = auto). ++verification_timeout = {{ verifier.verification_timeout }} ++# ++# Rate limiting for session creation endpoint (POST /sessions) in push mode. ++# These settings prevent denial-of-service attacks where an attacker floods the verifier ++# with session creation requests. Only applies when mode = 'push'. ++# ++# Maximum number of session creation requests per IP address within the time window. ++# Allows for multiple agents from same IP (testing, NAT scenarios). ++# Default: 50 ++session_create_rate_limit_per_ip = {{ verifier.session_create_rate_limit_per_ip }} ++# ++# Time window in seconds for IP-based rate limiting. ++# Default: 60 ++session_create_rate_limit_window_ip = {{ verifier.session_create_rate_limit_window_ip }} ++# ++# Maximum number of session creation requests per agent_id within the time window. ++# Allows for agent retries (agent default is 3 retries, this allows ~5 auth attempts). ++# Default: 15 ++session_create_rate_limit_per_agent = {{ verifier.session_create_rate_limit_per_agent }} ++# ++# Time window in seconds for agent-based rate limiting. ++# Default: 60 ++session_create_rate_limit_window_agent = {{ verifier.session_create_rate_limit_window_agent }} ++# ++# Lifetime in seconds for authentication session tokens. ++# After this time, agents must re-authenticate to continue submitting attestations. ++# Default: 180 (3 minutes) ++session_lifetime = {{ verifier.session_lifetime }} ++# ++# Whether to automatically extend the session token expiry when an agent ++# submits an attestation. When enabled, active agents won't need to re-authenticate ++# as long as they continue attesting within the session_lifetime window. 
++# Default: true ++extend_token_on_attestation = {{ verifier.extend_token_on_attestation }} ++ ++# Maximum time in seconds to wait for in-flight attestation operations to ++# complete during shutdown. The verifier will wait up to this long for active ++# database writes and state transitions to finish before stopping the event ++# loop. Increasing this value reduces the risk of inconsistent agent state ++# after an unclean shutdown, at the cost of a slower shutdown. ++# Floating point values accepted here. ++shutdown_drain_timeout = {{ verifier.shutdown_drain_timeout }} ++ ++[revocations] ++ ++# List of revocation notification methods to enable. ++# ++# Available methods are: ++# ++# "agent": Deliver notification directly to the agent via the REST ++# protocol. ++# ++# "zeromq": Enable the ZeroMQ based revocation notification method; ++# zmq_ip and zmq_port options must be set. Currently this only works if you are ++# using keylime-CA. ++# ++# "webhook": Send notification via webhook. The endpoint URL must be ++# configured with 'webhook_url' option. This can be used to notify other ++# systems that do not have a Keylime agent running. ++enabled_revocation_notifications = {{ revocations.enabled_revocation_notifications }} ++ ++# The binding address and port of the revocation notifier service via ZeroMQ. ++zmq_ip = {{ revocations.zmq_ip }} ++zmq_port = {{ revocations.zmq_port }} ++ ++# Webhook url for revocation notifications. 
++webhook_url = {{ revocations.webhook_url }} +diff --git a/test/test_shutdown.py b/test/test_shutdown.py +new file mode 100644 +index 0000000..85a10d3 +--- /dev/null ++++ b/test/test_shutdown.py +@@ -0,0 +1,210 @@ ++"""Unit tests for the shutdown coordination module and verifier drain logic.""" ++ ++# pylint: disable=protected-access,import-outside-toplevel ++ ++import asyncio ++import unittest ++from unittest.mock import patch ++ ++from keylime import shutdown ++ ++ ++class TestShutdownFlag(unittest.TestCase): ++ """Test the process-wide shutdown flag.""" ++ ++ def setUp(self) -> None: ++ # Reset the module-level event before each test ++ shutdown._shutdown_event = asyncio.Event() ++ ++ def test_initial_state_not_shutting_down(self) -> None: ++ self.assertFalse(shutdown.is_shutting_down()) ++ ++ def test_request_shutdown_sets_flag(self) -> None: ++ shutdown.request_shutdown() ++ self.assertTrue(shutdown.is_shutting_down()) ++ ++ def test_request_shutdown_is_idempotent(self) -> None: ++ shutdown.request_shutdown() ++ shutdown.request_shutdown() ++ self.assertTrue(shutdown.is_shutting_down()) ++ ++ ++class TestOperationTracking(unittest.TestCase): ++ """Test _enter_operation / _exit_operation and drain logic.""" ++ ++ def setUp(self) -> None: ++ # Import here so we can reset module globals ++ from keylime import cloud_verifier_tornado as cvt ++ ++ self.cvt = cvt ++ # Save and reset module state ++ self._saved_active = cvt._active_operations ++ self._saved_event = cvt._operations_drained ++ cvt._active_operations = 0 ++ cvt._operations_drained = asyncio.Event() ++ cvt._operations_drained.set() ++ ++ def tearDown(self) -> None: ++ self.cvt._active_operations = self._saved_active ++ self.cvt._operations_drained = self._saved_event ++ ++ def test_initial_state_is_drained(self) -> None: ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_enter_increments_and_clears_drain(self) -> None: ++ 
self.cvt._enter_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 1) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ def test_exit_decrements_and_signals_drain(self) -> None: ++ self.cvt._enter_operation() ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_multiple_operations_drain_on_last_exit(self) -> None: ++ self.cvt._enter_operation() ++ self.cvt._enter_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 2) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 1) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_wait_for_drain_returns_true_when_already_drained(self) -> None: ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(self.cvt.wait_for_drain(1.0)) ++ self.assertTrue(result) ++ finally: ++ loop.close() ++ ++ def test_wait_for_drain_returns_true_after_exit(self) -> None: ++ self.cvt._enter_operation() ++ ++ async def _drain_after_delay() -> bool: ++ async def _exit_soon() -> None: ++ await asyncio.sleep(0.05) ++ self.cvt._exit_operation() ++ ++ asyncio.ensure_future(_exit_soon()) ++ return await self.cvt.wait_for_drain(2.0) ++ ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(_drain_after_delay()) ++ self.assertTrue(result) ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ finally: ++ loop.close() ++ ++ def test_wait_for_drain_returns_false_on_timeout(self) -> None: ++ self.cvt._enter_operation() ++ ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(self.cvt.wait_for_drain(0.1)) ++ self.assertFalse(result) ++ finally: ++ loop.close() ++ ++ ++class 
TestPendingEventRegistry(unittest.TestCase): ++ """Test _register_pending_event / _cancel_pending_event / cancel_all.""" ++ ++ def setUp(self) -> None: ++ from keylime import cloud_verifier_tornado as cvt ++ ++ self.cvt = cvt ++ self._saved_pending = dict(cvt._pending_events) ++ cvt._pending_events.clear() ++ ++ def tearDown(self) -> None: ++ self.cvt._pending_events.clear() ++ self.cvt._pending_events.update(self._saved_pending) ++ ++ def _make_agent(self, agent_id: str = "test-agent-1") -> dict: ++ return {"agent_id": agent_id, "pending_event": None} ++ ++ def test_register_tracks_in_both_locations(self) -> None: ++ agent = self._make_agent() ++ handle = object() ++ self.cvt._register_pending_event(agent, handle) ++ ++ self.assertIs(agent["pending_event"], handle) ++ self.assertIs(self.cvt._pending_events["test-agent-1"], handle) ++ ++ def test_cancel_clears_both_locations(self) -> None: ++ agent = self._make_agent() ++ handle = object() ++ self.cvt._register_pending_event(agent, handle) ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.cvt._cancel_pending_event(agent) ++ ++ self.assertIsNone(agent["pending_event"]) ++ self.assertNotIn("test-agent-1", self.cvt._pending_events) ++ ++ def test_cancel_noop_when_no_pending_event(self) -> None: ++ agent = self._make_agent() ++ # Should not raise ++ self.cvt._cancel_pending_event(agent) ++ self.assertIsNone(agent["pending_event"]) ++ ++ def test_cancel_all_clears_registry(self) -> None: ++ agents = [self._make_agent(f"agent-{i}") for i in range(3)] ++ for i, agent in enumerate(agents): ++ self.cvt._register_pending_event(agent, object()) ++ ++ self.assertEqual(len(self.cvt._pending_events), 3) ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.cvt.cancel_all_pending_events() ++ ++ self.assertEqual(len(self.cvt._pending_events), 0) ++ ++ def test_cancel_all_noop_when_empty(self) -> None: ++ # Should not raise ++ self.cvt.cancel_all_pending_events() ++ ++ ++class 
TestPushAgentMonitorCancelAll(unittest.TestCase): ++ """Test cancel_all_timeouts in push_agent_monitor.""" ++ ++ def setUp(self) -> None: ++ from keylime import push_agent_monitor ++ ++ self.pam = push_agent_monitor ++ with self.pam._agent_timeout_handles_lock: ++ self._saved = dict(self.pam._agent_timeout_handles) ++ self.pam._agent_timeout_handles.clear() ++ ++ def tearDown(self) -> None: ++ with self.pam._agent_timeout_handles_lock: ++ self.pam._agent_timeout_handles.clear() ++ self.pam._agent_timeout_handles.update(self._saved) ++ ++ def test_cancel_all_clears_handles(self) -> None: ++ with self.pam._agent_timeout_handles_lock: ++ self.pam._agent_timeout_handles["a1"] = object() ++ self.pam._agent_timeout_handles["a2"] = object() ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.pam.cancel_all_timeouts() ++ ++ with self.pam._agent_timeout_handles_lock: ++ self.assertEqual(len(self.pam._agent_timeout_handles), 0) ++ ++ def test_cancel_all_noop_when_empty(self) -> None: ++ # Should not raise ++ self.pam.cancel_all_timeouts() ++ ++ ++if __name__ == "__main__": ++ unittest.main() +diff --git a/test/test_verifier_server.py b/test/test_verifier_server.py +index da0feae..e9a47ef 100644 +--- a/test/test_verifier_server.py ++++ b/test/test_verifier_server.py +@@ -256,51 +256,39 @@ class TestVerifierServerEngineDisposal(unittest.TestCase): + "_prepare_agents_on_startup should document why engine disposal is needed", + ) + +- def test_start_multi_resets_verifier_config_after_fork(self): +- """Verify start_multi() resets verifier config in each worker after forking.""" ++ def test_post_fork_resets_verifier_config(self): ++ """Verify _post_fork() resets verifier config to clear inherited database state.""" + # Read the source code + server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() + +- # Find the start_multi method +- pattern = r"def 
start_multi\(self\).*?(?=\n def |\Z)" ++ # Find the _post_fork method ++ pattern = r"def _post_fork\(self.*?\).*?(?=\n def |\Z)" + match = re.search(pattern, source, re.DOTALL) + +- self.assertIsNotNone(match, "start_multi method not found") ++ self.assertIsNotNone(match, "_post_fork method not found") + assert match is not None + + method_body = match.group(0) + +- # Should fork processes +- self.assertIn( +- "fork_processes", +- method_body, +- "start_multi should call tornado.process.fork_processes", +- ) +- +- # After fork, should reset verifier config (which handles engine disposal) +- # Look for the pattern after fork_processes() +- fork_index = method_body.find("fork_processes") +- after_fork = method_body[fork_index:] +- + self.assertIn( + "reset_verifier_config()", +- after_fork, +- "start_multi must call reset_verifier_config() after forking to clear inherited database state", ++ method_body, ++ "_post_fork must call reset_verifier_config() to clear inherited database state", + ) + + self.assertIn( + "cloud_verifier_tornado.reset_verifier_config()", +- after_fork, +- "start_multi should call cloud_verifier_tornado.reset_verifier_config() after forking", ++ method_body, ++ "_post_fork should call cloud_verifier_tornado.reset_verifier_config()", + ) + +- def test_verifier_config_reset_happens_before_worker_operations(self): +- """Verify verifier config reset occurs after fork but before any worker operations.""" +- # Read the source code +- server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") ++ def test_base_server_calls_post_fork_before_start_single(self): ++ """Verify base Server.start_multi() calls _post_fork() after fork and before start_single().""" ++ # Read the base server source code ++ server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "base", "server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() +@@ -314,53 +302,49 @@ class 
TestVerifierServerEngineDisposal(unittest.TestCase): + + # Extract the order of operations + fork_index = method_body.find("fork_processes") +- reset_index = method_body.find("reset_verifier_config()") +- start_single_index = method_body.find("self.start_single()") ++ post_fork_index = method_body.find("_post_fork") ++ start_single_index = method_body.find("start_single()") + + # All should be present + self.assertNotEqual(fork_index, -1, "fork_processes call not found") +- self.assertNotEqual(reset_index, -1, "reset_verifier_config() call not found") ++ self.assertNotEqual(post_fork_index, -1, "_post_fork() call not found") + self.assertNotEqual(start_single_index, -1, "start_single() call not found") + +- # Correct order: fork -> reset_verifier_config -> start_single ++ # Correct order: fork -> _post_fork -> start_single + self.assertLess( + fork_index, +- reset_index, +- "Verifier config reset must happen AFTER forking", ++ post_fork_index, ++ "_post_fork must be called AFTER forking", + ) + self.assertLess( +- reset_index, ++ post_fork_index, + start_single_index, +- "Verifier config reset must happen BEFORE starting worker server", ++ "_post_fork must be called BEFORE starting worker server", + ) + +- def test_reset_pattern_is_documented(self): +- """Verify reset_verifier_config() pattern is documented.""" ++ def test_post_fork_is_documented(self): ++ """Verify _post_fork() documents why reset_verifier_config() is needed.""" + # Read the source code + server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() + +- # Find the start_multi method +- pattern = r"def start_multi\(self\).*?(?=\n def |\Z)" ++ # Find the _post_fork method ++ pattern = r"def _post_fork\(self.*?\).*?(?=\n def |\Z)" + match = re.search(pattern, source, re.DOTALL) + + assert match is not None + method_body = match.group(0) + +- # Should document why reset is needed after fork +- 
fork_index = method_body.find("fork_processes") +- after_fork = method_body[fork_index:] +- + # Should mention critical concepts: reset, inherited state, parent process + critical_terms = ["reset", "inherit", "parent", "database"] +- found_terms = [term for term in critical_terms if term.lower() in after_fork.lower()] ++ found_terms = [term for term in critical_terms if term.lower() in method_body.lower()] + + self.assertGreaterEqual( + len(found_terms), + 3, +- f"start_multi should document why reset_verifier_config() is needed after fork. " ++ f"_post_fork should document why reset_verifier_config() is needed after fork. " + f"Expected mentions of reset/inherit/parent/database, found: {found_terms}", + ) + +@@ -398,9 +382,9 @@ class TestEngineDisposalDocumentation(unittest.TestCase): + f"Expected mentions of fork/connection/dispose/parent/child, found: {found_terms}", + ) + +- def test_start_multi_documents_disposal_reason(self): +- """Verify start_multi() documents why global engine disposal is needed.""" +- server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") ++ def test_base_start_multi_documents_disposal_reason(self): ++ """Verify base Server.start_multi() documents why engine disposal after fork is needed.""" ++ server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "base", "server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() +@@ -416,15 +400,15 @@ class TestEngineDisposalDocumentation(unittest.TestCase): + fork_index = method_body.find("fork_processes") + after_fork = method_body[fork_index:] + +- critical_terms = ["inherit", "corrupt", "dispose", "worker", "parent"] ++ critical_terms = ["inherit", "connection", "dispose", "worker", "parent"] + + found_terms = [term for term in critical_terms if term.lower() in after_fork.lower()] + + self.assertGreaterEqual( + len(found_terms), + 2, +- f"start_multi should document why global engine disposal after fork is 
critical. " +- f"Expected mentions of inherit/corrupt/dispose/worker/parent, found: {found_terms}", ++ f"start_multi should document why engine disposal after fork is critical. " ++ f"Expected mentions of inherit/connection/dispose/worker/parent, found: {found_terms}", + ) + + +-- +2.53.0 + diff --git a/0018-ignore-sigterm-sigint-manager-parent-processes.patch b/0018-ignore-sigterm-sigint-manager-parent-processes.patch new file mode 100644 index 0000000..761d88a --- /dev/null +++ b/0018-ignore-sigterm-sigint-manager-parent-processes.patch @@ -0,0 +1,151 @@ +From 15f20d2dd2e63cc621295befef46bc4161a1f636 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 10 Apr 2026 13:22:44 +0200 +Subject: [PATCH] shared_data: Ignore SIGTERM and SIGINT on Manager and parent + processes + +When systemd stops the verifier (or registrar), SIGTERM is delivered to +the entire process group, including the multiprocessing Manager's server +process that hosts the shared policy cache. The Manager dies +immediately, but worker processes still have in-flight process_agent() +coroutines that need the cache, causing ConnectionResetError. + +The same race occurs with SIGINT (Ctrl+C) when running the daemon in +the foreground. + +Fix this in two parts: + +1. Use SyncManager.start(initializer=...) to install SIG_IGN for both + SIGTERM and SIGINT in the Manager's server process, so it survives + process-group signals and stays available while workers drain. + +2. Ignore SIGTERM and SIGINT in the new architecture's parent process + (start_multi) so it stays in tornado's monitor loop until all + children have drained and exited. Once all children exit, tornado + calls sys.exit(0), triggering atexit handlers which shut down the + Manager via IPC. Without this, the default signal disposition kills + the parent immediately (no atexit), leaving the Manager orphaned. 
+ +Resolves: #1882 + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/shared_data.py | 28 +++++++++++++++++++++++++++- + keylime/web/base/server.py | 18 ++++++++++++++++++ + test/test_verifier_server.py | 11 ++++++++--- + 3 files changed, 53 insertions(+), 4 deletions(-) + +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index 09cbb97bb..494f2f53b 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -8,8 +8,10 @@ + import multiprocessing as mp + import multiprocessing.process + import os ++import signal + import threading + import time ++from multiprocessing.managers import SyncManager + from typing import Any, Dict, List, Optional + + from keylime import keylime_logging +@@ -17,6 +19,17 @@ + logger = keylime_logging.init_logging("shared_data") + + ++def _manager_ignore_signals() -> None: ++ """Ignore SIGTERM and SIGINT in the Manager's server process. ++ ++ Called as the ``initializer`` for ``SyncManager.start()`` so that ++ the Manager survives process-group signals (systemd SIGTERM, Ctrl+C) ++ and stays available while workers drain in-flight work. ++ """ ++ signal.signal(signal.SIGTERM, signal.SIG_IGN) ++ signal.signal(signal.SIGINT, signal.SIG_IGN) ++ ++ + class FlatDictView: + """A dictionary-like view over a flat key-value store. + +@@ -127,7 +140,20 @@ def __init__(self) -> None: + # Use explicit context to ensure fork compatibility + # The Manager must be started BEFORE any fork() calls + ctx = mp.get_context("fork") +- self._manager = ctx.Manager() ++ # Use SyncManager directly (instead of the ctx.Manager() shortcut) ++ # so we can pass an initializer that makes the Manager's server ++ # process ignore SIGTERM and SIGINT. Without this, systemd's ++ # cgroup-wide SIGTERM (or Ctrl+C SIGINT in foreground) kills the ++ # Manager before workers finish draining, causing ++ # ConnectionResetError in proxy objects. 
The Manager is still ++ # cleanable via IPC shutdown message, process.kill(), or systemd ++ # SIGKILL escalation. ++ # Cannot use 'with' context manager here: the Manager must outlive ++ # __init__ and persist for the lifetime of SharedDataManager. ++ self._manager = SyncManager(ctx=ctx) ++ self._manager.start( # pylint: disable=consider-using-with ++ initializer=_manager_ignore_signals, ++ ) + + # CRITICAL FIX: Use a SINGLE flat dict instead of nested dicts + # Nested DictProxy objects have synchronization issues +diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py +index 4dd02b79e..8e9cce69d 100644 +--- a/keylime/web/base/server.py ++++ b/keylime/web/base/server.py +@@ -376,12 +376,30 @@ def start_multi(self) -> None: + + self._pre_fork() + ++ # Ignore SIGTERM/SIGINT in the parent so it stays in tornado's ++ # monitor loop (os.wait) until all children have drained and ++ # exited cleanly. Once all children exit, tornado calls ++ # sys.exit(0) which triggers atexit → SharedDataManager.cleanup() ++ # → Manager shutdown via IPC. Without this, the default signal ++ # disposition kills the parent immediately (no atexit), leaving ++ # the Manager process orphaned. ++ # Children inherit SIG_IGN but override it in ++ # _install_signal_handlers() before entering the event loop. ++ signal.signal(signal.SIGTERM, signal.SIG_IGN) ++ signal.signal(signal.SIGINT, signal.SIG_IGN) ++ + # with StatsCollector(): + # num = manager.Value('i', 0) + task_id = tornado.process.fork_processes(self.worker_count) + # num.value = num.value + 1 + # print(num.value) + ++ # Restore default signal disposition in children so they don't ++ # silently ignore SIGTERM/SIGINT before _install_signal_handlers() ++ # replaces these with asyncio-based handlers in start_single(). 
++ signal.signal(signal.SIGTERM, signal.SIG_DFL) ++ signal.signal(signal.SIGINT, signal.SIG_DFL) ++ + # Remove the Manager's server process from multiprocessing's child + # tracking so Python's atexit handler does not try to join() it in + # child workers (the Manager was spawned by the parent). +diff --git a/test/test_verifier_server.py b/test/test_verifier_server.py +index e9a47ef70..7601b9cb0 100644 +--- a/test/test_verifier_server.py ++++ b/test/test_verifier_server.py +@@ -300,10 +300,15 @@ def test_base_server_calls_post_fork_before_start_single(self): + assert match is not None + method_body = match.group(0) + ++ # Strip comment lines to avoid false matches from mentions ++ # in comments (e.g. "# ... before start_single()"). ++ code_lines = [line for line in method_body.splitlines() if not line.lstrip().startswith("#")] ++ code_body = "\n".join(code_lines) ++ + # Extract the order of operations +- fork_index = method_body.find("fork_processes") +- post_fork_index = method_body.find("_post_fork") +- start_single_index = method_body.find("start_single()") ++ fork_index = code_body.find("fork_processes") ++ post_fork_index = code_body.find("_post_fork") ++ start_single_index = code_body.find("start_single()") + + # All should be present + self.assertNotEqual(fork_index, -1, "fork_processes call not found") diff --git a/0019-move-socket-var-run.patch b/0019-move-socket-var-run.patch new file mode 100644 index 0000000..71755da --- /dev/null +++ b/0019-move-socket-var-run.patch @@ -0,0 +1,348 @@ +From a50c7e50171d8f5999bdd927b6306f6d14974c57 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Thu, 16 Apr 2026 14:14:06 +0200 +Subject: [PATCH 1/2] shared_data: Move SyncManager socket to /var/run/keylime/ + +The SyncManager's server process creates a Unix domain socket for IPC +with worker processes. By default, this socket was placed in /tmp with +a random name (listener-*). + +Move the socket to /var/run/keylime/, following standard daemon +practice. 
Keylime already uses this directory for its ZeroMQ revocation +notification socket. + +Changes: +- Pass explicit address to SyncManager so the socket is created at + /var/run/keylime/shared_data..sock instead of /tmp/listener-* +- Add _ensure_runtime_dir() to create or validate the directory +- Add test conftest.py to redirect sockets to a temp directory +- Add pytest to test-requirements.txt for pylint to resolve imports + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/shared_data.py | 54 +++++++++++++++++++++++++++++++++++++----- + test-requirements.txt | 1 + + test/conftest.py | 30 +++++++++++++++++++++++ + 3 files changed, 79 insertions(+), 6 deletions(-) + create mode 100644 test/conftest.py + +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index 494f2f53b..aef39bcc4 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -18,6 +18,23 @@ + + logger = keylime_logging.init_logging("shared_data") + ++_RUNTIME_DIR = "/var/run/keylime" ++ ++ ++def _ensure_runtime_dir() -> None: ++ """Ensure the runtime directory exists with correct permissions. ++ ++ Under systemd, ``tmpfiles.d`` creates ``/var/run/keylime/`` at boot. ++ This function provides a fallback for non-systemd execution and ++ validates permissions in either case. ++ """ ++ os.makedirs(_RUNTIME_DIR, mode=0o700, exist_ok=True) ++ perms = os.stat(_RUNTIME_DIR).st_mode & 0o777 ++ if perms != 0o700 or not os.access(_RUNTIME_DIR, os.W_OK | os.X_OK): ++ msg = f"{_RUNTIME_DIR} is not usable by the current process" ++ logger.error(msg) ++ raise PermissionError(msg) ++ + + def _manager_ignore_signals() -> None: + """Ignore SIGTERM and SIGINT in the Manager's server process. 
+@@ -137,8 +154,20 @@ def __init__(self) -> None: + """ + logger.debug("Initializing SharedDataManager") + +- # Use explicit context to ensure fork compatibility +- # The Manager must be started BEFORE any fork() calls ++ # Ensure /var/run/keylime/ exists with correct permissions ++ # before forking the Manager server process. ++ _ensure_runtime_dir() ++ self._socket_path = os.path.join(_RUNTIME_DIR, f"shared_data.{os.getpid()}.sock") ++ ++ # Remove stale socket from a previous run (e.g. after a crash). ++ # CPython's SocketListener does not pre-unlink before bind(). ++ try: ++ os.unlink(self._socket_path) ++ except (FileNotFoundError, PermissionError): ++ pass ++ ++ # Use explicit context to ensure fork compatibility. ++ # The Manager must be started BEFORE any fork() calls. + ctx = mp.get_context("fork") + # Use SyncManager directly (instead of the ctx.Manager() shortcut) + # so we can pass an initializer that makes the Manager's server +@@ -150,7 +179,7 @@ def __init__(self) -> None: + # SIGKILL escalation. + # Cannot use 'with' context manager here: the Manager must outlive + # __init__ and persist for the lifetime of SharedDataManager. 
+- self._manager = SyncManager(ctx=ctx) ++ self._manager = SyncManager(address=self._socket_path, ctx=ctx) + self._manager.start( # pylint: disable=consider-using-with + initializer=_manager_ignore_signals, + ) +@@ -162,8 +191,6 @@ def __init__(self) -> None: + self._lock = self._manager.Lock() + self._initialized_at = time.time() + +- # Register handler to reinitialize manager connection after fork +- # This is needed because Manager uses network connections that don't survive fork + try: + self._parent_pid = os.getpid() + logger.debug("SharedDataManager initialized in process %d", self._parent_pid) +@@ -173,7 +200,10 @@ def __init__(self) -> None: + # Ensure cleanup on exit + atexit.register(self.cleanup) + +- logger.info("SharedDataManager initialized successfully") ++ logger.info( ++ "SharedDataManager initialized successfully (socket: %s)", ++ self._socket_path, ++ ) + + def set_data(self, key: str, value: Any) -> None: + """Store arbitrary pickleable data by key. +@@ -333,6 +363,18 @@ def cleanup(self) -> None: + except Exception: + logger.exception("Error during SharedDataManager shutdown") + ++ # Remove socket file if it still exists. The Manager server ++ # process normally unlinks it on exit, but if it was killed ++ # (SIGKILL) the file may be left behind. ++ socket_path = getattr(self, "_socket_path", None) ++ if socket_path: ++ try: ++ os.unlink(socket_path) ++ except FileNotFoundError: ++ pass ++ except OSError as e: ++ logger.debug("Could not remove socket file %s: %s", socket_path, e) ++ + def deregister_child(self) -> None: + """Remove the Manager's server process from multiprocessing's child tracking. 
+ +diff --git a/test-requirements.txt b/test-requirements.txt +index bdd44e3e9..bf74580a9 100644 +--- a/test-requirements.txt ++++ b/test-requirements.txt +@@ -1,6 +1,7 @@ + dbus-python + # modules required for pylint + setuptools ++pytest + # packages required for mypy + sqlalchemy-stubs + types-python-dateutil +diff --git a/test/conftest.py b/test/conftest.py +new file mode 100644 +index 000000000..da2843922 +--- /dev/null ++++ b/test/conftest.py +@@ -0,0 +1,30 @@ ++"""Shared pytest fixtures for keylime tests.""" ++ ++import shutil ++import tempfile ++from unittest.mock import patch ++ ++import pytest ++ ++from keylime.shared_data import cleanup_global_shared_memory ++ ++ ++@pytest.fixture(autouse=True) ++def _shared_data_runtime_dir(): ++ """Redirect SharedDataManager sockets to a temporary directory. ++ ++ The SyncManager creates Unix domain sockets in /var/run/keylime/, ++ which may not be writable by the test user. This fixture patches ++ the runtime directory to a per-test temp directory so that tests ++ work in any environment. ++ ++ After each test, any global SharedDataManager is shut down to ++ prevent stale managers from referencing deleted temp directories. ++ """ ++ tmpdir = tempfile.mkdtemp() ++ with patch("keylime.shared_data._RUNTIME_DIR", tmpdir): ++ yield ++ # Shut down any global SharedDataManager left alive by the test ++ # so the next test starts fresh with a new temp directory. ++ cleanup_global_shared_memory() ++ shutil.rmtree(tmpdir, ignore_errors=True) + +From 712ab6c841e258e463f858904bfc0991f704a3b9 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Thu, 16 Apr 2026 14:14:45 +0200 +Subject: [PATCH 2/2] installer: Add tmpfiles.d config for all keylime + directories + +Add keylime-tmpfiles.conf to manage all keylime directories. 
+ +This includes: + +- /var/run/keylime (runtime IPC sockets) +- /var/lib/keylime (persistent state) +- /etc/keylime and config snippet directories (configuration) +- TPM certificate store copy from /usr/share to /var/lib + +Simplify installer.sh to avoid redundant directory creation and +ownership setting. The installer only needs to install the tmpfiles.d +config to /usr/lib/tmpfiles.d/keylime.conf and apply it immediately with +systemd-tmpfiles --create so the directories exist before the services +start. + +The installer validates the TPM cert store source exists before copying +and includes a non-systemd fallback for manual directory creation. + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + services/installer.sh | 61 ++++++++++++++++++++++++++-------- + services/keylime-tmpfiles.conf | 40 ++++++++++++++++++++++ + 2 files changed, 87 insertions(+), 14 deletions(-) + create mode 100644 services/keylime-tmpfiles.conf + +diff --git a/services/installer.sh b/services/installer.sh +index f34027c61..f462f136b 100755 +--- a/services/installer.sh ++++ b/services/installer.sh +@@ -11,7 +11,7 @@ fi + BASEDIR=$(dirname "$0") + + # check keylime scripts directory (same for verifier, agent, registrar) +-KEYLIMEDIR=$(dirname $(whereis keylime_verifier | cut -d " " -f 2)) ++KEYLIMEDIR=$(dirname "$(whereis keylime_verifier | cut -d " " -f 2)") + if [[ $KEYLIMEDIR == "." 
]]; then + echo "Unable to find keylime scripts" 1>&2 + exit 1 +@@ -20,8 +20,8 @@ fi + echo "Using keylime scripts directory: ${KEYLIMEDIR}" + + # prepare keylime service files and store them in systemd path +-sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" $BASEDIR/keylime_registrar.service.template > /etc/systemd/system/keylime_registrar.service +-sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" $BASEDIR/keylime_verifier.service.template > /etc/systemd/system/keylime_verifier.service ++sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" "$BASEDIR/keylime_registrar.service.template" > /etc/systemd/system/keylime_registrar.service ++sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" "$BASEDIR/keylime_verifier.service.template" > /etc/systemd/system/keylime_verifier.service + + echo "Creating keylime user if it not exists" + if ! getent passwd keylime >/dev/null; then +@@ -30,23 +30,56 @@ if ! getent passwd keylime >/dev/null; then + keylime + fi + +-echo "Changing files to be owned by the keylime user" +-# Create all directories required if not there +-mkdir -p /var/lib/keylime +-mkdir -p /var/log/keylime +-mkdir -p /var/run/keylime ++# install TPM certificate store to /usr/share/keylime/ ++# tmpfiles.d will copy this to /var/lib/keylime/tpm_cert_store ++TPM_CERT_STORE_SRC="$BASEDIR/../tpm_cert_store" ++if [[ ! 
-d "$TPM_CERT_STORE_SRC" ]]; then ++ echo "Missing TPM certificate store: $TPM_CERT_STORE_SRC" 1>&2 ++ exit 1 ++fi ++ ++mkdir -p /usr/share/keylime ++cp -a "$TPM_CERT_STORE_SRC" /usr/share/keylime/ || exit 1 + +-chown keylime:keylime -R /etc/keylime +-chown keylime:keylime -R /var/lib/keylime +-chown keylime:keylime -R /var/log/keylime +-chown keylime:keylime -R /var/run/keylime ++# install tmpfiles.d config for keylime directories ++mkdir -p /usr/lib/tmpfiles.d ++cp "$BASEDIR/keylime-tmpfiles.conf" /usr/lib/tmpfiles.d/keylime.conf ++ ++# apply the tmpfiles.d config immediately to create directories with correct ownership ++if command -v systemd-tmpfiles >/dev/null 2>&1; then ++ systemd-tmpfiles --create keylime.conf ++else ++ echo "Warning: systemd-tmpfiles not found, creating directories manually" ++ # Create essential directories as fallback for non-systemd systems ++ mkdir -p /var/run/keylime /var/lib/keylime \ ++ /etc/keylime/ca.conf.d \ ++ /etc/keylime/logging.conf.d \ ++ /etc/keylime/verifier.conf.d \ ++ /etc/keylime/registrar.conf.d \ ++ /etc/keylime/tenant.conf.d \ ++ /etc/keylime/agent.conf.d ++ chown keylime:keylime /var/run/keylime /var/lib/keylime ++ chmod 700 /var/run/keylime /var/lib/keylime ++ # Mirror tmpfiles.d Z/z semantics: recursively set ownership and ++ # file permissions under /etc/keylime, then fix directories to 0500. ++ chown -R keylime:keylime /etc/keylime ++ find /etc/keylime -type f -exec chmod 400 {} \; ++ find /etc/keylime -type d -exec chmod 500 {} \; ++ # Copy TPM cert store from /usr/share to /var/lib only if the ++ # target does not exist yet (mirrors the tmpfiles.d C directive). ++ # This preserves operator-added EK certificates. ++ if [ -d /usr/share/keylime/tpm_cert_store ] && [ ! 
-d /var/lib/keylime/tpm_cert_store ]; then ++ cp -r /usr/share/keylime/tpm_cert_store /var/lib/keylime/ ++ chown -R keylime:keylime /var/lib/keylime/tpm_cert_store ++ find /var/lib/keylime/tpm_cert_store -type f -exec chmod 400 {} \; ++ chmod 500 /var/lib/keylime/tpm_cert_store ++ fi ++fi + + # set permissions + chmod 664 /etc/systemd/system/keylime_registrar.service + chmod 664 /etc/systemd/system/keylime_verifier.service + +-chmod 700 /var/run/keylime +- + # enable at startup + systemctl enable keylime_registrar.service + systemctl enable keylime_verifier.service +diff --git a/services/keylime-tmpfiles.conf b/services/keylime-tmpfiles.conf +new file mode 100644 +index 000000000..f3c0b43d6 +--- /dev/null ++++ b/services/keylime-tmpfiles.conf +@@ -0,0 +1,40 @@ ++d /run/keylime 0700 keylime keylime - ++ ++d /var/lib/keylime 0700 keylime keylime - ++ ++d /etc/keylime 0500 keylime keylime - ++d /etc/keylime/ca.conf.d 0500 keylime keylime - ++d /etc/keylime/logging.conf.d 0500 keylime keylime - ++d /etc/keylime/verifier.conf.d 0500 keylime keylime - ++d /etc/keylime/registrar.conf.d 0500 keylime keylime - ++d /etc/keylime/tenant.conf.d 0500 keylime keylime - ++d /etc/keylime/agent.conf.d 0500 keylime keylime - ++ ++# TPM certificate store. ++# Copy the cert store from /usr/share/keylime/tpm_cert_store ++# to /var/lib/keylime/tpm_cert_store. ++# Files inside /var/lib/keylime/tpm_cert_store/ have ++# 0400 permission and are owned by keylime/keylime, ++# while /var/lib/keylime/tpm_cert_store/ itself has ++# permission 0500, also owned by keylime/keylime. ++C /var/lib/keylime/tpm_cert_store 0500 keylime keylime - /usr/share/keylime/tpm_cert_store ++Z /var/lib/keylime/tpm_cert_store 0400 keylime keylime - ++z /var/lib/keylime/tpm_cert_store 0500 keylime keylime - ++# Finally, /var/lib/keylime itself has 0700 permission, ++# and is owned by keylime/keylime. 
++z /var/lib/keylime 0700 keylime keylime - ++ ++# Keylime configuration in /etc/keylime has permission 0400 ++# owned by keylime/keylime, while snippet directories and ++# the actual /etc/keylime directory have permission 0500, ++# also owned by keylime/keylime. ++Z /etc/keylime 0400 keylime keylime - ++# Now fix the directories: ++z /etc/keylime/ca.conf.d 0500 keylime keylime - ++z /etc/keylime/logging.conf.d 0500 keylime keylime - ++z /etc/keylime/verifier.conf.d 0500 keylime keylime - ++z /etc/keylime/registrar.conf.d 0500 keylime keylime - ++z /etc/keylime/tenant.conf.d 0500 keylime keylime - ++z /etc/keylime/agent.conf.d 0500 keylime keylime - ++# And finally, /etc/keylime itself. ++z /etc/keylime 0500 keylime keylime - diff --git a/keylime-fix-db-connection-leaks.patch b/keylime-fix-db-connection-leaks.patch deleted file mode 100644 index 64be967..0000000 --- a/keylime-fix-db-connection-leaks.patch +++ /dev/null @@ -1,2208 +0,0 @@ -diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py -index 8ab81d1..7553ac8 100644 ---- a/keylime/cloud_verifier_tornado.py -+++ b/keylime/cloud_verifier_tornado.py -@@ -7,7 +7,8 @@ import sys - import traceback - from concurrent.futures import ThreadPoolExecutor - from multiprocessing import Process --from typing import Any, Dict, List, Optional, Tuple, Union, cast -+from contextlib import contextmanager -+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast - - import tornado.httpserver - import tornado.ioloop -@@ -34,7 +35,7 @@ from keylime.agentstates import AgentAttestState, AgentAttestStates - from keylime.common import retry, states, validators - from keylime.common.version import str_to_version - from keylime.da import record --from keylime.db.keylime_db import DBEngineManager, SessionManager -+from keylime.db.keylime_db import SessionManager, make_engine - from keylime.db.verifier_db import VerfierMain, VerifierAllowlist, VerifierMbpolicy - from keylime.failure import 
MAX_SEVERITY_LABEL, Component, Event, Failure, set_severity_config - from keylime.ima import ima -@@ -47,7 +48,7 @@ GLOBAL_POLICY_CACHE: Dict[str, Dict[str, str]] = {} - set_severity_config(config.getlist("verifier", "severity_labels"), config.getlist("verifier", "severity_policy")) - - try: -- engine = DBEngineManager().make_engine("cloud_verifier") -+ engine = make_engine("cloud_verifier") - except SQLAlchemyError as err: - logger.error("Error creating SQL engine or session: %s", err) - sys.exit(1) -@@ -61,8 +62,17 @@ except record.RecordManagementException as rme: - sys.exit(1) - - --def get_session() -> Session: -- return SessionManager().make_session(engine) -+@contextmanager -+def session_context() -> Iterator[Session]: -+ """ -+ Context manager for database sessions that ensures proper cleanup. -+ To use: -+ with session_context() as session: -+ # use session -+ """ -+ session_manager = SessionManager() -+ with session_manager.session_context(engine) as session: -+ yield session - - - def get_AgentAttestStates() -> AgentAttestStates: -@@ -130,19 +140,18 @@ def _from_db_obj(agent_db_obj: VerfierMain) -> Dict[str, Any]: - return agent_dict - - --def verifier_read_policy_from_cache(stored_agent: VerfierMain) -> str: -- checksum = "" -- name = "empty" -- agent_id = str(stored_agent.agent_id) -+def verifier_read_policy_from_cache(ima_policy_data: Dict[str, str]) -> str: -+ checksum = ima_policy_data.get("checksum", "") -+ name = ima_policy_data.get("name", "empty") -+ agent_id = ima_policy_data.get("agent_id", "") -+ -+ if not agent_id: -+ return "" - - if agent_id not in GLOBAL_POLICY_CACHE: - GLOBAL_POLICY_CACHE[agent_id] = {} - GLOBAL_POLICY_CACHE[agent_id][""] = "" - -- if stored_agent.ima_policy: -- checksum = str(stored_agent.ima_policy.checksum) -- name = stored_agent.ima_policy.name -- - if checksum not in GLOBAL_POLICY_CACHE[agent_id]: - if len(GLOBAL_POLICY_CACHE[agent_id]) > 1: - # Perform a cleanup of the contents, IMA policy checksum changed -@@ 
-162,8 +171,9 @@ def verifier_read_policy_from_cache(stored_agent: VerfierMain) -> str: - checksum, - agent_id, - ) -- # Actually contacts the database and load the (large) ima_policy column for "allowlists" table -- ima_policy = stored_agent.ima_policy.ima_policy -+ -+ # Get the large ima_policy content - it's already loaded in ima_policy_data -+ ima_policy = ima_policy_data.get("ima_policy", "") - assert isinstance(ima_policy, str) - GLOBAL_POLICY_CACHE[agent_id][checksum] = ima_policy - -@@ -182,22 +192,19 @@ def store_attestation_state(agentAttestState: AgentAttestState) -> None: - # Only store if IMA log was evaluated - if agentAttestState.get_ima_pcrs(): - agent_id = agentAttestState.agent_id -- session = get_session() - try: -- update_agent = session.query(VerfierMain).get(agentAttestState.get_agent_id()) -- assert update_agent -- update_agent.boottime = agentAttestState.get_boottime() -- update_agent.next_ima_ml_entry = agentAttestState.get_next_ima_ml_entry() -- ima_pcrs_dict = agentAttestState.get_ima_pcrs() -- update_agent.ima_pcrs = list(ima_pcrs_dict.keys()) -- for pcr_num, value in ima_pcrs_dict.items(): -- setattr(update_agent, f"pcr{pcr_num}", value) -- update_agent.learned_ima_keyrings = agentAttestState.get_ima_keyrings().to_json() -- try: -+ with session_context() as session: -+ update_agent = session.query(VerfierMain).get(agentAttestState.get_agent_id()) -+ assert update_agent -+ update_agent.boottime = agentAttestState.get_boottime() -+ update_agent.next_ima_ml_entry = agentAttestState.get_next_ima_ml_entry() -+ ima_pcrs_dict = agentAttestState.get_ima_pcrs() -+ update_agent.ima_pcrs = list(ima_pcrs_dict.keys()) -+ for pcr_num, value in ima_pcrs_dict.items(): -+ setattr(update_agent, f"pcr{pcr_num}", value) -+ update_agent.learned_ima_keyrings = agentAttestState.get_ima_keyrings().to_json() - session.add(update_agent) -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error on storing attestation state for agent %s: %s", agent_id, e) 
-- session.commit() -+ # session.commit() is automatically called by context manager - except SQLAlchemyError as e: - logger.error("SQLAlchemy Error on storing attestation state for agent %s: %s", agent_id, e) - -@@ -354,45 +361,17 @@ class AgentsHandler(BaseHandler): - was not found, it either completed successfully, or failed. If found, the agent_id is still polling - to contact the Cloud Agent. - """ -- session = get_session() -- - rest_params, agent_id = self.__validate_input("GET") - if not rest_params: - return - -- if (agent_id is not None) and (agent_id != ""): -- # If the agent ID is not valid (wrong set of characters), -- # just do nothing. -- agent = None -- try: -- agent = ( -- session.query(VerfierMain) -- .options( # type: ignore -- joinedload(VerfierMain.ima_policy).load_only( -- VerifierAllowlist.checksum, VerifierAllowlist.generator # pyright: ignore -- ) -- ) -- .options( # type: ignore -- joinedload(VerfierMain.mb_policy).load_only(VerifierMbpolicy.mb_policy) # pyright: ignore -- ) -- .filter_by(agent_id=agent_id) -- .one_or_none() -- ) -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -- -- if agent is not None: -- response = cloud_verifier_common.process_get_status(agent) -- web_util.echo_json_response(self, 200, "Success", response) -- else: -- web_util.echo_json_response(self, 404, "agent id not found") -- else: -- json_response = None -- if "bulk" in rest_params: -- agent_list = None -- -- if ("verifier" in rest_params) and (rest_params["verifier"] != ""): -- agent_list = ( -+ with session_context() as session: -+ if (agent_id is not None) and (agent_id != ""): -+ # If the agent ID is not valid (wrong set of characters), -+ # just do nothing. 
-+ agent = None -+ try: -+ agent = ( - session.query(VerfierMain) - .options( # type: ignore - joinedload(VerfierMain.ima_policy).load_only( -@@ -402,39 +381,70 @@ class AgentsHandler(BaseHandler): - .options( # type: ignore - joinedload(VerfierMain.mb_policy).load_only(VerifierMbpolicy.mb_policy) # pyright: ignore - ) -- .filter_by(verifier_id=rest_params["verifier"]) -- .all() -+ .filter_by(agent_id=agent_id) -+ .one_or_none() - ) -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -+ -+ if agent is not None: -+ response = cloud_verifier_common.process_get_status(agent) -+ web_util.echo_json_response(self, 200, "Success", response) - else: -- agent_list = ( -- session.query(VerfierMain) -- .options( # type: ignore -- joinedload(VerfierMain.ima_policy).load_only( -- VerifierAllowlist.checksum, VerifierAllowlist.generator # pyright: ignore -+ web_util.echo_json_response(self, 404, "agent id not found") -+ else: -+ json_response = None -+ if "bulk" in rest_params: -+ agent_list = None -+ -+ if ("verifier" in rest_params) and (rest_params["verifier"] != ""): -+ agent_list = ( -+ session.query(VerfierMain) -+ .options( # type: ignore -+ joinedload(VerfierMain.ima_policy).load_only( -+ VerifierAllowlist.checksum, VerifierAllowlist.generator # pyright: ignore -+ ) - ) -+ .options( # type: ignore -+ joinedload(VerfierMain.mb_policy).load_only( -+ VerifierMbpolicy.mb_policy # type: ignore[arg-type] -+ ) -+ ) -+ .filter_by(verifier_id=rest_params["verifier"]) -+ .all() - ) -- .options( # type: ignore -- joinedload(VerfierMain.mb_policy).load_only(VerifierMbpolicy.mb_policy) # pyright: ignore -+ else: -+ agent_list = ( -+ session.query(VerfierMain) -+ .options( # type: ignore -+ joinedload(VerfierMain.ima_policy).load_only( -+ VerifierAllowlist.checksum, VerifierAllowlist.generator # pyright: ignore -+ ) -+ ) -+ .options( # type: ignore -+ joinedload(VerfierMain.mb_policy).load_only( -+ VerifierMbpolicy.mb_policy # type: 
ignore[arg-type] -+ ) -+ ) -+ .all() - ) -- .all() -- ) - -- json_response = {} -- for agent in agent_list: -- json_response[agent.agent_id] = cloud_verifier_common.process_get_status(agent) -+ json_response = {} -+ for agent in agent_list: -+ json_response[agent.agent_id] = cloud_verifier_common.process_get_status(agent) - -- web_util.echo_json_response(self, 200, "Success", json_response) -- else: -- if ("verifier" in rest_params) and (rest_params["verifier"] != ""): -- json_response_list = ( -- session.query(VerfierMain.agent_id).filter_by(verifier_id=rest_params["verifier"]).all() -- ) -+ web_util.echo_json_response(self, 200, "Success", json_response) - else: -- json_response_list = session.query(VerfierMain.agent_id).all() -+ if ("verifier" in rest_params) and (rest_params["verifier"] != ""): -+ json_response_list = ( -+ session.query(VerfierMain.agent_id).filter_by(verifier_id=rest_params["verifier"]).all() -+ ) -+ else: -+ json_response_list = session.query(VerfierMain.agent_id).all() - -- web_util.echo_json_response(self, 200, "Success", {"uuids": json_response_list}) -+ web_util.echo_json_response(self, 200, "Success", {"uuids": json_response_list}) - -- logger.info("GET returning 200 response for agent_id list") -+ logger.info("GET returning 200 response for agent_id list") - - def delete(self) -> None: - """This method handles the DELETE requests to remove agents from the Cloud Verifier. -@@ -442,59 +452,55 @@ class AgentsHandler(BaseHandler): - Currently, only agents resources are available for DELETEing, i.e. /agents. All other DELETE uri's will return errors. - agents requests require a single agent_id parameter which identifies the agent to be deleted. 
- """ -- session = get_session() -- - rest_params, agent_id = self.__validate_input("DELETE") - if not rest_params or not agent_id: - return - -- agent = None -- try: -- agent = session.query(VerfierMain).filter_by(agent_id=agent_id).first() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -+ with session_context() as session: -+ agent = None -+ try: -+ agent = session.query(VerfierMain).filter_by(agent_id=agent_id).first() -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) - -- if agent is None: -- web_util.echo_json_response(self, 404, "agent id not found") -- logger.info("DELETE returning 404 response. agent id: %s not found.", agent_id) -- return -+ if agent is None: -+ web_util.echo_json_response(self, 404, "agent id not found") -+ logger.info("DELETE returning 404 response. agent id: %s not found.", agent_id) -+ return - -- verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) -- if verifier_id != agent.verifier_id: -- web_util.echo_json_response(self, 404, "agent id associated to this verifier") -- logger.info("DELETE returning 404 response. agent id: %s not associated to this verifer.", agent_id) -- return -+ verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) -+ if verifier_id != agent.verifier_id: -+ web_util.echo_json_response(self, 404, "agent id associated to this verifier") -+ logger.info("DELETE returning 404 response. agent id: %s not associated to this verifer.", agent_id) -+ return - -- # Cleanup the cache when the agent is deleted. Do it early. -- if agent_id in GLOBAL_POLICY_CACHE: -- del GLOBAL_POLICY_CACHE[agent_id] -- logger.debug( -- "Cleaned up policy cache from all entries used by agent %s", -- agent_id, -- ) -+ # Cleanup the cache when the agent is deleted. Do it early. 
-+ if agent_id in GLOBAL_POLICY_CACHE: -+ del GLOBAL_POLICY_CACHE[agent_id] -+ logger.debug( -+ "Cleaned up policy cache from all entries used by agent %s", -+ agent_id, -+ ) - -- op_state = agent.operational_state -- if op_state in (states.SAVED, states.FAILED, states.TERMINATED, states.TENANT_FAILED, states.INVALID_QUOTE): -- try: -- verifier_db_delete_agent(session, agent_id) -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- web_util.echo_json_response(self, 200, "Success") -- logger.info("DELETE returning 200 response for agent id: %s", agent_id) -- else: -- try: -- update_agent = session.query(VerfierMain).get(agent_id) -- assert update_agent -- update_agent.operational_state = states.TERMINATED -+ op_state = agent.operational_state -+ if op_state in (states.SAVED, states.FAILED, states.TERMINATED, states.TENANT_FAILED, states.INVALID_QUOTE): - try: -+ verifier_db_delete_agent(session, agent_id) -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 200, "Success") -+ logger.info("DELETE returning 200 response for agent id: %s", agent_id) -+ else: -+ try: -+ update_agent = session.query(VerfierMain).get(agent_id) -+ assert update_agent -+ update_agent.operational_state = states.TERMINATED - session.add(update_agent) -+ # session.commit() is automatically called by context manager -+ web_util.echo_json_response(self, 202, "Accepted") -+ logger.info("DELETE returning 202 response for agent id: %s", agent_id) - except SQLAlchemyError as e: - logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -- session.commit() -- web_util.echo_json_response(self, 202, "Accepted") -- logger.info("DELETE returning 202 response for agent id: %s", agent_id) -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) - - def post(self) -> None: - """This method handles the POST requests to add agents to the Cloud Verifier. 
-@@ -502,7 +508,6 @@ class AgentsHandler(BaseHandler): - Currently, only agents resources are available for POSTing, i.e. /agents. All other POST uri's will return errors. - agents requests require a json block sent in the body - """ -- session = get_session() - # TODO: exception handling needs fixing - # Maybe handle exceptions with if/else if/else blocks ... simple and avoids nesting - try: # pylint: disable=too-many-nested-blocks -@@ -585,201 +590,208 @@ class AgentsHandler(BaseHandler): - runtime_policy = base64.b64decode(json_body.get("runtime_policy")).decode() - runtime_policy_stored = None - -- if runtime_policy_name: -+ with session_context() as session: -+ if runtime_policy_name: -+ try: -+ runtime_policy_stored = ( -+ session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).one_or_none() -+ ) -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -+ raise -+ -+ # Prevent overwriting existing IMA policies with name provided in request -+ if runtime_policy and runtime_policy_stored: -+ web_util.echo_json_response( -+ self, -+ 409, -+ f"IMA policy with name {runtime_policy_name} already exists. Please use a different name or delete the allowlist from the verifier.", -+ ) -+ logger.warning("IMA policy with name %s already exists", runtime_policy_name) -+ return -+ -+ # Return an error code if the named allowlist does not exist in the database -+ if not runtime_policy and not runtime_policy_stored: -+ web_util.echo_json_response( -+ self, 404, f"Could not find IMA policy with name {runtime_policy_name}!" 
-+ ) -+ logger.warning("Could not find IMA policy with name %s", runtime_policy_name) -+ return -+ -+ # Prevent overwriting existing agents with UUID provided in request - try: -- runtime_policy_stored = ( -- session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).one_or_none() -- ) -+ new_agent_count = session.query(VerfierMain).filter_by(agent_id=agent_id).count() - except SQLAlchemyError as e: - logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -- raise -+ raise e - -- # Prevent overwriting existing IMA policies with name provided in request -- if runtime_policy and runtime_policy_stored: -+ if new_agent_count > 0: - web_util.echo_json_response( - self, - 409, -- f"IMA policy with name {runtime_policy_name} already exists. Please use a different name or delete the allowlist from the verifier.", -+ f"Agent of uuid {agent_id} already exists. Please use delete or update.", - ) -- logger.warning("IMA policy with name %s already exists", runtime_policy_name) -+ logger.warning("Agent of uuid %s already exists", agent_id) - return - -- # Return an error code if the named allowlist does not exist in the database -- if not runtime_policy and not runtime_policy_stored: -- web_util.echo_json_response( -- self, 404, f"Could not find IMA policy with name {runtime_policy_name}!" -- ) -- logger.warning("Could not find IMA policy with name %s", runtime_policy_name) -- return -- -- # Prevent overwriting existing agents with UUID provided in request -- try: -- new_agent_count = session.query(VerfierMain).filter_by(agent_id=agent_id).count() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -- raise e -- -- if new_agent_count > 0: -- web_util.echo_json_response( -- self, -- 409, -- f"Agent of uuid {agent_id} already exists. 
Please use delete or update.", -- ) -- logger.warning("Agent of uuid %s already exists", agent_id) -- return -- -- # Write IMA policy to database if needed -- if not runtime_policy_name and not runtime_policy: -- logger.info("IMA policy data not provided with request! Using default empty IMA policy.") -- runtime_policy = json.dumps(cast(Dict[str, Any], ima.EMPTY_RUNTIME_POLICY)) -+ # Write IMA policy to database if needed -+ if not runtime_policy_name and not runtime_policy: -+ logger.info("IMA policy data not provided with request! Using default empty IMA policy.") -+ runtime_policy = json.dumps(cast(Dict[str, Any], ima.EMPTY_RUNTIME_POLICY)) - -- if runtime_policy: -- runtime_policy_key_bytes = signing.get_runtime_policy_keys( -- runtime_policy.encode(), -- json_body.get("runtime_policy_key"), -- ) -- -- try: -- ima.verify_runtime_policy( -+ if runtime_policy: -+ runtime_policy_key_bytes = signing.get_runtime_policy_keys( - runtime_policy.encode(), -- runtime_policy_key_bytes, -- verify_sig=config.getboolean( -- "verifier", "require_allow_list_signatures", fallback=False -- ), -+ json_body.get("runtime_policy_key"), - ) -- except ima.ImaValidationError as e: -- web_util.echo_json_response(self, e.code, e.message) -- logger.warning(e.message) -- return - -- if not runtime_policy_name: -- runtime_policy_name = agent_id -- -- try: -- runtime_policy_db_format = ima.runtime_policy_db_contents( -- runtime_policy_name, runtime_policy -- ) -- except ima.ImaValidationError as e: -- message = f"Runtime policy is malformatted: {e.message}" -- web_util.echo_json_response(self, e.code, message) -- logger.warning(message) -- return -- -- try: -- runtime_policy_stored = ( -- session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).one_or_none() -- ) -- except SQLAlchemyError as e: -- logger.error( -- "SQLAlchemy Error while retrieving stored ima policy for agent ID %s: %s", agent_id, e -- ) -- raise -- try: -- if runtime_policy_stored is None: -- 
runtime_policy_stored = VerifierAllowlist(**runtime_policy_db_format) -- session.add(runtime_policy_stored) -+ try: -+ ima.verify_runtime_policy( -+ runtime_policy.encode(), -+ runtime_policy_key_bytes, -+ verify_sig=config.getboolean( -+ "verifier", "require_allow_list_signatures", fallback=False -+ ), -+ ) -+ except ima.ImaValidationError as e: -+ web_util.echo_json_response(self, e.code, e.message) -+ logger.warning(e.message) -+ return -+ -+ if not runtime_policy_name: -+ runtime_policy_name = agent_id -+ -+ try: -+ runtime_policy_db_format = ima.runtime_policy_db_contents( -+ runtime_policy_name, runtime_policy -+ ) -+ except ima.ImaValidationError as e: -+ message = f"Runtime policy is malformatted: {e.message}" -+ web_util.echo_json_response(self, e.code, message) -+ logger.warning(message) -+ return -+ -+ try: -+ runtime_policy_stored = ( -+ session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).one_or_none() -+ ) -+ except SQLAlchemyError as e: -+ logger.error( -+ "SQLAlchemy Error while retrieving stored ima policy for agent ID %s: %s", -+ agent_id, -+ e, -+ ) -+ raise -+ try: -+ if runtime_policy_stored is None: -+ runtime_policy_stored = VerifierAllowlist(**runtime_policy_db_format) -+ session.add(runtime_policy_stored) -+ session.commit() -+ except SQLAlchemyError as e: -+ logger.error( -+ "SQLAlchemy Error while updating ima policy for agent ID %s: %s", agent_id, e -+ ) -+ raise -+ -+ # Handle measured boot policy -+ # - No name, mb_policy : store mb_policy using agent UUID as name -+ # - Name, no mb_policy : fetch existing mb_policy from DB -+ # - Name, mb_policy : store mb_policy using name -+ -+ mb_policy_name = json_body["mb_policy_name"] -+ mb_policy = json_body["mb_policy"] -+ mb_policy_stored = None -+ -+ if mb_policy_name: -+ try: -+ mb_policy_stored = ( -+ session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).one_or_none() -+ ) -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error for agent ID %s: %s", 
agent_id, e) -+ raise -+ -+ # Prevent overwriting existing mb_policy with name provided in request -+ if mb_policy and mb_policy_stored: -+ web_util.echo_json_response( -+ self, -+ 409, -+ f"mb_policy with name {mb_policy_name} already exists. Please use a different name or delete the mb_policy from the verifier.", -+ ) -+ logger.warning("mb_policy with name %s already exists", mb_policy_name) -+ return -+ -+ # Return error if the mb_policy is neither provided nor stored. -+ if not mb_policy and not mb_policy_stored: -+ web_util.echo_json_response( -+ self, 404, f"Could not find mb_policy with name {mb_policy_name}!" -+ ) -+ logger.warning("Could not find mb_policy with name %s", mb_policy_name) -+ return -+ -+ else: -+ # Use the UUID of the agent -+ mb_policy_name = agent_id -+ try: -+ mb_policy_stored = ( -+ session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).one_or_none() -+ ) -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -+ raise -+ -+ # Prevent overwriting existing mb_policy -+ if mb_policy and mb_policy_stored: -+ web_util.echo_json_response( -+ self, -+ 409, -+ f"mb_policy with name {mb_policy_name} already exists. 
You can delete the mb_policy from the verifier.", -+ ) -+ logger.warning("mb_policy with name %s already exists", mb_policy_name) -+ return -+ -+ # Store the policy into database if not stored -+ if mb_policy_stored is None: -+ try: -+ mb_policy_db_format = mba.mb_policy_db_contents(mb_policy_name, mb_policy) -+ mb_policy_stored = VerifierMbpolicy(**mb_policy_db_format) -+ session.add(mb_policy_stored) - session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error while updating ima policy for agent ID %s: %s", agent_id, e) -- raise -- -- # Handle measured boot policy -- # - No name, mb_policy : store mb_policy using agent UUID as name -- # - Name, no mb_policy : fetch existing mb_policy from DB -- # - Name, mb_policy : store mb_policy using name -- -- mb_policy_name = json_body["mb_policy_name"] -- mb_policy = json_body["mb_policy"] -- mb_policy_stored = None -+ except SQLAlchemyError as e: -+ logger.error( -+ "SQLAlchemy Error while updating mb_policy for agent ID %s: %s", agent_id, e -+ ) -+ raise - -- if mb_policy_name: -+ # Write the agent to the database, attaching associated stored ima_policy and mb_policy - try: -- mb_policy_stored = ( -- session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).one_or_none() -+ assert runtime_policy_stored -+ assert mb_policy_stored -+ session.add( -+ VerfierMain(**agent_data, ima_policy=runtime_policy_stored, mb_policy=mb_policy_stored) - ) -+ session.commit() - except SQLAlchemyError as e: - logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -- raise -+ raise e - -- # Prevent overwriting existing mb_policy with name provided in request -- if mb_policy and mb_policy_stored: -- web_util.echo_json_response( -- self, -- 409, -- f"mb_policy with name {mb_policy_name} already exists. 
Please use a different name or delete the mb_policy from the verifier.", -- ) -- logger.warning("mb_policy with name %s already exists", mb_policy_name) -- return -+ # add default fields that are ephemeral -+ for key, val in exclude_db.items(): -+ agent_data[key] = val - -- # Return error if the mb_policy is neither provided nor stored. -- if not mb_policy and not mb_policy_stored: -- web_util.echo_json_response( -- self, 404, f"Could not find mb_policy with name {mb_policy_name}!" -+ # Prepare SSLContext for mTLS connections -+ agent_data["ssl_context"] = None -+ if agent_mtls_cert_enabled: -+ agent_data["ssl_context"] = web_util.generate_agent_tls_context( -+ "verifier", agent_data["mtls_cert"], logger=logger - ) -- logger.warning("Could not find mb_policy with name %s", mb_policy_name) -- return - -- else: -- # Use the UUID of the agent -- mb_policy_name = agent_id -- try: -- mb_policy_stored = ( -- session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).one_or_none() -- ) -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -- raise -- -- # Prevent overwriting existing mb_policy -- if mb_policy and mb_policy_stored: -- web_util.echo_json_response( -- self, -- 409, -- f"mb_policy with name {mb_policy_name} already exists. 
You can delete the mb_policy from the verifier.", -- ) -- logger.warning("mb_policy with name %s already exists", mb_policy_name) -- return -- -- # Store the policy into database if not stored -- if mb_policy_stored is None: -- try: -- mb_policy_db_format = mba.mb_policy_db_contents(mb_policy_name, mb_policy) -- mb_policy_stored = VerifierMbpolicy(**mb_policy_db_format) -- session.add(mb_policy_stored) -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error while updating mb_policy for agent ID %s: %s", agent_id, e) -- raise -+ if agent_data["ssl_context"] is None: -+ logger.warning("Connecting to agent without mTLS: %s", agent_id) - -- # Write the agent to the database, attaching associated stored ima_policy and mb_policy -- try: -- assert runtime_policy_stored -- assert mb_policy_stored -- session.add( -- VerfierMain(**agent_data, ima_policy=runtime_policy_stored, mb_policy=mb_policy_stored) -- ) -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -- raise e -- -- # add default fields that are ephemeral -- for key, val in exclude_db.items(): -- agent_data[key] = val -- -- # Prepare SSLContext for mTLS connections -- agent_data["ssl_context"] = None -- if agent_mtls_cert_enabled: -- agent_data["ssl_context"] = web_util.generate_agent_tls_context( -- "verifier", agent_data["mtls_cert"], logger=logger -- ) -- -- if agent_data["ssl_context"] is None: -- logger.warning("Connecting to agent without mTLS: %s", agent_id) -- -- asyncio.ensure_future(process_agent(agent_data, states.GET_QUOTE)) -- web_util.echo_json_response(self, 200, "Success") -- logger.info("POST returning 200 response for adding agent id: %s", agent_id) -+ asyncio.ensure_future(process_agent(agent_data, states.GET_QUOTE)) -+ web_util.echo_json_response(self, 200, "Success") -+ logger.info("POST returning 200 response for adding agent id: %s", agent_id) - else: - web_util.echo_json_response(self, 400, 
"uri not supported") - logger.warning("POST returning 400 response. uri not supported") -@@ -794,54 +806,54 @@ class AgentsHandler(BaseHandler): - Currently, only agents resources are available for PUTing, i.e. /agents. All other PUT uri's will return errors. - agents requests require a json block sent in the body - """ -- session = get_session() - try: - rest_params, agent_id = self.__validate_input("PUT") - if not rest_params: - return - -- try: -- verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) -- db_agent = session.query(VerfierMain).filter_by(agent_id=agent_id, verifier_id=verifier_id).one() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -- raise e -+ with session_context() as session: -+ try: -+ verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) -+ db_agent = session.query(VerfierMain).filter_by(agent_id=agent_id, verifier_id=verifier_id).one() -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -+ raise e - -- if db_agent is None: -- web_util.echo_json_response(self, 404, "agent id not found") -- logger.info("PUT returning 404 response. agent id: %s not found.", agent_id) -- return -+ if db_agent is None: -+ web_util.echo_json_response(self, 404, "agent id not found") -+ logger.info("PUT returning 404 response. 
agent id: %s not found.", agent_id) -+ return - -- if "reactivate" in rest_params: -- agent = _from_db_obj(db_agent) -+ if "reactivate" in rest_params: -+ agent = _from_db_obj(db_agent) - -- if agent["mtls_cert"] and agent["mtls_cert"] != "disabled": -- agent["ssl_context"] = web_util.generate_agent_tls_context( -- "verifier", agent["mtls_cert"], logger=logger -- ) -- if agent["ssl_context"] is None: -- logger.warning("Connecting to agent without mTLS: %s", agent_id) -+ if agent["mtls_cert"] and agent["mtls_cert"] != "disabled": -+ agent["ssl_context"] = web_util.generate_agent_tls_context( -+ "verifier", agent["mtls_cert"], logger=logger -+ ) -+ if agent["ssl_context"] is None: -+ logger.warning("Connecting to agent without mTLS: %s", agent_id) - -- agent["operational_state"] = states.START -- asyncio.ensure_future(process_agent(agent, states.GET_QUOTE)) -- web_util.echo_json_response(self, 200, "Success") -- logger.info("PUT returning 200 response for agent id: %s", agent_id) -- elif "stop" in rest_params: -- # do stuff for terminate -- logger.debug("Stopping polling on %s", agent_id) -- try: -- session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).update( # pyright: ignore -- {"operational_state": states.TENANT_FAILED} -- ) -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -+ agent["operational_state"] = states.START -+ asyncio.ensure_future(process_agent(agent, states.GET_QUOTE)) -+ web_util.echo_json_response(self, 200, "Success") -+ logger.info("PUT returning 200 response for agent id: %s", agent_id) -+ elif "stop" in rest_params: -+ # do stuff for terminate -+ logger.debug("Stopping polling on %s", agent_id) -+ try: -+ session.query(VerfierMain).filter(VerfierMain.agent_id == agent_id).update( # pyright: ignore -+ {"operational_state": states.TENANT_FAILED} -+ ) -+ # session.commit() is automatically called by context manager -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) 
- -- web_util.echo_json_response(self, 200, "Success") -- logger.info("PUT returning 200 response for agent id: %s", agent_id) -- else: -- web_util.echo_json_response(self, 400, "uri not supported") -- logger.warning("PUT returning 400 response. uri not supported") -+ web_util.echo_json_response(self, 200, "Success") -+ logger.info("PUT returning 200 response for agent id: %s", agent_id) -+ else: -+ web_util.echo_json_response(self, 400, "uri not supported") -+ logger.warning("PUT returning 400 response. uri not supported") - - except Exception as e: - web_util.echo_json_response(self, 400, f"Exception error: {str(e)}") -@@ -887,36 +899,36 @@ class AllowlistHandler(BaseHandler): - if not params_valid: - return - -- session = get_session() -- if allowlist_name is None: -- try: -- names_allowlists = session.query(VerifierAllowlist.name).all() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- web_util.echo_json_response(self, 500, "Failed to get names of allowlists") -- raise -+ with session_context() as session: -+ if allowlist_name is None: -+ try: -+ names_allowlists = session.query(VerifierAllowlist.name).all() -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 500, "Failed to get names of allowlists") -+ raise - -- names_response = [] -- for name in names_allowlists: -- names_response.append(name[0]) -- web_util.echo_json_response(self, 200, "Success", {"runtimepolicy names": names_response}) -+ names_response = [] -+ for name in names_allowlists: -+ names_response.append(name[0]) -+ web_util.echo_json_response(self, 200, "Success", {"runtimepolicy names": names_response}) - -- else: -- try: -- allowlist = session.query(VerifierAllowlist).filter_by(name=allowlist_name).one() -- except NoResultFound: -- web_util.echo_json_response(self, 404, f"Runtime policy {allowlist_name} not found") -- return -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- 
web_util.echo_json_response(self, 500, "Failed to get allowlist") -- raise -+ else: -+ try: -+ allowlist = session.query(VerifierAllowlist).filter_by(name=allowlist_name).one() -+ except NoResultFound: -+ web_util.echo_json_response(self, 404, f"Runtime policy {allowlist_name} not found") -+ return -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 500, "Failed to get allowlist") -+ raise - -- response = {} -- for field in ("name", "tpm_policy"): -- response[field] = getattr(allowlist, field, None) -- response["runtime_policy"] = getattr(allowlist, "ima_policy", None) -- web_util.echo_json_response(self, 200, "Success", response) -+ response = {} -+ for field in ("name", "tpm_policy"): -+ response[field] = getattr(allowlist, field, None) -+ response["runtime_policy"] = getattr(allowlist, "ima_policy", None) -+ web_util.echo_json_response(self, 200, "Success", response) - - def delete(self) -> None: - """Delete an allowlist -@@ -928,45 +940,44 @@ class AllowlistHandler(BaseHandler): - if not params_valid or allowlist_name is None: - return - -- session = get_session() -- try: -- runtime_policy = session.query(VerifierAllowlist).filter_by(name=allowlist_name).one() -- except NoResultFound: -- web_util.echo_json_response(self, 404, f"Runtime policy {allowlist_name} not found") -- return -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- web_util.echo_json_response(self, 500, "Failed to get allowlist") -- raise -+ with session_context() as session: -+ try: -+ runtime_policy = session.query(VerifierAllowlist).filter_by(name=allowlist_name).one() -+ except NoResultFound: -+ web_util.echo_json_response(self, 404, f"Runtime policy {allowlist_name} not found") -+ return -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 500, "Failed to get allowlist") -+ raise - -- try: -- agent =
session.query(VerfierMain).filter_by(ima_policy_id=runtime_policy.id).one_or_none() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -- if agent is not None: -- web_util.echo_json_response( -- self, -- 409, -- f"Can't delete allowlist as it's currently in use by agent {agent.agent_id}", -- ) -- return -+ try: -+ agent = session.query(VerfierMain).filter_by(ima_policy_id=runtime_policy.id).one_or_none() -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise -+ if agent is not None: -+ web_util.echo_json_response( -+ self, -+ 409, -+ f"Can't delete allowlist as it's currently in use by agent {agent.agent_id}", -+ ) -+ return - -- try: -- session.query(VerifierAllowlist).filter_by(name=allowlist_name).delete() -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- session.close() -- web_util.echo_json_response(self, 500, f"Database error: {e}") -- raise -+ try: -+ session.query(VerifierAllowlist).filter_by(name=allowlist_name).delete() -+ # session.commit() is automatically called by context manager -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 500, f"Database error: {e}") -+ raise - -- # NOTE(kaifeng) 204 Can not have response body, but current helper -- # doesn't support this case. -- self.set_status(204) -- self.set_header("Content-Type", "application/json") -- self.finish() -- logger.info("DELETE returning 204 response for allowlist: %s", allowlist_name) -+ # NOTE(kaifeng) 204 Can not have response body, but current helper -+ # doesn't support this case. 
-+ self.set_status(204) -+ self.set_header("Content-Type", "application/json") -+ self.finish() -+ logger.info("DELETE returning 204 response for allowlist: %s", allowlist_name) - - def __get_runtime_policy_db_format(self, runtime_policy_name: str) -> Dict[str, Any]: - """Get the IMA policy from the request and return it in Db format""" -@@ -1022,28 +1033,30 @@ class AllowlistHandler(BaseHandler): - if not runtime_policy_db_format: - return - -- session = get_session() -- # don't allow overwritting -- try: -- runtime_policy_count = session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).count() -- if runtime_policy_count > 0: -- web_util.echo_json_response(self, 409, f"Runtime policy with name {runtime_policy_name} already exists") -- logger.warning("Runtime policy with name %s already exists", runtime_policy_name) -- return -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -+ with session_context() as session: -+ # don't allow overwritting -+ try: -+ runtime_policy_count = session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).count() -+ if runtime_policy_count > 0: -+ web_util.echo_json_response( -+ self, 409, f"Runtime policy with name {runtime_policy_name} already exists" -+ ) -+ logger.warning("Runtime policy with name %s already exists", runtime_policy_name) -+ return -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise - -- try: -- # Add the agent and data -- session.add(VerifierAllowlist(**runtime_policy_db_format)) -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -+ try: -+ # Add the agent and data -+ session.add(VerifierAllowlist(**runtime_policy_db_format)) -+ # session.commit() is automatically called by context manager -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise - -- web_util.echo_json_response(self, 201) -- logger.info("POST returning 201") -+ 
web_util.echo_json_response(self, 201) -+ logger.info("POST returning 201") - - def put(self) -> None: - """Update an allowlist -@@ -1060,32 +1073,34 @@ class AllowlistHandler(BaseHandler): - if not runtime_policy_db_format: - return - -- session = get_session() -- # don't allow creating a new policy -- try: -- runtime_policy_count = session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).count() -- if runtime_policy_count != 1: -- web_util.echo_json_response( -- self, 409, f"Runtime policy with name {runtime_policy_name} does not already exist" -- ) -- logger.warning("Runtime policy with name %s does not already exist", runtime_policy_name) -- return -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -+ with session_context() as session: -+ # don't allow creating a new policy -+ try: -+ runtime_policy_count = session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).count() -+ if runtime_policy_count != 1: -+ web_util.echo_json_response( -+ self, -+ 404, -+ f"Runtime policy with name {runtime_policy_name} does not already exist, use POST to create", -+ ) -+ logger.warning("Runtime policy with name %s does not already exist", runtime_policy_name) -+ return -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise - -- try: -- # Update the named runtime policy -- session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).update( -- runtime_policy_db_format # pyright: ignore -- ) -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -+ try: -+ # Update the named runtime policy -+ session.query(VerifierAllowlist).filter_by(name=runtime_policy_name).update( -+ runtime_policy_db_format # pyright: ignore -+ ) -+ # session.commit() is automatically called by context manager -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise - -- web_util.echo_json_response(self, 201) -- logger.info("PUT returning 
201") -+ web_util.echo_json_response(self, 201) -+ logger.info("PUT returning 201") - - def data_received(self, chunk: Any) -> None: - raise NotImplementedError() -@@ -1113,8 +1128,6 @@ class VerifyIdentityHandler(BaseHandler): - - This is useful for 3rd party tools and integrations to independently verify the state of an agent. - """ -- session = get_session() -- - # validate the parameters of our request - if self.request.uri is None: - web_util.echo_json_response(self, 400, "URI not specified") -@@ -1159,36 +1172,37 @@ class VerifyIdentityHandler(BaseHandler): - return - - # get the agent information from the DB -- agent = None -- try: -- agent = ( -- session.query(VerfierMain) -- .options( # type: ignore -- joinedload(VerfierMain.ima_policy).load_only( -- VerifierAllowlist.checksum, VerifierAllowlist.generator # pyright: ignore -+ with session_context() as session: -+ agent = None -+ try: -+ agent = ( -+ session.query(VerfierMain) -+ .options( # type: ignore -+ joinedload(VerfierMain.ima_policy).load_only( -+ VerifierAllowlist.checksum, VerifierAllowlist.generator # pyright: ignore -+ ) - ) -+ .filter_by(agent_id=agent_id) -+ .one_or_none() - ) -- .filter_by(agent_id=agent_id) -- .one_or_none() -- ) -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error for agent ID %s: %s", agent_id, e) - -- if agent is not None: -- agentAttestState = get_AgentAttestStates().get_by_agent_id(agent_id) -- failure = cloud_verifier_common.process_verify_identity_quote( -- agent, quote, nonce, hash_alg, agentAttestState -- ) -- if failure: -- failure_contexts = "; ".join(x.context for x in failure.events) -- web_util.echo_json_response(self, 200, "Success", {"valid": 0, "reason": failure_contexts}) -- logger.info("GET returning 200, but validation failed") -+ if agent is not None: -+ agentAttestState = get_AgentAttestStates().get_by_agent_id(agent_id) -+ failure = 
cloud_verifier_common.process_verify_identity_quote( -+ agent, quote, nonce, hash_alg, agentAttestState -+ ) -+ if failure: -+ failure_contexts = "; ".join(x.context for x in failure.events) -+ web_util.echo_json_response(self, 200, "Success", {"valid": 0, "reason": failure_contexts}) -+ logger.info("GET returning 200, but validation failed") -+ else: -+ web_util.echo_json_response(self, 200, "Success", {"valid": 1}) -+ logger.info("GET returning 200, validation successful") - else: -- web_util.echo_json_response(self, 200, "Success", {"valid": 1}) -- logger.info("GET returning 200, validation successful") -- else: -- web_util.echo_json_response(self, 404, "agent id not found") -- logger.info("GET returning 404, agaent not found") -+ web_util.echo_json_response(self, 404, "agent id not found") -+ logger.info("GET returning 404, agaent not found") - - def data_received(self, chunk: Any) -> None: - raise NotImplementedError() -@@ -1231,35 +1245,35 @@ class MbpolicyHandler(BaseHandler): - if not params_valid: - return - -- session = get_session() -- if mb_policy_name is None: -- try: -- names_mbpolicies = session.query(VerifierMbpolicy.name).all() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- web_util.echo_json_response(self, 500, "Failed to get names of mbpolicies") -- raise -+ with session_context() as session: -+ if mb_policy_name is None: -+ try: -+ names_mbpolicies = session.query(VerifierMbpolicy.name).all() -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 500, "Failed to get names of mbpolicies") -+ raise - -- names_response = [] -- for name in names_mbpolicies: -- names_response.append(name[0]) -- web_util.echo_json_response(self, 200, "Success", {"mbpolicy names": names_response}) -+ names_response = [] -+ for name in names_mbpolicies: -+ names_response.append(name[0]) -+ web_util.echo_json_response(self, 200, "Success", {"mbpolicy names": names_response}) - -- 
else: -- try: -- mbpolicy = session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).one() -- except NoResultFound: -- web_util.echo_json_response(self, 404, f"Measured boot policy {mb_policy_name} not found") -- return -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- web_util.echo_json_response(self, 500, "Failed to get mb_policy") -- raise -+ else: -+ try: -+ mbpolicy = session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).one() -+ except NoResultFound: -+ web_util.echo_json_response(self, 404, f"Measured boot policy {mb_policy_name} not found") -+ return -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 500, "Failed to get mb_policy") -+ raise - -- response = {} -- response["name"] = getattr(mbpolicy, "name", None) -- response["mb_policy"] = getattr(mbpolicy, "mb_policy", None) -- web_util.echo_json_response(self, 200, "Success", response) -+ response = {} -+ response["name"] = getattr(mbpolicy, "name", None) -+ response["mb_policy"] = getattr(mbpolicy, "mb_policy", None) -+ web_util.echo_json_response(self, 200, "Success", response) - - def delete(self) -> None: - """Delete a mb_policy -@@ -1271,45 +1285,44 @@ class MbpolicyHandler(BaseHandler): - if not params_valid or mb_policy_name is None: - return - -- session = get_session() -- try: -- mbpolicy = session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).one() -- except NoResultFound: -- web_util.echo_json_response(self, 404, f"Measured boot policy {mb_policy_name} not found") -- return -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- web_util.echo_json_response(self, 500, "Failed to get mb_policy") -- raise -+ with session_context() as session: -+ try: -+ mbpolicy = session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).one() -+ except NoResultFound: -+ web_util.echo_json_response(self, 404, f"Measured boot policy {mb_policy_name} not found") -+ return -+ 
except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 500, "Failed to get mb_policy") -+ raise - -- try: -- agent = session.query(VerfierMain).filter_by(mb_policy_id=mbpolicy.id).one_or_none() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -- if agent is not None: -- web_util.echo_json_response( -- self, -- 409, -- f"Can't delete mb_policy as it's currently in use by agent {agent.agent_id}", -- ) -- return -+ try: -+ agent = session.query(VerfierMain).filter_by(mb_policy_id=mbpolicy.id).one_or_none() -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise -+ if agent is not None: -+ web_util.echo_json_response( -+ self, -+ 409, -+ f"Can't delete mb_policy as it's currently in use by agent {agent.agent_id}", -+ ) -+ return - -- try: -- session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).delete() -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- session.close() -- web_util.echo_json_response(self, 500, f"Database error: {e}") -- raise -+ try: -+ session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).delete() -+ # session.commit() is automatically called by context manager -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ web_util.echo_json_response(self, 500, f"Database error: {e}") -+ raise - -- # NOTE(kaifeng) 204 Can not have response body, but current helper -- # doesn't support this case. -- self.set_status(204) -- self.set_header("Content-Type", "application/json") -- self.finish() -- logger.info("DELETE returning 204 response for mb_policy: %s", mb_policy_name) -+ # NOTE(kaifeng) 204 Can not have response body, but current helper -+ # doesn't support this case. 
-+ self.set_status(204) -+ self.set_header("Content-Type", "application/json") -+ self.finish() -+ logger.info("DELETE returning 204 response for mb_policy: %s", mb_policy_name) - - def __get_mb_policy_db_format(self, mb_policy_name: str) -> Dict[str, Any]: - """Get the measured boot policy from the request and return it in Db format""" -@@ -1341,30 +1354,30 @@ class MbpolicyHandler(BaseHandler): - if not mb_policy_db_format: - return - -- session = get_session() -- # don't allow overwritting -- try: -- mbpolicy_count = session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).count() -- if mbpolicy_count > 0: -- web_util.echo_json_response( -- self, 409, f"Measured boot policy with name {mb_policy_name} already exists" -- ) -- logger.warning("Measured boot policy with name %s already exists", mb_policy_name) -- return -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -+ with session_context() as session: -+ # don't allow overwritting -+ try: -+ mbpolicy_count = session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).count() -+ if mbpolicy_count > 0: -+ web_util.echo_json_response( -+ self, 409, f"Measured boot policy with name {mb_policy_name} already exists" -+ ) -+ logger.warning("Measured boot policy with name %s already exists", mb_policy_name) -+ return -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise - -- try: -- # Add the data -- session.add(VerifierMbpolicy(**mb_policy_db_format)) -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -+ try: -+ # Add the data -+ session.add(VerifierMbpolicy(**mb_policy_db_format)) -+ # session.commit() is automatically called by context manager -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise - -- web_util.echo_json_response(self, 201) -- logger.info("POST returning 201") -+ web_util.echo_json_response(self, 201) -+ logger.info("POST returning 201") - - def 
put(self) -> None: - """Update an mb_policy -@@ -1381,32 +1394,32 @@ class MbpolicyHandler(BaseHandler): - if not mb_policy_db_format: - return - -- session = get_session() -- # don't allow creating a new policy -- try: -- mbpolicy_count = session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).count() -- if mbpolicy_count != 1: -- web_util.echo_json_response( -- self, 409, f"Measured boot policy with name {mb_policy_name} does not already exist" -- ) -- logger.warning("Measured boot policy with name %s does not already exist", mb_policy_name) -- return -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -+ with session_context() as session: -+ # don't allow creating a new policy -+ try: -+ mbpolicy_count = session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).count() -+ if mbpolicy_count != 1: -+ web_util.echo_json_response( -+ self, 409, f"Measured boot policy with name {mb_policy_name} does not already exist" -+ ) -+ logger.warning("Measured boot policy with name %s does not already exist", mb_policy_name) -+ return -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise - -- try: -- # Update the named mb_policy -- session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).update( -- mb_policy_db_format # pyright: ignore -- ) -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -- raise -+ try: -+ # Update the named mb_policy -+ session.query(VerifierMbpolicy).filter_by(name=mb_policy_name).update( -+ mb_policy_db_format # pyright: ignore -+ ) -+ # session.commit() is automatically called by context manager -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) -+ raise - -- web_util.echo_json_response(self, 201) -- logger.info("PUT returning 201") -+ web_util.echo_json_response(self, 201) -+ logger.info("PUT returning 201") - - def data_received(self, chunk: Any) -> None: - raise NotImplementedError() -@@ -1460,17 
+1473,18 @@ async def update_agent_api_version(agent: Dict[str, Any], timeout: float = 60.0) - return None - - logger.info("Agent %s new API version %s is supported", agent_id, new_version) -- session = get_session() -- agent["supported_version"] = new_version - -- # Remove keys that should not go to the DB -- agent_db = dict(agent) -- for key in exclude_db: -- if key in agent_db: -- del agent_db[key] -+ with session_context() as session: -+ agent["supported_version"] = new_version - -- session.query(VerfierMain).filter_by(agent_id=agent_id).update(agent_db) # pyright: ignore -- session.commit() -+ # Remove keys that should not go to the DB -+ agent_db = dict(agent) -+ for key in exclude_db: -+ if key in agent_db: -+ del agent_db[key] -+ -+ session.query(VerfierMain).filter_by(agent_id=agent_id).update(agent_db) # pyright: ignore -+ # session.commit() is automatically called by context manager - else: - logger.warning("Agent %s new API version %s is not supported", agent_id, new_version) - return None -@@ -1718,50 +1732,68 @@ async def notify_error( - revocation_notifier.notify(tosend) - if "agent" in notifiers: - verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) -- session = get_session() -- agents = session.query(VerfierMain).filter_by(verifier_id=verifier_id).all() -- futures = [] -- loop = asyncio.get_event_loop() -- # Notify all agents asynchronously through a thread pool -- with ThreadPoolExecutor() as pool: -- for agent_db_obj in agents: -- if agent_db_obj.agent_id != agent["agent_id"]: -- agent = _from_db_obj(agent_db_obj) -- if agent["mtls_cert"] and agent["mtls_cert"] != "disabled": -- agent["ssl_context"] = web_util.generate_agent_tls_context( -- "verifier", agent["mtls_cert"], logger=logger -- ) -- func = functools.partial(invoke_notify_error, agent, tosend, timeout=timeout) -- futures.append(await loop.run_in_executor(pool, func)) -- # Wait for all tasks complete in 60 seconds -- try: -- for f in 
asyncio.as_completed(futures, timeout=60): -- await f -- except asyncio.TimeoutError as e: -- logger.error("Timeout during notifying error to agents: %s", e) -+ with session_context() as session: -+ agents = session.query(VerfierMain).filter_by(verifier_id=verifier_id).all() -+ futures = [] -+ loop = asyncio.get_event_loop() -+ # Notify all agents asynchronously through a thread pool -+ with ThreadPoolExecutor() as pool: -+ for agent_db_obj in agents: -+ if agent_db_obj.agent_id != agent["agent_id"]: -+ agent = _from_db_obj(agent_db_obj) -+ if agent["mtls_cert"] and agent["mtls_cert"] != "disabled": -+ agent["ssl_context"] = web_util.generate_agent_tls_context( -+ "verifier", agent["mtls_cert"], logger=logger -+ ) -+ func = functools.partial(invoke_notify_error, agent, tosend, timeout=timeout) -+ futures.append(await loop.run_in_executor(pool, func)) -+ # Wait for all tasks complete in 60 seconds -+ try: -+ for f in asyncio.as_completed(futures, timeout=60): -+ await f -+ except asyncio.TimeoutError as e: -+ logger.error("Timeout during notifying error to agents: %s", e) - - - async def process_agent( - agent: Dict[str, Any], new_operational_state: int, failure: Failure = Failure(Component.INTERNAL, ["verifier"]) - ) -> None: -- session = get_session() - try: # pylint: disable=R1702 - main_agent_operational_state = agent["operational_state"] - stored_agent = None -- try: -- stored_agent = ( -- session.query(VerfierMain) -- .options( # type: ignore -- joinedload(VerfierMain.ima_policy).load_only(VerifierAllowlist.checksum) # pyright: ignore -- ) -- .options( # type: ignore -- joinedload(VerfierMain.mb_policy).load_only(VerifierMbpolicy.mb_policy) # pyright: ignore -+ -+ # First database operation - read agent data and extract all needed data within session context -+ ima_policy_data = {} -+ mb_policy_data = None -+ with session_context() as session: -+ try: -+ stored_agent = ( -+ session.query(VerfierMain) -+ .options( # type: ignore -+ 
joinedload(VerfierMain.ima_policy) # Load full IMA policy object including content -+ ) -+ .options( # type: ignore -+ joinedload(VerfierMain.mb_policy).load_only(VerifierMbpolicy.mb_policy) # pyright: ignore -+ ) -+ .filter_by(agent_id=str(agent["agent_id"])) -+ .first() - ) -- .filter_by(agent_id=str(agent["agent_id"])) -- .first() -- ) -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error for agent ID %s: %s", agent["agent_id"], e) -+ -+ # Extract IMA policy data within session context to avoid DetachedInstanceError -+ if stored_agent and stored_agent.ima_policy: -+ ima_policy_data = { -+ "checksum": str(stored_agent.ima_policy.checksum), -+ "name": stored_agent.ima_policy.name, -+ "agent_id": str(stored_agent.agent_id), -+ "ima_policy": stored_agent.ima_policy.ima_policy, # Extract the large content too -+ } -+ -+ # Extract MB policy data within session context -+ if stored_agent and stored_agent.mb_policy: -+ mb_policy_data = stored_agent.mb_policy.mb_policy -+ -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error for agent ID %s: %s", agent["agent_id"], e) - - # if the stored agent could not be recovered from the database, stop polling - if not stored_agent: -@@ -1775,7 +1807,10 @@ async def process_agent( - logger.warning("Agent %s terminated by user.", agent["agent_id"]) - if agent["pending_event"] is not None: - tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) -- verifier_db_delete_agent(session, agent["agent_id"]) -+ -+ # Second database operation - delete agent -+ with session_context() as session: -+ verifier_db_delete_agent(session, agent["agent_id"]) - return - - # if the user tells us to stop polling because the tenant quote check failed -@@ -1808,11 +1843,16 @@ async def process_agent( - if not failure.recoverable or failure.highest_severity == MAX_SEVERITY_LABEL: - if agent["pending_event"] is not None: - tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) -- for key in exclude_db: -- 
if key in agent: -- del agent[key] -- session.query(VerfierMain).filter_by(agent_id=agent["agent_id"]).update(agent) # pyright: ignore -- session.commit() -+ -+ # Third database operation - update agent with failure state -+ with session_context() as session: -+ for key in exclude_db: -+ if key in agent: -+ del agent[key] -+ session.query(VerfierMain).filter_by(agent_id=agent["agent_id"]).update( -+ agent # type: ignore[arg-type] -+ ) -+ # session.commit() is automatically called by context manager - - # propagate all state, but remove none DB keys first (using exclude_db) - try: -@@ -1821,18 +1861,18 @@ async def process_agent( - if key in agent_db: - del agent_db[key] - -- session.query(VerfierMain).filter_by(agent_id=agent_db["agent_id"]).update(agent_db) # pyright: ignore -- session.commit() -+ # Fourth database operation - update agent state -+ with session_context() as session: -+ session.query(VerfierMain).filter_by(agent_id=agent_db["agent_id"]).update(agent_db) # pyright: ignore -+ # session.commit() is automatically called by context manager - except SQLAlchemyError as e: - logger.error("SQLAlchemy Error for agent ID %s: %s", agent["agent_id"], e) - - # Load agent's IMA policy -- runtime_policy = verifier_read_policy_from_cache(stored_agent) -+ runtime_policy = verifier_read_policy_from_cache(ima_policy_data) - - # Get agent's measured boot policy -- mb_policy = None -- if stored_agent.mb_policy is not None: -- mb_policy = stored_agent.mb_policy.mb_policy -+ mb_policy = mb_policy_data - - # If agent was in a failed state we check if we either stop polling - # or just add it again to the event loop -@@ -1876,7 +1916,14 @@ async def process_agent( - ) - - pending = tornado.ioloop.IOLoop.current().call_later( -- interval, invoke_get_quote, agent, mb_policy, runtime_policy, False, timeout=timeout # type: ignore # due to python <3.9 -+ # type: ignore # due to python <3.9 -+ interval, -+ invoke_get_quote, -+ agent, -+ mb_policy, -+ runtime_policy, -+ False, -+ 
timeout=timeout, - ) - agent["pending_event"] = pending - return -@@ -1911,7 +1958,14 @@ async def process_agent( - next_retry, - ) - tornado.ioloop.IOLoop.current().call_later( -- next_retry, invoke_get_quote, agent, mb_policy, runtime_policy, True, timeout=timeout # type: ignore # due to python <3.9 -+ # type: ignore # due to python <3.9 -+ next_retry, -+ invoke_get_quote, -+ agent, -+ mb_policy, -+ runtime_policy, -+ True, -+ timeout=timeout, - ) - return - -@@ -1980,9 +2034,9 @@ async def activate_agents(agents: List[VerfierMain], verifier_ip: str, verifier_ - - - def get_agents_by_verifier_id(verifier_id: str) -> List[VerfierMain]: -- session = get_session() - try: -- return session.query(VerfierMain).filter_by(verifier_id=verifier_id).all() -+ with session_context() as session: -+ return session.query(VerfierMain).filter_by(verifier_id=verifier_id).all() - except SQLAlchemyError as e: - logger.error("SQLAlchemy Error: %s", e) - return [] -@@ -2007,20 +2061,20 @@ def main() -> None: - os.umask(0o077) - - VerfierMain.metadata.create_all(engine, checkfirst=True) # pyright: ignore -- session = get_session() -- try: -- query_all = session.query(VerfierMain).all() -- for row in query_all: -- if row.operational_state in states.APPROVED_REACTIVATE_STATES: -- row.operational_state = states.START # pyright: ignore -- session.commit() -- except SQLAlchemyError as e: -- logger.error("SQLAlchemy Error: %s", e) -+ with session_context() as session: -+ try: -+ query_all = session.query(VerfierMain).all() -+ for row in query_all: -+ if row.operational_state in states.APPROVED_REACTIVATE_STATES: -+ row.operational_state = states.START # pyright: ignore -+ # session.commit() is automatically called by context manager -+ except SQLAlchemyError as e: -+ logger.error("SQLAlchemy Error: %s", e) - -- num = session.query(VerfierMain.agent_id).count() -- if num > 0: -- agent_ids = session.query(VerfierMain.agent_id).all() -- logger.info("Agent ids in db loaded from file: %s", 
agent_ids) -+ num = session.query(VerfierMain.agent_id).count() -+ if num > 0: -+ agent_ids = session.query(VerfierMain.agent_id).all() -+ logger.info("Agent ids in db loaded from file: %s", agent_ids) - - logger.info("Starting Cloud Verifier (tornado) on port %s, use to stop", verifier_port) - -diff --git a/keylime/da/examples/sqldb.py b/keylime/da/examples/sqldb.py -index 8efc84e..04a8afb 100644 ---- a/keylime/da/examples/sqldb.py -+++ b/keylime/da/examples/sqldb.py -@@ -1,7 +1,10 @@ - import time -+from contextlib import contextmanager -+from typing import Iterator - - import sqlalchemy - import sqlalchemy.ext.declarative -+from sqlalchemy.orm import sessionmaker - - from keylime import keylime_logging - from keylime.da.record import BaseRecordManagement, base_build_key_list -@@ -45,23 +48,23 @@ class RecordManagement(BaseRecordManagement): - BaseRecordManagement.__init__(self, service) - - self.engine = sqlalchemy.create_engine(self.ps_url._replace(fragment="").geturl(), pool_recycle=1800) -- sm = sqlalchemy.orm.sessionmaker() -- self.session = sqlalchemy.orm.scoped_session(sm) -- self.session.configure(bind=self.engine) -- TableBase.metadata.create_all(self.engine) -- -- def agent_list_retrieval(self, record_prefix="auto", service="auto"): -- if record_prefix == "auto": -- record_prefix = "" -- -- agent_list = [] -+ self.SessionLocal = sessionmaker(bind=self.engine) - -- recordtype = self.get_record_type(service) -- tbl = type2table(recordtype) -- for agentid in self.session.query(tbl.agentid).distinct(): # pylint: disable=no-member -- agent_list.append(agentid[0]) -+ # Create tables if they don't exist -+ TableBase.metadata.create_all(self.engine) - -- return agent_list -+ @contextmanager -+ def session_context(self) -> Iterator: -+ """Context manager for database sessions that ensures proper cleanup.""" -+ session = self.SessionLocal() -+ try: -+ yield session -+ session.commit() -+ except Exception: -+ session.rollback() -+ raise -+ finally: -+ 
session.close() - - def record_create( - self, -@@ -84,8 +87,9 @@ class RecordManagement(BaseRecordManagement): - d = {"time": recordtime, "agentid": agentid, "record": rcrd} - - try: -- self.session.add((type2table(recordtype))(**d)) # pylint: disable=no-member -- self.session.commit() # pylint: disable=no-member -+ with self.session_context() as session: -+ session.add((type2table(recordtype))(**d)) -+ # session.commit() is automatically called by context manager - except Exception as e: - logger.error("Failed to create attestation record: %s", e) - -@@ -106,23 +110,23 @@ class RecordManagement(BaseRecordManagement): - if f"{end_date}" == "auto": - end_date = self.end_of_times - -- if self.only_last_record_wanted(start_date, end_date): -- attestion_record_rows = ( -- self.session.query(tbl) # pylint: disable=no-member -- .filter(tbl.agentid == record_identifier) -- .order_by(sqlalchemy.desc(tbl.time)) -- .limit(1) -- ) -- -- else: -- attestion_record_rows = self.session.query(tbl).filter( # pylint: disable=no-member -- tbl.agentid == record_identifier -- ) -- -- for row in attestion_record_rows: -- decoded_record_object = self.record_deserialize(row.record) -- self.record_signature_check(decoded_record_object, record_identifier) -- record_list.append(decoded_record_object) -+ with self.session_context() as session: -+ if self.only_last_record_wanted(start_date, end_date): -+ attestion_record_rows = ( -+ session.query(tbl) -+ .filter(tbl.agentid == record_identifier) -+ .order_by(sqlalchemy.desc(tbl.time)) -+ .limit(1) -+ ) -+ -+ else: -+ attestion_record_rows = session.query(tbl).filter(tbl.agentid == record_identifier) -+ -+ for row in attestion_record_rows: -+ decoded_record_object = self.record_deserialize(row.record) -+ self.record_signature_check(decoded_record_object, record_identifier) -+ record_list.append(decoded_record_object) -+ - return record_list - - def build_key_list(self, agent_identifier, service="auto"): -diff --git a/keylime/db/keylime_db.py 
b/keylime/db/keylime_db.py -index 5620a28..aa49e51 100644 ---- a/keylime/db/keylime_db.py -+++ b/keylime/db/keylime_db.py -@@ -1,7 +1,8 @@ - import os - from configparser import NoOptionError -+from contextlib import contextmanager - from sqlite3 import Connection as SQLite3Connection --from typing import Any, Dict, Optional, cast -+from typing import Any, Iterator, Optional, cast - - from sqlalchemy import create_engine, event - from sqlalchemy.engine import Engine -@@ -22,90 +23,108 @@ def _set_sqlite_pragma(dbapi_connection: SQLite3Connection, _) -> None: - cursor.close() - - --class DBEngineManager: -- service: Optional[str] -- -- def __init__(self) -> None: -- self.service = None -- -- def make_engine(self, service: str) -> Engine: -- """ -- To use: engine = self.make_engine('cloud_verifier') -- """ -- -- # Keep DB related stuff as it is, but read configuration from new -- # configs -- if service == "cloud_verifier": -- config_service = "verifier" -- else: -- config_service = service -- -- self.service = service -- -- try: -- p_sz_m_ovfl = config.get(config_service, "database_pool_sz_ovfl") -- p_sz, m_ovfl = p_sz_m_ovfl.split(",") -- except NoOptionError: -- p_sz = "5" -- m_ovfl = "10" -- -- engine_args: Dict[str, Any] = {} -- -- url = config.get(config_service, "database_url") -- if url: -- logger.info("database_url is set, using it to establish database connection") -- -- # If the keyword sqlite is provided as the database url, use the -- # cv_data.sqlite for the verifier or the file reg_data.sqlite for -- # the registrar, located at the config.WORK_DIR directory -- if url == "sqlite": -+def make_engine(service: str, **engine_args: Any) -> Engine: -+ """Create a database engine for a keylime service.""" -+ # Keep DB related stuff as it is, but read configuration from new -+ # configs -+ if service == "cloud_verifier": -+ config_service = "verifier" -+ else: -+ config_service = service -+ -+ url = config.get(config_service, "database_url") -+ if url: -+ 
logger.info("database_url is set, using it to establish database connection") -+ -+ # If the keyword sqlite is provided as the database url, use the -+ # cv_data.sqlite for the verifier or the file reg_data.sqlite for -+ # the registrar, located at the config.WORK_DIR directory -+ if url == "sqlite": -+ logger.info( -+ "database_url is set as 'sqlite' keyword, using default values to establish database connection" -+ ) -+ if service == "cloud_verifier": -+ database = "cv_data.sqlite" -+ elif service == "registrar": -+ database = "reg_data.sqlite" -+ else: -+ logger.error("Tried to setup database access for unknown service '%s'", service) -+ raise Exception(f"Unknown service '{service}' for database setup") -+ -+ database_file = os.path.abspath(os.path.join(config.WORK_DIR, database)) -+ url = f"sqlite:///{database_file}" -+ -+ kl_dir = os.path.dirname(os.path.abspath(database_file)) -+ if not os.path.exists(kl_dir): -+ os.makedirs(kl_dir, 0o700) -+ -+ engine_args["connect_args"] = {"check_same_thread": False} -+ -+ if not url.count("sqlite:"): -+ # sqlite does not support setting pool size and max overflow, only -+ # read from the config when it is going to be used -+ try: -+ p_sz_m_ovfl = config.get(config_service, "database_pool_sz_ovfl") -+ p_sz, m_ovfl = p_sz_m_ovfl.split(",") -+ logger.info("database_pool_sz_ovfl is set, pool size = %s, max overflow = %s", p_sz, m_ovfl) -+ except NoOptionError: -+ p_sz = "5" -+ m_ovfl = "10" - logger.info( -- "database_url is set as 'sqlite' keyword, using default values to establish database connection" -+ "database_pool_sz_ovfl is not set, using default pool size = %s, max overflow = %s", p_sz, m_ovfl - ) -- if service == "cloud_verifier": -- database = "cv_data.sqlite" -- elif service == "registrar": -- database = "reg_data.sqlite" -- else: -- logger.error("Tried to setup database access for unknown service '%s'", service) -- raise Exception(f"Unknown service '{service}' for database setup") -- -- database_file = 
os.path.abspath(os.path.join(config.WORK_DIR, database)) -- url = f"sqlite:///{database_file}" -- -- kl_dir = os.path.dirname(os.path.abspath(database_file)) -- if not os.path.exists(kl_dir): -- os.makedirs(kl_dir, 0o700) -- -- engine_args["connect_args"] = {"check_same_thread": False} - -- if not url.count("sqlite:"): -- engine_args["pool_size"] = int(p_sz) -- engine_args["max_overflow"] = int(m_ovfl) -- engine_args["pool_pre_ping"] = True -+ engine_args["pool_size"] = int(p_sz) -+ engine_args["max_overflow"] = int(m_ovfl) -+ engine_args["pool_pre_ping"] = True - -- # Enable DB debugging -- if config.DEBUG_DB and config.INSECURE_DEBUG: -- engine_args["echo"] = True -+ # Enable DB debugging -+ if config.DEBUG_DB and config.INSECURE_DEBUG: -+ engine_args["echo"] = True - -- engine = create_engine(url, **engine_args) -- return engine -+ engine = create_engine(url, **engine_args) -+ return engine - - - class SessionManager: - engine: Optional[Engine] -+ _scoped_session: Optional[scoped_session] - - def __init__(self) -> None: - self.engine = None -+ self._scoped_session = None - - def make_session(self, engine: Engine) -> Session: - """ - To use: session = self.make_session(engine) - """ - self.engine = engine -- my_session = scoped_session(sessionmaker()) -+ if self._scoped_session is None: -+ self._scoped_session = scoped_session(sessionmaker()) - try: -- my_session.configure(bind=self.engine) # type: ignore -+ self._scoped_session.configure(bind=self.engine) # type: ignore -+ self._scoped_session.configure(expire_on_commit=False) # type: ignore - except SQLAlchemyError as err: - logger.error("Error creating SQL session manager %s", err) -- return cast(Session, my_session()) -+ return cast(Session, self._scoped_session()) -+ -+ @contextmanager -+ def session_context(self, engine: Engine) -> Iterator[Session]: -+ """ -+ Context manager for database sessions that ensures proper cleanup. 
-+ To use: -+ with session_manager.session_context(engine) as session: -+ # use session -+ """ -+ session = self.make_session(engine) -+ try: -+ yield session -+ session.commit() -+ except Exception: -+ session.rollback() -+ raise -+ finally: -+ # Important: remove the session from the scoped session registry -+ # to prevent connection leaks with scoped_session -+ if self._scoped_session is not None: -+ self._scoped_session.remove() # type: ignore[no-untyped-call] -diff --git a/keylime/migrations/env.py b/keylime/migrations/env.py -index ac98349..a1881f2 100644 ---- a/keylime/migrations/env.py -+++ b/keylime/migrations/env.py -@@ -8,7 +8,7 @@ import sys - - from alembic import context - --from keylime.db.keylime_db import DBEngineManager -+from keylime.db.keylime_db import make_engine - from keylime.db.registrar_db import Base as RegistrarBase - from keylime.db.verifier_db import Base as VerifierBase - -@@ -74,7 +74,7 @@ def run_migrations_offline(): - logger.info("Writing output to %s", file_) - - with open(file_, "w", encoding="utf-8") as buffer: -- engine = DBEngineManager().make_engine(name) -+ engine = make_engine(name) - connection = engine.connect() - context.configure( - connection=connection, -@@ -102,7 +102,7 @@ def run_migrations_online(): - engines = {} - for name in re.split(r",\s*", db_names): - engines[name] = rec = {} -- rec["engine"] = DBEngineManager().make_engine(name) -+ rec["engine"] = make_engine(name) - - for name, rec in engines.items(): - engine = rec["engine"] -diff --git a/keylime/models/base/db.py b/keylime/models/base/db.py -index dd47d63..0229765 100644 ---- a/keylime/models/base/db.py -+++ b/keylime/models/base/db.py -@@ -41,13 +41,6 @@ class DBManager: - - self._service = service - -- try: -- p_sz_m_ovfl = config.get(config_service, "database_pool_sz_ovfl") -- p_sz, m_ovfl = p_sz_m_ovfl.split(",") -- except NoOptionError: -- p_sz = "5" -- m_ovfl = "10" -- - engine_args: Dict[str, Any] = {} - - url = config.get(config_service, 
"database_url") -@@ -79,6 +72,21 @@ class DBManager: - engine_args["connect_args"] = {"check_same_thread": False} - - if not url.count("sqlite:"): -+ # sqlite does not support setting pool size and max overflow, only -+ # read from the config when it is going to be used -+ try: -+ p_sz_m_ovfl = config.get(config_service, "database_pool_sz_ovfl") -+ p_sz, m_ovfl = p_sz_m_ovfl.split(",") -+ logger.info("database_pool_sz_ovfl is set, pool size = %s, max overflow = %s", p_sz, m_ovfl) -+ except NoOptionError: -+ p_sz = "5" -+ m_ovfl = "10" -+ logger.info( -+ "database_pool_sz_ovfl is not set, using default pool size = %s, max overflow = %s", -+ p_sz, -+ m_ovfl, -+ ) -+ - engine_args["pool_size"] = int(p_sz) - engine_args["max_overflow"] = int(m_ovfl) - engine_args["pool_pre_ping"] = True -diff --git a/keylime/models/base/persistable_model.py b/keylime/models/base/persistable_model.py -index 18f7d0d..a779f0b 100644 ---- a/keylime/models/base/persistable_model.py -+++ b/keylime/models/base/persistable_model.py -@@ -207,10 +207,16 @@ class PersistableModel(BasicModel, metaclass=PersistableModelMeta): - setattr(self._db_mapping_inst, name, field.data_type.db_dump(value, db_manager.engine.dialect)) - - with db_manager.session_context() as session: -- session.add(self._db_mapping_inst) -+ # Merge the potentially detached object into the new session -+ merged_instance = session.merge(self._db_mapping_inst) -+ session.add(merged_instance) -+ # Update our reference to the merged instance -+ self._db_mapping_inst = merged_instance # pylint: disable=attribute-defined-outside-init - - self.clear_changes() - - def delete(self) -> None: - with db_manager.session_context() as session: -- session.delete(self._db_mapping_inst) # type: ignore[no-untyped-call] -+ # Merge the potentially detached object into the new session before deleting -+ merged_instance = session.merge(self._db_mapping_inst) -+ session.delete(merged_instance) # type: ignore[no-untyped-call] -diff --git a/packit-ci.fmf 
b/packit-ci.fmf -index 2d1e5e5..cb64faf 100644 ---- a/packit-ci.fmf -+++ b/packit-ci.fmf -@@ -101,6 +101,7 @@ adjust: - - /regression/CVE-2023-3674 - - /regression/issue-1380-agent-removed-and-re-added - - /regression/keylime-agent-option-override-through-envvar -+ - /regression/db-connection-leak-reproducer - - /sanity/keylime-secure_mount - - /sanity/opened-conf-files - - /upstream/run_keylime_tests -diff --git a/test/test_verifier_db.py b/test/test_verifier_db.py -index ad72fa6..aae8f8a 100644 ---- a/test/test_verifier_db.py -+++ b/test/test_verifier_db.py -@@ -172,3 +172,102 @@ class TestVerfierDB(unittest.TestCase): - - def tearDown(self): - self.session.close() -+ -+ def test_11_relationship_access_after_session_commit(self): -+ """Test that relationships can be accessed after session commits (DetachedInstanceError fix)""" -+ # This test reproduces the problematic pattern from cloud_verifier_tornado.py -+ # where objects are loaded with joinedload and then accessed after session closes -+ -+ # Create a new session manager and context (like in cloud_verifier_tornado.py) -+ session_manager = SessionManager() -+ -+ # First, load the agent with eager loading for relationships -+ stored_agent = None -+ with session_manager.session_context(self.engine) as session: -+ stored_agent = ( -+ session.query(VerfierMain) -+ .options(joinedload(VerfierMain.ima_policy)) -+ .options(joinedload(VerfierMain.mb_policy)) -+ .filter_by(agent_id=agent_id) -+ .first() -+ ) -+ # Verify agent was loaded correctly -+ self.assertIsNotNone(stored_agent) -+ # session.commit() is automatically called by context manager when exiting -+ -+ # Now verify we can access relationships AFTER the session has been closed -+ # This would previously trigger DetachedInstanceError -+ -+ # Ensure stored_agent is not None before proceeding -+ assert stored_agent is not None -+ -+ # Test accessing ima_policy relationship -+ self.assertIsNotNone(stored_agent.ima_policy) -+ assert stored_agent.ima_policy is 
not None # Type narrowing for linter -+ self.assertEqual(stored_agent.ima_policy.name, "test-allowlist") -+ # checksum is not set in test data -+ self.assertEqual(stored_agent.ima_policy.checksum, None) -+ -+ # Test accessing the ima_policy.ima_policy attribute (similar to verifier_read_policy_from_cache) -+ ima_policy_content = stored_agent.ima_policy.ima_policy -+ self.assertEqual(ima_policy_content, test_allowlist_data["ima_policy"]) -+ -+ # Test accessing mb_policy relationship -+ self.assertIsNotNone(stored_agent.mb_policy) -+ assert stored_agent.mb_policy is not None # Type narrowing for linter -+ self.assertEqual(stored_agent.mb_policy.name, "test-mbpolicy") -+ -+ # Test accessing the mb_policy.mb_policy attribute (similar to process_agent function) -+ mb_policy_content = stored_agent.mb_policy.mb_policy -+ self.assertEqual(mb_policy_content, test_mbpolicy_data["mb_policy"]) -+ -+ # Test that we can access these relationships multiple times without issues -+ for _ in range(3): -+ self.assertIsNotNone(stored_agent.ima_policy.ima_policy) -+ self.assertIsNotNone(stored_agent.mb_policy.mb_policy) -+ -+ def test_12_persistable_model_cross_session_fix(self): -+ """Test that PersistableModel can handle cross-session operations safely""" -+ # This test would previously fail with DetachedInstanceError before the fix -+ # Note: This is a conceptual test since we don't have actual PersistableModel -+ # subclasses in the test environment, but demonstrates the pattern -+ -+ # Simulate creating a SQLAlchemy object in one session -+ session_manager = SessionManager() -+ -+ # Load an object in one session context -+ test_agent = None -+ with session_manager.session_context(self.engine) as session: -+ test_agent = session.query(VerfierMain).filter_by(agent_id=agent_id).first() -+ self.assertIsNotNone(test_agent) -+ # Session closes here -+ -+ # Ensure test_agent is not None before proceeding -+ assert test_agent is not None -+ -+ # Now simulate using this object in a 
different session context -+ # This tests the pattern where PersistableModel would use session.add() or session.delete() -+ # on a cross-session object -+ with session_manager.session_context(self.engine) as session: -+ # Before the fix, this would cause DetachedInstanceError -+ # The fix uses session.merge() to handle detached objects safely -+ merged_agent = session.merge(test_agent) -+ assert merged_agent is not None # Type narrowing for linter -+ -+ # Test that we can modify and save the merged object -+ original_port = merged_agent.port -+ # Use setattr to avoid linter issues with Column assignment -+ setattr(merged_agent, "port", 9999) -+ session.add(merged_agent) -+ # session.commit() called automatically by context manager -+ -+ # Verify the change was persisted -+ with session_manager.session_context(self.engine) as session: -+ updated_agent = session.query(VerfierMain).filter_by(agent_id=agent_id).first() -+ assert updated_agent is not None # Type narrowing for linter -+ self.assertEqual(updated_agent.port, 9999) -+ -+ # Restore original value -+ # Use setattr to avoid linter issues -+ setattr(updated_agent, "port", original_port) -+ session.add(updated_agent) diff --git a/keylime.spec b/keylime.spec index f9b53a0..d72e29a 100644 --- a/keylime.spec +++ b/keylime.spec @@ -4,7 +4,7 @@ ## END: Set by rpmautospec %global srcname keylime -%global policy_version 42.1.2 +%global policy_version 43.2.1 # Package is actually noarch, but it has an optional dependency that is # arch-specific. 
@@ -13,8 +13,8 @@ %global selinuxtype targeted Name: keylime -Version: 7.12.1 -Release: 11%{?dist}.4 +Version: 7.14.1 +Release: 5%{?dist} Summary: Open source TPM software for Bootstrapping and Maintaining Trust URL: https://github.com/keylime/keylime @@ -24,44 +24,40 @@ Source1: https://github.com/RedHat-SP-Security/%{name}-selinux/archive/v% Source2: %{srcname}.sysusers Source3: %{srcname}.tmpfiles -# Backported from https://github.com/keylime/keylime/pull/1782 -# Fixes DB connections leaks (https://issues.redhat.com/browse/RHEL-102995) -Patch: keylime-fix-db-connection-leaks.patch +Patch: 0001-Fix-timestamp-conversion-to-use-UTC-timezone.patch +Patch: 0002-Fix-efivar-availability-check-in-test_create_mb_poli.patch +Patch: 0003-Close-DB-sessions-to-prevent-connection-exhaustion.patch +Patch: 0004-Include-thread-safe-session-management.patch +Patch: 0005-Address-some-improvements-from-code-review.patch +Patch: 0006-Fix-race-condition-on-in-SessionManager.patch +Patch: 0007-Fix-linter-errors-in-PersistableModel.get-and-.all.patch +Patch: 0008-refactor-Remove-dead-code-AuthSession.authenticate_a.patch +Patch: 0009-db-Clean-up-scoped-session-after-each-request.patch +Patch: 0010-fix-Check-active-flag-in-_extract_identity-and-guard.patch +Patch: 0011-fix-Add-fork-safety-to-DBManager-via-dispose.patch -# Backported from https://github.com/keylime/keylime/pull/1791 -Patch: 0002-mb-support-EV_EFI_HANDOFF_TABLES-events-on-PCR1.patch -Patch: 0003-mb-support-vendor_db-as-logged-by-newer-shim-version.patch +# RHEL-154295 - memleaks in verifier push-mode. 
+# Backport https://github.com/keylime/keylime/pull/1866 +Patch: 0012-fix-mem-leak-remove-unbounded-functools.cache-from-l.patch -# Backported from https://github.com/keylime/keylime/pull/1784 -# and https://github.com/keylime/keylime/pull/1785 -Patch: 0004-verifier-Gracefully-shutdown-on-signal.patch -Patch: 0005-revocations-Try-to-send-notifications-on-shutdown.patch -Patch: 0006-requests_client-close-the-session-at-the-end-of-the-.patch +# RHEL-153121 - fix verifier race condition on agent delete. +# Backport https://github.com/keylime/keylime/pull/1874 +Patch: 0013-fix-verifier-race-condition-on-agent-delete.patch -# Backported from https://github.com/keylime/keylime/pull/1736, -# https://github.com/keylime/keylime/commit/11c6b7f and -# https://github.com/keylime/keylime/commit/dd63459 -Patch: 0007-tests-change-test_mba_parsing-to-not-need-keylime-in.patch -Patch: 0008-tests-skip-measured-boot-related-tests-for-s390x-and.patch -Patch: 0009-tests-fix-rpm-repo-tests-from-create-runtime-policy.patch - -# Backported from https://github.com/keylime/keylime/pull/1793 -Patch: 0010-mba-normalize-vendor_db-in-EV_EFI_VARIABLE_AUTHORITY.patch - -# Backported from https://github.com/keylime/keylime/pull/1794 -Patch: 0011-fix-malformed-certs-workaround.patch -# Backported from https://github.com/keylime/keylime/pull/1795 -Patch: 0012-keylime-policy-avoid-opening-dev-stdout.patch - -# CVE-2025-13609 -# Backports from: -# - https://github.com/keylime/keylime/pull/1817/commits/1024e19d -# - https://github.com/keylime/keylime/pull/1825 -Patch: 0013-Add-shared-memory-infrastructure-for-multiprocess-co.patch -Patch: 0014-Fix-registrar-duplicate-UUID-vulnerability.patch - -# CVE-2026-1709 -Patch: 0015-CVE-2026-1709.patch +# RHEL-151493 - verifier graceful shutdown. 
+# Backport: +# - https://github.com/keylime/keylime/pull/1809 +# - https://github.com/keylime/keylime/pull/1868 +# - https://github.com/keylime/keylime/pull/1855 +# - https://github.com/keylime/keylime/pull/1869 +# - https://github.com/keylime/keylime/pull/1883 +# - https://github.com/keylime/keylime/pull/1886 +Patch: 0014-push-attestation-documentation.patch +Patch: 0015-remove-enable-authentication-config-option.patch +Patch: 0016-docs-push-attestation-config-tables.patch +Patch: 0017-verifier-graceful-shutdown.patch +Patch: 0018-ignore-sigterm-sigint-manager-parent-processes.patch +Patch: 0019-move-socket-var-run.patch # Main program: Apache-2.0 # Icons: MIT @@ -74,13 +70,16 @@ BuildRequires: python3-devel BuildRequires: python3-dbus BuildRequires: python3-jinja2 BuildRequires: python3-cryptography +BuildRequires: python3-docutils BuildRequires: python3-gpg BuildRequires: python3-pyasn1 BuildRequires: python3-pyasn1-modules +BuildRequires: python3-requests BuildRequires: python3-tornado BuildRequires: python3-sqlalchemy BuildRequires: python3-lark BuildRequires: python3-psutil +BuildRequires: python3-pytest BuildRequires: python3-pyyaml BuildRequires: python3-jsonschema BuildRequires: python3-setuptools @@ -256,6 +255,12 @@ bzip2 -9 %{srcname}.pp %build %py3_build +mkdir -p manpages +rst2man --syntax-highlight=none docs/man/keylime_tenant.1.rst manpages/keylime_tenant.1 +rst2man --syntax-highlight=none docs/man/keylime-policy.1.rst manpages/keylime-policy.1 +rst2man --syntax-highlight=none docs/man/keylime_registrar.8.rst manpages/keylime_registrar.8 +rst2man --syntax-highlight=none docs/man/keylime_verifier.8.rst manpages/keylime_verifier.8 + %install %py3_install mkdir -p %{buildroot}/%{_sharedstatedir}/%{srcname} @@ -277,8 +282,10 @@ done # Ship the ek-openssl-verify script. 
mkdir -p %{buildroot}/%{_datadir}/%{srcname}/scripts -install -Dpm 755 scripts/ek-openssl-verify \ - %{buildroot}/%{_datadir}/%{srcname}/scripts/ek-openssl-verify +for s in ek-openssl-verify keylime_oneshot_attestation; do + install -Dpm 755 scripts/"${s}" \ + %{buildroot}/%{_datadir}/%{srcname}/scripts/"${s}" +done # Ship configuration templates. cp -r ./templates %{buildroot}%{_datadir}/%{srcname}/templates/ @@ -308,6 +315,14 @@ done install -p -D -m 0644 %{SOURCE2} %{buildroot}/%{_sysusersdir}/%{srcname}.conf install -p -D -m 0644 %{SOURCE3} %{buildroot}/%{_tmpfilesdir}/%{name}.conf +# Install manpages +install -d %{buildroot}%{_mandir}/man1 +install -d %{buildroot}%{_mandir}/man8 +install -m 644 manpages/keylime_tenant.1 %{buildroot}%{_mandir}/man1/ +install -m 644 manpages/keylime-policy.1 %{buildroot}%{_mandir}/man1/ +install -m 644 manpages/keylime_registrar.8 %{buildroot}%{_mandir}/man8/ +install -m 644 manpages/keylime_verifier.8 %{buildroot}%{_mandir}/man8/ + %check # Create the default configuration files to be used by the tests. # Also set the associated environment variables so that the tests @@ -322,7 +337,7 @@ export KEYLIME_CA_CONFIG="${CONF_TEMP_DIR}/ca.conf" export KEYLIME_LOGGING_CONFIG="${CONF_TEMP_DIR}/logging.conf" # Run the tests. -%{python3} -m unittest +%pytest # Cleanup. 
[ "${CONF_TEMP_DIR}" ] && rm -rf "${CONF_TEMP_DIR}" @@ -423,6 +438,7 @@ fi %{_bindir}/%{srcname}_verifier %{_bindir}/%{srcname}_ca %{_unitdir}/keylime_verifier.service +%{_mandir}/man8/keylime_verifier.8* %files registrar %license LICENSE @@ -430,6 +446,7 @@ fi %config(noreplace) %verify(not md5 size mode mtime) %attr(400,%{srcname},%{srcname}) %{_sysconfdir}/%{srcname}/registrar.conf %{_bindir}/%{srcname}_registrar %{_unitdir}/keylime_registrar.service +%{_mandir}/man8/keylime_registrar.8* %if 0%{?with_selinux} %files selinux @@ -443,6 +460,7 @@ fi %attr(500,%{srcname},%{srcname}) %dir %{_sysconfdir}/%{srcname}/tenant.conf.d %config(noreplace) %verify(not md5 size mode mtime) %attr(400,%{srcname},%{srcname}) %{_sysconfdir}/%{srcname}/tenant.conf %{_bindir}/%{srcname}_tenant +%{_mandir}/man1/keylime_tenant.1* %files -n python3-%{srcname} %license LICENSE @@ -450,6 +468,7 @@ fi %{python3_sitelib}/%{srcname} %{_bindir}/keylime_attest %{_bindir}/keylime-policy +%{_mandir}/man1/keylime-policy.1* %files tools @@ -465,13 +484,14 @@ fi %config(noreplace) %verify(not md5 size mode mtime) %attr(400,%{srcname},%{srcname}) %{_sysconfdir}/%{srcname}/logging.conf %attr(700,%{srcname},%{srcname}) %dir %{_rundir}/%{srcname} %attr(700,%{srcname},%{srcname}) %dir %{_sharedstatedir}/%{srcname} -%attr(500,%{srcname},%{srcname}) %dir %{_datadir}/%{srcname}/tpm_cert_store -%attr(400,%{srcname},%{srcname}) %{_datadir}/%{srcname}/tpm_cert_store/*.pem +%attr(755,root,root) %dir %{_datadir}/%{srcname}/tpm_cert_store +%attr(644,root,root) %{_datadir}/%{srcname}/tpm_cert_store/*.pem %attr(500,%{srcname},%{srcname}) %dir %{_sharedstatedir}/%{srcname}/tpm_cert_store %attr(400,%{srcname},%{srcname}) %{_sharedstatedir}/%{srcname}/tpm_cert_store/*.pem %{_tmpfilesdir}/%{srcname}.conf %{_sysusersdir}/%{srcname}.conf %{_datadir}/%{srcname}/scripts/ek-openssl-verify +%{_datadir}/%{srcname}/scripts/keylime_oneshot_attestation %{_datadir}/%{srcname}/templates %{_bindir}/keylime_upgrade_config @@ 
-480,11 +500,31 @@ fi %changelog ## START: Generated by rpmautospec -* Tue Feb 03 2026 Anderson Toshiyuki Sasaki - 7.12.1-16 -- CVE-2026-1709: Registrar authentication bypass +* Fri Apr 17 2026 Anderson Toshiyuki Sasaki - 7.14.1-5 +- Implement verifier graceful shutdown -* Thu Dec 11 2025 Sergio Correia - 7.12.1-15 -- Registrar allows identity takeover via duplicate UUID registration +* Tue Apr 14 2026 Sergio Arroutbi - 7.14.1-4 +- Fix verifier race condition on agent delete + +* Wed Apr 01 2026 Sergio Correia - 7.14.1-3 +- Remove unbounded functools.cache from latest_attestation + +* Mon Mar 23 2026 Sergio Arroutbi - 7.14.1-2 +- Add patches to fix DB connection leaks + +* Fri Feb 13 2026 Sergio Correia - 7.14.1-1 +- Updating for Keylime release v7.14.1 + +* Mon Feb 02 2026 Sergio Correia - 7.12.1-17 +- Change ownership of /usr/share/keylime/tpm_cert_store to root + +* Wed Oct 15 2025 Marek Safarik - 7.12.1-16 +- Added manpages for keylime services and the tenant +- Added support for ECC attestation +- Fixed man page RST formatting for rst2man compatibility + +* Mon Oct 06 2025 Sergio Correia - 7.12.1-15 +- Add support for ECC attestation * Mon Sep 15 2025 Anderson Toshiyuki Sasaki - 7.12.1-14 - Properly fix malformed TPM certificates workaround diff --git a/sources b/sources index be30057..bdfcc2e 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -SHA512 (keylime-selinux-42.1.2.tar.gz) = cb7b7b10d1d81af628a7ffdadc1be5af6d75851a44f58cff04edc575cbba1613447e56bfa1fb86660ec7c15e5fcf16ba51f2984094550ba3e08f8095b800b741 -SHA512 (v7.12.1.tar.gz) = c1297ebfc659102d73283255cfda4a977dfbff9bdd3748e05de405dadb70f752ad39aa5848edda9143d8ec620d07c21f1551fa4a914c99397620ab1682e58458 +SHA512 (keylime-selinux-43.2.1.tar.gz) = 8cb8b032819d3b87e1dceaa7094385b4468c0d6be1e5dfc6d8b6758e6281def5255120ff34d71b5d4bc7fe9b9e960f1a98011e5bf7149df5704d0bbf6afbfad3 +SHA512 (v7.14.1.tar.gz) = 
d94cd1e25ec31e43fea05d0c404dd25c05b6b28435db2f8ca34546f6ff8bfd5da12d2dcd3b5cf4772c44688ae8968468dc2470da23596714e7615dbf6dfbe841