From eca2d398c11e752c7ec3401b971296496de8b192 Mon Sep 17 00:00:00 2001 From: Anderson Toshiyuki Sasaki Date: Thu, 9 Apr 2026 17:42:26 +0200 Subject: [PATCH] Implement verifier graceful shutdown The included patches implement graceful shutdown for both pull and push models, cancelling pending operations, and waiting for critical in-flight operations to finish before shutting down. Backport the following upstream PRs: - https://github.com/keylime/keylime/pull/1809 - Document supported configuration options - Sync missing and removed options from configuration templates - https://github.com/keylime/keylime/pull/1868 - Remove 'enable_authentication' from agent config templates - https://github.com/keylime/keylime/pull/1855 - Add push-model documentation - https://github.com/keylime/keylime/pull/1869 - Add verifier graceful shutdown - https://github.com/keylime/keylime/pull/1883 - Ignore SIGTERM and SIGINT signals on Manager and parent processes - https://github.com/keylime/keylime/pull/1886 - Move socket from /tmp to /var/run/keylime Also, update the keylime-selinux to the latest release (43.2.1) to include the following changes: - https://github.com/RedHat-SP-Security/keylime-selinux/pull/33 - Allow Keylime to perform socket operation on /var/run/keylime - https://github.com/RedHat-SP-Security/keylime-selinux/pull/34 - Allow Keylime to read /proc/net to populate certificates Subject Alternative Names (SAN) Documentation updates and configuration template updates were included to allow the graceful shutdown patch to apply cleanly. This also modifies the test runner to use pytest, adding python3-pytest to the BuildRequires. This was necessary so that the fixtures defined in conftest.py are used, which is not possible when running the tests with unittest. 
Resolves: RHEL-151493 Resolves: RHEL-151408 Signed-off-by: Anderson Toshiyuki Sasaki --- .gitignore | 1 + 0014-push-attestation-documentation.patch | 1910 +++++++++++++ ...-enable-authentication-config-option.patch | 46 + ...-docs-push-attestation-config-tables.patch | 1164 ++++++++ 0017-verifier-graceful-shutdown.patch | 2373 +++++++++++++++++ ...term-sigint-manager-parent-processes.patch | 151 ++ 0019-move-socket-var-run.patch | 348 +++ keylime.spec | 22 +- sources | 2 +- 9 files changed, 6013 insertions(+), 4 deletions(-) create mode 100644 0014-push-attestation-documentation.patch create mode 100644 0015-remove-enable-authentication-config-option.patch create mode 100644 0016-docs-push-attestation-config-tables.patch create mode 100644 0017-verifier-graceful-shutdown.patch create mode 100644 0018-ignore-sigterm-sigint-manager-parent-processes.patch create mode 100644 0019-move-socket-var-run.patch diff --git a/.gitignore b/.gitignore index a9504a0..dc40567 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,4 @@ /keylime-selinux-42.1.2.tar.gz /v7.14.1.tar.gz /keylime-selinux-43.1.1.tar.gz +/keylime-selinux-43.2.1.tar.gz diff --git a/0014-push-attestation-documentation.patch b/0014-push-attestation-documentation.patch new file mode 100644 index 0000000..ae9bf4b --- /dev/null +++ b/0014-push-attestation-documentation.patch @@ -0,0 +1,1910 @@ +From 077762aa335de0cf99e190bd5afb5b77f5403a89 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Tue, 17 Feb 2026 16:43:04 +0100 +Subject: [PATCH] Document agent-driven (push) attestation + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/assets/push-model-architecture.svg | 86 ++++ + docs/assets/push-model-sequence.svg | 122 +++++ + docs/conf.py | 1 + + docs/design.rst | 1 + + docs/design/overview.rst | 11 +- + docs/design/push_model.rst | 226 +++++++++ + docs/index.rst | 1 + + docs/installation.rst | 11 + + docs/man/keylime_push_model_agent.8.rst | 226 +++++++++ + 
docs/man/keylime_verifier.8.rst | 3 +- + docs/rest_apis.rst | 30 ++ + docs/rest_apis/3_0/3_0.rst | 21 + + docs/rest_apis/3_0/verifier.rst | 608 ++++++++++++++++++++++++ + docs/user_guide.rst | 1 + + docs/user_guide/configuration.rst | 7 + + docs/user_guide/push_model.rst | 370 ++++++++++++++ + 16 files changed, 1721 insertions(+), 4 deletions(-) + create mode 100644 docs/assets/push-model-architecture.svg + create mode 100644 docs/assets/push-model-sequence.svg + create mode 100644 docs/design/push_model.rst + create mode 100644 docs/man/keylime_push_model_agent.8.rst + create mode 100644 docs/rest_apis/3_0/3_0.rst + create mode 100644 docs/rest_apis/3_0/verifier.rst + create mode 100644 docs/user_guide/push_model.rst + +diff --git a/docs/assets/push-model-architecture.svg b/docs/assets/push-model-architecture.svg +new file mode 100644 +index 000000000..82a5672f4 +--- /dev/null ++++ b/docs/assets/push-model-architecture.svg +@@ -0,0 +1,86 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Push-Model Architecture ++ ++ ++ Pull Model (traditional) ++ ++ ++ ++ Agent ++ (server, port 9002) ++ ++ ++ ++ Registrar ++ ++ ++ ++ Verifier ++ ++ ++ ++ register ++ ++ ++ ++ poll quotes ++ ++ ++ ++ ++ ++ Push Model (new) ++ ++ ++ ++ Agent ++ (client, no ports) ++ ++ ++ ++ Registrar ++ ++ ++ ++ Verifier ++ ++ ++ ++ register ++ ++ ++ ++ push evidence ++ ++ ++ ++ Protocol Flow (Push Model) ++ ++ 1. Agent registers with Registrar (same as pull model) ++ 2. Agent authenticates with Verifier via PoP (POST /v3/sessions) ++ 3. Agent sends capabilities to Verifier (POST /v3/agents/{agent_id}/attestations) — receives challenge nonce ++ 4. Agent sends evidence to Verifier (PATCH /v3/agents/{agent_id}/attestations/latest) — receives 202 Accepted ++ 5. 
Agent waits for configured interval, then repeats from step 3 ++ +diff --git a/docs/assets/push-model-sequence.svg b/docs/assets/push-model-sequence.svg +new file mode 100644 +index 000000000..d9affe1c9 +--- /dev/null ++++ b/docs/assets/push-model-sequence.svg +@@ -0,0 +1,122 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Push-Model Agent State Machine ++ ++ ++ ++ Unregistered ++ Initial state ++ ++ ++ ++ Registered ++ Ready for attestation ++ ++ ++ ++ Negotiating ++ Phase 1: capabilities ++ ++ ++ ++ Attesting ++ Phase 2: evidence ++ ++ ++ ++ Reg. Failed ++ Will retry ++ ++ ++ ++ Attest. Failed ++ Will retry ++ ++ ++ ++ registration OK ++ ++ ++ ++ failed ++ ++ ++ ++ retry ++ ++ ++ ++ start negotiation ++ ++ ++ ++ 201 Created ++ ++ ++ ++ error ++ ++ ++ ++ 202 Accepted ++ (wait interval) ++ ++ ++ ++ rejected ++ ++ ++ ++ retry ++ ++ ++ ++ Legend ++ ++ ++ Success transition ++ ++ ++ Error transition ++ ++ ++ Retry (with exponential backoff) ++ ++ Phase 1: Agent POSTs capabilities, receives challenge nonce ++ Phase 2: Agent PATCHes evidence, receives 202 Accepted ++ The Negotiating/Attesting cycle repeats continuously ++ +diff --git a/docs/conf.py b/docs/conf.py +index 5543afa86..00d9735de 100644 +--- a/docs/conf.py ++++ b/docs/conf.py +@@ -154,6 +154,7 @@ + ("man/keylime_registrar.8", "keylime_registrar", "Keylime registrar service", [author], 8), + ("man/keylime_verifier.8", "keylime_verifier", "Keylime verifier service", [author], 8), + ("man/keylime_agent.8", "keylime_agent", "Keylime agent service", [author], 8), ++ ("man/keylime_push_model_agent.8", "keylime_push_model_agent", "Keylime push-model agent service", [author], 8), + ] + + +diff --git a/docs/design.rst b/docs/design.rst +index 522ade113..dd72fd4e7 100644 +--- a/docs/design.rst ++++ b/docs/design.rst +@@ -7,6 +7,7 @@ Design of Keylime + :caption: Contents: + + design/overview.rst ++ design/push_model.rst + design/security.rst + + +diff --git a/docs/design/overview.rst 
b/docs/design/overview.rst +index 4c7b52227..985cbc94b 100644 +--- a/docs/design/overview.rst ++++ b/docs/design/overview.rst +@@ -51,9 +51,14 @@ Verifier + The verifier implements the actual attestation of an agent and sends revocation messages if an agent leaves the trusted + state. + +-Once an agent is registered for attestation (using the tenant or the API directly) the verifier continuously pulls +-the required attestation data from the agent. This can include: a quote over the PCRs, the PCR values, NK public key, +-IMA log and UEFI event log. After that the quote is validated additional validation of the data can be configured. ++In the default **pull model**, once an agent is registered for attestation (using the tenant or the API directly) ++the verifier continuously pulls the required attestation data from the agent. This can include: a quote over the ++PCRs, the PCR values, NK public key, IMA log and UEFI event log. After the quote is validated, additional ++validation of the data can be configured. ++ ++Keylime also supports a **push model** where the agent initiates connections to the verifier and proactively ++submits attestation evidence. This is useful for environments where the verifier cannot directly reach the ++agent (e.g. behind firewalls or NAT). See :doc:`push_model` for details. + + Static PCR values + """"""""""""""""" +diff --git a/docs/design/push_model.rst b/docs/design/push_model.rst +new file mode 100644 +index 000000000..29f9061e0 +--- /dev/null ++++ b/docs/design/push_model.rst +@@ -0,0 +1,226 @@ ++======================== ++Push-Model Attestation ++======================== ++ ++.. warning:: ++ Push-model attestation is currently experimental. The feature is functional ++ but the API and configuration options may change in future releases. 
++ Please report issues at https://github.com/keylime/keylime/issues/?q=label:push-mode ++ ++Introduction ++------------ ++ ++Traditional Keylime attestation uses a **pull model** where the verifier continuously ++polls agents for attestation data. The agent acts as a server and the verifier initiates ++connections to it. This model requires that the verifier can reach the agent over the ++network. ++ ++The **push model** reverses this communication direction: the agent initiates connections ++to the verifier and proactively sends attestation data. The verifier never connects to ++the agent. This makes push-model attestation suitable for environments where the ++verifier cannot directly reach the agent, such as: ++ ++* **Edge and IoT devices** behind firewalls or NAT ++* **Hybrid cloud environments** with restricted network policies ++* **Air-gapped networks** where inbound connections to agents are not permitted ++* **Dynamic environments** where agent IP addresses change frequently ++ ++In push mode, the agent is a separate binary (``keylime-push-model-agent``) that ++implements the push attestation protocol using API version 3.0. ++ ++Architectural Overview ++---------------------- ++ ++In pull-model attestation, the verifier runs a polling loop that periodically contacts ++each registered agent to request a TPM quote and associated evidence. The agent exposes ++an HTTPS server that responds to these requests. ++ ++In push-model attestation, this relationship is inverted: ++ ++* The **agent initiates** all connections to the verifier ++* The agent does **not expose any HTTP endpoints** (no listening ports) ++* The verifier accepts incoming attestation data from agents ++* Verification is performed **asynchronously** after evidence is received ++* An **event-driven timeout** system replaces the polling loop for monitoring agent ++ liveness ++ ++The registrar interaction is unchanged: in both models, the agent registers itself ++with the registrar during startup. 
++ ++.. figure:: ../assets/push-model-architecture.svg ++ :width: 600 ++ :align: center ++ :alt: Diagram showing the push-model architecture where the agent initiates ++ connections to both the registrar and the verifier, contrasted with the pull ++ model where the verifier connects to the agent. ++ ++ **Figure 1:** Push-Model Architecture ++ ++The Two-Phase Attestation Protocol ++----------------------------------- ++ ++Push-model attestation uses a two-phase protocol for each attestation cycle. ++ ++Phase 1: Capabilities Negotiation ++"""""""""""""""""""""""""""""""""" ++ ++The agent begins an attestation cycle by sending its capabilities to the verifier. ++This tells the verifier what types of evidence the agent can produce and what ++cryptographic algorithms it supports. ++ ++1. The agent sends a ``POST /v3/agents/{agent_id}/attestations`` request to the ++ verifier containing its supported evidence types (TPM quote parameters, IMA log ++ capabilities, UEFI log capabilities) and the public attestation key (AK). ++ ++2. The verifier creates an attestation resource, selects cryptographic parameters ++ (signature scheme, hash algorithm, PCRs to quote), generates a random challenge ++ nonce, and returns a ``201 Created`` response with: ++ ++ * The challenge nonce for TPM quote generation ++ * The chosen cryptographic parameters ++ * The evidence types requested ++ * A deadline (``challenges_expire_at``) by which evidence must be submitted ++ ++Phase 2: Evidence Submission ++""""""""""""""""""""""""""""" ++ ++The agent collects the requested evidence and submits it to the verifier. ++ ++1. The agent generates a TPM quote using the challenge nonce from Phase 1, ++ collects IMA and/or UEFI event logs as requested, and sends a ++ ``PATCH /v3/agents/{agent_id}/attestations/latest`` request with the evidence. ++ ++2. The verifier returns a ``202 Accepted`` response immediately. The evidence is ++ then verified asynchronously in a background worker process. ++ ++3. 
If verification succeeds, the attestation is marked as ``pass``. If it fails, ++ the attestation is marked as ``fail`` with a failure reason ++ (``broken_evidence_chain`` or ``policy_violation``). ++ ++4. The response includes a ``seconds_to_next_attestation`` value in the ``meta`` ++ field, indicating when the agent should start its next attestation cycle. ++ ++After a configurable interval, the agent begins a new cycle from Phase 1. ++ ++Agent State Machine ++""""""""""""""""""" ++ ++The push-model agent operates as a state machine with the following states: ++ ++.. figure:: ../assets/push-model-sequence.svg ++ :width: 600 ++ :align: center ++ :alt: Sequence diagram showing the push-model agent state machine transitions ++ from Unregistered through Registered, Negotiating, and Attesting states. ++ ++ **Figure 2:** Push-Model Agent State Machine ++ ++* **Unregistered**: Initial state. The agent registers with the registrar. ++* **Registered**: Registration succeeded. The agent begins negotiation with the ++ verifier. ++* **Negotiating**: The agent sends capabilities to the verifier (Phase 1) and waits ++ for the challenge response. ++* **Attesting**: The agent generates and sends evidence to the verifier (Phase 2). ++ On success, the agent waits for the configured interval and transitions back to ++ Negotiating. ++* **RegistrationFailed**: Registration with the registrar failed. The agent waits ++ and retries. ++* **AttestationFailed**: An attestation attempt failed (network error or verifier ++ rejection). The agent waits and retries from Negotiating. ++ ++The agent uses exponential backoff when retrying failed operations. ++ ++Authentication ++-------------- ++ ++Push-model attestation uses **Proof of Possession (PoP)** authentication instead of ++the mTLS client certificates used in pull mode. This is necessary because the agent ++acts as a client (not a server) and does not have certificates signed by the verifier's ++trusted CA. 
++ ++The PoP authentication flow: ++ ++1. The agent creates a session by sending ``POST /v3/sessions`` with its agent ID ++ and supported authentication methods. ++2. The verifier responds with a challenge nonce. ++3. The agent proves possession of its AK by signing the challenge using the TPM ++ (``TPM2_Certify``) and sends the result via ``PATCH /v3/sessions/{session_id}``. ++4. If the signature is valid, the verifier issues a bearer token. ++5. The agent includes this token in the ``Authorization`` header of all subsequent ++ requests. ++6. Tokens have a configurable expiration time and can be refreshed. ++ ++The TLS connection uses **server verification only**: the agent verifies the verifier's ++server certificate but does not present a client certificate. The agent needs the ++verifier's CA certificate for this verification. ++ ++For full details on the authorization framework, including the separation between ++agent and admin authentication, see :doc:`../user_guide/authentication`. ++ ++Timeout Monitoring ++------------------ ++ ++In pull mode, the verifier detects unresponsive agents through its polling loop. In ++push mode, an event-driven timeout system serves this purpose. ++ ++The verifier monitors push-mode agents as follows: ++ ++1. When the verifier receives an attestation from an agent, it schedules a timeout ++ for that agent. The timeout duration is ``quote_interval * 5`` seconds (where ++ ``quote_interval`` is the verifier's configured quote interval). ++ ++2. If the agent does not submit a new attestation before the timeout fires, the ++ verifier sets the agent's ``accept_attestations`` flag to ``False``. ++ ++3. Once ``accept_attestations`` is ``False``, the verifier rejects new attestation ++ requests from that agent with a ``403 Forbidden`` response. ++ ++4. The agent can recover by re-registering or by administrator intervention ++ (reactivation). ++ ++Comparison with Pull Model ++--------------------------- ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 30 35 35 ++ ++ * - Aspect ++ - Pull Model ++ - Push Model ++ * - Connection direction ++ - Verifier connects to agent ++ - Agent connects to verifier ++ * - Agent binary ++ - ``keylime_agent`` ++ - ``keylime_push_model_agent`` ++ * - Agent network requirements ++ - Must expose HTTP port (default 9002) ++ - No listening ports required ++ * - Firewall requirements ++ - Inbound to agent from verifier ++ - Outbound from agent to verifier ++ * - Authentication method ++ - mTLS (agent as server) ++ - PoP bearer tokens (agent as client) ++ * - API version ++ - v2.x ++ - v3.0 ++ * - Verification trigger ++ - Verifier polls on ``quote_interval`` ++ - Agent pushes on ``attestation_interval_seconds`` ++ * - Liveness detection ++ - Polling loop state machine ++ - Event-driven timeout (``quote_interval * 5``) ++ * - Verifier configuration ++ - ``mode = pull`` (default) ++ - ``mode = push`` ++ * - Suitable for ++ - Controlled networks, data centers ++ - Edge, IoT, NAT, firewalled environments ++ * - Maturity ++ - Stable ++ - Experimental ++ ++For deployment and configuration instructions, see :doc:`../user_guide/push_model`. ++For the v3.0 API reference, see :doc:`../rest_apis/3_0/3_0`. +diff --git a/docs/index.rst b/docs/index.rst +index 8234217fd..fd5f08bed 100644 +--- a/docs/index.rst ++++ b/docs/index.rst +@@ -43,6 +43,7 @@ what the goals of Keylime are and how they are implemented. + man/keylime_verifier.8 + man/keylime_registrar.8 + man/keylime_agent.8 ++ man/keylime_push_model_agent.8 + man/keylime_policy.1 + + Indices and tables +diff --git a/docs/installation.rst b/docs/installation.rst +index 21d35a793..b96574137 100644 +--- a/docs/installation.rst ++++ b/docs/installation.rst +@@ -62,6 +62,17 @@ Rust agent + + Installation instructions can be found in the `README.md `_ for the Rust agent. + ++Push-model agent ++~~~~~~~~~~~~~~~~ ++.. 
note:: ++ The push-model agent (``keylime-push-model-agent``) is a separate binary from ++ the standard Rust agent. It implements the push attestation protocol where the ++ agent initiates connections to the verifier. This feature is currently experimental. ++ ++ Installation instructions are the same as for the Rust agent. The push-model ++ agent binary is built from the same repository. For configuration and deployment ++ details, see the :doc:`user_guide/push_model` user guide. ++ + Keylime Bash installer + ---------------------- + +diff --git a/docs/man/keylime_push_model_agent.8.rst b/docs/man/keylime_push_model_agent.8.rst +new file mode 100644 +index 000000000..b033db801 +--- /dev/null ++++ b/docs/man/keylime_push_model_agent.8.rst +@@ -0,0 +1,226 @@ ++========================== ++keylime_push_model_agent ++========================== ++ ++------------------------------------------------------------ ++Keylime push-model agent for TPM-based remote attestation ++------------------------------------------------------------ ++ ++:Manual section: 8 ++:Author: Keylime Developers ++:Date: February 2026 ++ ++SYNOPSIS ++======== ++ ++**keylime_push_model_agent** [*OPTIONS*] ++ ++(Most operations require root privileges, use with sudo) ++ ++DESCRIPTION ++=========== ++ ++The push-model agent is a long-running service that runs on systems to be attested. ++Unlike the standard Keylime agent which acts as a server and waits for the verifier ++to poll it, the push-model agent initiates connections to the verifier and proactively ++submits attestation evidence. ++ ++The agent registers with the registrar, authenticates with the verifier using Proof of ++Possession (PoP), and performs periodic attestation cycles consisting of capabilities ++negotiation and evidence submission. ++ ++This agent uses API version 3.0 and requires the verifier to be configured in push ++mode (``mode = push``). 
++ ++OPTIONS ++======= ++ ++**--verifier-url** *URL* ++ URL of the verifier (must use HTTPS). Default: ``https://localhost:8881`` ++ ++**--registrar-url** *URL* ++ URL of the registrar. Default: ``http://127.0.0.1:8888`` ++ ++**--agent-identifier** *ID* ++ Agent UUID. Overrides the ``uuid`` configuration option. ++ ++**--attestation-interval-seconds** *SECONDS* ++ Interval between attestation cycles. Default: ``60`` ++ ++**--ca-certificate** *PATH* ++ CA certificate file for verifying the verifier's TLS certificate. Overrides ++ ``verifier_tls_ca_cert``. ++ ++**--api-version** *VERSION* ++ API version to use. Default: ``v3.0`` ++ ++**--timeout** *MILLISECONDS* ++ HTTP request timeout. Default: ``5000`` ++ ++**--insecure** ++ Accept invalid TLS certificates. For testing only. ++ ++**--avoid-tpm** ++ Use a mock TPM instead of hardware TPM. For testing only. ++ ++**--json-file** *FILE* ++ JSON file for payload data. ++ ++**--attestation-index** *INDEX* ++ Attestation index value. Default: ``1`` ++ ++**--session-index** *INDEX* ++ Session index value. Default: ``1`` ++ ++**--message-type** *TYPE* ++ Message type (Attestation, EvidenceHandling, Session). Default: ``Attestation`` ++ ++**--method** *METHOD* ++ HTTP method. Default: ``POST`` ++ ++CONFIGURATION ++============= ++ ++Primary configuration is read from ``/etc/keylime/agent.conf`` (TOML format). ++All options are under the ``[agent]`` section. Command-line arguments override ++configuration file values. ++ ++Drop-in overrides: files in ``/etc/keylime/agent.conf.d/`` are applied in ++lexicographic order. ++ ++Push-model specific options: ++ ++**verifier_url** ++ URL of the verifier. Must use HTTPS. Default: ``https://localhost:8881`` ++ ++**verifier_tls_ca_cert** ++ Path to CA certificate for verifying the verifier's TLS certificate. ++ Relative paths are resolved from ``keylime_dir``. Default: ``cv_ca/cacert.crt`` ++ ++**attestation_interval_seconds** ++ Interval in seconds between attestation cycles. 
Default: ``60`` ++ ++**api_versions** ++ API versions to use. Default: ``3.0`` ++ ++**certification_keys_server_identifier** ++ Server identifier for attestation key certification. Default: ``ak`` ++ ++**uefi_logs_evidence_version** ++ UEFI logs evidence format version. Default: ``2.1`` ++ ++**exponential_backoff_initial_delay** ++ Initial retry delay in milliseconds. Default: ``10000`` ++ ++**exponential_backoff_max_retries** ++ Maximum number of retry attempts. Default: ``5`` ++ ++**exponential_backoff_max_delay** ++ Maximum retry delay in milliseconds. Default: ``300000`` ++ ++Shared options (same as standard agent): ++ ++**uuid** ++ Agent identifier. Default: auto-generated UUID. ++ ++**registrar_ip**, **registrar_port** ++ Registrar endpoint. Default: ``127.0.0.1:8890`` ++ ++**registrar_tls_enabled** ++ Enable TLS for registrar communication. Default: ``false`` ++ ++**registrar_tls_ca_cert** ++ CA certificate for registrar TLS verification. Default: ``cv_ca/cacert.crt`` ++ ++**tpm_hash_alg**, **tpm_encryption_alg**, **tpm_signing_alg** ++ TPM algorithms. Defaults: ``sha256``, ``rsa``, ``rsassa`` ++ ++**keylime_dir** ++ Working directory. Default: ``/var/lib/keylime`` ++ ++**run_as** ++ User:group to drop privileges to. Default: ``keylime:tss`` ++ ++**enable_iak_idevid** ++ Enable IAK/IDevID usage. Default: ``false`` ++ ++ENVIRONMENT ++=========== ++ ++**KEYLIME_AGENT_CONFIG** ++ Path to agent.conf (highest priority) ++ ++**KEYLIME_DIR** ++ Working directory (default: ``/var/lib/keylime``) ++ ++**RUST_LOG** ++ Log level configuration. Default in systemd service: ++ ``keylime_push_model_agent=info,keylime=info`` ++ ++All configuration options can be overridden via environment variables in the form ++``KEYLIME_AGENT_<OPTION>`` (e.g. ``KEYLIME_AGENT_VERIFIER_URL``). 
++ ++FILES ++===== ++ ++``/etc/keylime/agent.conf`` ++ TOML format configuration file (shared with standard agent) ++ ++``/etc/keylime/agent.conf.d/`` ++ Drop-in configuration snippets ++ ++``/var/lib/keylime/cv_ca/cacert.crt`` ++ Default CA certificate for verifier TLS verification ++ ++``/var/lib/keylime/agent_data.json`` ++ Persisted agent TPM data ++ ++RUNTIME ++======= ++ ++Start directly: ++ ++.. code-block:: bash ++ ++ sudo keylime_push_model_agent --verifier-url https://verifier.example.com:8881 ++ ++Start as a systemd service: ++ ++.. code-block:: bash ++ ++ sudo systemctl enable --now keylime_push_model_agent ++ ++Check service status: ++ ++.. code-block:: bash ++ ++ sudo systemctl status keylime_push_model_agent ++ sudo journalctl -u keylime_push_model_agent -f ++ ++PREREQUISITES ++============= ++ ++- Root privileges (use sudo) ++- TPM 2.0 available (verify with ``tpm2_pcrread``) ++- Verifier configured with ``mode = push`` ++- Network connectivity from agent to verifier and registrar ++- Verifier CA certificate available on agent machine ++ ++NOTES ++===== ++ ++- This service conflicts with ``keylime_agent.service``. Only one agent type can ++ run on a machine at a time. ++- The push-model agent does not expose any listening ports. ++- Push-model attestation is currently experimental. ++- Authentication uses PoP bearer tokens, not mTLS client certificates. ++ ++SEE ALSO ++======== ++ ++**keylime_agent**\(8), **keylime_verifier**\(8), **keylime_registrar**\(8), **keylime_tenant**\(1) ++ ++BUGS ++==== ++ ++Report bugs at https://github.com/keylime/rust-keylime/issues +diff --git a/docs/man/keylime_verifier.8.rst b/docs/man/keylime_verifier.8.rst +index fd7cfb941..5303a5f06 100644 +--- a/docs/man/keylime_verifier.8.rst ++++ b/docs/man/keylime_verifier.8.rst +@@ -32,6 +32,7 @@ Primary configuration is read from ``/etc/keylime/verifier.conf`` (or an overrid + All options are under the ``[verifier]`` section. 
+ + Essentials: ++- **mode**: Attestation mode (``pull`` or ``push``). Default: ``pull`` + - **uuid**: Unique identifier for this verifier instance + - **ip**, **port**: Bind address and HTTP port + - **registrar_ip**, **registrar_port**: Registrar endpoint +@@ -108,7 +109,7 @@ NOTES + SEE ALSO + ======== + +-**keylime_registrar**\(8), **keylime_tenant**\(1), **keylime_agent**\(8) ++**keylime_registrar**\(8), **keylime_tenant**\(1), **keylime_agent**\(8), **keylime_push_model_agent**\(8) + + BUGS + ==== +diff --git a/docs/rest_apis.rst b/docs/rest_apis.rst +index edfe8be1c..aba64c338 100644 +--- a/docs/rest_apis.rst ++++ b/docs/rest_apis.rst +@@ -14,10 +14,40 @@ Check the :ref:`Changelog` section for the differences between versions + rest_apis/2_3/2_3.rst + rest_apis/2_4/2_4.rst + rest_apis/2_5/2_5.rst ++ rest_apis/3_0/3_0.rst + + Changelog + _________ + ++Changes from v2.5 to v3.0 ++~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++API version 3.0 introduces push-model attestation. Unlike previous versions where ++the verifier polls agents, in v3.0 agents initiate connections and submit ++attestation evidence to the verifier. The v3.0 endpoints are served by the ++verifier only; the push-model agent does not expose HTTP endpoints. 
++ ++* Added `POST /v3/agents/{agent_id}/attestations` endpoint to the verifier: ++ * Allows agents to submit attestation capabilities (Phase 1 of push protocol) ++ * Returns challenge nonce for TPM quote generation ++* Added `PATCH /v3/agents/{agent_id}/attestations/latest` endpoint: ++ * Allows agents to submit attestation evidence (Phase 2 of push protocol) ++ * Returns `202 Accepted` for asynchronous verification ++* Added `PATCH /v3/agents/{agent_id}/attestations/{index}` endpoint: ++ * Submit evidence for a specific attestation by index ++* Added `GET /v3/agents/{agent_id}/attestations` endpoint: ++ * Lists all attestation records for an agent ++* Added `GET /v3/agents/{agent_id}/attestations/latest` endpoint: ++ * Returns the most recent attestation for an agent, including verification status ++* Added `GET /v3/agents/{agent_id}/attestations/{index}` endpoint: ++ * Returns a specific attestation by its index ++* Added `POST /v3/sessions` endpoint: ++ * Creates a PoP authentication session and returns a challenge nonce for the agent ++* Added `PATCH /v3/sessions/{session_id}` endpoint: ++ * Completes PoP authentication by submitting the TPM-signed challenge response ++* Introduced PoP (Proof of Possession) bearer token authentication for ++ agent-to-verifier communication ++ + Changes from v2.4 to v2.5 + ~~~~~~~~~~~~~~~~~~~~~~~~~ + API version 2.5 was first implemented in Keylime 7.14.0. +diff --git a/docs/rest_apis/3_0/3_0.rst b/docs/rest_apis/3_0/3_0.rst +new file mode 100644 +index 000000000..d6cac705d +--- /dev/null ++++ b/docs/rest_apis/3_0/3_0.rst +@@ -0,0 +1,21 @@ ++RESTful API for Keylime (v3.0) ++------------------------------ ++ ++API version 3.0 introduces push-model attestation, where agents initiate ++connections to the verifier and proactively submit attestation evidence. ++ ++Unlike previous API versions where the agent exposed HTTP endpoints for the ++verifier to poll, in v3.0 the agent acts as a client. 
The v3.0 endpoints are ++served by the **verifier only**. The push-model agent does not expose an API. ++ ++For a conceptual overview of push-model attestation, see ++:doc:`../../design/push_model`. ++ ++.. warning:: ++ Push-model attestation is currently experimental. The API may change in ++ future releases. ++ ++.. toctree:: ++ :maxdepth: 2 ++ ++ verifier.rst +diff --git a/docs/rest_apis/3_0/verifier.rst b/docs/rest_apis/3_0/verifier.rst +new file mode 100644 +index 000000000..3476cc7a3 +--- /dev/null ++++ b/docs/rest_apis/3_0/verifier.rst +@@ -0,0 +1,608 @@ ++Verifier ++~~~~~~~~ ++ ++Push-Model Attestation Endpoints ++""""""""""""""""""""""""""""""""" ++ ++These endpoints implement the two-phase push-model attestation protocol. Agents ++use these endpoints to submit attestation capabilities and evidence. Administrators ++can use the GET endpoints to view attestation results. ++ ++For details on authentication requirements, see :doc:`../../user_guide/authentication`. ++ ++.. http:post:: /v3/agents/{agent_id}/attestations ++ ++ Phase 1: Submit attestation capabilities and receive a challenge. ++ ++ The agent sends its supported evidence types, cryptographic algorithms, and ++ attestation key. The verifier selects parameters and returns a challenge nonce ++ for TPM quote generation. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ **Example request**: ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "attributes": { ++ "evidence_supported": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": { ++ "signature_schemes": ["rsassa"], ++ "hash_algorithms": ["sha256", "sha384", "sha512"], ++ "available_subjects": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], ++ "certification_keys": [ ++ { ++ "key_class": "asymmetric", ++ "key_algorithm": "rsa", ++ "key_size": 2048, ++ "server_identifier": "ak", ++ "allowable_signature_schemes": ["rsassa"], ++ "allowable_hash_algorithms": ["sha256", "sha384", "sha512"], ++ "public": "" ++ } ++ ], ++ "component_version": "2.0", ++ "evidence_version": "1.0" ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "capabilities": { ++ "entry_count": 1024, ++ "supports_partial_access": true, ++ "appendable": true, ++ "formats": ["text/plain"], ++ "component_version": "1.0", ++ "evidence_version": "1.0" ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ } ++ } ++ } ++ } ++ ++ **Example response** (201 Created): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "awaiting_evidence", ++ "evidence_requested": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "chosen_parameters": { ++ "challenge": "", ++ "signature_scheme": "rsassa", ++ "hash_algorithm": "sha256", ++ "selected_subjects": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], ++ "certification_key": { ++ "key_class": "asymmetric", ++ "key_algorithm": "rsa", ++ "key_size": 2048, ++ "server_identifier": "ak" ++ } ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "chosen_parameters": { ++ "starting_offset": 0, ++ "entry_count": 1024, ++ "format": "text/plain" ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ } ++ } ++ ++ :>json string data.id: Attestation index (auto-incremented per agent) ++ :>json string data.attributes.stage: ``"awaiting_evidence"`` ++ :>json array data.attributes.evidence_requested: Evidence the verifier wants the agent to provide ++ :>json string evidence_requested[].chosen_parameters.challenge: Base64-encoded challenge nonce for TPM quote ++ :>json string data.attributes.capabilities_received_at: ISO 8601 timestamp ++ :>json string data.attributes.challenges_expire_at: Deadline for evidence submission ++ :>json string data.links.self: URL to this attestation resource ++ ++ :statuscode 201: Attestation created, challenge issued ++ :statuscode 400: Invalid request body ++ :statuscode 403: Attestations disabled for this agent (timeout or previous failure) ++ :statuscode 404: Agent not found ++ :statuscode 409: Concurrent attestation creation attempt ++ :statuscode 422: Invalid capabilities data ++ :statuscode 429: Rate 
limited (attestation interval not elapsed). Includes ``Retry-After`` header ++ :statuscode 503: Previous attestation still being verified. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/agents/{agent_id}/attestations/latest ++ ++ Phase 2: Submit attestation evidence for the latest attestation. ++ ++ The agent sends the TPM quote, PCR values, and event logs generated using the ++ challenge nonce from Phase 1. The verifier accepts the evidence and verifies it ++ asynchronously. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "attributes": { ++ "evidence_collected": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "data": { ++ "subject_data": { ++ "0": "", ++ "1": "" ++ }, ++ "message": "", ++ "signature": "" ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "data": { ++ "entry_count": 512, ++ "entries": "" ++ } ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (202 Accepted): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "evaluating_evidence", ++ "evidence": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": {}, ++ "chosen_parameters": {}, ++ "data": { ++ "message": "", ++ "signature": "", ++ "subject_data": {} ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ }, ++ "meta": { ++ "seconds_to_next_attestation": 45 ++ } ++ } ++ ++ :>json string data.attributes.stage: ``"evaluating_evidence"`` (verification in progress) ++ :>json array data.attributes.evidence: Evidence items with capabilities, parameters, and data ++ :>json string data.attributes.evidence_received_at: ISO 8601 timestamp when evidence was received ++ :>json int meta.seconds_to_next_attestation: Suggested wait before starting the next attestation cycle ++ ++ :statuscode 202: Evidence accepted, verification in progress ++ :statuscode 400: Invalid evidence format ++ :statuscode 403: Evidence already submitted, attestation is not the latest, or challenges expired ++ :statuscode 404: Agent or attestation not found ++ :statuscode 410: Attestation no longer exists ++ :statuscode 503: No available worker processes. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/agents/{agent_id}/attestations/{index} ++ ++ Submit attestation evidence for a specific attestation by index. ++ ++ Behaves identically to ``PATCH /v3/agents/{agent_id}/attestations/latest`` ++ but targets a specific attestation index. Evidence can only be submitted for ++ the latest attestation. 
++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ :param index: Attestation index ++ :type index: integer ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ :statuscode 202: Evidence accepted ++ :statuscode 403: Not the latest attestation, evidence already submitted, or challenges expired ++ :statuscode 404: Agent or attestation not found ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations ++ ++ List all attestations for an agent. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ **Example response**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": [ ++ { ++ "type": "attestation", ++ "id": "1", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "evidence": [], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:32:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/1" ++ } ++ }, ++ { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "evidence": [], ++ "system_info": {}, ++ "capabilities_received_at": "2024-01-15T10:25:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:30:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:26:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:27:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ } ++ ] ++ } ++ ++ :>json array data: List of attestation resources ++ :>json string data[].id: Attestation index ++ :>json string data[].attributes.stage: ``"awaiting_evidence"``, ``"evaluating_evidence"``, or ``"verification_complete"`` ++ :>json string data[].attributes.evaluation: 
``"pending"``, ``"pass"``, or ``"fail"`` ++ :>json string data[].attributes.failure_reason: ``"broken_evidence_chain"`` or ``"policy_violation"`` (only when evaluation is ``"fail"``) ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent not found ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations/latest ++ ++ Get the latest attestation for an agent. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ **Example response**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "1", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "failure_reason": null, ++ "evidence": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": {}, ++ "chosen_parameters": {}, ++ "data": { ++ "message": "", ++ "signature": "", ++ "subject_data": {} ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:32:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/1" ++ } ++ } ++ } ++ ++ :>json string data.attributes.stage: Current stage of the attestation ++ :>json string data.attributes.evaluation: ``"pending"``, ``"pass"``, or ``"fail"`` ++ :>json string data.attributes.failure_reason: ``null``, ``"broken_evidence_chain"``, or ``"policy_violation"`` ++ :>json array data.attributes.evidence: Evidence items with full data ++ :>json string data.attributes.capabilities_received_at: When capabilities were received ++ :>json string data.attributes.challenges_expire_at: When challenges expire ++ :>json string data.attributes.evidence_received_at: When evidence was received (``null`` if still awaiting) ++ :>json string 
data.attributes.verification_completed_at: When verification completed (``null`` if still in progress) ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent not found or no attestations exist ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations/{index} ++ ++ Get a specific attestation by index. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ :param index: Attestation index ++ :type index: integer ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ Response format is identical to ``GET /v3/agents/{agent_id}/attestations/latest``. ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent or attestation not found ++ ++ ++Session Endpoints ++""""""""""""""""" ++ ++These endpoints manage PoP (Proof of Possession) authentication sessions for ++push-model agents. Sessions are required before an agent can submit attestations. ++ ++.. http:post:: /v3/sessions ++ ++ Create a new authentication session. ++ ++ The verifier generates a challenge nonce that the agent must sign using its ++ TPM attestation key to prove possession. ++ ++ **Authentication**: None (public endpoint) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_supported": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop" ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (200 OK): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "id": "550e8400-e29b-41d4-a716-446655440000", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_requested": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "chosen_parameters": { ++ "challenge": "" ++ } ++ } ++ ], ++ "created_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:31:00.123456Z" ++ } ++ } ++ } ++ ++ :>json string data.id: Session UUID ++ :>json string data.attributes.challenges_expire_at: Deadline for submitting the PoP response ++ ++ :statuscode 200: Session created ++ :statuscode 400: Missing or invalid agent_id ++ :statuscode 429: Rate limited. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/sessions/{session_id} ++ ++ Submit Proof of Possession response to complete authentication. ++ ++ The agent signs the challenge nonce from the session creation response using ++ ``TPM2_Certify`` and submits the result. If valid, the verifier issues a bearer ++ token for subsequent API calls. ++ ++ :param session_id: UUID of the session ++ :type session_id: string ++ ++ **Authentication**: None (public endpoint; validates PoP internally) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_provided": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "data": { ++ "message": "", ++ "signature": "" ++ } ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (200 OK, authentication passed): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "id": "550e8400-e29b-41d4-a716-446655440000", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "evaluation": "pass", ++ "token": "550e8400-e29b-41d4-a716-446655440000.", ++ "authentication": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "chosen_parameters": { ++ "challenge": "" ++ }, ++ "data": { ++ "message": "", ++ "signature": "" ++ } ++ } ++ ], ++ "created_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:31:00.123456Z", ++ "response_received_at": "2024-01-15T10:30:30.123456Z", ++ "token_expires_at": "2024-01-15T11:30:00.123456Z" ++ } ++ } ++ } ++ ++ :>json string data.attributes.evaluation: ``"pass"`` or ``"fail"`` ++ :>json string data.attributes.token: Bearer token for subsequent requests (only on ``"pass"``) ++ :>json string data.attributes.token_expires_at: Token expiration time (only on ``"pass"``) ++ ++ :statuscode 200: PoP response processed (check ``evaluation`` field for result) ++ :statuscode 400: Missing or invalid request body ++ :statuscode 401: PoP verification failed ++ :statuscode 404: Session not found ++ ++ ++Attestation Stages and Evaluations ++""""""""""""""""""""""""""""""""""" ++ ++Each attestation progresses through the following stages: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 75 ++ ++ * - Stage ++ - Description ++ * - ``awaiting_evidence`` ++ - Capabilities received, challenge issued, waiting for evidence ++ * - ``evaluating_evidence`` ++ - Evidence received, verification in progress ++ * - ``verification_complete`` ++ - Verification finished, see ``evaluation`` for result ++ ++The ``evaluation`` field indicates the verification result: ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 20 80 ++ ++ * - Evaluation ++ - Description ++ * - ``pending`` ++ - Verification not yet complete ++ * - ``pass`` ++ - Evidence verified successfully ++ * - ``fail`` ++ - Evidence verification failed (see ``failure_reason``) ++ ++When an attestation fails, the ``failure_reason`` field provides the cause: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 70 ++ ++ * - Failure Reason ++ - Description ++ * - ``broken_evidence_chain`` ++ - TPM quote signature invalid or evidence integrity check failed ++ * - ``policy_violation`` ++ - Evidence is valid but violates the configured attestation policy +diff --git a/docs/user_guide.rst b/docs/user_guide.rst +index 9bd44c512..ed052c175 100644 +--- a/docs/user_guide.rst ++++ b/docs/user_guide.rst +@@ -8,6 +8,7 @@ User Guide + + user_guide/authentication.rst + user_guide/configuration.rst ++ user_guide/push_model.rst + user_guide/runtime_ima.rst + user_guide/user_selected_pcr_monitoring.rst + user_guide/use_measured_boot.rst +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index 6d8f35c88..2e50757df 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -40,6 +40,13 @@ The following components can be configured: + - ``/etc/keylime/logging.conf`` + - ``/etc/keylime/logging.conf.d`` + ++.. note:: ++ For push-model attestation, the verifier must be configured with ``mode = push`` ++ in the ``[verifier]`` section. The push-model agent uses the same ++ ``/etc/keylime/agent.conf`` file (TOML format) but with additional options such ++ as ``verifier_url`` and ``attestation_interval_seconds``. See ++ :doc:`push_model` for details. 
++ + The next sections contain details of the configuration files + + Configuration file processing order +diff --git a/docs/user_guide/push_model.rst b/docs/user_guide/push_model.rst +new file mode 100644 +index 000000000..773d2aaaa +--- /dev/null ++++ b/docs/user_guide/push_model.rst +@@ -0,0 +1,370 @@ ++======================== ++Push-Model Attestation ++======================== ++ ++.. warning:: ++ Push-model attestation is currently experimental. The feature is functional ++ but the API and configuration options may change in future releases. ++ ++Introduction ++------------ ++ ++In the default pull model, the Keylime verifier continuously polls agents for ++attestation data. This requires the verifier to reach the agent over the network. ++ ++The push model reverses this: the agent initiates connections to the verifier and ++proactively sends attestation evidence. This is useful when the verifier cannot ++directly reach the agent, for example behind firewalls, NAT, or in edge/IoT ++deployments. ++ ++For a detailed description of how push-model attestation works, see ++:doc:`../design/push_model`. ++ ++Prerequisites ++------------- ++ ++* Keylime verifier and registrar installed and running ++* The ``keylime-push-model-agent`` binary installed on the target machine ++* A TPM 2.0 device (hardware or emulated for development) ++* Network connectivity **from the agent to the verifier and registrar** (the ++ reverse is not required) ++* The verifier's CA certificate available on the agent machine ++ ++Configuring the Verifier for Push Mode ++-------------------------------------- ++ ++Set the verifier's attestation mode to ``push`` in ``/etc/keylime/verifier.conf``: ++ ++.. code-block:: ini ++ ++ [verifier] ++ mode = push ++ ++Or use a configuration snippet in ``/etc/keylime/verifier.conf.d/``: ++ ++.. 
code-block:: ini ++ ++ # /etc/keylime/verifier.conf.d/001-push-mode.conf ++ [verifier] ++ mode = push ++ ++The verifier can also be configured via environment variable: ++ ++.. code-block:: bash ++ ++ export KEYLIME_VERIFIER_MODE=push ++ ++.. note:: ++ The ``mode`` setting affects all agents on this verifier. A verifier in push ++ mode expects agents to submit attestation data; it does not poll agents. A ++ single verifier cannot operate in both modes simultaneously. ++ ++Additional verifier settings relevant to push mode: ++ ++* ``quote_interval``: Used to calculate the agent timeout threshold ++ (``quote_interval * 5``). Default: ``2`` seconds. ++* ``challenge_lifetime``: How long a challenge nonce remains valid for evidence ++ submission. ++* ``verification_timeout``: Maximum time allowed for evidence verification. ++ ++After changing the configuration, restart the verifier: ++ ++.. code-block:: bash ++ ++ sudo systemctl restart keylime_verifier ++ ++Configuring the Push-Model Agent ++--------------------------------- ++ ++The push-model agent is a separate binary from the standard Keylime agent. It is ++installed as ``keylime_push_model_agent`` (or ``keylime-push-model-agent``). ++ ++The agent is configured through ``/etc/keylime/agent.conf`` (TOML format), command-line ++arguments, or environment variables. ++ ++Key Configuration Options ++""""""""""""""""""""""""" ++ ++The following options are specific to or particularly important for push-model ++operation: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 15 55 ++ ++ * - Option ++ - Default ++ - Description ++ * - ``verifier_url`` ++ - ``https://localhost:8881`` ++ - URL of the verifier. Must use HTTPS. ++ * - ``verifier_tls_ca_cert`` ++ - ``cv_ca/cacert.crt`` ++ - Path to the CA certificate for verifying the verifier's TLS certificate. ++ Relative paths are resolved from ``keylime_dir``. ++ * - ``attestation_interval_seconds`` ++ - ``60`` ++ - Interval in seconds between attestation cycles. 
++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - IP address of the registrar. ++ * - ``registrar_port`` ++ - ``8890`` ++ - Port of the registrar. ++ * - ``registrar_tls_enabled`` ++ - ``false`` ++ - Enable TLS for registrar communication. ++ * - ``registrar_tls_ca_cert`` ++ - ``cv_ca/cacert.crt`` ++ - CA certificate for registrar TLS verification. ++ * - ``uuid`` ++ - (generated) ++ - Agent UUID. Can be a specific UUID, ``generate`` (random), or ++ ``hash_ek`` (derived from the EK). ++ * - ``api_versions`` ++ - ``3.0`` ++ - API versions supported by the agent. Defaults to ``3.0`` for push model. ++ * - ``tpm_hash_alg`` ++ - ``sha256`` ++ - TPM hash algorithm (``sha256``, ``sha384``, ``sha512``). ++ * - ``tpm_signing_alg`` ++ - ``rsassa`` ++ - TPM signing algorithm (``rsassa``, ``ecdsa``). ++ * - ``keylime_dir`` ++ - ``/var/lib/keylime`` ++ - Working directory for certificates and data files. ++ ++Example Minimal Configuration ++"""""""""""""""""""""""""""""" ++ ++.. code-block:: toml ++ ++ # /etc/keylime/agent.conf (push-model agent) ++ [agent] ++ uuid = "d432fbb3-d2f1-4a97-9ef7-75bd81c00000" ++ verifier_url = "https://verifier.example.com:8881" ++ verifier_tls_ca_cert = "/var/lib/keylime/cv_ca/cacert.crt" ++ attestation_interval_seconds = 60 ++ registrar_ip = "registrar.example.com" ++ registrar_port = 8890 ++ tpm_hash_alg = "sha256" ++ tpm_signing_alg = "rsassa" ++ ++Command-Line Arguments ++"""""""""""""""""""""" ++ ++The push-model agent accepts the following command-line arguments, which override ++configuration file values: ++ ++.. 
code-block:: text ++ ++ --verifier-url Verifier URL (required) ++ --registrar-url Registrar URL (default: http://127.0.0.1:8888) ++ --agent-identifier Agent UUID ++ --attestation-interval-seconds Attestation interval (default: 60) ++ --ca-certificate CA certificate for TLS verification ++ --api-version API version (default: v3.0) ++ --timeout Request timeout in milliseconds (default: 5000) ++ --insecure Accept invalid TLS certificates (testing only) ++ --avoid-tpm Use mock TPM (testing only) ++ ++Exponential Backoff ++""""""""""""""""""" ++ ++When the agent encounters errors (network failures, verifier unavailable), it uses ++exponential backoff for retries: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 35 15 50 ++ ++ * - Option ++ - Default ++ - Description ++ * - ``exponential_backoff_initial_delay`` ++ - ``10000`` ++ - Initial delay in milliseconds (10 seconds) ++ * - ``exponential_backoff_max_retries`` ++ - ``5`` ++ - Maximum number of retry attempts ++ * - ``exponential_backoff_max_delay`` ++ - ``300000`` ++ - Maximum delay in milliseconds (5 minutes) ++ ++Systemd Service Management ++--------------------------- ++ ++The push-model agent is managed as a systemd service: ++ ++.. code-block:: bash ++ ++ # Enable the service to start on boot ++ sudo systemctl enable keylime_push_model_agent ++ ++ # Start the service ++ sudo systemctl start keylime_push_model_agent ++ ++ # Check service status ++ sudo systemctl status keylime_push_model_agent ++ ++ # View logs ++ sudo journalctl -u keylime_push_model_agent -f ++ ++.. warning:: ++ The push-model agent service (``keylime_push_model_agent.service``) conflicts ++ with the standard pull-model agent service (``keylime_agent.service``). Only one ++ can run at a time on the same machine. Starting one will stop the other. ++ ++The service is configured to restart on failure with a 120-second delay between ++restart attempts. 
++ ++Enrolling an Agent for Push-Model Attestation ++--------------------------------------------- ++ ++Use the ``keylime_tenant`` tool with the ``--push-model`` flag to enroll an agent ++for push-model attestation: ++ ++.. code-block:: bash ++ ++ # Add an agent in push mode ++ sudo keylime_tenant -c add --push-model -u <agent_uuid> ++ ++ # Add with a runtime IMA policy ++ sudo keylime_tenant -c add --push-model -u <agent_uuid> \ ++ --runtime-policy-name <policy_name> ++ ++ # Add with a measured boot policy ++ sudo keylime_tenant -c add --push-model -u <agent_uuid> \ ++ --mb-policy-name <policy_name> ++ ++.. note:: ++ In push mode, the ``-t`` / ``--targethost`` option is not required because the ++ verifier does not need to connect to the agent. The agent's IP and port are set ++ to ``None`` in the verifier's database. ++ ++To check the status of a push-model agent: ++ ++.. code-block:: bash ++ ++ sudo keylime_tenant -c cvstatus -u <agent_uuid> ++ ++To remove an agent: ++ ++.. code-block:: bash ++ ++ sudo keylime_tenant -c delete -u <agent_uuid> ++ ++TLS Configuration for Push Model ++--------------------------------- ++ ++The push model uses TLS differently from the pull model: ++ ++**Agent-to-verifier connection:** ++ ++* The agent connects to the verifier over HTTPS ++* The agent verifies the verifier's server certificate using the configured CA ++ certificate (``verifier_tls_ca_cert``) ++* The agent does **not** present a client certificate (no mTLS) ++* Authentication is done via PoP bearer tokens (see :doc:`authentication`) ++ ++**Agent-to-registrar connection:** ++ ++* The agent connects to the registrar to register itself ++* TLS can be enabled with ``registrar_tls_enabled = true`` ++* The registrar CA certificate is configured with ``registrar_tls_ca_cert`` ++ ++**Firewall considerations:** ++ ++* No inbound ports need to be opened on the agent machine ++* The agent needs outbound access to the verifier port (default: 8881) ++* The agent needs outbound access to the registrar port (default: 8890) ++ ++To set up TLS, copy the verifier's CA 
certificate to the agent machine: ++ ++.. code-block:: bash ++ ++ # On the verifier machine, the CA cert is typically at: ++ # /var/lib/keylime/cv_ca/cacert.crt ++ ++ # Copy to the agent machine: ++ scp verifier:/var/lib/keylime/cv_ca/cacert.crt /var/lib/keylime/cv_ca/cacert.crt ++ ++Verifying the Deployment ++------------------------- ++ ++After starting both the verifier (in push mode) and the push-model agent: ++ ++1. **Check agent registration** in the registrar: ++ ++ .. code-block:: bash ++ ++ sudo keylime_tenant -c regstatus -u <agent_uuid> ++ ++2. **Check attestation status** in the verifier: ++ ++ .. code-block:: bash ++ ++ sudo keylime_tenant -c cvstatus -u <agent_uuid> ++ ++3. **View verifier logs** for attestation activity: ++ ++ .. code-block:: bash ++ ++ sudo journalctl -u keylime_verifier -f ++ ++ Successful attestations will show evidence receipt and verification completion ++ messages. ++ ++4. **View agent logs** for attestation cycles: ++ ++ .. code-block:: bash ++ ++ sudo journalctl -u keylime_push_model_agent -f ++ ++ The agent logs will show transitions through the state machine: ++ registration, negotiation, and attestation phases. ++ ++Troubleshooting ++---------------- ++ ++Agent cannot connect to verifier ++""""""""""""""""""""""""""""""""" ++ ++* Verify the ``verifier_url`` is correct and uses HTTPS ++* Check that the verifier is running and listening on the configured port ++* Verify network connectivity from the agent to the verifier ++* Check that the CA certificate (``verifier_tls_ca_cert``) matches the verifier's ++ server certificate ++ ++Agent shows timeout failures ++""""""""""""""""""""""""""""" ++ ++The verifier marks an agent as failed if it does not receive an attestation within ++``quote_interval * 5`` seconds. 
++ ++* Verify the ``attestation_interval_seconds`` on the agent is less than the ++ verifier's timeout threshold ++* Check for network instability between agent and verifier ++* Review agent logs for errors during attestation cycles ++ ++PoP authentication errors ++"""""""""""""""""""""""""" ++ ++* Ensure the agent is properly registered in the registrar (the AK must be known) ++* Check that the TPM is accessible and functioning ++* Verify the agent UUID matches between agent configuration and verifier enrollment ++ ++Agent state stuck in Negotiating ++""""""""""""""""""""""""""""""""" ++ ++* The verifier may be rejecting capabilities. Check verifier logs for error details ++* Ensure the TPM algorithms configured on the agent are accepted by the verifier ++* Check that the ``api_versions`` setting includes ``3.0`` ++ ++Service fails to start ++"""""""""""""""""""""" ++ ++* Check that the pull-model agent service is not running ++ (``systemctl status keylime_agent``) ++* Verify the configuration file syntax (TOML format) ++* Check file permissions on TLS certificates and TPM device diff --git a/0015-remove-enable-authentication-config-option.patch b/0015-remove-enable-authentication-config-option.patch new file mode 100644 index 0000000..d40247b --- /dev/null +++ b/0015-remove-enable-authentication-config-option.patch @@ -0,0 +1,46 @@ +From 416d3906fe4071132d5cdc494f828ce3a909f336 Mon Sep 17 00:00:00 2001 +From: Sergio Arroutbi +Date: Fri, 20 Mar 2026 10:57:23 +0100 +Subject: [PATCH] Remove enable_authentication agent config option + +The Rust agent does not parse the enable_authentication +configuration option and always performs authentication. +Remove the option from both the agent.j2 template and the +2.5 mapping.json to avoid exposing a non-functional setting +to users. 
+ +Signed-off-by: Sergio Arroutbi +--- + templates/2.5/agent.j2 | 7 ------- + templates/2.5/mapping.json | 3 +-- + 2 files changed, 1 insertion(+), 9 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index d5eec733d..5e9a1a706 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -274,10 +274,3 @@ ima_ml_path = "{{ agent.ima_ml_path }}" + # If set as a relative path, it will be considered from the root path "/". + # If set as an absolute path, it will use it without changes + measuredboot_ml_path = "{{ agent.measuredboot_ml_path }}" +- +-# Enable challenge-response authentication for push model attestation. +-# When enabled, the agent will authenticate with the verifier using TPM-based +-# proof of possession before sending attestation evidence. +-# This option is specific to the push attestation model. +-# The default is False (disabled). +-enable_authentication = {{ agent.enable_authentication }} +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 522aa4ce9..4b198e768 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -13,8 +13,7 @@ + "ima_ml_count_file": "/tmp/ima_ml_count", + "uefi_logs_evidence_version": "1.0", + "tls_accept_invalid_certs": "false", +- "tls_accept_invalid_hostnames": "false", +- "enable_authentication": "true" ++ "tls_accept_invalid_hostnames": "false" + } + }, + "verifier": { diff --git a/0016-docs-push-attestation-config-tables.patch b/0016-docs-push-attestation-config-tables.patch new file mode 100644 index 0000000..0cd863e --- /dev/null +++ b/0016-docs-push-attestation-config-tables.patch @@ -0,0 +1,1164 @@ +From 4a36422caa40bf914b1b9f7ed86efc802e183ef1 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Tue, 24 Mar 2026 18:04:55 +0100 +Subject: [PATCH 1/3] templates: Remove unused ima_ml_count_file option + +This option was defined in the 2.5 config template and mapping but +never used. Remove it to avoid confusion. 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + templates/2.5/agent.j2 | 3 --- + templates/2.5/mapping.json | 1 - + 2 files changed, 4 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index 5e9a1a706..f56010e87 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -47,9 +47,6 @@ verifier_url = "{{ agent_verifier_url }}" + # Server identifier for certification keys + certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" + +-# File to store the IMA measurement list count +-ima_ml_count_file = "{{ agent_ima_ml_count_file }}" +- + # Evidence version for UEFI logs + uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" + +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 4b198e768..04f89e77a 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -10,7 +10,6 @@ + "exponential_backoff_initial_delay": "10000", + "exponential_backoff_max_delay": "360000", + "certification_keys_server_identifier": "ak", +- "ima_ml_count_file": "/tmp/ima_ml_count", + "uefi_logs_evidence_version": "1.0", + "tls_accept_invalid_certs": "false", + "tls_accept_invalid_hostnames": "false" + +From baf182680ffd60ab0b4ef8bf42bba3d02208b392 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 27 Mar 2026 10:55:35 +0100 +Subject: [PATCH 2/3] templates: Sync agent config options with + keylime-agent.conf + +Add missing agent options to the 2.5 upgrade templates and mapping +that are present in keylime-agent.conf and used in the agent code: + +Common options: +- keylime_dir: working directory path +- payload_key: payload encryption private key +- payload_key_password: password for payload key +- revocation_actions_dir: path to pre-installed revocation scripts +- allow_payload_revocation_actions: control payload revocation actions + +Push model options: +- verifier_tls_ca_cert: CA cert for verifier TLS 
verification +- registrar_tls_port: TLS port for registrar communication +- registrar_tls_enabled: enable TLS with registrar +- registrar_tls_ca_cert: CA cert for registrar TLS verification +- registrar_api_versions: API version negotiation with registrar + +Fix default values to match keylime-agent.conf: +- exponential_backoff_max_delay: 360000 -> 300000 +- uefi_logs_evidence_version: "1.0" -> "2.1" + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + templates/2.5/agent.j2 | 40 ++++++++++++++++++++++++++++++++++++++ + templates/2.5/mapping.json | 14 +++++++++++-- + 2 files changed, 52 insertions(+), 2 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index f56010e87..9f85f8411 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -44,6 +44,11 @@ agent_data_path = "{{ agent_data_path }}" + # Verifier URL + verifier_url = "{{ agent_verifier_url }}" + ++# Verifier TLS CA certificate (Push Model specific). ++# Used to verify the verifier's server certificate. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++verifier_tls_ca_cert = "{{ agent.verifier_tls_ca_cert }}" ++ + # Server identifier for certification keys + certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" + +@@ -54,11 +59,31 @@ uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" + registrar_ip = "{{ agent.registrar_ip }}" + registrar_port = {{ agent.registrar_port }} + ++# The TLS port of the registrar server (Push Model specific). ++# Used when registrar_tls_enabled is set to true. ++registrar_tls_port = {{ agent.registrar_tls_port }} ++ ++# Enable TLS communication between agent and registrar (Push Model specific). ++# When enabled, the agent uses TLS (server verification only) with the registrar. 
++registrar_tls_enabled = {{ agent.registrar_tls_enabled }} ++ ++# TLS CA certificate for verifying the registrar's server certificate (Push Model specific). ++# Only used when registrar_tls_enabled is true. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++registrar_tls_ca_cert = "{{ agent.registrar_tls_ca_cert }}" ++ ++# The API versions to use when communicating with the registrar (Push Model specific). ++# Supports "default" (all supported), "latest", or a comma-separated list. ++registrar_api_versions = "{{ agent.registrar_api_versions }}" ++ + # Enable mTLS communication between agent, verifier and tenant. + # Details on why setting it to "False" is generally considered insecure can be found + # on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r + enable_agent_mtls = {{ agent.enable_agent_mtls }} + ++# The keylime working directory. ++keylime_dir = "{{ agent.keylime_dir }}" ++ + # Accept invalid TLS certificates (INSECURE - for testing only) + # When enabled, the agent will accept self-signed or invalid certificates + # This option is specific to the push attestation model. +@@ -100,6 +125,14 @@ server_key = "{{ agent.server_key }}" + # If left empty, the private key will not be encrypted. + server_key_password = "{{ agent.server_key_password }}" + ++# The name of the file containing the payload encryption private key. ++# If set as "default", the "payload-private.pem" value is used. ++payload_key = "{{ agent.payload_key }}" ++ ++# Set the password used to encrypt the payload private key file. ++# If left empty, the private key will not be encrypted. ++payload_key_password = "{{ agent.payload_key_password }}" ++ + # The name of the file containing the X509 certificate used as the Keylime agent + # server TLS certificate. + # This certificate must be self signed. +@@ -159,6 +192,9 @@ revocation_cert = "{{ agent.revocation_cert }}" + # action_list in the unzipped payload content. 
+ revocation_actions = "{{ agent.revocation_actions }}" + ++# The path to the directory containing pre-installed revocation action scripts. ++revocation_actions_dir = "{{ agent.revocation_actions_dir }}" ++ + # A script to execute after unzipping the tenant payload. This is like + # cloud-init lite =) Keylime will run it with a /bin/sh environment and + # with a working directory of /var/lib/keylime/secure/unzipped. +@@ -171,6 +207,10 @@ payload_script = "{{ agent.payload_script }}" + # https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r + enable_insecure_payload = {{ agent.enable_insecure_payload }} + ++# Whether to allow running revocation actions sent as part of the payload. ++# Setting to false limits revocation actions to pre-installed ones. ++allow_payload_revocation_actions = {{ agent.allow_payload_revocation_actions }} ++ + # Maximum number of retries for exponential backoff + exponential_backoff_max_retries = {{ agent.exponential_backoff_max_retries }} + # Initial delay in milliseconds for exponential backoff +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 04f89e77a..f3eaf8dbb 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -4,13 +4,23 @@ + "components": { + "agent": { + "add": { ++ "keylime_dir": "/var/lib/keylime", ++ "payload_key": "default", ++ "payload_key_password": "", ++ "revocation_actions_dir": "/usr/libexec/keylime", ++ "allow_payload_revocation_actions": "true", + "agent_data_path": "/var/lib/keylime/agent_data.json", + "verifier_url": "https://localhost:8881", ++ "verifier_tls_ca_cert": "default", ++ "registrar_tls_port": "8891", ++ "registrar_tls_enabled": "false", ++ "registrar_tls_ca_cert": "default", ++ "registrar_api_versions": "default", + "exponential_backoff_max_retries": "5", + "exponential_backoff_initial_delay": "10000", +- "exponential_backoff_max_delay": "360000", ++ "exponential_backoff_max_delay": "300000", + 
"certification_keys_server_identifier": "ak", +- "uefi_logs_evidence_version": "1.0", ++ "uefi_logs_evidence_version": "2.1", + "tls_accept_invalid_certs": "false", + "tls_accept_invalid_hostnames": "false" + } + +From bd392633a36839dfa51f86a1568370a87b3ecd37 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 27 Mar 2026 10:57:04 +0100 +Subject: [PATCH 3/3] docs: Add tables with push-attestation configuration + options + +Add comprehensive configuration reference tables for all Keylime +components documenting option names, defaults, config versions, and +environment variable overrides. Tables are organized by component and +separated into common, pull-model, and push-model sections. + +Mark removed agent options with "(removed in 2.5)" and version range +2.0-2.4: +- measure_payload_pcr +- exponential_backoff +- retry_interval +- max_retries + +Add missing agent common options: +- keylime_dir +- payload_key +- payload_key_password +- revocation_actions_dir +- allow_payload_revocation_actions + +Add missing agent push-model options: +- attestation_interval_seconds +- verifier_tls_ca_cert +- registrar_tls_port +- registrar_tls_enabled +- registrar_tls_ca_cert +- registrar_api_versions + +Fix default values to match keylime-agent.conf: +- exponential_backoff_max_delay: 360000 -> 300000 +- uefi_logs_evidence_version: "1.0" -> "2.1" + +Use consistent formatting for default values (unquoted for INI +components, quoted strings and unquoted booleans/integers for TOML). 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/user_guide/configuration.rst | 897 +++++++++++++++++++++++++++++- + 1 file changed, 893 insertions(+), 4 deletions(-) + +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index 2e50757df..aae534423 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -117,7 +117,7 @@ The environment variables are defined as + The section can be omitted if the option to set is located in the main section + (the section named after the component). Otherwise the section is required. + +-For example, to set the ``webhook_url` option from the `[revocations]`` section in ++For example, to set the ``webhook_url`` option from the ``[revocations]`` section in + the ``verifier.conf`` file, the environment variable to set is + ``KEYLIME_VERIFIER_REVOCATIONS_WEBHOOK_URL``. + +@@ -126,7 +126,7 @@ option from the ``[verifier]`` section in the ``verifier.conf``, the environment + variable to set is ``KEYLIME_VERIFIER_SERVER_KEY`` (note that the section can be + omitted). + +-Configuraton upgrades ++Configuration upgrades + --------------------- + + When updating keylime, it is also recommended to upgrade the configuration to +@@ -183,9 +183,9 @@ configuration files are kept intact as backup and renamed with the ``.bkp`` exte + appended to the file names. + + In case the ``--output`` option is provided to the ``keylime_upgrade_config`` +-script, the configuration files are written even when they were alredy ++script, the configuration files are written even when they were already + up-to-date using the available templates. It can be seen as a way to force the +-creation of the configuration fiels, fitting the options read into the new ++creation of the configuration files, fitting the options read into the new + templates. 
+ + Passing the ``--debug`` option to the ``keylime_upgrade_config``, the logging level +@@ -211,3 +211,892 @@ To ignore the input files and use the default value for all options, the + + Finally, to process a single mapping file, the mapping file path can be passed + via the ``--mapping`` option ++ ++Attestation Models: Pull vs Push ++--------------------------------- ++ ++Keylime supports two attestation models that determine how the verifier obtains ++attestation evidence from agents: ++ ++Pull Model (Traditional) ++~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++In the pull model, the verifier actively polls agents at regular intervals to ++retrieve attestation evidence. This is the default and traditional mode of ++operation. ++ ++**Use Cases:** ++ ++* Traditional deployments where the verifier can directly connect to agents ++* Environments with stable network connectivity ++* When you need fine-grained control over attestation frequency ++ ++Push Model (Agent-Driven) ++~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++In the push model, agents periodically push their attestation evidence to the ++verifier. This mode is useful when the verifier cannot directly connect to ++agents (e.g., agents behind firewalls or NAT). ++ ++**Use Cases:** ++ ++* Agents deployed behind firewalls or NAT ++* Cloud or edge deployments where direct connectivity is limited ++* When agents need to control their own attestation schedule ++ ++.. note:: ++ The push model options were introduced in configuration version 2.5 and ++ require the push attestation agent. ++ ++Configuration Options Reference ++-------------------------------- ++ ++This section provides comprehensive tables of all configuration options for each ++Keylime component, including default values, environment variable overrides, and ++applicability to pull/push attestation models. 
++ ++Verifier Configuration (``/etc/keylime/verifier.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Common Options (Both Models) ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_VERSION`` ++ * - ``uuid`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_UUID`` ++ * - ``ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_IP`` ++ * - ``port`` ++ - ``8881`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PORT`` ++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REGISTRAR_PORT`` ++ * - ``enable_agent_mtls`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_ENABLE_AGENT_MTLS`` ++ * - ``tls_dir`` ++ - ``generate`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TLS_DIR`` ++ * - ``server_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_KEY`` ++ * - ``server_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRUSTED_CLIENT_CA`` ++ * - ``client_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_KEY`` ++ * - ``client_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_KEY_PASSWORD`` ++ * - ``client_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_CERT`` ++ * - ``trusted_server_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRUSTED_SERVER_CA`` ++ * - ``database_url`` ++ - ``sqlite`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DATABASE_URL`` ++ * - ``database_pool_sz_ovfl`` ++ - ``5,10`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DATABASE_POOL_SZ_OVFL`` ++ * - ``auto_migrate_db`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_AUTO_MIGRATE_DB`` ++ * - 
``num_workers`` ++ - ``0`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_NUM_WORKERS`` ++ * - ``max_upload_size`` ++ - ``104857600`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MAX_UPLOAD_SIZE`` ++ * - ``measured_boot_policy_name`` ++ - ``accept-all`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_POLICY_NAME`` ++ * - ``measured_boot_imports`` ++ - ``[]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_IMPORTS`` ++ * - ``measured_boot_evaluate`` ++ - ``once`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_EVALUATE`` ++ * - ``severity_labels`` ++ - ``["info", "notice", ...]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SEVERITY_LABELS`` ++ * - ``severity_policy`` ++ - ``[{"event_id": ".*", ...}]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SEVERITY_POLICY`` ++ * - ``ignore_tomtou_errors`` ++ - ``False`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_IGNORE_TOMTOU_ERRORS`` ++ * - ``durable_attestation_import`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DURABLE_ATTESTATION_IMPORT`` ++ * - ``persistent_store_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_URL`` ++ * - ``transparency_log_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRANSPARENCY_LOG_URL`` ++ * - ``time_stamp_authority_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TIME_STAMP_AUTHORITY_URL`` ++ * - ``time_stamp_authority_certs_path`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TIME_STAMP_AUTHORITY_CERTS_PATH`` ++ * - ``persistent_store_format`` ++ - ``json`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_FORMAT`` ++ * - ``persistent_store_encoding`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_ENCODING`` ++ * - ``transparency_log_sign_algo`` ++ - ``sha256`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRANSPARENCY_LOG_SIGN_ALGO`` ++ * - ``signed_attributes`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SIGNED_ATTRIBUTES`` ++ * - ``require_allow_list_signatures`` ++ - ``False`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REQUIRE_ALLOW_LIST_SIGNATURES`` ++ * - ``authorization_provider`` ++ - ``simple`` ++ - 2.5 ++ - 
``KEYLIME_VERIFIER_AUTHORIZATION_PROVIDER`` ++ * - ``cert_subject_alternative_names`` ++ - (empty) ++ - 2.5 ++ - ``KEYLIME_VERIFIER_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ ++Pull Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``quote_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_QUOTE_INTERVAL`` ++ * - ``retry_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_RETRY_INTERVAL`` ++ * - ``max_retries`` ++ - ``5`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MAX_RETRIES`` ++ * - ``exponential_backoff`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_EXPONENTIAL_BACKOFF`` ++ * - ``request_timeout`` ++ - ``60.0`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REQUEST_TIMEOUT`` ++ ++Push Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``mode`` ++ - ``pull`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_MODE`` ++ * - ``challenge_lifetime`` ++ - ``1800`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_CHALLENGE_LIFETIME`` ++ * - ``verification_timeout`` ++ - ``0`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_VERIFICATION_TIMEOUT`` ++ * - ``session_create_rate_limit_per_ip`` ++ - ``50`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_PER_IP`` ++ * - ``session_create_rate_limit_window_ip`` ++ - ``60`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_WINDOW_IP`` ++ * - ``session_create_rate_limit_per_agent`` ++ - ``15`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_PER_AGENT`` ++ * - ``session_create_rate_limit_window_agent`` ++ - ``60`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_WINDOW_AGENT`` ++ * - ``session_lifetime`` ++ - ``180`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_LIFETIME`` ++ * - ``extend_token_on_attestation`` ++ - ``True`` ++ - 2.5 ++ - 
``KEYLIME_VERIFIER_EXTEND_TOKEN_ON_ATTESTATION`` ++ ++Revocations Section ++^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``enabled_revocation_notifications`` ++ - ``['agent']`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ENABLED_REVOCATION_NOTIFICATIONS`` ++ * - ``zmq_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ZMQ_IP`` ++ * - ``zmq_port`` ++ - ``8992`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ZMQ_PORT`` ++ * - ``webhook_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_WEBHOOK_URL`` ++ ++Registrar Configuration (``/etc/keylime/registrar.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_VERSION`` ++ * - ``ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_IP`` ++ * - ``port`` ++ - ``8890`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PORT`` ++ * - ``tls_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TLS_PORT`` ++ * - ``tls_dir`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TLS_DIR`` ++ * - ``server_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_KEY`` ++ * - ``server_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRUSTED_CLIENT_CA`` ++ * - ``database_url`` ++ - ``sqlite`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_DATABASE_URL`` ++ * - ``database_pool_sz_ovfl`` ++ - ``5,10`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_DATABASE_POOL_SZ_OVFL`` ++ * - ``auto_migrate_db`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_AUTO_MIGRATE_DB`` ++ * - ``durable_attestation_import`` ++ - (empty) ++ - 
2.0 ++ - ``KEYLIME_REGISTRAR_DURABLE_ATTESTATION_IMPORT`` ++ * - ``persistent_store_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_URL`` ++ * - ``transparency_log_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRANSPARENCY_LOG_URL`` ++ * - ``time_stamp_authority_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TIME_STAMP_AUTHORITY_URL`` ++ * - ``time_stamp_authority_certs_path`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TIME_STAMP_AUTHORITY_CERTS_PATH`` ++ * - ``persistent_store_format`` ++ - ``json`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_FORMAT`` ++ * - ``persistent_store_encoding`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_ENCODING`` ++ * - ``transparency_log_sign_algo`` ++ - ``sha256`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRANSPARENCY_LOG_SIGN_ALGO`` ++ * - ``signed_attributes`` ++ - ``ek_tpm,aik_tpm,ekcert`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SIGNED_ATTRIBUTES`` ++ * - ``tpm_identity`` ++ - ``default`` ++ - 2.1 ++ - ``KEYLIME_REGISTRAR_TPM_IDENTITY`` ++ * - ``malformed_cert_action`` ++ - ``warn`` ++ - 2.4 ++ - ``KEYLIME_REGISTRAR_MALFORMED_CERT_ACTION`` ++ * - ``authorization_provider`` ++ - ``simple`` ++ - 2.5 ++ - ``KEYLIME_REGISTRAR_AUTHORIZATION_PROVIDER`` ++ * - ``cert_subject_alternative_names`` ++ - (empty) ++ - 2.5 ++ - ``KEYLIME_REGISTRAR_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ ++Tenant Configuration (``/etc/keylime/tenant.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERSION`` ++ * - ``verifier_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERIFIER_IP`` ++ * - ``verifier_port`` ++ - ``8881`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERIFIER_PORT`` ++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REGISTRAR_PORT`` ++ * - ``tls_dir`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TLS_DIR`` ++ * - ``enable_agent_mtls`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ENABLE_AGENT_MTLS`` ++ * - ``client_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_KEY`` ++ * - ``client_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_KEY_PASSWORD`` ++ * - ``client_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_CERT`` ++ * - ``trusted_server_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TRUSTED_SERVER_CA`` ++ * - ``tpm_cert_store`` ++ - ``/var/lib/keylime/tpm_cert_store`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TPM_CERT_STORE`` ++ * - ``max_payload_size`` ++ - ``1048576`` ++ - 2.0 ++ - ``KEYLIME_TENANT_MAX_PAYLOAD_SIZE`` ++ * - ``accept_tpm_hash_algs`` ++ - ``['sha512', 'sha384', 'sha256']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_HASH_ALGS`` ++ * - ``accept_tpm_encryption_algs`` ++ - ``['ecc', 'rsa']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_ENCRYPTION_ALGS`` ++ * - ``accept_tpm_signing_algs`` ++ - ``['ecschnorr', 'rsassa']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_SIGNING_ALGS`` ++ * - ``exponential_backoff`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_EXPONENTIAL_BACKOFF`` ++ * - ``retry_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_TENANT_RETRY_INTERVAL`` ++ * - ``max_retries`` ++ - ``5`` ++ - 2.0 ++ - ``KEYLIME_TENANT_MAX_RETRIES`` ++ * - ``request_timeout`` ++ - ``60`` ++ - 2.0 ++ - 
``KEYLIME_TENANT_REQUEST_TIMEOUT`` ++ * - ``require_ek_cert`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REQUIRE_EK_CERT`` ++ * - ``ek_check_script`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_EK_CHECK_SCRIPT`` ++ * - ``mb_refstate`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_MB_REFSTATE`` ++ ++CA Configuration (``/etc/keylime/ca.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 15 15 40 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_CA_VERSION`` ++ * - ``password`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_CA_PASSWORD`` ++ * - ``cert_country`` ++ - ``US`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_COUNTRY`` ++ * - ``cert_ca_name`` ++ - ``Keylime Certificate Authority`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CA_NAME`` ++ * - ``cert_state`` ++ - ``MA`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_STATE`` ++ * - ``cert_locality`` ++ - ``Lexington`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_LOCALITY`` ++ * - ``cert_organization`` ++ - ``MITLL`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_ORGANIZATION`` ++ * - ``cert_org_unit`` ++ - ``53`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_ORG_UNIT`` ++ * - ``cert_ca_lifetime`` ++ - ``3650`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CA_LIFETIME`` ++ * - ``cert_lifetime`` ++ - ``365`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_LIFETIME`` ++ * - ``cert_bits`` ++ - ``2048`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_BITS`` ++ * - ``cert_crl_dist`` ++ - ``http://localhost:38080/crl`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CRL_DIST`` ++ ++Agent Configuration (``/etc/keylime/agent.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. warning:: ++ The Python agent is deprecated and will be removed in version 7.0.0! ++ Please migrate to the Rust-based agent from https://github.com/keylime/rust-keylime/ ++ ++Common Options (Both Models) ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 28 12 12 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``"2.5"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_VERSION`` ++ * - ``api_versions`` ++ - ``"default"`` ++ - 2.4 ++ - ``KEYLIME_AGENT_API_VERSIONS`` ++ * - ``uuid`` ++ - ``"d432fbb3-d2f1-4a97-9ef7-75bd81c00000"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_UUID`` ++ * - ``ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_IP`` ++ * - ``port`` ++ - ``9002`` ++ - 2.0 ++ - ``KEYLIME_AGENT_PORT`` ++ * - ``contact_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_CONTACT_IP`` ++ * - ``contact_port`` ++ - ``9002`` ++ - 2.0 ++ - ``KEYLIME_AGENT_CONTACT_PORT`` ++ * - ``registrar_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8890`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REGISTRAR_PORT`` ++ * - ``enable_agent_mtls`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_AGENT_MTLS`` ++ * - ``tls_dir`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TLS_DIR`` ++ * - ``server_key`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_KEY`` ++ * - ``server_key_password`` ++ - ``""`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TRUSTED_CLIENT_CA`` ++ * - ``enc_keyname`` ++ - ``"derived_tci_key"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENC_KEYNAME`` ++ * - ``dec_payload_file`` ++ - ``"decrypted_payload"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_DEC_PAYLOAD_FILE`` ++ * - ``secure_size`` ++ - ``"1m"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SECURE_SIZE`` ++ * - ``tpm_ownerpassword`` ++ - ``""`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_OWNERPASSWORD`` ++ * - ``extract_payload_zip`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_EXTRACT_PAYLOAD_ZIP`` ++ * - ``enable_revocation_notifications`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_REVOCATION_NOTIFICATIONS`` ++ * 
- ``revocation_notification_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_NOTIFICATION_IP`` ++ * - ``revocation_notification_port`` ++ - ``8992`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_NOTIFICATION_PORT`` ++ * - ``revocation_cert`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_CERT`` ++ * - ``revocation_actions`` ++ - ``"[]"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_ACTIONS`` ++ * - ``payload_script`` ++ - ``"autorun.sh"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_PAYLOAD_SCRIPT`` ++ * - ``enable_insecure_payload`` ++ - ``false`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_INSECURE_PAYLOAD`` ++ * - ``measure_payload_pcr`` ++ - ``-1`` ++ - 2.0 ++ - ``KEYLIME_AGENT_MEASURE_PAYLOAD_PCR`` ++ * - ``exponential_backoff`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF`` ++ * - ``retry_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_AGENT_RETRY_INTERVAL`` ++ * - ``max_retries`` ++ - ``4`` ++ - 2.0 ++ - ``KEYLIME_AGENT_MAX_RETRIES`` ++ * - ``tpm_hash_alg`` ++ - ``"sha256"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_HASH_ALG`` ++ * - ``tpm_encryption_alg`` ++ - ``"rsa"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_ENCRYPTION_ALG`` ++ * - ``tpm_signing_alg`` ++ - ``"rsassa"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_SIGNING_ALG`` ++ * - ``ek_handle`` ++ - ``"generate"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_EK_HANDLE`` ++ * - ``enable_iak_idevid`` ++ - ``false`` ++ - 2.1 ++ - ``KEYLIME_AGENT_ENABLE_IAK_IDEVID`` ++ * - ``iak_idevid_template`` ++ - ``"detect"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_TEMPLATE`` ++ * - ``iak_idevid_asymmetric_alg`` ++ - ``"rsa"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_ASYMMETRIC_ALG`` ++ * - ``iak_idevid_name_alg`` ++ - ``"sha256"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_NAME_ALG`` ++ * - ``idevid_password`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IDEVID_PASSWORD`` ++ * - ``idevid_handle`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IDEVID_HANDLE`` ++ * - ``iak_password`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IAK_PASSWORD`` ++ * - 
``iak_handle`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IAK_HANDLE`` ++ * - ``iak_cert`` ++ - ``"default"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_CERT`` ++ * - ``idevid_cert`` ++ - ``"default"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IDEVID_CERT`` ++ * - ``run_as`` ++ - ``"keylime:tss"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_RUN_AS`` ++ * - ``ima_ml_path`` ++ - ``"default"`` ++ - 2.2 ++ - ``KEYLIME_AGENT_IMA_ML_PATH`` ++ * - ``measuredboot_ml_path`` ++ - ``"default"`` ++ - 2.2 ++ - ``KEYLIME_AGENT_MEASUREDBOOT_ML_PATH`` ++ ++Push Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 35 12 12 41 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``agent_data_path`` ++ - ``"/var/lib/keylime/agent_data.json"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_AGENT_DATA_PATH`` ++ * - ``verifier_url`` ++ - ``"https://localhost:8881"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_VERIFIER_URL`` ++ * - ``certification_keys_server_identifier`` ++ - ``"ak"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_CERTIFICATION_KEYS_SERVER_IDENTIFIER`` ++ * - ``uefi_logs_evidence_version`` ++ - ``"2.1"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_UEFI_LOGS_EVIDENCE_VERSION`` ++ * - ``tls_accept_invalid_certs`` ++ - ``false`` ++ - 2.5 ++ - ``KEYLIME_AGENT_TLS_ACCEPT_INVALID_CERTS`` ++ * - ``tls_accept_invalid_hostnames`` ++ - ``false`` ++ - 2.5 ++ - ``KEYLIME_AGENT_TLS_ACCEPT_INVALID_HOSTNAMES`` ++ * - ``exponential_backoff_max_retries`` ++ - ``5`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_MAX_RETRIES`` ++ * - ``exponential_backoff_initial_delay`` ++ - ``10000`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_INITIAL_DELAY`` ++ * - ``exponential_backoff_max_delay`` ++ - ``300000`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_MAX_DELAY`` ++ ++Logging Configuration (``/etc/keylime/logging.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++The logging configuration follows Python's standard logging configuration format.
++See the Python logging documentation for details on configuring handlers, formatters, ++and loggers. The version option can be overridden with ``KEYLIME_LOGGING_VERSION``. ++ ++Configuration Version History ++------------------------------ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 15 70 ++ ++ * - Version ++ - Changes ++ * - 2.0 ++ - Base configuration structure, pull model support ++ * - 2.1 ++ - Added IAK/IDevID support, ``tpm_identity`` for registrar ++ * - 2.2 ++ - Added ``ima_ml_path`` and ``measuredboot_ml_path`` configuration ++ * - 2.3 ++ - Added persisted key handles for IAK/IDevID (``iak_handle``, ``idevid_handle``) ++ * - 2.4 ++ - Added ``api_versions`` for agent, ``malformed_cert_action`` for registrar ++ * - 2.5 ++ - **Push model support**: Added ``mode``, ``challenge_lifetime``, ``verification_timeout``, session rate limiting and lifetime options for verifier; ``verifier_url``, ``agent_data_path``, TLS validation, exponential backoff options for agent. Added ``authorization_provider`` and ``cert_subject_alternative_names`` for verifier and registrar ++ ++For detailed information on all configuration options for each component, refer ++to the configuration files in ``/etc/keylime/`` and their inline documentation. diff --git a/0017-verifier-graceful-shutdown.patch b/0017-verifier-graceful-shutdown.patch new file mode 100644 index 0000000..a90637c --- /dev/null +++ b/0017-verifier-graceful-shutdown.patch @@ -0,0 +1,2373 @@ +From cb944ee9c178f7a717e904ddbf85aac5b27a2eac Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Fri, 17 Apr 2026 14:52:35 +0200 +Subject: [PATCH] verifier: Implement graceful shutdown + +Implement graceful shutdown, cancel pending retries, and drain in-flight +work. 
+ +Backported from https://github.com/keylime/keylime/pull/1869 + +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/man/keylime_verifier.8.rst | 1 + + docs/user_guide/configuration.rst | 6 + + keylime/cloud_verifier_tornado.py | 180 ++++++++++++--- + keylime/push_agent_monitor.py | 23 ++ + keylime/revocation_notifier.py | 10 +- + keylime/shared_data.py | 65 +++++- + keylime/shutdown.py | 21 ++ + keylime/web/base/server.py | 111 +++++++++- + keylime/web/verifier_server.py | 122 +++++------ + templates/2.6/agent.j2 | 313 ++++++++++++++++++++++++++ + templates/2.6/ca.j2 | 39 ++++ + templates/2.6/logging.j2 | 33 +++ + templates/2.6/mapping.json | 11 + + templates/2.6/registrar.j2 | 168 ++++++++++++++ + templates/2.6/tenant.j2 | 130 +++++++++++ + templates/2.6/verifier.j2 | 350 ++++++++++++++++++++++++++++++ + test/test_shutdown.py | 210 ++++++++++++++++++ + test/test_verifier_server.py | 82 +++---- + 18 files changed, 1722 insertions(+), 153 deletions(-) + create mode 100644 keylime/shutdown.py + create mode 100644 templates/2.6/agent.j2 + create mode 100644 templates/2.6/ca.j2 + create mode 100644 templates/2.6/logging.j2 + create mode 100644 templates/2.6/mapping.json + create mode 100644 templates/2.6/registrar.j2 + create mode 100644 templates/2.6/tenant.j2 + create mode 100644 templates/2.6/verifier.j2 + create mode 100644 test/test_shutdown.py + +diff --git a/docs/man/keylime_verifier.8.rst b/docs/man/keylime_verifier.8.rst +index 5303a5f..d22d211 100644 +--- a/docs/man/keylime_verifier.8.rst ++++ b/docs/man/keylime_verifier.8.rst +@@ -53,6 +53,7 @@ Essentials: + - **quote_interval**: Time between integrity checks (seconds) + - **max_upload_size**: Upload size limit (bytes) + - **request_timeout**: Agent request timeout (seconds) ++- **shutdown_drain_timeout**: Max time (seconds) to wait for in-flight operations during shutdown + - **measured_boot_policy_name**, **measured_boot_imports**, **measured_boot_evaluate**: measured boot policy settings + - 
**severity_labels**, **severity_policy**: revocation severity config + - **ignore_tomtou_errors**: handle ToMToU IMA entries (bool) +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index aae5344..327c370 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -425,6 +425,10 @@ Common Options (Both Models) + - (empty) + - 2.5 + - ``KEYLIME_VERIFIER_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ * - ``shutdown_drain_timeout`` ++ - ``10`` ++ - 2.6 ++ - ``KEYLIME_VERIFIER_SHUTDOWN_DRAIN_TIMEOUT`` + + Pull Model Specific Options + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -1097,6 +1101,8 @@ Configuration Version History + - Added ``api_versions`` for agent, ``malformed_cert_action`` for registrar + * - 2.5 + - **Push model support**: Added ``mode``, ``challenge_lifetime``, ``verification_timeout``, session rate limiting and lifetime options for verifier; ``verifier_url``, ``agent_data_path``, TLS validation, exponential backoff options for agent. Added ``authorization_provider`` and ``cert_subject_alternative_names`` for verifier and registrar ++ * - 2.6 ++ - Added ``shutdown_drain_timeout`` for verifier graceful shutdown + + For detailed information on all configuration options for each component, refer + to the configuration files in ``/etc/keylime/`` and their inline documentation. +diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py +index 75f117b..eb57de5 100644 +--- a/keylime/cloud_verifier_tornado.py ++++ b/keylime/cloud_verifier_tornado.py +@@ -29,6 +29,7 @@ from keylime import ( + keylime_logging, + push_agent_monitor, + revocation_notifier, ++ shutdown, + signing, + tornado_requests, + web_util, +@@ -175,6 +176,29 @@ exclude_db: Dict[str, Any] = { + # events (quote polls, retries). Used to cancel them all on shutdown. + _pending_events: Dict[str, object] = {} + ++# Counter of currently executing process_agent() coroutines. 
The shutdown ++# handler waits for this to reach zero before stopping the IOLoop so that ++# in-flight DB writes can finish. ++_active_operations = 0 ++# Event signalled when _active_operations drops to zero during shutdown. ++_operations_drained = asyncio.Event() ++_operations_drained.set() # initially no operations are active ++ ++ ++def _enter_operation() -> None: ++ """Increment the active operations counter.""" ++ global _active_operations ++ _active_operations += 1 ++ _operations_drained.clear() ++ ++ ++def _exit_operation() -> None: ++ """Decrement the active operations counter; signal if drained.""" ++ global _active_operations ++ _active_operations -= 1 ++ if _active_operations <= 0: ++ _operations_drained.set() ++ + + def _register_pending_event(agent: Dict[str, Any], handle: object) -> None: + """Track a pending IOLoop timeout in both the agent dict and the global registry. +@@ -201,6 +225,38 @@ def _cancel_pending_event(agent: Dict[str, Any]) -> None: + logger.debug("Could not remove pending event for agent %s: %s", agent["agent_id"], e) + + ++def get_active_operations() -> int: ++ """Return the number of currently executing process_agent() coroutines.""" ++ return _active_operations ++ ++ ++async def wait_for_drain(timeout: float) -> bool: ++ """Wait up to *timeout* seconds for all active operations to finish. ++ ++ Returns True if all operations drained, False if the timeout expired. ++ """ ++ try: ++ await asyncio.wait_for(_operations_drained.wait(), timeout=timeout) ++ return True ++ except asyncio.TimeoutError: ++ return False ++ ++ ++def cancel_all_pending_events() -> None: ++ """Cancel every tracked pending IOLoop timeout. 
Called on shutdown.""" ++ if not _pending_events: ++ return ++ io_loop = tornado.ioloop.IOLoop.current() ++ for agent_id, handle in _pending_events.items(): ++ try: ++ io_loop.remove_timeout(handle) ++ except Exception as e: ++ logger.debug("Could not remove pending event for agent %s: %s", agent_id, e) ++ count = len(_pending_events) ++ _pending_events.clear() ++ logger.info("Cancelled %d pending attestation event(s) for shutdown", count) ++ ++ + def _from_db_obj(agent_db_obj: VerfierMain) -> Dict[str, Any]: + fields = [ + "agent_id", +@@ -2159,6 +2215,17 @@ async def invoke_get_quote( + need_pubkey: bool, + timeout: float = DEFAULT_TIMEOUT, + ) -> None: ++ # Clear tracking only — the timeout already fired (this *is* the callback), ++ # so there is no handle to cancel via remove_timeout(). Done before the ++ # shutdown check so tracking state is cleaned up even on early return. ++ if agent.get("pending_event") is not None: ++ agent["pending_event"] = None ++ _pending_events.pop(agent["agent_id"], None) ++ ++ if shutdown.is_shutting_down(): ++ logger.debug("Skipping get_quote for agent %s — shutting down", agent["agent_id"]) ++ return ++ + failure = Failure(Component.INTERNAL, ["verifier"]) + + params = cloud_verifier_common.prepare_get_quote(agent) +@@ -2262,10 +2329,17 @@ async def invoke_get_quote( + + + async def invoke_provide_v(agent: Dict[str, Any], timeout: float = DEFAULT_TIMEOUT) -> None: +- failure = Failure(Component.INTERNAL, ["verifier"]) +- ++ # Clear tracking only — the timeout already fired (this *is* the callback), ++ # so there is no handle to cancel via remove_timeout(). Done before the ++ # shutdown check so tracking state is cleaned up even on early return. 
+ if agent.get("pending_event") is not None: + agent["pending_event"] = None ++ _pending_events.pop(agent["agent_id"], None) ++ ++ if shutdown.is_shutting_down(): ++ logger.debug("Skipping provide_v for agent %s — shutting down", agent["agent_id"]) ++ return ++ failure = Failure(Component.INTERNAL, ["verifier"]) + + v_json_message = cloud_verifier_common.prepare_v(agent) + +@@ -2422,6 +2496,14 @@ async def notify_error( + async def process_agent( + agent: Dict[str, Any], new_operational_state: int, failure: Failure = Failure(Component.INTERNAL, ["verifier"]) + ) -> None: ++ # During shutdown, allow terminal-state transitions (FAILED, INVALID_QUOTE) ++ # through so that final DB writes and revocation notifications complete. ++ # Only skip non-terminal transitions that would schedule new polls/retries. ++ if shutdown.is_shutting_down() and new_operational_state not in (states.FAILED, states.INVALID_QUOTE): ++ logger.debug("Skipping process_agent for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ _enter_operation() + try: # pylint: disable=R1702 + main_agent_operational_state = agent["operational_state"] + stored_agent = None +@@ -2452,15 +2534,13 @@ async def process_agent( + # if the stored agent could not be recovered from the database, stop polling + if not stored_agent: + logger.warning("Unable to retrieve agent %s from database. 
Stopping polling", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + return + + # if the user did terminated this agent + if stored_agent.operational_state == states.TERMINATED: # pyright: ignore + logger.warning("Agent %s terminated by user.", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + + # Second database operation - delete agent + with session_context() as session: +@@ -2470,8 +2550,7 @@ async def process_agent( + # if the user tells us to stop polling because the tenant quote check failed + if stored_agent.operational_state == states.TENANT_FAILED: # pyright: ignore + logger.warning("Agent %s has failed tenant quote. Stopping polling", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + return + + # Use the request timeout stored in the agent dict (read from the +@@ -2498,8 +2577,7 @@ async def process_agent( + + # When the failure is irrecoverable we stop polling the agent + if not failure.recoverable or failure.highest_severity == MAX_SEVERITY_LABEL: +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + + # Third database operation - update agent with failure state + with session_context() as session: +@@ -2575,6 +2653,10 @@ async def process_agent( + "Setting up callback to check agent ID %s again in %f seconds", agent["agent_id"], interval + ) + ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling next poll for agent %s — shutting down", agent["agent_id"]) ++ return ++ + pending = tornado.ioloop.IOLoop.current().call_later( + # type: ignore # due to python <3.9 + interval, +@@ -2585,7 +2667,7 @@ 
async def process_agent( + False, + timeout=timeout, + ) +- agent["pending_event"] = pending ++ _register_pending_event(agent, pending) + return + + maxr = config.getint("verifier", "max_retries") +@@ -2617,7 +2699,11 @@ async def process_agent( + maxr, + next_retry, + ) +- tornado.ioloop.IOLoop.current().call_later( ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling retry for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ pending = tornado.ioloop.IOLoop.current().call_later( + # type: ignore # due to python <3.9 + next_retry, + invoke_get_quote, +@@ -2627,6 +2713,7 @@ async def process_agent( + True, + timeout=timeout, + ) ++ _register_pending_event(agent, pending) + return + + if main_agent_operational_state == states.PROVIDE_V and new_operational_state == states.PROVIDE_V_RETRY: +@@ -2651,9 +2738,17 @@ async def process_agent( + maxr, + next_retry, + ) +- tornado.ioloop.IOLoop.current().call_later( +- next_retry, invoke_provide_v, agent # type: ignore # due to python <3.9 ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling retry for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ pending = tornado.ioloop.IOLoop.current().call_later( ++ next_retry, # type: ignore # due to python <3.9 ++ invoke_provide_v, ++ agent, ++ timeout, + ) ++ _register_pending_event(agent, pending) + return + raise Exception("nothing should ever fall out of this!") + +@@ -2663,6 +2758,8 @@ async def process_agent( + "exception", {"context": "Agent caused the verifier to throw an exception", "data": str(e)}, False + ) + await process_agent(agent, states.FAILED, failure) ++ finally: ++ _exit_operation() + + + async def activate_agents(agents: List[VerfierMain], verifier_ip: str, verifier_port: int) -> None: +@@ -2769,31 +2866,62 @@ def main() -> None: + server = tornado.httpserver.HTTPServer(app, ssl_options=ssl_ctx, max_buffer_size=max_upload_size) + server.add_sockets(sockets) + +- def server_sig_handler(*_: Any) -> None: +- 
logger.info("Shutting down server %s..", task_id) ++ # Hold strong references to async tasks to prevent GC from collecting them mid-run ++ _background_tasks: List[asyncio.Task[None]] = [] ++ ++ def server_sig_handler(signame: str = "signal") -> None: ++ if shutdown.is_shutting_down(): ++ logger.warning("Shutdown already in progress, ignoring %s (server %s)", signame, task_id) ++ return ++ logger.info("Received %s, shutting down server %s..", signame, task_id) ++ ++ # Signal all attestation loops to stop scheduling new work ++ shutdown.request_shutdown() ++ + # Stop server to not accept new incoming connections + server.stop() + +- # Gracefully shutdown webhook workers to prevent connection errors +- if "webhook" in revocation_notifier.get_notifiers(): +- revocation_notifier.shutdown_webhook_workers() ++ # Cancel all pending attestation timeouts (retries, polls) ++ cancel_all_pending_events() ++ push_agent_monitor.cancel_all_timeouts() + +- # Wait for all connections to be closed and then stop ioloop ++ # Wait for in-flight operations, then close connections and stop + async def stop() -> None: +- await server.close_all_connections() +- tornado.ioloop.IOLoop.current().stop() ++ try: ++ # Give in-flight process_agent() coroutines time to finish ++ # DB writes and revocation notifications before tearing ++ # down webhook workers. ++ drain_timeout = config.getfloat("verifier", "shutdown_drain_timeout", fallback=10.0) ++ drained = await wait_for_drain(drain_timeout) ++ if not drained: ++ logger.warning( ++ "Shutting down with %d operation(s) still active after %.1fs", ++ get_active_operations(), ++ drain_timeout, ++ ) ++ ++ # Shutdown webhook workers after draining so revocation ++ # notifications from in-flight attestations are delivered. 
++ if "webhook" in revocation_notifier.get_notifiers(): ++ revocation_notifier.shutdown_webhook_workers() ++ ++ await server.close_all_connections() ++ except Exception: ++ logger.exception("Error during shutdown cleanup") ++ finally: ++ tornado.ioloop.IOLoop.current().stop() + +- asyncio.ensure_future(stop()) ++ _background_tasks.append(asyncio.ensure_future(stop())) + + # Attach signal handler to ioloop. + # Do not use signal.signal(..) for that because it does not work! + loop = asyncio.get_event_loop() +- loop.add_signal_handler(signal.SIGINT, server_sig_handler) +- loop.add_signal_handler(signal.SIGTERM, server_sig_handler) ++ loop.add_signal_handler(signal.SIGINT, lambda: server_sig_handler("SIGINT")) ++ loop.add_signal_handler(signal.SIGTERM, lambda: server_sig_handler("SIGTERM")) + + server.start() + # Reactivate agents +- asyncio.ensure_future(activate_agents(agents, verifier_host, int(verifier_port))) ++ _background_tasks.append(asyncio.ensure_future(activate_agents(agents, verifier_host, int(verifier_port)))) + tornado.ioloop.IOLoop.current().start() + logger.debug("Server %s stopped.", task_id) + sys.exit(0) +diff --git a/keylime/push_agent_monitor.py b/keylime/push_agent_monitor.py +index f41befc..6537a31 100644 +--- a/keylime/push_agent_monitor.py ++++ b/keylime/push_agent_monitor.py +@@ -171,6 +171,29 @@ def cancel_agent_timeout(agent_id: str) -> None: + logger.error("Error cancelling timeout for agent %s: %s", agent_id, e) + + ++def cancel_all_timeouts() -> None: ++ """Cancel all scheduled PUSH mode agent timeouts. ++ ++ Called during shutdown to prevent timeout callbacks from firing ++ against a stopping event loop. 
++ """ ++ with _agent_timeout_handles_lock: ++ handles = dict(_agent_timeout_handles) ++ _agent_timeout_handles.clear() ++ ++ if not handles: ++ return ++ ++ io_loop = tornado.ioloop.IOLoop.current() ++ for agent_id, handle in handles.items(): ++ try: ++ io_loop.remove_timeout(handle) ++ except Exception as e: ++ logger.debug("Could not remove timeout for agent %s during shutdown: %s", agent_id, e) ++ ++ logger.info("Cancelled %d PUSH mode agent timeout(s) for shutdown", len(handles)) ++ ++ + def check_push_agent_timeouts() -> None: + """Check all PUSH mode agents for timeouts and mark failed ones. + +diff --git a/keylime/revocation_notifier.py b/keylime/revocation_notifier.py +index abab08b..f7efece 100644 +--- a/keylime/revocation_notifier.py ++++ b/keylime/revocation_notifier.py +@@ -259,9 +259,13 @@ def stop_broker() -> None: + + + def shutdown_webhook_workers() -> None: +- """Convenience function to shutdown webhook workers using the global manager.""" +- manager = _get_webhook_manager() +- manager.shutdown_workers() ++ """Shutdown webhook workers if the manager was ever initialized. ++ ++ If no revocation notifications were sent in this process, the manager ++ is still None and there is nothing to shut down. ++ """ ++ if _webhook_manager is not None: ++ _webhook_manager.shutdown_workers() + + + def notify(tosend: Dict[str, Any]) -> None: +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index a415496..09cbb97 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -6,6 +6,8 @@ using multiprocessing.Manager(). 
+ + import atexit + import multiprocessing as mp ++import multiprocessing.process ++import os + import threading + import time + from typing import Any, Dict, List, Optional +@@ -137,8 +139,6 @@ class SharedDataManager: + # Register handler to reinitialize manager connection after fork + # This is needed because Manager uses network connections that don't survive fork + try: +- import os # pylint: disable=import-outside-toplevel +- + self._parent_pid = os.getpid() + logger.debug("SharedDataManager initialized in process %d", self._parent_pid) + except Exception as e: +@@ -283,15 +283,48 @@ class SharedDataManager: + """Cleanup shared resources. + + This is automatically called on exit but can be called manually +- for explicit cleanup. ++ for explicit cleanup. Only the parent process (the one that ++ created the Manager) is allowed to shut it down; child workers ++ forked from the parent skip the call to avoid the ++ ``AssertionError: can only join a child process`` raised by ++ ``multiprocessing`` when a non-parent tries to join. + """ +- if hasattr(self, "_manager"): +- logger.debug("Shutting down SharedDataManager") +- try: +- self._manager.shutdown() +- logger.info("SharedDataManager shutdown complete") +- except Exception as e: +- logger.error("Error during SharedDataManager shutdown: %s", e) ++ if not hasattr(self, "_manager"): ++ return ++ ++ if hasattr(self, "_parent_pid") and os.getpid() != self._parent_pid: ++ logger.debug( ++ "Skipping SharedDataManager shutdown in child process %d (parent is %d)", ++ os.getpid(), ++ self._parent_pid, ++ ) ++ return ++ ++ logger.debug("Shutting down SharedDataManager") ++ try: ++ self._manager.shutdown() ++ logger.info("SharedDataManager shutdown complete") ++ except Exception: ++ logger.exception("Error during SharedDataManager shutdown") ++ ++ def deregister_child(self) -> None: ++ """Remove the Manager's server process from multiprocessing's child tracking. 
++ ++ Must be called in each forked worker **after** ``fork()``. Without ++ this, Python's ``multiprocessing.util._exit_function`` atexit handler ++ tries to ``join()`` the Manager server process in every child worker, ++ causing ``AssertionError: can only join a child process`` because the ++ Manager was spawned by the parent, not the child. ++ """ ++ # The Manager's server process is stored in _manager._process ++ server_process = getattr(self._manager, "_process", None) ++ if server_process is not None: ++ multiprocessing.process._children.discard(server_process) # type: ignore[attr-defined] # pylint: disable=protected-access ++ logger.debug( ++ "Deregistered Manager server process (pid %s) from child tracking in worker %d", ++ getattr(server_process, "pid", "?"), ++ os.getpid(), ++ ) + + def __repr__(self) -> str: + stats = self.get_stats() +@@ -364,6 +397,18 @@ def get_shared_memory() -> SharedDataManager: + return _global_shared_manager + + ++def deregister_shared_memory_child() -> None: ++ """Deregister the Manager's server process in a forked child worker. ++ ++ Call this after ``tornado.process.fork_processes()`` (or any ``fork()``) ++ to prevent Python's atexit handler from trying to ``join()`` the Manager ++ server process in the child, which would raise ++ ``AssertionError: can only join a child process``. ++ """ ++ if _global_shared_manager is not None: ++ _global_shared_manager.deregister_child() ++ ++ + def cleanup_global_shared_memory() -> None: + """Cleanup the global shared memory manager. + +diff --git a/keylime/shutdown.py b/keylime/shutdown.py +new file mode 100644 +index 0000000..72f1c76 +--- /dev/null ++++ b/keylime/shutdown.py +@@ -0,0 +1,21 @@ ++"""Shutdown coordination for graceful server termination. ++ ++Provides a process-wide shutdown flag that attestation loops and retry ++schedulers check before starting new work. 
Setting the flag prevents ++new IOLoop callbacks from being scheduled and allows in-flight ++operations to drain before the event loop stops. ++""" ++ ++import asyncio ++ ++_shutdown_event = asyncio.Event() ++ ++ ++def request_shutdown() -> None: ++ """Signal that the process is shutting down.""" ++ _shutdown_event.set() ++ ++ ++def is_shutting_down() -> bool: ++ """Return True if shutdown has been requested.""" ++ return _shutdown_event.is_set() +diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py +index b62debd..7c8a71b 100644 +--- a/keylime/web/base/server.py ++++ b/keylime/web/base/server.py +@@ -1,5 +1,6 @@ + import asyncio + import multiprocessing ++import signal + from abc import ABC, abstractmethod + from functools import wraps + from ssl import CERT_OPTIONAL +@@ -7,8 +8,9 @@ from typing import TYPE_CHECKING, Any, Callable, Optional + + import tornado + +-from keylime import api_version, config, keylime_logging, web_util ++from keylime import api_version, config, keylime_logging, shutdown, web_util + from keylime.models.base.db import db_manager ++from keylime.shared_data import deregister_shared_memory_child + from keylime.web.base.action_handler import ActionHandler + from keylime.web.base.route import Route + +@@ -251,6 +253,8 @@ class Server(ABC): + # Tornado servers are instantiated by calling start_single() or start_multi(), so set to None initially + self.__tornado_http_server: Optional[tornado.httpserver.HTTPServer] = None + self.__tornado_https_server: Optional[tornado.httpserver.HTTPServer] = None ++ self._server_stopped: Optional[asyncio.Event] = None ++ self._shutdown_task: Optional[asyncio.Task[None]] = None + + async def start_single(self) -> None: + """Instantiates and starts the server (with one Tornado HTTPServer instance to handle HTTP connections +@@ -273,7 +277,82 @@ class Server(ABC): + https_server.add_sockets(self.__tornado_https_sockets) + self.__tornado_https_server = https_server + +- await asyncio.Event().wait() 
++ # Create the stop event before installing signal handlers so that ++ # _graceful_shutdown() can always set it, even if a signal arrives ++ # before we reach the wait(). ++ self._server_stopped = asyncio.Event() ++ ++ # Install signal handlers for graceful shutdown ++ self._install_signal_handlers() ++ ++ try: ++ # Hook for subclasses to perform work after servers are listening ++ # but before blocking (e.g. activate agents). ++ await self._on_server_started() ++ await self._server_stopped.wait() ++ finally: ++ # Remove signal handlers before returning to asyncio.run()'s ++ # teardown, which closes the wakeup fd and replaces remaining ++ # handlers with _sighandler_noop. Any signal arriving after ++ # that would write to the closed fd, causing ++ # "OSError: Bad file descriptor". ++ self._remove_signal_handlers() ++ ++ async def _on_server_started(self) -> None: ++ """Called after servers are listening but before blocking. ++ ++ Override in subclasses to perform post-startup work such as ++ activating agents. The default implementation does nothing. 
++ """ ++ ++ def _install_signal_handlers(self) -> None: ++ """Install SIGINT/SIGTERM handlers for graceful shutdown.""" ++ loop = asyncio.get_event_loop() ++ ++ async def _run_graceful_shutdown() -> None: ++ try: ++ await self._graceful_shutdown() ++ except Exception: ++ logger.exception("Graceful shutdown failed") ++ finally: ++ if self._server_stopped is not None: ++ self._server_stopped.set() ++ ++ def _make_handler(signame: str) -> Callable[[], None]: ++ def _handler() -> None: ++ if shutdown.is_shutting_down(): ++ logger.warning("Shutdown already in progress, ignoring %s", signame) ++ return ++ logger.info("Received %s, shutting down", signame) ++ shutdown.request_shutdown() ++ self._shutdown_task = asyncio.ensure_future(_run_graceful_shutdown()) ++ ++ return _handler ++ ++ loop.add_signal_handler(signal.SIGINT, _make_handler("SIGINT")) ++ loop.add_signal_handler(signal.SIGTERM, _make_handler("SIGTERM")) ++ ++ def _remove_signal_handlers(self) -> None: ++ """Remove SIGINT/SIGTERM handlers from the event loop.""" ++ loop = asyncio.get_event_loop() ++ loop.remove_signal_handler(signal.SIGINT) ++ loop.remove_signal_handler(signal.SIGTERM) ++ ++ async def _graceful_shutdown(self) -> None: ++ """Stop servers and close connections gracefully. ++ ++ Subclasses can override this to cancel component-specific pending work ++ before calling super(). 
++ """ ++ if self.__tornado_http_server: ++ self.__tornado_http_server.stop() ++ if self.__tornado_https_server: ++ self.__tornado_https_server.stop() ++ ++ if self.__tornado_http_server: ++ await self.__tornado_http_server.close_all_connections() ++ if self.__tornado_https_server: ++ await self.__tornado_https_server.close_all_connections() + + def start_multi(self) -> None: + ports = "" +@@ -295,12 +374,19 @@ class Server(ABC): + self.worker_count, + ) + ++ self._pre_fork() ++ + # with StatsCollector(): + # num = manager.Value('i', 0) +- tornado.process.fork_processes(self.worker_count) ++ task_id = tornado.process.fork_processes(self.worker_count) + # num.value = num.value + 1 + # print(num.value) + ++ # Remove the Manager's server process from multiprocessing's child ++ # tracking so Python's atexit handler does not try to join() it in ++ # child workers (the Manager was spawned by the parent). ++ deregister_shared_memory_child() ++ + # Dispose inherited db_manager engine after fork to avoid sharing the + # parent's connection pool, then re-create with a fresh pool for this + # child process. +@@ -309,8 +395,27 @@ class Server(ABC): + if service: + db_manager.make_engine(service) + ++ self._post_fork(task_id) ++ + asyncio.run(self.start_single()) + ++ def _pre_fork(self) -> None: ++ """Called before ``fork_processes()`` in ``start_multi()``. ++ ++ Override in subclasses to perform work that must happen in the ++ parent process before forking (e.g. querying the database for ++ agent lists to distribute across workers). ++ """ ++ ++ def _post_fork(self, task_id: int) -> None: ++ """Called after ``fork_processes()`` in each child worker. ++ ++ *task_id* is the worker index returned by Tornado's ++ ``fork_processes()``. Override to perform per-worker ++ initialization (e.g. resetting inherited DB state, distributing ++ agents). ++ """ ++ + def _setup(self) -> None: + """Defines values to use in place of the defaults for the various server options. 
It is suggested that this is + overriden by the implementing class.""" +diff --git a/keylime/web/verifier_server.py b/keylime/web/verifier_server.py +index 6a0261c..1ada86e 100755 +--- a/keylime/web/verifier_server.py ++++ b/keylime/web/verifier_server.py +@@ -1,12 +1,16 @@ + import asyncio + from typing import List, Optional + +-import tornado.httpserver +-import tornado.ioloop +-import tornado.process + from sqlalchemy.exc import SQLAlchemyError + +-from keylime import cloud_verifier_common, cloud_verifier_tornado, config, keylime_logging ++from keylime import ( ++ cloud_verifier_common, ++ cloud_verifier_tornado, ++ config, ++ keylime_logging, ++ push_agent_monitor, ++ revocation_notifier, ++) + from keylime.authorization.provider import Action + from keylime.common import states + from keylime.db.keylime_db import SessionManager, make_engine +@@ -30,82 +34,76 @@ class VerifierServer(Server): + super().__init__() + self._prepare_agents_on_startup() + self._clear_stale_sessions_on_startup() ++ self._all_agents: List[VerfierMain] = [] + self._worker_agents: Optional[List[VerfierMain]] = None ++ self._activate_task: Optional[asyncio.Task[None]] = None + +- def start_multi(self) -> None: # pylint: disable=no-member +- """Override to support PULL mode agent activation across multiple workers.""" +- # Get all agents from database before forking (only needed for PULL mode) ++ def _pre_fork(self) -> None: ++ """Query agents from database before forking (only needed for PULL mode).""" + logger.info("start_multi() called with operating_mode: %s", self.operating_mode) +- all_agents: List[VerfierMain] = [] ++ self._all_agents = [] + if self.operating_mode == "pull": + verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) + logger.info("Querying agents for verifier_id: %s", verifier_id) +- all_agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) +- logger.info("Found %d agents in database before forking", 
len(all_agents)) +- +- # Log server startup (copied from base class) +- ports = "" +- protocols = "" +- if self._Server__tornado_http_sockets: # type: ignore # pylint: disable=no-member +- ports = str(self.http_port) +- protocols = "HTTP" +- if self._Server__tornado_https_sockets and self.ssl_ctx: # type: ignore # pylint: disable=no-member +- ports = f"{ports}/{self.https_port}" if ports else f"{self.https_port}" +- protocols = f"{protocols}/S" if protocols else "HTTPS" +- logger.info( +- "Listening on %s:%s (%s) with %s worker processes...", +- self.bind_interface, +- ports, +- protocols, +- self.worker_count, +- ) +- +- # Fork worker processes - returns task_id in each child process +- task_id = tornado.process.fork_processes(self.worker_count) ++ self._all_agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) ++ logger.info("Found %d agents in database before forking", len(self._all_agents)) + ++ def _post_fork(self, task_id: int) -> None: ++ """Reset inherited DB state and distribute agents to this worker.""" + # CRITICAL: Reset any database state inherited from parent process. +- # The parent initializes globals when querying agents (line 39), so children +- # inherit initialized state. We must reset to trigger lazy re-initialization. ++ # The parent initializes globals when querying agents in _pre_fork(), ++ # so children inherit initialized state. We must reset to trigger ++ # lazy re-initialization. 
+ cloud_verifier_tornado.reset_verifier_config() + + # Distribute agents to this worker using round-robin (task_id is the worker index) +- if self.operating_mode == "pull" and all_agents: +- self._worker_agents = [all_agents[i] for i in range(task_id, len(all_agents), self.worker_count)] ++ if self.operating_mode == "pull" and self._all_agents: ++ self._worker_agents = [ ++ self._all_agents[i] for i in range(task_id, len(self._all_agents), self.worker_count) ++ ] + logger.info("Worker %d assigned %d agent(s)", task_id, len(self._worker_agents)) + +- # Start this worker's HTTP/HTTPS servers and activate agents +- self.start_single() +- +- def start_single(self) -> None: # type: ignore[override] # pylint: disable=attribute-defined-outside-init,invalid-overridden-method +- """Override to support PULL mode agent activation after server startup.""" +- # Start HTTP/HTTPS servers (logic copied from parent to allow agent activation before blocking) +- # pylint: disable=no-member +- if self._Server__tornado_http_sockets: # type: ignore +- http_server = tornado.httpserver.HTTPServer( +- self._Server__tornado_app, ssl_options=None, max_buffer_size=self.max_upload_size # type: ignore ++ async def _on_server_started(self) -> None: ++ """Activate agents for PULL mode after servers are listening.""" ++ # In start_single() mode (single-process), _pre_fork/_post_fork ++ # are never called so _worker_agents is None and _all_agents is ++ # empty. Query agents directly in that case. 
++ agents = self._worker_agents if self._worker_agents is not None else self._all_agents ++ if self.operating_mode == "pull" and not agents and self._worker_agents is None: ++ verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) ++ agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) ++ if self.operating_mode == "pull" and agents: ++ verifier_host = config.get("verifier", "ip") ++ verifier_port = config.get("verifier", "port") ++ logger.info("Activating %d agent(s) for PULL mode", len(agents)) ++ self._activate_task = asyncio.ensure_future( ++ cloud_verifier_tornado.activate_agents(agents, verifier_host, int(verifier_port)) + ) +- http_server.add_sockets(self._Server__tornado_http_sockets) # type: ignore +- self._Server__tornado_http_server = http_server # type: ignore # pylint: disable=attribute-defined-outside-init + +- if self._Server__tornado_https_sockets and self.ssl_ctx: # type: ignore +- https_server = tornado.httpserver.HTTPServer( +- self._Server__tornado_app, ssl_options=self.ssl_ctx, max_buffer_size=self.max_upload_size # type: ignore ++ async def _graceful_shutdown(self) -> None: ++ """Cancel attestation-specific pending work and drain in-flight operations before stopping servers.""" ++ # Cancel all pending attestation timeouts (retries, polls) ++ cloud_verifier_tornado.cancel_all_pending_events() ++ push_agent_monitor.cancel_all_timeouts() ++ ++ # Wait for in-flight attestation operations to complete before ++ # tearing down webhook workers — in-flight process_agent() calls ++ # may still need to send revocation notifications. 
++ drain_timeout = config.getfloat("verifier", "shutdown_drain_timeout", fallback=10.0) ++ drained = await cloud_verifier_tornado.wait_for_drain(drain_timeout) ++ if not drained: ++ logger.warning( ++ "Shutting down with %d attestation operation(s) still active after %.1fs", ++ cloud_verifier_tornado.get_active_operations(), ++ drain_timeout, + ) +- https_server.add_sockets(self._Server__tornado_https_sockets) # type: ignore +- self._Server__tornado_https_server = https_server # type: ignore # pylint: disable=attribute-defined-outside-init +- # pylint: enable=no-member + +- # Activate agents for PULL mode +- if self.operating_mode == "pull" and self._worker_agents: +- verifier_host = config.get("verifier", "ip") +- verifier_port = config.get("verifier", "port") +- logger.info("Activating %d agent(s) for PULL mode", len(self._worker_agents)) +- asyncio.ensure_future( +- cloud_verifier_tornado.activate_agents(self._worker_agents, verifier_host, int(verifier_port)) +- ) ++ # Shutdown webhook workers after draining so revocation ++ # notifications from in-flight attestations are delivered. ++ if "webhook" in revocation_notifier.get_notifiers(): ++ revocation_notifier.shutdown_webhook_workers() + +- # Wait forever (until event loop is stopped) +- tornado.ioloop.IOLoop.current().start() ++ await super()._graceful_shutdown() + + def _prepare_agents_on_startup(self) -> None: + """Prepare agents in database for verifier startup. +diff --git a/templates/2.6/agent.j2 b/templates/2.6/agent.j2 +new file mode 100644 +index 0000000..26d5b7c +--- /dev/null ++++ b/templates/2.6/agent.j2 +@@ -0,0 +1,313 @@ ++# Keylime agent configuration ++# The Python agent is deprecated and will be removed with the next major release (7.0.0)! 
++# Please migrate to the Rust based agent: https://github.com/keylime/rust-keylime/ ++[agent] ++ ++# The configuration file version number ++version = "{{ agent.version }}" ++ ++# The enabled API versions ++# This sets which of the supported API versions to enable. ++# Only supported versions can be set, which are defined by ++# api::SUPPORTED_API_VERSIONS ++# A list of versions to enable can be provided (e.g. "2.1, 2.2") ++# The following keywords are also supported: ++# - "default": Enables all supported API versions ++# - "latest": Enables only the latest supported API version ++api_versions = "{{ agent.api_versions }}" ++ ++# The agent's UUID. ++# If you set this to "generate", Keylime will create a random UUID. ++# If you set this to "hash_ek", Keylime will set the UUID to the result ++# of 'SHA256(public EK in PEM format)'. ++# If you set this to "environment", Keylime will use the value of the ++# environment variable "KEYLIME_AGENT_UUID" as UUID. ++# If you set this to "dmidecode", Keylime will use the UUID from ++# 'dmidecode -s system-uuid'. ++# If you set this to "hostname", Keylime will use the fully qualified domain ++# name of current host as the agent id. ++uuid = "{{ agent.uuid }}" ++ ++# The binding address and port for the agent server ++ip = "{{ agent.ip }}" ++port = {{ agent.port }} ++ ++# Address and port where the verifier and tenant can connect to reach the agent. ++# These keys are optional. ++contact_ip = "{{ agent.contact_ip }}" ++contact_port = {{ agent.contact_port }} ++ ++# Path to store agent persistent data ++agent_data_path = "{{ agent_data_path }}" ++ ++# Verifier Information (Push Model specific). ++# Verifier URL ++verifier_url = "{{ agent_verifier_url }}" ++ ++# Verifier TLS CA certificate (Push Model specific). ++# Used to verify the verifier's server certificate. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used.
++verifier_tls_ca_cert = "{{ agent.verifier_tls_ca_cert }}" ++ ++# Server identifier for certification keys ++certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" ++ ++# Evidence version for UEFI logs ++uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" ++ ++# The address and port of the registrar server which the agent communicates with ++registrar_ip = "{{ agent.registrar_ip }}" ++registrar_port = {{ agent.registrar_port }} ++ ++# The TLS port of the registrar server (Push Model specific). ++# Used when registrar_tls_enabled is set to true. ++registrar_tls_port = {{ agent.registrar_tls_port }} ++ ++# Enable TLS communication between agent and registrar (Push Model specific). ++# When enabled, the agent uses TLS (server verification only) with the registrar. ++registrar_tls_enabled = {{ agent.registrar_tls_enabled }} ++ ++# TLS CA certificate for verifying the registrar's server certificate (Push Model specific). ++# Only used when registrar_tls_enabled is true. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++registrar_tls_ca_cert = "{{ agent.registrar_tls_ca_cert }}" ++ ++# The API versions to use when communicating with the registrar (Push Model specific). ++# Supports "default" (all supported), "latest", or a comma-separated list. ++registrar_api_versions = "{{ agent.registrar_api_versions }}" ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ agent.enable_agent_mtls }} ++ ++# The keylime working directory. ++keylime_dir = "{{ agent.keylime_dir }}" ++ ++# Accept invalid TLS certificates (INSECURE - for testing only) ++# When enabled, the agent will accept self-signed or invalid certificates ++# This option is specific to the push attestation model. 
++# This should ONLY be used for testing or development environments ++# Default: False (secure) ++tls_accept_invalid_certs = {{ agent.tls_accept_invalid_certs }} ++ ++# Accept invalid TLS hostnames (INSECURE - for testing only) ++# When enabled, the agent will accept certificates with mismatched hostnames ++# This option is specific to the push attestation model. ++# This should ONLY be used for testing or development environments ++# Default: False (secure) ++tls_accept_invalid_hostnames = {{ agent.tls_accept_invalid_hostnames }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the client and the server in the /var/lib/keylime/cv_ca directory, if not ++# present. ++# ++# The 'server_key', 'server_cert', and 'trusted_client_ca' options should all be ++# set with the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', the '/var/lib/keylime/secure' directory is used, which ++# should contain the files indicated by the 'server_key', 'server_cert', ++# and 'trusted_client_ca' options. ++tls_dir = "{{ agent.tls_dir }}" ++ ++# The name of the file containing the Keylime agent TLS server private key. ++# This private key is used to serve the Keylime agent REST API ++# A new private key is generated in case it is not found. ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = "{{ agent.server_key }}" ++ ++# Set the password used to decrypt the private key file. ++# This password will also be used to protect the generated private key used for ++# mTLS authentication ++# If left empty, the private key will not be encrypted. ++server_key_password = "{{ agent.server_key_password }}" ++ ++# The name of the file containing the payload encryption private key. ++# If set as "default", the "payload-private.pem" value is used. 
++payload_key = "{{ agent.payload_key }}" ++ ++# Set the password used to encrypt the payload private key file. ++# If left empty, the private key will not be encrypted. ++payload_key_password = "{{ agent.payload_key_password }}" ++ ++# The name of the file containing the X509 certificate used as the Keylime agent ++# server TLS certificate. ++# This certificate must be self signed. ++server_cert = "{{ agent.server_cert }}" ++ ++# A list of trusted client CA certificates ++trusted_client_ca = "{{ agent.trusted_client_ca }}" ++ ++# The name of the file used to store the payload encryption key, derived from ++# the U and V parts. ++# This file is stored in the /var/lib/keylime/secure/ directory. ++enc_keyname = "{{ agent.enc_keyname }}" ++ ++# The name of the file used to store the optional decrypted payload. ++# This file is stored in the /var/lib/keylime/secure/ directory. ++dec_payload_file = "{{ agent.dec_payload_file }}" ++ ++# The size of the memory-backed tmpfs partition where Keylime stores keys and ++# the decrypted payload. ++# Use syntax that the 'mount' command would accept as a size parameter for tmpfs. ++# The default below sets it to 1 megabyte. ++secure_size = "{{ agent.secure_size }}" ++ ++# Use this option to set the TPM ownerpassword to something you want to use. ++# Set it to "generate" if you want Keylime to choose a random owner password ++# for you. ++tpm_ownerpassword = "{{ agent.tpm_ownerpassword }}" ++ ++# Whether to allow the agent to automatically extract a zip file in ++# the delivered payload after it has been decrypted, or not. Defaults to "True". ++# After decryption, the archive will be unzipped to a directory in /var/lib/keylime/secure. ++# Note: the limits on the size of the tmpfs partition set above with the 'secure_size' ++# option will affect this. 
++extract_payload_zip = {{ agent.extract_payload_zip }} ++ ++# Whether to listen for revocation notifications from the verifier via ZeroMQ ++enable_revocation_notifications = {{ agent.enable_revocation_notifications }} ++ ++# The IP to listen for revocation notifications via ZeroMQ ++revocation_notification_ip = "{{ agent.revocation_notification_ip }}" ++ ++# The port to listen for revocation notifications via ZeroMQ ++revocation_notification_port = {{ agent.revocation_notification_port }} ++ ++# The path to the certificate to verify revocation messages received from the ++# verifier. The path is relative to /var/lib/keylime. ++# If set to "default", Keylime will use the file RevocationNotifier-cert.crt ++# from the unzipped contents provided by the tenant. ++revocation_cert = "{{ agent.revocation_cert }}" ++ ++# A comma-separated list of Python scripts to run upon receiving a revocation ++# message. Keylime will verify the signature first, then call these Python ++# scripts with the json revocation message passed as argument. The scripts must ++# be located in the directory set via 'revocation_actions_dir' ++# ++# Keylime will also get the list of revocation actions from the file ++# action_list in the unzipped payload content. ++revocation_actions = "{{ agent.revocation_actions }}" ++ ++# The path to the directory containing pre-installed revocation action scripts. ++revocation_actions_dir = "{{ agent.revocation_actions_dir }}" ++ ++# A script to execute after unzipping the tenant payload. This is like ++# cloud-init lite =) Keylime will run it with a /bin/sh environment and ++# with a working directory of /var/lib/keylime/secure/unzipped. ++payload_script = "{{ agent.payload_script }}" ++ ++# In case mTLS for the agent is disabled and the use of payloads is still ++# required, this option has to be set to "True" in order to allow the agent ++# to start. 
Details on why this configuration (mTLS disabled and payload enabled) ++# is generally considered insecure can be found on ++# https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_insecure_payload = {{ agent.enable_insecure_payload }} ++ ++# Whether to allow running revocation actions sent as part of the payload. ++# Setting to false limits revocation actions to pre-installed ones. ++allow_payload_revocation_actions = {{ agent.allow_payload_revocation_actions }} ++ ++# Maximum number of retries for exponential backoff ++exponential_backoff_max_retries = {{ agent.exponential_backoff_max_retries }} ++# Initial delay in milliseconds for exponential backoff ++exponential_backoff_initial_delay = {{ agent.exponential_backoff_initial_delay }} ++# Maximum delay in milliseconds for exponential backoff ++exponential_backoff_max_delay = {{ agent.exponential_backoff_max_delay }} ++ ++# List of hash algorithms used for PCRs ++# Accepted values: sha512, sha384, sha256, sha1 ++tpm_hash_alg = "{{ agent.tpm_hash_alg }}" ++ ++# List of encryption algorithms to use with the TPM ++# Accepted values: ecc, rsa ++tpm_encryption_alg = "{{ agent.tpm_encryption_alg }}" ++ ++# List of signature algorithms to use ++# Accepted values: rsassa, rsapss, ecdsa, ecdaa, ecschnorr ++tpm_signing_alg = "{{ agent.tpm_signing_alg }}" ++ ++# If an EK is already present on the TPM (e.g., with "tpm2_createek") and ++# you require Keylime to use this EK, change "generate" to the actual EK ++# handle (e.g. "0x81000000"). The Keylime agent will then not attempt to ++# create a new EK upon startup, and neither will it flush the EK upon exit. ++ek_handle = "{{ agent.ek_handle }}" ++ ++# Enable IDevID and IAK usage ++enable_iak_idevid = {{ agent.enable_iak_idevid }} ++ ++# Select IDevID and IAK templates or algorithms for regenerating the keys. ++# By default the template will be detected automatically from the certificates. 
This will happen if iak_idevid_template is left empty or set as "default" or "detect". ++# Choosing a template will override the name and asymmetric algorithm choices. To use these choices, set iak_idevid_template to "manual" ++# Templates are specified in the TCG document found here, section 7.3.4: ++# https://trustedcomputinggroup.org/wp-content/uploads/TPM-2p0-Keys-for-Device-Identity-and-Attestation_v1_r12_pub10082021.pdf ++# ++# Accepted values: ++# iak_idevid_template: default, detect, H-1, H-2, H-3, H-4, H-5, manual ++# iak_idevid_asymmetric_alg: rsa, ecc ++# iak_idevid_name_alg: sha256, sm3_256, sha384, sha512 ++iak_idevid_template = "{{ agent.iak_idevid_template }}" ++# In order for these values to be used, set the iak_idevid_template option to manual ++iak_idevid_asymmetric_alg = "{{ agent.iak_idevid_asymmetric_alg }}" ++iak_idevid_name_alg = "{{ agent.iak_idevid_name_alg }}" ++ ++# Alternatively if the keys are persisted, provide the handles for their location below, and optionally their passwords. ++# If handles are provided, they will take priority over templates/algorithms selected above. ++# To use a hex password, use the prefix "hex:" at the start of the password. ++idevid_password = "{{ agent.idevid_password }}" ++idevid_handle = "{{ agent.idevid_handle }}" ++ ++iak_password = "{{ agent.iak_password }}" ++iak_handle = "{{ agent.iak_handle }}" ++ ++# The name of the file containing the X509 IAK certificate. ++# If set as "default", the "iak-cert.crt" value is used ++# If a relative path is set, it will be considered relative from the keylime_dir. ++# If an absolute path is set, it is used without change. ++# ++# To override iak_cert, set KEYLIME_AGENT_IAK_CERT environment variable. ++iak_cert = "{{ agent.iak_cert }}" ++ ++# The name of the file containing the X509 IDevID certificate. ++# If set as "default", the "idevid-cert.crt" value is used ++# If a relative path is set, it will be considered relative from the keylime_dir. 
++# If an absolute path is set, it is used without change. ++# ++# To override idevid_cert, set KEYLIME_AGENT_IDEVID_CERT environment variable. ++idevid_cert = "{{ agent.idevid_cert }}" ++ ++# The user account to switch to to drop privileges when started as root ++# If left empty, the agent will keep running with high privileges. ++# The user and group specified here must allow the user to access the ++# WORK_DIR (typically /var/lib/keylime) and /dev/tpmrm0. Therefore, ++# suggested value for the run_as parameter is keylime:tss. ++# The following commands should be used to set ownership before running the ++# agent: ++# chown keylime /var/lib/keylime ++# ++# If tpmdata.yml already exists: ++# chown keylime /var/lib/keylime/tpmdata.yml ++# ++# If cv_ca directory exists: ++# chown keylime /var/lib/keylime/cv_ca ++# chown keylime /var/lib/keylime/cv_ca/cacert.crt ++run_as = "{{ agent.run_as }}" ++ ++# Path from where the agent will read the IMA measurement log. ++# ++# If set as "default", Keylime will use the default path: ++# The default path is /sys/kernel/security/ima/ascii_runtime_measurements ++# If set as a relative path, it will be considered from the root path "/". ++# If set as an absolute path, it will use it without changes ++ima_ml_path = "{{ agent.ima_ml_path }}" ++ ++# Path from where the agent will read the measured boot event log. ++# ++# If set as "default", Keylime will use the default path: ++# The default path is /sys/kernel/security/tpm0/binary_bios_measurements ++# If set as a relative path, it will be considered from the root path "/". ++# If set as an absolute path, it will use it without changes ++measuredboot_ml_path = "{{ agent.measuredboot_ml_path }}" +diff --git a/templates/2.6/ca.j2 b/templates/2.6/ca.j2 +new file mode 100644 +index 0000000..03ebe3b +--- /dev/null ++++ b/templates/2.6/ca.j2 +@@ -0,0 +1,39 @@ ++# Keylime CA configuration ++[ca] ++ ++# The keystore password ++# This password is used to protect the generated CA private key. 
++password = {{ ca.password }} ++ ++# The configuration file version number ++version = {{ ca.version }} ++ ++# CountryName argument (C) of the Issuer when generating certificates ++cert_country = {{ ca.cert_country }} ++ ++# CommonName argument (CN) of the Issuer when generating certificates ++cert_ca_name = {{ ca.cert_ca_name }} ++ ++# StateOrProvinceName argument (S) of the Issuer when generating certificates ++cert_state = {{ ca.cert_state }} ++ ++# Locality argument (L) of the Issuer when generating certificates ++cert_locality = {{ ca.cert_locality }} ++ ++# Organization argument (O) of the Issuer when generating certificates ++cert_organization = {{ ca.cert_organization }} ++ ++# OrganizationalUnit argument (OU) of the Issuer when generating certificates ++cert_org_unit = {{ ca.cert_org_unit }} ++ ++# CA certificate validity time in days ++cert_ca_lifetime = {{ ca.cert_ca_lifetime }} ++ ++# Default generated certificate validity time in days ++cert_lifetime = {{ ca.cert_lifetime }} ++ ++# Key length in bits ++cert_bits = {{ ca.cert_bits }} ++ ++# Certificate Revocation List (CRL) distribution address (URL) ++cert_crl_dist = {{ ca.cert_crl_dist }} +diff --git a/templates/2.6/logging.j2 b/templates/2.6/logging.j2 +new file mode 100644 +index 0000000..9bd8deb +--- /dev/null ++++ b/templates/2.6/logging.j2 +@@ -0,0 +1,33 @@ ++# Keylime logging configuration ++ ++# The configuration file version number ++[logging] ++version = {{ logging.version }} ++ ++[loggers] ++keys = {{ loggers.get('keys') }} ++ ++[handlers] ++keys = {{ handlers.get('keys') }} ++ ++[formatters] ++keys = {{ formatters.get('keys') }} ++ ++[formatter_formatter] ++format = {{ formatter_formatter.format }} ++datefmt = {{ formatter_formatter.datefmt }} ++ ++[logger_root] ++level = {{ logger_root.level }} ++handlers = {{ logger_root.handlers }} ++ ++[handler_consoleHandler] ++class = {{ handler_consoleHandler.class }} ++level = {{ handler_consoleHandler.level }} ++formatter = {{ 
handler_consoleHandler.formatter }} ++args = {{ handler_consoleHandler.args }} ++ ++[logger_keylime] ++level = {{ logger_keylime.level }} ++qualname = {{ logger_keylime.qualname }} ++handlers = {{ logger_keylime.handlers }} +diff --git a/templates/2.6/mapping.json b/templates/2.6/mapping.json +new file mode 100644 +index 0000000..ce10d12 +--- /dev/null ++++ b/templates/2.6/mapping.json +@@ -0,0 +1,11 @@ ++{ ++ "version": "2.6", ++ "type": "update", ++ "components": { ++ "verifier": { ++ "add": { ++ "shutdown_drain_timeout": "10" ++ } ++ } ++ } ++} +diff --git a/templates/2.6/registrar.j2 b/templates/2.6/registrar.j2 +new file mode 100644 +index 0000000..06f026e +--- /dev/null ++++ b/templates/2.6/registrar.j2 +@@ -0,0 +1,168 @@ ++# Keylime registrar configuration ++[registrar] ++ ++# The configuration file version number ++version = {{ registrar.version }} ++ ++# The binding address and port for the registrar server ++ip = "{{ registrar.ip }}" ++port = {{ registrar.port }} ++tls_port = {{ registrar.tls_port }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the registrar server in the /var/lib/keylime/reg_ca directory, if not present. ++# ++# The 'server_key', 'server_cert', and 'trusted_client_ca' options should all be ++# set with the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', share the files with the verifier by using the ++# '/var/lib/keylime/cv_ca' directory, which should contain the files indicated by ++# the 'server_key', 'server_cert', and 'trusted_client_ca' options. ++tls_dir = {{ registrar.tls_dir }} ++ ++# The name of the file containing the Keylime registrar server private key. ++# The file should be stored in the directory set in the 'tls_dir' option. 
++# This private key is used to serve the Keylime registrar REST API ++# ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = {{ registrar.server_key }} ++ ++# Set the password used to decrypt the private key file. ++# If 'tls_dir = generate', this password will also be used to protect the ++# generated server private key. ++# If left empty, the private key will not be encrypted. ++server_key_password = {{ registrar.server_key_password }} ++ ++# The name of the file containing the Keylime registrar server certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# ++# If set as 'default', the 'server-cert.crt' value is used. ++server_cert = {{ registrar.server_cert }} ++ ++# Additional Subject Alternative Names (SANs) to include in auto-generated ++# server certificates when 'tls_dir = generate'. ++# ++# This is a comma-separated list of hostnames and/or IP addresses that will be ++# added to the server certificate's SAN extension. This allows clients to verify ++# the server's hostname when connecting. ++# ++# By default, the certificate will automatically include: ++# - localhost, 127.0.0.1, and ::1 ++# - The system's hostname and FQDN ++# - The IP address from the 'ip' option (if not 0.0.0.0 or ::) ++# ++# Use this option to add additional names, such as: ++# - External DNS names (e.g., registrar.example.com) ++# - Load balancer addresses ++# - Additional IP addresses ++# ++# Example: cert_subject_alternative_names = registrar.example.com,10.0.0.5,registrar-internal ++# Leave empty to use only the automatically detected names. ++cert_subject_alternative_names = {{ registrar.cert_subject_alternative_names }} ++ ++# The list of trusted client CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. 
++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_client_ca = {{ registrar.trusted_client_ca }} ++ ++# Authorization provider to use for access control decisions. ++# ++# Available providers: ++# simple - Default provider implementing 2-category access control: ++# - PUBLIC: No authentication (agent registration, activation, version) ++# - ADMIN: mTLS client certificate (listing, viewing, deleting registrations) ++# ++# The 'simple' provider enforces strict separation: ++# - Agent registration endpoints are public (no authentication required) ++# - Management endpoints require mTLS client certificate authentication ++# ++# SECURITY NOTE: Never distribute client certificates signed by the registrar's ++# trusted CA to agents. Agents should register via public endpoints only. ++# ++# Certificate requirements: ++# - Agents: No client certs needed (public registration endpoints) ++# - Admins: Client certs signed by trusted CA with Client Authentication EKU ++authorization_provider = {{ registrar.authorization_provider }} ++ ++# Database URL Configuration ++# See this document https://keylime.readthedocs.io/en/latest/installation.html#database-support ++# for instructions on using different database configurations. ++# ++# An example of database_url value for using sqlite: ++# sqlite:////var/lib/keylime/reg_data.sqlite ++# An example of database_url value for using mysql: ++# mysql+pymysql://keylime:keylime@keylime_db:[port]/registrar?charset=utf8 ++# ++# If set as 'sqlite' keyword, will use the configuration set by the file located ++# at "/var/lib/keylime/reg_data.sqlite". 
++database_url = {{ registrar.database_url }} ++ ++# Limits for DB connection pool size in sqlalchemy ++# (https://docs.sqlalchemy.org/en/14/core/pooling.html#api-documentation-available-pool-implementations) ++database_pool_sz_ovfl = {{ registrar.database_pool_sz_ovfl }} ++ ++# Whether to automatically update the DB schema using alembic ++auto_migrate_db = {{ registrar.auto_migrate_db }} ++ ++# Durable Attestation is currently marked as an experimental feature ++# In order to enable Durable Attestation, an "adapter" for a Persistent data Store ++# (time-series like database) needs to be specified. Some example adapters can be ++# found under "da/examples" so, for instance ++# "durable_attestation_import = keylime.da.examples.redis.py" ++# could be used to interact with a Redis (Persistent data Store) ++durable_attestation_import = {{ registrar.durable_attestation_import }} ++ ++# If an adapter for Durable Attestation was specified, then the URL for a Persistent Store ++# needs to be specified here. A second optional URL could be specified, for a ++# Rekor Transparency Log. A third additional URL could be specified, pointing to a ++# Time Stamp Authority (TSA), compatible with RFC3161. Additionally, one might need to ++# specify a path containing certificates required by the stores or TSA. 
Continuing with ++# the above example, the following values could be assigned to the parameters: ++# "persistent_store_url=redis://127.0.0.1:6379?db=10&password=/root/redis.auth&prefix=myda" ++# "transparency_log_url=http://127.0.0.1:3000" ++# "time_stamp_authority_url=http://127.0.0.1:2020" ++# "time_stamp_authority_certs_path=~/mycerts/tsa_cert1.pem" ++persistent_store_url = {{ registrar.persistent_store_url }} ++transparency_log_url = {{ registrar.transparency_log_url }} ++time_stamp_authority_url = {{ registrar.time_stamp_authority_url }} ++time_stamp_authority_certs_path = {{ registrar.time_stamp_authority_certs_path }} ++ ++# If Durable Attestation was enabled, which requires a Persistent Store URL ++# to be specified, the two following parameters control the format and encoding ++# of the stored attestation artifacts (defaults "json" for format and "" for encoding) ++persistent_store_format = {{ registrar.persistent_store_format }} ++persistent_store_encoding = {{ registrar.persistent_store_encoding }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# the digest algorithm for signatures is controlled by this parameter (default "sha256") ++transparency_log_sign_algo = {{ registrar.transparency_log_sign_algo }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# a keylime administrator can specify some agent attributes (including attestation ++# artifacts, such as quotes and logs) to be signed by the registrar. The use of "all" ++# will result in the whole "package" (agent + artifacts) being signed and leaving it empty ++# will mean no signing should be done. ++signed_attributes = {{ registrar.signed_attributes }} ++ ++# What TPM-based identity is allowed to be used to register agents. ++# The options "default" and "iak_idevid" will only allow registration with IAK and IDevID if python cryptography is version 38.0.0 or higher. 
++# The following options are accepted: ++# "default": either an EK or IAK and IDevID may be used. In the case that cryptography version is <38.0.0 only EK will be used ++# "ek_cert_or_iak_idevid": this is equivalent to default ++# "ek_cert": only allow agents to use an EK to register ++# "iak_idevid": only allow agents with an IAK and IDevID to register ++tpm_identity = {{ registrar.tpm_identity }} ++ ++# The below option controls what Keylime does when it encounters a certificate which is not parse-able when strict ++# ASN.1 Distinguished Encoding Rules (DER) are enforced. The default behaviour ("warn") is to log a warning but still ++# accept the certificate, so long as it can be interpreted by a fallback parser. ++# The following values are accepted: ++# "warn": log a warning and re-encode the certificate with the more-forgiving fallback parser (the default) ++# "reject": log an error and refuse to accept the certificate ++# "ignore": silently re-encode the certificate without logging a message ++malformed_cert_action = {{ registrar.malformed_cert_action }} +\ No newline at end of file +diff --git a/templates/2.6/tenant.j2 b/templates/2.6/tenant.j2 +new file mode 100644 +index 0000000..79934bf +--- /dev/null ++++ b/templates/2.6/tenant.j2 +@@ -0,0 +1,130 @@ ++# Keylime tenant configuration ++[tenant] ++ ++# The configuration file version number ++version = {{ tenant.version }} ++ ++# The verifier IP address and port ++verifier_ip = {{ tenant.verifier_ip }} ++verifier_port = {{ tenant.verifier_port }} ++ ++# The registrar IP address and port ++registrar_ip = {{ tenant.registrar_ip }} ++registrar_port = {{ tenant.registrar_port }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'default', share the files with the verifier by using the ++# '/var/lib/keylime/cv_ca', which should contain the files indicated by the ++# 'client_key', 'client_cert', and 'trusted_server_ca' options. 
++tls_dir = {{ tenant.tls_dir }} ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ tenant.enable_agent_mtls }} ++ ++# The name of the file containing the Keylime tenant client private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used by the Keylime tenant to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-private.pem' value is used. ++client_key = {{ tenant.client_key }} ++ ++# Set the password used to encrypt the private key file. ++# If client_key is set as 'default', should match the password set in the ++# 'client_key_password' option in the verifier configuration file ++client_key_password = {{ tenant.client_key_password }} ++ ++# The name of the file containing the Keylime tenant client certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This certificate is used by the Keylime tenant to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-cert.crt' value is used. ++client_cert = {{ tenant.client_cert }} ++ ++# The list of trusted server CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_server_ca = {{ tenant.trusted_server_ca }} ++ ++# Directory containing the EK CA certificates. ++# The EK certificate provided by the agent will be validated against the CAs ++# located in this directory. ++tpm_cert_store = {{ tenant.tpm_cert_store }} ++ ++# Maximum size of the payload in bytes. 
The value should match the 'secure_size' ++# option in the agent configuration ++max_payload_size = {{ tenant.max_payload_size }} ++ ++# List of hash algorithms used for PCRs ++# Accepted values: sha512, sha384, sha256, sha1 ++accept_tpm_hash_algs = {{ tenant.accept_tpm_hash_algs }} ++ ++# List of encryption algorithms to use with the TPM ++# Accepted values: ecc, rsa ++accept_tpm_encryption_algs = {{ tenant.accept_tpm_encryption_algs }} ++ ++# List of signature algorithms to use ++# Accepted values: rsassa, rsapss, ecdsa, ecdaa, ecschnorr ++accept_tpm_signing_algs = {{ tenant.accept_tpm_signing_algs }} ++ ++# Whether or not to use an exponential backoff algorithm for retries. ++exponential_backoff = {{ tenant.exponential_backoff }} ++ ++# Either how long to wait between failed attempts to communicate with the TPM ++# in seconds, or the base for the exponential backoff algorithm if enabled via ++# "exponential_backoff" option. ++# Floating point values are accepted. ++retry_interval = {{ tenant.retry_interval }} ++ ++# Integer number of retries to communicate with the TPM before giving up. ++max_retries = {{ tenant.max_retries }} ++ ++# Request timeout in seconds. ++request_timeout = {{ tenant.request_timeout }} ++ ++# Tell the tenant whether to require an EK certificate from the TPM. ++# If set to False the tenant will ignore EK certificates entirely. ++# ++# WARNING: SETTING THIS OPTION TO FALSE IS VERY DANGEROUS!!! ++# ++# If you disable this check, then you may not be talking to a real TPM. ++# All the security guarantees of Keylime rely upon the security of the EK ++# and the assumption that you are talking to a spec-compliant and honest TPM. ++ ++# Some physical TPMs do not have EK certificates, so you may need to set ++# this to "False" for some deployments. 
If you do set it to "False", you ++# MUST use the 'ek_check_script' option below to specify a script that will ++# check the provided EK against an allowlist for the environment that has ++# been collected in a trustworthy way. For example, the cloud provider ++# might provide a signed list of EK public key hashes. Then you could write ++# an ek_check_script that checks the signature of the allowlist and then ++# compares the hash of the given EK with the allowlist. ++require_ek_cert = {{ tenant.require_ek_cert }} ++ ++# Optional script to execute to check the EK and/or EK certificate against an ++# allowlist or any other additional EK processing you want to do. Runs in ++# /var/lib/keylime. You can also specify an absolute path to the script. ++# Script should return 0 if the EK or EK certificate is valid. Any other ++# return value will invalidate the tenant quote check and prevent ++# bootstrapping a key. ++# ++# The various keys are passed to the script via environment variables: ++# EK - contains a PEM encoded version of the public EK ++# EK_CERT - contains a DER encoded EK certificate if one is available. ++# PROVKEYS - contains a json document containing EK, EKcert, and AIK from the ++# provider. EK and AIK are in PEM format. The EKcert is in base64 encoded ++# DER format. ++# ++# Set to blank to disable this check. See warning above if require_ek_cert ++# is "False". ++ek_check_script = {{ tenant.ek_check_script }} ++ ++# Path to file containing the measured boot reference state ++mb_refstate = {{ tenant.mb_refstate }} +diff --git a/templates/2.6/verifier.j2 b/templates/2.6/verifier.j2 +new file mode 100644 +index 0000000..43c8e54 +--- /dev/null ++++ b/templates/2.6/verifier.j2 +@@ -0,0 +1,350 @@ ++# Keylime verifier configuration ++[verifier] ++ ++# The configuration file version number ++version = {{ verifier.version }} ++ ++# Unique identifier for each verifier instance.
++uuid = {{ verifier.uuid }} ++ ++# The binding address and port for the verifier server ++ip = "{{ verifier.ip }}" ++port = {{ verifier.port }} ++ ++# The address and port of registrar server that the verifier communicates with ++registrar_ip = {{ verifier.registrar_ip }} ++registrar_port = {{ verifier.registrar_port }} ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ verifier.enable_agent_mtls }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the client and the server in the /var/lib/keylime/cv_ca directory, if not ++# present. ++# ++# The 'server_key', 'server_cert', 'client_key', 'client_cert', ++# 'trusted_client_ca', and 'trusted_server_ca' options should all be set with ++# the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', the '/var/lib/keylime/cv_ca' directory is used, which ++# should contain the files indicated by the 'server_key', 'server_cert', ++# 'client_key', 'client_cert', 'trusted_client_ca', and 'trusted_server_ca' ++# options. ++tls_dir = {{ verifier.tls_dir }} ++ ++# The name of the file containing the Keylime verifier server private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used to serve the Keylime verifier REST API ++# ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = {{ verifier.server_key }} ++ ++# Set the password used to decrypt the server private key file. ++# If 'tls_dir = generate', this password will also be used to protect the ++# generated server private key. ++# If left empty, the private key will not be encrypted. 
++server_key_password = {{ verifier.server_key_password }} ++ ++# The name of the file containing the Keylime verifier server certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# ++# If set as 'default', the 'server-cert.crt' value is used. ++server_cert = {{ verifier.server_cert }} ++ ++# Additional Subject Alternative Names (SANs) to include in auto-generated ++# server certificates when 'tls_dir = generate'. ++# ++# This is a comma-separated list of hostnames and/or IP addresses that will be ++# added to the server certificate's SAN extension. This allows clients to verify ++# the server's hostname when connecting. ++# ++# By default, the certificate will automatically include: ++# - localhost, 127.0.0.1, and ::1 ++# - The system's hostname and FQDN ++# - The IP address from the 'ip' option (if not 0.0.0.0 or ::) ++# ++# Use this option to add additional names, such as: ++# - External DNS names (e.g., verifier.example.com) ++# - Load balancer addresses ++# - Additional IP addresses ++# ++# Example: cert_subject_alternative_names = verifier.example.com,10.0.0.5,verifier-internal ++# Leave empty to use only the automatically detected names. ++cert_subject_alternative_names = {{ verifier.cert_subject_alternative_names }} ++ ++# The list of trusted client CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_client_ca = {{ verifier.trusted_client_ca }} ++ ++# The name of the file containing the Keylime verifier client private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used by the Keylime verifier to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-private.pem' value is used. ++client_key = {{ verifier.client_key }} ++ ++# Set the password used to decrypt the client private key file. 
++# If 'tls_dir = generate', this password will also be used to protect the ++# generated client private key. ++# If left empty, the private key will not be encrypted. ++client_key_password = {{ verifier.client_key_password }} ++ ++# The name of the file containing the Keylime verifier client certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This certificate is used by the Keylime verifier to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-cert.crt' value is used. ++client_cert = {{ verifier.client_cert }} ++ ++# The list of trusted server CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_server_ca = {{ verifier.trusted_server_ca }} ++ ++# Authorization provider to use for access control decisions. ++# ++# Available providers: ++# simple - Default provider implementing 4-category access control: ++# - PUBLIC: No authentication (version info, evidence verification) ++# - AGENT_ONLY: PoP bearer token + resource ownership (attestations) ++# - AGENT_OR_ADMIN: PoP token or mTLS certificate (read own agent) ++# - ADMIN: mTLS client certificate (all management operations) ++# ++# The 'simple' provider enforces strict separation between agent and admin ++# authentication methods: ++# - Agents authenticate via PoP (Proof-of-Possession) bearer tokens only ++# - Admins authenticate via mTLS client certificates only ++# - If an Authorization header is present, mTLS is never used (prevents ++# privilege escalation) ++# ++# SECURITY NOTE: Never distribute client certificates signed by the verifier's ++# trusted CA to agents. Agents should only have PoP tokens for authentication. ++# ++# Certificate requirements: ++# - Pull mode agents: Self-signed server certs are acceptable. If CA-issued, ++# must have Server Authentication EKU only. 
++# - Push mode agents: No client certs from trusted CA. Use PoP tokens only. ++# - Admins: Client certs signed by trusted CA with Client Authentication EKU. ++authorization_provider = {{ verifier.authorization_provider }} ++ ++# Database URL Configuration ++# See this document https://keylime.readthedocs.io/en/latest/installation.html#database-support ++# for instructions on using different database configurations. ++# ++# An example of database_url value for using sqlite: ++# sqlite:////var/lib/keylime/cv_data.sqlite ++# An example of database_url value for using mysql: ++# mysql+pymysql://keylime:keylime@keylime_db:[port]/verifier?charset=utf8 ++# ++# If set as 'sqlite' keyword, will use the configuration set by the file located ++# at "/var/lib/keylime/cv_data.sqlite". ++database_url = {{ verifier.database_url }} ++ ++# Limits for DB connection pool size in sqlalchemy ++# (https://docs.sqlalchemy.org/en/14/core/pooling.html#api-documentation-available-pool-implementations) ++database_pool_sz_ovfl = {{ verifier.database_pool_sz_ovfl }} ++ ++# Whether to automatically update the DB schema using alembic ++auto_migrate_db = {{ verifier.auto_migrate_db }} ++ ++# The number of worker processes to use for the cloud verifier. ++# Set to "0" to create one worker per processor. ++num_workers = {{ verifier.num_workers }} ++ ++# Whether or not to use an exponential backoff algorithm for retries. ++exponential_backoff = {{ verifier.exponential_backoff }} ++ ++# Either how long to wait between failed attempts to connect to a cloud agent ++# in seconds, or the base for the exponential backoff algorithm. ++# Floating point values accepted here. ++retry_interval = {{ verifier.retry_interval }} ++ ++# Number of retries to connect to an agent before giving up. Must be an integer. ++max_retries = {{ verifier.max_retries }} ++ ++# Time between integrity measurement checks, in seconds. If set to "0", checks ++# will be done as fast as possible. Floating point values accepted here. 
++quote_interval = {{ verifier.quote_interval }} ++ ++# The verifier limits the size of upload payloads (allowlists) which defaults to ++# 100MB (104857600 bytes). This setting can be raised (or lowered) based on the ++# size of the actual payloads ++max_upload_size = {{ verifier.max_upload_size }} ++ ++# Timeout in seconds for HTTP requests ++request_timeout = {{ verifier.request_timeout }} ++ ++# The name of the boot attestation policy to use in comparing a measured boot event log ++# with a measured boot reference state. ++# A policy is a Python object that is an instance of `keylime.elchecking.policies.Policy` ++# and was registered by calling `keylime.elchecking.policies.register`. ++# The keylime agent extracts the measured boot event log. ++# The verifier client specifies the measured boot reference state to use; ++# this is specified independently for each agent. ++# Depending on the policy, the same reference state may be usable with multiple agents. ++# The `accept-all` policy ignores the reference state and approves every log. ++measured_boot_policy_name = {{ verifier.measured_boot_policy_name }} ++ ++# This is a list of Python modules to dynamically load, for example to register ++# additional boot attestation policies. ++# Empty strings in the list are ignored. ++# A module here may be relative, in which case it is interpreted ++# relative to the keylime.elchecking package. ++# The default value for this config item is the empty list. ++measured_boot_imports = {{ verifier.measured_boot_imports }} ++ ++# This is used to manage the number of times measured boot attestation ++# is done. In other words, it controls the number of times the call ++# to the measured boot policy engine is made to evaluate the boot log ++# against the policy specified. ++# Here are its possible values and number of bootlog evaluations. ++# once (default) : Bootlog evaluation will be done only once. ++# always : Bootlog evaluation will always be done (i.e.
an unlimited number of times). ++measured_boot_evaluate = {{ verifier.measured_boot_evaluate }} ++ ++# Severity labels for revocation events strictly ordered from least severity to ++# highest severity. ++severity_labels = {{ verifier.severity_labels }} ++ ++# Severity policy that matches different event_ids to the severity label. ++# The rules are evaluated from the beginning of the list and the first match is ++# used. The event_id can also be a regex. Default policy assigns the highest ++# severity to all events. ++severity_policy = {{ verifier.severity_policy }} ++ ++# If files are already opened when IMA tries to measure them, this causes ++# a time of measure, time of use (ToMToU) error entry. ++# By default we ignore those entries and only print a warning. ++# Set to False to treat ToMToU entries as errors. ++ignore_tomtou_errors = {{ verifier.ignore_tomtou_errors }} ++ ++# Durable Attestation is currently marked as an experimental feature. ++# In order to enable Durable Attestation, an "adapter" for a Persistent data Store ++# (time-series like database) needs to be specified. Some example adapters can be ++# found under "da/examples" so, for instance ++# "durable_attestation_import = keylime.da.examples.redis.py" ++# could be used to interact with a Redis (Persistent data Store) ++durable_attestation_import = {{ verifier.durable_attestation_import }} ++ ++# If an adapter for Durable Attestation was specified, then the URL for a Persistent Store ++# needs to be specified here. A second optional URL could be specified, for a ++# Rekor Transparency Log. A third additional URL could be specified, pointing to a ++# Time Stamp Authority (TSA), compatible with RFC3161. Additionally, one might need to ++# specify a path containing certificates required by the stores or TSA.
Continuing with ++# the above example, the following values could be assigned to the parameters: ++# "persistent_store_url=redis://127.0.0.1:6379?db=10&password=/root/redis.auth&prefix=myda" ++# "transparency_log_url=http://127.0.0.1:3000" ++# "time_stamp_authority_url=http://127.0.0.1:2020" ++# "time_stamp_authority_certs_path=~/mycerts/tsa_cert1.pem" ++persistent_store_url = {{ verifier.persistent_store_url }} ++transparency_log_url = {{ verifier.transparency_log_url }} ++time_stamp_authority_url = {{ verifier.time_stamp_authority_url }} ++time_stamp_authority_certs_path = {{ verifier.time_stamp_authority_certs_path }} ++ ++# If Durable Attestation was enabled, which requires a Persistent Store URL ++# to be specified, the two following parameters control the format and encoding ++# of the stored attestation artifacts (defaults "json" for format and "" for encoding) ++persistent_store_format = {{ verifier.persistent_store_format }} ++persistent_store_encoding = {{ verifier.persistent_store_encoding }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# the digest algorithm for signatures is controlled by this parameter (default "sha256") ++transparency_log_sign_algo = {{ verifier.transparency_log_sign_algo }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# a keylime administrator can specify some agent attributes (including attestation ++# artifacts, such as quotes and logs) to be signed by the verifier. The use of "all" ++# will result in the whole "package" (agent + artifacts) being signed and leaving it empty ++# will mean no signing should be done. ++signed_attributes = {{ verifier.signed_attributes }} ++ ++# Require that allowlists are signed with a key passed via the tenant tool ++require_allow_list_signatures = {{ verifier.require_allow_list_signatures }} ++ ++# Attestation mode. Can be 'pull' (traditional) or 'push' (agent-driven). 
++# Default: pull ++mode = {{ verifier.mode }} ++# ++# Lifetime in seconds for challenges sent to agents in push mode. ++challenge_lifetime = {{ verifier.challenge_lifetime }} ++# ++# Timeout in seconds for a single evidence verification task (0 = auto). ++verification_timeout = {{ verifier.verification_timeout }} ++# ++# Rate limiting for session creation endpoint (POST /sessions) in push mode. ++# These settings prevent denial-of-service attacks where an attacker floods the verifier ++# with session creation requests. Only applies when mode = 'push'. ++# ++# Maximum number of session creation requests per IP address within the time window. ++# Allows for multiple agents from same IP (testing, NAT scenarios). ++# Default: 50 ++session_create_rate_limit_per_ip = {{ verifier.session_create_rate_limit_per_ip }} ++# ++# Time window in seconds for IP-based rate limiting. ++# Default: 60 ++session_create_rate_limit_window_ip = {{ verifier.session_create_rate_limit_window_ip }} ++# ++# Maximum number of session creation requests per agent_id within the time window. ++# Allows for agent retries (agent default is 3 retries, this allows ~5 auth attempts). ++# Default: 15 ++session_create_rate_limit_per_agent = {{ verifier.session_create_rate_limit_per_agent }} ++# ++# Time window in seconds for agent-based rate limiting. ++# Default: 60 ++session_create_rate_limit_window_agent = {{ verifier.session_create_rate_limit_window_agent }} ++# ++# Lifetime in seconds for authentication session tokens. ++# After this time, agents must re-authenticate to continue submitting attestations. ++# Default: 180 (3 minutes) ++session_lifetime = {{ verifier.session_lifetime }} ++# ++# Whether to automatically extend the session token expiry when an agent ++# submits an attestation. When enabled, active agents won't need to re-authenticate ++# as long as they continue attesting within the session_lifetime window. 
++# Default: true ++extend_token_on_attestation = {{ verifier.extend_token_on_attestation }} ++ ++# Maximum time in seconds to wait for in-flight attestation operations to ++# complete during shutdown. The verifier will wait up to this long for active ++# database writes and state transitions to finish before stopping the event ++# loop. Increasing this value reduces the risk of inconsistent agent state ++# after an unclean shutdown, at the cost of a slower shutdown. ++# Floating point values accepted here. ++shutdown_drain_timeout = {{ verifier.shutdown_drain_timeout }} ++ ++[revocations] ++ ++# List of revocation notification methods to enable. ++# ++# Available methods are: ++# ++# "agent": Deliver notification directly to the agent via the REST ++# protocol. ++# ++# "zeromq": Enable the ZeroMQ based revocation notification method; ++# zmq_ip and zmq_port options must be set. Currently this only works if you are ++# using keylime-CA. ++# ++# "webhook": Send notification via webhook. The endpoint URL must be ++# configured with 'webhook_url' option. This can be used to notify other ++# systems that do not have a Keylime agent running. ++enabled_revocation_notifications = {{ revocations.enabled_revocation_notifications }} ++ ++# The binding address and port of the revocation notifier service via ZeroMQ. ++zmq_ip = {{ revocations.zmq_ip }} ++zmq_port = {{ revocations.zmq_port }} ++ ++# Webhook url for revocation notifications. 
++webhook_url = {{ revocations.webhook_url }} +diff --git a/test/test_shutdown.py b/test/test_shutdown.py +new file mode 100644 +index 0000000..85a10d3 +--- /dev/null ++++ b/test/test_shutdown.py +@@ -0,0 +1,210 @@ ++"""Unit tests for the shutdown coordination module and verifier drain logic.""" ++ ++# pylint: disable=protected-access,import-outside-toplevel ++ ++import asyncio ++import unittest ++from unittest.mock import patch ++ ++from keylime import shutdown ++ ++ ++class TestShutdownFlag(unittest.TestCase): ++ """Test the process-wide shutdown flag.""" ++ ++ def setUp(self) -> None: ++ # Reset the module-level event before each test ++ shutdown._shutdown_event = asyncio.Event() ++ ++ def test_initial_state_not_shutting_down(self) -> None: ++ self.assertFalse(shutdown.is_shutting_down()) ++ ++ def test_request_shutdown_sets_flag(self) -> None: ++ shutdown.request_shutdown() ++ self.assertTrue(shutdown.is_shutting_down()) ++ ++ def test_request_shutdown_is_idempotent(self) -> None: ++ shutdown.request_shutdown() ++ shutdown.request_shutdown() ++ self.assertTrue(shutdown.is_shutting_down()) ++ ++ ++class TestOperationTracking(unittest.TestCase): ++ """Test _enter_operation / _exit_operation and drain logic.""" ++ ++ def setUp(self) -> None: ++ # Import here so we can reset module globals ++ from keylime import cloud_verifier_tornado as cvt ++ ++ self.cvt = cvt ++ # Save and reset module state ++ self._saved_active = cvt._active_operations ++ self._saved_event = cvt._operations_drained ++ cvt._active_operations = 0 ++ cvt._operations_drained = asyncio.Event() ++ cvt._operations_drained.set() ++ ++ def tearDown(self) -> None: ++ self.cvt._active_operations = self._saved_active ++ self.cvt._operations_drained = self._saved_event ++ ++ def test_initial_state_is_drained(self) -> None: ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_enter_increments_and_clears_drain(self) -> None: ++ 
self.cvt._enter_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 1) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ def test_exit_decrements_and_signals_drain(self) -> None: ++ self.cvt._enter_operation() ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_multiple_operations_drain_on_last_exit(self) -> None: ++ self.cvt._enter_operation() ++ self.cvt._enter_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 2) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 1) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_wait_for_drain_returns_true_when_already_drained(self) -> None: ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(self.cvt.wait_for_drain(1.0)) ++ self.assertTrue(result) ++ finally: ++ loop.close() ++ ++ def test_wait_for_drain_returns_true_after_exit(self) -> None: ++ self.cvt._enter_operation() ++ ++ async def _drain_after_delay() -> bool: ++ async def _exit_soon() -> None: ++ await asyncio.sleep(0.05) ++ self.cvt._exit_operation() ++ ++ asyncio.ensure_future(_exit_soon()) ++ return await self.cvt.wait_for_drain(2.0) ++ ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(_drain_after_delay()) ++ self.assertTrue(result) ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ finally: ++ loop.close() ++ ++ def test_wait_for_drain_returns_false_on_timeout(self) -> None: ++ self.cvt._enter_operation() ++ ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(self.cvt.wait_for_drain(0.1)) ++ self.assertFalse(result) ++ finally: ++ loop.close() ++ ++ ++class 
TestPendingEventRegistry(unittest.TestCase): ++ """Test _register_pending_event / _cancel_pending_event / cancel_all.""" ++ ++ def setUp(self) -> None: ++ from keylime import cloud_verifier_tornado as cvt ++ ++ self.cvt = cvt ++ self._saved_pending = dict(cvt._pending_events) ++ cvt._pending_events.clear() ++ ++ def tearDown(self) -> None: ++ self.cvt._pending_events.clear() ++ self.cvt._pending_events.update(self._saved_pending) ++ ++ def _make_agent(self, agent_id: str = "test-agent-1") -> dict: ++ return {"agent_id": agent_id, "pending_event": None} ++ ++ def test_register_tracks_in_both_locations(self) -> None: ++ agent = self._make_agent() ++ handle = object() ++ self.cvt._register_pending_event(agent, handle) ++ ++ self.assertIs(agent["pending_event"], handle) ++ self.assertIs(self.cvt._pending_events["test-agent-1"], handle) ++ ++ def test_cancel_clears_both_locations(self) -> None: ++ agent = self._make_agent() ++ handle = object() ++ self.cvt._register_pending_event(agent, handle) ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.cvt._cancel_pending_event(agent) ++ ++ self.assertIsNone(agent["pending_event"]) ++ self.assertNotIn("test-agent-1", self.cvt._pending_events) ++ ++ def test_cancel_noop_when_no_pending_event(self) -> None: ++ agent = self._make_agent() ++ # Should not raise ++ self.cvt._cancel_pending_event(agent) ++ self.assertIsNone(agent["pending_event"]) ++ ++ def test_cancel_all_clears_registry(self) -> None: ++ agents = [self._make_agent(f"agent-{i}") for i in range(3)] ++ for i, agent in enumerate(agents): ++ self.cvt._register_pending_event(agent, object()) ++ ++ self.assertEqual(len(self.cvt._pending_events), 3) ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.cvt.cancel_all_pending_events() ++ ++ self.assertEqual(len(self.cvt._pending_events), 0) ++ ++ def test_cancel_all_noop_when_empty(self) -> None: ++ # Should not raise ++ self.cvt.cancel_all_pending_events() ++ ++ ++class 
TestPushAgentMonitorCancelAll(unittest.TestCase): ++ """Test cancel_all_timeouts in push_agent_monitor.""" ++ ++ def setUp(self) -> None: ++ from keylime import push_agent_monitor ++ ++ self.pam = push_agent_monitor ++ with self.pam._agent_timeout_handles_lock: ++ self._saved = dict(self.pam._agent_timeout_handles) ++ self.pam._agent_timeout_handles.clear() ++ ++ def tearDown(self) -> None: ++ with self.pam._agent_timeout_handles_lock: ++ self.pam._agent_timeout_handles.clear() ++ self.pam._agent_timeout_handles.update(self._saved) ++ ++ def test_cancel_all_clears_handles(self) -> None: ++ with self.pam._agent_timeout_handles_lock: ++ self.pam._agent_timeout_handles["a1"] = object() ++ self.pam._agent_timeout_handles["a2"] = object() ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.pam.cancel_all_timeouts() ++ ++ with self.pam._agent_timeout_handles_lock: ++ self.assertEqual(len(self.pam._agent_timeout_handles), 0) ++ ++ def test_cancel_all_noop_when_empty(self) -> None: ++ # Should not raise ++ self.pam.cancel_all_timeouts() ++ ++ ++if __name__ == "__main__": ++ unittest.main() +diff --git a/test/test_verifier_server.py b/test/test_verifier_server.py +index da0feae..e9a47ef 100644 +--- a/test/test_verifier_server.py ++++ b/test/test_verifier_server.py +@@ -256,51 +256,39 @@ class TestVerifierServerEngineDisposal(unittest.TestCase): + "_prepare_agents_on_startup should document why engine disposal is needed", + ) + +- def test_start_multi_resets_verifier_config_after_fork(self): +- """Verify start_multi() resets verifier config in each worker after forking.""" ++ def test_post_fork_resets_verifier_config(self): ++ """Verify _post_fork() resets verifier config to clear inherited database state.""" + # Read the source code + server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() + +- # Find the start_multi method +- pattern = r"def 
start_multi\(self\).*?(?=\n def |\Z)" ++ # Find the _post_fork method ++ pattern = r"def _post_fork\(self.*?\).*?(?=\n def |\Z)" + match = re.search(pattern, source, re.DOTALL) + +- self.assertIsNotNone(match, "start_multi method not found") ++ self.assertIsNotNone(match, "_post_fork method not found") + assert match is not None + + method_body = match.group(0) + +- # Should fork processes +- self.assertIn( +- "fork_processes", +- method_body, +- "start_multi should call tornado.process.fork_processes", +- ) +- +- # After fork, should reset verifier config (which handles engine disposal) +- # Look for the pattern after fork_processes() +- fork_index = method_body.find("fork_processes") +- after_fork = method_body[fork_index:] +- + self.assertIn( + "reset_verifier_config()", +- after_fork, +- "start_multi must call reset_verifier_config() after forking to clear inherited database state", ++ method_body, ++ "_post_fork must call reset_verifier_config() to clear inherited database state", + ) + + self.assertIn( + "cloud_verifier_tornado.reset_verifier_config()", +- after_fork, +- "start_multi should call cloud_verifier_tornado.reset_verifier_config() after forking", ++ method_body, ++ "_post_fork should call cloud_verifier_tornado.reset_verifier_config()", + ) + +- def test_verifier_config_reset_happens_before_worker_operations(self): +- """Verify verifier config reset occurs after fork but before any worker operations.""" +- # Read the source code +- server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") ++ def test_base_server_calls_post_fork_before_start_single(self): ++ """Verify base Server.start_multi() calls _post_fork() after fork and before start_single().""" ++ # Read the base server source code ++ server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "base", "server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() +@@ -314,53 +302,49 @@ class 
TestVerifierServerEngineDisposal(unittest.TestCase): + + # Extract the order of operations + fork_index = method_body.find("fork_processes") +- reset_index = method_body.find("reset_verifier_config()") +- start_single_index = method_body.find("self.start_single()") ++ post_fork_index = method_body.find("_post_fork") ++ start_single_index = method_body.find("start_single()") + + # All should be present + self.assertNotEqual(fork_index, -1, "fork_processes call not found") +- self.assertNotEqual(reset_index, -1, "reset_verifier_config() call not found") ++ self.assertNotEqual(post_fork_index, -1, "_post_fork() call not found") + self.assertNotEqual(start_single_index, -1, "start_single() call not found") + +- # Correct order: fork -> reset_verifier_config -> start_single ++ # Correct order: fork -> _post_fork -> start_single + self.assertLess( + fork_index, +- reset_index, +- "Verifier config reset must happen AFTER forking", ++ post_fork_index, ++ "_post_fork must be called AFTER forking", + ) + self.assertLess( +- reset_index, ++ post_fork_index, + start_single_index, +- "Verifier config reset must happen BEFORE starting worker server", ++ "_post_fork must be called BEFORE starting worker server", + ) + +- def test_reset_pattern_is_documented(self): +- """Verify reset_verifier_config() pattern is documented.""" ++ def test_post_fork_is_documented(self): ++ """Verify _post_fork() documents why reset_verifier_config() is needed.""" + # Read the source code + server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() + +- # Find the start_multi method +- pattern = r"def start_multi\(self\).*?(?=\n def |\Z)" ++ # Find the _post_fork method ++ pattern = r"def _post_fork\(self.*?\).*?(?=\n def |\Z)" + match = re.search(pattern, source, re.DOTALL) + + assert match is not None + method_body = match.group(0) + +- # Should document why reset is needed after fork +- 
fork_index = method_body.find("fork_processes") +- after_fork = method_body[fork_index:] +- + # Should mention critical concepts: reset, inherited state, parent process + critical_terms = ["reset", "inherit", "parent", "database"] +- found_terms = [term for term in critical_terms if term.lower() in after_fork.lower()] ++ found_terms = [term for term in critical_terms if term.lower() in method_body.lower()] + + self.assertGreaterEqual( + len(found_terms), + 3, +- f"start_multi should document why reset_verifier_config() is needed after fork. " ++ f"_post_fork should document why reset_verifier_config() is needed after fork. " + f"Expected mentions of reset/inherit/parent/database, found: {found_terms}", + ) + +@@ -398,9 +382,9 @@ class TestEngineDisposalDocumentation(unittest.TestCase): + f"Expected mentions of fork/connection/dispose/parent/child, found: {found_terms}", + ) + +- def test_start_multi_documents_disposal_reason(self): +- """Verify start_multi() documents why global engine disposal is needed.""" +- server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") ++ def test_base_start_multi_documents_disposal_reason(self): ++ """Verify base Server.start_multi() documents why engine disposal after fork is needed.""" ++ server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "base", "server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() +@@ -416,15 +400,15 @@ class TestEngineDisposalDocumentation(unittest.TestCase): + fork_index = method_body.find("fork_processes") + after_fork = method_body[fork_index:] + +- critical_terms = ["inherit", "corrupt", "dispose", "worker", "parent"] ++ critical_terms = ["inherit", "connection", "dispose", "worker", "parent"] + + found_terms = [term for term in critical_terms if term.lower() in after_fork.lower()] + + self.assertGreaterEqual( + len(found_terms), + 2, +- f"start_multi should document why global engine disposal after fork is 
critical. " +- f"Expected mentions of inherit/corrupt/dispose/worker/parent, found: {found_terms}", ++ f"start_multi should document why engine disposal after fork is critical. " ++ f"Expected mentions of inherit/connection/dispose/worker/parent, found: {found_terms}", + ) + + +-- +2.53.0 + diff --git a/0018-ignore-sigterm-sigint-manager-parent-processes.patch b/0018-ignore-sigterm-sigint-manager-parent-processes.patch new file mode 100644 index 0000000..761d88a --- /dev/null +++ b/0018-ignore-sigterm-sigint-manager-parent-processes.patch @@ -0,0 +1,151 @@ +From 15f20d2dd2e63cc621295befef46bc4161a1f636 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 10 Apr 2026 13:22:44 +0200 +Subject: [PATCH] shared_data: Ignore SIGTERM and SIGINT on Manager and parent + processes + +When systemd stops the verifier (or registrar), SIGTERM is delivered to +the entire process group, including the multiprocessing Manager's server +process that hosts the shared policy cache. The Manager dies +immediately, but worker processes still have in-flight process_agent() +coroutines that need the cache, causing ConnectionResetError. + +The same race occurs with SIGINT (Ctrl+C) when running the daemon in +the foreground. + +Fix this in two parts: + +1. Use SyncManager.start(initializer=...) to install SIG_IGN for both + SIGTERM and SIGINT in the Manager's server process, so it survives + process-group signals and stays available while workers drain. + +2. Ignore SIGTERM and SIGINT in the new architecture's parent process + (start_multi) so it stays in tornado's monitor loop until all + children have drained and exited. Once all children exit, tornado + calls sys.exit(0), triggering atexit handlers which shut down the + Manager via IPC. Without this, the default signal disposition kills + the parent immediately (no atexit), leaving the Manager orphaned. 
+ +Resolves: #1882 + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/shared_data.py | 28 +++++++++++++++++++++++++++- + keylime/web/base/server.py | 18 ++++++++++++++++++ + test/test_verifier_server.py | 11 ++++++++--- + 3 files changed, 53 insertions(+), 4 deletions(-) + +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index 09cbb97bb..494f2f53b 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -8,8 +8,10 @@ + import multiprocessing as mp + import multiprocessing.process + import os ++import signal + import threading + import time ++from multiprocessing.managers import SyncManager + from typing import Any, Dict, List, Optional + + from keylime import keylime_logging +@@ -17,6 +19,17 @@ + logger = keylime_logging.init_logging("shared_data") + + ++def _manager_ignore_signals() -> None: ++ """Ignore SIGTERM and SIGINT in the Manager's server process. ++ ++ Called as the ``initializer`` for ``SyncManager.start()`` so that ++ the Manager survives process-group signals (systemd SIGTERM, Ctrl+C) ++ and stays available while workers drain in-flight work. ++ """ ++ signal.signal(signal.SIGTERM, signal.SIG_IGN) ++ signal.signal(signal.SIGINT, signal.SIG_IGN) ++ ++ + class FlatDictView: + """A dictionary-like view over a flat key-value store. + +@@ -127,7 +140,20 @@ def __init__(self) -> None: + # Use explicit context to ensure fork compatibility + # The Manager must be started BEFORE any fork() calls + ctx = mp.get_context("fork") +- self._manager = ctx.Manager() ++ # Use SyncManager directly (instead of the ctx.Manager() shortcut) ++ # so we can pass an initializer that makes the Manager's server ++ # process ignore SIGTERM and SIGINT. Without this, systemd's ++ # cgroup-wide SIGTERM (or Ctrl+C SIGINT in foreground) kills the ++ # Manager before workers finish draining, causing ++ # ConnectionResetError in proxy objects. 
The Manager is still ++ # cleanable via IPC shutdown message, process.kill(), or systemd ++ # SIGKILL escalation. ++ # Cannot use 'with' context manager here: the Manager must outlive ++ # __init__ and persist for the lifetime of SharedDataManager. ++ self._manager = SyncManager(ctx=ctx) ++ self._manager.start( # pylint: disable=consider-using-with ++ initializer=_manager_ignore_signals, ++ ) + + # CRITICAL FIX: Use a SINGLE flat dict instead of nested dicts + # Nested DictProxy objects have synchronization issues +diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py +index 4dd02b79e..8e9cce69d 100644 +--- a/keylime/web/base/server.py ++++ b/keylime/web/base/server.py +@@ -376,12 +376,30 @@ def start_multi(self) -> None: + + self._pre_fork() + ++ # Ignore SIGTERM/SIGINT in the parent so it stays in tornado's ++ # monitor loop (os.wait) until all children have drained and ++ # exited cleanly. Once all children exit, tornado calls ++ # sys.exit(0) which triggers atexit → SharedDataManager.cleanup() ++ # → Manager shutdown via IPC. Without this, the default signal ++ # disposition kills the parent immediately (no atexit), leaving ++ # the Manager process orphaned. ++ # Children inherit SIG_IGN but override it in ++ # _install_signal_handlers() before entering the event loop. ++ signal.signal(signal.SIGTERM, signal.SIG_IGN) ++ signal.signal(signal.SIGINT, signal.SIG_IGN) ++ + # with StatsCollector(): + # num = manager.Value('i', 0) + task_id = tornado.process.fork_processes(self.worker_count) + # num.value = num.value + 1 + # print(num.value) + ++ # Restore default signal disposition in children so they don't ++ # silently ignore SIGTERM/SIGINT before _install_signal_handlers() ++ # replaces these with asyncio-based handlers in start_single(). 
++ signal.signal(signal.SIGTERM, signal.SIG_DFL) ++ signal.signal(signal.SIGINT, signal.SIG_DFL) ++ + # Remove the Manager's server process from multiprocessing's child + # tracking so Python's atexit handler does not try to join() it in + # child workers (the Manager was spawned by the parent). +diff --git a/test/test_verifier_server.py b/test/test_verifier_server.py +index e9a47ef70..7601b9cb0 100644 +--- a/test/test_verifier_server.py ++++ b/test/test_verifier_server.py +@@ -300,10 +300,15 @@ def test_base_server_calls_post_fork_before_start_single(self): + assert match is not None + method_body = match.group(0) + ++ # Strip comment lines to avoid false matches from mentions ++ # in comments (e.g. "# ... before start_single()"). ++ code_lines = [line for line in method_body.splitlines() if not line.lstrip().startswith("#")] ++ code_body = "\n".join(code_lines) ++ + # Extract the order of operations +- fork_index = method_body.find("fork_processes") +- post_fork_index = method_body.find("_post_fork") +- start_single_index = method_body.find("start_single()") ++ fork_index = code_body.find("fork_processes") ++ post_fork_index = code_body.find("_post_fork") ++ start_single_index = code_body.find("start_single()") + + # All should be present + self.assertNotEqual(fork_index, -1, "fork_processes call not found") diff --git a/0019-move-socket-var-run.patch b/0019-move-socket-var-run.patch new file mode 100644 index 0000000..71755da --- /dev/null +++ b/0019-move-socket-var-run.patch @@ -0,0 +1,348 @@ +From a50c7e50171d8f5999bdd927b6306f6d14974c57 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Thu, 16 Apr 2026 14:14:06 +0200 +Subject: [PATCH 1/2] shared_data: Move SyncManager socket to /var/run/keylime/ + +The SyncManager's server process creates a Unix domain socket for IPC +with worker processes. By default, this socket was placed in /tmp with +a random name (listener-*). + +Move the socket to /var/run/keylime/, following standard daemon +practice. 
Keylime already uses this directory for its ZeroMQ revocation +notification socket. + +Changes: +- Pass explicit address to SyncManager so the socket is created at + /var/run/keylime/shared_data.<pid>.sock instead of /tmp/listener-* +- Add _ensure_runtime_dir() to create or validate the directory +- Add test conftest.py to redirect sockets to a temp directory +- Add pytest to test-requirements.txt for pylint to resolve imports + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/shared_data.py | 54 +++++++++++++++++++++++++++++++++++++----- + test-requirements.txt | 1 + + test/conftest.py | 30 +++++++++++++++++++++++ + 3 files changed, 79 insertions(+), 6 deletions(-) + create mode 100644 test/conftest.py + +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index 494f2f53b..aef39bcc4 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -18,6 +18,23 @@ + + logger = keylime_logging.init_logging("shared_data") + ++_RUNTIME_DIR = "/var/run/keylime" ++ ++ ++def _ensure_runtime_dir() -> None: ++ """Ensure the runtime directory exists with correct permissions. ++ ++ Under systemd, ``tmpfiles.d`` creates ``/var/run/keylime/`` at boot. ++ This function provides a fallback for non-systemd execution and ++ validates permissions in either case. ++ """ ++ os.makedirs(_RUNTIME_DIR, mode=0o700, exist_ok=True) ++ perms = os.stat(_RUNTIME_DIR).st_mode & 0o777 ++ if perms != 0o700 or not os.access(_RUNTIME_DIR, os.W_OK | os.X_OK): ++ msg = f"{_RUNTIME_DIR} is not usable by the current process" ++ logger.error(msg) ++ raise PermissionError(msg) ++ + + def _manager_ignore_signals() -> None: + """Ignore SIGTERM and SIGINT in the Manager's server process.
+@@ -137,8 +154,20 @@ def __init__(self) -> None: + """ + logger.debug("Initializing SharedDataManager") + +- # Use explicit context to ensure fork compatibility +- # The Manager must be started BEFORE any fork() calls ++ # Ensure /var/run/keylime/ exists with correct permissions ++ # before forking the Manager server process. ++ _ensure_runtime_dir() ++ self._socket_path = os.path.join(_RUNTIME_DIR, f"shared_data.{os.getpid()}.sock") ++ ++ # Remove stale socket from a previous run (e.g. after a crash). ++ # CPython's SocketListener does not pre-unlink before bind(). ++ try: ++ os.unlink(self._socket_path) ++ except (FileNotFoundError, PermissionError): ++ pass ++ ++ # Use explicit context to ensure fork compatibility. ++ # The Manager must be started BEFORE any fork() calls. + ctx = mp.get_context("fork") + # Use SyncManager directly (instead of the ctx.Manager() shortcut) + # so we can pass an initializer that makes the Manager's server +@@ -150,7 +179,7 @@ def __init__(self) -> None: + # SIGKILL escalation. + # Cannot use 'with' context manager here: the Manager must outlive + # __init__ and persist for the lifetime of SharedDataManager. 
+- self._manager = SyncManager(ctx=ctx) ++ self._manager = SyncManager(address=self._socket_path, ctx=ctx) + self._manager.start( # pylint: disable=consider-using-with + initializer=_manager_ignore_signals, + ) +@@ -162,8 +191,6 @@ def __init__(self) -> None: + self._lock = self._manager.Lock() + self._initialized_at = time.time() + +- # Register handler to reinitialize manager connection after fork +- # This is needed because Manager uses network connections that don't survive fork + try: + self._parent_pid = os.getpid() + logger.debug("SharedDataManager initialized in process %d", self._parent_pid) +@@ -173,7 +200,10 @@ def __init__(self) -> None: + # Ensure cleanup on exit + atexit.register(self.cleanup) + +- logger.info("SharedDataManager initialized successfully") ++ logger.info( ++ "SharedDataManager initialized successfully (socket: %s)", ++ self._socket_path, ++ ) + + def set_data(self, key: str, value: Any) -> None: + """Store arbitrary pickleable data by key. +@@ -333,6 +363,18 @@ def cleanup(self) -> None: + except Exception: + logger.exception("Error during SharedDataManager shutdown") + ++ # Remove socket file if it still exists. The Manager server ++ # process normally unlinks it on exit, but if it was killed ++ # (SIGKILL) the file may be left behind. ++ socket_path = getattr(self, "_socket_path", None) ++ if socket_path: ++ try: ++ os.unlink(socket_path) ++ except FileNotFoundError: ++ pass ++ except OSError as e: ++ logger.debug("Could not remove socket file %s: %s", socket_path, e) ++ + def deregister_child(self) -> None: + """Remove the Manager's server process from multiprocessing's child tracking. 
+ +diff --git a/test-requirements.txt b/test-requirements.txt +index bdd44e3e9..bf74580a9 100644 +--- a/test-requirements.txt ++++ b/test-requirements.txt +@@ -1,6 +1,7 @@ + dbus-python + # modules required for pylint + setuptools ++pytest + # packages required for mypy + sqlalchemy-stubs + types-python-dateutil +diff --git a/test/conftest.py b/test/conftest.py +new file mode 100644 +index 000000000..da2843922 +--- /dev/null ++++ b/test/conftest.py +@@ -0,0 +1,30 @@ ++"""Shared pytest fixtures for keylime tests.""" ++ ++import shutil ++import tempfile ++from unittest.mock import patch ++ ++import pytest ++ ++from keylime.shared_data import cleanup_global_shared_memory ++ ++ ++@pytest.fixture(autouse=True) ++def _shared_data_runtime_dir(): ++ """Redirect SharedDataManager sockets to a temporary directory. ++ ++ The SyncManager creates Unix domain sockets in /var/run/keylime/, ++ which may not be writable by the test user. This fixture patches ++ the runtime directory to a per-test temp directory so that tests ++ work in any environment. ++ ++ After each test, any global SharedDataManager is shut down to ++ prevent stale managers from referencing deleted temp directories. ++ """ ++ tmpdir = tempfile.mkdtemp() ++ with patch("keylime.shared_data._RUNTIME_DIR", tmpdir): ++ yield ++ # Shut down any global SharedDataManager left alive by the test ++ # so the next test starts fresh with a new temp directory. ++ cleanup_global_shared_memory() ++ shutil.rmtree(tmpdir, ignore_errors=True) + +From 712ab6c841e258e463f858904bfc0991f704a3b9 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Thu, 16 Apr 2026 14:14:45 +0200 +Subject: [PATCH 2/2] installer: Add tmpfiles.d config for all keylime + directories + +Add keylime-tmpfiles.conf to manage all keylime directories. 
+ +This includes: + +- /var/run/keylime (runtime IPC sockets) +- /var/lib/keylime (persistent state) +- /etc/keylime and config snippet directories (configuration) +- TPM certificate store copy from /usr/share to /var/lib + +Simplify installer.sh to avoid redundant directory creation and +ownership setting. The installer only needs to install the tmpfiles.d +config to /usr/lib/tmpfiles.d/keylime.conf and apply it immediately with +systemd-tmpfiles --create so the directories exist before the services +start. + +The installer validates the TPM cert store source exists before copying +and includes a non-systemd fallback for manual directory creation. + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + services/installer.sh | 61 ++++++++++++++++++++++++++-------- + services/keylime-tmpfiles.conf | 40 ++++++++++++++++++++++ + 2 files changed, 87 insertions(+), 14 deletions(-) + create mode 100644 services/keylime-tmpfiles.conf + +diff --git a/services/installer.sh b/services/installer.sh +index f34027c61..f462f136b 100755 +--- a/services/installer.sh ++++ b/services/installer.sh +@@ -11,7 +11,7 @@ fi + BASEDIR=$(dirname "$0") + + # check keylime scripts directory (same for verifier, agent, registrar) +-KEYLIMEDIR=$(dirname $(whereis keylime_verifier | cut -d " " -f 2)) ++KEYLIMEDIR=$(dirname "$(whereis keylime_verifier | cut -d " " -f 2)") + if [[ $KEYLIMEDIR == "." 
]]; then + echo "Unable to find keylime scripts" 1>&2 + exit 1 +@@ -20,8 +20,8 @@ fi + echo "Using keylime scripts directory: ${KEYLIMEDIR}" + + # prepare keylime service files and store them in systemd path +-sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" $BASEDIR/keylime_registrar.service.template > /etc/systemd/system/keylime_registrar.service +-sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" $BASEDIR/keylime_verifier.service.template > /etc/systemd/system/keylime_verifier.service ++sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" "$BASEDIR/keylime_registrar.service.template" > /etc/systemd/system/keylime_registrar.service ++sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" "$BASEDIR/keylime_verifier.service.template" > /etc/systemd/system/keylime_verifier.service + + echo "Creating keylime user if it not exists" + if ! getent passwd keylime >/dev/null; then +@@ -30,23 +30,56 @@ if ! getent passwd keylime >/dev/null; then + keylime + fi + +-echo "Changing files to be owned by the keylime user" +-# Create all directories required if not there +-mkdir -p /var/lib/keylime +-mkdir -p /var/log/keylime +-mkdir -p /var/run/keylime ++# install TPM certificate store to /usr/share/keylime/ ++# tmpfiles.d will copy this to /var/lib/keylime/tpm_cert_store ++TPM_CERT_STORE_SRC="$BASEDIR/../tpm_cert_store" ++if [[ ! 
-d "$TPM_CERT_STORE_SRC" ]]; then ++ echo "Missing TPM certificate store: $TPM_CERT_STORE_SRC" 1>&2 ++ exit 1 ++fi ++ ++mkdir -p /usr/share/keylime ++cp -a "$TPM_CERT_STORE_SRC" /usr/share/keylime/ || exit 1 + +-chown keylime:keylime -R /etc/keylime +-chown keylime:keylime -R /var/lib/keylime +-chown keylime:keylime -R /var/log/keylime +-chown keylime:keylime -R /var/run/keylime ++# install tmpfiles.d config for keylime directories ++mkdir -p /usr/lib/tmpfiles.d ++cp "$BASEDIR/keylime-tmpfiles.conf" /usr/lib/tmpfiles.d/keylime.conf ++ ++# apply the tmpfiles.d config immediately to create directories with correct ownership ++if command -v systemd-tmpfiles >/dev/null 2>&1; then ++ systemd-tmpfiles --create keylime.conf ++else ++ echo "Warning: systemd-tmpfiles not found, creating directories manually" ++ # Create essential directories as fallback for non-systemd systems ++ mkdir -p /var/run/keylime /var/lib/keylime \ ++ /etc/keylime/ca.conf.d \ ++ /etc/keylime/logging.conf.d \ ++ /etc/keylime/verifier.conf.d \ ++ /etc/keylime/registrar.conf.d \ ++ /etc/keylime/tenant.conf.d \ ++ /etc/keylime/agent.conf.d ++ chown keylime:keylime /var/run/keylime /var/lib/keylime ++ chmod 700 /var/run/keylime /var/lib/keylime ++ # Mirror tmpfiles.d Z/z semantics: recursively set ownership and ++ # file permissions under /etc/keylime, then fix directories to 0500. ++ chown -R keylime:keylime /etc/keylime ++ find /etc/keylime -type f -exec chmod 400 {} \; ++ find /etc/keylime -type d -exec chmod 500 {} \; ++ # Copy TPM cert store from /usr/share to /var/lib only if the ++ # target does not exist yet (mirrors the tmpfiles.d C directive). ++ # This preserves operator-added EK certificates. ++ if [ -d /usr/share/keylime/tpm_cert_store ] && [ ! 
-d /var/lib/keylime/tpm_cert_store ]; then ++ cp -r /usr/share/keylime/tpm_cert_store /var/lib/keylime/ ++ chown -R keylime:keylime /var/lib/keylime/tpm_cert_store ++ find /var/lib/keylime/tpm_cert_store -type f -exec chmod 400 {} \; ++ chmod 500 /var/lib/keylime/tpm_cert_store ++ fi ++fi + + # set permissions + chmod 664 /etc/systemd/system/keylime_registrar.service + chmod 664 /etc/systemd/system/keylime_verifier.service + +-chmod 700 /var/run/keylime +- + # enable at startup + systemctl enable keylime_registrar.service + systemctl enable keylime_verifier.service +diff --git a/services/keylime-tmpfiles.conf b/services/keylime-tmpfiles.conf +new file mode 100644 +index 000000000..f3c0b43d6 +--- /dev/null ++++ b/services/keylime-tmpfiles.conf +@@ -0,0 +1,40 @@ ++d /run/keylime 0700 keylime keylime - ++ ++d /var/lib/keylime 0700 keylime keylime - ++ ++d /etc/keylime 0500 keylime keylime - ++d /etc/keylime/ca.conf.d 0500 keylime keylime - ++d /etc/keylime/logging.conf.d 0500 keylime keylime - ++d /etc/keylime/verifier.conf.d 0500 keylime keylime - ++d /etc/keylime/registrar.conf.d 0500 keylime keylime - ++d /etc/keylime/tenant.conf.d 0500 keylime keylime - ++d /etc/keylime/agent.conf.d 0500 keylime keylime - ++ ++# TPM certificate store. ++# Copy the cert store from /usr/share/keylime/tpm_cert_store ++# to /var/lib/keylime/tpm_cert_store. ++# Files inside /var/lib/keylime/tpm_cert_store/ have ++# 0400 permission and are owned by keylime/keylime, ++# while /var/lib/keylime/tpm_cert_store/ itself has ++# permission 0500, also owned by keylime/keylime. ++C /var/lib/keylime/tpm_cert_store 0500 keylime keylime - /usr/share/keylime/tpm_cert_store ++Z /var/lib/keylime/tpm_cert_store 0400 keylime keylime - ++z /var/lib/keylime/tpm_cert_store 0500 keylime keylime - ++# Finally, /var/lib/keylime itself has 0700 permission, ++# and is owned by keylime/keylime. 
++z /var/lib/keylime 0700 keylime keylime - ++ ++# Keylime configuration in /etc/keylime has permission 0400 ++# owned by keylime/keylime, while snippet directories and ++# the actual /etc/keylime directory have permission 0500, ++# also owned by keylime/keylime. ++Z /etc/keylime 0400 keylime keylime - ++# Now fix the directories: ++z /etc/keylime/ca.conf.d 0500 keylime keylime - ++z /etc/keylime/logging.conf.d 0500 keylime keylime - ++z /etc/keylime/verifier.conf.d 0500 keylime keylime - ++z /etc/keylime/registrar.conf.d 0500 keylime keylime - ++z /etc/keylime/tenant.conf.d 0500 keylime keylime - ++z /etc/keylime/agent.conf.d 0500 keylime keylime - ++# And finally, /etc/keylime itself. ++z /etc/keylime 0500 keylime keylime - diff --git a/keylime.spec b/keylime.spec index 30e364c..3c5aaa6 100644 --- a/keylime.spec +++ b/keylime.spec @@ -1,5 +1,5 @@ %global srcname keylime -%global policy_version 43.1.1 +%global policy_version 43.2.1 # Package is actually noarch, but it has an optional dependency that is # arch-specific. @@ -9,7 +9,7 @@ Name: keylime Version: 7.14.1 -Release: 4%{?dist} +Release: 5%{?dist} Summary: Open source TPM software for Bootstrapping and Maintaining Trust URL: https://github.com/keylime/keylime @@ -39,6 +39,21 @@ Patch: 0012-fix-mem-leak-remove-unbounded-functools.cache-from-l.patch # Backport https://github.com/keylime/keylime/pulls/1874 Patch: 0013-fix-verifier-race-condition-on-agent-delete.patch +# RHEL-151493 - verifier graceful shutdown. 
+# Backport: +# - https://github.com/keylime/keylime/pull/1809 +# - https://github.com/keylime/keylime/pull/1868 +# - https://github.com/keylime/keylime/pull/1855 +# - https://github.com/keylime/keylime/pull/1869 +# - https://github.com/keylime/keylime/pull/1883 +# - https://github.com/keylime/keylime/pull/1886 +Patch: 0014-push-attestation-documentation.patch +Patch: 0015-remove-enable-authentication-config-option.patch +Patch: 0016-docs-push-attestation-config-tables.patch +Patch: 0017-verifier-graceful-shutdown.patch +Patch: 0018-ignore-sigterm-sigint-manager-parent-processes.patch +Patch: 0019-move-socket-var-run.patch + # Main program: Apache-2.0 # Icons: MIT License: Apache-2.0 AND MIT @@ -59,6 +74,7 @@ BuildRequires: python3-tornado BuildRequires: python3-sqlalchemy BuildRequires: python3-lark BuildRequires: python3-psutil +BuildRequires: python3-pytest BuildRequires: python3-pyyaml BuildRequires: python3-jsonschema BuildRequires: python3-setuptools @@ -316,7 +332,7 @@ export KEYLIME_CA_CONFIG="${CONF_TEMP_DIR}/ca.conf" export KEYLIME_LOGGING_CONFIG="${CONF_TEMP_DIR}/logging.conf" # Run the tests. -%{python3} -m unittest +%pytest # Cleanup. [ "${CONF_TEMP_DIR}" ] && rm -rf "${CONF_TEMP_DIR}" diff --git a/sources b/sources index 9e0d7e5..95edd36 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ SHA512 (v7.14.1.tar.gz) = d94cd1e25ec31e43fea05d0c404dd25c05b6b28435db2f8ca34546f6ff8bfd5da12d2dcd3b5cf4772c44688ae8968468dc2470da23596714e7615dbf6dfbe841 -SHA512 (keylime-selinux-43.1.1.tar.gz) = 1b0a850f68321e4872bb01eb99f5b000f1b5cbe3f1882e781bff519868ba5f4ca50f25b328b3662895969833add5c30d00e2a2361d2d626e7cffd95c0243ec39 +SHA512 (keylime-selinux-43.2.1.tar.gz) = 8cb8b032819d3b87e1dceaa7094385b4468c0d6be1e5dfc6d8b6758e6281def5255120ff34d71b5d4bc7fe9b9e960f1a98011e5bf7149df5704d0bbf6afbfad3