diff --git a/.gitignore b/.gitignore index a9504a0..dc40567 100644 --- a/.gitignore +++ b/.gitignore @@ -37,3 +37,4 @@ /keylime-selinux-42.1.2.tar.gz /v7.14.1.tar.gz /keylime-selinux-43.1.1.tar.gz +/keylime-selinux-43.2.1.tar.gz diff --git a/0014-push-attestation-documentation.patch b/0014-push-attestation-documentation.patch new file mode 100644 index 0000000..ae9bf4b --- /dev/null +++ b/0014-push-attestation-documentation.patch @@ -0,0 +1,1910 @@ +From 077762aa335de0cf99e190bd5afb5b77f5403a89 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Tue, 17 Feb 2026 16:43:04 +0100 +Subject: [PATCH] Document agent-driven (push) attestation + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/assets/push-model-architecture.svg | 86 ++++ + docs/assets/push-model-sequence.svg | 122 +++++ + docs/conf.py | 1 + + docs/design.rst | 1 + + docs/design/overview.rst | 11 +- + docs/design/push_model.rst | 226 +++++++++ + docs/index.rst | 1 + + docs/installation.rst | 11 + + docs/man/keylime_push_model_agent.8.rst | 226 +++++++++ + docs/man/keylime_verifier.8.rst | 3 +- + docs/rest_apis.rst | 30 ++ + docs/rest_apis/3_0/3_0.rst | 21 + + docs/rest_apis/3_0/verifier.rst | 608 ++++++++++++++++++++++++ + docs/user_guide.rst | 1 + + docs/user_guide/configuration.rst | 7 + + docs/user_guide/push_model.rst | 370 ++++++++++++++ + 16 files changed, 1721 insertions(+), 4 deletions(-) + create mode 100644 docs/assets/push-model-architecture.svg + create mode 100644 docs/assets/push-model-sequence.svg + create mode 100644 docs/design/push_model.rst + create mode 100644 docs/man/keylime_push_model_agent.8.rst + create mode 100644 docs/rest_apis/3_0/3_0.rst + create mode 100644 docs/rest_apis/3_0/verifier.rst + create mode 100644 docs/user_guide/push_model.rst + +diff --git a/docs/assets/push-model-architecture.svg b/docs/assets/push-model-architecture.svg +new file mode 100644 +index 000000000..82a5672f4 +--- /dev/null ++++ 
b/docs/assets/push-model-architecture.svg +@@ -0,0 +1,86 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Push-Model Architecture ++ ++ ++ Pull Model (traditional) ++ ++ ++ ++ Agent ++ (server, port 9002) ++ ++ ++ ++ Registrar ++ ++ ++ ++ Verifier ++ ++ ++ ++ register ++ ++ ++ ++ poll quotes ++ ++ ++ ++ ++ ++ Push Model (new) ++ ++ ++ ++ Agent ++ (client, no ports) ++ ++ ++ ++ Registrar ++ ++ ++ ++ Verifier ++ ++ ++ ++ register ++ ++ ++ ++ push evidence ++ ++ ++ ++ Protocol Flow (Push Model) ++ ++ 1. Agent registers with Registrar (same as pull model) ++ 2. Agent authenticates with Verifier via PoP (POST /v3/sessions) ++ 3. Agent sends capabilities to Verifier (POST /v3/agents/{agent_id}/attestations) — receives challenge nonce ++ 4. Agent sends evidence to Verifier (PATCH /v3/agents/{agent_id}/attestations/latest) — receives 202 Accepted ++ 5. Agent waits for configured interval, then repeats from step 3 ++ +diff --git a/docs/assets/push-model-sequence.svg b/docs/assets/push-model-sequence.svg +new file mode 100644 +index 000000000..d9affe1c9 +--- /dev/null ++++ b/docs/assets/push-model-sequence.svg +@@ -0,0 +1,122 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Push-Model Agent State Machine ++ ++ ++ ++ Unregistered ++ Initial state ++ ++ ++ ++ Registered ++ Ready for attestation ++ ++ ++ ++ Negotiating ++ Phase 1: capabilities ++ ++ ++ ++ Attesting ++ Phase 2: evidence ++ ++ ++ ++ Reg. Failed ++ Will retry ++ ++ ++ ++ Attest. 
Failed ++ Will retry ++ ++ ++ ++ registration OK ++ ++ ++ ++ failed ++ ++ ++ ++ retry ++ ++ ++ ++ start negotiation ++ ++ ++ ++ 201 Created ++ ++ ++ ++ error ++ ++ ++ ++ 202 Accepted ++ (wait interval) ++ ++ ++ ++ rejected ++ ++ ++ ++ retry ++ ++ ++ ++ Legend ++ ++ ++ Success transition ++ ++ ++ Error transition ++ ++ ++ Retry (with exponential backoff) ++ ++ Phase 1: Agent POSTs capabilities, receives challenge nonce ++ Phase 2: Agent PATCHes evidence, receives 202 Accepted ++ The Negotiating/Attesting cycle repeats continuously ++ +diff --git a/docs/conf.py b/docs/conf.py +index 5543afa86..00d9735de 100644 +--- a/docs/conf.py ++++ b/docs/conf.py +@@ -154,6 +154,7 @@ + ("man/keylime_registrar.8", "keylime_registrar", "Keylime registrar service", [author], 8), + ("man/keylime_verifier.8", "keylime_verifier", "Keylime verifier service", [author], 8), + ("man/keylime_agent.8", "keylime_agent", "Keylime agent service", [author], 8), ++ ("man/keylime_push_model_agent.8", "keylime_push_model_agent", "Keylime push-model agent service", [author], 8), + ] + + +diff --git a/docs/design.rst b/docs/design.rst +index 522ade113..dd72fd4e7 100644 +--- a/docs/design.rst ++++ b/docs/design.rst +@@ -7,6 +7,7 @@ Design of Keylime + :caption: Contents: + + design/overview.rst ++ design/push_model.rst + design/security.rst + + +diff --git a/docs/design/overview.rst b/docs/design/overview.rst +index 4c7b52227..985cbc94b 100644 +--- a/docs/design/overview.rst ++++ b/docs/design/overview.rst +@@ -51,9 +51,14 @@ Verifier + The verifier implements the actual attestation of an agent and sends revocation messages if an agent leaves the trusted + state. + +-Once an agent is registered for attestation (using the tenant or the API directly) the verifier continuously pulls +-the required attestation data from the agent. This can include: a quote over the PCRs, the PCR values, NK public key, +-IMA log and UEFI event log. 
After that the quote is validated additional validation of the data can be configured. ++In the default **pull model**, once an agent is registered for attestation (using the tenant or the API directly) ++the verifier continuously pulls the required attestation data from the agent. This can include: a quote over the ++PCRs, the PCR values, NK public key, IMA log and UEFI event log. After the quote is validated, additional ++validation of the data can be configured. ++ ++Keylime also supports a **push model** where the agent initiates connections to the verifier and proactively ++submits attestation evidence. This is useful for environments where the verifier cannot directly reach the ++agent (e.g. behind firewalls or NAT). See :doc:`push_model` for details. + + Static PCR values + """"""""""""""""" +diff --git a/docs/design/push_model.rst b/docs/design/push_model.rst +new file mode 100644 +index 000000000..29f9061e0 +--- /dev/null ++++ b/docs/design/push_model.rst +@@ -0,0 +1,226 @@ ++======================== ++Push-Model Attestation ++======================== ++ ++.. warning:: ++ Push-model attestation is currently experimental. The feature is functional ++ but the API and configuration options may change in future releases. ++ Please report issues at https://github.com/keylime/keylime/issues/?q=label:push-mode ++ ++Introduction ++------------ ++ ++Traditional Keylime attestation uses a **pull model** where the verifier continuously ++polls agents for attestation data. The agent acts as a server and the verifier initiates ++connections to it. This model requires that the verifier can reach the agent over the ++network. ++ ++The **push model** reverses this communication direction: the agent initiates connections ++to the verifier and proactively sends attestation data. The verifier never connects to ++the agent.
This makes push-model attestation suitable for environments where the ++verifier cannot directly reach the agent, such as: ++ ++* **Edge and IoT devices** behind firewalls or NAT ++* **Hybrid cloud environments** with restricted network policies ++* **Air-gapped networks** where inbound connections to agents are not permitted ++* **Dynamic environments** where agent IP addresses change frequently ++ ++In push mode, the agent is a separate binary (``keylime_push_model_agent``) that ++implements the push attestation protocol using API version 3.0. ++ ++Architectural Overview ++---------------------- ++ ++In pull-model attestation, the verifier runs a polling loop that periodically contacts ++each registered agent to request a TPM quote and associated evidence. The agent exposes ++an HTTPS server that responds to these requests. ++ ++In push-model attestation, this relationship is inverted: ++ ++* The **agent initiates** all connections to the verifier ++* The agent does **not expose any HTTP endpoints** (no listening ports) ++* The verifier accepts incoming attestation data from agents ++* Verification is performed **asynchronously** after evidence is received ++* An **event-driven timeout** system replaces the polling loop for monitoring agent ++ liveness ++ ++The registrar interaction is unchanged: in both models, the agent registers itself ++with the registrar during startup. ++ ++.. figure:: ../assets/push-model-architecture.svg ++ :width: 600 ++ :align: center ++ :alt: Diagram showing the push-model architecture where the agent initiates ++ connections to both the registrar and the verifier, contrasted with the pull ++ model where the verifier connects to the agent. ++ ++ **Figure 1:** Push-Model Architecture ++ ++The Two-Phase Attestation Protocol ++----------------------------------- ++ ++Push-model attestation uses a two-phase protocol for each attestation cycle.
++ ++Phase 1: Capabilities Negotiation ++"""""""""""""""""""""""""""""""""" ++ ++The agent begins an attestation cycle by sending its capabilities to the verifier. ++This tells the verifier what types of evidence the agent can produce and what ++cryptographic algorithms it supports. ++ ++1. The agent sends a ``POST /v3/agents/{agent_id}/attestations`` request to the ++ verifier containing its supported evidence types (TPM quote parameters, IMA log ++ capabilities, UEFI log capabilities) and the public attestation key (AK). ++ ++2. The verifier creates an attestation resource, selects cryptographic parameters ++ (signature scheme, hash algorithm, PCRs to quote), generates a random challenge ++ nonce, and returns a ``201 Created`` response with: ++ ++ * The challenge nonce for TPM quote generation ++ * The chosen cryptographic parameters ++ * The evidence types requested ++ * A deadline (``challenges_expire_at``) by which evidence must be submitted ++ ++Phase 2: Evidence Submission ++""""""""""""""""""""""""""""" ++ ++The agent collects the requested evidence and submits it to the verifier. ++ ++1. The agent generates a TPM quote using the challenge nonce from Phase 1, ++ collects IMA and/or UEFI event logs as requested, and sends a ++ ``PATCH /v3/agents/{agent_id}/attestations/latest`` request with the evidence. ++ ++2. The verifier returns a ``202 Accepted`` response immediately. The evidence is ++ then verified asynchronously in a background worker process. ++ ++3. If verification succeeds, the attestation is marked as ``pass``. If it fails, ++ the attestation is marked as ``fail`` with a failure reason ++ (``broken_evidence_chain`` or ``policy_violation``). ++ ++4. The ``202 Accepted`` response includes a ``seconds_to_next_attestation`` value in the ``meta`` ++ field, indicating when the agent should start its next attestation cycle. ++ ++After a configurable interval, the agent begins a new cycle from Phase 1.
++ ++Agent State Machine ++""""""""""""""""""" ++ ++The push-model agent operates as a state machine with the following states: ++ ++.. figure:: ../assets/push-model-sequence.svg ++ :width: 600 ++ :align: center ++ :alt: Sequence diagram showing the push-model agent state machine transitions ++ from Unregistered through Registered, Negotiating, and Attesting states. ++ ++ **Figure 2:** Push-Model Agent State Machine ++ ++* **Unregistered**: Initial state. The agent registers with the registrar. ++* **Registered**: Registration succeeded. The agent begins negotiation with the ++ verifier. ++* **Negotiating**: The agent sends capabilities to the verifier (Phase 1) and waits ++ for the challenge response. ++* **Attesting**: The agent generates and sends evidence to the verifier (Phase 2). ++ On success, the agent waits for the configured interval and transitions back to ++ Negotiating. ++* **RegistrationFailed**: Registration with the registrar failed. The agent waits ++ and retries. ++* **AttestationFailed**: An attestation attempt failed (network error or verifier ++ rejection). The agent waits and retries from Negotiating. ++ ++The agent uses exponential backoff when retrying failed operations. ++ ++Authentication ++-------------- ++ ++Push-model attestation uses **Proof of Possession (PoP)** authentication instead of ++the mTLS client certificates used in pull mode. This is necessary because the agent ++acts as a client (not a server) and does not have certificates signed by the verifier's ++trusted CA. ++ ++The PoP authentication flow: ++ ++1. The agent creates a session by sending ``POST /v3/sessions`` with its agent ID ++ and supported authentication methods. ++2. The verifier responds with a challenge nonce. ++3. The agent proves possession of its AK by signing the challenge using the TPM ++ (``TPM2_Certify``) and sends the result via ``PATCH /v3/sessions/{session_id}``. ++4. If the signature is valid, the verifier issues a bearer token. ++5. 
The agent includes this token in the ``Authorization`` header of all subsequent ++ requests. ++6. Tokens have a configurable expiration time and can be refreshed. ++ ++The TLS connection uses **server verification only**: the agent verifies the verifier's ++server certificate but does not present a client certificate. The agent needs the ++verifier's CA certificate for this verification. ++ ++For full details on the authorization framework, including the separation between ++agent and admin authentication, see :doc:`../user_guide/authentication`. ++ ++Timeout Monitoring ++------------------ ++ ++In pull mode, the verifier detects unresponsive agents through its polling loop. In ++push mode, an event-driven timeout system serves this purpose. ++ ++The verifier monitors push-mode agents as follows: ++ ++1. When the verifier receives an attestation from an agent, it schedules a timeout ++ for that agent. The timeout duration is ``quote_interval * 5`` seconds (where ++ ``quote_interval`` is the verifier's configured quote interval). ++ ++2. If the agent does not submit a new attestation before the timeout fires, the ++ verifier sets the agent's ``accept_attestations`` flag to ``False``. ++ ++3. Once ``accept_attestations`` is ``False``, the verifier rejects new attestation ++ requests from that agent with a ``403 Forbidden`` response. ++ ++4. The agent can recover by re-registering or by administrator intervention ++ (reactivation). ++ ++Comparison with Pull Model ++--------------------------- ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 30 35 35 ++ ++ * - Aspect ++ - Pull Model ++ - Push Model ++ * - Connection direction ++ - Verifier connects to agent ++ - Agent connects to verifier ++ * - Agent binary ++ - ``keylime_agent`` ++ - ``keylime_push_model_agent`` ++ * - Agent network requirements ++ - Must expose HTTP port (default 9002) ++ - No listening ports required ++ * - Firewall requirements ++ - Inbound to agent from verifier ++ - Outbound from agent to verifier ++ * - Authentication method ++ - mTLS (agent as server) ++ - PoP bearer tokens (agent as client) ++ * - API version ++ - v2.x ++ - v3.0 ++ * - Verification trigger ++ - Verifier polls on ``quote_interval`` ++ - Agent pushes on ``attestation_interval_seconds`` ++ * - Liveness detection ++ - Polling loop state machine ++ - Event-driven timeout (``quote_interval * 5``) ++ * - Verifier configuration ++ - ``mode = pull`` (default) ++ - ``mode = push`` ++ * - Suitable for ++ - Controlled networks, data centers ++ - Edge, IoT, NAT, firewalled environments ++ * - Maturity ++ - Stable ++ - Experimental ++ ++For deployment and configuration instructions, see :doc:`../user_guide/push_model`. ++For the v3.0 API reference, see :doc:`../rest_apis/3_0/3_0`. +diff --git a/docs/index.rst b/docs/index.rst +index 8234217fd..fd5f08bed 100644 +--- a/docs/index.rst ++++ b/docs/index.rst +@@ -43,6 +43,7 @@ what the goals of Keylime are and how they are implemented. + man/keylime_verifier.8 + man/keylime_registrar.8 + man/keylime_agent.8 ++ man/keylime_push_model_agent.8 + man/keylime_policy.1 + + Indices and tables +diff --git a/docs/installation.rst b/docs/installation.rst +index 21d35a793..b96574137 100644 +--- a/docs/installation.rst ++++ b/docs/installation.rst +@@ -62,6 +62,17 @@ Rust agent + + Installation instructions can be found in the `README.md `_ for the Rust agent. + ++Push-model agent ++~~~~~~~~~~~~~~~~ ++.. 
note:: ++ The push-model agent (``keylime_push_model_agent``) is a separate binary from ++ the standard Rust agent. It implements the push attestation protocol where the ++ agent initiates connections to the verifier. This feature is currently experimental. ++ ++ Installation instructions are the same as for the Rust agent. The push-model ++ agent binary is built from the same repository. For configuration and deployment ++ details, see the :doc:`user_guide/push_model` user guide. ++ + Keylime Bash installer + ---------------------- + +diff --git a/docs/man/keylime_push_model_agent.8.rst b/docs/man/keylime_push_model_agent.8.rst +new file mode 100644 +index 000000000..b033db801 +--- /dev/null ++++ b/docs/man/keylime_push_model_agent.8.rst +@@ -0,0 +1,226 @@ ++========================== ++keylime_push_model_agent ++========================== ++ ++------------------------------------------------------------ ++Keylime push-model agent for TPM-based remote attestation ++------------------------------------------------------------ ++ ++:Manual section: 8 ++:Author: Keylime Developers ++:Date: February 2026 ++ ++SYNOPSIS ++======== ++ ++**keylime_push_model_agent** [*OPTIONS*] ++ ++(Most operations require root privileges, use with sudo) ++ ++DESCRIPTION ++=========== ++ ++The push-model agent is a long-running service that runs on systems to be attested. ++Unlike the standard Keylime agent which acts as a server and waits for the verifier ++to poll it, the push-model agent initiates connections to the verifier and proactively ++submits attestation evidence. ++ ++The agent registers with the registrar, authenticates with the verifier using Proof of ++Possession (PoP), and performs periodic attestation cycles consisting of capabilities ++negotiation and evidence submission. ++ ++This agent uses API version 3.0 and requires the verifier to be configured in push ++mode (``mode = push``).
++ ++OPTIONS ++======= ++ ++**--verifier-url** *URL* ++ URL of the verifier (must use HTTPS). Default: ``https://localhost:8881`` ++ ++**--registrar-url** *URL* ++ URL of the registrar. Default: ``http://127.0.0.1:8888`` ++ ++**--agent-identifier** *ID* ++ Agent UUID. Overrides the ``uuid`` configuration option. ++ ++**--attestation-interval-seconds** *SECONDS* ++ Interval between attestation cycles. Default: ``60`` ++ ++**--ca-certificate** *PATH* ++ CA certificate file for verifying the verifier's TLS certificate. Overrides ++ ``verifier_tls_ca_cert``. ++ ++**--api-version** *VERSION* ++ API version to use. Default: ``v3.0`` ++ ++**--timeout** *MILLISECONDS* ++ HTTP request timeout. Default: ``5000`` ++ ++**--insecure** ++ Accept invalid TLS certificates. For testing only. ++ ++**--avoid-tpm** ++ Use a mock TPM instead of hardware TPM. For testing only. ++ ++**--json-file** *FILE* ++ JSON file for payload data. ++ ++**--attestation-index** *INDEX* ++ Attestation index value. Default: ``1`` ++ ++**--session-index** *INDEX* ++ Session index value. Default: ``1`` ++ ++**--message-type** *TYPE* ++ Message type (Attestation, EvidenceHandling, Session). Default: ``Attestation`` ++ ++**--method** *METHOD* ++ HTTP method. Default: ``POST`` ++ ++CONFIGURATION ++============= ++ ++Primary configuration is read from ``/etc/keylime/agent.conf`` (TOML format). ++All options are under the ``[agent]`` section. Command-line arguments override ++configuration file values. ++ ++Drop-in overrides: files in ``/etc/keylime/agent.conf.d/`` are applied in ++lexicographic order. ++ ++Push-model specific options: ++ ++**verifier_url** ++ URL of the verifier. Must use HTTPS. Default: ``https://localhost:8881`` ++ ++**verifier_tls_ca_cert** ++ Path to CA certificate for verifying the verifier's TLS certificate. ++ Relative paths are resolved from ``keylime_dir``. Default: ``cv_ca/cacert.crt`` ++ ++**attestation_interval_seconds** ++ Interval in seconds between attestation cycles. 
Default: ``60`` ++ ++**api_versions** ++ API versions to use. Default: ``3.0`` ++ ++**certification_keys_server_identifier** ++ Server identifier for attestation key certification. Default: ``ak`` ++ ++**uefi_logs_evidence_version** ++ UEFI logs evidence format version. Default: ``2.1`` ++ ++**exponential_backoff_initial_delay** ++ Initial retry delay in milliseconds. Default: ``10000`` ++ ++**exponential_backoff_max_retries** ++ Maximum number of retry attempts. Default: ``5`` ++ ++**exponential_backoff_max_delay** ++ Maximum retry delay in milliseconds. Default: ``300000`` ++ ++Shared options (same as standard agent): ++ ++**uuid** ++ Agent identifier. Default: auto-generated UUID. ++ ++**registrar_ip**, **registrar_port** ++ Registrar endpoint. Default: ``127.0.0.1:8890`` ++ ++**registrar_tls_enabled** ++ Enable TLS for registrar communication. Default: ``false`` ++ ++**registrar_tls_ca_cert** ++ CA certificate for registrar TLS verification. Default: ``cv_ca/cacert.crt`` ++ ++**tpm_hash_alg**, **tpm_encryption_alg**, **tpm_signing_alg** ++ TPM algorithms. Defaults: ``sha256``, ``rsa``, ``rsassa`` ++ ++**keylime_dir** ++ Working directory. Default: ``/var/lib/keylime`` ++ ++**run_as** ++ User:group to drop privileges to. Default: ``keylime:tss`` ++ ++**enable_iak_idevid** ++ Enable IAK/IDevID usage. Default: ``false`` ++ ++ENVIRONMENT ++=========== ++ ++**KEYLIME_AGENT_CONFIG** ++ Path to agent.conf (highest priority) ++ ++**KEYLIME_DIR** ++ Working directory (default: ``/var/lib/keylime``) ++ ++**RUST_LOG** ++ Log level configuration. Default in systemd service: ++ ``keylime_push_model_agent=info,keylime=info`` ++ ++All configuration options can be overridden via environment variables in the form ++``KEYLIME_AGENT_<OPTION>`` (e.g. ``KEYLIME_AGENT_VERIFIER_URL``).
++ ++FILES ++===== ++ ++``/etc/keylime/agent.conf`` ++ TOML format configuration file (shared with standard agent) ++ ++``/etc/keylime/agent.conf.d/`` ++ Drop-in configuration snippets ++ ++``/var/lib/keylime/cv_ca/cacert.crt`` ++ Default CA certificate for verifier TLS verification ++ ++``/var/lib/keylime/agent_data.json`` ++ Persisted agent TPM data ++ ++RUNTIME ++======= ++ ++Start directly: ++ ++.. code-block:: bash ++ ++ sudo keylime_push_model_agent --verifier-url https://verifier.example.com:8881 ++ ++Start as a systemd service: ++ ++.. code-block:: bash ++ ++ sudo systemctl enable --now keylime_push_model_agent ++ ++Check service status: ++ ++.. code-block:: bash ++ ++ sudo systemctl status keylime_push_model_agent ++ sudo journalctl -u keylime_push_model_agent -f ++ ++PREREQUISITES ++============= ++ ++- Root privileges (use sudo) ++- TPM 2.0 available (verify with ``tpm2_pcrread``) ++- Verifier configured with ``mode = push`` ++- Network connectivity from agent to verifier and registrar ++- Verifier CA certificate available on agent machine ++ ++NOTES ++===== ++ ++- This service conflicts with ``keylime_agent.service``. Only one agent type can ++ run on a machine at a time. ++- The push-model agent does not expose any listening ports. ++- Push-model attestation is currently experimental. ++- Authentication uses PoP bearer tokens, not mTLS client certificates. ++ ++SEE ALSO ++======== ++ ++**keylime_agent**\(8), **keylime_verifier**\(8), **keylime_registrar**\(8), **keylime_tenant**\(1) ++ ++BUGS ++==== ++ ++Report bugs at https://github.com/keylime/rust-keylime/issues +diff --git a/docs/man/keylime_verifier.8.rst b/docs/man/keylime_verifier.8.rst +index fd7cfb941..5303a5f06 100644 +--- a/docs/man/keylime_verifier.8.rst ++++ b/docs/man/keylime_verifier.8.rst +@@ -32,6 +32,7 @@ Primary configuration is read from ``/etc/keylime/verifier.conf`` (or an overrid + All options are under the ``[verifier]`` section. 
+ + Essentials: ++- **mode**: Attestation mode (``pull`` or ``push``). Default: ``pull`` + - **uuid**: Unique identifier for this verifier instance + - **ip**, **port**: Bind address and HTTP port + - **registrar_ip**, **registrar_port**: Registrar endpoint +@@ -108,7 +109,7 @@ NOTES + SEE ALSO + ======== + +-**keylime_registrar**\(8), **keylime_tenant**\(1), **keylime_agent**\(8) ++**keylime_registrar**\(8), **keylime_tenant**\(1), **keylime_agent**\(8), **keylime_push_model_agent**\(8) + + BUGS + ==== +diff --git a/docs/rest_apis.rst b/docs/rest_apis.rst +index edfe8be1c..aba64c338 100644 +--- a/docs/rest_apis.rst ++++ b/docs/rest_apis.rst +@@ -14,10 +14,40 @@ Check the :ref:`Changelog` section for the differences between versions + rest_apis/2_3/2_3.rst + rest_apis/2_4/2_4.rst + rest_apis/2_5/2_5.rst ++ rest_apis/3_0/3_0.rst + + Changelog + _________ + ++Changes from v2.5 to v3.0 ++~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++API version 3.0 introduces push-model attestation. Unlike previous versions where ++the verifier polls agents, in v3.0 agents initiate connections and submit ++attestation evidence to the verifier. The v3.0 endpoints are served by the ++verifier only; the push-model agent does not expose HTTP endpoints. 
++ ++* Added `POST /v3/agents/{agent_id}/attestations` endpoint to the verifier: ++ * Allows agents to submit attestation capabilities (Phase 1 of push protocol) ++ * Returns challenge nonce for TPM quote generation ++* Added `PATCH /v3/agents/{agent_id}/attestations/latest` endpoint: ++ * Allows agents to submit attestation evidence (Phase 2 of push protocol) ++ * Returns `202 Accepted` for asynchronous verification ++* Added `PATCH /v3/agents/{agent_id}/attestations/{index}` endpoint: ++ * Submit evidence for a specific attestation by index ++* Added `GET /v3/agents/{agent_id}/attestations` endpoint: ++ * Lists all attestation records for an agent ++* Added `GET /v3/agents/{agent_id}/attestations/latest` endpoint: ++ * Returns the most recent attestation for an agent, including verification status ++* Added `GET /v3/agents/{agent_id}/attestations/{index}` endpoint: ++ * Returns a specific attestation by its index ++* Added `POST /v3/sessions` endpoint: ++ * Creates a PoP authentication session and returns a challenge nonce for the agent ++* Added `PATCH /v3/sessions/{session_id}` endpoint: ++ * Completes PoP authentication by submitting the TPM-signed challenge response ++* Introduced PoP (Proof of Possession) bearer token authentication for ++ agent-to-verifier communication ++ + Changes from v2.4 to v2.5 + ~~~~~~~~~~~~~~~~~~~~~~~~~ + API version 2.5 was first implemented in Keylime 7.14.0. +diff --git a/docs/rest_apis/3_0/3_0.rst b/docs/rest_apis/3_0/3_0.rst +new file mode 100644 +index 000000000..d6cac705d +--- /dev/null ++++ b/docs/rest_apis/3_0/3_0.rst +@@ -0,0 +1,21 @@ ++RESTful API for Keylime (v3.0) ++------------------------------ ++ ++API version 3.0 introduces push-model attestation, where agents initiate ++connections to the verifier and proactively submit attestation evidence. ++ ++Unlike previous API versions where the agent exposed HTTP endpoints for the ++verifier to poll, in v3.0 the agent acts as a client. 
The v3.0 endpoints are ++served by the **verifier only**. The push-model agent does not expose an API. ++ ++For a conceptual overview of push-model attestation, see ++:doc:`../../design/push_model`. ++ ++.. warning:: ++ Push-model attestation is currently experimental. The API may change in ++ future releases. ++ ++.. toctree:: ++ :maxdepth: 2 ++ ++ verifier.rst +diff --git a/docs/rest_apis/3_0/verifier.rst b/docs/rest_apis/3_0/verifier.rst +new file mode 100644 +index 000000000..3476cc7a3 +--- /dev/null ++++ b/docs/rest_apis/3_0/verifier.rst +@@ -0,0 +1,608 @@ ++Verifier ++~~~~~~~~ ++ ++Push-Model Attestation Endpoints ++""""""""""""""""""""""""""""""""" ++ ++These endpoints implement the two-phase push-model attestation protocol. Agents ++use these endpoints to submit attestation capabilities and evidence. Administrators ++can use the GET endpoints to view attestation results. ++ ++For details on authentication requirements, see :doc:`../../user_guide/authentication`. ++ ++.. http:post:: /v3/agents/{agent_id}/attestations ++ ++ Phase 1: Submit attestation capabilities and receive a challenge. ++ ++ The agent sends its supported evidence types, cryptographic algorithms, and ++ attestation key. The verifier selects parameters and returns a challenge nonce ++ for TPM quote generation. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ **Example request**: ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "attributes": { ++ "evidence_supported": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": { ++ "signature_schemes": ["rsassa"], ++ "hash_algorithms": ["sha256", "sha384", "sha512"], ++ "available_subjects": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], ++ "certification_keys": [ ++ { ++ "key_class": "asymmetric", ++ "key_algorithm": "rsa", ++ "key_size": 2048, ++ "server_identifier": "ak", ++ "allowable_signature_schemes": ["rsassa"], ++ "allowable_hash_algorithms": ["sha256", "sha384", "sha512"], ++ "public": "" ++ } ++ ], ++ "component_version": "2.0", ++ "evidence_version": "1.0" ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "capabilities": { ++ "entry_count": 1024, ++ "supports_partial_access": true, ++ "appendable": true, ++ "formats": ["text/plain"], ++ "component_version": "1.0", ++ "evidence_version": "1.0" ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ } ++ } ++ } ++ } ++ ++ **Example response** (201 Created): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "awaiting_evidence", ++ "evidence_requested": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "chosen_parameters": { ++ "challenge": "", ++ "signature_scheme": "rsassa", ++ "hash_algorithm": "sha256", ++ "selected_subjects": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], ++ "certification_key": { ++ "key_class": "asymmetric", ++ "key_algorithm": "rsa", ++ "key_size": 2048, ++ "server_identifier": "ak" ++ } ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "chosen_parameters": { ++ "starting_offset": 0, ++ "entry_count": 1024, ++ "format": "text/plain" ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ } ++ } ++ ++ :>json string data.id: Attestation index (auto-incremented per agent) ++ :>json string data.attributes.stage: ``"awaiting_evidence"`` ++ :>json array data.attributes.evidence_requested: Evidence the verifier wants the agent to provide ++ :>json string evidence_requested[].chosen_parameters.challenge: Base64-encoded challenge nonce for TPM quote ++ :>json string data.attributes.capabilities_received_at: ISO 8601 timestamp ++ :>json string data.attributes.challenges_expire_at: Deadline for evidence submission ++ :>json string data.links.self: URL to this attestation resource ++ ++ :statuscode 201: Attestation created, challenge issued ++ :statuscode 400: Invalid request body ++ :statuscode 403: Attestations disabled for this agent (timeout or previous failure) ++ :statuscode 404: Agent not found ++ :statuscode 409: Concurrent attestation creation attempt ++ :statuscode 422: Invalid capabilities data ++ :statuscode 429: Rate
limited (attestation interval not elapsed). Includes ``Retry-After`` header ++ :statuscode 503: Previous attestation still being verified. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/agents/{agent_id}/attestations/latest ++ ++ Phase 2: Submit attestation evidence for the latest attestation. ++ ++ The agent sends the TPM quote, PCR values, and event logs generated using the ++ challenge nonce from Phase 1. The verifier accepts the evidence and verifies it ++ asynchronously. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "attributes": { ++ "evidence_collected": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "data": { ++ "subject_data": { ++ "0": "", ++ "1": "" ++ }, ++ "message": "", ++ "signature": "" ++ } ++ }, ++ { ++ "evidence_class": "log", ++ "evidence_type": "ima_log", ++ "data": { ++ "entry_count": 512, ++ "entries": "" ++ } ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (202 Accepted): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "evaluating_evidence", ++ "evidence": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": {}, ++ "chosen_parameters": {}, ++ "data": { ++ "message": "", ++ "signature": "", ++ "subject_data": {} ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ }, ++ "meta": { ++ "seconds_to_next_attestation": 45 ++ } ++ } ++ ++ :>json string data.attributes.stage: ``"evaluating_evidence"`` (verification in progress) ++ :>json array data.attributes.evidence: Evidence items with capabilities, parameters, and data ++ :>json string data.attributes.evidence_received_at: ISO 8601 timestamp when evidence was received ++ :>json int meta.seconds_to_next_attestation: Suggested wait before starting the next attestation cycle ++ ++ :statuscode 202: Evidence accepted, verification in progress ++ :statuscode 400: Invalid evidence format ++ :statuscode 403: Evidence already submitted, attestation is not the latest, or challenges expired ++ :statuscode 404: Agent or attestation not found ++ :statuscode 410: Attestation no longer exists ++ :statuscode 503: No available worker processes. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/agents/{agent_id}/attestations/{index} ++ ++ Submit attestation evidence for a specific attestation by index. ++ ++ Behaves identically to ``PATCH /v3/agents/{agent_id}/attestations/latest`` ++ but targets a specific attestation index. Evidence can only be submitted for ++ the latest attestation. 
++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ :param index: Attestation index ++ :type index: integer ++ ++ **Authentication**: PoP bearer token (agent-only) ++ ++ :statuscode 202: Evidence accepted ++ :statuscode 403: Not the latest attestation, evidence already submitted, or challenges expired ++ :statuscode 404: Agent or attestation not found ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations ++ ++ List all attestations for an agent. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ **Example response**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": [ ++ { ++ "type": "attestation", ++ "id": "1", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "evidence": [], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:32:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/1" ++ } ++ }, ++ { ++ "type": "attestation", ++ "id": "0", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "evidence": [], ++ "system_info": {}, ++ "capabilities_received_at": "2024-01-15T10:25:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:30:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:26:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:27:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/0" ++ } ++ } ++ ] ++ } ++ ++ :>json array data: List of attestation resources ++ :>json string data[].id: Attestation index ++ :>json string data[].attributes.stage: ``"awaiting_evidence"``, ``"evaluating_evidence"``, or ``"verification_complete"`` ++ :>json string data[].attributes.evaluation: 
``"pending"``, ``"pass"``, or ``"fail"`` ++ :>json string data[].attributes.failure_reason: ``"broken_evidence_chain"`` or ``"policy_violation"`` (only when evaluation is ``"fail"``) ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent not found ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations/latest ++ ++ Get the latest attestation for an agent. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ **Example response**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "attestation", ++ "id": "1", ++ "attributes": { ++ "stage": "verification_complete", ++ "evaluation": "pass", ++ "failure_reason": null, ++ "evidence": [ ++ { ++ "evidence_class": "certification", ++ "evidence_type": "tpm_quote", ++ "capabilities": {}, ++ "chosen_parameters": {}, ++ "data": { ++ "message": "", ++ "signature": "", ++ "subject_data": {} ++ } ++ } ++ ], ++ "system_info": { ++ "boot_time": "2024-01-15T10:30:00Z" ++ }, ++ "capabilities_received_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:35:00.123456Z", ++ "evidence_received_at": "2024-01-15T10:31:00.123456Z", ++ "verification_completed_at": "2024-01-15T10:32:00.123456Z" ++ }, ++ "links": { ++ "self": "/v3/agents/{agent_id}/attestations/1" ++ } ++ } ++ } ++ ++ :>json string data.attributes.stage: Current stage of the attestation ++ :>json string data.attributes.evaluation: ``"pending"``, ``"pass"``, or ``"fail"`` ++ :>json string data.attributes.failure_reason: ``null``, ``"broken_evidence_chain"``, or ``"policy_violation"`` ++ :>json array data.attributes.evidence: Evidence items with full data ++ :>json string data.attributes.capabilities_received_at: When capabilities were received ++ :>json string data.attributes.challenges_expire_at: When challenges expire ++ :>json string data.attributes.evidence_received_at: When evidence was received (``null`` if still awaiting) ++ :>json string 
data.attributes.verification_completed_at: When verification completed (``null`` if still in progress) ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent not found or no attestations exist ++ ++ ++.. http:get:: /v3/agents/{agent_id}/attestations/{index} ++ ++ Get a specific attestation by index. ++ ++ :param agent_id: UUID of the agent ++ :type agent_id: string ++ :param index: Attestation index ++ :type index: integer ++ ++ **Authentication**: mTLS (admin) or PoP bearer token (own agent only) ++ ++ Response format is identical to ``GET /v3/agents/{agent_id}/attestations/latest``. ++ ++ :statuscode 200: Success ++ :statuscode 404: Agent or attestation not found ++ ++ ++Session Endpoints ++""""""""""""""""" ++ ++These endpoints manage PoP (Proof of Possession) authentication sessions for ++push-model agents. Sessions are required before an agent can submit attestations. ++ ++.. http:post:: /v3/sessions ++ ++ Create a new authentication session. ++ ++ The verifier generates a challenge nonce that the agent must sign using its ++ TPM attestation key to prove possession. ++ ++ **Authentication**: None (public endpoint) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_supported": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop" ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (200 OK): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "id": "550e8400-e29b-41d4-a716-446655440000", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_requested": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "chosen_parameters": { ++ "challenge": "" ++ } ++ } ++ ], ++ "created_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:31:00.123456Z" ++ } ++ } ++ } ++ ++ :>json string data.id: Session UUID ++ :>json string data.attributes.challenges_expire_at: Deadline for submitting the PoP response ++ ++ :statuscode 200: Session created ++ :statuscode 400: Missing or invalid agent_id ++ :statuscode 429: Rate limited. Includes ``Retry-After`` header ++ ++ ++.. http:patch:: /v3/sessions/{session_id} ++ ++ Submit Proof of Possession response to complete authentication. ++ ++ The agent signs the challenge nonce from the session creation response using ++ ``TPM2_Certify`` and submits the result. If valid, the verifier issues a bearer ++ token for subsequent API calls. ++ ++ :param session_id: UUID of the session ++ :type session_id: string ++ ++ **Authentication**: None (public endpoint; validates PoP internally) ++ ++ **Example request**: ++ ++ .. sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "authentication_provided": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "data": { ++ "message": "", ++ "signature": "" ++ } ++ } ++ ] ++ } ++ } ++ } ++ ++ **Example response** (200 OK, authentication passed): ++ ++ .. 
sourcecode:: json ++ ++ { ++ "data": { ++ "type": "session", ++ "id": "550e8400-e29b-41d4-a716-446655440000", ++ "attributes": { ++ "agent_id": "d432fbb3-d2f1-4a97-9ef7-75bd81c00000", ++ "evaluation": "pass", ++ "token": "550e8400-e29b-41d4-a716-446655440000.", ++ "authentication": [ ++ { ++ "authentication_class": "pop", ++ "authentication_type": "tpm_pop", ++ "chosen_parameters": { ++ "challenge": "" ++ }, ++ "data": { ++ "message": "", ++ "signature": "" ++ } ++ } ++ ], ++ "created_at": "2024-01-15T10:30:00.123456Z", ++ "challenges_expire_at": "2024-01-15T10:31:00.123456Z", ++ "response_received_at": "2024-01-15T10:30:30.123456Z", ++ "token_expires_at": "2024-01-15T11:30:00.123456Z" ++ } ++ } ++ } ++ ++ :>json string data.attributes.evaluation: ``"pass"`` or ``"fail"`` ++ :>json string data.attributes.token: Bearer token for subsequent requests (only on ``"pass"``) ++ :>json string data.attributes.token_expires_at: Token expiration time (only on ``"pass"``) ++ ++ :statuscode 200: PoP response processed (check ``evaluation`` field for result) ++ :statuscode 400: Missing or invalid request body ++ :statuscode 401: PoP verification failed ++ :statuscode 404: Session not found ++ ++ ++Attestation Stages and Evaluations ++""""""""""""""""""""""""""""""""""" ++ ++Each attestation progresses through the following stages: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 75 ++ ++ * - Stage ++ - Description ++ * - ``awaiting_evidence`` ++ - Capabilities received, challenge issued, waiting for evidence ++ * - ``evaluating_evidence`` ++ - Evidence received, verification in progress ++ * - ``verification_complete`` ++ - Verification finished, see ``evaluation`` for result ++ ++The ``evaluation`` field indicates the verification result: ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 20 80 ++ ++ * - Evaluation ++ - Description ++ * - ``pending`` ++ - Verification not yet complete ++ * - ``pass`` ++ - Evidence verified successfully ++ * - ``fail`` ++ - Evidence verification failed (see ``failure_reason``) ++ ++When an attestation fails, the ``failure_reason`` field provides the cause: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 70 ++ ++ * - Failure Reason ++ - Description ++ * - ``broken_evidence_chain`` ++ - TPM quote signature invalid or evidence integrity check failed ++ * - ``policy_violation`` ++ - Evidence is valid but violates the configured attestation policy +diff --git a/docs/user_guide.rst b/docs/user_guide.rst +index 9bd44c512..ed052c175 100644 +--- a/docs/user_guide.rst ++++ b/docs/user_guide.rst +@@ -8,6 +8,7 @@ User Guide + + user_guide/authentication.rst + user_guide/configuration.rst ++ user_guide/push_model.rst + user_guide/runtime_ima.rst + user_guide/user_selected_pcr_monitoring.rst + user_guide/use_measured_boot.rst +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index 6d8f35c88..2e50757df 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -40,6 +40,13 @@ The following components can be configured: + - ``/etc/keylime/logging.conf`` + - ``/etc/keylime/logging.conf.d`` + ++.. note:: ++ For push-model attestation, the verifier must be configured with ``mode = push`` ++ in the ``[verifier]`` section. The push-model agent uses the same ++ ``/etc/keylime/agent.conf`` file (TOML format) but with additional options such ++ as ``verifier_url`` and ``attestation_interval_seconds``. See ++ :doc:`push_model` for details. 
++ + The next sections contain details of the configuration files + + Configuration file processing order +diff --git a/docs/user_guide/push_model.rst b/docs/user_guide/push_model.rst +new file mode 100644 +index 000000000..773d2aaaa +--- /dev/null ++++ b/docs/user_guide/push_model.rst +@@ -0,0 +1,370 @@ ++======================== ++Push-Model Attestation ++======================== ++ ++.. warning:: ++ Push-model attestation is currently experimental. The feature is functional ++ but the API and configuration options may change in future releases. ++ ++Introduction ++------------ ++ ++In the default pull model, the Keylime verifier continuously polls agents for ++attestation data. This requires the verifier to reach the agent over the network. ++ ++The push model reverses this: the agent initiates connections to the verifier and ++proactively sends attestation evidence. This is useful when the verifier cannot ++directly reach the agent, for example behind firewalls, NAT, or in edge/IoT ++deployments. ++ ++For a detailed description of how push-model attestation works, see ++:doc:`../design/push_model`. ++ ++Prerequisites ++------------- ++ ++* Keylime verifier and registrar installed and running ++* The ``keylime-push-model-agent`` binary installed on the target machine ++* A TPM 2.0 device (hardware or emulated for development) ++* Network connectivity **from the agent to the verifier and registrar** (the ++ reverse is not required) ++* The verifier's CA certificate available on the agent machine ++ ++Configuring the Verifier for Push Mode ++-------------------------------------- ++ ++Set the verifier's attestation mode to ``push`` in ``/etc/keylime/verifier.conf``: ++ ++.. code-block:: ini ++ ++ [verifier] ++ mode = push ++ ++Or use a configuration snippet in ``/etc/keylime/verifier.conf.d/``: ++ ++.. 
code-block:: ini ++ ++ # /etc/keylime/verifier.conf.d/001-push-mode.conf ++ [verifier] ++ mode = push ++ ++The verifier can also be configured via environment variable: ++ ++.. code-block:: bash ++ ++ export KEYLIME_VERIFIER_MODE=push ++ ++.. note:: ++ The ``mode`` setting affects all agents on this verifier. A verifier in push ++ mode expects agents to submit attestation data; it does not poll agents. A ++ single verifier cannot operate in both modes simultaneously. ++ ++Additional verifier settings relevant to push mode: ++ ++* ``quote_interval``: Used to calculate the agent timeout threshold ++ (``quote_interval * 5``). Default: ``2`` seconds. ++* ``challenge_lifetime``: How long a challenge nonce remains valid for evidence ++ submission. ++* ``verification_timeout``: Maximum time allowed for evidence verification. ++ ++After changing the configuration, restart the verifier: ++ ++.. code-block:: bash ++ ++ sudo systemctl restart keylime_verifier ++ ++Configuring the Push-Model Agent ++--------------------------------- ++ ++The push-model agent is a separate binary from the standard Keylime agent. It is ++installed as ``keylime_push_model_agent`` (or ``keylime-push-model-agent``). ++ ++The agent is configured through ``/etc/keylime/agent.conf`` (TOML format), command-line ++arguments, or environment variables. ++ ++Key Configuration Options ++""""""""""""""""""""""""" ++ ++The following options are specific to or particularly important for push-model ++operation: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 15 55 ++ ++ * - Option ++ - Default ++ - Description ++ * - ``verifier_url`` ++ - ``https://localhost:8881`` ++ - URL of the verifier. Must use HTTPS. ++ * - ``verifier_tls_ca_cert`` ++ - ``cv_ca/cacert.crt`` ++ - Path to the CA certificate for verifying the verifier's TLS certificate. ++ Relative paths are resolved from ``keylime_dir``. ++ * - ``attestation_interval_seconds`` ++ - ``60`` ++ - Interval in seconds between attestation cycles. 
++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - IP address of the registrar. ++ * - ``registrar_port`` ++ - ``8890`` ++ - Port of the registrar. ++ * - ``registrar_tls_enabled`` ++ - ``false`` ++ - Enable TLS for registrar communication. ++ * - ``registrar_tls_ca_cert`` ++ - ``cv_ca/cacert.crt`` ++ - CA certificate for registrar TLS verification. ++ * - ``uuid`` ++ - (generated) ++ - Agent UUID. Can be a specific UUID, ``generate`` (random), or ++ ``hash_ek`` (derived from the EK). ++ * - ``api_versions`` ++ - ``3.0`` ++ - API versions supported by the agent. Defaults to ``3.0`` for push model. ++ * - ``tpm_hash_alg`` ++ - ``sha256`` ++ - TPM hash algorithm (``sha256``, ``sha384``, ``sha512``). ++ * - ``tpm_signing_alg`` ++ - ``rsassa`` ++ - TPM signing algorithm (``rsassa``, ``ecdsa``). ++ * - ``keylime_dir`` ++ - ``/var/lib/keylime`` ++ - Working directory for certificates and data files. ++ ++Example Minimal Configuration ++"""""""""""""""""""""""""""""" ++ ++.. code-block:: toml ++ ++ # /etc/keylime/agent.conf (push-model agent) ++ [agent] ++ uuid = "d432fbb3-d2f1-4a97-9ef7-75bd81c00000" ++ verifier_url = "https://verifier.example.com:8881" ++ verifier_tls_ca_cert = "/var/lib/keylime/cv_ca/cacert.crt" ++ attestation_interval_seconds = 60 ++ registrar_ip = "registrar.example.com" ++ registrar_port = 8890 ++ tpm_hash_alg = "sha256" ++ tpm_signing_alg = "rsassa" ++ ++Command-Line Arguments ++"""""""""""""""""""""" ++ ++The push-model agent accepts the following command-line arguments, which override ++configuration file values: ++ ++.. 
code-block:: text ++ ++ --verifier-url Verifier URL (required) ++ --registrar-url Registrar URL (default: http://127.0.0.1:8888) ++ --agent-identifier Agent UUID ++ --attestation-interval-seconds Attestation interval (default: 60) ++ --ca-certificate CA certificate for TLS verification ++ --api-version API version (default: v3.0) ++ --timeout Request timeout in milliseconds (default: 5000) ++ --insecure Accept invalid TLS certificates (testing only) ++ --avoid-tpm Use mock TPM (testing only) ++ ++Exponential Backoff ++""""""""""""""""""" ++ ++When the agent encounters errors (network failures, verifier unavailable), it uses ++exponential backoff for retries: ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 35 15 50 ++ ++ * - Option ++ - Default ++ - Description ++ * - ``exponential_backoff_initial_delay`` ++ - ``10000`` ++ - Initial delay in milliseconds (10 seconds) ++ * - ``exponential_backoff_max_retries`` ++ - ``5`` ++ - Maximum number of retry attempts ++ * - ``exponential_backoff_max_delay`` ++ - ``300000`` ++ - Maximum delay in milliseconds (5 minutes) ++ ++Systemd Service Management ++--------------------------- ++ ++The push-model agent is managed as a systemd service: ++ ++.. code-block:: bash ++ ++ # Enable the service to start on boot ++ sudo systemctl enable keylime_push_model_agent ++ ++ # Start the service ++ sudo systemctl start keylime_push_model_agent ++ ++ # Check service status ++ sudo systemctl status keylime_push_model_agent ++ ++ # View logs ++ sudo journalctl -u keylime_push_model_agent -f ++ ++.. warning:: ++ The push-model agent service (``keylime_push_model_agent.service``) conflicts ++ with the standard pull-model agent service (``keylime_agent.service``). Only one ++ can run at a time on the same machine. Starting one will stop the other. ++ ++The service is configured to restart on failure with a 120-second delay between ++restart attempts. 
++ ++Enrolling an Agent for Push-Model Attestation ++--------------------------------------------- ++ ++Use the ``keylime_tenant`` tool with the ``--push-model`` flag to enroll an agent ++for push-model attestation: ++ ++.. code-block:: bash ++ ++ # Add an agent in push mode ++ sudo keylime_tenant -c add --push-model -u AGENT_UUID ++ ++ # Add with a runtime IMA policy ++ sudo keylime_tenant -c add --push-model -u AGENT_UUID \ ++ --runtime-policy-name POLICY_NAME ++ ++ # Add with a measured boot policy ++ sudo keylime_tenant -c add --push-model -u AGENT_UUID \ ++ --mb-policy-name POLICY_NAME ++ ++.. note:: ++ In push mode, the ``-t`` / ``--targethost`` option is not required because the ++ verifier does not need to connect to the agent. The agent's IP and port are set ++ to ``None`` in the verifier's database. ++ ++To check the status of a push-model agent: ++ ++.. code-block:: bash ++ ++ sudo keylime_tenant -c cvstatus -u AGENT_UUID ++ ++To remove an agent: ++ ++.. code-block:: bash ++ ++ sudo keylime_tenant -c delete -u AGENT_UUID ++ ++TLS Configuration for Push Model ++--------------------------------- ++ ++The push model uses TLS differently from the pull model: ++ ++**Agent-to-verifier connection:** ++ ++* The agent connects to the verifier over HTTPS ++* The agent verifies the verifier's server certificate using the configured CA ++ certificate (``verifier_tls_ca_cert``) ++* The agent does **not** present a client certificate (no mTLS) ++* Authentication is done via PoP bearer tokens (see :doc:`authentication`) ++ ++**Agent-to-registrar connection:** ++ ++* The agent connects to the registrar to register itself ++* TLS can be enabled with ``registrar_tls_enabled = true`` ++* The registrar CA certificate is configured with ``registrar_tls_ca_cert`` ++ ++**Firewall considerations:** ++ ++* No inbound ports need to be opened on the agent machine ++* The agent needs outbound access to the verifier port (default: 8881) ++* The agent needs outbound access to the registrar port (default: 8890) ++ ++To set up TLS, copy the verifier's CA 
certificate to the agent machine: ++ ++.. code-block:: bash ++ ++ # On the verifier machine, the CA cert is typically at: ++ # /var/lib/keylime/cv_ca/cacert.crt ++ ++ # Copy to the agent machine: ++ scp verifier:/var/lib/keylime/cv_ca/cacert.crt /var/lib/keylime/cv_ca/cacert.crt ++ ++Verifying the Deployment ++------------------------- ++ ++After starting both the verifier (in push mode) and the push-model agent: ++ ++1. **Check agent registration** in the registrar: ++ ++ .. code-block:: bash ++ ++ sudo keylime_tenant -c regstatus -u AGENT_UUID ++ ++2. **Check attestation status** in the verifier: ++ ++ .. code-block:: bash ++ ++ sudo keylime_tenant -c cvstatus -u AGENT_UUID ++ ++3. **View verifier logs** for attestation activity: ++ ++ .. code-block:: bash ++ ++ sudo journalctl -u keylime_verifier -f ++ ++ Successful attestations will show evidence receipt and verification completion ++ messages. ++ ++4. **View agent logs** for attestation cycles: ++ ++ .. code-block:: bash ++ ++ sudo journalctl -u keylime_push_model_agent -f ++ ++ The agent logs will show transitions through the state machine: ++ registration, negotiation, and attestation phases. ++ ++Troubleshooting ++---------------- ++ ++Agent cannot connect to verifier ++""""""""""""""""""""""""""""""""" ++ ++* Verify the ``verifier_url`` is correct and uses HTTPS ++* Check that the verifier is running and listening on the configured port ++* Verify network connectivity from the agent to the verifier ++* Check that the CA certificate (``verifier_tls_ca_cert``) matches the verifier's ++ server certificate ++ ++Agent shows timeout failures ++""""""""""""""""""""""""""""" ++ ++The verifier marks an agent as failed if it does not receive an attestation within ++``quote_interval * 5`` seconds. 
++ ++* Verify the ``attestation_interval_seconds`` on the agent is less than the ++ verifier's timeout threshold ++* Check for network instability between agent and verifier ++* Review agent logs for errors during attestation cycles ++ ++PoP authentication errors ++"""""""""""""""""""""""""" ++ ++* Ensure the agent is properly registered in the registrar (the AK must be known) ++* Check that the TPM is accessible and functioning ++* Verify the agent UUID matches between agent configuration and verifier enrollment ++ ++Agent state stuck in Negotiating ++""""""""""""""""""""""""""""""""" ++ ++* The verifier may be rejecting capabilities. Check verifier logs for error details ++* Ensure the TPM algorithms configured on the agent are accepted by the verifier ++* Check that the ``api_versions`` setting includes ``3.0`` ++ ++Service fails to start ++"""""""""""""""""""""" ++ ++* Check that the pull-model agent service is not running ++ (``systemctl status keylime_agent``) ++* Verify the configuration file syntax (TOML format) ++* Check file permissions on TLS certificates and TPM device diff --git a/0015-remove-enable-authentication-config-option.patch b/0015-remove-enable-authentication-config-option.patch new file mode 100644 index 0000000..d40247b --- /dev/null +++ b/0015-remove-enable-authentication-config-option.patch @@ -0,0 +1,46 @@ +From 416d3906fe4071132d5cdc494f828ce3a909f336 Mon Sep 17 00:00:00 2001 +From: Sergio Arroutbi +Date: Fri, 20 Mar 2026 10:57:23 +0100 +Subject: [PATCH] Remove enable_authentication agent config option + +The Rust agent does not parse the enable_authentication +configuration option and always performs authentication. +Remove the option from both the agent.j2 template and the +2.5 mapping.json to avoid exposing a non-functional setting +to users. 
+ +Signed-off-by: Sergio Arroutbi +--- + templates/2.5/agent.j2 | 7 ------- + templates/2.5/mapping.json | 3 +-- + 2 files changed, 1 insertion(+), 9 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index d5eec733d..5e9a1a706 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -274,10 +274,3 @@ ima_ml_path = "{{ agent.ima_ml_path }}" + # If set as a relative path, it will be considered from the root path "/". + # If set as an absolute path, it will use it without changes + measuredboot_ml_path = "{{ agent.measuredboot_ml_path }}" +- +-# Enable challenge-response authentication for push model attestation. +-# When enabled, the agent will authenticate with the verifier using TPM-based +-# proof of possession before sending attestation evidence. +-# This option is specific to the push attestation model. +-# The default is False (disabled). +-enable_authentication = {{ agent.enable_authentication }} +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 522aa4ce9..4b198e768 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -13,8 +13,7 @@ + "ima_ml_count_file": "/tmp/ima_ml_count", + "uefi_logs_evidence_version": "1.0", + "tls_accept_invalid_certs": "false", +- "tls_accept_invalid_hostnames": "false", +- "enable_authentication": "true" ++ "tls_accept_invalid_hostnames": "false" + } + }, + "verifier": { diff --git a/0016-docs-push-attestation-config-tables.patch b/0016-docs-push-attestation-config-tables.patch new file mode 100644 index 0000000..0cd863e --- /dev/null +++ b/0016-docs-push-attestation-config-tables.patch @@ -0,0 +1,1164 @@ +From 4a36422caa40bf914b1b9f7ed86efc802e183ef1 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Tue, 24 Mar 2026 18:04:55 +0100 +Subject: [PATCH 1/3] templates: Remove unused ima_ml_count_file option + +This option was defined in the 2.5 config template and mapping but +never used. Remove it to avoid confusion. 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + templates/2.5/agent.j2 | 3 --- + templates/2.5/mapping.json | 1 - + 2 files changed, 4 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index 5e9a1a706..f56010e87 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -47,9 +47,6 @@ verifier_url = "{{ agent_verifier_url }}" + # Server identifier for certification keys + certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" + +-# File to store the IMA measurement list count +-ima_ml_count_file = "{{ agent_ima_ml_count_file }}" +- + # Evidence version for UEFI logs + uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" + +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 4b198e768..04f89e77a 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -10,7 +10,6 @@ + "exponential_backoff_initial_delay": "10000", + "exponential_backoff_max_delay": "360000", + "certification_keys_server_identifier": "ak", +- "ima_ml_count_file": "/tmp/ima_ml_count", + "uefi_logs_evidence_version": "1.0", + "tls_accept_invalid_certs": "false", + "tls_accept_invalid_hostnames": "false" + +From baf182680ffd60ab0b4ef8bf42bba3d02208b392 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 27 Mar 2026 10:55:35 +0100 +Subject: [PATCH 2/3] templates: Sync agent config options with + keylime-agent.conf + +Add missing agent options to the 2.5 upgrade templates and mapping +that are present in keylime-agent.conf and used in the agent code: + +Common options: +- keylime_dir: working directory path +- payload_key: payload encryption private key +- payload_key_password: password for payload key +- revocation_actions_dir: path to pre-installed revocation scripts +- allow_payload_revocation_actions: control payload revocation actions + +Push model options: +- verifier_tls_ca_cert: CA cert for verifier TLS 
verification +- registrar_tls_port: TLS port for registrar communication +- registrar_tls_enabled: enable TLS with registrar +- registrar_tls_ca_cert: CA cert for registrar TLS verification +- registrar_api_versions: API version negotiation with registrar + +Fix default values to match keylime-agent.conf: +- exponential_backoff_max_delay: 360000 -> 300000 +- uefi_logs_evidence_version: "1.0" -> "2.1" + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + templates/2.5/agent.j2 | 40 ++++++++++++++++++++++++++++++++++++++ + templates/2.5/mapping.json | 14 +++++++++++-- + 2 files changed, 52 insertions(+), 2 deletions(-) + +diff --git a/templates/2.5/agent.j2 b/templates/2.5/agent.j2 +index f56010e87..9f85f8411 100644 +--- a/templates/2.5/agent.j2 ++++ b/templates/2.5/agent.j2 +@@ -44,6 +44,11 @@ agent_data_path = "{{ agent_data_path }}" + # Verifier URL + verifier_url = "{{ agent_verifier_url }}" + ++# Verifier TLS CA certificate (Push Model specific). ++# Used to verify the verifier's server certificate. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++verifier_tls_ca_cert = "{{ agent.verifier_tls_ca_cert }}" ++ + # Server identifier for certification keys + certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" + +@@ -54,11 +59,31 @@ uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" + registrar_ip = "{{ agent.registrar_ip }}" + registrar_port = {{ agent.registrar_port }} + ++# The TLS port of the registrar server (Push Model specific). ++# Used when registrar_tls_enabled is set to true. ++registrar_tls_port = {{ agent.registrar_tls_port }} ++ ++# Enable TLS communication between agent and registrar (Push Model specific). ++# When enabled, the agent uses TLS (server verification only) with the registrar. 
++registrar_tls_enabled = {{ agent.registrar_tls_enabled }} ++ ++# TLS CA certificate for verifying the registrar's server certificate (Push Model specific). ++# Only used when registrar_tls_enabled is true. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++registrar_tls_ca_cert = "{{ agent.registrar_tls_ca_cert }}" ++ ++# The API versions to use when communicating with the registrar (Push Model specific). ++# Supports "default" (all supported), "latest", or a comma-separated list. ++registrar_api_versions = "{{ agent.registrar_api_versions }}" ++ + # Enable mTLS communication between agent, verifier and tenant. + # Details on why setting it to "False" is generally considered insecure can be found + # on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r + enable_agent_mtls = {{ agent.enable_agent_mtls }} + ++# The keylime working directory. ++keylime_dir = "{{ agent.keylime_dir }}" ++ + # Accept invalid TLS certificates (INSECURE - for testing only) + # When enabled, the agent will accept self-signed or invalid certificates + # This option is specific to the push attestation model. +@@ -100,6 +125,14 @@ server_key = "{{ agent.server_key }}" + # If left empty, the private key will not be encrypted. + server_key_password = "{{ agent.server_key_password }}" + ++# The name of the file containing the payload encryption private key. ++# If set as "default", the "payload-private.pem" value is used. ++payload_key = "{{ agent.payload_key }}" ++ ++# Set the password used to encrypt the payload private key file. ++# If left empty, the private key will not be encrypted. ++payload_key_password = "{{ agent.payload_key_password }}" ++ + # The name of the file containing the X509 certificate used as the Keylime agent + # server TLS certificate. + # This certificate must be self signed. +@@ -159,6 +192,9 @@ revocation_cert = "{{ agent.revocation_cert }}" + # action_list in the unzipped payload content. 
+ revocation_actions = "{{ agent.revocation_actions }}" + ++# The path to the directory containing pre-installed revocation action scripts. ++revocation_actions_dir = "{{ agent.revocation_actions_dir }}" ++ + # A script to execute after unzipping the tenant payload. This is like + # cloud-init lite =) Keylime will run it with a /bin/sh environment and + # with a working directory of /var/lib/keylime/secure/unzipped. +@@ -171,6 +207,10 @@ payload_script = "{{ agent.payload_script }}" + # https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r + enable_insecure_payload = {{ agent.enable_insecure_payload }} + ++# Whether to allow running revocation actions sent as part of the payload. ++# Setting to false limits revocation actions to pre-installed ones. ++allow_payload_revocation_actions = {{ agent.allow_payload_revocation_actions }} ++ + # Maximum number of retries for exponential backoff + exponential_backoff_max_retries = {{ agent.exponential_backoff_max_retries }} + # Initial delay in milliseconds for exponential backoff +diff --git a/templates/2.5/mapping.json b/templates/2.5/mapping.json +index 04f89e77a..f3eaf8dbb 100644 +--- a/templates/2.5/mapping.json ++++ b/templates/2.5/mapping.json +@@ -4,13 +4,23 @@ + "components": { + "agent": { + "add": { ++ "keylime_dir": "/var/lib/keylime", ++ "payload_key": "default", ++ "payload_key_password": "", ++ "revocation_actions_dir": "/usr/libexec/keylime", ++ "allow_payload_revocation_actions": "true", + "agent_data_path": "/var/lib/keylime/agent_data.json", + "verifier_url": "https://localhost:8881", ++ "verifier_tls_ca_cert": "default", ++ "registrar_tls_port": "8891", ++ "registrar_tls_enabled": "false", ++ "registrar_tls_ca_cert": "default", ++ "registrar_api_versions": "default", + "exponential_backoff_max_retries": "5", + "exponential_backoff_initial_delay": "10000", +- "exponential_backoff_max_delay": "360000", ++ "exponential_backoff_max_delay": "300000", + 
"certification_keys_server_identifier": "ak", +- "uefi_logs_evidence_version": "1.0", ++ "uefi_logs_evidence_version": "2.1", + "tls_accept_invalid_certs": "false", + "tls_accept_invalid_hostnames": "false" + } + +From bd392633a36839dfa51f86a1568370a87b3ecd37 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 27 Mar 2026 10:57:04 +0100 +Subject: [PATCH 3/3] docs: Add tables with push-attestation configuration + options + +Add comprehensive configuration reference tables for all Keylime +components documenting option names, defaults, config versions, and +environment variable overrides. Tables are organized by component and +separated into common, pull-model, and push-model sections. + +Mark removed agent options with "(removed in 2.5)" and version range +2.0-2.4: +- measure_payload_pcr +- exponential_backoff +- retry_interval +- max_retries + +Add missing agent common options: +- keylime_dir +- payload_key +- payload_key_password +- revocation_actions_dir +- allow_payload_revocation_actions + +Add missing agent push-model options: +- attestation_interval_seconds +- verifier_tls_ca_cert +- registrar_tls_port +- registrar_tls_enabled +- registrar_tls_ca_cert +- registrar_api_versions + +Fix default values to match keylime-agent.conf: +- exponential_backoff_max_delay: 360000 -> 300000 +- uefi_logs_evidence_version: "1.0" -> "2.1" + +Use consistent formatting for default values (unquoted for INI +components, quoted strings and unquoted booleans/integers for TOML). 
+ +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/user_guide/configuration.rst | 897 +++++++++++++++++++++++++++++- + 1 file changed, 893 insertions(+), 4 deletions(-) + +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index 2e50757df..aae534423 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -117,7 +117,7 @@ The environment variables are defined as + The section can be omitted if the option to set is located in the main section + (the section named after the component). Otherwise the section is required. + +-For example, to set the ``webhook_url` option from the `[revocations]`` section in ++For example, to set the ``webhook_url`` option from the ``[revocations]`` section in + the ``verifier.conf`` file, the environment variable to set is + ``KEYLIME_VERIFIER_REVOCATIONS_WEBHOOK_URL``. + +@@ -126,7 +126,7 @@ option from the ``[verifier]`` section in the ``verifier.conf``, the environment + variable to set is ``KEYLIME_VERIFIER_SERVER_KEY`` (note that the section can be + omitted). + +-Configuraton upgrades ++Configuration upgrades + --------------------- + + When updating keylime, it is also recommended to upgrade the configuration to +@@ -183,9 +183,9 @@ configuration files are kept intact as backup and renamed with the ``.bkp`` exte + appended to the file names. + + In case the ``--output`` option is provided to the ``keylime_upgrade_config`` +-script, the configuration files are written even when they were alredy ++script, the configuration files are written even when they were already + up-to-date using the available templates. It can be seen as a way to force the +-creation of the configuration fiels, fitting the options read into the new ++creation of the configuration files, fitting the options read into the new + templates. 
+ + Passing the ``--debug`` option to the ``keylime_upgrade_config``, the logging level +@@ -211,3 +211,892 @@ To ignore the input files and use the default value for all options, the + + Finally, to process a single mapping file, the mapping file path can be passed + via the ``--mapping`` option ++ ++Attestation Models: Pull vs Push ++--------------------------------- ++ ++Keylime supports two attestation models that determine how the verifier obtains ++attestation evidence from agents: ++ ++Pull Model (Traditional) ++~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++In the pull model, the verifier actively polls agents at regular intervals to ++retrieve attestation evidence. This is the default and traditional mode of ++operation. ++ ++**Use Cases:** ++ ++* Traditional deployments where the verifier can directly connect to agents ++* Environments with stable network connectivity ++* When you need fine-grained control over attestation frequency ++ ++Push Model (Agent-Driven) ++~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++In the push model, agents periodically push their attestation evidence to the ++verifier. This mode is useful when the verifier cannot directly connect to ++agents (e.g., agents behind firewalls or NAT). ++ ++**Use Cases:** ++ ++* Agents deployed behind firewalls or NAT ++* Cloud or edge deployments where direct connectivity is limited ++* When agents need to control their own attestation schedule ++ ++.. note:: ++ The push model options were introduced in configuration version 2.5 and ++ require the push attestation agent. ++ ++Configuration Options Reference ++-------------------------------- ++ ++This section provides comprehensive tables of all configuration options for each ++Keylime component, including default values, environment variable overrides, and ++applicability to pull/push attestation models. 
++ ++Verifier Configuration (``/etc/keylime/verifier.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Common Options (Both Models) ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_VERSION`` ++ * - ``uuid`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_UUID`` ++ * - ``ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_IP`` ++ * - ``port`` ++ - ``8881`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PORT`` ++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REGISTRAR_PORT`` ++ * - ``enable_agent_mtls`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_ENABLE_AGENT_MTLS`` ++ * - ``tls_dir`` ++ - ``generate`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TLS_DIR`` ++ * - ``server_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_KEY`` ++ * - ``server_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRUSTED_CLIENT_CA`` ++ * - ``client_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_KEY`` ++ * - ``client_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_KEY_PASSWORD`` ++ * - ``client_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_CLIENT_CERT`` ++ * - ``trusted_server_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRUSTED_SERVER_CA`` ++ * - ``database_url`` ++ - ``sqlite`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DATABASE_URL`` ++ * - ``database_pool_sz_ovfl`` ++ - ``5,10`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DATABASE_POOL_SZ_OVFL`` ++ * - ``auto_migrate_db`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_AUTO_MIGRATE_DB`` ++ * - 
``num_workers`` ++ - ``0`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_NUM_WORKERS`` ++ * - ``max_upload_size`` ++ - ``104857600`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MAX_UPLOAD_SIZE`` ++ * - ``measured_boot_policy_name`` ++ - ``accept-all`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_POLICY_NAME`` ++ * - ``measured_boot_imports`` ++ - ``[]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_IMPORTS`` ++ * - ``measured_boot_evaluate`` ++ - ``once`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MEASURED_BOOT_EVALUATE`` ++ * - ``severity_labels`` ++ - ``["info", "notice", ...]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SEVERITY_LABELS`` ++ * - ``severity_policy`` ++ - ``[{"event_id": ".*", ...}]`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SEVERITY_POLICY`` ++ * - ``ignore_tomtou_errors`` ++ - ``False`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_IGNORE_TOMTOU_ERRORS`` ++ * - ``durable_attestation_import`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_DURABLE_ATTESTATION_IMPORT`` ++ * - ``persistent_store_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_URL`` ++ * - ``transparency_log_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRANSPARENCY_LOG_URL`` ++ * - ``time_stamp_authority_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TIME_STAMP_AUTHORITY_URL`` ++ * - ``time_stamp_authority_certs_path`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TIME_STAMP_AUTHORITY_CERTS_PATH`` ++ * - ``persistent_store_format`` ++ - ``json`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_FORMAT`` ++ * - ``persistent_store_encoding`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_PERSISTENT_STORE_ENCODING`` ++ * - ``transparency_log_sign_algo`` ++ - ``sha256`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_TRANSPARENCY_LOG_SIGN_ALGO`` ++ * - ``signed_attributes`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_SIGNED_ATTRIBUTES`` ++ * - ``require_allow_list_signatures`` ++ - ``False`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REQUIRE_ALLOW_LIST_SIGNATURES`` ++ * - ``authorization_provider`` ++ - ``simple`` ++ - 2.5 ++ - 
``KEYLIME_VERIFIER_AUTHORIZATION_PROVIDER`` ++ * - ``cert_subject_alternative_names`` ++ - (empty) ++ - 2.5 ++ - ``KEYLIME_VERIFIER_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ ++Pull Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``quote_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_QUOTE_INTERVAL`` ++ * - ``retry_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_RETRY_INTERVAL`` ++ * - ``max_retries`` ++ - ``5`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_MAX_RETRIES`` ++ * - ``exponential_backoff`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_EXPONENTIAL_BACKOFF`` ++ * - ``request_timeout`` ++ - ``60.0`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REQUEST_TIMEOUT`` ++ ++Push Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 25 12 15 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``mode`` ++ - ``pull`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_MODE`` ++ * - ``challenge_lifetime`` ++ - ``1800`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_CHALLENGE_LIFETIME`` ++ * - ``verification_timeout`` ++ - ``0`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_VERIFICATION_TIMEOUT`` ++ * - ``session_create_rate_limit_per_ip`` ++ - ``50`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_PER_IP`` ++ * - ``session_create_rate_limit_window_ip`` ++ - ``60`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_WINDOW_IP`` ++ * - ``session_create_rate_limit_per_agent`` ++ - ``15`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_PER_AGENT`` ++ * - ``session_create_rate_limit_window_agent`` ++ - ``60`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_CREATE_RATE_LIMIT_WINDOW_AGENT`` ++ * - ``session_lifetime`` ++ - ``180`` ++ - 2.5 ++ - ``KEYLIME_VERIFIER_SESSION_LIFETIME`` ++ * - ``extend_token_on_attestation`` ++ - ``True`` ++ - 2.5 ++ - 
``KEYLIME_VERIFIER_EXTEND_TOKEN_ON_ATTESTATION`` ++ ++Revocations Section ++^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``enabled_revocation_notifications`` ++ - ``['agent']`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ENABLED_REVOCATION_NOTIFICATIONS`` ++ * - ``zmq_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ZMQ_IP`` ++ * - ``zmq_port`` ++ - ``8992`` ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_ZMQ_PORT`` ++ * - ``webhook_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_VERIFIER_REVOCATIONS_WEBHOOK_URL`` ++ ++Registrar Configuration (``/etc/keylime/registrar.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_VERSION`` ++ * - ``ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_IP`` ++ * - ``port`` ++ - ``8890`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PORT`` ++ * - ``tls_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TLS_PORT`` ++ * - ``tls_dir`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TLS_DIR`` ++ * - ``server_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_KEY`` ++ * - ``server_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRUSTED_CLIENT_CA`` ++ * - ``database_url`` ++ - ``sqlite`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_DATABASE_URL`` ++ * - ``database_pool_sz_ovfl`` ++ - ``5,10`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_DATABASE_POOL_SZ_OVFL`` ++ * - ``auto_migrate_db`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_AUTO_MIGRATE_DB`` ++ * - ``durable_attestation_import`` ++ - (empty) ++ - 
2.0 ++ - ``KEYLIME_REGISTRAR_DURABLE_ATTESTATION_IMPORT`` ++ * - ``persistent_store_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_URL`` ++ * - ``transparency_log_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRANSPARENCY_LOG_URL`` ++ * - ``time_stamp_authority_url`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TIME_STAMP_AUTHORITY_URL`` ++ * - ``time_stamp_authority_certs_path`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TIME_STAMP_AUTHORITY_CERTS_PATH`` ++ * - ``persistent_store_format`` ++ - ``json`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_FORMAT`` ++ * - ``persistent_store_encoding`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_PERSISTENT_STORE_ENCODING`` ++ * - ``transparency_log_sign_algo`` ++ - ``sha256`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_TRANSPARENCY_LOG_SIGN_ALGO`` ++ * - ``signed_attributes`` ++ - ``ek_tpm,aik_tpm,ekcert`` ++ - 2.0 ++ - ``KEYLIME_REGISTRAR_SIGNED_ATTRIBUTES`` ++ * - ``tpm_identity`` ++ - ``default`` ++ - 2.1 ++ - ``KEYLIME_REGISTRAR_TPM_IDENTITY`` ++ * - ``malformed_cert_action`` ++ - ``warn`` ++ - 2.4 ++ - ``KEYLIME_REGISTRAR_MALFORMED_CERT_ACTION`` ++ * - ``authorization_provider`` ++ - ``simple`` ++ - 2.5 ++ - ``KEYLIME_REGISTRAR_AUTHORIZATION_PROVIDER`` ++ * - ``cert_subject_alternative_names`` ++ - (empty) ++ - 2.5 ++ - ``KEYLIME_REGISTRAR_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ ++Tenant Configuration (``/etc/keylime/tenant.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 30 12 15 43 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERSION`` ++ * - ``verifier_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERIFIER_IP`` ++ * - ``verifier_port`` ++ - ``8881`` ++ - 2.0 ++ - ``KEYLIME_TENANT_VERIFIER_PORT`` ++ * - ``registrar_ip`` ++ - ``127.0.0.1`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8891`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REGISTRAR_PORT`` ++ * - ``tls_dir`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TLS_DIR`` ++ * - ``enable_agent_mtls`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ENABLE_AGENT_MTLS`` ++ * - ``client_key`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_KEY`` ++ * - ``client_key_password`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_KEY_PASSWORD`` ++ * - ``client_cert`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_CLIENT_CERT`` ++ * - ``trusted_server_ca`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TRUSTED_SERVER_CA`` ++ * - ``tpm_cert_store`` ++ - ``/var/lib/keylime/tpm_cert_store`` ++ - 2.0 ++ - ``KEYLIME_TENANT_TPM_CERT_STORE`` ++ * - ``max_payload_size`` ++ - ``1048576`` ++ - 2.0 ++ - ``KEYLIME_TENANT_MAX_PAYLOAD_SIZE`` ++ * - ``accept_tpm_hash_algs`` ++ - ``['sha512', 'sha384', 'sha256']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_HASH_ALGS`` ++ * - ``accept_tpm_encryption_algs`` ++ - ``['ecc', 'rsa']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_ENCRYPTION_ALGS`` ++ * - ``accept_tpm_signing_algs`` ++ - ``['ecschnorr', 'rsassa']`` ++ - 2.0 ++ - ``KEYLIME_TENANT_ACCEPT_TPM_SIGNING_ALGS`` ++ * - ``exponential_backoff`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_EXPONENTIAL_BACKOFF`` ++ * - ``retry_interval`` ++ - ``2`` ++ - 2.0 ++ - ``KEYLIME_TENANT_RETRY_INTERVAL`` ++ * - ``max_retries`` ++ - ``5`` ++ - 2.0 ++ - ``KEYLIME_TENANT_MAX_RETRIES`` ++ * - ``request_timeout`` ++ - ``60`` ++ - 2.0 ++ - 
``KEYLIME_TENANT_REQUEST_TIMEOUT`` ++ * - ``require_ek_cert`` ++ - ``True`` ++ - 2.0 ++ - ``KEYLIME_TENANT_REQUIRE_EK_CERT`` ++ * - ``ek_check_script`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_EK_CHECK_SCRIPT`` ++ * - ``mb_refstate`` ++ - (empty) ++ - 2.0 ++ - ``KEYLIME_TENANT_MB_REFSTATE`` ++ ++CA Configuration (``/etc/keylime/ca.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 30 15 15 40 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``2.5`` ++ - 2.0 ++ - ``KEYLIME_CA_VERSION`` ++ * - ``password`` ++ - ``default`` ++ - 2.0 ++ - ``KEYLIME_CA_PASSWORD`` ++ * - ``cert_country`` ++ - ``US`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_COUNTRY`` ++ * - ``cert_ca_name`` ++ - ``Keylime Certificate Authority`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CA_NAME`` ++ * - ``cert_state`` ++ - ``MA`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_STATE`` ++ * - ``cert_locality`` ++ - ``Lexington`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_LOCALITY`` ++ * - ``cert_organization`` ++ - ``MITLL`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_ORGANIZATION`` ++ * - ``cert_org_unit`` ++ - ``53`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_ORG_UNIT`` ++ * - ``cert_ca_lifetime`` ++ - ``3650`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CA_LIFETIME`` ++ * - ``cert_lifetime`` ++ - ``365`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_LIFETIME`` ++ * - ``cert_bits`` ++ - ``2048`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_BITS`` ++ * - ``cert_crl_dist`` ++ - ``http://localhost:38080/crl`` ++ - 2.0 ++ - ``KEYLIME_CA_CERT_CRL_DIST`` ++ ++Agent Configuration (``/etc/keylime/agent.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. warning:: ++ The Python agent is deprecated and will be removed in version 7.0.0! ++ Please migrate to the Rust-based agent from https://github.com/keylime/rust-keylime/ ++ ++Common Options (Both Models) ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. 
list-table:: ++ :header-rows: 1 ++ :widths: 28 12 12 48 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``version`` ++ - ``"2.5"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_VERSION`` ++ * - ``api_versions`` ++ - ``"default"`` ++ - 2.4 ++ - ``KEYLIME_AGENT_API_VERSIONS`` ++ * - ``uuid`` ++ - ``"d432fbb3-d2f1-4a97-9ef7-75bd81c00000"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_UUID`` ++ * - ``ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_IP`` ++ * - ``port`` ++ - ``9002`` ++ - 2.0 ++ - ``KEYLIME_AGENT_PORT`` ++ * - ``contact_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_CONTACT_IP`` ++ * - ``contact_port`` ++ - ``9002`` ++ - 2.0 ++ - ``KEYLIME_AGENT_CONTACT_PORT`` ++ * - ``registrar_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REGISTRAR_IP`` ++ * - ``registrar_port`` ++ - ``8890`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REGISTRAR_PORT`` ++ * - ``enable_agent_mtls`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_AGENT_MTLS`` ++ * - ``tls_dir`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TLS_DIR`` ++ * - ``server_key`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_KEY`` ++ * - ``server_key_password`` ++ - ``""`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_KEY_PASSWORD`` ++ * - ``server_cert`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SERVER_CERT`` ++ * - ``trusted_client_ca`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TRUSTED_CLIENT_CA`` ++ * - ``enc_keyname`` ++ - ``"derived_tci_key"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENC_KEYNAME`` ++ * - ``dec_payload_file`` ++ - ``"decrypted_payload"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_DEC_PAYLOAD_FILE`` ++ * - ``secure_size`` ++ - ``"1m"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_SECURE_SIZE`` ++ * - ``tpm_ownerpassword`` ++ - ``""`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_OWNERPASSWORD`` ++ * - ``extract_payload_zip`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_EXTRACT_PAYLOAD_ZIP`` ++ * - ``enable_revocation_notifications`` ++ - ``true`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_REVOCATION_NOTIFICATIONS`` ++ * 
- ``revocation_notification_ip`` ++ - ``"127.0.0.1"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_NOTIFICATION_IP`` ++ * - ``revocation_notification_port`` ++ - ``8992`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_NOTIFICATION_PORT`` ++ * - ``revocation_cert`` ++ - ``"default"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_CERT`` ++ * - ``revocation_actions`` ++ - ``"[]"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_REVOCATION_ACTIONS`` ++ * - ``payload_script`` ++ - ``"autorun.sh"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_PAYLOAD_SCRIPT`` ++ * - ``enable_insecure_payload`` ++ - ``false`` ++ - 2.0 ++ - ``KEYLIME_AGENT_ENABLE_INSECURE_PAYLOAD`` ++ * - ``measure_payload_pcr`` (removed in 2.5) ++ - ``-1`` ++ - 2.0-2.4 ++ - ``KEYLIME_AGENT_MEASURE_PAYLOAD_PCR`` ++ * - ``exponential_backoff`` (removed in 2.5) ++ - ``true`` ++ - 2.0-2.4 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF`` ++ * - ``retry_interval`` (removed in 2.5) ++ - ``2`` ++ - 2.0-2.4 ++ - ``KEYLIME_AGENT_RETRY_INTERVAL`` ++ * - ``max_retries`` (removed in 2.5) ++ - ``4`` ++ - 2.0-2.4 ++ - ``KEYLIME_AGENT_MAX_RETRIES`` ++ * - ``tpm_hash_alg`` ++ - ``"sha256"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_HASH_ALG`` ++ * - ``tpm_encryption_alg`` ++ - ``"rsa"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_ENCRYPTION_ALG`` ++ * - ``tpm_signing_alg`` ++ - ``"rsassa"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_TPM_SIGNING_ALG`` ++ * - ``ek_handle`` ++ - ``"generate"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_EK_HANDLE`` ++ * - ``enable_iak_idevid`` ++ - ``false`` ++ - 2.1 ++ - ``KEYLIME_AGENT_ENABLE_IAK_IDEVID`` ++ * - ``iak_idevid_template`` ++ - ``"detect"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_TEMPLATE`` ++ * - ``iak_idevid_asymmetric_alg`` ++ - ``"rsa"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_ASYMMETRIC_ALG`` ++ * - ``iak_idevid_name_alg`` ++ - ``"sha256"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_IDEVID_NAME_ALG`` ++ * - ``idevid_password`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IDEVID_PASSWORD`` ++ * - ``idevid_handle`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IDEVID_HANDLE`` ++ * - ``iak_password`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IAK_PASSWORD`` ++ * -
``iak_handle`` ++ - ``""`` ++ - 2.3 ++ - ``KEYLIME_AGENT_IAK_HANDLE`` ++ * - ``iak_cert`` ++ - ``"default"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IAK_CERT`` ++ * - ``idevid_cert`` ++ - ``"default"`` ++ - 2.1 ++ - ``KEYLIME_AGENT_IDEVID_CERT`` ++ * - ``run_as`` ++ - ``"keylime:tss"`` ++ - 2.0 ++ - ``KEYLIME_AGENT_RUN_AS`` ++ * - ``ima_ml_path`` ++ - ``"default"`` ++ - 2.2 ++ - ``KEYLIME_AGENT_IMA_ML_PATH`` ++ * - ``measuredboot_ml_path`` ++ - ``"default"`` ++ - 2.2 ++ - ``KEYLIME_AGENT_MEASUREDBOOT_ML_PATH`` ++ ++Push Model Specific Options ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 35 12 12 41 ++ ++ * - Option ++ - Default ++ - Version ++ - Environment Variable ++ * - ``agent_data_path`` ++ - ``"/var/lib/keylime/agent_data.json"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_AGENT_DATA_PATH`` ++ * - ``verifier_url`` ++ - ``"https://localhost:8881"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_VERIFIER_URL`` ++ * - ``certification_keys_server_identifier`` ++ - ``"ak"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_CERTIFICATION_KEYS_SERVER_IDENTIFIER`` ++ * - ``uefi_logs_evidence_version`` ++ - ``"2.1"`` ++ - 2.5 ++ - ``KEYLIME_AGENT_UEFI_LOGS_EVIDENCE_VERSION`` ++ * - ``tls_accept_invalid_certs`` ++ - ``false`` ++ - 2.5 ++ - ``KEYLIME_AGENT_TLS_ACCEPT_INVALID_CERTS`` ++ * - ``tls_accept_invalid_hostnames`` ++ - ``false`` ++ - 2.5 ++ - ``KEYLIME_AGENT_TLS_ACCEPT_INVALID_HOSTNAMES`` ++ * - ``exponential_backoff_max_retries`` ++ - ``5`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_MAX_RETRIES`` ++ * - ``exponential_backoff_initial_delay`` ++ - ``10000`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_INITIAL_DELAY`` ++ * - ``exponential_backoff_max_delay`` ++ - ``300000`` ++ - 2.5 ++ - ``KEYLIME_AGENT_EXPONENTIAL_BACKOFF_MAX_DELAY`` ++ ++Logging Configuration (``/etc/keylime/logging.conf``) ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++The logging configuration follows Python's standard logging configuration format.
++See the Python logging documentation for details on configuring handlers, formatters, ++and loggers. The version option can be overridden with ``KEYLIME_LOGGING_VERSION``. ++ ++Configuration Version History ++------------------------------ ++ ++.. list-table:: ++ :header-rows: 1 ++ :widths: 15 70 ++ ++ * - Version ++ - Changes ++ * - 2.0 ++ - Base configuration structure, pull model support ++ * - 2.1 ++ - Added IAK/IDevID support, ``tpm_identity`` for registrar ++ * - 2.2 ++ - Added ``ima_ml_path`` and ``measuredboot_ml_path`` configuration ++ * - 2.3 ++ - Added persisted key handles for IAK/IDevID (``iak_handle``, ``idevid_handle``) ++ * - 2.4 ++ - Added ``api_versions`` for agent, ``malformed_cert_action`` for registrar ++ * - 2.5 ++ - **Push model support**: Added ``mode``, ``challenge_lifetime``, ``verification_timeout``, session rate limiting and lifetime options for verifier; ``verifier_url``, ``agent_data_path``, TLS validation, exponential backoff options for agent. Added ``authorization_provider`` and ``cert_subject_alternative_names`` for verifier and registrar ++ ++For detailed information on all configuration options for each component, refer ++to the configuration files in ``/etc/keylime/`` and their inline documentation. diff --git a/0017-verifier-graceful-shutdown.patch b/0017-verifier-graceful-shutdown.patch new file mode 100644 index 0000000..a90637c --- /dev/null +++ b/0017-verifier-graceful-shutdown.patch @@ -0,0 +1,2373 @@ +From cb944ee9c178f7a717e904ddbf85aac5b27a2eac Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Fri, 17 Apr 2026 14:52:35 +0200 +Subject: [PATCH] verifier: Implement graceful shutdown + +Implement graceful shutdown, cancel pending retries, and drain in-flight +work. 
+ +Backported from https://github.com/keylime/keylime/pull/1869 + +Signed-off-by: Anderson Toshiyuki Sasaki +--- + docs/man/keylime_verifier.8.rst | 1 + + docs/user_guide/configuration.rst | 6 + + keylime/cloud_verifier_tornado.py | 180 ++++++++++++--- + keylime/push_agent_monitor.py | 23 ++ + keylime/revocation_notifier.py | 10 +- + keylime/shared_data.py | 65 +++++- + keylime/shutdown.py | 21 ++ + keylime/web/base/server.py | 111 +++++++++- + keylime/web/verifier_server.py | 122 +++++------ + templates/2.6/agent.j2 | 313 ++++++++++++++++++++++++++ + templates/2.6/ca.j2 | 39 ++++ + templates/2.6/logging.j2 | 33 +++ + templates/2.6/mapping.json | 11 + + templates/2.6/registrar.j2 | 168 ++++++++++++++ + templates/2.6/tenant.j2 | 130 +++++++++++ + templates/2.6/verifier.j2 | 350 ++++++++++++++++++++++++++++++ + test/test_shutdown.py | 210 ++++++++++++++++++ + test/test_verifier_server.py | 82 +++---- + 18 files changed, 1722 insertions(+), 153 deletions(-) + create mode 100644 keylime/shutdown.py + create mode 100644 templates/2.6/agent.j2 + create mode 100644 templates/2.6/ca.j2 + create mode 100644 templates/2.6/logging.j2 + create mode 100644 templates/2.6/mapping.json + create mode 100644 templates/2.6/registrar.j2 + create mode 100644 templates/2.6/tenant.j2 + create mode 100644 templates/2.6/verifier.j2 + create mode 100644 test/test_shutdown.py + +diff --git a/docs/man/keylime_verifier.8.rst b/docs/man/keylime_verifier.8.rst +index 5303a5f..d22d211 100644 +--- a/docs/man/keylime_verifier.8.rst ++++ b/docs/man/keylime_verifier.8.rst +@@ -53,6 +53,7 @@ Essentials: + - **quote_interval**: Time between integrity checks (seconds) + - **max_upload_size**: Upload size limit (bytes) + - **request_timeout**: Agent request timeout (seconds) ++- **shutdown_drain_timeout**: Max time (seconds) to wait for in-flight operations during shutdown + - **measured_boot_policy_name**, **measured_boot_imports**, **measured_boot_evaluate**: measured boot policy settings + - 
**severity_labels**, **severity_policy**: revocation severity config + - **ignore_tomtou_errors**: handle ToMToU IMA entries (bool) +diff --git a/docs/user_guide/configuration.rst b/docs/user_guide/configuration.rst +index aae5344..327c370 100644 +--- a/docs/user_guide/configuration.rst ++++ b/docs/user_guide/configuration.rst +@@ -425,6 +425,10 @@ Common Options (Both Models) + - (empty) + - 2.5 + - ``KEYLIME_VERIFIER_CERT_SUBJECT_ALTERNATIVE_NAMES`` ++ * - ``shutdown_drain_timeout`` ++ - ``10`` ++ - 2.6 ++ - ``KEYLIME_VERIFIER_SHUTDOWN_DRAIN_TIMEOUT`` + + Pull Model Specific Options + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -1097,6 +1101,8 @@ Configuration Version History + - Added ``api_versions`` for agent, ``malformed_cert_action`` for registrar + * - 2.5 + - **Push model support**: Added ``mode``, ``challenge_lifetime``, ``verification_timeout``, session rate limiting and lifetime options for verifier; ``verifier_url``, ``agent_data_path``, TLS validation, exponential backoff options for agent. Added ``authorization_provider`` and ``cert_subject_alternative_names`` for verifier and registrar ++ * - 2.6 ++ - Added ``shutdown_drain_timeout`` for verifier graceful shutdown + + For detailed information on all configuration options for each component, refer + to the configuration files in ``/etc/keylime/`` and their inline documentation. +diff --git a/keylime/cloud_verifier_tornado.py b/keylime/cloud_verifier_tornado.py +index 75f117b..eb57de5 100644 +--- a/keylime/cloud_verifier_tornado.py ++++ b/keylime/cloud_verifier_tornado.py +@@ -29,6 +29,7 @@ from keylime import ( + keylime_logging, + push_agent_monitor, + revocation_notifier, ++ shutdown, + signing, + tornado_requests, + web_util, +@@ -175,6 +176,29 @@ exclude_db: Dict[str, Any] = { + # events (quote polls, retries). Used to cancel them all on shutdown. + _pending_events: Dict[str, object] = {} + ++# Counter of currently executing process_agent() coroutines. 
The shutdown ++# handler waits for this to reach zero before stopping the IOLoop so that ++# in-flight DB writes can finish. ++_active_operations = 0 ++# Event signalled when _active_operations drops to zero during shutdown. ++_operations_drained = asyncio.Event() ++_operations_drained.set() # initially no operations are active ++ ++ ++def _enter_operation() -> None: ++ """Increment the active operations counter.""" ++ global _active_operations ++ _active_operations += 1 ++ _operations_drained.clear() ++ ++ ++def _exit_operation() -> None: ++ """Decrement the active operations counter; signal if drained.""" ++ global _active_operations ++ _active_operations -= 1 ++ if _active_operations <= 0: ++ _operations_drained.set() ++ + + def _register_pending_event(agent: Dict[str, Any], handle: object) -> None: + """Track a pending IOLoop timeout in both the agent dict and the global registry. +@@ -201,6 +225,38 @@ def _cancel_pending_event(agent: Dict[str, Any]) -> None: + logger.debug("Could not remove pending event for agent %s: %s", agent["agent_id"], e) + + ++def get_active_operations() -> int: ++ """Return the number of currently executing process_agent() coroutines.""" ++ return _active_operations ++ ++ ++async def wait_for_drain(timeout: float) -> bool: ++ """Wait up to *timeout* seconds for all active operations to finish. ++ ++ Returns True if all operations drained, False if the timeout expired. ++ """ ++ try: ++ await asyncio.wait_for(_operations_drained.wait(), timeout=timeout) ++ return True ++ except asyncio.TimeoutError: ++ return False ++ ++ ++def cancel_all_pending_events() -> None: ++ """Cancel every tracked pending IOLoop timeout. 
Called on shutdown.""" ++ if not _pending_events: ++ return ++ io_loop = tornado.ioloop.IOLoop.current() ++ for agent_id, handle in _pending_events.items(): ++ try: ++ io_loop.remove_timeout(handle) ++ except Exception as e: ++ logger.debug("Could not remove pending event for agent %s: %s", agent_id, e) ++ count = len(_pending_events) ++ _pending_events.clear() ++ logger.info("Cancelled %d pending attestation event(s) for shutdown", count) ++ ++ + def _from_db_obj(agent_db_obj: VerfierMain) -> Dict[str, Any]: + fields = [ + "agent_id", +@@ -2159,6 +2215,17 @@ async def invoke_get_quote( + need_pubkey: bool, + timeout: float = DEFAULT_TIMEOUT, + ) -> None: ++ # Clear tracking only — the timeout already fired (this *is* the callback), ++ # so there is no handle to cancel via remove_timeout(). Done before the ++ # shutdown check so tracking state is cleaned up even on early return. ++ if agent.get("pending_event") is not None: ++ agent["pending_event"] = None ++ _pending_events.pop(agent["agent_id"], None) ++ ++ if shutdown.is_shutting_down(): ++ logger.debug("Skipping get_quote for agent %s — shutting down", agent["agent_id"]) ++ return ++ + failure = Failure(Component.INTERNAL, ["verifier"]) + + params = cloud_verifier_common.prepare_get_quote(agent) +@@ -2262,10 +2329,17 @@ async def invoke_get_quote( + + + async def invoke_provide_v(agent: Dict[str, Any], timeout: float = DEFAULT_TIMEOUT) -> None: +- failure = Failure(Component.INTERNAL, ["verifier"]) +- ++ # Clear tracking only — the timeout already fired (this *is* the callback), ++ # so there is no handle to cancel via remove_timeout(). Done before the ++ # shutdown check so tracking state is cleaned up even on early return. 
+ if agent.get("pending_event") is not None: + agent["pending_event"] = None ++ _pending_events.pop(agent["agent_id"], None) ++ ++ if shutdown.is_shutting_down(): ++ logger.debug("Skipping provide_v for agent %s — shutting down", agent["agent_id"]) ++ return ++ failure = Failure(Component.INTERNAL, ["verifier"]) + + v_json_message = cloud_verifier_common.prepare_v(agent) + +@@ -2422,6 +2496,14 @@ async def notify_error( + async def process_agent( + agent: Dict[str, Any], new_operational_state: int, failure: Failure = Failure(Component.INTERNAL, ["verifier"]) + ) -> None: ++ # During shutdown, allow terminal-state transitions (FAILED, INVALID_QUOTE) ++ # through so that final DB writes and revocation notifications complete. ++ # Only skip non-terminal transitions that would schedule new polls/retries. ++ if shutdown.is_shutting_down() and new_operational_state not in (states.FAILED, states.INVALID_QUOTE): ++ logger.debug("Skipping process_agent for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ _enter_operation() + try: # pylint: disable=R1702 + main_agent_operational_state = agent["operational_state"] + stored_agent = None +@@ -2452,15 +2534,13 @@ async def process_agent( + # if the stored agent could not be recovered from the database, stop polling + if not stored_agent: + logger.warning("Unable to retrieve agent %s from database. 
Stopping polling", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + return + + # if the user did terminated this agent + if stored_agent.operational_state == states.TERMINATED: # pyright: ignore + logger.warning("Agent %s terminated by user.", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + + # Second database operation - delete agent + with session_context() as session: +@@ -2470,8 +2550,7 @@ async def process_agent( + # if the user tells us to stop polling because the tenant quote check failed + if stored_agent.operational_state == states.TENANT_FAILED: # pyright: ignore + logger.warning("Agent %s has failed tenant quote. Stopping polling", agent["agent_id"]) +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + return + + # Use the request timeout stored in the agent dict (read from the +@@ -2498,8 +2577,7 @@ async def process_agent( + + # When the failure is irrecoverable we stop polling the agent + if not failure.recoverable or failure.highest_severity == MAX_SEVERITY_LABEL: +- if agent["pending_event"] is not None: +- tornado.ioloop.IOLoop.current().remove_timeout(agent["pending_event"]) ++ _cancel_pending_event(agent) + + # Third database operation - update agent with failure state + with session_context() as session: +@@ -2575,6 +2653,10 @@ async def process_agent( + "Setting up callback to check agent ID %s again in %f seconds", agent["agent_id"], interval + ) + ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling next poll for agent %s — shutting down", agent["agent_id"]) ++ return ++ + pending = tornado.ioloop.IOLoop.current().call_later( + # type: ignore # due to python <3.9 + interval, +@@ -2585,7 +2667,7 @@ 
async def process_agent( + False, + timeout=timeout, + ) +- agent["pending_event"] = pending ++ _register_pending_event(agent, pending) + return + + maxr = config.getint("verifier", "max_retries") +@@ -2617,7 +2699,11 @@ async def process_agent( + maxr, + next_retry, + ) +- tornado.ioloop.IOLoop.current().call_later( ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling retry for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ pending = tornado.ioloop.IOLoop.current().call_later( + # type: ignore # due to python <3.9 + next_retry, + invoke_get_quote, +@@ -2627,6 +2713,7 @@ async def process_agent( + True, + timeout=timeout, + ) ++ _register_pending_event(agent, pending) + return + + if main_agent_operational_state == states.PROVIDE_V and new_operational_state == states.PROVIDE_V_RETRY: +@@ -2651,9 +2738,17 @@ async def process_agent( + maxr, + next_retry, + ) +- tornado.ioloop.IOLoop.current().call_later( +- next_retry, invoke_provide_v, agent # type: ignore # due to python <3.9 ++ if shutdown.is_shutting_down(): ++ logger.debug("Not scheduling retry for agent %s — shutting down", agent["agent_id"]) ++ return ++ ++ pending = tornado.ioloop.IOLoop.current().call_later( ++ next_retry, # type: ignore # due to python <3.9 ++ invoke_provide_v, ++ agent, ++ timeout, + ) ++ _register_pending_event(agent, pending) + return + raise Exception("nothing should ever fall out of this!") + +@@ -2663,6 +2758,8 @@ async def process_agent( + "exception", {"context": "Agent caused the verifier to throw an exception", "data": str(e)}, False + ) + await process_agent(agent, states.FAILED, failure) ++ finally: ++ _exit_operation() + + + async def activate_agents(agents: List[VerfierMain], verifier_ip: str, verifier_port: int) -> None: +@@ -2769,31 +2866,62 @@ def main() -> None: + server = tornado.httpserver.HTTPServer(app, ssl_options=ssl_ctx, max_buffer_size=max_upload_size) + server.add_sockets(sockets) + +- def server_sig_handler(*_: Any) -> None: +- 
logger.info("Shutting down server %s..", task_id) ++ # Hold strong references to async tasks to prevent GC from collecting them mid-run ++ _background_tasks: List[asyncio.Task[None]] = [] ++ ++ def server_sig_handler(signame: str = "signal") -> None: ++ if shutdown.is_shutting_down(): ++ logger.warning("Shutdown already in progress, ignoring %s (server %s)", signame, task_id) ++ return ++ logger.info("Received %s, shutting down server %s..", signame, task_id) ++ ++ # Signal all attestation loops to stop scheduling new work ++ shutdown.request_shutdown() ++ + # Stop server to not accept new incoming connections + server.stop() + +- # Gracefully shutdown webhook workers to prevent connection errors +- if "webhook" in revocation_notifier.get_notifiers(): +- revocation_notifier.shutdown_webhook_workers() ++ # Cancel all pending attestation timeouts (retries, polls) ++ cancel_all_pending_events() ++ push_agent_monitor.cancel_all_timeouts() + +- # Wait for all connections to be closed and then stop ioloop ++ # Wait for in-flight operations, then close connections and stop + async def stop() -> None: +- await server.close_all_connections() +- tornado.ioloop.IOLoop.current().stop() ++ try: ++ # Give in-flight process_agent() coroutines time to finish ++ # DB writes and revocation notifications before tearing ++ # down webhook workers. ++ drain_timeout = config.getfloat("verifier", "shutdown_drain_timeout", fallback=10.0) ++ drained = await wait_for_drain(drain_timeout) ++ if not drained: ++ logger.warning( ++ "Shutting down with %d operation(s) still active after %.1fs", ++ get_active_operations(), ++ drain_timeout, ++ ) ++ ++ # Shutdown webhook workers after draining so revocation ++ # notifications from in-flight attestations are delivered. 
++ if "webhook" in revocation_notifier.get_notifiers(): ++ revocation_notifier.shutdown_webhook_workers() ++ ++ await server.close_all_connections() ++ except Exception: ++ logger.exception("Error during shutdown cleanup") ++ finally: ++ tornado.ioloop.IOLoop.current().stop() + +- asyncio.ensure_future(stop()) ++ _background_tasks.append(asyncio.ensure_future(stop())) + + # Attach signal handler to ioloop. + # Do not use signal.signal(..) for that because it does not work! + loop = asyncio.get_event_loop() +- loop.add_signal_handler(signal.SIGINT, server_sig_handler) +- loop.add_signal_handler(signal.SIGTERM, server_sig_handler) ++ loop.add_signal_handler(signal.SIGINT, lambda: server_sig_handler("SIGINT")) ++ loop.add_signal_handler(signal.SIGTERM, lambda: server_sig_handler("SIGTERM")) + + server.start() + # Reactivate agents +- asyncio.ensure_future(activate_agents(agents, verifier_host, int(verifier_port))) ++ _background_tasks.append(asyncio.ensure_future(activate_agents(agents, verifier_host, int(verifier_port)))) + tornado.ioloop.IOLoop.current().start() + logger.debug("Server %s stopped.", task_id) + sys.exit(0) +diff --git a/keylime/push_agent_monitor.py b/keylime/push_agent_monitor.py +index f41befc..6537a31 100644 +--- a/keylime/push_agent_monitor.py ++++ b/keylime/push_agent_monitor.py +@@ -171,6 +171,29 @@ def cancel_agent_timeout(agent_id: str) -> None: + logger.error("Error cancelling timeout for agent %s: %s", agent_id, e) + + ++def cancel_all_timeouts() -> None: ++ """Cancel all scheduled PUSH mode agent timeouts. ++ ++ Called during shutdown to prevent timeout callbacks from firing ++ against a stopping event loop. 
++ """ ++ with _agent_timeout_handles_lock: ++ handles = dict(_agent_timeout_handles) ++ _agent_timeout_handles.clear() ++ ++ if not handles: ++ return ++ ++ io_loop = tornado.ioloop.IOLoop.current() ++ for agent_id, handle in handles.items(): ++ try: ++ io_loop.remove_timeout(handle) ++ except Exception as e: ++ logger.debug("Could not remove timeout for agent %s during shutdown: %s", agent_id, e) ++ ++ logger.info("Cancelled %d PUSH mode agent timeout(s) for shutdown", len(handles)) ++ ++ + def check_push_agent_timeouts() -> None: + """Check all PUSH mode agents for timeouts and mark failed ones. + +diff --git a/keylime/revocation_notifier.py b/keylime/revocation_notifier.py +index abab08b..f7efece 100644 +--- a/keylime/revocation_notifier.py ++++ b/keylime/revocation_notifier.py +@@ -259,9 +259,13 @@ def stop_broker() -> None: + + + def shutdown_webhook_workers() -> None: +- """Convenience function to shutdown webhook workers using the global manager.""" +- manager = _get_webhook_manager() +- manager.shutdown_workers() ++ """Shutdown webhook workers if the manager was ever initialized. ++ ++ If no revocation notifications were sent in this process, the manager ++ is still None and there is nothing to shut down. ++ """ ++ if _webhook_manager is not None: ++ _webhook_manager.shutdown_workers() + + + def notify(tosend: Dict[str, Any]) -> None: +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index a415496..09cbb97 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -6,6 +6,8 @@ using multiprocessing.Manager(). 
+ + import atexit + import multiprocessing as mp ++import multiprocessing.process ++import os + import threading + import time + from typing import Any, Dict, List, Optional +@@ -137,8 +139,6 @@ class SharedDataManager: + # Register handler to reinitialize manager connection after fork + # This is needed because Manager uses network connections that don't survive fork + try: +- import os # pylint: disable=import-outside-toplevel +- + self._parent_pid = os.getpid() + logger.debug("SharedDataManager initialized in process %d", self._parent_pid) + except Exception as e: +@@ -283,15 +283,48 @@ class SharedDataManager: + """Cleanup shared resources. + + This is automatically called on exit but can be called manually +- for explicit cleanup. ++ for explicit cleanup. Only the parent process (the one that ++ created the Manager) is allowed to shut it down; child workers ++ forked from the parent skip the call to avoid the ++ ``AssertionError: can only join a child process`` raised by ++ ``multiprocessing`` when a non-parent tries to join. + """ +- if hasattr(self, "_manager"): +- logger.debug("Shutting down SharedDataManager") +- try: +- self._manager.shutdown() +- logger.info("SharedDataManager shutdown complete") +- except Exception as e: +- logger.error("Error during SharedDataManager shutdown: %s", e) ++ if not hasattr(self, "_manager"): ++ return ++ ++ if hasattr(self, "_parent_pid") and os.getpid() != self._parent_pid: ++ logger.debug( ++ "Skipping SharedDataManager shutdown in child process %d (parent is %d)", ++ os.getpid(), ++ self._parent_pid, ++ ) ++ return ++ ++ logger.debug("Shutting down SharedDataManager") ++ try: ++ self._manager.shutdown() ++ logger.info("SharedDataManager shutdown complete") ++ except Exception: ++ logger.exception("Error during SharedDataManager shutdown") ++ ++ def deregister_child(self) -> None: ++ """Remove the Manager's server process from multiprocessing's child tracking. 
++ ++ Must be called in each forked worker **after** ``fork()``. Without ++ this, Python's ``multiprocessing.util._exit_function`` atexit handler ++ tries to ``join()`` the Manager server process in every child worker, ++ causing ``AssertionError: can only join a child process`` because the ++ Manager was spawned by the parent, not the child. ++ """ ++ # The Manager's server process is stored in _manager._process ++ server_process = getattr(self._manager, "_process", None) ++ if server_process is not None: ++ multiprocessing.process._children.discard(server_process) # type: ignore[attr-defined] # pylint: disable=protected-access ++ logger.debug( ++ "Deregistered Manager server process (pid %s) from child tracking in worker %d", ++ getattr(server_process, "pid", "?"), ++ os.getpid(), ++ ) + + def __repr__(self) -> str: + stats = self.get_stats() +@@ -364,6 +397,18 @@ def get_shared_memory() -> SharedDataManager: + return _global_shared_manager + + ++def deregister_shared_memory_child() -> None: ++ """Deregister the Manager's server process in a forked child worker. ++ ++ Call this after ``tornado.process.fork_processes()`` (or any ``fork()``) ++ to prevent Python's atexit handler from trying to ``join()`` the Manager ++ server process in the child, which would raise ++ ``AssertionError: can only join a child process``. ++ """ ++ if _global_shared_manager is not None: ++ _global_shared_manager.deregister_child() ++ ++ + def cleanup_global_shared_memory() -> None: + """Cleanup the global shared memory manager. + +diff --git a/keylime/shutdown.py b/keylime/shutdown.py +new file mode 100644 +index 0000000..72f1c76 +--- /dev/null ++++ b/keylime/shutdown.py +@@ -0,0 +1,21 @@ ++"""Shutdown coordination for graceful server termination. ++ ++Provides a process-wide shutdown flag that attestation loops and retry ++schedulers check before starting new work. 
Setting the flag prevents ++new IOLoop callbacks from being scheduled and allows in-flight ++operations to drain before the event loop stops. ++""" ++ ++import asyncio ++ ++_shutdown_event = asyncio.Event() ++ ++ ++def request_shutdown() -> None: ++ """Signal that the process is shutting down.""" ++ _shutdown_event.set() ++ ++ ++def is_shutting_down() -> bool: ++ """Return True if shutdown has been requested.""" ++ return _shutdown_event.is_set() +diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py +index b62debd..7c8a71b 100644 +--- a/keylime/web/base/server.py ++++ b/keylime/web/base/server.py +@@ -1,5 +1,6 @@ + import asyncio + import multiprocessing ++import signal + from abc import ABC, abstractmethod + from functools import wraps + from ssl import CERT_OPTIONAL +@@ -7,8 +8,9 @@ from typing import TYPE_CHECKING, Any, Callable, Optional + + import tornado + +-from keylime import api_version, config, keylime_logging, web_util ++from keylime import api_version, config, keylime_logging, shutdown, web_util + from keylime.models.base.db import db_manager ++from keylime.shared_data import deregister_shared_memory_child + from keylime.web.base.action_handler import ActionHandler + from keylime.web.base.route import Route + +@@ -251,6 +253,8 @@ class Server(ABC): + # Tornado servers are instantiated by calling start_single() or start_multi(), so set to None initially + self.__tornado_http_server: Optional[tornado.httpserver.HTTPServer] = None + self.__tornado_https_server: Optional[tornado.httpserver.HTTPServer] = None ++ self._server_stopped: Optional[asyncio.Event] = None ++ self._shutdown_task: Optional[asyncio.Task[None]] = None + + async def start_single(self) -> None: + """Instantiates and starts the server (with one Tornado HTTPServer instance to handle HTTP connections +@@ -273,7 +277,82 @@ class Server(ABC): + https_server.add_sockets(self.__tornado_https_sockets) + self.__tornado_https_server = https_server + +- await asyncio.Event().wait() 
++ # Create the stop event before installing signal handlers so that ++ # _graceful_shutdown() can always set it, even if a signal arrives ++ # before we reach the wait(). ++ self._server_stopped = asyncio.Event() ++ ++ # Install signal handlers for graceful shutdown ++ self._install_signal_handlers() ++ ++ try: ++ # Hook for subclasses to perform work after servers are listening ++ # but before blocking (e.g. activate agents). ++ await self._on_server_started() ++ await self._server_stopped.wait() ++ finally: ++ # Remove signal handlers before returning to asyncio.run()'s ++ # teardown, which closes the wakeup fd and replaces remaining ++ # handlers with _sighandler_noop. Any signal arriving after ++ # that would write to the closed fd, causing ++ # "OSError: Bad file descriptor". ++ self._remove_signal_handlers() ++ ++ async def _on_server_started(self) -> None: ++ """Called after servers are listening but before blocking. ++ ++ Override in subclasses to perform post-startup work such as ++ activating agents. The default implementation does nothing. 
++ """ ++ ++ def _install_signal_handlers(self) -> None: ++ """Install SIGINT/SIGTERM handlers for graceful shutdown.""" ++ loop = asyncio.get_event_loop() ++ ++ async def _run_graceful_shutdown() -> None: ++ try: ++ await self._graceful_shutdown() ++ except Exception: ++ logger.exception("Graceful shutdown failed") ++ finally: ++ if self._server_stopped is not None: ++ self._server_stopped.set() ++ ++ def _make_handler(signame: str) -> Callable[[], None]: ++ def _handler() -> None: ++ if shutdown.is_shutting_down(): ++ logger.warning("Shutdown already in progress, ignoring %s", signame) ++ return ++ logger.info("Received %s, shutting down", signame) ++ shutdown.request_shutdown() ++ self._shutdown_task = asyncio.ensure_future(_run_graceful_shutdown()) ++ ++ return _handler ++ ++ loop.add_signal_handler(signal.SIGINT, _make_handler("SIGINT")) ++ loop.add_signal_handler(signal.SIGTERM, _make_handler("SIGTERM")) ++ ++ def _remove_signal_handlers(self) -> None: ++ """Remove SIGINT/SIGTERM handlers from the event loop.""" ++ loop = asyncio.get_event_loop() ++ loop.remove_signal_handler(signal.SIGINT) ++ loop.remove_signal_handler(signal.SIGTERM) ++ ++ async def _graceful_shutdown(self) -> None: ++ """Stop servers and close connections gracefully. ++ ++ Subclasses can override this to cancel component-specific pending work ++ before calling super(). 
++ """ ++ if self.__tornado_http_server: ++ self.__tornado_http_server.stop() ++ if self.__tornado_https_server: ++ self.__tornado_https_server.stop() ++ ++ if self.__tornado_http_server: ++ await self.__tornado_http_server.close_all_connections() ++ if self.__tornado_https_server: ++ await self.__tornado_https_server.close_all_connections() + + def start_multi(self) -> None: + ports = "" +@@ -295,12 +374,19 @@ class Server(ABC): + self.worker_count, + ) + ++ self._pre_fork() ++ + # with StatsCollector(): + # num = manager.Value('i', 0) +- tornado.process.fork_processes(self.worker_count) ++ task_id = tornado.process.fork_processes(self.worker_count) + # num.value = num.value + 1 + # print(num.value) + ++ # Remove the Manager's server process from multiprocessing's child ++ # tracking so Python's atexit handler does not try to join() it in ++ # child workers (the Manager was spawned by the parent). ++ deregister_shared_memory_child() ++ + # Dispose inherited db_manager engine after fork to avoid sharing the + # parent's connection pool, then re-create with a fresh pool for this + # child process. +@@ -309,8 +395,27 @@ class Server(ABC): + if service: + db_manager.make_engine(service) + ++ self._post_fork(task_id) ++ + asyncio.run(self.start_single()) + ++ def _pre_fork(self) -> None: ++ """Called before ``fork_processes()`` in ``start_multi()``. ++ ++ Override in subclasses to perform work that must happen in the ++ parent process before forking (e.g. querying the database for ++ agent lists to distribute across workers). ++ """ ++ ++ def _post_fork(self, task_id: int) -> None: ++ """Called after ``fork_processes()`` in each child worker. ++ ++ *task_id* is the worker index returned by Tornado's ++ ``fork_processes()``. Override to perform per-worker ++ initialization (e.g. resetting inherited DB state, distributing ++ agents). ++ """ ++ + def _setup(self) -> None: + """Defines values to use in place of the defaults for the various server options. 
It is suggested that this is + overriden by the implementing class.""" +diff --git a/keylime/web/verifier_server.py b/keylime/web/verifier_server.py +index 6a0261c..1ada86e 100755 +--- a/keylime/web/verifier_server.py ++++ b/keylime/web/verifier_server.py +@@ -1,12 +1,16 @@ + import asyncio + from typing import List, Optional + +-import tornado.httpserver +-import tornado.ioloop +-import tornado.process + from sqlalchemy.exc import SQLAlchemyError + +-from keylime import cloud_verifier_common, cloud_verifier_tornado, config, keylime_logging ++from keylime import ( ++ cloud_verifier_common, ++ cloud_verifier_tornado, ++ config, ++ keylime_logging, ++ push_agent_monitor, ++ revocation_notifier, ++) + from keylime.authorization.provider import Action + from keylime.common import states + from keylime.db.keylime_db import SessionManager, make_engine +@@ -30,82 +34,76 @@ class VerifierServer(Server): + super().__init__() + self._prepare_agents_on_startup() + self._clear_stale_sessions_on_startup() ++ self._all_agents: List[VerfierMain] = [] + self._worker_agents: Optional[List[VerfierMain]] = None ++ self._activate_task: Optional[asyncio.Task[None]] = None + +- def start_multi(self) -> None: # pylint: disable=no-member +- """Override to support PULL mode agent activation across multiple workers.""" +- # Get all agents from database before forking (only needed for PULL mode) ++ def _pre_fork(self) -> None: ++ """Query agents from database before forking (only needed for PULL mode).""" + logger.info("start_multi() called with operating_mode: %s", self.operating_mode) +- all_agents: List[VerfierMain] = [] ++ self._all_agents = [] + if self.operating_mode == "pull": + verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) + logger.info("Querying agents for verifier_id: %s", verifier_id) +- all_agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) +- logger.info("Found %d agents in database before forking", 
len(all_agents)) +- +- # Log server startup (copied from base class) +- ports = "" +- protocols = "" +- if self._Server__tornado_http_sockets: # type: ignore # pylint: disable=no-member +- ports = str(self.http_port) +- protocols = "HTTP" +- if self._Server__tornado_https_sockets and self.ssl_ctx: # type: ignore # pylint: disable=no-member +- ports = f"{ports}/{self.https_port}" if ports else f"{self.https_port}" +- protocols = f"{protocols}/S" if protocols else "HTTPS" +- logger.info( +- "Listening on %s:%s (%s) with %s worker processes...", +- self.bind_interface, +- ports, +- protocols, +- self.worker_count, +- ) +- +- # Fork worker processes - returns task_id in each child process +- task_id = tornado.process.fork_processes(self.worker_count) ++ self._all_agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) ++ logger.info("Found %d agents in database before forking", len(self._all_agents)) + ++ def _post_fork(self, task_id: int) -> None: ++ """Reset inherited DB state and distribute agents to this worker.""" + # CRITICAL: Reset any database state inherited from parent process. +- # The parent initializes globals when querying agents (line 39), so children +- # inherit initialized state. We must reset to trigger lazy re-initialization. ++ # The parent initializes globals when querying agents in _pre_fork(), ++ # so children inherit initialized state. We must reset to trigger ++ # lazy re-initialization. 
+ cloud_verifier_tornado.reset_verifier_config() + + # Distribute agents to this worker using round-robin (task_id is the worker index) +- if self.operating_mode == "pull" and all_agents: +- self._worker_agents = [all_agents[i] for i in range(task_id, len(all_agents), self.worker_count)] ++ if self.operating_mode == "pull" and self._all_agents: ++ self._worker_agents = [ ++ self._all_agents[i] for i in range(task_id, len(self._all_agents), self.worker_count) ++ ] + logger.info("Worker %d assigned %d agent(s)", task_id, len(self._worker_agents)) + +- # Start this worker's HTTP/HTTPS servers and activate agents +- self.start_single() +- +- def start_single(self) -> None: # type: ignore[override] # pylint: disable=attribute-defined-outside-init,invalid-overridden-method +- """Override to support PULL mode agent activation after server startup.""" +- # Start HTTP/HTTPS servers (logic copied from parent to allow agent activation before blocking) +- # pylint: disable=no-member +- if self._Server__tornado_http_sockets: # type: ignore +- http_server = tornado.httpserver.HTTPServer( +- self._Server__tornado_app, ssl_options=None, max_buffer_size=self.max_upload_size # type: ignore ++ async def _on_server_started(self) -> None: ++ """Activate agents for PULL mode after servers are listening.""" ++ # In start_single() mode (single-process), _pre_fork/_post_fork ++ # are never called so _worker_agents is None and _all_agents is ++ # empty. Query agents directly in that case. 
++ agents = self._worker_agents if self._worker_agents is not None else self._all_agents ++ if self.operating_mode == "pull" and not agents and self._worker_agents is None: ++ verifier_id = config.get("verifier", "uuid", fallback=cloud_verifier_common.DEFAULT_VERIFIER_ID) ++ agents = cloud_verifier_tornado.get_agents_by_verifier_id(verifier_id) ++ if self.operating_mode == "pull" and agents: ++ verifier_host = config.get("verifier", "ip") ++ verifier_port = config.get("verifier", "port") ++ logger.info("Activating %d agent(s) for PULL mode", len(agents)) ++ self._activate_task = asyncio.ensure_future( ++ cloud_verifier_tornado.activate_agents(agents, verifier_host, int(verifier_port)) + ) +- http_server.add_sockets(self._Server__tornado_http_sockets) # type: ignore +- self._Server__tornado_http_server = http_server # type: ignore # pylint: disable=attribute-defined-outside-init + +- if self._Server__tornado_https_sockets and self.ssl_ctx: # type: ignore +- https_server = tornado.httpserver.HTTPServer( +- self._Server__tornado_app, ssl_options=self.ssl_ctx, max_buffer_size=self.max_upload_size # type: ignore ++ async def _graceful_shutdown(self) -> None: ++ """Cancel attestation-specific pending work and drain in-flight operations before stopping servers.""" ++ # Cancel all pending attestation timeouts (retries, polls) ++ cloud_verifier_tornado.cancel_all_pending_events() ++ push_agent_monitor.cancel_all_timeouts() ++ ++ # Wait for in-flight attestation operations to complete before ++ # tearing down webhook workers — in-flight process_agent() calls ++ # may still need to send revocation notifications. 
++ drain_timeout = config.getfloat("verifier", "shutdown_drain_timeout", fallback=10.0) ++ drained = await cloud_verifier_tornado.wait_for_drain(drain_timeout) ++ if not drained: ++ logger.warning( ++ "Shutting down with %d attestation operation(s) still active after %.1fs", ++ cloud_verifier_tornado.get_active_operations(), ++ drain_timeout, + ) +- https_server.add_sockets(self._Server__tornado_https_sockets) # type: ignore +- self._Server__tornado_https_server = https_server # type: ignore # pylint: disable=attribute-defined-outside-init +- # pylint: enable=no-member + +- # Activate agents for PULL mode +- if self.operating_mode == "pull" and self._worker_agents: +- verifier_host = config.get("verifier", "ip") +- verifier_port = config.get("verifier", "port") +- logger.info("Activating %d agent(s) for PULL mode", len(self._worker_agents)) +- asyncio.ensure_future( +- cloud_verifier_tornado.activate_agents(self._worker_agents, verifier_host, int(verifier_port)) +- ) ++ # Shutdown webhook workers after draining so revocation ++ # notifications from in-flight attestations are delivered. ++ if "webhook" in revocation_notifier.get_notifiers(): ++ revocation_notifier.shutdown_webhook_workers() + +- # Wait forever (until event loop is stopped) +- tornado.ioloop.IOLoop.current().start() ++ await super()._graceful_shutdown() + + def _prepare_agents_on_startup(self) -> None: + """Prepare agents in database for verifier startup. +diff --git a/templates/2.6/agent.j2 b/templates/2.6/agent.j2 +new file mode 100644 +index 0000000..26d5b7c +--- /dev/null ++++ b/templates/2.6/agent.j2 +@@ -0,0 +1,313 @@ ++# Keylime agent configuration ++# The Python agent is deprecated and will be removed with the next major release (7.0.0)! 
++# Please migrate to the Rust based agent: https://github.com/keylime/rust-keylime/ ++[agent] ++ ++# The configuration file version number ++version = "{{ agent.version }}" ++ ++# The enabled API versions ++# This sets which of the supported API versions to enable. ++# Only supported versions can be set, which are defined by ++# api::SUPPORTED_API_VERSIONS ++# A list of versions to enable can be provided (e.g. "2.1, 2.2") ++# The following keywords are also supported: ++# - "default": Enables all supported API versions ++# - "latest": Enables only the latest supported API version ++api_versions = "{{ agent.api_versions }}" ++ ++# The agent's UUID. ++# If you set this to "generate", Keylime will create a random UUID. ++# If you set this to "hash_ek", Keylime will set the UUID to the result ++# of 'SHA256(public EK in PEM format)'. ++# If you set this to "environment", Keylime will use the value of the ++# environment variable "KEYLIME_AGENT_UUID" as UUID. ++# If you set this to "dmidecode", Keylime will use the UUID from ++# 'dmidecode -s system-uuid'. ++# If you set this to "hostname", Keylime will use the fully qualified domain ++# name of current host as the agent id. ++uuid = "{{ agent.uuid }}" ++ ++# The binding address and port for the agent server ++ip = "{{ agent.ip }}" ++port = {{ agent.port }} ++ ++# Address and port where the verifier and tenant can connect to reach the agent. ++# These keys are optional. ++contact_ip = "{{ agent.contact_ip }}" ++contact_port = {{ agent.contact_port }} ++ ++# Path to store agent persistent data ++agent_data_path = "{{ agent_data_path }}" ++ ++# Verifier Information (Push Model specific). ++# Verifier URL ++verifier_url = "{{ agent_verifier_url }}" ++ ++# Verifier TLS CA certificate (Push Model specific). ++# Used to verify the verifier's server certificate. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used.
++verifier_tls_ca_cert = "{{ agent.verifier_tls_ca_cert }}" ++ ++# Server identifier for certification keys ++certification_keys_server_identifier = "{{ agent_certification_keys_server_identifier }}" ++ ++# Evidence version for UEFI logs ++uefi_logs_evidence_version = "{{ agent_uefi_logs_evidence_version }}" ++ ++# The address and port of the registrar server which the agent communicates with ++registrar_ip = "{{ agent.registrar_ip }}" ++registrar_port = {{ agent.registrar_port }} ++ ++# The TLS port of the registrar server (Push Model specific). ++# Used when registrar_tls_enabled is set to true. ++registrar_tls_port = {{ agent.registrar_tls_port }} ++ ++# Enable TLS communication between agent and registrar (Push Model specific). ++# When enabled, the agent uses TLS (server verification only) with the registrar. ++registrar_tls_enabled = {{ agent.registrar_tls_enabled }} ++ ++# TLS CA certificate for verifying the registrar's server certificate (Push Model specific). ++# Only used when registrar_tls_enabled is true. ++# If set as "default", the "cv_ca/cacert.crt" value relative to keylime_dir is used. ++registrar_tls_ca_cert = "{{ agent.registrar_tls_ca_cert }}" ++ ++# The API versions to use when communicating with the registrar (Push Model specific). ++# Supports "default" (all supported), "latest", or a comma-separated list. ++registrar_api_versions = "{{ agent.registrar_api_versions }}" ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ agent.enable_agent_mtls }} ++ ++# The keylime working directory. ++keylime_dir = "{{ agent.keylime_dir }}" ++ ++# Accept invalid TLS certificates (INSECURE - for testing only) ++# When enabled, the agent will accept self-signed or invalid certificates ++# This option is specific to the push attestation model. 
++# This should ONLY be used for testing or development environments ++# Default: False (secure) ++tls_accept_invalid_certs = {{ agent.tls_accept_invalid_certs }} ++ ++# Accept invalid TLS hostnames (INSECURE - for testing only) ++# When enabled, the agent will accept certificates with mismatched hostnames ++# This option is specific to the push attestation model. ++# This should ONLY be used for testing or development environments ++# Default: False (secure) ++tls_accept_invalid_hostnames = {{ agent.tls_accept_invalid_hostnames }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the client and the server in the /var/lib/keylime/cv_ca directory, if not ++# present. ++# ++# The 'server_key', 'server_cert', and 'trusted_client_ca' options should all be ++# set with the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', the '/var/lib/keylime/secure' directory is used, which ++# should contain the files indicated by the 'server_key', 'server_cert', ++# and 'trusted_client_ca' options. ++tls_dir = "{{ agent.tls_dir }}" ++ ++# The name of the file containing the Keylime agent TLS server private key. ++# This private key is used to serve the Keylime agent REST API ++# A new private key is generated in case it is not found. ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = "{{ agent.server_key }}" ++ ++# Set the password used to decrypt the private key file. ++# This password will also be used to protect the generated private key used for ++# mTLS authentication ++# If left empty, the private key will not be encrypted. ++server_key_password = "{{ agent.server_key_password }}" ++ ++# The name of the file containing the payload encryption private key. ++# If set as "default", the "payload-private.pem" value is used. 
++payload_key = "{{ agent.payload_key }}" ++ ++# Set the password used to encrypt the payload private key file. ++# If left empty, the private key will not be encrypted. ++payload_key_password = "{{ agent.payload_key_password }}" ++ ++# The name of the file containing the X509 certificate used as the Keylime agent ++# server TLS certificate. ++# This certificate must be self signed. ++server_cert = "{{ agent.server_cert }}" ++ ++# A list of trusted client CA certificates ++trusted_client_ca = "{{ agent.trusted_client_ca }}" ++ ++# The name of the file used to store the payload encryption key, derived from ++# the U and V parts. ++# This file is stored in the /var/lib/keylime/secure/ directory. ++enc_keyname = "{{ agent.enc_keyname }}" ++ ++# The name of the file used to store the optional decrypted payload. ++# This file is stored in the /var/lib/keylime/secure/ directory. ++dec_payload_file = "{{ agent.dec_payload_file }}" ++ ++# The size of the memory-backed tmpfs partition where Keylime stores keys and ++# the decrypted payload. ++# Use syntax that the 'mount' command would accept as a size parameter for tmpfs. ++# The default below sets it to 1 megabyte. ++secure_size = "{{ agent.secure_size }}" ++ ++# Use this option to set the TPM ownerpassword to something you want to use. ++# Set it to "generate" if you want Keylime to choose a random owner password ++# for you. ++tpm_ownerpassword = "{{ agent.tpm_ownerpassword }}" ++ ++# Whether to allow the agent to automatically extract a zip file in ++# the delivered payload after it has been decrypted, or not. Defaults to "True". ++# After decryption, the archive will be unzipped to a directory in /var/lib/keylime/secure. ++# Note: the limits on the size of the tmpfs partition set above with the 'secure_size' ++# option will affect this. 
++extract_payload_zip = {{ agent.extract_payload_zip }} ++ ++# Whether to listen for revocation notifications from the verifier via ZeroMQ ++enable_revocation_notifications = {{ agent.enable_revocation_notifications }} ++ ++# The IP to listen for revocation notifications via ZeroMQ ++revocation_notification_ip = "{{ agent.revocation_notification_ip }}" ++ ++# The port to listen for revocation notifications via ZeroMQ ++revocation_notification_port = {{ agent.revocation_notification_port }} ++ ++# The path to the certificate to verify revocation messages received from the ++# verifier. The path is relative to /var/lib/keylime. ++# If set to "default", Keylime will use the file RevocationNotifier-cert.crt ++# from the unzipped contents provided by the tenant. ++revocation_cert = "{{ agent.revocation_cert }}" ++ ++# A comma-separated list of Python scripts to run upon receiving a revocation ++# message. Keylime will verify the signature first, then call these Python ++# scripts with the json revocation message passed as argument. The scripts must ++# be located in the directory set via 'revocation_actions_dir' ++# ++# Keylime will also get the list of revocation actions from the file ++# action_list in the unzipped payload content. ++revocation_actions = "{{ agent.revocation_actions }}" ++ ++# The path to the directory containing pre-installed revocation action scripts. ++revocation_actions_dir = "{{ agent.revocation_actions_dir }}" ++ ++# A script to execute after unzipping the tenant payload. This is like ++# cloud-init lite =) Keylime will run it with a /bin/sh environment and ++# with a working directory of /var/lib/keylime/secure/unzipped. ++payload_script = "{{ agent.payload_script }}" ++ ++# In case mTLS for the agent is disabled and the use of payloads is still ++# required, this option has to be set to "True" in order to allow the agent ++# to start. 
Details on why this configuration (mTLS disabled and payload enabled) ++# is generally considered insecure can be found on ++# https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_insecure_payload = {{ agent.enable_insecure_payload }} ++ ++# Whether to allow running revocation actions sent as part of the payload. ++# Setting to false limits revocation actions to pre-installed ones. ++allow_payload_revocation_actions = {{ agent.allow_payload_revocation_actions }} ++ ++# Maximum number of retries for exponential backoff ++exponential_backoff_max_retries = {{ agent.exponential_backoff_max_retries }} ++# Initial delay in milliseconds for exponential backoff ++exponential_backoff_initial_delay = {{ agent.exponential_backoff_initial_delay }} ++# Maximum delay in milliseconds for exponential backoff ++exponential_backoff_max_delay = {{ agent.exponential_backoff_max_delay }} ++ ++# List of hash algorithms used for PCRs ++# Accepted values: sha512, sha384, sha256, sha1 ++tpm_hash_alg = "{{ agent.tpm_hash_alg }}" ++ ++# List of encryption algorithms to use with the TPM ++# Accepted values: ecc, rsa ++tpm_encryption_alg = "{{ agent.tpm_encryption_alg }}" ++ ++# List of signature algorithms to use ++# Accepted values: rsassa, rsapss, ecdsa, ecdaa, ecschnorr ++tpm_signing_alg = "{{ agent.tpm_signing_alg }}" ++ ++# If an EK is already present on the TPM (e.g., with "tpm2_createek") and ++# you require Keylime to use this EK, change "generate" to the actual EK ++# handle (e.g. "0x81000000"). The Keylime agent will then not attempt to ++# create a new EK upon startup, and neither will it flush the EK upon exit. ++ek_handle = "{{ agent.ek_handle }}" ++ ++# Enable IDevID and IAK usage ++enable_iak_idevid = {{ agent.enable_iak_idevid }} ++ ++# Select IDevID and IAK templates or algorithms for regenerating the keys. ++# By default the template will be detected automatically from the certificates. 
This will happen if iak_idevid_template is left empty or set as "default" or "detect". ++# Choosing a template will override the name and asymmetric algorithm choices. To use these choices, set iak_idevid_template to "manual" ++# Templates are specified in the TCG document found here, section 7.3.4: ++# https://trustedcomputinggroup.org/wp-content/uploads/TPM-2p0-Keys-for-Device-Identity-and-Attestation_v1_r12_pub10082021.pdf ++# ++# Accepted values: ++# iak_idevid_template: default, detect, H-1, H-2, H-3, H-4, H-5, manual ++# iak_idevid_asymmetric_alg: rsa, ecc ++# iak_idevid_name_alg: sha256, sm3_256, sha384, sha512 ++iak_idevid_template = "{{ agent.iak_idevid_template }}" ++# In order for these values to be used, set the iak_idevid_template option to manual ++iak_idevid_asymmetric_alg = "{{ agent.iak_idevid_asymmetric_alg }}" ++iak_idevid_name_alg = "{{ agent.iak_idevid_name_alg }}" ++ ++# Alternatively if the keys are persisted, provide the handles for their location below, and optionally their passwords. ++# If handles are provided, they will take priority over templates/algorithms selected above. ++# To use a hex password, use the prefix "hex:" at the start of the password. ++idevid_password = "{{ agent.idevid_password }}" ++idevid_handle = "{{ agent.idevid_handle }}" ++ ++iak_password = "{{ agent.iak_password }}" ++iak_handle = "{{ agent.iak_handle }}" ++ ++# The name of the file containing the X509 IAK certificate. ++# If set as "default", the "iak-cert.crt" value is used ++# If a relative path is set, it will be considered relative from the keylime_dir. ++# If an absolute path is set, it is used without change. ++# ++# To override iak_cert, set KEYLIME_AGENT_IAK_CERT environment variable. ++iak_cert = "{{ agent.iak_cert }}" ++ ++# The name of the file containing the X509 IDevID certificate. ++# If set as "default", the "idevid-cert.crt" value is used ++# If a relative path is set, it will be considered relative from the keylime_dir. 
++# If an absolute path is set, it is used without change. ++# ++# To override idevid_cert, set KEYLIME_AGENT_IDEVID_CERT environment variable. ++idevid_cert = "{{ agent.idevid_cert }}" ++ ++# The user account to switch to to drop privileges when started as root ++# If left empty, the agent will keep running with high privileges. ++# The user and group specified here must allow the user to access the ++# WORK_DIR (typically /var/lib/keylime) and /dev/tpmrm0. Therefore, ++# suggested value for the run_as parameter is keylime:tss. ++# The following commands should be used to set ownership before running the ++# agent: ++# chown keylime /var/lib/keylime ++# ++# If tpmdata.yml already exists: ++# chown keylime /var/lib/keylime/tpmdata.yml ++# ++# If cv_ca directory exists: ++# chown keylime /var/lib/keylime/cv_ca ++# chown keylime /var/lib/keylime/cv_ca/cacert.crt ++run_as = "{{ agent.run_as }}" ++ ++# Path from where the agent will read the IMA measurement log. ++# ++# If set as "default", Keylime will use the default path: ++# The default path is /sys/kernel/security/ima/ascii_runtime_measurements ++# If set as a relative path, it will be considered from the root path "/". ++# If set as an absolute path, it will use it without changes ++ima_ml_path = "{{ agent.ima_ml_path }}" ++ ++# Path from where the agent will read the measured boot event log. ++# ++# If set as "default", Keylime will use the default path: ++# The default path is /sys/kernel/security/tpm0/binary_bios_measurements ++# If set as a relative path, it will be considered from the root path "/". ++# If set as an absolute path, it will use it without changes ++measuredboot_ml_path = "{{ agent.measuredboot_ml_path }}" +diff --git a/templates/2.6/ca.j2 b/templates/2.6/ca.j2 +new file mode 100644 +index 0000000..03ebe3b +--- /dev/null ++++ b/templates/2.6/ca.j2 +@@ -0,0 +1,39 @@ ++# Keylime CA configuration ++[ca] ++ ++# The keystore password ++# This password is used to protect the generated CA private key. 
++password = {{ ca.password }} ++ ++# The configuration file version number ++version = {{ ca.version }} ++ ++# CountryName argument (C) of the Issuer when generating certificates ++cert_country = {{ ca.cert_country }} ++ ++# CommonName argument (CN) of the Issuer when generating certificates ++cert_ca_name = {{ ca.cert_ca_name }} ++ ++# StateOrProvinceName argument (S) of the Issuer when generating certificates ++cert_state = {{ ca.cert_state }} ++ ++# Locality argument (L) of the Issuer when generating certificates ++cert_locality = {{ ca.cert_locality }} ++ ++# Organization argument (O) of the Issuer when generating certificates ++cert_organization = {{ ca.cert_organization }} ++ ++# OrganizationalUnit argument (OU) of the Issuer when generating certificates ++cert_org_unit = {{ ca.cert_org_unit }} ++ ++# CA certificate validity time in days ++cert_ca_lifetime = {{ ca.cert_ca_lifetime }} ++ ++# Default generated certificate validity time in days ++cert_lifetime = {{ ca.cert_lifetime }} ++ ++# Key length in bits ++cert_bits = {{ ca.cert_bits }} ++ ++# Certificate Revocation List (CRL) distribution address (URL) ++cert_crl_dist = {{ ca.cert_crl_dist }} +diff --git a/templates/2.6/logging.j2 b/templates/2.6/logging.j2 +new file mode 100644 +index 0000000..9bd8deb +--- /dev/null ++++ b/templates/2.6/logging.j2 +@@ -0,0 +1,33 @@ ++# Keylime logging configuration ++ ++# The configuration file version number ++[logging] ++version = {{ logging.version }} ++ ++[loggers] ++keys = {{ loggers.get('keys') }} ++ ++[handlers] ++keys = {{ handlers.get('keys') }} ++ ++[formatters] ++keys = {{ formatters.get('keys') }} ++ ++[formatter_formatter] ++format = {{ formatter_formatter.format }} ++datefmt = {{ formatter_formatter.datefmt }} ++ ++[logger_root] ++level = {{ logger_root.level }} ++handlers = {{ logger_root.handlers }} ++ ++[handler_consoleHandler] ++class = {{ handler_consoleHandler.class }} ++level = {{ handler_consoleHandler.level }} ++formatter = {{ 
handler_consoleHandler.formatter }} ++args = {{ handler_consoleHandler.args }} ++ ++[logger_keylime] ++level = {{ logger_keylime.level }} ++qualname = {{ logger_keylime.qualname }} ++handlers = {{ logger_keylime.handlers }} +diff --git a/templates/2.6/mapping.json b/templates/2.6/mapping.json +new file mode 100644 +index 0000000..ce10d12 +--- /dev/null ++++ b/templates/2.6/mapping.json +@@ -0,0 +1,11 @@ ++{ ++ "version": "2.6", ++ "type": "update", ++ "components": { ++ "verifier": { ++ "add": { ++ "shutdown_drain_timeout": "10" ++ } ++ } ++ } ++} +diff --git a/templates/2.6/registrar.j2 b/templates/2.6/registrar.j2 +new file mode 100644 +index 0000000..06f026e +--- /dev/null ++++ b/templates/2.6/registrar.j2 +@@ -0,0 +1,168 @@ ++# Keylime registrar configuration ++[registrar] ++ ++# The configuration file version number ++version = {{ registrar.version }} ++ ++# The binding address and port for the registrar server ++ip = "{{ registrar.ip }}" ++port = {{ registrar.port }} ++tls_port = {{ registrar.tls_port }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the registrar server in the /var/lib/keylime/reg_ca directory, if not present. ++# ++# The 'server_key', 'server_cert', and 'trusted_client_ca' options should all be ++# set with the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', share the files with the verifier by using the ++# '/var/lib/keylime/cv_ca' directory, which should contain the files indicated by ++# the 'server_key', 'server_cert', and 'trusted_client_ca' options. ++tls_dir = {{ registrar.tls_dir }} ++ ++# The name of the file containing the Keylime registrar server private key. ++# The file should be stored in the directory set in the 'tls_dir' option. 
++# This private key is used to serve the Keylime registrar REST API ++# ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = {{ registrar.server_key }} ++ ++# Set the password used to decrypt the private key file. ++# If 'tls_dir = generate', this password will also be used to protect the ++# generated server private key. ++# If left empty, the private key will not be encrypted. ++server_key_password = {{ registrar.server_key_password }} ++ ++# The name of the file containing the Keylime registrar server certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# ++# If set as 'default', the 'server-cert.crt' value is used. ++server_cert = {{ registrar.server_cert }} ++ ++# Additional Subject Alternative Names (SANs) to include in auto-generated ++# server certificates when 'tls_dir = generate'. ++# ++# This is a comma-separated list of hostnames and/or IP addresses that will be ++# added to the server certificate's SAN extension. This allows clients to verify ++# the server's hostname when connecting. ++# ++# By default, the certificate will automatically include: ++# - localhost, 127.0.0.1, and ::1 ++# - The system's hostname and FQDN ++# - The IP address from the 'ip' option (if not 0.0.0.0 or ::) ++# ++# Use this option to add additional names, such as: ++# - External DNS names (e.g., registrar.example.com) ++# - Load balancer addresses ++# - Additional IP addresses ++# ++# Example: cert_subject_alternative_names = registrar.example.com,10.0.0.5,registrar-internal ++# Leave empty to use only the automatically detected names. ++cert_subject_alternative_names = {{ registrar.cert_subject_alternative_names }} ++ ++# The list of trusted client CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. 
++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_client_ca = {{ registrar.trusted_client_ca }} ++ ++# Authorization provider to use for access control decisions. ++# ++# Available providers: ++# simple - Default provider implementing 2-category access control: ++# - PUBLIC: No authentication (agent registration, activation, version) ++# - ADMIN: mTLS client certificate (listing, viewing, deleting registrations) ++# ++# The 'simple' provider enforces strict separation: ++# - Agent registration endpoints are public (no authentication required) ++# - Management endpoints require mTLS client certificate authentication ++# ++# SECURITY NOTE: Never distribute client certificates signed by the registrar's ++# trusted CA to agents. Agents should register via public endpoints only. ++# ++# Certificate requirements: ++# - Agents: No client certs needed (public registration endpoints) ++# - Admins: Client certs signed by trusted CA with Client Authentication EKU ++authorization_provider = {{ registrar.authorization_provider }} ++ ++# Database URL Configuration ++# See this document https://keylime.readthedocs.io/en/latest/installation.html#database-support ++# for instructions on using different database configurations. ++# ++# An example of database_url value for using sqlite: ++# sqlite:////var/lib/keylime/reg_data.sqlite ++# An example of database_url value for using mysql: ++# mysql+pymysql://keylime:keylime@keylime_db:[port]/registrar?charset=utf8 ++# ++# If set as 'sqlite' keyword, will use the configuration set by the file located ++# at "/var/lib/keylime/reg_data.sqlite". 
++database_url = {{ registrar.database_url }} ++ ++# Limits for DB connection pool size in sqlalchemy ++# (https://docs.sqlalchemy.org/en/14/core/pooling.html#api-documentation-available-pool-implementations) ++database_pool_sz_ovfl = {{ registrar.database_pool_sz_ovfl }} ++ ++# Whether to automatically update the DB schema using alembic ++auto_migrate_db = {{ registrar.auto_migrate_db }} ++ ++# Durable Attestation is currently marked as an experimental feature ++# In order to enable Durable Attestation, an "adapter" for a Persistent data Store ++# (time-series like database) needs to be specified. Some example adapters can be ++# found under "da/examples" so, for instance ++# "durable_attestation_import = keylime.da.examples.redis.py" ++# could be used to interact with a Redis (Persistent data Store) ++durable_attestation_import = {{ registrar.durable_attestation_import }} ++ ++# If an adapter for Durable Attestation was specified, then the URL for a Persistent Store ++# needs to be specified here. A second optional URL could be specified, for a ++# Rekor Transparency Log. A third additional URL could be specified, pointing to a ++# Time Stamp Authority (TSA), compatible with RFC3161. Additionally, one might need to ++# specify a path containing certificates required by the stores or TSA. 
Continuing with ++# the above example, the following values could be assigned to the parameters: ++# "persistent_store_url=redis://127.0.0.1:6379?db=10&password=/root/redis.auth&prefix=myda" ++# "transparency_log_url=http://127.0.0.1:3000" ++# "time_stamp_authority_url=http://127.0.0.1:2020" ++# "time_stamp_authority_certs_path=~/mycerts/tsa_cert1.pem" ++persistent_store_url = {{ registrar.persistent_store_url }} ++transparency_log_url = {{ registrar.transparency_log_url }} ++time_stamp_authority_url = {{ registrar.time_stamp_authority_url }} ++time_stamp_authority_certs_path = {{ registrar.time_stamp_authority_certs_path }} ++ ++# If Durable Attestation was enabled, which requires a Persistent Store URL ++# to be specified, the two following parameters control the format and encoding ++# of the stored attestation artifacts (defaults "json" for format and "" for encoding) ++persistent_store_format = {{ registrar.persistent_store_format }} ++persistent_store_encoding = {{ registrar.persistent_store_encoding }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# the digest algorithm for signatures is controlled by this parameter (default "sha256") ++transparency_log_sign_algo = {{ registrar.transparency_log_sign_algo }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# a keylime administrator can specify some agent attributes (including attestation ++# artifacts, such as quotes and logs) to be signed by the registrar. The use of "all" ++# will result in the whole "package" (agent + artifacts) being signed and leaving it empty ++# will mean no signing should be done. ++signed_attributes = {{ registrar.signed_attributes }} ++ ++# What TPM-based identity is allowed to be used to register agents. ++# The options "default" and "iak_idevid" will only allow registration with IAK and IDevID if python cryptography is version 38.0.0 or higher. 
++# The following options are accepted: ++# "default": either an EK or IAK and IDevID may be used. In the case that cryptography version is <38.0.0 only EK will be used ++# "ek_cert_or_iak_idevid": this is equivalent to default ++# "ek_cert": only allow agents to use an EK to register ++# "iak_idevid": only allow agents with an IAK and IDevID to register ++tpm_identity = {{ registrar.tpm_identity }} ++ ++# The below option controls what Keylime does when it encounters a certificate which is not parse-able when strict ++# ASN.1 Distinguished Encoding Rules (DER) are enforced. The default behaviour ("warn") is to log a warning but still ++# accept the certificate, so long as it can be interpreted by a fallback parser. ++# The following values are accepted: ++# "warn": log a warning and re-encode the certificate with the more-forgiving fallback parser (the default) ++# "reject": log an error and refuse to accept the certificate ++# "ignore": silently re-encode the certificate without logging a message ++malformed_cert_action = {{ registrar.malformed_cert_action }} +\ No newline at end of file +diff --git a/templates/2.6/tenant.j2 b/templates/2.6/tenant.j2 +new file mode 100644 +index 0000000..79934bf +--- /dev/null ++++ b/templates/2.6/tenant.j2 +@@ -0,0 +1,130 @@ ++# Keylime tenant configuration ++[tenant] ++ ++# The configuration file version number ++version = {{ tenant.version }} ++ ++# The verifier IP address and port ++verifier_ip = {{ tenant.verifier_ip }} ++verifier_port = {{ tenant.verifier_port }} ++ ++# The registrar IP address and port ++registrar_ip = {{ tenant.registrar_ip }} ++registrar_port = {{ tenant.registrar_port }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'default', share the files with the verifier by using the ++# '/var/lib/keylime/cv_ca', which should contain the files indicated by the ++# 'client_key', 'client_cert', and 'trusted_server_ca' options. 
++tls_dir = {{ tenant.tls_dir }} ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ tenant.enable_agent_mtls }} ++ ++# The name of the file containing the Keylime tenant client private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used by the Keylime tenant to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-private.pem' value is used. ++client_key = {{ tenant.client_key }} ++ ++# Set the password used to encrypt the private key file. ++# If client_key is set as 'default', should match the password set in the ++# 'client_key_password' option in the verifier configuration file ++client_key_password = {{ tenant.client_key_password }} ++ ++# The name of the file containing the Keylime tenant client certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This certificate is used by the Keylime tenant to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-cert.crt' value is used. ++client_cert = {{ tenant.client_cert }} ++ ++# The list of trusted server CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_server_ca = {{ tenant.trusted_server_ca }} ++ ++# Directory containing the EK CA certificates. ++# The EK certificate provided by the agent will be validated against the CAs ++# located in this directory. ++tpm_cert_store = {{ tenant.tpm_cert_store }} ++ ++# Maximum size of the payload in bytes. 
The value should match the 'secure_size' ++# option in the agent configuration ++max_payload_size = {{ tenant.max_payload_size }} ++ ++# List of hash algorithms used for PCRs ++# Accepted values: sha512, sha384, sha256, sha1 ++accept_tpm_hash_algs = {{ tenant.accept_tpm_hash_algs }} ++ ++# List of encryption algorithms to use with the TPM ++# Accepted values: ecc, rsa ++accept_tpm_encryption_algs = {{ tenant.accept_tpm_encryption_algs }} ++ ++# List of signature algorithms to use ++# Accepted values: rsassa, rsapss, ecdsa, ecdaa, ecschnorr ++accept_tpm_signing_algs = {{ tenant.accept_tpm_signing_algs }} ++ ++# Whether or not to use an exponential backoff algorithm for retries. ++exponential_backoff = {{ tenant.exponential_backoff }} ++ ++# Either how long to wait between failed attempts to communicate with the TPM ++# in seconds, or the base for the exponential backoff algorithm if enabled via ++# "exponential_backoff" option. ++# Floating point values are accepted. ++retry_interval = {{ tenant.retry_interval }} ++ ++# Integer number of retries to communicate with the TPM before giving up. ++max_retries = {{ tenant.max_retries }} ++ ++# Request timeout in seconds. ++request_timeout = {{ tenant.request_timeout }} ++ ++# Tell the tenant whether to require an EK certificate from the TPM. ++# If set to False the tenant will ignore EK certificates entirely. ++# ++# WARNING: SETTING THIS OPTION TO FALSE IS VERY DANGEROUS!!! ++# ++# If you disable this check, then you may not be talking to a real TPM. ++# All the security guarantees of Keylime rely upon the security of the EK ++# and the assumption that you are talking to a spec-compliant and honest TPM. ++ ++# Some physical TPMs do not have EK certificates, so you may need to set ++# this to "False" for some deployments. 
If you do set it to "False", you ++# MUST use the 'ek_check_script' option below to specify a script that will ++# check the provided EK against an allowlist for the environment that has ++# been collected in a trustworthy way. For example, the cloud provider ++# might provide a signed list of EK public key hashes. Then you could write ++# an ek_check_script that checks the signature of the allowlist and then ++# compares the hash of the given EK with the allowlist. ++require_ek_cert = {{ tenant.require_ek_cert }} ++ ++# Optional script to execute to check the EK and/or EK certificate against an ++# allowlist or any other additional EK processing you want to do. Runs in ++# /var/lib/keylime. You can also specify an absolute path to the script. ++# Script should return 0 if the EK or EK certificate are valid. Any other ++# return value will invalidate the tenant quote check and prevent ++# bootstrapping a key. ++# ++# The various keys are passed to the script via environment variables: ++# EK - contains a PEM encoded version of the public EK ++# EK_CERT - contains a DER encoded EK certificate if one is available. ++# PROVKEYS - contains a json document containing EK, EKcert, and AIK from the ++# provider. EK and AIK are in PEM format. The EKcert is in base64 encoded ++# DER format. ++# ++# Set to blank to disable this check. See warning above if require_ek_cert ++# is "False". ++ek_check_script = {{ tenant.ek_check_script }} ++ ++# Path to file containing the measured boot reference state ++mb_refstate = {{ tenant.mb_refstate }} +diff --git a/templates/2.6/verifier.j2 b/templates/2.6/verifier.j2 +new file mode 100644 +index 0000000..43c8e54 +--- /dev/null ++++ b/templates/2.6/verifier.j2 +@@ -0,0 +1,350 @@ ++# Keylime verifier configuration ++[verifier] ++ ++# The configuration file version number ++version = {{ verifier.version }} ++ ++# Unique identifier for each verifier instance.
++uuid = {{ verifier.uuid }} ++ ++# The binding address and port for the verifier server ++ip = "{{ verifier.ip }}" ++port = {{ verifier.port }} ++ ++# The address and port of registrar server that the verifier communicates with ++registrar_ip = {{ verifier.registrar_ip }} ++registrar_port = {{ verifier.registrar_port }} ++ ++# Enable mTLS communication between agent, verifier and tenant. ++# Details on why setting it to "False" is generally considered insecure can be found ++# on https://github.com/keylime/keylime/security/advisories/GHSA-2m39-75g9-ff5r ++enable_agent_mtls = {{ verifier.enable_agent_mtls }} ++ ++# The 'tls_dir' option defines the directory where the keys and certificates are ++# stored. ++# ++# If set as 'generate', automatically generate a CA, keys, and certificates for ++# the client and the server in the /var/lib/keylime/cv_ca directory, if not ++# present. ++# ++# The 'server_key', 'server_cert', 'client_key', 'client_cert', ++# 'trusted_client_ca', and 'trusted_server_ca' options should all be set with ++# the 'default' keyword when 'generate' keyword is set for 'tls_dir'. ++# ++# If set as 'default', the '/var/lib/keylime/cv_ca' directory is used, which ++# should contain the files indicated by the 'server_key', 'server_cert', ++# 'client_key', 'client_cert', 'trusted_client_ca', and 'trusted_server_ca' ++# options. ++tls_dir = {{ verifier.tls_dir }} ++ ++# The name of the file containing the Keylime verifier server private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used to serve the Keylime verifier REST API ++# ++# If set as 'default', the 'server-private.pem' value is used. ++server_key = {{ verifier.server_key }} ++ ++# Set the password used to decrypt the server private key file. ++# If 'tls_dir = generate', this password will also be used to protect the ++# generated server private key. ++# If left empty, the private key will not be encrypted. 
++server_key_password = {{ verifier.server_key_password }} ++ ++# The name of the file containing the Keylime verifier server certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# ++# If set as 'default', the 'server-cert.crt' value is used. ++server_cert = {{ verifier.server_cert }} ++ ++# Additional Subject Alternative Names (SANs) to include in auto-generated ++# server certificates when 'tls_dir = generate'. ++# ++# This is a comma-separated list of hostnames and/or IP addresses that will be ++# added to the server certificate's SAN extension. This allows clients to verify ++# the server's hostname when connecting. ++# ++# By default, the certificate will automatically include: ++# - localhost, 127.0.0.1, and ::1 ++# - The system's hostname and FQDN ++# - The IP address from the 'ip' option (if not 0.0.0.0 or ::) ++# ++# Use this option to add additional names, such as: ++# - External DNS names (e.g., verifier.example.com) ++# - Load balancer addresses ++# - Additional IP addresses ++# ++# Example: cert_subject_alternative_names = verifier.example.com,10.0.0.5,verifier-internal ++# Leave empty to use only the automatically detected names. ++cert_subject_alternative_names = {{ verifier.cert_subject_alternative_names }} ++ ++# The list of trusted client CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_client_ca = {{ verifier.trusted_client_ca }} ++ ++# The name of the file containing the Keylime verifier client private key. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This private key is used by the Keylime verifier to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-private.pem' value is used. ++client_key = {{ verifier.client_key }} ++ ++# Set the password used to decrypt the client private key file. 
++# If 'tls_dir = generate', this password will also be used to protect the ++# generated client private key. ++# If left empty, the private key will not be encrypted. ++client_key_password = {{ verifier.client_key_password }} ++ ++# The name of the file containing the Keylime verifier client certificate. ++# The file should be stored in the directory set in the 'tls_dir' option. ++# This certificate is used by the Keylime verifier to connect to the other ++# services using TLS. ++# ++# If set as 'default', the 'client-cert.crt' value is used. ++client_cert = {{ verifier.client_cert }} ++ ++# The list of trusted server CA certificates. ++# The files in the list should be stored in the directory set in the 'tls_dir' ++# option. ++# ++# If set as 'default', the value is set as '[cacert.crt]' ++trusted_server_ca = {{ verifier.trusted_server_ca }} ++ ++# Authorization provider to use for access control decisions. ++# ++# Available providers: ++# simple - Default provider implementing 4-category access control: ++# - PUBLIC: No authentication (version info, evidence verification) ++# - AGENT_ONLY: PoP bearer token + resource ownership (attestations) ++# - AGENT_OR_ADMIN: PoP token or mTLS certificate (read own agent) ++# - ADMIN: mTLS client certificate (all management operations) ++# ++# The 'simple' provider enforces strict separation between agent and admin ++# authentication methods: ++# - Agents authenticate via PoP (Proof-of-Possession) bearer tokens only ++# - Admins authenticate via mTLS client certificates only ++# - If an Authorization header is present, mTLS is never used (prevents ++# privilege escalation) ++# ++# SECURITY NOTE: Never distribute client certificates signed by the verifier's ++# trusted CA to agents. Agents should only have PoP tokens for authentication. ++# ++# Certificate requirements: ++# - Pull mode agents: Self-signed server certs are acceptable. If CA-issued, ++# must have Server Authentication EKU only. 
++# - Push mode agents: No client certs from trusted CA. Use PoP tokens only. ++# - Admins: Client certs signed by trusted CA with Client Authentication EKU. ++authorization_provider = {{ verifier.authorization_provider }} ++ ++# Database URL Configuration ++# See this document https://keylime.readthedocs.io/en/latest/installation.html#database-support ++# for instructions on using different database configurations. ++# ++# An example of database_url value for using sqlite: ++# sqlite:////var/lib/keylime/cv_data.sqlite ++# An example of database_url value for using mysql: ++# mysql+pymysql://keylime:keylime@keylime_db:[port]/verifier?charset=utf8 ++# ++# If set as 'sqlite' keyword, will use the configuration set by the file located ++# at "/var/lib/keylime/cv_data.sqlite". ++database_url = {{ verifier.database_url }} ++ ++# Limits for DB connection pool size in sqlalchemy ++# (https://docs.sqlalchemy.org/en/14/core/pooling.html#api-documentation-available-pool-implementations) ++database_pool_sz_ovfl = {{ verifier.database_pool_sz_ovfl }} ++ ++# Whether to automatically update the DB schema using alembic ++auto_migrate_db = {{ verifier.auto_migrate_db }} ++ ++# The number of worker processes to use for the cloud verifier. ++# Set to "0" to create one worker per processor. ++num_workers = {{ verifier.num_workers }} ++ ++# Whether or not to use an exponential backoff algorithm for retries. ++exponential_backoff = {{ verifier.exponential_backoff }} ++ ++# Either how long to wait between failed attempts to connect to a cloud agent ++# in seconds, or the base for the exponential backoff algorithm. ++# Floating point values accepted here. ++retry_interval = {{ verifier.retry_interval }} ++ ++# Number of retries to connect to an agent before giving up. Must be an integer. ++max_retries = {{ verifier.max_retries }} ++ ++# Time between integrity measurement checks, in seconds. If set to "0", checks ++# will be done as fast as possible. Floating point values accepted here. 
++quote_interval = {{ verifier.quote_interval }} ++ ++# The verifier limits the size of upload payloads (allowlists) which defaults to ++# 100MB (104857600 bytes). This setting can be raised (or lowered) based on the ++# size of the actual payloads ++max_upload_size = {{ verifier.max_upload_size }} ++ ++# Timeout in seconds for HTTP requests ++request_timeout = {{ verifier.request_timeout }} ++ ++# The name of the boot attestation policy to use in comparing a measured boot event log ++# with a measured boot reference state. ++# A policy is a Python object that is an instance of `keylime.elchecking.policies.Policy` ++# and was registered by calling `keylime.elchecking.policies.register`. ++# The keylime agent extracts the measured boot event log. ++# The verifier client specifies the measured boot reference state to use; ++# this is specified independently for each agent. ++# Depending on the policy, the same reference state may be usable with multiple agents. ++# The `accept-all` policy ignores the reference state and approves every log. ++measured_boot_policy_name = {{ verifier.measured_boot_policy_name }} ++ ++# This is a list of Python modules to dynamically load, for example to register ++# additional boot attestation policies. ++# Empty strings in the list are ignored. ++# A module here may be relative, in which case it is interpreted ++# relative to the keylime.elchecking package. ++# The default value for this config item is the empty list. ++measured_boot_imports = {{ verifier.measured_boot_imports }} ++ ++# This is used to manage the number of times measured boot attestation ++# is done. In other words, it controls the number of times the call ++# to the measured boot policy engine is made to evaluate the boot log ++# against the policy specified. ++# Here are its possible values and number of bootlog evaluations. ++# once (default) : Bootlog evaluation will be done only once. ++# always : Bootlog evaluation will always be done (i.e. 
for unlimited times). ++measured_boot_evaluate = {{ verifier.measured_boot_evaluate }} ++ ++# Severity labels for revocation events strictly ordered from least severity to ++# highest severity. ++severity_labels = {{ verifier.severity_labels }} ++ ++# Severity policy that matches different event_ids to the severity label. ++# The rules are evaluated from the beginning of the list and the first match is ++# used. The event_id can also be a regex. Default policy assigns the highest ++# severity to all events. ++severity_policy = {{ verifier.severity_policy }} ++ ++# If files are already opened when IMA tries to measure them this causes ++# a time of measure, time of use (ToMToU) error entry. ++# By default we ignore those entries and only print a warning. ++# Set to False to treat ToMToU entries as errors. ++ignore_tomtou_errors = {{ verifier.ignore_tomtou_errors }} ++ ++# Durable Attestation is currently marked as an experimental feature ++# In order to enable Durable Attestation, an "adapter" for a Persistent data Store ++# (time-series like database) needs to be specified. Some example adapters can be ++# found under "da/examples" so, for instance ++# "durable_attestation_import = keylime.da.examples.redis.py" ++# could be used to interact with a Redis (Persistent data Store) ++durable_attestation_import = {{ verifier.durable_attestation_import }} ++ ++# If an adapter for Durable Attestation was specified, then the URL for a Persistent Store ++# needs to be specified here. A second optional URL could be specified, for a ++# Rekor Transparency Log. A third additional URL could be specified, pointing to a ++# Time Stamp Authority (TSA), compatible with RFC3161. Additionally, one might need to ++# specify a path containing certificates required by the stores or TSA. 
Continuing with ++# the above example, the following values could be assigned to the parameters: ++# "persistent_store_url=redis://127.0.0.1:6379?db=10&password=/root/redis.auth&prefix=myda" ++# "transparency_log_url=http://127.0.0.1:3000" ++# "time_stamp_authority_url=http://127.0.0.1:2020" ++# "time_stamp_authority_certs_path=~/mycerts/tsa_cert1.pem" ++persistent_store_url = {{ verifier.persistent_store_url }} ++transparency_log_url = {{ verifier.transparency_log_url }} ++time_stamp_authority_url = {{ verifier.time_stamp_authority_url }} ++time_stamp_authority_certs_path = {{ verifier.time_stamp_authority_certs_path }} ++ ++# If Durable Attestation was enabled, which requires a Persistent Store URL ++# to be specified, the two following parameters control the format and encoding ++# of the stored attestation artifacts (defaults "json" for format and "" for encoding) ++persistent_store_format = {{ verifier.persistent_store_format }} ++persistent_store_encoding = {{ verifier.persistent_store_encoding }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# the digest algorithm for signatures is controlled by this parameter (default "sha256") ++transparency_log_sign_algo = {{ verifier.transparency_log_sign_algo }} ++ ++# If Durable Attestation was enabled and a Transparency Log URL was specified, ++# a keylime administrator can specify some agent attributes (including attestation ++# artifacts, such as quotes and logs) to be signed by the verifier. The use of "all" ++# will result in the whole "package" (agent + artifacts) being signed and leaving it empty ++# will mean no signing should be done. ++signed_attributes = {{ verifier.signed_attributes }} ++ ++# Require that allowlists are signed with a key passed via the tenant tool ++require_allow_list_signatures = {{ verifier.require_allow_list_signatures }} ++ ++# Attestation mode. Can be 'pull' (traditional) or 'push' (agent-driven). 
++# Default: pull ++mode = {{ verifier.mode }} ++# ++# Lifetime in seconds for challenges sent to agents in push mode. ++challenge_lifetime = {{ verifier.challenge_lifetime }} ++# ++# Timeout in seconds for a single evidence verification task (0 = auto). ++verification_timeout = {{ verifier.verification_timeout }} ++# ++# Rate limiting for session creation endpoint (POST /sessions) in push mode. ++# These settings prevent denial-of-service attacks where an attacker floods the verifier ++# with session creation requests. Only applies when mode = 'push'. ++# ++# Maximum number of session creation requests per IP address within the time window. ++# Allows for multiple agents from same IP (testing, NAT scenarios). ++# Default: 50 ++session_create_rate_limit_per_ip = {{ verifier.session_create_rate_limit_per_ip }} ++# ++# Time window in seconds for IP-based rate limiting. ++# Default: 60 ++session_create_rate_limit_window_ip = {{ verifier.session_create_rate_limit_window_ip }} ++# ++# Maximum number of session creation requests per agent_id within the time window. ++# Allows for agent retries (agent default is 3 retries, this allows ~5 auth attempts). ++# Default: 15 ++session_create_rate_limit_per_agent = {{ verifier.session_create_rate_limit_per_agent }} ++# ++# Time window in seconds for agent-based rate limiting. ++# Default: 60 ++session_create_rate_limit_window_agent = {{ verifier.session_create_rate_limit_window_agent }} ++# ++# Lifetime in seconds for authentication session tokens. ++# After this time, agents must re-authenticate to continue submitting attestations. ++# Default: 180 (3 minutes) ++session_lifetime = {{ verifier.session_lifetime }} ++# ++# Whether to automatically extend the session token expiry when an agent ++# submits an attestation. When enabled, active agents won't need to re-authenticate ++# as long as they continue attesting within the session_lifetime window. 
++# Default: true ++extend_token_on_attestation = {{ verifier.extend_token_on_attestation }} ++ ++# Maximum time in seconds to wait for in-flight attestation operations to ++# complete during shutdown. The verifier will wait up to this long for active ++# database writes and state transitions to finish before stopping the event ++# loop. Increasing this value reduces the risk of inconsistent agent state ++# after an unclean shutdown, at the cost of a slower shutdown. ++# Floating point values accepted here. ++shutdown_drain_timeout = {{ verifier.shutdown_drain_timeout }} ++ ++[revocations] ++ ++# List of revocation notification methods to enable. ++# ++# Available methods are: ++# ++# "agent": Deliver notification directly to the agent via the REST ++# protocol. ++# ++# "zeromq": Enable the ZeroMQ based revocation notification method; ++# zmq_ip and zmq_port options must be set. Currently this only works if you are ++# using keylime-CA. ++# ++# "webhook": Send notification via webhook. The endpoint URL must be ++# configured with 'webhook_url' option. This can be used to notify other ++# systems that do not have a Keylime agent running. ++enabled_revocation_notifications = {{ revocations.enabled_revocation_notifications }} ++ ++# The binding address and port of the revocation notifier service via ZeroMQ. ++zmq_ip = {{ revocations.zmq_ip }} ++zmq_port = {{ revocations.zmq_port }} ++ ++# Webhook url for revocation notifications. 
++webhook_url = {{ revocations.webhook_url }} +diff --git a/test/test_shutdown.py b/test/test_shutdown.py +new file mode 100644 +index 0000000..85a10d3 +--- /dev/null ++++ b/test/test_shutdown.py +@@ -0,0 +1,210 @@ ++"""Unit tests for the shutdown coordination module and verifier drain logic.""" ++ ++# pylint: disable=protected-access,import-outside-toplevel ++ ++import asyncio ++import unittest ++from unittest.mock import patch ++ ++from keylime import shutdown ++ ++ ++class TestShutdownFlag(unittest.TestCase): ++ """Test the process-wide shutdown flag.""" ++ ++ def setUp(self) -> None: ++ # Reset the module-level event before each test ++ shutdown._shutdown_event = asyncio.Event() ++ ++ def test_initial_state_not_shutting_down(self) -> None: ++ self.assertFalse(shutdown.is_shutting_down()) ++ ++ def test_request_shutdown_sets_flag(self) -> None: ++ shutdown.request_shutdown() ++ self.assertTrue(shutdown.is_shutting_down()) ++ ++ def test_request_shutdown_is_idempotent(self) -> None: ++ shutdown.request_shutdown() ++ shutdown.request_shutdown() ++ self.assertTrue(shutdown.is_shutting_down()) ++ ++ ++class TestOperationTracking(unittest.TestCase): ++ """Test _enter_operation / _exit_operation and drain logic.""" ++ ++ def setUp(self) -> None: ++ # Import here so we can reset module globals ++ from keylime import cloud_verifier_tornado as cvt ++ ++ self.cvt = cvt ++ # Save and reset module state ++ self._saved_active = cvt._active_operations ++ self._saved_event = cvt._operations_drained ++ cvt._active_operations = 0 ++ cvt._operations_drained = asyncio.Event() ++ cvt._operations_drained.set() ++ ++ def tearDown(self) -> None: ++ self.cvt._active_operations = self._saved_active ++ self.cvt._operations_drained = self._saved_event ++ ++ def test_initial_state_is_drained(self) -> None: ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_enter_increments_and_clears_drain(self) -> None: ++ 
self.cvt._enter_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 1) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ def test_exit_decrements_and_signals_drain(self) -> None: ++ self.cvt._enter_operation() ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_multiple_operations_drain_on_last_exit(self) -> None: ++ self.cvt._enter_operation() ++ self.cvt._enter_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 2) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 1) ++ self.assertFalse(self.cvt._operations_drained.is_set()) ++ ++ self.cvt._exit_operation() ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ self.assertTrue(self.cvt._operations_drained.is_set()) ++ ++ def test_wait_for_drain_returns_true_when_already_drained(self) -> None: ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(self.cvt.wait_for_drain(1.0)) ++ self.assertTrue(result) ++ finally: ++ loop.close() ++ ++ def test_wait_for_drain_returns_true_after_exit(self) -> None: ++ self.cvt._enter_operation() ++ ++ async def _drain_after_delay() -> bool: ++ async def _exit_soon() -> None: ++ await asyncio.sleep(0.05) ++ self.cvt._exit_operation() ++ ++ asyncio.ensure_future(_exit_soon()) ++ return await self.cvt.wait_for_drain(2.0) ++ ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(_drain_after_delay()) ++ self.assertTrue(result) ++ self.assertEqual(self.cvt.get_active_operations(), 0) ++ finally: ++ loop.close() ++ ++ def test_wait_for_drain_returns_false_on_timeout(self) -> None: ++ self.cvt._enter_operation() ++ ++ loop = asyncio.new_event_loop() ++ try: ++ result = loop.run_until_complete(self.cvt.wait_for_drain(0.1)) ++ self.assertFalse(result) ++ finally: ++ loop.close() ++ ++ ++class 
TestPendingEventRegistry(unittest.TestCase): ++ """Test _register_pending_event / _cancel_pending_event / cancel_all.""" ++ ++ def setUp(self) -> None: ++ from keylime import cloud_verifier_tornado as cvt ++ ++ self.cvt = cvt ++ self._saved_pending = dict(cvt._pending_events) ++ cvt._pending_events.clear() ++ ++ def tearDown(self) -> None: ++ self.cvt._pending_events.clear() ++ self.cvt._pending_events.update(self._saved_pending) ++ ++ def _make_agent(self, agent_id: str = "test-agent-1") -> dict: ++ return {"agent_id": agent_id, "pending_event": None} ++ ++ def test_register_tracks_in_both_locations(self) -> None: ++ agent = self._make_agent() ++ handle = object() ++ self.cvt._register_pending_event(agent, handle) ++ ++ self.assertIs(agent["pending_event"], handle) ++ self.assertIs(self.cvt._pending_events["test-agent-1"], handle) ++ ++ def test_cancel_clears_both_locations(self) -> None: ++ agent = self._make_agent() ++ handle = object() ++ self.cvt._register_pending_event(agent, handle) ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.cvt._cancel_pending_event(agent) ++ ++ self.assertIsNone(agent["pending_event"]) ++ self.assertNotIn("test-agent-1", self.cvt._pending_events) ++ ++ def test_cancel_noop_when_no_pending_event(self) -> None: ++ agent = self._make_agent() ++ # Should not raise ++ self.cvt._cancel_pending_event(agent) ++ self.assertIsNone(agent["pending_event"]) ++ ++ def test_cancel_all_clears_registry(self) -> None: ++ agents = [self._make_agent(f"agent-{i}") for i in range(3)] ++ for i, agent in enumerate(agents): ++ self.cvt._register_pending_event(agent, object()) ++ ++ self.assertEqual(len(self.cvt._pending_events), 3) ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.cvt.cancel_all_pending_events() ++ ++ self.assertEqual(len(self.cvt._pending_events), 0) ++ ++ def test_cancel_all_noop_when_empty(self) -> None: ++ # Should not raise ++ self.cvt.cancel_all_pending_events() ++ ++ ++class 
TestPushAgentMonitorCancelAll(unittest.TestCase): ++ """Test cancel_all_timeouts in push_agent_monitor.""" ++ ++ def setUp(self) -> None: ++ from keylime import push_agent_monitor ++ ++ self.pam = push_agent_monitor ++ with self.pam._agent_timeout_handles_lock: ++ self._saved = dict(self.pam._agent_timeout_handles) ++ self.pam._agent_timeout_handles.clear() ++ ++ def tearDown(self) -> None: ++ with self.pam._agent_timeout_handles_lock: ++ self.pam._agent_timeout_handles.clear() ++ self.pam._agent_timeout_handles.update(self._saved) ++ ++ def test_cancel_all_clears_handles(self) -> None: ++ with self.pam._agent_timeout_handles_lock: ++ self.pam._agent_timeout_handles["a1"] = object() ++ self.pam._agent_timeout_handles["a2"] = object() ++ ++ with patch("tornado.ioloop.IOLoop.current"): ++ self.pam.cancel_all_timeouts() ++ ++ with self.pam._agent_timeout_handles_lock: ++ self.assertEqual(len(self.pam._agent_timeout_handles), 0) ++ ++ def test_cancel_all_noop_when_empty(self) -> None: ++ # Should not raise ++ self.pam.cancel_all_timeouts() ++ ++ ++if __name__ == "__main__": ++ unittest.main() +diff --git a/test/test_verifier_server.py b/test/test_verifier_server.py +index da0feae..e9a47ef 100644 +--- a/test/test_verifier_server.py ++++ b/test/test_verifier_server.py +@@ -256,51 +256,39 @@ class TestVerifierServerEngineDisposal(unittest.TestCase): + "_prepare_agents_on_startup should document why engine disposal is needed", + ) + +- def test_start_multi_resets_verifier_config_after_fork(self): +- """Verify start_multi() resets verifier config in each worker after forking.""" ++ def test_post_fork_resets_verifier_config(self): ++ """Verify _post_fork() resets verifier config to clear inherited database state.""" + # Read the source code + server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() + +- # Find the start_multi method +- pattern = r"def 
start_multi\(self\).*?(?=\n def |\Z)" ++ # Find the _post_fork method ++ pattern = r"def _post_fork\(self.*?\).*?(?=\n def |\Z)" + match = re.search(pattern, source, re.DOTALL) + +- self.assertIsNotNone(match, "start_multi method not found") ++ self.assertIsNotNone(match, "_post_fork method not found") + assert match is not None + + method_body = match.group(0) + +- # Should fork processes +- self.assertIn( +- "fork_processes", +- method_body, +- "start_multi should call tornado.process.fork_processes", +- ) +- +- # After fork, should reset verifier config (which handles engine disposal) +- # Look for the pattern after fork_processes() +- fork_index = method_body.find("fork_processes") +- after_fork = method_body[fork_index:] +- + self.assertIn( + "reset_verifier_config()", +- after_fork, +- "start_multi must call reset_verifier_config() after forking to clear inherited database state", ++ method_body, ++ "_post_fork must call reset_verifier_config() to clear inherited database state", + ) + + self.assertIn( + "cloud_verifier_tornado.reset_verifier_config()", +- after_fork, +- "start_multi should call cloud_verifier_tornado.reset_verifier_config() after forking", ++ method_body, ++ "_post_fork should call cloud_verifier_tornado.reset_verifier_config()", + ) + +- def test_verifier_config_reset_happens_before_worker_operations(self): +- """Verify verifier config reset occurs after fork but before any worker operations.""" +- # Read the source code +- server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") ++ def test_base_server_calls_post_fork_before_start_single(self): ++ """Verify base Server.start_multi() calls _post_fork() after fork and before start_single().""" ++ # Read the base server source code ++ server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "base", "server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() +@@ -314,53 +302,49 @@ class 
TestVerifierServerEngineDisposal(unittest.TestCase): + + # Extract the order of operations + fork_index = method_body.find("fork_processes") +- reset_index = method_body.find("reset_verifier_config()") +- start_single_index = method_body.find("self.start_single()") ++ post_fork_index = method_body.find("_post_fork") ++ start_single_index = method_body.find("start_single()") + + # All should be present + self.assertNotEqual(fork_index, -1, "fork_processes call not found") +- self.assertNotEqual(reset_index, -1, "reset_verifier_config() call not found") ++ self.assertNotEqual(post_fork_index, -1, "_post_fork() call not found") + self.assertNotEqual(start_single_index, -1, "start_single() call not found") + +- # Correct order: fork -> reset_verifier_config -> start_single ++ # Correct order: fork -> _post_fork -> start_single + self.assertLess( + fork_index, +- reset_index, +- "Verifier config reset must happen AFTER forking", ++ post_fork_index, ++ "_post_fork must be called AFTER forking", + ) + self.assertLess( +- reset_index, ++ post_fork_index, + start_single_index, +- "Verifier config reset must happen BEFORE starting worker server", ++ "_post_fork must be called BEFORE starting worker server", + ) + +- def test_reset_pattern_is_documented(self): +- """Verify reset_verifier_config() pattern is documented.""" ++ def test_post_fork_is_documented(self): ++ """Verify _post_fork() documents why reset_verifier_config() is needed.""" + # Read the source code + server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() + +- # Find the start_multi method +- pattern = r"def start_multi\(self\).*?(?=\n def |\Z)" ++ # Find the _post_fork method ++ pattern = r"def _post_fork\(self.*?\).*?(?=\n def |\Z)" + match = re.search(pattern, source, re.DOTALL) + + assert match is not None + method_body = match.group(0) + +- # Should document why reset is needed after fork +- 
fork_index = method_body.find("fork_processes") +- after_fork = method_body[fork_index:] +- + # Should mention critical concepts: reset, inherited state, parent process + critical_terms = ["reset", "inherit", "parent", "database"] +- found_terms = [term for term in critical_terms if term.lower() in after_fork.lower()] ++ found_terms = [term for term in critical_terms if term.lower() in method_body.lower()] + + self.assertGreaterEqual( + len(found_terms), + 3, +- f"start_multi should document why reset_verifier_config() is needed after fork. " ++ f"_post_fork should document why reset_verifier_config() is needed after fork. " + f"Expected mentions of reset/inherit/parent/database, found: {found_terms}", + ) + +@@ -398,9 +382,9 @@ class TestEngineDisposalDocumentation(unittest.TestCase): + f"Expected mentions of fork/connection/dispose/parent/child, found: {found_terms}", + ) + +- def test_start_multi_documents_disposal_reason(self): +- """Verify start_multi() documents why global engine disposal is needed.""" +- server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "verifier_server.py") ++ def test_base_start_multi_documents_disposal_reason(self): ++ """Verify base Server.start_multi() documents why engine disposal after fork is needed.""" ++ server_path = os.path.join(os.path.dirname(__file__), "..", "keylime", "web", "base", "server.py") + + with open(server_path, encoding="utf-8") as f: + source = f.read() +@@ -416,15 +400,15 @@ class TestEngineDisposalDocumentation(unittest.TestCase): + fork_index = method_body.find("fork_processes") + after_fork = method_body[fork_index:] + +- critical_terms = ["inherit", "corrupt", "dispose", "worker", "parent"] ++ critical_terms = ["inherit", "connection", "dispose", "worker", "parent"] + + found_terms = [term for term in critical_terms if term.lower() in after_fork.lower()] + + self.assertGreaterEqual( + len(found_terms), + 2, +- f"start_multi should document why global engine disposal after fork is 
critical. " +- f"Expected mentions of inherit/corrupt/dispose/worker/parent, found: {found_terms}", ++ f"start_multi should document why engine disposal after fork is critical. " ++ f"Expected mentions of inherit/connection/dispose/worker/parent, found: {found_terms}", + ) + + +-- +2.53.0 + diff --git a/0018-ignore-sigterm-sigint-manager-parent-processes.patch b/0018-ignore-sigterm-sigint-manager-parent-processes.patch new file mode 100644 index 0000000..761d88a --- /dev/null +++ b/0018-ignore-sigterm-sigint-manager-parent-processes.patch @@ -0,0 +1,151 @@ +From 15f20d2dd2e63cc621295befef46bc4161a1f636 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Fri, 10 Apr 2026 13:22:44 +0200 +Subject: [PATCH] shared_data: Ignore SIGTERM and SIGINT on Manager and parent + processes + +When systemd stops the verifier (or registrar), SIGTERM is delivered to +the entire process group, including the multiprocessing Manager's server +process that hosts the shared policy cache. The Manager dies +immediately, but worker processes still have in-flight process_agent() +coroutines that need the cache, causing ConnectionResetError. + +The same race occurs with SIGINT (Ctrl+C) when running the daemon in +the foreground. + +Fix this in two parts: + +1. Use SyncManager.start(initializer=...) to install SIG_IGN for both + SIGTERM and SIGINT in the Manager's server process, so it survives + process-group signals and stays available while workers drain. + +2. Ignore SIGTERM and SIGINT in the new architecture's parent process + (start_multi) so it stays in tornado's monitor loop until all + children have drained and exited. Once all children exit, tornado + calls sys.exit(0), triggering atexit handlers which shut down the + Manager via IPC. Without this, the default signal disposition kills + the parent immediately (no atexit), leaving the Manager orphaned. 
+ +Resolves: #1882 + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/shared_data.py | 28 +++++++++++++++++++++++++++- + keylime/web/base/server.py | 18 ++++++++++++++++++ + test/test_verifier_server.py | 11 ++++++++--- + 3 files changed, 53 insertions(+), 4 deletions(-) + +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index 09cbb97bb..494f2f53b 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -8,8 +8,10 @@ + import multiprocessing as mp + import multiprocessing.process + import os ++import signal + import threading + import time ++from multiprocessing.managers import SyncManager + from typing import Any, Dict, List, Optional + + from keylime import keylime_logging +@@ -17,6 +19,17 @@ + logger = keylime_logging.init_logging("shared_data") + + ++def _manager_ignore_signals() -> None: ++ """Ignore SIGTERM and SIGINT in the Manager's server process. ++ ++ Called as the ``initializer`` for ``SyncManager.start()`` so that ++ the Manager survives process-group signals (systemd SIGTERM, Ctrl+C) ++ and stays available while workers drain in-flight work. ++ """ ++ signal.signal(signal.SIGTERM, signal.SIG_IGN) ++ signal.signal(signal.SIGINT, signal.SIG_IGN) ++ ++ + class FlatDictView: + """A dictionary-like view over a flat key-value store. + +@@ -127,7 +140,20 @@ def __init__(self) -> None: + # Use explicit context to ensure fork compatibility + # The Manager must be started BEFORE any fork() calls + ctx = mp.get_context("fork") +- self._manager = ctx.Manager() ++ # Use SyncManager directly (instead of the ctx.Manager() shortcut) ++ # so we can pass an initializer that makes the Manager's server ++ # process ignore SIGTERM and SIGINT. Without this, systemd's ++ # cgroup-wide SIGTERM (or Ctrl+C SIGINT in foreground) kills the ++ # Manager before workers finish draining, causing ++ # ConnectionResetError in proxy objects. 
The Manager is still ++ # cleanable via IPC shutdown message, process.kill(), or systemd ++ # SIGKILL escalation. ++ # Cannot use 'with' context manager here: the Manager must outlive ++ # __init__ and persist for the lifetime of SharedDataManager. ++ self._manager = SyncManager(ctx=ctx) ++ self._manager.start( # pylint: disable=consider-using-with ++ initializer=_manager_ignore_signals, ++ ) + + # CRITICAL FIX: Use a SINGLE flat dict instead of nested dicts + # Nested DictProxy objects have synchronization issues +diff --git a/keylime/web/base/server.py b/keylime/web/base/server.py +index 4dd02b79e..8e9cce69d 100644 +--- a/keylime/web/base/server.py ++++ b/keylime/web/base/server.py +@@ -376,12 +376,30 @@ def start_multi(self) -> None: + + self._pre_fork() + ++ # Ignore SIGTERM/SIGINT in the parent so it stays in tornado's ++ # monitor loop (os.wait) until all children have drained and ++ # exited cleanly. Once all children exit, tornado calls ++ # sys.exit(0) which triggers atexit → SharedDataManager.cleanup() ++ # → Manager shutdown via IPC. Without this, the default signal ++ # disposition kills the parent immediately (no atexit), leaving ++ # the Manager process orphaned. ++ # Children inherit SIG_IGN but override it in ++ # _install_signal_handlers() before entering the event loop. ++ signal.signal(signal.SIGTERM, signal.SIG_IGN) ++ signal.signal(signal.SIGINT, signal.SIG_IGN) ++ + # with StatsCollector(): + # num = manager.Value('i', 0) + task_id = tornado.process.fork_processes(self.worker_count) + # num.value = num.value + 1 + # print(num.value) + ++ # Restore default signal disposition in children so they don't ++ # silently ignore SIGTERM/SIGINT before _install_signal_handlers() ++ # replaces these with asyncio-based handlers in start_single(). 
++ signal.signal(signal.SIGTERM, signal.SIG_DFL) ++ signal.signal(signal.SIGINT, signal.SIG_DFL) ++ + # Remove the Manager's server process from multiprocessing's child + # tracking so Python's atexit handler does not try to join() it in + # child workers (the Manager was spawned by the parent). +diff --git a/test/test_verifier_server.py b/test/test_verifier_server.py +index e9a47ef70..7601b9cb0 100644 +--- a/test/test_verifier_server.py ++++ b/test/test_verifier_server.py +@@ -300,10 +300,15 @@ def test_base_server_calls_post_fork_before_start_single(self): + assert match is not None + method_body = match.group(0) + ++ # Strip comment lines to avoid false matches from mentions ++ # in comments (e.g. "# ... before start_single()"). ++ code_lines = [line for line in method_body.splitlines() if not line.lstrip().startswith("#")] ++ code_body = "\n".join(code_lines) ++ + # Extract the order of operations +- fork_index = method_body.find("fork_processes") +- post_fork_index = method_body.find("_post_fork") +- start_single_index = method_body.find("start_single()") ++ fork_index = code_body.find("fork_processes") ++ post_fork_index = code_body.find("_post_fork") ++ start_single_index = code_body.find("start_single()") + + # All should be present + self.assertNotEqual(fork_index, -1, "fork_processes call not found") diff --git a/0019-move-socket-var-run.patch b/0019-move-socket-var-run.patch new file mode 100644 index 0000000..71755da --- /dev/null +++ b/0019-move-socket-var-run.patch @@ -0,0 +1,348 @@ +From a50c7e50171d8f5999bdd927b6306f6d14974c57 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Thu, 16 Apr 2026 14:14:06 +0200 +Subject: [PATCH 1/2] shared_data: Move SyncManager socket to /var/run/keylime/ + +The SyncManager's server process creates a Unix domain socket for IPC +with worker processes. By default, this socket was placed in /tmp with +a random name (listener-*). + +Move the socket to /var/run/keylime/, following standard daemon +practice. 
Keylime already uses this directory for its ZeroMQ revocation +notification socket. + +Changes: +- Pass explicit address to SyncManager so the socket is created at + /var/run/keylime/shared_data.<pid>.sock instead of /tmp/listener-* +- Add _ensure_runtime_dir() to create or validate the directory +- Add test conftest.py to redirect sockets to a temp directory +- Add pytest to test-requirements.txt for pylint to resolve imports + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + keylime/shared_data.py | 54 +++++++++++++++++++++++++++++++++++++----- + test-requirements.txt | 1 + + test/conftest.py | 30 +++++++++++++++++++++++ + 3 files changed, 79 insertions(+), 6 deletions(-) + create mode 100644 test/conftest.py + +diff --git a/keylime/shared_data.py b/keylime/shared_data.py +index 494f2f53b..aef39bcc4 100644 +--- a/keylime/shared_data.py ++++ b/keylime/shared_data.py +@@ -18,6 +18,23 @@ + + logger = keylime_logging.init_logging("shared_data") + ++_RUNTIME_DIR = "/var/run/keylime" ++ ++ ++def _ensure_runtime_dir() -> None: ++ """Ensure the runtime directory exists with correct permissions. ++ ++ Under systemd, ``tmpfiles.d`` creates ``/var/run/keylime/`` at boot. ++ This function provides a fallback for non-systemd execution and ++ validates permissions in either case. ++ """ ++ os.makedirs(_RUNTIME_DIR, mode=0o700, exist_ok=True) ++ perms = os.stat(_RUNTIME_DIR).st_mode & 0o777 ++ if perms != 0o700 or not os.access(_RUNTIME_DIR, os.W_OK | os.X_OK): ++ msg = f"{_RUNTIME_DIR} is not usable by the current process" ++ logger.error(msg) ++ raise PermissionError(msg) ++ + + def _manager_ignore_signals() -> None: + """Ignore SIGTERM and SIGINT in the Manager's server process. 
+@@ -137,8 +154,20 @@ def __init__(self) -> None: + """ + logger.debug("Initializing SharedDataManager") + +- # Use explicit context to ensure fork compatibility +- # The Manager must be started BEFORE any fork() calls ++ # Ensure /var/run/keylime/ exists with correct permissions ++ # before forking the Manager server process. ++ _ensure_runtime_dir() ++ self._socket_path = os.path.join(_RUNTIME_DIR, f"shared_data.{os.getpid()}.sock") ++ ++ # Remove stale socket from a previous run (e.g. after a crash). ++ # CPython's SocketListener does not pre-unlink before bind(). ++ try: ++ os.unlink(self._socket_path) ++ except (FileNotFoundError, PermissionError): ++ pass ++ ++ # Use explicit context to ensure fork compatibility. ++ # The Manager must be started BEFORE any fork() calls. + ctx = mp.get_context("fork") + # Use SyncManager directly (instead of the ctx.Manager() shortcut) + # so we can pass an initializer that makes the Manager's server +@@ -150,7 +179,7 @@ def __init__(self) -> None: + # SIGKILL escalation. + # Cannot use 'with' context manager here: the Manager must outlive + # __init__ and persist for the lifetime of SharedDataManager. 
+- self._manager = SyncManager(ctx=ctx) ++ self._manager = SyncManager(address=self._socket_path, ctx=ctx) + self._manager.start( # pylint: disable=consider-using-with + initializer=_manager_ignore_signals, + ) +@@ -162,8 +191,6 @@ def __init__(self) -> None: + self._lock = self._manager.Lock() + self._initialized_at = time.time() + +- # Register handler to reinitialize manager connection after fork +- # This is needed because Manager uses network connections that don't survive fork + try: + self._parent_pid = os.getpid() + logger.debug("SharedDataManager initialized in process %d", self._parent_pid) +@@ -173,7 +200,10 @@ def __init__(self) -> None: + # Ensure cleanup on exit + atexit.register(self.cleanup) + +- logger.info("SharedDataManager initialized successfully") ++ logger.info( ++ "SharedDataManager initialized successfully (socket: %s)", ++ self._socket_path, ++ ) + + def set_data(self, key: str, value: Any) -> None: + """Store arbitrary pickleable data by key. +@@ -333,6 +363,18 @@ def cleanup(self) -> None: + except Exception: + logger.exception("Error during SharedDataManager shutdown") + ++ # Remove socket file if it still exists. The Manager server ++ # process normally unlinks it on exit, but if it was killed ++ # (SIGKILL) the file may be left behind. ++ socket_path = getattr(self, "_socket_path", None) ++ if socket_path: ++ try: ++ os.unlink(socket_path) ++ except FileNotFoundError: ++ pass ++ except OSError as e: ++ logger.debug("Could not remove socket file %s: %s", socket_path, e) ++ + def deregister_child(self) -> None: + """Remove the Manager's server process from multiprocessing's child tracking. 
+ +diff --git a/test-requirements.txt b/test-requirements.txt +index bdd44e3e9..bf74580a9 100644 +--- a/test-requirements.txt ++++ b/test-requirements.txt +@@ -1,6 +1,7 @@ + dbus-python + # modules required for pylint + setuptools ++pytest + # packages required for mypy + sqlalchemy-stubs + types-python-dateutil +diff --git a/test/conftest.py b/test/conftest.py +new file mode 100644 +index 000000000..da2843922 +--- /dev/null ++++ b/test/conftest.py +@@ -0,0 +1,30 @@ ++"""Shared pytest fixtures for keylime tests.""" ++ ++import shutil ++import tempfile ++from unittest.mock import patch ++ ++import pytest ++ ++from keylime.shared_data import cleanup_global_shared_memory ++ ++ ++@pytest.fixture(autouse=True) ++def _shared_data_runtime_dir(): ++ """Redirect SharedDataManager sockets to a temporary directory. ++ ++ The SyncManager creates Unix domain sockets in /var/run/keylime/, ++ which may not be writable by the test user. This fixture patches ++ the runtime directory to a per-test temp directory so that tests ++ work in any environment. ++ ++ After each test, any global SharedDataManager is shut down to ++ prevent stale managers from referencing deleted temp directories. ++ """ ++ tmpdir = tempfile.mkdtemp() ++ with patch("keylime.shared_data._RUNTIME_DIR", tmpdir): ++ yield ++ # Shut down any global SharedDataManager left alive by the test ++ # so the next test starts fresh with a new temp directory. ++ cleanup_global_shared_memory() ++ shutil.rmtree(tmpdir, ignore_errors=True) + +From 712ab6c841e258e463f858904bfc0991f704a3b9 Mon Sep 17 00:00:00 2001 +From: Anderson Toshiyuki Sasaki +Date: Thu, 16 Apr 2026 14:14:45 +0200 +Subject: [PATCH 2/2] installer: Add tmpfiles.d config for all keylime + directories + +Add keylime-tmpfiles.conf to manage all keylime directories. 
+ +This includes: + +- /var/run/keylime (runtime IPC sockets) +- /var/lib/keylime (persistent state) +- /etc/keylime and config snippet directories (configuration) +- TPM certificate store copy from /usr/share to /var/lib + +Simplify installer.sh to avoid redundant directory creation and +ownership setting. The installer only needs to install the tmpfiles.d +config to /usr/lib/tmpfiles.d/keylime.conf and apply it immediately with +systemd-tmpfiles --create so the directories exist before the services +start. + +The installer validates the TPM cert store source exists before copying +and includes a non-systemd fallback for manual directory creation. + +Co-Authored-By: Claude Opus 4.6 +Signed-off-by: Anderson Toshiyuki Sasaki +--- + services/installer.sh | 61 ++++++++++++++++++++++++++-------- + services/keylime-tmpfiles.conf | 40 ++++++++++++++++++++++ + 2 files changed, 87 insertions(+), 14 deletions(-) + create mode 100644 services/keylime-tmpfiles.conf + +diff --git a/services/installer.sh b/services/installer.sh +index f34027c61..f462f136b 100755 +--- a/services/installer.sh ++++ b/services/installer.sh +@@ -11,7 +11,7 @@ fi + BASEDIR=$(dirname "$0") + + # check keylime scripts directory (same for verifier, agent, registrar) +-KEYLIMEDIR=$(dirname $(whereis keylime_verifier | cut -d " " -f 2)) ++KEYLIMEDIR=$(dirname "$(whereis keylime_verifier | cut -d " " -f 2)") + if [[ $KEYLIMEDIR == "." 
]]; then + echo "Unable to find keylime scripts" 1>&2 + exit 1 +@@ -20,8 +20,8 @@ fi + echo "Using keylime scripts directory: ${KEYLIMEDIR}" + + # prepare keylime service files and store them in systemd path +-sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" $BASEDIR/keylime_registrar.service.template > /etc/systemd/system/keylime_registrar.service +-sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" $BASEDIR/keylime_verifier.service.template > /etc/systemd/system/keylime_verifier.service ++sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" "$BASEDIR/keylime_registrar.service.template" > /etc/systemd/system/keylime_registrar.service ++sed "s|KEYLIMEDIR|$KEYLIMEDIR|g" "$BASEDIR/keylime_verifier.service.template" > /etc/systemd/system/keylime_verifier.service + + echo "Creating keylime user if it not exists" + if ! getent passwd keylime >/dev/null; then +@@ -30,23 +30,56 @@ if ! getent passwd keylime >/dev/null; then + keylime + fi + +-echo "Changing files to be owned by the keylime user" +-# Create all directories required if not there +-mkdir -p /var/lib/keylime +-mkdir -p /var/log/keylime +-mkdir -p /var/run/keylime ++# install TPM certificate store to /usr/share/keylime/ ++# tmpfiles.d will copy this to /var/lib/keylime/tpm_cert_store ++TPM_CERT_STORE_SRC="$BASEDIR/../tpm_cert_store" ++if [[ ! 
-d "$TPM_CERT_STORE_SRC" ]]; then ++ echo "Missing TPM certificate store: $TPM_CERT_STORE_SRC" 1>&2 ++ exit 1 ++fi ++ ++mkdir -p /usr/share/keylime ++cp -a "$TPM_CERT_STORE_SRC" /usr/share/keylime/ || exit 1 + +-chown keylime:keylime -R /etc/keylime +-chown keylime:keylime -R /var/lib/keylime +-chown keylime:keylime -R /var/log/keylime +-chown keylime:keylime -R /var/run/keylime ++# install tmpfiles.d config for keylime directories ++mkdir -p /usr/lib/tmpfiles.d ++cp "$BASEDIR/keylime-tmpfiles.conf" /usr/lib/tmpfiles.d/keylime.conf ++ ++# apply the tmpfiles.d config immediately to create directories with correct ownership ++if command -v systemd-tmpfiles >/dev/null 2>&1; then ++ systemd-tmpfiles --create keylime.conf ++else ++ echo "Warning: systemd-tmpfiles not found, creating directories manually" ++ # Create essential directories as fallback for non-systemd systems ++ mkdir -p /var/run/keylime /var/lib/keylime \ ++ /etc/keylime/ca.conf.d \ ++ /etc/keylime/logging.conf.d \ ++ /etc/keylime/verifier.conf.d \ ++ /etc/keylime/registrar.conf.d \ ++ /etc/keylime/tenant.conf.d \ ++ /etc/keylime/agent.conf.d ++ chown keylime:keylime /var/run/keylime /var/lib/keylime ++ chmod 700 /var/run/keylime /var/lib/keylime ++ # Mirror tmpfiles.d Z/z semantics: recursively set ownership and ++ # file permissions under /etc/keylime, then fix directories to 0500. ++ chown -R keylime:keylime /etc/keylime ++ find /etc/keylime -type f -exec chmod 400 {} \; ++ find /etc/keylime -type d -exec chmod 500 {} \; ++ # Copy TPM cert store from /usr/share to /var/lib only if the ++ # target does not exist yet (mirrors the tmpfiles.d C directive). ++ # This preserves operator-added EK certificates. ++ if [ -d /usr/share/keylime/tpm_cert_store ] && [ ! 
-d /var/lib/keylime/tpm_cert_store ]; then ++ cp -r /usr/share/keylime/tpm_cert_store /var/lib/keylime/ ++ chown -R keylime:keylime /var/lib/keylime/tpm_cert_store ++ find /var/lib/keylime/tpm_cert_store -type f -exec chmod 400 {} \; ++ chmod 500 /var/lib/keylime/tpm_cert_store ++ fi ++fi + + # set permissions + chmod 664 /etc/systemd/system/keylime_registrar.service + chmod 664 /etc/systemd/system/keylime_verifier.service + +-chmod 700 /var/run/keylime +- + # enable at startup + systemctl enable keylime_registrar.service + systemctl enable keylime_verifier.service +diff --git a/services/keylime-tmpfiles.conf b/services/keylime-tmpfiles.conf +new file mode 100644 +index 000000000..f3c0b43d6 +--- /dev/null ++++ b/services/keylime-tmpfiles.conf +@@ -0,0 +1,40 @@ ++d /run/keylime 0700 keylime keylime - ++ ++d /var/lib/keylime 0700 keylime keylime - ++ ++d /etc/keylime 0500 keylime keylime - ++d /etc/keylime/ca.conf.d 0500 keylime keylime - ++d /etc/keylime/logging.conf.d 0500 keylime keylime - ++d /etc/keylime/verifier.conf.d 0500 keylime keylime - ++d /etc/keylime/registrar.conf.d 0500 keylime keylime - ++d /etc/keylime/tenant.conf.d 0500 keylime keylime - ++d /etc/keylime/agent.conf.d 0500 keylime keylime - ++ ++# TPM certificate store. ++# Copy the cert store from /usr/share/keylime/tpm_cert_store ++# to /var/lib/keylime/tpm_cert_store. ++# Files inside /var/lib/keylime/tpm_cert_store/ have ++# 0400 permission and are owned by keylime/keylime, ++# while /var/lib/keylime/tpm_cert_store/ itself has ++# permission 0500, also owned by keylime/keylime. ++C /var/lib/keylime/tpm_cert_store 0500 keylime keylime - /usr/share/keylime/tpm_cert_store ++Z /var/lib/keylime/tpm_cert_store 0400 keylime keylime - ++z /var/lib/keylime/tpm_cert_store 0500 keylime keylime - ++# Finally, /var/lib/keylime itself has 0700 permission, ++# and is owned by keylime/keylime. 
++z /var/lib/keylime 0700 keylime keylime - ++ ++# Keylime configuration in /etc/keylime has permission 0400 ++# owned by keylime/keylime, while snippet directories and ++# the actual /etc/keylime directory have permission 0500, ++# also owned by keylime/keylime. ++Z /etc/keylime 0400 keylime keylime - ++# Now fix the directories: ++z /etc/keylime/ca.conf.d 0500 keylime keylime - ++z /etc/keylime/logging.conf.d 0500 keylime keylime - ++z /etc/keylime/verifier.conf.d 0500 keylime keylime - ++z /etc/keylime/registrar.conf.d 0500 keylime keylime - ++z /etc/keylime/tenant.conf.d 0500 keylime keylime - ++z /etc/keylime/agent.conf.d 0500 keylime keylime - ++# And finally, /etc/keylime itself. ++z /etc/keylime 0500 keylime keylime - diff --git a/keylime.spec b/keylime.spec index 30e364c..3c5aaa6 100644 --- a/keylime.spec +++ b/keylime.spec @@ -1,5 +1,5 @@ %global srcname keylime -%global policy_version 43.1.1 +%global policy_version 43.2.1 # Package is actually noarch, but it has an optional dependency that is # arch-specific. @@ -9,7 +9,7 @@ Name: keylime Version: 7.14.1 -Release: 4%{?dist} +Release: 5%{?dist} Summary: Open source TPM software for Bootstrapping and Maintaining Trust URL: https://github.com/keylime/keylime @@ -39,6 +39,21 @@ Patch: 0012-fix-mem-leak-remove-unbounded-functools.cache-from-l.patch # Backport https://github.com/keylime/keylime/pulls/1874 Patch: 0013-fix-verifier-race-condition-on-agent-delete.patch +# RHEL-151493 - verifier graceful shutdown. 
+# Backport: +# - https://github.com/keylime/keylime/pull/1809 +# - https://github.com/keylime/keylime/pull/1868 +# - https://github.com/keylime/keylime/pull/1855 +# - https://github.com/keylime/keylime/pull/1869 +# - https://github.com/keylime/keylime/pull/1883 +# - https://github.com/keylime/keylime/pull/1886 +Patch: 0014-push-attestation-documentation.patch +Patch: 0015-remove-enable-authentication-config-option.patch +Patch: 0016-docs-push-attestation-config-tables.patch +Patch: 0017-verifier-graceful-shutdown.patch +Patch: 0018-ignore-sigterm-sigint-manager-parent-processes.patch +Patch: 0019-move-socket-var-run.patch + # Main program: Apache-2.0 # Icons: MIT License: Apache-2.0 AND MIT @@ -59,6 +74,7 @@ BuildRequires: python3-tornado BuildRequires: python3-sqlalchemy BuildRequires: python3-lark BuildRequires: python3-psutil +BuildRequires: python3-pytest BuildRequires: python3-pyyaml BuildRequires: python3-jsonschema BuildRequires: python3-setuptools @@ -316,7 +332,7 @@ export KEYLIME_CA_CONFIG="${CONF_TEMP_DIR}/ca.conf" export KEYLIME_LOGGING_CONFIG="${CONF_TEMP_DIR}/logging.conf" # Run the tests. -%{python3} -m unittest +%pytest # Cleanup. [ "${CONF_TEMP_DIR}" ] && rm -rf "${CONF_TEMP_DIR}" diff --git a/sources b/sources index 9e0d7e5..95edd36 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ SHA512 (v7.14.1.tar.gz) = d94cd1e25ec31e43fea05d0c404dd25c05b6b28435db2f8ca34546f6ff8bfd5da12d2dcd3b5cf4772c44688ae8968468dc2470da23596714e7615dbf6dfbe841 -SHA512 (keylime-selinux-43.1.1.tar.gz) = 1b0a850f68321e4872bb01eb99f5b000f1b5cbe3f1882e781bff519868ba5f4ca50f25b328b3662895969833add5c30d00e2a2361d2d626e7cffd95c0243ec39 +SHA512 (keylime-selinux-43.2.1.tar.gz) = 8cb8b032819d3b87e1dceaa7094385b4468c0d6be1e5dfc6d8b6758e6281def5255120ff34d71b5d4bc7fe9b9e960f1a98011e5bf7149df5704d0bbf6afbfad3