diff --git a/.gitignore b/.gitignore index e31915e..2802d2a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -SOURCES/pacemaker-a3f44794f.tar.gz +SOURCES/pacemaker-0f7f88312.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata index f5f737d..8db4d95 100644 --- a/.pacemaker.metadata +++ b/.pacemaker.metadata @@ -1,2 +1,2 @@ 2cbec94ad67dfbeba75e38d2c3c5c44961b3cd16 SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -b16198db5f86857ba8bc0ebd04fd386da360478a SOURCES/pacemaker-a3f44794f.tar.gz +88946a460e3be18852861269f8837aaaf339328c SOURCES/pacemaker-0f7f88312.tar.gz diff --git a/SOURCES/001-schema-glib.patch b/SOURCES/001-schema-glib.patch new file mode 100644 index 0000000..c38d1d8 --- /dev/null +++ b/SOURCES/001-schema-glib.patch @@ -0,0 +1,2334 @@ +From a59d703de97a49a27564f572dac52b455b356ba9 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 12:14:57 -0400 +Subject: [PATCH 01/20] Refactor: libcrmcommon: Remove prototypes for internal + functions. + +These are only here to give a place to put the G_GNUC_PRINTF attribute, +but that can go in the function definition itself. +--- + lib/common/schemas.c | 12 ++---------- + 1 file changed, 2 insertions(+), 10 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index b3c09eb..a85438c 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -64,11 +64,7 @@ static struct schema_s *known_schemas = NULL; + static int xml_schema_max = 0; + static bool silent_logging = FALSE; + +-static void +-xml_log(int priority, const char *fmt, ...) +-G_GNUC_PRINTF(2, 3); +- +-static void ++static void G_GNUC_PRINTF(2, 3) + xml_log(int priority, const char *fmt, ...) 
+ { + va_list ap; +@@ -716,10 +712,6 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidity + return FALSE; + } + +-static void +-cib_upgrade_err(void *ctx, const char *fmt, ...) +-G_GNUC_PRINTF(2, 3); +- + /* With this arrangement, an attempt to identify the message severity + as explicitly signalled directly from XSLT is performed in rather + a smart way (no reliance on formatting string + arguments being +@@ -743,7 +735,7 @@ G_GNUC_PRINTF(2, 3); + (suspicious, likely internal errors or some runaways) is + LOG_WARNING. + */ +-static void ++static void G_GNUC_PRINTF(2, 3) + cib_upgrade_err(void *ctx, const char *fmt, ...) + { + va_list ap, aq; +-- +2.31.1 + +From 3d50aeebce74e520606036ec7db8b5c70fe327b5 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 12:54:37 -0400 +Subject: [PATCH 02/20] Refactor: libcrmcommon: validate_with should take a + schema as argument. + +...instead of taking an index, and then finding that in the +known_schemas array. 
+--- + lib/common/schemas.c | 25 ++++++++++++------------- + 1 file changed, 12 insertions(+), 13 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index a85438c..f1f86f4 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -555,18 +555,16 @@ crm_schema_cleanup(void) + } + + static gboolean +-validate_with(xmlNode *xml, int method, xmlRelaxNGValidityErrorFunc error_handler, void* error_handler_context) ++validate_with(xmlNode *xml, struct schema_s *schema, xmlRelaxNGValidityErrorFunc error_handler, void* error_handler_context) + { + gboolean valid = FALSE; + char *file = NULL; +- struct schema_s *schema = NULL; + relaxng_ctx_cache_t **cache = NULL; + +- if (method < 0) { ++ if (schema == NULL) { + return FALSE; + } + +- schema = &(known_schemas[method]); + if (schema->validator == schema_validator_none) { + return TRUE; + } +@@ -587,8 +585,7 @@ validate_with(xmlNode *xml, int method, xmlRelaxNGValidityErrorFunc error_handle + valid = validate_with_relaxng(xml->doc, error_handler, error_handler_context, file, cache); + break; + default: +- crm_err("Unknown validator type: %d", +- known_schemas[method].validator); ++ crm_err("Unknown validator type: %d", schema->validator); + break; + } + +@@ -597,11 +594,11 @@ validate_with(xmlNode *xml, int method, xmlRelaxNGValidityErrorFunc error_handle + } + + static bool +-validate_with_silent(xmlNode *xml, int method) ++validate_with_silent(xmlNode *xml, struct schema_s *schema) + { + bool rc, sl_backup = silent_logging; + silent_logging = TRUE; +- rc = validate_with(xml, method, (xmlRelaxNGValidityErrorFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); ++ rc = validate_with(xml, schema, (xmlRelaxNGValidityErrorFunc) xml_log, GUINT_TO_POINTER(LOG_ERR)); + silent_logging = sl_backup; + return rc; + } +@@ -687,7 +684,7 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidity + bool valid = FALSE; + + for (lpc = 0; lpc < xml_schema_max; lpc++) { +- if 
(validate_with(xml_blob, lpc, NULL, NULL)) { ++ if (validate_with(xml_blob, &known_schemas[lpc], NULL, NULL)) { + valid = TRUE; + crm_xml_add(xml_blob, XML_ATTR_VALIDATION, + known_schemas[lpc].name); +@@ -705,7 +702,8 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidity + if (strcmp(validation, PCMK__VALUE_NONE) == 0) { + return TRUE; + } else if (version < xml_schema_max) { +- return validate_with(xml_blob, version, error_handler, error_handler_context); ++ return validate_with(xml_blob, version >= 0 ? &known_schemas[version] : NULL, ++ error_handler, error_handler_context); + } + + crm_err("Unknown validator: %s", validation); +@@ -1019,7 +1017,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + known_schemas[lpc].name ? known_schemas[lpc].name : "", + lpc, max_stable_schemas); + +- if (validate_with(xml, lpc, error_handler, GUINT_TO_POINTER(LOG_ERR)) == FALSE) { ++ if (validate_with(xml, &known_schemas[lpc], error_handler, GUINT_TO_POINTER(LOG_ERR)) == FALSE) { + if (next != -1) { + crm_info("Configuration not valid for schema: %s", + known_schemas[lpc].name); +@@ -1067,7 +1065,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + version boundary, as X.0 "transitional" version is + expected to be more strict than it's successors that + may re-allow constructs from previous major line) */ +- || validate_with_silent(xml, next)) { ++ || validate_with_silent(xml, next >= 0 ? &known_schemas[next] : NULL)) { + crm_debug("%s-style configuration is also valid for %s", + known_schemas[lpc].name, known_schemas[next].name); + +@@ -1084,7 +1082,8 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + known_schemas[lpc].transform); + rc = -pcmk_err_transform_failed; + +- } else if (validate_with(upgrade, next, error_handler, GUINT_TO_POINTER(LOG_ERR))) { ++ } else if (validate_with(upgrade, next >= 0 ? 
&known_schemas[next] : NULL, ++ error_handler, GUINT_TO_POINTER(LOG_ERR))) { + crm_info("Transformation %s.xsl successful", + known_schemas[lpc].transform); + lpc = next; +-- +2.31.1 + +From 26a17af650a842659f57c9e58185c290c30a3fb3 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 14:33:11 -0400 +Subject: [PATCH 03/20] Refactor: libcrmcommon: Break schema freeing out into a + function. + +--- + lib/common/schemas.c | 69 +++++++++++++++++++++++++------------------- + 1 file changed, 40 insertions(+), 29 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index f1f86f4..c21b9ae 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -511,6 +511,43 @@ validate_with_relaxng(xmlDocPtr doc, xmlRelaxNGValidityErrorFunc error_handler, + return valid; + } + ++static void ++free_schema(struct schema_s *schema) ++{ ++ relaxng_ctx_cache_t *ctx = NULL; ++ ++ switch (schema->validator) { ++ case schema_validator_none: // not cached ++ break; ++ ++ case schema_validator_rng: // cached ++ ctx = (relaxng_ctx_cache_t *) schema->cache; ++ if (ctx == NULL) { ++ break; ++ } ++ ++ if (ctx->parser != NULL) { ++ xmlRelaxNGFreeParserCtxt(ctx->parser); ++ } ++ ++ if (ctx->valid != NULL) { ++ xmlRelaxNGFreeValidCtxt(ctx->valid); ++ } ++ ++ if (ctx->rng != NULL) { ++ xmlRelaxNGFree(ctx->rng); ++ } ++ ++ free(ctx); ++ schema->cache = NULL; ++ break; ++ } ++ ++ free(schema->name); ++ free(schema->transform); ++ free(schema->transform_enter); ++} ++ + /*! 
+ * \internal + * \brief Clean up global memory associated with XML schemas +@@ -518,36 +555,10 @@ validate_with_relaxng(xmlDocPtr doc, xmlRelaxNGValidityErrorFunc error_handler, + void + crm_schema_cleanup(void) + { +- int lpc; +- relaxng_ctx_cache_t *ctx = NULL; +- +- for (lpc = 0; lpc < xml_schema_max; lpc++) { +- +- switch (known_schemas[lpc].validator) { +- case schema_validator_none: // not cached +- break; +- case schema_validator_rng: // cached +- ctx = (relaxng_ctx_cache_t *) known_schemas[lpc].cache; +- if (ctx == NULL) { +- break; +- } +- if (ctx->parser != NULL) { +- xmlRelaxNGFreeParserCtxt(ctx->parser); +- } +- if (ctx->valid != NULL) { +- xmlRelaxNGFreeValidCtxt(ctx->valid); +- } +- if (ctx->rng != NULL) { +- xmlRelaxNGFree(ctx->rng); +- } +- free(ctx); +- known_schemas[lpc].cache = NULL; +- break; +- } +- free(known_schemas[lpc].name); +- free(known_schemas[lpc].transform); +- free(known_schemas[lpc].transform_enter); ++ for (int lpc = 0; lpc < xml_schema_max; lpc++) { ++ free_schema(&known_schemas[lpc]); + } ++ + free(known_schemas); + known_schemas = NULL; + +-- +2.31.1 + +From fdf66811c23d93715fcd34e16eb58fce5f4294d7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 14:36:07 -0400 +Subject: [PATCH 04/20] Refactor: libcrmcommon: Clean up add_schema a bit. + +* Use true/false instead of TRUE/FALSE. + +* Call CRM_ASSERT after all the strdups. + +* There's no need to have a for loop over a two element list. If the + version number struct ever changes, plenty of other places will have + to be changed as well so this isn't saving us much. 
+--- + lib/common/schemas.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index c21b9ae..17cd21f 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -178,7 +178,7 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + int after_transform) + { + int last = xml_schema_max; +- bool have_version = FALSE; ++ bool have_version = false; + + xml_schema_max++; + known_schemas = pcmk__realloc(known_schemas, +@@ -188,26 +188,32 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + known_schemas[last].validator = validator; + known_schemas[last].after_transform = after_transform; + +- for (int i = 0; i < 2; ++i) { +- known_schemas[last].version.v[i] = version->v[i]; +- if (version->v[i]) { +- have_version = TRUE; +- } ++ known_schemas[last].version.v[0] = version->v[0]; ++ known_schemas[last].version.v[1] = version->v[1]; ++ ++ if (version->v[0] || version->v[1]) { ++ have_version = true; + } ++ + if (have_version) { + known_schemas[last].name = schema_strdup_printf("pacemaker-", *version, ""); + } else { +- CRM_ASSERT(name); ++ CRM_ASSERT(name != NULL); + schema_scanf(name, "%*[^-]-", known_schemas[last].version, ""); + known_schemas[last].name = strdup(name); ++ CRM_ASSERT(known_schemas[last].name != NULL); + } + + if (transform) { + known_schemas[last].transform = strdup(transform); ++ CRM_ASSERT(known_schemas[last].transform != NULL); + } ++ + if (transform_enter) { + known_schemas[last].transform_enter = strdup(transform_enter); ++ CRM_ASSERT(known_schemas[last].transform_enter != NULL); + } ++ + known_schemas[last].transform_onleave = transform_onleave; + if (after_transform == 0) { + after_transform = xml_schema_max; /* upgrade is a one-way */ +-- +2.31.1 + +From ef89e0536fae09036a657cf651da1eed75356054 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 15:16:11 -0400 +Subject: [PATCH 
05/20] Refactor: libcrmcommon: Use pcmk__s in schemas.c where + possible. + +--- + lib/common/schemas.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 17cd21f..fca81e4 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -1031,7 +1031,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + + while (lpc <= max_stable_schemas) { + crm_debug("Testing '%s' validation (%d of %d)", +- known_schemas[lpc].name ? known_schemas[lpc].name : "", ++ pcmk__s(known_schemas[lpc].name, ""), + lpc, max_stable_schemas); + + if (validate_with(xml, &known_schemas[lpc], error_handler, GUINT_TO_POINTER(LOG_ERR)) == FALSE) { +@@ -1041,7 +1041,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + next = -1; + } else { + crm_trace("%s validation failed", +- known_schemas[lpc].name ? known_schemas[lpc].name : ""); ++ pcmk__s(known_schemas[lpc].name, "")); + } + if (*best) { + /* we've satisfied the validation, no need to check further */ +@@ -1128,8 +1128,8 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + + if (*best > match && *best) { + crm_info("%s the configuration from %s to %s", +- transform?"Transformed":"Upgraded", +- value ? value : "", known_schemas[*best].name); ++ transform?"Transformed":"Upgraded", pcmk__s(value, ""), ++ known_schemas[*best].name); + crm_xml_add(xml, XML_ATTR_VALIDATION, known_schemas[*best].name); + } + +-- +2.31.1 + +From 574c3c1f5ca00514eff77b927821b695c980a683 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 15:20:04 -0400 +Subject: [PATCH 06/20] Refactor: libcrmcommon: Use a schema variable in + update_validation. + +This just gets rid of a ton of references to the known_schemas array, +making it easier to replace that array with something else in a future +commit. 
+--- + lib/common/schemas.c | 38 +++++++++++++++++--------------------- + 1 file changed, 17 insertions(+), 21 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index fca81e4..888a473 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -1030,18 +1030,18 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + } + + while (lpc <= max_stable_schemas) { ++ struct schema_s *schema = &known_schemas[lpc]; ++ + crm_debug("Testing '%s' validation (%d of %d)", +- pcmk__s(known_schemas[lpc].name, ""), +- lpc, max_stable_schemas); ++ pcmk__s(schema->name, ""), lpc, max_stable_schemas); + +- if (validate_with(xml, &known_schemas[lpc], error_handler, GUINT_TO_POINTER(LOG_ERR)) == FALSE) { ++ if (validate_with(xml, schema, error_handler, GUINT_TO_POINTER(LOG_ERR)) == FALSE) { + if (next != -1) { + crm_info("Configuration not valid for schema: %s", +- known_schemas[lpc].name); ++ schema->name); + next = -1; + } else { +- crm_trace("%s validation failed", +- pcmk__s(known_schemas[lpc].name, "")); ++ crm_trace("%s validation failed", pcmk__s(schema->name, "")); + } + if (*best) { + /* we've satisfied the validation, no need to check further */ +@@ -1051,8 +1051,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + + } else { + if (next != -1) { +- crm_debug("Configuration valid for schema: %s", +- known_schemas[next].name); ++ crm_debug("Configuration valid for schema: %s", schema->name); + next = -1; + } + rc = pcmk_ok; +@@ -1064,19 +1063,19 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + + if (rc == pcmk_ok && transform) { + xmlNode *upgrade = NULL; +- next = known_schemas[lpc].after_transform; ++ next = schema->after_transform; + + if (next <= lpc) { + /* There is no next version, or next would regress */ +- crm_trace("Stopping at %s", known_schemas[lpc].name); ++ crm_trace("Stopping at %s", schema->name); + break; + + } else if (max > 0 && (lpc == 
max || next > max)) { + crm_trace("Upgrade limit reached at %s (lpc=%d, next=%d, max=%d)", +- known_schemas[lpc].name, lpc, next, max); ++ schema->name, lpc, next, max); + break; + +- } else if (known_schemas[lpc].transform == NULL ++ } else if (schema->transform == NULL + /* possibly avoid transforming when readily valid + (in general more restricted when crossing the major + version boundary, as X.0 "transitional" version is +@@ -1084,25 +1083,22 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + may re-allow constructs from previous major line) */ + || validate_with_silent(xml, next >= 0 ? &known_schemas[next] : NULL)) { + crm_debug("%s-style configuration is also valid for %s", +- known_schemas[lpc].name, known_schemas[next].name); ++ schema->name, known_schemas[next].name); + + lpc = next; + + } else { + crm_debug("Upgrading %s-style configuration to %s with %s.xsl", +- known_schemas[lpc].name, known_schemas[next].name, +- known_schemas[lpc].transform); ++ schema->name, known_schemas[next].name, schema->transform); + +- upgrade = apply_upgrade(xml, &known_schemas[lpc], to_logs); ++ upgrade = apply_upgrade(xml, schema, to_logs); + if (upgrade == NULL) { +- crm_err("Transformation %s.xsl failed", +- known_schemas[lpc].transform); ++ crm_err("Transformation %s.xsl failed", schema->transform); + rc = -pcmk_err_transform_failed; + + } else if (validate_with(upgrade, next >= 0 ? 
&known_schemas[next] : NULL, + error_handler, GUINT_TO_POINTER(LOG_ERR))) { +- crm_info("Transformation %s.xsl successful", +- known_schemas[lpc].transform); ++ crm_info("Transformation %s.xsl successful", schema->transform); + lpc = next; + *best = next; + free_xml(xml); +@@ -1111,7 +1107,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + + } else { + crm_err("Transformation %s.xsl did not produce a valid configuration", +- known_schemas[lpc].transform); ++ schema->transform); + crm_log_xml_info(upgrade, "transform:bad"); + free_xml(upgrade); + rc = -pcmk_err_schema_validation; +-- +2.31.1 + +From dc724c940014fbe60aa506d8acb652b2dd5dce90 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 15:31:27 -0400 +Subject: [PATCH 07/20] Refactor: libcrmcommon: Use a variable for the next + schema, too. + +This gets rid of further references to known_schemas update_validation, +also with the purpose of making it easier to change the implementation +of that array. 
+--- + lib/common/schemas.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 888a473..8e5c22e 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -1063,41 +1063,47 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + + if (rc == pcmk_ok && transform) { + xmlNode *upgrade = NULL; ++ struct schema_s *next_schema = NULL; + next = schema->after_transform; + + if (next <= lpc) { + /* There is no next version, or next would regress */ + crm_trace("Stopping at %s", schema->name); + break; ++ } + +- } else if (max > 0 && (lpc == max || next > max)) { ++ if (max > 0 && (lpc == max || next > max)) { + crm_trace("Upgrade limit reached at %s (lpc=%d, next=%d, max=%d)", + schema->name, lpc, next, max); + break; ++ } ++ ++ next_schema = &known_schemas[next]; ++ CRM_ASSERT(next_schema != NULL); + +- } else if (schema->transform == NULL ++ if (schema->transform == NULL + /* possibly avoid transforming when readily valid + (in general more restricted when crossing the major + version boundary, as X.0 "transitional" version is + expected to be more strict than it's successors that + may re-allow constructs from previous major line) */ +- || validate_with_silent(xml, next >= 0 ? &known_schemas[next] : NULL)) { ++ || validate_with_silent(xml, next_schema)) { + crm_debug("%s-style configuration is also valid for %s", +- schema->name, known_schemas[next].name); ++ schema->name, next_schema->name); + + lpc = next; + + } else { + crm_debug("Upgrading %s-style configuration to %s with %s.xsl", +- schema->name, known_schemas[next].name, schema->transform); ++ schema->name, next_schema->name, schema->transform); + + upgrade = apply_upgrade(xml, schema, to_logs); + if (upgrade == NULL) { + crm_err("Transformation %s.xsl failed", schema->transform); + rc = -pcmk_err_transform_failed; + +- } else if (validate_with(upgrade, next >= 0 ? 
&known_schemas[next] : NULL, +- error_handler, GUINT_TO_POINTER(LOG_ERR))) { ++ } else if (validate_with(upgrade, next_schema, error_handler, ++ GUINT_TO_POINTER(LOG_ERR))) { + crm_info("Transformation %s.xsl successful", schema->transform); + lpc = next; + *best = next; +-- +2.31.1 + +From 0664f7e327c612d5602515ecf4bb32fb7c3503f6 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 16:09:30 -0400 +Subject: [PATCH 08/20] Refactor: libcrmcommon: Add pcmk__dump_known_schemas. + +The debug logging in add_schema isn't necessarily all that useful. +Typically, schema adding happens in crm_log_preinit which means it +happens before logging is set up, so nothing that we log actually goes +anywhere. + +This function does the same thing but can be called where needed. +--- + include/crm/common/xml_internal.h | 3 +++ + lib/common/schemas.c | 21 +++++++++++++++++++++ + 2 files changed, 24 insertions(+) + +diff --git a/include/crm/common/xml_internal.h b/include/crm/common/xml_internal.h +index ddb4384..f319856 100644 +--- a/include/crm/common/xml_internal.h ++++ b/include/crm/common/xml_internal.h +@@ -441,8 +441,11 @@ pcmk__xml_attr_value(const xmlAttr *attr) + : (const char *) attr->children->content; + } + ++ + gboolean pcmk__validate_xml(xmlNode *xml_blob, const char *validation, + xmlRelaxNGValidityErrorFunc error_handler, + void *error_handler_context); + ++void pcmk__log_known_schemas(void); ++ + #endif // PCMK__XML_INTERNAL__H +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 8e5c22e..41ca138 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -1248,3 +1248,24 @@ cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) + free(orig_value); + return rc; + } ++ ++void ++pcmk__log_known_schemas(void) ++{ ++ for (int lpc = 0; lpc < xml_schema_max; lpc++) { ++ if (known_schemas[lpc].after_transform < 0) { ++ crm_debug("known_schemas[%d] => %s", lpc, known_schemas[lpc].name); ++ ++ } else if 
(known_schemas[lpc].transform != NULL) { ++ crm_debug("known_schemas[%d] => %s (upgrades to %d with %s.xsl)", ++ lpc, known_schemas[lpc].name, ++ known_schemas[lpc].after_transform, ++ known_schemas[lpc].transform); ++ ++ } else { ++ crm_debug("known_schemas[%d] => %s (upgrades to %d)", ++ lpc, known_schemas[lpc].name, ++ known_schemas[lpc].after_transform); ++ } ++ } ++} +-- +2.31.1 + +From 423a28fd5c2b71945d75b68b168a607279d795f7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 17:22:53 -0400 +Subject: [PATCH 09/20] Refactor: libcrmcommon: Store known_schemas as a GList. + +Instead of managing our own array with realloc, use GList and the +various glib list functions. + +In many places, this makes the code easier to follow - we can simply +iterate over the list and do something on each node. In other places, +we're still relying on list indices too much to help. Those spots can +probably be cleaned up in future commits. +--- + lib/common/schemas.c | 181 ++++++++++++++++++++++++++----------------- + 1 file changed, 108 insertions(+), 73 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 41ca138..6e6f32e 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -60,7 +60,7 @@ struct schema_s { + bool transform_onleave; + }; + +-static struct schema_s *known_schemas = NULL; ++static GList *known_schemas = NULL; + static int xml_schema_max = 0; + static bool silent_logging = FALSE; + +@@ -81,27 +81,45 @@ static int + xml_latest_schema_index(void) + { + // @COMPAT: pacemaker-next is deprecated since 2.1.5 +- return xml_schema_max - 3; // index from 0, ignore "pacemaker-next"/"none" ++ // FIXME: This function assumes at least three schemas have been added ++ // before it has been called for the first time. 
++ return g_list_length(known_schemas) - 3; // index from 0, ignore "pacemaker-next"/"none" + } + + static int + xml_minimum_schema_index(void) + { + static int best = 0; +- if (best == 0) { +- int lpc = 0; +- +- best = xml_latest_schema_index(); +- for (lpc = best; lpc > 0; lpc--) { +- if (known_schemas[lpc].version.v[0] +- < known_schemas[best].version.v[0]) { +- return best; +- } else { +- best = lpc; +- } ++ struct schema_s *best_schema = NULL; ++ GList *last_real_ele = NULL; ++ ++ if (best != 0) { ++ return best; ++ } ++ ++ best = xml_latest_schema_index(); ++ ++ /* We can't just use g_list_last here because "pacemaker-next" and "none" ++ * are stored at the end of the list. We need to start several elements ++ * back, at the last real schema. ++ */ ++ last_real_ele = g_list_nth(known_schemas, best); ++ best_schema = last_real_ele->data; ++ ++ for (GList *iter = last_real_ele; iter != NULL; iter = iter->prev) { ++ struct schema_s *schema = iter->data; ++ ++ if (schema->version.v[0] < best_schema->version.v[0]) { ++ return best; ++ } else { ++ best--; + } +- best = xml_latest_schema_index(); + } ++ ++ /* If we never found a schema that meets the above criteria, default to ++ * the last one. 
++ */ ++ best = xml_latest_schema_index(); + return best; + } + +@@ -177,63 +195,61 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + const char *transform_enter, bool transform_onleave, + int after_transform) + { ++ struct schema_s *schema = NULL; + int last = xml_schema_max; + bool have_version = false; + + xml_schema_max++; +- known_schemas = pcmk__realloc(known_schemas, +- xml_schema_max * sizeof(struct schema_s)); +- CRM_ASSERT(known_schemas != NULL); +- memset(known_schemas+last, 0, sizeof(struct schema_s)); +- known_schemas[last].validator = validator; +- known_schemas[last].after_transform = after_transform; + +- known_schemas[last].version.v[0] = version->v[0]; +- known_schemas[last].version.v[1] = version->v[1]; ++ schema = calloc(1, sizeof(struct schema_s)); ++ CRM_ASSERT(schema != NULL); ++ ++ schema->validator = validator; ++ schema->after_transform = after_transform; ++ schema->version.v[0] = version->v[0]; ++ schema->version.v[1] = version->v[1]; + + if (version->v[0] || version->v[1]) { + have_version = true; + } + + if (have_version) { +- known_schemas[last].name = schema_strdup_printf("pacemaker-", *version, ""); ++ schema->name = schema_strdup_printf("pacemaker-", *version, ""); + } else { + CRM_ASSERT(name != NULL); +- schema_scanf(name, "%*[^-]-", known_schemas[last].version, ""); +- known_schemas[last].name = strdup(name); +- CRM_ASSERT(known_schemas[last].name != NULL); ++ schema_scanf(name, "%*[^-]-", schema->version, ""); ++ schema->name = strdup(name); ++ CRM_ASSERT(schema->name != NULL); + } + + if (transform) { +- known_schemas[last].transform = strdup(transform); +- CRM_ASSERT(known_schemas[last].transform != NULL); ++ schema->transform = strdup(transform); ++ CRM_ASSERT(schema->transform != NULL); + } + + if (transform_enter) { +- known_schemas[last].transform_enter = strdup(transform_enter); +- CRM_ASSERT(known_schemas[last].transform_enter != NULL); ++ schema->transform_enter = 
strdup(transform_enter); ++ CRM_ASSERT(schema->transform_enter != NULL); + } + +- known_schemas[last].transform_onleave = transform_onleave; ++ schema->transform_onleave = transform_onleave; + if (after_transform == 0) { + after_transform = xml_schema_max; /* upgrade is a one-way */ + } +- known_schemas[last].after_transform = after_transform; ++ schema->after_transform = after_transform; + +- if (known_schemas[last].after_transform < 0) { +- crm_debug("Added supported schema %d: %s", +- last, known_schemas[last].name); ++ known_schemas = g_list_append(known_schemas, schema); + +- } else if (known_schemas[last].transform) { ++ if (schema->after_transform < 0) { ++ crm_debug("Added supported schema %d: %s", last, schema->name); ++ ++ } else if (schema->transform != NULL) { + crm_debug("Added supported schema %d: %s (upgrades to %d with %s.xsl)", +- last, known_schemas[last].name, +- known_schemas[last].after_transform, +- known_schemas[last].transform); ++ last, schema->name, schema->after_transform, schema->transform); + + } else { + crm_debug("Added supported schema %d: %s (upgrades to %d)", +- last, known_schemas[last].name, +- known_schemas[last].after_transform); ++ last, schema->name, schema->after_transform); + } + } + +@@ -518,8 +534,9 @@ validate_with_relaxng(xmlDocPtr doc, xmlRelaxNGValidityErrorFunc error_handler, + } + + static void +-free_schema(struct schema_s *schema) ++free_schema(gpointer data) + { ++ struct schema_s *schema = data; + relaxng_ctx_cache_t *ctx = NULL; + + switch (schema->validator) { +@@ -561,11 +578,7 @@ free_schema(struct schema_s *schema) + void + crm_schema_cleanup(void) + { +- for (int lpc = 0; lpc < xml_schema_max; lpc++) { +- free_schema(&known_schemas[lpc]); +- } +- +- free(known_schemas); ++ g_list_free_full(known_schemas, free_schema); + known_schemas = NULL; + + wrap_libxslt(true); +@@ -697,16 +710,17 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidity + } + + if (validation == NULL) { +- 
int lpc = 0; + bool valid = FALSE; + +- for (lpc = 0; lpc < xml_schema_max; lpc++) { +- if (validate_with(xml_blob, &known_schemas[lpc], NULL, NULL)) { ++ for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { ++ struct schema_s *schema = iter->data; ++ ++ if (validate_with(xml_blob, schema, NULL, NULL)) { + valid = TRUE; +- crm_xml_add(xml_blob, XML_ATTR_VALIDATION, +- known_schemas[lpc].name); +- crm_info("XML validated against %s", known_schemas[lpc].name); +- if(known_schemas[lpc].after_transform == 0) { ++ crm_xml_add(xml_blob, XML_ATTR_VALIDATION, schema->name); ++ crm_info("XML validated against %s", schema->name); ++ ++ if (schema->after_transform == 0) { + break; + } + } +@@ -719,8 +733,9 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidity + if (strcmp(validation, PCMK__VALUE_NONE) == 0) { + return TRUE; + } else if (version < xml_schema_max) { +- return validate_with(xml_blob, version >= 0 ? &known_schemas[version] : NULL, +- error_handler, error_handler_context); ++ struct schema_s *schema = g_list_nth_data(known_schemas, version); ++ return validate_with(xml_blob, schema, error_handler, ++ error_handler_context); + } + + crm_err("Unknown validator: %s", validation); +@@ -964,10 +979,13 @@ apply_upgrade(xmlNode *xml, const struct schema_s *schema, gboolean to_logs) + const char * + get_schema_name(int version) + { +- if (version < 0 || version >= xml_schema_max) { ++ struct schema_s *schema = g_list_nth_data(known_schemas, version); ++ ++ if (schema == NULL) { + return "unknown"; + } +- return known_schemas[version].name; ++ ++ return schema->name; + } + + int +@@ -978,11 +996,17 @@ get_schema_version(const char *name) + if (name == NULL) { + name = PCMK__VALUE_NONE; + } +- for (; lpc < xml_schema_max; lpc++) { +- if (pcmk__str_eq(name, known_schemas[lpc].name, pcmk__str_casei)) { ++ ++ for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { ++ struct schema_s *schema = iter->data; ++ ++ if 
(pcmk__str_eq(name, schema->name, pcmk__str_casei)) { + return lpc; + } ++ ++ lpc++; + } ++ + return -1; + } + +@@ -1030,7 +1054,12 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + } + + while (lpc <= max_stable_schemas) { +- struct schema_s *schema = &known_schemas[lpc]; ++ /* FIXME: This will cause us to walk the known_schemas list every time ++ * this loop iterates, which is not ideal. However, for now it's a lot ++ * easier than trying to get all the loop indices we're using here ++ * sorted out and working correctly. ++ */ ++ struct schema_s *schema = g_list_nth_data(known_schemas, lpc); + + crm_debug("Testing '%s' validation (%d of %d)", + pcmk__s(schema->name, ""), lpc, max_stable_schemas); +@@ -1078,7 +1107,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + break; + } + +- next_schema = &known_schemas[next]; ++ next_schema = g_list_nth_data(known_schemas, next); + CRM_ASSERT(next_schema != NULL); + + if (schema->transform == NULL +@@ -1129,10 +1158,12 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + } + + if (*best > match && *best) { ++ struct schema_s *best_schema = g_list_nth_data(known_schemas, *best); ++ + crm_info("%s the configuration from %s to %s", + transform?"Transformed":"Upgraded", pcmk__s(value, ""), +- known_schemas[*best].name); +- crm_xml_add(xml, XML_ATTR_VALIDATION, known_schemas[*best].name); ++ best_schema->name); ++ crm_xml_add(xml, XML_ATTR_VALIDATION, best_schema->name); + } + + *xml_blob = xml; +@@ -1252,20 +1283,24 @@ cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) + void + pcmk__log_known_schemas(void) + { +- for (int lpc = 0; lpc < xml_schema_max; lpc++) { +- if (known_schemas[lpc].after_transform < 0) { +- crm_debug("known_schemas[%d] => %s", lpc, known_schemas[lpc].name); ++ int lpc = 0; + +- } else if (known_schemas[lpc].transform != NULL) { ++ for (GList *iter = known_schemas; iter != NULL; iter = 
iter->next) { ++ struct schema_s *schema = iter->data; ++ ++ if (schema->after_transform < 0) { ++ crm_debug("known_schemas[%d] => %s", lpc, schema->name); ++ ++ } else if (schema->transform != NULL) { + crm_debug("known_schemas[%d] => %s (upgrades to %d with %s.xsl)", +- lpc, known_schemas[lpc].name, +- known_schemas[lpc].after_transform, +- known_schemas[lpc].transform); ++ lpc, schema->name, schema->after_transform, ++ schema->transform); + + } else { + crm_debug("known_schemas[%d] => %s (upgrades to %d)", +- lpc, known_schemas[lpc].name, +- known_schemas[lpc].after_transform); ++ lpc, schema->name, schema->after_transform); + } ++ ++ lpc++; + } + } +-- +2.31.1 + +From 33cfcc0d98603e04dde15d69acd46823679405f0 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 25 Oct 2023 17:34:25 -0400 +Subject: [PATCH 10/20] Refactor: libcrmcommon: Get rid of xml_schema_max. + +This is just the length of the known_schemas list. +--- + lib/common/schemas.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 6e6f32e..d4ce68e 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -61,7 +61,6 @@ struct schema_s { + }; + + static GList *known_schemas = NULL; +-static int xml_schema_max = 0; + static bool silent_logging = FALSE; + + static void G_GNUC_PRINTF(2, 3) +@@ -196,11 +195,9 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + int after_transform) + { + struct schema_s *schema = NULL; +- int last = xml_schema_max; ++ int last = g_list_length(known_schemas); + bool have_version = false; + +- xml_schema_max++; +- + schema = calloc(1, sizeof(struct schema_s)); + CRM_ASSERT(schema != NULL); + +@@ -234,7 +231,7 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + + schema->transform_onleave = transform_onleave; + if (after_transform == 0) { +- after_transform = xml_schema_max; /* upgrade is a one-way */ ++ after_transform = 
last + 1; /* upgrade is a one-way */ + } + schema->after_transform = after_transform; + +@@ -732,7 +729,7 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidity + version = get_schema_version(validation); + if (strcmp(validation, PCMK__VALUE_NONE) == 0) { + return TRUE; +- } else if (version < xml_schema_max) { ++ } else if (version < g_list_length(known_schemas)) { + struct schema_s *schema = g_list_nth_data(known_schemas, version); + return validate_with(xml_blob, schema, error_handler, + error_handler_context); +-- +2.31.1 + +From 6ace4c912d34f495ab5f52500628c82fb1533256 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 26 Oct 2023 12:52:14 -0400 +Subject: [PATCH 11/20] Refactor: libcrmcommon: Rename + xml_minimum_schema_index. + +This function's name is unclear. It actually returns the most recent +X.0 schema index. The new name is pretty bad, but I think it's at least +clear. + +And then while I'm at it, rewrite it to make it more clear. Just +iterate the known_schemas list, looking for the right .0 one. This code +does not get run very often, and it caches its result, so there's no +need to do the reverse traversal with a lagging index. +--- + lib/common/schemas.c | 41 +++++++++++++++++++++++++---------------- + 1 file changed, 25 insertions(+), 16 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index d4ce68e..55519e8 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -85,39 +85,48 @@ xml_latest_schema_index(void) + return g_list_length(known_schemas) - 3; // index from 0, ignore "pacemaker-next"/"none" + } + ++/* Return the index of the most recent X.0 schema. */ + static int +-xml_minimum_schema_index(void) ++xml_find_x_0_schema_index(void) + { + static int best = 0; ++ int i = 0; + struct schema_s *best_schema = NULL; +- GList *last_real_ele = NULL; + + if (best != 0) { + return best; + } + ++ /* Get the most recent schema so we can look at its version number. 
*/ + best = xml_latest_schema_index(); ++ best_schema = g_list_nth(known_schemas, best)->data; + +- /* We can't just use g_list_last here because "pacemaker-next" and "none" +- * are stored at the end of the list. We need to start several elements +- * back, at the last real schema. ++ /* Iterate over the schema list until we find a schema with the same major ++ * version as best, and with a minor version number of 0. ++ * ++ * This assumes that the first schema in a major series is always X.0, ++ * which seems like a safe assumption. + */ +- last_real_ele = g_list_nth(known_schemas, best); +- best_schema = last_real_ele->data; +- +- for (GList *iter = last_real_ele; iter != NULL; iter = iter->prev) { ++ for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { + struct schema_s *schema = iter->data; + +- if (schema->version.v[0] < best_schema->version.v[0]) { ++ /* If we hit the initial best schema, the only things left in the list ++ * are "pacemaker-next" and "none" which aren't worth checking. ++ */ ++ if (schema == best_schema) { ++ break; ++ } ++ ++ if (schema->version.v[0] == best_schema->version.v[0] && ++ schema->version.v[1] == 0) { ++ best = i; + return best; +- } else { +- best--; + } ++ ++ i++; + } + +- /* If we never found a schema that meets the above criteria, default to +- * the last one. +- */ ++ /* If we got here, we never found a match. Just return the latest. 
*/ + best = xml_latest_schema_index(); + return best; + } +@@ -1177,7 +1186,7 @@ cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) + + int version = get_schema_version(value); + int orig_version = version; +- int min_version = xml_minimum_schema_index(); ++ int min_version = xml_find_x_0_schema_index(); + + if (version < min_version) { + // Current configuration schema is not acceptable, try to update +-- +2.31.1 + +From 7df1d2df9e8710183735b19947a885bb129e523e Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 26 Oct 2023 14:14:02 -0400 +Subject: [PATCH 12/20] Refactor: libcrmcommon: Remove an unnecessary check in + validate_xml. + +I believe that add_schema ensures after_transform is never 0 - it's +either negative, or some positive non-zero value. So this check should +be pointless. +--- + lib/common/schemas.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 55519e8..cfb83dd 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -725,10 +725,6 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidity + valid = TRUE; + crm_xml_add(xml_blob, XML_ATTR_VALIDATION, schema->name); + crm_info("XML validated against %s", schema->name); +- +- if (schema->after_transform == 0) { +- break; +- } + } + } + +-- +2.31.1 + +From dbf94f5a3c146992fb60c231a7eda21271b62b99 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 27 Oct 2023 10:49:49 -0400 +Subject: [PATCH 13/20] Refactor: libcrmcommon: Change how schema upgrade + versions are handled + +...in update_validation. Schemas always either upgrade to the next one +in the list, or do not upgrade. The latter only happens when we get to +the last real version and the next one is pacemaker-next/none. + +With that change made, we also need to change the conditional. There's +no need to check that the upgrade will regress. 
We only need to check +that we've run off the end of the list of real schema versions. + +A future commit will remove the after_transform variable entirely, but +since this is its most visible and complicated use, splitting this into +a separate commit seems worth it. +--- + lib/common/schemas.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index cfb83dd..3ebdf1c 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -1095,10 +1095,10 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + if (rc == pcmk_ok && transform) { + xmlNode *upgrade = NULL; + struct schema_s *next_schema = NULL; +- next = schema->after_transform; ++ next = lpc+1; + +- if (next <= lpc) { +- /* There is no next version, or next would regress */ ++ if (next > max_stable_schemas) { ++ /* There is no next version */ + crm_trace("Stopping at %s", schema->name); + break; + } +-- +2.31.1 + +From b2da7aaba5a1afe9d4f56989c0b81dd55abaf1b8 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 27 Oct 2023 11:00:12 -0400 +Subject: [PATCH 14/20] Refactor: libcrmcommon: Get rid of after_transform. + +As stated in the previous commit, schemas always just upgrade to the +next one in the list. There's no need to keep track of that fact, so +get rid of the variable that held it. This then allows us to get rid of +all the places that value was being set and passed around. 
+--- + lib/common/schemas.c | 54 +++++++++++++------------------------------- + 1 file changed, 16 insertions(+), 38 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 3ebdf1c..e33d3c7 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -54,7 +54,6 @@ struct schema_s { + char *transform; + void *cache; + enum schema_validator_e validator; +- int after_transform; + schema_version_t version; + char *transform_enter; + bool transform_onleave; +@@ -200,8 +199,7 @@ schema_sort(const struct dirent **a, const struct dirent **b) + static void + add_schema(enum schema_validator_e validator, const schema_version_t *version, + const char *name, const char *transform, +- const char *transform_enter, bool transform_onleave, +- int after_transform) ++ const char *transform_enter, bool transform_onleave) + { + struct schema_s *schema = NULL; + int last = g_list_length(known_schemas); +@@ -211,9 +209,9 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + CRM_ASSERT(schema != NULL); + + schema->validator = validator; +- schema->after_transform = after_transform; + schema->version.v[0] = version->v[0]; + schema->version.v[1] = version->v[1]; ++ schema->transform_onleave = transform_onleave; + + if (version->v[0] || version->v[1]) { + have_version = true; +@@ -238,24 +236,14 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + CRM_ASSERT(schema->transform_enter != NULL); + } + +- schema->transform_onleave = transform_onleave; +- if (after_transform == 0) { +- after_transform = last + 1; /* upgrade is a one-way */ +- } +- schema->after_transform = after_transform; +- + known_schemas = g_list_append(known_schemas, schema); + +- if (schema->after_transform < 0) { +- crm_debug("Added supported schema %d: %s", last, schema->name); +- +- } else if (schema->transform != NULL) { +- crm_debug("Added supported schema %d: %s (upgrades to %d with %s.xsl)", +- last, schema->name, 
schema->after_transform, schema->transform); ++ if (schema->transform != NULL) { ++ crm_debug("Added supported schema %d: %s (upgrades with %s.xsl)", ++ last, schema->name, schema->transform); + + } else { +- crm_debug("Added supported schema %d: %s (upgrades to %d)", +- last, schema->name, schema->after_transform); ++ crm_debug("Added supported schema %d: %s", last, schema->name); + } + } + +@@ -288,8 +276,7 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + * . name convention: (see "upgrade-enter") + */ + static int +-add_schema_by_version(const schema_version_t *version, int next, +- bool transform_expected) ++add_schema_by_version(const schema_version_t *version, bool transform_expected) + { + bool transform_onleave = FALSE; + int rc = pcmk_rc_ok; +@@ -345,12 +332,11 @@ add_schema_by_version(const schema_version_t *version, int next, + free(xslt); + free(transform_upgrade); + transform_upgrade = NULL; +- next = -1; + rc = ENOENT; + } + + add_schema(schema_validator_rng, version, NULL, +- transform_upgrade, transform_enter, transform_onleave, next); ++ transform_upgrade, transform_enter, transform_onleave); + + free(transform_upgrade); + free(transform_enter); +@@ -416,7 +402,6 @@ crm_schema_init(void) + free(base); + for (lpc = 0; lpc < max; lpc++) { + bool transform_expected = FALSE; +- int next = 0; + schema_version_t version = SCHEMA_ZERO; + + if (!version_from_filename(namelist[lpc]->d_name, &version)) { +@@ -432,11 +417,9 @@ crm_schema_init(void) + && (version.v[0] < next_version.v[0])) { + transform_expected = TRUE; + } +- +- } else { +- next = -1; + } +- if (add_schema_by_version(&version, next, transform_expected) ++ ++ if (add_schema_by_version(&version, transform_expected) + == ENOENT) { + break; + } +@@ -450,10 +433,10 @@ crm_schema_init(void) + + // @COMPAT: Deprecated since 2.1.5 + add_schema(schema_validator_rng, &zero, "pacemaker-next", +- NULL, NULL, FALSE, -1); ++ NULL, NULL, FALSE); + + 
add_schema(schema_validator_none, &zero, PCMK__VALUE_NONE, +- NULL, NULL, FALSE, -1); ++ NULL, NULL, FALSE); + } + + static gboolean +@@ -1290,17 +1273,12 @@ pcmk__log_known_schemas(void) + for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { + struct schema_s *schema = iter->data; + +- if (schema->after_transform < 0) { +- crm_debug("known_schemas[%d] => %s", lpc, schema->name); +- +- } else if (schema->transform != NULL) { +- crm_debug("known_schemas[%d] => %s (upgrades to %d with %s.xsl)", +- lpc, schema->name, schema->after_transform, +- schema->transform); ++ if (schema->transform != NULL) { ++ crm_debug("known_schemas[%d] => %s (upgrades with %s.xsl)", ++ lpc, schema->name, schema->transform); + + } else { +- crm_debug("known_schemas[%d] => %s (upgrades to %d)", +- lpc, schema->name, schema->after_transform); ++ crm_debug("known_schemas[%d] => %s", lpc, schema->name); + } + + lpc++; +-- +2.31.1 + +From 645bb233e52b9f5f559ffcd354b2f4ef0bcdee90 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 6 Nov 2023 08:22:04 -0500 +Subject: [PATCH 15/20] Refactor: libcrmcommon: Remove unnecessary schema code. + +The block that sets the version if we didn't previously do so is no +longer necessary. This block only executes if the version parameter is +all zeros, which at the moment is only "pacemaker-next" and "none". We +could probably guarantee this will continue to be the case. + +Additionally, I don't see that this would even do anything useful +anymore. Scanning the name for a version number is going to fail for +"pacemaker-next" and "none". So really, this block was just handling +the possibility that we passed in no version number but that the name +contained a number. + +And with that done, there's only one more spot using schema_scanf so we +can just replace that with a call to sscanf. 
+--- + lib/common/schemas.c | 14 +------------- + 1 file changed, 1 insertion(+), 13 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index e33d3c7..7b91f71 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -32,9 +32,6 @@ typedef struct { + + #define SCHEMA_ZERO { .v = { 0, 0 } } + +-#define schema_scanf(s, prefix, version, suffix) \ +- sscanf((s), prefix "%hhu.%hhu" suffix, &((version).v[0]), &((version).v[1])) +- + #define schema_strdup_printf(prefix, version, suffix) \ + crm_strdup_printf(prefix "%u.%u" suffix, (version).v[0], (version).v[1]) + +@@ -139,9 +136,7 @@ xml_latest_schema(void) + static inline bool + version_from_filename(const char *filename, schema_version_t *version) + { +- int rc = schema_scanf(filename, "pacemaker-", *version, ".rng"); +- +- return (rc == 2); ++ return sscanf(filename, "pacemaker-%hhu.%hhu.rng", &(version->v[0]), &(version->v[1])) == 2; + } + + static int +@@ -203,7 +198,6 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + { + struct schema_s *schema = NULL; + int last = g_list_length(known_schemas); +- bool have_version = false; + + schema = calloc(1, sizeof(struct schema_s)); + CRM_ASSERT(schema != NULL); +@@ -214,14 +208,8 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + schema->transform_onleave = transform_onleave; + + if (version->v[0] || version->v[1]) { +- have_version = true; +- } +- +- if (have_version) { + schema->name = schema_strdup_printf("pacemaker-", *version, ""); + } else { +- CRM_ASSERT(name != NULL); +- schema_scanf(name, "%*[^-]-", schema->version, ""); + schema->name = strdup(name); + CRM_ASSERT(schema->name != NULL); + } +-- +2.31.1 + +From 0943f1ff0a9e72e88c5a234a32bb83d0f2e02c84 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 17 Nov 2023 09:42:55 -0500 +Subject: [PATCH 16/20] Refactor: libcrmcommon: Improve + xml_find_x_0_schema_index. + +* Lots of comments to explain how it works. 
+ +* Walk the list backwards, stopping on the first one in the major + version series. This means the first one no longer has to be X.0. + +* Require that known_schemas be non-NULL. + +* Don't use the returned index to also mean we've found something since + that means if the index we actually want to return is 0, the function + will have to run every time. +--- + lib/common/schemas.c | 62 +++++++++++++++++++++++++++++--------------- + 1 file changed, 41 insertions(+), 21 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 7b91f71..466ad5a 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -85,45 +85,65 @@ xml_latest_schema_index(void) + static int + xml_find_x_0_schema_index(void) + { ++ /* We can't just use best to determine whether we've found the index ++ * or not. What if we have a very long list of schemas all in the ++ * same major version series? We'd return 0 for that, which means ++ * we would still run this function every time. ++ */ ++ static bool found = false; + static int best = 0; +- int i = 0; ++ int i; ++ GList *best_node = NULL; + struct schema_s *best_schema = NULL; + +- if (best != 0) { ++ if (found) { + return best; + } + ++ CRM_ASSERT(known_schemas != NULL); ++ + /* Get the most recent schema so we can look at its version number. */ + best = xml_latest_schema_index(); +- best_schema = g_list_nth(known_schemas, best)->data; ++ best_node = g_list_nth(known_schemas, best); ++ best_schema = best_node->data; ++ ++ /* If this is a singleton list, we're done. */ ++ if (pcmk__list_of_1(known_schemas)) { ++ goto done; ++ } + +- /* Iterate over the schema list until we find a schema with the same major +- * version as best, and with a minor version number of 0. +- * +- * This assumes that the first schema in a major series is always X.0, +- * which seems like a safe assumption. ++ /* Start comparing the list from the node before the best schema (there's ++ * no point in comparing something to itself). 
Then, 'i' is an index ++ * starting at the best schema and will always point at the node after ++ * 'iter'. This makes it the value we want to return when we find what ++ * we're looking for. + */ +- for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { ++ i = best; ++ ++ for (GList *iter = best_node->prev; iter != NULL; iter = iter->prev) { + struct schema_s *schema = iter->data; + +- /* If we hit the initial best schema, the only things left in the list +- * are "pacemaker-next" and "none" which aren't worth checking. ++ /* We've found a schema in an older major version series. Return ++ * the index of the first one in the same major version series as ++ * the best schema. + */ +- if (schema == best_schema) { +- break; +- } +- +- if (schema->version.v[0] == best_schema->version.v[0] && +- schema->version.v[1] == 0) { ++ if (schema->version.v[0] < best_schema->version.v[0]) { + best = i; +- return best; ++ goto done; ++ ++ /* We're out of list to examine. This probably means there was only ++ * one major version series, so return index 0. ++ */ ++ } else if (iter->prev == NULL) { ++ best = 0; ++ goto done; + } + +- i++; ++ i--; + } + +- /* If we got here, we never found a match. Just return the latest. */ +- best = xml_latest_schema_index(); ++done: ++ found = true; + return best; + } + +-- +2.31.1 + +From eeeb36338f48d40f9f15a51c18aeca533b6c260d Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 17 Nov 2023 11:18:34 -0500 +Subject: [PATCH 17/20] Refactor: libcrmcommon: Add a parameter to a couple + schema functions. + +Instead of assuming known_schemas, pass the list to use as a parameter. +--- + lib/common/schemas.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 466ad5a..9d98695 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -73,17 +73,17 @@ xml_log(int priority, const char *fmt, ...) 
+ } + + static int +-xml_latest_schema_index(void) ++xml_latest_schema_index(GList *schemas) + { + // @COMPAT: pacemaker-next is deprecated since 2.1.5 + // FIXME: This function assumes at least three schemas have been added + // before it has been called for the first time. +- return g_list_length(known_schemas) - 3; // index from 0, ignore "pacemaker-next"/"none" ++ return g_list_length(schemas) - 3; // index from 0, ignore "pacemaker-next"/"none" + } + + /* Return the index of the most recent X.0 schema. */ + static int +-xml_find_x_0_schema_index(void) ++xml_find_x_0_schema_index(GList *schemas) + { + /* We can't just use best to determine whether we've found the index + * or not. What if we have a very long list of schemas all in the +@@ -100,15 +100,15 @@ xml_find_x_0_schema_index(void) + return best; + } + +- CRM_ASSERT(known_schemas != NULL); ++ CRM_ASSERT(schemas != NULL); + + /* Get the most recent schema so we can look at its version number. */ +- best = xml_latest_schema_index(); +- best_node = g_list_nth(known_schemas, best); ++ best = xml_latest_schema_index(schemas); ++ best_node = g_list_nth(schemas, best); + best_schema = best_node->data; + + /* If this is a singleton list, we're done. 
*/ +- if (pcmk__list_of_1(known_schemas)) { ++ if (pcmk__list_of_1(schemas)) { + goto done; + } + +@@ -150,7 +150,7 @@ done: + const char * + xml_latest_schema(void) + { +- return get_schema_name(xml_latest_schema_index()); ++ return get_schema_name(xml_latest_schema_index(known_schemas)); + } + + static inline bool +@@ -1010,7 +1010,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + { + xmlNode *xml = NULL; + char *value = NULL; +- int max_stable_schemas = xml_latest_schema_index(); ++ int max_stable_schemas = xml_latest_schema_index(known_schemas); + int lpc = 0, match = -1, rc = pcmk_ok; + int next = -1; /* -1 denotes "inactive" value */ + xmlRelaxNGValidityErrorFunc error_handler = +@@ -1173,7 +1173,7 @@ cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) + + int version = get_schema_version(value); + int orig_version = version; +- int min_version = xml_find_x_0_schema_index(); ++ int min_version = xml_find_x_0_schema_index(known_schemas); + + if (version < min_version) { + // Current configuration schema is not acceptable, try to update +@@ -1235,7 +1235,7 @@ cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) + free_xml(*xml); + *xml = converted; + +- if (version < xml_latest_schema_index()) { ++ if (version < xml_latest_schema_index(known_schemas)) { + if (to_logs) { + pcmk__config_warn("Configuration with schema %s was " + "internally upgraded to acceptable (but " +-- +2.31.1 + +From 12e7b982da61c6cc6cf01164d45bb8f7b0255a8a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 17 Nov 2023 11:30:18 -0500 +Subject: [PATCH 18/20] Refactor: libcrmcommon: Rename several schema-related + types. + +Give them pcmk__ names indicating they are private. This is in +preparation for moving them out into a header file. 
+--- + lib/common/schemas.c | 80 ++++++++++++++++++++++---------------------- + 1 file changed, 40 insertions(+), 40 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 9d98695..cf8f325 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -28,7 +28,7 @@ + + typedef struct { + unsigned char v[2]; +-} schema_version_t; ++} pcmk__schema_version_t; + + #define SCHEMA_ZERO { .v = { 0, 0 } } + +@@ -41,20 +41,20 @@ typedef struct { + xmlRelaxNGParserCtxtPtr parser; + } relaxng_ctx_cache_t; + +-enum schema_validator_e { +- schema_validator_none, +- schema_validator_rng ++enum pcmk__schema_validator { ++ pcmk__schema_validator_none, ++ pcmk__schema_validator_rng + }; + +-struct schema_s { ++typedef struct { + char *name; + char *transform; + void *cache; +- enum schema_validator_e validator; +- schema_version_t version; ++ enum pcmk__schema_validator validator; ++ pcmk__schema_version_t version; + char *transform_enter; + bool transform_onleave; +-}; ++} pcmk__schema_t; + + static GList *known_schemas = NULL; + static bool silent_logging = FALSE; +@@ -94,7 +94,7 @@ xml_find_x_0_schema_index(GList *schemas) + static int best = 0; + int i; + GList *best_node = NULL; +- struct schema_s *best_schema = NULL; ++ pcmk__schema_t *best_schema = NULL; + + if (found) { + return best; +@@ -121,7 +121,7 @@ xml_find_x_0_schema_index(GList *schemas) + i = best; + + for (GList *iter = best_node->prev; iter != NULL; iter = iter->prev) { +- struct schema_s *schema = iter->data; ++ pcmk__schema_t *schema = iter->data; + + /* We've found a schema in an older major version series. 
Return + * the index of the first one in the same major version series as +@@ -154,7 +154,7 @@ xml_latest_schema(void) + } + + static inline bool +-version_from_filename(const char *filename, schema_version_t *version) ++version_from_filename(const char *filename, pcmk__schema_version_t *version) + { + return sscanf(filename, "pacemaker-%hhu.%hhu.rng", &(version->v[0]), &(version->v[1])) == 2; + } +@@ -163,7 +163,7 @@ static int + schema_filter(const struct dirent *a) + { + int rc = 0; +- schema_version_t version = SCHEMA_ZERO; ++ pcmk__schema_version_t version = SCHEMA_ZERO; + + if (strstr(a->d_name, "pacemaker-") != a->d_name) { + /* crm_trace("%s - wrong prefix", a->d_name); */ +@@ -185,8 +185,8 @@ schema_filter(const struct dirent *a) + static int + schema_sort(const struct dirent **a, const struct dirent **b) + { +- schema_version_t a_version = SCHEMA_ZERO; +- schema_version_t b_version = SCHEMA_ZERO; ++ pcmk__schema_version_t a_version = SCHEMA_ZERO; ++ pcmk__schema_version_t b_version = SCHEMA_ZERO; + + if (!version_from_filename(a[0]->d_name, &a_version) + || !version_from_filename(b[0]->d_name, &b_version)) { +@@ -212,14 +212,14 @@ schema_sort(const struct dirent **a, const struct dirent **b) + * through \c add_schema_by_version. + */ + static void +-add_schema(enum schema_validator_e validator, const schema_version_t *version, ++add_schema(enum pcmk__schema_validator validator, const pcmk__schema_version_t *version, + const char *name, const char *transform, + const char *transform_enter, bool transform_onleave) + { +- struct schema_s *schema = NULL; ++ pcmk__schema_t *schema = NULL; + int last = g_list_length(known_schemas); + +- schema = calloc(1, sizeof(struct schema_s)); ++ schema = calloc(1, sizeof(pcmk__schema_t)); + CRM_ASSERT(schema != NULL); + + schema->validator = validator; +@@ -284,7 +284,7 @@ add_schema(enum schema_validator_e validator, const schema_version_t *version, + * . 
name convention: (see "upgrade-enter") + */ + static int +-add_schema_by_version(const schema_version_t *version, bool transform_expected) ++add_schema_by_version(const pcmk__schema_version_t *version, bool transform_expected) + { + bool transform_onleave = FALSE; + int rc = pcmk_rc_ok; +@@ -343,7 +343,7 @@ add_schema_by_version(const schema_version_t *version, bool transform_expected) + rc = ENOENT; + } + +- add_schema(schema_validator_rng, version, NULL, ++ add_schema(pcmk__schema_validator_rng, version, NULL, + transform_upgrade, transform_enter, transform_onleave); + + free(transform_upgrade); +@@ -397,7 +397,7 @@ crm_schema_init(void) + int lpc, max; + char *base = pcmk__xml_artefact_root(pcmk__xml_artefact_ns_legacy_rng); + struct dirent **namelist = NULL; +- const schema_version_t zero = SCHEMA_ZERO; ++ const pcmk__schema_version_t zero = SCHEMA_ZERO; + + wrap_libxslt(false); + +@@ -410,7 +410,7 @@ crm_schema_init(void) + free(base); + for (lpc = 0; lpc < max; lpc++) { + bool transform_expected = FALSE; +- schema_version_t version = SCHEMA_ZERO; ++ pcmk__schema_version_t version = SCHEMA_ZERO; + + if (!version_from_filename(namelist[lpc]->d_name, &version)) { + // Shouldn't be possible, but makes static analysis happy +@@ -419,7 +419,7 @@ crm_schema_init(void) + continue; + } + if ((lpc + 1) < max) { +- schema_version_t next_version = SCHEMA_ZERO; ++ pcmk__schema_version_t next_version = SCHEMA_ZERO; + + if (version_from_filename(namelist[lpc+1]->d_name, &next_version) + && (version.v[0] < next_version.v[0])) { +@@ -440,10 +440,10 @@ crm_schema_init(void) + } + + // @COMPAT: Deprecated since 2.1.5 +- add_schema(schema_validator_rng, &zero, "pacemaker-next", ++ add_schema(pcmk__schema_validator_rng, &zero, "pacemaker-next", + NULL, NULL, FALSE); + +- add_schema(schema_validator_none, &zero, PCMK__VALUE_NONE, ++ add_schema(pcmk__schema_validator_none, &zero, PCMK__VALUE_NONE, + NULL, NULL, FALSE); + } + +@@ -533,14 +533,14 @@ validate_with_relaxng(xmlDocPtr 
doc, xmlRelaxNGValidityErrorFunc error_handler, + static void + free_schema(gpointer data) + { +- struct schema_s *schema = data; ++ pcmk__schema_t *schema = data; + relaxng_ctx_cache_t *ctx = NULL; + + switch (schema->validator) { +- case schema_validator_none: // not cached ++ case pcmk__schema_validator_none: // not cached + break; + +- case schema_validator_rng: // cached ++ case pcmk__schema_validator_rng: // cached + ctx = (relaxng_ctx_cache_t *) schema->cache; + if (ctx == NULL) { + break; +@@ -582,7 +582,7 @@ crm_schema_cleanup(void) + } + + static gboolean +-validate_with(xmlNode *xml, struct schema_s *schema, xmlRelaxNGValidityErrorFunc error_handler, void* error_handler_context) ++validate_with(xmlNode *xml, pcmk__schema_t *schema, xmlRelaxNGValidityErrorFunc error_handler, void* error_handler_context) + { + gboolean valid = FALSE; + char *file = NULL; +@@ -592,7 +592,7 @@ validate_with(xmlNode *xml, struct schema_s *schema, xmlRelaxNGValidityErrorFunc + return FALSE; + } + +- if (schema->validator == schema_validator_none) { ++ if (schema->validator == pcmk__schema_validator_none) { + return TRUE; + } + +@@ -607,7 +607,7 @@ validate_with(xmlNode *xml, struct schema_s *schema, xmlRelaxNGValidityErrorFunc + crm_trace("Validating with %s (type=%d)", + pcmk__s(file, "missing schema"), schema->validator); + switch (schema->validator) { +- case schema_validator_rng: ++ case pcmk__schema_validator_rng: + cache = (relaxng_ctx_cache_t **) &(schema->cache); + valid = validate_with_relaxng(xml->doc, error_handler, error_handler_context, file, cache); + break; +@@ -621,7 +621,7 @@ validate_with(xmlNode *xml, struct schema_s *schema, xmlRelaxNGValidityErrorFunc + } + + static bool +-validate_with_silent(xmlNode *xml, struct schema_s *schema) ++validate_with_silent(xmlNode *xml, pcmk__schema_t *schema) + { + bool rc, sl_backup = silent_logging; + silent_logging = TRUE; +@@ -710,7 +710,7 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, 
xmlRelaxNGValidity + bool valid = FALSE; + + for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { +- struct schema_s *schema = iter->data; ++ pcmk__schema_t *schema = iter->data; + + if (validate_with(xml_blob, schema, NULL, NULL)) { + valid = TRUE; +@@ -726,7 +726,7 @@ pcmk__validate_xml(xmlNode *xml_blob, const char *validation, xmlRelaxNGValidity + if (strcmp(validation, PCMK__VALUE_NONE) == 0) { + return TRUE; + } else if (version < g_list_length(known_schemas)) { +- struct schema_s *schema = g_list_nth_data(known_schemas, version); ++ pcmk__schema_t *schema = g_list_nth_data(known_schemas, version); + return validate_with(xml_blob, schema, error_handler, + error_handler_context); + } +@@ -918,7 +918,7 @@ apply_transformation(xmlNode *xml, const char *transform, gboolean to_logs) + * \note Only emits warnings about enter/leave phases in case of issues. + */ + static xmlNode * +-apply_upgrade(xmlNode *xml, const struct schema_s *schema, gboolean to_logs) ++apply_upgrade(xmlNode *xml, const pcmk__schema_t *schema, gboolean to_logs) + { + bool transform_onleave = schema->transform_onleave; + char *transform_leave; +@@ -972,7 +972,7 @@ apply_upgrade(xmlNode *xml, const struct schema_s *schema, gboolean to_logs) + const char * + get_schema_name(int version) + { +- struct schema_s *schema = g_list_nth_data(known_schemas, version); ++ pcmk__schema_t *schema = g_list_nth_data(known_schemas, version); + + if (schema == NULL) { + return "unknown"; +@@ -991,7 +991,7 @@ get_schema_version(const char *name) + } + + for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { +- struct schema_s *schema = iter->data; ++ pcmk__schema_t *schema = iter->data; + + if (pcmk__str_eq(name, schema->name, pcmk__str_casei)) { + return lpc; +@@ -1052,7 +1052,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + * easier than trying to get all the loop indices we're using here + * sorted out and working correctly. 
+ */ +- struct schema_s *schema = g_list_nth_data(known_schemas, lpc); ++ pcmk__schema_t *schema = g_list_nth_data(known_schemas, lpc); + + crm_debug("Testing '%s' validation (%d of %d)", + pcmk__s(schema->name, ""), lpc, max_stable_schemas); +@@ -1085,7 +1085,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + + if (rc == pcmk_ok && transform) { + xmlNode *upgrade = NULL; +- struct schema_s *next_schema = NULL; ++ pcmk__schema_t *next_schema = NULL; + next = lpc+1; + + if (next > max_stable_schemas) { +@@ -1151,7 +1151,7 @@ update_validation(xmlNode **xml_blob, int *best, int max, gboolean transform, + } + + if (*best > match && *best) { +- struct schema_s *best_schema = g_list_nth_data(known_schemas, *best); ++ pcmk__schema_t *best_schema = g_list_nth_data(known_schemas, *best); + + crm_info("%s the configuration from %s to %s", + transform?"Transformed":"Upgraded", pcmk__s(value, ""), +@@ -1279,7 +1279,7 @@ pcmk__log_known_schemas(void) + int lpc = 0; + + for (GList *iter = known_schemas; iter != NULL; iter = iter->next) { +- struct schema_s *schema = iter->data; ++ pcmk__schema_t *schema = iter->data; + + if (schema->transform != NULL) { + crm_debug("known_schemas[%d] => %s (upgrades with %s.xsl)", +-- +2.31.1 + +From 97b3fa3462039d4d7bdad6c6ff328a5124977e5f Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 17 Nov 2023 11:32:55 -0500 +Subject: [PATCH 19/20] Refactor: libcrmcommon: Make various schema stuff + non-static. + +This is the minimum amount necessary to make the function unit testable. +None of this is intended to ever become public. 
+--- + lib/common/crmcommon_private.h | 26 ++++++++++++++++++++++++++ + lib/common/schemas.c | 25 ++++--------------------- + 2 files changed, 30 insertions(+), 21 deletions(-) + +diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h +index 121d663..6ab9de1 100644 +--- a/lib/common/crmcommon_private.h ++++ b/lib/common/crmcommon_private.h +@@ -283,4 +283,30 @@ void pcmk__register_patchset_messages(pcmk__output_t *out); + #define PCMK__PW_BUFFER_LEN 500 + + ++/* ++ * Schemas ++ */ ++typedef struct { ++ unsigned char v[2]; ++} pcmk__schema_version_t; ++ ++enum pcmk__schema_validator { ++ pcmk__schema_validator_none, ++ pcmk__schema_validator_rng ++}; ++ ++typedef struct { ++ char *name; ++ char *transform; ++ void *cache; ++ enum pcmk__schema_validator validator; ++ pcmk__schema_version_t version; ++ char *transform_enter; ++ bool transform_onleave; ++} pcmk__schema_t; ++ ++G_GNUC_INTERNAL ++int pcmk__find_x_0_schema_index(GList *schemas); ++ ++ + #endif // CRMCOMMON_PRIVATE__H +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index cf8f325..83334b4 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -26,9 +26,7 @@ + #include + #include /* PCMK__XML_LOG_BASE */ + +-typedef struct { +- unsigned char v[2]; +-} pcmk__schema_version_t; ++#include "crmcommon_private.h" + + #define SCHEMA_ZERO { .v = { 0, 0 } } + +@@ -41,21 +39,6 @@ typedef struct { + xmlRelaxNGParserCtxtPtr parser; + } relaxng_ctx_cache_t; + +-enum pcmk__schema_validator { +- pcmk__schema_validator_none, +- pcmk__schema_validator_rng +-}; +- +-typedef struct { +- char *name; +- char *transform; +- void *cache; +- enum pcmk__schema_validator validator; +- pcmk__schema_version_t version; +- char *transform_enter; +- bool transform_onleave; +-} pcmk__schema_t; +- + static GList *known_schemas = NULL; + static bool silent_logging = FALSE; + +@@ -82,8 +65,8 @@ xml_latest_schema_index(GList *schemas) + } + + /* Return the index of the most recent X.0 schema. 
*/ +-static int +-xml_find_x_0_schema_index(GList *schemas) ++int ++pcmk__find_x_0_schema_index(GList *schemas) + { + /* We can't just use best to determine whether we've found the index + * or not. What if we have a very long list of schemas all in the +@@ -1173,7 +1156,7 @@ cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) + + int version = get_schema_version(value); + int orig_version = version; +- int min_version = xml_find_x_0_schema_index(known_schemas); ++ int min_version = pcmk__find_x_0_schema_index(known_schemas); + + if (version < min_version) { + // Current configuration schema is not acceptable, try to update +-- +2.31.1 + +From c4c093c0785e06ca3371556b19ede1d5b44d090a Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 17 Nov 2023 12:39:53 -0500 +Subject: [PATCH 20/20] Test: libcrmcommon: Add unit tests for + pcmk__xml_find_x_0_schema_index. + +This requires making various things in the function conditional, which I +kind of hate. But, it all comes down to the fact that when we are +running for real, we're adding the pacemaker-next/none schemas with +crm_schema_init. + +When we are unit testing, we aren't doing any of that. The only +"schemas" we have are the ones we are adding directly. So, the list has +two items fewer than the real function expects. I think this is okay +and doesn't totally invalidate the testing. 
+--- + configure.ac | 1 + + lib/common/schemas.c | 33 +++++- + lib/common/tests/Makefile.am | 1 + + lib/common/tests/schemas/Makefile.am | 16 +++ + .../pcmk__xml_find_x_0_schema_index_test.c | 112 ++++++++++++++++++ + 5 files changed, 161 insertions(+), 2 deletions(-) + create mode 100644 lib/common/tests/schemas/Makefile.am + create mode 100644 lib/common/tests/schemas/pcmk__xml_find_x_0_schema_index_test.c + +diff --git a/configure.ac b/configure.ac +index 6bff02e..9eb7539 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -2153,6 +2153,7 @@ AC_CONFIG_FILES(Makefile \ + lib/common/tests/output/Makefile \ + lib/common/tests/procfs/Makefile \ + lib/common/tests/results/Makefile \ ++ lib/common/tests/schemas/Makefile \ + lib/common/tests/scores/Makefile \ + lib/common/tests/strings/Makefile \ + lib/common/tests/utils/Makefile \ +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 83334b4..372e872 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -60,8 +60,13 @@ xml_latest_schema_index(GList *schemas) + { + // @COMPAT: pacemaker-next is deprecated since 2.1.5 + // FIXME: This function assumes at least three schemas have been added +- // before it has been called for the first time. ++ // before it has been called for the first time, which is only the case ++ // if we are not unit testing. ++#if defined(PCMK__UNIT_TESTING) ++ return g_list_length(schemas) - 1; // index from 0 ++#else + return g_list_length(schemas) - 3; // index from 0, ignore "pacemaker-next"/"none" ++#endif + } + + /* Return the index of the most recent X.0 schema. */ +@@ -73,8 +78,17 @@ pcmk__find_x_0_schema_index(GList *schemas) + * same major version series? We'd return 0 for that, which means + * we would still run this function every time. + */ ++#if defined(PCMK__UNIT_TESTING) ++ /* If we're unit testing, these can't be static because they'll stick ++ * around from one test run to the next. They need to be cleared out ++ * every time. 
++ */ ++ bool found = false; ++ int best = 0; ++#else + static bool found = false; + static int best = 0; ++#endif + int i; + GList *best_node = NULL; + pcmk__schema_t *best_schema = NULL; +@@ -90,10 +104,25 @@ pcmk__find_x_0_schema_index(GList *schemas) + best_node = g_list_nth(schemas, best); + best_schema = best_node->data; + +- /* If this is a singleton list, we're done. */ ++ /* If we are unit testing, we don't add the pacemaker-next/none schemas ++ * to the list because we're not using the standard schema adding ++ * functions. Thus, a singleton list means we're done. ++ * ++ * On the other hand, if we are running as usual, we have those two ++ * schemas added to the list. A list of length three actually only has ++ * one useful schema. So we're still done. ++ * ++ * @COMPAT Change this when we stop adding those schemas. ++ */ ++#if defined(PCMK__UNIT_TESTING) + if (pcmk__list_of_1(schemas)) { + goto done; + } ++#else ++ if (g_list_length(schemas) == 3) { ++ goto done; ++ } ++#endif + + /* Start comparing the list from the node before the best schema (there's + * no point in comparing something to itself). Then, 'i' is an index +diff --git a/lib/common/tests/Makefile.am b/lib/common/tests/Makefile.am +index c0407e5..22fb32e 100644 +--- a/lib/common/tests/Makefile.am ++++ b/lib/common/tests/Makefile.am +@@ -21,6 +21,7 @@ SUBDIRS = \ + options \ + output \ + results \ ++ schemas \ + scores \ + strings \ + utils \ +diff --git a/lib/common/tests/schemas/Makefile.am b/lib/common/tests/schemas/Makefile.am +new file mode 100644 +index 0000000..5f485b3 +--- /dev/null ++++ b/lib/common/tests/schemas/Makefile.am +@@ -0,0 +1,16 @@ ++# ++# Copyright 2023 the Pacemaker project contributors ++# ++# The version control history for this file may have further details. ++# ++# This source code is licensed under the GNU General Public License version 2 ++# or later (GPLv2+) WITHOUT ANY WARRANTY. 
++# ++ ++include $(top_srcdir)/mk/tap.mk ++include $(top_srcdir)/mk/unittest.mk ++ ++# Add "_test" to the end of all test program names to simplify .gitignore. ++check_PROGRAMS = pcmk__xml_find_x_0_schema_index_test ++ ++TESTS = $(check_PROGRAMS) +diff --git a/lib/common/tests/schemas/pcmk__xml_find_x_0_schema_index_test.c b/lib/common/tests/schemas/pcmk__xml_find_x_0_schema_index_test.c +new file mode 100644 +index 0000000..9f16ba1 +--- /dev/null ++++ b/lib/common/tests/schemas/pcmk__xml_find_x_0_schema_index_test.c +@@ -0,0 +1,112 @@ ++/* ++ * Copyright 2023 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++#include ++ ++#include ++ ++#include "crmcommon_private.h" ++ ++static pcmk__schema_t * ++mk_schema(const char *name, unsigned char x, unsigned char y) ++{ ++ pcmk__schema_t *schema = malloc(sizeof(pcmk__schema_t)); ++ ++ schema->name = strdup(name); ++ schema->version.v[0] = x; ++ schema->version.v[1] = y; ++ return schema; ++} ++ ++static void ++free_schema(void *data) ++{ ++ pcmk__schema_t *schema = data; ++ free(schema->name); ++ free(schema); ++} ++ ++static void ++empty_schema_list(void **state) ++{ ++ pcmk__assert_asserts(pcmk__find_x_0_schema_index(NULL)); ++} ++ ++static void ++singleton_schema_list(void **state) ++{ ++ GList *schemas = NULL; ++ ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.0", 1, 0)); ++ assert_int_equal(0, pcmk__find_x_0_schema_index(schemas)); ++ g_list_free_full(schemas, free_schema); ++} ++ ++static void ++one_major_version(void **state) ++{ ++ GList *schemas = NULL; ++ ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.0", 1, 0)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.2", 1, 2)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.3", 1, 3)); ++ assert_int_equal(0, 
pcmk__find_x_0_schema_index(schemas)); ++ g_list_free_full(schemas, free_schema); ++} ++ ++static void ++first_version_is_not_0(void **state) ++{ ++ GList *schemas = NULL; ++ ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.1", 1, 1)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.2", 1, 2)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.3", 1, 3)); ++ assert_int_equal(0, pcmk__find_x_0_schema_index(schemas)); ++ g_list_free_full(schemas, free_schema); ++} ++ ++static void ++multiple_major_versions(void **state) ++{ ++ GList *schemas = NULL; ++ ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.0", 1, 0)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.1", 1, 1)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-2.0", 2, 0)); ++ assert_int_equal(2, pcmk__find_x_0_schema_index(schemas)); ++ g_list_free_full(schemas, free_schema); ++} ++ ++static void ++many_versions(void **state) ++{ ++ GList *schemas = NULL; ++ ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.0", 1, 0)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.1", 1, 1)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-1.2", 1, 2)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-2.0", 2, 0)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-2.1", 2, 1)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-2.2", 2, 2)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-3.0", 3, 0)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-3.1", 3, 1)); ++ schemas = g_list_append(schemas, mk_schema("pacemaker-3.2", 3, 2)); ++ assert_int_equal(6, pcmk__find_x_0_schema_index(schemas)); ++ g_list_free_full(schemas, free_schema); ++} ++ ++PCMK__UNIT_TEST(NULL, NULL, ++ cmocka_unit_test(empty_schema_list), ++ cmocka_unit_test(singleton_schema_list), ++ cmocka_unit_test(one_major_version), ++ cmocka_unit_test(first_version_is_not_0), ++ cmocka_unit_test(multiple_major_versions), ++ 
cmocka_unit_test(many_versions)) +-- +2.31.1 + diff --git a/SOURCES/001-sync-points.patch b/SOURCES/001-sync-points.patch deleted file mode 100644 index c034c78..0000000 --- a/SOURCES/001-sync-points.patch +++ /dev/null @@ -1,2429 +0,0 @@ -From de05f6b52c667155d262ceeb541dc1041d079d71 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 8 Sep 2022 11:36:58 -0400 -Subject: [PATCH 01/26] Refactor: tools: Use a uint32_t for attr_options. - ---- - tools/attrd_updater.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index d90567a..b85a281 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -47,7 +47,7 @@ struct { - gchar *attr_node; - gchar *attr_set; - char *attr_value; -- int attr_options; -+ uint32_t attr_options; - gboolean query_all; - gboolean quiet; - } options = { --- -2.31.1 - -From c6637520b474d44553ade52c0dbe9e36e873135f Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 21 Oct 2022 14:31:16 -0400 -Subject: [PATCH 02/26] Refactor: libcrmcommon: Make pcmk__xe_match more - broadly useful. - -If attr_v is NULL, simply return the first node with a matching name. ---- - lib/common/xml.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/lib/common/xml.c b/lib/common/xml.c -index 036dd87..ac6f46a 100644 ---- a/lib/common/xml.c -+++ b/lib/common/xml.c -@@ -510,7 +510,7 @@ find_xml_node(const xmlNode *root, const char *search_path, gboolean must_find) - * \param[in] parent XML element to search - * \param[in] node_name If not NULL, only match children of this type - * \param[in] attr_n If not NULL, only match children with an attribute -- * of this name and a value of \p attr_v -+ * of this name. 
- * \param[in] attr_v If \p attr_n and this are not NULL, only match children - * with an attribute named \p attr_n and this value - * -@@ -520,14 +520,16 @@ xmlNode * - pcmk__xe_match(const xmlNode *parent, const char *node_name, - const char *attr_n, const char *attr_v) - { -- /* ensure attr_v specified when attr_n is */ -- CRM_CHECK(attr_n == NULL || attr_v != NULL, return NULL); -+ CRM_CHECK(parent != NULL, return NULL); -+ CRM_CHECK(attr_v == NULL || attr_n != NULL, return NULL); - - for (xmlNode *child = pcmk__xml_first_child(parent); child != NULL; - child = pcmk__xml_next(child)) { - if (pcmk__str_eq(node_name, (const char *) (child->name), - pcmk__str_null_matches) -- && ((attr_n == NULL) || attr_matches(child, attr_n, attr_v))) { -+ && ((attr_n == NULL) || -+ (attr_v == NULL && xmlHasProp(child, (pcmkXmlStr) attr_n)) || -+ (attr_v != NULL && attr_matches(child, attr_n, attr_v)))) { - return child; - } - } --- -2.31.1 - -From dd520579484c6ec091f7fbb550347941302dad0e Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 21 Oct 2022 14:32:46 -0400 -Subject: [PATCH 03/26] Tests: libcrmcommon: Add tests for pcmk__xe_match. - ---- - lib/common/tests/xml/Makefile.am | 3 +- - lib/common/tests/xml/pcmk__xe_match_test.c | 105 +++++++++++++++++++++ - 2 files changed, 107 insertions(+), 1 deletion(-) - create mode 100644 lib/common/tests/xml/pcmk__xe_match_test.c - -diff --git a/lib/common/tests/xml/Makefile.am b/lib/common/tests/xml/Makefile.am -index 342ca07..0ccdcc3 100644 ---- a/lib/common/tests/xml/Makefile.am -+++ b/lib/common/tests/xml/Makefile.am -@@ -11,6 +11,7 @@ include $(top_srcdir)/mk/tap.mk - include $(top_srcdir)/mk/unittest.mk - - # Add "_test" to the end of all test program names to simplify .gitignore. 
--check_PROGRAMS = pcmk__xe_foreach_child_test -+check_PROGRAMS = pcmk__xe_foreach_child_test \ -+ pcmk__xe_match_test - - TESTS = $(check_PROGRAMS) -diff --git a/lib/common/tests/xml/pcmk__xe_match_test.c b/lib/common/tests/xml/pcmk__xe_match_test.c -new file mode 100644 -index 0000000..fd529ba ---- /dev/null -+++ b/lib/common/tests/xml/pcmk__xe_match_test.c -@@ -0,0 +1,105 @@ -+/* -+ * Copyright 2022 the Pacemaker project contributors -+ * -+ * The version control history for this file may have further details. -+ * -+ * This source code is licensed under the GNU Lesser General Public License -+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. -+ */ -+ -+#include -+ -+#include -+#include -+ -+const char *str1 = -+ "\n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ ""; -+ -+static void -+bad_input(void **state) { -+ xmlNode *xml = string2xml(str1); -+ -+ assert_null(pcmk__xe_match(NULL, NULL, NULL, NULL)); -+ assert_null(pcmk__xe_match(NULL, NULL, NULL, "attrX")); -+ -+ free_xml(xml); -+} -+ -+static void -+not_found(void **state) { -+ xmlNode *xml = string2xml(str1); -+ -+ /* No node with an attrX attribute */ -+ assert_null(pcmk__xe_match(xml, NULL, "attrX", NULL)); -+ /* No nodeX node */ -+ assert_null(pcmk__xe_match(xml, "nodeX", NULL, NULL)); -+ /* No nodeA node with attrX */ -+ assert_null(pcmk__xe_match(xml, "nodeA", "attrX", NULL)); -+ /* No nodeA node with attrA=XYZ */ -+ assert_null(pcmk__xe_match(xml, "nodeA", "attrA", "XYZ")); -+ -+ free_xml(xml); -+} -+ -+static void -+find_attrB(void **state) { -+ xmlNode *xml = string2xml(str1); -+ xmlNode *result = NULL; -+ -+ /* Find the first node with attrB */ -+ result = pcmk__xe_match(xml, NULL, "attrB", NULL); -+ assert_non_null(result); -+ assert_string_equal(crm_element_value(result, "id"), "3"); -+ -+ /* Find the first nodeB with 
attrB */ -+ result = pcmk__xe_match(xml, "nodeB", "attrB", NULL); -+ assert_non_null(result); -+ assert_string_equal(crm_element_value(result, "id"), "5"); -+ -+ free_xml(xml); -+} -+ -+static void -+find_attrA_matching(void **state) { -+ xmlNode *xml = string2xml(str1); -+ xmlNode *result = NULL; -+ -+ /* Find attrA=456 */ -+ result = pcmk__xe_match(xml, NULL, "attrA", "456"); -+ assert_non_null(result); -+ assert_string_equal(crm_element_value(result, "id"), "2"); -+ -+ /* Find a nodeB with attrA=123 */ -+ result = pcmk__xe_match(xml, "nodeB", "attrA", "123"); -+ assert_non_null(result); -+ assert_string_equal(crm_element_value(result, "id"), "4"); -+ -+ free_xml(xml); -+} -+ -+PCMK__UNIT_TEST(NULL, NULL, -+ cmocka_unit_test(bad_input), -+ cmocka_unit_test(not_found), -+ cmocka_unit_test(find_attrB), -+ cmocka_unit_test(find_attrA_matching)); --- -2.31.1 - -From 03af8498d8aaf21c509cec9b0ec4b78475da41d7 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 8 Sep 2022 12:22:26 -0400 -Subject: [PATCH 04/26] Feature: libcrmcommon: Add attrd options for specifying - a sync point. 
- ---- - include/crm/common/attrd_internal.h | 16 +++++++++------- - 1 file changed, 9 insertions(+), 7 deletions(-) - -diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h -index f7033ad..389be48 100644 ---- a/include/crm/common/attrd_internal.h -+++ b/include/crm/common/attrd_internal.h -@@ -16,13 +16,15 @@ extern "C" { - - // Options for clients to use with functions below - enum pcmk__node_attr_opts { -- pcmk__node_attr_none = 0, -- pcmk__node_attr_remote = (1 << 0), -- pcmk__node_attr_private = (1 << 1), -- pcmk__node_attr_pattern = (1 << 2), -- pcmk__node_attr_value = (1 << 3), -- pcmk__node_attr_delay = (1 << 4), -- pcmk__node_attr_perm = (1 << 5), -+ pcmk__node_attr_none = 0, -+ pcmk__node_attr_remote = (1 << 0), -+ pcmk__node_attr_private = (1 << 1), -+ pcmk__node_attr_pattern = (1 << 2), -+ pcmk__node_attr_value = (1 << 3), -+ pcmk__node_attr_delay = (1 << 4), -+ pcmk__node_attr_perm = (1 << 5), -+ pcmk__node_attr_sync_local = (1 << 6), -+ pcmk__node_attr_sync_cluster = (1 << 7), - }; - - #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ --- -2.31.1 - -From 5c8825293ee21d3823bdcd01b0df9c7d39739940 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 8 Sep 2022 12:23:09 -0400 -Subject: [PATCH 05/26] Feature: libcrmcommon: Add sync point to IPC request - XML. - -If one of the pcmk__node_attr_sync_* options is provided, add an -attribute to the request XML. This will later be inspected by the -server to determine when to send the reply to the client. 
---- - include/crm/common/options_internal.h | 2 ++ - include/crm_internal.h | 1 + - lib/common/ipc_attrd.c | 6 ++++++ - 3 files changed, 9 insertions(+) - -diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h -index b153c67..f29ba3f 100644 ---- a/include/crm/common/options_internal.h -+++ b/include/crm/common/options_internal.h -@@ -145,9 +145,11 @@ bool pcmk__valid_sbd_timeout(const char *value); - #define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" - - // Constants for enumerated values for various options -+#define PCMK__VALUE_CLUSTER "cluster" - #define PCMK__VALUE_CUSTOM "custom" - #define PCMK__VALUE_FENCING "fencing" - #define PCMK__VALUE_GREEN "green" -+#define PCMK__VALUE_LOCAL "local" - #define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red" - #define PCMK__VALUE_NONE "none" - #define PCMK__VALUE_NOTHING "nothing" -diff --git a/include/crm_internal.h b/include/crm_internal.h -index e6e2e96..08193c3 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -71,6 +71,7 @@ - #define PCMK__XA_ATTR_RESOURCE "attr_resource" - #define PCMK__XA_ATTR_SECTION "attr_section" - #define PCMK__XA_ATTR_SET "attr_set" -+#define PCMK__XA_ATTR_SYNC_POINT "attr_sync_point" - #define PCMK__XA_ATTR_USER "attr_user" - #define PCMK__XA_ATTR_UUID "attr_key" - #define PCMK__XA_ATTR_VALUE "attr_value" -diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c -index f6cfbc4..4606509 100644 ---- a/lib/common/ipc_attrd.c -+++ b/lib/common/ipc_attrd.c -@@ -431,6 +431,12 @@ populate_update_op(xmlNode *op, const char *node, const char *name, const char * - pcmk_is_set(options, pcmk__node_attr_remote)); - crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE, - pcmk_is_set(options, pcmk__node_attr_private)); -+ -+ if (pcmk_is_set(options, pcmk__node_attr_sync_local)) { -+ crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_LOCAL); -+ } else if (pcmk_is_set(options, pcmk__node_attr_sync_cluster)) { -+ crm_xml_add(op, 
PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_CLUSTER); -+ } - } - - int --- -2.31.1 - -From e2b3fee630caf0846ca8bbffcef4d6d2acfd32a5 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 8 Sep 2022 12:26:28 -0400 -Subject: [PATCH 06/26] Feature: tools: Add --wait= parameter to attrd_updater. - -This command line option is used to specify the sync point to use. For -the moment, it has no effect. ---- - tools/attrd_updater.c | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index b85a281..c4779a6 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -97,6 +97,22 @@ section_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError - return TRUE; - } - -+static gboolean -+wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { -+ if (pcmk__str_eq(optarg, "no", pcmk__str_none)) { -+ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); -+ return TRUE; -+ } else if (pcmk__str_eq(optarg, PCMK__VALUE_LOCAL, pcmk__str_none)) { -+ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); -+ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); -+ return TRUE; -+ } else { -+ g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, -+ "--wait= must be one of 'no', 'local', 'cluster'"); -+ return FALSE; -+ } -+} -+ - #define INDENT " " - - static GOptionEntry required_entries[] = { -@@ -175,6 +191,14 @@ static GOptionEntry addl_entries[] = { - "If this creates a new attribute, never write the attribute to CIB", - NULL }, - -+ { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, -+ "Wait for some event to occur before returning. 
Values are 'no' (wait\n" -+ INDENT "only for the attribute daemon to acknowledge the request) or\n" -+ INDENT "'local' (wait until the change has propagated to where a local\n" -+ INDENT "query will return the request value, or the value set by a\n" -+ INDENT "later request). Default is 'no'.", -+ "UNTIL" }, -+ - { NULL } - }; - --- -2.31.1 - -From 52d51ab41b2f00e72724ab39835b3db86605a96b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 Oct 2022 14:40:13 -0400 -Subject: [PATCH 07/26] Feature: daemons: Add functions for checking a request - for a sync point. - ---- - daemons/attrd/Makefile.am | 1 + - daemons/attrd/attrd_sync.c | 38 +++++++++++++++++++++++++++++++++ - daemons/attrd/pacemaker-attrd.h | 3 +++ - 3 files changed, 42 insertions(+) - create mode 100644 daemons/attrd/attrd_sync.c - -diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am -index 1a3d360..6bb81c4 100644 ---- a/daemons/attrd/Makefile.am -+++ b/daemons/attrd/Makefile.am -@@ -32,6 +32,7 @@ pacemaker_attrd_SOURCES = attrd_alerts.c \ - attrd_elections.c \ - attrd_ipc.c \ - attrd_messages.c \ -+ attrd_sync.c \ - attrd_utils.c \ - pacemaker-attrd.c - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -new file mode 100644 -index 0000000..92759d2 ---- /dev/null -+++ b/daemons/attrd/attrd_sync.c -@@ -0,0 +1,38 @@ -+/* -+ * Copyright 2022 the Pacemaker project contributors -+ * -+ * The version control history for this file may have further details. -+ * -+ * This source code is licensed under the GNU General Public License version 2 -+ * or later (GPLv2+) WITHOUT ANY WARRANTY. 
-+ */ -+ -+#include -+ -+#include -+#include -+ -+#include "pacemaker-attrd.h" -+ -+const char * -+attrd_request_sync_point(xmlNode *xml) -+{ -+ if (xml_has_children(xml)) { -+ xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); -+ -+ if (child) { -+ return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT); -+ } else { -+ return NULL; -+ } -+ -+ } else { -+ return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT); -+ } -+} -+ -+bool -+attrd_request_has_sync_point(xmlNode *xml) -+{ -+ return attrd_request_sync_point(xml) != NULL; -+} -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 71ce90a..ff850bb 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -182,4 +182,7 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a - void attrd_unregister_handlers(void); - void attrd_handle_request(pcmk__request_t *request); - -+const char *attrd_request_sync_point(xmlNode *xml); -+bool attrd_request_has_sync_point(xmlNode *xml); -+ - #endif /* PACEMAKER_ATTRD__H */ --- -2.31.1 - -From 2e0509a12ee7d4a612133ee65b75245eea7d271d Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 Oct 2022 14:42:04 -0400 -Subject: [PATCH 08/26] Refactor: daemons: Don't ACK update requests that give - a sync point. - -The ACK is the only response from the server for update messages. If -the message specified that it wanted to wait for a sync point, we need -to delay sending that response until the sync point is reached. -Therefore, do not always immediately send the ACK. 
---- - daemons/attrd/attrd_messages.c | 19 ++++++++++++++----- - 1 file changed, 14 insertions(+), 5 deletions(-) - -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index de4a28a..9e8ae40 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -137,12 +137,21 @@ handle_update_request(pcmk__request_t *request) - attrd_peer_update(peer, request->xml, host, false); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; -+ - } else { -- /* Because attrd_client_update can be called recursively, we send the ACK -- * here to ensure that the client only ever receives one. -- */ -- attrd_send_ack(request->ipc_client, request->ipc_id, -- request->flags|crm_ipc_client_response); -+ if (!attrd_request_has_sync_point(request->xml)) { -+ /* If the client doesn't want to wait for a sync point, go ahead and send -+ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate -+ * sync point is reached. -+ * -+ * In the normal case, attrd_client_update can be called recursively which -+ * makes where to send the ACK tricky. Doing it here ensures the client -+ * only ever receives one. -+ */ -+ attrd_send_ack(request->ipc_client, request->ipc_id, -+ request->flags|crm_ipc_client_response); -+ } -+ - return attrd_client_update(request); - } - } --- -2.31.1 - -From 2a0ff66cdf0085c4c8ab1992ef7e785a4facc8c7 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 Oct 2022 14:48:48 -0400 -Subject: [PATCH 09/26] Feature: daemons: Add support for local sync points on - updates. - -In the IPC dispatcher for attrd, add the client to a wait list if its -request specifies a sync point. When the attribute's value is changed -on the local attrd, alert any clients waiting on a local sync point by -then sending the previously delayed ACK. - -Sync points for other requests and the global sync point are not yet -supported. - -Fixes T35. 
---- - daemons/attrd/attrd_corosync.c | 18 +++++ - daemons/attrd/attrd_messages.c | 12 ++- - daemons/attrd/attrd_sync.c | 137 ++++++++++++++++++++++++++++++++ - daemons/attrd/pacemaker-attrd.h | 7 ++ - 4 files changed, 173 insertions(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 539e5bf..4337280 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -568,14 +568,32 @@ void - attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, - bool filter) - { -+ bool handle_sync_point = false; -+ - if (xml_has_children(xml)) { - for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; - child = crm_next_same_xml(child)) { - copy_attrs(xml, child); - attrd_peer_update_one(peer, child, filter); -+ -+ if (attrd_request_has_sync_point(child)) { -+ handle_sync_point = true; -+ } - } - - } else { - attrd_peer_update_one(peer, xml, filter); -+ -+ if (attrd_request_has_sync_point(xml)) { -+ handle_sync_point = true; -+ } -+ } -+ -+ /* If the update XML specified that the client wanted to wait for a sync -+ * point, process that now. -+ */ -+ if (handle_sync_point) { -+ crm_debug("Hit local sync point for attribute update"); -+ attrd_ack_waitlist_clients(attrd_sync_point_local, xml); - } - } -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index 9e8ae40..c96700f 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -139,7 +139,17 @@ handle_update_request(pcmk__request_t *request) - return NULL; - - } else { -- if (!attrd_request_has_sync_point(request->xml)) { -+ if (attrd_request_has_sync_point(request->xml)) { -+ /* If this client supplied a sync point it wants to wait for, add it to -+ * the wait list. Clients on this list will not receive an ACK until -+ * their sync point is hit which will result in the client stalled there -+ * until it receives a response. 
-+ * -+ * All other clients will receive the expected response as normal. -+ */ -+ attrd_add_client_to_waitlist(request); -+ -+ } else { - /* If the client doesn't want to wait for a sync point, go ahead and send - * the ACK immediately. Otherwise, we'll send the ACK when the appropriate - * sync point is reached. -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 92759d2..2981bd0 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -14,6 +14,143 @@ - - #include "pacemaker-attrd.h" - -+/* A hash table storing clients that are waiting on a sync point to be reached. -+ * The key is waitlist_client - just a plain int. The obvious key would be -+ * the IPC client's ID, but this is not guaranteed to be unique. A single client -+ * could be waiting on a sync point for multiple attributes at the same time. -+ * -+ * It is not expected that this hash table will ever be especially large. -+ */ -+static GHashTable *waitlist = NULL; -+static int waitlist_client = 0; -+ -+struct waitlist_node { -+ /* What kind of sync point does this node describe? */ -+ enum attrd_sync_point sync_point; -+ -+ /* Information required to construct and send a reply to the client. 
*/ -+ char *client_id; -+ uint32_t ipc_id; -+ uint32_t flags; -+}; -+ -+static void -+next_key(void) -+{ -+ do { -+ waitlist_client++; -+ if (waitlist_client < 0) { -+ waitlist_client = 1; -+ } -+ } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); -+} -+ -+static void -+free_waitlist_node(gpointer data) -+{ -+ struct waitlist_node *wl = (struct waitlist_node *) data; -+ -+ free(wl->client_id); -+ free(wl); -+} -+ -+static const char * -+sync_point_str(enum attrd_sync_point sync_point) -+{ -+ if (sync_point == attrd_sync_point_local) { -+ return PCMK__VALUE_LOCAL; -+ } else if (sync_point == attrd_sync_point_cluster) { -+ return PCMK__VALUE_CLUSTER; -+ } else { -+ return "unknown"; -+ } -+} -+ -+void -+attrd_add_client_to_waitlist(pcmk__request_t *request) -+{ -+ const char *sync_point = attrd_request_sync_point(request->xml); -+ struct waitlist_node *wl = NULL; -+ -+ if (sync_point == NULL) { -+ return; -+ } -+ -+ if (waitlist == NULL) { -+ waitlist = pcmk__intkey_table(free_waitlist_node); -+ } -+ -+ wl = calloc(sizeof(struct waitlist_node), 1); -+ -+ CRM_ASSERT(wl != NULL); -+ -+ wl->client_id = strdup(request->ipc_client->id); -+ -+ CRM_ASSERT(wl->client_id); -+ -+ if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { -+ wl->sync_point = attrd_sync_point_local; -+ } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { -+ wl->sync_point = attrd_sync_point_cluster; -+ } else { -+ free_waitlist_node(wl); -+ return; -+ } -+ -+ wl->ipc_id = request->ipc_id; -+ wl->flags = request->flags; -+ -+ crm_debug("Added client %s to waitlist for %s sync point", -+ wl->client_id, sync_point_str(wl->sync_point)); -+ -+ next_key(); -+ pcmk__intkey_table_insert(waitlist, waitlist_client, wl); -+ -+ /* And then add the key to the request XML so we can uniquely identify -+ * it when it comes time to issue the ACK. 
-+ */ -+ crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); -+} -+ -+void -+attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) -+{ -+ int callid; -+ gpointer value; -+ -+ if (waitlist == NULL) { -+ return; -+ } -+ -+ if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) { -+ crm_warn("Could not get callid from request XML"); -+ return; -+ } -+ -+ value = pcmk__intkey_table_lookup(waitlist, callid); -+ if (value != NULL) { -+ struct waitlist_node *wl = (struct waitlist_node *) value; -+ pcmk__client_t *client = NULL; -+ -+ if (wl->sync_point != sync_point) { -+ return; -+ } -+ -+ crm_debug("Alerting client %s for reached %s sync point", -+ wl->client_id, sync_point_str(wl->sync_point)); -+ -+ client = pcmk__find_client_by_id(wl->client_id); -+ if (client == NULL) { -+ return; -+ } -+ -+ attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); -+ -+ /* And then remove the client so it doesn't get alerted again. */ -+ pcmk__intkey_table_remove(waitlist, callid); -+ } -+} -+ - const char * - attrd_request_sync_point(xmlNode *xml) - { -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index ff850bb..9dd8320 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -182,6 +182,13 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a - void attrd_unregister_handlers(void); - void attrd_handle_request(pcmk__request_t *request); - -+enum attrd_sync_point { -+ attrd_sync_point_local, -+ attrd_sync_point_cluster, -+}; -+ -+void attrd_add_client_to_waitlist(pcmk__request_t *request); -+void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); - const char *attrd_request_sync_point(xmlNode *xml); - bool attrd_request_has_sync_point(xmlNode *xml); - --- -2.31.1 - -From 59caaf1682191a91d6062358b770f8b9457ba3eb Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 Oct 2022 
14:56:58 -0400 -Subject: [PATCH 10/26] Feature: daemons: If a client disconnects, remove it - from the waitlist. - ---- - daemons/attrd/attrd_ipc.c | 5 +++++ - daemons/attrd/attrd_sync.c | 21 +++++++++++++++++++++ - daemons/attrd/pacemaker-attrd.h | 1 + - 3 files changed, 27 insertions(+) - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 7e4a1c0..8aa39c2 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -438,8 +438,13 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) - crm_trace("Ignoring request to clean up unknown connection %p", c); - } else { - crm_trace("Cleaning up closed client connection %p", c); -+ -+ /* Remove the client from the sync point waitlist if it's present. */ -+ attrd_remove_client_from_waitlist(client); -+ - pcmk__free_client(client); - } -+ - return FALSE; - } - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 2981bd0..7293318 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -112,6 +112,27 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) - crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); - } - -+void -+attrd_remove_client_from_waitlist(pcmk__client_t *client) -+{ -+ GHashTableIter iter; -+ gpointer value; -+ -+ if (waitlist == NULL) { -+ return; -+ } -+ -+ g_hash_table_iter_init(&iter, waitlist); -+ -+ while (g_hash_table_iter_next(&iter, NULL, &value)) { -+ struct waitlist_node *wl = (struct waitlist_node *) value; -+ -+ if (wl->client_id == client->id) { -+ g_hash_table_iter_remove(&iter); -+ } -+ } -+} -+ - void - attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - { -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 9dd8320..b6ecb75 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -189,6 +189,7 @@ enum attrd_sync_point { - - void attrd_add_client_to_waitlist(pcmk__request_t *request); - void 
attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); -+void attrd_remove_client_from_waitlist(pcmk__client_t *client); - const char *attrd_request_sync_point(xmlNode *xml); - bool attrd_request_has_sync_point(xmlNode *xml); - --- -2.31.1 - -From b28042e1d64b48c96dbd9da1e9ee3ff481bbf620 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 10 Oct 2022 11:00:20 -0400 -Subject: [PATCH 11/26] Feature: daemons: Add support for local sync points on - clearing failures. - -attrd_clear_client_failure just calls attrd_client_update underneath, so -that function will handle all the rest of the sync point functionality -for us. ---- - daemons/attrd/attrd_ipc.c | 2 -- - daemons/attrd/attrd_messages.c | 19 +++++++++++++++++++ - 2 files changed, 19 insertions(+), 2 deletions(-) - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 8aa39c2..2e614e8 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -101,8 +101,6 @@ attrd_client_clear_failure(pcmk__request_t *request) - xmlNode *xml = request->xml; - const char *rsc, *op, *interval_spec; - -- attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); -- - if (minimum_protocol_version >= 2) { - /* Propagate to all peers (including ourselves). - * This ends up at attrd_peer_message(). -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index c96700f..3ba14a6 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -42,6 +42,25 @@ handle_clear_failure_request(pcmk__request_t *request) - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } else { -+ if (attrd_request_has_sync_point(request->xml)) { -+ /* If this client supplied a sync point it wants to wait for, add it to -+ * the wait list. Clients on this list will not receive an ACK until -+ * their sync point is hit which will result in the client stalled there -+ * until it receives a response. 
-+ * -+ * All other clients will receive the expected response as normal. -+ */ -+ attrd_add_client_to_waitlist(request); -+ -+ } else { -+ /* If the client doesn't want to wait for a sync point, go ahead and send -+ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate -+ * sync point is reached. -+ */ -+ attrd_send_ack(request->ipc_client, request->ipc_id, -+ request->ipc_flags); -+ } -+ - return attrd_client_clear_failure(request); - } - } --- -2.31.1 - -From 291dc3b91e57f2584bbf88cfbe3a360e0332e814 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 10 Oct 2022 13:17:24 -0400 -Subject: [PATCH 12/26] Refactor: daemons: Free the waitlist on attrd exit. - ---- - daemons/attrd/attrd_sync.c | 11 +++++++++++ - daemons/attrd/attrd_utils.c | 2 ++ - daemons/attrd/pacemaker-attrd.c | 1 + - daemons/attrd/pacemaker-attrd.h | 1 + - 4 files changed, 15 insertions(+) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 7293318..557e49a 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -112,6 +112,17 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) - crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); - } - -+void -+attrd_free_waitlist(void) -+{ -+ if (waitlist == NULL) { -+ return; -+ } -+ -+ g_hash_table_destroy(waitlist); -+ waitlist = NULL; -+} -+ - void - attrd_remove_client_from_waitlist(pcmk__client_t *client) - { -diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c -index 6a19009..00b879b 100644 ---- a/daemons/attrd/attrd_utils.c -+++ b/daemons/attrd/attrd_utils.c -@@ -93,6 +93,8 @@ attrd_shutdown(int nsig) - mainloop_destroy_signal(SIGUSR2); - mainloop_destroy_signal(SIGTRAP); - -+ attrd_free_waitlist(); -+ - if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { - /* If there's no main loop active, just exit. This should be possible - * only if we get SIGTERM in brief windows at start-up and shutdown. 
-diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c -index 2100db4..1336542 100644 ---- a/daemons/attrd/pacemaker-attrd.c -+++ b/daemons/attrd/pacemaker-attrd.c -@@ -300,6 +300,7 @@ main(int argc, char **argv) - attrd_ipc_fini(); - attrd_lrmd_disconnect(); - attrd_cib_disconnect(); -+ attrd_free_waitlist(); - g_hash_table_destroy(attributes); - } - -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index b6ecb75..537bf85 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -52,6 +52,7 @@ void attrd_run_mainloop(void); - - void attrd_set_requesting_shutdown(void); - void attrd_clear_requesting_shutdown(void); -+void attrd_free_waitlist(void); - bool attrd_requesting_shutdown(void); - bool attrd_shutting_down(void); - void attrd_shutdown(int nsig); --- -2.31.1 - -From 7715ce617c520e14687a82e11ff794c93cd7f64a Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 10 Oct 2022 13:21:16 -0400 -Subject: [PATCH 13/26] Feature: includes: Bump CRM_FEATURE_SET for local sync - points. - ---- - include/crm/crm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/crm/crm.h b/include/crm/crm.h -index 5710e4b..7c5c602 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -66,7 +66,7 @@ extern "C" { - * >=3.0.13: Fail counts include operation name and interval - * >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED - */ --# define CRM_FEATURE_SET "3.16.1" -+# define CRM_FEATURE_SET "3.16.2" - - /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and - * recipient of a CPG message. This imposes an arbitrary limit on cluster node --- -2.31.1 - -From b9054425a76d03f538cd0b3ae27490b1874eee8a Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 28 Oct 2022 14:23:49 -0400 -Subject: [PATCH 14/26] Refactor: daemons: Add comments for previously added - sync point code. 
- ---- - daemons/attrd/attrd_sync.c | 63 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 63 insertions(+) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 557e49a..e9690b5 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -66,6 +66,20 @@ sync_point_str(enum attrd_sync_point sync_point) - } - } - -+/*! -+ * \internal -+ * \brief Add a client to the attrd waitlist -+ * -+ * Typically, a client receives an ACK for its XML IPC request immediately. However, -+ * some clients want to wait until their request has been processed and taken effect. -+ * This is called a sync point. Any client placed on this waitlist will have its -+ * ACK message delayed until either its requested sync point is hit, or until it -+ * times out. -+ * -+ * The XML IPC request must specify the type of sync point it wants to wait for. -+ * -+ * \param[in,out] request The request describing the client to place on the waitlist. -+ */ - void - attrd_add_client_to_waitlist(pcmk__request_t *request) - { -@@ -112,6 +126,11 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) - crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); - } - -+/*! -+ * \internal -+ * \brief Free all memory associated with the waitlist. This is most typically -+ * used when attrd shuts down. -+ */ - void - attrd_free_waitlist(void) - { -@@ -123,6 +142,13 @@ attrd_free_waitlist(void) - waitlist = NULL; - } - -+/*! -+ * \internal -+ * \brief Unconditionally remove a client from the waitlist, such as when the client -+ * node disconnects from the cluster -+ * -+ * \param[in] client The client to remove -+ */ - void - attrd_remove_client_from_waitlist(pcmk__client_t *client) - { -@@ -144,6 +170,18 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) - } - } - -+/*! 
-+ * \internal -+ * \brief Send an IPC ACK message to all awaiting clients -+ * -+ * This function will search the waitlist for all clients that are currently awaiting -+ * an ACK indicating their attrd operation is complete. Only those clients with a -+ * matching sync point type and callid from their original XML IPC request will be -+ * ACKed. Once they have received an ACK, they will be removed from the waitlist. -+ * -+ * \param[in] sync_point What kind of sync point have we hit? -+ * \param[in] xml The original XML IPC request. -+ */ - void - attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - { -@@ -183,6 +221,23 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - } - } - -+/*! -+ * \internal -+ * \brief Return the sync point attribute for an IPC request -+ * -+ * This function will check both the top-level element of \p xml for a sync -+ * point attribute, as well as all of its \p op children, if any. The latter -+ * is useful for newer versions of attrd that can put multiple IPC requests -+ * into a single message. -+ * -+ * \param[in] xml An XML IPC request -+ * -+ * \note It is assumed that if one child element has a sync point attribute, -+ * all will have a sync point attribute and they will all be the same -+ * sync point. No other configuration is supported. -+ * -+ * \return The sync point attribute of \p xml, or NULL if none. -+ */ - const char * - attrd_request_sync_point(xmlNode *xml) - { -@@ -200,6 +255,14 @@ attrd_request_sync_point(xmlNode *xml) - } - } - -+/*! -+ * \internal -+ * \brief Does an IPC request contain any sync point attribute? 
-+ * -+ * \param[in] xml An XML IPC request -+ * -+ * \return true if there's a sync point attribute, false otherwise -+ */ - bool - attrd_request_has_sync_point(xmlNode *xml) - { --- -2.31.1 - -From 64219fb7075ee58d29f94f077a3b8f94174bb32a Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 26 Oct 2022 12:43:05 -0400 -Subject: [PATCH 15/26] Feature: tools: Add --wait=cluster option to - attrd_updater. - ---- - tools/attrd_updater.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index c4779a6..3cd766d 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -106,6 +106,10 @@ wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError ** - pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); - pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); - return TRUE; -+ } else if (pcmk__str_eq(optarg, PCMK__VALUE_CLUSTER, pcmk__str_none)) { -+ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); -+ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_cluster); -+ return TRUE; - } else { - g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, - "--wait= must be one of 'no', 'local', 'cluster'"); -@@ -193,10 +197,12 @@ static GOptionEntry addl_entries[] = { - - { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, - "Wait for some event to occur before returning. Values are 'no' (wait\n" -- INDENT "only for the attribute daemon to acknowledge the request) or\n" -+ INDENT "only for the attribute daemon to acknowledge the request),\n" - INDENT "'local' (wait until the change has propagated to where a local\n" - INDENT "query will return the request value, or the value set by a\n" -- INDENT "later request). 
Default is 'no'.", -+ INDENT "later request), or 'cluster' (wait until the change has propagated\n" -+ INDENT "to where a query anywhere on the cluster will return the requested\n" -+ INDENT "value, or the value set by a later request). Default is 'no'.", - "UNTIL" }, - - { NULL } --- -2.31.1 - -From 1bc5511fadf6ad670508bd3a2a55129bde16f774 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 16 Sep 2022 14:55:06 -0400 -Subject: [PATCH 16/26] Refactor: daemons: Add a confirm= attribute to attrd - messages. - -This allows informing the originator of a message that the message has -been received and processed. As yet, there is no mechanism for handling -and returning the confirmation, only for requesting it. ---- - daemons/attrd/attrd_corosync.c | 6 +++--- - daemons/attrd/attrd_ipc.c | 26 +++++++++++++++++++++----- - daemons/attrd/attrd_messages.c | 11 +++++++++-- - daemons/attrd/pacemaker-attrd.h | 7 ++++--- - include/crm_internal.h | 1 + - 5 files changed, 38 insertions(+), 13 deletions(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 4337280..e86ca07 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -124,7 +124,7 @@ broadcast_local_value(const attribute_t *a) - - crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); - attrd_add_value_xml(sync, a, v, false); -- attrd_send_message(NULL, sync); -+ attrd_send_message(NULL, sync, false); - free_xml(sync); - return v; - } -@@ -387,7 +387,7 @@ broadcast_unseen_local_values(void) - - if (sync != NULL) { - crm_debug("Broadcasting local-only values"); -- attrd_send_message(NULL, sync); -+ attrd_send_message(NULL, sync, false); - free_xml(sync); - } - } -@@ -539,7 +539,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml) - } - - crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); -- attrd_send_message(peer, sync); -+ attrd_send_message(peer, sync, false); - free_xml(sync); - } - -diff --git a/daemons/attrd/attrd_ipc.c 
b/daemons/attrd/attrd_ipc.c -index 2e614e8..0fc5e93 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -105,7 +105,7 @@ attrd_client_clear_failure(pcmk__request_t *request) - /* Propagate to all peers (including ourselves). - * This ends up at attrd_peer_message(). - */ -- attrd_send_message(NULL, xml); -+ attrd_send_message(NULL, xml, false); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } -@@ -184,7 +184,7 @@ attrd_client_peer_remove(pcmk__request_t *request) - if (host) { - crm_info("Client %s is requesting all values for %s be removed", - pcmk__client_name(request->ipc_client), host); -- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ -+ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ - free(host_alloc); - } else { - crm_info("Ignoring request by client %s to remove all peer values without specifying peer", -@@ -314,7 +314,7 @@ attrd_client_update(pcmk__request_t *request) - } - } - -- attrd_send_message(NULL, xml); -+ attrd_send_message(NULL, xml, false); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - - } else { -@@ -358,7 +358,7 @@ attrd_client_update(pcmk__request_t *request) - if (status == 0) { - crm_trace("Matched %s with %s", attr, regex); - crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); -- attrd_send_message(NULL, xml); -+ attrd_send_message(NULL, xml, false); - } - } - -@@ -388,7 +388,23 @@ attrd_client_update(pcmk__request_t *request) - crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), - value, (attrd_election_won()? " (writer)" : "")); - -- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ -+ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { -+ /* The client is waiting on the cluster-wide sync point. 
In this case, -+ * the response ACK is not sent until this attrd broadcasts the update -+ * and receives its own confirmation back from all peers. -+ */ -+ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ -+ -+ } else { -+ /* The client is either waiting on the local sync point or was not -+ * waiting on any sync point at all. For the local sync point, the -+ * response ACK is sent in attrd_peer_update. For clients not -+ * waiting on any sync point, the response ACK is sent in -+ * handle_update_request immediately before this function was called. -+ */ -+ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ -+ } -+ - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index 3ba14a6..78df0d0 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -279,16 +279,23 @@ attrd_broadcast_protocol(void) - crm_debug("Broadcasting attrd protocol version %s for node %s", - ATTRD_PROTOCOL_VERSION, attrd_cluster->uname); - -- attrd_send_message(NULL, attrd_op); /* ends up at attrd_peer_message() */ -+ attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ - - free_xml(attrd_op); - } - - gboolean --attrd_send_message(crm_node_t * node, xmlNode * data) -+attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) - { - crm_xml_add(data, F_TYPE, T_ATTRD); - crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); -+ -+ /* Request a confirmation from the destination peer node (which could -+ * be all if node is NULL) that the message has been received and -+ * acted upon. 
-+ */ -+ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); -+ - attrd_xml_add_writer(data); - return send_cluster_message(node, crm_msg_attrd, data, TRUE); - } -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 537bf85..25f7c8a 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -39,10 +39,11 @@ - * PCMK__ATTRD_CMD_UPDATE_DELAY - * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE - * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes -- * 4 2.2.0 Multiple attributes can be updated in a single IPC -+ * 4 2.1.5 Multiple attributes can be updated in a single IPC - * message -+ * 5 2.1.5 Peers can request confirmation of a sent message - */ --#define ATTRD_PROTOCOL_VERSION "4" -+#define ATTRD_PROTOCOL_VERSION "5" - - #define attrd_send_ack(client, id, flags) \ - pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) -@@ -162,7 +163,7 @@ xmlNode *attrd_client_clear_failure(pcmk__request_t *request); - xmlNode *attrd_client_update(pcmk__request_t *request); - xmlNode *attrd_client_refresh(pcmk__request_t *request); - xmlNode *attrd_client_query(pcmk__request_t *request); --gboolean attrd_send_message(crm_node_t * node, xmlNode * data); -+gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); - - xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, - const attribute_value_t *v, bool force_write); -diff --git a/include/crm_internal.h b/include/crm_internal.h -index 08193c3..63a1726 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -79,6 +79,7 @@ - #define PCMK__XA_ATTR_WRITER "attr_writer" - #define PCMK__XA_CONFIG_ERRORS "config-errors" - #define PCMK__XA_CONFIG_WARNINGS "config-warnings" -+#define PCMK__XA_CONFIRM "confirm" - #define PCMK__XA_GRAPH_ERRORS "graph-errors" - #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" - #define PCMK__XA_MODE "mode" --- -2.31.1 - -From 
6f389038fc0b11f6291c022c99f188666c65f530 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 26 Oct 2022 14:44:42 -0400 -Subject: [PATCH 17/26] Feature: daemons: Respond to received attrd - confirmation requests. - -On the receiving peer side, if the XML request contains confirm="true", -construct a confirmation message after handling the request completes -and send it back to the originating peer. - -On the originating peer side, add a skeleton handler for confirmation -messages. This does nothing at the moment except log it. ---- - daemons/attrd/attrd_corosync.c | 38 ++++++++++++++++++++++++++++++++++ - daemons/attrd/attrd_messages.c | 13 ++++++++++++ - include/crm_internal.h | 1 + - 3 files changed, 52 insertions(+) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index e86ca07..1245d9c 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -25,6 +25,19 @@ - - extern crm_exit_t attrd_exit_status; - -+static xmlNode * -+attrd_confirmation(int callid) -+{ -+ xmlNode *node = create_xml_node(NULL, __func__); -+ -+ crm_xml_add(node, F_TYPE, T_ATTRD); -+ crm_xml_add(node, F_ORIG, get_local_node_name()); -+ crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); -+ crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); -+ -+ return node; -+} -+ - static void - attrd_peer_message(crm_node_t *peer, xmlNode *xml) - { -@@ -57,6 +70,31 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) - CRM_CHECK(request.op != NULL, return); - - attrd_handle_request(&request); -+ -+ /* Having finished handling the request, check to see if the originating -+ * peer requested confirmation. If so, send that confirmation back now. -+ */ -+ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { -+ int callid = 0; -+ xmlNode *reply = NULL; -+ -+ /* Add the confirmation ID for the message we are confirming to the -+ * response so the originating peer knows what they're a confirmation -+ * for. 
-+ */ -+ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); -+ reply = attrd_confirmation(callid); -+ -+ /* And then send the confirmation back to the originating peer. This -+ * ends up right back in this same function (attrd_peer_message) on the -+ * peer where it will have to do something with a PCMK__XA_CONFIRM type -+ * message. -+ */ -+ crm_debug("Sending %s a confirmation", peer->uname); -+ attrd_send_message(peer, reply, false); -+ free_xml(reply); -+ } -+ - pcmk__reset_request(&request); - } - } -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index 78df0d0..9c792b2 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -65,6 +65,18 @@ handle_clear_failure_request(pcmk__request_t *request) - } - } - -+static xmlNode * -+handle_confirm_request(pcmk__request_t *request) -+{ -+ if (request->peer != NULL) { -+ crm_debug("Received confirmation from %s", request->peer); -+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); -+ return NULL; -+ } else { -+ return handle_unknown_request(request); -+ } -+} -+ - static xmlNode * - handle_flush_request(pcmk__request_t *request) - { -@@ -190,6 +202,7 @@ attrd_register_handlers(void) - { - pcmk__server_command_t handlers[] = { - { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, -+ { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, - { PCMK__ATTRD_CMD_FLUSH, handle_flush_request }, - { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, - { PCMK__ATTRD_CMD_QUERY, handle_query_request }, -diff --git a/include/crm_internal.h b/include/crm_internal.h -index 63a1726..f60e7b4 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -108,6 +108,7 @@ - #define PCMK__ATTRD_CMD_SYNC "sync" - #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" - #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" -+#define PCMK__ATTRD_CMD_CONFIRM "confirm" - - #define PCMK__CONTROLD_CMD_NODES "list-nodes" - --- -2.31.1 - -From 
dfb730e9ced9dc75886fda9452c584860573fe30 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 26 Oct 2022 15:58:00 -0400 -Subject: [PATCH 18/26] Feature: daemons: Keep track of #attrd-protocol from - each peer. - -This information can be used in the future when dealing with -cluster-wide sync points to know which peers we are waiting on a reply -from. ---- - daemons/attrd/attrd_corosync.c | 3 +- - daemons/attrd/attrd_utils.c | 60 ++++++++++++++++++++++++++++++--- - daemons/attrd/pacemaker-attrd.h | 4 ++- - 3 files changed, 60 insertions(+), 7 deletions(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 1245d9c..6f88ab6 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -268,6 +268,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da - // Remove votes from cluster nodes that leave, in case election in progress - if (gone && !is_remote) { - attrd_remove_voter(peer); -+ attrd_remove_peer_protocol_ver(peer->uname); - - // Ensure remote nodes that come up are in the remote node cache - } else if (!gone && is_remote) { -@@ -395,7 +396,7 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) - * version, check to see if it's a new minimum version. - */ - if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { -- attrd_update_minimum_protocol_ver(value); -+ attrd_update_minimum_protocol_ver(peer->uname, value); - } - } - -diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c -index 00b879b..421faed 100644 ---- a/daemons/attrd/attrd_utils.c -+++ b/daemons/attrd/attrd_utils.c -@@ -29,6 +29,11 @@ static bool requesting_shutdown = false; - static bool shutting_down = false; - static GMainLoop *mloop = NULL; - -+/* A hash table storing information on the protocol version of each peer attrd. -+ * The key is the peer's uname, and the value is the protocol version number. -+ */ -+GHashTable *peer_protocol_vers = NULL; -+ - /*! 
- * \internal - * \brief Set requesting_shutdown state -@@ -94,6 +99,10 @@ attrd_shutdown(int nsig) - mainloop_destroy_signal(SIGTRAP); - - attrd_free_waitlist(); -+ if (peer_protocol_vers != NULL) { -+ g_hash_table_destroy(peer_protocol_vers); -+ peer_protocol_vers = NULL; -+ } - - if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { - /* If there's no main loop active, just exit. This should be possible -@@ -273,16 +282,57 @@ attrd_free_attribute(gpointer data) - } - } - -+/*! -+ * \internal -+ * \brief When a peer node leaves the cluster, stop tracking its protocol version. -+ * -+ * \param[in] host The peer node's uname to be removed -+ */ -+void -+attrd_remove_peer_protocol_ver(const char *host) -+{ -+ if (peer_protocol_vers != NULL) { -+ g_hash_table_remove(peer_protocol_vers, host); -+ } -+} -+ -+/*! -+ * \internal -+ * \brief When a peer node broadcasts a message with its protocol version, keep -+ * track of that information. -+ * -+ * We keep track of each peer's protocol version so we know which peers to -+ * expect confirmation messages from when handling cluster-wide sync points. -+ * We additionally keep track of the lowest protocol version supported by all -+ * peers so we know when we can send IPC messages containing more than one -+ * request. 
-+ * -+ * \param[in] host The peer node's uname to be tracked -+ * \param[in] value The peer node's protocol version -+ */ - void --attrd_update_minimum_protocol_ver(const char *value) -+attrd_update_minimum_protocol_ver(const char *host, const char *value) - { - int ver; - -+ if (peer_protocol_vers == NULL) { -+ peer_protocol_vers = pcmk__strkey_table(free, NULL); -+ } -+ - pcmk__scan_min_int(value, &ver, 0); - -- if (ver > 0 && (minimum_protocol_version == -1 || ver < minimum_protocol_version)) { -- minimum_protocol_version = ver; -- crm_trace("Set minimum attrd protocol version to %d", -- minimum_protocol_version); -+ if (ver > 0) { -+ char *host_name = strdup(host); -+ -+ /* Record the peer attrd's protocol version. */ -+ CRM_ASSERT(host_name != NULL); -+ g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver)); -+ -+ /* If the protocol version is a new minimum, record it as such. */ -+ if (minimum_protocol_version == -1 || ver < minimum_protocol_version) { -+ minimum_protocol_version = ver; -+ crm_trace("Set minimum attrd protocol version to %d", -+ minimum_protocol_version); -+ } - } - } -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 25f7c8a..302ef63 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -145,6 +145,7 @@ typedef struct attribute_value_s { - - extern crm_cluster_t *attrd_cluster; - extern GHashTable *attributes; -+extern GHashTable *peer_protocol_vers; - - #define CIB_OP_TIMEOUT_S 120 - -@@ -177,7 +178,8 @@ void attrd_write_attributes(bool all, bool ignore_delay); - void attrd_write_or_elect_attribute(attribute_t *a); - - extern int minimum_protocol_version; --void attrd_update_minimum_protocol_ver(const char *value); -+void attrd_remove_peer_protocol_ver(const char *host); -+void attrd_update_minimum_protocol_ver(const char *host, const char *value); - - mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); - --- -2.31.1 - 
-From 945f0fe51d3bf69c2cb1258b394f2f11b8996525 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 27 Oct 2022 14:42:59 -0400 -Subject: [PATCH 19/26] Feature: daemons: Handle cluster-wide sync points in - attrd. - -When an attrd receives an IPC request to update some value, record the -protocol versions of all peer attrds. Additionally register a function -that will be called when all confirmations are received. - -The originating IPC cilent (attrd_updater for instance) will sit there -waiting for an ACK until its timeout is hit. - -As each confirmation message comes back to attrd, mark it off the list -of peers we are waiting on. When no more peers are expected, call the -previously registered function. - -For attribute updates, this function just sends an ack back to -attrd_updater. - -Fixes T35 ---- - daemons/attrd/attrd_corosync.c | 1 + - daemons/attrd/attrd_ipc.c | 4 + - daemons/attrd/attrd_messages.c | 10 ++ - daemons/attrd/attrd_sync.c | 260 +++++++++++++++++++++++++++++++- - daemons/attrd/attrd_utils.c | 2 + - daemons/attrd/pacemaker-attrd.h | 8 + - 6 files changed, 281 insertions(+), 4 deletions(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 6f88ab6..37701aa 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -269,6 +269,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da - if (gone && !is_remote) { - attrd_remove_voter(peer); - attrd_remove_peer_protocol_ver(peer->uname); -+ attrd_do_not_expect_from_peer(peer->uname); - - // Ensure remote nodes that come up are in the remote node cache - } else if (!gone && is_remote) { -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 0fc5e93..c70aa1b 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -393,6 +393,7 @@ attrd_client_update(pcmk__request_t *request) - * the response ACK is not sent until this attrd broadcasts the update - * and receives its 
own confirmation back from all peers. - */ -+ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); - attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ - - } else { -@@ -456,6 +457,9 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) - /* Remove the client from the sync point waitlist if it's present. */ - attrd_remove_client_from_waitlist(client); - -+ /* And no longer wait for confirmations from any peers. */ -+ attrd_do_not_wait_for_client(client); -+ - pcmk__free_client(client); - } - -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index 9c792b2..f7b9c7c 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -69,7 +69,17 @@ static xmlNode * - handle_confirm_request(pcmk__request_t *request) - { - if (request->peer != NULL) { -+ int callid; -+ - crm_debug("Received confirmation from %s", request->peer); -+ -+ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { -+ pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, -+ "Could not get callid from XML"); -+ } else { -+ attrd_handle_confirmation(callid, request->peer); -+ } -+ - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } else { -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index e9690b5..d3d7108 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -34,6 +34,51 @@ struct waitlist_node { - uint32_t flags; - }; - -+/* A hash table storing information on in-progress IPC requests that are awaiting -+ * confirmations. These requests are currently being processed by peer attrds and -+ * we are waiting to receive confirmation messages from each peer indicating that -+ * processing is complete. -+ * -+ * Multiple requests could be waiting on confirmations at the same time. -+ * -+ * The key is the unique callid for the IPC request, and the value is a -+ * confirmation_action struct. 
-+ */ -+static GHashTable *expected_confirmations = NULL; -+ -+/*! -+ * \internal -+ * \brief A structure describing a single IPC request that is awaiting confirmations -+ */ -+struct confirmation_action { -+ /*! -+ * \brief A list of peer attrds that we are waiting to receive confirmation -+ * messages from -+ * -+ * This list is dynamic - as confirmations arrive from peer attrds, they will -+ * be removed from this list. When the list is empty, all peers have processed -+ * the request and the associated confirmation action will be taken. -+ */ -+ GList *respondents; -+ -+ /*! -+ * \brief A function to run when all confirmations have been received -+ */ -+ attrd_confirmation_action_fn fn; -+ -+ /*! -+ * \brief Information required to construct and send a reply to the client -+ */ -+ char *client_id; -+ uint32_t ipc_id; -+ uint32_t flags; -+ -+ /*! -+ * \brief The XML request containing the callid associated with this action -+ */ -+ void *xml; -+}; -+ - static void - next_key(void) - { -@@ -114,12 +159,13 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) - wl->ipc_id = request->ipc_id; - wl->flags = request->flags; - -- crm_debug("Added client %s to waitlist for %s sync point", -- wl->client_id, sync_point_str(wl->sync_point)); -- - next_key(); - pcmk__intkey_table_insert(waitlist, waitlist_client, wl); - -+ crm_trace("Added client %s to waitlist for %s sync point", -+ wl->client_id, sync_point_str(wl->sync_point)); -+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); -+ - /* And then add the key to the request XML so we can uniquely identify - * it when it comes time to issue the ACK. 
- */ -@@ -166,6 +212,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) - - if (wl->client_id == client->id) { - g_hash_table_iter_remove(&iter); -+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); - } - } - } -@@ -206,7 +253,7 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - return; - } - -- crm_debug("Alerting client %s for reached %s sync point", -+ crm_trace("Alerting client %s for reached %s sync point", - wl->client_id, sync_point_str(wl->sync_point)); - - client = pcmk__find_client_by_id(wl->client_id); -@@ -218,9 +265,28 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - - /* And then remove the client so it doesn't get alerted again. */ - pcmk__intkey_table_remove(waitlist, callid); -+ -+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); - } - } - -+/*! -+ * \internal -+ * \brief Action to take when a cluster sync point is hit for a -+ * PCMK__ATTRD_CMD_UPDATE* message. -+ * -+ * \param[in] xml The request that should be passed along to -+ * attrd_ack_waitlist_clients. This should be the original -+ * IPC request containing the callid for this update message. -+ */ -+int -+attrd_cluster_sync_point_update(xmlNode *xml) -+{ -+ crm_trace("Hit cluster sync point for attribute update"); -+ attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); -+ return pcmk_rc_ok; -+} -+ - /*! - * \internal - * \brief Return the sync point attribute for an IPC request -@@ -268,3 +334,189 @@ attrd_request_has_sync_point(xmlNode *xml) - { - return attrd_request_sync_point(xml) != NULL; - } -+ -+static void -+free_action(gpointer data) -+{ -+ struct confirmation_action *action = (struct confirmation_action *) data; -+ g_list_free_full(action->respondents, free); -+ free_xml(action->xml); -+ free(action->client_id); -+ free(action); -+} -+ -+/*! 
-+ * \internal -+ * \brief When a peer disconnects from the cluster, no longer wait for its confirmation -+ * for any IPC action. If this peer is the last one being waited on, this will -+ * trigger the confirmation action. -+ * -+ * \param[in] host The disconnecting peer attrd's uname -+ */ -+void -+attrd_do_not_expect_from_peer(const char *host) -+{ -+ GList *keys = g_hash_table_get_keys(expected_confirmations); -+ -+ crm_trace("Removing peer %s from expected confirmations", host); -+ -+ for (GList *node = keys; node != NULL; node = node->next) { -+ int callid = *(int *) node->data; -+ attrd_handle_confirmation(callid, host); -+ } -+ -+ g_list_free(keys); -+} -+ -+/*! -+ * \internal -+ * \brief When a client disconnects from the cluster, no longer wait on confirmations -+ * for it. Because the peer attrds may still be processing the original IPC -+ * message, they may still send us confirmations. However, we will take no -+ * action on them. -+ * -+ * \param[in] client The disconnecting client -+ */ -+void -+attrd_do_not_wait_for_client(pcmk__client_t *client) -+{ -+ GHashTableIter iter; -+ gpointer value; -+ -+ if (expected_confirmations == NULL) { -+ return; -+ } -+ -+ g_hash_table_iter_init(&iter, expected_confirmations); -+ -+ while (g_hash_table_iter_next(&iter, NULL, &value)) { -+ struct confirmation_action *action = (struct confirmation_action *) value; -+ -+ if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { -+ crm_trace("Removing client %s from expected confirmations", client->id); -+ g_hash_table_iter_remove(&iter); -+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); -+ break; -+ } -+ } -+} -+ -+/*! -+ * \internal -+ * \brief Register some action to be taken when IPC request confirmations are -+ * received -+ * -+ * When this function is called, a list of all peer attrds that support confirming -+ * requests is generated. 
As confirmations from these peer attrds are received, -+ * they are removed from this list. When the list is empty, the registered action -+ * will be called. -+ * -+ * \note This function should always be called before attrd_send_message is called -+ * to broadcast to the peers to ensure that we know what replies we are -+ * waiting on. Otherwise, it is possible the peer could finish and confirm -+ * before we know to expect it. -+ * -+ * \param[in] request The request that is awaiting confirmations -+ * \param[in] fn A function to be run after all confirmations are received -+ */ -+void -+attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) -+{ -+ struct confirmation_action *action = NULL; -+ GHashTableIter iter; -+ gpointer host, ver; -+ GList *respondents = NULL; -+ int callid; -+ -+ if (expected_confirmations == NULL) { -+ expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); -+ } -+ -+ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { -+ crm_err("Could not get callid from xml"); -+ return; -+ } -+ -+ if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { -+ crm_err("Already waiting on confirmations for call id %d", callid); -+ return; -+ } -+ -+ g_hash_table_iter_init(&iter, peer_protocol_vers); -+ while (g_hash_table_iter_next(&iter, &host, &ver)) { -+ if (GPOINTER_TO_INT(ver) >= 5) { -+ char *s = strdup((char *) host); -+ -+ CRM_ASSERT(s != NULL); -+ respondents = g_list_prepend(respondents, s); -+ } -+ } -+ -+ action = calloc(1, sizeof(struct confirmation_action)); -+ CRM_ASSERT(action != NULL); -+ -+ action->respondents = respondents; -+ action->fn = fn; -+ action->xml = copy_xml(request->xml); -+ -+ action->client_id = strdup(request->ipc_client->id); -+ CRM_ASSERT(action->client_id != NULL); -+ -+ action->ipc_id = request->ipc_id; -+ action->flags = request->flags; -+ -+ pcmk__intkey_table_insert(expected_confirmations, callid, action); -+ crm_trace("Callid %d 
now waiting on %d confirmations", callid, g_list_length(respondents)); -+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); -+} -+ -+void -+attrd_free_confirmations(void) -+{ -+ if (expected_confirmations != NULL) { -+ g_hash_table_destroy(expected_confirmations); -+ expected_confirmations = NULL; -+ } -+} -+ -+/*! -+ * \internal -+ * \brief Process a confirmation message from a peer attrd -+ * -+ * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is -+ * received from a peer attrd. If this is the last confirmation we are waiting -+ * on for a given operation, the registered action will be called. -+ * -+ * \param[in] callid The unique callid for the XML IPC request -+ * \param[in] host The confirming peer attrd's uname -+ */ -+void -+attrd_handle_confirmation(int callid, const char *host) -+{ -+ struct confirmation_action *action = NULL; -+ GList *node = NULL; -+ -+ if (expected_confirmations == NULL) { -+ return; -+ } -+ -+ action = pcmk__intkey_table_lookup(expected_confirmations, callid); -+ if (action == NULL) { -+ return; -+ } -+ -+ node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); -+ -+ if (node == NULL) { -+ return; -+ } -+ -+ action->respondents = g_list_remove(action->respondents, node->data); -+ crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); -+ -+ if (action->respondents == NULL) { -+ action->fn(action->xml); -+ pcmk__intkey_table_remove(expected_confirmations, callid); -+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); -+ } -+} -diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c -index 421faed..f3a2059 100644 ---- a/daemons/attrd/attrd_utils.c -+++ b/daemons/attrd/attrd_utils.c -@@ -99,6 +99,8 @@ attrd_shutdown(int nsig) - mainloop_destroy_signal(SIGTRAP); - - attrd_free_waitlist(); -+ attrd_free_confirmations(); -+ - if 
(peer_protocol_vers != NULL) { - g_hash_table_destroy(peer_protocol_vers); - peer_protocol_vers = NULL; -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 302ef63..bcc329d 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -191,8 +191,16 @@ enum attrd_sync_point { - attrd_sync_point_cluster, - }; - -+typedef int (*attrd_confirmation_action_fn)(xmlNode *); -+ - void attrd_add_client_to_waitlist(pcmk__request_t *request); - void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); -+int attrd_cluster_sync_point_update(xmlNode *xml); -+void attrd_do_not_expect_from_peer(const char *host); -+void attrd_do_not_wait_for_client(pcmk__client_t *client); -+void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); -+void attrd_free_confirmations(void); -+void attrd_handle_confirmation(int callid, const char *host); - void attrd_remove_client_from_waitlist(pcmk__client_t *client); - const char *attrd_request_sync_point(xmlNode *xml); - bool attrd_request_has_sync_point(xmlNode *xml); --- -2.31.1 - -From 07a032a7eb2f03dce18a7c94c56b8c837dedda15 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 28 Oct 2022 14:54:15 -0400 -Subject: [PATCH 20/26] Refactor: daemons: Add some attrd version checking - macros. - -These are just to make it a little more obvious what is actually being -asked in the code, instead of having magic numbers sprinkled around. ---- - daemons/attrd/attrd_ipc.c | 2 +- - daemons/attrd/attrd_sync.c | 2 +- - daemons/attrd/pacemaker-attrd.h | 3 +++ - 3 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index c70aa1b..16bfff4 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -294,7 +294,7 @@ attrd_client_update(pcmk__request_t *request) - * two ways we can handle that. 
- */ - if (xml_has_children(xml)) { -- if (minimum_protocol_version >= 4) { -+ if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { - /* First, if all peers support a certain protocol version, we can - * just broadcast the big message and they'll handle it. However, - * we also need to apply all the transformations in this function -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index d3d7108..e48f82e 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -444,7 +444,7 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f - - g_hash_table_iter_init(&iter, peer_protocol_vers); - while (g_hash_table_iter_next(&iter, &host, &ver)) { -- if (GPOINTER_TO_INT(ver) >= 5) { -+ if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { - char *s = strdup((char *) host); - - CRM_ASSERT(s != NULL); -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index bcc329d..83d7c6b 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -45,6 +45,9 @@ - */ - #define ATTRD_PROTOCOL_VERSION "5" - -+#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) -+#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) -+ - #define attrd_send_ack(client, id, flags) \ - pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) - --- -2.31.1 - -From 811361b96c6f26a1f5eccc54b6e8bf6e6fd003be Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 31 Oct 2022 12:53:22 -0400 -Subject: [PATCH 21/26] Low: attrd: Fix removing clients from the waitlist when - they disconnect. - -The client ID is a string, so it must be compared like a string. 
---- - daemons/attrd/attrd_sync.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index e48f82e..c9b4784 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -210,7 +210,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) - while (g_hash_table_iter_next(&iter, NULL, &value)) { - struct waitlist_node *wl = (struct waitlist_node *) value; - -- if (wl->client_id == client->id) { -+ if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { - g_hash_table_iter_remove(&iter); - crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); - } --- -2.31.1 - -From 4e933ad14456af85c60701410c3b23b4eab03f86 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 1 Nov 2022 12:35:12 -0400 -Subject: [PATCH 22/26] Feature: daemons: Handle an attrd client timing out. - -If the update confirmations do not come back in time, use a main loop -timer to remove the client from the table. ---- - daemons/attrd/attrd_sync.c | 49 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 49 insertions(+) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index c9b4784..9d07796 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -61,6 +61,12 @@ struct confirmation_action { - */ - GList *respondents; - -+ /*! -+ * \brief A timer that will be used to remove the client should it time out -+ * before receiving all confirmations -+ */ -+ mainloop_timer_t *timer; -+ - /*! 
- * \brief A function to run when all confirmations have been received - */ -@@ -340,11 +346,51 @@ free_action(gpointer data) - { - struct confirmation_action *action = (struct confirmation_action *) data; - g_list_free_full(action->respondents, free); -+ mainloop_timer_del(action->timer); - free_xml(action->xml); - free(action->client_id); - free(action); - } - -+/* Remove an IPC request from the expected_confirmations table if the peer attrds -+ * don't respond before the timeout is hit. We set the timeout to 15s. The exact -+ * number isn't critical - we just want to make sure that the table eventually gets -+ * cleared of things that didn't complete. -+ */ -+static gboolean -+confirmation_timeout_cb(gpointer data) -+{ -+ struct confirmation_action *action = (struct confirmation_action *) data; -+ -+ GHashTableIter iter; -+ gpointer value; -+ -+ if (expected_confirmations == NULL) { -+ return G_SOURCE_REMOVE; -+ } -+ -+ g_hash_table_iter_init(&iter, expected_confirmations); -+ -+ while (g_hash_table_iter_next(&iter, NULL, &value)) { -+ if (value == action) { -+ pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); -+ if (client == NULL) { -+ return G_SOURCE_REMOVE; -+ } -+ -+ crm_trace("Timed out waiting for confirmations for client %s", client->id); -+ pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response, -+ "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); -+ -+ g_hash_table_iter_remove(&iter); -+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); -+ break; -+ } -+ } -+ -+ return G_SOURCE_REMOVE; -+} -+ - /*! 
- * \internal - * \brief When a peer disconnects from the cluster, no longer wait for its confirmation -@@ -465,6 +511,9 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f - action->ipc_id = request->ipc_id; - action->flags = request->flags; - -+ action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); -+ mainloop_timer_start(action->timer); -+ - pcmk__intkey_table_insert(expected_confirmations, callid, action); - crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); - crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); --- -2.31.1 - -From 101896383cbe0103c98078e46540c076af08f040 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 2 Nov 2022 14:40:30 -0400 -Subject: [PATCH 23/26] Refactor: Demote a sync point related message to trace. - ---- - daemons/attrd/attrd_corosync.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 37701aa..5cbed7e 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -633,7 +633,7 @@ attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, - * point, process that now. - */ - if (handle_sync_point) { -- crm_debug("Hit local sync point for attribute update"); -+ crm_trace("Hit local sync point for attribute update"); - attrd_ack_waitlist_clients(attrd_sync_point_local, xml); - } - } --- -2.31.1 - -From acd13246d4c2bef7982ca103e34896efcad22348 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 3 Nov 2022 10:29:20 -0400 -Subject: [PATCH 24/26] Low: daemons: Avoid infinite confirm loops in attrd. - -On the sending side, do not add confirm="yes" to a message with -op="confirm". On the receiving side, do not confirm a message with -op="confirm" even if confirm="yes" is set. 
---- - daemons/attrd/attrd_corosync.c | 3 ++- - daemons/attrd/attrd_messages.c | 6 +++++- - 2 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 5cbed7e..88c1ecc 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -74,7 +74,8 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) - /* Having finished handling the request, check to see if the originating - * peer requested confirmation. If so, send that confirmation back now. - */ -- if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { -+ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && -+ !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { - int callid = 0; - xmlNode *reply = NULL; - -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index f7b9c7c..184176a 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -310,6 +310,8 @@ attrd_broadcast_protocol(void) - gboolean - attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) - { -+ const char *op = crm_element_value(data, PCMK__XA_TASK); -+ - crm_xml_add(data, F_TYPE, T_ATTRD); - crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); - -@@ -317,7 +319,9 @@ attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) - * be all if node is NULL) that the message has been received and - * acted upon. - */ -- pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); -+ if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { -+ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); -+ } - - attrd_xml_add_writer(data); - return send_cluster_message(node, crm_msg_attrd, data, TRUE); --- -2.31.1 - -From 115e6c3a0d8db4df3eccf6da1c344168799f890d Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 15 Nov 2022 09:35:28 -0500 -Subject: [PATCH 25/26] Fix: daemons: Check for NULL in - attrd_do_not_expect_from_peer. 
- ---- - daemons/attrd/attrd_sync.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 9d07796..6936771 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -402,7 +402,13 @@ confirmation_timeout_cb(gpointer data) - void - attrd_do_not_expect_from_peer(const char *host) - { -- GList *keys = g_hash_table_get_keys(expected_confirmations); -+ GList *keys = NULL; -+ -+ if (expected_confirmations == NULL) { -+ return; -+ } -+ -+ keys = g_hash_table_get_keys(expected_confirmations); - - crm_trace("Removing peer %s from expected confirmations", host); - --- -2.31.1 - -From 05da14f97ccd4f63f53801acc107ad661e5fd0c8 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 16 Nov 2022 17:37:44 -0500 -Subject: [PATCH 26/26] Low: daemons: Support cluster-wide sync points for - multi IPC messages. - -Supporting cluster-wide sync points means attrd_expect_confirmations -needs to be called, and then attrd_send_message needs "true" as a third -argument. This indicates attrd wants confirmations back from all its -peers when they have applied the update. - -We're already doing this at the end of attrd_client_update for -single-update IPC messages, and handling it for multi-update messages is -a simple matter of breaking that code out into a function and making -sure it's called. - -Note that this leaves two other spots where sync points still need to be -dealt with: - -* An update message that uses a regex. See - https://projects.clusterlabs.org/T600 for details. - -* A multi-update IPC message in a cluster where that is not supported. - See https://projects.clusterlabs.org/T601 for details. 
---- - daemons/attrd/attrd_ipc.c | 43 ++++++++++++++++++++++----------------- - 1 file changed, 24 insertions(+), 19 deletions(-) - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 16bfff4..8c5660d 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -283,6 +283,28 @@ handle_value_expansion(const char **value, xmlNode *xml, const char *op, - return pcmk_rc_ok; - } - -+static void -+send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) -+{ -+ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { -+ /* The client is waiting on the cluster-wide sync point. In this case, -+ * the response ACK is not sent until this attrd broadcasts the update -+ * and receives its own confirmation back from all peers. -+ */ -+ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); -+ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ -+ -+ } else { -+ /* The client is either waiting on the local sync point or was not -+ * waiting on any sync point at all. For the local sync point, the -+ * response ACK is sent in attrd_peer_update. For clients not -+ * waiting on any sync point, the response ACK is sent in -+ * handle_update_request immediately before this function was called. -+ */ -+ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ -+ } -+} -+ - xmlNode * - attrd_client_update(pcmk__request_t *request) - { -@@ -314,7 +336,7 @@ attrd_client_update(pcmk__request_t *request) - } - } - -- attrd_send_message(NULL, xml, false); -+ send_update_msg_to_cluster(request, xml); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - - } else { -@@ -388,24 +410,7 @@ attrd_client_update(pcmk__request_t *request) - crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), - value, (attrd_election_won()? 
" (writer)" : "")); - -- if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { -- /* The client is waiting on the cluster-wide sync point. In this case, -- * the response ACK is not sent until this attrd broadcasts the update -- * and receives its own confirmation back from all peers. -- */ -- attrd_expect_confirmations(request, attrd_cluster_sync_point_update); -- attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ -- -- } else { -- /* The client is either waiting on the local sync point or was not -- * waiting on any sync point at all. For the local sync point, the -- * response ACK is sent in attrd_peer_update. For clients not -- * waiting on any sync point, the response ACK is sent in -- * handle_update_request immediately before this function was called. -- */ -- attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ -- } -- -+ send_update_msg_to_cluster(request, xml); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } --- -2.31.1 - diff --git a/SOURCES/002-remote-regression.patch b/SOURCES/002-remote-regression.patch deleted file mode 100644 index 0f0bea8..0000000 --- a/SOURCES/002-remote-regression.patch +++ /dev/null @@ -1,98 +0,0 @@ -From d8e08729ad5e3dc62f774172f992210902fc0ed4 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 23 Jan 2023 14:25:56 -0600 -Subject: [PATCH] High: executor: fix regression in remote node shutdown - -This reverts the essential part of d61494347, which was based on misdiagnosing -a remote node shutdown issue. Initially, it was thought that a "TLS server -session ended" log just after a remote node requested shutdown indicated that -the proxy connection coincidentally dropped at that moment. It actually is the -routine stopping of accepting new proxy connections, and existing when that -happens makes the remote node exit immediately without waiting for the -all-clear from the cluster. 
- -Fixes T361 ---- - daemons/execd/pacemaker-execd.c | 19 +------------------ - daemons/execd/pacemaker-execd.h | 3 +-- - daemons/execd/remoted_tls.c | 6 +----- - 3 files changed, 3 insertions(+), 25 deletions(-) - -diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c -index db12674f13..491808974a 100644 ---- a/daemons/execd/pacemaker-execd.c -+++ b/daemons/execd/pacemaker-execd.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2012-2022 the Pacemaker project contributors -+ * Copyright 2012-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -305,23 +305,6 @@ lrmd_exit(gpointer data) - return FALSE; - } - --/*! -- * \internal -- * \brief Clean up and exit if shutdown has started -- * -- * \return Doesn't return -- */ --void --execd_exit_if_shutting_down(void) --{ --#ifdef PCMK__COMPILE_REMOTE -- if (shutting_down) { -- crm_warn("exit because TLS connection was closed and 'shutting_down' set"); -- lrmd_exit(NULL); -- } --#endif --} -- - /*! - * \internal - * \brief Request cluster shutdown if appropriate, otherwise exit immediately -diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h -index 6646ae29e3..f78e8dcdde 100644 ---- a/daemons/execd/pacemaker-execd.h -+++ b/daemons/execd/pacemaker-execd.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2012-2022 the Pacemaker project contributors -+ * Copyright 2012-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -105,6 +105,5 @@ void remoted_spawn_pidone(int argc, char **argv, char **envp); - int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, - xmlNode *request); - void lrmd_drain_alerts(GMainLoop *mloop); --void execd_exit_if_shutting_down(void); - - #endif // PACEMAKER_EXECD__H -diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c -index 6f4b2d0062..c65e3f394d 100644 ---- a/daemons/execd/remoted_tls.c -+++ b/daemons/execd/remoted_tls.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2012-2022 the Pacemaker project contributors -+ * Copyright 2012-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -250,10 +250,6 @@ static void - tls_server_dropped(gpointer user_data) - { - crm_notice("TLS server session ended"); -- /* If we are in the process of shutting down, then we should actually exit. -- * bz#1804259 -- */ -- execd_exit_if_shutting_down(); - return; - } - --- -2.31.1 - diff --git a/SOURCES/002-schema-transfer.patch b/SOURCES/002-schema-transfer.patch new file mode 100644 index 0000000..9c1c05b --- /dev/null +++ b/SOURCES/002-schema-transfer.patch @@ -0,0 +1,1986 @@ +From 9e0c58dc3b949e4eadfa43364e60677478b7aa0f Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 7 Sep 2023 16:48:52 -0400 +Subject: [PATCH 01/15] Refactor: libcrmcommon: Break schema version comparison + out... + +...into its own function. Then, wrap this function with +schema_sort_directory so existing code doesn't need to be changed. This +allows us to use the version comparison elsewhere. 
+--- + lib/common/schemas.c | 26 ++++++++++++++++---------- + 1 file changed, 16 insertions(+), 10 deletions(-) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 372e87223..b3ff05917 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -195,7 +195,20 @@ schema_filter(const struct dirent *a) + } + + static int +-schema_sort(const struct dirent **a, const struct dirent **b) ++schema_cmp(pcmk__schema_version_t a_version, pcmk__schema_version_t b_version) ++{ ++ for (int i = 0; i < 2; ++i) { ++ if (a_version.v[i] < b_version.v[i]) { ++ return -1; ++ } else if (a_version.v[i] > b_version.v[i]) { ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++static int ++schema_cmp_directory(const struct dirent **a, const struct dirent **b) + { + pcmk__schema_version_t a_version = SCHEMA_ZERO; + pcmk__schema_version_t b_version = SCHEMA_ZERO; +@@ -206,14 +219,7 @@ schema_sort(const struct dirent **a, const struct dirent **b) + return 0; + } + +- for (int i = 0; i < 2; ++i) { +- if (a_version.v[i] < b_version.v[i]) { +- return -1; +- } else if (a_version.v[i] > b_version.v[i]) { +- return 1; +- } +- } +- return 0; ++ return schema_cmp(a_version, b_version); + } + + /*! +@@ -413,7 +419,7 @@ crm_schema_init(void) + + wrap_libxslt(false); + +- max = scandir(base, &namelist, schema_filter, schema_sort); ++ max = scandir(base, &namelist, schema_filter, schema_cmp_directory); + if (max < 0) { + crm_notice("scandir(%s) failed: %s (%d)", base, strerror(errno), errno); + free(base); +-- +2.41.0 + +From e7d7c33eb8329c3c50fe648133cbb7651c1ecb9d Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 29 Nov 2023 09:37:31 -0500 +Subject: [PATCH 02/15] Feature: libcrmcommon: Add + pcmk__schema_files_later_than. + +This function takes in a schema version and returns a list of all RNG +and XSLT files from all schema versions more recent than that one. + +Also, add unit tests for the new function. 
+--- + include/crm/common/internal.h | 1 + + lib/common/schemas.c | 62 ++++++++++- + lib/common/tests/schemas/Makefile.am | 42 ++++++- + .../pcmk__schema_files_later_than_test.c | 104 ++++++++++++++++++ + 4 files changed, 207 insertions(+), 2 deletions(-) + create mode 100644 lib/common/tests/schemas/pcmk__schema_files_later_than_test.c + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index 307860636..a3cd455dc 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -134,6 +134,7 @@ bool pcmk__procfs_has_pids(void); + void crm_schema_init(void); + void crm_schema_cleanup(void); + ++GList *pcmk__schema_files_later_than(const char *name); + + /* internal functions related to process IDs (from pid.c) */ + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index b3ff05917..1c60738ec 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -168,7 +168,11 @@ xml_latest_schema(void) + static inline bool + version_from_filename(const char *filename, pcmk__schema_version_t *version) + { +- return sscanf(filename, "pacemaker-%hhu.%hhu.rng", &(version->v[0]), &(version->v[1])) == 2; ++ if (pcmk__ends_with(filename, ".rng")) { ++ return sscanf(filename, "pacemaker-%hhu.%hhu.rng", &(version->v[0]), &(version->v[1])) == 2; ++ } else { ++ return sscanf(filename, "pacemaker-%hhu.%hhu", &(version->v[0]), &(version->v[1])) == 2; ++ } + } + + static int +@@ -1291,6 +1295,62 @@ cli_config_update(xmlNode **xml, int *best_version, gboolean to_logs) + return rc; + } + ++/*! 
++ * \internal ++ * \brief Return a list of all schema files and any associated XSLT files ++ * later than the given one ++ * \brief Return a list of all schema versions later than the given one ++ * ++ * \param[in] schema The schema to compare against (for example, ++ * "pacemaker-3.1.rng" or "pacemaker-3.1") ++ * ++ * \note The caller is responsible for freeing both the returned list and ++ * the elements of the list ++ */ ++GList * ++pcmk__schema_files_later_than(const char *name) ++{ ++ GList *lst = NULL; ++ pcmk__schema_version_t ver; ++ ++ if (!version_from_filename(name, &ver)) { ++ return lst; ++ } ++ ++ for (GList *iter = g_list_nth(known_schemas, xml_latest_schema_index(known_schemas)); ++ iter != NULL; iter = iter->prev) { ++ pcmk__schema_t *schema = iter->data; ++ char *s = NULL; ++ ++ if (schema_cmp(ver, schema->version) != -1) { ++ continue; ++ } ++ ++ s = crm_strdup_printf("%s.rng", schema->name); ++ lst = g_list_prepend(lst, s); ++ ++ if (schema->transform != NULL) { ++ char *xform = crm_strdup_printf("%s.xsl", schema->transform); ++ lst = g_list_prepend(lst, xform); ++ } ++ ++ if (schema->transform_enter != NULL) { ++ char *enter = crm_strdup_printf("%s.xsl", schema->transform_enter); ++ ++ lst = g_list_prepend(lst, enter); ++ ++ if (schema->transform_onleave) { ++ int last_dash = strrchr(enter, '-') - enter; ++ char *leave = crm_strdup_printf("%.*s-leave.xsl", last_dash, enter); ++ ++ lst = g_list_prepend(lst, leave); ++ } ++ } ++ } ++ ++ return lst; ++} ++ + void + pcmk__log_known_schemas(void) + { +diff --git a/lib/common/tests/schemas/Makefile.am b/lib/common/tests/schemas/Makefile.am +index 5f485b3e9..b5c5e7f3c 100644 +--- a/lib/common/tests/schemas/Makefile.am ++++ b/lib/common/tests/schemas/Makefile.am +@@ -10,7 +10,47 @@ + include $(top_srcdir)/mk/tap.mk + include $(top_srcdir)/mk/unittest.mk + ++CFLAGS += -DPCMK__TEST_SCHEMA_DIR='"$(abs_builddir)/schemas"' ++ + # Add "_test" to the end of all test program names to simplify .gitignore. 
+-check_PROGRAMS = pcmk__xml_find_x_0_schema_index_test ++check_PROGRAMS = pcmk__schema_files_later_than_test \ ++ pcmk__xml_find_x_0_schema_index_test + + TESTS = $(check_PROGRAMS) ++ ++$(TESTS): setup-schema-dir ++ ++# Set up a temporary schemas/ directory containing only some of the full set of ++# pacemaker schema files. This lets us know exactly how many schemas are present, ++# allowing us to write tests without having to make changes when new schemas are ++# added. ++# ++# This directory contains the following: ++# ++# * pacemaker-next.rng - Used to verify that this sorts before all versions ++# * upgrade-*.xsl - Required by various schema versions ++# * pacemaker-[0-9]*.rng - We're only pulling in 15 schemas, which is enough ++# to get everything through pacemaker-3.0.rng. This ++# includes 2.10, needed so we can check that versions ++# are compared as numbers instead of strings. ++# * other RNG files - This catches everything except the pacemaker-*rng ++# files. These files are included by the top-level ++# pacemaker-*rng files, so we need them for tests. ++# This will glob more than we need, but the extra ones ++# won't get in the way. ++.PHONY: setup-schema-dir ++setup-schema-dir: ++ $(MKDIR_P) schemas ++ ( cd schemas ; \ ++ ln -sf $(abs_top_builddir)/xml/pacemaker-next.rng . ; \ ++ ln -sf $(abs_top_builddir)/xml/upgrade-*.xsl . 
; \ ++ for f in $(shell ls -1v $(abs_top_builddir)/xml/pacemaker-[0-9]*.rng | head -15); do \ ++ ln -sf $$f $$(basename $$f); \ ++ done ; \ ++ for f in $(shell ls -1 $(top_srcdir)/xml/*.rng | grep -v pacemaker); do \ ++ ln -sf ../$$f $$(basename $$f); \ ++ done ) ++ ++.PHONY: clean-local ++clean-local: ++ -rm -rf schemas +diff --git a/lib/common/tests/schemas/pcmk__schema_files_later_than_test.c b/lib/common/tests/schemas/pcmk__schema_files_later_than_test.c +new file mode 100644 +index 000000000..50b5f4334 +--- /dev/null ++++ b/lib/common/tests/schemas/pcmk__schema_files_later_than_test.c +@@ -0,0 +1,104 @@ ++/* ++ * Copyright 2023 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#include ++#include ++ ++#include ++ ++static int ++setup(void **state) { ++ setenv("PCMK_schema_directory", PCMK__TEST_SCHEMA_DIR, 1); ++ crm_schema_init(); ++ return 0; ++} ++ ++static int ++teardown(void **state) { ++ crm_schema_cleanup(); ++ unsetenv("PCMK_schema_directory"); ++ return 0; ++} ++ ++static void ++invalid_name(void **state) ++{ ++ assert_null(pcmk__schema_files_later_than("xyz")); ++ assert_null(pcmk__schema_files_later_than("pacemaker-")); ++} ++ ++static void ++valid_name(void **state) ++{ ++ GList *schemas = NULL; ++ ++ schemas = pcmk__schema_files_later_than("pacemaker-1.0"); ++ assert_int_equal(g_list_length(schemas), 18); ++ /* There is no "pacemaker-1.1". 
*/ ++ assert_string_equal("pacemaker-1.2.rng", g_list_nth_data(schemas, 0)); ++ assert_string_equal("upgrade-1.3.xsl", g_list_nth_data(schemas, 1)); ++ assert_string_equal("pacemaker-1.3.rng", g_list_nth_data(schemas, 2)); ++ assert_string_equal("pacemaker-2.0.rng", g_list_nth_data(schemas, 3)); ++ assert_string_equal("pacemaker-2.1.rng", g_list_nth_data(schemas, 4)); ++ assert_string_equal("pacemaker-2.2.rng", g_list_nth_data(schemas, 5)); ++ assert_string_equal("pacemaker-2.3.rng", g_list_nth_data(schemas, 6)); ++ assert_string_equal("pacemaker-2.4.rng", g_list_nth_data(schemas, 7)); ++ assert_string_equal("pacemaker-2.5.rng", g_list_nth_data(schemas, 8)); ++ assert_string_equal("pacemaker-2.6.rng", g_list_nth_data(schemas, 9)); ++ assert_string_equal("pacemaker-2.7.rng", g_list_nth_data(schemas, 10)); ++ assert_string_equal("pacemaker-2.8.rng", g_list_nth_data(schemas, 11)); ++ assert_string_equal("pacemaker-2.9.rng", g_list_nth_data(schemas, 12)); ++ assert_string_equal("upgrade-2.10-leave.xsl", g_list_nth_data(schemas, 13)); ++ assert_string_equal("upgrade-2.10-enter.xsl", g_list_nth_data(schemas, 14)); ++ assert_string_equal("upgrade-2.10.xsl", g_list_nth_data(schemas, 15)); ++ assert_string_equal("pacemaker-2.10.rng", g_list_nth_data(schemas, 16)); ++ assert_string_equal("pacemaker-3.0.rng", g_list_nth_data(schemas, 17)); ++ g_list_free_full(schemas, free); ++ ++ /* Adding .rng to the end of the schema we're requesting is also valid. 
*/ ++ schemas = pcmk__schema_files_later_than("pacemaker-2.0.rng"); ++ assert_int_equal(g_list_length(schemas), 14); ++ assert_string_equal("pacemaker-2.1.rng", g_list_nth_data(schemas, 0)); ++ assert_string_equal("pacemaker-2.2.rng", g_list_nth_data(schemas, 1)); ++ assert_string_equal("pacemaker-2.3.rng", g_list_nth_data(schemas, 2)); ++ assert_string_equal("pacemaker-2.4.rng", g_list_nth_data(schemas, 3)); ++ assert_string_equal("pacemaker-2.5.rng", g_list_nth_data(schemas, 4)); ++ assert_string_equal("pacemaker-2.6.rng", g_list_nth_data(schemas, 5)); ++ assert_string_equal("pacemaker-2.7.rng", g_list_nth_data(schemas, 6)); ++ assert_string_equal("pacemaker-2.8.rng", g_list_nth_data(schemas, 7)); ++ assert_string_equal("pacemaker-2.9.rng", g_list_nth_data(schemas, 8)); ++ assert_string_equal("upgrade-2.10-leave.xsl", g_list_nth_data(schemas, 9)); ++ assert_string_equal("upgrade-2.10-enter.xsl", g_list_nth_data(schemas, 10)); ++ assert_string_equal("upgrade-2.10.xsl", g_list_nth_data(schemas, 11)); ++ assert_string_equal("pacemaker-2.10.rng", g_list_nth_data(schemas, 12)); ++ assert_string_equal("pacemaker-3.0.rng", g_list_nth_data(schemas, 13)); ++ g_list_free_full(schemas, free); ++ ++ /* Check that "pacemaker-2.10" counts as later than "pacemaker-2.9". */ ++ schemas = pcmk__schema_files_later_than("pacemaker-2.9"); ++ assert_int_equal(g_list_length(schemas), 5); ++ assert_string_equal("upgrade-2.10-leave.xsl", g_list_nth_data(schemas, 0)); ++ assert_string_equal("upgrade-2.10-enter.xsl", g_list_nth_data(schemas, 1)); ++ assert_string_equal("upgrade-2.10.xsl", g_list_nth_data(schemas, 2)); ++ assert_string_equal("pacemaker-2.10.rng", g_list_nth_data(schemas, 3)); ++ assert_string_equal("pacemaker-3.0.rng", g_list_nth_data(schemas, 4)); ++ g_list_free_full(schemas, free); ++ ++ /* And then something way in the future that will never apply due to our ++ * special schema directory. 
++ */ ++ schemas = pcmk__schema_files_later_than("pacemaker-9.0"); ++ assert_null(schemas); ++} ++ ++PCMK__UNIT_TEST(setup, teardown, ++ cmocka_unit_test(invalid_name), ++ cmocka_unit_test(valid_name)) +-- +2.41.0 + +From 76859d61f4d35e1f8b7c35d8766d3d0c123d4552 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 11 Sep 2023 08:56:22 -0400 +Subject: [PATCH 03/15] Refactor: libcrmcommon: Allow more specs in + pcmk__xml_artefact_path. + +If the given filespec already has a .rng or .xsl extension, don't add an +additional one. This allows reusing this function to grab files given +in another schema file's externalRef links without modification. +--- + lib/common/xml.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/lib/common/xml.c b/lib/common/xml.c +index 53ebff770..142b501df 100644 +--- a/lib/common/xml.c ++++ b/lib/common/xml.c +@@ -2618,11 +2618,19 @@ pcmk__xml_artefact_path(enum pcmk__xml_artefact_ns ns, const char *filespec) + switch (ns) { + case pcmk__xml_artefact_ns_legacy_rng: + case pcmk__xml_artefact_ns_base_rng: +- ret = crm_strdup_printf("%s/%s.rng", base, filespec); ++ if (pcmk__ends_with(filespec, ".rng")) { ++ ret = crm_strdup_printf("%s/%s", base, filespec); ++ } else { ++ ret = crm_strdup_printf("%s/%s.rng", base, filespec); ++ } + break; + case pcmk__xml_artefact_ns_legacy_xslt: + case pcmk__xml_artefact_ns_base_xslt: +- ret = crm_strdup_printf("%s/%s.xsl", base, filespec); ++ if (pcmk__ends_with(filespec, ".xsl")) { ++ ret = crm_strdup_printf("%s/%s", base, filespec); ++ } else { ++ ret = crm_strdup_printf("%s/%s.xsl", base, filespec); ++ } + break; + default: + crm_err("XML artefact family specified as %u not recognized", ns); +-- +2.41.0 + +From 0d702bba5b50e1eede201853c7680a2517fade9f Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 18 Sep 2023 12:37:46 -0400 +Subject: [PATCH 04/15] Feature: build: Add PCMK__REMOTE_SCHEMA_DIR to + configure.ac. 
+ +This is a new subdirectory where schema files downloaded to a remote +executor can be stored. + +Also, add support for an environment variable that can override this +compile-time setting and explain it in the documentation. +--- + configure.ac | 5 +++++ + doc/sphinx/Pacemaker_Explained/local-options.rst | 12 ++++++++++++ + etc/sysconfig/pacemaker.in | 7 +++++++ + include/crm/common/options_internal.h | 1 + + 4 files changed, 25 insertions(+) + +diff --git a/configure.ac b/configure.ac +index 17cee41e9..bd548200c 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -838,6 +838,11 @@ AC_DEFINE_UNQUOTED([CRM_SCHEMA_DIRECTORY], ["$CRM_SCHEMA_DIRECTORY"], + [Location for the Pacemaker Relax-NG Schema]) + AC_SUBST(CRM_SCHEMA_DIRECTORY) + ++PCMK__REMOTE_SCHEMA_DIR="${localstatedir}/lib/pacemaker/schemas" ++AC_DEFINE_UNQUOTED([PCMK__REMOTE_SCHEMA_DIR], ["$PCMK__REMOTE_SCHEMA_DIR"], ++ [Location to store Relax-NG Schema files on remote nodes]) ++AC_SUBST(PCMK__REMOTE_SCHEMA_DIR) ++ + CRM_CORE_DIR="${localstatedir}/lib/pacemaker/cores" + AC_DEFINE_UNQUOTED([CRM_CORE_DIR], ["$CRM_CORE_DIR"], + [Directory Pacemaker daemons should change to (without systemd, core files will go here)]) +diff --git a/doc/sphinx/Pacemaker_Explained/local-options.rst b/doc/sphinx/Pacemaker_Explained/local-options.rst +index 91eda6632..118256247 100644 +--- a/doc/sphinx/Pacemaker_Explained/local-options.rst ++++ b/doc/sphinx/Pacemaker_Explained/local-options.rst +@@ -478,6 +478,18 @@ environment variables when Pacemaker daemons start up. + - *Advanced Use Only:* Specify an alternate location for RNG schemas and + XSL transforms. + ++ * - .. _pcmk_remote_schema_directory: ++ ++ .. index:: ++ pair:: node option; PCMK_remote_schema_directory ++ ++ PCMK_remote_schema_directory ++ - :ref:`text ` ++ - |PCMK__REMOTE_SCHEMA_DIR| ++ - *Advanced Use Only:* Specify an alternate location on Pacemaker Remote ++ nodes for storing newer RNG schemas and XSL transforms fetched from ++ the cluster. ++ + * - .. 
_pcmk_valgrind_enabled: + + .. index:: +diff --git a/etc/sysconfig/pacemaker.in b/etc/sysconfig/pacemaker.in +index 0c3609d8e..487a104a7 100644 +--- a/etc/sysconfig/pacemaker.in ++++ b/etc/sysconfig/pacemaker.in +@@ -339,6 +339,13 @@ + # + # Default: PCMK_schema_directory="@CRM_SCHEMA_DIRECTORY@" + ++# PCMK_remote_schema_directory (Advanced Use Only) ++# ++# Specify an alternate location on Pacemaker Remote nodes for storing newer ++# RNG schemas and XSL transforms fetched from the cluster. ++# ++# Default: PCMK_remote_schema_directory="@PCMK__REMOTE_SCHEMA_DIR@" ++ + # G_SLICE (Advanced Use Only) + # + # Affect the behavior of glib's memory allocator. Setting to "always-malloc" +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index 5c561fd1f..a9316ca33 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -95,6 +95,7 @@ bool pcmk__valid_sbd_timeout(const char *value); + #define PCMK__ENV_PANIC_ACTION "panic_action" + #define PCMK__ENV_PHYSICAL_HOST "physical_host" + #define PCMK__ENV_REMOTE_ADDRESS "remote_address" ++#define PCMK__ENV_REMOTE_SCHEMA_DIR "remote_schema_directory" + #define PCMK__ENV_REMOTE_PID1 "remote_pid1" + #define PCMK__ENV_REMOTE_PORT "remote_port" + #define PCMK__ENV_RESPAWNED "respawned" +-- +2.41.0 + +From 16d46de389f33a8b29cbd74ee2c9077f28029446 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 6 Dec 2023 12:38:12 -0500 +Subject: [PATCH 05/15] Feature: libcrmcommon: Add pcmk__remote_schema_dir. + +This function checks both the environment (thus, +/etc/sysconfig/pacemaker as well) and ./configure options for the +location where any additional schema files should be stored. 
+--- + include/crm/common/xml_internal.h | 1 + + lib/common/schemas.c | 17 +++++++++++++++++ + 2 files changed, 18 insertions(+) + +diff --git a/include/crm/common/xml_internal.h b/include/crm/common/xml_internal.h +index f319856c8..cb27ec6b2 100644 +--- a/include/crm/common/xml_internal.h ++++ b/include/crm/common/xml_internal.h +@@ -447,5 +447,6 @@ gboolean pcmk__validate_xml(xmlNode *xml_blob, const char *validation, + void *error_handler_context); + + void pcmk__log_known_schemas(void); ++const char *pcmk__remote_schema_dir(void); + + #endif // PCMK__XML_INTERNAL__H +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 1c60738ec..c03d80036 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -1351,6 +1351,23 @@ pcmk__schema_files_later_than(const char *name) + return lst; + } + ++/*! ++ * \internal ++ * \brief Return the directory containing any extra schema files that a ++ * Pacemaker Remote node fetched from the cluster ++ */ ++const char * ++pcmk__remote_schema_dir(void) ++{ ++ const char *dir = pcmk__env_option(PCMK__ENV_REMOTE_SCHEMA_DIR); ++ ++ if (pcmk__str_empty(dir)) { ++ return PCMK__REMOTE_SCHEMA_DIR; ++ } ++ ++ return dir; ++} ++ + void + pcmk__log_known_schemas(void) + { +-- +2.41.0 + +From d2ed95651ea7c44153a34ba009494c70928319d7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 19 Sep 2023 11:40:59 -0400 +Subject: [PATCH 06/15] Refactor: libcrmcommon: Look in more dirs in + pcmk__xml_artefact_path. + +If the directory returned by pcmk__xml_artefact_root doesn't contain the +file we were looking for, fall back on PCMK__REMOTE_SCHEMA_DIR. If that +still doesn't contain the file, return NULL. 
+--- + lib/common/xml.c | 32 +++++++++++++++++++++++++------- + 1 file changed, 25 insertions(+), 7 deletions(-) + +diff --git a/lib/common/xml.c b/lib/common/xml.c +index 142b501df..a1b1291f9 100644 +--- a/lib/common/xml.c ++++ b/lib/common/xml.c +@@ -10,6 +10,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -2610,33 +2611,50 @@ pcmk__xml_artefact_root(enum pcmk__xml_artefact_ns ns) + return ret; + } + +-char * +-pcmk__xml_artefact_path(enum pcmk__xml_artefact_ns ns, const char *filespec) ++static char * ++find_artefact(enum pcmk__xml_artefact_ns ns, const char *path, const char *filespec) + { +- char *base = pcmk__xml_artefact_root(ns), *ret = NULL; ++ char *ret = NULL; + + switch (ns) { + case pcmk__xml_artefact_ns_legacy_rng: + case pcmk__xml_artefact_ns_base_rng: + if (pcmk__ends_with(filespec, ".rng")) { +- ret = crm_strdup_printf("%s/%s", base, filespec); ++ ret = crm_strdup_printf("%s/%s", path, filespec); + } else { +- ret = crm_strdup_printf("%s/%s.rng", base, filespec); ++ ret = crm_strdup_printf("%s/%s.rng", path, filespec); + } + break; + case pcmk__xml_artefact_ns_legacy_xslt: + case pcmk__xml_artefact_ns_base_xslt: + if (pcmk__ends_with(filespec, ".xsl")) { +- ret = crm_strdup_printf("%s/%s", base, filespec); ++ ret = crm_strdup_printf("%s/%s", path, filespec); + } else { +- ret = crm_strdup_printf("%s/%s.xsl", base, filespec); ++ ret = crm_strdup_printf("%s/%s.xsl", path, filespec); + } + break; + default: + crm_err("XML artefact family specified as %u not recognized", ns); + } ++ ++ return ret; ++} ++ ++char * ++pcmk__xml_artefact_path(enum pcmk__xml_artefact_ns ns, const char *filespec) ++{ ++ struct stat sb; ++ char *base = pcmk__xml_artefact_root(ns); ++ char *ret = NULL; ++ ++ ret = find_artefact(ns, base, filespec); + free(base); + ++ if (stat(ret, &sb) != 0 || !S_ISREG(sb.st_mode)) { ++ const char *remote_schema_dir = pcmk__remote_schema_dir(); ++ ret = find_artefact(ns, remote_schema_dir, filespec); ++ } ++ + 
return ret; + } + +-- +2.41.0 + +From 9f4da4102a40f7dbfe0d78c7735c2801ba852f4b Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 11 Sep 2023 09:17:18 -0400 +Subject: [PATCH 07/15] Feature: libcrmcommon: Add XML attrs needed for schema + file transfer. + +Also, move PCMK__XA_CONN_HOST to be alphabetized. +--- + include/crm_internal.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 71a0f7efa..3bc8d096a 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -81,17 +81,21 @@ + #define PCMK__XA_CONFIG_ERRORS "config-errors" + #define PCMK__XA_CONFIG_WARNINGS "config-warnings" + #define PCMK__XA_CONFIRM "confirm" ++#define PCMK__XA_CONN_HOST "connection_host" + #define PCMK__XA_CRMD "crmd" + #define PCMK__XA_EXPECTED "expected" ++#define PCMK__XA_FILE "file" + #define PCMK__XA_GRAPH_ERRORS "graph-errors" + #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" + #define PCMK__XA_IN_CCM "in_ccm" + #define PCMK__XA_JOIN "join" + #define PCMK__XA_MODE "mode" + #define PCMK__XA_NODE_START_STATE "node_start_state" ++#define PCMK__XA_PATH "path" ++#define PCMK__XA_SCHEMA "schema" ++#define PCMK__XA_SCHEMAS "schemas" + #define PCMK__XA_TASK "task" + #define PCMK__XA_UPTIME "uptime" +-#define PCMK__XA_CONN_HOST "connection_host" + + + /* +-- +2.41.0 + +From 3384643bd4572ad03e183b16d5cc84fe69599380 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 11 Sep 2023 10:46:26 -0400 +Subject: [PATCH 08/15] Feature: libcrmcommon: Add pcmk__build_schema_xml_node. + +This function adds a given RNG or XSLT file and all of the files they +refer to as children of a given XML node. + +Also, add unit tests for the new function. 
+--- + include/crm/common/internal.h | 3 + + lib/common/schemas.c | 144 +++++++++++++++++ + lib/common/tests/schemas/Makefile.am | 3 +- + .../pcmk__build_schema_xml_node_test.c | 149 ++++++++++++++++++ + 4 files changed, 298 insertions(+), 1 deletion(-) + create mode 100644 lib/common/tests/schemas/pcmk__build_schema_xml_node_test.c + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index a3cd455dc..318003efe 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -135,6 +135,9 @@ void crm_schema_init(void); + void crm_schema_cleanup(void); + + GList *pcmk__schema_files_later_than(const char *name); ++void pcmk__build_schema_xml_node(xmlNode *parent, const char *name, ++ GList **already_included); ++ + + /* internal functions related to process IDs (from pid.c) */ + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index c03d80036..1bcdff031 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -1351,6 +1351,150 @@ pcmk__schema_files_later_than(const char *name) + return lst; + } + ++static void ++append_href(xmlNode *xml, void *user_data) ++{ ++ GList **list = user_data; ++ const char *href = crm_element_value(xml, "href"); ++ char *s = NULL; ++ ++ if (href == NULL) { ++ return; ++ } ++ ++ s = strdup(href); ++ CRM_ASSERT(s != NULL); ++ *list = g_list_prepend(*list, s); ++} ++ ++static void ++external_refs_in_schema(GList **list, const char *contents) ++{ ++ /* local-name()= is needed to ignore the xmlns= setting at the top of ++ * the XML file. Otherwise, the xpath query will always return nothing. 
++ */ ++ const char *search = "//*[local-name()='externalRef'] | //*[local-name()='include']"; ++ xmlNode *xml = string2xml(contents); ++ ++ crm_foreach_xpath_result(xml, search, append_href, list); ++ free_xml(xml); ++} ++ ++static int ++read_file_contents(const char *file, char **contents) ++{ ++ int rc = pcmk_rc_ok; ++ char *path = NULL; ++ ++ if (pcmk__ends_with(file, ".rng")) { ++ path = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_legacy_rng, file); ++ } else { ++ path = pcmk__xml_artefact_path(pcmk__xml_artefact_ns_legacy_xslt, file); ++ } ++ ++ rc = pcmk__file_contents(path, contents); ++ ++ free(path); ++ return rc; ++} ++ ++static void ++add_schema_file_to_xml(xmlNode *parent, const char *file, GList **already_included) ++{ ++ char *contents = NULL; ++ char *path = NULL; ++ xmlNode *file_node = NULL; ++ GList *includes = NULL; ++ int rc = pcmk_rc_ok; ++ ++ /* If we already included this file, don't do so again. */ ++ if (g_list_find_custom(*already_included, file, (GCompareFunc) strcmp) != NULL) { ++ return; ++ } ++ ++ /* Ensure whatever file we were given has a suffix we know about. If not, ++ * just assume it's an RNG file. ++ */ ++ if (!pcmk__ends_with(file, ".rng") && !pcmk__ends_with(file, ".xsl")) { ++ path = crm_strdup_printf("%s.rng", file); ++ } else { ++ path = strdup(file); ++ CRM_ASSERT(path != NULL); ++ } ++ ++ rc = read_file_contents(path, &contents); ++ if (rc != pcmk_rc_ok || contents == NULL) { ++ crm_warn("Could not read schema file %s: %s", file, pcmk_rc_str(rc)); ++ free(path); ++ return; ++ } ++ ++ /* Create a new node with the contents of the file ++ * as a CDATA block underneath it. 
++ */ ++ file_node = create_xml_node(parent, PCMK__XA_FILE); ++ if (file_node == NULL) { ++ free(contents); ++ free(path); ++ return; ++ } ++ ++ crm_xml_add(file_node, PCMK__XA_PATH, path); ++ *already_included = g_list_prepend(*already_included, path); ++ ++ xmlAddChild(file_node, xmlNewCDataBlock(parent->doc, (pcmkXmlStr) contents, ++ strlen(contents))); ++ ++ /* Scan the file for any or nodes and build up ++ * a list of the files they reference. ++ */ ++ external_refs_in_schema(&includes, contents); ++ ++ /* For each referenced file, recurse to add it (and potentially anything it ++ * references, ...) to the XML. ++ */ ++ for (GList *iter = includes; iter != NULL; iter = iter->next) { ++ add_schema_file_to_xml(parent, iter->data, already_included); ++ } ++ ++ free(contents); ++ g_list_free_full(includes, free); ++} ++ ++/*! ++ * \internal ++ * \brief Add an XML schema file and all the files it references as children ++ * of a given XML node ++ * ++ * \param[in,out] parent The parent XML node ++ * \param[in] name The schema version to compare against ++ * (for example, "pacemaker-3.1" or "pacemaker-3.1.rng") ++ * \param[in,out] already_included A list of names that have already been added ++ * to the parent node. ++ * ++ * \note The caller is responsible for freeing both the returned list and ++ * the elements of the list ++ */ ++void ++pcmk__build_schema_xml_node(xmlNode *parent, const char *name, GList **already_included) ++{ ++ /* First, create an unattached node to add all the schema files to as children. */ ++ xmlNode *schema_node = create_xml_node(NULL, PCMK__XA_SCHEMA); ++ ++ crm_xml_add(schema_node, XML_ATTR_VERSION, name); ++ add_schema_file_to_xml(schema_node, name, already_included); ++ ++ /* Then, if we actually added any children, attach the node to parent. If ++ * we did not add any children (for instance, name was invalid), this prevents ++ * us from returning a document with additional empty children. 
++ */ ++ if (schema_node->children != NULL) { ++ xmlAddChild(parent, schema_node); ++ } else { ++ free_xml(schema_node); ++ } ++} ++ + /*! + * \internal + * \brief Return the directory containing any extra schema files that a +diff --git a/lib/common/tests/schemas/Makefile.am b/lib/common/tests/schemas/Makefile.am +index b5c5e7f3c..8854eb264 100644 +--- a/lib/common/tests/schemas/Makefile.am ++++ b/lib/common/tests/schemas/Makefile.am +@@ -13,7 +13,8 @@ include $(top_srcdir)/mk/unittest.mk + CFLAGS += -DPCMK__TEST_SCHEMA_DIR='"$(abs_builddir)/schemas"' + + # Add "_test" to the end of all test program names to simplify .gitignore. +-check_PROGRAMS = pcmk__schema_files_later_than_test \ ++check_PROGRAMS = pcmk__build_schema_xml_node_test \ ++ pcmk__schema_files_later_than_test \ + pcmk__xml_find_x_0_schema_index_test + + TESTS = $(check_PROGRAMS) +diff --git a/lib/common/tests/schemas/pcmk__build_schema_xml_node_test.c b/lib/common/tests/schemas/pcmk__build_schema_xml_node_test.c +new file mode 100644 +index 000000000..1f5cb6ce1 +--- /dev/null ++++ b/lib/common/tests/schemas/pcmk__build_schema_xml_node_test.c +@@ -0,0 +1,149 @@ ++/* ++ * Copyright 2023 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU General Public License version 2 ++ * or later (GPLv2+) WITHOUT ANY WARRANTY. 
++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++#include ++ ++const char *rngs1[] = { "pacemaker-3.0.rng", "status-1.0.rng", "alerts-2.10.rng", ++ "nvset-2.9.rng", "score.rng", "rule-2.9.rng", ++ "tags-1.3.rng", "acls-2.0.rng", "fencing-2.4.rng", ++ "constraints-3.0.rng", "resources-3.0.rng", "nvset-3.0.rng", ++ "nodes-3.0.rng", "options-3.0.rng", NULL }; ++ ++const char *rngs2[] = { "pacemaker-2.0.rng", "status-1.0.rng", "tags-1.3.rng", ++ "acls-2.0.rng", "fencing-1.2.rng", "constraints-1.2.rng", ++ "rule.rng", "score.rng", "resources-1.3.rng", ++ "nvset-1.3.rng", "nodes-1.3.rng", "options-1.0.rng", ++ "nvset.rng", "cib-1.2.rng", NULL }; ++ ++const char *rngs3[] = { "pacemaker-2.1.rng", "constraints-2.1.rng", NULL }; ++ ++static int ++setup(void **state) { ++ setenv("PCMK_schema_directory", PCMK__TEST_SCHEMA_DIR, 1); ++ crm_schema_init(); ++ return 0; ++} ++ ++static int ++teardown(void **state) { ++ crm_schema_cleanup(); ++ unsetenv("PCMK_schema_directory"); ++ return 0; ++} ++ ++static void ++invalid_name(void **state) ++{ ++ GList *already_included = NULL; ++ xmlNode *parent = create_xml_node(NULL, PCMK__XA_SCHEMAS); ++ ++ pcmk__build_schema_xml_node(parent, "pacemaker-9.0", &already_included); ++ assert_null(parent->children); ++ assert_null(already_included); ++ free_xml(parent); ++} ++ ++static void ++single_schema(void **state) ++{ ++ GList *already_included = NULL; ++ xmlNode *parent = create_xml_node(NULL, PCMK__XA_SCHEMAS); ++ xmlNode *schema_node = NULL; ++ xmlNode *file_node = NULL; ++ int i = 0; ++ ++ pcmk__build_schema_xml_node(parent, "pacemaker-3.0", &already_included); ++ ++ assert_non_null(already_included); ++ assert_non_null(parent->children); ++ ++ /* Test that the result looks like this: ++ * ++ * ++ * ++ * CDATA ++ * CDATA ++ * ... 
++ * ++ * ++ */ ++ schema_node = pcmk__xml_first_child(parent); ++ assert_string_equal("pacemaker-3.0", crm_element_value(schema_node, XML_ATTR_VERSION)); ++ ++ file_node = pcmk__xml_first_child(schema_node); ++ while (file_node != NULL && rngs1[i] != NULL) { ++ assert_string_equal(rngs1[i], crm_element_value(file_node, PCMK__XA_PATH)); ++ assert_int_equal(pcmk__xml_first_child(file_node)->type, XML_CDATA_SECTION_NODE); ++ ++ file_node = pcmk__xml_next(file_node); ++ i++; ++ } ++ ++ g_list_free_full(already_included, free); ++ free_xml(parent); ++} ++ ++static void ++multiple_schemas(void **state) ++{ ++ GList *already_included = NULL; ++ xmlNode *parent = create_xml_node(NULL, PCMK__XA_SCHEMAS); ++ xmlNode *schema_node = NULL; ++ xmlNode *file_node = NULL; ++ int i = 0; ++ ++ pcmk__build_schema_xml_node(parent, "pacemaker-2.0", &already_included); ++ pcmk__build_schema_xml_node(parent, "pacemaker-2.1", &already_included); ++ ++ assert_non_null(already_included); ++ assert_non_null(parent->children); ++ ++ /* Like single_schema, but make sure files aren't included multiple times ++ * when the function is called repeatedly. 
++ */ ++ schema_node = pcmk__xml_first_child(parent); ++ assert_string_equal("pacemaker-2.0", crm_element_value(schema_node, XML_ATTR_VERSION)); ++ ++ file_node = pcmk__xml_first_child(schema_node); ++ while (file_node != NULL && rngs2[i] != NULL) { ++ assert_string_equal(rngs2[i], crm_element_value(file_node, PCMK__XA_PATH)); ++ assert_int_equal(pcmk__xml_first_child(file_node)->type, XML_CDATA_SECTION_NODE); ++ ++ file_node = pcmk__xml_next(file_node); ++ i++; ++ } ++ ++ schema_node = pcmk__xml_next(schema_node); ++ assert_string_equal("pacemaker-2.1", crm_element_value(schema_node, XML_ATTR_VERSION)); ++ ++ file_node = pcmk__xml_first_child(schema_node); ++ i = 0; ++ ++ while (file_node != NULL && rngs3[i] != NULL) { ++ assert_string_equal(rngs3[i], crm_element_value(file_node, PCMK__XA_PATH)); ++ assert_int_equal(pcmk__xml_first_child(file_node)->type, XML_CDATA_SECTION_NODE); ++ ++ file_node = pcmk__xml_next(file_node); ++ i++; ++ } ++ ++ g_list_free_full(already_included, free); ++ free_xml(parent); ++} ++ ++PCMK__UNIT_TEST(setup, teardown, ++ cmocka_unit_test(invalid_name), ++ cmocka_unit_test(single_schema), ++ cmocka_unit_test(multiple_schemas)) +-- +2.41.0 + +From 036eb9f59326962ed2d1f2f4af88b20755a046d5 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 14 Sep 2023 10:02:16 -0400 +Subject: [PATCH 09/15] Feature: daemons: Add a new fetch_schemas CIB command. + +This command wraps pcmk__schema_files_later_than and +pcmk__build_schema_xml_node to produce a new CIB command that takes in a +minimum schema version and returns a big XML message containing all the +schema files after that version. 
+--- + daemons/based/based_messages.c | 43 +++++++++++++++++++++++++++++++++ + daemons/based/based_operation.c | 1 + + daemons/based/pacemaker-based.h | 4 +++ + include/crm/cib/cib_types.h | 3 +++ + include/crm/cib/internal.h | 2 ++ + lib/cib/cib_client.c | 15 ++++++++++++ + lib/cib/cib_ops.c | 3 +++ + 7 files changed, 71 insertions(+) + +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index 35d639a89..a87d9ac2e 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -478,3 +478,46 @@ cib_process_commit_transaction(const char *op, int options, const char *section, + } + return pcmk_rc2legacy(rc); + } ++ ++int ++cib_process_schemas(const char *op, int options, const char *section, xmlNode *req, ++ xmlNode *input, xmlNode *existing_cib, xmlNode **result_cib, ++ xmlNode **answer) ++{ ++ xmlNode *data = NULL; ++ const char *after_ver = NULL; ++ GList *schemas = NULL; ++ GList *already_included = NULL; ++ ++ *answer = create_xml_node(NULL, PCMK__XA_SCHEMAS); ++ ++ data = get_message_xml(req, F_CIB_CALLDATA); ++ if (data == NULL) { ++ crm_warn("No data specified in request"); ++ return -EPROTO; ++ } ++ ++ after_ver = crm_element_value(data, XML_ATTR_VERSION); ++ if (after_ver == NULL) { ++ crm_warn("No version specified in request"); ++ return -EPROTO; ++ } ++ ++ /* The client requested all schemas after the latest one we know about, which ++ * means the client is fully up-to-date. Return a properly formatted reply ++ * with no schemas. 
++ */ ++ if (pcmk__str_eq(after_ver, xml_latest_schema(), pcmk__str_none)) { ++ return pcmk_ok; ++ } ++ ++ schemas = pcmk__schema_files_later_than(after_ver); ++ ++ for (GList *iter = schemas; iter != NULL; iter = iter->next) { ++ pcmk__build_schema_xml_node(*answer, iter->data, &already_included); ++ } ++ ++ g_list_free_full(schemas, free); ++ g_list_free_full(already_included, free); ++ return pcmk_ok; ++} +diff --git a/daemons/based/based_operation.c b/daemons/based/based_operation.c +index 736d425e3..8dd07af93 100644 +--- a/daemons/based/based_operation.c ++++ b/daemons/based/based_operation.c +@@ -35,6 +35,7 @@ static const cib__op_fn_t cib_op_functions[] = { + [cib__op_sync_all] = cib_process_sync, + [cib__op_sync_one] = cib_process_sync_one, + [cib__op_upgrade] = cib_process_upgrade_server, ++ [cib__op_schemas] = cib_process_schemas, + }; + + /*! +diff --git a/daemons/based/pacemaker-based.h b/daemons/based/pacemaker-based.h +index 33c7642c5..de24779ac 100644 +--- a/daemons/based/pacemaker-based.h ++++ b/daemons/based/pacemaker-based.h +@@ -122,6 +122,10 @@ int cib_process_commit_transaction(const char *op, int options, + const char *section, xmlNode *req, + xmlNode *input, xmlNode *existing_cib, + xmlNode **result_cib, xmlNode **answer); ++int cib_process_schemas(const char *op, int options, const char *section, ++ xmlNode *req, xmlNode *input, xmlNode *existing_cib, ++ xmlNode **result_cib, xmlNode **answer); ++ + void send_sync_request(const char *host); + int sync_our_cib(xmlNode *request, gboolean all); + +diff --git a/include/crm/cib/cib_types.h b/include/crm/cib/cib_types.h +index a803311c2..bebe770ed 100644 +--- a/include/crm/cib/cib_types.h ++++ b/include/crm/cib/cib_types.h +@@ -324,6 +324,9 @@ typedef struct cib_api_operations_s { + * processing requests + */ + void (*set_user)(cib_t *cib, const char *user); ++ ++ int (*fetch_schemas)(cib_t *cib, xmlNode **output_data, const char *after_ver, ++ int call_options); + } cib_api_operations_t; + + 
struct cib_s { +diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h +index 20059ec7e..9d54d52b7 100644 +--- a/include/crm/cib/internal.h ++++ b/include/crm/cib/internal.h +@@ -32,6 +32,7 @@ + #define PCMK__CIB_REQUEST_NOOP "noop" + #define PCMK__CIB_REQUEST_SHUTDOWN "cib_shutdown_req" + #define PCMK__CIB_REQUEST_COMMIT_TRANSACT "cib_commit_transact" ++#define PCMK__CIB_REQUEST_SCHEMAS "cib_schemas" + + # define F_CIB_CLIENTID "cib_clientid" + # define F_CIB_CALLOPTS "cib_callopt" +@@ -110,6 +111,7 @@ enum cib__op_type { + cib__op_sync_all, + cib__op_sync_one, + cib__op_upgrade, ++ cib__op_schemas, + }; + + gboolean cib_diff_version_details(xmlNode * diff, int *admin_epoch, int *epoch, int *updates, +diff --git a/lib/cib/cib_client.c b/lib/cib/cib_client.c +index 32e1f83c5..a2fcabbca 100644 +--- a/lib/cib/cib_client.c ++++ b/lib/cib/cib_client.c +@@ -451,6 +451,19 @@ cib_client_end_transaction(cib_t *cib, bool commit, int call_options) + return rc; + } + ++static int ++cib_client_fetch_schemas(cib_t *cib, xmlNode **output_data, const char *after_ver, ++ int call_options) ++{ ++ xmlNode *data = create_xml_node(NULL, PCMK__XA_SCHEMA); ++ ++ crm_xml_add(data, XML_ATTR_VERSION, after_ver); ++ ++ return cib_internal_op(cib, PCMK__CIB_REQUEST_SCHEMAS, NULL, NULL, data, ++ output_data, call_options, NULL); ++ ++} ++ + static void + cib_client_set_user(cib_t *cib, const char *user) + { +@@ -736,6 +749,8 @@ cib_new_variant(void) + + new_cib->cmds->set_user = cib_client_set_user; + ++ new_cib->cmds->fetch_schemas = cib_client_fetch_schemas; ++ + return new_cib; + } + +diff --git a/lib/cib/cib_ops.c b/lib/cib/cib_ops.c +index c324304b9..2165d8af3 100644 +--- a/lib/cib/cib_ops.c ++++ b/lib/cib/cib_ops.c +@@ -127,6 +127,9 @@ static const cib__operation_t cib_ops[] = { + |cib__op_attr_writes_through + |cib__op_attr_transaction + }, ++ { ++ PCMK__CIB_REQUEST_SCHEMAS, cib__op_schemas, cib__op_attr_local ++ } + }; + + /*! 
+-- +2.41.0 + +From e8076b4a387ee758508f0683739b3e880f79db47 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 14 Sep 2023 15:48:31 -0400 +Subject: [PATCH 10/15] Refactor: libcrmcommon: Add + pcmk__load_schemas_from_dir. + +This just moves the bulk of crm_schema_init out into its own function, +allowing us to call it multiple times as other schema directories +appear. + +There is no similar function for unloading schemas from a directory. If +you want to do that, you'll have to use crm_schema_cleanup to unload +everything and start over. + +This function has not been tested for the possibility that the same +schema files exist in multiple directories. It is assumed that it won't +be used like that. +--- + include/crm/common/internal.h | 1 + + lib/common/schemas.c | 83 +++++++++++++++++++---------------- + 2 files changed, 45 insertions(+), 39 deletions(-) + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index 318003efe..542d0a67c 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -134,6 +134,7 @@ bool pcmk__procfs_has_pids(void); + void crm_schema_init(void); + void crm_schema_cleanup(void); + ++void pcmk__load_schemas_from_dir(const char *dir); + GList *pcmk__schema_files_later_than(const char *name); + void pcmk__build_schema_xml_node(xmlNode *parent, const char *name, + GList **already_included); +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 1bcdff031..a0c844131 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -406,6 +406,49 @@ wrap_libxslt(bool finalize) + } + } + ++void ++pcmk__load_schemas_from_dir(const char *dir) ++{ ++ int lpc, max; ++ struct dirent **namelist = NULL; ++ ++ max = scandir(dir, &namelist, schema_filter, schema_cmp_directory); ++ if (max < 0) { ++ crm_warn("Could not load schemas from %s: %s", dir, strerror(errno)); ++ return; ++ } ++ ++ for (lpc = 0; lpc < max; lpc++) { ++ bool transform_expected = false; ++ pcmk__schema_version_t 
version = SCHEMA_ZERO; ++ ++ if (!version_from_filename(namelist[lpc]->d_name, &version)) { ++ // Shouldn't be possible, but makes static analysis happy ++ crm_warn("Skipping schema '%s': could not parse version", ++ namelist[lpc]->d_name); ++ continue; ++ } ++ if ((lpc + 1) < max) { ++ pcmk__schema_version_t next_version = SCHEMA_ZERO; ++ ++ if (version_from_filename(namelist[lpc+1]->d_name, &next_version) ++ && (version.v[0] < next_version.v[0])) { ++ transform_expected = true; ++ } ++ } ++ ++ if (add_schema_by_version(&version, transform_expected) != pcmk_rc_ok) { ++ break; ++ } ++ } ++ ++ for (lpc = 0; lpc < max; lpc++) { ++ free(namelist[lpc]); ++ } ++ ++ free(namelist); ++} ++ + /*! + * \internal + * \brief Load pacemaker schemas into cache +@@ -416,50 +459,12 @@ wrap_libxslt(bool finalize) + void + crm_schema_init(void) + { +- int lpc, max; + char *base = pcmk__xml_artefact_root(pcmk__xml_artefact_ns_legacy_rng); +- struct dirent **namelist = NULL; + const pcmk__schema_version_t zero = SCHEMA_ZERO; + + wrap_libxslt(false); + +- max = scandir(base, &namelist, schema_filter, schema_cmp_directory); +- if (max < 0) { +- crm_notice("scandir(%s) failed: %s (%d)", base, strerror(errno), errno); +- free(base); +- +- } else { +- free(base); +- for (lpc = 0; lpc < max; lpc++) { +- bool transform_expected = FALSE; +- pcmk__schema_version_t version = SCHEMA_ZERO; +- +- if (!version_from_filename(namelist[lpc]->d_name, &version)) { +- // Shouldn't be possible, but makes static analysis happy +- crm_err("Skipping schema '%s': could not parse version", +- namelist[lpc]->d_name); +- continue; +- } +- if ((lpc + 1) < max) { +- pcmk__schema_version_t next_version = SCHEMA_ZERO; +- +- if (version_from_filename(namelist[lpc+1]->d_name, &next_version) +- && (version.v[0] < next_version.v[0])) { +- transform_expected = TRUE; +- } +- } +- +- if (add_schema_by_version(&version, transform_expected) +- == ENOENT) { +- break; +- } +- } +- +- for (lpc = 0; lpc < max; lpc++) { +- 
free(namelist[lpc]); +- } +- free(namelist); +- } ++ pcmk__load_schemas_from_dir(base); + + // @COMPAT: Deprecated since 2.1.5 + add_schema(pcmk__schema_validator_rng, &zero, "pacemaker-next", +-- +2.41.0 + +From 5b40f0227b33edd6be65515dcc1af4eb656b78e7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 14 Sep 2023 11:24:36 -0400 +Subject: [PATCH 11/15] Feature: daemons: Download newer schema files to a + remote executor. + +If the remote executor supports an older version of the CIB schema than +the rest of the cluster, various operations could fail due to the schema +not validating on the remote node. + +Instead, ask the CIB manager for the updated schema files, store them in +a separate directory on the remote node, and add them to the list of +known schema files. These can then be used just like the packaged +schema files. + +Note that while this is a fairly large patch, it's really just a lot of +boring IO and file management code, which requires a lot of error +handling. + +Fixes T182 +--- + daemons/execd/Makefile.am | 4 +- + daemons/execd/execd_commands.c | 6 + + daemons/execd/pacemaker-execd.h | 1 + + daemons/execd/remoted_schemas.c | 282 ++++++++++++++++++++++++++++++++ + 4 files changed, 292 insertions(+), 1 deletion(-) + create mode 100644 daemons/execd/remoted_schemas.c + +diff --git a/daemons/execd/Makefile.am b/daemons/execd/Makefile.am +index ab8544f9d..ce0e16126 100644 +--- a/daemons/execd/Makefile.am ++++ b/daemons/execd/Makefile.am +@@ -44,12 +44,14 @@ pacemaker_remoted_LDFLAGS = $(LDFLAGS_HARDENED_EXE) + + pacemaker_remoted_LDADD = $(top_builddir)/lib/fencing/libstonithd.la + pacemaker_remoted_LDADD += $(top_builddir)/lib/services/libcrmservice.la ++pacemaker_remoted_LDADD += $(top_builddir)/lib/cib/libcib.la + pacemaker_remoted_LDADD += $(top_builddir)/lib/lrmd/liblrmd.la + pacemaker_remoted_LDADD += $(top_builddir)/lib/common/libcrmcommon.la + pacemaker_remoted_SOURCES = $(pacemaker_execd_SOURCES) \ + remoted_tls.c \ + remoted_pidone.c \ 
+- remoted_proxy.c ++ remoted_proxy.c \ ++ remoted_schemas.c + endif + + cts_exec_helper_LDADD = $(top_builddir)/lib/pengine/libpe_status.la +diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c +index cf4503a25..1601efb0b 100644 +--- a/daemons/execd/execd_commands.c ++++ b/daemons/execd/execd_commands.c +@@ -1493,9 +1493,15 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, + if ((client->remote != NULL) + && pcmk_is_set(client->flags, + pcmk__client_tls_handshake_complete)) { ++ const char *op = crm_element_value(request, F_LRMD_OPERATION); + + // This is a remote connection from a cluster node's controller + ipc_proxy_add_provider(client); ++ ++ /* If this was a register operation, also ask for new schema files. */ ++ if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { ++ remoted_request_cib_schema_files(); ++ } + } else { + rc = -EACCES; + } +diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h +index 9c1d173f5..6fb8ef440 100644 +--- a/daemons/execd/pacemaker-execd.h ++++ b/daemons/execd/pacemaker-execd.h +@@ -101,6 +101,7 @@ void ipc_proxy_forward_client(pcmk__client_t *client, xmlNode *xml); + pcmk__client_t *ipc_proxy_get_provider(void); + int ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy); + void remoted_spawn_pidone(int argc, char **argv, char **envp); ++void remoted_request_cib_schema_files(void); + #endif + + int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, +diff --git a/daemons/execd/remoted_schemas.c b/daemons/execd/remoted_schemas.c +new file mode 100644 +index 000000000..d501fa495 +--- /dev/null ++++ b/daemons/execd/remoted_schemas.c +@@ -0,0 +1,282 @@ ++/* ++ * Copyright 2023 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. ++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "pacemaker-execd.h" ++ ++static pid_t schema_fetch_pid = 0; ++ ++static int ++rm_files(const char *pathname, const struct stat *sbuf, int type, struct FTW *ftwb) ++{ ++ /* Don't delete PCMK__REMOTE_SCHEMA_DIR . */ ++ if (ftwb->level == 0) { ++ return 0; ++ } ++ ++ if (remove(pathname) != 0) { ++ int rc = errno; ++ crm_err("Could not remove %s: %s", pathname, pcmk_rc_str(rc)); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static void ++clean_up_extra_schema_files(void) ++{ ++ const char *remote_schema_dir = pcmk__remote_schema_dir(); ++ struct stat sb; ++ int rc; ++ ++ rc = stat(remote_schema_dir, &sb); ++ ++ if (rc == -1) { ++ if (errno == ENOENT) { ++ /* If the directory doesn't exist, try to make it first. */ ++ if (mkdir(remote_schema_dir, 0755) != 0) { ++ rc = errno; ++ crm_err("Could not create directory for schemas: %s", ++ pcmk_rc_str(rc)); ++ } ++ ++ } else { ++ rc = errno; ++ crm_err("Could not create directory for schemas: %s", ++ pcmk_rc_str(rc)); ++ } ++ ++ } else if (!S_ISDIR(sb.st_mode)) { ++ /* If something exists with the same name that's not a directory, that's ++ * an error. ++ */ ++ crm_err("%s already exists but is not a directory", remote_schema_dir); ++ ++ } else { ++ /* It's a directory - clear it out so we can download potentially new ++ * schema files. 
++ */ ++ rc = nftw(remote_schema_dir, rm_files, 10, FTW_DEPTH|FTW_MOUNT|FTW_PHYS); ++ ++ if (rc != 0) { ++ crm_err("Could not remove %s: %s", remote_schema_dir, pcmk_rc_str(rc)); ++ } ++ } ++} ++ ++static void ++write_extra_schema_file(xmlNode *xml, void *user_data) ++{ ++ const char *remote_schema_dir = pcmk__remote_schema_dir(); ++ const char *file = NULL; ++ char *path = NULL; ++ int rc; ++ ++ file = crm_element_value(xml, PCMK__XA_PATH); ++ if (file == NULL) { ++ crm_warn("No destination path given in schema request"); ++ return; ++ } ++ ++ path = crm_strdup_printf("%s/%s", remote_schema_dir, file); ++ ++ /* The schema is a CDATA node, which is a child of the node. Traverse ++ * all children and look for the first CDATA child. There can't be more than ++ * one because we only have one file attribute on the parent. ++ */ ++ for (xmlNode *child = xml->children; child != NULL; child = child->next) { ++ FILE *stream = NULL; ++ ++ if (child->type != XML_CDATA_SECTION_NODE) { ++ continue; ++ } ++ ++ stream = fopen(path, "w+"); ++ if (stream == NULL) { ++ crm_warn("Could not write schema file %s: %s", path, strerror(errno)); ++ } else { ++ rc = fprintf(stream, "%s", child->content); ++ ++ if (rc < 0) { ++ crm_warn("Could not write schema file %s: %s", path, strerror(errno)); ++ } ++ ++ fclose(stream); ++ } ++ ++ break; ++ } ++ ++ free(path); ++} ++ ++static void ++get_schema_files(void) ++{ ++ int rc = pcmk_rc_ok; ++ cib_t *cib = NULL; ++ xmlNode *reply; ++ ++ cib = cib_new(); ++ if (cib == NULL) { ++ _exit(ENOTCONN); ++ } ++ ++ rc = cib->cmds->signon(cib, crm_system_name, cib_query); ++ if (rc != pcmk_ok) { ++ crm_err("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); ++ _exit(pcmk_rc2exitc(rc)); ++ } ++ ++ rc = cib->cmds->fetch_schemas(cib, &reply, xml_latest_schema(), cib_sync_call); ++ if (rc != pcmk_ok) { ++ crm_err("Could not get schema files: %s", pcmk_strerror(rc)); ++ rc = pcmk_legacy2rc(rc); ++ ++ } else if (reply->children != NULL) { ++ /* The 
returned document looks something like this: ++ * ++ * ++ * ++ * ++ * ++ * ++ * ++ * ++ * ... ++ * ++ * ++ * ++ * ... ++ * ++ * ++ * ++ * ++ * All the and tags are really just there for organizing ++ * the XML a little better. What we really care about are the nodes, ++ * and specifically the path attributes and the CDATA children (not shown) ++ * of each. We can use an xpath query to reach down and get all the ++ * nodes at once. ++ * ++ * If we already have the latest schema version, or we asked for one later ++ * than what the cluster supports, we'll get back an empty node, ++ * so all this will continue to work. It just won't do anything. ++ */ ++ crm_foreach_xpath_result(reply, "//" PCMK__XA_FILE, write_extra_schema_file, NULL); ++ } ++ ++ cib__clean_up_connection(&cib); ++ _exit(pcmk_rc2exitc(rc)); ++} ++ ++/* Load any additional schema files when the child is finished fetching and ++ * saving them to disk. ++ */ ++static void ++get_schema_files_complete(mainloop_child_t *p, pid_t pid, int core, int signo, int exitcode) ++{ ++ const char *errmsg = "Could not load additional schema files"; ++ ++ if ((signo == 0) && (exitcode == 0)) { ++ const char *remote_schema_dir = pcmk__remote_schema_dir(); ++ ++ /* Don't just crm_schema_init here because that will load the base ++ * schemas again too. Instead just load the things we fetched. ++ */ ++ pcmk__load_schemas_from_dir(remote_schema_dir); ++ crm_info("Fetching extra schema files completed successfully"); ++ ++ } else { ++ if (signo == 0) { ++ crm_err("%s: process %d exited %d", errmsg, (int) pid, exitcode); ++ ++ } else { ++ crm_err("%s: process %d terminated with signal %d (%s)%s", ++ errmsg, (int) pid, signo, strsignal(signo), ++ (core? " and dumped core" : "")); ++ } ++ ++ /* Clean up any incomplete schema data we might have been downloading when ++ * the process timed out or crashed. 
We don't need to do any extra cleanup ++ * because we never loaded the extra schemas, and we don't need to call ++ * crm_schema_init because that was called in remoted_request_cib_schema_files ++ * before this function. ++ */ ++ clean_up_extra_schema_files(); ++ } ++} ++ ++void ++remoted_request_cib_schema_files(void) ++{ ++ pid_t pid; ++ int rc; ++ ++ /* If a previous schema-fetch process is still running when we're called ++ * again, it's hung. Attempt to kill it before cleaning up the extra ++ * directory. ++ */ ++ if (schema_fetch_pid != 0) { ++ if (mainloop_child_kill(schema_fetch_pid) == FALSE) { ++ crm_warn("Unable to kill pre-existing schema-fetch process"); ++ return; ++ } ++ ++ schema_fetch_pid = 0; ++ } ++ ++ /* Clean up any extra schema files we downloaded from a previous cluster ++ * connection. After the files are gone, we need to wipe them from ++ * known_schemas, but there's no opposite operation for add_schema(). ++ * ++ * Instead, unload all the schemas. This means we'll also forget about all ++ * the installed schemas as well, which means that xml_latest_schema() will ++ * fail. So we need to load the base schemas right now. ++ */ ++ clean_up_extra_schema_files(); ++ crm_schema_cleanup(); ++ crm_schema_init(); ++ ++ crm_info("Fetching extra schema files from cluster"); ++ pid = fork(); ++ ++ switch (pid) { ++ case -1: { ++ rc = errno; ++ crm_warn("Could not spawn process to get schema files: %s", pcmk_rc_str(rc)); ++ break; ++ } ++ ++ case 0: ++ /* child */ ++ get_schema_files(); ++ break; ++ ++ default: ++ /* parent */ ++ schema_fetch_pid = pid; ++ mainloop_child_add_with_flags(pid, 5 * 60 * 1000, "schema-fetch", NULL, ++ mainloop_leave_pid_group, ++ get_schema_files_complete); ++ break; ++ } ++} +-- +2.41.0 + +From b05fef32cf1f9063e01db5108f95386be329d778 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 29 Nov 2023 10:04:57 -0500 +Subject: [PATCH 12/15] Feature: libcrmcommon: Load additional schema files in + crm_schema_init. 
+ +If the /var/lib/pacemaker/schemas directory exists, load any extra +schemas from it when we init. This makes them available for command +line programs to use. +--- + lib/common/schemas.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index a0c844131..68d79cfc7 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -459,12 +459,14 @@ pcmk__load_schemas_from_dir(const char *dir) + void + crm_schema_init(void) + { ++ const char *remote_schema_dir = pcmk__remote_schema_dir(); + char *base = pcmk__xml_artefact_root(pcmk__xml_artefact_ns_legacy_rng); + const pcmk__schema_version_t zero = SCHEMA_ZERO; + + wrap_libxslt(false); + + pcmk__load_schemas_from_dir(base); ++ pcmk__load_schemas_from_dir(remote_schema_dir); + + // @COMPAT: Deprecated since 2.1.5 + add_schema(pcmk__schema_validator_rng, &zero, "pacemaker-next", +-- +2.41.0 + +From 7454a8400238301848cc6694f5413fdb19d5834e Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 29 Sep 2023 10:41:51 -0400 +Subject: [PATCH 13/15] Refactor: daemons: Remove redundant includes from + remoted_tls.c. + +--- + daemons/execd/remoted_tls.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c +index 23a2dcf45..978edfdd3 100644 +--- a/daemons/execd/remoted_tls.c ++++ b/daemons/execd/remoted_tls.c +@@ -12,8 +12,6 @@ + #include + #include + +-#include +-#include + #include + #include + #include +-- +2.41.0 + +From c6b5f73a013515d6acd818d348d011e38f3d8e0e Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Fri, 27 Oct 2023 15:23:41 -0400 +Subject: [PATCH 14/15] Refactor: daemons: Keep XML schemas sorted. + +Now that we can add schemas multiple times (including after next/none +have been added in the initial set), we've broken the assumption that +schemas are always sorted by scandir. + +This introduces a function that can be called wherever to keep them +sorted. 
It should be called pretty much whenever +pcmk__load_schemas_from_dir is called. +--- + daemons/execd/remoted_schemas.c | 1 + + include/crm/common/xml_internal.h | 1 + + lib/common/schemas.c | 40 +++++++++++++++++++++++++++++++ + 3 files changed, 42 insertions(+) + +diff --git a/daemons/execd/remoted_schemas.c b/daemons/execd/remoted_schemas.c +index d501fa495..eed43dfa9 100644 +--- a/daemons/execd/remoted_schemas.c ++++ b/daemons/execd/remoted_schemas.c +@@ -203,6 +203,7 @@ get_schema_files_complete(mainloop_child_t *p, pid_t pid, int core, int signo, i + * schemas again too. Instead just load the things we fetched. + */ + pcmk__load_schemas_from_dir(remote_schema_dir); ++ pcmk__sort_schemas(); + crm_info("Fetching extra schema files completed successfully"); + + } else { +diff --git a/include/crm/common/xml_internal.h b/include/crm/common/xml_internal.h +index cb27ec6b2..9b50f4e72 100644 +--- a/include/crm/common/xml_internal.h ++++ b/include/crm/common/xml_internal.h +@@ -448,5 +448,6 @@ gboolean pcmk__validate_xml(xmlNode *xml_blob, const char *validation, + + void pcmk__log_known_schemas(void); + const char *pcmk__remote_schema_dir(void); ++void pcmk__sort_schemas(void); + + #endif // PCMK__XML_INTERNAL__H +diff --git a/lib/common/schemas.c b/lib/common/schemas.c +index 68d79cfc7..fd6202f62 100644 +--- a/lib/common/schemas.c ++++ b/lib/common/schemas.c +@@ -449,6 +449,41 @@ pcmk__load_schemas_from_dir(const char *dir) + free(namelist); + } + ++static gint ++schema_sort_GCompareFunc(gconstpointer a, gconstpointer b) ++{ ++ const pcmk__schema_t *schema_a = a; ++ const pcmk__schema_t *schema_b = b; ++ ++ if (pcmk__str_eq(schema_a->name, "pacemaker-next", pcmk__str_none)) { ++ if (pcmk__str_eq(schema_b->name, "none", pcmk__str_none)) { ++ return -1; ++ } else { ++ return 1; ++ } ++ } else if (pcmk__str_eq(schema_a->name, "none", pcmk__str_none)) { ++ return 1; ++ } else if (pcmk__str_eq(schema_b->name, "pacemaker-next", pcmk__str_none)) { ++ return -1; ++ } else { 
++ return schema_cmp(schema_a->version, schema_b->version); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Sort the list of known schemas such that all pacemaker-X.Y are in ++ * version order, then pacemaker-next, then none ++ * ++ * This function should be called whenever additional schemas are loaded using ++ * pcmk__load_schemas_from_dir(), after the initial sets in crm_schema_init(). ++ */ ++void ++pcmk__sort_schemas(void) ++{ ++ known_schemas = g_list_sort(known_schemas, schema_sort_GCompareFunc); ++} ++ + /*! + * \internal + * \brief Load pacemaker schemas into cache +@@ -474,6 +509,11 @@ crm_schema_init(void) + + add_schema(pcmk__schema_validator_none, &zero, PCMK__VALUE_NONE, + NULL, NULL, FALSE); ++ ++ /* This shouldn't be strictly necessary, but we'll do it here just in case ++ * there's anything in PCMK__REMOTE_SCHEMA_DIR that messes up the order. ++ */ ++ pcmk__sort_schemas(); + } + + static gboolean +-- +2.41.0 + +From d6a535ca43db202bd01a2e085b58722ed1abcdb0 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 30 Oct 2023 12:59:13 -0400 +Subject: [PATCH 15/15] Feature: daemons: Only ask for schemas if supported by + the server + +We can use LRMD_PROTOCOL_VERSION in the handshake to determine what the +server supports, similar to what is being done in attrd. Add a macro to +compare the version we were given with the known minimum version that +supports the schema transfer command. + +Additionally, introduce LRMD_COMPATIBLE_PROTOCOL which is just the major +version number required for the connection to succeed. This gets rid of +the need for LRMD_MIN_PROTOCOL_VERSION, which can now be deprecated. + +And then since I wasn't sure compare_version would work if you give it a +full version number and just a major version, add a unit test for that. 
+--- + daemons/execd/execd_commands.c | 11 +++++---- + include/crm/lrmd.h | 23 +++++++++++++++---- + lib/common/tests/utils/compare_version_test.c | 5 +++- + 3 files changed, 30 insertions(+), 9 deletions(-) + +diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c +index 1601efb0b..4ec4d03d6 100644 +--- a/daemons/execd/execd_commands.c ++++ b/daemons/execd/execd_commands.c +@@ -1482,9 +1482,9 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, + const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); + const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); + +- if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { ++ if (compare_version(protocol_version, LRMD_COMPATIBLE_PROTOCOL) < 0) { + crm_err("Cluster API version must be greater than or equal to %s, not %s", +- LRMD_MIN_PROTOCOL_VERSION, protocol_version); ++ LRMD_COMPATIBLE_PROTOCOL, protocol_version); + rc = -EPROTO; + } + +@@ -1498,8 +1498,11 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, + // This is a remote connection from a cluster node's controller + ipc_proxy_add_provider(client); + +- /* If this was a register operation, also ask for new schema files. */ +- if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { ++ /* If this was a register operation, also ask for new schema files but ++ * only if it's supported by the protocol version. 
++ */ ++ if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) && ++ LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) { + remoted_request_cib_schema_files(); + } + } else { +diff --git a/include/crm/lrmd.h b/include/crm/lrmd.h +index 0c5a40b62..948a3b1fc 100644 +--- a/include/crm/lrmd.h ++++ b/include/crm/lrmd.h +@@ -33,12 +33,27 @@ typedef struct lrmd_key_value_s { + struct lrmd_key_value_s *next; + } lrmd_key_value_t; + +-/* This should be bumped every time there is an incompatible change that +- * prevents older clients from connecting to this version of the server. ++/* The major version should be bumped every time there is an incompatible ++ * change that prevents older clients from connecting to this version of ++ * the server. The minor version indicates feature support. ++ * ++ * Protocol Pacemaker Significant changes ++ * -------- --------- ------------------- ++ * 1.2 2.1.7 PCMK__CIB_REQUEST_SCHEMAS + */ +-#define LRMD_PROTOCOL_VERSION "1.1" ++#define LRMD_PROTOCOL_VERSION "1.2" ++ ++#define LRMD_SUPPORTS_SCHEMA_XFER(x) (compare_version((x), "1.2") >= 0) + +-/* This is the version that the client version will actually be compared ++/* The major protocol version the client and server both need to support for ++ * the connection to be successful. This should only ever be the major ++ * version - not a complete version number. ++ */ ++#define LRMD_COMPATIBLE_PROTOCOL "1" ++ ++/* \deprecated Do not use (will be removed in a future release) ++ * ++ * This is the version that the client version will actually be compared + * against. This should be identical to LRMD_PROTOCOL_VERSION. However, we + * accidentally bumped LRMD_PROTOCOL_VERSION in 6424a647 (1.1.15) when we didn't + * need to, so for now it's different. 
If we ever have a truly incompatible +diff --git a/lib/common/tests/utils/compare_version_test.c b/lib/common/tests/utils/compare_version_test.c +index 35ebb63c6..d191f4abb 100644 +--- a/lib/common/tests/utils/compare_version_test.c ++++ b/lib/common/tests/utils/compare_version_test.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2022 the Pacemaker project contributors ++ * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -46,6 +46,9 @@ shorter_versions(void **state) + { + assert_int_equal(compare_version("1.0", "1.0.1"), -1); + assert_int_equal(compare_version("1.0.1", "1.0"), 1); ++ assert_int_equal(compare_version("1.0", "1"), 0); ++ assert_int_equal(compare_version("1", "1.2"), -1); ++ assert_int_equal(compare_version("1.2", "1"), 1); + } + + PCMK__UNIT_TEST(NULL, NULL, +-- +2.41.0 + diff --git a/SOURCES/003-history-cleanup.patch b/SOURCES/003-history-cleanup.patch deleted file mode 100644 index 87a3e27..0000000 --- a/SOURCES/003-history-cleanup.patch +++ /dev/null @@ -1,2829 +0,0 @@ -From e953591a9796edebd4796c344df0eddcbc7a2dff Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 30 Jan 2023 16:34:32 -0600 -Subject: [PATCH 01/14] Refactor: scheduler: drop unneeded arguments from - process_rsc_state() - -migrate_op has been unused since at least 2011 ---- - lib/pengine/unpack.c | 36 +++++++++++++++--------------------- - 1 file changed, 15 insertions(+), 21 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 5fcba3b..9524def 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -1963,8 +1963,7 @@ process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t - - static void - process_rsc_state(pe_resource_t * rsc, pe_node_t * node, -- enum action_fail_response on_fail, -- xmlNode * migrate_op, pe_working_set_t * data_set) -+ enum action_fail_response on_fail) - { - pe_node_t *tmpnode = NULL; - char *reason = NULL; -@@ 
-2016,7 +2015,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - should_fence = TRUE; - -- } else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { -+ } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { - if (pe__is_remote_node(node) && node->details->remote_rsc - && !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) { - -@@ -2039,7 +2038,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - if (reason == NULL) { - reason = crm_strdup_printf("%s is thought to be active there", rsc->id); - } -- pe_fence_node(data_set, node, reason, FALSE); -+ pe_fence_node(rsc->cluster, node, reason, FALSE); - } - free(reason); - } -@@ -2069,7 +2068,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - * but also mark the node as unclean - */ - reason = crm_strdup_printf("%s failed there", rsc->id); -- pe_fence_node(data_set, node, reason, FALSE); -+ pe_fence_node(rsc->cluster, node, reason, FALSE); - free(reason); - break; - -@@ -2090,7 +2089,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - /* make sure it comes up somewhere else - * or not at all - */ -- resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); -+ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", -+ rsc->cluster); - break; - - case action_fail_stop: -@@ -2112,8 +2112,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - * container is running yet, so remember it and add a stop - * action for it later. 
- */ -- data_set->stop_needed = g_list_prepend(data_set->stop_needed, -- rsc->container); -+ rsc->cluster->stop_needed = -+ g_list_prepend(rsc->cluster->stop_needed, rsc->container); - } else if (rsc->container) { - stop_action(rsc->container, node, FALSE); - } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { -@@ -2123,10 +2123,10 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - - case action_fail_reset_remote: - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); -- if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { -+ if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { - tmpnode = NULL; - if (rsc->is_remote_node) { -- tmpnode = pe_find_node(data_set->nodes, rsc->id); -+ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); - } - if (tmpnode && - pe__is_remote_node(tmpnode) && -@@ -2135,7 +2135,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - /* The remote connection resource failed in a way that - * should result in fencing the remote node. - */ -- pe_fence_node(data_set, tmpnode, -+ pe_fence_node(rsc->cluster, tmpnode, - "remote connection is unrecoverable", FALSE); - } - } -@@ -2158,7 +2158,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - * result in a fencing operation regardless if we're going to attempt to - * reconnect to the remote-node in this transition or not. 
*/ - if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { -- tmpnode = pe_find_node(data_set->nodes, rsc->id); -+ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); - if (tmpnode && tmpnode->details->unclean) { - tmpnode->details->unseen = FALSE; - } -@@ -2177,7 +2177,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - } - } - -- native_add_running(rsc, node, data_set, (save_on_fail != action_fail_ignore)); -+ native_add_running(rsc, node, rsc->cluster, -+ (save_on_fail != action_fail_ignore)); - switch (on_fail) { - case action_fail_ignore: - break; -@@ -2376,14 +2377,12 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, - int start_index = -1; - enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; - -- const char *task = NULL; - const char *rsc_id = ID(lrm_resource); - - pe_resource_t *rsc = NULL; - GList *op_list = NULL; - GList *sorted_op_list = NULL; - -- xmlNode *migrate_op = NULL; - xmlNode *rsc_op = NULL; - xmlNode *last_failure = NULL; - -@@ -2437,11 +2436,6 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, - for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { - xmlNode *rsc_op = (xmlNode *) gIter->data; - -- task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); -- if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { -- migrate_op = rsc_op; -- } -- - unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); - } - -@@ -2452,7 +2446,7 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, - /* no need to free the contents */ - g_list_free(sorted_op_list); - -- process_rsc_state(rsc, node, on_fail, migrate_op, data_set); -+ process_rsc_state(rsc, node, on_fail); - - if (get_target_role(rsc, &req_role)) { - if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { --- -2.31.1 - -From 6f4e34cccc4864961d2020a2dd547450ac53a44e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 1 Feb 2023 16:30:20 -0600 -Subject: [PATCH 02/14] Log: scheduler: improve 
trace logs when unpacking - resource history - ---- - lib/pengine/unpack.c | 112 +++++++++++++++++++++++++++---------------- - 1 file changed, 71 insertions(+), 41 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 9524def..b7b2873 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -3363,6 +3363,24 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, - pe__set_resource_flags(rsc, pe_rsc_block); - } - -+/*! -+ * \internal -+ * \brief Update an integer value and why -+ * -+ * \param[in,out] i Pointer to integer to update -+ * \param[in,out] why Where to store reason for update -+ * \param[in] value New value -+ * \param[in,out] reason Description of why value was changed -+ */ -+static inline void -+remap_because(int *i, const char **why, int value, const char *reason) -+{ -+ if (*i != value) { -+ *i = value; -+ *why = reason; -+ } -+} -+ - /*! - * \internal - * \brief Remap informational monitor results and operation status -@@ -3393,29 +3411,34 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, - static void - remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - pe_working_set_t *data_set, enum action_fail_response *on_fail, -- int target_rc, int *rc, int *status) { -+ int target_rc, int *rc, int *status) -+{ - bool is_probe = false; -+ int orig_exit_status = *rc; -+ int orig_exec_status = *status; -+ const char *why = NULL; - const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); - const char *key = get_op_key(xml_op); - const char *exit_reason = crm_element_value(xml_op, - XML_LRM_ATTR_EXIT_REASON); - - if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none)) { -- int remapped_rc = pcmk__effective_rc(*rc); -- -- if (*rc != remapped_rc) { -- crm_trace("Remapping monitor result %d to %d", *rc, remapped_rc); -+ // Remap degraded results to their usual counterparts -+ *rc = pcmk__effective_rc(*rc); -+ if (*rc != orig_exit_status) { -+ why = 
"degraded monitor result"; - if (!node->details->shutdown || node->details->online) { - record_failed_op(xml_op, node, rsc, data_set); - } -- -- *rc = remapped_rc; - } - } - - if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { -- *status = PCMK_EXEC_DONE; -- *rc = PCMK_OCF_NOT_RUNNING; -+ if ((*status != PCMK_EXEC_DONE) || (*rc != PCMK_OCF_NOT_RUNNING)) { -+ *status = PCMK_EXEC_DONE; -+ *rc = PCMK_OCF_NOT_RUNNING; -+ why = "irrelevant probe result"; -+ } - } - - /* If the executor reported an operation status of anything but done or -@@ -3423,22 +3446,19 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - * it should be treated as a failure or not, because we know the expected - * result. - */ -- if (*status != PCMK_EXEC_DONE && *status != PCMK_EXEC_ERROR) { -- return; -+ switch (*status) { -+ case PCMK_EXEC_DONE: -+ case PCMK_EXEC_ERROR: -+ break; -+ default: -+ goto remap_done; - } - -- CRM_ASSERT(rsc); -- CRM_CHECK(task != NULL, -- *status = PCMK_EXEC_ERROR; return); -- -- *status = PCMK_EXEC_DONE; -- - if (exit_reason == NULL) { - exit_reason = ""; - } - - is_probe = pcmk_xe_is_probe(xml_op); -- - if (is_probe) { - task = "probe"; - } -@@ -3452,12 +3472,15 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - * those versions or processing of saved CIB files from those versions, - * so we do not need to care much about this case. 
- */ -- *status = PCMK_EXEC_ERROR; -+ remap_because(status, &why, PCMK_EXEC_ERROR, "obsolete history format"); - crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", - key, pe__node_name(node)); - -- } else if (target_rc != *rc) { -- *status = PCMK_EXEC_ERROR; -+ } else if (*rc == target_rc) { -+ remap_because(status, &why, PCMK_EXEC_DONE, "expected result"); -+ -+ } else { -+ remap_because(status, &why, PCMK_EXEC_ERROR, "unexpected result"); - pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", - key, pe__node_name(node), - target_rc, services_ocf_exitcode_str(target_rc), -@@ -3468,7 +3491,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - switch (*rc) { - case PCMK_OCF_OK: - if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { -- *status = PCMK_EXEC_DONE; -+ remap_because(status, &why,PCMK_EXEC_DONE, "probe"); - pe_rsc_info(rsc, "Probe found %s active on %s at %s", - rsc->id, pe__node_name(node), - last_change_str(xml_op)); -@@ -3479,7 +3502,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - if (is_probe || (target_rc == *rc) - || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { - -- *status = PCMK_EXEC_DONE; -+ remap_because(status, &why, PCMK_EXEC_DONE, "exit status"); - rsc->role = RSC_ROLE_STOPPED; - - /* clear any previous failure actions */ -@@ -3490,7 +3513,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - - case PCMK_OCF_RUNNING_PROMOTED: - if (is_probe && (*rc != target_rc)) { -- *status = PCMK_EXEC_DONE; -+ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); - pe_rsc_info(rsc, - "Probe found %s active and promoted on %s at %s", - rsc->id, pe__node_name(node), -@@ -3502,11 +3525,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - case PCMK_OCF_DEGRADED_PROMOTED: - case PCMK_OCF_FAILED_PROMOTED: - rsc->role = RSC_ROLE_PROMOTED; -- *status = PCMK_EXEC_ERROR; -+ remap_because(status, &why, PCMK_EXEC_ERROR, "exit 
status"); - break; - - case PCMK_OCF_NOT_CONFIGURED: -- *status = PCMK_EXEC_ERROR_FATAL; -+ remap_because(status, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); - break; - - case PCMK_OCF_UNIMPLEMENT_FEATURE: -@@ -3517,9 +3540,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - - if (interval_ms == 0) { - check_recoverable(rsc, node, task, *rc, xml_op); -- *status = PCMK_EXEC_ERROR_HARD; -+ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, -+ "exit status"); - } else { -- *status = PCMK_EXEC_NOT_SUPPORTED; -+ remap_because(status, &why, PCMK_EXEC_NOT_SUPPORTED, -+ "exit status"); - } - } - break; -@@ -3528,7 +3553,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - case PCMK_OCF_INVALID_PARAM: - case PCMK_OCF_INSUFFICIENT_PRIV: - check_recoverable(rsc, node, task, *rc, xml_op); -- *status = PCMK_EXEC_ERROR_HARD; -+ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, "exit status"); - break; - - default: -@@ -3537,13 +3562,21 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - "on %s at %s as failure", - *rc, task, rsc->id, pe__node_name(node), - last_change_str(xml_op)); -- *status = PCMK_EXEC_ERROR; -+ remap_because(status, &why, PCMK_EXEC_ERROR, -+ "unknown exit status"); - } - break; - } - -- pe_rsc_trace(rsc, "Remapped %s status to '%s'", -- key, pcmk_exec_status_str(*status)); -+remap_done: -+ if (why != NULL) { -+ pe_rsc_trace(rsc, -+ "Remapped %s result from [%s: %s] to [%s: %s] " -+ "because of %s", -+ key, pcmk_exec_status_str(orig_exec_status), -+ crm_exit_str(orig_exit_status), -+ pcmk_exec_status_str(*status), crm_exit_str(*rc), why); -+ } - } - - // return TRUE if start or monitor last failure but parameters changed -@@ -3947,9 +3980,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - parent = uber_parent(rsc); - } - -- pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", -- task_key, task, task_id, status, rc, 
pe__node_name(node), -- role2text(rsc->role)); -+ pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", -+ ID(xml_op), task, task_id, pe__node_name(node), -+ pcmk_exec_status_str(status), crm_exit_str(rc)); - - if (node->details->unclean) { - pe_rsc_trace(rsc, -@@ -4077,9 +4110,6 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - goto done; - - case PCMK_EXEC_DONE: -- pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", -- task, rsc->id, pe__node_name(node), -- last_change_str(xml_op), ID(xml_op)); - update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); - goto done; - -@@ -4175,9 +4205,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - } - - done: -- pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", -- rsc->id, task, role2text(rsc->role), -- role2text(rsc->next_role)); -+ pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", -+ rsc->id, pe__node_name(node), ID(xml_op), -+ role2text(rsc->role), role2text(rsc->next_role)); - } - - static void --- -2.31.1 - -From 5a1d2a3ba58fa73225433dab40cee0a6e0ef9bda Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 1 Feb 2023 12:08:55 -0600 -Subject: [PATCH 03/14] Low: scheduler: improve migration history validation - -Instead of a simple CRM_CHECK(), functionize parsing the source and target node -names from a migration action's resource history entry. This reduces -duplication and allows us to log more helpful errors. - -Also, CRM_CHECK() tries to dump core for debugging, and that's not helpful for -corrupted CIB entries. 
---- - lib/pengine/unpack.c | 87 ++++++++++++++++++++++++++++++++++++++------ - 1 file changed, 75 insertions(+), 12 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index b7b2873..cd1b038 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2786,6 +2786,60 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, - || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, - data_set); - } -+ -+/*! -+ * \internal -+ * \brief Parse migration source and target node names from history entry -+ * -+ * \param[in] entry Resource history entry for a migration action -+ * \param[in] source_node If not NULL, source must match this node -+ * \param[in] target_node If not NULL, target must match this node -+ * \param[out] source_name Where to store migration source node name -+ * \param[out] target_name Where to store migration target node name -+ * -+ * \return Standard Pacemaker return code -+ */ -+static int -+get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, -+ const pe_node_t *target_node, -+ const char **source_name, const char **target_name) -+{ -+ const char *id = ID(entry); -+ -+ if (id == NULL) { -+ crm_err("Ignoring resource history entry without ID"); -+ return pcmk_rc_unpack_error; -+ } -+ -+ *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE); -+ *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET); -+ if ((*source_name == NULL) || (*target_name == NULL)) { -+ crm_err("Ignoring resource history entry %s without " -+ XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET, -+ id); -+ return pcmk_rc_unpack_error; -+ } -+ -+ if ((source_node != NULL) -+ && !pcmk__str_eq(*source_name, source_node->details->uname, -+ pcmk__str_casei|pcmk__str_null_matches)) { -+ crm_err("Ignoring resource history entry %s because " -+ XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", -+ id, pcmk__s(*source_name, ""), pe__node_name(source_node)); -+ 
return pcmk_rc_unpack_error; -+ } -+ -+ if ((target_node != NULL) -+ && !pcmk__str_eq(*target_name, target_node->details->uname, -+ pcmk__str_casei|pcmk__str_null_matches)) { -+ crm_err("Ignoring resource history entry %s because " -+ XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", -+ id, pcmk__s(*target_name, ""), pe__node_name(target_node)); -+ return pcmk_rc_unpack_error; -+ } -+ -+ return pcmk_rc_ok; -+} - - static void - unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, -@@ -2834,13 +2888,16 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe_node_t *target_node = NULL; - pe_node_t *source_node = NULL; - xmlNode *migrate_from = NULL; -- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); -- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); -+ const char *source = NULL; -+ const char *target = NULL; - bool source_newer_op = false; - bool target_newer_state = false; - -- // Sanity check -- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); -+ // Get source and target node names from XML -+ if (get_migration_node_names(xml_op, node, NULL, &source, -+ &target) != pcmk_rc_ok) { -+ return; -+ } - - /* If there's any newer non-monitor operation on the source, this migrate_to - * potentially no longer matters for the source. 
-@@ -2949,11 +3006,14 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe_working_set_t *data_set) - { - xmlNode *target_migrate_from = NULL; -- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); -- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); -+ const char *source = NULL; -+ const char *target = NULL; - -- // Sanity check -- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); -+ // Get source and target node names from XML -+ if (get_migration_node_names(xml_op, node, NULL, &source, -+ &target) != pcmk_rc_ok) { -+ return; -+ } - - /* If a migration failed, we have to assume the resource is active. Clones - * are not allowed to migrate, so role can't be promoted. -@@ -3001,11 +3061,14 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, - xmlNode *xml_op, pe_working_set_t *data_set) - { - xmlNode *source_migrate_to = NULL; -- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); -- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); -+ const char *source = NULL; -+ const char *target = NULL; - -- // Sanity check -- CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); -+ // Get source and target node names from XML -+ if (get_migration_node_names(xml_op, NULL, node, &source, -+ &target) != pcmk_rc_ok) { -+ return; -+ } - - /* If a migration failed, we have to assume the resource is active. Clones - * are not allowed to migrate, so role can't be promoted. --- -2.31.1 - -From 5139e5369769e733b05bc28940d3dccb4f7fca95 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 14:30:16 -0600 -Subject: [PATCH 04/14] Refactor: scheduler: functionize adding a dangling - migration - -... 
for code isolation and readability ---- - lib/pengine/unpack.c | 31 +++++++++++++++++++++++-------- - 1 file changed, 23 insertions(+), 8 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index cd1b038..fa7c2cc 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2841,6 +2841,28 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, - return pcmk_rc_ok; - } - -+/* -+ * \internal -+ * \brief Add a migration source to a resource's list of dangling migrations -+ * -+ * If the migrate_to and migrate_from actions in a live migration both -+ * succeeded, but there is no stop on the source, the migration is considered -+ * "dangling." Add the source to the resource's dangling migration list, which -+ * will be used to schedule a stop on the source without affecting the target. -+ * -+ * \param[in,out] rsc Resource involved in migration -+ * \param[in] node Migration source -+ */ -+static void -+add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) -+{ -+ pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s", -+ rsc->id, pe__node_name(node)); -+ rsc->role = RSC_ROLE_STOPPED; -+ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, -+ (gpointer) node); -+} -+ - static void - unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe_working_set_t *data_set) -@@ -2941,14 +2963,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - - if (migrate_from && from_rc == PCMK_OCF_OK - && (from_status == PCMK_EXEC_DONE)) { -- /* The migrate_to and migrate_from both succeeded, so mark the migration -- * as "dangling". This will be used to schedule a stop action on the -- * source without affecting the target. 
-- */ -- pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), -- source); -- rsc->role = RSC_ROLE_STOPPED; -- rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); -+ add_dangling_migration(rsc, node); - - } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed - /* If the resource has newer state on the target, this migrate_to no --- -2.31.1 - -From da71c04463d31338dd5da54d1d48b53e413716dc Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 16:57:55 -0600 -Subject: [PATCH 05/14] Refactor: scheduler: check for dangling migration - before setting role - -Previously, unpack_migrate_to_success() set rsc->role = RSC_ROLE_STARTED -then checked for dangling migration, which would reset it to RSC_ROLE_STOPPED. - -For clarity, do the dangling migration check first. ---- - lib/pengine/unpack.c | 47 ++++++++++++++++++++++++-------------------- - 1 file changed, 26 insertions(+), 21 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index fa7c2cc..b858b59 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2905,8 +2905,8 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - * migration is considered to be "dangling". Schedule a stop on the source - * in this case. - */ -- int from_rc = 0; -- int from_status = 0; -+ int from_rc = PCMK_OCF_OK; -+ int from_status = PCMK_EXEC_PENDING; - pe_node_t *target_node = NULL; - pe_node_t *source_node = NULL; - xmlNode *migrate_from = NULL; -@@ -2930,12 +2930,17 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - // Check whether there was a migrate_from action on the target - migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, - source, -1, data_set); -- -- /* Even if there's a newer non-monitor operation on the source, we still -- * need to check how this migrate_to might matter for the target. 
-- */ -- if (source_newer_op && migrate_from) { -- return; -+ if (migrate_from != NULL) { -+ if (source_newer_op) { -+ /* There's a newer non-monitor operation on the source and a -+ * migrate_from on the target, so this migrate_to is irrelevant to -+ * the resource's state. -+ */ -+ return; -+ } -+ crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); -+ crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, -+ &from_status); - } - - /* If the resource has newer state on the target after the migration -@@ -2948,24 +2953,24 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - return; - } - -- // Clones are not allowed to migrate, so role can't be promoted -+ /* Check for dangling migration (migrate_from succeeded but stop not done). -+ * We know there's no stop because we already returned if the target has a -+ * migrate_from and the source has any newer non-monitor operation. -+ */ -+ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { -+ add_dangling_migration(rsc, node); -+ return; -+ } -+ -+ /* Without newer state, this migrate_to implies the resource is active. -+ * (Clones are not allowed to migrate, so role can't be promoted.) 
-+ */ - rsc->role = RSC_ROLE_STARTED; - - target_node = pe_find_node(data_set->nodes, target); - source_node = pe_find_node(data_set->nodes, source); - -- if (migrate_from) { -- crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); -- crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); -- pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", -- ID(migrate_from), target, from_status, from_rc); -- } -- -- if (migrate_from && from_rc == PCMK_OCF_OK -- && (from_status == PCMK_EXEC_DONE)) { -- add_dangling_migration(rsc, node); -- -- } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed -+ if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target - /* If the resource has newer state on the target, this migrate_to no - * longer matters for the target. - */ --- -2.31.1 - -From d98a2687d68747b0598554939dea05c420456a12 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 17:05:50 -0600 -Subject: [PATCH 06/14] Refactor: scheduler: avoid duplication of - active-on-target check - ---- - lib/pengine/unpack.c | 24 ++++++------------------ - 1 file changed, 6 insertions(+), 18 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index b858b59..8cfc0ef 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2914,6 +2914,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - const char *target = NULL; - bool source_newer_op = false; - bool target_newer_state = false; -+ bool active_on_target = false; - - // Get source and target node names from XML - if (get_migration_node_names(xml_op, node, NULL, &source, -@@ -2969,23 +2970,14 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - - target_node = pe_find_node(data_set->nodes, target); - source_node = pe_find_node(data_set->nodes, source); -+ active_on_target = !target_newer_state && (target_node != NULL) -+ && target_node->details->online; - - if 
(from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target -- /* If the resource has newer state on the target, this migrate_to no -- * longer matters for the target. -- */ -- if (!target_newer_state -- && target_node && target_node->details->online) { -- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, -- target_node->details->online); -+ if (active_on_target) { - native_add_running(rsc, target_node, data_set, TRUE); -- - } else { -- /* With the earlier bail logic, migrate_from != NULL here implies -- * source_newer_op is false, meaning this migrate_to still matters -- * for the source. -- * Consider it failed here - forces a restart, prevents migration -- */ -+ // Mark resource as failed, require recovery, and prevent migration - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); - } -@@ -2994,11 +2986,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - /* If the resource has newer state on the target, this migrate_to no - * longer matters for the target. - */ -- if (!target_newer_state -- && target_node && target_node->details->online) { -- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, -- target_node->details->online); -- -+ if (active_on_target) { - native_add_running(rsc, target_node, data_set, FALSE); - if (source_node && source_node->details->online) { - /* This is a partial migration: the migrate_to completed --- -2.31.1 - -From ae145309e3fdb26608e99f6d1fe1a7859d98efd0 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 17:07:58 -0600 -Subject: [PATCH 07/14] Refactor: scheduler: improve unpacking of successful - migrate_to - -Improve log messages, comments, and formatting, and avoid doing things until -needed, to improve efficiency of early returns. 
---- - lib/pengine/unpack.c | 109 +++++++++++++++++++------------------------ - 1 file changed, 48 insertions(+), 61 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 8cfc0ef..224b7b5 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2867,48 +2867,40 @@ static void - unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe_working_set_t *data_set) - { -- /* A successful migration sequence is: -- * migrate_to on source node -- * migrate_from on target node -- * stop on source node -+ /* A complete migration sequence is: -+ * 1. migrate_to on source node (which succeeded if we get to this function) -+ * 2. migrate_from on target node -+ * 3. stop on source node - * -- * But there could be scenarios like (It's easier to produce with cluster -- * property batch-limit=1): -- * -- * - rscA is live-migrating from node1 to node2. -- * -- * - Before migrate_to on node1 returns, put node2 into standby. -- * -- * - Transition aborts upon return of successful migrate_to on node1. New -- * transition is going to stop the rscA on both nodes and start it on -- * node1. -+ * If no migrate_from has happened, the migration is considered to be -+ * "partial". If the migrate_from succeeded but no stop has happened, the -+ * migration is considered to be "dangling". - * -- * - While it is stopping on node1, run something that is going to make -- * the transition abort again like: -- * crm_resource --resource rscA --ban --node node2 -+ * If a successful migrate_to and stop have happened on the source node, we -+ * still need to check for a partial migration, due to scenarios (easier to -+ * produce with batch-limit=1) like: - * -- * - Transition aborts upon return of stop on node1. -+ * - A resource is migrating from node1 to node2, and a migrate_to is -+ * initiated for it on node1. - * -- * Now although there's a stop on node1, it's still a partial migration and -- * rscA is still potentially active on node2. 
-+ * - node2 goes into standby mode while the migrate_to is pending, which -+ * aborts the transition. - * -- * So even if a migrate_to is followed by a stop, we still need to check -- * whether there's a corresponding migrate_from or any newer operation on -- * the target. -+ * - Upon completion of the migrate_to, a new transition schedules a stop -+ * on both nodes and a start on node1. - * -- * If no migrate_from has happened, the migration is considered to be -- * "partial". If the migrate_from failed, make sure the resource gets -- * stopped on both source and target (if up). -+ * - If the new transition is aborted for any reason while the resource is -+ * stopping on node1, the transition after that stop completes will see -+ * the migrate_from and stop on the source, but it's still a partial -+ * migration, and the resource must be stopped on node2 because it is -+ * potentially active there due to the migrate_to. - * -- * If the migrate_to and migrate_from both succeeded (which also implies the -- * resource is no longer running on the source), but there is no stop, the -- * migration is considered to be "dangling". Schedule a stop on the source -- * in this case. -+ * We also need to take into account that either node's history may be -+ * cleared at any point in the migration process. - */ - int from_rc = PCMK_OCF_OK; - int from_status = PCMK_EXEC_PENDING; - pe_node_t *target_node = NULL; -- pe_node_t *source_node = NULL; - xmlNode *migrate_from = NULL; - const char *source = NULL; - const char *target = NULL; -@@ -2922,13 +2914,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - return; - } - -- /* If there's any newer non-monitor operation on the source, this migrate_to -- * potentially no longer matters for the source. 
-- */ -+ // Check for newer state on the source - source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, - data_set); - -- // Check whether there was a migrate_from action on the target -+ // Check for a migrate_from action from this source on the target - migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, - source, -1, data_set); - if (migrate_from != NULL) { -@@ -2944,12 +2934,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - &from_status); - } - -- /* If the resource has newer state on the target after the migration -- * events, this migrate_to no longer matters for the target. -+ /* If the resource has newer state on both the source and target after the -+ * migration events, this migrate_to is irrelevant to the resource's state. - */ - target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, - migrate_from, data_set); -- - if (source_newer_op && target_newer_state) { - return; - } -@@ -2969,7 +2958,6 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - rsc->role = RSC_ROLE_STARTED; - - target_node = pe_find_node(data_set->nodes, target); -- source_node = pe_find_node(data_set->nodes, source); - active_on_target = !target_newer_state && (target_node != NULL) - && target_node->details->online; - -@@ -2981,31 +2969,30 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); - } -+ return; -+ } - -- } else { // Pending, or complete but erased -- /* If the resource has newer state on the target, this migrate_to no -- * longer matters for the target. 
-- */ -- if (active_on_target) { -- native_add_running(rsc, target_node, data_set, FALSE); -- if (source_node && source_node->details->online) { -- /* This is a partial migration: the migrate_to completed -- * successfully on the source, but the migrate_from has not -- * completed. Remember the source and target; if the newly -- * chosen target remains the same when we schedule actions -- * later, we may continue with the migration. -- */ -- rsc->partial_migration_target = target_node; -- rsc->partial_migration_source = source_node; -- } -- } else if (!source_newer_op) { -- /* This migrate_to matters for the source only if it's the last -- * non-monitor operation here. -- * Consider it failed here - forces a restart, prevents migration -+ // The migrate_from is pending, complete but erased, or to be scheduled -+ -+ if (active_on_target) { -+ pe_node_t *source_node = pe_find_node(data_set->nodes, source); -+ -+ native_add_running(rsc, target_node, data_set, FALSE); -+ if ((source_node != NULL) && source_node->details->online) { -+ /* This is a partial migration: the migrate_to completed -+ * successfully on the source, but the migrate_from has not -+ * completed. Remember the source and target; if the newly -+ * chosen target remains the same when we schedule actions -+ * later, we may continue with the migration. 
- */ -- pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); -- pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); -+ rsc->partial_migration_target = target_node; -+ rsc->partial_migration_source = source_node; - } -+ -+ } else if (!source_newer_op) { -+ // Mark resource as failed, require recovery, and prevent migration -+ pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); -+ pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); - } - } - --- -2.31.1 - -From 7d63ed8d52f64d2523367cff36bf77bd85296bd9 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 17:14:57 -0600 -Subject: [PATCH 08/14] Refactor: scheduler: drop redundant argument from - unpack_migrate_to_success() - ---- - lib/pengine/unpack.c | 19 +++++++++---------- - 1 file changed, 9 insertions(+), 10 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 224b7b5..6222115 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2864,8 +2864,7 @@ add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) - } - - static void --unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, -- pe_working_set_t *data_set) -+unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) - { - /* A complete migration sequence is: - * 1. 
migrate_to on source node (which succeeded if we get to this function) -@@ -2916,11 +2915,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - - // Check for newer state on the source - source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, -- data_set); -+ rsc->cluster); - - // Check for a migrate_from action from this source on the target - migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, -- source, -1, data_set); -+ source, -1, rsc->cluster); - if (migrate_from != NULL) { - if (source_newer_op) { - /* There's a newer non-monitor operation on the source and a -@@ -2938,7 +2937,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - * migration events, this migrate_to is irrelevant to the resource's state. - */ - target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, -- migrate_from, data_set); -+ migrate_from, rsc->cluster); - if (source_newer_op && target_newer_state) { - return; - } -@@ -2957,13 +2956,13 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - */ - rsc->role = RSC_ROLE_STARTED; - -- target_node = pe_find_node(data_set->nodes, target); -+ target_node = pe_find_node(rsc->cluster->nodes, target); - active_on_target = !target_newer_state && (target_node != NULL) - && target_node->details->online; - - if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target - if (active_on_target) { -- native_add_running(rsc, target_node, data_set, TRUE); -+ native_add_running(rsc, target_node, rsc->cluster, TRUE); - } else { - // Mark resource as failed, require recovery, and prevent migration - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); -@@ -2975,9 +2974,9 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - // The migrate_from is pending, complete but erased, or to be scheduled - - if (active_on_target) { -- pe_node_t *source_node = pe_find_node(data_set->nodes, 
source); -+ pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); - -- native_add_running(rsc, target_node, data_set, FALSE); -+ native_add_running(rsc, target_node, rsc->cluster, FALSE); - if ((source_node != NULL) && source_node->details->online) { - /* This is a partial migration: the migrate_to completed - * successfully on the source, but the migrate_from has not -@@ -3946,7 +3945,7 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, c - clear_past_failure = TRUE; - - } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { -- unpack_migrate_to_success(rsc, node, xml_op, data_set); -+ unpack_migrate_to_success(rsc, node, xml_op); - - } else if (rsc->role < RSC_ROLE_STARTED) { - pe_rsc_trace(rsc, "%s active on %s", rsc->id, pe__node_name(node)); --- -2.31.1 - -From 3be487f87bf5e26277379148922525fd98d29681 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 2 Feb 2023 09:13:30 -0600 -Subject: [PATCH 09/14] Doc: scheduler: clarify comments about unpacking - migration history - -per review ---- - lib/pengine/unpack.c | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 6222115..ec2cf26 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2791,9 +2791,9 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, - * \internal - * \brief Parse migration source and target node names from history entry - * -- * \param[in] entry Resource history entry for a migration action -- * \param[in] source_node If not NULL, source must match this node -- * \param[in] target_node If not NULL, target must match this node -+ * \param[in] entry Resource history entry for a migration action -+ * \param[in] source_node If not NULL, source must match this node -+ * \param[in] target_node If not NULL, target must match this node - * \param[out] source_name Where to store migration source node name - * 
\param[out] target_name Where to store migration target node name - * -@@ -2825,7 +2825,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, - pcmk__str_casei|pcmk__str_null_matches)) { - crm_err("Ignoring resource history entry %s because " - XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", -- id, pcmk__s(*source_name, ""), pe__node_name(source_node)); -+ id, *source_name, pe__node_name(source_node)); - return pcmk_rc_unpack_error; - } - -@@ -2834,7 +2834,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, - pcmk__str_casei|pcmk__str_null_matches)) { - crm_err("Ignoring resource history entry %s because " - XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", -- id, pcmk__s(*target_name, ""), pe__node_name(target_node)); -+ id, *target_name, pe__node_name(target_node)); - return pcmk_rc_unpack_error; - } - -@@ -2890,7 +2890,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) - * - * - If the new transition is aborted for any reason while the resource is - * stopping on node1, the transition after that stop completes will see -- * the migrate_from and stop on the source, but it's still a partial -+ * the migrate_to and stop on the source, but it's still a partial - * migration, and the resource must be stopped on node2 because it is - * potentially active there due to the migrate_to. 
- * -@@ -3425,9 +3425,9 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, - * \brief Update an integer value and why - * - * \param[in,out] i Pointer to integer to update -- * \param[in,out] why Where to store reason for update -+ * \param[out] why Where to store reason for update - * \param[in] value New value -- * \param[in,out] reason Description of why value was changed -+ * \param[in] reason Description of why value was changed - */ - static inline void - remap_because(int *i, const char **why, int value, const char *reason) -@@ -3456,7 +3456,7 @@ remap_because(int *i, const char **why, int value, const char *reason) - * \param[in] data_set Current cluster working set - * \param[in,out] on_fail What should be done about the result - * \param[in] target_rc Expected return code of operation -- * \param[in,out] rc Actual return code of operation -+ * \param[in,out] rc Actual return code of operation (treated as OCF) - * \param[in,out] status Operation execution status - * - * \note If the result is remapped and the node is not shutting down or failed, -@@ -3548,7 +3548,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - switch (*rc) { - case PCMK_OCF_OK: - if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { -- remap_because(status, &why,PCMK_EXEC_DONE, "probe"); -+ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); - pe_rsc_info(rsc, "Probe found %s active on %s at %s", - rsc->id, pe__node_name(node), - last_change_str(xml_op)); --- -2.31.1 - -From 3ef6c84a7b0dd434731e72d91f2724bdb52e292e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 2 Feb 2023 09:42:01 -0600 -Subject: [PATCH 10/14] Refactor: scheduler: improve xpath efficiency when - unpacking - -Using "//" means that every child must be searched recursively. If we know the -exact path, we should explicitly specify it. 
---- - lib/pengine/unpack.c | 20 ++++++++++++-------- - 1 file changed, 12 insertions(+), 8 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index ec2cf26..8aead58 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2571,6 +2571,13 @@ set_node_score(gpointer key, gpointer value, gpointer user_data) - node->weight = *score; - } - -+#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ -+ "/" XML_CIB_TAG_STATE -+#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \ -+ "/" XML_LRM_TAG_RESOURCES \ -+ "/" XML_LRM_TAG_RESOURCE -+#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP -+ - static xmlNode * - find_lrm_op(const char *resource, const char *op, const char *node, const char *source, - int target_rc, pe_working_set_t *data_set) -@@ -2583,10 +2590,9 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * - - xpath = g_string_sized_new(256); - pcmk__g_strcat(xpath, -- "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='", node, "']" -- "//" XML_LRM_TAG_RESOURCE -- "[@" XML_ATTR_ID "='", resource, "']" -- "/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", -+ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']" -+ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']" -+ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", - NULL); - - /* Need to check against transition_magic too? 
*/ -@@ -2631,10 +2637,8 @@ find_lrm_resource(const char *rsc_id, const char *node_name, - - xpath = g_string_sized_new(256); - pcmk__g_strcat(xpath, -- "//" XML_CIB_TAG_STATE -- "[@" XML_ATTR_UNAME "='", node_name, "']" -- "//" XML_LRM_TAG_RESOURCE -- "[@" XML_ATTR_ID "='", rsc_id, "']", -+ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" -+ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']", - NULL); - - xml = get_xpath_object((const char *) xpath->str, data_set->input, --- -2.31.1 - -From 1869f99bc8eeedb976f96f0f1cc3d4dd86735504 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 2 Feb 2023 10:25:53 -0600 -Subject: [PATCH 11/14] Low: scheduler: unknown_on_node() should ignore pending - actions - -Previously, unknown_on_node() looked for any lrm_rsc_op at all to decide -whether a resource is known on a node. However if the only action is pending, -the resource is not yet known. - -Also drop a redundant argument and add a doxygen block. (The rsc argument is -not const due to a getDocPtr() call in the chain, as well as libxml2 calls that -are likely const in practice but aren't marked as such.) ---- - lib/pengine/unpack.c | 37 +++++++++++++++++++++++++------------ - 1 file changed, 25 insertions(+), 12 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 8aead58..14dc202 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2648,19 +2648,32 @@ find_lrm_resource(const char *rsc_id, const char *node_name, - return xml; - } - -+/*! 
-+ * \internal -+ * \brief Check whether a resource has no completed action history on a node -+ * -+ * \param[in,out] rsc Resource to check -+ * \param[in] node_name Node to check -+ * -+ * \return true if \p rsc_id is unknown on \p node_name, otherwise false -+ */ - static bool --unknown_on_node(const char *rsc_id, const char *node_name, -- pe_working_set_t *data_set) -+unknown_on_node(pe_resource_t *rsc, const char *node_name) - { -- xmlNode *lrm_resource = NULL; -- -- lrm_resource = find_lrm_resource(rsc_id, node_name, data_set); -+ bool result = false; -+ xmlXPathObjectPtr search; -+ GString *xpath = g_string_sized_new(256); - -- /* If the resource has no lrm_rsc_op history on the node, that means its -- * state is unknown there. -- */ -- return (lrm_resource == NULL -- || first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP) == NULL); -+ pcmk__g_strcat(xpath, -+ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" -+ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']" -+ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']", -+ NULL); -+ search = xpath_search(rsc->cluster->input, (const char *) xpath->str); -+ result = (numXpathResults(search) == 0); -+ freeXpathObject(search); -+ g_string_free(xpath, TRUE); -+ return result; - } - - /*! -@@ -3027,7 +3040,7 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - * Don't just consider it running there. We will get back here anyway in - * case the probe detects it's running there. - */ -- !unknown_on_node(rsc->id, target, data_set) -+ !unknown_on_node(rsc, target) - /* If the resource has newer state on the target after the migration - * events, this migrate_to no longer matters for the target. - */ -@@ -3082,7 +3095,7 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, - * Don't just consider it running there. We will get back here anyway in - * case the probe detects it's running there. 
- */ -- !unknown_on_node(rsc->id, source, data_set) -+ !unknown_on_node(rsc, source) - /* If the resource has newer state on the source after the migration - * events, this migrate_from no longer matters for the source. - */ --- -2.31.1 - -From 22fbab8e0d449d2accb231dfcec94294ded27f4e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 12:11:19 -0600 -Subject: [PATCH 12/14] Test: scheduler: add regression test for migration - intermediary - -As of this commit, the cluster wrongly restarts the migrated resource ---- - cts/cts-scheduler.in | 3 + - .../dot/migration-intermediary-cleaned.dot | 46 ++ - .../exp/migration-intermediary-cleaned.exp | 316 +++++++++++ - .../migration-intermediary-cleaned.scores | 201 +++++++ - .../migration-intermediary-cleaned.summary | 94 ++++ - .../xml/migration-intermediary-cleaned.xml | 513 ++++++++++++++++++ - 6 files changed, 1173 insertions(+) - create mode 100644 cts/scheduler/dot/migration-intermediary-cleaned.dot - create mode 100644 cts/scheduler/exp/migration-intermediary-cleaned.exp - create mode 100644 cts/scheduler/scores/migration-intermediary-cleaned.scores - create mode 100644 cts/scheduler/summary/migration-intermediary-cleaned.summary - create mode 100644 cts/scheduler/xml/migration-intermediary-cleaned.xml - -diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in -index feb5dc8..9899c36 100644 ---- a/cts/cts-scheduler.in -+++ b/cts/cts-scheduler.in -@@ -387,6 +387,9 @@ TESTS = [ - [ "probe-target-of-failed-migrate_to-1", "Failed migrate_to, target rejoins" ], - [ "probe-target-of-failed-migrate_to-2", "Failed migrate_to, target rejoined and probed" ], - [ "partial-live-migration-multiple-active", "Prevent running on multiple nodes due to partial live migration" ], -+ [ "migration-intermediary-cleaned", -+ "Probe live-migration intermediary with no history" -+ ], - [ "bug-lf-2422", "Dependency on partially active group - stop ocfs:*" ], - ], - [ -diff --git 
a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot -new file mode 100644 -index 0000000..09568d0 ---- /dev/null -+++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot -@@ -0,0 +1,46 @@ -+ digraph "g" { -+"Connectivity_running_0" [ style=bold color="green" fontcolor="orange"] -+"Connectivity_start_0" -> "Connectivity_running_0" [ style = bold] -+"Connectivity_start_0" -> "ping-1_start_0 rhel8-2" [ style = bold] -+"Connectivity_start_0" [ style=bold color="green" fontcolor="orange"] -+"FencingFail_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] -+"migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] -+"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] -+"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] -+"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] -+"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] -+"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] -+"petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] -+"ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"ping-1_monitor_60000 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"ping-1_start_0 rhel8-2" -> "Connectivity_running_0" [ style = bold] -+"ping-1_start_0 rhel8-2" -> "ping-1_monitor_60000 rhel8-2" [ style = 
bold] -+"ping-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"r192.168.122.207_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"r192.168.122.208_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-1_monitor_0 rhel8-2" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] -+"rsc_rhel8-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-1_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-1_start_0 rhel8-2" -> "rsc_rhel8-1_monitor_5000 rhel8-2" [ style = bold] -+"rsc_rhel8-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-1_stop_0 rhel8-3" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] -+"rsc_rhel8-1_stop_0 rhel8-3" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-2_monitor_0 rhel8-2" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] -+"rsc_rhel8-2_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-2_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-2_start_0 rhel8-2" -> "rsc_rhel8-2_monitor_5000 rhel8-2" [ style = bold] -+"rsc_rhel8-2_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-2_stop_0 rhel8-4" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] -+"rsc_rhel8-2_stop_0 rhel8-4" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-3_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-4_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-5_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"stateful-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp -new file mode 100644 -index 0000000..28fa776 ---- /dev/null -+++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp -@@ -0,0 +1,316 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores -new file mode 100644 -index 0000000..b3b8dff ---- /dev/null -+++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores -@@ -0,0 +1,201 @@ -+ -+pcmk__clone_allocate: Connectivity allocation score on rhel8-1: 0 -+pcmk__clone_allocate: Connectivity allocation score on rhel8-2: 0 -+pcmk__clone_allocate: Connectivity allocation score on rhel8-3: 0 -+pcmk__clone_allocate: Connectivity allocation score on rhel8-4: 0 -+pcmk__clone_allocate: Connectivity allocation score on rhel8-5: 0 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-2: 0 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-3: 1 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-2: 0 
-+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-4: 1 -+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-2: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-5: 1 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-2: 0 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-2: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-3: 11 -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:1 allocation score on 
rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-4: 6 -+pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-5: 6 -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-3: 10 -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-4: 5 -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-5: 5 -+pcmk__group_assign: group-1 allocation score on rhel8-1: 0 -+pcmk__group_assign: group-1 allocation score on rhel8-2: 0 -+pcmk__group_assign: group-1 allocation score on rhel8-3: 0 -+pcmk__group_assign: group-1 allocation score on rhel8-4: 0 -+pcmk__group_assign: group-1 allocation score on rhel8-5: 0 -+pcmk__group_assign: petulant allocation score on rhel8-1: 0 -+pcmk__group_assign: petulant allocation score on rhel8-2: 0 -+pcmk__group_assign: petulant allocation score on rhel8-3: 0 -+pcmk__group_assign: petulant allocation score on rhel8-4: 0 -+pcmk__group_assign: petulant allocation score on rhel8-5: 0 -+pcmk__group_assign: r192.168.122.207 allocation score on rhel8-1: 0 -+pcmk__group_assign: 
r192.168.122.207 allocation score on rhel8-2: 0 -+pcmk__group_assign: r192.168.122.207 allocation score on rhel8-3: 0 -+pcmk__group_assign: r192.168.122.207 allocation score on rhel8-4: 0 -+pcmk__group_assign: r192.168.122.207 allocation score on rhel8-5: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-1: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-2: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-3: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-4: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-1: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-2: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-3: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-4: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-5: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-1: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-2: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-3: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-4: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-5: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-1: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-2: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-3: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-4: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-5: 0 -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-3: 0 -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-5: -INFINITY 
-+pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-5: 0 -+pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 -+pcmk__primitive_assign: petulant allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: petulant allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-3: 1 -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-4: 1 -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-5: 1 -+pcmk__primitive_assign: ping-1:3 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:3 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: ping-1:3 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: ping-1:3 allocation score on 
rhel8-4: -INFINITY -+pcmk__primitive_assign: ping-1:3 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-3: 11 -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-1: 100 -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-1: 0 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-2: 100 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: rsc_rhel8-3 
allocation score on rhel8-1: 0 -+pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-3: 100 -+pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-1: 0 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-4: 100 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-1: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-5: 100 -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-3: 11 -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-4: 6 -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-3: -INFINITY 
-+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-5: 6 -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-5: -INFINITY -+stateful-1:0 promotion score on rhel8-3: 10 -+stateful-1:1 promotion score on rhel8-4: 5 -+stateful-1:2 promotion score on rhel8-5: 5 -+stateful-1:3 promotion score on none: 0 -+stateful-1:4 promotion score on none: 0 -diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary -new file mode 100644 -index 0000000..5de1355 ---- /dev/null -+++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary -@@ -0,0 +1,94 @@ -+Using the original execution date of: 2023-01-19 21:05:59Z -+Current cluster status: -+ * Node List: -+ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] -+ * OFFLINE: [ rhel8-1 ] -+ -+ * Full List of Resources: -+ * Fencing (stonith:fence_xvm): Started rhel8-3 -+ * FencingPass (stonith:fence_dummy): Started rhel8-4 -+ * FencingFail (stonith:fence_dummy): Started rhel8-5 -+ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-4 -+ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * rsc_rhel8-4 
(ocf:heartbeat:IPaddr2): Started rhel8-4 -+ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 -+ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] -+ * Clone Set: Connectivity [ping-1]: -+ * Started: [ rhel8-3 rhel8-4 rhel8-5 ] -+ * Stopped: [ rhel8-1 rhel8-2 ] -+ * Clone Set: promotable-1 [stateful-1] (promotable): -+ * Promoted: [ rhel8-3 ] -+ * Unpromoted: [ rhel8-4 rhel8-5 ] -+ * Stopped: [ rhel8-1 rhel8-2 ] -+ * Resource Group: group-1: -+ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 -+ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 -+ -+Transition Summary: -+ * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) -+ * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) -+ * Restart migrator ( rhel8-5 ) -+ * Start ping-1:3 ( rhel8-2 ) -+ -+Executing Cluster Transition: -+ * Resource action: Fencing monitor on rhel8-2 -+ * Resource action: FencingPass monitor on rhel8-2 -+ * Resource action: FencingFail monitor on rhel8-2 -+ * Resource action: rsc_rhel8-1 stop on rhel8-3 -+ * Resource action: rsc_rhel8-1 monitor on rhel8-2 -+ * Resource action: rsc_rhel8-2 stop on rhel8-4 -+ * Resource action: rsc_rhel8-2 monitor on rhel8-2 -+ * Resource action: rsc_rhel8-3 monitor on rhel8-2 -+ * Resource action: rsc_rhel8-4 monitor on rhel8-2 -+ * Resource action: rsc_rhel8-5 monitor on rhel8-2 -+ * Resource action: migrator stop on rhel8-2 -+ * Resource action: migrator stop on rhel8-5 -+ * Resource action: migrator monitor on rhel8-2 -+ * Resource action: ping-1 monitor on rhel8-2 -+ * Pseudo action: Connectivity_start_0 -+ * Resource action: stateful-1 monitor on rhel8-2 -+ * Resource action: r192.168.122.207 monitor on rhel8-2 -+ * Resource action: petulant monitor on rhel8-2 -+ * Resource action: r192.168.122.208 monitor on rhel8-2 -+ * Resource action: lsb-dummy monitor on rhel8-2 -+ * Resource action: rsc_rhel8-1 start on rhel8-2 -+ * 
Resource action: rsc_rhel8-2 start on rhel8-2 -+ * Resource action: migrator start on rhel8-5 -+ * Resource action: migrator monitor=10000 on rhel8-5 -+ * Resource action: ping-1 start on rhel8-2 -+ * Pseudo action: Connectivity_running_0 -+ * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 -+ * Resource action: rsc_rhel8-2 monitor=5000 on rhel8-2 -+ * Resource action: ping-1 monitor=60000 on rhel8-2 -+Using the original execution date of: 2023-01-19 21:05:59Z -+ -+Revised Cluster Status: -+ * Node List: -+ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] -+ * OFFLINE: [ rhel8-1 ] -+ -+ * Full List of Resources: -+ * Fencing (stonith:fence_xvm): Started rhel8-3 -+ * FencingPass (stonith:fence_dummy): Started rhel8-4 -+ * FencingFail (stonith:fence_dummy): Started rhel8-5 -+ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-2 -+ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-2 -+ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 -+ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 -+ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] -+ * Clone Set: Connectivity [ping-1]: -+ * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] -+ * Stopped: [ rhel8-1 ] -+ * Clone Set: promotable-1 [stateful-1] (promotable): -+ * Promoted: [ rhel8-3 ] -+ * Unpromoted: [ rhel8-4 rhel8-5 ] -+ * Stopped: [ rhel8-1 rhel8-2 ] -+ * Resource Group: group-1: -+ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 -+ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 -diff --git a/cts/scheduler/xml/migration-intermediary-cleaned.xml b/cts/scheduler/xml/migration-intermediary-cleaned.xml -new file mode 100644 -index 0000000..bec7888 ---- /dev/null -+++ b/cts/scheduler/xml/migration-intermediary-cleaned.xml -@@ -0,0 +1,513 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - -From 1f9fadbb06baded3fc393cfe30a0cb620aca0829 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 1 Feb 2023 17:12:13 -0600 -Subject: [PATCH 13/14] Fix: scheduler: handle cleaned migrate_from history - correctly - -Fixes T623 ---- - lib/pengine/unpack.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 14dc202..9c99183 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2990,6 +2990,15 @@ unpack_migrate_to_success(pe_resource_t 
*rsc, pe_node_t *node, xmlNode *xml_op) - - // The migrate_from is pending, complete but erased, or to be scheduled - -+ /* If there is no history at all for the resource on an online target, then -+ * it was likely cleaned. Just return, and we'll schedule a probe. Once we -+ * have the probe result, it will be reflected in target_newer_state. -+ */ -+ if ((target_node != NULL) && target_node->details->online -+ && unknown_on_node(rsc, target)) { -+ return; -+ } -+ - if (active_on_target) { - pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); - --- -2.31.1 - -From d9d1bf19e8522ea29c87f0c39b05828947bc5b0f Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 2 Feb 2023 15:48:01 -0600 -Subject: [PATCH 14/14] Test: scheduler: update expected output for migration - fix - ---- - .../dot/migration-intermediary-cleaned.dot | 8 -- - .../exp/migration-intermediary-cleaned.exp | 88 ++++--------------- - .../migration-intermediary-cleaned.scores | 2 +- - .../migration-intermediary-cleaned.summary | 9 +- - 4 files changed, 22 insertions(+), 85 deletions(-) - -diff --git a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot -index 09568d0..f6eabba 100644 ---- a/cts/scheduler/dot/migration-intermediary-cleaned.dot -+++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot -@@ -7,15 +7,7 @@ - "FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] - "Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] - "lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] --"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] - "migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] --"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] --"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] --"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] 
--"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] --"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] --"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] --"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] - "petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] - "ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] - "ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp -index 28fa776..8b9bb39 100644 ---- a/cts/scheduler/exp/migration-intermediary-cleaned.exp -+++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp -@@ -148,91 +148,41 @@ - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- - - - -- -+ - - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -+ - -- -+ - - - - - - -- -+ - - - -- -+ - -- -+ - - - - - - -- -+ - - - -- -+ - - - -@@ -241,24 +191,24 @@ - - - -- -+ - -- -+ - - - - - -- -+ - - -- -+ - - - -- -+ - -- -+ - - - -@@ -268,7 +218,7 @@ - - - -- -+ - - - -@@ -277,7 +227,7 @@ - - - -- -+ - - - -@@ -286,7 +236,7 @@ - - - -- -+ - - - -@@ -295,7 +245,7 @@ - - - -- -+ - - - -@@ -304,7 +254,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores -index b3b8dff..09f05d1 100644 ---- a/cts/scheduler/scores/migration-intermediary-cleaned.scores -+++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores -@@ -103,7 +103,7 @@ pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 - pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 - pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 - pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 --pcmk__primitive_assign: migrator allocation 
score on rhel8-5: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-5: 1 - pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY - pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY - pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 -diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary -index 5de1355..dd127a8 100644 ---- a/cts/scheduler/summary/migration-intermediary-cleaned.summary -+++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary -@@ -13,7 +13,7 @@ Current cluster status: - * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 - * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 - * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 -- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] -+ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 - * Clone Set: Connectivity [ping-1]: - * Started: [ rhel8-3 rhel8-4 rhel8-5 ] - * Stopped: [ rhel8-1 rhel8-2 ] -@@ -30,7 +30,6 @@ Current cluster status: - Transition Summary: - * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) - * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) -- * Restart migrator ( rhel8-5 ) - * Start ping-1:3 ( rhel8-2 ) - - Executing Cluster Transition: -@@ -44,8 +43,6 @@ Executing Cluster Transition: - * Resource action: rsc_rhel8-3 monitor on rhel8-2 - * Resource action: rsc_rhel8-4 monitor on rhel8-2 - * Resource action: rsc_rhel8-5 monitor on rhel8-2 -- * Resource action: migrator stop on rhel8-2 -- * Resource action: migrator stop on rhel8-5 - * Resource action: migrator monitor on rhel8-2 - * Resource action: ping-1 monitor on rhel8-2 - * Pseudo action: Connectivity_start_0 -@@ -56,8 +53,6 @@ Executing Cluster Transition: - * Resource action: lsb-dummy monitor on rhel8-2 - * Resource action: rsc_rhel8-1 start on rhel8-2 - * Resource action: rsc_rhel8-2 start on rhel8-2 -- * Resource action: migrator start on rhel8-5 -- * Resource 
action: migrator monitor=10000 on rhel8-5 - * Resource action: ping-1 start on rhel8-2 - * Pseudo action: Connectivity_running_0 - * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 -@@ -79,7 +74,7 @@ Revised Cluster Status: - * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 - * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 - * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 -- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] -+ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 - * Clone Set: Connectivity [ping-1]: - * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] - * Stopped: [ rhel8-1 ] --- -2.31.1 - diff --git a/SOURCES/003-schema-doc.patch b/SOURCES/003-schema-doc.patch new file mode 100644 index 0000000..293e50f --- /dev/null +++ b/SOURCES/003-schema-doc.patch @@ -0,0 +1,42 @@ +From a3bffc7c66bf6f796f977cffd44f223635b008c5 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 20 Dec 2023 13:33:47 -0800 +Subject: [PATCH] Doc: Pacemaker Explained: Add replace for + PCMK__REMOTE_SCHEMA_DIR + +So that the existing use in local-options.rst expands correctly. 
+ +Signed-off-by: Reid Wahl +--- + doc/sphinx/Makefile.am | 1 + + doc/sphinx/conf.py.in | 1 + + 3 files changed, 2 insertions(+) + create mode 100644 doc/sphinx/conf.py.in.rej + +diff --git a/doc/sphinx/Makefile.am b/doc/sphinx/Makefile.am +index e48e19a..d0309ff 100644 +--- a/doc/sphinx/Makefile.am ++++ b/doc/sphinx/Makefile.am +@@ -134,6 +134,7 @@ $(BOOKS:%=%/conf.py): conf.py.in + -e 's#%CRM_SCHEMA_DIRECTORY%#@CRM_SCHEMA_DIRECTORY@#g' \ + -e 's#%PACEMAKER_CONFIG_DIR%#@PACEMAKER_CONFIG_DIR@#g' \ + -e 's#%PCMK_GNUTLS_PRIORITIES%#@PCMK_GNUTLS_PRIORITIES@#g' \ ++ -e 's#%PCMK__REMOTE_SCHEMA_DIR%#@PCMK__REMOTE_SCHEMA_DIR@#g' \ + $(<) > "$@" + + $(BOOK)/_build: $(STATIC_FILES) $(BOOK)/conf.py $(DEPS_$(BOOK)) $(wildcard $(srcdir)/$(BOOK)/*.rst) +diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in +index 556eb72..511f029 100644 +--- a/doc/sphinx/conf.py.in ++++ b/doc/sphinx/conf.py.in +@@ -40,6 +40,7 @@ rst_prolog=""" + .. |PCMK_INIT_ENV_FILE| replace:: ``%PACEMAKER_CONFIG_DIR%/pcmk-init.env`` + .. |PCMK_LOG_FILE| replace:: %CRM_LOG_DIR%/pacemaker.log + .. |PCMK_GNUTLS_PRIORITIES| replace:: %PCMK_GNUTLS_PRIORITIES% ++.. |PCMK__REMOTE_SCHEMA_DIR| replace:: %PCMK__REMOTE_SCHEMA_DIR% + .. |REMOTE_DISTRO| replace:: AlmaLinux + .. 
|REMOTE_DISTRO_VER| replace:: 9 + """ +-- +2.31.1 + diff --git a/SOURCES/004-attrd-cache-1.patch b/SOURCES/004-attrd-cache-1.patch new file mode 100644 index 0000000..dd617c4 --- /dev/null +++ b/SOURCES/004-attrd-cache-1.patch @@ -0,0 +1,1443 @@ +From 543a1e9b6f22f13956a8ef22b20c8fe93dad7ae9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 12 Dec 2023 16:08:44 -0600 +Subject: [PATCH 01/12] Refactor: libcrmcommon: support attrd purge requests + without clearing cache + +Nothing uses the new capability yet +--- + daemons/attrd/attrd_corosync.c | 4 +++- + daemons/attrd/attrd_messages.c | 8 +++++++- + daemons/attrd/pacemaker-attrd.h | 3 ++- + daemons/controld/controld_attrd.c | 2 +- + include/crm/common/ipc_attrd_internal.h | 7 ++++--- + include/crm_internal.h | 1 + + lib/common/ipc_attrd.c | 3 ++- + lib/common/ipc_client.c | 1 + + 8 files changed, 21 insertions(+), 8 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 86dc67b04..e6cd07f65 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -540,7 +540,9 @@ attrd_peer_remove(const char *host, bool uncache, const char *source) + GHashTableIter aIter; + + CRM_CHECK(host != NULL, return); +- crm_notice("Removing all %s attributes for peer %s", host, source); ++ crm_notice("Removing all %s attributes for node %s " ++ CRM_XS " %s reaping node from cache", ++ host, source, (uncache? 
"and" : "without")); + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 89da6d894..ac32e18af 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -148,7 +148,13 @@ handle_remove_request(pcmk__request_t *request) + { + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); +- attrd_peer_remove(host, true, request->peer); ++ bool reap = false; ++ ++ if (pcmk__xe_get_bool_attr(request->xml, PCMK__XA_REAP, ++ &reap) != pcmk_rc_ok) { ++ reap = true; // Default to true for backward compatibility ++ } ++ attrd_peer_remove(host, reap, request->peer); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index b8929a7f7..70e2cb41b 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -42,8 +42,9 @@ + * 4 2.1.5 Multiple attributes can be updated in a single IPC + * message + * 5 2.1.5 Peers can request confirmation of a sent message ++ * 6 2.1.7 PCMK__ATTRD_CMD_PEER_REMOVE supports PCMK__XA_REAP + */ +-#define ATTRD_PROTOCOL_VERSION "5" ++#define ATTRD_PROTOCOL_VERSION "6" + + #define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) + #define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) +diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c +index 923abb92d..958dc2f14 100644 +--- a/daemons/controld/controld_attrd.c ++++ b/daemons/controld/controld_attrd.c +@@ -117,7 +117,7 @@ update_attrd_remote_node_removed(const char *host, const char *user_name) + if (rc == pcmk_rc_ok) { + crm_trace("Asking attribute manager to purge Pacemaker Remote node %s", + host); +- rc = pcmk__attrd_api_purge(attrd_api, host); ++ rc = pcmk__attrd_api_purge(attrd_api, host, true); + } + if (rc 
!= pcmk_rc_ok) { + crm_err("Could not purge Pacemaker Remote node %s " +diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h +index b1b7584bd..39a55ad1d 100644 +--- a/include/crm/common/ipc_attrd_internal.h ++++ b/include/crm/common/ipc_attrd_internal.h +@@ -89,10 +89,11 @@ int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *na + + /*! + * \internal +- * \brief Purge a node from pacemaker-attrd ++ * \brief Request removal of a node's transient attributes + * + * \param[in,out] api pacemaker-attrd IPC object +- * \param[in] node Node to remove ++ * \param[in] node Node whose attributes should be purged ++ * \param[in] reap If true, also request removal from node caches + * + * \note If \p api is NULL, a new temporary connection will be created + * just for this operation and destroyed afterwards. If \p api is +@@ -102,7 +103,7 @@ int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *na + * + * \return Standard Pacemaker return code + */ +-int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node); ++int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap); + + /*! 
+ * \internal +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 3bc8d096a..f800ab0cc 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -92,6 +92,7 @@ + #define PCMK__XA_MODE "mode" + #define PCMK__XA_NODE_START_STATE "node_start_state" + #define PCMK__XA_PATH "path" ++#define PCMK__XA_REAP "reap" + #define PCMK__XA_SCHEMA "schema" + #define PCMK__XA_SCHEMAS "schemas" + #define PCMK__XA_TASK "task" +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 9caaabec0..56cdb5aba 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -277,7 +277,7 @@ pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *name, + } + + int +-pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node) ++pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + { + int rc = pcmk_rc_ok; + xmlNode *request = NULL; +@@ -291,6 +291,7 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node) + request = create_attrd_op(NULL); + + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); ++ pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap); + pcmk__xe_add_node(request, node, 0); + + if (api == NULL) { +diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c +index 0d3865095..5e64e2324 100644 +--- a/lib/common/ipc_client.c ++++ b/lib/common/ipc_client.c +@@ -759,6 +759,7 @@ create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name, + crm_xml_add(request, F_TYPE, T_ATTRD); + crm_xml_add(request, F_ORIG, crm_system_name); + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); ++ pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, true); + pcmk__xe_add_node(request, node_name, nodeid); + break; + +-- +2.41.0 + +From adc1d8ef587913e5505494e0205bd77a8e0a878e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 13 Dec 2023 09:24:28 -0600 +Subject: [PATCH 02/12] Log: attrd: improve messages for CIB wipe + +Also, expose attrd_erase_attrs() as 
attrd_cib_erase_transient_attrs() and make +it take the node name as an argument, for future reuse. +--- + daemons/attrd/attrd_cib.c | 60 ++++++++++++++++++++------------- + daemons/attrd/pacemaker-attrd.h | 1 + + 2 files changed, 37 insertions(+), 24 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 80e5580d9..ca1c5b9e0 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -153,41 +153,44 @@ static void + attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) + { +- do_crm_log_unlikely(((rc != pcmk_ok)? LOG_NOTICE : LOG_DEBUG), +- "Cleared transient attributes: %s " +- CRM_XS " xpath=%s rc=%d", +- pcmk_strerror(rc), (char *) user_data, rc); ++ const char *node = pcmk__s((const char *) user_data, "a node"); ++ ++ if (rc == pcmk_ok) { ++ crm_info("Cleared transient node attributes for %s from CIB", node); ++ } else { ++ crm_err("Unable to clear transient node attributes for %s from CIB: %s", ++ node, pcmk_strerror(rc)); ++ } + } + + #define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS + + /*! + * \internal +- * \brief Wipe all transient attributes for this node from the CIB ++ * \brief Wipe all transient node attributes for a node from the CIB + * +- * Clear any previous transient node attributes from the CIB. This is +- * normally done by the DC's controller when this node leaves the cluster, but +- * this handles the case where the node restarted so quickly that the +- * cluster layer didn't notice. +- * +- * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED), +- * ideally we'd skip this and sync our attributes from the writer. +- * However, currently we reject any values for us that the writer has, in +- * attrd_peer_update(). 
++ * \param[in] node Node to clear attributes for + */ +-static void +-attrd_erase_attrs(void) ++void ++attrd_cib_erase_transient_attrs(const char *node) + { + int call_id = 0; +- char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); ++ char *xpath = NULL; ++ ++ CRM_CHECK(node != NULL, return); ++ ++ xpath = crm_strdup_printf(XPATH_TRANSIENT, node); + +- crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", +- xpath); ++ crm_debug("Clearing transient node attributes for %s from CIB using %s", ++ node, xpath); + + call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); +- the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, +- "attrd_erase_cb", attrd_erase_cb, +- free); ++ free(xpath); ++ ++ // strdup() is just for logging here, so ignore failure ++ the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, ++ strdup(node), "attrd_erase_cb", ++ attrd_erase_cb, free); + } + + /*! +@@ -197,8 +200,17 @@ attrd_erase_attrs(void) + void + attrd_cib_init(void) + { +- // We have no attribute values in memory, wipe the CIB to match +- attrd_erase_attrs(); ++ /* We have no attribute values in memory, so wipe the CIB to match. This is ++ * normally done by the DC's controller when this node leaves the cluster, but ++ * this handles the case where the node restarted so quickly that the ++ * cluster layer didn't notice. ++ * ++ * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED), ++ * ideally we'd skip this and sync our attributes from the writer. ++ * However, currently we reject any values for us that the writer has, in ++ * attrd_peer_update(). 
++ */ ++ attrd_cib_erase_transient_attrs(attrd_cluster->uname); + + // Set a trigger for reading the CIB (for the alerts section) + attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 70e2cb41b..62637d1d7 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -66,6 +66,7 @@ void attrd_ipc_fini(void); + int attrd_cib_connect(int max_retry); + void attrd_cib_disconnect(void); + void attrd_cib_init(void); ++void attrd_cib_erase_transient_attrs(const char *node); + + bool attrd_value_needs_expansion(const char *value); + int attrd_expand_value(const char *value, const char *old_value); +-- +2.41.0 + +From 9be38897eaa683ad7920503d9c9fd7db7a20a8ec Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 13 Dec 2023 11:20:07 -0600 +Subject: [PATCH 03/12] Refactor: attrd: convert value booleans to flags + +--- + daemons/attrd/attrd_attributes.c | 7 +++--- + daemons/attrd/attrd_corosync.c | 38 +++++++++++++++++--------------- + daemons/attrd/pacemaker-attrd.h | 21 ++++++++++++++++-- + 3 files changed, 42 insertions(+), 24 deletions(-) + +diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c +index 388c181d7..8f32988be 100644 +--- a/daemons/attrd/attrd_attributes.c ++++ b/daemons/attrd/attrd_attributes.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2013-2022 the Pacemaker project contributors ++ * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -143,7 +143,7 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid); + crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); + pcmk__xe_add_node(xml, v->nodename, v->nodeid); +- if (v->is_remote != 0) { ++ if (pcmk_is_set(v->flags, attrd_value_remote)) { + crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1); + } + crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); +@@ -166,8 +166,7 @@ attrd_clear_value_seen(void) + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { +- v->seen = FALSE; +- crm_trace("Clear seen flag %s[%s] = %s.", a->id, v->nodename, v->current); ++ attrd_clear_value_flags(v, attrd_value_from_peer); + } + } + } +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index e6cd07f65..ca20bdc0f 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -192,34 +192,35 @@ cache_remote_node(const char *node_name) + + /*! 
+ * \internal +- * \brief Return host's hash table entry (creating one if needed) ++ * \brief Return a node's value from hash table (creating one if needed) + * +- * \param[in,out] values Hash table of values +- * \param[in] host Name of peer to look up +- * \param[in] xml XML describing the attribute ++ * \param[in,out] values Hash table of values ++ * \param[in] node_name Name of node to look up ++ * \param[in] xml XML describing the attribute + * + * \return Pointer to new or existing hash table entry + */ + static attribute_value_t * +-attrd_lookup_or_create_value(GHashTable *values, const char *host, ++attrd_lookup_or_create_value(GHashTable *values, const char *node_name, + const xmlNode *xml) + { +- attribute_value_t *v = g_hash_table_lookup(values, host); ++ attribute_value_t *v = g_hash_table_lookup(values, node_name); + int is_remote = 0; + +- crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); +- if (is_remote) { +- cache_remote_node(host); +- } +- + if (v == NULL) { + v = calloc(1, sizeof(attribute_value_t)); + CRM_ASSERT(v != NULL); + +- pcmk__str_update(&v->nodename, host); +- v->is_remote = is_remote; ++ pcmk__str_update(&v->nodename, node_name); + g_hash_table_replace(values, v->nodename, v); + } ++ ++ crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); ++ if (is_remote) { ++ attrd_set_value_flags(v, attrd_value_remote); ++ cache_remote_node(node_name); ++ } ++ + return(v); + } + +@@ -344,11 +345,11 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + } + } + +- /* Set the seen flag for attribute processing held only in the own node. 
*/ +- v->seen = TRUE; ++ // This allows us to later detect local values that peer doesn't know about ++ attrd_set_value_flags(v, attrd_value_from_peer); + + /* If this is a cluster node whose node ID we are learning, remember it */ +- if ((v->nodeid == 0) && (v->is_remote == FALSE) ++ if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote) + && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, + (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { + record_peer_nodeid(v, host); +@@ -414,8 +415,9 @@ broadcast_unseen_local_values(void) + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { +- if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname, +- pcmk__str_casei)) { ++ if (!pcmk_is_set(v->flags, attrd_value_from_peer) ++ && pcmk__str_eq(v->nodename, attrd_cluster->uname, ++ pcmk__str_casei)) { + if (sync == NULL) { + sync = create_xml_node(NULL, __func__); + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 62637d1d7..738418857 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -140,15 +140,32 @@ typedef struct attribute_s { + + } attribute_t; + ++enum attrd_value_flags { ++ attrd_value_none = 0U, ++ attrd_value_remote = (1U << 0), // Value is for Pacemaker Remote node ++ attrd_value_from_peer = (1U << 1), // Value is from peer sync response ++}; ++ + typedef struct attribute_value_s { + uint32_t nodeid; +- gboolean is_remote; + char *nodename; + char *current; + char *requested; +- gboolean seen; ++ uint32_t flags; // Group of attrd_value_flags + } attribute_value_t; + ++#define attrd_set_value_flags(attr_value, flags_to_set) do { \ ++ (attr_value)->flags = pcmk__set_flags_as(__func__, __LINE__, \ ++ LOG_TRACE, "Value for node", (attr_value)->nodename, \ ++ (attr_value)->flags, 
(flags_to_set), #flags_to_set); \ ++ } while (0) ++ ++#define attrd_clear_value_flags(attr_value, flags_to_clear) do { \ ++ (attr_value)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ ++ LOG_TRACE, "Value for node", (attr_value)->nodename, \ ++ (attr_value)->flags, (flags_to_clear), #flags_to_clear); \ ++ } while (0) ++ + extern crm_cluster_t *attrd_cluster; + extern GHashTable *attributes; + extern GHashTable *peer_protocol_vers; +-- +2.41.0 + +From 922c79f4e39dc9501ff7c0136df8043081b771cb Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 13 Dec 2023 16:51:39 -0600 +Subject: [PATCH 04/12] Log: attrd: improve logging of CIB write result + +When attrd requests a write-out of a changed attribute value, it saves the new +value in attribute_value_t:requested so it can be used in a log when the write +completes (which may occur after the value has already changed again, so we +can't log the current value at that time). + +Previously, the log call relied on libqb mapping a NULL pointer to "(null)". +To be safer, do that explicitly. + +Also, it previously erased "requested" after the write completed, even if the +write failed and would be reattempted. Leave the value alone in this case so +the result of the reattempt can be logged correctly. 
+--- + daemons/attrd/attrd_cib.c | 18 ++++++++---------- + 1 file changed, 8 insertions(+), 10 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index ca1c5b9e0..ae6564856 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -274,11 +274,12 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use + + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { +- do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested); +- free(v->requested); +- v->requested = NULL; +- if (rc != pcmk_ok) { +- a->changed = true; /* Attempt write out again */ ++ do_crm_log(level, "* %s[%s]=%s", ++ a->id, peer, pcmk__s(v->requested, "(null)")); ++ if (rc == pcmk_ok) { ++ pcmk__str_update(&(v->requested), NULL); ++ } else { ++ a->changed = true; // Reattempt write below if we are still writer + } + } + +@@ -605,11 +606,8 @@ write_attribute(attribute_t *a, bool ignore_delay) + /* Preservation of the attribute to transmit alert */ + set_alert_attribute_value(alert_attribute_value, v); + +- free(v->requested); +- v->requested = NULL; +- if (v->current) { +- v->requested = strdup(v->current); +- } ++ // Save this value so we can log it when write completes ++ pcmk__str_update(&(v->requested), v->current); + } + + if (private_updates) { +-- +2.41.0 + +From fa2830b1c4acf061faa40490620eb63c48a56a2b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 13 Dec 2023 17:01:01 -0600 +Subject: [PATCH 05/12] Low: libcrmcluster: avoid use-after-free in trace log + +--- + lib/cluster/membership.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index f856ccaca..6958e65f2 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -143,11 +143,23 @@ crm_remote_peer_get(const char *node_name) + return node; + } + ++/*! 
++ * \brief Remove a node from the Pacemaker Remote node cache ++ * ++ * \param[in] node_name Name of node to remove from cache ++ * ++ * \note The caller must be careful not to use \p node_name after calling this ++ * function if it might be a pointer into the cache entry being removed. ++ */ + void + crm_remote_peer_cache_remove(const char *node_name) + { +- if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { +- crm_trace("removed %s from remote peer cache", node_name); ++ /* Do a lookup first, because node_name could be a pointer within the entry ++ * being removed -- we can't log it *after* removing it. ++ */ ++ if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) { ++ crm_trace("Removing %s from Pacemaker Remote node cache", node_name); ++ g_hash_table_remove(crm_remote_peer_cache, node_name); + } + } + +-- +2.41.0 + +From 14a7449a413f3f10eb80634c607386007d264475 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 09:24:38 -0600 +Subject: [PATCH 06/12] Refactor: libcrmcluster,attrd: functionize removing + node from both caches + +This future-proofs against a potential use-after-free (not possible with +current code) and isolates cache management better. 
+--- + daemons/attrd/attrd_corosync.c | 3 +-- + include/crm/cluster/internal.h | 9 +++---- + lib/cluster/membership.c | 44 ++++++++++++++++++++++++++++++++++ + 3 files changed, 50 insertions(+), 6 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index ca20bdc0f..aa94a078e 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -554,8 +554,7 @@ attrd_peer_remove(const char *host, bool uncache, const char *source) + } + + if (uncache) { +- crm_remote_peer_cache_remove(host); +- reap_crm_member(0, host); ++ pcmk__purge_node_from_cache(host, 0); + } + } + +diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h +index e20ee4c59..c71069be2 100644 +--- a/include/crm/cluster/internal.h ++++ b/include/crm/cluster/internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2021 the Pacemaker project contributors ++ * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -7,8 +7,8 @@ + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +-#ifndef CRM_CLUSTER_INTERNAL__H +-# define CRM_CLUSTER_INTERNAL__H ++#ifndef PCMK__CRM_CLUSTER_INTERNAL__H ++# define PCMK__CRM_CLUSTER_INTERNAL__H + + # include // uint32_t, uint64_t + # include +@@ -126,6 +126,7 @@ crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname, + uint32_t flags); + crm_node_t *pcmk__search_cluster_node_cache(unsigned int id, const char *uname, + const char *uuid); ++void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id); + + void pcmk__refresh_node_caches_from_cib(xmlNode *cib); + crm_node_t *pcmk__search_known_node_cache(unsigned int id, const char *uname, +@@ -136,4 +137,4 @@ crm_node_t *pcmk__get_peer(unsigned int id, const char *uname, + crm_node_t *pcmk__get_peer_full(unsigned int id, const char *uname, + const char *uuid, int flags); + +-#endif ++#endif // PCMK__CRM_CLUSTER_INTERNAL__H +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 6958e65f2..173aaaa17 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -341,6 +341,9 @@ crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) + * \param[in] name Uname of node to remove (or NULL to ignore) + * + * \return Number of cache entries removed ++ * ++ * \note The caller must be careful not to use \p name after calling this ++ * function if it might be a pointer into the cache entry being removed. + */ + guint + reap_crm_member(uint32_t id, const char *name) +@@ -564,6 +567,47 @@ pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid, + return node; + } + ++/*! ++ * \internal ++ * \brief Purge a node from cache (both cluster and Pacemaker Remote) ++ * ++ * \param[in] node_name If not NULL, purge only nodes with this name ++ * \param[in] node_id If not 0, purge cluster nodes only if they have this ID ++ * ++ * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged. 
++ * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote ++ * nodes that match \p node_name will be purged, and cluster nodes that ++ * match both \p node_name and \p node_id will be purged. ++ * \note The caller must be careful not to use \p node_name after calling this ++ * function if it might be a pointer into a cache entry being removed. ++ */ ++void ++pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) ++{ ++ char *node_name_copy = NULL; ++ ++ if ((node_name == NULL) && (node_id == 0U)) { ++ return; ++ } ++ ++ // Purge from Pacemaker Remote node cache ++ if ((node_name != NULL) ++ && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) { ++ /* node_name could be a pointer into the cache entry being purged, ++ * so reassign it to a copy before the original gets freed ++ */ ++ node_name_copy = strdup(node_name); ++ CRM_ASSERT(node_name_copy != NULL); ++ node_name = node_name_copy; ++ ++ crm_trace("Purging %s from Pacemaker Remote node cache", node_name); ++ g_hash_table_remove(crm_remote_peer_cache, node_name); ++ } ++ ++ reap_crm_member(node_id, node_name); ++ free(node_name_copy); ++} ++ + /*! + * \brief Get a node cache entry (cluster or Pacemaker Remote) + * +-- +2.41.0 + +From 8d552c1b582a95f9879b15e2dd991a7f995e7eca Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 09:51:37 -0600 +Subject: [PATCH 07/12] Fix: pacemaker-attrd,libcrmcluster: avoid + use-after-free when remote node in cluster node cache + +Previously, pacemaker-attrd removed any conflicting entry from the cluster node +cache before adding a node to the remote node cache. However, if the name used +was a pointer into the cluster node cache entry being freed, it would be reused +to create the remote node cache entry. + +This avoids that and also moves the functionality into libcrmcluster for better +isolation of cache management. It also corrects mistakenly setting errno to a +negative value. 
+--- + daemons/attrd/attrd_corosync.c | 26 ++------------------------ + lib/cluster/membership.c | 30 ++++++++++++++++++++++++++++-- + 2 files changed, 30 insertions(+), 26 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index aa94a078e..1d0f87f04 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -166,28 +166,6 @@ broadcast_local_value(const attribute_t *a) + return v; + } + +-/*! +- * \internal +- * \brief Ensure a Pacemaker Remote node is in the correct peer cache +- * +- * \param[in] node_name Name of Pacemaker Remote node to check +- */ +-static void +-cache_remote_node(const char *node_name) +-{ +- /* If we previously assumed this node was an unseen cluster node, +- * remove its entry from the cluster peer cache. +- */ +- crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name, NULL); +- +- if (dup && (dup->uuid == NULL)) { +- reap_crm_member(0, node_name); +- } +- +- // Ensure node is in the remote peer cache +- CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); +-} +- + #define state_text(state) pcmk__s((state), "in unknown state") + + /*! 
+@@ -218,7 +196,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *node_name, + crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); + if (is_remote) { + attrd_set_value_flags(v, attrd_value_remote); +- cache_remote_node(node_name); ++ CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); + } + + return(v); +@@ -273,7 +251,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { +- cache_remote_node(peer->uname); ++ CRM_ASSERT(crm_remote_peer_get(peer->uname) != NULL); + } + } + +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 173aaaa17..a653617fa 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -102,26 +102,50 @@ crm_remote_peer_cache_size(void) + * \note When creating a new entry, this will leave the node state undetermined, + * so the caller should also call pcmk__update_peer_state() if the state + * is known. ++ * \note Because this can add and remove cache entries, callers should not ++ * assume any previously obtained cache entry pointers remain valid. + */ + crm_node_t * + crm_remote_peer_get(const char *node_name) + { + crm_node_t *node; ++ char *node_name_copy = NULL; + + if (node_name == NULL) { +- errno = -EINVAL; ++ errno = EINVAL; + return NULL; + } + ++ /* It's theoretically possible that the node was added to the cluster peer ++ * cache before it was known to be a Pacemaker Remote node. Remove that ++ * entry unless it has a node ID, which means the name actually is ++ * associated with a cluster node. (@TODO return an error in that case?) 
++ */ ++ node = pcmk__search_cluster_node_cache(0, node_name, NULL); ++ if ((node != NULL) && (node->uuid == NULL)) { ++ /* node_name could be a pointer into the cache entry being removed, so ++ * reassign it to a copy before the original gets freed ++ */ ++ node_name_copy = strdup(node_name); ++ if (node_name_copy == NULL) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ node_name = node_name_copy; ++ reap_crm_member(0, node_name); ++ } ++ + /* Return existing cache entry if one exists */ + node = g_hash_table_lookup(crm_remote_peer_cache, node_name); + if (node) { ++ free(node_name_copy); + return node; + } + + /* Allocate a new entry */ + node = calloc(1, sizeof(crm_node_t)); + if (node == NULL) { ++ free(node_name_copy); + return NULL; + } + +@@ -130,7 +154,8 @@ crm_remote_peer_get(const char *node_name) + node->uuid = strdup(node_name); + if (node->uuid == NULL) { + free(node); +- errno = -ENOMEM; ++ errno = ENOMEM; ++ free(node_name_copy); + return NULL; + } + +@@ -140,6 +165,7 @@ crm_remote_peer_get(const char *node_name) + + /* Update the entry's uname, ensuring peer status callbacks are called */ + update_peer_uname(node, node_name); ++ free(node_name_copy); + return node; + } + +-- +2.41.0 + +From 17ac8f0409021cbcd3e03a1b70518ab7abd9b259 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 10:03:05 -0600 +Subject: [PATCH 08/12] Refactor: attrd: remove dead code + +The peer change callback can't be called for a Pacemaker Remote node unless the +node is already in the remote node cache, so don't bother trying to add it. +Modifying the peer caches is forbidden in peer change callbacks anyway since it +could lead to use-after-free issues in libcrmcluster. 
+--- + daemons/attrd/attrd_corosync.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 1d0f87f04..eba734c3a 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -248,10 +248,6 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + attrd_remove_voter(peer); + attrd_remove_peer_protocol_ver(peer->uname); + attrd_do_not_expect_from_peer(peer->uname); +- +- // Ensure remote nodes that come up are in the remote node cache +- } else if (!gone && is_remote) { +- CRM_ASSERT(crm_remote_peer_get(peer->uname) != NULL); + } + } + +-- +2.41.0 + +From 221c4d697edc0481817c206ce8fdd878afd98ca1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 17:17:32 -0600 +Subject: [PATCH 09/12] Low: libcrmcommon: handle disconnected attrd API + connections consistently + +Drop send_attrd_request() in favor of using connect_and_send_attrd_request(), +since pcmk__connect_ipc() will return pcmk_rc_ok immediately if the API is +already connected. + +All the attribute manager IPC APIs attempted the connection if not already +connected except for pcmk__attrd_api_query(). Now that it uses +connect_and_send_attrd_request(), they are all consistent. 
+--- + lib/common/ipc_attrd.c | 28 +++++----------------------- + 1 file changed, 5 insertions(+), 23 deletions(-) + +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 56cdb5aba..e36b42cbc 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -190,12 +190,6 @@ connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) + return pcmk_rc_ok; + } + +-static int +-send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) +-{ +- return pcmk__send_ipc_request(api, request); +-} +- + int + pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + const char *resource, const char *operation, +@@ -229,11 +223,8 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + rc = connect_and_send_attrd_request(api, request); + destroy_api(api); + +- } else if (!pcmk_ipc_is_connected(api)) { +- rc = connect_and_send_attrd_request(api, request); +- + } else { +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + } + + free_xml(request); +@@ -303,11 +294,8 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + rc = connect_and_send_attrd_request(api, request); + destroy_api(api); + +- } else if (!pcmk_ipc_is_connected(api)) { +- rc = connect_and_send_attrd_request(api, request); +- + } else { +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + } + + free_xml(request); +@@ -346,7 +334,7 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_QUERY); + pcmk__xe_add_node(request, node, 0); + +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + free_xml(request); + + if (node) { +@@ -386,11 +374,8 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) + rc = connect_and_send_attrd_request(api, request); + destroy_api(api); + +- } else if 
(!pcmk_ipc_is_connected(api)) { +- rc = connect_and_send_attrd_request(api, request); +- + } else { +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + } + + free_xml(request); +@@ -479,11 +464,8 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, + rc = connect_and_send_attrd_request(api, request); + destroy_api(api); + +- } else if (!pcmk_ipc_is_connected(api)) { +- rc = connect_and_send_attrd_request(api, request); +- + } else { +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + } + + free_xml(request); +-- +2.41.0 + +From 85502a405c384fdf0331e43ec161910ee1d14973 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 17:29:11 -0600 +Subject: [PATCH 10/12] Low: libcrmcommon: handle NULL attribute manager IPC + API connections consistently + +Previously, all attribute manager IPC APIs except pcmk__attrd_api_query() would +create a temporary connection if passed a NULL argument for one. Now, +connect_and_send_attrd_request() does this itself, reducing code duplication and +making the handling consistent across all APIs. 
+--- + lib/common/ipc_attrd.c | 116 +++++++++-------------------------------- + 1 file changed, 25 insertions(+), 91 deletions(-) + +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index e36b42cbc..68975c7b6 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -148,46 +148,39 @@ create_attrd_op(const char *user_name) + return attrd_op; + } + +-static int +-create_api(pcmk_ipc_api_t **api) +-{ +- int rc = pcmk_new_ipc_api(api, pcmk_ipc_attrd); +- +- if (rc != pcmk_rc_ok) { +- crm_err("Could not connect to attrd: %s", pcmk_rc_str(rc)); +- } +- +- return rc; +-} +- +-static void +-destroy_api(pcmk_ipc_api_t *api) +-{ +- pcmk_disconnect_ipc(api); +- pcmk_free_ipc_api(api); +- api = NULL; +-} +- + static int + connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) + { + int rc = pcmk_rc_ok; ++ bool created_api = false; ++ ++ if (api == NULL) { ++ rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd); ++ if (rc != pcmk_rc_ok) { ++ crm_err("Could not connect to attribute manager: %s", ++ pcmk_rc_str(rc)); ++ return rc; ++ } ++ created_api = true; ++ } + + rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5); + if (rc != pcmk_rc_ok) { + crm_err("Could not connect to %s: %s", + pcmk_ipc_name(api, true), pcmk_rc_str(rc)); +- return rc; +- } + +- rc = pcmk__send_ipc_request(api, request); +- if (rc != pcmk_rc_ok) { +- crm_err("Could not send request to %s: %s", +- pcmk_ipc_name(api, true), pcmk_rc_str(rc)); +- return rc; ++ } else { ++ rc = pcmk__send_ipc_request(api, request); ++ if (rc != pcmk_rc_ok) { ++ crm_err("Could not send request to %s: %s", ++ pcmk_ipc_name(api, true), pcmk_rc_str(rc)); ++ } + } + +- return pcmk_rc_ok; ++ if (created_api) { ++ pcmk_free_ipc_api(api); ++ } ++ return rc; + } + + int +@@ -214,18 +207,7 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + crm_xml_add_int(request, PCMK__XA_ATTR_IS_REMOTE, + pcmk_is_set(options, pcmk__node_attr_remote)); + +- if (api == NULL) { +- rc = 
create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = connect_and_send_attrd_request(api, request); +- destroy_api(api); +- +- } else { +- rc = connect_and_send_attrd_request(api, request); +- } ++ rc = connect_and_send_attrd_request(api, request); + + free_xml(request); + +@@ -285,18 +267,7 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap); + pcmk__xe_add_node(request, node, 0); + +- if (api == NULL) { +- rc = create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = connect_and_send_attrd_request(api, request); +- destroy_api(api); +- +- } else { +- rc = connect_and_send_attrd_request(api, request); +- } ++ rc = connect_and_send_attrd_request(api, request); + + free_xml(request); + +@@ -365,18 +336,7 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_REFRESH); + pcmk__xe_add_node(request, node, 0); + +- if (api == NULL) { +- rc = create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = connect_and_send_attrd_request(api, request); +- destroy_api(api); +- +- } else { +- rc = connect_and_send_attrd_request(api, request); +- } ++ rc = connect_and_send_attrd_request(api, request); + + free_xml(request); + +@@ -455,18 +415,7 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, + request = create_attrd_op(user_name); + populate_update_op(request, node, name, value, dampen, set, options); + +- if (api == NULL) { +- rc = create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = connect_and_send_attrd_request(api, request); +- destroy_api(api); +- +- } else { +- rc = connect_and_send_attrd_request(api, request); +- } ++ rc = connect_and_send_attrd_request(api, request); + + free_xml(request); + +@@ -547,23 +496,8 @@ pcmk__attrd_api_update_list(pcmk_ipc_api_t *api, GList *attrs, const char *dampe + * request. 
Do that now, creating and destroying the API object if needed. + */ + if (pcmk__is_daemon) { +- bool created_api = false; +- +- if (api == NULL) { +- rc = create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- created_api = true; +- } +- + rc = connect_and_send_attrd_request(api, request); + free_xml(request); +- +- if (created_api) { +- destroy_api(api); +- } + } + + return rc; +-- +2.41.0 + +From 4b25e2e2cf52e6c772805309e1f3dd6bb7ce8fab Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 18:11:14 -0600 +Subject: [PATCH 11/12] Log: controld,libcrmcommon: improve attrd IPC API + messages + +Previously, connect_and_send_attrd_request() would log error messages for +failures, attrd IPC APIs would log debug messages with the result whether +success or failure, and then callers would log or output failures again. + +Now, connect_and_send_attrd_request() does not log, the attrd IPC APIs log a +debug message before sending the request, and the callers log or output +failures. +--- + daemons/controld/controld_attrd.c | 22 ++++----- + lib/common/ipc_attrd.c | 76 ++++++++++++------------------- + 2 files changed, 38 insertions(+), 60 deletions(-) + +diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c +index 958dc2f14..24c1e7068 100644 +--- a/daemons/controld/controld_attrd.c ++++ b/daemons/controld/controld_attrd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2006-2022 the Pacemaker project contributors ++ * Copyright 2006-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -136,25 +136,23 @@ update_attrd_clear_failures(const char *host, const char *rsc, const char *op, + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { +- const char *op_desc = pcmk__s(op, "operations"); +- const char *interval_desc = "all"; + uint32_t attrd_opts = pcmk__node_attr_none; + +- if (op != NULL) { +- interval_desc = pcmk__s(interval_spec, "nonrecurring"); +- } + if (is_remote_node) { + pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote); + } +- crm_info("Asking attribute manager to clear failure of %s %s for %s " +- "on %s node %s", interval_desc, op_desc, rsc, +- node_type(is_remote_node), host); + rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op, + interval_spec, NULL, attrd_opts); + } + if (rc != pcmk_rc_ok) { +- crm_err("Could not clear failure attributes for %s on %s node %s%s: %s " +- CRM_XS " rc=%d", pcmk__s(rsc, "all resources"), +- node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc); ++ const char *interval_desc = "all"; ++ ++ if (op != NULL) { ++ interval_desc = pcmk__s(interval_spec, "nonrecurring"); ++ } ++ crm_err("Could not clear failure of %s %s for %s on %s node %s%s: %s " ++ CRM_XS " rc=%d", interval_desc, pcmk__s(op, "operations"), ++ pcmk__s(rsc, "all resources"), node_type(is_remote_node), host, ++ when(), pcmk_rc_str(rc), rc); + } + } +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 68975c7b6..3951bd3df 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -157,24 +157,14 @@ connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) + if (api == NULL) { + rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd); + if (rc != pcmk_rc_ok) { +- crm_err("Could not connect to attribute manager: %s", +- pcmk_rc_str(rc)); + return rc; + } + created_api = true; + } + + rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5); +- if (rc != pcmk_rc_ok) { +- crm_err("Could not connect to %s: %s", +- pcmk_ipc_name(api, true), 
pcmk_rc_str(rc)); +- +- } else { ++ if (rc == pcmk_rc_ok) { + rc = pcmk__send_ipc_request(api, request); +- if (rc != pcmk_rc_ok) { +- crm_err("Could not send request to %s: %s", +- pcmk_ipc_name(api, true), pcmk_rc_str(rc)); +- } + } + + if (created_api) { +@@ -199,6 +189,17 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + node = target; + } + ++ if (operation) { ++ interval_desc = pcmk__s(interval_spec, "nonrecurring"); ++ op_desc = operation; ++ } else { ++ interval_desc = "all"; ++ op_desc = "operations"; ++ } ++ crm_debug("Asking %s to clear failure of %s %s for %s on %s", ++ pcmk_ipc_name(api, true), interval_desc, op_desc, ++ pcmk__s(resource, "all resources"), pcmk__s(node, "all nodes")); ++ + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_CLEAR_FAILURE); + pcmk__xe_add_node(request, node, 0); + crm_xml_add(request, PCMK__XA_ATTR_RESOURCE, resource); +@@ -210,19 +211,6 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + rc = connect_and_send_attrd_request(api, request); + + free_xml(request); +- +- if (operation) { +- interval_desc = interval_spec? interval_spec : "nonrecurring"; +- op_desc = operation; +- } else { +- interval_desc = "all"; +- op_desc = "operations"; +- } +- +- crm_debug("Asked pacemaker-attrd to clear failure of %s %s for %s on %s: %s (%d)", +- interval_desc, op_desc, (resource? resource : "all resources"), +- (node? node : "all nodes"), pcmk_rc_str(rc), rc); +- + return rc; + } + +@@ -254,13 +242,17 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + { + int rc = pcmk_rc_ok; + xmlNode *request = NULL; +- const char *display_host = (node ? node : "localhost"); + const char *target = pcmk__node_attr_target(node); + + if (target != NULL) { + node = target; + } + ++ crm_debug("Asking %s to purge transient attributes%s for %s", ++ pcmk_ipc_name(api, true), ++ (reap? 
" and node cache entries" : ""), ++ pcmk__s(node, "local node")); ++ + request = create_attrd_op(NULL); + + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); +@@ -270,10 +262,6 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + rc = connect_and_send_attrd_request(api, request); + + free_xml(request); +- +- crm_debug("Asked pacemaker-attrd to purge %s: %s (%d)", +- display_host, pcmk_rc_str(rc), rc); +- + return rc; + } + +@@ -299,6 +287,10 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + } + } + ++ crm_debug("Querying %s for value of '%s'%s%s", ++ pcmk_ipc_name(api, true), name, ++ ((node == NULL)? "" : " on "), pcmk__s(node, "")); ++ + request = create_attrd_op(NULL); + + crm_xml_add(request, PCMK__XA_ATTR_NAME, name); +@@ -307,15 +299,6 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + + rc = connect_and_send_attrd_request(api, request); + free_xml(request); +- +- if (node) { +- crm_debug("Queried pacemaker-attrd for %s on %s: %s (%d)", +- name, node, pcmk_rc_str(rc), rc); +- } else { +- crm_debug("Queried pacemaker-attrd for %s: %s (%d)", +- name, pcmk_rc_str(rc), rc); +- } +- + return rc; + } + +@@ -324,13 +307,15 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) + { + int rc = pcmk_rc_ok; + xmlNode *request = NULL; +- const char *display_host = (node ? 
node : "localhost"); + const char *target = pcmk__node_attr_target(node); + + if (target != NULL) { + node = target; + } + ++ crm_debug("Asking %s to write all transient attributes for %s to CIB", ++ pcmk_ipc_name(api, true), pcmk__s(node, "local node")); ++ + request = create_attrd_op(NULL); + + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_REFRESH); +@@ -339,10 +324,6 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) + rc = connect_and_send_attrd_request(api, request); + + free_xml(request); +- +- crm_debug("Asked pacemaker-attrd to refresh %s: %s (%d)", +- display_host, pcmk_rc_str(rc), rc); +- + return rc; + } + +@@ -399,7 +380,6 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, + { + int rc = pcmk_rc_ok; + xmlNode *request = NULL; +- const char *display_host = (node ? node : "localhost"); + const char *target = NULL; + + if (name == NULL) { +@@ -412,16 +392,16 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, + node = target; + } + ++ crm_debug("Asking %s to update '%s' to '%s' for %s", ++ pcmk_ipc_name(api, true), name, pcmk__s(value, "(null)"), ++ pcmk__s(node, "local node")); ++ + request = create_attrd_op(user_name); + populate_update_op(request, node, name, value, dampen, set, options); + + rc = connect_and_send_attrd_request(api, request); + + free_xml(request); +- +- crm_debug("Asked pacemaker-attrd to update %s on %s: %s (%d)", +- name, display_host, pcmk_rc_str(rc), rc); +- + return rc; + } + +-- +2.41.0 + +From e5d22ef2a6b130768bd59ab5b7d8cd1155bb02a5 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 17:54:01 -0600 +Subject: [PATCH 12/12] Log: libcrmcommon: use log-friendly name in pacemakerd + IPC logs + +--- + lib/common/ipc_pacemakerd.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c +index 2f0370974..6d6f6d6bf 100644 +--- 
a/lib/common/ipc_pacemakerd.c ++++ b/lib/common/ipc_pacemakerd.c +@@ -210,15 +210,16 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + value = crm_element_value(reply, F_CRM_MSG_TYPE); + if (pcmk__str_empty(value) + || !pcmk__str_eq(value, XML_ATTR_RESPONSE, pcmk__str_none)) { +- crm_info("Unrecognizable message from pacemakerd: " ++ crm_info("Unrecognizable message from %s: " + "message type '%s' not '" XML_ATTR_RESPONSE "'", +- pcmk__s(value, "")); ++ pcmk_ipc_name(api, true), pcmk__s(value, "")); + status = CRM_EX_PROTOCOL; + goto done; + } + + if (pcmk__str_empty(crm_element_value(reply, XML_ATTR_REFERENCE))) { +- crm_info("Unrecognizable message from pacemakerd: no reference"); ++ crm_info("Unrecognizable message from %s: no reference", ++ pcmk_ipc_name(api, true)); + status = CRM_EX_PROTOCOL; + goto done; + } +@@ -244,8 +245,8 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + reply_data.reply_type = pcmk_pacemakerd_reply_shutdown; + reply_data.data.shutdown.status = atoi(crm_element_value(msg_data, XML_LRM_ATTR_OPSTATUS)); + } else { +- crm_info("Unrecognizable message from pacemakerd: " +- "unknown command '%s'", pcmk__s(value, "")); ++ crm_info("Unrecognizable message from %s: unknown command '%s'", ++ pcmk_ipc_name(api, true), pcmk__s(value, "")); + status = CRM_EX_PROTOCOL; + goto done; + } +@@ -292,8 +293,8 @@ do_pacemakerd_api_call(pcmk_ipc_api_t *api, const char *ipc_name, const char *ta + if (cmd) { + rc = pcmk__send_ipc_request(api, cmd); + if (rc != pcmk_rc_ok) { +- crm_debug("Couldn't send request to pacemakerd: %s rc=%d", +- pcmk_rc_str(rc), rc); ++ crm_debug("Couldn't send request to %s: %s rc=%d", ++ pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc); + } + free_xml(cmd); + } else { +-- +2.41.0 + diff --git a/SOURCES/004-g_source_remove.patch b/SOURCES/004-g_source_remove.patch deleted file mode 100644 index 2af0f47..0000000 --- a/SOURCES/004-g_source_remove.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 45617b727e280cac384a28ae3d96145e066e6197 Mon 
Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Fri, 3 Feb 2023 12:08:57 -0800 -Subject: [PATCH 01/02] Fix: fencer: Prevent double g_source_remove of op_timer_one - -QE observed a rarely reproducible core dump in the fencer during -Pacemaker shutdown, in which we try to g_source_remove() an op timer -that's already been removed. - -free_stonith_remote_op_list() --> g_hash_table_destroy() --> g_hash_table_remove_all_nodes() --> clear_remote_op_timers() --> g_source_remove() --> crm_glib_handler() --> "Source ID 190 was not found when attempting to remove it" - -The likely cause is that request_peer_fencing() doesn't set -op->op_timer_one to 0 after calling g_source_remove() on it, so if that -op is still in the stonith_remote_op_list at shutdown with the same -timer, clear_remote_op_timers() tries to remove the source for -op_timer_one again. - -There are only five locations that call g_source_remove() on a -remote_fencing_op_t timer. -* Three of them are in clear_remote_op_timers(), which first 0-checks - the timer and then sets it to 0 after g_source_remove(). -* One is in remote_op_query_timeout(), which does the same. -* The last is the one we fix here in request_peer_fencing(). - -I don't know all the conditions of QE's test scenario at this point. 
-What I do know: -* have-watchdog=true -* stonith-watchdog-timeout=10 -* no explicit topology -* fence agent script is missing for the configured fence device -* requested fencing of one node -* cluster shutdown - -Fixes RHBZ2166967 - -Signed-off-by: Reid Wahl ---- - daemons/fenced/fenced_remote.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index d61b5bd..b7426ff 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -1825,6 +1825,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) - op->state = st_exec; - if (op->op_timer_one) { - g_source_remove(op->op_timer_one); -+ op->op_timer_one = 0; - } - - if (!((stonith_watchdog_timeout_ms > 0) --- -2.31.1 - -From 0291db4750322ec7f01ae6a4a2a30abca9d8e19e Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Wed, 15 Feb 2023 22:30:27 -0800 -Subject: [PATCH 02/02] Fix: fencer: Avoid double source remove of op_timer_total - -remote_op_timeout() returns G_SOURCE_REMOVE, which tells GLib to remove -the source from the main loop after returning. Currently this function -is used as the callback only when creating op->op_timer_total. - -If we don't set op->op_timer_total to 0 before returning from -remote_op_timeout(), then we can get an assertion and core dump from -GLib when the op's timers are being cleared (either during op -finalization or during fencer shutdown). This is because -clear_remote_op_timers() sees that op->op_timer_total != 0 and tries to -remove the source, but the source has already been removed. - -Note that we're already (correctly) zeroing op->op_timer_one and -op->query_timeout as appropriate in their respective callback functions. - -Fortunately, GLib doesn't care whether the source has already been -removed before we return G_SOURCE_REMOVE from a callback. So it's safe -to call finalize_op() (which removes all the op's timer sources) from -within a callback. 
- -Fixes RHBZ#2166967 - -Signed-off-by: Reid Wahl ---- - daemons/fenced/fenced_remote.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index b7426ff88..adea3d7d8 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -718,6 +718,8 @@ remote_op_timeout(gpointer userdata) - { - remote_fencing_op_t *op = userdata; - -+ op->op_timer_total = 0; -+ - if (op->state == st_done) { - crm_debug("Action '%s' targeting %s for client %s already completed " - CRM_XS " id=%.8s", --- -2.39.0 diff --git a/SOURCES/005-attrd-cache-2.patch b/SOURCES/005-attrd-cache-2.patch new file mode 100644 index 0000000..c1174e1 --- /dev/null +++ b/SOURCES/005-attrd-cache-2.patch @@ -0,0 +1,2786 @@ +From 843ef27542aac43ed7789b15255dd4f30004f0d1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 18 Dec 2023 10:08:30 -0600 +Subject: [PATCH 1/9] Fix: attrd: write Pacemaker Remote node attributes even + if not in cache + +Previously, we required a node to be in one of the node caches in order to +write out its attributes. However for Pacemaker Remote nodes, we only need the +node name to do the write, and we already have that even if it's not cached. 
+--- + daemons/attrd/attrd_cib.c | 55 +++++++++++++++++++++------------------ + 1 file changed, 30 insertions(+), 25 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index ae65648..b22137a 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include // pcmk__get_peer_full() + + #include "pacemaker-attrd.h" + +@@ -556,20 +557,26 @@ write_attribute(attribute_t *a, bool ignore_delay) + /* Iterate over each peer value of this attribute */ + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { +- crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, +- CRM_GET_PEER_ANY); ++ const char *uuid = NULL; + +- /* If the value's peer info does not correspond to a peer, ignore it */ +- if (peer == NULL) { +- crm_notice("Cannot update %s[%s]=%s because peer not known", +- a->id, v->nodename, v->current); +- continue; +- } ++ if (pcmk_is_set(v->flags, attrd_value_remote)) { ++ /* If this is a Pacemaker Remote node, the node's UUID is the same ++ * as its name, which we already have. 
++ */ ++ uuid = v->nodename; + +- /* If we're just learning the peer's node id, remember it */ +- if (peer->id && (v->nodeid == 0)) { +- crm_trace("Learned ID %u for node %s", peer->id, v->nodename); +- v->nodeid = peer->id; ++ } else { ++ // This will create a cluster node cache entry if none exists ++ crm_node_t *peer = pcmk__get_peer_full(v->nodeid, v->nodename, NULL, ++ CRM_GET_PEER_ANY); ++ ++ uuid = peer->uuid; ++ ++ // Remember peer's node ID if we're just now learning it ++ if ((peer->id != 0) && (v->nodeid == 0)) { ++ crm_trace("Learned ID %u for node %s", peer->id, v->nodename); ++ v->nodeid = peer->id; ++ } + } + + /* If this is a private attribute, no update needs to be sent */ +@@ -578,29 +585,27 @@ write_attribute(attribute_t *a, bool ignore_delay) + continue; + } + +- /* If the peer is found, but its uuid is unknown, defer write */ +- if (peer->uuid == NULL) { ++ // Defer write if this is a cluster node that's never been seen ++ if (uuid == NULL) { + a->unknown_peer_uuids = true; +- crm_notice("Cannot update %s[%s]=%s because peer UUID not known " +- "(will retry if learned)", ++ crm_notice("Cannot update %s[%s]='%s' now because node's UUID is " ++ "unknown (will retry if learned)", + a->id, v->nodename, v->current); + continue; + } + + // Update this value as part of the CIB transaction we're building +- rc = add_attr_update(a, v->current, peer->uuid); ++ rc = add_attr_update(a, v->current, uuid); + if (rc != pcmk_rc_ok) { +- crm_err("Failed to update %s[%s]=%s (peer known as %s, UUID %s, " +- "ID %" PRIu32 "/%" PRIu32 "): %s", +- a->id, v->nodename, v->current, peer->uname, peer->uuid, +- peer->id, v->nodeid, pcmk_rc_str(rc)); ++ crm_err("Failed to update %s[%s]='%s': %s " ++ CRM_XS " node uuid=%s id=%" PRIu32, ++ a->id, v->nodename, v->current, pcmk_rc_str(rc), ++ uuid, v->nodeid); + continue; + } + +- crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID " +- "%" PRIu32 "/%" PRIu32 ")", +- a->id, v->nodename, v->current, +- peer->uname, 
peer->uuid, peer->id, v->nodeid); ++ crm_debug("Updating %s[%s]=%s (node uuid=%s id=%" PRIu32 ")", ++ a->id, v->nodename, v->current, uuid, v->nodeid); + cib_updates++; + + /* Preservation of the attribute to transmit alert */ +-- +2.31.1 + +From 724e6db9830475e212381430a30014ccda43c901 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 14:59:54 -0600 +Subject: [PATCH 2/9] API: libcrmcluster: deprecate crm_get_peer_full() + +--- + daemons/attrd/attrd_messages.c | 1 + + daemons/controld/controld_execd.c | 2 +- + include/crm/cluster.h | 5 +---- + include/crm/cluster/compat.h | 5 ++++- + lib/cluster/membership.c | 21 ++++++--------------- + 5 files changed, 13 insertions(+), 21 deletions(-) + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index ac32e18..53c70bd 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -12,6 +12,7 @@ + #include + + #include ++#include // pcmk__get_peer() + #include + + #include "pacemaker-attrd.h" +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 480d37d..381b0be 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -581,7 +581,7 @@ controld_query_executor_state(void) + return NULL; + } + +- peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); ++ peer = pcmk__get_peer_full(0, lrm_state->node_name, NULL, CRM_GET_PEER_ANY); + CRM_CHECK(peer != NULL, return NULL); + + xml_state = create_node_state_update(peer, +diff --git a/include/crm/cluster.h b/include/crm/cluster.h +index b61fd70..137684d 100644 +--- a/include/crm/cluster.h ++++ b/include/crm/cluster.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -146,9 +146,6 @@ void crm_remote_peer_cache_refresh(xmlNode *cib); + crm_node_t *crm_remote_peer_get(const char *node_name); + void crm_remote_peer_cache_remove(const char *node_name); + +-/* allows filtering of remote and cluster nodes using crm_get_peer_flags */ +-crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags); +- + /* only searches cluster nodes */ + crm_node_t *crm_get_peer(unsigned int id, const char *uname); + +diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h +index 89a03fd..fc68f27 100644 +--- a/include/crm/cluster/compat.h ++++ b/include/crm/cluster/compat.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -26,6 +26,9 @@ extern "C" { + * release. + */ + ++// \deprecated Do not use Pacemaker for cluster node cacheing ++crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags); ++ + // \deprecated Use stonith_api_kick() from libstonithd instead + int crm_terminate_member(int nodeid, const char *uname, void *unused); + +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index a653617..52db840 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -634,21 +634,6 @@ pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) + free(node_name_copy); + } + +-/*! +- * \brief Get a node cache entry (cluster or Pacemaker Remote) +- * +- * \param[in] id If not 0, cluster node ID to search for +- * \param[in] uname If not NULL, node name to search for +- * \param[in] flags Bitmask of enum crm_get_peer_flags +- * +- * \return (Possibly newly created) node cache entry +- */ +-crm_node_t * +-crm_get_peer_full(unsigned int id, const char *uname, int flags) +-{ +- return pcmk__get_peer_full(id, uname, NULL, flags); +-} +- + /*! 
+ * \internal + * \brief Search cluster node cache +@@ -1444,5 +1429,11 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) + return stonith_api_kick(nodeid, uname, 120, TRUE); + } + ++crm_node_t * ++crm_get_peer_full(unsigned int id, const char *uname, int flags) ++{ ++ return pcmk__get_peer_full(id, uname, NULL, flags); ++} ++ + // LCOV_EXCL_STOP + // End deprecated API +-- +2.31.1 + +From 8a263fa254a62b07f3b591844e7eacd5cdd0538f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 15:07:47 -0600 +Subject: [PATCH 3/9] API: libcrmcluster: deprecate crm_get_peer() + +Use pcmk__get_peer() internally +--- + daemons/attrd/attrd_corosync.c | 8 +++----- + daemons/attrd/attrd_messages.c | 6 +++--- + daemons/based/based_callbacks.c | 5 +++-- + daemons/based/based_messages.c | 7 ++++--- + daemons/controld/controld_corosync.c | 2 +- + daemons/controld/controld_election.c | 3 ++- + daemons/controld/controld_execd.c | 2 +- + daemons/controld/controld_fencing.c | 2 +- + daemons/controld/controld_join_client.c | 6 +++--- + daemons/controld/controld_join_dc.c | 10 +++++----- + daemons/controld/controld_messages.c | 2 +- + daemons/controld/controld_remote_ra.c | 2 +- + daemons/controld/controld_te_actions.c | 8 +++++--- + daemons/controld/controld_te_events.c | 3 ++- + daemons/controld/controld_utils.c | 2 +- + daemons/fenced/fenced_commands.c | 8 ++++---- + daemons/fenced/fenced_remote.c | 8 +++++--- + include/crm/cluster.h | 3 --- + include/crm/cluster/compat.h | 3 +++ + lib/cluster/corosync.c | 8 ++++---- + lib/cluster/cpg.c | 8 ++++---- + lib/cluster/election.c | 6 +++--- + lib/cluster/membership.c | 21 ++++++--------------- + 23 files changed, 65 insertions(+), 68 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index eba734c..3b2880b 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -119,9 +119,7 @@ attrd_cpg_dispatch(cpg_handle_t handle, + if 
(xml == NULL) { + crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); + } else { +- crm_node_t *peer = crm_get_peer(nodeid, from); +- +- attrd_peer_message(peer, xml); ++ attrd_peer_message(pcmk__get_peer(nodeid, from, NULL), xml); + } + + free_xml(xml); +@@ -254,7 +252,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + static void + record_peer_nodeid(attribute_value_t *v, const char *host) + { +- crm_node_t *known_peer = crm_get_peer(v->nodeid, host); ++ crm_node_t *known_peer = pcmk__get_peer(v->nodeid, host, NULL); + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { +@@ -439,7 +437,7 @@ attrd_peer_clear_failure(pcmk__request_t *request) + GHashTableIter iter; + regex_t regex; + +- crm_node_t *peer = crm_get_peer(0, request->peer); ++ crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); + + if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) { + crm_info("Ignoring invalid request to clear failures for %s", +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 53c70bd..5536207 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -177,7 +177,7 @@ static xmlNode * + handle_sync_request(pcmk__request_t *request) + { + if (request->peer != NULL) { +- crm_node_t *peer = crm_get_peer(0, request->peer); ++ crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); + + attrd_peer_sync(peer, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +@@ -194,7 +194,7 @@ handle_sync_response_request(pcmk__request_t *request) + return handle_unknown_request(request); + } else { + if (request->peer != NULL) { +- crm_node_t *peer = crm_get_peer(0, request->peer); ++ crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); + bool peer_won = attrd_check_for_new_writer(peer, request->xml); + + if (!pcmk__str_eq(peer->uname, 
attrd_cluster->uname, pcmk__str_casei)) { +@@ -212,7 +212,7 @@ handle_update_request(pcmk__request_t *request) + { + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); +- crm_node_t *peer = crm_get_peer(0, request->peer); ++ crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); + + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c +index 4fac222..02f3425 100644 +--- a/daemons/based/based_callbacks.c ++++ b/daemons/based/based_callbacks.c +@@ -928,7 +928,7 @@ forward_request(xmlNode *request) + + crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); + +- send_cluster_message(((host != NULL)? crm_get_peer(0, host) : NULL), ++ send_cluster_message(((host != NULL)? pcmk__get_peer(0, host, NULL) : NULL), + crm_msg_cib, request, FALSE); + + // Return the request to its original state +@@ -986,7 +986,8 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb + /* send reply via HA to originating node */ + crm_trace("Sending request result to %s only", originator); + crm_xml_add(msg, F_CIB_ISREPLY, originator); +- return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE); ++ return send_cluster_message(pcmk__get_peer(0, originator, NULL), ++ crm_msg_cib, msg, FALSE); + } + + return FALSE; +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index a87d9ac..08521e4 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -127,7 +127,8 @@ send_sync_request(const char *host) + crm_xml_add(sync_me, F_CIB_DELEGATED, + stand_alone? "localhost" : crm_cluster->uname); + +- send_cluster_message(host ? crm_get_peer(0, host) : NULL, crm_msg_cib, sync_me, FALSE); ++ send_cluster_message((host == NULL)? 
NULL : pcmk__get_peer(0, host, NULL), ++ crm_msg_cib, sync_me, FALSE); + free_xml(sync_me); + } + +@@ -443,8 +444,8 @@ sync_our_cib(xmlNode * request, gboolean all) + + add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); + +- if (send_cluster_message +- (all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) { ++ if (!send_cluster_message(all? NULL : pcmk__get_peer(0, host, NULL), ++ crm_msg_cib, replace_request, FALSE)) { + result = -ENOTCONN; + } + free_xml(replace_request); +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index b69e821..c2953b5 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -49,7 +49,7 @@ crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? */ + +- peer = crm_get_peer(0, from); ++ peer = pcmk__get_peer(0, from, NULL); + if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { + /* If we can still talk to our peer process on that node, + * then it must be part of the corosync membership +diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c +index 70ffecc..6e22067 100644 +--- a/daemons/controld/controld_election.c ++++ b/daemons/controld/controld_election.c +@@ -265,7 +265,8 @@ do_dc_release(long long action, + crm_info("DC role released"); + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + xmlNode *update = NULL; +- crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename); ++ crm_node_t *node = pcmk__get_peer(0, controld_globals.our_nodename, ++ NULL); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + update = create_node_state_update(node, node_update_expected, NULL, +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 381b0be..45b6b58 100644 +--- a/daemons/controld/controld_execd.c ++++ 
b/daemons/controld/controld_execd.c +@@ -1752,7 +1752,7 @@ controld_ack_event_directly(const char *to_host, const char *to_sys, + to_sys = CRM_SYSTEM_TENGINE; + } + +- peer = crm_get_peer(0, controld_globals.our_nodename); ++ peer = pcmk__get_peer(0, controld_globals.our_nodename, NULL); + update = create_node_state_update(peer, node_update_none, NULL, + __func__); + +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 9557d9e..6c0ee09 100644 +--- a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -374,7 +374,7 @@ execute_stonith_cleanup(void) + + for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { + char *target = iter->data; +- crm_node_t *target_node = crm_get_peer(0, target); ++ crm_node_t *target_node = pcmk__get_peer(0, target, NULL); + const char *uuid = crm_peer_uuid(target_node); + + crm_notice("Marking %s, target of a previous stonith action, as clean", target); +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 805ecbd..2b5267d 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -35,7 +35,7 @@ update_dc_expected(const xmlNode *msg) + { + if ((controld_globals.dc_name != NULL) + && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { +- crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name); ++ crm_node_t *dc_node = pcmk__get_peer(0, controld_globals.dc_name, NULL); + + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); + } +@@ -177,7 +177,7 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + + crm_xml_add(reply, F_CRM_JOIN_ID, join_id); + crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); +- send_cluster_message(crm_get_peer(0, controld_globals.dc_name), ++ send_cluster_message(pcmk__get_peer(0, controld_globals.dc_name, NULL), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + } +@@ -333,7 +333,7 
@@ do_cl_join_finalize_respond(long long action, + } + } + +- send_cluster_message(crm_get_peer(0, controld_globals.dc_name), ++ send_cluster_message(pcmk__get_peer(0, controld_globals.dc_name, NULL), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 2fe6710..45e1eba 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -318,7 +318,7 @@ do_dc_join_offer_one(long long action, + crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } +- member = crm_get_peer(0, join_to); ++ member = pcmk__get_peer(0, join_to, NULL); + + /* It is possible that a node will have been sick or starting up when the + * original offer was made. However, it will either re-announce itself in +@@ -332,7 +332,7 @@ do_dc_join_offer_one(long long action, + * well, to ensure the correct value for max_generation_from. + */ + if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { +- member = crm_get_peer(0, controld_globals.our_nodename); ++ member = pcmk__get_peer(0, controld_globals.our_nodename, NULL); + join_make_offer(NULL, member, NULL); + } + +@@ -396,7 +396,7 @@ do_dc_join_filter_offer(long long action, + crm_err("Ignoring invalid join request without node name"); + return; + } +- join_node = crm_get_peer(0, join_from); ++ join_node = pcmk__get_peer(0, join_from, NULL); + + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); + if (join_id != current_join_id) { +@@ -732,7 +732,7 @@ do_dc_join_ack(long long action, + goto done; + } + +- peer = crm_get_peer(0, join_from); ++ peer = pcmk__get_peer(0, join_from, NULL); + if (peer->join != crm_join_finalized) { + crm_info("Ignoring out-of-sequence join-%d confirmation from %s " + "(currently %s not %s)", +@@ -866,7 +866,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) + return; + } + +- join_node = crm_get_peer(0, join_to); ++ join_node = 
pcmk__get_peer(0, join_to, NULL); + if (!crm_is_peer_active(join_node)) { + /* + * NACK'ing nodes that the membership layer doesn't know about yet +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 39f3c7a..8d3cef7 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -458,7 +458,7 @@ relay_message(xmlNode * msg, gboolean originated_locally) + ref, pcmk__s(host_to, "broadcast")); + crm_log_xml_trace(msg, "relayed"); + if (!broadcast) { +- node_to = crm_get_peer(0, host_to); ++ node_to = pcmk__get_peer(0, host_to, NULL); + } + send_cluster_message(node_to, dest, msg, TRUE); + return TRUE; +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index d692ef6..a9c398d 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -206,7 +206,7 @@ should_purge_attributes(crm_node_t *node) + /* Get the node that was hosting the remote connection resource from the + * peer cache. That's the one we really care about here. 
+ */ +- conn_node = crm_get_peer(0, node->conn_host); ++ conn_node = pcmk__get_peer(0, node->conn_host, NULL); + if (conn_node == NULL) { + return purge; + } +diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c +index fe6b744..e76174b 100644 +--- a/daemons/controld/controld_te_actions.c ++++ b/daemons/controld/controld_te_actions.c +@@ -158,7 +158,7 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + return pcmk_rc_ok; + + } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { +- crm_node_t *peer = crm_get_peer(0, router_node); ++ crm_node_t *peer = pcmk__get_peer(0, router_node, NULL); + + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); + } +@@ -170,7 +170,8 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + controld_globals.te_uuid); + crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); + +- rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); ++ rc = send_cluster_message(pcmk__get_peer(0, router_node, NULL), ++ crm_msg_crmd, cmd, TRUE); + free(counter); + free_xml(cmd); + +@@ -421,7 +422,8 @@ execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + I_NULL, &msg); + + } else { +- rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); ++ rc = send_cluster_message(pcmk__get_peer(0, router_node, NULL), ++ crm_msg_lrmd, cmd, TRUE); + } + + free(counter); +diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c +index 28977c0..c8cceed 100644 +--- a/daemons/controld/controld_te_events.c ++++ b/daemons/controld/controld_te_events.c +@@ -119,7 +119,8 @@ fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) + target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router) { +- crm_node_t *node = crm_get_peer(0, router); ++ 
crm_node_t *node = pcmk__get_peer(0, router, NULL); ++ + if (node) { + router_uuid = node->uuid; + } +diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c +index 9b306ee..55790c0 100644 +--- a/daemons/controld/controld_utils.c ++++ b/daemons/controld/controld_utils.c +@@ -734,7 +734,7 @@ update_dc(xmlNode * msg) + /* do nothing */ + + } else if (controld_globals.dc_name != NULL) { +- crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name); ++ crm_node_t *dc_node = pcmk__get_peer(0, controld_globals.dc_name, NULL); + + crm_info("Set DC to %s (%s)", + controld_globals.dc_name, +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 7a62ed6..28f08dd 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -645,7 +645,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) + } + + if (device->include_nodeid && (cmd->target != NULL)) { +- crm_node_t *node = crm_get_peer(0, cmd->target); ++ crm_node_t *node = pcmk__get_peer(0, cmd->target, NULL); + + cmd->target_nodeid = node->id; + } +@@ -2402,8 +2402,8 @@ stonith_send_reply(const xmlNode *reply, int call_options, + if (remote_peer == NULL) { + do_local_reply(reply, client, call_options); + } else { +- send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, +- reply, FALSE); ++ send_cluster_message(pcmk__get_peer(0, remote_peer, NULL), ++ crm_msg_stonith_ng, reply, FALSE); + } + } + +@@ -3371,7 +3371,7 @@ handle_fence_request(pcmk__request_t *request) + crm_xml_add(request->xml, F_STONITH_CLIENTID, + request->ipc_client->id); + crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); +- send_cluster_message(crm_get_peer(0, alternate_host), ++ send_cluster_message(pcmk__get_peer(0, alternate_host, NULL), + crm_msg_stonith_ng, request->xml, FALSE); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); +diff --git a/daemons/fenced/fenced_remote.c 
b/daemons/fenced/fenced_remote.c +index 843b3d4..3c176c8 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1030,7 +1030,7 @@ merge_duplicates(remote_fencing_op_t *op) + op->id, other->id, other->target); + continue; + } +- if (!fencing_peer_active(crm_get_peer(0, other->originator))) { ++ if (!fencing_peer_active(pcmk__get_peer(0, other->originator, NULL))) { + crm_notice("Failing action '%s' targeting %s originating from " + "client %s@%s: Originator is dead " CRM_XS " id=%.8s", + other->action, other->target, other->client_name, +@@ -1663,7 +1663,8 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + crm_xml_add(update, F_STONITH_CALLID, call_id); + crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); + +- send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); ++ send_cluster_message(pcmk__get_peer(0, client_node, NULL), ++ crm_msg_stonith_ng, update, FALSE); + + free_xml(update); + +@@ -1916,7 +1917,8 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); + } + +- send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); ++ send_cluster_message(pcmk__get_peer(0, peer->host, NULL), ++ crm_msg_stonith_ng, remote_op, FALSE); + peer->tried = TRUE; + free_xml(remote_op); + return; +diff --git a/include/crm/cluster.h b/include/crm/cluster.h +index 137684d..302b807 100644 +--- a/include/crm/cluster.h ++++ b/include/crm/cluster.h +@@ -146,9 +146,6 @@ void crm_remote_peer_cache_refresh(xmlNode *cib); + crm_node_t *crm_remote_peer_get(const char *node_name); + void crm_remote_peer_cache_remove(const char *node_name); + +-/* only searches cluster nodes */ +-crm_node_t *crm_get_peer(unsigned int id, const char *uname); +- + guint crm_active_peers(void); + gboolean crm_is_peer_active(const crm_node_t * node); + guint reap_crm_member(uint32_t id, const 
char *name); +diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h +index fc68f27..e853fd8 100644 +--- a/include/crm/cluster/compat.h ++++ b/include/crm/cluster/compat.h +@@ -26,6 +26,9 @@ extern "C" { + * release. + */ + ++// \deprecated Do not use Pacemaker for cluster node cacheing ++crm_node_t *crm_get_peer(unsigned int id, const char *uname); ++ + // \deprecated Do not use Pacemaker for cluster node cacheing + crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags); + +diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c +index 08280ce..34a31fb 100644 +--- a/lib/cluster/corosync.c ++++ b/lib/cluster/corosync.c +@@ -309,12 +309,12 @@ quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, + crm_debug("Member[%d] %u ", i, id); + + /* Get this node's peer cache entry (adding one if not already there) */ +- node = crm_get_peer(id, NULL); ++ node = pcmk__get_peer(id, NULL, NULL); + if (node->uname == NULL) { + char *name = pcmk__corosync_name(0, id); + + crm_info("Obtaining name for new node %u", id); +- node = crm_get_peer(id, name); ++ node = pcmk__get_peer(id, name, NULL); + free(name); + } + +@@ -480,7 +480,7 @@ pcmk__corosync_connect(crm_cluster_t *cluster) + } + + // Ensure local node always exists in peer cache +- peer = crm_get_peer(cluster->nodeid, cluster->uname); ++ peer = pcmk__get_peer(cluster->nodeid, cluster->uname, NULL); + cluster->uuid = pcmk__corosync_uuid(peer); + + return TRUE; +@@ -640,7 +640,7 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent) + + if (nodeid > 0 || name != NULL) { + crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); +- crm_get_peer(nodeid, name); ++ pcmk__get_peer(nodeid, name, NULL); + } + + if (nodeid > 0 && name != NULL) { +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index d1decc6..778368f 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -465,7 +465,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + + 
msg->sender.id = nodeid; + if (msg->sender.size == 0) { +- crm_node_t *peer = crm_get_peer(nodeid, NULL); ++ crm_node_t *peer = pcmk__get_peer(nodeid, NULL, NULL); + + if (peer == NULL) { + crm_err("Peer with nodeid=%u is unknown", nodeid); +@@ -526,7 +526,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + } + + // Is this necessary? +- crm_get_peer(msg->sender.id, msg->sender.uname); ++ pcmk__get_peer(msg->sender.id, msg->sender.uname, NULL); + + crm_trace("Payload: %.200s", data); + return data; +@@ -720,7 +720,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + } + + for (i = 0; i < member_list_entries; i++) { +- crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); ++ crm_node_t *peer = pcmk__get_peer(member_list[i].nodeid, NULL, NULL); + + if (member_list[i].nodeid == local_nodeid + && member_list[i].pid != getpid()) { +@@ -873,7 +873,7 @@ cluster_connect_cpg(crm_cluster_t *cluster) + return FALSE; + } + +- peer = crm_get_peer(id, NULL); ++ peer = pcmk__get_peer(id, NULL, NULL); + crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); + return TRUE; + } +diff --git a/lib/cluster/election.c b/lib/cluster/election.c +index ebbae72..31867f2 100644 +--- a/lib/cluster/election.c ++++ b/lib/cluster/election.c +@@ -298,7 +298,7 @@ election_vote(election_t *e) + return; + } + +- our_node = crm_get_peer(0, e->uname); ++ our_node = pcmk__get_peer(0, e->uname, NULL); + if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) { + crm_trace("Cannot vote in %s yet: local node not connected to cluster", + e->name); +@@ -547,8 +547,8 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) + return election_error; + } + +- your_node = crm_get_peer(0, vote.from); +- our_node = crm_get_peer(0, e->uname); ++ your_node = pcmk__get_peer(0, vote.from, NULL); ++ our_node = pcmk__get_peer(0, e->uname, NULL); + we_are_owner = (our_node != NULL) + && pcmk__str_eq(our_node->uuid, vote.election_owner, + 
pcmk__str_none); +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 52db840..41e0fa3 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -868,21 +868,6 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) + return node; + } + +-/*! +- * \brief Get a cluster node cache entry +- * +- * \param[in] id If not 0, cluster node ID to search for +- * \param[in] uname If not NULL, node name to search for +- * +- * \return (Possibly newly created) cluster node cache entry +- */ +-/* coverity[-alloc] Memory is referenced in one or both hashtables */ +-crm_node_t * +-crm_get_peer(unsigned int id, const char *uname) +-{ +- return pcmk__get_peer(id, uname, NULL); +-} +- + /*! + * \internal + * \brief Update a node's uname +@@ -1429,6 +1414,12 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) + return stonith_api_kick(nodeid, uname, 120, TRUE); + } + ++crm_node_t * ++crm_get_peer(unsigned int id, const char *uname) ++{ ++ return pcmk__get_peer(id, uname, NULL); ++} ++ + crm_node_t * + crm_get_peer_full(unsigned int id, const char *uname, int flags) + { +-- +2.31.1 + +From 39e949a698afb5b0177b05e7d81b403cbb27a57a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 15:23:59 -0600 +Subject: [PATCH 4/9] Refactor: libcrmcluster: consolidate pcmk__get_peer() and + pcmk__get_peer_full() + +... 
into a new function pcmk__get_node() (since it can handle Pacemaker Remote +nodes, which aren't peers) +--- + daemons/attrd/attrd_cib.c | 6 +-- + daemons/attrd/attrd_corosync.c | 10 +++-- + daemons/attrd/attrd_messages.c | 11 ++++-- + daemons/based/based_callbacks.c | 10 +++-- + daemons/based/based_messages.c | 14 +++++-- + daemons/controld/controld_corosync.c | 2 +- + daemons/controld/controld_election.c | 4 +- + daemons/controld/controld_execd.c | 5 ++- + daemons/controld/controld_fencing.c | 5 ++- + daemons/controld/controld_join_client.c | 9 +++-- + daemons/controld/controld_join_dc.c | 11 +++--- + daemons/controld/controld_messages.c | 3 +- + daemons/controld/controld_remote_ra.c | 2 +- + daemons/controld/controld_te_actions.c | 9 +++-- + daemons/controld/controld_te_events.c | 3 +- + daemons/controld/controld_utils.c | 3 +- + daemons/fenced/fenced_commands.c | 9 +++-- + daemons/fenced/fenced_remote.c | 9 +++-- + include/crm/cluster/internal.h | 8 ++-- + lib/cluster/corosync.c | 9 +++-- + lib/cluster/cpg.c | 13 ++++--- + lib/cluster/election.c | 6 +-- + lib/cluster/membership.c | 52 ++++++++----------------- + 23 files changed, 116 insertions(+), 97 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index b22137a..7018a32 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -20,7 +20,7 @@ + #include + #include + #include +-#include <crm/cluster/internal.h> // pcmk__get_peer_full() ++#include <crm/cluster/internal.h> // pcmk__get_node() + + #include "pacemaker-attrd.h" + +@@ -567,8 +567,8 @@ write_attribute(attribute_t *a, bool ignore_delay) + + } else { + // This will create a cluster node cache entry if none exists +- crm_node_t *peer = pcmk__get_peer_full(v->nodeid, v->nodename, NULL, +- CRM_GET_PEER_ANY); ++ crm_node_t *peer = pcmk__get_node(v->nodeid, v->nodename, NULL, ++ CRM_GET_PEER_ANY); + + uuid = peer->uuid; + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 3b2880b..c9e11e6 100644 +---
a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -119,7 +119,9 @@ attrd_cpg_dispatch(cpg_handle_t handle, + if (xml == NULL) { + crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); + } else { +- attrd_peer_message(pcmk__get_peer(nodeid, from, NULL), xml); ++ attrd_peer_message(pcmk__get_node(nodeid, from, NULL, ++ CRM_GET_PEER_CLUSTER), ++ xml); + } + + free_xml(xml); +@@ -252,7 +254,8 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + static void + record_peer_nodeid(attribute_value_t *v, const char *host) + { +- crm_node_t *known_peer = pcmk__get_peer(v->nodeid, host, NULL); ++ crm_node_t *known_peer = pcmk__get_node(v->nodeid, host, NULL, ++ CRM_GET_PEER_CLUSTER); + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { +@@ -437,7 +440,8 @@ attrd_peer_clear_failure(pcmk__request_t *request) + GHashTableIter iter; + regex_t regex; + +- crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); ++ crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, ++ CRM_GET_PEER_CLUSTER); + + if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) { + crm_info("Ignoring invalid request to clear failures for %s", +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 5536207..c6c1b9a 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -12,7 +12,7 @@ + #include + + #include +-#include <crm/cluster/internal.h> // pcmk__get_peer() ++#include <crm/cluster/internal.h> // pcmk__get_node() + #include + + #include "pacemaker-attrd.h" +@@ -177,7 +177,8 @@ static xmlNode * + handle_sync_request(pcmk__request_t *request) + { + if (request->peer != NULL) { +- crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); ++ crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, ++ CRM_GET_PEER_CLUSTER); + + attrd_peer_sync(peer, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
NULL); +@@ -194,7 +195,8 @@ handle_sync_response_request(pcmk__request_t *request) + return handle_unknown_request(request); + } else { + if (request->peer != NULL) { +- crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); ++ crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, ++ CRM_GET_PEER_CLUSTER); + bool peer_won = attrd_check_for_new_writer(peer, request->xml); + + if (!pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { +@@ -212,7 +214,8 @@ handle_update_request(pcmk__request_t *request) + { + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); +- crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); ++ crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, ++ CRM_GET_PEER_CLUSTER); + + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c +index 02f3425..b1f3b4b 100644 +--- a/daemons/based/based_callbacks.c ++++ b/daemons/based/based_callbacks.c +@@ -910,6 +910,7 @@ forward_request(xmlNode *request) + const char *originator = crm_element_value(request, F_ORIG); + const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); + const char *call_id = crm_element_value(request, F_CIB_CALLID); ++ crm_node_t *peer = NULL; + + int log_level = LOG_INFO; + +@@ -928,8 +929,10 @@ forward_request(xmlNode *request) + + crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); + +- send_cluster_message(((host != NULL)? 
pcmk__get_peer(0, host, NULL) : NULL), +- crm_msg_cib, request, FALSE); ++ if (host != NULL) { ++ peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ } ++ send_cluster_message(peer, crm_msg_cib, request, FALSE); + + // Return the request to its original state + xml_remove_prop(request, F_CIB_DELEGATED); +@@ -986,7 +989,8 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb + /* send reply via HA to originating node */ + crm_trace("Sending request result to %s only", originator); + crm_xml_add(msg, F_CIB_ISREPLY, originator); +- return send_cluster_message(pcmk__get_peer(0, originator, NULL), ++ return send_cluster_message(pcmk__get_node(0, originator, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_cib, msg, FALSE); + } + +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index 08521e4..ff1a6aa 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -118,6 +118,7 @@ void + send_sync_request(const char *host) + { + xmlNode *sync_me = create_xml_node(NULL, "sync-me"); ++ crm_node_t *peer = NULL; + + crm_info("Requesting re-sync from %s", (host? host : "all peers")); + sync_in_progress = 1; +@@ -127,8 +128,10 @@ send_sync_request(const char *host) + crm_xml_add(sync_me, F_CIB_DELEGATED, + stand_alone? "localhost" : crm_cluster->uname); + +- send_cluster_message((host == NULL)? 
NULL : pcmk__get_peer(0, host, NULL), +- crm_msg_cib, sync_me, FALSE); ++ if (host != NULL) { ++ peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ } ++ send_cluster_message(peer, crm_msg_cib, sync_me, FALSE); + free_xml(sync_me); + } + +@@ -418,6 +421,7 @@ sync_our_cib(xmlNode * request, gboolean all) + const char *host = crm_element_value(request, F_ORIG); + const char *op = crm_element_value(request, F_CIB_OPERATION); + ++ crm_node_t *peer = NULL; + xmlNode *replace_request = NULL; + + CRM_CHECK(the_cib != NULL, return -EINVAL); +@@ -444,8 +448,10 @@ sync_our_cib(xmlNode * request, gboolean all) + + add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); + +- if (!send_cluster_message(all? NULL : pcmk__get_peer(0, host, NULL), +- crm_msg_cib, replace_request, FALSE)) { ++ if (!all) { ++ peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ } ++ if (!send_cluster_message(peer, crm_msg_cib, replace_request, FALSE)) { + result = -ENOTCONN; + } + free_xml(replace_request); +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index c2953b5..fa1df6f 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -49,7 +49,7 @@ crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? 
*/ + +- peer = pcmk__get_peer(0, from, NULL); ++ peer = pcmk__get_node(0, from, NULL, CRM_GET_PEER_CLUSTER); + if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { + /* If we can still talk to our peer process on that node, + * then it must be part of the corosync membership +diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c +index 6e22067..734064d 100644 +--- a/daemons/controld/controld_election.c ++++ b/daemons/controld/controld_election.c +@@ -265,8 +265,8 @@ do_dc_release(long long action, + crm_info("DC role released"); + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + xmlNode *update = NULL; +- crm_node_t *node = pcmk__get_peer(0, controld_globals.our_nodename, +- NULL); ++ crm_node_t *node = pcmk__get_node(0, controld_globals.our_nodename, ++ NULL, CRM_GET_PEER_CLUSTER); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + update = create_node_state_update(node, node_update_expected, NULL, +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 45b6b58..df715aa 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -581,7 +581,7 @@ controld_query_executor_state(void) + return NULL; + } + +- peer = pcmk__get_peer_full(0, lrm_state->node_name, NULL, CRM_GET_PEER_ANY); ++ peer = pcmk__get_node(0, lrm_state->node_name, NULL, CRM_GET_PEER_ANY); + CRM_CHECK(peer != NULL, return NULL); + + xml_state = create_node_state_update(peer, +@@ -1752,7 +1752,8 @@ controld_ack_event_directly(const char *to_host, const char *to_sys, + to_sys = CRM_SYSTEM_TENGINE; + } + +- peer = pcmk__get_peer(0, controld_globals.our_nodename, NULL); ++ peer = pcmk__get_node(0, controld_globals.our_nodename, NULL, ++ CRM_GET_PEER_CLUSTER); + update = create_node_state_update(peer, node_update_none, NULL, + __func__); + +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 6c0ee09..60a7f9f 100644 +--- 
a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -222,7 +222,7 @@ send_stonith_update(pcmk__graph_action_t *action, const char *target, + * Try getting any existing node cache entry also by node uuid in case it + * doesn't have an uname yet. + */ +- peer = pcmk__get_peer_full(0, target, uuid, CRM_GET_PEER_ANY); ++ peer = pcmk__get_node(0, target, uuid, CRM_GET_PEER_ANY); + + CRM_CHECK(peer != NULL, return); + +@@ -374,7 +374,8 @@ execute_stonith_cleanup(void) + + for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { + char *target = iter->data; +- crm_node_t *target_node = pcmk__get_peer(0, target, NULL); ++ crm_node_t *target_node = pcmk__get_node(0, target, NULL, ++ CRM_GET_PEER_CLUSTER); + const char *uuid = crm_peer_uuid(target_node); + + crm_notice("Marking %s, target of a previous stonith action, as clean", target); +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 2b5267d..6f20ef2 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -35,7 +35,8 @@ update_dc_expected(const xmlNode *msg) + { + if ((controld_globals.dc_name != NULL) + && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { +- crm_node_t *dc_node = pcmk__get_peer(0, controld_globals.dc_name, NULL); ++ crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, ++ CRM_GET_PEER_CLUSTER); + + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); + } +@@ -177,7 +178,8 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + + crm_xml_add(reply, F_CRM_JOIN_ID, join_id); + crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); +- send_cluster_message(pcmk__get_peer(0, controld_globals.dc_name, NULL), ++ send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + } +@@ -333,7 +335,8 @@ 
do_cl_join_finalize_respond(long long action, + } + } + +- send_cluster_message(pcmk__get_peer(0, controld_globals.dc_name, NULL), ++ send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 45e1eba..064649f 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -318,7 +318,7 @@ do_dc_join_offer_one(long long action, + crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } +- member = pcmk__get_peer(0, join_to, NULL); ++ member = pcmk__get_node(0, join_to, NULL, CRM_GET_PEER_CLUSTER); + + /* It is possible that a node will have been sick or starting up when the + * original offer was made. However, it will either re-announce itself in +@@ -332,7 +332,8 @@ do_dc_join_offer_one(long long action, + * well, to ensure the correct value for max_generation_from. 
+ */ + if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { +- member = pcmk__get_peer(0, controld_globals.our_nodename, NULL); ++ member = pcmk__get_node(0, controld_globals.our_nodename, NULL, ++ CRM_GET_PEER_CLUSTER); + join_make_offer(NULL, member, NULL); + } + +@@ -396,7 +397,7 @@ do_dc_join_filter_offer(long long action, + crm_err("Ignoring invalid join request without node name"); + return; + } +- join_node = pcmk__get_peer(0, join_from, NULL); ++ join_node = pcmk__get_node(0, join_from, NULL, CRM_GET_PEER_CLUSTER); + + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); + if (join_id != current_join_id) { +@@ -732,7 +733,7 @@ do_dc_join_ack(long long action, + goto done; + } + +- peer = pcmk__get_peer(0, join_from, NULL); ++ peer = pcmk__get_node(0, join_from, NULL, CRM_GET_PEER_CLUSTER); + if (peer->join != crm_join_finalized) { + crm_info("Ignoring out-of-sequence join-%d confirmation from %s " + "(currently %s not %s)", +@@ -866,7 +867,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) + return; + } + +- join_node = pcmk__get_peer(0, join_to, NULL); ++ join_node = pcmk__get_node(0, join_to, NULL, CRM_GET_PEER_CLUSTER); + if (!crm_is_peer_active(join_node)) { + /* + * NACK'ing nodes that the membership layer doesn't know about yet +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 8d3cef7..71f5680 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -458,7 +458,8 @@ relay_message(xmlNode * msg, gboolean originated_locally) + ref, pcmk__s(host_to, "broadcast")); + crm_log_xml_trace(msg, "relayed"); + if (!broadcast) { +- node_to = pcmk__get_peer(0, host_to, NULL); ++ node_to = pcmk__get_node(0, host_to, NULL, ++ CRM_GET_PEER_CLUSTER); + } + send_cluster_message(node_to, dest, msg, TRUE); + return TRUE; +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index a9c398d..9c4bb58 100644 
+--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -206,7 +206,7 @@ should_purge_attributes(crm_node_t *node) + /* Get the node that was hosting the remote connection resource from the + * peer cache. That's the one we really care about here. + */ +- conn_node = pcmk__get_peer(0, node->conn_host, NULL); ++ conn_node = pcmk__get_node(0, node->conn_host, NULL, CRM_GET_PEER_CLUSTER); + if (conn_node == NULL) { + return purge; + } +diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c +index e76174b..01ba4a0 100644 +--- a/daemons/controld/controld_te_actions.c ++++ b/daemons/controld/controld_te_actions.c +@@ -158,7 +158,8 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + return pcmk_rc_ok; + + } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { +- crm_node_t *peer = pcmk__get_peer(0, router_node, NULL); ++ crm_node_t *peer = pcmk__get_node(0, router_node, NULL, ++ CRM_GET_PEER_CLUSTER); + + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); + } +@@ -170,7 +171,8 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + controld_globals.te_uuid); + crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); + +- rc = send_cluster_message(pcmk__get_peer(0, router_node, NULL), ++ rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_crmd, cmd, TRUE); + free(counter); + free_xml(cmd); +@@ -422,7 +424,8 @@ execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + I_NULL, &msg); + + } else { +- rc = send_cluster_message(pcmk__get_peer(0, router_node, NULL), ++ rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_lrmd, cmd, TRUE); + } + +diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c +index c8cceed..84bef5b 100644 +--- a/daemons/controld/controld_te_events.c ++++ 
b/daemons/controld/controld_te_events.c +@@ -119,7 +119,8 @@ fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) + target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router) { +- crm_node_t *node = pcmk__get_peer(0, router, NULL); ++ crm_node_t *node = pcmk__get_node(0, router, NULL, ++ CRM_GET_PEER_CLUSTER); + + if (node) { + router_uuid = node->uuid; +diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c +index 55790c0..0e92416 100644 +--- a/daemons/controld/controld_utils.c ++++ b/daemons/controld/controld_utils.c +@@ -734,7 +734,8 @@ update_dc(xmlNode * msg) + /* do nothing */ + + } else if (controld_globals.dc_name != NULL) { +- crm_node_t *dc_node = pcmk__get_peer(0, controld_globals.dc_name, NULL); ++ crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, ++ CRM_GET_PEER_CLUSTER); + + crm_info("Set DC to %s (%s)", + controld_globals.dc_name, +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 28f08dd..c519607 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -645,7 +645,8 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) + } + + if (device->include_nodeid && (cmd->target != NULL)) { +- crm_node_t *node = pcmk__get_peer(0, cmd->target, NULL); ++ crm_node_t *node = pcmk__get_node(0, cmd->target, NULL, ++ CRM_GET_PEER_CLUSTER); + + cmd->target_nodeid = node->id; + } +@@ -2402,7 +2403,8 @@ stonith_send_reply(const xmlNode *reply, int call_options, + if (remote_peer == NULL) { + do_local_reply(reply, client, call_options); + } else { +- send_cluster_message(pcmk__get_peer(0, remote_peer, NULL), ++ send_cluster_message(pcmk__get_node(0, remote_peer, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_stonith_ng, reply, FALSE); + } + } +@@ -3371,7 +3373,8 @@ handle_fence_request(pcmk__request_t *request) + 
crm_xml_add(request->xml, F_STONITH_CLIENTID, + request->ipc_client->id); + crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); +- send_cluster_message(pcmk__get_peer(0, alternate_host, NULL), ++ send_cluster_message(pcmk__get_node(0, alternate_host, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_stonith_ng, request->xml, FALSE); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 3c176c8..96b518a 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1030,7 +1030,8 @@ merge_duplicates(remote_fencing_op_t *op) + op->id, other->id, other->target); + continue; + } +- if (!fencing_peer_active(pcmk__get_peer(0, other->originator, NULL))) { ++ if (!fencing_peer_active(pcmk__get_node(0, other->originator, NULL, ++ CRM_GET_PEER_CLUSTER))) { + crm_notice("Failing action '%s' targeting %s originating from " + "client %s@%s: Originator is dead " CRM_XS " id=%.8s", + other->action, other->target, other->client_name, +@@ -1663,7 +1664,8 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + crm_xml_add(update, F_STONITH_CALLID, call_id); + crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); + +- send_cluster_message(pcmk__get_peer(0, client_node, NULL), ++ send_cluster_message(pcmk__get_node(0, client_node, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_stonith_ng, update, FALSE); + + free_xml(update); +@@ -1917,7 +1919,8 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); + } + +- send_cluster_message(pcmk__get_peer(0, peer->host, NULL), ++ send_cluster_message(pcmk__get_node(0, peer->host, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_stonith_ng, remote_op, FALSE); + peer->tried = TRUE; + free_xml(remote_op); +diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h +index c71069b..bea4086 100644 
+--- a/include/crm/cluster/internal.h ++++ b/include/crm/cluster/internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -132,9 +132,7 @@ void pcmk__refresh_node_caches_from_cib(xmlNode *cib); + crm_node_t *pcmk__search_known_node_cache(unsigned int id, const char *uname, + uint32_t flags); + +-crm_node_t *pcmk__get_peer(unsigned int id, const char *uname, +- const char *uuid); +-crm_node_t *pcmk__get_peer_full(unsigned int id, const char *uname, +- const char *uuid, int flags); ++crm_node_t *pcmk__get_node(unsigned int id, const char *uname, ++ const char *uuid, uint32_t flags); + + #endif // PCMK__CRM_CLUSTER_INTERNAL__H +diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c +index 34a31fb..47a3321 100644 +--- a/lib/cluster/corosync.c ++++ b/lib/cluster/corosync.c +@@ -309,12 +309,12 @@ quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, + crm_debug("Member[%d] %u ", i, id); + + /* Get this node's peer cache entry (adding one if not already there) */ +- node = pcmk__get_peer(id, NULL, NULL); ++ node = pcmk__get_node(id, NULL, NULL, CRM_GET_PEER_CLUSTER); + if (node->uname == NULL) { + char *name = pcmk__corosync_name(0, id); + + crm_info("Obtaining name for new node %u", id); +- node = pcmk__get_peer(id, name, NULL); ++ node = pcmk__get_node(id, name, NULL, CRM_GET_PEER_CLUSTER); + free(name); + } + +@@ -480,7 +480,8 @@ pcmk__corosync_connect(crm_cluster_t *cluster) + } + + // Ensure local node always exists in peer cache +- peer = pcmk__get_peer(cluster->nodeid, cluster->uname, NULL); ++ peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL, ++ CRM_GET_PEER_CLUSTER); + cluster->uuid = pcmk__corosync_uuid(peer); + + return TRUE; +@@ -640,7 +641,7 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent) + + if (nodeid > 0 || name != NULL) { + crm_trace("Initializing 
node[%d] %u = %s", lpc, nodeid, name); +- pcmk__get_peer(nodeid, name, NULL); ++ pcmk__get_node(nodeid, name, NULL, CRM_GET_PEER_CLUSTER); + } + + if (nodeid > 0 && name != NULL) { +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index 778368f..bc251da 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -465,7 +465,8 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + + msg->sender.id = nodeid; + if (msg->sender.size == 0) { +- crm_node_t *peer = pcmk__get_peer(nodeid, NULL, NULL); ++ crm_node_t *peer = pcmk__get_node(nodeid, NULL, NULL, ++ CRM_GET_PEER_CLUSTER); + + if (peer == NULL) { + crm_err("Peer with nodeid=%u is unknown", nodeid); +@@ -526,7 +527,8 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + } + + // Is this necessary? 
+- pcmk__get_peer(msg->sender.id, msg->sender.uname, NULL); ++ pcmk__get_node(msg->sender.id, msg->sender.uname, NULL, ++ CRM_GET_PEER_CLUSTER); + + crm_trace("Payload: %.200s", data); + return data; +@@ -720,7 +722,8 @@ pcmk_cpg_membership(cpg_handle_t handle, + } + + for (i = 0; i < member_list_entries; i++) { +- crm_node_t *peer = pcmk__get_peer(member_list[i].nodeid, NULL, NULL); ++ crm_node_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL, ++ CRM_GET_PEER_CLUSTER); + + if (member_list[i].nodeid == local_nodeid + && member_list[i].pid != getpid()) { +@@ -873,7 +876,7 @@ cluster_connect_cpg(crm_cluster_t *cluster) + return FALSE; + } + +- peer = pcmk__get_peer(id, NULL, NULL); ++ peer = pcmk__get_node(id, NULL, NULL, CRM_GET_PEER_CLUSTER); + crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); + return TRUE; + } +diff --git a/lib/cluster/election.c b/lib/cluster/election.c +index 31867f2..576c0aa 100644 +--- a/lib/cluster/election.c ++++ b/lib/cluster/election.c +@@ -298,7 +298,7 @@ election_vote(election_t *e) + return; + } + +- our_node = pcmk__get_peer(0, e->uname, NULL); ++ our_node = pcmk__get_node(0, e->uname, NULL, CRM_GET_PEER_CLUSTER); + if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) { + crm_trace("Cannot vote in %s yet: local node not connected to cluster", + e->name); +@@ -547,8 +547,8 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) + return election_error; + } + +- your_node = pcmk__get_peer(0, vote.from, NULL); +- our_node = pcmk__get_peer(0, e->uname, NULL); ++ your_node = pcmk__get_node(0, vote.from, NULL, CRM_GET_PEER_CLUSTER); ++ our_node = pcmk__get_node(0, e->uname, NULL, CRM_GET_PEER_CLUSTER); + we_are_owner = (our_node != NULL) + && pcmk__str_eq(our_node->uuid, vote.election_owner, + pcmk__str_none); +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 41e0fa3..4c89a7c 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -562,37 
+562,6 @@ pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) + return node; + } + +-/*! +- * \brief Get a node cache entry (cluster or Pacemaker Remote) +- * +- * \param[in] id If not 0, cluster node ID to search for +- * \param[in] uname If not NULL, node name to search for +- * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster +- * node ID to search for +- * \param[in] flags Bitmask of enum crm_get_peer_flags +- * +- * \return (Possibly newly created) node cache entry +- */ +-crm_node_t * +-pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid, +- int flags) +-{ +- crm_node_t *node = NULL; +- +- CRM_ASSERT(id > 0 || uname != NULL); +- +- crm_peer_init(); +- +- if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { +- node = g_hash_table_lookup(crm_remote_peer_cache, uname); +- } +- +- if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { +- node = pcmk__get_peer(id, uname, uuid); +- } +- return node; +-} +- + /*! + * \internal + * \brief Purge a node from cache (both cluster and Pacemaker Remote) +@@ -794,12 +763,14 @@ remove_conflicting_peer(crm_node_t *node) + * \param[in] uname If not NULL, node name to search for + * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster + * node ID to search for ++ * \param[in] flags Group of enum crm_get_peer_flags + * + * \return (Possibly newly created) cluster node cache entry + */ + /* coverity[-alloc] Memory is referenced in one or both hashtables */ + crm_node_t * +-pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) ++pcmk__get_node(unsigned int id, const char *uname, const char *uuid, ++ uint32_t flags) + { + crm_node_t *node = NULL; + char *uname_lookup = NULL; +@@ -808,6 +779,18 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) + + crm_peer_init(); + ++ // Check the Pacemaker Remote node cache first ++ if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { ++ node = 
g_hash_table_lookup(crm_remote_peer_cache, uname); ++ if (node != NULL) { ++ return node; ++ } ++ } ++ ++ if (!pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { ++ return NULL; ++ } ++ + node = pcmk__search_cluster_node_cache(id, uname, uuid); + + /* if uname wasn't provided, and find_peer did not turn up a uname based on id. +@@ -826,7 +809,6 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) + } + } + +- + if (node == NULL) { + char *uniqueid = crm_generate_uuid(); + +@@ -1417,13 +1399,13 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) + crm_node_t * + crm_get_peer(unsigned int id, const char *uname) + { +- return pcmk__get_peer(id, uname, NULL); ++ return pcmk__get_node(id, uname, NULL, CRM_GET_PEER_CLUSTER); + } + + crm_node_t * + crm_get_peer_full(unsigned int id, const char *uname, int flags) + { +- return pcmk__get_peer_full(id, uname, NULL, flags); ++ return pcmk__get_node(id, uname, NULL, flags); + } + + // LCOV_EXCL_STOP +-- +2.31.1 + +From 8a33a98c48475790a033f59aeb3e026f2bb68e4f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 16:18:45 -0600 +Subject: [PATCH 5/9] API: libcrmcluster: deprecate enum crm_get_peer_flags and + all its values + +Replace it internally with a new enum pcmk__node_search_flags +--- + daemons/attrd/attrd_cib.c | 2 +- + daemons/attrd/attrd_corosync.c | 6 +++--- + daemons/attrd/attrd_messages.c | 6 +++--- + daemons/based/based_callbacks.c | 4 ++-- + daemons/based/based_messages.c | 4 ++-- + daemons/controld/controld_corosync.c | 2 +- + daemons/controld/controld_election.c | 2 +- + daemons/controld/controld_execd.c | 4 ++-- + daemons/controld/controld_fencing.c | 6 +++--- + daemons/controld/controld_join_client.c | 6 +++--- + daemons/controld/controld_join_dc.c | 10 +++++----- + daemons/controld/controld_messages.c | 4 ++-- + daemons/controld/controld_remote_ra.c | 3 ++- + daemons/controld/controld_te_actions.c | 6 +++--- + daemons/controld/controld_te_events.c | 2 
+- + daemons/controld/controld_utils.c | 2 +- + daemons/fenced/fenced_commands.c | 9 +++++---- + daemons/fenced/fenced_history.c | 3 ++- + daemons/fenced/fenced_remote.c | 9 +++++---- + include/crm/cluster.h | 7 ------- + include/crm/cluster/compat.h | 7 +++++++ + include/crm/cluster/internal.h | 13 +++++++++++++ + lib/cluster/corosync.c | 8 ++++---- + lib/cluster/cpg.c | 8 ++++---- + lib/cluster/election.c | 6 +++--- + lib/cluster/membership.c | 18 +++++++++--------- + 26 files changed, 87 insertions(+), 70 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 7018a32..bdc0a10 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -568,7 +568,7 @@ write_attribute(attribute_t *a, bool ignore_delay) + } else { + // This will create a cluster node cache entry if none exists + crm_node_t *peer = pcmk__get_node(v->nodeid, v->nodename, NULL, +- CRM_GET_PEER_ANY); ++ pcmk__node_search_any); + + uuid = peer->uuid; + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index c9e11e6..158d82f 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -120,7 +120,7 @@ attrd_cpg_dispatch(cpg_handle_t handle, + crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); + } else { + attrd_peer_message(pcmk__get_node(nodeid, from, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + xml); + } + +@@ -255,7 +255,7 @@ static void + record_peer_nodeid(attribute_value_t *v, const char *host) + { + crm_node_t *known_peer = pcmk__get_node(v->nodeid, host, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { +@@ -441,7 +441,7 @@ attrd_peer_clear_failure(pcmk__request_t *request) + regex_t regex; + + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); 
+ + if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) { + crm_info("Ignoring invalid request to clear failures for %s", +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index c6c1b9a..5525d4b 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -178,7 +178,7 @@ handle_sync_request(pcmk__request_t *request) + { + if (request->peer != NULL) { + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + attrd_peer_sync(peer, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +@@ -196,7 +196,7 @@ handle_sync_response_request(pcmk__request_t *request) + } else { + if (request->peer != NULL) { + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + bool peer_won = attrd_check_for_new_writer(peer, request->xml); + + if (!pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { +@@ -215,7 +215,7 @@ handle_update_request(pcmk__request_t *request) + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c +index b1f3b4b..5f3dc62 100644 +--- a/daemons/based/based_callbacks.c ++++ b/daemons/based/based_callbacks.c +@@ -930,7 +930,7 @@ forward_request(xmlNode *request) + crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); + + if (host != NULL) { +- peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster); + } + send_cluster_message(peer, crm_msg_cib, request, FALSE); + +@@ -990,7 +990,7 @@ 
send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb + crm_trace("Sending request result to %s only", originator); + crm_xml_add(msg, F_CIB_ISREPLY, originator); + return send_cluster_message(pcmk__get_node(0, originator, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_cib, msg, FALSE); + } + +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index ff1a6aa..7f503b2 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -129,7 +129,7 @@ send_sync_request(const char *host) + stand_alone? "localhost" : crm_cluster->uname); + + if (host != NULL) { +- peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster); + } + send_cluster_message(peer, crm_msg_cib, sync_me, FALSE); + free_xml(sync_me); +@@ -449,7 +449,7 @@ sync_our_cib(xmlNode * request, gboolean all) + add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); + + if (!all) { +- peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster); + } + if (!send_cluster_message(peer, crm_msg_cib, replace_request, FALSE)) { + result = -ENOTCONN; +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index fa1df6f..0f3ea32 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -49,7 +49,7 @@ crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? 
*/ + +- peer = pcmk__get_node(0, from, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster); + if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { + /* If we can still talk to our peer process on that node, + * then it must be part of the corosync membership +diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c +index 734064d..adad168 100644 +--- a/daemons/controld/controld_election.c ++++ b/daemons/controld/controld_election.c +@@ -266,7 +266,7 @@ do_dc_release(long long action, + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + xmlNode *update = NULL; + crm_node_t *node = pcmk__get_node(0, controld_globals.our_nodename, +- NULL, CRM_GET_PEER_CLUSTER); ++ NULL, pcmk__node_search_cluster); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + update = create_node_state_update(node, node_update_expected, NULL, +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index df715aa..fe2313c 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -581,7 +581,7 @@ controld_query_executor_state(void) + return NULL; + } + +- peer = pcmk__get_node(0, lrm_state->node_name, NULL, CRM_GET_PEER_ANY); ++ peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any); + CRM_CHECK(peer != NULL, return NULL); + + xml_state = create_node_state_update(peer, +@@ -1753,7 +1753,7 @@ controld_ack_event_directly(const char *to_host, const char *to_sys, + } + + peer = pcmk__get_node(0, controld_globals.our_nodename, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + update = create_node_state_update(peer, node_update_none, NULL, + __func__); + +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 60a7f9f..79a52be 100644 +--- a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -222,7 +222,7 @@ 
send_stonith_update(pcmk__graph_action_t *action, const char *target, + * Try getting any existing node cache entry also by node uuid in case it + * doesn't have an uname yet. + */ +- peer = pcmk__get_node(0, target, uuid, CRM_GET_PEER_ANY); ++ peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any); + + CRM_CHECK(peer != NULL, return); + +@@ -375,7 +375,7 @@ execute_stonith_cleanup(void) + for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { + char *target = iter->data; + crm_node_t *target_node = pcmk__get_node(0, target, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + const char *uuid = crm_peer_uuid(target_node); + + crm_notice("Marking %s, target of a previous stonith action, as clean", target); +@@ -582,7 +582,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) + + if (succeeded) { + crm_node_t *peer = pcmk__search_known_node_cache(0, event->target, +- CRM_GET_PEER_ANY); ++ pcmk__node_search_any); + const char *uuid = NULL; + + if (peer == NULL) { +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 6f20ef2..101c73d 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -36,7 +36,7 @@ update_dc_expected(const xmlNode *msg) + if ((controld_globals.dc_name != NULL) + && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { + crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); + } +@@ -179,7 +179,7 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + crm_xml_add(reply, F_CRM_JOIN_ID, join_id); + crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + } 
+@@ -336,7 +336,7 @@ do_cl_join_finalize_respond(long long action, + } + + send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 064649f..e9fc698 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -318,7 +318,7 @@ do_dc_join_offer_one(long long action, + crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } +- member = pcmk__get_node(0, join_to, NULL, CRM_GET_PEER_CLUSTER); ++ member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster); + + /* It is possible that a node will have been sick or starting up when the + * original offer was made. However, it will either re-announce itself in +@@ -333,7 +333,7 @@ do_dc_join_offer_one(long long action, + */ + if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { + member = pcmk__get_node(0, controld_globals.our_nodename, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + join_make_offer(NULL, member, NULL); + } + +@@ -397,7 +397,7 @@ do_dc_join_filter_offer(long long action, + crm_err("Ignoring invalid join request without node name"); + return; + } +- join_node = pcmk__get_node(0, join_from, NULL, CRM_GET_PEER_CLUSTER); ++ join_node = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster); + + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); + if (join_id != current_join_id) { +@@ -733,7 +733,7 @@ do_dc_join_ack(long long action, + goto done; + } + +- peer = pcmk__get_node(0, join_from, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster); + if (peer->join != crm_join_finalized) { + crm_info("Ignoring out-of-sequence join-%d confirmation from %s " + "(currently %s not %s)", +@@ -867,7 +867,7 @@ finalize_join_for(gpointer key, 
gpointer value, gpointer user_data) + return; + } + +- join_node = pcmk__get_node(0, join_to, NULL, CRM_GET_PEER_CLUSTER); ++ join_node = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster); + if (!crm_is_peer_active(join_node)) { + /* + * NACK'ing nodes that the membership layer doesn't know about yet +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 71f5680..999dd13 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -459,7 +459,7 @@ relay_message(xmlNode * msg, gboolean originated_locally) + crm_log_xml_trace(msg, "relayed"); + if (!broadcast) { + node_to = pcmk__get_node(0, host_to, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + } + send_cluster_message(node_to, dest, msg, TRUE); + return TRUE; +@@ -904,7 +904,7 @@ handle_node_info_request(const xmlNode *msg) + value = controld_globals.our_nodename; + } + +- node = pcmk__search_node_caches(node_id, value, CRM_GET_PEER_ANY); ++ node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any); + if (node) { + crm_xml_add(reply_data, XML_ATTR_ID, node->uuid); + crm_xml_add(reply_data, XML_ATTR_UNAME, node->uname); +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index 9c4bb58..662643c 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -206,7 +206,8 @@ should_purge_attributes(crm_node_t *node) + /* Get the node that was hosting the remote connection resource from the + * peer cache. That's the one we really care about here. 
+ */ +- conn_node = pcmk__get_node(0, node->conn_host, NULL, CRM_GET_PEER_CLUSTER); ++ conn_node = pcmk__get_node(0, node->conn_host, NULL, ++ pcmk__node_search_cluster); + if (conn_node == NULL) { + return purge; + } +diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c +index 01ba4a0..fbd9955 100644 +--- a/daemons/controld/controld_te_actions.c ++++ b/daemons/controld/controld_te_actions.c +@@ -159,7 +159,7 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + + } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { + crm_node_t *peer = pcmk__get_node(0, router_node, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); + } +@@ -172,7 +172,7 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); + + rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_crmd, cmd, TRUE); + free(counter); + free_xml(cmd); +@@ -425,7 +425,7 @@ execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + + } else { + rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_lrmd, cmd, TRUE); + } + +diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c +index 84bef5b..a54304b 100644 +--- a/daemons/controld/controld_te_events.c ++++ b/daemons/controld/controld_te_events.c +@@ -120,7 +120,7 @@ fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) + router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router) { + crm_node_t *node = pcmk__get_node(0, router, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + if (node) { + router_uuid = node->uuid; +diff --git a/daemons/controld/controld_utils.c 
b/daemons/controld/controld_utils.c +index 0e92416..1143e88 100644 +--- a/daemons/controld/controld_utils.c ++++ b/daemons/controld/controld_utils.c +@@ -735,7 +735,7 @@ update_dc(xmlNode * msg) + + } else if (controld_globals.dc_name != NULL) { + crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + crm_info("Set DC to %s (%s)", + controld_globals.dc_name, +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index c519607..d2a556f 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -646,7 +646,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) + + if (device->include_nodeid && (cmd->target != NULL)) { + crm_node_t *node = pcmk__get_node(0, cmd->target, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + cmd->target_nodeid = node->id; + } +@@ -2404,7 +2404,7 @@ stonith_send_reply(const xmlNode *reply, int call_options, + do_local_reply(reply, client, call_options); + } else { + send_cluster_message(pcmk__get_node(0, remote_peer, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_stonith_ng, reply, FALSE); + } + } +@@ -2920,7 +2920,8 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result) + crm_node_t *node = NULL; + + pcmk__scan_min_int(host, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); ++ node = pcmk__search_known_node_cache(nodeid, NULL, ++ pcmk__node_search_any); + if (node != NULL) { + host = node->uname; + } +@@ -3374,7 +3375,7 @@ handle_fence_request(pcmk__request_t *request) + request->ipc_client->id); + crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); + send_cluster_message(pcmk__get_node(0, alternate_host, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_stonith_ng, request->xml, FALSE); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); 
+diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index a766477..4fa2215 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -469,7 +469,8 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + crm_node_t *node; + + pcmk__scan_min_int(target, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); ++ node = pcmk__search_known_node_cache(nodeid, NULL, ++ pcmk__node_search_any); + if (node) { + target = node->uname; + } +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 96b518a..482efb9 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1031,7 +1031,7 @@ merge_duplicates(remote_fencing_op_t *op) + continue; + } + if (!fencing_peer_active(pcmk__get_node(0, other->originator, NULL, +- CRM_GET_PEER_CLUSTER))) { ++ pcmk__node_search_cluster))) { + crm_notice("Failing action '%s' targeting %s originating from " + "client %s@%s: Originator is dead " CRM_XS " id=%.8s", + other->action, other->target, other->client_name, +@@ -1221,7 +1221,8 @@ create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) + crm_node_t *node; + + pcmk__scan_min_int(op->target, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); ++ node = pcmk__search_known_node_cache(nodeid, NULL, ++ pcmk__node_search_any); + + /* Ensure the conversion only happens once */ + stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); +@@ -1665,7 +1666,7 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); + + send_cluster_message(pcmk__get_node(0, client_node, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_stonith_ng, update, FALSE); + + free_xml(update); +@@ -1920,7 +1921,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + } + + 
send_cluster_message(pcmk__get_node(0, peer->host, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_stonith_ng, remote_op, FALSE); + peer->tried = TRUE; + free_xml(remote_op); +diff --git a/include/crm/cluster.h b/include/crm/cluster.h +index 302b807..decb8e8 100644 +--- a/include/crm/cluster.h ++++ b/include/crm/cluster.h +@@ -128,13 +128,6 @@ enum crm_ais_msg_types { + crm_msg_stonith_ng = 9, + }; + +-/* used with crm_get_peer_full */ +-enum crm_get_peer_flags { +- CRM_GET_PEER_CLUSTER = 0x0001, +- CRM_GET_PEER_REMOTE = 0x0002, +- CRM_GET_PEER_ANY = CRM_GET_PEER_CLUSTER|CRM_GET_PEER_REMOTE, +-}; +- + gboolean send_cluster_message(const crm_node_t *node, + enum crm_ais_msg_types service, + const xmlNode *data, gboolean ordered); +diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h +index e853fd8..14c4504 100644 +--- a/include/crm/cluster/compat.h ++++ b/include/crm/cluster/compat.h +@@ -26,6 +26,13 @@ extern "C" { + * release. + */ + ++// \deprecated Do not use ++enum crm_get_peer_flags { ++ CRM_GET_PEER_CLUSTER = 0x0001, ++ CRM_GET_PEER_REMOTE = 0x0002, ++ CRM_GET_PEER_ANY = CRM_GET_PEER_CLUSTER|CRM_GET_PEER_REMOTE, ++}; ++ + // \deprecated Do not use Pacemaker for cluster node cacheing + crm_node_t *crm_get_peer(unsigned int id, const char *uname); + +diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h +index bea4086..9513254 100644 +--- a/include/crm/cluster/internal.h ++++ b/include/crm/cluster/internal.h +@@ -30,6 +30,19 @@ enum crm_proc_flag { + }; + /* *INDENT-ON* */ + ++// Used with node cache search functions ++enum pcmk__node_search_flags { ++ pcmk__node_search_none = 0, ++ pcmk__node_search_cluster = (1 << 0), // Search for cluster nodes ++ pcmk__node_search_remote = (1 << 1), // Search for remote nodes ++ pcmk__node_search_any = pcmk__node_search_cluster ++ |pcmk__node_search_remote, ++ ++ /* @COMPAT The values before this must stay the same until we can drop ++ * support for enum 
crm_get_peer_flags ++ */ ++}; ++ + /*! + * \internal + * \brief Return the process bit corresponding to the current cluster stack +diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c +index 47a3321..374250f 100644 +--- a/lib/cluster/corosync.c ++++ b/lib/cluster/corosync.c +@@ -309,12 +309,12 @@ quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, + crm_debug("Member[%d] %u ", i, id); + + /* Get this node's peer cache entry (adding one if not already there) */ +- node = pcmk__get_node(id, NULL, NULL, CRM_GET_PEER_CLUSTER); ++ node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster); + if (node->uname == NULL) { + char *name = pcmk__corosync_name(0, id); + + crm_info("Obtaining name for new node %u", id); +- node = pcmk__get_node(id, name, NULL, CRM_GET_PEER_CLUSTER); ++ node = pcmk__get_node(id, name, NULL, pcmk__node_search_cluster); + free(name); + } + +@@ -481,7 +481,7 @@ pcmk__corosync_connect(crm_cluster_t *cluster) + + // Ensure local node always exists in peer cache + peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + cluster->uuid = pcmk__corosync_uuid(peer); + + return TRUE; +@@ -641,7 +641,7 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent) + + if (nodeid > 0 || name != NULL) { + crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); +- pcmk__get_node(nodeid, name, NULL, CRM_GET_PEER_CLUSTER); ++ pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster); + } + + if (nodeid > 0 && name != NULL) { +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index bc251da..b5f2884 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -466,7 +466,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + msg->sender.id = nodeid; + if (msg->sender.size == 0) { + crm_node_t *peer = pcmk__get_node(nodeid, NULL, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + if (peer == NULL) { + crm_err("Peer with nodeid=%u 
is unknown", nodeid); +@@ -528,7 +528,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + + // Is this necessary? + pcmk__get_node(msg->sender.id, msg->sender.uname, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + crm_trace("Payload: %.200s", data); + return data; +@@ -723,7 +723,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + + for (i = 0; i < member_list_entries; i++) { + crm_node_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + if (member_list[i].nodeid == local_nodeid + && member_list[i].pid != getpid()) { +@@ -876,7 +876,7 @@ cluster_connect_cpg(crm_cluster_t *cluster) + return FALSE; + } + +- peer = pcmk__get_node(id, NULL, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster); + crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); + return TRUE; + } +diff --git a/lib/cluster/election.c b/lib/cluster/election.c +index 576c0aa..7276a2d 100644 +--- a/lib/cluster/election.c ++++ b/lib/cluster/election.c +@@ -298,7 +298,7 @@ election_vote(election_t *e) + return; + } + +- our_node = pcmk__get_node(0, e->uname, NULL, CRM_GET_PEER_CLUSTER); ++ our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster); + if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) { + crm_trace("Cannot vote in %s yet: local node not connected to cluster", + e->name); +@@ -547,8 +547,8 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) + return election_error; + } + +- your_node = pcmk__get_node(0, vote.from, NULL, CRM_GET_PEER_CLUSTER); +- our_node = pcmk__get_node(0, e->uname, NULL, CRM_GET_PEER_CLUSTER); ++ your_node = pcmk__get_node(0, vote.from, NULL, pcmk__node_search_cluster); ++ our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster); + we_are_owner = (our_node != NULL) + && pcmk__str_eq(our_node->uuid, vote.election_owner, + 
pcmk__str_none); +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 4c89a7c..705b70c 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -539,7 +539,7 @@ hash_find_by_data(gpointer key, gpointer value, gpointer user_data) + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for +- * \param[in] flags Bitmask of enum crm_get_peer_flags ++ * \param[in] flags Group of enum pcmk__node_search_flags + * + * \return Node cache entry if found, otherwise NULL + */ +@@ -552,11 +552,11 @@ pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) + + crm_peer_init(); + +- if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { ++ if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + } + +- if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { ++ if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster)) { + node = pcmk__search_cluster_node_cache(id, uname, NULL); + } + return node; +@@ -763,7 +763,7 @@ remove_conflicting_peer(crm_node_t *node) + * \param[in] uname If not NULL, node name to search for + * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster + * node ID to search for +- * \param[in] flags Group of enum crm_get_peer_flags ++ * \param[in] flags Group of enum pcmk__node_search_flags + * + * \return (Possibly newly created) cluster node cache entry + */ +@@ -780,14 +780,14 @@ pcmk__get_node(unsigned int id, const char *uname, const char *uuid, + crm_peer_init(); + + // Check the Pacemaker Remote node cache first +- if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { ++ if (pcmk_is_set(flags, pcmk__node_search_remote)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + if (node != NULL) { + return node; + } + } + +- if (!pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { ++ if (!pcmk_is_set(flags, 
pcmk__node_search_cluster)) { + return NULL; + } + +@@ -1349,7 +1349,7 @@ pcmk__refresh_node_caches_from_cib(xmlNode *cib) + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for +- * \param[in] flags Bitmask of enum crm_get_peer_flags ++ * \param[in] flags Group of enum pcmk__node_search_flags + * + * \return Known node cache entry if found, otherwise NULL + */ +@@ -1364,7 +1364,7 @@ pcmk__search_known_node_cache(unsigned int id, const char *uname, + + node = pcmk__search_node_caches(id, uname, flags); + +- if (node || !(flags & CRM_GET_PEER_CLUSTER)) { ++ if (node || !(flags & pcmk__node_search_cluster)) { + return node; + } + +@@ -1399,7 +1399,7 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) + crm_node_t * + crm_get_peer(unsigned int id, const char *uname) + { +- return pcmk__get_node(id, uname, NULL, CRM_GET_PEER_CLUSTER); ++ return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster); + } + + crm_node_t * +-- +2.31.1 + +From aef8f5016b2de67ab12f896b2bfa7a0f1954b5b1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 16:27:24 -0600 +Subject: [PATCH 6/9] Refactor: libcrmcluster: replace + pcmk__search_known_node_cache() + +... 
with new flag in enum pcmk__node_search_flags +--- + daemons/controld/controld_fencing.c | 5 ++-- + daemons/fenced/fenced_commands.c | 5 ++-- + daemons/fenced/fenced_history.c | 5 ++-- + daemons/fenced/fenced_remote.c | 5 ++-- + include/crm/cluster/internal.h | 4 +-- + lib/cluster/membership.c | 45 ++++++----------------------- + 6 files changed, 23 insertions(+), 46 deletions(-) + +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 79a52be..ede2c27 100644 +--- a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -581,8 +581,9 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) + event->id); + + if (succeeded) { +- crm_node_t *peer = pcmk__search_known_node_cache(0, event->target, +- pcmk__node_search_any); ++ crm_node_t *peer = pcmk__search_node_caches(0, event->target, ++ pcmk__node_search_any ++ |pcmk__node_search_known); + const char *uuid = NULL; + + if (peer == NULL) { +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index d2a556f..4f21858 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2920,8 +2920,9 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result) + crm_node_t *node = NULL; + + pcmk__scan_min_int(host, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, +- pcmk__node_search_any); ++ node = pcmk__search_node_caches(nodeid, NULL, ++ pcmk__node_search_any ++ |pcmk__node_search_known); + if (node != NULL) { + host = node->uname; + } +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index 4fa2215..fb709ff 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -469,8 +469,9 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + crm_node_t *node; + + pcmk__scan_min_int(target, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, +- pcmk__node_search_any); ++ node = 
pcmk__search_node_caches(nodeid, NULL, ++ pcmk__node_search_any ++ |pcmk__node_search_known); + if (node) { + target = node->uname; + } +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 482efb9..ba70c57 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1221,8 +1221,9 @@ create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) + crm_node_t *node; + + pcmk__scan_min_int(op->target, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, +- pcmk__node_search_any); ++ node = pcmk__search_node_caches(nodeid, NULL, ++ pcmk__node_search_any ++ |pcmk__node_search_known); + + /* Ensure the conversion only happens once */ + stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); +diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h +index 9513254..b75784c 100644 +--- a/include/crm/cluster/internal.h ++++ b/include/crm/cluster/internal.h +@@ -41,6 +41,8 @@ enum pcmk__node_search_flags { + /* @COMPAT The values before this must stay the same until we can drop + * support for enum crm_get_peer_flags + */ ++ ++ pcmk__node_search_known = (1 << 2), // Search previously known nodes + }; + + /*! 
+@@ -142,8 +144,6 @@ crm_node_t *pcmk__search_cluster_node_cache(unsigned int id, const char *uname, + void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id); + + void pcmk__refresh_node_caches_from_cib(xmlNode *cib); +-crm_node_t *pcmk__search_known_node_cache(unsigned int id, const char *uname, +- uint32_t flags); + + crm_node_t *pcmk__get_node(unsigned int id, const char *uname, + const char *uuid, uint32_t flags); +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 705b70c..ef4aaac 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -82,6 +82,7 @@ static gboolean crm_autoreap = TRUE; + } while (0) + + static void update_peer_uname(crm_node_t *node, const char *uname); ++static crm_node_t *find_known_node(const char *id, const char *uname); + + int + crm_remote_peer_cache_size(void) +@@ -559,6 +560,14 @@ pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) + if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster)) { + node = pcmk__search_cluster_node_cache(id, uname, NULL); + } ++ ++ if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_known)) { ++ char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); ++ ++ node = find_known_node(id_str, uname); ++ free(id_str); ++ } ++ + return node; + } + +@@ -1343,42 +1352,6 @@ pcmk__refresh_node_caches_from_cib(xmlNode *cib) + refresh_known_node_cache(cib); + } + +-/*! 
+- * \internal +- * \brief Search known node cache +- * +- * \param[in] id If not 0, cluster node ID to search for +- * \param[in] uname If not NULL, node name to search for +- * \param[in] flags Group of enum pcmk__node_search_flags +- * +- * \return Known node cache entry if found, otherwise NULL +- */ +-crm_node_t * +-pcmk__search_known_node_cache(unsigned int id, const char *uname, +- uint32_t flags) +-{ +- crm_node_t *node = NULL; +- char *id_str = NULL; +- +- CRM_ASSERT(id > 0 || uname != NULL); +- +- node = pcmk__search_node_caches(id, uname, flags); +- +- if (node || !(flags & pcmk__node_search_cluster)) { +- return node; +- } +- +- if (id > 0) { +- id_str = crm_strdup_printf("%u", id); +- } +- +- node = find_known_node(id_str, uname); +- +- free(id_str); +- return node; +-} +- +- + // Deprecated functions kept only for backward API compatibility + // LCOV_EXCL_START + +-- +2.31.1 + +From 5b64c943bd8ba82b06e803fa97737fb7b574ec04 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 16:38:10 -0600 +Subject: [PATCH 7/9] Refactor: libcrmcluster: replace + pcmk__search_cluster_node_cache() + +... 
with calls to pcmk__search_node_caches() using pcmk__node_search_cluster +where possible +--- + daemons/attrd/attrd_ipc.c | 5 +++-- + daemons/based/based_messages.c | 5 ++++- + daemons/controld/controld_corosync.c | 4 ++-- + daemons/controld/controld_messages.c | 6 ++++-- + lib/cluster/cluster.c | 3 ++- + lib/cluster/cpg.c | 4 ++-- + lib/cluster/membership.c | 2 +- + 7 files changed, 18 insertions(+), 11 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 05c4a69..b08963d 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -162,10 +162,11 @@ attrd_client_peer_remove(pcmk__request_t *request) + + crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid); + if (nodeid > 0) { +- crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL, +- NULL); ++ crm_node_t *node = NULL; + char *host_alloc = NULL; + ++ node = pcmk__search_node_caches(nodeid, NULL, ++ pcmk__node_search_cluster); + if (node && node->uname) { + // Use cached name if available + host = node->uname; +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index 7f503b2..efad9a7 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -247,7 +247,10 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml + + if (rc != pcmk_ok) { + // Notify originating peer so it can notify its local clients +- crm_node_t *origin = pcmk__search_cluster_node_cache(0, host, NULL); ++ crm_node_t *origin = NULL; ++ ++ origin = pcmk__search_node_caches(0, host, ++ pcmk__node_search_cluster); + + crm_info("Rejecting upgrade request from %s: %s " + CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc, +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index 0f3ea32..63184d2 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -119,8 +119,8 @@ cpg_membership_callback(cpg_handle_t handle, const 
struct cpg_name *cpg_name, + if (controld_globals.dc_name != NULL) { + crm_node_t *peer = NULL; + +- peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name, +- NULL); ++ peer = pcmk__search_node_caches(0, controld_globals.dc_name, ++ pcmk__node_search_cluster); + if (peer != NULL) { + for (int i = 0; i < left_list_entries; ++i) { + if (left_list[i].nodeid == peer->id) { +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 999dd13..bd5237e 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -485,7 +485,8 @@ relay_message(xmlNode * msg, gboolean originated_locally) + } + + if (!broadcast) { +- node_to = pcmk__search_cluster_node_cache(0, host_to, NULL); ++ node_to = pcmk__search_node_caches(0, host_to, ++ pcmk__node_search_cluster); + if (node_to == NULL) { + crm_warn("Ignoring message %s because node %s is unknown", + ref, host_to); +@@ -1029,7 +1030,8 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) + + if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { + const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM); +- crm_node_t *node = pcmk__search_cluster_node_cache(0, from, NULL); ++ crm_node_t *node = pcmk__search_node_caches(0, from, ++ pcmk__node_search_cluster); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + if(AM_I_DC == FALSE) { +diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c +index f2cd428..1cdc204 100644 +--- a/lib/cluster/cluster.c ++++ b/lib/cluster/cluster.c +@@ -280,7 +280,8 @@ crm_peer_uname(const char *uuid) + return NULL; + } + +- node = pcmk__search_cluster_node_cache((uint32_t) id, NULL, NULL); ++ node = pcmk__search_node_caches((uint32_t) id, NULL, ++ pcmk__node_search_cluster); + if (node != NULL) { + crm_info("Setting uuid for node %s[%u] to %s", + node->uname, node->id, uuid); +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index b5f2884..4f3e81c 100644 +--- a/lib/cluster/cpg.c ++++ 
b/lib/cluster/cpg.c +@@ -629,8 +629,8 @@ node_left(const char *cpg_group_name, int event_counter, + const struct cpg_address **sorted_member_list, + size_t member_list_entries) + { +- crm_node_t *peer = pcmk__search_cluster_node_cache(cpg_peer->nodeid, +- NULL, NULL); ++ crm_node_t *peer = pcmk__search_node_caches(cpg_peer->nodeid, NULL, ++ pcmk__node_search_cluster); + const struct cpg_address **rival = NULL; + + /* Most CPG-related Pacemaker code assumes that only one process on a node +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index ef4aaac..73ea1e3 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -122,7 +122,7 @@ crm_remote_peer_get(const char *node_name) + * entry unless it has a node ID, which means the name actually is + * associated with a cluster node. (@TODO return an error in that case?) + */ +- node = pcmk__search_cluster_node_cache(0, node_name, NULL); ++ node = pcmk__search_node_caches(0, node_name, pcmk__node_search_cluster); + if ((node != NULL) && (node->uuid == NULL)) { + /* node_name could be a pointer into the cache entry being removed, so + * reassign it to a copy before the original gets freed +-- +2.31.1 + +From cbeb9eb516d3bf29df7850dcf2a8515f6a0dfb2c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 18 Dec 2023 17:09:12 -0600 +Subject: [PATCH 8/9] Test: cts-cli: strip feature set out of reference output + +--- + cts/cli/regression.tools.exp | 4 ++-- + cts/cts-cli.in | 2 ++ + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp +index accf781..417b5cd 100644 +--- a/cts/cli/regression.tools.exp ++++ b/cts/cli/regression.tools.exp +@@ -7667,7 +7667,7 @@ Diff: +++ 0.1.0 (null) + -- /cib/status/node_state[@id='1'] + -- /cib/status/node_state[@id='httpd-bundle-0'] + -- /cib/status/node_state[@id='httpd-bundle-1'] +-+ /cib: @crm_feature_set=3.19.0, @num_updates=0, @admin_epoch=0 +++ /cib: @num_updates=0, @admin_epoch=0 + 
-- /cib: @cib-last-written, @update-origin, @update-client, @update-user, @have-quorum, @dc-uuid + =#=#=#= End test: Get active shadow instance's diff (empty CIB) - Error occurred (1) =#=#=#= + * Passed: crm_shadow - Get active shadow instance's diff (empty CIB) +@@ -7701,7 +7701,7 @@ Diff: +++ 0.1.0 (null) + + + +- ++ + + + +diff --git a/cts/cts-cli.in b/cts/cts-cli.in +index f4cb7c3..40ada49 100755 +--- a/cts/cts-cli.in ++++ b/cts/cts-cli.in +@@ -3357,7 +3357,9 @@ for t in $tests; do + -e 's/ version="[^"]*"/ version=""/' \ + -e 's/request=\".*\(crm_[a-zA-Z0-9]*\)/request=\"\1/' \ + -e 's/crm_feature_set="[^"]*" //'\ ++ -e 's/@crm_feature_set=[0-9.]*, //'\ + -e 's/validate-with="[^"]*" //'\ ++ -e 's/\( +Date: Tue, 2 Jan 2024 14:48:14 -0600 +Subject: [PATCH 9/9] Test: cts-lab: ignore all transition calculation log + messages + +9e28f3b6d means these are now possible for more ignorable errors +--- + python/pacemaker/_cts/patterns.py | 26 +++++++------------------- + 1 file changed, 7 insertions(+), 19 deletions(-) + +diff --git a/python/pacemaker/_cts/patterns.py b/python/pacemaker/_cts/patterns.py +index 0fb1c2b..d05ff5f 100644 +--- a/python/pacemaker/_cts/patterns.py ++++ b/python/pacemaker/_cts/patterns.py +@@ -1,7 +1,7 @@ + """ Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS) """ + + __all__ = ["PatternSelector"] +-__copyright__ = "Copyright 2008-2023 the Pacemaker project contributors" ++__copyright__ = "Copyright 2008-2024 the Pacemaker project contributors" + __license__ = "GNU General Public License version 2 or later (GPLv2+)" + + import argparse +@@ -32,6 +32,12 @@ class BasePatterns: + # pcs can log this when node is fenced, but fencing is OK in some + # tests (and we will catch it in pacemaker logs when not OK) + r"pcs.daemon:No response from: .* request: get_configs, error:", ++ ++ # This is overbroad, but there's no way to say that only certain ++ # transition errors are acceptable. 
We have to rely on causes of a ++ # transition error logging their own error message, which should ++ # always be the case. ++ r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._commands = { +@@ -239,12 +245,6 @@ class Corosync2Patterns(BasePatterns): + r"error:.*cib_(shm|rw) IPC provider disconnected while waiting", + r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", + r"error: Lost fencer connection", +- # This is overbroad, but we don't have a way to say that only +- # certain transition errors are acceptable (if the fencer respawns, +- # fence devices may appear multiply active). We have to rely on +- # other causes of a transition error logging their own error +- # message, which is the usual practice. +- r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["corosync"] = [ +@@ -281,12 +281,6 @@ class Corosync2Patterns(BasePatterns): + r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", + r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", + r"pacemaker-controld.*:Could not connect to attrd: Connection refused", +- # This is overbroad, but we don't have a way to say that only +- # certain transition errors are acceptable (if the fencer respawns, +- # fence devices may appear multiply active). We have to rely on +- # other causes of a transition error logging their own error +- # message, which is the usual practice. 
+- r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["pacemaker-execd"] = [ +@@ -338,12 +332,6 @@ class Corosync2Patterns(BasePatterns): + r"error:.*Lost fencer connection", + r"error:.*Fencer connection failed \(will retry\)", + r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", +- # This is overbroad, but we don't have a way to say that only +- # certain transition errors are acceptable (if the fencer respawns, +- # fence devices may appear multiply active). We have to rely on +- # other causes of a transition error logging their own error +- # message, which is the usual practice. +- r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["pacemaker-fenced-ignore"].extend(self._components["common-ignore"]) +-- +2.31.1 + diff --git a/SOURCES/005-query-null.patch b/SOURCES/005-query-null.patch deleted file mode 100644 index 194cd33..0000000 --- a/SOURCES/005-query-null.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 0d15568a538349ac41028db6b506d13dd23e8732 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 14 Feb 2023 14:00:37 -0500 -Subject: [PATCH] High: libcrmcommon: Fix handling node=NULL in - pcmk__attrd_api_query. - -According to the header file, if node is NULL, pcmk__attrd_api_query -should query the value of the given attribute on all cluster nodes. -This is also what the server expects and how attrd_updater is supposed -to work. - -However, pcmk__attrd_api_query has no way of letting callers decide -whether they want to query all nodes or whether they want to use the -local node. We were passing NULL for the node name, which it took to -mean it should look up the local node name. This calls -pcmk__node_attr_target, which probes the local cluster name and returns -that to pcmk__attrd_api_query. If it returns non-NULL, that value will -then be put into the XML IPC call which means the server will only -return the value for that node. 
- -In testing this was usually fine. However, in pratice, the methods -pcmk__node_attr_target uses to figure out the local cluster node name -involves checking the OCF_RESKEY_CRM_meta_on_node environment variable -among others. - -This variable was never set in testing, but can be set in the real -world. This leads to circumstances where the user did "attrd_updater -QA" -expecting to get the values on all nodes, but instead only got the value -on the local cluster node. - -In pacemaker-2.1.4 and prior, pcmk__node_attr_target was simply never -called if the node was NULL but was called otherwise. - -The fix is to modify pcmk__attrd_api_query to take an option for -querying all nodes. If that's present, we'll query all nodes. If it's -not present, we'll look at the given node name - NULL means look it up, -anything else means just that node. - -Regression in 2.1.5 introduced by eb20a65577 ---- - include/crm/common/attrd_internal.h | 6 +++++- - include/crm/common/ipc_attrd_internal.h | 7 +++++-- - lib/common/ipc_attrd.c | 12 ++++++++---- - tools/attrd_updater.c | 5 +++-- - 4 files changed, 21 insertions(+), 9 deletions(-) - -diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h -index 389be48..7337c38 100644 ---- a/include/crm/common/attrd_internal.h -+++ b/include/crm/common/attrd_internal.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2022 the Pacemaker project contributors -+ * Copyright 2004-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -25,6 +25,10 @@ enum pcmk__node_attr_opts { - pcmk__node_attr_perm = (1 << 5), - pcmk__node_attr_sync_local = (1 << 6), - pcmk__node_attr_sync_cluster = (1 << 7), -+ // pcmk__node_attr_utilization is 8, but that has not been backported. -+ // I'm leaving the gap here in case we backport that in the future and -+ // also to avoid problems on mixed-version clusters. 
-+ pcmk__node_attr_query_all = (1 << 9), - }; - - #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ -diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h -index 2c6713f..b1b7584 100644 ---- a/include/crm/common/ipc_attrd_internal.h -+++ b/include/crm/common/ipc_attrd_internal.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2022 the Pacemaker project contributors -+ * Copyright 2022-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -110,10 +110,13 @@ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node); - * - * \param[in,out] api Connection to pacemaker-attrd - * \param[in] node Look up the attribute for this node -- * (or NULL for all nodes) -+ * (or NULL for the local node) - * \param[in] name Attribute name - * \param[in] options Bitmask of pcmk__node_attr_opts - * -+ * \note Passing pcmk__node_attr_query_all will cause the function to query -+ * the value of \p name on all nodes, regardless of the value of \p node. -+ * - * \return Standard Pacemaker return code - */ - int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, -diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c -index 4606509..dece49b 100644 ---- a/lib/common/ipc_attrd.c -+++ b/lib/common/ipc_attrd.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2011-2022 the Pacemaker project contributors -+ * Copyright 2011-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -332,10 +332,14 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, - return EINVAL; - } - -- target = pcmk__node_attr_target(node); -+ if (pcmk_is_set(options, pcmk__node_attr_query_all)) { -+ node = NULL; -+ } else { -+ target = pcmk__node_attr_target(node); - -- if (target != NULL) { -- node = target; -+ if (target != NULL) { -+ node = target; -+ } - } - - request = create_attrd_op(NULL); -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index 3cd766d..cbd341d 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -376,6 +376,7 @@ attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type, - static int - send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all) - { -+ uint32_t options = pcmk__node_attr_none; - pcmk_ipc_api_t *attrd_api = NULL; - int rc = pcmk_rc_ok; - -@@ -400,10 +401,10 @@ send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_no - - /* Decide which node(s) to query */ - if (query_all == TRUE) { -- attr_node = NULL; -+ options |= pcmk__node_attr_query_all; - } - -- rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, 0); -+ rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options); - - if (rc != pcmk_rc_ok) { - g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)", --- -2.31.1 - diff --git a/SOURCES/006-cib-file-feature-set.patch b/SOURCES/006-cib-file-feature-set.patch new file mode 100644 index 0000000..a7ce74d --- /dev/null +++ b/SOURCES/006-cib-file-feature-set.patch @@ -0,0 +1,276 @@ +From d50bbafc32428e873c0052a9defcf93d2e52667e Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 10 Jan 2024 11:35:11 -0500 +Subject: [PATCH 1/3] Refactor: libcrmcommon: Split feature set check into its + own function. 
+ +--- + include/crm/common/cib_internal.h | 4 +++- + lib/cib/cib_utils.c | 12 ++++++------ + lib/common/cib.c | 18 +++++++++++++++++- + 3 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/include/crm/common/cib_internal.h b/include/crm/common/cib_internal.h +index c41c12e..fa65e58 100644 +--- a/include/crm/common/cib_internal.h ++++ b/include/crm/common/cib_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2023 the Pacemaker project contributors ++ * Copyright 2023-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -16,6 +16,8 @@ extern "C" { + + const char *pcmk__cib_abs_xpath_for(const char *element); + ++int pcmk__check_feature_set(const char *cib_version); ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 0082eef..bf2982c 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -353,7 +353,6 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query, + xmlNode *patchset_cib = NULL; + xmlNode *local_diff = NULL; + +- const char *new_version = NULL; + const char *user = crm_element_value(req, F_CIB_USER); + bool with_digest = false; + +@@ -470,12 +469,13 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query, + } + + if (scratch) { +- new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION); ++ const char *new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION); + +- if (new_version && compare_version(new_version, CRM_FEATURE_SET) > 0) { +- crm_err("Discarding update with feature set '%s' greater than our own '%s'", +- new_version, CRM_FEATURE_SET); +- rc = -EPROTONOSUPPORT; ++ rc = pcmk__check_feature_set(new_version); ++ if (rc != pcmk_rc_ok) { ++ pcmk__config_err("Discarding update with feature set '%s' greater than our own '%s'", ++ new_version, CRM_FEATURE_SET); ++ rc = pcmk_rc2legacy(rc); + goto done; + } + } +diff --git a/lib/common/cib.c 
b/lib/common/cib.c +index fee7881..cbebc2e 100644 +--- a/lib/common/cib.c ++++ b/lib/common/cib.c +@@ -1,6 +1,6 @@ + /* + * Original copyright 2004 International Business Machines +- * Later changes copyright 2008-2023 the Pacemaker project contributors ++ * Later changes copyright 2008-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -173,3 +173,19 @@ pcmk_find_cib_element(xmlNode *cib, const char *element_name) + { + return get_xpath_object(pcmk_cib_xpath_for(element_name), cib, LOG_TRACE); + } ++ ++/*! ++ * \internal ++ * \brief Check that the feature set in the CIB is supported on this node ++ * ++ * \param[in] cib_version XML_ATTR_CRM_VERSION attribute from the CIB ++ */ ++int ++pcmk__check_feature_set(const char *cib_version) ++{ ++ if (cib_version && compare_version(cib_version, CRM_FEATURE_SET) > 0) { ++ return EPROTONOSUPPORT; ++ } ++ ++ return pcmk_rc_ok; ++} +-- +2.31.1 + +From d89fd8336ae47d892201513c99773705d57f15f0 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 10 Jan 2024 13:46:42 -0500 +Subject: [PATCH 2/3] Feature: scheduler: Check the CIB feature set in + cluster_status. + +This adds the check that was previously only in cib_perform_op to the +scheduler code, ensuring that any daemon or tool that calls the +scheduler will check that the feature set in the CIB is supported. 
+--- + lib/pengine/status.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/lib/pengine/status.c b/lib/pengine/status.c +index e6ec237..1294803 100644 +--- a/lib/pengine/status.c ++++ b/lib/pengine/status.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #include + +@@ -70,12 +71,21 @@ pe_free_working_set(pcmk_scheduler_t *scheduler) + gboolean + cluster_status(pcmk_scheduler_t * scheduler) + { ++ const char *new_version = NULL; + xmlNode *section = NULL; + + if ((scheduler == NULL) || (scheduler->input == NULL)) { + return FALSE; + } + ++ new_version = crm_element_value(scheduler->input, XML_ATTR_CRM_VERSION); ++ ++ if (pcmk__check_feature_set(new_version) != pcmk_rc_ok) { ++ pcmk__config_err("Can't process CIB with feature set '%s' greater than our own '%s'", ++ new_version, CRM_FEATURE_SET); ++ return FALSE; ++ } ++ + crm_trace("Beginning unpack"); + + if (scheduler->failed != NULL) { +-- +2.31.1 + +From a3428926d37af506014a6b462d1308d8541c5932 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 10 Jan 2024 14:56:36 -0500 +Subject: [PATCH 3/3] Low: libcib: Do not check CIB feature set for files in + cib_perform_op. + +This is related to the previous feature for transferring schema files to +older remote nodes. In that case, the newer schema files may also have +a newer feature set than the node supports, so the transferred files are +still not usable. + +However, the feature set only matters for the scheduler, not for most +command line tools (obviously, crm_simulate would still care). So in +those cases, we can just disable the feature set check if the CIB was +read in from a file. For the scheduler, the check is still performed as +part of cluster_status. 
+--- + cts/cli/regression.tools.exp | 2 +- + daemons/based/based_callbacks.c | 4 ++-- + include/crm/cib/internal.h | 4 ++-- + lib/cib/cib_file.c | 2 +- + lib/cib/cib_utils.c | 15 +++++++++------ + 5 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp +index 417b5cd..c81c420 100644 +--- a/cts/cli/regression.tools.exp ++++ b/cts/cli/regression.tools.exp +@@ -7939,7 +7939,7 @@ unpack_config warning: Blind faith: not fencing unseen nodes + =#=#=#= End test: Verbosely verify a file-specified invalid configuration, outputting as xml - Invalid configuration (78) =#=#=#= + * Passed: crm_verify - Verbosely verify a file-specified invalid configuration, outputting as xml + =#=#=#= Begin test: Verbosely verify another file-specified invalid configuration, outputting as xml =#=#=#= +-(cluster_status@status.c:113) warning: Fencing and resource management disabled due to lack of quorum ++(cluster_status@status.c:123) warning: Fencing and resource management disabled due to lack of quorum + + + +diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c +index 5f3dc62..f16e4d9 100644 +--- a/daemons/based/based_callbacks.c ++++ b/daemons/based/based_callbacks.c +@@ -1362,7 +1362,7 @@ cib_process_command(xmlNode *request, const cib__operation_t *operation, + input = prepare_input(request, operation->type, &section); + + if (!pcmk_is_set(operation->flags, cib__op_attr_modifies)) { +- rc = cib_perform_op(op, call_options, op_function, true, section, ++ rc = cib_perform_op(NULL, op, call_options, op_function, true, section, + request, input, false, &config_changed, &the_cib, + &result_cib, NULL, &output); + +@@ -1395,7 +1395,7 @@ cib_process_command(xmlNode *request, const cib__operation_t *operation, + } + + // result_cib must not be modified after cib_perform_op() returns +- rc = cib_perform_op(op, call_options, op_function, false, section, ++ rc = cib_perform_op(NULL, op, call_options, 
op_function, false, section, + request, input, manage_counters, &config_changed, + &the_cib, &result_cib, cib_diff, &output); + +diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h +index 9d54d52..b6d6871 100644 +--- a/include/crm/cib/internal.h ++++ b/include/crm/cib/internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -206,7 +206,7 @@ int cib__get_notify_patchset(const xmlNode *msg, const xmlNode **patchset); + + bool cib__element_in_patchset(const xmlNode *patchset, const char *element); + +-int cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, ++int cib_perform_op(cib_t *cib, const char *op, int call_options, cib__op_fn_t fn, + bool is_query, const char *section, xmlNode *req, + xmlNode *input, bool manage_counters, bool *config_changed, + xmlNode **current_cib, xmlNode **result_cib, xmlNode **diff, +diff --git a/lib/cib/cib_file.c b/lib/cib/cib_file.c +index a279823..9dd952c 100644 +--- a/lib/cib/cib_file.c ++++ b/lib/cib/cib_file.c +@@ -245,7 +245,7 @@ cib_file_process_request(cib_t *cib, xmlNode *request, xmlNode **output) + data = pcmk_find_cib_element(data, section); + } + +- rc = cib_perform_op(op, call_options, op_function, read_only, section, ++ rc = cib_perform_op(cib, op, call_options, op_function, read_only, section, + request, data, true, &changed, &private->cib_xml, + &result_cib, &cib_diff, output); + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index bf2982c..9c3f9f1 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -339,11 +339,10 @@ should_copy_cib(const char *op, const char *section, int call_options) + } + + int +-cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query, +- const char *section, xmlNode *req, xmlNode *input, +- bool manage_counters, bool *config_changed, +- xmlNode 
**current_cib, xmlNode **result_cib, xmlNode **diff, +- xmlNode **output) ++cib_perform_op(cib_t *cib, const char *op, int call_options, cib__op_fn_t fn, ++ bool is_query, const char *section, xmlNode *req, xmlNode *input, ++ bool manage_counters, bool *config_changed, xmlNode **current_cib, ++ xmlNode **result_cib, xmlNode **diff, xmlNode **output) + { + int rc = pcmk_ok; + bool check_schema = true; +@@ -468,7 +467,11 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query, + goto done; + } + +- if (scratch) { ++ /* If the CIB is from a file, we don't need to check that the feature set is ++ * supported. All we care about in that case is the schema version, which ++ * is checked elsewhere. ++ */ ++ if (scratch && (cib == NULL || cib->variant != cib_file)) { + const char *new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION); + + rc = pcmk__check_feature_set(new_version); +-- +2.31.1 + diff --git a/SOURCES/006-watchdog-fencing-topology.patch b/SOURCES/006-watchdog-fencing-topology.patch deleted file mode 100644 index 7651584..0000000 --- a/SOURCES/006-watchdog-fencing-topology.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 17cc49e1564b0ae55cc8212d14c5c055f88040da Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 14 Feb 2023 15:35:37 +0100 -Subject: [PATCH] Fix: watchdog-fencing: terminate dangling timer before - watchdog-waiting - ---- - daemons/fenced/fenced_remote.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index 5c3fe25e3..aab185adb 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2009-2022 the Pacemaker project contributors -+ * Copyright 2009-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -1702,6 +1702,10 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op) - "client %s " CRM_XS " id=%.8s", - (stonith_watchdog_timeout_ms / 1000), - op->target, op->action, op->client_name, op->id); -+ -+ if (op->op_timer_one) { -+ g_source_remove(op->op_timer_one); -+ } - op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, - remote_op_watchdog_done, op); - return TRUE; --- -2.39.0 - -From f2cc2a4277124230903a18713e50604a8f1842cd Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 1 Mar 2023 15:00:15 +0100 -Subject: [PATCH] Refactor: watchdog-fencing: convenience function - pcmk__is_fencing_action - -for consistency and add comment making clear why this block exits -with new timer set in any case ---- - daemons/fenced/fenced_remote.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index aab185adb..e0f8de057 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -1834,7 +1834,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) - if (!((stonith_watchdog_timeout_ms > 0) - && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) - || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei) -- && !pcmk__str_eq(op->action, "on", pcmk__str_none))) -+ && pcmk__is_fencing_action(op->action))) - && check_watchdog_fencing_and_wait(op))) { - - /* Some thoughts about self-fencing cases reaching this point: -@@ -1854,6 +1854,9 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) - Otherwise the selection of stonith-watchdog-timeout at - least is questionable. 
- */ -+ -+ /* coming here we're not waiting for watchdog timeout - -+ thus engage timer with timout evaluated before */ - op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); - } - --- -2.39.0 - -From c4eb45a986f8865fc5e69350fd5b9f4b056d9d69 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 14 Feb 2023 11:57:17 +0100 -Subject: [PATCH] Fix: watchdog-fencing: correctly derive timeout with topology - -up to now the timeout for watchdog-fencing was just added to -the overall timeout if the node to be fenced was visible and -reported back to the query. ---- - daemons/fenced/fenced_remote.c | 28 +++++++++++++++++++++++++--- - 1 file changed, 25 insertions(+), 3 deletions(-) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index e0f8de057..3b7ab05e9 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -969,8 +969,9 @@ advance_topology_level(remote_fencing_op_t *op, bool empty_ok) - return pcmk_rc_ok; - } - -- crm_info("All fencing options targeting %s for client %s@%s failed " -+ crm_info("All %sfencing options targeting %s for client %s@%s failed " - CRM_XS " id=%.8s", -+ (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", - op->target, op->client_name, op->originator, op->id); - return ENODEV; - } -@@ -1434,8 +1435,17 @@ stonith_choose_peer(remote_fencing_op_t * op) - && pcmk_is_set(op->call_options, st_opt_topology) - && (advance_topology_level(op, false) == pcmk_rc_ok)); - -- crm_notice("Couldn't find anyone to fence (%s) %s using %s", -- op->action, op->target, (device? 
device : "any device")); -+ if ((stonith_watchdog_timeout_ms > 0) -+ && pcmk__is_fencing_action(op->action) -+ && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) -+ && node_does_watchdog_fencing(op->target)) { -+ crm_info("Couldn't contact watchdog-fencing target-node (%s)", -+ op->target); -+ /* check_watchdog_fencing_and_wait will log additional info */ -+ } else { -+ crm_notice("Couldn't find anyone to fence (%s) %s using %s", -+ op->action, op->target, (device? device : "any device")); -+ } - return NULL; - } - -@@ -1531,6 +1541,18 @@ get_op_total_timeout(const remote_fencing_op_t *op, - continue; - } - for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { -+ /* in case of watchdog-device we add the timeout to the budget -+ regardless of if we got a reply or not -+ */ -+ if ((stonith_watchdog_timeout_ms > 0) -+ && pcmk__is_fencing_action(op->action) -+ && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID, -+ pcmk__str_none) -+ && node_does_watchdog_fencing(op->target)) { -+ total_timeout += stonith_watchdog_timeout_ms / 1000; -+ continue; -+ } -+ - for (iter = op->query_results; iter != NULL; iter = iter->next) { - const peer_device_info_t *peer = iter->data; - --- -2.39.0 - diff --git a/SOURCES/007-attrd-dampen.patch b/SOURCES/007-attrd-dampen.patch deleted file mode 100644 index 871c128..0000000 --- a/SOURCES/007-attrd-dampen.patch +++ /dev/null @@ -1,26 +0,0 @@ -From ebac530c815a62f7c3a1c24f64e9a530d9753dbe Mon Sep 17 00:00:00 2001 -From: Hideo Yamauchi -Date: Wed, 19 Jul 2023 18:21:07 +0900 -Subject: [PATCH] High: tools: The dampen parameter is disabled when setting - values with attrd_updater. 
- ---- - tools/attrd_updater.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index b615a3575..4688b9ff6 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -445,7 +445,7 @@ send_attrd_update(char command, const char *attr_node, const char *attr_name, - - case 'U': - rc = pcmk__attrd_api_update(NULL, attr_node, attr_name, attr_value, -- NULL, attr_set, NULL, -+ attr_dampen, attr_set, NULL, - attr_options | pcmk__node_attr_value); - break; - --- -2.41.0 - diff --git a/SOURCES/007-option-metadata.patch b/SOURCES/007-option-metadata.patch new file mode 100644 index 0000000..d948042 --- /dev/null +++ b/SOURCES/007-option-metadata.patch @@ -0,0 +1,3689 @@ +From 4401064f409921caed9444d18a74713250213c44 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 2 Jan 2024 19:52:41 -0800 +Subject: [PATCH 01/24] Test: cts-cli: Update for added spaces + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index b34fba8..1cd049f 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -47,7 +47,7 @@ + + + +- Pacemaker is primarily event-driven, and looks ahead to know when to recheck cluster state for failure timeouts and most time-based rules. However, it will also recheck the cluster after this amount of inactivity, to evaluate rules with date specifications and serve as a fail-safe for certain types of scheduler bugs. Allowed values: Zero disables polling, while positive values are an interval in seconds(unless other units are specified, for example "5min") ++ Pacemaker is primarily event-driven, and looks ahead to know when to recheck cluster state for failure timeouts and most time-based rules. 
However, it will also recheck the cluster after this amount of inactivity, to evaluate rules with date specifications and serve as a fail-safe for certain types of scheduler bugs. Allowed values: Zero disables polling, while positive values are an interval in seconds (unless other units are specified, for example "5min") + Polling interval to recheck cluster state and evaluate rules with date specifications + + +@@ -345,7 +345,7 @@ + + + +- Setting this to false may lead to a "split-brain" situation,potentially leading to data loss and/or service unavailability. ++ Setting this to false may lead to a "split-brain" situation, potentially leading to data loss and/or service unavailability. + *** Advanced Use Only *** Whether to fence unseen nodes at start-up + + +-- +2.31.1 + +From e3597b061afa62a1f869a3e238ad78d3f7222029 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 2 Jan 2024 20:28:21 -0800 +Subject: [PATCH 02/24] Test: cts-cli: Update daemon tests to note that + stonith-timeout is used + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index 1cd049f..6a24089 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -330,8 +330,8 @@ + + + +- This value is not used by Pacemaker, but is kept for backward compatibility, and certain legacy fence agents might use it. +- *** Advanced Use Only *** Unused by Pacemaker ++ How long to wait for on, off, and reboot fence actions to complete by default ++ How long to wait for on, off, and reboot fence actions to complete by default + + + +-- +2.31.1 + +From cb3431c1058dd3cfbcc1cc490db268a95d1731e1 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 19:20:26 -0800 +Subject: [PATCH 03/24] Refactor: libcrmcommon: New + pcmk__valid_placement_strategy() + +We'll soon need this function outside the scheduler. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + include/crm/common/options_internal.h | 1 + + lib/common/options.c | 16 ++++++++++++++++ + lib/pengine/common.c | 9 +-------- + 3 files changed, 18 insertions(+), 8 deletions(-) + +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index a9316ca..1ea27ee 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -67,6 +67,7 @@ bool pcmk__valid_number(const char *value); + bool pcmk__valid_positive_number(const char *value); + bool pcmk__valid_quorum(const char *value); + bool pcmk__valid_script(const char *value); ++bool pcmk__valid_placement_strategy(const char *value); + bool pcmk__valid_percentage(const char *value); + + // from watchdog.c +diff --git a/lib/common/options.c b/lib/common/options.c +index 2d86ebc..1db41a2 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -246,6 +246,22 @@ pcmk__valid_script(const char *value) + + return true; + } ++ ++/*! 
++ * \internal ++ * \brief Check whether a string represents a valid placement strategy ++ * ++ * \param[in] value String to validate ++ * ++ * \return \c true if \p value is a valid placement strategy, or \c false ++ * otherwise ++ */ ++bool ++pcmk__valid_placement_strategy(const char *value) ++{ ++ return pcmk__strcase_any_of(value, "default", "utilization", "minimal", ++ "balanced", NULL); ++} + + bool + pcmk__valid_percentage(const char *value) +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 0fdd5a1..6878f4d 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -21,13 +21,6 @@ + gboolean was_processing_error = FALSE; + gboolean was_processing_warning = FALSE; + +-static bool +-check_placement_strategy(const char *value) +-{ +- return pcmk__strcase_any_of(value, "default", "utilization", "minimal", +- "balanced", NULL); +-} +- + static pcmk__cluster_option_t pe_opts[] = { + /* name, old name, type, allowed values, + * default value, validator, +@@ -285,7 +278,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "placement-strategy", NULL, "select", + "default, utilization, minimal, balanced", +- "default", check_placement_strategy, ++ "default", pcmk__valid_placement_strategy, + N_("How the cluster should allocate resources to nodes"), + NULL + }, +-- +2.31.1 + +From 4c877cefcde40da8a2cd776956ade62919a2c926 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 9 Jan 2024 22:13:19 -0800 +Subject: [PATCH 04/24] Refactor: controller: Remove stonith-watchdog-timeout + validator function + +...from options array. Instead, call it from the controller after +validating the options array. + +We'll soon be moving the options array to libcrmcommon. There, we don't +have access to controld_verify_stonith_watchdog_timeout() or to the +controller's stonith API connection and node name. New comments and the +following task have more details: https://projects.clusterlabs.org/T749. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 25 ++++++++++++++++++++++++- + lib/common/watchdog.c | 4 ++++ + 2 files changed, 28 insertions(+), 1 deletion(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 644d686..83b802e 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -621,8 +621,18 @@ static pcmk__cluster_option_t controller_options[] = { + "the order in which ping updates arrive.") + }, + { ++ /* @COMPAT Currently unparsable values default to -1 (auto-calculate), ++ * while missing values default to 0 (disable). All values are accepted ++ * (unless the controller finds that the value conflicts with the ++ * SBD_WATCHDOG_TIMEOUT). ++ * ++ * At a compatibility break: properly validate as a timeout, let ++ * either negative values or a particular string like "auto" mean auto- ++ * calculate, and use 0 as the single default for when the option either ++ * is unset or fails to validate. ++ */ + "stonith-watchdog-timeout", NULL, "time", NULL, +- "0", controld_verify_stonith_watchdog_timeout, ++ "0", NULL, + N_("How long before nodes can be assumed to be safely down when " + "watchdog-based self-fencing via SBD is in use"), + N_("If this is set to a positive value, lost nodes are assumed to " +@@ -747,6 +757,19 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + pcmk__validate_cluster_options(config_hash, controller_options, + PCMK__NELEM(controller_options)); + ++ /* Validate the watchdog timeout in the context of the local node ++ * environment. If invalid, the controller will exit with a fatal error. ++ * ++ * We do this via a wrapper in the controller, so that we call ++ * pcmk__valid_stonith_watchdog_timeout() only if watchdog fencing is ++ * enabled for the local node. Otherwise, we may exit unnecessarily. 
++ * ++ * A validator function in libcrmcommon can't act as such a wrapper, because ++ * it doesn't have a stonith API connection or the local node name. ++ */ ++ value = g_hash_table_lookup(config_hash, "stonith-watchdog-timeout"); ++ controld_verify_stonith_watchdog_timeout(value); ++ + value = g_hash_table_lookup(config_hash, "no-quorum-policy"); + if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) { + controld_set_global_flags(controld_no_quorum_suicide); +diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c +index e569214..7d126af 100644 +--- a/lib/common/watchdog.c ++++ b/lib/common/watchdog.c +@@ -278,6 +278,10 @@ pcmk__valid_sbd_timeout(const char *value) + { + long st_timeout = value? crm_get_msec(value) : 0; + ++ /* @COMPAT At a compatibility break, accept either negative values or a ++ * specific string like "auto" (but not both) to mean "auto-calculate the ++ * timeout." Reject other values that aren't parsable as timeouts. ++ */ + if (st_timeout < 0) { + st_timeout = pcmk__auto_watchdog_timeout(); + crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)", +-- +2.31.1 + +From 28d96fc802bca24ed3e52b8ce5946f4b2b971b7d Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 17:52:18 -0800 +Subject: [PATCH 05/24] Refactor: libcrmcommon: New enum pcmk__opt_context + +At first this will be used only for backward compatibility with metadata +commands for pacemaker-based, pacemaker-controld, and +pacemaker-schedulerd. It may be extended later for meta-attribute +contexts or similar. + +The idea is that we'll consolidate all cluster options into a single +table and use these enum values as filters for getting daemon metadata. + +We won't need a pcmk__opt_context_fenced, because its metadata consists +of stonith instance attributes, not cluster options. Those instance +attributes will be stored in a separate array from the cluster options. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + include/crm/common/options_internal.h | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index 1ea27ee..0c6c9e8 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -34,6 +34,19 @@ bool pcmk__env_option_enabled(const char *daemon, const char *option); + * Cluster option handling + */ + ++/*! ++ * \internal ++ * \enum pcmk__opt_context ++ * \brief Context flags for options ++ */ ++enum pcmk__opt_context { ++ // @COMPAT Used only for daemon metadata ++ pcmk__opt_context_none = 0, //!< No additional context ++ pcmk__opt_context_based = (1 << 1), //!< CIB manager metadata ++ pcmk__opt_context_controld = (1 << 2), //!< Controller metadata ++ pcmk__opt_context_schedulerd = (1 << 3), //!< Scheduler metadata ++}; ++ + typedef struct pcmk__cluster_option_s { + const char *name; + const char *alt_name; +-- +2.31.1 + +From 2f8537331e2948b9186555ffbd9c9f2c121587d1 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 17:58:25 -0800 +Subject: [PATCH 06/24] Refactor: libcrmcommon: New + pcmk__cluster_option_t:context member + +Arguably makes more sense adjacent to the type member, but this +placement keeps the diffs smaller when updating existing options arrays. + +We will use this soon to ensure that each option occurs in exactly one +daemon's metadata. Several options (for example, +PCMK_OPT_NO_QUORUM_POLICY) currently appear in the metadata of both the +controller and the scheduler, causing issues for external tools that +parse the output. + +Where an option currently appears in the metadata of both the controller +and the scheduler, it will soon appear only in the scheduler's metadata. +We assign context flags accordingly. + +Note that the fencer doesn't have a context flag. The options in its +metadata are actually stonith device instance attributes, not cluster +options. 
They will continue to reside in a separate table from the +cluster options, so there's no ambiguity about which daemon they "belong +to." + +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 20 +++++++++++++++++ + daemons/fenced/pacemaker-fenced.c | 26 ++++++++++++++++++++++ + include/crm/common/options_internal.h | 3 +++ + lib/cib/cib_utils.c | 3 +++ + lib/pengine/common.c | 32 +++++++++++++++++++++++++++ + 5 files changed, 84 insertions(+) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 83b802e..4d7cb14 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -518,22 +518,26 @@ do_recover(long long action, + static pcmk__cluster_option_t controller_options[] = { + /* name, old name, type, allowed values, + * default value, validator, ++ * context, + * short description, + * long description + */ + { + "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL, ++ pcmk__opt_context_controld, + N_("Pacemaker version on cluster node elected Designated Controller (DC)"), + N_("Includes a hash which identifies the exact changeset the code was " + "built from. 
Used for diagnostic purposes.") + }, + { + "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL, ++ pcmk__opt_context_controld, + N_("The messaging stack on which Pacemaker is currently running"), + N_("Used for informational and diagnostic purposes.") + }, + { + "cluster-name", NULL, "string", NULL, NULL, NULL, ++ pcmk__opt_context_controld, + N_("An arbitrary name for the cluster"), + N_("This optional value is mostly for users' convenience as desired " + "in administration, but may also be used in Pacemaker " +@@ -543,6 +547,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", + NULL, "20s", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("How long to wait for a response from other nodes during start-up"), + N_("The optimal value will depend on the speed and load of your network " + "and the type of switches used.") +@@ -552,6 +557,7 @@ static pcmk__cluster_option_t controller_options[] = { + N_("Zero disables polling, while positive values are an interval in seconds" + "(unless other units are specified, for example \"5min\")"), + "15min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("Polling interval to recheck cluster state and evaluate rules " + "with date specifications"), + N_("Pacemaker is primarily event-driven, and looks ahead to know when to " +@@ -563,6 +569,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "load-threshold", NULL, "percentage", NULL, + "80%", pcmk__valid_percentage, ++ pcmk__opt_context_controld, + N_("Maximum amount of system load that should be used by cluster nodes"), + N_("The cluster will slow down its recovery process when the amount of " + "system resources used (currently CPU) approaches this limit"), +@@ -570,10 +577,12 @@ static pcmk__cluster_option_t controller_options[] = { + { + "node-action-limit", NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_controld, + N_("Maximum number of jobs 
that can be scheduled per node " + "(defaults to 2x cores)") + }, + { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL, ++ pcmk__opt_context_controld, + N_("How a cluster node should react if notified of its own fencing"), + N_("A cluster node may receive notification of its own fencing if fencing " + "is misconfigured, or if fabric fencing is in use that doesn't cut " +@@ -584,6 +593,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, + "2min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + "*** Advanced Use Only ***", + N_("Declare an election failed if it is not decided within this much " + "time. If you need to adjust this value, it probably indicates " +@@ -592,6 +602,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, + "20min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + "*** Advanced Use Only ***", + N_("Exit immediately if shutdown does not complete within this much " + "time. 
If you need to adjust this value, it probably indicates " +@@ -600,6 +611,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "join-integration-timeout", "crmd-integration-timeout", "time", NULL, + "3min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + "*** Advanced Use Only ***", + N_("If you need to adjust this value, it probably indicates " + "the presence of a bug.") +@@ -607,6 +619,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL, + "30min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + "*** Advanced Use Only ***", + N_("If you need to adjust this value, it probably indicates " + "the presence of a bug.") +@@ -614,6 +627,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "transition-delay", "crmd-transition-delay", "time", NULL, + "0s", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("*** Advanced Use Only *** Enabling this option will slow down " + "cluster recovery under all conditions"), + N_("Delay cluster recovery for this much time to allow for additional " +@@ -633,6 +647,7 @@ static pcmk__cluster_option_t controller_options[] = { + */ + "stonith-watchdog-timeout", NULL, "time", NULL, + "0", NULL, ++ pcmk__opt_context_controld, + N_("How long before nodes can be assumed to be safely down when " + "watchdog-based self-fencing via SBD is in use"), + N_("If this is set to a positive value, lost nodes are assumed to " +@@ -654,6 +669,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "stonith-max-attempts", NULL, "integer", NULL, + "10", pcmk__valid_positive_number, ++ pcmk__opt_context_controld, + N_("How many times fencing can fail before it will no longer be " + "immediately re-attempted on a target") + }, +@@ -662,11 +678,13 @@ static pcmk__cluster_option_t controller_options[] = { + { + "no-quorum-policy", NULL, "select", + "stop, freeze, ignore, demote, suicide", "stop", 
pcmk__valid_quorum, ++ pcmk__opt_context_controld, + N_("What to do when the cluster does not have quorum"), NULL + }, + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_controld, + N_("Whether to lock resources to a cleanly shut down node"), + N_("When true, resources active on a node when it is cleanly shut down " + "are kept \"locked\" to that node (not allowed to run elsewhere) " +@@ -680,6 +698,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("Do not lock resources to a cleanly shut down node longer than " + "this"), + N_("If shutdown-lock is true and this is set to a nonzero time " +@@ -690,6 +709,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("How long to wait for a node that has joined the cluster to join " + "the controller process group"), + N_("Fence nodes that do not join the controller process group within " +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index 7c69fb8..b2f4742 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -528,11 +528,13 @@ st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void + static pcmk__cluster_option_t fencer_options[] = { + /* name, old name, type, allowed values, + * default value, validator, ++ * context, + * short description, + * long description + */ + { + PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate parameter to supply instead of 'port'"), + N_("some devices do not support the " + "standard 'port' parameter or may provide additional ones. 
Use " +@@ -543,17 +545,20 @@ static pcmk__cluster_option_t fencer_options[] = { + }, + { + PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL, ++ pcmk__opt_context_none, + N_("A mapping of host names to ports numbers for devices that do not support host names."), + N_("Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2") + }, + { + PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL, ++ pcmk__opt_context_none, + N_("Eg. node1,node2,node3"), + N_("A list of machines controlled by " + "this device (Optional unless pcmk_host_list=static-list)") + }, + { + PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL, ++ pcmk__opt_context_none, + N_("How to determine which machines are controlled by the device."), + N_("Allowed values: dynamic-list " + "(query the device via the 'list' command), static-list " +@@ -564,6 +569,7 @@ static pcmk__cluster_option_t fencer_options[] = { + }, + { + PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL, ++ pcmk__opt_context_none, + N_("Enable a base delay for fencing actions and specify base delay value."), + N_("Enable a delay of no more than the " + "time specified before executing fencing actions. Pacemaker " +@@ -573,6 +579,7 @@ static pcmk__cluster_option_t fencer_options[] = { + }, + { + PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL, ++ pcmk__opt_context_none, + N_("Enable a base delay for " + "fencing actions and specify base delay value."), + N_("This enables a static delay for " +@@ -587,6 +594,7 @@ static pcmk__cluster_option_t fencer_options[] = { + }, + { + PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL, ++ pcmk__opt_context_none, + N_("The maximum number of actions can be performed in parallel on this device"), + N_("Cluster property concurrent-fencing=true needs to be configured first." + "Then use this to specify the maximum number of actions can be performed in parallel on this device. 
-1 is unlimited.") +@@ -594,18 +602,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_reboot_action", NULL, "string", NULL, + PCMK_ACTION_REBOOT, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of 'reboot'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n" + "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") + }, + { + "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.") + }, + { + "pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +@@ -614,18 +625,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_off_action", NULL, "string", NULL, + PCMK_ACTION_OFF, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of \'off\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") + }, + { + "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." 
+ "Use this to specify an alternate, device-specific, timeout for \'off\' actions.") + }, + { + "pcmk_off_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +@@ -634,18 +648,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_on_action", NULL, "string", NULL, + PCMK_ACTION_ON, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of 'on'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") + }, + { + "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'on\' actions.") + }, + { + "pcmk_on_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." 
+@@ -654,18 +671,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_list_action",NULL, "string", NULL, + PCMK_ACTION_LIST, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of \'list\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") + }, + { + "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'list\' actions.") + }, + { + "pcmk_list_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +@@ -674,18 +694,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_monitor_action", NULL, "string", NULL, + PCMK_ACTION_MONITOR, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of \'monitor\'"), + N_("Some devices do not support the standard commands or may provide additional ones." 
+ "Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.") + }, + { + "pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n" + "Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.") + }, + { + "pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +@@ -694,18 +717,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_status_action", NULL, "string", NULL, + PCMK_ACTION_STATUS, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of \'status\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") + }, + { + "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'status\' actions.") + }, + { + "pcmk_status_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"), + N_("Some devices do not support multiple connections." 
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index 0c6c9e8..b2525ef 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -56,6 +56,9 @@ typedef struct pcmk__cluster_option_s { + + bool (*is_valid)(const char *); + ++ // @COMPAT context is used only for daemon meta-data ++ enum pcmk__opt_context context; ++ + const char *description_short; + const char *description_long; + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 9c3f9f1..9e4060b 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -869,18 +869,21 @@ cib_native_notify(gpointer data, gpointer user_data) + static pcmk__cluster_option_t cib_opts[] = { + /* name, legacy name, type, allowed values, + * default value, validator, ++ * context, + * short description, + * long description + */ + { + "enable-acl", NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_based, + N_("Enable Access Control Lists (ACLs) for the CIB"), + NULL + }, + { + "cluster-ipc-limit", NULL, "integer", NULL, + "500", pcmk__valid_positive_number, ++ pcmk__opt_context_based, + N_("Maximum IPC message backlog before disconnecting a cluster daemon"), + N_("Raise this if log has \"Evicting client\" messages for cluster daemon" + " PIDs (a good value is the number of resources in the cluster" +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 6878f4d..383c4af 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -24,24 +24,28 @@ gboolean was_processing_warning = FALSE; + static pcmk__cluster_option_t pe_opts[] = { + /* name, old name, type, allowed values, + * default value, validator, ++ * context, + * short description, + * long description + */ + { + "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide", 
+ "stop", pcmk__valid_quorum, ++ pcmk__opt_context_schedulerd, + N_("What to do when the cluster does not have quorum"), + NULL + }, + { + "symmetric-cluster", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether resources can run on any node by default"), + NULL + }, + { + "maintenance-mode", NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether the cluster should refrain from monitoring, starting, " + "and stopping resources"), + NULL +@@ -49,6 +53,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "start-failure-is-fatal", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether a start failure should prevent a resource from being " + "recovered on the same node"), + N_("When true, the cluster will immediately ban a resource from a node " +@@ -58,12 +63,14 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "enable-startup-probes", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether the cluster should check for active resources during start-up"), + NULL + }, + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether to lock resources to a cleanly shut down node"), + N_("When true, resources active on a node when it is cleanly shut down " + "are kept \"locked\" to that node (not allowed to run elsewhere) " +@@ -77,6 +84,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("Do not lock resources to a cleanly shut down node longer than " + "this"), + N_("If shutdown-lock is true and this is set to a nonzero time " +@@ -89,6 +97,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "stonith-enabled", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ 
pcmk__opt_context_schedulerd, + N_("*** Advanced Use Only *** " + "Whether nodes may be fenced as part of recovery"), + N_("If false, unresponsive nodes are immediately assumed to be harmless, " +@@ -99,6 +108,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "stonith-action", NULL, "select", "reboot, off, poweroff", + PCMK_ACTION_REBOOT, pcmk__is_fencing_action, ++ pcmk__opt_context_schedulerd, + N_("Action to send to fence device when a node needs to be fenced " + "(\"poweroff\" is a deprecated alias for \"off\")"), + NULL +@@ -106,6 +116,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "stonith-timeout", NULL, "time", NULL, + "60s", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("*** Advanced Use Only *** Unused by Pacemaker"), + N_("This value is not used by Pacemaker, but is kept for backward " + "compatibility, and certain legacy fence agents might use it.") +@@ -113,6 +124,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether watchdog integration is enabled"), + N_("This is set automatically by the cluster according to whether SBD " + "is detected to be in use. User-configured values are ignored. 
" +@@ -124,12 +136,14 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "concurrent-fencing", NULL, "boolean", NULL, + PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Allow performing fencing operations in parallel"), + NULL + }, + { + "startup-fencing", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"), + N_("Setting this to false may lead to a \"split-brain\" situation," + "potentially leading to data loss and/or service unavailability.") +@@ -137,6 +151,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"), + N_("Apply specified delay for the fencings that are targeting the lost " + "nodes with the highest total resource priority in case we don't " +@@ -154,6 +169,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("How long to wait for a node that has joined the cluster to join " + "the controller process group"), + N_("Fence nodes that do not join the controller process group within " +@@ -165,6 +181,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "cluster-delay", NULL, "time", NULL, + "60s", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("Maximum time for node-to-node communication"), + N_("The node elected Designated Controller (DC) will consider an action " + "failed if it does not get a response from the node executing the " +@@ -175,6 +192,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "batch-limit", NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("Maximum number of jobs that 
the cluster may execute in parallel " + "across all nodes"), + N_("The \"correct\" value will depend on the speed and load of your " +@@ -185,6 +203,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "migration-limit", NULL, "integer", NULL, + "-1", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The number of live migration actions that the cluster is allowed " + "to execute in parallel on a node (-1 means no limit)") + }, +@@ -193,24 +212,28 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "stop-all-resources", NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether the cluster should stop all active resources"), + NULL + }, + { + "stop-orphan-resources", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether to stop resources that were removed from the configuration"), + NULL + }, + { + "stop-orphan-actions", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether to cancel recurring actions removed from the configuration"), + NULL + }, + { + "remove-after-stop", NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("*** Deprecated *** Whether to remove stopped resources from " + "the executor"), + N_("Values other than default are poorly tested and potentially dangerous." 
+@@ -221,18 +244,21 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "pe-error-series-max", NULL, "integer", NULL, + "-1", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The number of scheduler inputs resulting in errors to save"), + N_("Zero to disable, -1 to store unlimited.") + }, + { + "pe-warn-series-max", NULL, "integer", NULL, + "5000", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The number of scheduler inputs resulting in warnings to save"), + N_("Zero to disable, -1 to store unlimited.") + }, + { + "pe-input-series-max", NULL, "integer", NULL, + "4000", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The number of scheduler inputs without errors or warnings to save"), + N_("Zero to disable, -1 to store unlimited.") + }, +@@ -244,6 +270,7 @@ static pcmk__cluster_option_t pe_opts[] = { + PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", " + PCMK__VALUE_CUSTOM, + PCMK__VALUE_NONE, pcmk__validate_health_strategy, ++ pcmk__opt_context_schedulerd, + N_("How cluster should react to node health attributes"), + N_("Requires external entities to create node attributes (named with " + "the prefix \"#health\") with values \"red\", " +@@ -252,24 +279,28 @@ static pcmk__cluster_option_t pe_opts[] = { + { + PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("Base health score assigned to a node"), + N_("Only used when \"node-health-strategy\" is set to \"progressive\".") + }, + { + PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The score to use for a node health attribute whose value is \"green\""), + N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") + }, + { + PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The score to use for a node health attribute whose value is \"yellow\""), + 
N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") + }, + { + PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL, + "-INFINITY", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The score to use for a node health attribute whose value is \"red\""), + N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") + }, +@@ -279,6 +310,7 @@ static pcmk__cluster_option_t pe_opts[] = { + "placement-strategy", NULL, "select", + "default, utilization, minimal, balanced", + "default", pcmk__valid_placement_strategy, ++ pcmk__opt_context_schedulerd, + N_("How the cluster should allocate resources to nodes"), + NULL + }, +-- +2.31.1 + +From cc7c3c87d333854d0f28abe461dd58d5c94b0888 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 18:47:25 -0800 +Subject: [PATCH 07/24] Refactor: libcrmcommon: Consolidate cluster option + metadata + +This isn't plugged in yet. It's also currently defined out, to avoid an +unused variable warning from the compiler. + +Ref T746 + +Signed-off-by: Reid Wahl +--- + lib/common/options.c | 499 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 499 insertions(+) + +diff --git a/lib/common/options.c b/lib/common/options.c +index 1db41a2..ff73dcc 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -20,6 +20,7 @@ + #include + + #include ++#include + + void + pcmk__cli_help(char cmd) +@@ -38,6 +39,504 @@ pcmk__cli_help(char cmd) + } + + ++/* ++ * Option metadata ++ */ ++ ++#if 0 ++static pcmk__cluster_option_t cluster_options[] = { ++ /* name, old name, type, allowed values, ++ * default value, validator, ++ * context, ++ * short description, ++ * long description ++ */ ++ { ++ "dc-version", NULL, "string", NULL, ++ PCMK__VALUE_NONE, NULL, ++ pcmk__opt_context_controld, ++ N_("Pacemaker version on cluster node elected Designated Controller " ++ "(DC)"), ++ N_("Includes a hash which identifies the exact changeset the code was " ++ "built from. 
Used for diagnostic purposes."), ++ }, ++ { ++ "cluster-infrastructure", NULL, "string", NULL, ++ "corosync", NULL, ++ pcmk__opt_context_controld, ++ N_("The messaging stack on which Pacemaker is currently running"), ++ N_("Used for informational and diagnostic purposes."), ++ }, ++ { ++ "cluster-name", NULL, "string", NULL, ++ NULL, NULL, ++ pcmk__opt_context_controld, ++ N_("An arbitrary name for the cluster"), ++ N_("This optional value is mostly for users' convenience as desired " ++ "in administration, but may also be used in Pacemaker " ++ "configuration rules via the #cluster-name node attribute, and " ++ "by higher-level tools and resource agents."), ++ }, ++ { ++ "dc-deadtime", NULL, "time", NULL, ++ "20s", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("How long to wait for a response from other nodes during start-up"), ++ N_("The optimal value will depend on the speed and load of your " ++ "network and the type of switches used."), ++ }, ++ { ++ "cluster-recheck-interval", NULL, "time", ++ N_("Zero disables polling, while positive values are an interval in " ++ "seconds (unless other units are specified, for example \"5min\")"), ++ "15min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("Polling interval to recheck cluster state and evaluate rules " ++ "with date specifications"), ++ N_("Pacemaker is primarily event-driven, and looks ahead to know when " ++ "to recheck cluster state for failure timeouts and most time-based " ++ "rules. 
However, it will also recheck the cluster after this " ++ "amount of inactivity, to evaluate rules with date specifications " ++ "and serve as a fail-safe for certain types of scheduler bugs."), ++ }, ++ { ++ "fence-reaction", NULL, "select", "stop, panic", ++ "stop", NULL, ++ pcmk__opt_context_controld, ++ N_("How a cluster node should react if notified of its own fencing"), ++ N_("A cluster node may receive notification of its own fencing if " ++ "fencing is misconfigured, or if fabric fencing is in use that " ++ "doesn't cut cluster communication. Use \"stop\" to attempt to " ++ "immediately stop Pacemaker and stay stopped, or \"panic\" to " ++ "attempt to immediately reboot the local node, falling back to " ++ "stop on failure."), ++ }, ++ { ++ "election-timeout", NULL, "time", NULL, ++ "2min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only ***"), ++ N_("Declare an election failed if it is not decided within this much " ++ "time. If you need to adjust this value, it probably indicates " ++ "the presence of a bug."), ++ }, ++ { ++ "shutdown-escalation", NULL, "time", NULL, ++ "20min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only ***"), ++ N_("Exit immediately if shutdown does not complete within this much " ++ "time. 
If you need to adjust this value, it probably indicates " ++ "the presence of a bug."), ++ }, ++ { ++ "join-integration-timeout", "crmd-integration-timeout", "time", ++ NULL, ++ "3min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only ***"), ++ N_("If you need to adjust this value, it probably indicates " ++ "the presence of a bug."), ++ }, ++ { ++ "join-finalization-timeout", "crmd-finalization-timeout", ++ "time", NULL, ++ "30min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only ***"), ++ N_("If you need to adjust this value, it probably indicates " ++ "the presence of a bug."), ++ }, ++ { ++ "transition-delay", "crmd-transition-delay", "time", NULL, ++ "0s", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only *** " ++ "Enabling this option will slow down cluster recovery under all " ++ "conditions"), ++ N_("Delay cluster recovery for this much time to allow for additional " ++ "events to occur. Useful if your configuration is sensitive to " ++ "the order in which ping updates arrive."), ++ }, ++ { ++ "no-quorum-policy", NULL, "select", ++ "stop, freeze, ignore, demote, suicide", ++ "stop", pcmk__valid_quorum, ++ pcmk__opt_context_schedulerd, ++ N_("What to do when the cluster does not have quorum"), ++ NULL, ++ }, ++ { ++ "shutdown-lock", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether to lock resources to a cleanly shut down node"), ++ N_("When true, resources active on a node when it is cleanly shut down " ++ "are kept \"locked\" to that node (not allowed to run elsewhere) " ++ "until they start again on that node after it rejoins (or for at " ++ "most shutdown-lock-limit, if set). Stonith resources and " ++ "Pacemaker Remote connections are never locked. 
Clone and bundle " ++ "instances and the promoted role of promotable clones are " ++ "currently never locked, though support could be added in a future " ++ "release."), ++ }, ++ { ++ "shutdown-lock-limit", NULL, "time", NULL, ++ "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("Do not lock resources to a cleanly shut down node longer than " ++ "this"), ++ N_("If shutdown-lock is true and this is set to a nonzero time " ++ "duration, shutdown locks will expire after this much time has " ++ "passed since the shutdown was initiated, even if the node has not " ++ "rejoined."), ++ }, ++ { ++ "enable-acl", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_based, ++ N_("Enable Access Control Lists (ACLs) for the CIB"), ++ NULL, ++ }, ++ { ++ "symmetric-cluster", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether resources can run on any node by default"), ++ NULL, ++ }, ++ { ++ "maintenance-mode", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether the cluster should refrain from monitoring, starting, and " ++ "stopping resources"), ++ NULL, ++ }, ++ { ++ "start-failure-is-fatal", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether a start failure should prevent a resource from being " ++ "recovered on the same node"), ++ N_("When true, the cluster will immediately ban a resource from a node " ++ "if it fails to start there. 
When false, the cluster will instead " ++ "check the resource's fail count against its migration-threshold.") ++ }, ++ { ++ "enable-startup-probes", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether the cluster should check for active resources during " ++ "start-up"), ++ NULL, ++ }, ++ ++ // Fencing-related options ++ { ++ "stonith-enabled", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("*** Advanced Use Only *** " ++ "Whether nodes may be fenced as part of recovery"), ++ N_("If false, unresponsive nodes are immediately assumed to be " ++ "harmless, and resources that were active on them may be recovered " ++ "elsewhere. This can result in a \"split-brain\" situation, " ++ "potentially leading to data loss and/or service unavailability."), ++ }, ++ { ++ "stonith-action", NULL, "select", "reboot, off, poweroff", ++ PCMK_ACTION_REBOOT, pcmk__is_fencing_action, ++ pcmk__opt_context_schedulerd, ++ N_("Action to send to fence device when a node needs to be fenced " ++ "(\"poweroff\" is a deprecated alias for \"off\")"), ++ NULL, ++ }, ++ { ++ "stonith-timeout", NULL, "time", NULL, ++ "60s", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("How long to wait for on, off, and reboot fence actions to complete " ++ "by default"), ++ NULL, ++ }, ++ { ++ "have-watchdog", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether watchdog integration is enabled"), ++ N_("This is set automatically by the cluster according to whether SBD " ++ "is detected to be in use. User-configured values are ignored. " ++ "The value `true` is meaningful if diskless SBD is used and " ++ "`stonith-watchdog-timeout` is nonzero. 
In that case, if fencing " ++ "is required, watchdog-based self-fencing will be performed via " ++ "SBD without requiring a fencing resource explicitly configured."), ++ }, ++ { ++ /* @COMPAT Currently, unparsable values default to -1 (auto-calculate), ++ * while missing values default to 0 (disable). All values are accepted ++ * (unless the controller finds that the value conflicts with the ++ * SBD_WATCHDOG_TIMEOUT). ++ * ++ * At a compatibility break: properly validate as a timeout, let ++ * either negative values or a particular string like "auto" mean auto- ++ * calculate, and use 0 as the single default for when the option either ++ * is unset or fails to validate. ++ */ ++ "stonith-watchdog-timeout", NULL, "time", NULL, ++ "0", NULL, ++ pcmk__opt_context_controld, ++ N_("How long before nodes can be assumed to be safely down when " ++ "watchdog-based self-fencing via SBD is in use"), ++ N_("If this is set to a positive value, lost nodes are assumed to " ++ "self-fence using watchdog-based SBD within this much time. This " ++ "does not require a fencing resource to be explicitly configured, " ++ "though a fence_watchdog resource can be configured, to limit use " ++ "to specific nodes. If this is set to 0 (the default), the cluster " ++ "will never assume watchdog-based self-fencing. If this is set to a " ++ "negative value, the cluster will use twice the local value of the " ++ "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, " ++ "or otherwise treat this as 0. WARNING: When used, this timeout " ++ "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use " ++ "watchdog-based SBD, and Pacemaker will refuse to start on any of " ++ "those nodes where this is not true for the local value or SBD is " ++ "not active. 
When this is set to a negative value, " ++ "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes " ++ "that use SBD, otherwise data corruption or loss could occur."), ++ }, ++ { ++ "stonith-max-attempts", NULL, "integer", NULL, ++ "10", pcmk__valid_positive_number, ++ pcmk__opt_context_controld, ++ N_("How many times fencing can fail before it will no longer be " ++ "immediately re-attempted on a target"), ++ NULL, ++ }, ++ { ++ "concurrent-fencing", NULL, "boolean", NULL, ++ PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Allow performing fencing operations in parallel"), ++ NULL, ++ }, ++ { ++ "startup-fencing", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("*** Advanced Use Only *** " ++ "Whether to fence unseen nodes at start-up"), ++ N_("Setting this to false may lead to a \"split-brain\" situation, " ++ "potentially leading to data loss and/or service unavailability."), ++ }, ++ { ++ "priority-fencing-delay", NULL, "time", NULL, ++ "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("Apply fencing delay targeting the lost nodes with the highest " ++ "total resource priority"), ++ N_("Apply specified delay for the fencings that are targeting the lost " ++ "nodes with the highest total resource priority in case we don't " ++ "have the majority of the nodes in our cluster partition, so that " ++ "the more significant nodes potentially win any fencing match, " ++ "which is especially meaningful under split-brain of 2-node " ++ "cluster. A promoted resource instance takes the base priority + 1 " ++ "on calculation if the base priority is not 0. Any static/random " ++ "delays that are introduced by `pcmk_delay_base/max` configured " ++ "for the corresponding fencing resources will be added to this " ++ "delay. This delay should be significantly greater than, safely " ++ "twice, the maximum `pcmk_delay_base/max`. 
By default, priority " ++ "fencing delay is disabled."), ++ }, ++ { ++ "node-pending-timeout", NULL, "time", NULL, ++ "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("How long to wait for a node that has joined the cluster to join " ++ "the controller process group"), ++ N_("Fence nodes that do not join the controller process group within " ++ "this much time after joining the cluster, to allow the cluster " ++ "to continue managing resources. A value of 0 means never fence " ++ "pending nodes. Setting the value to 2h means fence nodes after " ++ "2 hours."), ++ }, ++ { ++ "cluster-delay", NULL, "time", NULL, ++ "60s", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("Maximum time for node-to-node communication"), ++ N_("The node elected Designated Controller (DC) will consider an action " ++ "failed if it does not get a response from the node executing the " ++ "action within this time (after considering the action's own " ++ "timeout). The \"correct\" value will depend on the speed and " ++ "load of your network and cluster nodes.") ++ }, ++ ++ // Limits ++ { ++ "load-threshold", NULL, "percentage", NULL, ++ "80%", pcmk__valid_percentage, ++ pcmk__opt_context_controld, ++ N_("Maximum amount of system load that should be used by cluster " ++ "nodes"), ++ N_("The cluster will slow down its recovery process when the amount of " ++ "system resources used (currently CPU) approaches this limit"), ++ }, ++ { ++ "node-action-limit", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_controld, ++ N_("Maximum number of jobs that can be scheduled per node (defaults to " ++ "2x cores)"), ++ NULL, ++ }, ++ { ++ "batch-limit", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("Maximum number of jobs that the cluster may execute in parallel " ++ "across all nodes"), ++ N_("The \"correct\" value will depend on the speed and load of your " ++ "network and cluster nodes. 
If set to 0, the cluster will " ++ "impose a dynamically calculated limit when any node has a " ++ "high load."), ++ }, ++ { ++ "migration-limit", NULL, "integer", NULL, ++ "-1", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The number of live migration actions that the cluster is allowed " ++ "to execute in parallel on a node (-1 means no limit)"), ++ NULL, ++ }, ++ { ++ "cluster-ipc-limit", NULL, "integer", NULL, ++ "500", pcmk__valid_positive_number, ++ pcmk__opt_context_based, ++ N_("Maximum IPC message backlog before disconnecting a cluster daemon"), ++ N_("Raise this if log has \"Evicting client\" messages for cluster " ++ "daemon PIDs (a good value is the number of resources in the " ++ "cluster multiplied by the number of nodes)."), ++ }, ++ ++ // Orphans and stopping ++ { ++ "stop-all-resources", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether the cluster should stop all active resources"), ++ NULL, ++ }, ++ { ++ "stop-orphan-resources", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether to stop resources that were removed from the " ++ "configuration"), ++ NULL, ++ }, ++ { ++ "stop-orphan-actions", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether to cancel recurring actions removed from the " ++ "configuration"), ++ NULL, ++ }, ++ { ++ "remove-after-stop", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("*** Deprecated *** " ++ "Whether to remove stopped resources from the executor"), ++ N_("Values other than default are poorly tested and potentially " ++ "dangerous. 
This option will be removed in a future release."), ++ }, ++ ++ // Storing inputs ++ { ++ "pe-error-series-max", NULL, "integer", NULL, ++ "-1", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The number of scheduler inputs resulting in errors to save"), ++ N_("Zero to disable, -1 to store unlimited."), ++ }, ++ { ++ "pe-warn-series-max", NULL, "integer", NULL, ++ "5000", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The number of scheduler inputs resulting in warnings to save"), ++ N_("Zero to disable, -1 to store unlimited."), ++ }, ++ { ++ "pe-input-series-max", NULL, "integer", NULL, ++ "4000", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The number of scheduler inputs without errors or warnings to save"), ++ N_("Zero to disable, -1 to store unlimited."), ++ }, ++ ++ // Node health ++ { ++ "node-health-strategy", NULL, "select", ++ PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", " ++ PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", " ++ PCMK__VALUE_CUSTOM, ++ PCMK__VALUE_NONE, pcmk__validate_health_strategy, ++ pcmk__opt_context_schedulerd, ++ N_("How cluster should react to node health attributes"), ++ N_("Requires external entities to create node attributes (named with " ++ "the prefix \"#health\") with values \"red\", \"yellow\", or " ++ "\"green\".") ++ }, ++ { ++ "node-health-base", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("Base health score assigned to a node"), ++ N_("Only used when \"node-health-strategy\" is set to " ++ "\"progressive\"."), ++ }, ++ { ++ "node-health-green", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The score to use for a node health attribute whose value is " ++ "\"green\""), ++ N_("Only used when \"node-health-strategy\" is set to \"custom\" or " ++ "\"progressive\"."), ++ }, ++ { ++ "node-health-yellow", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, 
++ N_("The score to use for a node health attribute whose value is " ++ "\"yellow\""), ++ N_("Only used when \"node-health-strategy\" is set to \"custom\" or " ++ "\"progressive\"."), ++ }, ++ { ++ "node-health-red", NULL, "integer", NULL, ++ "-INFINITY", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The score to use for a node health attribute whose value is " ++ "\"red\""), ++ N_("Only used when \"node-health-strategy\" is set to \"custom\" or " ++ "\"progressive\".") ++ }, ++ ++ // Placement strategy ++ { ++ "placement-strategy", NULL, "select", ++ "default, utilization, minimal, balanced", ++ "default", pcmk__valid_placement_strategy, ++ pcmk__opt_context_schedulerd, ++ N_("How the cluster should allocate resources to nodes"), ++ NULL, ++ }, ++}; ++#endif // 0 ++ ++ + /* + * Environment variable option handling + */ +-- +2.31.1 + +From 96fa08b7adc911cce417f7f9889029510ec1c428 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 10 Jan 2024 23:35:22 -0800 +Subject: [PATCH 08/24] Refactor: libcrmcommon: New filter arg to + pcmk__format_option_metadata() + +Now each cluster option is in exactly one daemon's metadata. The four +options that were previously in the metadata of both the controller and +the scheduler are now only in the scheduler's metadata. + +All daemons still have access to all the options they use. + +All function calls in daemons still use the local options arrays rather +than the one in libcrmcommon. That will change in upcoming commits. 
+ +Closes T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 6 ++++-- + daemons/fenced/pacemaker-fenced.c | 17 +++++++++------- + include/crm/common/options_internal.h | 1 + + lib/cib/cib_utils.c | 7 ++++--- + lib/common/options.c | 28 +++++++++++++++++++++++++-- + lib/pengine/common.c | 7 ++++--- + 6 files changed, 49 insertions(+), 17 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 4d7cb14..8fe09da 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -723,11 +723,13 @@ static pcmk__cluster_option_t controller_options[] = { + void + crmd_metadata(void) + { ++ const char *name = "pacemaker-controld"; + const char *desc_short = "Pacemaker controller options"; + const char *desc_long = "Cluster options used by Pacemaker's controller"; + +- gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short, +- desc_long, controller_options, ++ gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_controld, ++ controller_options, + PCMK__NELEM(controller_options)); + printf("%s", s); + g_free(s); +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index b2f4742..d0b6c31 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -742,14 +742,17 @@ static pcmk__cluster_option_t fencer_options[] = { + void + fencer_metadata(void) + { ++ const char *name = "pacemaker-fenced"; + const char *desc_short = N_("Instance attributes available for all " +- "\"stonith\"-class resources"); +- const char *desc_long = N_("Instance attributes available for all \"stonith\"-" +- "class resources and used by Pacemaker's fence " +- "daemon, formerly known as stonithd"); +- +- gchar *s = pcmk__format_option_metadata("pacemaker-fenced", desc_short, +- desc_long, fencer_options, ++ "\"stonith\"-class resources"); ++ const char *desc_long = N_("Instance 
attributes available for all " ++ "\"stonith\"-class resources and used by " ++ "Pacemaker's fence daemon, formerly known as " ++ "stonithd"); ++ ++ gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_none, ++ fencer_options, + PCMK__NELEM(fencer_options)); + printf("%s", s); + g_free(s); +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index b2525ef..89d27d1 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -70,6 +70,7 @@ const char *pcmk__cluster_option(GHashTable *options, + + gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, + const char *desc_long, ++ enum pcmk__opt_context filter, + pcmk__cluster_option_t *option_list, + int len); + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 9e4060b..2205d15 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -894,13 +894,14 @@ static pcmk__cluster_option_t cib_opts[] = { + void + cib_metadata(void) + { ++ const char *name = "pacemaker-based"; + const char *desc_short = "Cluster Information Base manager options"; + const char *desc_long = "Cluster options used by Pacemaker's Cluster " + "Information Base manager"; + +- gchar *s = pcmk__format_option_metadata("pacemaker-based", desc_short, +- desc_long, cib_opts, +- PCMK__NELEM(cib_opts)); ++ gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_based, ++ cib_opts, PCMK__NELEM(cib_opts)); + printf("%s", s); + g_free(s); + } +diff --git a/lib/common/options.c b/lib/common/options.c +index ff73dcc..d5b6c17 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -937,13 +937,32 @@ add_desc(GString *s, const char *tag, const char *desc, const char *values, + free(escaped_en); + } + ++/*! 
++ * \internal ++ * \brief Format option metadata as an OCF-like XML string ++ * ++ * \param[in] name Daemon name ++ * \param[in] desc_short Short description of the daemon ++ * \param[in] desc_long Long description of the daemon ++ * \param[in] filter If not \c pcmk__opt_context_none, include only ++ * those options whose \c context field is equal to ++ * \p filter ++ * \param[in] option_list Options whose metadata to format ++ * \param[in] len Number of items in \p option_list ++ * ++ * \return A string containing OCF-like option metadata XML ++ * ++ * \note The caller is responsible for freeing the return value using ++ * \c g_free(). ++ */ + gchar * + pcmk__format_option_metadata(const char *name, const char *desc_short, + const char *desc_long, ++ enum pcmk__opt_context filter, + pcmk__cluster_option_t *option_list, int len) + { +- /* big enough to hold "pacemaker-schedulerd metadata" output */ +- GString *s = g_string_sized_new(13000); ++ // Large enough to hold current cluster options with room for growth (2^15) ++ GString *s = g_string_sized_new(32768); + + pcmk__g_strcat(s, + "\n" +@@ -964,6 +983,11 @@ pcmk__format_option_metadata(const char *name, const char *desc_short, + const char *opt_desc_short = option_list[lpc].description_short; + const char *opt_desc_long = option_list[lpc].description_long; + ++ if ((filter != pcmk__opt_context_none) ++ && (filter != option_list[lpc].context)) { ++ continue; ++ } ++ + // The standard requires long and short parameter descriptions + CRM_ASSERT((opt_desc_short != NULL) || (opt_desc_long != NULL)); + +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 383c4af..e9aa2e2 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -319,12 +319,13 @@ static pcmk__cluster_option_t pe_opts[] = { + void + pe_metadata(pcmk__output_t *out) + { ++ const char *name = "pacemaker-schedulerd"; + const char *desc_short = "Pacemaker scheduler options"; + const char *desc_long = "Cluster options used by 
Pacemaker's scheduler"; + +- gchar *s = pcmk__format_option_metadata("pacemaker-schedulerd", desc_short, +- desc_long, pe_opts, +- PCMK__NELEM(pe_opts)); ++ gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_schedulerd, ++ pe_opts, PCMK__NELEM(pe_opts)); + out->output_xml(out, "metadata", s); + g_free(s); + } +-- +2.31.1 + +From 96b59bf0c66fccc0656a9195ebe7580d54083eb3 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 10 Jan 2024 23:59:39 -0800 +Subject: [PATCH 09/24] Test: cts-cli: Update daemon outputs for option + filtering + +Now each cluster option is in exactly one daemon's metadata. The four +options that were previously in the metadata of both the controller and +the scheduler are now only in the scheduler's metadata. + +All daemons still have access to all the options they use. + +Ref T746 + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 26 -------------------------- + 1 file changed, 26 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index 6a24089..9b2dd96 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -101,32 +101,6 @@ + How many times fencing can fail before it will no longer be immediately re-attempted on a target + + +- +- What to do when the cluster does not have quorum Allowed values: stop, freeze, ignore, demote, suicide +- What to do when the cluster does not have quorum +- +- +- +- +- When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. 
+- Whether to lock resources to a cleanly shut down node +- +- +- +- If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. +- Do not lock resources to a cleanly shut down node longer than this +- +- +- +- Fence nodes that do not join the controller process group within this much time after joining the cluster, to allow the cluster to continue managing resources. A value of 0 means never fence pending nodes. Setting the value to 2h means fence nodes after 2 hours. +- How long to wait for a node that has joined the cluster to join the controller process group +- +- + + + =#=#=#= End test: Get controller metadata - OK (0) =#=#=#= +-- +2.31.1 + +From fec945824ed11395a8366882c29315c509de80f0 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 19:07:00 -0800 +Subject: [PATCH 10/24] Refactor: libcrmcommon, daemons: New + pcmk__cluster_option_metadata() + +This new function is a wrapper for pcmk__format_option_metadata() that +always uses the shared cluster_options array and its length. + +Daemons can now call this function to get metadata instead of using +their local options arrays for that purpose. + +Soon we'll introduce a command that outputs all cluster option metadata +directly, instead of calling daemon metadata commands. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 7 +++---- + include/crm/common/options_internal.h | 4 ++++ + lib/cib/cib_utils.c | 6 +++--- + lib/common/options.c | 28 +++++++++++++++++++++++++-- + lib/pengine/common.c | 6 +++--- + 5 files changed, 39 insertions(+), 12 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 8fe09da..82aa143 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -727,10 +727,9 @@ crmd_metadata(void) + const char *desc_short = "Pacemaker controller options"; + const char *desc_long = "Cluster options used by Pacemaker's controller"; + +- gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_controld, +- controller_options, +- PCMK__NELEM(controller_options)); ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_controld); ++ + printf("%s", s); + g_free(s); + } +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index 89d27d1..a62015f 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -74,6 +74,10 @@ gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, + pcmk__cluster_option_t *option_list, + int len); + ++gchar *pcmk__cluster_option_metadata(const char *name, const char *desc_short, ++ const char *desc_long, ++ enum pcmk__opt_context filter); ++ + void pcmk__validate_cluster_options(GHashTable *options, + pcmk__cluster_option_t *option_list, + int len); +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 2205d15..479a7fb 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -899,9 +899,9 @@ cib_metadata(void) + const char *desc_long = "Cluster options used by Pacemaker's Cluster " + "Information Base manager"; + +- gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, +- 
pcmk__opt_context_based, +- cib_opts, PCMK__NELEM(cib_opts)); ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_based); ++ + printf("%s", s); + g_free(s); + } +diff --git a/lib/common/options.c b/lib/common/options.c +index d5b6c17..df4a8b4 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -43,7 +43,6 @@ pcmk__cli_help(char cmd) + * Option metadata + */ + +-#if 0 + static pcmk__cluster_option_t cluster_options[] = { + /* name, old name, type, allowed values, + * default value, validator, +@@ -534,7 +533,6 @@ static pcmk__cluster_option_t cluster_options[] = { + NULL, + }, + }; +-#endif // 0 + + + /* +@@ -1036,6 +1034,32 @@ pcmk__format_option_metadata(const char *name, const char *desc_short, + return g_string_free(s, FALSE); + } + ++/*! ++ * \internal ++ * \brief Format cluster option metadata as an OCF-like XML string ++ * ++ * \param[in] name Daemon name ++ * \param[in] desc_short Short description of the daemon ++ * \param[in] desc_long Long description of the daemon ++ * \param[in] filter If not \c pcmk__opt_context_none, include only ++ * those options whose \c context field is equal to ++ * \p filter ++ * ++ * \return A string containing OCF-like cluster option metadata XML ++ * ++ * \note The caller is responsible for freeing the return value using ++ * \c g_free(). 
++ */ ++gchar * ++pcmk__cluster_option_metadata(const char *name, const char *desc_short, ++ const char *desc_long, ++ enum pcmk__opt_context filter) ++{ ++ return pcmk__format_option_metadata(name, desc_short, desc_long, filter, ++ cluster_options, ++ PCMK__NELEM(cluster_options)); ++} ++ + void + pcmk__validate_cluster_options(GHashTable *options, + pcmk__cluster_option_t *option_list, int len) +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index e9aa2e2..c9f1fc1 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -323,9 +323,9 @@ pe_metadata(pcmk__output_t *out) + const char *desc_short = "Pacemaker scheduler options"; + const char *desc_long = "Cluster options used by Pacemaker's scheduler"; + +- gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_schedulerd, +- pe_opts, PCMK__NELEM(pe_opts)); ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_schedulerd); ++ + out->output_xml(out, "metadata", s); + g_free(s); + } +-- +2.31.1 + +From 9a7d33003dffea465e7b452abd0388db4a7d73b0 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Thu, 11 Jan 2024 00:06:32 -0800 +Subject: [PATCH 11/24] Test: cts-cli: Update daemon outputs for reordering + +In the new libcrmcommon options array, some options have been reordered +to be near other similar options. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 40 +++++++++++++++++----------------- + 1 file changed, 20 insertions(+), 20 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index 9b2dd96..43393df 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -51,16 +51,6 @@ + Polling interval to recheck cluster state and evaluate rules with date specifications + + +- +- The cluster will slow down its recovery process when the amount of system resources used (currently CPU) approaches this limit +- Maximum amount of system load that should be used by cluster nodes +- +- +- +- Maximum number of jobs that can be scheduled per node (defaults to 2x cores) +- Maximum number of jobs that can be scheduled per node (defaults to 2x cores) +- +- + + A cluster node may receive notification of its own fencing if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Allowed values are "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. 
+ How a cluster node should react if notified of its own fencing +@@ -101,6 +91,16 @@ + How many times fencing can fail before it will no longer be immediately re-attempted on a target + + ++ ++ The cluster will slow down its recovery process when the amount of system resources used (currently CPU) approaches this limit ++ Maximum amount of system load that should be used by cluster nodes ++ ++ ++ ++ Maximum number of jobs that can be scheduled per node (defaults to 2x cores) ++ Maximum number of jobs that can be scheduled per node (defaults to 2x cores) ++ ++ + + + =#=#=#= End test: Get controller metadata - OK (0) =#=#=#= +@@ -259,6 +259,16 @@ + + ++ ++ When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. ++ Whether to lock resources to a cleanly shut down node ++ ++ ++ ++ If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. ++ Do not lock resources to a cleanly shut down node longer than this ++ ++ + + Whether resources can run on any node by default + Whether resources can run on any node by default +@@ -279,16 +289,6 @@ + Whether the cluster should check for active resources during start-up + + +- +- When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. 
Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. +- Whether to lock resources to a cleanly shut down node +- +- +- +- If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. +- Do not lock resources to a cleanly shut down node longer than this +- +- + + If false, unresponsive nodes are immediately assumed to be harmless, and resources that were active on them may be recovered elsewhere. This can result in a "split-brain" situation, potentially leading to data loss and/or service unavailability. + *** Advanced Use Only *** Whether nodes may be fenced as part of recovery +-- +2.31.1 + +From c085ff844deddefe4f00355e2a273f27eb35ce00 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Thu, 11 Jan 2024 00:23:01 -0800 +Subject: [PATCH 12/24] Refactor: libcrmcommon, daemons: Use cluster_options + array in getters + +Drop the option_list and len arguments from pcmk__cluster_option() and +pcmk__validate_cluster_options(). Use cluster_options in libcrmcommon +instead. + +Now, all daemons fetch and validate the full set of cluster options, +even the ones they don't use. This is only slightly less efficient. It +ensures that there's no problem using the same option with multiple +daemons, and it makes it easy to use new options in a given daemon in +the future. + +Now that nothing is using the local, per-daemon options arrays anymore, +we can drop them in an upcoming commit. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 9 +++--- + include/crm/common/options_internal.h | 8 ++---- + lib/cib/cib_utils.c | 7 +++-- + lib/common/options.c | 40 ++++++++++++--------------- + lib/pengine/common.c | 6 ++-- + 5 files changed, 31 insertions(+), 39 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 82aa143..4208947 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -515,6 +515,7 @@ do_recover(long long action, + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); + } + ++#if 0 + static pcmk__cluster_option_t controller_options[] = { + /* name, old name, type, allowed values, + * default value, validator, +@@ -719,6 +720,7 @@ static pcmk__cluster_option_t controller_options[] = { + "2 hours.") + }, + }; ++#endif // 0 + + void + crmd_metadata(void) +@@ -775,8 +777,7 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); + + // Validate all options, and use defaults if not already present in hash +- pcmk__validate_cluster_options(config_hash, controller_options, +- PCMK__NELEM(controller_options)); ++ pcmk__validate_cluster_options(config_hash); + + /* Validate the watchdog timeout in the context of the local node + * environment. If invalid, the controller will exit with a fatal error. +@@ -900,9 +901,7 @@ crm_shutdown(int nsig) + * config_query_callback() has been run at least once, it doesn't look like + * anything could have changed the timer period since then. 
+ */ +- value = pcmk__cluster_option(NULL, controller_options, +- PCMK__NELEM(controller_options), +- XML_CONFIG_ATTR_FORCE_QUIT); ++ value = pcmk__cluster_option(NULL, XML_CONFIG_ATTR_FORCE_QUIT); + default_period_ms = crm_parse_interval_spec(value); + controld_shutdown_start_countdown(default_period_ms); + } +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index a62015f..b727a58 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -64,9 +64,7 @@ typedef struct pcmk__cluster_option_s { + + } pcmk__cluster_option_t; + +-const char *pcmk__cluster_option(GHashTable *options, +- const pcmk__cluster_option_t *option_list, +- int len, const char *name); ++const char *pcmk__cluster_option(GHashTable *options, const char *name); + + gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, + const char *desc_long, +@@ -78,9 +76,7 @@ gchar *pcmk__cluster_option_metadata(const char *name, const char *desc_short, + const char *desc_long, + enum pcmk__opt_context filter); + +-void pcmk__validate_cluster_options(GHashTable *options, +- pcmk__cluster_option_t *option_list, +- int len); ++void pcmk__validate_cluster_options(GHashTable *options); + + bool pcmk__valid_interval_spec(const char *value); + bool pcmk__valid_boolean(const char *value); +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 479a7fb..97f62ac 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -866,6 +866,7 @@ cib_native_notify(gpointer data, gpointer user_data) + crm_trace("Callback invoked..."); + } + ++#if 0 + static pcmk__cluster_option_t cib_opts[] = { + /* name, legacy name, type, allowed values, + * default value, validator, +@@ -890,6 +891,7 @@ static pcmk__cluster_option_t cib_opts[] = { + " multiplied by the number of nodes).") + }, + }; ++#endif // 0 + + void + cib_metadata(void) +@@ -909,14 +911,13 @@ cib_metadata(void) + static void + 
verify_cib_options(GHashTable *options) + { +- pcmk__validate_cluster_options(options, cib_opts, PCMK__NELEM(cib_opts)); ++ pcmk__validate_cluster_options(options); + } + + const char * + cib_pref(GHashTable * options, const char *name) + { +- return pcmk__cluster_option(options, cib_opts, PCMK__NELEM(cib_opts), +- name); ++ return pcmk__cluster_option(options, name); + } + + gboolean +diff --git a/lib/common/options.c b/lib/common/options.c +index df4a8b4..13d58e3 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -852,27 +852,21 @@ cluster_option_value(GHashTable *options, bool (*validate)(const char *), + * \internal + * \brief Get the value of a cluster option + * +- * \param[in,out] options Name/value pairs for configured options +- * \param[in] option_list Possible cluster options +- * \param[in] len Length of \p option_list +- * \param[in] name (Primary) option name to look for ++ * \param[in,out] options Name/value pairs for configured options ++ * \param[in] name (Primary) option name to look for + * + * \return Option value + */ + const char * +-pcmk__cluster_option(GHashTable *options, +- const pcmk__cluster_option_t *option_list, +- int len, const char *name) ++pcmk__cluster_option(GHashTable *options, const char *name) + { +- const char *value = NULL; +- +- for (int lpc = 0; lpc < len; lpc++) { +- if (pcmk__str_eq(name, option_list[lpc].name, pcmk__str_casei)) { +- value = cluster_option_value(options, option_list[lpc].is_valid, +- option_list[lpc].name, +- option_list[lpc].alt_name, +- option_list[lpc].default_value); +- return value; ++ for (int lpc = 0; lpc < PCMK__NELEM(cluster_options); lpc++) { ++ if (pcmk__str_eq(name, cluster_options[lpc].name, pcmk__str_casei)) { ++ return cluster_option_value(options, ++ cluster_options[lpc].is_valid, ++ cluster_options[lpc].name, ++ cluster_options[lpc].alt_name, ++ cluster_options[lpc].default_value); + } + } + CRM_CHECK(FALSE, crm_err("Bug: looking for unknown option '%s'", name)); +@@ 
-1061,13 +1055,13 @@ pcmk__cluster_option_metadata(const char *name, const char *desc_short, + } + + void +-pcmk__validate_cluster_options(GHashTable *options, +- pcmk__cluster_option_t *option_list, int len) ++pcmk__validate_cluster_options(GHashTable *options) + { +- for (int lpc = 0; lpc < len; lpc++) { +- cluster_option_value(options, option_list[lpc].is_valid, +- option_list[lpc].name, +- option_list[lpc].alt_name, +- option_list[lpc].default_value); ++ for (int lpc = 0; lpc < PCMK__NELEM(cluster_options); lpc++) { ++ cluster_option_value(options, ++ cluster_options[lpc].is_valid, ++ cluster_options[lpc].name, ++ cluster_options[lpc].alt_name, ++ cluster_options[lpc].default_value); + } + } +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index c9f1fc1..f99bd1b 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -21,6 +21,7 @@ + gboolean was_processing_error = FALSE; + gboolean was_processing_warning = FALSE; + ++#if 0 + static pcmk__cluster_option_t pe_opts[] = { + /* name, old name, type, allowed values, + * default value, validator, +@@ -315,6 +316,7 @@ static pcmk__cluster_option_t pe_opts[] = { + NULL + }, + }; ++#endif // 0 + + void + pe_metadata(pcmk__output_t *out) +@@ -333,13 +335,13 @@ pe_metadata(pcmk__output_t *out) + void + verify_pe_options(GHashTable * options) + { +- pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts)); ++ pcmk__validate_cluster_options(options); + } + + const char * + pe_pref(GHashTable * options, const char *name) + { +- return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name); ++ return pcmk__cluster_option(options, name); + } + + const char * +-- +2.31.1 + +From de834cee2c5d8f4f796633e66f263ad77b9cd2eb Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 03:06:27 -0800 +Subject: [PATCH 13/24] Refactor: various: Drop per-daemon cluster opt tables + +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 207 ------------------- 
+ lib/cib/cib_utils.c | 27 --- + lib/pengine/common.c | 297 ---------------------------- + 3 files changed, 531 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 4208947..40b90f8 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -515,213 +515,6 @@ do_recover(long long action, + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); + } + +-#if 0 +-static pcmk__cluster_option_t controller_options[] = { +- /* name, old name, type, allowed values, +- * default value, validator, +- * context, +- * short description, +- * long description +- */ +- { +- "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL, +- pcmk__opt_context_controld, +- N_("Pacemaker version on cluster node elected Designated Controller (DC)"), +- N_("Includes a hash which identifies the exact changeset the code was " +- "built from. Used for diagnostic purposes.") +- }, +- { +- "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL, +- pcmk__opt_context_controld, +- N_("The messaging stack on which Pacemaker is currently running"), +- N_("Used for informational and diagnostic purposes.") +- }, +- { +- "cluster-name", NULL, "string", NULL, NULL, NULL, +- pcmk__opt_context_controld, +- N_("An arbitrary name for the cluster"), +- N_("This optional value is mostly for users' convenience as desired " +- "in administration, but may also be used in Pacemaker " +- "configuration rules via the #cluster-name node attribute, and " +- "by higher-level tools and resource agents.") +- }, +- { +- XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", +- NULL, "20s", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("How long to wait for a response from other nodes during start-up"), +- N_("The optimal value will depend on the speed and load of your network " +- "and the type of switches used.") +- }, +- { +- XML_CONFIG_ATTR_RECHECK, NULL, "time", +- N_("Zero disables polling, while positive 
values are an interval in seconds" +- "(unless other units are specified, for example \"5min\")"), +- "15min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("Polling interval to recheck cluster state and evaluate rules " +- "with date specifications"), +- N_("Pacemaker is primarily event-driven, and looks ahead to know when to " +- "recheck cluster state for failure timeouts and most time-based " +- "rules. However, it will also recheck the cluster after this " +- "amount of inactivity, to evaluate rules with date specifications " +- "and serve as a fail-safe for certain types of scheduler bugs.") +- }, +- { +- "load-threshold", NULL, "percentage", NULL, +- "80%", pcmk__valid_percentage, +- pcmk__opt_context_controld, +- N_("Maximum amount of system load that should be used by cluster nodes"), +- N_("The cluster will slow down its recovery process when the amount of " +- "system resources used (currently CPU) approaches this limit"), +- }, +- { +- "node-action-limit", NULL, "integer", NULL, +- "0", pcmk__valid_number, +- pcmk__opt_context_controld, +- N_("Maximum number of jobs that can be scheduled per node " +- "(defaults to 2x cores)") +- }, +- { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL, +- pcmk__opt_context_controld, +- N_("How a cluster node should react if notified of its own fencing"), +- N_("A cluster node may receive notification of its own fencing if fencing " +- "is misconfigured, or if fabric fencing is in use that doesn't cut " +- "cluster communication. Allowed values are \"stop\" to attempt to " +- "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt " +- "to immediately reboot the local node, falling back to stop on failure.") +- }, +- { +- XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, +- "2min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- "*** Advanced Use Only ***", +- N_("Declare an election failed if it is not decided within this much " +- "time. 
If you need to adjust this value, it probably indicates " +- "the presence of a bug.") +- }, +- { +- XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, +- "20min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- "*** Advanced Use Only ***", +- N_("Exit immediately if shutdown does not complete within this much " +- "time. If you need to adjust this value, it probably indicates " +- "the presence of a bug.") +- }, +- { +- "join-integration-timeout", "crmd-integration-timeout", "time", NULL, +- "3min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- "*** Advanced Use Only ***", +- N_("If you need to adjust this value, it probably indicates " +- "the presence of a bug.") +- }, +- { +- "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL, +- "30min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- "*** Advanced Use Only ***", +- N_("If you need to adjust this value, it probably indicates " +- "the presence of a bug.") +- }, +- { +- "transition-delay", "crmd-transition-delay", "time", NULL, +- "0s", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("*** Advanced Use Only *** Enabling this option will slow down " +- "cluster recovery under all conditions"), +- N_("Delay cluster recovery for this much time to allow for additional " +- "events to occur. Useful if your configuration is sensitive to " +- "the order in which ping updates arrive.") +- }, +- { +- /* @COMPAT Currently unparsable values default to -1 (auto-calculate), +- * while missing values default to 0 (disable). All values are accepted +- * (unless the controller finds that the value conflicts with the +- * SBD_WATCHDOG_TIMEOUT). +- * +- * At a compatibility break: properly validate as a timeout, let +- * either negative values or a particular string like "auto" mean auto- +- * calculate, and use 0 as the single default for when the option either +- * is unset or fails to validate. 
+- */ +- "stonith-watchdog-timeout", NULL, "time", NULL, +- "0", NULL, +- pcmk__opt_context_controld, +- N_("How long before nodes can be assumed to be safely down when " +- "watchdog-based self-fencing via SBD is in use"), +- N_("If this is set to a positive value, lost nodes are assumed to " +- "self-fence using watchdog-based SBD within this much time. This " +- "does not require a fencing resource to be explicitly configured, " +- "though a fence_watchdog resource can be configured, to limit use " +- "to specific nodes. If this is set to 0 (the default), the cluster " +- "will never assume watchdog-based self-fencing. If this is set to a " +- "negative value, the cluster will use twice the local value of the " +- "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, " +- "or otherwise treat this as 0. WARNING: When used, this timeout " +- "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use " +- "watchdog-based SBD, and Pacemaker will refuse to start on any of " +- "those nodes where this is not true for the local value or SBD is " +- "not active. 
When this is set to a negative value, " +- "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes " +- "that use SBD, otherwise data corruption or loss could occur.") +- }, +- { +- "stonith-max-attempts", NULL, "integer", NULL, +- "10", pcmk__valid_positive_number, +- pcmk__opt_context_controld, +- N_("How many times fencing can fail before it will no longer be " +- "immediately re-attempted on a target") +- }, +- +- // Already documented in libpe_status (other values must be kept identical) +- { +- "no-quorum-policy", NULL, "select", +- "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum, +- pcmk__opt_context_controld, +- N_("What to do when the cluster does not have quorum"), NULL +- }, +- { +- XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_controld, +- N_("Whether to lock resources to a cleanly shut down node"), +- N_("When true, resources active on a node when it is cleanly shut down " +- "are kept \"locked\" to that node (not allowed to run elsewhere) " +- "until they start again on that node after it rejoins (or for at " +- "most shutdown-lock-limit, if set). Stonith resources and " +- "Pacemaker Remote connections are never locked. 
Clone and bundle " +- "instances and the promoted role of promotable clones are " +- "currently never locked, though support could be added in a future " +- "release.") +- }, +- { +- XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("Do not lock resources to a cleanly shut down node longer than " +- "this"), +- N_("If shutdown-lock is true and this is set to a nonzero time " +- "duration, shutdown locks will expire after this much time has " +- "passed since the shutdown was initiated, even if the node has not " +- "rejoined.") +- }, +- { +- XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("How long to wait for a node that has joined the cluster to join " +- "the controller process group"), +- N_("Fence nodes that do not join the controller process group within " +- "this much time after joining the cluster, to allow the cluster " +- "to continue managing resources. A value of 0 means never fence " +- "pending nodes. 
Setting the value to 2h means fence nodes after " +- "2 hours.") +- }, +-}; +-#endif // 0 +- + void + crmd_metadata(void) + { +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 97f62ac..b83158c 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -866,33 +866,6 @@ cib_native_notify(gpointer data, gpointer user_data) + crm_trace("Callback invoked..."); + } + +-#if 0 +-static pcmk__cluster_option_t cib_opts[] = { +- /* name, legacy name, type, allowed values, +- * default value, validator, +- * context, +- * short description, +- * long description +- */ +- { +- "enable-acl", NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_based, +- N_("Enable Access Control Lists (ACLs) for the CIB"), +- NULL +- }, +- { +- "cluster-ipc-limit", NULL, "integer", NULL, +- "500", pcmk__valid_positive_number, +- pcmk__opt_context_based, +- N_("Maximum IPC message backlog before disconnecting a cluster daemon"), +- N_("Raise this if log has \"Evicting client\" messages for cluster daemon" +- " PIDs (a good value is the number of resources in the cluster" +- " multiplied by the number of nodes).") +- }, +-}; +-#endif // 0 +- + void + cib_metadata(void) + { +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index f99bd1b..e96f0b5 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -21,303 +21,6 @@ + gboolean was_processing_error = FALSE; + gboolean was_processing_warning = FALSE; + +-#if 0 +-static pcmk__cluster_option_t pe_opts[] = { +- /* name, old name, type, allowed values, +- * default value, validator, +- * context, +- * short description, +- * long description +- */ +- { +- "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide", +- "stop", pcmk__valid_quorum, +- pcmk__opt_context_schedulerd, +- N_("What to do when the cluster does not have quorum"), +- NULL +- }, +- { +- "symmetric-cluster", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- 
N_("Whether resources can run on any node by default"), +- NULL +- }, +- { +- "maintenance-mode", NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether the cluster should refrain from monitoring, starting, " +- "and stopping resources"), +- NULL +- }, +- { +- "start-failure-is-fatal", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether a start failure should prevent a resource from being " +- "recovered on the same node"), +- N_("When true, the cluster will immediately ban a resource from a node " +- "if it fails to start there. When false, the cluster will instead " +- "check the resource's fail count against its migration-threshold.") +- }, +- { +- "enable-startup-probes", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether the cluster should check for active resources during start-up"), +- NULL +- }, +- { +- XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether to lock resources to a cleanly shut down node"), +- N_("When true, resources active on a node when it is cleanly shut down " +- "are kept \"locked\" to that node (not allowed to run elsewhere) " +- "until they start again on that node after it rejoins (or for at " +- "most shutdown-lock-limit, if set). Stonith resources and " +- "Pacemaker Remote connections are never locked. 
Clone and bundle " +- "instances and the promoted role of promotable clones are " +- "currently never locked, though support could be added in a future " +- "release.") +- }, +- { +- XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("Do not lock resources to a cleanly shut down node longer than " +- "this"), +- N_("If shutdown-lock is true and this is set to a nonzero time " +- "duration, shutdown locks will expire after this much time has " +- "passed since the shutdown was initiated, even if the node has not " +- "rejoined.") +- }, +- +- // Fencing-related options +- { +- "stonith-enabled", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("*** Advanced Use Only *** " +- "Whether nodes may be fenced as part of recovery"), +- N_("If false, unresponsive nodes are immediately assumed to be harmless, " +- "and resources that were active on them may be recovered " +- "elsewhere. 
This can result in a \"split-brain\" situation, " +- "potentially leading to data loss and/or service unavailability.") +- }, +- { +- "stonith-action", NULL, "select", "reboot, off, poweroff", +- PCMK_ACTION_REBOOT, pcmk__is_fencing_action, +- pcmk__opt_context_schedulerd, +- N_("Action to send to fence device when a node needs to be fenced " +- "(\"poweroff\" is a deprecated alias for \"off\")"), +- NULL +- }, +- { +- "stonith-timeout", NULL, "time", NULL, +- "60s", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("*** Advanced Use Only *** Unused by Pacemaker"), +- N_("This value is not used by Pacemaker, but is kept for backward " +- "compatibility, and certain legacy fence agents might use it.") +- }, +- { +- XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether watchdog integration is enabled"), +- N_("This is set automatically by the cluster according to whether SBD " +- "is detected to be in use. User-configured values are ignored. " +- "The value `true` is meaningful if diskless SBD is used and " +- "`stonith-watchdog-timeout` is nonzero. 
In that case, if fencing " +- "is required, watchdog-based self-fencing will be performed via " +- "SBD without requiring a fencing resource explicitly configured.") +- }, +- { +- "concurrent-fencing", NULL, "boolean", NULL, +- PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Allow performing fencing operations in parallel"), +- NULL +- }, +- { +- "startup-fencing", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"), +- N_("Setting this to false may lead to a \"split-brain\" situation," +- "potentially leading to data loss and/or service unavailability.") +- }, +- { +- XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"), +- N_("Apply specified delay for the fencings that are targeting the lost " +- "nodes with the highest total resource priority in case we don't " +- "have the majority of the nodes in our cluster partition, so that " +- "the more significant nodes potentially win any fencing match, " +- "which is especially meaningful under split-brain of 2-node " +- "cluster. A promoted resource instance takes the base priority + 1 " +- "on calculation if the base priority is not 0. Any static/random " +- "delays that are introduced by `pcmk_delay_base/max` configured " +- "for the corresponding fencing resources will be added to this " +- "delay. This delay should be significantly greater than, safely " +- "twice, the maximum `pcmk_delay_base/max`. 
By default, priority " +- "fencing delay is disabled.") +- }, +- { +- XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("How long to wait for a node that has joined the cluster to join " +- "the controller process group"), +- N_("Fence nodes that do not join the controller process group within " +- "this much time after joining the cluster, to allow the cluster " +- "to continue managing resources. A value of 0 means never fence " +- "pending nodes. Setting the value to 2h means fence nodes after " +- "2 hours.") +- }, +- { +- "cluster-delay", NULL, "time", NULL, +- "60s", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("Maximum time for node-to-node communication"), +- N_("The node elected Designated Controller (DC) will consider an action " +- "failed if it does not get a response from the node executing the " +- "action within this time (after considering the action's own " +- "timeout). The \"correct\" value will depend on the speed and " +- "load of your network and cluster nodes.") +- }, +- { +- "batch-limit", NULL, "integer", NULL, +- "0", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("Maximum number of jobs that the cluster may execute in parallel " +- "across all nodes"), +- N_("The \"correct\" value will depend on the speed and load of your " +- "network and cluster nodes. 
If set to 0, the cluster will " +- "impose a dynamically calculated limit when any node has a " +- "high load.") +- }, +- { +- "migration-limit", NULL, "integer", NULL, +- "-1", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The number of live migration actions that the cluster is allowed " +- "to execute in parallel on a node (-1 means no limit)") +- }, +- +- /* Orphans and stopping */ +- { +- "stop-all-resources", NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether the cluster should stop all active resources"), +- NULL +- }, +- { +- "stop-orphan-resources", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether to stop resources that were removed from the configuration"), +- NULL +- }, +- { +- "stop-orphan-actions", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether to cancel recurring actions removed from the configuration"), +- NULL +- }, +- { +- "remove-after-stop", NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("*** Deprecated *** Whether to remove stopped resources from " +- "the executor"), +- N_("Values other than default are poorly tested and potentially dangerous." 
+- " This option will be removed in a future release.") +- }, +- +- /* Storing inputs */ +- { +- "pe-error-series-max", NULL, "integer", NULL, +- "-1", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The number of scheduler inputs resulting in errors to save"), +- N_("Zero to disable, -1 to store unlimited.") +- }, +- { +- "pe-warn-series-max", NULL, "integer", NULL, +- "5000", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The number of scheduler inputs resulting in warnings to save"), +- N_("Zero to disable, -1 to store unlimited.") +- }, +- { +- "pe-input-series-max", NULL, "integer", NULL, +- "4000", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The number of scheduler inputs without errors or warnings to save"), +- N_("Zero to disable, -1 to store unlimited.") +- }, +- +- /* Node health */ +- { +- PCMK__OPT_NODE_HEALTH_STRATEGY, NULL, "select", +- PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", " +- PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", " +- PCMK__VALUE_CUSTOM, +- PCMK__VALUE_NONE, pcmk__validate_health_strategy, +- pcmk__opt_context_schedulerd, +- N_("How cluster should react to node health attributes"), +- N_("Requires external entities to create node attributes (named with " +- "the prefix \"#health\") with values \"red\", " +- "\"yellow\", or \"green\".") +- }, +- { +- PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL, +- "0", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("Base health score assigned to a node"), +- N_("Only used when \"node-health-strategy\" is set to \"progressive\".") +- }, +- { +- PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL, +- "0", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The score to use for a node health attribute whose value is \"green\""), +- N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") +- }, +- { +- PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL, +- "0", pcmk__valid_number, +- 
pcmk__opt_context_schedulerd, +- N_("The score to use for a node health attribute whose value is \"yellow\""), +- N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") +- }, +- { +- PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL, +- "-INFINITY", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The score to use for a node health attribute whose value is \"red\""), +- N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") +- }, +- +- /*Placement Strategy*/ +- { +- "placement-strategy", NULL, "select", +- "default, utilization, minimal, balanced", +- "default", pcmk__valid_placement_strategy, +- pcmk__opt_context_schedulerd, +- N_("How the cluster should allocate resources to nodes"), +- NULL +- }, +-}; +-#endif // 0 +- + void + pe_metadata(pcmk__output_t *out) + { +-- +2.31.1 + +From 9a8bb049fcb49204932e96014c3a63e58fd95d23 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:36:34 -0800 +Subject: [PATCH 14/24] Refactor: libpe_status: Drop verify_pe_opts() + +Signed-off-by: Reid Wahl +--- + include/crm/pengine/internal.h | 1 - + lib/pengine/common.c | 6 ------ + lib/pengine/unpack.c | 2 +- + 3 files changed, 1 insertion(+), 8 deletions(-) + +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 9c8068f..5835ef8 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -209,7 +209,6 @@ pcmk_node_t *native_location(const pcmk_resource_t *rsc, GList **list, + int current); + + void pe_metadata(pcmk__output_t *out); +-void verify_pe_options(GHashTable * options); + + void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, + pcmk_scheduler_t *scheduler, gboolean failed); +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index e96f0b5..402fae9 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -35,12 +35,6 @@ pe_metadata(pcmk__output_t *out) + g_free(s); + } + +-void 
+-verify_pe_options(GHashTable * options) +-{ +- pcmk__validate_cluster_options(options); +-} +- + const char * + pe_pref(GHashTable * options, const char *name) + { +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 3429d56..2a9b563 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -228,7 +228,7 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash, + CIB_OPTIONS_FIRST, FALSE, scheduler); + +- verify_pe_options(scheduler->config_hash); ++ pcmk__validate_cluster_options(config_hash); + + set_config_flag(scheduler, "enable-startup-probes", + pcmk_sched_probe_resources); +-- +2.31.1 + +From af79c50b7a5626218bf2a9b34fe631f07b1e2bda Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 08:40:13 -0800 +Subject: [PATCH 15/24] Refactor: libpe_status: Drop pe_pref() internally + +Signed-off-by: Reid Wahl +--- + daemons/schedulerd/schedulerd_messages.c | 3 ++- + include/crm/pengine/internal.h | 10 +++++++--- + lib/pacemaker/pcmk_graph_producer.c | 9 +++++---- + lib/pacemaker/pcmk_sched_nodes.c | 5 +++-- + lib/pengine/unpack.c | 4 +++- + tools/crm_resource_print.c | 4 ++-- + 6 files changed, 22 insertions(+), 13 deletions(-) + +diff --git a/daemons/schedulerd/schedulerd_messages.c b/daemons/schedulerd/schedulerd_messages.c +index 5a97365..ff31fce 100644 +--- a/daemons/schedulerd/schedulerd_messages.c ++++ b/daemons/schedulerd/schedulerd_messages.c +@@ -112,7 +112,8 @@ handle_pecalc_request(pcmk__request_t *request) + series_id = 2; + } + +- value = pe_pref(scheduler->config_hash, series[series_id].param); ++ value = pcmk__cluster_option(scheduler->config_hash, ++ series[series_id].param); + if ((value == NULL) + || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) { + series_wrap = series[series_id].wrap; +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 5835ef8..2b7f2eb 100644 +--- 
a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -620,14 +620,18 @@ int pe__node_health(pcmk_node_t *node); + static inline enum pcmk__health_strategy + pe__health_strategy(pcmk_scheduler_t *scheduler) + { +- return pcmk__parse_health_strategy(pe_pref(scheduler->config_hash, +- PCMK__OPT_NODE_HEALTH_STRATEGY)); ++ const char *strategy = pcmk__cluster_option(scheduler->config_hash, ++ PCMK__OPT_NODE_HEALTH_STRATEGY); ++ ++ return pcmk__parse_health_strategy(strategy); + } + + static inline int + pe__health_score(const char *option, pcmk_scheduler_t *scheduler) + { +- return char2score(pe_pref(scheduler->config_hash, option)); ++ const char *value = pcmk__cluster_option(scheduler->config_hash, option); ++ ++ return char2score(value); + } + + /*! +diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c +index 59b6176..3006775 100644 +--- a/lib/pacemaker/pcmk_graph_producer.c ++++ b/lib/pacemaker/pcmk_graph_producer.c +@@ -1004,16 +1004,17 @@ pcmk__create_graph(pcmk_scheduler_t *scheduler) + GList *iter = NULL; + const char *value = NULL; + long long limit = 0LL; ++ GHashTable *config_hash = scheduler->config_hash; + + transition_id++; + crm_trace("Creating transition graph %d", transition_id); + + scheduler->graph = create_xml_node(NULL, XML_TAG_GRAPH); + +- value = pe_pref(scheduler->config_hash, "cluster-delay"); ++ value = pcmk__cluster_option(config_hash, "cluster-delay"); + crm_xml_add(scheduler->graph, "cluster-delay", value); + +- value = pe_pref(scheduler->config_hash, "stonith-timeout"); ++ value = pcmk__cluster_option(config_hash, "stonith-timeout"); + crm_xml_add(scheduler->graph, "stonith-timeout", value); + + crm_xml_add(scheduler->graph, "failed-stop-offset", "INFINITY"); +@@ -1024,12 +1025,12 @@ pcmk__create_graph(pcmk_scheduler_t *scheduler) + crm_xml_add(scheduler->graph, "failed-start-offset", "1"); + } + +- value = pe_pref(scheduler->config_hash, "batch-limit"); ++ value = 
pcmk__cluster_option(config_hash, "batch-limit"); + crm_xml_add(scheduler->graph, "batch-limit", value); + + crm_xml_add_int(scheduler->graph, "transition_id", transition_id); + +- value = pe_pref(scheduler->config_hash, "migration-limit"); ++ value = pcmk__cluster_option(config_hash, "migration-limit"); + if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { + crm_xml_add(scheduler->graph, "migration-limit", value); + } +diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c +index 9cf5545..03baa2c 100644 +--- a/lib/pacemaker/pcmk_sched_nodes.c ++++ b/lib/pacemaker/pcmk_sched_nodes.c +@@ -360,8 +360,9 @@ pcmk__apply_node_health(pcmk_scheduler_t *scheduler) + { + int base_health = 0; + enum pcmk__health_strategy strategy; +- const char *strategy_str = pe_pref(scheduler->config_hash, +- PCMK__OPT_NODE_HEALTH_STRATEGY); ++ const char *strategy_str = ++ pcmk__cluster_option(scheduler->config_hash, ++ PCMK__OPT_NODE_HEALTH_STRATEGY); + + strategy = pcmk__parse_health_strategy(strategy_str); + if (strategy == pcmk__health_strategy_none) { +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 2a9b563..49443c6 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -50,7 +50,9 @@ struct action_history { + * flag is stringified more readably in log messages. 
+ */ + #define set_config_flag(scheduler, option, flag) do { \ +- const char *scf_value = pe_pref((scheduler)->config_hash, (option)); \ ++ GHashTable *config_hash = (scheduler)->config_hash; \ ++ const char *scf_value = pcmk__cluster_option(config_hash, (option)); \ ++ \ + if (scf_value != NULL) { \ + if (crm_is_true(scf_value)) { \ + (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ +diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c +index bdf3ad9..26761dd 100644 +--- a/tools/crm_resource_print.c ++++ b/tools/crm_resource_print.c +@@ -479,8 +479,8 @@ resource_check_list_default(pcmk__output_t *out, va_list args) { + "'%s' cannot run on unhealthy nodes due to " + PCMK__OPT_NODE_HEALTH_STRATEGY "='%s'", + parent->id, +- pe_pref(checks->rsc->cluster->config_hash, +- PCMK__OPT_NODE_HEALTH_STRATEGY)); ++ pcmk__cluster_option(checks->rsc->cluster->config_hash, ++ PCMK__OPT_NODE_HEALTH_STRATEGY)); + } + + out->end_list(out); +-- +2.31.1 + +From 1e78e617965b1a2e1a5671aa15943ba42487b09a Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 08:43:55 -0800 +Subject: [PATCH 16/24] API: libpe_status: Deprecate pe_pref() + +Signed-off-by: Reid Wahl +--- + include/crm/pengine/common.h | 4 +--- + include/crm/pengine/common_compat.h | 5 ++++- + lib/pengine/common.c | 20 ++++++++++++++------ + lib/pengine/unpack.c | 28 ++++++++++++++-------------- + 4 files changed, 33 insertions(+), 24 deletions(-) + +diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h +index 2feac8a..a935aa7 100644 +--- a/include/crm/pengine/common.h ++++ b/include/crm/pengine/common.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -28,8 +28,6 @@ enum rsc_role_e text2role(const char *role); + const char *role2text(enum rsc_role_e role); + const char *fail2text(enum action_fail_response fail); + +-const char *pe_pref(GHashTable * options, const char *name); +- + /*! + * \brief Get readable description of a recovery type + * +diff --git a/include/crm/pengine/common_compat.h b/include/crm/pengine/common_compat.h +index 4330ccf..52e11f7 100644 +--- a/include/crm/pengine/common_compat.h ++++ b/include/crm/pengine/common_compat.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -54,6 +54,9 @@ extern "C" { + + //! \deprecated Do not use + #define RSC_ROLE_MASTER_S RSC_ROLE_PROMOTED_LEGACY_S ++ ++//! \deprecated Do not use ++const char *pe_pref(GHashTable * options, const char *name); + + #ifdef __cplusplus + } +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 402fae9..0a4dfe6 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -35,12 +35,6 @@ pe_metadata(pcmk__output_t *out) + g_free(s); + } + +-const char * +-pe_pref(GHashTable * options, const char *name) +-{ +- return pcmk__cluster_option(options, name); +-} +- + const char * + fail2text(enum action_fail_response fail) + { +@@ -350,3 +344,17 @@ pe_node_attribute_raw(const pcmk_node_t *node, const char *name) + } + return g_hash_table_lookup(node->details->attrs, name); + } ++ ++// Deprecated functions kept only for backward API compatibility ++// LCOV_EXCL_START ++ ++#include ++ ++const char * ++pe_pref(GHashTable * options, const char *name) ++{ ++ return pcmk__cluster_option(options, name); ++} ++ ++// LCOV_EXCL_STOP ++// End deprecated API +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 49443c6..d484e93 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -238,7 +238,7 @@ unpack_config(xmlNode *config, 
pcmk_scheduler_t *scheduler) + crm_info("Startup probes: disabled (dangerous)"); + } + +- value = pe_pref(scheduler->config_hash, XML_ATTR_HAVE_WATCHDOG); ++ value = pcmk__cluster_option(config_hash, XML_ATTR_HAVE_WATCHDOG); + if (value && crm_is_true(value)) { + crm_info("Watchdog-based self-fencing will be performed via SBD if " + "fencing is required and stonith-watchdog-timeout is nonzero"); +@@ -251,7 +251,7 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING, + scheduler); + +- value = pe_pref(scheduler->config_hash, "stonith-timeout"); ++ value = pcmk__cluster_option(config_hash, "stonith-timeout"); + scheduler->stonith_timeout = (int) crm_parse_interval_spec(value); + crm_debug("STONITH timeout: %d", scheduler->stonith_timeout); + +@@ -262,8 +262,8 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + crm_debug("STONITH of failed nodes is disabled"); + } + +- scheduler->stonith_action = pe_pref(scheduler->config_hash, +- "stonith-action"); ++ scheduler->stonith_action = pcmk__cluster_option(config_hash, ++ "stonith-action"); + if (!strcmp(scheduler->stonith_action, "poweroff")) { + pe_warn_once(pcmk__wo_poweroff, + "Support for stonith-action of 'poweroff' is deprecated " +@@ -280,8 +280,8 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + crm_debug("Concurrent fencing is disabled"); + } + +- value = pe_pref(scheduler->config_hash, +- XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); ++ value = pcmk__cluster_option(config_hash, ++ XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); + if (value) { + scheduler->priority_fencing_delay = crm_parse_interval_spec(value) + / 1000; +@@ -299,7 +299,7 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); + } + +- value = pe_pref(scheduler->config_hash, "no-quorum-policy"); ++ value = pcmk__cluster_option(config_hash, "no-quorum-policy"); + + if 
(pcmk__str_eq(value, "ignore", pcmk__str_casei)) { + scheduler->no_quorum_policy = pcmk_no_quorum_ignore; +@@ -367,7 +367,7 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + crm_trace("Orphan resource actions are ignored"); + } + +- value = pe_pref(scheduler->config_hash, "remove-after-stop"); ++ value = pcmk__cluster_option(config_hash, "remove-after-stop"); + if (value != NULL) { + if (crm_is_true(value)) { + pe__set_working_set_flags(scheduler, pcmk_sched_remove_after_stop); +@@ -407,14 +407,14 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + + pe__unpack_node_health_scores(scheduler); + +- scheduler->placement_strategy = pe_pref(scheduler->config_hash, +- "placement-strategy"); ++ scheduler->placement_strategy = ++ pcmk__cluster_option(config_hash, "placement-strategy"); + crm_trace("Placement strategy: %s", scheduler->placement_strategy); + + set_config_flag(scheduler, "shutdown-lock", pcmk_sched_shutdown_lock); + if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { +- value = pe_pref(scheduler->config_hash, +- XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); ++ value = pcmk__cluster_option(config_hash, ++ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); + scheduler->shutdown_lock = crm_parse_interval_spec(value) / 1000; + crm_trace("Resources will be locked to nodes that were cleanly " + "shut down (locks expire after %s)", +@@ -424,8 +424,8 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + "shut down"); + } + +- value = pe_pref(scheduler->config_hash, +- XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT); ++ value = pcmk__cluster_option(config_hash, ++ XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT); + scheduler->node_pending_timeout = crm_parse_interval_spec(value) / 1000; + if (scheduler->node_pending_timeout == 0) { + crm_trace("Do not fence pending nodes"); +-- +2.31.1 + +From 866877401075e7ea4c3bc278e69ed94ea3a7af99 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:32:36 -0800 +Subject: [PATCH 17/24] Refactor: libpe_status, 
scheduler: Drop pe_metadata() + +Replace with static scheduler_metadata() in the scheduler since we don't +rely on a static options array in lib/pengine/common.c anymore. + +Signed-off-by: Reid Wahl +--- + daemons/schedulerd/pacemaker-schedulerd.c | 18 ++++++++++++++++-- + include/crm/pengine/internal.h | 2 -- + lib/pengine/common.c | 14 -------------- + 3 files changed, 16 insertions(+), 18 deletions(-) + +diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c +index 3f2a3e8..27c96da 100644 +--- a/daemons/schedulerd/pacemaker-schedulerd.c ++++ b/daemons/schedulerd/pacemaker-schedulerd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -46,6 +46,20 @@ pcmk__supported_format_t formats[] = { + + void pengine_shutdown(int nsig); + ++static void ++scheduler_metadata(pcmk__output_t *out) ++{ ++ const char *name = "pacemaker-schedulerd"; ++ const char *desc_short = "Pacemaker scheduler options"; ++ const char *desc_long = "Cluster options used by Pacemaker's scheduler"; ++ ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_schedulerd); ++ ++ out->output_xml(out, "metadata", s); ++ g_free(s); ++} ++ + static GOptionContext * + build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; +@@ -98,7 +112,7 @@ main(int argc, char **argv) + if (options.remainder) { + if (g_strv_length(options.remainder) == 1 && + pcmk__str_eq("metadata", options.remainder[0], pcmk__str_casei)) { +- pe_metadata(out); ++ scheduler_metadata(out); + goto done; + } else { + exit_code = CRM_EX_USAGE; +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 2b7f2eb..5965c1a 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -208,8 +208,6 
@@ char *native_parameter(pcmk_resource_t *rsc, pcmk_node_t *node, gboolean create, + pcmk_node_t *native_location(const pcmk_resource_t *rsc, GList **list, + int current); + +-void pe_metadata(pcmk__output_t *out); +- + void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, + pcmk_scheduler_t *scheduler, gboolean failed); + +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 0a4dfe6..6551d10 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -21,20 +21,6 @@ + gboolean was_processing_error = FALSE; + gboolean was_processing_warning = FALSE; + +-void +-pe_metadata(pcmk__output_t *out) +-{ +- const char *name = "pacemaker-schedulerd"; +- const char *desc_short = "Pacemaker scheduler options"; +- const char *desc_long = "Cluster options used by Pacemaker's scheduler"; +- +- gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_schedulerd); +- +- out->output_xml(out, "metadata", s); +- g_free(s); +-} +- + const char * + fail2text(enum action_fail_response fail) + { +-- +2.31.1 + +From 700c906d621887f257c73ddfd7c82c773cb32c8e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:40:18 -0800 +Subject: [PATCH 18/24] Refactor: libcib: Drop verify_cib_options() + +Signed-off-by: Reid Wahl +--- + lib/cib/cib_utils.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index b83158c..227a50f 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -881,12 +881,6 @@ cib_metadata(void) + g_free(s); + } + +-static void +-verify_cib_options(GHashTable *options) +-{ +- pcmk__validate_cluster_options(options); +-} +- + const char * + cib_pref(GHashTable * options, const char *name) + { +@@ -913,7 +907,7 @@ cib_read_config(GHashTable * options, xmlNode * current_cib) + options, CIB_OPTIONS_FIRST, TRUE, now, NULL); + } + +- verify_cib_options(options); ++ pcmk__validate_cluster_options(options); + + crm_time_free(now); + 
+-- +2.31.1 + +From 6284a3a79b88fd20630bfbfe866a4c2c3686a246 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:24:19 -0800 +Subject: [PATCH 19/24] Refactor: libcib: Drop cib_pref() internally + +Signed-off-by: Reid Wahl +--- + lib/cib/cib_utils.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 227a50f..5b241ae 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -281,7 +281,7 @@ cib_acl_enabled(xmlNode *xml, const char *user) + GHashTable *options = pcmk__strkey_table(free, free); + + cib_read_config(options, xml); +- value = cib_pref(options, "enable-acl"); ++ value = pcmk__cluster_option(options, "enable-acl"); + rc = crm_is_true(value); + g_hash_table_destroy(options); + } +-- +2.31.1 + +From 1806822590b0060079b94b7d2867722ef2430bf9 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:26:29 -0800 +Subject: [PATCH 20/24] API: libcib: Deprecate cib_pref() + +Signed-off-by: Reid Wahl +--- + include/crm/cib/util.h | 1 - + include/crm/cib/util_compat.h | 3 +++ + lib/cib/cib_utils.c | 12 ++++++------ + 3 files changed, 9 insertions(+), 7 deletions(-) + +diff --git a/include/crm/cib/util.h b/include/crm/cib/util.h +index 18726bb..47894cb 100644 +--- a/include/crm/cib/util.h ++++ b/include/crm/cib/util.h +@@ -57,7 +57,6 @@ int set_standby(cib_t * the_cib, const char *uuid, const char *scope, const char + xmlNode *cib_get_generation(cib_t * cib); + + void cib_metadata(void); +-const char *cib_pref(GHashTable * options, const char *name); + + int cib_apply_patch_event(xmlNode *event, xmlNode *input, xmlNode **output, + int level); +diff --git a/include/crm/cib/util_compat.h b/include/crm/cib/util_compat.h +index 20f1e2d..d6ccd4d 100644 +--- a/include/crm/cib/util_compat.h ++++ b/include/crm/cib/util_compat.h +@@ -33,6 +33,9 @@ const char *get_object_parent(const char *object_type); + //! 
\deprecated Use pcmk_cib_xpath_for() instead + xmlNode *get_object_root(const char *object_type, xmlNode *the_root); + ++//! \deprecated Do not use ++const char *cib_pref(GHashTable * options, const char *name); ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 5b241ae..f9c463e 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -881,12 +881,6 @@ cib_metadata(void) + g_free(s); + } + +-const char * +-cib_pref(GHashTable * options, const char *name) +-{ +- return pcmk__cluster_option(options, name); +-} +- + gboolean + cib_read_config(GHashTable * options, xmlNode * current_cib) + { +@@ -1085,5 +1079,11 @@ get_object_root(const char *object_type, xmlNode *the_root) + return pcmk_find_cib_element(the_root, object_type); + } + ++const char * ++cib_pref(GHashTable * options, const char *name) ++{ ++ return pcmk__cluster_option(options, name); ++} ++ + // LCOV_EXCL_STOP + // End deprecated API +-- +2.31.1 + +From 422fb81250aa733d2601b4d412c3fbcbf5b74420 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:29:29 -0800 +Subject: [PATCH 21/24] Refactor: based, libcib: Drop cib_metadata() internally + +Replace with a static based_metadata() in pacemaker-based since we don't +depend on a static options array in lib/cib/cib_utils.c anymore. 
+ +Signed-off-by: Reid Wahl +--- + daemons/based/pacemaker-based.c | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c +index 5dd7938..78bcd51 100644 +--- a/daemons/based/pacemaker-based.c ++++ b/daemons/based/pacemaker-based.c +@@ -126,6 +126,21 @@ setup_stand_alone(GError **error) + return pcmk_rc_ok; + } + ++static void ++based_metadata(void) ++{ ++ const char *name = "pacemaker-based"; ++ const char *desc_short = "Cluster Information Base manager options"; ++ const char *desc_long = "Cluster options used by Pacemaker's Cluster " ++ "Information Base manager"; ++ ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_based); ++ ++ printf("%s", s); ++ g_free(s); ++} ++ + static GOptionEntry entries[] = { + { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, + "(Advanced use only) Run in stand-alone mode", NULL }, +@@ -204,7 +219,7 @@ main(int argc, char **argv) + + if ((g_strv_length(processed_args) >= 2) + && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { +- cib_metadata(); ++ based_metadata(); + goto done; + } + +-- +2.31.1 + +From 05b3e08de7c515c38cf42bbbeaf18e3346eb360d Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:30:16 -0800 +Subject: [PATCH 22/24] API: libcib: Deprecate cib_metadata() + +Signed-off-by: Reid Wahl +--- + include/crm/cib/util.h | 2 -- + include/crm/cib/util_compat.h | 3 +++ + lib/cib/cib_utils.c | 30 +++++++++++++++--------------- + 3 files changed, 18 insertions(+), 17 deletions(-) + +diff --git a/include/crm/cib/util.h b/include/crm/cib/util.h +index 47894cb..ce6cbeb 100644 +--- a/include/crm/cib/util.h ++++ b/include/crm/cib/util.h +@@ -56,8 +56,6 @@ int set_standby(cib_t * the_cib, const char *uuid, const char *scope, const char + + xmlNode *cib_get_generation(cib_t * cib); + +-void cib_metadata(void); +- + int 
cib_apply_patch_event(xmlNode *event, xmlNode *input, xmlNode **output, + int level); + +diff --git a/include/crm/cib/util_compat.h b/include/crm/cib/util_compat.h +index d6ccd4d..95e0766 100644 +--- a/include/crm/cib/util_compat.h ++++ b/include/crm/cib/util_compat.h +@@ -36,6 +36,9 @@ xmlNode *get_object_root(const char *object_type, xmlNode *the_root); + //! \deprecated Do not use + const char *cib_pref(GHashTable * options, const char *name); + ++//! \deprecated Do not use ++void cib_metadata(void); ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index f9c463e..75dda16 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -866,21 +866,6 @@ cib_native_notify(gpointer data, gpointer user_data) + crm_trace("Callback invoked..."); + } + +-void +-cib_metadata(void) +-{ +- const char *name = "pacemaker-based"; +- const char *desc_short = "Cluster Information Base manager options"; +- const char *desc_long = "Cluster options used by Pacemaker's Cluster " +- "Information Base manager"; +- +- gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_based); +- +- printf("%s", s); +- g_free(s); +-} +- + gboolean + cib_read_config(GHashTable * options, xmlNode * current_cib) + { +@@ -1085,5 +1070,20 @@ cib_pref(GHashTable * options, const char *name) + return pcmk__cluster_option(options, name); + } + ++void ++cib_metadata(void) ++{ ++ const char *name = "pacemaker-based"; ++ const char *desc_short = "Cluster Information Base manager options"; ++ const char *desc_long = "Cluster options used by Pacemaker's Cluster " ++ "Information Base manager"; ++ ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_based); ++ ++ printf("%s", s); ++ g_free(s); ++} ++ + // LCOV_EXCL_STOP + // End deprecated API +-- +2.31.1 + +From f8ee575a51f6bacf82abb1d1f41eba1092776682 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:42:37 -0800 +Subject: 
[PATCH 23/24] Refactor: controller: Replace crmd_metadata() with + controld_metadata() + +Can be static since we don't rely on an options array that lives in +controld_control.c anymore. + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 14 -------------- + daemons/controld/pacemaker-controld.c | 18 ++++++++++++++++-- + daemons/controld/pacemaker-controld.h | 3 +-- + 3 files changed, 17 insertions(+), 18 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 40b90f8..9b54900 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -515,20 +515,6 @@ do_recover(long long action, + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); + } + +-void +-crmd_metadata(void) +-{ +- const char *name = "pacemaker-controld"; +- const char *desc_short = "Pacemaker controller options"; +- const char *desc_long = "Cluster options used by Pacemaker's controller"; +- +- gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_controld); +- +- printf("%s", s); +- g_free(s); +-} +- + static void + config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) + { +diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c +index e4a72c2..d80644d 100644 +--- a/daemons/controld/pacemaker-controld.c ++++ b/daemons/controld/pacemaker-controld.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -46,6 +46,20 @@ static pcmk__supported_format_t formats[] = { + { NULL, NULL, NULL } + }; + ++static void ++controld_metadata(void) ++{ ++ const char *name = "pacemaker-controld"; ++ const char *desc_short = "Pacemaker controller options"; ++ const char *desc_long = "Cluster options used by Pacemaker's controller"; ++ ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_controld); ++ ++ printf("%s", s); ++ g_free(s); ++} ++ + static GOptionContext * + build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) + { +@@ -96,7 +110,7 @@ main(int argc, char **argv) + + if ((g_strv_length(processed_args) >= 2) + && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { +- crmd_metadata(); ++ controld_metadata(); + initialize = false; + goto done; + } +diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h +index 2334cce..ba8dc8f 100644 +--- a/daemons/controld/pacemaker-controld.h ++++ b/daemons/controld/pacemaker-controld.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -28,7 +28,6 @@ + # define controld_trigger_config() \ + controld_trigger_config_as(__func__, __LINE__) + +-void crmd_metadata(void); + void controld_trigger_config_as(const char *fn, int line); + void controld_election_init(const char *uname); + void controld_configure_election(GHashTable *options); +-- +2.31.1 + +From 282e9eb026699abef5a28fc37f54b9330029da1c Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 2 Jan 2024 19:56:11 -0800 +Subject: [PATCH 24/24] Test: cts-cli: Update daemon tests after fence-reaction + select + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index 43393df..543d62f 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -52,9 +52,12 @@ + + + +- A cluster node may receive notification of its own fencing if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Allowed values are "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. ++ A cluster node may receive notification of its own fencing if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Use "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. Allowed values: stop, panic + How a cluster node should react if notified of its own fencing +- ++ ++ + + + Declare an election failed if it is not decided within this much time. If you need to adjust this value, it probably indicates the presence of a bug. 
+-- +2.31.1 + diff --git a/SOURCES/008-attrd-prep.patch b/SOURCES/008-attrd-prep.patch new file mode 100644 index 0000000..acc22d3 --- /dev/null +++ b/SOURCES/008-attrd-prep.patch @@ -0,0 +1,373 @@ +From 4823643bef8801b33688167b159bb531bcdf8911 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 4 Jan 2024 17:10:08 -0600 +Subject: [PATCH 1/5] Refactor: pacemaker-attrd: drop redundant argument from + update_attr_on_host() + +It can check for a force-write via its xml argument, to simplify the caller +--- + daemons/attrd/attrd_corosync.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 158d82f..1b56923 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -266,7 +266,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host) + static void + update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, +- bool filter, int is_force_write) ++ bool filter) + { + attribute_value_t *v = NULL; + +@@ -309,6 +309,10 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + } + + } else { ++ int is_force_write = 0; ++ ++ crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); ++ + if (is_force_write == 1 && a->timeout_ms && a->timer) { + /* Save forced writing and set change flag. */ + /* The actual attribute is written by Writer after election. 
*/ +@@ -338,15 +342,12 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) + const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); + const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); +- int is_force_write = 0; + + if (attr == NULL) { + crm_warn("Could not update attribute: peer did not specify name"); + return; + } + +- crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); +- + a = attrd_populate_attribute(xml, attr); + if (a == NULL) { + return; +@@ -361,12 +362,12 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) + g_hash_table_iter_init(&vIter, a->values); + + while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { +- update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); ++ update_attr_on_host(a, peer, xml, attr, value, host, filter); + } + + } else { + // Update attribute value for the given host +- update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); ++ update_attr_on_host(a, peer, xml, attr, value, host, filter); + } + + /* If this is a message from some attrd instance broadcasting its protocol +-- +2.31.1 + +From c7a1ab819b25e3225c185c1630a7139a96fb5c71 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 9 Jan 2024 16:48:37 -0600 +Subject: [PATCH 2/5] Refactor: pacemaker-attrd: drop unused argument from + attrd_peer_sync() + +--- + daemons/attrd/attrd_corosync.c | 10 ++++++++-- + daemons/attrd/attrd_elections.c | 2 +- + daemons/attrd/attrd_messages.c | 2 +- + daemons/attrd/pacemaker-attrd.h | 2 +- + 4 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 1b56923..088f00c 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -233,7 +233,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + */ + if 
(attrd_election_won() + && !pcmk_is_set(peer->flags, crm_remote_node)) { +- attrd_peer_sync(peer, NULL); ++ attrd_peer_sync(peer); + } + } else { + // Remove all attribute values associated with lost nodes +@@ -535,8 +535,14 @@ attrd_peer_remove(const char *host, bool uncache, const char *source) + } + } + ++/*! ++ * \internal ++ * \brief Send all known attributes and values to a peer ++ * ++ * \param[in] peer Peer to send sync to (if NULL, broadcast to all peers) ++ */ + void +-attrd_peer_sync(crm_node_t *peer, xmlNode *xml) ++attrd_peer_sync(crm_node_t *peer) + { + GHashTableIter aIter; + GHashTableIter vIter; +diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c +index 82fbe8a..9dbf133 100644 +--- a/daemons/attrd/attrd_elections.c ++++ b/daemons/attrd/attrd_elections.c +@@ -23,7 +23,7 @@ attrd_election_cb(gpointer user_data) + attrd_declare_winner(); + + /* Update the peers after an election */ +- attrd_peer_sync(NULL, NULL); ++ attrd_peer_sync(NULL); + + /* After winning an election, update the CIB with the values of all + * attributes as the winner knows them. 
+diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 5525d4b..13ac01f 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -180,7 +180,7 @@ handle_sync_request(pcmk__request_t *request) + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, + pcmk__node_search_cluster); + +- attrd_peer_sync(peer, request->xml); ++ attrd_peer_sync(peer); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 7384188..bacaad6 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -175,7 +175,7 @@ extern GHashTable *peer_protocol_vers; + int attrd_cluster_connect(void); + void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter); +-void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); ++void attrd_peer_sync(crm_node_t *peer); + void attrd_peer_remove(const char *host, bool uncache, const char *source); + void attrd_peer_clear_failure(pcmk__request_t *request); + void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, +-- +2.31.1 + +From abafae0068e10abb135b0496086947728365299a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Jan 2024 17:31:17 -0600 +Subject: [PATCH 3/5] Refactor: pacemaker-attrd: de-functionize + attrd_lookup_or_create_value() + +... to make planned changes easier +--- + daemons/attrd/attrd_corosync.c | 62 +++++++++++++--------------------- + 1 file changed, 24 insertions(+), 38 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 088f00c..59e6a26 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -168,40 +168,6 @@ broadcast_local_value(const attribute_t *a) + + #define state_text(state) pcmk__s((state), "in unknown state") + +-/*! 
+- * \internal +- * \brief Return a node's value from hash table (creating one if needed) +- * +- * \param[in,out] values Hash table of values +- * \param[in] node_name Name of node to look up +- * \param[in] xml XML describing the attribute +- * +- * \return Pointer to new or existing hash table entry +- */ +-static attribute_value_t * +-attrd_lookup_or_create_value(GHashTable *values, const char *node_name, +- const xmlNode *xml) +-{ +- attribute_value_t *v = g_hash_table_lookup(values, node_name); +- int is_remote = 0; +- +- if (v == NULL) { +- v = calloc(1, sizeof(attribute_value_t)); +- CRM_ASSERT(v != NULL); +- +- pcmk__str_update(&v->nodename, node_name); +- g_hash_table_replace(values, v->nodename, v); +- } +- +- crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); +- if (is_remote) { +- attrd_set_value_flags(v, attrd_value_remote); +- CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); +- } +- +- return(v); +-} +- + static void + attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) + { +@@ -268,18 +234,38 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, + bool filter) + { ++ int is_remote = 0; ++ bool changed = false; + attribute_value_t *v = NULL; + +- v = attrd_lookup_or_create_value(a->values, host, xml); ++ // Create entry for value if not already existing ++ v = g_hash_table_lookup(a->values, host); ++ if (v == NULL) { ++ v = calloc(1, sizeof(attribute_value_t)); ++ CRM_ASSERT(v != NULL); ++ ++ pcmk__str_update(&v->nodename, host); ++ g_hash_table_replace(a->values, v->nodename, v); ++ } ++ ++ // If value is for a Pacemaker Remote node, remember that ++ crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); ++ if (is_remote) { ++ attrd_set_value_flags(v, attrd_value_remote); ++ CRM_ASSERT(crm_remote_peer_get(host) != NULL); ++ } ++ ++ // Check whether the value changed ++ changed = !pcmk__str_eq(v->current, 
value, pcmk__str_casei); + +- if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) +- && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { ++ if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname, ++ pcmk__str_casei)) { + + crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", + attr, host, v->current, value, peer->uname); + v = broadcast_local_value(a); + +- } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { ++ } else if (changed) { + crm_notice("Setting %s[%s]%s%s: %s -> %s " + CRM_XS " from %s with %s write delay", + attr, host, a->set_type ? " in " : "", +-- +2.31.1 + +From 72529ec512fb4938bd8dbbd2caf44bbb1a616826 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Jan 2024 18:04:33 -0600 +Subject: [PATCH 4/5] Refactor: pacemaker-attrd: minor shuffling to make + planned changes easier + +--- + daemons/attrd/attrd_cib.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index bdc0a10..481fea7 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -51,6 +51,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg) + { + const xmlNode *patchset = NULL; + const char *client_name = NULL; ++ bool status_changed = false; + + if (attrd_shutting_down(true)) { + return; +@@ -64,20 +65,22 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg) + mainloop_set_trigger(attrd_config_read); + } + +- if (!attrd_election_won()) { +- // Don't write attributes if we're not the writer +- return; +- } ++ status_changed = cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS); + + client_name = crm_element_value(msg, F_CIB_CLIENTNAME); + if (!cib__client_triggers_refresh(client_name)) { +- // The CIB is still accurate ++ /* This change came from a source that ensured the CIB is consistent ++ * with our attributes table, so we don't need to write anything out. 
++ */ + return; + } + +- if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES) +- || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) { +- ++ if (!attrd_election_won()) { ++ // Don't write attributes if we're not the writer ++ return; ++ } ++ ++ if (status_changed || cib__element_in_patchset(patchset, XML_CIB_TAG_NODES)) { + /* An unsafe client modified the nodes or status section. Write + * transient attributes to ensure they're up-to-date in the CIB. + */ +-- +2.31.1 + +From b83c2567fb450eec5b18882ded16403831d2c3c0 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Jan 2024 17:53:55 -0600 +Subject: [PATCH 5/5] Log: pacemaker-attrd: make sure we don't try to log NULL + +--- + daemons/attrd/attrd_corosync.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 59e6a26..b348d52 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -229,6 +229,11 @@ record_peer_nodeid(attribute_value_t *v, const char *host) + } + } + ++#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)") ++ ++#define readable_peer(p) \ ++ (((p) == NULL)? "all peers" : pcmk__s((p)->uname, "unknown peer")) ++ + static void + update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, +@@ -262,14 +267,14 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + pcmk__str_casei)) { + + crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", +- attr, host, v->current, value, peer->uname); ++ attr, host, readable_value(v), value, peer->uname); + v = broadcast_local_value(a); + + } else if (changed) { + crm_notice("Setting %s[%s]%s%s: %s -> %s " + CRM_XS " from %s with %s write delay", + attr, host, a->set_type ? 
" in " : "", +- pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"), ++ pcmk__s(a->set_type, ""), readable_value(v), + pcmk__s(value, "(unset)"), peer->uname, + (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); + pcmk__str_update(&v->current, value); +@@ -543,12 +548,14 @@ attrd_peer_sync(crm_node_t *peer) + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { +- crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); ++ crm_debug("Syncing %s[%s]='%s' to %s", ++ a->id, v->nodename, readable_value(v), ++ readable_peer(peer)); + attrd_add_value_xml(sync, a, v, false); + } + } + +- crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); ++ crm_debug("Syncing values to %s", readable_peer(peer)); + attrd_send_message(peer, sync, false); + free_xml(sync); + } +-- +2.31.1 + diff --git a/SOURCES/008-controller-reply.patch b/SOURCES/008-controller-reply.patch deleted file mode 100644 index bd72a6e..0000000 --- a/SOURCES/008-controller-reply.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 3e31da0016795397bfeacb2f3d76ecfe35cc1f67 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 17 Jul 2023 14:52:42 -0500 -Subject: [PATCH] Fix: libcrmcommon: wait for reply from appropriate controller - commands - -ipc_controld.c:reply_expected() wrongly omitted PCMK__CONTROLD_CMD_NODES (which -hasn't been a problem because crm_node uses a mainloop instead of sync dispatch -for that) and CRM_OP_RM_NODE_CACHE (which can be sent via -ipc_client.c:pcmk_ipc_purge_node()). - -Because CRM_OP_RM_NODE_CACHE gets only an ack and no further replies, we now -have to be careful not to return true from the controller's dispatch() -function, otherwise crm_node -R would wait forever for more data. 
That means -we have to check for whether any replies are expected, which means we have to -increment expected replies *before* sending a request (in case it's sync). - -Regression introduced in 2.0.5 by ae14fa4a - -Fixes T681 ---- - lib/common/ipc_controld.c | 49 ++++++++++++++------------------------- - 1 file changed, 17 insertions(+), 32 deletions(-) - -diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c -index 3c3a98964..405fd0518 100644 ---- a/lib/common/ipc_controld.c -+++ b/lib/common/ipc_controld.c -@@ -143,18 +143,16 @@ set_nodes_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) - static bool - reply_expected(pcmk_ipc_api_t *api, xmlNode *request) - { -- const char *command = crm_element_value(request, F_CRM_TASK); -- -- if (command == NULL) { -- return false; -- } -- -- // We only need to handle commands that functions in this file can send -- return !strcmp(command, CRM_OP_REPROBE) -- || !strcmp(command, CRM_OP_NODE_INFO) -- || !strcmp(command, CRM_OP_PING) -- || !strcmp(command, CRM_OP_LRM_FAIL) -- || !strcmp(command, CRM_OP_LRM_DELETE); -+ // We only need to handle commands that API functions can send -+ return pcmk__str_any_of(crm_element_value(request, F_CRM_TASK), -+ PCMK__CONTROLD_CMD_NODES, -+ CRM_OP_LRM_DELETE, -+ CRM_OP_LRM_FAIL, -+ CRM_OP_NODE_INFO, -+ CRM_OP_PING, -+ CRM_OP_REPROBE, -+ CRM_OP_RM_NODE_CACHE, -+ NULL); - } - - static bool -@@ -168,22 +166,12 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) - pcmk_controld_reply_unknown, NULL, NULL, - }; - -- /* If we got an ACK, return true so the caller knows to expect more responses -- * from the IPC server. We do this before decrementing replies_expected because -- * ACKs are not going to be included in that value. -- * -- * Note that we cannot do the same kind of status checking here that we do in -- * ipc_pacemakerd.c. The ACK message we receive does not necessarily contain -- * a status attribute. 
That is, we may receive this: -- * -- * -- * -- * Instead of this: -- * -- * -- */ - if (pcmk__str_eq(crm_element_name(reply), "ack", pcmk__str_none)) { -- return true; // More replies needed -+ /* ACKs are trivial responses that do not count toward expected replies, -+ * and do not have all the fields that validation requires, so skip that -+ * processing. -+ */ -+ return private->replies_expected > 0; - } - - if (private->replies_expected > 0) { -@@ -310,18 +298,15 @@ static int - send_controller_request(pcmk_ipc_api_t *api, xmlNode *request, - bool reply_is_expected) - { -- int rc; -- - if (crm_element_value(request, XML_ATTR_REFERENCE) == NULL) { - return EINVAL; - } -- rc = pcmk__send_ipc_request(api, request); -- if ((rc == pcmk_rc_ok) && reply_is_expected) { -+ if (reply_is_expected) { - struct controld_api_private_s *private = api->api_data; - - private->replies_expected++; - } -- return rc; -+ return pcmk__send_ipc_request(api, request); - } - - static xmlNode * --- -2.41.0 - diff --git a/SOURCES/009-attrd-cache-3.patch b/SOURCES/009-attrd-cache-3.patch new file mode 100644 index 0000000..6150218 --- /dev/null +++ b/SOURCES/009-attrd-cache-3.patch @@ -0,0 +1,385 @@ +From 84d4a0d5f562df91baa0fece45d06ad3732f941c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Jan 2024 11:20:53 -0600 +Subject: [PATCH 1/5] Low: pacemaker-attrd: properly validate attribute set + type + +The sense of the test was accidentally reversed in 26471a52689 +--- + daemons/attrd/attrd_attributes.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c +index 8f32988..f059059 100644 +--- a/daemons/attrd/attrd_attributes.c ++++ b/daemons/attrd/attrd_attributes.c +@@ -40,9 +40,9 @@ attrd_create_attribute(xmlNode *xml) + * attributes are not written. 
+ */ + crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &is_private); +- if ((is_private != 0) +- && !pcmk__str_any_of(set_type, XML_TAG_ATTR_SETS, XML_TAG_UTILIZATION, +- NULL)) { ++ if (!is_private && !pcmk__str_any_of(set_type, ++ XML_TAG_ATTR_SETS, ++ XML_TAG_UTILIZATION, NULL)) { + crm_warn("Ignoring attribute %s with invalid set type %s", + pcmk__s(name, "(unidentified)"), set_type); + return NULL; +-- +2.31.1 + +From d0d0511e71fe983a2d89589c39810b79fb48a8ca Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Jan 2024 12:13:42 -0600 +Subject: [PATCH 2/5] Fix: pacemaker-attrd: sync utilization attributes to + peers correctly + +Include the set type with attribute syncs. + +Previously, utilization attributes would have the correct set_type on the node +where they were set, but peers would store it as a regular node attribute. If +one of those peers became writer, the attribute would get written to the wrong +set. +--- + daemons/attrd/attrd_attributes.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c +index f059059..0ad9630 100644 +--- a/daemons/attrd/attrd_attributes.c ++++ b/daemons/attrd/attrd_attributes.c +@@ -139,6 +139,7 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + xmlNode *xml = create_xml_node(parent, __func__); + + crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id); ++ crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type); + crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id); + crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid); + crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); +-- +2.31.1 + +From 4479ff8507dd69f5946d31cf83c7e47fe15d3bdb Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Jan 2024 12:18:40 -0600 +Subject: [PATCH 3/5] Refactor: pacemaker-attrd: functionize getting attribute + set ID + +... 
for future reuse +--- + daemons/attrd/attrd_attributes.c | 38 ++++++++++++++++++++++++++++++++ + daemons/attrd/attrd_cib.c | 9 +------- + daemons/attrd/pacemaker-attrd.h | 3 ++- + 3 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c +index 0ad9630..5727ab8 100644 +--- a/daemons/attrd/attrd_attributes.c ++++ b/daemons/attrd/attrd_attributes.c +@@ -210,3 +210,41 @@ attrd_populate_attribute(xmlNode *xml, const char *attr) + + return a; + } ++ ++/*! ++ * \internal ++ * \brief Get the XML ID used to write out an attribute set ++ * ++ * \param[in] attr Attribute to get set ID for ++ * \param[in] node_state_id XML ID of node state that attribute value is for ++ * ++ * \return Newly allocated string with XML ID to use for \p attr set ++ */ ++char * ++attrd_set_id(const attribute_t *attr, const char *node_state_id) ++{ ++ char *set_id = NULL; ++ ++ CRM_ASSERT((attr != NULL) && (node_state_id != NULL)); ++ ++ if (attr->set_id == NULL) { ++ /* @COMPAT This should really take the set type into account. Currently ++ * we use the same XML ID for transient attributes and utilization ++ * attributes. It doesn't cause problems because the status section is ++ * not limited by the schema in any way, but it's still unfortunate. ++ * For backward compatibility reasons, we can't change this. ++ */ ++ set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_state_id); ++ } else { ++ /* @COMPAT When the user specifies a set ID for an attribute, it is the ++ * same for every node. That is less than ideal, but again, the schema ++ * doesn't enforce anything for the status section. We couldn't change ++ * it without allowing the set ID to vary per value rather than per ++ * attribute, which would break backward compatibility, pose design ++ * challenges, and potentially cause problems in rolling upgrades. 
++ */ ++ pcmk__str_update(&set_id, attr->set_id); ++ } ++ crm_xml_sanitize_id(set_id); ++ return set_id; ++} +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 481fea7..08d3425 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -423,17 +423,10 @@ add_unset_attr_update(const attribute_t *attr, const char *attr_id, + static int + add_attr_update(const attribute_t *attr, const char *value, const char *node_id) + { +- char *set_id = NULL; ++ char *set_id = attrd_set_id(attr, node_id); + char *attr_id = NULL; + int rc = pcmk_rc_ok; + +- if (attr->set_id != NULL) { +- pcmk__str_update(&set_id, attr->set_id); +- } else { +- set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_id); +- } +- crm_xml_sanitize_id(set_id); +- + if (attr->uuid != NULL) { + pcmk__str_update(&attr_id, attr->uuid); + } else { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index bacaad6..3da7f8d 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2013-2023 the Pacemaker project contributors ++ * Copyright 2013-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -195,6 +195,7 @@ void attrd_clear_value_seen(void); + void attrd_free_attribute(gpointer data); + void attrd_free_attribute_value(gpointer data); + attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); ++char *attrd_set_id(const attribute_t *attr, const char *node_state_id); + + enum attrd_write_options { + attrd_write_changed = 0, +-- +2.31.1 + +From eee2169ac348b8ed26ac0b78cb11ddc5cef9384e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Jan 2024 12:25:59 -0600 +Subject: [PATCH 4/5] Refactor: pacemaker-attrd: functionize getting attribute + nvpair ID + +... 
for future reuse +--- + daemons/attrd/attrd_attributes.c | 28 ++++++++++++++++++++++++++++ + daemons/attrd/attrd_cib.c | 17 +++++------------ + daemons/attrd/pacemaker-attrd.h | 1 + + 3 files changed, 34 insertions(+), 12 deletions(-) + +diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c +index 5727ab8..23de2e2 100644 +--- a/daemons/attrd/attrd_attributes.c ++++ b/daemons/attrd/attrd_attributes.c +@@ -248,3 +248,31 @@ attrd_set_id(const attribute_t *attr, const char *node_state_id) + crm_xml_sanitize_id(set_id); + return set_id; + } ++ ++/*! ++ * \internal ++ * \brief Get the XML ID used to write out an attribute value ++ * ++ * \param[in] attr Attribute to get value XML ID for ++ * \param[in] node_state_id UUID of node that attribute value is for ++ * ++ * \return Newly allocated string with XML ID of \p attr value ++ */ ++char * ++attrd_nvpair_id(const attribute_t *attr, const char *node_state_id) ++{ ++ char *nvpair_id = NULL; ++ ++ if (attr->uuid != NULL) { ++ pcmk__str_update(&nvpair_id, attr->uuid); ++ ++ } else if (attr->set_id != NULL) { ++ nvpair_id = crm_strdup_printf("%s-%s", attr->set_id, attr->id); ++ ++ } else { ++ nvpair_id = crm_strdup_printf(XML_CIB_TAG_STATUS "-%s-%s", ++ node_state_id, attr->id); ++ } ++ crm_xml_sanitize_id(nvpair_id); ++ return nvpair_id; ++} +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 08d3425..d42345f 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -424,23 +424,16 @@ static int + add_attr_update(const attribute_t *attr, const char *value, const char *node_id) + { + char *set_id = attrd_set_id(attr, node_id); +- char *attr_id = NULL; ++ char *nvpair_id = attrd_nvpair_id(attr, node_id); + int rc = pcmk_rc_ok; + +- if (attr->uuid != NULL) { +- pcmk__str_update(&attr_id, attr->uuid); ++ if (value == NULL) { ++ rc = add_unset_attr_update(attr, nvpair_id, node_id, set_id); + } else { +- attr_id = crm_strdup_printf("%s-%s", set_id, attr->id); +- 
} +- crm_xml_sanitize_id(attr_id); +- +- if (value != NULL) { +- rc = add_set_attr_update(attr, attr_id, node_id, set_id, value); +- } else { +- rc = add_unset_attr_update(attr, attr_id, node_id, set_id); ++ rc = add_set_attr_update(attr, nvpair_id, node_id, set_id, value); + } + free(set_id); +- free(attr_id); ++ free(nvpair_id); + return rc; + } + +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 3da7f8d..deec790 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -196,6 +196,7 @@ void attrd_free_attribute(gpointer data); + void attrd_free_attribute_value(gpointer data); + attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); + char *attrd_set_id(const attribute_t *attr, const char *node_state_id); ++char *attrd_nvpair_id(const attribute_t *attr, const char *node_state_id); + + enum attrd_write_options { + attrd_write_changed = 0, +-- +2.31.1 + +From 2abde6cb87d2e3d31a370c74656f6f7c0818c185 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Jan 2024 10:01:56 -0600 +Subject: [PATCH 5/5] Log: pacemaker-attrd: improve some messages for debugging + +--- + daemons/attrd/attrd_attributes.c | 8 +++++--- + daemons/attrd/attrd_cib.c | 13 +++++++++---- + daemons/attrd/attrd_corosync.c | 10 ++++++---- + 3 files changed, 20 insertions(+), 11 deletions(-) + +diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c +index 23de2e2..68b9585 100644 +--- a/daemons/attrd/attrd_attributes.c ++++ b/daemons/attrd/attrd_attributes.c +@@ -60,13 +60,10 @@ attrd_create_attribute(xmlNode *xml) + a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value); + + a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER); +- crm_trace("Performing all %s operations as user '%s'", a->id, a->user); + + if (dampen_s != NULL) { + dampen = crm_get_msec(dampen_s); + } +- crm_trace("Created attribute %s with %s write delay", a->id, +- (a->timeout_ms == 0)? 
"no" : pcmk__readable_interval(a->timeout_ms)); + + if(dampen > 0) { + a->timeout_ms = dampen; +@@ -75,6 +72,11 @@ attrd_create_attribute(xmlNode *xml) + crm_warn("Ignoring invalid delay %s for attribute %s", dampen_s, a->id); + } + ++ crm_trace("Created attribute %s with %s write delay and %s CIB user", ++ a->id, ++ ((dampen > 0)? pcmk__readable_interval(a->timeout_ms) : "no"), ++ pcmk__s(a->user, "default")); ++ + g_hash_table_replace(attributes, a->id, a); + return a; + } +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index d42345f..cae6846 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -54,6 +54,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg) + bool status_changed = false; + + if (attrd_shutting_down(true)) { ++ crm_debug("Ignoring CIB change during shutdown"); + return; + } + +@@ -278,11 +279,13 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use + + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { +- do_crm_log(level, "* %s[%s]=%s", +- a->id, peer, pcmk__s(v->requested, "(null)")); + if (rc == pcmk_ok) { ++ crm_info("* Wrote %s[%s]=%s", ++ a->id, peer, pcmk__s(v->requested, "(unset)")); + pcmk__str_update(&(v->requested), NULL); + } else { ++ do_crm_log(level, "* Could not write %s[%s]=%s", ++ a->id, peer, pcmk__s(v->requested, "(unset)")); + a->changed = true; // Reattempt write below if we are still writer + } + } +@@ -292,6 +295,7 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use + /* We deferred a write of a new update because this update was in + * progress. Write out the new value without additional delay. 
+ */ ++ crm_debug("Pending update for %s can be written now", a->id); + write_attribute(a, false); + + /* We're re-attempting a write because the original failed; delay +@@ -593,8 +597,9 @@ write_attribute(attribute_t *a, bool ignore_delay) + continue; + } + +- crm_debug("Updating %s[%s]=%s (node uuid=%s id=%" PRIu32 ")", +- a->id, v->nodename, v->current, uuid, v->nodeid); ++ crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")", ++ a->id, v->nodename, pcmk__s(v->current, "(unset)"), ++ uuid, v->nodeid); + cib_updates++; + + /* Preservation of the attribute to transmit alert */ +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index b348d52..6fb847b 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -293,7 +293,8 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + + // Write out new value or start dampening timer + if (a->timeout_ms && a->timer) { +- crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr); ++ crm_trace("Delaying write of %s %s for dampening", ++ attr, pcmk__readable_interval(a->timeout_ms)); + mainloop_timer_start(a->timer); + } else { + attrd_write_or_elect_attribute(a); +@@ -307,11 +308,12 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + if (is_force_write == 1 && a->timeout_ms && a->timer) { + /* Save forced writing and set change flag. */ + /* The actual attribute is written by Writer after election. 
*/ +- crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)", +- attr, host, peer->uname, value); ++ crm_trace("%s[%s] from %s is unchanged (%s), forcing write", ++ attr, host, peer->uname, pcmk__s(value, "unset")); + a->force_write = TRUE; + } else { +- crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); ++ crm_trace("%s[%s] from %s is unchanged (%s)", ++ attr, host, peer->uname, pcmk__s(value, "unset")); + } + } + +-- +2.31.1 + diff --git a/SOURCES/009-glib-assertions.patch b/SOURCES/009-glib-assertions.patch deleted file mode 100644 index 14babe8..0000000 --- a/SOURCES/009-glib-assertions.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 2587f9fabea3a7ef01eb7752d4e2ef082823934e Mon Sep 17 00:00:00 2001 -From: eabdullin -Date: Wed, 13 Sep 2023 14:15:46 +0300 -Subject: [PATCH] - Fix: controller: don't try to execute agent action at - shutdown Normally, agent execution is not possible at shutdown. However, when - metadata is needed for some action, the agent can be called asynchronously, - and when the metadata action returns, the original action is performed. If - the metadata is initiated before shutdown, but completes after shutdown has - begun, do not try to attempt the original action, so we avoid unnecessary - error logs. - Refactor: controller: de-functionize lrm_state_destroy() It was - a one-liner called once - Log: controller: improve messages for resource - history updates - Low: controller: guard lrm_state_table usage with NULLcheck - It is NULL while draining the mainloop during the shutdown sequence. 
- ---- - daemons/controld/controld_execd.c | 15 ++++++++++++--- - daemons/controld/controld_execd_state.c | 15 +++++++-------- - daemons/controld/controld_lrm.h | 5 ----- - 3 files changed, 19 insertions(+), 16 deletions(-) - -diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c -index afead92..e7a91ab 100644 ---- a/daemons/controld/controld_execd.c -+++ b/daemons/controld/controld_execd.c -@@ -1728,7 +1728,9 @@ metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) - md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, - result->action_stdout); - } -- do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); -+ if (!pcmk_is_set(fsa_input_register, R_HA_DISCONNECTED)) { -+ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); -+ } - free_metadata_cb_data(data); - } - -@@ -2406,10 +2408,17 @@ cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *use - case pcmk_ok: - case -pcmk_err_diff_failed: - case -pcmk_err_diff_resync: -- crm_trace("Resource update %d complete: rc=%d", call_id, rc); -+ crm_trace("Resource history update completed (call=%d rc=%d)", -+ call_id, rc); - break; - default: -- crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); -+ if (call_id > 0) { -+ crm_warn("Resource history update %d failed: %s " -+ CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc); -+ } else { -+ crm_warn("Resource history update failed: %s " CRM_XS " rc=%d", -+ pcmk_strerror(rc), rc); -+ } - } - - if (call_id == last_resource_update) { -diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c -index adba2e5..3994b6d 100644 ---- a/daemons/controld/controld_execd_state.c -+++ b/daemons/controld/controld_execd_state.c -@@ -131,12 +131,6 @@ lrm_state_create(const char *node_name) - return state; - } - --void --lrm_state_destroy(const char *node_name) --{ -- g_hash_table_remove(lrm_state_table, node_name); --} -- - 
static gboolean - remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) - { -@@ -252,7 +246,7 @@ lrm_state_destroy_all(void) - lrm_state_t * - lrm_state_find(const char *node_name) - { -- if (!node_name) { -+ if ((node_name == NULL) || (lrm_state_table == NULL)) { - return NULL; - } - return g_hash_table_lookup(lrm_state_table, node_name); -@@ -263,6 +257,8 @@ lrm_state_find_or_create(const char *node_name) - { - lrm_state_t *lrm_state; - -+ CRM_CHECK(lrm_state_table != NULL, return NULL); -+ - lrm_state = g_hash_table_lookup(lrm_state_table, node_name); - if (!lrm_state) { - lrm_state = lrm_state_create(node_name); -@@ -274,6 +270,9 @@ lrm_state_find_or_create(const char *node_name) - GList * - lrm_state_get_list(void) - { -+ if (lrm_state_table == NULL) { -+ return NULL; -+ } - return g_hash_table_get_values(lrm_state_table); - } - -@@ -764,7 +763,7 @@ lrm_state_unregister_rsc(lrm_state_t * lrm_state, - } - - if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { -- lrm_state_destroy(rsc_id); -+ g_hash_table_remove(lrm_state_table, rsc_id); - return pcmk_ok; - } - -diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h -index 983c288..11ff1bc 100644 ---- a/daemons/controld/controld_lrm.h -+++ b/daemons/controld/controld_lrm.h -@@ -113,11 +113,6 @@ void lrm_state_destroy_all(void); - */ - lrm_state_t *lrm_state_create(const char *node_name); - --/*! -- * \brief Destroy executor connection by node name -- */ --void lrm_state_destroy(const char *node_name); -- - /*! 
- * \brief Find lrm_state data by node name - */ --- - diff --git a/SOURCES/010-attrd-shutdown.patch b/SOURCES/010-attrd-shutdown.patch deleted file mode 100644 index 1d02526..0000000 --- a/SOURCES/010-attrd-shutdown.patch +++ /dev/null @@ -1,45 +0,0 @@ -From f5263c9401c9c38d4e039149deddcc0da0c184ba Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 3 Aug 2023 12:17:08 -0500 -Subject: [PATCH] Fix: attrd: avoid race condition when shutting down - -This addresses a race condition that can occur when the DC and the attribute -writer are different nodes, and shutting down at the same time. When the DC -controller leaves its Corosync process group, the remaining nodes erase its -transient node attributes (including "shutdown") from the CIB. However if the -(former) DC's attrd is still up, it can win the attribute writer election -called after the original writer leaves. As the election winner, it writes out -all its attributes to the CIB, including "shutdown". The next time it rejoins -the cluster, it will be immediately shut down. - -Fixes T138 ---- - daemons/attrd/attrd_elections.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c -index 3b6b55a0f59..6f4916888a9 100644 ---- a/daemons/attrd/attrd_elections.c -+++ b/daemons/attrd/attrd_elections.c -@@ -22,12 +22,20 @@ attrd_election_cb(gpointer user_data) - { - attrd_declare_winner(); - -+ if (attrd_requesting_shutdown() || attrd_shutting_down()) { -+ /* This node is shutting down or about to, meaning its attributes will -+ * be removed (and may have already been removed from the CIB by a -+ * controller). Don't sync or write its attributes in this case. 
-+ */ -+ return G_SOURCE_REMOVE; -+ } -+ - /* Update the peers after an election */ - attrd_peer_sync(NULL, NULL); - - /* Update the CIB after an election */ - attrd_write_attributes(true, false); -- return FALSE; -+ return G_SOURCE_REMOVE; - } - - void diff --git a/SOURCES/010-crm_attribute-free.patch b/SOURCES/010-crm_attribute-free.patch new file mode 100644 index 0000000..d1e4831 --- /dev/null +++ b/SOURCES/010-crm_attribute-free.patch @@ -0,0 +1,53 @@ +From 9c13ce6fe95812308443c188ace8f897e6bce942 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 29 Jan 2024 11:14:25 -0800 +Subject: [PATCH] Fix: tools: crm_attribute emits garbage for --node localhost + or auto + +This happens because pcmk__node_attr_target() returns its argument if +its argument is NULL, "auto", or "localhost" and no relevant environment +variables are found. Then crm_attribute frees the return value, makes a +copy of it, and assigns it back to options.dest_uname. + +The fix is to check whether the return value is equal to the argument. + +Fixes RHEL-23065 + +Signed-off-by: Reid Wahl +--- + tools/crm_attribute.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c +index d221ab85d..636d03dbd 100644 +--- a/tools/crm_attribute.c ++++ b/tools/crm_attribute.c +@@ -766,8 +766,23 @@ main(int argc, char **argv) + const char *target = pcmk__node_attr_target(options.dest_uname); + + if (target != NULL) { +- g_free(options.dest_uname); +- options.dest_uname = g_strdup(target); ++ /* If options.dest_uname is "auto" or "localhost", then ++ * pcmk__node_attr_target() may return it, depending on environment ++ * variables. In that case, attribute lookups will fail for "auto" ++ * (unless there's a node named "auto"). attrd maps "localhost" to ++ * the true local node name for queries. ++ * ++ * @TODO ++ * * Investigate whether "localhost" is mapped to a real node name ++ * for non-query commands. 
If not, possibly modify it so that it ++ * is. ++ * * Map "auto" to "localhost" (probably). ++ */ ++ if (target != (const char *) options.dest_uname) { ++ g_free(options.dest_uname); ++ options.dest_uname = g_strdup(target); ++ } ++ + } else if (getenv("CIB_file") != NULL && options.dest_uname == NULL) { + get_node_name_from_local(); + } +-- +2.41.0 + diff --git a/SOURCES/011-attrd-shutdown-2.patch b/SOURCES/011-attrd-shutdown-2.patch deleted file mode 100644 index ba79a62..0000000 --- a/SOURCES/011-attrd-shutdown-2.patch +++ /dev/null @@ -1,210 +0,0 @@ -From 83e547cc64f2586031a007ab58e91fc22cd1a68a Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 24 Aug 2023 12:18:23 -0500 -Subject: [PATCH] Refactor: attrd: use enum instead of bools for - attrd_write_attributes() - ---- - daemons/attrd/attrd_cib.c | 24 ++++++++++++++++++------ - daemons/attrd/attrd_corosync.c | 2 +- - daemons/attrd/attrd_elections.c | 2 +- - daemons/attrd/attrd_ipc.c | 2 +- - daemons/attrd/attrd_utils.c | 2 +- - daemons/attrd/pacemaker-attrd.h | 8 +++++++- - 6 files changed, 29 insertions(+), 11 deletions(-) - -diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c -index 928c0133745..9c787fe1024 100644 ---- a/daemons/attrd/attrd_cib.c -+++ b/daemons/attrd/attrd_cib.c -@@ -343,16 +343,23 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay) - free_xml(xml_top); - } - -+/*! -+ * \internal -+ * \brief Write out attributes -+ * -+ * \param[in] options Group of enum attrd_write_options -+ */ - void --attrd_write_attributes(bool all, bool ignore_delay) -+attrd_write_attributes(uint32_t options) - { - GHashTableIter iter; - attribute_t *a = NULL; - -- crm_debug("Writing out %s attributes", all? "all" : "changed"); -+ crm_debug("Writing out %s attributes", -+ pcmk_is_set(options, attrd_write_all)? 
"all" : "changed"); - g_hash_table_iter_init(&iter, attributes); - while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { -- if (!all && a->unknown_peer_uuids) { -+ if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { - // Try writing this attribute again, in case peer ID was learned - a->changed = true; - } else if (a->force_write) { -@@ -360,9 +367,14 @@ attrd_write_attributes(bool all, bool ignore_delay) - a->changed = true; - } - -- if(all || a->changed) { -- /* When forced write flag is set, ignore delay. */ -- attrd_write_attribute(a, (a->force_write ? true : ignore_delay)); -+ if (pcmk_is_set(options, attrd_write_all) || a->changed) { -+ bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay); -+ -+ if (a->force_write) { -+ // Always ignore delay when forced write flag is set -+ ignore_delay = true; -+ } -+ attrd_write_attribute(a, ignore_delay); - } else { - crm_trace("Skipping unchanged attribute %s", a->id); - } -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 1aec35a054e..49631df6e44 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -285,7 +285,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host) - - crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); - if (attrd_election_won()) { -- attrd_write_attributes(false, false); -+ attrd_write_attributes(attrd_write_changed); - } - } - -diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c -index c25a41a4492..01341db18e4 100644 ---- a/daemons/attrd/attrd_elections.c -+++ b/daemons/attrd/attrd_elections.c -@@ -34,7 +34,7 @@ attrd_election_cb(gpointer user_data) - attrd_peer_sync(NULL, NULL); - - /* Update the CIB after an election */ -- attrd_write_attributes(true, false); -+ attrd_write_attributes(attrd_write_all); - return G_SOURCE_REMOVE; - } - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 4be789de7f9..05c4a696a19 
100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -232,7 +232,7 @@ attrd_client_refresh(pcmk__request_t *request) - crm_info("Updating all attributes"); - - attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); -- attrd_write_attributes(true, true); -+ attrd_write_attributes(attrd_write_all|attrd_write_no_delay); - - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; -diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c -index c43eac1695a..bfd51368890 100644 ---- a/daemons/attrd/attrd_utils.c -+++ b/daemons/attrd/attrd_utils.c -@@ -156,7 +156,7 @@ attrd_cib_replaced_cb(const char *event, xmlNode * msg) - if (attrd_election_won()) { - if (change_section & (cib_change_section_nodes | cib_change_section_status)) { - crm_notice("Updating all attributes after %s event", event); -- attrd_write_attributes(true, false); -+ attrd_write_attributes(attrd_write_all); - } - } - -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 41f31d97b3b..2d781d11394 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -176,8 +176,14 @@ void attrd_free_attribute(gpointer data); - void attrd_free_attribute_value(gpointer data); - attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); - -+enum attrd_write_options { -+ attrd_write_changed = 0, -+ attrd_write_all = (1 << 0), -+ attrd_write_no_delay = (1 << 1), -+}; -+ - void attrd_write_attribute(attribute_t *a, bool ignore_delay); --void attrd_write_attributes(bool all, bool ignore_delay); -+void attrd_write_attributes(uint32_t options); - void attrd_write_or_elect_attribute(attribute_t *a); - - extern int minimum_protocol_version; -From 58400e272cfc51f02eec69cdd0ed0d27a30e78a3 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 24 Aug 2023 12:27:53 -0500 -Subject: [PATCH] Fix: attrd: avoid race condition at writer election - -f5263c94 was not a complete fix. 
The issue may also occur if a remaining node -(not the original DC or writer) wins the attribute writer election after the -original DC's controller has exited but before its attribute manger has exited. - -The long-term solution will be to have the attribute manager (instead of the -controller) be in control of erasing transient attributes from the CIB when a -node leaves. This short-term workaround simply has new attribute writers skip -shutdown attributes when writing out all attributes. - -Fixes T138 ---- - daemons/attrd/attrd_cib.c | 5 +++++ - daemons/attrd/attrd_elections.c | 14 ++++++++++++-- - daemons/attrd/pacemaker-attrd.h | 1 + - 3 files changed, 18 insertions(+), 2 deletions(-) - -diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c -index 9c787fe102..2c910b4c64 100644 ---- a/daemons/attrd/attrd_cib.c -+++ b/daemons/attrd/attrd_cib.c -@@ -359,6 +359,11 @@ attrd_write_attributes(uint32_t options) - pcmk_is_set(options, attrd_write_all)? "all" : "changed"); - g_hash_table_iter_init(&iter, attributes); - while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { -+ if (pcmk_is_set(options, attrd_write_skip_shutdown) -+ && pcmk__str_eq(a->id, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { -+ continue; -+ } -+ - if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { - // Try writing this attribute again, in case peer ID was learned - a->changed = true; -diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c -index 01341db18e..a95cd44cbd 100644 ---- a/daemons/attrd/attrd_elections.c -+++ b/daemons/attrd/attrd_elections.c -@@ -33,8 +33,18 @@ attrd_election_cb(gpointer user_data) - /* Update the peers after an election */ - attrd_peer_sync(NULL, NULL); - -- /* Update the CIB after an election */ -- attrd_write_attributes(attrd_write_all); -+ /* After winning an election, update the CIB with the values of all -+ * attributes as the winner knows them. 
-+ * -+ * However, do not write out any "shutdown" attributes. A node that is -+ * shutting down will have all its transient attributes removed from the CIB -+ * when its controller exits, and from the attribute manager's memory (on -+ * remaining nodes) when its attribute manager exits; if an election is won -+ * between when those two things happen, we don't want to write the shutdown -+ * attribute back out, which would cause the node to immediately shut down -+ * the next time it rejoins. -+ */ -+ attrd_write_attributes(attrd_write_all|attrd_write_skip_shutdown); - return G_SOURCE_REMOVE; - } - -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 2d781d1139..2e35bd7ec5 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -180,6 +180,7 @@ enum attrd_write_options { - attrd_write_changed = 0, - attrd_write_all = (1 << 0), - attrd_write_no_delay = (1 << 1), -+ attrd_write_skip_shutdown = (1 << 2), - }; - - void attrd_write_attribute(attribute_t *a, bool ignore_delay); diff --git a/SOURCES/pacemaker.sysusers b/SOURCES/pacemaker.sysusers new file mode 100644 index 0000000..daadb96 --- /dev/null +++ b/SOURCES/pacemaker.sysusers @@ -0,0 +1,3 @@ +#Type Name ID GECOS Home directory Shell +g haclient 189 +u hacluster 189:haclient "cluster user" /var/lib/pacemaker /sbin/nologin diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index 05fde5f..8e2f84d 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -35,11 +35,11 @@ ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) -%global pcmkversion 2.1.5 -%global specversion 9 +%global pcmkversion 2.1.7 +%global specversion 5 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build -%global commit a3f44794f94e1571c6ba0042915ade369b4ce4b1 +%global commit 
0f7f88312f7a1ccedee60bf768aba79ee13d41e0 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. @@ -153,7 +153,6 @@ %global pkgname_procps procps %global pkgname_glue_libs libglue %global pkgname_pcmk_libs lib%{name}3 -%global hacluster_id 90 %else %global pkgname_libtool_devel libtool-ltdl-devel %global pkgname_libtool_devel_arch libtool-ltdl-devel%{?_isa} @@ -165,7 +164,6 @@ %global pkgname_procps procps-ng %global pkgname_glue_libs cluster-glue-libs %global pkgname_pcmk_libs %{name}-libs -%global hacluster_id 189 %endif ## Distro-specific configuration choices @@ -232,8 +230,8 @@ Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist}.3.alma.1 -License: GPLv2+ and LGPLv2+ +Release: %{pcmk_release}%{?dist} +License: GPL-2.0-or-later AND LGPL-2.1-or-later Url: https://www.clusterlabs.org/ # Example: https://codeload.github.com/ClusterLabs/pacemaker/tar.gz/e91769e @@ -246,30 +244,27 @@ Url: https://www.clusterlabs.org/ # You can use "spectool -s 0 pacemaker.spec" (rpmdevtools) to show final URL. 
Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{archive_github_url} Source1: https://codeload.github.com/%{github_owner}/%{nagios_name}/tar.gz/%{nagios_archive_github_url} +Source2: pacemaker.sysusers # upstream commits -Patch001: 001-sync-points.patch -Patch002: 002-remote-regression.patch -Patch003: 003-history-cleanup.patch -Patch004: 004-g_source_remove.patch -Patch005: 005-query-null.patch -Patch006: 006-watchdog-fencing-topology.patch - -# Paches were taken from and backported from: -# https://gitlab.com/redhat/centos-stream/rpms/pacemaker/-/commit/32e8343928702115ba5f706a2dd926a69704aeac -Patch007: 007-attrd-dampen.patch -# https://gitlab.com/redhat/centos-stream/rpms/pacemaker/-/commit/4e91d187ae7b9b4b72874e46b24dc98cc5bf33d6 -Patch008: 008-controller-reply.patch -# https://gitlab.com/redhat/centos-stream/rpms/pacemaker/-/commit/f8b33ad541545649b825ab3091a8e027255e2474 -Patch009: 009-glib-assertions.patch -Patch010: 010-attrd-shutdown.patch -# https://gitlab.com/redhat/centos-stream/rpms/pacemaker/-/commit/7d3a78e13253362a96f3b99e304c873a36397f7f -Patch011: 011-attrd-shutdown-2.patch +Patch001: 001-schema-glib.patch +Patch002: 002-schema-transfer.patch +Patch003: 003-schema-doc.patch +Patch004: 004-attrd-cache-1.patch +Patch005: 005-attrd-cache-2.patch +Patch006: 006-cib-file-feature-set.patch +Patch007: 007-option-metadata.patch +Patch008: 008-attrd-prep.patch +Patch009: 009-attrd-cache-3.patch +Patch010: 010-crm_attribute-free.patch Requires: resource-agents Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} +%if %{with stonithd} +Requires: %{python_name}-%{name} = %{version}-%{release} +%endif %{?systemd_requires} %if %{defined centos} @@ -282,6 +277,7 @@ ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 Requires: %{python_path} BuildRequires: %{python_name}-devel +BuildRequires: %{python_name}-setuptools # 
Pacemaker requires a minimum libqb functionality # RHEL requires a higher version than upstream, for qb_ipcc_connect_async() @@ -305,7 +301,7 @@ BuildRequires: sed # Required for core functionality BuildRequires: pkgconfig(glib-2.0) >= 2.42 -BuildRequires: libxml2-devel +BuildRequires: libxml2-devel >= 2.6.0 BuildRequires: libxslt-devel BuildRequires: libuuid-devel BuildRequires: %{pkgname_bzip2_devel} @@ -320,7 +316,7 @@ BuildRequires: pam-devel BuildRequires: %{pkgname_gettext} >= 0.18 # Required for "make check" -BuildRequires: libcmocka-devel +BuildRequires: libcmocka-devel >= 1.1.0 BuildRequires: pkgconfig(systemd) @@ -343,6 +339,10 @@ BuildRequires: inkscape BuildRequires: %{python_name}-sphinx %endif +# Creation of Users / Groups +BuildRequires: systemd-rpm-macros +%{?sysusers_requires_compat} + # Booth requires this Provides: pacemaker-ticket-support = 2.0 @@ -371,7 +371,7 @@ Available rpmbuild rebuild options: stonithd %package cli -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Command line tools for controlling Pacemaker clusters Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} %if 0%{?supports_recommends} @@ -394,7 +394,7 @@ to query and control the cluster from machines that may, or may not, be part of the cluster. %package -n %{pkgname_pcmk_libs} -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Core Pacemaker libraries Requires(pre): %{pkgname_shadow_utils} Requires: %{name}-schemas = %{version}-%{release} @@ -411,7 +411,7 @@ The %{pkgname_pcmk_libs} package contains shared libraries needed for cluster nodes and those just running the CLI tools. %package cluster-libs -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Cluster Libraries used by Pacemaker Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} @@ -422,8 +422,22 @@ manager. 
The %{name}-cluster-libs package contains cluster-aware shared libraries needed for nodes that will form part of the cluster nodes. +%package -n %{python_name}-%{name} +License: LGPL-2.1-or-later +Summary: Python libraries for Pacemaker +Requires: %{python_path} +Requires: %{pkgname_pcmk_libs} = %{version}-%{release} +BuildArch: noarch + +%description -n %{python_name}-%{name} +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{python_name}-%{name} package contains a Python library that can be used +to interface with Pacemaker. + %package remote -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Pacemaker remote executor daemon for non-cluster nodes Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} @@ -442,7 +456,7 @@ which is capable of extending pacemaker functionality to remote nodes not running the full corosync/cluster stack. %package -n %{pkgname_pcmk_libs}-devel -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Pacemaker development package Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} @@ -454,7 +468,7 @@ Requires: libqb-devel%{?_isa} Requires: %{?pkgname_libtool_devel_arch} %endif Requires: libuuid-devel%{?_isa} -Requires: libxml2-devel%{?_isa} +Requires: libxml2-devel%{?_isa} >= 2.6.0 Requires: libxslt-devel%{?_isa} %description -n %{pkgname_pcmk_libs}-devel @@ -465,11 +479,12 @@ The %{pkgname_pcmk_libs}-devel package contains headers and shared libraries for developing tools for Pacemaker. 
%package cts -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Test framework for cluster-related technologies like Pacemaker Requires: %{python_path} Requires: %{pkgname_pcmk_libs} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} +Requires: %{python_name}-%{name} = %{version}-%{release} Requires: %{pkgname_procps} Requires: psmisc Requires: %{python_name}-psutil @@ -497,7 +512,7 @@ Pacemaker is an advanced, scalable High-Availability cluster resource manager. %package schemas -License: GPLv2+ +License: GPL-2.0-or-later Summary: Schemas and upgrade stylesheets for Pacemaker BuildArch: noarch @@ -542,6 +557,12 @@ export LDFLAGS_HARDENED_EXE="%{?_hardening_ldflags}" export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" %endif +# DO NOT REMOVE THE FOLLOWING LINE! +# This is necessary to ensure we use the git commit ID from the +# pacemaker-abcd1234 directory name as the latest commit ID when +# generating crm_config.h. +rm -rf .git + ./autogen.sh %{configure} \ @@ -569,6 +590,10 @@ export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" make %{_smp_mflags} V=1 +pushd python +%py3_build +popd + %check make %{_smp_mflags} check { cts/cts-scheduler --run load-stopped-loop \ @@ -586,6 +611,10 @@ make install \ DESTDIR=%{buildroot} V=1 docdir=%{pcmk_docdir} \ %{?_python_bytecompile_extra:%{?py_byte_compile:am__py_compile=true}} +pushd python +%py3_install +popd + mkdir -p %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata for file in $(find %{nagios_name}-%{nagios_hash}/metadata -type f); do install -m 644 $file %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata @@ -605,6 +634,8 @@ find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f rm -f %{buildroot}/%{_sbindir}/fence_legacy rm -f %{buildroot}/%{_mandir}/man8/fence_legacy.* +install -p -D -m 0644 %{SOURCE2} %{buildroot}%{_sysusersdir}/pacemaker.conf + # For now, don't package the servicelog-related binaries built only for # ppc64le when 
certain dependencies are installed. If they get more exercise by # advanced users, we can reconsider. @@ -680,10 +711,7 @@ fi %systemd_postun_with_restart crm_mon.service %pre -n %{pkgname_pcmk_libs} -# @TODO Use sysusers.d: -# https://fedoraproject.org/wiki/Changes/Adopting_sysusers.d_format -getent group %{gname} >/dev/null || groupadd -r %{gname} -g %{hacluster_id} -getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u %{hacluster_id} -s /sbin/nologin -c "cluster user" %{uname} +%sysusers_create_compat %{SOURCE2} exit 0 %ldconfig_scriptlets -n %{pkgname_pcmk_libs} @@ -704,15 +732,20 @@ exit 0 %exclude %{_sbindir}/pacemaker_remoted %{_libexecdir}/pacemaker/* -%{_sbindir}/crm_master +%if %{with stonithd} +%{_sbindir}/fence_legacy +%endif %{_sbindir}/fence_watchdog %doc %{_mandir}/man7/pacemaker-controld.* %doc %{_mandir}/man7/pacemaker-schedulerd.* %doc %{_mandir}/man7/pacemaker-fenced.* %doc %{_mandir}/man7/ocf_pacemaker_controld.* +%doc %{_mandir}/man7/ocf_pacemaker_o2cb.* %doc %{_mandir}/man7/ocf_pacemaker_remote.* -%doc %{_mandir}/man8/crm_master.* +%if %{with stonithd} +%doc %{_mandir}/man8/fence_legacy.* +%endif %doc %{_mandir}/man8/fence_watchdog.* %doc %{_mandir}/man8/pacemakerd.* @@ -725,6 +758,7 @@ exit 0 %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/cib %dir %attr (750, %{uname}, %{gname}) %{_var}/lib/pacemaker/pengine %{ocf_root}/resource.d/pacemaker/controld +%{ocf_root}/resource.d/pacemaker/o2cb %{ocf_root}/resource.d/pacemaker/remote %files cli @@ -743,6 +777,7 @@ exit 0 %{_sbindir}/crm_diff %{_sbindir}/crm_error %{_sbindir}/crm_failcount +%{_sbindir}/crm_master %{_sbindir}/crm_mon %{_sbindir}/crm_node %{_sbindir}/crm_resource @@ -770,19 +805,21 @@ exit 0 %dir %{ocf_root}/resource.d %{ocf_root}/resource.d/pacemaker -%doc %{_mandir}/man7/* +%doc %{_mandir}/man7/*pacemaker* %exclude %{_mandir}/man7/pacemaker-controld.* %exclude %{_mandir}/man7/pacemaker-schedulerd.* %exclude %{_mandir}/man7/pacemaker-fenced.* %exclude 
%{_mandir}/man7/ocf_pacemaker_controld.* %exclude %{_mandir}/man7/ocf_pacemaker_o2cb.* %exclude %{_mandir}/man7/ocf_pacemaker_remote.* -%doc %{_mandir}/man8/* -%exclude %{_mandir}/man8/crm_master.* -%exclude %{_mandir}/man8/fence_legacy.* -%exclude %{_mandir}/man8/fence_watchdog.* -%exclude %{_mandir}/man8/pacemakerd.* -%exclude %{_mandir}/man8/pacemaker-remoted.* +%doc %{_mandir}/man8/crm*.8.gz +%doc %{_mandir}/man8/attrd_updater.* +%doc %{_mandir}/man8/cibadmin.* +%if %{with cibsecrets} + %doc %{_mandir}/man8/cibsecret.* +%endif +%doc %{_mandir}/man8/iso8601.* +%doc %{_mandir}/man8/stonith_admin.* %license licenses/GPLv2 %doc COPYING @@ -795,6 +832,7 @@ exit 0 %dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker/bundles %files -n %{pkgname_pcmk_libs} %{?with_nls:-f %{name}.lang} +%{_sysusersdir}/pacemaker.conf %{_libdir}/libcib.so.* %{_libdir}/liblrmd.so.* %{_libdir}/libcrmservice.so.* @@ -813,6 +851,14 @@ exit 0 %doc COPYING %doc ChangeLog +%files -n %{python_name}-%{name} +%{python3_sitelib}/pacemaker/ +%{python3_sitelib}/pacemaker-*.egg-info +%exclude %{python3_sitelib}/pacemaker/_cts/ +%license licenses/LGPLv2.1 +%doc COPYING +%doc ChangeLog + %files remote %config(noreplace) %{_sysconfdir}/sysconfig/pacemaker # state directory is shared between the subpackets @@ -833,7 +879,7 @@ exit 0 %license licenses/CC-BY-SA-4.0 %files cts -%{python_site}/cts +%{python3_sitelib}/pacemaker/_cts/ %{_datadir}/pacemaker/tests %{_libexecdir}/pacemaker/cts-log-watcher @@ -845,8 +891,16 @@ exit 0 %files -n %{pkgname_pcmk_libs}-devel %{_includedir}/pacemaker -%{_libdir}/*.so -%{_libdir}/pkgconfig/*.pc +%{_libdir}/libcib.so +%{_libdir}/liblrmd.so +%{_libdir}/libcrmservice.so +%{_libdir}/libcrmcommon.so +%{_libdir}/libpe_status.so +%{_libdir}/libpe_rules.so +%{_libdir}/libpacemaker.so +%{_libdir}/libstonithd.so +%{_libdir}/libcrmcluster.so +%{_libdir}/pkgconfig/*pacemaker*.pc %license licenses/LGPLv2.1 %doc COPYING %doc ChangeLog @@ -867,16 +921,87 @@ exit 0 %license 
%{nagios_name}-%{nagios_hash}/COPYING %changelog -* Tue Sep 12 2023 Eduard Abdullin - 2.1.5-9.3.alma.1 -- High: tools: The dampen parameter is disabled when setting - values with attrd_updater. -- Fix: libcrmcommon: wait for reply from appropriate controller - commands -- Fix: controller: don't try to execute agent action at - shutdown -- Fix: attrd: avoid race condition when shutting down -- Refactor: attrd: use enum instead of bools for - attrd_write_attributes() +* Thu Mar 21 2024 Chris Lumens - 2.1.7-5 +- Fix upgrading to this package on multilib systems +- Resolves: RHEL-28999 + +* Wed Jan 31 2024 Chris Lumens - 2.1.7-4 +- Properly validate attribute set type in pacemaker-attrd +- Fix `crm_attribute -t nodes --node localhost` +- Resolves: RHEL-13216 +- Resolves: RHEL-17225 +- Resolves: RHEL-23498 + +* Tue Jan 16 2024 Chris Lumens - 2.1.7-3 +- Rebase on upstream 2.1.7 final release +- Fix documentation for Pacemaker Remote schema transfers +- Do not check CIB feature set version when CIB_file is set +- Consolidate attrd cache handling +- Avoid duplicating option metadata across daemons +- Related: RHEL-7665 +- Related: RHEL-13216 +- Resolves: RHEL-7702 + +* Wed Dec 13 2023 Chris Lumens - 2.1.7-2 +- Rebase on upstream 2.1.7-rc4 release +- Use systemd-sysusers to create user/group +- Pacemaker Remote nodes can validate against later schema versions +- Related: RHEL-17225 +- Resolves: RHEL-7665 + +* Wed Nov 22 2023 Chris Lumens - 2.1.7-1 +- Rebase on upstream 2.1.7-rc2 release +- Resolves: RHEL-7682 +- Related: RHEL-17225 + +* Tue Oct 31 2023 Chris Lumens - 2.1.6-10.1 +- Revert the rest of the attrd shutdown race condition fix +- Related: RHEL-14044 + +* Thu Oct 19 2023 Chris Lumens - 2.1.6-10 +- Avoid an error if the elected attrd is on a node that is shutting down +- Resolves: RHEL-14044 + +* Mon Aug 28 2023 Chris Lumens - 2.1.6-9 +- Fix an additional shutdown race between attrd and the controller +- Related: rhbz2228933 + +* Tue Aug 8 2023 Chris Lumens - 
2.1.6-8 +- Fix attrd race condition when shutting down +- Resolves: rhbz2228933 + +* Thu Jul 27 2023 Chris Lumens - 2.1.6-7 +- Wait for a reply from various controller commands +- Resolves: rhbz2221084 +- Related: rhbz2189301 + +* Mon Jul 24 2023 Chris Lumens - 2.1.6-6 +- Apply dampening when creating attributes with attrd_updater -U +- Resolves: rhbz2224051 +- Related: rhbz2189301 + +* Wed Jul 19 2023 Chris Lumens - 2.1.6-5 +- Clone instances should not shuffle unnecessarily +- Fix a bug in clone resource description display +- Resolves: rhbz2222055 +- Related: rhbz2189301 + +* Fri Jun 30 2023 Chris Lumens - 2.1.6-4 +- Fix moving groups when there's a constraint for a single group member +- Resolves: rhbz2218218 +- Resolves: rhbz2189301 + +* Wed Jun 21 2023 Chris Lumens - 2.1.6-3 +- Support start state for Pacemaker Remote nodes +- Related: rhbz2182482 + +* Fri May 26 2023 Chris Lumens - 2.1.6-2 +- Rebase pacemaker on upstream 2.1.6 final release +- Related: rhbz2182482 + +* Tue May 23 2023 Chris Lumens - 2.1.6-1 +- Rebase on upstream 2.1.6-rc2 release +- Resolves: rhbz2182482 * Wed May 17 2023 Klaus Wenninger - 2.1.5-9 - Rebuild with incremented release to allow a safe upgrade from