389-ds-base/SOURCES/0021-Issue-6509-Race-condition-with-Paged-Result-searches.patch

246 lines
11 KiB
Diff

From dab3b79e8de4ff8825c9f73685bf59be7bc19f97 Mon Sep 17 00:00:00 2001
From: Mark Reynolds <mreynolds@redhat.com>
Date: Thu, 16 Jan 2025 08:42:53 -0500
Subject: [PATCH] Issue 6509 - Race condition with Paged Result searches
Description:
There is a race condition with Paged Result searches when a new operation comes
in while a paged search is finishing. This triggers an invalid time out error
and closes the connection with a T3 code.
The problem is that we do not use the "PagedResult lock" when checking the
connection's paged result data for a timeout event. This causes the paged
result timeout value to change unexpectedly and trigger a false timeout when a
new operation arrives.
Now we check the timeout without hte conn lock, if its expired it could
be a race condition and false positive. Try the lock again and test the
timeout. This also prevents blocking non-paged result searches from
getting held up by the lock when it's not necessary.
This also fixes some memory leaks that occur when an error happens.
Relates: https://github.com/389ds/389-ds-base/issues/6509
Reviewed by: tbordaz & proger (Thanks!!)
---
ldap/servers/slapd/daemon.c | 59 ++++++++++++++++++-------------
ldap/servers/slapd/opshared.c | 32 ++++++++---------
ldap/servers/slapd/pagedresults.c | 9 +++++
ldap/servers/slapd/slap.h | 2 +-
4 files changed, 61 insertions(+), 41 deletions(-)
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
index 56e20bd13..1a20796b9 100644
--- a/ldap/servers/slapd/daemon.c
+++ b/ldap/servers/slapd/daemon.c
@@ -1546,7 +1546,29 @@ setup_pr_read_pds(Connection_Table *ct, int listnum)
if (c->c_state == CONN_STATE_FREE) {
connection_table_move_connection_out_of_active_list(ct, c);
} else {
- /* we try to acquire the connection mutex, if it is already
+ /* Check for a timeout for PAGED RESULTS */
+ if (pagedresults_is_timedout_nolock(c)) {
+ /*
+ * There could be a race condition so lets try again with the
+ * right lock
+ */
+ pthread_mutex_t *pr_mutex = pageresult_lock_get_addr(c);
+ if (pthread_mutex_trylock(pr_mutex) == EBUSY) {
+ c = next;
+ continue;
+ }
+ if (pagedresults_is_timedout_nolock(c)) {
+ pthread_mutex_unlock(pr_mutex);
+ disconnect_server(c, c->c_connid, -1,
+ SLAPD_DISCONNECT_PAGED_SEARCH_LIMIT,
+ 0);
+ } else {
+ pthread_mutex_unlock(pr_mutex);
+ }
+ }
+
+ /*
+ * we try to acquire the connection mutex, if it is already
* acquired by another thread, don't wait
*/
if (pthread_mutex_trylock(&(c->c_mutex)) == EBUSY) {
@@ -1554,35 +1576,24 @@ setup_pr_read_pds(Connection_Table *ct, int listnum)
continue;
}
if (c->c_flags & CONN_FLAG_CLOSING) {
- /* A worker thread has marked that this connection
- * should be closed by calling disconnect_server.
- * move this connection out of the active list
- * the last thread to use the connection will close it
+ /*
+ * A worker thread, or paged result timeout, has marked that
+ * this connection should be closed by calling
+ * disconnect_server(). Move this connection out of the active
+ * list then the last thread to use the connection will close
+ * it.
*/
connection_table_move_connection_out_of_active_list(ct, c);
} else if (c->c_sd == SLAPD_INVALID_SOCKET) {
connection_table_move_connection_out_of_active_list(ct, c);
} else if (c->c_prfd != NULL) {
if ((!c->c_gettingber) && (c->c_threadnumber < c->c_max_threads_per_conn)) {
- int add_fd = 1;
- /* check timeout for PAGED RESULTS */
- if (pagedresults_is_timedout_nolock(c)) {
- /* Exceeded the paged search timelimit; disconnect the client */
- disconnect_server_nomutex(c, c->c_connid, -1,
- SLAPD_DISCONNECT_PAGED_SEARCH_LIMIT,
- 0);
- connection_table_move_connection_out_of_active_list(ct,
- c);
- add_fd = 0; /* do not poll on this fd */
- }
- if (add_fd) {
- ct->fd[listnum][count].fd = c->c_prfd;
- ct->fd[listnum][count].in_flags = SLAPD_POLL_FLAGS;
- /* slot i of the connection table is mapped to slot
- * count of the fds array */
- c->c_fdi = count;
- count++;
- }
+ ct->fd[listnum][count].fd = c->c_prfd;
+ ct->fd[listnum][count].in_flags = SLAPD_POLL_FLAGS;
+ /* slot i of the connection table is mapped to slot
+ * count of the fds array */
+ c->c_fdi = count;
+ count++;
} else {
if (c->c_threadnumber >= c->c_max_threads_per_conn) {
c->c_maxthreadsblocked++;
diff --git a/ldap/servers/slapd/opshared.c b/ldap/servers/slapd/opshared.c
index 540597f45..7dc2d5983 100644
--- a/ldap/servers/slapd/opshared.c
+++ b/ldap/servers/slapd/opshared.c
@@ -251,7 +251,7 @@ op_shared_search(Slapi_PBlock *pb, int send_result)
char *errtext = NULL;
int nentries, pnentries;
int flag_search_base_found = 0;
- int flag_no_such_object = 0;
+ bool flag_no_such_object = false;
int flag_referral = 0;
int flag_psearch = 0;
int err_code = LDAP_SUCCESS;
@@ -852,7 +852,7 @@ op_shared_search(Slapi_PBlock *pb, int send_result)
/* may be the object exist somewhere else
* wait the end of the loop to send back this error
*/
- flag_no_such_object = 1;
+ flag_no_such_object = true;
} else {
/* err something other than LDAP_NO_SUCH_OBJECT, so the backend will
* have sent the result -
@@ -862,35 +862,35 @@ op_shared_search(Slapi_PBlock *pb, int send_result)
/* fall through */
case -1: /* an error occurred */
+ slapi_pblock_get(pb, SLAPI_RESULT_CODE, &err);
/* PAGED RESULTS */
if (op_is_pagedresults(operation)) {
/* cleanup the slot */
pthread_mutex_lock(pagedresults_mutex);
+ if (err != LDAP_NO_SUCH_OBJECT && !flag_no_such_object) {
+ /* Free the results if not "no_such_object" */
+ void *sr = NULL;
+ slapi_pblock_get(pb, SLAPI_SEARCH_RESULT_SET, &sr);
+ be->be_search_results_release(&sr);
+ }
pagedresults_set_search_result(pb_conn, operation, NULL, 1, pr_idx);
rc = pagedresults_set_current_be(pb_conn, NULL, pr_idx, 1);
pthread_mutex_unlock(pagedresults_mutex);
}
- if (1 == flag_no_such_object) {
- break;
- }
- slapi_pblock_get(pb, SLAPI_RESULT_CODE, &err);
- if (err == LDAP_NO_SUCH_OBJECT) {
- /* may be the object exist somewhere else
- * wait the end of the loop to send back this error
- */
- flag_no_such_object = 1;
+
+ if (err == LDAP_NO_SUCH_OBJECT || flag_no_such_object) {
+ /* Maybe the object exists somewhere else, wait to the end
+ * of the loop to send back this error */
+ flag_no_such_object = true;
break;
} else {
- /* for error other than LDAP_NO_SUCH_OBJECT
- * the error has already been sent
- * stop the search here
- */
+ /* For error other than LDAP_NO_SUCH_OBJECT the error has
+ * already been sent stop the search here */
cache_return_target_entry(pb, be, operation);
goto free_and_return;
}
/* when rc == SLAPI_FAIL_DISKFULL this case is executed */
-
case SLAPI_FAIL_DISKFULL:
operation_out_of_disk_space();
cache_return_target_entry(pb, be, operation);
diff --git a/ldap/servers/slapd/pagedresults.c b/ldap/servers/slapd/pagedresults.c
index 9959c927e..642aefb3d 100644
--- a/ldap/servers/slapd/pagedresults.c
+++ b/ldap/servers/slapd/pagedresults.c
@@ -121,12 +121,15 @@ pagedresults_parse_control_value(Slapi_PBlock *pb,
if (ber_scanf(ber, "{io}", pagesize, &cookie) == LBER_ERROR) {
slapi_log_err(SLAPI_LOG_ERR, "pagedresults_parse_control_value",
"<= corrupted control value\n");
+ ber_free(ber, 1);
return LDAP_PROTOCOL_ERROR;
}
if (!maxreqs) {
slapi_log_err(SLAPI_LOG_ERR, "pagedresults_parse_control_value",
"Simple paged results requests per conn exceeded the limit: %d\n",
maxreqs);
+ ber_free(ber, 1);
+ slapi_ch_free_string(&cookie.bv_val);
return LDAP_UNWILLING_TO_PERFORM;
}
@@ -376,6 +379,10 @@ pagedresults_free_one_msgid(Connection *conn, ber_int_t msgid, pthread_mutex_t *
}
prp->pr_flags |= CONN_FLAG_PAGEDRESULTS_ABANDONED;
prp->pr_flags &= ~CONN_FLAG_PAGEDRESULTS_PROCESSING;
+ if (conn->c_pagedresults.prl_count > 0) {
+ _pr_cleanup_one_slot(prp);
+ conn->c_pagedresults.prl_count--;
+ }
rc = 0;
break;
}
@@ -936,7 +943,9 @@ pagedresults_is_timedout_nolock(Connection *conn)
return 1;
}
}
+
slapi_log_err(SLAPI_LOG_TRACE, "<-- pagedresults_is_timedout", "<= false 2\n");
+
return 0;
}
diff --git a/ldap/servers/slapd/slap.h b/ldap/servers/slapd/slap.h
index 15b69e69e..ca2f1f12c 100644
--- a/ldap/servers/slapd/slap.h
+++ b/ldap/servers/slapd/slap.h
@@ -77,7 +77,7 @@ static char ptokPBE[34] = "Internal (Software) Token ";
#include <sys/statvfs.h>
#include <sys/socket.h>
#include <netinet/in.h>
-
+#include <stdbool.h>
#include <time.h> /* For timespec definitions */
/* Provides our int types and platform specific requirements. */
--
2.48.1