321 lines
12 KiB
Diff
321 lines
12 KiB
Diff
|
diff -Naurp pcp-5.3.7.orig/qa/src/test_pcp_sockets.python pcp-5.3.7/qa/src/test_pcp_sockets.python
|
||
|
--- pcp-5.3.7.orig/qa/src/test_pcp_sockets.python 1970-01-01 10:00:00.000000000 +1000
|
||
|
+++ pcp-5.3.7/qa/src/test_pcp_sockets.python 2024-09-09 13:47:06.848083320 +1000
|
||
|
@@ -0,0 +1,23 @@
|
||
|
+import socket
|
||
|
+import cpmapi as api
|
||
|
+from pcp import pmapi
|
||
|
+
|
||
|
+address = 'localhost'
|
||
|
+port = 44321
|
||
|
+
|
||
|
+c = []
|
||
|
+for i in range(0, 1234):
|
||
|
+ print('context', i)
|
||
|
+ ctx = pmapi.pmContext(api.PM_CONTEXT_HOST, "local:")
|
||
|
+ print('created', i)
|
||
|
+ c.append(ctx)
|
||
|
+
|
||
|
+s = []
|
||
|
+for i in range(0, 1234):
|
||
|
+ sock = socket.socket()
|
||
|
+ print('socket', i)
|
||
|
+ sock.connect((address, port))
|
||
|
+ print('connect', i)
|
||
|
+ sock.send(b"abba\r") # -- gives a too-large PDU
|
||
|
+ print('send', i)
|
||
|
+ # s.append(sock) # -- exercise pduread: timeout
|
||
|
diff -Naurp pcp-5.3.7.orig/src/libpcp/src/pdu.c pcp-5.3.7/src/libpcp/src/pdu.c
|
||
|
--- pcp-5.3.7.orig/src/libpcp/src/pdu.c 2022-04-05 09:05:43.000000000 +1000
|
||
|
+++ pcp-5.3.7/src/libpcp/src/pdu.c 2024-09-09 13:47:06.869083364 +1000
|
||
|
@@ -186,10 +186,7 @@ pduread(int fd, char *buf, int len, int
|
||
|
* Need all parts of the PDU to be received by dead_hand
|
||
|
* This enforces a low overall timeout for the whole PDU
|
||
|
* (as opposed to just a timeout for individual calls to
|
||
|
- * recv). A more invasive alternative (better) approach
|
||
|
- * would see all I/O performed in the main event loop,
|
||
|
- * and I/O routines transformed to continuation-passing
|
||
|
- * style.
|
||
|
+ * recv).
|
||
|
*/
|
||
|
gettimeofday(&dead_hand, NULL);
|
||
|
dead_hand.tv_sec += wait.tv_sec;
|
||
|
@@ -499,9 +496,10 @@ PM_FAULT_RETURN(PM_ERR_TIMEOUT);
|
||
|
if (len == -1) {
|
||
|
if (! __pmSocketClosed()) {
|
||
|
char errmsg[PM_MAXERRMSGLEN];
|
||
|
- pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: len=%d: %s",
|
||
|
- "__pmGetPDU", fd, len,
|
||
|
- pmErrStr_r(-oserror(), errmsg, sizeof(errmsg)));
|
||
|
+ if (pmDebugOptions.pdu)
|
||
|
+ pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: len=%d: %s",
|
||
|
+ "__pmGetPDU", fd, len,
|
||
|
+ pmErrStr_r(-oserror(), errmsg, sizeof(errmsg)));
|
||
|
}
|
||
|
}
|
||
|
else if (len >= (int)sizeof(php->len)) {
|
||
|
@@ -520,15 +518,17 @@ PM_FAULT_RETURN(PM_ERR_TIMEOUT);
|
||
|
}
|
||
|
else if (len < 0) {
|
||
|
char errmsg[PM_MAXERRMSGLEN];
|
||
|
- pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: len=%d: %s",
|
||
|
- "__pmGetPDU", fd, len,
|
||
|
- pmErrStr_r(len, errmsg, sizeof(errmsg)));
|
||
|
+ if (pmDebugOptions.pdu)
|
||
|
+ pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: len=%d: %s",
|
||
|
+ "__pmGetPDU", fd, len,
|
||
|
+ pmErrStr_r(len, errmsg, sizeof(errmsg)));
|
||
|
__pmUnpinPDUBuf(pdubuf);
|
||
|
return PM_ERR_IPC;
|
||
|
}
|
||
|
else if (len > 0) {
|
||
|
- pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: bad len=%d",
|
||
|
- "__pmGetPDU", fd, len);
|
||
|
+ if (pmDebugOptions.pdu)
|
||
|
+ pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: bad len=%d",
|
||
|
+ "__pmGetPDU", fd, len);
|
||
|
__pmUnpinPDUBuf(pdubuf);
|
||
|
return PM_ERR_IPC;
|
||
|
}
|
||
|
@@ -547,8 +547,9 @@ check_read_len:
|
||
|
* PDU length indicates insufficient bytes for a PDU header
|
||
|
* ... looks like DOS attack like PV 935490
|
||
|
*/
|
||
|
- pmNotifyErr(LOG_ERR, "%s: fd=%d illegal PDU len=%d in hdr",
|
||
|
- "__pmGetPDU", fd, php->len);
|
||
|
+ if (pmDebugOptions.pdu)
|
||
|
+ pmNotifyErr(LOG_ERR, "%s: fd=%d illegal PDU len=%d in hdr",
|
||
|
+ "__pmGetPDU", fd, php->len);
|
||
|
__pmUnpinPDUBuf(pdubuf);
|
||
|
return PM_ERR_IPC;
|
||
|
}
|
||
|
@@ -559,16 +560,18 @@ check_read_len:
|
||
|
* (note, pmcd and pmdas have to be able to _send_ large PDUs,
|
||
|
* e.g. for a pmResult or instance domain enquiry)
|
||
|
*/
|
||
|
- if (len < (int)(sizeof(php->len) + sizeof(php->type)))
|
||
|
- /* PDU too short to provide a valid type */
|
||
|
- pmNotifyErr(LOG_ERR, "%s: fd=%d bad PDU len=%d in hdr "
|
||
|
- "exceeds maximum client PDU size (%d)",
|
||
|
- "__pmGetPDU", fd, php->len, ceiling);
|
||
|
- else
|
||
|
- pmNotifyErr(LOG_ERR, "%s: fd=%d type=0x%x bad PDU len=%d in hdr "
|
||
|
- "exceeds maximum client PDU size (%d)",
|
||
|
- "__pmGetPDU", fd, (unsigned)ntohl(php->type),
|
||
|
- php->len, ceiling);
|
||
|
+ if (pmDebugOptions.pdu) {
|
||
|
+ if (len < (int)(sizeof(php->len) + sizeof(php->type)))
|
||
|
+ /* PDU too short to provide a valid type */
|
||
|
+ pmNotifyErr(LOG_ERR, "%s: fd=%d bad PDU len=%d in hdr"
|
||
|
+ " exceeds maximum client PDU size (%d)",
|
||
|
+ "__pmGetPDU", fd, php->len, ceiling);
|
||
|
+ else
|
||
|
+ pmNotifyErr(LOG_ERR, "%s: fd=%d type=0x%x bad PDU len=%d in hdr"
|
||
|
+ " exceeds maximum client PDU size (%d)",
|
||
|
+ "__pmGetPDU", fd, (unsigned)ntohl(php->type),
|
||
|
+ php->len, ceiling);
|
||
|
+ }
|
||
|
__pmUnpinPDUBuf(pdubuf);
|
||
|
return PM_ERR_TOOBIG;
|
||
|
}
|
||
|
@@ -608,6 +611,10 @@ check_read_len:
|
||
|
__pmUnpinPDUBuf(pdubuf);
|
||
|
return PM_ERR_TIMEOUT;
|
||
|
}
|
||
|
+ else if (!pmDebugOptions.pdu) {
|
||
|
+ __pmUnpinPDUBuf(pdubuf);
|
||
|
+ return PM_ERR_IPC;
|
||
|
+ }
|
||
|
else if (len < 0) {
|
||
|
char errmsg[PM_MAXERRMSGLEN];
|
||
|
pmNotifyErr(LOG_ERR, "%s: fd=%d data read: len=%d: %s",
|
||
|
@@ -641,7 +648,8 @@ check_read_len:
|
||
|
* PDU type is bad ... could be a possible mem leak attack like
|
||
|
* https://bugzilla.redhat.com/show_bug.cgi?id=841319
|
||
|
*/
|
||
|
- pmNotifyErr(LOG_ERR, "%s: fd=%d illegal PDU type=%d in hdr",
|
||
|
+ if (pmDebugOptions.pdu)
|
||
|
+ pmNotifyErr(LOG_ERR, "%s: fd=%d illegal PDU type=%d in hdr",
|
||
|
"__pmGetPDU", fd, php->type);
|
||
|
__pmUnpinPDUBuf(pdubuf);
|
||
|
return PM_ERR_IPC;
|
||
|
diff -Naurp pcp-5.3.7.orig/src/libpcp_web/src/load.h pcp-5.3.7/src/libpcp_web/src/load.h
|
||
|
--- pcp-5.3.7.orig/src/libpcp_web/src/load.h 2021-02-17 15:27:41.000000000 +1100
|
||
|
+++ pcp-5.3.7/src/libpcp_web/src/load.h 2024-09-09 13:45:56.531933622 +1000
|
||
|
@@ -42,8 +42,9 @@ typedef struct context {
|
||
|
unsigned int setup : 1; /* context established */
|
||
|
unsigned int cached : 1; /* context/source in cache */
|
||
|
unsigned int garbage : 1; /* context pending removal */
|
||
|
+ unsigned int inactive: 1; /* context removal deferred */
|
||
|
unsigned int updated : 1; /* context labels are updated */
|
||
|
- unsigned int padding : 4; /* zero-filled struct padding */
|
||
|
+ unsigned int padding : 3; /* zero-filled struct padding */
|
||
|
unsigned int refcount : 16; /* currently-referenced counter */
|
||
|
unsigned int timeout; /* context timeout in milliseconds */
|
||
|
uv_timer_t timer;
|
||
|
diff -Naurp pcp-5.3.7.orig/src/libpcp_web/src/webgroup.c pcp-5.3.7/src/libpcp_web/src/webgroup.c
|
||
|
--- pcp-5.3.7.orig/src/libpcp_web/src/webgroup.c 2024-09-09 13:44:34.166748200 +1000
|
||
|
+++ pcp-5.3.7/src/libpcp_web/src/webgroup.c 2024-09-09 13:45:56.531933622 +1000
|
||
|
@@ -134,9 +134,18 @@ webgroup_timeout_context(uv_timer_t *arg
|
||
|
* is returned to zero by the caller, or background cleanup
|
||
|
* finds this context and cleans it.
|
||
|
*/
|
||
|
- if (cp->refcount == 0 && cp->garbage == 0) {
|
||
|
- cp->garbage = 1;
|
||
|
- uv_timer_stop(&cp->timer);
|
||
|
+ if (cp->refcount == 0) {
|
||
|
+ if (cp->garbage == 0) {
|
||
|
+ cp->garbage = 1;
|
||
|
+ uv_timer_stop(&cp->timer);
|
||
|
+ }
|
||
|
+ } else {
|
||
|
+ /*
|
||
|
+ * Context timed out but still referenced, must wait
|
||
|
+ * until the caller releases its reference (shortly)
|
||
|
+ * before beginning garbage collection process.
|
||
|
+ */
|
||
|
+ cp->inactive = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@@ -298,20 +307,28 @@ webgroup_garbage_collect(struct webgroup
|
||
|
dictIterator *iterator;
|
||
|
dictEntry *entry;
|
||
|
context_t *cp;
|
||
|
- unsigned int count = 0, drops = 0;
|
||
|
+ unsigned int count = 0, drops = 0, garbageset = 0, inactiveset = 0;
|
||
|
|
||
|
if (pmDebugOptions.http || pmDebugOptions.libweb)
|
||
|
- fprintf(stderr, "%s: started\n", "webgroup_garbage_collect");
|
||
|
+ fprintf(stderr, "%s: started for groups %p\n",
|
||
|
+ "webgroup_garbage_collect", groups);
|
||
|
|
||
|
/* do context GC if we get the lock (else don't block here) */
|
||
|
if (uv_mutex_trylock(&groups->mutex) == 0) {
|
||
|
iterator = dictGetSafeIterator(groups->contexts);
|
||
|
for (entry = dictNext(iterator); entry;) {
|
||
|
cp = (context_t *)dictGetVal(entry);
|
||
|
+ if (cp->privdata != groups)
|
||
|
+ continue;
|
||
|
entry = dictNext(iterator);
|
||
|
- if (cp->garbage && cp->privdata == groups) {
|
||
|
+ if (cp->garbage)
|
||
|
+ garbageset++;
|
||
|
+ if (cp->inactive && cp->refcount == 0)
|
||
|
+ inactiveset++;
|
||
|
+ if (cp->garbage || (cp->inactive && cp->refcount == 0)) {
|
||
|
if (pmDebugOptions.http || pmDebugOptions.libweb)
|
||
|
- fprintf(stderr, "GC context %u (%p)\n", cp->randomid, cp);
|
||
|
+ fprintf(stderr, "GC dropping context %u (%p)\n",
|
||
|
+ cp->randomid, cp);
|
||
|
uv_mutex_unlock(&groups->mutex);
|
||
|
webgroup_drop_context(cp, groups);
|
||
|
uv_mutex_lock(&groups->mutex);
|
||
|
@@ -324,7 +341,8 @@ webgroup_garbage_collect(struct webgroup
|
||
|
/* if dropping the last remaining context, do cleanup */
|
||
|
if (groups->active && drops == count) {
|
||
|
if (pmDebugOptions.http || pmDebugOptions.libweb)
|
||
|
- fprintf(stderr, "%s: freezing\n", "webgroup_garbage_collect");
|
||
|
+ fprintf(stderr, "%s: freezing groups %p\n",
|
||
|
+ "webgroup_garbage_collect", groups);
|
||
|
webgroup_timers_stop(groups);
|
||
|
}
|
||
|
uv_mutex_unlock(&groups->mutex);
|
||
|
@@ -334,8 +352,10 @@ webgroup_garbage_collect(struct webgroup
|
||
|
mmv_set(groups->map, groups->metrics[WEBGROUP_GC_COUNT], &count);
|
||
|
|
||
|
if (pmDebugOptions.http || pmDebugOptions.libweb)
|
||
|
- fprintf(stderr, "%s: finished [%u drops from %u entries]\n",
|
||
|
- "webgroup_garbage_collect", drops, count);
|
||
|
+ fprintf(stderr, "%s: finished [%u drops from %u entries,"
|
||
|
+ " %u garbageset, %u inactiveset]\n",
|
||
|
+ "webgroup_garbage_collect", drops, count,
|
||
|
+ garbageset, inactiveset);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
@@ -354,7 +374,7 @@ webgroup_use_context(struct context *cp,
|
||
|
int sts;
|
||
|
struct webgroups *gp = (struct webgroups *)cp->privdata;
|
||
|
|
||
|
- if (cp->garbage == 0) {
|
||
|
+ if (cp->garbage == 0 && cp->inactive == 0) {
|
||
|
if (cp->setup == 0) {
|
||
|
if ((sts = pmReconnectContext(cp->context)) < 0) {
|
||
|
infofmt(*message, "cannot reconnect context: %s",
|
||
|
@@ -424,7 +444,7 @@ webgroup_lookup_context(pmWebGroupSettin
|
||
|
*status = -ENOTCONN;
|
||
|
return NULL;
|
||
|
}
|
||
|
- if (cp->garbage == 0) {
|
||
|
+ if (cp->garbage == 0 && cp->inactive == 0) {
|
||
|
access.username = cp->username;
|
||
|
access.password = cp->password;
|
||
|
access.realm = cp->realm;
|
||
|
diff -Naurp pcp-5.3.7.orig/src/pmcd/src/client.c pcp-5.3.7/src/pmcd/src/client.c
|
||
|
--- pcp-5.3.7.orig/src/pmcd/src/client.c 2018-01-15 15:49:13.000000000 +1100
|
||
|
+++ pcp-5.3.7/src/pmcd/src/client.c 2024-09-09 13:47:06.870083366 +1000
|
||
|
@@ -74,7 +74,8 @@ NotifyEndContext(int ctx)
|
||
|
ClientInfo *
|
||
|
AcceptNewClient(int reqfd)
|
||
|
{
|
||
|
- static unsigned int seq = 0;
|
||
|
+ static unsigned int seq, saved, count;
|
||
|
+ static struct timeval then;
|
||
|
int i, fd;
|
||
|
__pmSockLen addrlen;
|
||
|
struct timeval now;
|
||
|
@@ -83,21 +84,30 @@ AcceptNewClient(int reqfd)
|
||
|
addrlen = __pmSockAddrSize();
|
||
|
fd = __pmAccept(reqfd, client[i].addr, &addrlen);
|
||
|
if (fd == -1) {
|
||
|
- if (neterror() == EPERM) {
|
||
|
- pmNotifyErr(LOG_NOTICE, "AcceptNewClient(%d): "
|
||
|
- "Permission Denied\n", reqfd);
|
||
|
- }
|
||
|
- else if (neterror() == ECONNABORTED) {
|
||
|
+ if (neterror() == ECONNABORTED) {
|
||
|
/* quietly ignore this one ... */
|
||
|
;
|
||
|
}
|
||
|
else {
|
||
|
- /*
|
||
|
- * unexpected ... ignore the client (we used to kill off pmcd
|
||
|
- * but that seems way too extreme)
|
||
|
+ /* Permission denied or an unexpected error (e.g. EMFILE)
|
||
|
+ * - rate limit the logging and make this client go away.
|
||
|
*/
|
||
|
- pmNotifyErr(LOG_ERR, "AcceptNewClient(%d): Unexpected error from __pmAccept: %d: %s\n",
|
||
|
- reqfd, neterror(), netstrerror());
|
||
|
+ pmtimevalNow(&now);
|
||
|
+ if (neterror() != saved || now.tv_sec > then.tv_sec + 60) {
|
||
|
+ if (neterror() == EPERM)
|
||
|
+ pmNotifyErr(LOG_NOTICE, "AcceptNewClient(%d): "
|
||
|
+ "Permission Denied (%d suppressed)\n",
|
||
|
+ reqfd, count);
|
||
|
+ else
|
||
|
+ pmNotifyErr(LOG_ERR, "AcceptNewClient(%d): "
|
||
|
+ "Accept error (%d suppressed): %d: %s\n",
|
||
|
+ reqfd, count, neterror(), netstrerror());
|
||
|
+ saved = neterror();
|
||
|
+ count = 0;
|
||
|
+ } else {
|
||
|
+ count++;
|
||
|
+ }
|
||
|
+ then = now;
|
||
|
}
|
||
|
client[i].fd = -1;
|
||
|
DeleteClient(&client[i]);
|
||
|
diff -Naurp pcp-5.3.7.orig/src/pmcd/src/pmcd.c pcp-5.3.7/src/pmcd/src/pmcd.c
|
||
|
--- pcp-5.3.7.orig/src/pmcd/src/pmcd.c 2021-10-13 10:48:23.000000000 +1100
|
||
|
+++ pcp-5.3.7/src/pmcd/src/pmcd.c 2024-09-09 13:47:06.871083368 +1000
|
||
|
@@ -685,7 +685,7 @@ HandleReadyAgents(__pmFdSet *readyFds)
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
-CheckNewClient(__pmFdSet * fdset, int rfd, int family)
|
||
|
+CheckNewClient(__pmFdSet *fdset, int rfd, int family)
|
||
|
{
|
||
|
int s, sts, accepted = 1;
|
||
|
__uint32_t challenge;
|