sanlock/SOURCES/sanlock-do-not-close-connec...

453 lines
13 KiB
Diff

From bb70c220b51720a46a1bdc6b824936fc7269d5d8 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 3 May 2021 12:52:17 -0500
Subject: [PATCH] sanlock: do not close connection in error handling
When processing a client connection, a problem with the message
or with the client handling would cause the sanlock daemon to
close the client connection (the socket fd) and release any
leases if the connection was "registered". If this happened,
the client may be unaware of it, and may continue running,
using the leases that have been dropped. This could lead to
different clients on different hosts believing that they hold
the same lease concurrently.
A known cause of this is when a sanlock client program makes
libsanlock calls on a registered connection concurrently
from multiple threads without serialization. These calls are:
sanlock_acquire, sanlock_release, sanlock_inquire,
sanlock_convert, sanlock_restrict, sanlock_killpath.
These calls involve:
- sending a header to the sanlock daemon
- sending a body to the sanlock daemon
- receving a reply from the sanlock daemon
If these steps are interleaved from multiple threads, the
sanlock daemon will read incorrect data when processing
a request, or the wrong result could be received by the caller.
A specific example that's been seen involves two concurrent
sanlock_release calls from different threads. The proper
sequence would be:
sanlock_release_1
send header_1
send body_1
recv result_1
sanlock_release_2
send header_2
send body_2
recv result_2
Without locking, data from both requests are interleaved,
causing a sequence to be received in the daemon:
header_1
header_2
body_1
The sanlock daemon expects the correct sequence of data,
so it misinterprets the mixed data and reports errors when
it finds unexpected fields in what it thinks are headers
and body structs.
The sanlock daemon recvs header_1, then recvs header_2,
and thinks header_2 is body_1. When it finds an unknown
resource name in what it thinks is body_1 (really header_2),
it logs an error to sanlock.log:
cmd_release 19,86,41799 no resource ...
Then the sanlock dameon recvs data from body_1, and thinks
it is header_2. When it finds an invalid magic number in
header_2 (really body_1), it logs an error to sanlock.log:
ci 19 recv 32 magic 0 vs 4282010
The error path after seeing a bad magic number in a message
header is to call deadfn(). For a registered connection,
this is client_pid_dead() which closes the socket fd for
the client and drops leases held by the client.
Closing the connection is the wrong way to handle a bad message
because the client is still running, and a closed connection
implies that a registered client has exited.
The fix is to simply ignore the bad data (logging an error).
By ignoring the messages, the sanlock clients will likely be
stuck waiting for replies, or possibly receive errors from
their calls. So, this fix only prevents leases from being
dropped incorrectly. Clients must still serialize access
to sockets.
Other error conditions in processing a client connection also
use this same incorrect error handling, and they are also
changed to simply ignore the issue and log an error.
---
src/main.c | 44 ++++++-----
src/sanlock_resource.h | 3 +
tests/Makefile | 9 ++-
tests/sanlk_mixmsg.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 242 insertions(+), 22 deletions(-)
create mode 100644 tests/sanlk_mixmsg.c
diff --git a/src/main.c b/src/main.c
index 622dc8e39f2a..026f7dae7d6a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1235,33 +1235,33 @@ static void process_connection(int ci)
if (!rv)
goto dead;
- log_client(ci, client[ci].fd, "recv %d %d", rv, h.cmd);
+ log_client(ci, client[ci].fd, "recv %d %u", rv, h.cmd);
if (rv < 0) {
- log_error("ci %d fd %d pid %d recv errno %d",
- ci, client[ci].fd, client[ci].pid, errno);
- goto dead;
+ log_error("client connection %d %d %d recv msg header rv %d errno %d",
+ ci, client[ci].fd, client[ci].pid, rv, errno);
+ goto bad;
}
if (rv != sizeof(h)) {
- log_error("ci %d fd %d pid %d recv size %d",
- ci, client[ci].fd, client[ci].pid, rv);
- goto dead;
+ log_error("client connection %d %d %d recv msg header rv %d cmd %u len %u",
+ ci, client[ci].fd, client[ci].pid, rv, h.cmd, h.length);
+ goto bad;
}
if (h.magic != SM_MAGIC) {
- log_error("ci %d recv %d magic %x vs %x",
- ci, rv, h.magic, SM_MAGIC);
- goto dead;
+ log_error("client connection %d %d %d recv msg header rv %d cmd %u len %u magic %x vs %x",
+ ci, client[ci].fd, client[ci].pid, rv, h.cmd, h.length, h.magic, SM_MAGIC);
+ goto bad;
}
if (client[ci].restricted & SANLK_RESTRICT_ALL) {
- log_error("ci %d fd %d pid %d cmd %d restrict all",
- ci, client[ci].fd, client[ci].pid, h.cmd);
- goto dead;
+ log_error("client connection %d %d %d recv msg header rv %d cmd %u len %u restrict all",
+ ci, client[ci].fd, client[ci].pid, rv, h.cmd, h.length);
+ goto bad;
}
if (h.version && (h.cmd != SM_CMD_VERSION) &&
(h.version & 0xFFFF0000) > (SM_PROTO & 0xFFFF0000)) {
- log_error("ci %d recv %d proto %x vs %x",
- ci, rv, h.version , SM_PROTO);
- goto dead;
+ log_error("client connection %d %d %d recv msg header rv %d cmd %u len %u version %x",
+ ci, client[ci].fd, client[ci].pid, rv, h.cmd, h.length, h.version);
+ goto bad;
}
client[ci].cmd_last = h.cmd;
@@ -1306,7 +1306,7 @@ static void process_connection(int ci)
case SM_CMD_DELETE_RESOURCE:
rv = client_suspend(ci);
if (rv < 0)
- goto dead;
+ goto bad;
process_cmd_thread_unregistered(ci, &h);
break;
case SM_CMD_ACQUIRE:
@@ -1318,16 +1318,20 @@ static void process_connection(int ci)
while the thread is working on it */
rv = client_suspend(ci);
if (rv < 0)
- goto dead;
+ goto bad;
process_cmd_thread_registered(ci, &h);
break;
default:
- log_error("process_connection ci %d fd %d cmd %d unknown", ci, client[ci].fd, h.cmd);
- goto dead;
+ log_error("client connection ci %d fd %d pid %d cmd %d unknown",
+ ci, client[ci].fd, client[ci].pid, h.cmd);
+ goto bad;
};
return;
+ bad:
+ return;
+
dead:
log_client(ci, client[ci].fd, "recv dead");
deadfn = client[ci].deadfn;
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
index 80178d194b6b..48e448969b3c 100644
--- a/src/sanlock_resource.h
+++ b/src/sanlock_resource.h
@@ -15,6 +15,9 @@
* process creates registered connection and acquires/releases leases on
* that connection for itself
*
+ * A threaded sanlock client must serialize libsanlock calls that are
+ * made using a registered socket connection.
+ *
* sock == -1, pid is used:
* process asks daemon to acquire/release leases for another separately
* registered pid
diff --git a/tests/Makefile b/tests/Makefile
index 1e7f7f487915..80123d3dc633 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -5,6 +5,7 @@ TARGET4 = killpath
TARGET5 = sanlk_path
TARGET6 = sanlk_testr
TARGET7 = sanlk_events
+TARGET8 = sanlk_mixmsg
SOURCE1 = devcount.c
SOURCE2 = sanlk_load.c
@@ -13,6 +14,7 @@ SOURCE4 = killpath.c
SOURCE5 = sanlk_path.c
SOURCE6 = sanlk_testr.c
SOURCE7 = sanlk_events.c
+SOURCE8 = sanlk_mixmsg.c
CFLAGS += -D_GNU_SOURCE -g \
-Wall \
@@ -36,7 +38,7 @@ CFLAGS += -D_GNU_SOURCE -g \
LDFLAGS = -lrt -laio -lblkid -lsanlock
-all: $(TARGET1) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) $(TARGET7)
+all: $(TARGET1) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) $(TARGET7) $(TARGET8)
$(TARGET1): $(SOURCE1)
$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ -L. -I../src -L../src
@@ -59,6 +61,9 @@ $(TARGET6): $(SOURCE6)
$(TARGET7): $(SOURCE7)
$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ -L. -I../src -L../src
+$(TARGET8): $(SOURCE8)
+ $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ -L. -I../src -L../src
+
clean:
- rm -f *.o *.so *.so.* $(TARGET) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) $(TARGET7)
+ rm -f *.o *.so *.so.* $(TARGET) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) $(TARGET7) $(TARGET8)
diff --git a/tests/sanlk_mixmsg.c b/tests/sanlk_mixmsg.c
new file mode 100644
index 000000000000..1b9376e981b6
--- /dev/null
+++ b/tests/sanlk_mixmsg.c
@@ -0,0 +1,208 @@
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/un.h>
+#include <sys/mount.h>
+#include <sys/signalfd.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <time.h>
+#include <signal.h>
+#include <sys/socket.h>
+
+#include "sanlock.h"
+#include "sanlock_resource.h"
+#include "sanlock_admin.h"
+#include "sanlock_sock.h"
+
+/* gcc with -lsanlock */
+
+/*
+ * sanlock direct init -s 1271384c-24db-4c9b-bebf-61a1916b6cb1:0:/dev/test/main:0
+ * sanlock add_lockspace -s 1271384c-24db-4c9b-bebf-61a1916b6cb1:1:/dev/test/main:0
+ */
+
+/* copied from client.c */
+static int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
+ uint32_t data, uint32_t data2)
+{
+ struct sm_header header;
+ int rv;
+
+ memset(&header, 0, sizeof(header));
+ header.magic = SM_MAGIC;
+ header.version = SM_PROTO;
+ header.cmd = cmd;
+ header.cmd_flags = cmd_flags;
+ header.length = sizeof(header) + datalen;
+ header.data = data;
+ header.data2 = data2;
+
+retry:
+ rv = send(sock, (void *) &header, sizeof(header), 0);
+ if (rv == -1 && errno == EINTR)
+ goto retry;
+
+ if (rv < 0)
+ return -errno;
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ char rd1[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+ char rd2[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+ struct sanlk_resource *res1;
+ struct sanlk_resource *res2;
+ const char *lsname;
+ const char *resname1;
+ const char *resname2;
+ char *path;
+ int fd, rv;
+
+ if (argc < 2) {
+ printf("%s <path>\n", argv[0]);
+ return -1;
+ }
+
+ path = argv[1];
+
+ lsname = "1271384c-24db-4c9b-bebf-61a1916b6cb1";
+ resname1 = "2e794e7a-5a9c-4617-8cd0-dc03c917d7a1";
+ resname2 = "2e794e7a-5a9c-4617-8cd0-dc03c917d7a2";
+
+ memset(rd1, 0, sizeof(rd1));
+ memset(rd2, 0, sizeof(rd2));
+
+ res1 = (struct sanlk_resource *)&rd1;
+ res2 = (struct sanlk_resource *)&rd2;
+
+ strcpy(res1->lockspace_name, lsname);
+ sprintf(res1->name, "%s", resname1);
+ res1->num_disks = 1;
+ strcpy(res1->disks[0].path, path);
+ res1->disks[0].offset = 1048576;
+
+ strcpy(res2->lockspace_name, lsname);
+ sprintf(res2->name, "%s", resname2);
+ res2->num_disks = 1;
+ strcpy(res2->disks[0].path, path);
+ res2->disks[0].offset = 2 * 1048576;
+
+ /*
+ struct sanlk_lockspace ls = { 0 };
+ sprintf(ls.name, lsname);
+ sprintf(ls.host_id_disk.path, path);
+
+ rv = sanlock_write_lockspace(&ls, 0, 0, 0);
+ if (rv < 0) {
+ printf("write_lockspace error %d\n", rv);
+ return -1;
+ }
+ */
+
+ rv = sanlock_write_resource(res1, 0, 0, 0);
+ if (rv < 0) {
+ printf("write_resource1 error %d\n", rv);
+ return -1;
+ }
+ rv = sanlock_write_resource(res2, 0, 0, 0);
+ if (rv < 0) {
+ printf("write_resource2 error %d\n", rv);
+ return -1;
+ }
+
+ fd = sanlock_register();
+ if (fd < 0) {
+ printf("register error %d\n", fd);
+ return -1;
+ }
+
+ printf("acquiring both leases for registered fd %d\n", fd);
+
+ rv = sanlock_acquire(fd, -1, 0, 1, &res1, NULL);
+ if (rv < 0) {
+ printf("acquire res1 error %d\n", rv);
+ return -1;
+ }
+
+ rv = sanlock_acquire(fd, -1, 0, 1, &res2, NULL);
+ if (rv < 0) {
+ printf("acquire res2 error %d\n", rv);
+ return -1;
+ }
+
+ printf("sleeping... check that both leases are held\n");
+ sleep(20);
+
+ printf("sending res1 release header only\n");
+ rv = send_header(fd, SM_CMD_RELEASE, 0, sizeof(struct sanlk_resource), 1, -1);
+ if (rv < 0)
+ printf("send bad header error %d\n", rv);
+ else
+ printf("send bad header ok\n");
+
+ printf("sending res2 release interleaved\n");
+ rv = sanlock_release(fd, -1, 0, 1, &res2);
+ if (rv < 0)
+ printf("odd release res2 error %d\n", rv);
+ else
+ printf("odd release res2 ok\n");
+
+ printf("sending res1 release body only\n");
+ rv = send(fd, res1, sizeof(struct sanlk_resource), 0);
+ if (rv < 0)
+ printf("send bad body error %d\n", rv);
+ else
+ printf("send bad body ok\n");
+
+ /*
+ * This is not simulating the recv() that each sanlock_release
+ * would do in libsanlock to get a result for each release.
+ * These would likely just cause the client block indefinitely
+ * waiting for a reply that won't come because the bad release
+ * calls were ignored.
+ */
+
+ printf("sleeping... check which leases are held\n");
+ sleep(20);
+
+ printf("releasing both leases normally\n");
+ rv = sanlock_release(fd, -1, 0, 1, &res1);
+ if (rv < 0)
+ printf("release res1 error %d\n", rv);
+ else
+ printf("release res1 ok\n");
+
+ rv = sanlock_release(fd, -1, 0, 1, &res2);
+ if (rv < 0)
+ printf("release res2 error %d\n", rv);
+ else
+ printf("release res2 ok\n");
+
+ printf("sleeping... check that both leases are released\n");
+ sleep(20);
+
+ printf("acquiring lease res1\n");
+ rv = sanlock_acquire(fd, -1, 0, 1, &res1, NULL);
+ if (rv < 0)
+ printf("acquire res1 error %d\n", rv);
+ else
+ printf("acquire res1 ok\n");
+
+ /* exit should close our registered connection and
+ automatically release res1 */
+
+ printf("exiting... check if held lease is released after exit\n");
+
+ return 0;
+}
+
--
2.7.5