453 lines
13 KiB
Diff
453 lines
13 KiB
Diff
From bb70c220b51720a46a1bdc6b824936fc7269d5d8 Mon Sep 17 00:00:00 2001
|
|
From: David Teigland <teigland@redhat.com>
|
|
Date: Mon, 3 May 2021 12:52:17 -0500
|
|
Subject: [PATCH] sanlock: do not close connection in error handling
|
|
|
|
When processing a client connection, a problem with the message
|
|
or with the client handling would cause the sanlock daemon to
|
|
close the client connection (the socket fd) and release any
|
|
leases if the connection was "registered". If this happened,
|
|
the client may be unaware of it, and may continue running,
|
|
using the leases that have been dropped. This could lead to
|
|
different clients on different hosts believing that they hold
|
|
the same lease concurrently.
|
|
|
|
A known cause of this is when a sanlock client program makes
|
|
libsanlock calls on a registered connection concurrently
|
|
from multiple threads without serialization. These calls are:
|
|
sanlock_acquire, sanlock_release, sanlock_inquire,
|
|
sanlock_convert, sanlock_restrict, sanlock_killpath.
|
|
|
|
These calls involve:
|
|
- sending a header to the sanlock daemon
|
|
- sending a body to the sanlock daemon
|
|
- receving a reply from the sanlock daemon
|
|
|
|
If these steps are interleaved from multiple threads, the
|
|
sanlock daemon will read incorrect data when processing
|
|
a request, or the wrong result could be received by the caller.
|
|
|
|
A specific example that's been seen involves two concurrent
|
|
sanlock_release calls from different threads. The proper
|
|
sequence would be:
|
|
|
|
sanlock_release_1
|
|
send header_1
|
|
send body_1
|
|
recv result_1
|
|
sanlock_release_2
|
|
send header_2
|
|
send body_2
|
|
recv result_2
|
|
|
|
Without locking, data from both requests are interleaved,
|
|
causing a sequence to be received in the daemon:
|
|
|
|
header_1
|
|
header_2
|
|
body_1
|
|
|
|
The sanlock daemon expects the correct sequence of data,
|
|
so it misinterprets the mixed data and reports errors when
|
|
it finds unexpected fields in what it thinks are headers
|
|
and body structs.
|
|
|
|
The sanlock daemon recvs header_1, then recvs header_2,
|
|
and thinks header_2 is body_1. When it finds an unknown
|
|
resource name in what it thinks is body_1 (really header_2),
|
|
it logs an error to sanlock.log:
|
|
|
|
cmd_release 19,86,41799 no resource ...
|
|
|
|
Then the sanlock dameon recvs data from body_1, and thinks
|
|
it is header_2. When it finds an invalid magic number in
|
|
header_2 (really body_1), it logs an error to sanlock.log:
|
|
|
|
ci 19 recv 32 magic 0 vs 4282010
|
|
|
|
The error path after seeing a bad magic number in a message
|
|
header is to call deadfn(). For a registered connection,
|
|
this is client_pid_dead() which closes the socket fd for
|
|
the client and drops leases held by the client.
|
|
|
|
Closing the connection is the wrong way to handle a bad message
|
|
because the client is still running, and a closed connection
|
|
implies that a registered client has exited.
|
|
|
|
The fix is to simply ignore the bad data (logging an error).
|
|
By ignoring the messages, the sanlock clients will likely be
|
|
stuck waiting for replies, or possibly receive errors from
|
|
their calls. So, this fix only prevents leases from being
|
|
dropped incorrectly. Clients must still serialize access
|
|
to sockets.
|
|
|
|
Other error conditions in processing a client connection also
|
|
use this same incorrect error handling, and they are also
|
|
changed to simply ignore the issue and log an error.
|
|
---
|
|
src/main.c | 44 ++++++-----
|
|
src/sanlock_resource.h | 3 +
|
|
tests/Makefile | 9 ++-
|
|
tests/sanlk_mixmsg.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++++
|
|
4 files changed, 242 insertions(+), 22 deletions(-)
|
|
create mode 100644 tests/sanlk_mixmsg.c
|
|
|
|
diff --git a/src/main.c b/src/main.c
|
|
index 622dc8e39f2a..026f7dae7d6a 100644
|
|
--- a/src/main.c
|
|
+++ b/src/main.c
|
|
@@ -1235,33 +1235,33 @@ static void process_connection(int ci)
|
|
if (!rv)
|
|
goto dead;
|
|
|
|
- log_client(ci, client[ci].fd, "recv %d %d", rv, h.cmd);
|
|
+ log_client(ci, client[ci].fd, "recv %d %u", rv, h.cmd);
|
|
|
|
if (rv < 0) {
|
|
- log_error("ci %d fd %d pid %d recv errno %d",
|
|
- ci, client[ci].fd, client[ci].pid, errno);
|
|
- goto dead;
|
|
+ log_error("client connection %d %d %d recv msg header rv %d errno %d",
|
|
+ ci, client[ci].fd, client[ci].pid, rv, errno);
|
|
+ goto bad;
|
|
}
|
|
if (rv != sizeof(h)) {
|
|
- log_error("ci %d fd %d pid %d recv size %d",
|
|
- ci, client[ci].fd, client[ci].pid, rv);
|
|
- goto dead;
|
|
+ log_error("client connection %d %d %d recv msg header rv %d cmd %u len %u",
|
|
+ ci, client[ci].fd, client[ci].pid, rv, h.cmd, h.length);
|
|
+ goto bad;
|
|
}
|
|
if (h.magic != SM_MAGIC) {
|
|
- log_error("ci %d recv %d magic %x vs %x",
|
|
- ci, rv, h.magic, SM_MAGIC);
|
|
- goto dead;
|
|
+ log_error("client connection %d %d %d recv msg header rv %d cmd %u len %u magic %x vs %x",
|
|
+ ci, client[ci].fd, client[ci].pid, rv, h.cmd, h.length, h.magic, SM_MAGIC);
|
|
+ goto bad;
|
|
}
|
|
if (client[ci].restricted & SANLK_RESTRICT_ALL) {
|
|
- log_error("ci %d fd %d pid %d cmd %d restrict all",
|
|
- ci, client[ci].fd, client[ci].pid, h.cmd);
|
|
- goto dead;
|
|
+ log_error("client connection %d %d %d recv msg header rv %d cmd %u len %u restrict all",
|
|
+ ci, client[ci].fd, client[ci].pid, rv, h.cmd, h.length);
|
|
+ goto bad;
|
|
}
|
|
if (h.version && (h.cmd != SM_CMD_VERSION) &&
|
|
(h.version & 0xFFFF0000) > (SM_PROTO & 0xFFFF0000)) {
|
|
- log_error("ci %d recv %d proto %x vs %x",
|
|
- ci, rv, h.version , SM_PROTO);
|
|
- goto dead;
|
|
+ log_error("client connection %d %d %d recv msg header rv %d cmd %u len %u version %x",
|
|
+ ci, client[ci].fd, client[ci].pid, rv, h.cmd, h.length, h.version);
|
|
+ goto bad;
|
|
}
|
|
|
|
client[ci].cmd_last = h.cmd;
|
|
@@ -1306,7 +1306,7 @@ static void process_connection(int ci)
|
|
case SM_CMD_DELETE_RESOURCE:
|
|
rv = client_suspend(ci);
|
|
if (rv < 0)
|
|
- goto dead;
|
|
+ goto bad;
|
|
process_cmd_thread_unregistered(ci, &h);
|
|
break;
|
|
case SM_CMD_ACQUIRE:
|
|
@@ -1318,16 +1318,20 @@ static void process_connection(int ci)
|
|
while the thread is working on it */
|
|
rv = client_suspend(ci);
|
|
if (rv < 0)
|
|
- goto dead;
|
|
+ goto bad;
|
|
process_cmd_thread_registered(ci, &h);
|
|
break;
|
|
default:
|
|
- log_error("process_connection ci %d fd %d cmd %d unknown", ci, client[ci].fd, h.cmd);
|
|
- goto dead;
|
|
+ log_error("client connection ci %d fd %d pid %d cmd %d unknown",
|
|
+ ci, client[ci].fd, client[ci].pid, h.cmd);
|
|
+ goto bad;
|
|
};
|
|
|
|
return;
|
|
|
|
+ bad:
|
|
+ return;
|
|
+
|
|
dead:
|
|
log_client(ci, client[ci].fd, "recv dead");
|
|
deadfn = client[ci].deadfn;
|
|
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
|
|
index 80178d194b6b..48e448969b3c 100644
|
|
--- a/src/sanlock_resource.h
|
|
+++ b/src/sanlock_resource.h
|
|
@@ -15,6 +15,9 @@
|
|
* process creates registered connection and acquires/releases leases on
|
|
* that connection for itself
|
|
*
|
|
+ * A threaded sanlock client must serialize libsanlock calls that are
|
|
+ * made using a registered socket connection.
|
|
+ *
|
|
* sock == -1, pid is used:
|
|
* process asks daemon to acquire/release leases for another separately
|
|
* registered pid
|
|
diff --git a/tests/Makefile b/tests/Makefile
|
|
index 1e7f7f487915..80123d3dc633 100644
|
|
--- a/tests/Makefile
|
|
+++ b/tests/Makefile
|
|
@@ -5,6 +5,7 @@ TARGET4 = killpath
|
|
TARGET5 = sanlk_path
|
|
TARGET6 = sanlk_testr
|
|
TARGET7 = sanlk_events
|
|
+TARGET8 = sanlk_mixmsg
|
|
|
|
SOURCE1 = devcount.c
|
|
SOURCE2 = sanlk_load.c
|
|
@@ -13,6 +14,7 @@ SOURCE4 = killpath.c
|
|
SOURCE5 = sanlk_path.c
|
|
SOURCE6 = sanlk_testr.c
|
|
SOURCE7 = sanlk_events.c
|
|
+SOURCE8 = sanlk_mixmsg.c
|
|
|
|
CFLAGS += -D_GNU_SOURCE -g \
|
|
-Wall \
|
|
@@ -36,7 +38,7 @@ CFLAGS += -D_GNU_SOURCE -g \
|
|
|
|
LDFLAGS = -lrt -laio -lblkid -lsanlock
|
|
|
|
-all: $(TARGET1) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) $(TARGET7)
|
|
+all: $(TARGET1) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) $(TARGET7) $(TARGET8)
|
|
|
|
$(TARGET1): $(SOURCE1)
|
|
$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ -L. -I../src -L../src
|
|
@@ -59,6 +61,9 @@ $(TARGET6): $(SOURCE6)
|
|
$(TARGET7): $(SOURCE7)
|
|
$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ -L. -I../src -L../src
|
|
|
|
+$(TARGET8): $(SOURCE8)
|
|
+ $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ -L. -I../src -L../src
|
|
+
|
|
clean:
|
|
- rm -f *.o *.so *.so.* $(TARGET) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) $(TARGET7)
|
|
+ rm -f *.o *.so *.so.* $(TARGET) $(TARGET2) $(TARGET3) $(TARGET4) $(TARGET5) $(TARGET6) $(TARGET7) $(TARGET8)
|
|
|
|
diff --git a/tests/sanlk_mixmsg.c b/tests/sanlk_mixmsg.c
|
|
new file mode 100644
|
|
index 000000000000..1b9376e981b6
|
|
--- /dev/null
|
|
+++ b/tests/sanlk_mixmsg.c
|
|
@@ -0,0 +1,208 @@
|
|
+#include <sys/types.h>
|
|
+#include <sys/wait.h>
|
|
+#include <sys/un.h>
|
|
+#include <sys/mount.h>
|
|
+#include <sys/signalfd.h>
|
|
+#include <inttypes.h>
|
|
+#include <unistd.h>
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+#include <stdint.h>
|
|
+#include <stddef.h>
|
|
+#include <fcntl.h>
|
|
+#include <string.h>
|
|
+#include <errno.h>
|
|
+#include <limits.h>
|
|
+#include <time.h>
|
|
+#include <signal.h>
|
|
+#include <sys/socket.h>
|
|
+
|
|
+#include "sanlock.h"
|
|
+#include "sanlock_resource.h"
|
|
+#include "sanlock_admin.h"
|
|
+#include "sanlock_sock.h"
|
|
+
|
|
+/* gcc with -lsanlock */
|
|
+
|
|
+/*
|
|
+ * sanlock direct init -s 1271384c-24db-4c9b-bebf-61a1916b6cb1:0:/dev/test/main:0
|
|
+ * sanlock add_lockspace -s 1271384c-24db-4c9b-bebf-61a1916b6cb1:1:/dev/test/main:0
|
|
+ */
|
|
+
|
|
+/* copied from client.c */
|
|
+static int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
|
|
+ uint32_t data, uint32_t data2)
|
|
+{
|
|
+ struct sm_header header;
|
|
+ int rv;
|
|
+
|
|
+ memset(&header, 0, sizeof(header));
|
|
+ header.magic = SM_MAGIC;
|
|
+ header.version = SM_PROTO;
|
|
+ header.cmd = cmd;
|
|
+ header.cmd_flags = cmd_flags;
|
|
+ header.length = sizeof(header) + datalen;
|
|
+ header.data = data;
|
|
+ header.data2 = data2;
|
|
+
|
|
+retry:
|
|
+ rv = send(sock, (void *) &header, sizeof(header), 0);
|
|
+ if (rv == -1 && errno == EINTR)
|
|
+ goto retry;
|
|
+
|
|
+ if (rv < 0)
|
|
+ return -errno;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int main(int argc, char *argv[])
|
|
+{
|
|
+ char rd1[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
|
|
+ char rd2[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
|
|
+ struct sanlk_resource *res1;
|
|
+ struct sanlk_resource *res2;
|
|
+ const char *lsname;
|
|
+ const char *resname1;
|
|
+ const char *resname2;
|
|
+ char *path;
|
|
+ int fd, rv;
|
|
+
|
|
+ if (argc < 2) {
|
|
+ printf("%s <path>\n", argv[0]);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ path = argv[1];
|
|
+
|
|
+ lsname = "1271384c-24db-4c9b-bebf-61a1916b6cb1";
|
|
+ resname1 = "2e794e7a-5a9c-4617-8cd0-dc03c917d7a1";
|
|
+ resname2 = "2e794e7a-5a9c-4617-8cd0-dc03c917d7a2";
|
|
+
|
|
+ memset(rd1, 0, sizeof(rd1));
|
|
+ memset(rd2, 0, sizeof(rd2));
|
|
+
|
|
+ res1 = (struct sanlk_resource *)&rd1;
|
|
+ res2 = (struct sanlk_resource *)&rd2;
|
|
+
|
|
+ strcpy(res1->lockspace_name, lsname);
|
|
+ sprintf(res1->name, "%s", resname1);
|
|
+ res1->num_disks = 1;
|
|
+ strcpy(res1->disks[0].path, path);
|
|
+ res1->disks[0].offset = 1048576;
|
|
+
|
|
+ strcpy(res2->lockspace_name, lsname);
|
|
+ sprintf(res2->name, "%s", resname2);
|
|
+ res2->num_disks = 1;
|
|
+ strcpy(res2->disks[0].path, path);
|
|
+ res2->disks[0].offset = 2 * 1048576;
|
|
+
|
|
+ /*
|
|
+ struct sanlk_lockspace ls = { 0 };
|
|
+ sprintf(ls.name, lsname);
|
|
+ sprintf(ls.host_id_disk.path, path);
|
|
+
|
|
+ rv = sanlock_write_lockspace(&ls, 0, 0, 0);
|
|
+ if (rv < 0) {
|
|
+ printf("write_lockspace error %d\n", rv);
|
|
+ return -1;
|
|
+ }
|
|
+ */
|
|
+
|
|
+ rv = sanlock_write_resource(res1, 0, 0, 0);
|
|
+ if (rv < 0) {
|
|
+ printf("write_resource1 error %d\n", rv);
|
|
+ return -1;
|
|
+ }
|
|
+ rv = sanlock_write_resource(res2, 0, 0, 0);
|
|
+ if (rv < 0) {
|
|
+ printf("write_resource2 error %d\n", rv);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ fd = sanlock_register();
|
|
+ if (fd < 0) {
|
|
+ printf("register error %d\n", fd);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ printf("acquiring both leases for registered fd %d\n", fd);
|
|
+
|
|
+ rv = sanlock_acquire(fd, -1, 0, 1, &res1, NULL);
|
|
+ if (rv < 0) {
|
|
+ printf("acquire res1 error %d\n", rv);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ rv = sanlock_acquire(fd, -1, 0, 1, &res2, NULL);
|
|
+ if (rv < 0) {
|
|
+ printf("acquire res2 error %d\n", rv);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ printf("sleeping... check that both leases are held\n");
|
|
+ sleep(20);
|
|
+
|
|
+ printf("sending res1 release header only\n");
|
|
+ rv = send_header(fd, SM_CMD_RELEASE, 0, sizeof(struct sanlk_resource), 1, -1);
|
|
+ if (rv < 0)
|
|
+ printf("send bad header error %d\n", rv);
|
|
+ else
|
|
+ printf("send bad header ok\n");
|
|
+
|
|
+ printf("sending res2 release interleaved\n");
|
|
+ rv = sanlock_release(fd, -1, 0, 1, &res2);
|
|
+ if (rv < 0)
|
|
+ printf("odd release res2 error %d\n", rv);
|
|
+ else
|
|
+ printf("odd release res2 ok\n");
|
|
+
|
|
+ printf("sending res1 release body only\n");
|
|
+ rv = send(fd, res1, sizeof(struct sanlk_resource), 0);
|
|
+ if (rv < 0)
|
|
+ printf("send bad body error %d\n", rv);
|
|
+ else
|
|
+ printf("send bad body ok\n");
|
|
+
|
|
+ /*
|
|
+ * This is not simulating the recv() that each sanlock_release
|
|
+ * would do in libsanlock to get a result for each release.
|
|
+ * These would likely just cause the client block indefinitely
|
|
+ * waiting for a reply that won't come because the bad release
|
|
+ * calls were ignored.
|
|
+ */
|
|
+
|
|
+ printf("sleeping... check which leases are held\n");
|
|
+ sleep(20);
|
|
+
|
|
+ printf("releasing both leases normally\n");
|
|
+ rv = sanlock_release(fd, -1, 0, 1, &res1);
|
|
+ if (rv < 0)
|
|
+ printf("release res1 error %d\n", rv);
|
|
+ else
|
|
+ printf("release res1 ok\n");
|
|
+
|
|
+ rv = sanlock_release(fd, -1, 0, 1, &res2);
|
|
+ if (rv < 0)
|
|
+ printf("release res2 error %d\n", rv);
|
|
+ else
|
|
+ printf("release res2 ok\n");
|
|
+
|
|
+ printf("sleeping... check that both leases are released\n");
|
|
+ sleep(20);
|
|
+
|
|
+ printf("acquiring lease res1\n");
|
|
+ rv = sanlock_acquire(fd, -1, 0, 1, &res1, NULL);
|
|
+ if (rv < 0)
|
|
+ printf("acquire res1 error %d\n", rv);
|
|
+ else
|
|
+ printf("acquire res1 ok\n");
|
|
+
|
|
+ /* exit should close our registered connection and
|
|
+ automatically release res1 */
|
|
+
|
|
+ printf("exiting... check if held lease is released after exit\n");
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
--
|
|
2.7.5
|
|
|