nfs-utils/nfs-utils-2.3.3-nfsdcld-upstream-update.patch

4049 lines
113 KiB
Diff
Raw Normal View History

diff --git a/.gitignore b/.gitignore
index e91e7a25..e97b31f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,6 +54,7 @@ utils/rquotad/rquotad
utils/rquotad/rquota.h
utils/rquotad/rquota_xdr.c
utils/showmount/showmount
+utils/nfsdcld/nfsdcld
utils/nfsdcltrack/nfsdcltrack
utils/statd/statd
tools/locktest/testlk
diff --git a/aclocal/ax_gcc_func_attribute.m4 b/aclocal/ax_gcc_func_attribute.m4
new file mode 100644
index 00000000..098c9aad
--- /dev/null
+++ b/aclocal/ax_gcc_func_attribute.m4
@@ -0,0 +1,238 @@
+# ===========================================================================
+# https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
+#
+# DESCRIPTION
+#
+# This macro checks if the compiler supports one of GCC's function
+# attributes; many other compilers also provide function attributes with
+# the same syntax. Compiler warnings are used to detect supported
+# attributes as unsupported ones are ignored by default so quieting
+# warnings when using this macro will yield false positives.
+#
+# The ATTRIBUTE parameter holds the name of the attribute to be checked.
+#
+# If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE>.
+#
+# The macro caches its result in the ax_cv_have_func_attribute_<attribute>
+# variable.
+#
+# The macro currently supports the following function attributes:
+#
+# alias
+# aligned
+# alloc_size
+# always_inline
+# artificial
+# cold
+# const
+# constructor
+# constructor_priority for constructor attribute with priority
+# deprecated
+# destructor
+# dllexport
+# dllimport
+# error
+# externally_visible
+# fallthrough
+# flatten
+# format
+# format_arg
+# gnu_inline
+# hot
+# ifunc
+# leaf
+# malloc
+# noclone
+# noinline
+# nonnull
+# noreturn
+# nothrow
+# optimize
+# pure
+# sentinel
+# sentinel_position
+# unused
+# used
+# visibility
+# warning
+# warn_unused_result
+# weak
+# weakref
+#
+# Unsupported function attributes will be tested with a prototype
+# returning an int and not accepting any arguments and the result of the
+# check might be wrong or meaningless so use with care.
+#
+# LICENSE
+#
+# Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice
+# and this notice are preserved. This file is offered as-is, without any
+# warranty.
+
+#serial 9
+
+AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [
+ AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1])
+
+ AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([
+ m4_case([$1],
+ [alias], [
+ int foo( void ) { return 0; }
+ int bar( void ) __attribute__(($1("foo")));
+ ],
+ [aligned], [
+ int foo( void ) __attribute__(($1(32)));
+ ],
+ [alloc_size], [
+ void *foo(int a) __attribute__(($1(1)));
+ ],
+ [always_inline], [
+ inline __attribute__(($1)) int foo( void ) { return 0; }
+ ],
+ [artificial], [
+ inline __attribute__(($1)) int foo( void ) { return 0; }
+ ],
+ [cold], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [const], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [constructor_priority], [
+ int foo( void ) __attribute__((__constructor__(65535/2)));
+ ],
+ [constructor], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [deprecated], [
+ int foo( void ) __attribute__(($1("")));
+ ],
+ [destructor], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [dllexport], [
+ __attribute__(($1)) int foo( void ) { return 0; }
+ ],
+ [dllimport], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [error], [
+ int foo( void ) __attribute__(($1("")));
+ ],
+ [externally_visible], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [fallthrough], [
+ int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }};
+ ],
+ [flatten], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [format], [
+ int foo(const char *p, ...) __attribute__(($1(printf, 1, 2)));
+ ],
+ [format_arg], [
+ char *foo(const char *p) __attribute__(($1(1)));
+ ],
+ [gnu_inline], [
+ inline __attribute__(($1)) int foo( void ) { return 0; }
+ ],
+ [hot], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [ifunc], [
+ int my_foo( void ) { return 0; }
+ static int (*resolve_foo(void))(void) { return my_foo; }
+ int foo( void ) __attribute__(($1("resolve_foo")));
+ ],
+ [leaf], [
+ __attribute__(($1)) int foo( void ) { return 0; }
+ ],
+ [malloc], [
+ void *foo( void ) __attribute__(($1));
+ ],
+ [noclone], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [noinline], [
+ __attribute__(($1)) int foo( void ) { return 0; }
+ ],
+ [nonnull], [
+ int foo(char *p) __attribute__(($1(1)));
+ ],
+ [noreturn], [
+ void foo( void ) __attribute__(($1));
+ ],
+ [nothrow], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [optimize], [
+ __attribute__(($1(3))) int foo( void ) { return 0; }
+ ],
+ [pure], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [sentinel], [
+ int foo(void *p, ...) __attribute__(($1));
+ ],
+ [sentinel_position], [
+ int foo(void *p, ...) __attribute__(($1(1)));
+ ],
+ [returns_nonnull], [
+ void *foo( void ) __attribute__(($1));
+ ],
+ [unused], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [used], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [visibility], [
+ int foo_def( void ) __attribute__(($1("default")));
+ int foo_hid( void ) __attribute__(($1("hidden")));
+ int foo_int( void ) __attribute__(($1("internal")));
+ int foo_pro( void ) __attribute__(($1("protected")));
+ ],
+ [warning], [
+ int foo( void ) __attribute__(($1("")));
+ ],
+ [warn_unused_result], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [weak], [
+ int foo( void ) __attribute__(($1));
+ ],
+ [weakref], [
+ static int foo( void ) { return 0; }
+ static int bar( void ) __attribute__(($1("foo")));
+ ],
+ [
+ m4_warn([syntax], [Unsupported attribute $1, the test may fail])
+ int foo( void ) __attribute__(($1));
+ ]
+ )], [])
+ ],
+ dnl GCC doesn't exit with an error if an unknown attribute is
+ dnl provided but only outputs a warning, so accept the attribute
+ dnl only if no warning were issued.
+ [AS_IF([test -s conftest.err],
+ [AS_VAR_SET([ac_var], [no])],
+ [AS_VAR_SET([ac_var], [yes])])],
+ [AS_VAR_SET([ac_var], [no])])
+ ])
+
+ AS_IF([test yes = AS_VAR_GET([ac_var])],
+ [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1,
+ [Define to 1 if the system has the `$1' function attribute])], [])
+
+ AS_VAR_POPDEF([ac_var])
+])
diff --git a/configure.ac b/configure.ac
index 48eb9eb6..13ea957f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -238,6 +238,12 @@ else
AM_CONDITIONAL(MOUNT_CONFIG, [test "$enable_mount" = "yes"])
fi
+AC_ARG_ENABLE(nfsdcld,
+ [AC_HELP_STRING([--disable-nfsdcld],
+ [disable NFSv4 clientid tracking daemon @<:@default=no@:>@])],
+ enable_nfsdcld=$enableval,
+ enable_nfsdcld="yes")
+
AC_ARG_ENABLE(nfsdcltrack,
[AC_HELP_STRING([--disable-nfsdcltrack],
[disable NFSv4 clientid tracking programs @<:@default=no@:>@])],
@@ -317,6 +323,20 @@ if test "$enable_nfsv4" = yes; then
dnl Check for sqlite3
AC_SQLITE3_VERS
+ if test "$enable_nfsdcld" = "yes"; then
+ AC_CHECK_HEADERS([libgen.h sys/inotify.h], ,
+ AC_MSG_ERROR([Cannot find header needed for nfsdcld]))
+
+ case $libsqlite3_cv_is_recent in
+ yes) ;;
+ unknown)
+ dnl do not fail when cross-compiling
+ AC_MSG_WARN([assuming sqlite is at least v3.3]) ;;
+ *)
+ AC_MSG_ERROR([nfsdcld requires sqlite-devel]) ;;
+ esac
+ fi
+
if test "$enable_nfsdcltrack" = "yes"; then
AC_CHECK_HEADERS([libgen.h sys/inotify.h], ,
AC_MSG_ERROR([Cannot find header needed for nfsdcltrack]))
@@ -332,6 +352,7 @@ if test "$enable_nfsv4" = yes; then
fi
else
+ enable_nfsdcld="no"
enable_nfsdcltrack="no"
fi
@@ -342,6 +363,7 @@ if test "$enable_nfsv41" = yes; then
fi
dnl enable nfsidmap when its support by libnfsidmap
+AM_CONDITIONAL(CONFIG_NFSDCLD, [test "$enable_nfsdcld" = "yes" ])
AM_CONDITIONAL(CONFIG_NFSDCLTRACK, [test "$enable_nfsdcltrack" = "yes" ])
@@ -581,6 +603,7 @@ CHECK_CCSUPPORT([-Werror=format-overflow=2], [flg1])
CHECK_CCSUPPORT([-Werror=int-conversion], [flg2])
CHECK_CCSUPPORT([-Werror=incompatible-pointer-types], [flg3])
CHECK_CCSUPPORT([-Werror=misleading-indentation], [flg4])
+AX_GCC_FUNC_ATTRIBUTE([format])
AC_SUBST([AM_CFLAGS], ["$my_am_cflags $flg1 $flg2 $flg3 $flg4"])
@@ -617,8 +640,10 @@ AC_CONFIG_FILES([
tools/mountstats/Makefile
tools/nfs-iostat/Makefile
tools/nfsconf/Makefile
+ tools/clddb-tool/Makefile
utils/Makefile
utils/blkmapd/Makefile
+ utils/nfsdcld/Makefile
utils/nfsdcltrack/Makefile
utils/exportfs/Makefile
utils/gssd/Makefile
diff --git a/nfs.conf b/nfs.conf
index d48a4e55..56172c49 100644
--- a/nfs.conf
+++ b/nfs.conf
@@ -36,6 +36,10 @@ use-gss-proxy=1
# state-directory-path=/var/lib/nfs
# ha-callout=
#
+[nfsdcld]
+# debug=0
+# storagedir=/var/lib/nfs/nfsdcld
+#
[nfsdcltrack]
# debug=0
# storagedir=/var/lib/nfs/nfsdcltrack
diff --git a/support/include/cld.h b/support/include/cld.h
index f14a9ab0..88d3b63e 100644
--- a/support/include/cld.h
+++ b/support/include/cld.h
@@ -23,16 +23,22 @@
#define _NFSD_CLD_H
/* latest upcall version available */
-#define CLD_UPCALL_VERSION 1
+#define CLD_UPCALL_VERSION 2
/* defined by RFC3530 */
#define NFS4_OPAQUE_LIMIT 1024
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
enum cld_command {
Cld_Create, /* create a record for this cm_id */
Cld_Remove, /* remove record of this cm_id */
Cld_Check, /* is this cm_id allowed? */
Cld_GraceDone, /* grace period is complete */
+ Cld_GraceStart, /* grace start (upload client records) */
+ Cld_GetVersion, /* query max supported upcall version */
};
/* representation of long-form NFSv4 client ID */
@@ -41,6 +47,17 @@ struct cld_name {
unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */
} __attribute__((packed));
+/* sha256 hash of the kerberos principal */
+struct cld_princhash {
+ uint8_t cp_len; /* length of cp_data */
+ unsigned char cp_data[SHA256_DIGEST_SIZE]; /* hash of principal */
+} __attribute__((packed));
+
+struct cld_clntinfo {
+ struct cld_name cc_name;
+ struct cld_princhash cc_princhash;
+} __attribute__((packed));
+
/* message struct for communication with userspace */
struct cld_msg {
uint8_t cm_vers; /* upcall version */
@@ -50,7 +67,28 @@ struct cld_msg {
union {
int64_t cm_gracetime; /* grace period start time */
struct cld_name cm_name;
+ uint8_t cm_version; /* for getting max version */
+ } __attribute__((packed)) cm_u;
+} __attribute__((packed));
+
+/* version 2 message can include hash of kerberos principal */
+struct cld_msg_v2 {
+ uint8_t cm_vers; /* upcall version */
+ uint8_t cm_cmd; /* upcall command */
+ int16_t cm_status; /* return code */
+ uint32_t cm_xid; /* transaction id */
+ union {
+ struct cld_name cm_name;
+ uint8_t cm_version; /* for getting max version */
+ struct cld_clntinfo cm_clntinfo; /* name & princ hash */
} __attribute__((packed)) cm_u;
} __attribute__((packed));
+struct cld_msg_hdr {
+ uint8_t cm_vers; /* upcall version */
+ uint8_t cm_cmd; /* upcall command */
+ int16_t cm_status; /* return code */
+ uint32_t cm_xid; /* transaction id */
+} __attribute__((packed));
+
#endif /* !_NFSD_CLD_H */
diff --git a/support/include/xcommon.h b/support/include/xcommon.h
index 23c9a135..30b0403b 100644
--- a/support/include/xcommon.h
+++ b/support/include/xcommon.h
@@ -9,6 +9,10 @@
#ifndef _XMALLOC_H
#define _MALLOC_H
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <sys/types.h>
#include <fcntl.h>
#include <limits.h>
@@ -25,9 +29,15 @@
#define streq(s, t) (strcmp ((s), (t)) == 0)
-/* Functions in sundries.c that are used in mount.c and umount.c */
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
+#define X_FORMAT(_x) __attribute__((__format__ _x))
+#else
+#define X_FORMAT(_x)
+#endif
+
+/* Functions in sundries.c that are used in mount.c and umount.c */
char *canonicalize (const char *path);
-void nfs_error (const char *fmt, ...);
+void nfs_error (const char *fmt, ...) X_FORMAT((printf, 1, 2));
void *xmalloc (size_t size);
void *xrealloc(void *p, size_t size);
void xfree(void *);
@@ -36,9 +46,9 @@ char *xstrndup (const char *s, int n);
char *xstrconcat2 (const char *, const char *);
char *xstrconcat3 (const char *, const char *, const char *);
char *xstrconcat4 (const char *, const char *, const char *, const char *);
-void die (int errcode, const char *fmt, ...);
+void die (int errcode, const char *fmt, ...) X_FORMAT((printf, 2, 3));
-extern void die(int err, const char *fmt, ...);
+extern void die(int err, const char *fmt, ...) X_FORMAT((printf, 2, 3));
extern void (*at_die)(void);
/* exit status - bits below are ORed */
diff --git a/support/include/xlog.h b/support/include/xlog.h
index a11463ed..32ff5a1b 100644
--- a/support/include/xlog.h
+++ b/support/include/xlog.h
@@ -7,6 +7,10 @@
#ifndef XLOG_H
#define XLOG_H
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <stdarg.h>
/* These are logged always. L_FATAL also does exit(1) */
@@ -35,6 +39,12 @@ struct xlog_debugfac {
int df_fac;
};
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
+#define XLOG_FORMAT(_x) __attribute__((__format__ _x))
+#else
+#define XLOG_FORMAT(_x)
+#endif
+
extern int export_errno;
void xlog_open(char *progname);
void xlog_stderr(int on);
@@ -43,10 +53,10 @@ void xlog_config(int fac, int on);
void xlog_sconfig(char *, int on);
void xlog_from_conffile(char *);
int xlog_enabled(int fac);
-void xlog(int fac, const char *fmt, ...);
-void xlog_warn(const char *fmt, ...);
-void xlog_err(const char *fmt, ...);
-void xlog_errno(int err, const char *fmt, ...);
-void xlog_backend(int fac, const char *fmt, va_list args);
+void xlog(int fac, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3));
+void xlog_warn(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2));
+void xlog_err(const char *fmt, ...) XLOG_FORMAT((printf, 1, 2));
+void xlog_errno(int err, const char *fmt, ...) XLOG_FORMAT((printf, 2, 3));
+void xlog_backend(int fac, const char *fmt, va_list args) XLOG_FORMAT((printf, 2, 0));
#endif /* XLOG_H */
diff --git a/support/junction/junction.c b/support/junction/junction.c
index ab6caa61..41cce261 100644
--- a/support/junction/junction.c
+++ b/support/junction/junction.c
@@ -23,6 +23,10 @@
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <sys/types.h>
#include <sys/stat.h>
diff --git a/support/misc/file.c b/support/misc/file.c
index 4065376e..74973169 100644
--- a/support/misc/file.c
+++ b/support/misc/file.c
@@ -18,6 +18,10 @@
* along with nfs-utils. If not, see <http://www.gnu.org/licenses/>.
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <sys/stat.h>
#include <string.h>
diff --git a/support/misc/mountpoint.c b/support/misc/mountpoint.c
index 9f9ce44e..4205b41c 100644
--- a/support/misc/mountpoint.c
+++ b/support/misc/mountpoint.c
@@ -3,6 +3,10 @@
* check if a given path is a mountpoint
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <string.h>
#include "xcommon.h"
#include <sys/stat.h>
diff --git a/support/nfs/cacheio.c b/support/nfs/cacheio.c
index 9dc4cf1c..7c4cf373 100644
--- a/support/nfs/cacheio.c
+++ b/support/nfs/cacheio.c
@@ -15,6 +15,10 @@
*
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <nfslib.h>
#include <stdio.h>
#include <stdio_ext.h>
diff --git a/support/nfs/svc_create.c b/support/nfs/svc_create.c
index ef7ff05f..7b595f89 100644
--- a/support/nfs/svc_create.c
+++ b/support/nfs/svc_create.c
@@ -184,7 +184,7 @@ svc_create_sock(const struct sockaddr *sap, socklen_t salen,
type = SOCK_STREAM;
break;
default:
- xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %u",
+ xlog(D_GENERAL, "%s: Unrecognized bind address semantics: %lu",
__func__, nconf->nc_semantics);
return -1;
}
diff --git a/support/nsm/rpc.c b/support/nsm/rpc.c
index ae49006c..08b4746f 100644
--- a/support/nsm/rpc.c
+++ b/support/nsm/rpc.c
@@ -182,7 +182,7 @@ nsm_xmit_getport(const int sock, const struct sockaddr_in *sin,
uint32_t xid;
XDR xdr;
- xlog(D_CALL, "Sending PMAP_GETPORT for %u, %u, udp", program, version);
+ xlog(D_CALL, "Sending PMAP_GETPORT for %lu, %lu, udp", program, version);
nsm_init_xdrmem(msgbuf, NSM_MAXMSGSIZE, &xdr);
xid = nsm_init_rpc_header(PMAPPROG, PMAPVERS,
diff --git a/systemd/Makefile.am b/systemd/Makefile.am
index d54518bc..53458c62 100644
--- a/systemd/Makefile.am
+++ b/systemd/Makefile.am
@@ -36,6 +36,11 @@ unit_files += \
endif
endif
+if CONFIG_NFSDCLD
+unit_files += \
+ nfsdcld.service
+endif
+
man5_MANS = nfs.conf.man
man7_MANS = nfs.systemd.man
EXTRA_DIST = $(unit_files) $(man5_MANS) $(man7_MANS)
diff --git a/systemd/nfs-server.service b/systemd/nfs-server.service
index 136552b5..24118d69 100644
--- a/systemd/nfs-server.service
+++ b/systemd/nfs-server.service
@@ -6,10 +6,12 @@ Requires= nfs-mountd.service
Wants=rpcbind.socket network-online.target
Wants=rpc-statd.service nfs-idmapd.service
Wants=rpc-statd-notify.service
+Wants=nfsdcld.service
After= network-online.target local-fs.target
After= proc-fs-nfsd.mount rpcbind.socket nfs-mountd.service
After= nfs-idmapd.service rpc-statd.service
+After= nfsdcld.service
Before= rpc-statd-notify.service
# GSS services dependencies and ordering
diff --git a/systemd/nfsdcld.service b/systemd/nfsdcld.service
new file mode 100644
index 00000000..a32d2430
--- /dev/null
+++ b/systemd/nfsdcld.service
@@ -0,0 +1,10 @@
+[Unit]
+Description=NFSv4 Client Tracking Daemon
+DefaultDependencies=no
+Conflicts=umount.target
+Requires=rpc_pipefs.target proc-fs-nfsd.mount
+After=rpc_pipefs.target proc-fs-nfsd.mount
+
+[Service]
+Type=forking
+ExecStart=/usr/sbin/nfsdcld
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 4266da49..53e61170 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -8,6 +8,10 @@ endif
OPTDIRS += nfsconf
+if CONFIG_NFSDCLD
+OPTDIRS += clddb-tool
+endif
+
SUBDIRS = locktest rpcdebug nlmtest mountstats nfs-iostat $(OPTDIRS)
MAINTAINERCLEANFILES = Makefile.in
diff --git a/tools/clddb-tool/Makefile.am b/tools/clddb-tool/Makefile.am
new file mode 100644
index 00000000..15a8fd47
--- /dev/null
+++ b/tools/clddb-tool/Makefile.am
@@ -0,0 +1,13 @@
+## Process this file with automake to produce Makefile.in
+PYTHON_FILES = clddb-tool.py
+
+man8_MANS = clddb-tool.man
+
+EXTRA_DIST = $(man8_MANS) $(PYTHON_FILES)
+
+all-local: $(PYTHON_FILES)
+
+install-data-hook:
+ $(INSTALL) -m 755 clddb-tool.py $(DESTDIR)$(sbindir)/clddb-tool
+
+MAINTAINERCLEANFILES=Makefile.in
diff --git a/tools/clddb-tool/clddb-tool.man b/tools/clddb-tool/clddb-tool.man
new file mode 100644
index 00000000..e80b2c05
--- /dev/null
+++ b/tools/clddb-tool/clddb-tool.man
@@ -0,0 +1,83 @@
+.\"
+.\" clddb-tool(8)
+.\"
+.TH clddb-tool 8 "07 Aug 2019"
+.SH NAME
+clddb-tool \- Tool for manipulating the nfsdcld sqlite database
+.SH SYNOPSIS
+.B clddb-tool
+.RB [ \-h | \-\-help ]
+.P
+.B clddb-tool
+.RB [ \-p | \-\-path
+.IR dbpath ]
+.B fix-table-names
+.RB [ \-h | \-\-help ]
+.P
+.B clddb-tool
+.RB [ \-p | \-\-path
+.IR dbpath ]
+.B downgrade-schema
+.RB [ \-h | \-\-help ]
+.RB [ \-v | \-\-version
+.IR to-version ]
+.P
+.B clddb-tool
+.RB [ \-p | \-\-path
+.IR dbpath ]
+.B print
+.RB [ \-h | \-\-help ]
+.RB [ \-s | \-\-summary ]
+.P
+
+.SH DESCRIPTION
+.RB "The " clddb-tool " command is provided to perform some manipulation of the nfsdcld sqlite database schema and to print the contents of the database."
+.SS Sub-commands
+Valid
+.B clddb-tool
+subcommands are:
+.IP "\fBfix-table-names\fP"
+.RB "A previous version of " nfsdcld "(8) contained a bug that corrupted the reboot epoch table names. This sub-command will fix those table names."
+.IP "\fBdowngrade-schema\fP"
+Downgrade the database schema. Currently the schema can only be downgraded from version 4 to version 3.
+.IP "\fBprint\fP"
+Display the contents of the database. Prints the schema version and the values of the current and recovery epochs. If the
+.BR \-s | \-\-summary
+option is not given, also prints the clients in the reboot epoch tables.
+.SH OPTIONS
+.SS Options valid for all sub-commands
+.TP
+.B \-h, \-\-help
+Show the help message and exit
+.TP
+\fB\-p \fIdbpath\fR, \fB\-\-path \fIdbpath\fR
+Open the sqlite database located at
+.I dbpath
+instead of
+.IR /var/lib/nfs/nfsdcld/main.sqlite ". "
+This is mainly for testing purposes.
+.SS Options specific to the downgrade-schema sub-command
+.TP
+\fB\-v \fIto-version\fR, \fB\-\-version \fIto-version\fR
+The schema version to downgrade to. Currently the schema can only be downgraded to version 3.
+.SS Options specific to the print sub-command
+.TP
+.B \-s, \-\-summary
+Do not list the clients in the reboot epoch tables in the output.
+.SH NOTES
+The
+.B clddb-tool
+command prints a warning and asks for confirmation before running the
+.B fix-table-names
+or
+.B downgrade-schema
+subcommands if
+.BR nfsdcld (8)
+is running.
+.SH FILES
+.TP
+.B /var/lib/nfs/nfsdcld/main.sqlite
+.SH SEE ALSO
+.BR nfsdcld (8)
+.SH AUTHOR
+Scott Mayhew <smayhew@redhat.com>
diff --git a/tools/clddb-tool/clddb-tool.py b/tools/clddb-tool/clddb-tool.py
new file mode 100644
index 00000000..8a661318
--- /dev/null
+++ b/tools/clddb-tool/clddb-tool.py
@@ -0,0 +1,266 @@
+#!/usr/bin/python3
+"""Tool for manipulating the nfsdcld sqlite database
+"""
+
+__copyright__ = """
+Copyright (C) 2019 Scott Mayhew <smayhew@redhat.com>
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+MA 02110-1301, USA.
+"""
+
+import argparse
+import os
+import sqlite3
+import sys
+
+
+class CldDb():
+ def __init__(self, path):
+ self.con = sqlite3.connect(path)
+ self.con.row_factory = sqlite3.Row
+ for row in self.con.execute('select value from parameters '
+ 'where key = "version"'):
+ self.version = int(row['value'])
+ for row in self.con.execute('select * from grace'):
+ self.current = int(row['current'])
+ self.recovery = int(row['recovery'])
+
+ def __del__(self):
+ self.con.close()
+
+ def __str__(self):
+ return ('Schema version: {self.version} '
+ 'current epoch: {self.current} '
+ 'recovery epoch: {self.recovery}'.format(self=self))
+
+ def _print_clients(self, epoch):
+ if epoch:
+ for row in self.con.execute('select * from "rec-{:016x}"'
+ .format(epoch)):
+ if self.version >= 4:
+ if row['princhash'] is not None:
+ princhash = row['princhash'].hex()
+ else:
+ princhash = "(null)"
+ print('id = {}, princhash = {}'
+ .format(row['id'].decode(), princhash))
+ else:
+ print('id = {}'.format(row['id'].decode()))
+
+ def print_current_clients(self):
+ print('Clients in current epoch:')
+ self._print_clients(self.current)
+
+ def print_recovery_clients(self):
+ if self.recovery:
+ print('Clients in recovery epoch:')
+ self._print_clients(self.recovery)
+
+ def check_bad_table_names(self):
+ bad_names = []
+ for row in self.con.execute('select name from sqlite_master '
+ 'where type = "table" '
+ 'and name like "%rec-%" '
+ 'and length(name) < 20'):
+ bad_names.append(row['name'])
+ return bad_names
+
+    def fix_bad_table_names(self):
+        """Rename known-epoch bad tables and drop unknown ones, atomically."""
+        try:
+            self.con.execute('begin exclusive transaction')
+            bad_names = self.check_bad_table_names()
+            for bad_name in bad_names:
+                epoch = int(bad_name.split('-')[1], base=16)
+                if epoch == self.current or epoch == self.recovery:
+                    which = 'current' if epoch == self.current else 'recovery'
+                    print('found invalid table name {} for {} epoch'
+                          .format(bad_name, which))
+                    self.con.execute('alter table "{}" '
+                                     'rename to "rec-{:016x}"'
+                                     .format(bad_name, epoch))
+                    print('renamed to rec-{:016x}'.format(epoch))
+                else:
+                    print('found invalid table name {} for unknown epoch {}'
+                          .format(bad_name, epoch))
+                    self.con.execute('drop table "{}"'.format(bad_name))
+                    print('dropped table {}'.format(bad_name))
+        except sqlite3.Error as err:
+            # Messages printed above are undone by the rollback; say so.
+            self.con.rollback()
+            print('Failed to fix table names: {}'.format(err))
+        else:
+            self.con.commit()
+
+    def has_princ_data(self):
+        """Return True if a current/recovery epoch row has a princhash (v4+)."""
+        if self.version < 4:
+            return False
+        for row in self.con.execute('select count(*) '
+                                    'from "rec-{:016x}" '
+                                    'where princhash not null'
+                                    .format(self.current)):
+            count = row[0]
+        if self.recovery:
+            # Count the recovery epoch's table, not the current one again.
+            for row in self.con.execute('select count(*) '
+                                        'from "rec-{:016x}" '
+                                        'where princhash not null'
+                                        .format(self.recovery)):
+                count = count + row[0]
+        return count > 0
+
+ def _downgrade_table_v4_to_v3(self, epoch):
+ if not self.con.in_transaction:
+ raise sqlite3.Error
+ try:
+ self.con.execute('create table "new_rec-{:016x}" '
+ '(id blob primary key)'.format(epoch))
+ self.con.execute('insert into "new_rec-{:016x}" '
+ 'select id from "rec-{:016x}"'
+ .format(epoch, epoch))
+ self.con.execute('drop table "rec-{:016x}"'.format(epoch))
+ self.con.execute('alter table "new_rec-{:016x}" '
+ 'rename to "rec-{:016x}"'
+ .format(epoch, epoch))
+ except sqlite3.Error:
+ raise
+
+ def downgrade_schema_v4_to_v3(self):
+ try:
+ self.con.execute('begin exclusive transaction')
+ for row in self.con.execute('select value from parameters '
+ 'where key = "version"'):
+ version = int(row['value'])
+ if version != self.version:
+ raise sqlite3.Error
+ for row in self.con.execute('select * from grace'):
+ current = int(row['current'])
+ recovery = int(row['recovery'])
+ if current != self.current:
+ raise sqlite3.Error
+ if recovery != self.recovery:
+ raise sqlite3.Error
+ self._downgrade_table_v4_to_v3(current)
+ if recovery:
+ self._downgrade_table_v4_to_v3(recovery)
+ self.con.execute('update parameters '
+ 'set value = "3" '
+ 'where key = "version"')
+ self.version = 3
+ except sqlite3.Error:
+ self.con.rollback()
+ print('Downgrade failed')
+ else:
+ self.con.commit()
+ print('Downgrade successful')
+
+
+def nfsdcld_active():
+ rc = os.system('ps -C nfsdcld >/dev/null 2>/dev/null')
+ if rc == 0:
+ return True
+ return False
+
+
+def fix_table_names_command(db, args):
+ if nfsdcld_active():
+ print('Warning: nfsdcld is running!')
+ ans = input('Continue? ')
+ if ans.lower() not in ['y', 'yes']:
+ print('Operation canceled.')
+ return
+ bad_names = db.check_bad_table_names()
+ if not bad_names:
+ print('No invalid table names found.')
+ return
+ db.fix_bad_table_names()
+
+
+def downgrade_schema_command(db, args):
+ if nfsdcld_active():
+ print('Warning: nfsdcld is running!')
+ ans = input('Continue? ')
+ if ans.lower() not in ['y', 'yes']:
+ print('Operation canceled')
+ return
+ if db.version != 4:
+ print('Cannot downgrade database from schema version {}.'
+ .format(db.version))
+ return
+ if args.version != 3:
+ print('Cannot downgrade to version {}.'.format(args.version))
+ return
+ bad_names = db.check_bad_table_names()
+ if bad_names:
+ print('Invalid table names detected.')
+ print('Please run "{} fix-table-names" before downgrading the schema.'
+ .format(sys.argv[0]))
+ return
+ if db.has_princ_data():
+ print('Warning: database has principal data, which will be erased.')
+ ans = input('Continue? ')
+ if ans.lower() not in ['y', 'yes']:
+ print('Operation canceled')
+ return
+ db.downgrade_schema_v4_to_v3()
+
+
+def print_command(db, args):
+ print(str(db))
+ if not args.summary:
+ bad_names = db.check_bad_table_names()
+ if bad_names:
+ print('Invalid table names detected.')
+ print('Please run "{} fix-table-names".'.format(sys.argv[0]))
+ return
+ db.print_current_clients()
+ db.print_recovery_clients()
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p', '--path',
+ default='/var/lib/nfs/nfsdcld/main.sqlite',
+ help='path to the database '
+ '(default: /var/lib/nfs/nfsdcld/main.sqlite)')
+ subparsers = parser.add_subparsers(help='sub-command help')
+ fix_parser = subparsers.add_parser('fix-table-names',
+ help='fix invalid table names')
+ fix_parser.set_defaults(func=fix_table_names_command)
+ downgrade_parser = subparsers.add_parser('downgrade-schema',
+ help='downgrade database schema')
+ downgrade_parser.add_argument('-v', '--version', type=int, choices=[3],
+ default=3,
+ help='version to downgrade to')
+ downgrade_parser.set_defaults(func=downgrade_schema_command)
+ print_parser = subparsers.add_parser('print',
+ help='print database info')
+ print_parser.add_argument('-s', '--summary', default=False,
+ action='store_true',
+ help='print summary only')
+ print_parser.set_defaults(func=print_command)
+ args = parser.parse_args()
+ if not os.path.exists(args.path):
+ return parser.print_usage()
+ clddb = CldDb(args.path)
+ return args.func(clddb, args)
+
+
+if __name__ == '__main__':
+ if len(sys.argv) == 1:
+ sys.argv.extend(['print', '--summary'])
+ main()
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 0a5b062c..4c930a4b 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -19,6 +19,10 @@ if CONFIG_MOUNT
OPTDIRS += mount
endif
+if CONFIG_NFSDCLD
+OPTDIRS += nfsdcld
+endif
+
if CONFIG_NFSDCLTRACK
OPTDIRS += nfsdcltrack
endif
diff --git a/utils/exportfs/exportfs.c b/utils/exportfs/exportfs.c
index cd3c979d..4b9634b7 100644
--- a/utils/exportfs/exportfs.c
+++ b/utils/exportfs/exportfs.c
@@ -644,6 +644,9 @@ out:
return result;
}
+#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
+__attribute__((format (printf, 2, 3)))
+#endif
static char
dumpopt(char c, char *fmt, ...)
{
diff --git a/utils/mount/fstab.c b/utils/mount/fstab.c
index eedbddab..8b0aaf1a 100644
--- a/utils/mount/fstab.c
+++ b/utils/mount/fstab.c
@@ -7,6 +7,10 @@
* - Moved code to nfs-utils/support/nfs from util-linux/mount.
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
diff --git a/utils/mountd/cache.c b/utils/mountd/cache.c
index a054ce6f..c73e29be 100644
--- a/utils/mountd/cache.c
+++ b/utils/mountd/cache.c
@@ -967,8 +967,7 @@ lookup_export(char *dom, char *path, struct addrinfo *ai)
} else if (found_type == i && found->m_warned == 0) {
xlog(L_WARNING, "%s exported to both %s and %s, "
"arbitrarily choosing options from first",
- path, found->m_client->m_hostname, exp->m_client->m_hostname,
- dom);
+ path, found->m_client->m_hostname, exp->m_client->m_hostname);
found->m_warned = 1;
}
}
diff --git a/utils/mountd/mountd.c b/utils/mountd/mountd.c
index 086c39bf..0b891121 100644
--- a/utils/mountd/mountd.c
+++ b/utils/mountd/mountd.c
@@ -209,10 +209,10 @@ killer (int sig)
}
static void
-sig_hup (int sig)
+sig_hup (int UNUSED(sig))
{
/* don't exit on SIGHUP */
- xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n", sig);
+ xlog (L_NOTICE, "Received SIGHUP... Ignoring.\n");
return;
}
diff --git a/utils/nfsdcld/Makefile.am b/utils/nfsdcld/Makefile.am
new file mode 100644
index 00000000..273d64f1
--- /dev/null
+++ b/utils/nfsdcld/Makefile.am
@@ -0,0 +1,15 @@
+## Process this file with automake to produce Makefile.in
+
+man8_MANS = nfsdcld.man
+EXTRA_DIST = $(man8_MANS)
+
+AM_CFLAGS += -D_LARGEFILE64_SOURCE
+sbin_PROGRAMS = nfsdcld
+
+nfsdcld_SOURCES = nfsdcld.c sqlite.c legacy.c
+nfsdcld_LDADD = ../../support/nfs/libnfs.la $(LIBEVENT) $(LIBSQLITE) $(LIBCAP)
+
+noinst_HEADERS = sqlite.h cld-internal.h legacy.h
+
+MAINTAINERCLEANFILES = Makefile.in
+
diff --git a/utils/nfsdcld/cld-internal.h b/utils/nfsdcld/cld-internal.h
new file mode 100644
index 00000000..05f01be2
--- /dev/null
+++ b/utils/nfsdcld/cld-internal.h
@@ -0,0 +1,44 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _CLD_INTERNAL_H_
+#define _CLD_INTERNAL_H_
+
+/*
+ * Upcall protocol version this build speaks: 2 when CLD_UPCALL_VERSION is
+ * 2 or higher, otherwise 1 (CLD_UPCALL_VERSION presumably comes from cld.h,
+ * which must be included first -- confirm).
+ */
+#if CLD_UPCALL_VERSION >= 2
+#define UPCALL_VERSION 2
+#else
+#define UPCALL_VERSION 1
+#endif
+
+/*
+ * State for the upcall pipe.
+ *
+ * NOTE(review): this header uses struct event, struct cld_msg and uint64_t
+ * without including their headers; it relies on the includer's order.
+ */
+struct cld_client {
+ /* descriptor of the open upcall pipe; cld_pipe_init() starts it at -1 */
+ int cl_fd;
+ /* read event registered on cl_fd */
+ struct event cl_event;
+ /* buffer holding the current upcall and reused for the downcall reply */
+ union {
+ struct cld_msg cl_msg;
+#if UPCALL_VERSION >= 2
+ struct cld_msg_v2 cl_msg_v2;
+#endif
+ } cl_u;
+};
+
+/*
+ * NOTE(review): the objects below are defined here without "extern", so
+ * every file that includes this header gets its own tentative definition.
+ * That links only when common symbols are merged (-fcommon); compilers
+ * defaulting to -fno-common report multiple definitions.  Consider turning
+ * these into extern declarations with one definition in a .c file.
+ */
+uint64_t current_epoch;
+/* nfsdcld.c treats recovery_epoch == 0 as "not in grace" */
+uint64_t recovery_epoch;
+/* while nonzero, cld_gracedone() runs its one-time cleanup and then clears it */
+int first_time;
+/* when > 0, cld_gracedone() calls sqlite_delete_cltrack_records() */
+int num_cltrack_records;
+/* when > 0, cld_gracedone() calls legacy_clear_recdir() */
+int num_legacy_records;
+
+#endif /* _CLD_INTERNAL_H_ */
diff --git a/utils/nfsdcld/legacy.c b/utils/nfsdcld/legacy.c
new file mode 100644
index 00000000..3c6bea6c
--- /dev/null
+++ b/utils/nfsdcld/legacy.c
@@ -0,0 +1,185 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <dirent.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <limits.h>
+#include "cld.h"
+#include "sqlite.h"
+#include "xlog.h"
+#include "legacy.h"
+
+#define NFSD_RECDIR_FILE "/proc/fs/nfsd/nfsv4recoverydir"
+
+/*
+ * Loads client records from the v4recovery directory into the database.
+ * Records are prefixed with the string "hash:" and include the '\0' byte.
+ *
+ * The directory name is read from NFSD_RECDIR_FILE.  Every entry other than
+ * "." and ".." is inserted with sqlite_insert_client(); *num_records is
+ * incremented once per successful insert.  Any failure to locate or open
+ * the directory makes the function return without touching *num_records.
+ *
+ * Called during database initialization as part of a one-time "upgrade".
+ */
+void
+legacy_load_clients_from_recdir(int *num_records)
+{
+	int fd;
+	ssize_t n;
+	DIR *v4recovery;
+	struct dirent *entry;
+	char recdirname[PATH_MAX];
+	char buf[NFS4_OPAQUE_LIMIT];
+	struct stat st;
+	char *nl;
+
+	fd = open(NFSD_RECDIR_FILE, O_RDONLY);
+	if (fd < 0) {
+		xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE);
+		return;
+	}
+	/* keep one byte free: read() does not terminate the buffer */
+	n = read(fd, recdirname, sizeof(recdirname) - 1);
+	if (n < 0) {
+		xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE);
+		close(fd);
+		return;
+	}
+	close(fd);
+	/* terminate before strchr() so it cannot scan past what was read */
+	recdirname[n] = '\0';
+	nl = strchr(recdirname, '\n');
+	if (!nl)
+		return;
+	*nl = '\0';
+	if (stat(recdirname, &st) < 0) {
+		xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno);
+		return;
+	}
+	if (!S_ISDIR(st.st_mode)) {
+		xlog(D_GENERAL, "%s is not a directory: mode=0%o", recdirname
+				, st.st_mode);
+		return;
+	}
+	v4recovery = opendir(recdirname);
+	if (!v4recovery)
+		return;
+	while ((entry = readdir(v4recovery))) {
+		int ret;
+
+		/* skip "." and ".." */
+		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+			continue;
+		/* prefix legacy records with the string "hash:" */
+		ret = snprintf(buf, sizeof(buf), "hash:%s", entry->d_name);
+		/* if there's a problem, then skip this entry */
+		if (ret < 0 || (size_t)ret >= sizeof(buf)) {
+			xlog(L_WARNING, "%s: unable to build client string for %s!",
+				__func__, entry->d_name);
+			continue;
+		}
+		/* legacy client records need to include the null terminator */
+		ret = sqlite_insert_client((unsigned char *)buf, strlen(buf) + 1);
+		if (ret)
+			xlog(L_WARNING, "%s: unable to insert %s: %d", __func__,
+				entry->d_name, ret);
+		else
+			(*num_records)++;
+	}
+	closedir(v4recovery);
+}
+
+/*
+ * Cleans out the v4recovery directory.
+ *
+ * The directory name is read from NFSD_RECDIR_FILE and rmdir() is attempted
+ * on every entry other than "." and "..".  Failures are logged and the
+ * remaining entries are still processed.
+ *
+ * Called upon receipt of the first "GraceDone" upcall only.
+ */
+void
+legacy_clear_recdir(void)
+{
+	int fd;
+	ssize_t n;
+	DIR *v4recovery;
+	struct dirent *entry;
+	char recdirname[PATH_MAX];
+	char dirname[PATH_MAX];
+	struct stat st;
+	char *nl;
+
+	fd = open(NFSD_RECDIR_FILE, O_RDONLY);
+	if (fd < 0) {
+		xlog(D_GENERAL, "Unable to open %s: %m", NFSD_RECDIR_FILE);
+		return;
+	}
+	/* keep one byte free: read() does not terminate the buffer */
+	n = read(fd, recdirname, sizeof(recdirname) - 1);
+	if (n < 0) {
+		xlog(D_GENERAL, "Unable to read from %s: %m", NFSD_RECDIR_FILE);
+		close(fd);
+		return;
+	}
+	close(fd);
+	/* terminate before strchr() so it cannot scan past what was read */
+	recdirname[n] = '\0';
+	nl = strchr(recdirname, '\n');
+	if (!nl)
+		return;
+	*nl = '\0';
+	if (stat(recdirname, &st) < 0) {
+		xlog(D_GENERAL, "Unable to stat %s: %d", recdirname, errno);
+		return;
+	}
+	if (!S_ISDIR(st.st_mode)) {
+		xlog(D_GENERAL, "%s is not a directory: mode=0%o", recdirname
+				, st.st_mode);
+		return;
+	}
+	v4recovery = opendir(recdirname);
+	if (!v4recovery)
+		return;
+	while ((entry = readdir(v4recovery))) {
+		int len;
+
+		/* skip "." and ".." */
+		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+			continue;
+		len = snprintf(dirname, sizeof(dirname), "%s/%s", recdirname,
+				entry->d_name);
+		/* if there's a problem, then skip this entry */
+		if (len < 0 || (size_t)len >= sizeof(dirname)) {
+			xlog(L_WARNING, "%s: unable to build filename for %s!",
+				__func__, entry->d_name);
+			continue;
+		}
+		/* report errno: rmdir()'s own return value is only ever -1 */
+		if (rmdir(dirname) < 0)
+			xlog(L_WARNING, "%s: unable to rmdir %s: %d", __func__,
+				dirname, errno);
+	}
+	closedir(v4recovery);
+}
diff --git a/utils/nfsdcld/legacy.h b/utils/nfsdcld/legacy.h
new file mode 100644
index 00000000..8988f6e8
--- /dev/null
+++ b/utils/nfsdcld/legacy.h
@@ -0,0 +1,24 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _LEGACY_H_
+#define _LEGACY_H_
+
+/*
+ * Insert every entry of the v4recovery directory into the database as a
+ * "hash:"-prefixed client record; the int is incremented once per record
+ * successfully inserted.
+ */
+void legacy_load_clients_from_recdir(int *);
+/* Attempt to rmdir() every entry of the v4recovery directory. */
+void legacy_clear_recdir(void);
+
+#endif /* _LEGACY_H_ */
diff --git a/utils/nfsdcld/nfsdcld.c b/utils/nfsdcld/nfsdcld.c
new file mode 100644
index 00000000..2ad10019
--- /dev/null
+++ b/utils/nfsdcld/nfsdcld.c
@@ -0,0 +1,866 @@
+/*
+ * nfsdcld.c -- NFSv4 client name tracking daemon
+ *
+ * Copyright (C) 2011 Red Hat, Jeff Layton <jlayton@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <errno.h>
+#include <event.h>
+#include <stdbool.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/inotify.h>
+#ifdef HAVE_SYS_CAPABILITY_H
+#include <sys/prctl.h>
+#include <sys/capability.h>
+#endif
+
+#include "xlog.h"
+#include "nfslib.h"
+#include "cld.h"
+#include "cld-internal.h"
+#include "sqlite.h"
+#include "../mount/version.h"
+#include "conffile.h"
+#include "legacy.h"
+
+/* default for pipefs_dir; may be predefined by the build */
+#ifndef DEFAULT_PIPEFS_DIR
+#define DEFAULT_PIPEFS_DIR NFS_STATEDIR "/rpc_pipefs"
+#endif
+
+/* appended to pipefs_dir by main() to form pipepath */
+#define DEFAULT_CLD_PATH "/nfsd/cld"
+
+/* default storage directory handed to sqlite_prepare_dbh() */
+#ifndef CLD_DEFAULT_STORAGEDIR
+#define CLD_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcld"
+#endif
+
+/* read by cld_check_grace_period() to find out whether nfsd is in grace */
+#define NFSD_END_GRACE_FILE "/proc/fs/nfsd/v4_end_grace"
+
+/* private data structures */
+
+/* global variables */
+/* rpc_pipefs directory; overridden by nfs.conf or -p */
+static char pipefs_dir[PATH_MAX] = DEFAULT_PIPEFS_DIR;
+/* full path of the upcall pipe, built in main() */
+static char pipepath[PATH_MAX];
+/* inotify descriptor created by cld_inotify_setup() */
+static int inotify_fd = -1;
+/* read event on inotify_fd, serviced by cld_inotify_cb() */
+static struct event pipedir_event;
+/* set by main() when the running kernel is older than 4.20 */
+static bool old_kernel = false;
+
+/* long forms of the "hdFp:s:" options parsed in main() */
+static struct option longopts[] =
+{
+ { "help", 0, NULL, 'h' },
+ { "foreground", 0, NULL, 'F' },
+ { "debug", 0, NULL, 'd' },
+ { "pipefsdir", 1, NULL, 'p' },
+ { "storagedir", 1, NULL, 's' },
+ { NULL, 0, 0, 0 },
+};
+
+/* forward declarations */
+static void cldcb(int UNUSED(fd), short which, void *data);
+
+/* Print the one-line option synopsis for progname on stdout. */
+static void
+usage(char *progname)
+{
+	fprintf(stdout, "%s [ -hFd ] [ -p pipefsdir ] [ -s storagedir ]\n",
+		progname);
+}
+
+/*
+ * Drop every capability the process holds.
+ *
+ * Without HAVE_SYS_CAPABILITY_H this does nothing and returns 0.
+ * Otherwise the caller must be uid 0 (-EINVAL if not); each capability
+ * that PR_CAPBSET_READ accepts is dropped from the bounding set, and the
+ * process capability set is replaced with an empty one.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int
+cld_set_caps(void)
+{
+	int ret = 0;
+#ifdef HAVE_SYS_CAPABILITY_H
+	unsigned long i;
+	cap_t caps;
+
+	if (getuid() != 0) {
+		xlog(L_ERROR, "Not running as root. Daemon won't be able to "
+			      "open the pipe after dropping capabilities!");
+		return -EINVAL;
+	}
+
+	/* prune the bounding set to nothing */
+	for (i = 0; prctl(PR_CAPBSET_READ, i, 0, 0, 0) >= 0 ; ++i) {
+		if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0) != 0) {
+			/* capture errno first: logging may overwrite it */
+			ret = -errno;
+			xlog(L_ERROR, "Unable to prune capability %lu from "
+				      "bounding set: %m", i);
+			return ret;
+		}
+	}
+
+	/* get a blank capset */
+	caps = cap_init();
+	if (caps == NULL) {
+		ret = -errno;
+		xlog(L_ERROR, "Unable to get blank capability set: %m");
+		return ret;
+	}
+
+	/* reset the process capabilities */
+	if (cap_set_proc(caps) != 0) {
+		ret = -errno;
+		xlog(L_ERROR, "Unable to set process capabilities: %m");
+	}
+	cap_free(caps);
+#endif
+	return ret;
+}
+
+/* room for one event whose name is NAME_MAX bytes plus its terminator */
+#define INOTIFY_EVENT_MAX (sizeof(struct inotify_event) + NAME_MAX + 1)
+
+/*
+ * Open (or reopen) the upcall pipe at pipepath.
+ *
+ * The new descriptor is opened first.  Only once that has succeeded is any
+ * initialized event on clnt removed, the previous descriptor closed and
+ * cl_event re-initialized for EV_READ with cldcb as handler.  The caller
+ * is responsible for event_add().
+ *
+ * Returns 0 on success, or the negated errno from open(); callers treat
+ * -ENOENT as "pipe not there yet".
+ */
+static int
+cld_pipe_open(struct cld_client *clnt)
+{
+	int fd;
+
+	xlog(D_GENERAL, "%s: opening upcall pipe %s", __func__, pipepath);
+	fd = open(pipepath, O_RDWR, 0);
+	if (fd < 0) {
+		/* capture errno first: logging may overwrite it */
+		int err = errno;
+
+		xlog(D_GENERAL, "%s: open of %s failed: %m", __func__, pipepath);
+		return -err;
+	}
+
+	if (event_initialized(&clnt->cl_event))
+		event_del(&clnt->cl_event);
+	if (clnt->cl_fd >= 0)
+		close(clnt->cl_fd);
+
+	clnt->cl_fd = fd;
+	event_set(&clnt->cl_event, clnt->cl_fd, EV_READ, cldcb, clnt);
+	/* event_add is done by the caller */
+	return 0;
+}
+
+/*
+ * Event handler for the inotify watch on the directory holding the pipe.
+ *
+ * One read() may return several events, so each one is examined.  The
+ * first whose name equals the final component of pipepath triggers
+ * cld_pipe_open(): on success the pipe event is re-added, -ENOENT is
+ * ignored (the pipe vanished again) and any other error is fatal.
+ *
+ * The inotify event itself is re-added before returning, except when
+ * called for something other than EV_READ.
+ */
+static void
+cld_inotify_cb(int UNUSED(fd), short which, void *data)
+{
+	int ret;
+	size_t elen;
+	ssize_t rret;
+	/* aligned so the event records can be accessed in place */
+	char evbuf[INOTIFY_EVENT_MAX]
+		__attribute__((aligned(__alignof__(struct inotify_event))));
+	char *dirc = NULL, *pname, *pos;
+	struct inotify_event *event;
+	struct cld_client *clnt = data;
+
+	if (which != EV_READ)
+		return;
+
+	xlog(D_GENERAL, "%s: called for EV_READ", __func__);
+
+	dirc = strndup(pipepath, PATH_MAX);
+	if (!dirc) {
+		xlog(L_ERROR, "%s: unable to allocate memory", __func__);
+		goto out;
+	}
+
+	rret = read(inotify_fd, evbuf, sizeof(evbuf));
+	if (rret < 0) {
+		xlog(L_ERROR, "%s: read from inotify fd failed: %m", __func__);
+		goto out;
+	}
+
+	pname = basename(dirc);
+
+	/* each record is a fixed header followed by event->len name bytes */
+	for (pos = evbuf; pos < evbuf + rret;
+	     pos += sizeof(*event) + event->len) {
+		event = (struct inotify_event *)pos;
+
+		/* check to see if we have a filename in the event */
+		if (!event->len) {
+			xlog(D_GENERAL, "%s: no filename in inotify event",
+				__func__);
+			continue;
+		}
+
+		elen = strnlen(event->name, event->len);
+
+		/* does the filename match our pipe? */
+		if (strlen(pname) != elen || memcmp(pname, event->name, elen)) {
+			xlog(D_GENERAL, "%s: wrong filename (%s)", __func__,
+				event->name);
+			continue;
+		}
+
+		ret = cld_pipe_open(clnt);
+		switch (ret) {
+		case 0:
+			/* readd the event for the cl_event pipe */
+			event_add(&clnt->cl_event, NULL);
+			break;
+		case -ENOENT:
+			/* pipe must have disappeared, wait for it to come back */
+			break;
+		default:
+			/* anything else is fatal */
+			xlog(L_FATAL, "%s: unable to open new pipe (%d). Aborting.",
+				__func__, ret);
+			exit(ret);
+		}
+		/* the open was attempted just now; later events add nothing */
+		goto out;
+	}
+
+out:
+	event_add(&pipedir_event, NULL);
+	free(dirc);
+}
+
+/*
+ * Create the inotify descriptor and watch the directory part of pipepath
+ * for IN_CREATE.
+ *
+ * On success inotify_fd holds the new descriptor and 0 is returned.  On
+ * failure a negative errno value is returned and inotify_fd is left
+ * negative.
+ */
+static int
+cld_inotify_setup(void)
+{
+	int ret;
+	char *dirc, *dname;
+
+	dirc = strndup(pipepath, PATH_MAX);
+	if (!dirc) {
+		xlog_err("%s: unable to allocate memory", __func__);
+		return -ENOMEM;
+	}
+
+	dname = dirname(dirc);
+
+	inotify_fd = inotify_init();
+	if (inotify_fd < 0) {
+		/* capture errno first: logging may overwrite it */
+		ret = -errno;
+		xlog_err("%s: inotify_init failed: %m", __func__);
+		goto out_free;
+	}
+
+	/* the watch descriptor itself is not needed afterwards */
+	if (inotify_add_watch(inotify_fd, dname, IN_CREATE) < 0) {
+		ret = -errno;
+		xlog_err("%s: inotify_add_watch failed: %m", __func__);
+		close(inotify_fd);
+		/* do not leave a stale descriptor number behind */
+		inotify_fd = -1;
+		goto out_free;
+	}
+
+	ret = 0;
+out_free:
+	free(dirc);
+	return ret;
+}
+
+/*
+ * Install the inotify watch on the pipe's directory, then make a first
+ * attempt at opening the pipe.
+ *
+ * A successful open registers the pipe read event.  -ENOENT is not an
+ * error: the inotify handler opens the pipe once it appears.  Any other
+ * failure closes the inotify descriptor and is returned to the caller.
+ * In the two non-fatal cases the inotify read event is registered and
+ * 0 is returned.
+ */
+static int
+cld_pipe_init(struct cld_client *clnt)
+{
+	int err;
+
+	xlog(D_GENERAL, "%s: init pipe handlers", __func__);
+
+	err = cld_inotify_setup();
+	if (err != 0)
+		return err;
+
+	clnt->cl_fd = -1;
+	err = cld_pipe_open(clnt);
+	if (err == 0) {
+		/* pipe is there: start listening for upcalls */
+		event_add(&clnt->cl_event, NULL);
+	} else if (err == -ENOENT) {
+		/* no pipe yet -- cld_inotify_cb will pick it up later */
+		err = 0;
+	} else {
+		/* everything else is fatal */
+		close(inotify_fd);
+		return err;
+	}
+
+	/* listen for creations in the pipe directory */
+	event_set(&pipedir_event, inotify_fd, EV_READ, cld_inotify_cb, clnt);
+	event_add(&pipedir_event, NULL);
+	return err;
+}
+
+/*
+ * Older kernels will not tell nfsdcld when a grace period has started.
+ * Therefore we have to peek at the /proc/fs/nfsd/v4_end_grace file to
+ * see if nfsd is in grace. We have to do this for create and remove
+ * upcalls to ensure that the correct table is being updated - otherwise
+ * we could lose client records when the grace period is lifted.
+ *
+ * Returns 0 immediately unless old_kernel is set and recovery_epoch is 0.
+ * Otherwise returns 1 when the file cannot be opened or read, the result
+ * of sqlite_grace_start() when the file starts with 'N', and 0 in every
+ * other case (including an empty read).
+ */
+static int
+cld_check_grace_period(void)
+{
+	int fd, ret = 0;
+	/* stays '\0' if read() returns no data, so the test below is defined */
+	char c = '\0';
+
+	if (!old_kernel)
+		return 0;
+	if (recovery_epoch != 0)
+		return 0;
+	fd = open(NFSD_END_GRACE_FILE, O_RDONLY);
+	if (fd < 0) {
+		xlog(L_WARNING, "Unable to open %s: %m",
+			NFSD_END_GRACE_FILE);
+		return 1;
+	}
+	if (read(fd, &c, 1) < 0) {
+		xlog(L_WARNING, "Unable to read from %s: %m",
+			NFSD_END_GRACE_FILE);
+		/* close only after logging so %m still sees read()'s errno */
+		close(fd);
+		return 1;
+	}
+	close(fd);
+	if (c == 'N') {
+		xlog(L_WARNING, "nfsd is in grace but didn't send a gracestart upcall, "
+			"please update the kernel");
+		ret = sqlite_grace_start();
+	}
+	return ret;
+}
+
+#if UPCALL_VERSION >= 2
+/*
+ * Size in bytes of the message at msg, chosen from the cm_vers field of
+ * its header.  A version other than 1 or 2 is logged as fatal and the
+ * process exits.
+ */
+static ssize_t cld_message_size(void *msg)
+{
+	struct cld_msg_hdr *header = msg;
+
+	if (header->cm_vers == 1)
+		return sizeof(struct cld_msg);
+	if (header->cm_vers == 2)
+		return sizeof(struct cld_msg_v2);
+
+	xlog(L_FATAL, "%s invalid upcall version %d", __func__,
+	     header->cm_vers);
+	exit(-EINVAL);
+}
+#else
+/* Only version 1 messages exist in this build, so the size is fixed. */
+static ssize_t cld_message_size(void *UNUSED(msg))
+{
+	return sizeof(struct cld_msg);
+}
+#endif
+
+/*
+ * Reply to an upcall whose command is not handled: the message is written
+ * back with cm_status set to -EOPNOTSUPP, and the pipe is then reopened
+ * regardless of how the write went.  Failing to reopen is fatal.
+ */
+static void
+cld_not_implemented(struct cld_client *clnt)
+{
+	int err;
+	ssize_t msglen, written;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *reply = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *reply = &clnt->cl_u.cl_msg;
+#endif
+
+	xlog(D_GENERAL, "%s: downcalling with not implemented error", __func__);
+
+	/* fill in the status of the reply */
+	reply->cm_status = -EOPNOTSUPP;
+
+	msglen = cld_message_size(reply);
+	written = atomicio((void *)write, clnt->cl_fd, reply, msglen);
+	if (written != msglen)
+		xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+			 __func__, written);
+
+	/* always start over with a fresh pipe descriptor */
+	err = cld_pipe_open(clnt);
+	if (err == 0)
+		return;
+
+	xlog(L_FATAL, "%s: unable to reopen pipe: %d", __func__, err);
+	exit(err);
+}
+
+/*
+ * Handle a GetVersion upcall: report UPCALL_VERSION in cm_u.cm_version
+ * with a status of 0.  If the downcall is not written completely the pipe
+ * is reopened; failing to reopen is fatal.
+ */
+static void
+cld_get_version(struct cld_client *clnt)
+{
+	int err;
+	ssize_t msglen, written;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *reply = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *reply = &clnt->cl_u.cl_msg;
+#endif
+
+	xlog(D_GENERAL, "%s: version = %u.", __func__, UPCALL_VERSION);
+
+	reply->cm_u.cm_version = UPCALL_VERSION;
+	reply->cm_status = 0;
+
+	msglen = cld_message_size(reply);
+	xlog(D_GENERAL, "Doing downcall with status %d", reply->cm_status);
+	written = atomicio((void *)write, clnt->cl_fd, reply, msglen);
+	if (written == msglen)
+		return;
+
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+		 __func__, written);
+	err = cld_pipe_open(clnt);
+	if (err) {
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+			 __func__, err);
+		exit(err);
+	}
+}
+
+/*
+ * Handle a Create upcall.
+ *
+ * Provided cld_check_grace_period() succeeds, the client named in the
+ * message is inserted into the database; version 2 messages also carry a
+ * principal hash that is stored with it.  Any failure is reported to the
+ * kernel as -EREMOTEIO, success as 0.  If the downcall is not written
+ * completely the pipe is reopened; failing to reopen is fatal.
+ */
+static void
+cld_create(struct cld_client *clnt)
+{
+	int err;
+	ssize_t msglen, written;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
+#endif
+
+	err = cld_check_grace_period();
+	if (err == 0) {
+		xlog(D_GENERAL, "%s: create client record.", __func__);
+
+#if UPCALL_VERSION >= 2
+		if (msg->cm_vers >= 2)
+			err = sqlite_insert_client_and_princhash(
+					msg->cm_u.cm_clntinfo.cc_name.cn_id,
+					msg->cm_u.cm_clntinfo.cc_name.cn_len,
+					msg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+					msg->cm_u.cm_clntinfo.cc_princhash.cp_len);
+		else
+			err = sqlite_insert_client(msg->cm_u.cm_name.cn_id,
+					msg->cm_u.cm_name.cn_len);
+#else
+		err = sqlite_insert_client(msg->cm_u.cm_name.cn_id,
+				msg->cm_u.cm_name.cn_len);
+#endif
+	}
+
+	/* every failure is collapsed into a single error code */
+	if (err != 0)
+		msg->cm_status = -EREMOTEIO;
+	else
+		msg->cm_status = 0;
+
+	msglen = cld_message_size(msg);
+	xlog(D_GENERAL, "Doing downcall with status %d", msg->cm_status);
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
+	if (written == msglen)
+		return;
+
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+		 __func__, written);
+	err = cld_pipe_open(clnt);
+	if (err) {
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+			 __func__, err);
+		exit(err);
+	}
+}
+
+/*
+ * Handle a Remove upcall.
+ *
+ * Provided cld_check_grace_period() succeeds, the client named in the
+ * message is removed from the database.  Any failure is reported to the
+ * kernel as -EREMOTEIO, success as 0.  If the downcall is not written
+ * completely the pipe is reopened; failing to reopen is fatal.
+ */
+static void
+cld_remove(struct cld_client *clnt)
+{
+	int err;
+	ssize_t msglen, written;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
+#endif
+
+	err = cld_check_grace_period();
+	if (err == 0) {
+		xlog(D_GENERAL, "%s: remove client record.", __func__);
+
+		err = sqlite_remove_client(msg->cm_u.cm_name.cn_id,
+				msg->cm_u.cm_name.cn_len);
+	}
+
+	/* every failure is collapsed into a single error code */
+	if (err != 0)
+		msg->cm_status = -EREMOTEIO;
+	else
+		msg->cm_status = 0;
+
+	msglen = cld_message_size(msg);
+	xlog(D_GENERAL, "%s: downcall with status %d", __func__,
+			msg->cm_status);
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
+	if (written == msglen)
+		return;
+
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+		 __func__, written);
+	err = cld_pipe_open(clnt);
+	if (err) {
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+			 __func__, err);
+		exit(err);
+	}
+}
+
+/*
+ * Handle a Check upcall.
+ *
+ * Receiving one at all means the kernel is an old one.  When we are not
+ * in grace (recovery_epoch is 0) this is the first client to reclaim, so
+ * sqlite_grace_start() is used to advance the epoch numbers before the
+ * lookup.  Any failure is reported to the kernel as -EACCES, success as
+ * 0.  If the downcall is not written completely the pipe is reopened;
+ * failing to reopen is fatal.
+ */
+static void
+cld_check(struct cld_client *clnt)
+{
+	int err = 0;
+	ssize_t msglen, written;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
+#endif
+
+	if (recovery_epoch == 0) {
+		xlog(D_GENERAL, "%s: received a check upcall, please update the kernel",
+			__func__);
+		err = sqlite_grace_start();
+	}
+
+	/* only look the client up if the epochs are in order */
+	if (err == 0) {
+		xlog(D_GENERAL, "%s: check client record", __func__);
+
+		err = sqlite_check_client(msg->cm_u.cm_name.cn_id,
+				msg->cm_u.cm_name.cn_len);
+	}
+
+	/* fill in the status of the reply */
+	if (err != 0)
+		msg->cm_status = -EACCES;
+	else
+		msg->cm_status = 0;
+
+	msglen = cld_message_size(msg);
+	xlog(D_GENERAL, "%s: downcall with status %d", __func__,
+			msg->cm_status);
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
+	if (written == msglen)
+		return;
+
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+		 __func__, written);
+	err = cld_pipe_open(clnt);
+	if (err) {
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+			 __func__, err);
+		exit(err);
+	}
+}
+
+/*
+ * Handle a GraceDone upcall.
+ *
+ * Getting one while not in grace (recovery_epoch is 0) means the kernel
+ * is an old one and no client attempted to reclaim; the epoch numbers are
+ * then advanced with sqlite_grace_start() first.  After
+ * sqlite_grace_done(), the first such upcall also performs the one-time
+ * removal of cltrack and legacy records.  Any failure is reported to the
+ * kernel as -EREMOTEIO, success as 0.  If the downcall is not written
+ * completely the pipe is reopened; failing to reopen is fatal.
+ */
+static void
+cld_gracedone(struct cld_client *clnt)
+{
+	int err = 0;
+	ssize_t msglen, written;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
+#endif
+
+	if (recovery_epoch == 0) {
+		xlog(D_GENERAL, "%s: received gracedone upcall "
+			"while not in grace, please update the kernel",
+			__func__);
+		err = sqlite_grace_start();
+	}
+
+	/* skipped entirely when the epochs could not be advanced */
+	if (err == 0) {
+		xlog(D_GENERAL, "%s: grace done.", __func__);
+
+		err = sqlite_grace_done();
+
+		if (first_time) {
+			if (num_cltrack_records > 0)
+				sqlite_delete_cltrack_records();
+			if (num_legacy_records > 0)
+				legacy_clear_recdir();
+			sqlite_first_time_done();
+			first_time = 0;
+		}
+	}
+
+	/* every failure is collapsed into a single error code */
+	if (err != 0)
+		msg->cm_status = -EREMOTEIO;
+	else
+		msg->cm_status = 0;
+
+	msglen = cld_message_size(msg);
+	xlog(D_GENERAL, "Doing downcall with status %d", msg->cm_status);
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
+	if (written == msglen)
+		return;
+
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+		 __func__, written);
+	err = cld_pipe_open(clnt);
+	if (err) {
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+			 __func__, err);
+		exit(err);
+	}
+}
+
+/*
+ * Callback given to sqlite_iterate_recovery() by cld_gracestart(): write
+ * the message currently held in clnt to the pipe with a status of
+ * -EINPROGRESS.
+ *
+ * Returns 0 when the whole message was written, -EIO otherwise.
+ */
+static int
+gracestart_callback(struct cld_client *clnt) {
+	ssize_t msglen, written;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
+#endif
+
+	msg->cm_status = -EINPROGRESS;
+
+	msglen = cld_message_size(msg);
+	xlog(D_GENERAL, "Sending client %.*s",
+			msg->cm_u.cm_name.cn_len, msg->cm_u.cm_name.cn_id);
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
+
+	return (written == msglen) ? 0 : -EIO;
+}
+
+/*
+ * Handle a GraceStart upcall.
+ *
+ * The grace epochs are updated with sqlite_grace_start(); if that works,
+ * sqlite_iterate_recovery() is run with gracestart_callback.  Any failure
+ * is reported to the kernel as -EREMOTEIO in the final downcall, success
+ * as 0.  If that downcall is not written completely the pipe is reopened;
+ * failing to reopen is fatal.
+ */
+static void
+cld_gracestart(struct cld_client *clnt)
+{
+	int err;
+	ssize_t msglen, written;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
+#endif
+
+	xlog(D_GENERAL, "%s: updating grace epochs", __func__);
+
+	err = sqlite_grace_start();
+	if (err == 0) {
+		xlog(D_GENERAL, "%s: sending client records to the kernel", __func__);
+
+		err = sqlite_iterate_recovery(&gracestart_callback, clnt);
+	}
+
+	/* every failure is collapsed into a single error code */
+	if (err != 0)
+		msg->cm_status = -EREMOTEIO;
+	else
+		msg->cm_status = 0;
+
+	msglen = cld_message_size(msg);
+	xlog(D_GENERAL, "Doing downcall with status %d", msg->cm_status);
+	written = atomicio((void *)write, clnt->cl_fd, msg, msglen);
+	if (written == msglen)
+		return;
+
+	xlog(L_ERROR, "%s: problem writing to cld pipe (%zd): %m",
+		 __func__, written);
+	err = cld_pipe_open(clnt);
+	if (err) {
+		xlog(L_FATAL, "%s: unable to reopen pipe: %d",
+			 __func__, err);
+		exit(err);
+	}
+}
+
+/*
+ * Event handler for the upcall pipe.
+ *
+ * One message is read into the client's buffer and dispatched on its
+ * cm_cmd.  A failed read, or a message whose version is newer than
+ * UPCALL_VERSION, causes the pipe to be reopened instead.  In every case
+ * the pipe event is added again before returning.
+ */
+static void
+cldcb(int UNUSED(fd), short which, void *data)
+{
+	ssize_t nread;
+	struct cld_client *clnt = data;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *msg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *msg = &clnt->cl_u.cl_msg;
+#endif
+
+	if (which == EV_READ) {
+		nread = atomicio(read, clnt->cl_fd, msg, sizeof(*msg));
+		if (nread <= 0) {
+			xlog(L_ERROR, "%s: pipe read failed: %m", __func__);
+			cld_pipe_open(clnt);
+		} else if (msg->cm_vers > UPCALL_VERSION) {
+			xlog(L_ERROR, "%s: unsupported upcall version: %hu",
+					__func__, msg->cm_vers);
+			cld_pipe_open(clnt);
+		} else {
+			switch (msg->cm_cmd) {
+			case Cld_Create:
+				cld_create(clnt);
+				break;
+			case Cld_Remove:
+				cld_remove(clnt);
+				break;
+			case Cld_Check:
+				cld_check(clnt);
+				break;
+			case Cld_GraceDone:
+				cld_gracedone(clnt);
+				break;
+			case Cld_GraceStart:
+				cld_gracestart(clnt);
+				break;
+			case Cld_GetVersion:
+				cld_get_version(clnt);
+				break;
+			default:
+				xlog(L_WARNING, "%s: command %u is not yet implemented",
+						__func__, msg->cm_cmd);
+				cld_not_implemented(clnt);
+			}
+		}
+	}
+
+	/* wait for the next upcall */
+	event_add(&clnt->cl_event, NULL);
+}
+
+/*
+ * Daemon entry point.
+ *
+ * Settings come from NFS_CONFFILE first and are then overridden by the
+ * command line.  Unless -F is given the process daemonizes and logs to
+ * syslog.  Capabilities are dropped, the storage directory is checked,
+ * the database and the pipe handlers are set up, and the event loop runs
+ * until event_dispatch() returns.
+ *
+ * Returns 0 for -h, 1 for an unrecognized option or if progname cannot
+ * be allocated, and otherwise the last status recorded in rc.
+ */
+int
+main(int argc, char **argv)
+{
+	int arg;
+	int rc = 0;
+	bool foreground = false;
+	char *progname;
+	char *storagedir = CLD_DEFAULT_STORAGEDIR;
+	struct cld_client clnt;
+	char *s;
+	first_time = 0;
+	num_cltrack_records = 0;
+	num_legacy_records = 0;
+
+	memset(&clnt, 0, sizeof(clnt));
+
+	progname = strdup(basename(argv[0]));
+	if (!progname) {
+		fprintf(stderr, "%s: unable to allocate memory.\n", argv[0]);
+		return 1;
+	}
+
+	event_init();
+	xlog_syslog(0);
+	xlog_stderr(1);
+
+	conf_init_file(NFS_CONFFILE);
+	s = conf_get_str("general", "pipefs-directory");
+	if (s)
+		strlcpy(pipefs_dir, s, sizeof(pipefs_dir));
+	s = conf_get_str("nfsdcld", "storagedir");
+	if (s)
+		storagedir = s;
+	rc = conf_get_num("nfsdcld", "debug", 0);
+	if (rc > 0)
+		xlog_config(D_ALL, 1);
+
+	/* process command-line options */
+	while ((arg = getopt_long(argc, argv, "hdFp:s:", longopts,
+				  NULL)) != EOF) {
+		switch (arg) {
+		case 'd':
+			xlog_config(D_ALL, 1);
+			break;
+		case 'F':
+			foreground = true;
+			break;
+		case 'p':
+			strlcpy(pipefs_dir, optarg, sizeof(pipefs_dir));
+			break;
+		case 's':
+			storagedir = optarg;
+			break;
+		case 'h':
+			/* help was asked for: not an error */
+			usage(progname);
+			free(progname);
+			return 0;
+		default:
+			/* bad option: tell the caller through the exit status */
+			usage(progname);
+			free(progname);
+			return 1;
+		}
+	}
+
+	strlcpy(pipepath, pipefs_dir, sizeof(pipepath));
+	strlcat(pipepath, DEFAULT_CLD_PATH, sizeof(pipepath));
+
+	xlog_open(progname);
+	if (!foreground) {
+		xlog_syslog(1);
+		xlog_stderr(0);
+		rc = daemon(0, 0);
+		if (rc) {
+			xlog(L_ERROR, "Unable to daemonize: %m");
+			goto out;
+		}
+	}
+
+	/* drop all capabilities */
+	rc = cld_set_caps();
+	if (rc)
+		goto out;
+
+	/*
+	 * now see if the storagedir is writable by root w/o CAP_DAC_OVERRIDE.
+	 * If it isn't then give the user a warning but proceed as if
+	 * everything is OK. If the DB has already been created, then
+	 * everything might still work. If it doesn't exist at all, then
+	 * assume that the maindb init will be able to create it. Fail on
+	 * anything else.
+	 */
+	if (access(storagedir, W_OK) == -1) {
+		switch (errno) {
+		case EACCES:
+			xlog(L_WARNING, "Storage directory %s is not writable. "
+					"Should be owned by root and writable "
+					"by owner!", storagedir);
+			break;
+		case ENOENT:
+			/* ignore and assume that we can create dir as root */
+			break;
+		default:
+			/* capture errno first: logging may overwrite it */
+			rc = -errno;
+			xlog(L_ERROR, "Unexpected error when checking access "
+				      "on %s: %m", storagedir);
+			goto out;
+		}
+	}
+
+	if (linux_version_code() < MAKE_VERSION(4, 20, 0))
+		old_kernel = true;
+
+	/* set up storage db */
+	rc = sqlite_prepare_dbh(storagedir);
+	if (rc) {
+		xlog(L_ERROR, "Failed to open main database: %d", rc);
+		goto out;
+	}
+
+	/* set up event handler */
+	rc = cld_pipe_init(&clnt);
+	if (rc)
+		goto out;
+
+	xlog(D_GENERAL, "%s: Starting event dispatch handler.", __func__);
+	rc = event_dispatch();
+	if (rc < 0)
+		xlog(L_ERROR, "%s: event_dispatch failed: %m", __func__);
+
+	close(clnt.cl_fd);
+	close(inotify_fd);
+out:
+	free(progname);
+	return rc;
+}
diff --git a/utils/nfsdcld/nfsdcld.man b/utils/nfsdcld/nfsdcld.man
new file mode 100644
index 00000000..4c2b1e80
--- /dev/null
+++ b/utils/nfsdcld/nfsdcld.man
@@ -0,0 +1,221 @@
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.13)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. \*(C+ will
+.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.ie \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. nr % 0
+. rr F
+.\}
+.el \{\
+. de IX
+..
+.\}
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "NFSDCLD 8"
+.TH NFSDCLD 8 "2011-12-21" "" ""
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+nfsdcld \- NFSv4 Client Tracking Daemon
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+nfsdcld [\-d] [\-F] [\-p path] [\-s stable storage dir]
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+nfsdcld is the NFSv4 client tracking daemon. It is not necessary to run
+this daemon on machines that are not acting as NFSv4 servers.
+.PP
+When a network partition is combined with a server reboot, there are
+edge conditions that can cause the server to grant lock reclaims when
+other clients have taken conflicting locks in the interim. A more detailed
+explanation of this issue is described in \s-1RFC\s0 3530, section 8.6.3.
+.PP
+In order to prevent these problems, the server must track a small amount
+of per-client information on stable storage. This daemon provides the
+userspace piece of that functionality.
+.SH "OPTIONS"
+.IX Header "OPTIONS"
+.IP "\fB\-d\fR, \fB\-\-debug\fR" 4
+.IX Item "-d, --debug"
+Enable debug level logging.
+.IP "\fB\-F\fR, \fB\-\-foreground\fR" 4
+.IX Item "-F, --foreground"
+Runs the daemon in the foreground and prints all output to stderr.
+.IP "\fB\-p\fR \fIpath\fR, \fB\-\-pipefsdir\fR=\fIpath\fR" 4
+.IX Item "-p path, --pipefsdir=path"
+Location of the rpc_pipefs filesystem. The default value is
+\&\fI/var/lib/nfs/rpc_pipefs\fR.
+.IP "\fB\-s\fR \fIstorage_dir\fR, \fB\-\-storagedir\fR=\fIstorage_dir\fR" 4
+.IX Item "-s storage_dir, --storagedir=storage_dir"
+Directory where stable storage information should be kept. The default
+value is \fI/var/lib/nfs/nfsdcld\fR.
+.SH "CONFIGURATION FILE"
+.IX Header "CONFIGURATION FILE"
+The following values are recognized in the \fB[nfsdcld]\fR section
+of the \fI/etc/nfs.conf\fR configuration file:
+.IP "\fBstoragedir\fR" 4
+.IX Item "storagedir"
+Equivalent to \fB\-s\fR/\fB\-\-storagedir\fR.
+.IP "\fBdebug\fR" 4
+.IX Item "debug"
+Setting "debug = 1" is equivalent to \fB\-d\fR/\fB\-\-debug\fR.
+.LP
+In addition, the following value is recognized from the \fB[general]\fR section:
+.IP "\fBpipefs\-directory\fR" 4
+.IX Item "pipefs-directory"
+Equivalent to \fB\-p\fR/\fB\-\-pipefsdir\fR.
+.SH "NOTES"
+.IX Header "NOTES"
+The Linux kernel NFSv4 server has historically tracked this information
+on stable storage by manipulating information on the filesystem
+directly, in the directory to which \fI/proc/fs/nfsd/nfsv4recoverydir\fR
+points.
+.PP
+This changed with the original introduction of the \fBnfsdcld\fR upcall in kernel version 3.4,
+which was later deprecated in favor of the \fBnfsdcltrack\fR(8) usermodehelper
+program, support for which was added in kernel version 3.8. However, since the
+usermodehelper upcall does not work in containers, support for a new version of
+the \fBnfsdcld\fR upcall was added in kernel version 5.2.
+.PP
+This daemon requires a kernel that supports the \fBnfsdcld\fR upcall. On older kernels, if
+the legacy client name tracking code was in use, then the kernel would not create the
+pipe that \fBnfsdcld\fR uses to talk to the kernel. On newer kernels, nfsd attempts to
+initialize client tracking in the following order: First, the \fBnfsdcld\fR upcall. Second,
+the \fBnfsdcltrack\fR usermodehelper upcall. Finally, the legacy client tracking.
+.PP
+This daemon should be run as root, as the pipe that it uses to communicate
+with the kernel is only accessible by root. The daemon however does drop all
+superuser capabilities after starting. Because of this, the \fIstoragedir\fR
+should be owned by root, and be readable and writable by owner.
+.PP
+The daemon now supports different upcall versions to allow the kernel to pass additional
+data to be stored in the on-disk database. The kernel will query the supported upcall
+version from \fBnfsdcld\fR during client tracking initialization. A restart of \fBnfsd\fR is
+not necessary after upgrading \fBnfsdcld\fR, however \fBnfsd\fR will not use a later upcall
+version until restart. A restart of \fBnfsd\fR is necessary after downgrading \fBnfsdcld\fR,
+to ensure that \fBnfsd\fR does not use an upcall version that \fBnfsdcld\fR does not support.
+Additionally, a downgrade of \fBnfsdcld\fR requires the schema of the on-disk database to
+be downgraded as well. That can be accomplished using the \fBclddb-tool\fR(8) utility.
+.SH FILES
+.TP
+.B /var/lib/nfs/nfsdcld/main.sqlite
+.SH SEE ALSO
+.BR nfsdcltrack "(8), " clddb-tool (8)
+.SH "AUTHORS"
+.IX Header "AUTHORS"
+The nfsdcld daemon was developed by Jeff Layton <jlayton@redhat.com>
+with modifications from Scott Mayhew <smayhew@redhat.com>.
diff --git a/utils/nfsdcld/sqlite.c b/utils/nfsdcld/sqlite.c
new file mode 100644
index 00000000..6666c867
--- /dev/null
+++ b/utils/nfsdcld/sqlite.c
@@ -0,0 +1,1406 @@
+/*
+ * Copyright (C) 2011 Red Hat, Jeff Layton <jlayton@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * Explanation:
+ *
+ * This file contains the code to manage the sqlite backend database for the
+ * nfsdcld client tracking daemon.
+ *
+ * The main database is called main.sqlite and contains the following tables:
+ *
+ * parameters: simple key/value pairs for storing database info
+ *
+ * grace: a "current" column containing an INTEGER representing the current
+ * epoch (where should new values be stored) and a "recovery" column
+ * containing an INTEGER representing the recovery epoch (from what
+ * epoch are we allowed to recover). A recovery epoch of 0 means
+ * normal operation (grace period not in force). Note: sqlite stores
+ * integers as signed values, so these must be cast to a uint64_t when
+ * retrieving them from the database and back to an int64_t when storing
+ * them in the database.
+ *
+ * rec-CCCCCCCCCCCCCCCC (where C is the hex representation of the epoch value):
+ * an "id" column containing a BLOB with the long-form clientid
+ * as sent by the client, and a "princhash" column containing a BLOB
+ * with the sha256 hash of the kerberos principal (if available).
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <dirent.h>
+#include <errno.h>
+#include <event.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <sqlite3.h>
+#include <linux/limits.h>
+#include <inttypes.h>
+
+#include "xlog.h"
+#include "sqlite.h"
+#include "cld.h"
+#include "cld-internal.h"
+#include "conffile.h"
+#include "legacy.h"
+#include "nfslib.h"
+
+#define CLD_SQLITE_LATEST_SCHEMA_VERSION 4
+#define CLTRACK_DEFAULT_STORAGEDIR NFS_STATEDIR "/nfsdcltrack"
+
+/* in milliseconds */
+#define CLD_SQLITE_BUSY_TIMEOUT 10000
+
+/* private data structures */
+
+/* global variables */
+static char *cltrack_storagedir = CLTRACK_DEFAULT_STORAGEDIR;
+
+/* reusable pathname and sql command buffer */
+static char buf[PATH_MAX];
+
+/* global database handle */
+static sqlite3 *dbh;
+
+/* forward declarations */
+
+/* make a directory, ignoring EEXIST errors unless it's not a directory */
+static int
+mkdir_if_not_exist(const char *dirname)
+{
+ int ret;
+ struct stat statbuf;
+
+ ret = mkdir(dirname, S_IRWXU);
+ if (ret && errno != EEXIST)
+ return -errno;
+
+ ret = stat(dirname, &statbuf);
+ if (ret)
+ return -errno;
+
+ if (!S_ISDIR(statbuf.st_mode))
+ ret = -ENOTDIR;
+
+ return ret;
+}
+
+static int
+sqlite_query_schema_version(void)
+{
+ int ret;
+ sqlite3_stmt *stmt = NULL;
+
+ /* prepare select query */
+ ret = sqlite3_prepare_v2(dbh,
+ "SELECT value FROM parameters WHERE key == \"version\";",
+ -1, &stmt, NULL);
+ if (ret != SQLITE_OK) {
+ xlog(D_GENERAL, "Unable to prepare select statement: %s",
+ sqlite3_errmsg(dbh));
+ ret = 0;
+ goto out;
+ }
+
+ /* query schema version */
+ ret = sqlite3_step(stmt);
+ if (ret != SQLITE_ROW) {
+ xlog(D_GENERAL, "Select statement execution failed: %s",
+ sqlite3_errmsg(dbh));
+ ret = 0;
+ goto out;
+ }
+
+ ret = sqlite3_column_int(stmt, 0);
+out:
+ sqlite3_finalize(stmt);
+ return ret;
+}
+
+static int
+sqlite_query_first_time(int *first_time)
+{
+ int ret;
+ sqlite3_stmt *stmt = NULL;
+
+ /* prepare select query */
+ ret = sqlite3_prepare_v2(dbh,
+ "SELECT value FROM parameters WHERE key == \"first_time\";",
+ -1, &stmt, NULL);
+ if (ret != SQLITE_OK) {
+ xlog(D_GENERAL, "Unable to prepare select statement: %s",
+ sqlite3_errmsg(dbh));
+ goto out;
+ }
+
+ /* query first_time */
+ ret = sqlite3_step(stmt);
+ if (ret != SQLITE_ROW) {
+ xlog(D_GENERAL, "Select statement execution failed: %s",
+ sqlite3_errmsg(dbh));
+ goto out;
+ }
+
+ *first_time = sqlite3_column_int(stmt, 0);
+ ret = 0;
+out:
+ sqlite3_finalize(stmt);
+ return ret;
+}
+
+static int
+sqlite_add_princ_col_cb(void *UNUSED(arg), int ncols, char **cols,
+ char **UNUSED(colnames))
+{
+ int ret;
+ char *err;
+
+ if (ncols > 1)
+ return -EINVAL;
+ ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
+ "ADD COLUMN princhash BLOB;", cols[0]);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return -EINVAL;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to add princhash column to table %s: %s",
+ cols[0], err);
+ goto out;
+ }
+ xlog(D_GENERAL, "Added princhash column to table %s", cols[0]);
+out:
+ sqlite3_free(err);
+ return ret;
+}
+
+static int
+sqlite_maindb_update_v3_to_v4(void)
+{
+ int ret;
+ char *err;
+
+ ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
+ "WHERE type=\"table\" AND name LIKE \"%rec-%\";",
+ sqlite_add_princ_col_cb, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "%s: Failed to update tables!: %s", __func__, err);
+ }
+ sqlite3_free(err);
+ return ret;
+}
+
+static int
+sqlite_maindb_update_v1v2_to_v4(void)
+{
+ int ret;
+ char *err;
+
+ /* create grace table */
+ ret = sqlite3_exec(dbh, "CREATE TABLE grace "
+ "(current INTEGER , recovery INTEGER);",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to create grace table: %s", err);
+ goto out;
+ }
+
+ /* insert initial epochs into grace table */
+ ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
+ "values (1, 0);",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to set initial epochs: %s", err);
+ goto out;
+ }
+
+ /* create recovery table for current epoch */
+ ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
+ "(id BLOB PRIMARY KEY, princhash BLOB);",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to create recovery table "
+ "for current epoch: %s", err);
+ goto out;
+ }
+
+ /* copy records from old clients table */
+ ret = sqlite3_exec(dbh, "INSERT INTO \"rec-0000000000000001\" (id) "
+ "SELECT id FROM clients;",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to copy client records: %s", err);
+ goto out;
+ }
+
+ /* drop the old clients table */
+ ret = sqlite3_exec(dbh, "DROP TABLE clients;",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to drop old clients table: %s", err);
+ }
+out:
+ sqlite3_free(err);
+ return ret;
+}
+
+static int
+sqlite_maindb_update_schema(int oldversion)
+{
+ int ret, ret2;
+ char *err;
+
+ /* begin transaction */
+ ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+ &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to begin transaction: %s", err);
+ goto rollback;
+ }
+
+ /*
+ * Check schema version again. This time, under an exclusive
+ * transaction to guard against racing DB setup attempts
+ */
+ ret = sqlite_query_schema_version();
+ if (ret != oldversion) {
+ if (ret == CLD_SQLITE_LATEST_SCHEMA_VERSION)
+ /* Someone else raced in and set it up */
+ ret = 0;
+ else
+ /* Something went wrong -- fail! */
+ ret = -EINVAL;
+ goto rollback;
+ }
+
+ /* Still at old version -- do conversion */
+
+ switch (oldversion) {
+ case 3:
+ case 2:
+ ret = sqlite_maindb_update_v3_to_v4();
+ break;
+ case 1:
+ ret = sqlite_maindb_update_v1v2_to_v4();
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ if (ret != SQLITE_OK)
+ goto rollback;
+
+ ret = snprintf(buf, sizeof(buf), "UPDATE parameters SET value = %d "
+ "WHERE key = \"version\";",
+ CLD_SQLITE_LATEST_SCHEMA_VERSION);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ goto rollback;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ ret = -EINVAL;
+ goto rollback;
+ }
+
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to update schema version: %s", err);
+ goto rollback;
+ }
+
+ ret = sqlite_query_first_time(&first_time);
+ if (ret != SQLITE_OK) {
+ /* insert first_time into parameters table */
+ ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters "
+ "values (\"first_time\", \"1\");",
+ NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
+ goto rollback;
+ }
+ }
+
+ ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to commit transaction: %s", err);
+ goto rollback;
+ }
+out:
+ sqlite3_free(err);
+ return ret;
+rollback:
+ ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+ if (ret2 != SQLITE_OK)
+ xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+ goto out;
+}
+
+/*
+ * Start an exclusive transaction and recheck the DB schema version. If it
+ * is still zero (a new database), create the tables and record the schema
+ * version, then commit. On any error, roll the transaction back.
+ * Returns SQLITE_OK (0) on success and a non-zero error code otherwise.
+ */
+static int
+sqlite_maindb_init_v4(void)
+{
+	int ret, ret2;
+	char *err = NULL;
+
+	/* Start a transaction */
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+				&err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto out;	/* nothing to roll back, but err must be freed */
+	}
+
+	/*
+	 * Check schema version again. This time, under an exclusive
+	 * transaction to guard against racing DB setup attempts
+	 */
+	ret = sqlite_query_schema_version();
+	switch (ret) {
+	case 0:
+		/* Query failed again -- set up DB */
+		break;
+	case CLD_SQLITE_LATEST_SCHEMA_VERSION:
+		/* Someone else raced in and set it up */
+		ret = 0;
+		goto rollback;
+	default:
+		/* Something went wrong -- fail! */
+		ret = -EINVAL;
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, "CREATE TABLE parameters "
+				"(key TEXT PRIMARY KEY, value TEXT);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to create parameter table: %s", err);
+		goto rollback;
+	}
+
+	/* create grace table */
+	ret = sqlite3_exec(dbh, "CREATE TABLE grace "
+				"(current INTEGER , recovery INTEGER);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to create grace table: %s", err);
+		goto rollback;
+	}
+
+	/* insert initial epochs into grace table */
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO grace "
+				"values (1, 0);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to set initial epochs: %s", err);
+		goto rollback;
+	}
+
+	/* create recovery table for current epoch */
+	ret = sqlite3_exec(dbh, "CREATE TABLE \"rec-0000000000000001\" "
+				"(id BLOB PRIMARY KEY, princhash BLOB);",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to create recovery table "
+				"for current epoch: %s", err);
+		goto rollback;
+	}
+
+	/* insert version into parameters table */
+	ret = snprintf(buf, sizeof(buf), "INSERT OR FAIL INTO parameters "
+			"values (\"version\", \"%d\");",
+			CLD_SQLITE_LATEST_SCHEMA_VERSION);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		goto rollback;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		ret = -EINVAL;
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
+		goto rollback;
+	}
+
+	/* insert first_time into parameters table */
+	ret = sqlite3_exec(dbh, "INSERT OR FAIL INTO parameters "
+				"values (\"first_time\", \"1\");",
+				NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to insert into parameter table: %s", err);
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+out:
+	sqlite3_free(err);
+	return ret;
+rollback:
+	sqlite3_free(err);	/* drop the failed step's message before reuse */
+	err = NULL;
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto out;
+}
+
+static int
+sqlite_startup_query_grace(void)
+{
+ int ret;
+ uint64_t tcur;
+ uint64_t trec;
+ sqlite3_stmt *stmt = NULL;
+
+ /* prepare select query */
+ ret = sqlite3_prepare_v2(dbh, "SELECT * FROM grace;", -1, &stmt, NULL);
+ if (ret != SQLITE_OK) {
+ xlog(D_GENERAL, "Unable to prepare select statement: %s",
+ sqlite3_errmsg(dbh));
+ goto out;
+ }
+
+ ret = sqlite3_step(stmt);
+ if (ret != SQLITE_ROW) {
+ xlog(D_GENERAL, "Select statement execution failed: %s",
+ sqlite3_errmsg(dbh));
+ goto out;
+ }
+
+ tcur = (uint64_t)sqlite3_column_int64(stmt, 0);
+ trec = (uint64_t)sqlite3_column_int64(stmt, 1);
+
+ current_epoch = tcur;
+ recovery_epoch = trec;
+ ret = 0;
+ xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
+ __func__, current_epoch, recovery_epoch);
+out:
+ sqlite3_finalize(stmt);
+ return ret;
+}
+
+/*
+ * Helper for renaming a recovery table to fix the padding.
+ */
+static int
+sqlite_fix_table_name(const char *name)
+{
+ int ret;
+ uint64_t val;
+ char *err;
+
+ if (sscanf(name, "rec-%" PRIx64, &val) != 1)
+ return -EINVAL;
+ ret = snprintf(buf, sizeof(buf), "ALTER TABLE \"%s\" "
+ "RENAME TO \"rec-%016" PRIx64 "\";",
+ name, val);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return -EINVAL;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to fix table for epoch %"PRIu64": %s",
+ val, err);
+ goto out;
+ }
+ xlog(D_GENERAL, "Renamed table %s to rec-%016" PRIx64, name, val);
+out:
+ sqlite3_free(err);
+ return ret;
+}
+
+/*
+ * Callback for the sqlite3_exec() call in sqlite_check_table_names(),
+ * invoked once per selected row; cols[0] holds the table name.
+ *
+ * If the epoch encoded in the table name matches either the current
+ * epoch or the recovery epoch, then try to fix the padding. Otherwise,
+ * we bail. Returns SQLITE_OK (0) on success and non-zero on failure.
+ */
+static int
+sqlite_check_table_names_cb(void *UNUSED(arg), int ncols, char **cols,
+				char **UNUSED(colnames))
+{
+	uint64_t val;
+
+	if (ncols > 1)
+		return -EINVAL;
+	if (sscanf(cols[0], "rec-%" PRIx64, &val) != 1)
+		return -EINVAL;
+	if (val != current_epoch && val != recovery_epoch) {
+		/* val is unsigned, so it must be printed with PRIu64 */
+		xlog(L_ERROR, "found invalid table name %s for unknown epoch %"
+			PRIu64, cols[0], val);
+		return -EINVAL;
+	}
+	xlog(D_GENERAL, "found invalid table name %s for %s epoch",
+		cols[0], val == current_epoch ? "current" : "recovery");
+	return sqlite_fix_table_name(cols[0]);
+}
+
+/*
+ * Look for recovery table names where the epoch isn't zero-padded
+ */
+static int
+sqlite_check_table_names(void)
+{
+ int ret;
+ char *err;
+
+ ret = sqlite3_exec(dbh, "SELECT name FROM sqlite_master "
+ "WHERE type=\"table\" AND name LIKE \"%rec-%\" "
+ "AND length(name) < 20;",
+ sqlite_check_table_names_cb, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Table names check failed: %s", err);
+ }
+ sqlite3_free(err);
+ return ret;
+}
+
+/*
+ * Simple db health check. For now we're just making sure that the recovery
+ * table names are of the format "rec-CCCCCCCCCCCCCCCC" (where C is the hex
+ * representation of the epoch value) and that epoch value matches either
+ * the current epoch or the recovery epoch.
+ */
+static int
+sqlite_check_db_health(void)
+{
+ int ret, ret2;
+ char *err;
+
+ ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+ &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to begin transaction: %s", err);
+ goto rollback;
+ }
+
+ ret = sqlite_check_table_names();
+ if (ret != SQLITE_OK)
+ goto rollback;
+
+ ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to commit transaction: %s", err);
+ goto rollback;
+ }
+
+cleanup:
+ sqlite3_free(err);
+ xlog(D_GENERAL, "%s: returning %d", __func__, ret);
+ return ret;
+rollback:
+ ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+ if (ret2 != SQLITE_OK)
+ xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+ goto cleanup;
+}
+
+/* Attach <path>/main.sqlite to dbh as "attached"; returns 0 on success */
+static int
+sqlite_attach_db(const char *path)
+{
+	int ret;
+	char dbpath[PATH_MAX];
+	struct stat stb;
+	sqlite3_stmt *stmt = NULL;
+
+	ret = snprintf(dbpath, sizeof(dbpath), "%s/main.sqlite", path);
+	if (ret < 0 || (size_t)ret >= sizeof(dbpath))
+		return -EINVAL;
+
+	ret = stat(dbpath, &stb);
+	if (ret < 0)
+		return ret;
+
+	xlog(D_GENERAL, "attaching %s", dbpath);
+	ret = sqlite3_prepare_v2(dbh, "ATTACH DATABASE ? AS attached;",
+				 -1, &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "%s: unable to prepare attach statement: %s",
+			__func__, sqlite3_errmsg(dbh));
+		return ret;
+	}
+
+	ret = sqlite3_bind_text(stmt, 1, dbpath, strlen(dbpath), SQLITE_STATIC);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "%s: bind text failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		/* the statement was prepared, so it must still be finalized */
+		goto out;
+	}
+
+	ret = sqlite3_step(stmt);
+	if (ret == SQLITE_DONE)
+		ret = SQLITE_OK;
+	else
+		xlog(L_ERROR, "%s: unexpected return code from attach: %s",
+			__func__, sqlite3_errmsg(dbh));
+out:
+	sqlite3_finalize(stmt);
+	return ret;
+}
+
+static int
+sqlite_detach_db(void)
+{
+ int ret;
+ char *err = NULL;
+
+ xlog(D_GENERAL, "detaching database");
+ ret = sqlite3_exec(dbh, "DETACH DATABASE attached;", NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to detach attached db: %s", err);
+ }
+
+ sqlite3_free(err);
+ return ret;
+}
+
+/*
+ * Copies client records from the nfsdcltrack database as part of a one-time
+ * "upgrade".
+ *
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0).
+ * Returns the number of records copied via "num_rec".
+ */
+static int
+sqlite_copy_cltrack_records(int *num_rec)
+{
+ int ret, ret2;
+ char *s;
+ char *err = NULL;
+ sqlite3_stmt *stmt = NULL;
+
+ s = conf_get_str("nfsdcltrack", "storagedir");
+ if (s)
+ cltrack_storagedir = s;
+ ret = sqlite_attach_db(cltrack_storagedir);
+ if (ret)
+ goto out;
+ ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+ &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to begin transaction: %s", err);
+ goto rollback;
+ }
+ ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";",
+ current_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ goto rollback;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ ret = -EINVAL;
+ goto rollback;
+ }
+ ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to clear records from current epoch: %s", err);
+ goto rollback;
+ }
+ ret = snprintf(buf, sizeof(buf), "INSERT INTO \"rec-%016" PRIx64 "\" (id) "
+ "SELECT id FROM attached.clients;",
+ current_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ goto rollback;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ ret = -EINVAL;
+ goto rollback;
+ }
+ ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "%s: insert statement prepare failed: %s",
+ __func__, sqlite3_errmsg(dbh));
+ goto rollback;
+ }
+ ret = sqlite3_step(stmt);
+ if (ret != SQLITE_DONE) {
+ xlog(L_ERROR, "%s: unexpected return code from insert: %s",
+ __func__, sqlite3_errmsg(dbh));
+ goto rollback;
+ }
+ *num_rec = sqlite3_changes(dbh);
+ ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "Unable to commit transaction: %s", err);
+ goto rollback;
+ }
+cleanup:
+ sqlite3_finalize(stmt);
+ sqlite3_free(err);
+ sqlite_detach_db();
+out:
+ xlog(D_GENERAL, "%s: returning %d", __func__, ret);
+ return ret;
+rollback:
+ *num_rec = 0;
+ ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+ if (ret2 != SQLITE_OK)
+ xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+ goto cleanup;
+}
+
+/*
+ * Open the main.sqlite database under "topdir" (creating the directory if
+ * the first open attempt fails) and set up the global database handle for
+ * it. The schema is created or updated as needed, the grace epochs and the
+ * first_time flag are loaded, and on first-time startup records from the
+ * older client tracking methods are imported; import errors are ignored.
+ *
+ * Returns 0 on success (or if the handle was already set up) and non-zero
+ * on failure, in which case the handle is closed again.
+ */
+int
+sqlite_prepare_dbh(const char *topdir)
+{
+	int ret;
+
+	/* Do nothing if the database handle is already set up */
+	if (dbh)
+		return 0;
+
+	ret = snprintf(buf, PATH_MAX - 1, "%s/main.sqlite", topdir);
+	if (ret < 0)
+		return ret;
+
+	buf[PATH_MAX - 1] = '\0';
+
+	/* open a new DB handle */
+	ret = sqlite3_open(buf, &dbh);
+	if (ret != SQLITE_OK) {
+		/* try to create the dir */
+		ret = mkdir_if_not_exist(topdir);
+		if (ret)
+			goto out_close;
+
+		/* release the handle of the failed attempt, then retry */
+		sqlite3_close(dbh);
+		ret = sqlite3_open(buf, &dbh);
+		if (ret != SQLITE_OK)
+			goto out_close;
+	}
+
+	/* set busy timeout */
+	ret = sqlite3_busy_timeout(dbh, CLD_SQLITE_BUSY_TIMEOUT);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to set sqlite busy timeout: %s",
+			sqlite3_errmsg(dbh));
+		goto out_close;
+	}
+
+	ret = sqlite_query_schema_version();
+	switch (ret) {
+	case CLD_SQLITE_LATEST_SCHEMA_VERSION:
+		/* DB is already set up. Do nothing */
+		ret = 0;
+		break;
+	case 3:
+	case 2:
+	case 1:
+		/* Old DB -- update to new schema */
+		ret = sqlite_maindb_update_schema(ret);
+		if (ret)
+			goto out_close;
+		break;
+	case 0:
+		/* Query failed -- try to set up new DB */
+		ret = sqlite_maindb_init_v4();
+		if (ret)
+			goto out_close;
+		break;
+	default:
+		/* Unknown DB version -- downgrade? Fail */
+		xlog(L_ERROR, "Unsupported database schema version! "
+			"Expected %d, got %d.",
+			CLD_SQLITE_LATEST_SCHEMA_VERSION, ret);
+		ret = -EINVAL;
+		goto out_close;
+	}
+
+	/* don't carry on with unknown epochs if the grace query fails */
+	ret = sqlite_startup_query_grace();
+	if (!ret)
+		ret = sqlite_query_first_time(&first_time);
+	if (ret)
+		goto out_close;
+
+	ret = sqlite_check_db_health();
+	if (ret) {
+		xlog(L_ERROR, "Database health check failed! "
+			"Database must be fixed manually.");
+		goto out_close;
+	}
+
+	/* one-time "upgrade" from older client tracking methods */
+	if (first_time) {
+		sqlite_copy_cltrack_records(&num_cltrack_records);
+		xlog(D_GENERAL, "%s: num_cltrack_records = %d\n",
+			__func__, num_cltrack_records);
+		legacy_load_clients_from_recdir(&num_legacy_records);
+		xlog(D_GENERAL, "%s: num_legacy_records = %d\n",
+			__func__, num_legacy_records);
+		if (num_cltrack_records > 0 && num_legacy_records > 0)
+			xlog(L_WARNING, "%s: first-time upgrade detected "
+				"both cltrack and legacy records!\n", __func__);
+	}
+
+	return ret;
+out_close:
+	sqlite3_close(dbh);
+	dbh = NULL;
+	return ret;
+}
+
+/*
+ * Create a client record
+ *
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0)
+ */
+int
+sqlite_insert_client(const unsigned char *clname, const size_t namelen)
+{
+ int ret;
+ sqlite3_stmt *stmt = NULL;
+
+ ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" (id) "
+ "VALUES (?);", current_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return ret;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+
+ ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "%s: insert statement prepare failed: %s",
+ __func__, sqlite3_errmsg(dbh));
+ return ret;
+ }
+
+ ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
+ SQLITE_STATIC);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
+ sqlite3_errmsg(dbh));
+ goto out_err;
+ }
+
+ ret = sqlite3_step(stmt);
+ if (ret == SQLITE_DONE)
+ ret = SQLITE_OK;
+ else
+ xlog(L_ERROR, "%s: unexpected return code from insert: %s",
+ __func__, sqlite3_errmsg(dbh));
+
+out_err:
+ xlog(D_GENERAL, "%s: returning %d", __func__, ret);
+ sqlite3_finalize(stmt);
+ return ret;
+}
+
+#if UPCALL_VERSION >= 2
+/*
+ * Create a client record including the hash of the kerberos principal
+ *
+ * Returns a non-zero sqlite error code, or SQLITE_OK (aka 0)
+ */
+int
+sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
+ const unsigned char *clprinchash, const size_t princhashlen)
+{
+ int ret;
+ sqlite3_stmt *stmt = NULL;
+
+ if (princhashlen > 0)
+ ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" "
+ "VALUES (?, ?);", current_epoch);
+ else
+ ret = snprintf(buf, sizeof(buf), "INSERT OR REPLACE INTO \"rec-%016" PRIx64 "\" (id) "
+ "VALUES (?);", current_epoch);
+ if (ret < 0) {
+ xlog(L_ERROR, "sprintf failed!");
+ return ret;
+ } else if ((size_t)ret >= sizeof(buf)) {
+ xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+ return -EINVAL;
+ }
+
+ ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "%s: insert statement prepare failed: %s",
+ __func__, sqlite3_errmsg(dbh));
+ return ret;
+ }
+
+ ret = sqlite3_bind_blob(stmt, 1, (const void *)clname, namelen,
+ SQLITE_STATIC);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
+ sqlite3_errmsg(dbh));
+ goto out_err;
+ }
+
+ if (princhashlen > 0) {
+ ret = sqlite3_bind_blob(stmt, 2, (const void *)clprinchash, princhashlen,
+ SQLITE_STATIC);
+ if (ret != SQLITE_OK) {
+ xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
+ sqlite3_errmsg(dbh));
+ goto out_err;
+ }
+ }
+
+ ret = sqlite3_step(stmt);
+ if (ret == SQLITE_DONE)
+ ret = SQLITE_OK;
+ else
+ xlog(L_ERROR, "%s: unexpected return code from insert: %s",
+ __func__, sqlite3_errmsg(dbh));
+
+out_err:
+ xlog(D_GENERAL, "%s: returning %d", __func__, ret);
+ sqlite3_finalize(stmt);
+ return ret;
+}
+#else
+int
+sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
+ const unsigned char *clprinchash, const size_t princhashlen)
+{
+ return -EINVAL;
+}
+#endif
+
+/*
+ * Remove a client record.
+ *
+ * Deletes the row whose id equals clname (namelen bytes) from the table
+ * belonging to the current epoch.
+ *
+ * Returns SQLITE_OK on success. On failure returns the negative snprintf()
+ * result, -EINVAL when the statement text does not fit in buf, or the
+ * sqlite result code of the call that failed.
+ */
+int
+sqlite_remove_client(const unsigned char *clname, const size_t namelen)
+{
+	sqlite3_stmt *del_stmt = NULL;
+	int rc;
+
+	rc = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\" "
+			"WHERE id==?;", current_epoch);
+	if (rc < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return rc;
+	}
+	if ((size_t)rc >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
+		return -EINVAL;
+	}
+
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &del_stmt, NULL);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: statement prepare failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		goto out_err;
+	}
+
+	rc = sqlite3_bind_blob(del_stmt, 1, (const void *)clname, namelen,
+				SQLITE_STATIC);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: bind blob failed: %s", __func__,
+			sqlite3_errmsg(dbh));
+		goto out_err;
+	}
+
+	/* SQLITE_DONE is the only acceptable outcome of the DELETE. */
+	rc = sqlite3_step(del_stmt);
+	if (rc != SQLITE_DONE)
+		xlog(L_ERROR, "%s: unexpected return code from delete: %d",
+			__func__, rc);
+	else
+		rc = SQLITE_OK;
+
+out_err:
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
+	sqlite3_finalize(del_stmt);
+	return rc;
+}
+
+/*
+ * Is the given clname in the table for the recovery epoch?
+ *
+ * Counts the rows of the recovery epoch's table whose id equals clname. If
+ * exactly one row matches, the client is inserted into the table for the
+ * current epoch and the result of that insert is returned. Any other count
+ * yields -EACCES. Failures while building or running the query return the
+ * negative snprintf() result, -EINVAL (statement too long for buf) or the
+ * sqlite result code.
+ */
+int
+sqlite_check_client(const unsigned char *clname, const size_t namelen)
+{
+	sqlite3_stmt *sel_stmt = NULL;
+	int rc;
+
+	rc = snprintf(buf, sizeof(buf), "SELECT count(*) FROM \"rec-%016" PRIx64 "\" "
+			"WHERE id==?;", recovery_epoch);
+	if (rc < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return rc;
+	}
+	if ((size_t)rc >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", rc);
+		return -EINVAL;
+	}
+
+	rc = sqlite3_prepare_v2(dbh, buf, -1, &sel_stmt, NULL);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: select statement prepare failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		return rc;
+	}
+
+	rc = sqlite3_bind_blob(sel_stmt, 1, (const void *)clname, namelen,
+				SQLITE_STATIC);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "%s: bind blob failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		goto out_err;
+	}
+
+	rc = sqlite3_step(sel_stmt);
+	if (rc != SQLITE_ROW) {
+		xlog(L_ERROR, "%s: unexpected return code from select: %d",
+			__func__, rc);
+		goto out_err;
+	}
+
+	/* From here on rc holds the row count reported by count(*). */
+	rc = sqlite3_column_int(sel_stmt, 0);
+	xlog(D_GENERAL, "%s: select returned %d rows", __func__, rc);
+	if (rc == 1) {
+		sqlite3_finalize(sel_stmt);
+
+		/* Now insert the client into the table for the current epoch */
+		return sqlite_insert_client(clname, namelen);
+	}
+	rc = -EACCES;
+
+out_err:
+	xlog(D_GENERAL, "%s: returning %d", __func__, rc);
+	sqlite3_finalize(sel_stmt);
+	return rc;
+}
+
+/*
+ * Begin a grace period.
+ *
+ * All work is done on local copies of current_epoch and recovery_epoch inside
+ * an exclusive transaction; the globals are only updated after the
+ * transaction has committed.
+ *
+ *  - recovery_epoch == 0 (normal start): the current epoch becomes the
+ *    recovery epoch, the current epoch is incremented, the grace table is
+ *    updated accordingly and a new "rec-<epoch>" table is created.
+ *  - recovery_epoch != 0 (restart while in grace): the epochs are left alone
+ *    and the records in the current epoch's table are deleted.
+ *
+ * Returns SQLITE_OK on success. On failure a rollback is attempted and the
+ * failing sqlite result code, the negative snprintf() result, or -EINVAL
+ * (statement text did not fit in buf) is returned.
+ */
+int
+sqlite_grace_start(void)
+{
+	int ret, ret2;
+	char *err = NULL;
+	uint64_t tcur = current_epoch;
+	uint64_t trec = recovery_epoch;
+
+	/* begin transaction */
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+				&err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto rollback;
+	}
+
+	if (trec == 0) {
+		/*
+		 * A normal grace start - update the epoch values in the grace
+		 * table and create a new table for the current reboot epoch.
+		 */
+		trec = tcur;
+		tcur++;
+
+		ret = snprintf(buf, sizeof(buf), "UPDATE grace "
+				"SET current = %" PRId64 ", recovery = %" PRId64 ";",
+				(int64_t)tcur, (int64_t)trec);
+		if (ret < 0) {
+			xlog(L_ERROR, "sprintf failed!");
+			goto rollback;
+		} else if ((size_t)ret >= sizeof(buf)) {
+			xlog(L_ERROR, "sprintf output too long! (%d chars)",
+				ret);
+			ret = -EINVAL;
+			goto rollback;
+		}
+
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+		if (ret != SQLITE_OK) {
+			xlog(L_ERROR, "Unable to update epochs: %s", err);
+			goto rollback;
+		}
+
+		ret = snprintf(buf, sizeof(buf), "CREATE TABLE \"rec-%016" PRIx64 "\" "
+				"(id BLOB PRIMARY KEY, princhash blob);",
+				tcur);
+		if (ret < 0) {
+			xlog(L_ERROR, "sprintf failed!");
+			goto rollback;
+		} else if ((size_t)ret >= sizeof(buf)) {
+			xlog(L_ERROR, "sprintf output too long! (%d chars)",
+				ret);
+			ret = -EINVAL;
+			goto rollback;
+		}
+
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+		if (ret != SQLITE_OK) {
+			xlog(L_ERROR, "Unable to create table for current epoch: %s",
+				err);
+			goto rollback;
+		}
+	} else {
+		/* Server restarted while in grace - don't update the epoch
+		 * values in the grace table, just clear out the records for
+		 * the current reboot epoch.
+		 */
+		ret = snprintf(buf, sizeof(buf), "DELETE FROM \"rec-%016" PRIx64 "\";",
+				tcur);
+		if (ret < 0) {
+			xlog(L_ERROR, "sprintf failed!");
+			goto rollback;
+		} else if ((size_t)ret >= sizeof(buf)) {
+			xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+			ret = -EINVAL;
+			goto rollback;
+		}
+
+		ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+		if (ret != SQLITE_OK) {
+			xlog(L_ERROR, "Unable to clear table for current epoch: %s",
+				err);
+			goto rollback;
+		}
+	}
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+
+	/* Only publish the new epochs once they are safely on disk. */
+	current_epoch = tcur;
+	recovery_epoch = trec;
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
+		__func__, current_epoch, recovery_epoch);
+
+out:
+	sqlite3_free(err);
+	return ret;
+rollback:
+	/*
+	 * sqlite3_exec() overwrites *errmsg on every call, so the message left
+	 * behind by the failed statement must be released before err is
+	 * reused, otherwise it is leaked.
+	 */
+	sqlite3_free(err);
+	err = NULL;
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto out;
+}
+
+/*
+ * End the grace period.
+ *
+ * Inside an exclusive transaction, sets the recovery value in the grace table
+ * to 0 and drops the table belonging to the recovery epoch. The global
+ * recovery_epoch is reset to 0 only after the transaction has committed.
+ *
+ * Returns SQLITE_OK on success. On failure a rollback is attempted and the
+ * failing sqlite result code, the negative snprintf() result, or -EINVAL
+ * (statement text did not fit in buf) is returned.
+ */
+int
+sqlite_grace_done(void)
+{
+	int ret, ret2;
+	char *err = NULL;
+
+	/* begin transaction */
+	ret = sqlite3_exec(dbh, "BEGIN EXCLUSIVE TRANSACTION;", NULL, NULL,
+				&err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to begin transaction: %s", err);
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, "UPDATE grace SET recovery = \"0\";",
+			NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to clear recovery epoch: %s", err);
+		goto rollback;
+	}
+
+	ret = snprintf(buf, sizeof(buf), "DROP TABLE \"rec-%016" PRIx64 "\";",
+			recovery_epoch);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		goto rollback;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		ret = -EINVAL;
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, (const char *)buf, NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to drop table for recovery epoch: %s",
+			err);
+		goto rollback;
+	}
+
+	ret = sqlite3_exec(dbh, "COMMIT TRANSACTION;", NULL, NULL, &err);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to commit transaction: %s", err);
+		goto rollback;
+	}
+
+	/* Only leave grace in memory once the change is committed. */
+	recovery_epoch = 0;
+	xlog(D_GENERAL, "%s: current_epoch=%"PRIu64" recovery_epoch=%"PRIu64,
+		__func__, current_epoch, recovery_epoch);
+
+out:
+	sqlite3_free(err);
+	return ret;
+rollback:
+	/*
+	 * sqlite3_exec() overwrites *errmsg on every call, so the message left
+	 * behind by the failed statement must be released before err is
+	 * reused, otherwise it is leaked.
+	 */
+	sqlite3_free(err);
+	err = NULL;
+	ret2 = sqlite3_exec(dbh, "ROLLBACK TRANSACTION;", NULL, NULL, &err);
+	if (ret2 != SQLITE_OK)
+		xlog(L_ERROR, "Unable to rollback transaction: %s", err);
+	goto out;
+}
+
+
+/*
+ * Walk every record in the recovery epoch's table and hand each one to cb.
+ *
+ * For each row the message union inside clnt is zeroed, the client id
+ * (column 0) and, when UPCALL_VERSION >= 2 and present, the princhash blob
+ * (column 1) are copied into it, and cb(clnt) is called. The callback's
+ * return value is ignored.
+ *
+ * Each copy is limited to the number of bytes actually stored in the column
+ * and to the destination capacity (NFS4_OPAQUE_LIMIT for the id,
+ * SHA256_DIGEST_SIZE for the princhash), so a short blob is never over-read
+ * and the recorded lengths always describe the bytes that were copied.
+ *
+ * Returns 0 once all rows have been visited; -EINVAL when not in grace
+ * (recovery_epoch == 0) or when the statement text does not fit in buf; the
+ * negative snprintf() result; or the sqlite result code that ended the
+ * iteration.
+ */
+int
+sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt)
+{
+	int ret;
+	sqlite3_stmt *stmt = NULL;
+#if UPCALL_VERSION >= 2
+	struct cld_msg_v2 *cmsg = &clnt->cl_u.cl_msg_v2;
+#else
+	struct cld_msg *cmsg = &clnt->cl_u.cl_msg;
+#endif
+
+	if (recovery_epoch == 0) {
+		xlog(D_GENERAL, "%s: not in grace!", __func__);
+		return -EINVAL;
+	}
+
+	ret = snprintf(buf, sizeof(buf), "SELECT * FROM \"rec-%016" PRIx64 "\";",
+			recovery_epoch);
+	if (ret < 0) {
+		xlog(L_ERROR, "sprintf failed!");
+		return ret;
+	} else if ((size_t)ret >= sizeof(buf)) {
+		xlog(L_ERROR, "sprintf output too long! (%d chars)", ret);
+		return -EINVAL;
+	}
+
+	ret = sqlite3_prepare_v2(dbh, buf, -1, &stmt, NULL);
+	if (ret != SQLITE_OK) {
+		xlog(L_ERROR, "%s: select statement prepare failed: %s",
+			__func__, sqlite3_errmsg(dbh));
+		return ret;
+	}
+
+	while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
+		/* Fetch the blob pointer before asking for its size. */
+		const void *id = sqlite3_column_blob(stmt, 0);
+		int id_len = sqlite3_column_bytes(stmt, 0);
+#if UPCALL_VERSION >= 2
+		const void *hash = sqlite3_column_blob(stmt, 1);
+		int hash_len = sqlite3_column_bytes(stmt, 1);
+
+		if (hash == NULL)
+			hash_len = 0;
+		else if (hash_len > SHA256_DIGEST_SIZE)
+			hash_len = SHA256_DIGEST_SIZE;
+#endif
+		/* A NULL pointer means there is nothing to copy. */
+		if (id == NULL)
+			id_len = 0;
+		else if (id_len > NFS4_OPAQUE_LIMIT)
+			id_len = NFS4_OPAQUE_LIMIT;
+
+		memset(&cmsg->cm_u, 0, sizeof(cmsg->cm_u));
+#if UPCALL_VERSION >= 2
+		if (id_len > 0)
+			memcpy(&cmsg->cm_u.cm_clntinfo.cc_name.cn_id,
+				id, (size_t)id_len);
+		cmsg->cm_u.cm_clntinfo.cc_name.cn_len = id_len;
+		if (hash_len > 0) {
+			memcpy(&cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+				hash, (size_t)hash_len);
+			cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = hash_len;
+		}
+#else
+		if (id_len > 0)
+			memcpy(&cmsg->cm_u.cm_name.cn_id, id, (size_t)id_len);
+		cmsg->cm_u.cm_name.cn_len = id_len;
+#endif
+		cb(clnt);
+	}
+	if (ret == SQLITE_DONE)
+		ret = 0;
+	sqlite3_finalize(stmt);
+	return ret;
+}
+
+/*
+ * Cleans out the old nfsdcltrack database.
+ *
+ * If the "nfsdcltrack" section of the configuration provides a "storagedir"
+ * value it replaces cltrack_storagedir. The database under that directory is
+ * then attached, every row of its clients table is deleted, and the database
+ * is detached again.
+ *
+ * Called upon receipt of the first "GraceDone" upcall only.
+ *
+ * Returns the result of sqlite_attach_db() if attaching fails, otherwise the
+ * result code of the DELETE statement.
+ */
+int
+sqlite_delete_cltrack_records(void)
+{
+	char *errmsg = NULL;
+	char *dir;
+	int rc;
+
+	dir = conf_get_str("nfsdcltrack", "storagedir");
+	if (dir != NULL)
+		cltrack_storagedir = dir;
+
+	rc = sqlite_attach_db(cltrack_storagedir);
+	if (rc == 0) {
+		rc = sqlite3_exec(dbh, "DELETE FROM attached.clients;",
+					NULL, NULL, &errmsg);
+		if (rc != SQLITE_OK)
+			xlog(L_ERROR, "Unable to clear records from cltrack db: %s",
+				errmsg);
+		/* Detach regardless of whether the DELETE succeeded. */
+		sqlite_detach_db();
+	}
+
+	sqlite3_free(errmsg);
+	return rc;
+}
+
+/*
+ * Sets first_time to 0 in the parameters table to ensure we only
+ * copy old client tracking records into the database one time.
+ *
+ * Called upon receipt of the first "GraceDone" upcall only.
+ *
+ * Returns the sqlite result code of the UPDATE statement (SQLITE_OK on
+ * success); a failure is also logged.
+ */
+int
+sqlite_first_time_done(void)
+{
+	char *errmsg = NULL;
+	int rc;
+
+	rc = sqlite3_exec(dbh, "UPDATE parameters SET value = \"0\" "
+				"WHERE key = \"first_time\";",
+				NULL, NULL, &errmsg);
+	if (rc != SQLITE_OK) {
+		xlog(L_ERROR, "Unable to clear first_time: %s", errmsg);
+	}
+
+	/* errmsg is still NULL on success; freeing it is then a no-op. */
+	sqlite3_free(errmsg);
+	return rc;
+}
diff --git a/utils/nfsdcld/sqlite.h b/utils/nfsdcld/sqlite.h
new file mode 100644
index 00000000..0a26ad67
--- /dev/null
+++ b/utils/nfsdcld/sqlite.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2011 Red Hat, Jeff Layton <jlayton@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _SQLITE_H_
+#define _SQLITE_H_
+
+/* Only used through pointers below, so a forward declaration suffices. */
+struct cld_client;
+
+/* NOTE(review): defined outside this excerpt; presumably prepares the database handle under topdir - confirm. */
+int sqlite_prepare_dbh(const char *topdir);
+/* Insert a client into the table for the current epoch. */
+int sqlite_insert_client(const unsigned char *clname, const size_t namelen);
+/* Insert a client together with its princhash blob; the fallback build always returns -EINVAL. */
+int sqlite_insert_client_and_princhash(const unsigned char *clname, const size_t namelen,
+ const unsigned char *clprinchash, const size_t princhashlen);
+/* Delete the client's record from the table for the current epoch. */
+int sqlite_remove_client(const unsigned char *clname, const size_t namelen);
+/* Succeeds only if the client has exactly one record in the recovery epoch's table; it is then inserted into the current epoch. */
+int sqlite_check_client(const unsigned char *clname, const size_t namelen);
+/* Start a grace period: advance the epochs, or clear the current epoch's records if already in grace. */
+int sqlite_grace_start(void);
+/* End the grace period: reset the recovery epoch to 0 and drop its table. */
+int sqlite_grace_done(void);
+/* Call cb once for every record in the recovery epoch's table; returns -EINVAL when not in grace. */
+int sqlite_iterate_recovery(int (*cb)(struct cld_client *clnt), struct cld_client *clnt);
+/* Delete all rows from the clients table of the old nfsdcltrack database. */
+int sqlite_delete_cltrack_records(void);
+/* Set the first_time entry of the parameters table to 0. */
+int sqlite_first_time_done(void);
+
+#endif /* _SQLITE_H_ */
diff --git a/utils/nfsidmap/nfsidmap.c b/utils/nfsidmap/nfsidmap.c
index d3967a3a..4d219ef5 100644
--- a/utils/nfsidmap/nfsidmap.c
+++ b/utils/nfsidmap/nfsidmap.c
@@ -18,7 +18,7 @@
#include "xcommon.h"
int verbose = 0;
-char *usage = "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]";
+#define USAGE "Usage: %s [-vh] [-c || [-u|-g|-r key] || -d || -l || [-t timeout] key desc]"
#define MAX_ID_LEN 11
#define IDMAP_NAMESZ 128
@@ -401,7 +401,7 @@ int main(int argc, char **argv)
break;
case 'h':
default:
- xlog_warn(usage, progname);
+ xlog_warn(USAGE, progname);
exit(opt == 'h' ? 0 : 1);
}
}
@@ -433,7 +433,7 @@ int main(int argc, char **argv)
xlog_stderr(verbose);
if ((argc - optind) != 2) {
xlog_warn("Bad arg count. Check /etc/request-key.conf");
- xlog_warn(usage, progname);
+ xlog_warn(USAGE, progname);
return EXIT_FAILURE;
}
@@ -451,7 +451,7 @@ int main(int argc, char **argv)
return EXIT_FAILURE;
}
if (verbose) {
- xlog_warn("key: 0x%lx type: %s value: %s timeout %ld",
+ xlog_warn("key: 0x%x type: %s value: %s timeout %d",
key, type, value, timeout);
}
diff --git a/utils/statd/rmtcall.c b/utils/statd/rmtcall.c
index c4f6364f..5b261480 100644
--- a/utils/statd/rmtcall.c
+++ b/utils/statd/rmtcall.c
@@ -247,7 +247,7 @@ process_reply(FD_SET_TYPE *rfds)
xlog_warn("%s: service %d not registered on localhost",
__func__, NL_MY_PROG(lp));
} else {
- xlog(D_GENERAL, "%s: Callback to %s (for %d) succeeded",
+ xlog(D_GENERAL, "%s: Callback to %s (for %s) succeeded",
__func__, NL_MY_NAME(lp), NL_MON_NAME(lp));
}
nlist_free(&notify, lp);
diff --git a/utils/statd/statd.c b/utils/statd/statd.c
index 14673800..8eef2ff2 100644
--- a/utils/statd/statd.c
+++ b/utils/statd/statd.c
@@ -136,7 +136,7 @@ static void log_modes(void)
strcat(buf, "TI-RPC ");
#endif
- xlog_warn(buf);
+ xlog_warn("%s", buf);
}
/*
diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c
index d1dbd74a..e343c768 100644
--- a/utils/statd/svc_run.c
+++ b/utils/statd/svc_run.c
@@ -53,6 +53,7 @@
#include <errno.h>
#include <time.h>
+#include <inttypes.h>
#include "statd.h"
#include "notlist.h"
@@ -104,8 +105,8 @@ my_svc_run(int sockfd)
tv.tv_sec = NL_WHEN(notify) - now;
tv.tv_usec = 0;
- xlog(D_GENERAL, "Waiting for reply... (timeo %d)",
- tv.tv_sec);
+ xlog(D_GENERAL, "Waiting for reply... (timeo %jd)",
+ (intmax_t)tv.tv_sec);
selret = select(FD_SETSIZE, &readfds,
(void *) 0, (void *) 0, &tv);
} else {