# HG changeset patch # Parent 2ef463e657e6564c9792d499bbaaa09a9cc8a004 diff -r db78da0996b1 dist/android/android_config.in --- a/dist/android/android_config.in Mon Sep 09 11:09:35 2013 -0400 +++ b/dist/android/android_config.in Sat Apr 29 04:10:18 2017 -0700 @@ -123,6 +123,9 @@ /* Define to 1 if allocated filesystem blocks are not zeroed. */ /* #undef HAVE_FILESYSTEM_NOTZERO */ +/* Define to 1 if you have the `flock' function. */ +#undef HAVE_FLOCK + /* Define to 1 if you have the `fopen' function. */ #define HAVE_FOPEN 1 diff -r db78da0996b1 dist/config.hin --- a/dist/config.hin Mon Sep 09 11:09:35 2013 -0400 +++ b/dist/config.hin Sat Apr 29 04:10:18 2017 -0700 @@ -92,6 +92,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_DLFCN_H +/* Define to 1 if you have the `dl_iterate_phdr' function. */ +#undef HAVE_DL_ITERATE_PHDR + /* Define to 1 to use dtrace for performance monitoring. */ #undef HAVE_DTRACE @@ -125,6 +128,9 @@ /* Define to 1 if allocated filesystem blocks are not zeroed. */ #undef HAVE_FILESYSTEM_NOTZERO +/* Define to 1 if you have the `flock' function. */ +#undef HAVE_FLOCK + /* Define to 1 if you have the `fopen' function. */ #undef HAVE_FOPEN @@ -360,6 +366,9 @@ /* Define to 1 if you have the `pstat_getdynamic' function. */ #undef HAVE_PSTAT_GETDYNAMIC +/* Define to 1 if you have dl_iterate_phdr and use pthread-based mutexes. */ +#undef HAVE_PTHREADS_TIMESTAMP + /* Define to 1 if it is OK to initialize an already initialized pthread_cond_t. */ #undef HAVE_PTHREAD_COND_REINIT_OKAY diff -r db78da0996b1 dist/configure --- a/dist/configure Mon Sep 09 11:09:35 2013 -0400 +++ b/dist/configure Sat Apr 29 04:10:18 2017 -0700 @@ -23368,7 +23368,7 @@ # Check for system functions we optionally use. 
for ac_func in \ _fstati64 backtrace backtrace_symbols directio fchmod fclose\ - fcntl fdatasync fgetc fgets fopen fwrite getgid\ + fcntl fdatasync fgetc fgets flock fopen fwrite getgid\ getrusage getuid hstrerror mprotect pstat_getdynamic\ pthread_self pthread_yield random sched_yield select setgid setuid\ sigaction snprintf stat sysconf vsnprintf yield @@ -24668,6 +24668,59 @@ fi +# Check for dl_iterate_phdr; do the test explicitly instead of using +# AC_CHECK_FUNCS because isn't a standard include file. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dl_iterate_phdr" >&5 +$as_echo_n "checking for dl_iterate_phdr... " >&6; } +if ${db_cv_dl_iterate_phdr+:} false; then : + $as_echo_n "(cached) " >&6 +else + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main () +{ + + dl_iterate_phdr(0, 0); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + db_cv_dl_iterate_phdr=yes +else + db_cv_dl_iterate_phdr=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $db_cv_dl_iterate_phdr" >&5 +$as_echo "$db_cv_dl_iterate_phdr" >&6; } +if test "$db_cv_dl_iterate_phdr" = "yes"; then + $as_echo "#define HAVE_DL_ITERATE_PHDR 1" >>confdefs.h + + +fi + +# If we are using pthread mutex or condition variables, and dl_iterate_phdr() is +# available, then we try to detect when libpthread is updated -- which can +# render existing environment invalid. DB_ENV->open() tries to rebuild such +# environments when they are idle. +case "$db_cv_mutex" in + *pthreads*) + if test "$db_cv_dl_iterate_phdr" = "yes" ; then + $as_echo "#define HAVE_PTHREADS_TIMESTAMP 1" >>confdefs.h + + + fi + ;; +esac + # We need to add the additional object files into the Makefile with the correct # suffix. We can't use $LTLIBOBJS itself, because that variable has $U encoded # in it for automake, and that's not what we want. 
See SR #7227 for additional diff -r db78da0996b1 dist/configure.ac --- a/dist/configure.ac Mon Sep 09 11:09:35 2013 -0400 +++ b/dist/configure.ac Sat Apr 29 04:10:18 2017 -0700 @@ -698,7 +698,7 @@ # Check for system functions we optionally use. AC_CHECK_FUNCS(\ _fstati64 backtrace backtrace_symbols directio fchmod fclose\ - fcntl fdatasync fgetc fgets fopen fwrite getgid\ + fcntl fdatasync fgetc fgets flock fopen fwrite getgid\ getrusage getuid hstrerror mprotect pstat_getdynamic\ pthread_self pthread_yield random sched_yield select setgid setuid\ sigaction snprintf stat sysconf vsnprintf yield) @@ -1042,6 +1042,34 @@ [Define to 1 if you have localization function to support globalization.]) fi +# Check for dl_iterate_phdr; do the test explicitly instead of using +# AC_CHECK_FUNCS because isn't a standard include file. +AC_CACHE_CHECK([for dl_iterate_phdr], db_cv_dl_iterate_phdr, [ +AC_TRY_LINK([ +#include +#include ], [ + dl_iterate_phdr(0, 0); +], [db_cv_dl_iterate_phdr=yes], [db_cv_dl_iterate_phdr=no])]) +if test "$db_cv_dl_iterate_phdr" = "yes"; then + AC_DEFINE(HAVE_DL_ITERATE_PHDR) + AH_TEMPLATE(HAVE_DL_ITERATE_PHDR, + [Define to 1 if you have the `dl_iterate_phdr' function.]) +fi + +# If we are using pthread mutex or condition variables, and dl_iterate_phdr() is +# available, then we try to detect when libpthread is updated -- which can +# render existing environment invalid. DB_ENV->open() tries to rebuild such +# environments when they are idle. +case "$db_cv_mutex" in + *pthreads*) + if test "$db_cv_dl_iterate_phdr" = "yes" ; then + AC_DEFINE(HAVE_PTHREADS_TIMESTAMP) + AH_TEMPLATE(HAVE_PTHREADS_TIMESTAMP, + [Define to 1 if you have dl_iterate_phdr and use pthread-based mutexes.]) + fi + ;; +esac + # We need to add the additional object files into the Makefile with the correct # suffix. We can't use $LTLIBOBJS itself, because that variable has $U encoded # in it for automake, and that's not what we want. 
See SR #7227 for additional diff -r db78da0996b1 src/db/db_meta.c --- a/src/db/db_meta.c Mon Sep 09 11:09:35 2013 -0400 +++ b/src/db/db_meta.c Sat Apr 29 04:10:18 2017 -0700 @@ -1330,8 +1330,9 @@ } /* * __db_has_pagelock -- - * Determine if this locker holds a particular page lock. - * Returns 0 if lock is held, non-zero otherwise. + * Determine if this locker holds a particular page lock, and return an + * error if it is missing a page lock that it should have. + * Otherwise (TDS with the page locked, or DS or CDS) return 0. * * PUBLIC: #ifdef DIAGNOSTIC * PUBLIC: int __db_has_pagelock __P((ENV *, DB_LOCKER *, @@ -1348,6 +1349,9 @@ { int ret; + if (!FLD_ISSET(env->open_flags, DB_INIT_TXN)) + return (0); + switch (pagep->type) { case P_OVERFLOW: case P_INVALID: diff -r db78da0996b1 src/dbinc/atomic.h --- a/src/dbinc/atomic.h Mon Sep 09 11:09:35 2013 -0400 +++ b/src/dbinc/atomic.h Sat Apr 29 04:10:18 2017 -0700 @@ -144,7 +144,7 @@ #define atomic_inc(env, p) __atomic_inc(p) #define atomic_dec(env, p) __atomic_dec(p) #define atomic_compare_exchange(env, p, o, n) \ - __atomic_compare_exchange((p), (o), (n)) + __db_atomic_compare_exchange((p), (o), (n)) static inline int __atomic_inc(db_atomic_t *p) { int temp; @@ -176,7 +176,7 @@ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html * which configure could be changed to use. 
*/ -static inline int __atomic_compare_exchange( +static inline int __db_atomic_compare_exchange( db_atomic_t *p, atomic_value_t oldval, atomic_value_t newval) { atomic_value_t was; diff -r db78da0996b1 src/dbinc/db_int.in --- a/src/dbinc/db_int.in Mon Sep 09 11:09:35 2013 -0400 +++ b/src/dbinc/db_int.in Sat Apr 29 04:10:18 2017 -0700 @@ -792,7 +792,7 @@ pid_t pid_cache; /* Cached process ID */ - DB_FH *lockfhp; /* fcntl(2) locking file handle */ + DB_FH *primary_fhp; /* fcntl(2) locking on __db.001 */ DB_LOCKER *env_lref; /* Locker in non-threaded handles */ diff -r db78da0996b1 src/dbinc/region.h --- a/src/dbinc/region.h Mon Sep 09 11:09:35 2013 -0400 +++ b/src/dbinc/region.h Sat Apr 29 04:10:18 2017 -0700 @@ -178,6 +178,16 @@ u_int32_t envid; /* Unique environment ID. */ u_int32_t signature; /* Structure signatures. */ +#if defined(HAVE_PTHREADS_TIMESTAMP) && defined(HAVE_MUTEX_PTHREADS) + /* + * Updates to glibc/libpthread can change its pthreads implementation + * and invalidate on-disk environments, even without changing the + * version number. If using POSIX mutexes and a change in this + * timestamp is detected when opening an environment with DB_CREATE, + * __env_attach recreates any existing on-disk environment. + */ + time_t pthreads_timestamp; +#endif time_t timestamp; /* Creation time.
*/ diff -r db78da0996b1 src/dbinc_auto/int_def.in --- a/src/dbinc_auto/int_def.in Mon Sep 09 11:09:35 2013 -0400 +++ b/src/dbinc_auto/int_def.in Sat Apr 29 04:10:18 2017 -0700 @@ -1542,6 +1542,7 @@ #if defined(HAVE_REPLICATION_THREADS) #define __os_freeaddrinfo __os_freeaddrinfo@DB_VERSION_UNIQUE_NAME@ #endif +#define __os_pthreads_timestamp __os_pthreads_timestamp@DB_VERSION_UNIQUE_NAME@ #define __os_umalloc __os_umalloc@DB_VERSION_UNIQUE_NAME@ #define __os_urealloc __os_urealloc@DB_VERSION_UNIQUE_NAME@ #define __os_ufree __os_ufree@DB_VERSION_UNIQUE_NAME@ diff -r db78da0996b1 src/dbinc_auto/os_ext.h --- a/src/dbinc_auto/os_ext.h Mon Sep 09 11:09:35 2013 -0400 +++ b/src/dbinc_auto/os_ext.h Sat Apr 29 04:10:18 2017 -0700 @@ -14,6 +14,7 @@ #if defined(HAVE_REPLICATION_THREADS) void __os_freeaddrinfo __P((ENV *, ADDRINFO *)); #endif +time_t __os_pthreads_timestamp __P((ENV *)); int __os_umalloc __P((ENV *, size_t, void *)); int __os_urealloc __P((ENV *, size_t, void *)); void __os_ufree __P((ENV *, void *)); @@ -40,7 +41,7 @@ char *__os_strerror __P((int, char *, size_t)); int __os_posix_err __P((int)); int __os_fileid __P((ENV *, const char *, int, u_int8_t *)); -int __os_fdlock __P((ENV *, DB_FH *, off_t, int, int)); +int __os_fdlock __P((ENV *, DB_FH *, off_t, db_lockmode_t, int)); int __os_fsync __P((ENV *, DB_FH *)); int __os_getenv __P((ENV *, const char *, char **, size_t)); int __os_openhandle __P((ENV *, const char *, int, int, DB_FH **)); diff -r db78da0996b1 src/env/env_open.c --- a/src/env/env_open.c Mon Sep 09 11:09:35 2013 -0400 +++ b/src/env/env_open.c Sat Apr 29 04:10:18 2017 -0700 @@ -937,6 +937,8 @@ { DB_FH *fhp; + if (env->primary_fhp != NULL) + env->primary_fhp = NULL; if (TAILQ_FIRST(&env->fdlist) == NULL) return (0); @@ -1031,11 +1033,11 @@ goto err; /* - * __env_attach will return the saved init_flags field, which contains + * __env_attach has returned the saved init_flags field, which contains * the DB_INIT_* flags used when the environment 
was created. * - * We may be joining an environment -- reset our flags to match the - * ones in the environment. + * We may be joining an existing environment -- reset our flags to match + * the ones in the environment. */ if (FLD_ISSET(init_flags, DB_INITENV_CDB)) LF_SET(DB_INIT_CDB); diff -r db78da0996b1 src/env/env_region.c --- a/src/env/env_region.c Mon Sep 09 11:09:35 2013 -0400 +++ b/src/env/env_region.c Sat Apr 29 04:10:18 2017 -0700 @@ -14,17 +14,54 @@ #include "dbinc/log.h" #include "dbinc/txn.h" +#define static static int __env_des_get __P((ENV *, REGINFO *, REGINFO *, REGION **)); static int __env_faultmem __P((ENV *, void *, size_t, int)); static int __env_sys_attach __P((ENV *, REGINFO *, REGION *)); static int __env_sys_detach __P((ENV *, REGINFO *, int)); +static int __env_check_recreate __P((ENV *, REGENV *, u_int32_t)); static void __env_des_destroy __P((ENV *, REGION *)); static void __env_remove_file __P((ENV *)); + +/* + * If the system supports flock()-like file locking, then the primary region + * file __db.001 is exclusively locked during creation, and is read-locked while + * the environment is open. Most Unix-like systems have flock(), with the + * notable exception of Solaris. + * Note: fcntl cannot be used for this locking because of the unfortunate + * definition of its interaction with close(2). A process's fcntl locks are + * released whenever it closes any file descriptor for that file. So, if an + * environment is opened more than once, closing one of the DB_ENV handles would + * release the read lock that protects the other handle. + */ +#ifdef HAVE_FLOCK +#define ENV_PRIMARY_LOCK(env, lockmode, async) \ + ((env)->primary_fhp == NULL ? 0 : \ + __os_fdlock((env), (env)->primary_fhp, -1, lockmode, async)) +#define ENV_PRIMARY_UNLOCK(env) \ + ((env)->primary_fhp == NULL ? 
0 : \ + __os_fdlock((env), (env)->primary_fhp, -1, DB_LOCK_NG, 0)) +#else +#define ENV_PRIMARY_LOCK(env, lockmode, async) (0) +#define ENV_PRIMARY_UNLOCK(env) (0) +#endif + /* * __env_attach * Join/create the environment * + * Safely detecting and managing multiple processes' environment handles: + * BDB uses a shared or exclusive fcntl()-style lock on the first byte + * of the primary region file (__db.001) to detect whether other processes + * have the environment open, and to single-thread attempts to create the + * environment. If the open includes DB_CREATE, an exclusive lock is + * obtained during the open call. After the creation is finished, and + * anytime during a non-DB_CREATE env open, the process holds a shared + * lock. + * - single-thread creation of the environment + * - detect whether any other processes are currently attached to it. + * * PUBLIC: int __env_attach __P((ENV *, u_int32_t *, int, int)); */ int @@ -104,7 +141,7 @@ if (create_ok) { if ((ret = __os_open(env, infop->name, 0, DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_REGION, - env->db_mode, &env->lockfhp)) == 0) + env->db_mode, &env->primary_fhp)) == 0) goto creation; if (ret != EEXIST) { __db_err(env, ret, "%s", infop->name); @@ -120,8 +157,13 @@ * we're done.) */ if ((ret = __os_open( - env, infop->name, 0, DB_OSO_REGION, 0, &env->lockfhp)) != 0) + env, infop->name, 0, DB_OSO_REGION, 0, &env->primary_fhp)) != 0) goto err; + /* Wait to get shared access to the primary region. */ + if ((ret = ENV_PRIMARY_LOCK(env, DB_LOCK_READ, 0)) != 0) { + __db_err(env, ret, "__env_attach: existing: shared lock error"); + goto err; + } /* * !!! @@ -153,7 +195,7 @@ * now, trying to make different versions of it work.) 
*/ if ((ret = __os_ioinfo(env, infop->name, - env->lockfhp, &mbytes, &bytes, NULL)) != 0) { + env->primary_fhp, &mbytes, &bytes, NULL)) != 0) { __db_err(env, ret, "%s", infop->name); goto err; } @@ -189,14 +231,14 @@ ret = EINVAL; __db_err(env, ret, DB_STR_A("1535", "%s: existing environment not created in system memory", - "%s"), infop->name); - goto err; + "%s"), infop->name); + goto err; } else { - if ((ret = __os_read(env, env->lockfhp, &rbuf, + if ((ret = __os_read(env, env->primary_fhp, &rbuf, sizeof(rbuf), &nrw)) != 0 || nrw < (size_t)sizeof(rbuf) || (ret = __os_seek(env, - env->lockfhp, 0, 0, rbuf.region_off)) != 0) { + env->primary_fhp, 0, 0, rbuf.region_off)) != 0) { __db_err(env, ret, DB_STR_A("1536", "%s: unable to read region info", "%s"), infop->name); @@ -204,7 +246,7 @@ } } - if ((ret = __os_read(env, env->lockfhp, &ref, + if ((ret = __os_read(env, env->primary_fhp, &ref, sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) { if (ret == 0) ret = EIO; @@ -218,14 +260,13 @@ segid = ref.segid; } -#ifndef HAVE_MUTEX_FCNTL +#if !defined(HAVE_FCNTL) && !defined(HAVE_PTHREADS_TIMESTAMP) /* - * If we're not doing fcntl locking, we can close the file handle. We - * no longer need it and the less contact between the buffer cache and - * the VM, the better. + * Without fcntl-like support, we no longer need the file handle. Close + * it to limit the interaction between the buffer cache and the VM. */ - (void)__os_closehandle(env, env->lockfhp); - env->lockfhp = NULL; + (void)__os_closehandle(env, env->primary_fhp); + env->primary_fhp = NULL; #endif /* Call the region join routine to acquire the region. */ @@ -233,6 +274,8 @@ tregion.size = (roff_t)size; tregion.max = (roff_t)max; tregion.segid = segid; + /* Attach to the existing primary region. */ + /* The leaking db.001 gets open inside of here, in __os_attach(). 
*/ if ((ret = __env_sys_attach(env, infop, &tregion)) != 0) goto err; @@ -246,20 +289,38 @@ infop->head = (u_int8_t *)infop->addr + sizeof(REGENV); renv = infop->primary; - /* - * Make sure the region matches our build. Special case a region - * that's all nul bytes, just treat it like any other corruption. - */ + if (create_ok && + __env_check_recreate(env, renv, signature) == DB_OLD_VERSION && + (ret = ENV_PRIMARY_LOCK(env, DB_LOCK_WRITE, 1)) == 0) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) + __db_msg(env, "Recreating idle environment"); + F_SET(infop, REGION_CREATE_OK); + + /* + * Detach from the environment region; we need to unmap it (and + * close any file handle) so that we don't leak memory or files. + */ + DB_ASSERT(env, infop->rp == NULL); + infop->rp = &tregion; + (void)__env_sys_detach(env, infop, 0); + goto creation; + } + if (renv->majver != DB_VERSION_MAJOR || renv->minver != DB_VERSION_MINOR) { - if (renv->majver != 0 || renv->minver != 0) { + /* + * Special case a region that's all nul bytes, just treat it + * like any other corruption. + */ + if (renv->majver == 0 && renv->minver == 0) + ret = EINVAL; + else { __db_errx(env, DB_STR_A("1538", - "Program version %d.%d doesn't match environment version %d.%d", + "Program version %d.%d doesn't match in-use environment version %d.%d", "%d %d %d %d"), DB_VERSION_MAJOR, DB_VERSION_MINOR, renv->majver, renv->minver); ret = DB_VERSION_MISMATCH; - } else - ret = EINVAL; + } goto err; } if (renv->signature != signature) { @@ -289,6 +350,18 @@ } if (renv->magic != DB_REGION_MAGIC) goto retry; + /* + * A bad magic number means that the env is new and not yet available: + * wait a while and try again. If the magic number says recovery is in + * process, remember the env creation time to record that recovery was + * the reason that the open failed. 
+ */ + if (renv->magic != DB_REGION_MAGIC) { + __db_msg(env, "attach sees bad region magic 0x%lx", + (u_long)renv->magic); + goto retry; + } + /* * Get a reference to the underlying REGION information for this @@ -296,7 +369,7 @@ */ if ((ret = __env_des_get(env, infop, infop, &rp)) != 0 || rp == NULL) goto find_err; - infop->rp = rp; + infop->rp = rp; /* * There's still a possibility for inconsistent data. When we acquired @@ -346,6 +419,12 @@ return (0); creation: + /* Should this wait for the lock (passing 0 instead of 1)? */ + if ((ret = ENV_PRIMARY_LOCK(env, DB_LOCK_WRITE, 1)) != 0) { + __db_err(env, ret, "__env_attach: creation could not lock %s", + env->primary_fhp->name); + goto err; + } /* Create the environment region. */ F_SET(infop, REGION_CREATE); @@ -437,7 +516,14 @@ renv->minver = (u_int32_t)minver; renv->patchver = (u_int32_t)patchver; renv->signature = signature; - +#ifdef HAVE_PTHREADS_TIMESTAMP + renv->pthreads_timestamp = __os_pthreads_timestamp(env); + { + char *s = getenv("TS_ADJUST"); + if (s != NULL) + renv->pthreads_timestamp -= atoi(s); + } +#endif (void)time(&renv->timestamp); __os_unique_id(env, &renv->envid); @@ -505,7 +591,7 @@ ref.segid = tregion.segid; ref.max = tregion.max; if ((ret = __os_write( - env, env->lockfhp, &ref, sizeof(ref), &nrw)) != 0) { + env, env->primary_fhp, &ref, sizeof(ref), &nrw)) != 0) { __db_err(env, ret, DB_STR_A("1545", "%s: unable to write out public environment ID", "%s"), infop->name); @@ -513,16 +599,24 @@ } } -#ifndef HAVE_MUTEX_FCNTL - /* - * If we're not doing fcntl locking, we can close the file handle. We - * no longer need it and the less contact between the buffer cache and - * the VM, the better. 
- */ - if (env->lockfhp != NULL) { - (void)__os_closehandle(env, env->lockfhp); - env->lockfhp = NULL; +#ifdef HAVE_FCNTL + if ((ret = ENV_PRIMARY_UNLOCK(env)) != 0) { + __db_err(env, ret, "__env_attach: release exclusive lock"); + goto err; } + if ((ret = ENV_PRIMARY_LOCK(env, DB_LOCK_READ, 0)) != 0) { + __db_err(env, ret, "__env_attach: new: acquire shared lock"); + goto err; + } +#else + /* + * We no longer need the primary region file's handle and the less + * contact between the buffer cache and the VM, the better. + */ + if (env->primary_fhp != NULL) { + (void)__os_closehandle(env, env->primary_fhp); + env->primary_fhp = NULL; + } #endif /* Everything looks good, we're done. */ @@ -531,9 +625,9 @@ err: retry: /* Close any open file handle. */ - if (env->lockfhp != NULL) { - (void)__os_closehandle(env, env->lockfhp); - env->lockfhp = NULL; + if (env->primary_fhp != NULL) { + (void)__os_closehandle(env, env->primary_fhp); + env->primary_fhp = NULL; } /* @@ -562,9 +656,9 @@ /* If we had a temporary error, wait awhile and try again. */ if (ret == 0) { if (!retry_ok || ++retry_cnt > 3) { + ret = EAGAIN; __db_errx(env, DB_STR("1546", "unable to join the environment")); - ret = EAGAIN; } else { __os_yield(env, retry_cnt * 3, 0); goto loop; @@ -575,6 +669,59 @@ } /* + * __env_check_recreate -- + * Determine whether an existing on-disk environment should be recreated + * because it is not compatible with this compiled BDB library. + * + * Returns: + * 0 - + * The env was generated by this library. No recreation needed. + * DB_OLD_VERSION - + * It was created by an earlier BDB version, or by an earlier + * version of libpthreads (on certain Linux systems). The caller + * will try to recreate it with the currently configured settings. + * DB_VERSION_MISMATCH - + * It was created by a newer version of BDB. Do not attempt to + * fix it, something is probably wrong with the application setup. 
+ */ +static int +__env_check_recreate(env, renv, signature) + ENV *env; + REGENV *renv; + u_int32_t signature; +{ +#ifdef HAVE_PTHREADS_TIMESTAMP + time_t pthreads_time; + char envtime[CTIME_BUFLEN], libtime[CTIME_BUFLEN]; +#endif + + /* First, bail out if the env is too new for this code to handle. */ + if (renv->majver > DB_VERSION_MAJOR || + (renv->majver == DB_VERSION_MAJOR && + renv->minver > DB_VERSION_MINOR)) + return (DB_VERSION_MISMATCH); + +#ifdef HAVE_PTHREADS_TIMESTAMP + pthreads_time = __os_pthreads_timestamp(env); + if (pthreads_time != renv->pthreads_timestamp) { + if (FLD_ISSET(env->dbenv->verbose, DB_VERB_RECOVERY)) + __db_msg(env, + "Pthreads timestamp changed: env %.24s current %.24s", + __os_ctime(&renv->pthreads_timestamp, envtime), + __os_ctime(&pthreads_time, libtime)); + return (DB_OLD_VERSION); + } +#endif + if (renv->signature != signature || renv->majver != DB_VERSION_MAJOR || + renv->minver != DB_VERSION_MINOR) { + if (FLD_ISSET(env->dbenv->verbose, DB_VERB_RECOVERY)) + __db_msg(env, "Signature or version changed"); + return (DB_OLD_VERSION); + } + return (0); +} + +/* * __env_turn_on -- * Turn on the created environment. * @@ -794,12 +941,11 @@ renv = infop->primary; ret = 0; - /* Close the locking file handle. 
*/ - if (env->lockfhp != NULL) { + if (env->primary_fhp != NULL) { if ((t_ret = - __os_closehandle(env, env->lockfhp)) != 0 && ret == 0) + __os_closehandle(env, env->primary_fhp)) != 0 && ret == 0) ret = t_ret; - env->lockfhp = NULL; + env->primary_fhp = NULL; } /* diff -r db78da0996b1 src/env/env_register.c --- a/src/env/env_register.c Mon Sep 09 11:09:35 2013 -0400 +++ b/src/env/env_register.c Sat Apr 29 04:10:18 2017 -0700 @@ -19,9 +19,9 @@ #define PID_LEN (25) /* PID entry length */ #define REGISTRY_LOCK(env, pos, nowait) \ - __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 1, nowait) + __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), DB_LOCK_WRITE, nowait) #define REGISTRY_UNLOCK(env, pos) \ - __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), 0, 0) + __os_fdlock(env, (env)->dbenv->registry, (off_t)(pos), DB_LOCK_NG, 0) #define REGISTRY_EXCL_LOCK(env, nowait) \ REGISTRY_LOCK(env, 1, nowait) #define REGISTRY_EXCL_UNLOCK(env) \ diff -r db78da0996b1 src/env/env_stat.c --- a/src/env/env_stat.c Mon Sep 09 11:09:35 2013 -0400 +++ b/src/env/env_stat.c Sat Apr 29 04:10:18 2017 -0700 @@ -177,6 +177,10 @@ STAT_LONG("Txn version", DB_TXNVERSION); __db_msg(env, "%.24s\tCreation time", __os_ctime(&renv->timestamp, time_buf)); +#if defined(HAVE_PTHREADS_TIMESTAMP) && defined(HAVE_MUTEX_PTHREADS) + __db_msg(env, + "%.24s\tlibpthread timestamp", __os_ctime(&renv->pthreads_timestamp, time_buf)); +#endif STAT_HEX("Environment ID", renv->envid); __mutex_print_debug_single(env, "Primary region allocation and reference count mutex", @@ -429,7 +433,8 @@ STAT_ULONG("Pid cache", env->pid_cache); - STAT_ISSET("Lockfhp", env->lockfhp); + /* Change to Primary Region fhp? The name changed, but not its usage. 
*/ + STAT_ISSET("Lockfhp", env->primary_fhp); STAT_ISSET("Locker", env->env_lref); diff -r db78da0996b1 src/os/os_addrinfo.c --- a/src/os/os_addrinfo.c Mon Sep 09 11:09:35 2013 -0400 +++ b/src/os/os_addrinfo.c Sat Apr 29 04:10:18 2017 -0700 @@ -10,6 +10,10 @@ #include "db_int.h" +#ifdef HAVE_PTHREADS_TIMESTAMP +#include <link.h> +#endif + /* * __os_getaddrinfo and __os_freeaddrinfo wrap the getaddrinfo and freeaddrinfo * calls, as well as the associated platform dependent error handling, mapping @@ -177,3 +181,48 @@ } #endif } + +#ifdef HAVE_PTHREADS_TIMESTAMP +/* + * callback_find_pthreads -- + * dl_iterate_phdr() calls this once for each loaded library. + * + * Returns: + * 0 - the library does not appear to be libpthreads. + * 1 - the library *does* seem to be libpthreads. Its modification time is + * stored into the last argument's location. + */ +static int +callback_find_pthreads(struct dl_phdr_info *info, size_t size, void *data) +{ + struct stat stbuf; + + /* Stop (return non-zero) when libpthread is found. */ + if (strstr(info->dlpi_name, "libpthread") != NULL && + stat(info->dlpi_name, &stbuf) == 0) { + *(time_t *)data = stbuf.st_mtime; + return (1); + } + COMPQUIET(size, 0); + COMPQUIET(data, NULL); + return (0); +} + +/* + * __os_pthreads_timestamp -- + * + * PUBLIC: time_t __os_pthreads_timestamp __P((ENV *)); + */ +time_t +__os_pthreads_timestamp(env) + ENV *env; +{ + time_t timestamp; + + timestamp = 0; + dl_iterate_phdr(callback_find_pthreads, &timestamp); + + COMPQUIET(env, 0); + return (timestamp); +} +#endif diff -r db78da0996b1 src/os/os_flock.c --- a/src/os/os_flock.c Mon Sep 09 11:09:35 2013 -0400 +++ b/src/os/os_flock.c Sat Apr 29 04:10:18 2017 -0700 @@ -10,41 +10,98 @@ #include "db_int.h" +#if !defined(HAVE_FCNTL) || !defined(HAVE_FLOCK) +static int __os_filelocking_notsup __P((ENV *)); +#endif + /* * __os_fdlock -- * Acquire/release a lock on a byte in a file.
* - * PUBLIC: int __os_fdlock __P((ENV *, DB_FH *, off_t, int, int)); + * The lock modes supported here are: + * DB_LOCK_NG - release the lock + * DB_LOCK_READ - get shared access + * DB_LOCK_WRITE - get exclusive access + * + * Use fcntl()-like semantics most of the time (DB_REGISTER support). Fcntl + * supports range locking, but has the additional broken semantics that + * closing any of the file's descriptors releases any locks, even if its + * other file descriptors remain open. Thanks SYSV & POSIX. + * However, if the offset is negative (which is allowed, because POSIX + * off_t is a signed integer) then use flock() instead. It has only whole- + * file locks, but they persist until explicitly unlocked or the process + * exits. + * PUBLIC: int __os_fdlock __P((ENV *, DB_FH *, off_t, db_lockmode_t, int)); */ int -__os_fdlock(env, fhp, offset, acquire, nowait) +__os_fdlock(env, fhp, offset, lockmode, nowait) ENV *env; DB_FH *fhp; - int acquire, nowait; off_t offset; + db_lockmode_t lockmode; + int nowait; { #ifdef HAVE_FCNTL DB_ENV *dbenv; struct flock fl; int ret, t_ret; + static char *mode_string[DB_LOCK_WRITE + 1] = { + "unlock", + "read", + "write" + }; + short mode_fcntl[DB_LOCK_WRITE + 1] = { + F_UNLCK, + F_RDLCK, + F_WRLCK + }; +#ifdef HAVE_FLOCK + short mode_flock[DB_LOCK_WRITE + 1] = { + LOCK_UN, + LOCK_SH, + LOCK_EX + }; +#endif dbenv = env == NULL ? NULL : env->dbenv; DB_ASSERT(env, F_ISSET(fhp, DB_FH_OPENED) && fhp->fd != -1); + DB_ASSERT(env, lockmode <= DB_LOCK_WRITE); - if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) - __db_msg(env, DB_STR_A("0138", - "fileops: flock %s %s offset %lu", "%s %s %lu"), fhp->name, - acquire ? DB_STR_P("acquire"): DB_STR_P("release"), - (u_long)offset); + if (dbenv != NULL && FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) { + if (offset < 0) + __db_msg(env, DB_STR_A("####", + "fileops: flock %s %s %s", "%s %s %s"), + fhp->name, mode_string[lockmode], + nowait ? 
"nowait" : ""); + else + __db_msg(env, DB_STR_A("0020", + "fileops: fcntls %s %s offset %lu", "%s %s %lu"), + fhp->name, mode_string[lockmode], (u_long)offset); + } - fl.l_start = offset; - fl.l_len = 1; - fl.l_type = acquire ? F_WRLCK : F_UNLCK; - fl.l_whence = SEEK_SET; + if (offset < 0) { +#ifdef HAVE_FLOCK + RETRY_CHK_EINTR_ONLY(flock(fhp->fd, + mode_flock[lockmode] | (nowait ? LOCK_NB : 0)), ret); +#else + ret = __os_filelocking_notsup(env); +#endif + } else { + fl.l_start = offset; + fl.l_len = 1; + fl.l_whence = SEEK_SET; + fl.l_type = mode_fcntl[lockmode]; + RETRY_CHK_EINTR_ONLY( + fcntl(fhp->fd, nowait ? F_SETLK : F_SETLKW, &fl), ret); + } - RETRY_CHK_EINTR_ONLY( - (fcntl(fhp->fd, nowait ? F_SETLK : F_SETLKW, &fl)), ret); + if (offset < 0 && dbenv != NULL && + FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS_ALL)) + __db_msg(env, DB_STR_A("####", + "fileops: flock %s %s %s returns %s", "%s %s %s"), + fhp->name, mode_string[lockmode], + nowait ? "nowait" : "", db_strerror(ret)); if (ret == 0) return (0); @@ -53,12 +110,29 @@ __db_syserr(env, ret, DB_STR("0139", "fcntl")); return (t_ret); #else + ret = __os_filelocking_notsup(env); COMPQUIET(fhp, NULL); - COMPQUIET(acquire, 0); + COMPQUIET(lockmode, 0); COMPQUIET(nowait, 0); COMPQUIET(offset, 0); + return (ret) +#endif +} + + +#if !defined(HAVE_FCNTL) || !defined(HAVE_FLOCK) +/* + * __os_filelocking_notsup -- + * Generate an error message if fcntl() or flock() is requested on a + * platform that does not support it. + * + */ +static int +__os_filelocking_notsup(env) + ENV *env; +{ __db_syserr(env, DB_OPNOTSUP, DB_STR("0140", "advisory file locking unavailable")); return (DB_OPNOTSUP); +} #endif -} --- db-5.3.28/src/os/os_map.c.pthreads 2013-09-09 17:35:09.000000000 +0200 +++ db-5.3.28/src/os/os_map.c 2017-05-16 09:31:30.535713279 +0200 @@ -32,7 +32,7 @@ /* * __os_attach -- - * Create/join a shared memory region. + * Create/join a 'shared' region of Berkeley DB memory. 
* * PUBLIC: int __os_attach __P((ENV *, REGINFO *, REGION *)); */ @@ -50,6 +50,7 @@ * so there must be a valid handle. */ DB_ASSERT(env, env != NULL && env->dbenv != NULL); + DB_ASSERT(env, infop->fhp == NULL); dbenv = env->dbenv; if (DB_GLOBAL(j_region_map) != NULL) {