9e3a39e72d
Resolves: bz#1471742 bz#1652461 bz#1671862 bz#1676495 bz#1691620 Resolves: bz#1696334 bz#1696903 bz#1697820 bz#1698436 bz#1698728 Resolves: bz#1699709 bz#1699835 bz#1702240 Signed-off-by: Milind Changire <mchangir@redhat.com>
8977 lines
284 KiB
Diff
8977 lines
284 KiB
Diff
From 379b9f7247a4daac9545e3dec79d3c2660111d8d Mon Sep 17 00:00:00 2001
|
|
From: Hari Gowtham <hgowtham@redhat.com>
|
|
Date: Mon, 8 Apr 2019 11:32:09 +0530
|
|
Subject: [PATCH 085/124] Revert "all: remove code which is not being
|
|
considered in build"
|
|
|
|
This reverts most part of commit 8293d21280fd6ddfc9bb54068cf87794fc6be207.
|
|
It adds in the changes for tier and CTR with the necessary changes for building it.
|
|
|
|
Label: DOWNSTREAM ONLY
|
|
|
|
Change-Id: I8f7978618f2a6a949b09dbcfd25722494cb8f1cd
|
|
Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/166245
|
|
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
---
|
|
Makefile.am | 8 +-
|
|
configure.ac | 34 +
|
|
glusterfs.spec.in | 19 +
|
|
libglusterfs/Makefile.am | 4 +-
|
|
libglusterfs/src/glusterfs/mem-types.h | 1 +
|
|
xlators/cluster/dht/src/Makefile.am | 14 +-
|
|
xlators/cluster/dht/src/dht-rebalance.c | 12 +
|
|
xlators/cluster/dht/src/tier-common.c | 1199 ++++++++
|
|
xlators/cluster/dht/src/tier-common.h | 55 +
|
|
xlators/cluster/dht/src/tier.c | 3105 ++++++++++++++++++++
|
|
xlators/cluster/dht/src/tier.h | 110 +
|
|
xlators/features/Makefile.am | 2 +-
|
|
xlators/features/changetimerecorder/Makefile.am | 3 +
|
|
.../features/changetimerecorder/src/Makefile.am | 26 +
|
|
.../changetimerecorder/src/changetimerecorder.c | 2371 +++++++++++++++
|
|
.../changetimerecorder/src/changetimerecorder.h | 21 +
|
|
.../features/changetimerecorder/src/ctr-helper.c | 293 ++
|
|
.../features/changetimerecorder/src/ctr-helper.h | 854 ++++++
|
|
.../features/changetimerecorder/src/ctr-messages.h | 61 +
|
|
.../changetimerecorder/src/ctr-xlator-ctx.c | 362 +++
|
|
.../changetimerecorder/src/ctr-xlator-ctx.h | 68 +
|
|
.../changetimerecorder/src/ctr_mem_types.h | 22 +
|
|
22 files changed, 8637 insertions(+), 7 deletions(-)
|
|
create mode 100644 xlators/cluster/dht/src/tier-common.c
|
|
create mode 100644 xlators/cluster/dht/src/tier-common.h
|
|
create mode 100644 xlators/cluster/dht/src/tier.c
|
|
create mode 100644 xlators/cluster/dht/src/tier.h
|
|
create mode 100644 xlators/features/changetimerecorder/Makefile.am
|
|
create mode 100644 xlators/features/changetimerecorder/src/Makefile.am
|
|
create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.c
|
|
create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.h
|
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.c
|
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.h
|
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-messages.h
|
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
|
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
|
|
create mode 100644 xlators/features/changetimerecorder/src/ctr_mem_types.h
|
|
|
|
diff --git a/Makefile.am b/Makefile.am
|
|
index e0c795f..613382f 100644
|
|
--- a/Makefile.am
|
|
+++ b/Makefile.am
|
|
@@ -3,7 +3,7 @@ SOURCES = site.h
|
|
EXTRA_DIST = autogen.sh \
|
|
COPYING-GPLV2 COPYING-LGPLV3 COMMITMENT \
|
|
INSTALL README.md AUTHORS THANKS NEWS \
|
|
- glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \
|
|
+ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in libgfdb.pc.in \
|
|
run-tests.sh \
|
|
build-aux/pkg-version \
|
|
contrib/umountd \
|
|
@@ -15,8 +15,12 @@ SUBDIRS = $(ARGP_STANDALONE_DIR) rpc/xdr/gen libglusterfs rpc api xlators \
|
|
|
|
pkgconfigdir = @pkgconfigdir@
|
|
pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
|
|
+if USE_GFDB
|
|
+pkgconfig_DATA += libgfdb.pc
|
|
+endif
|
|
|
|
-CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile
|
|
+CLEANFILES = glusterfs-api.pc libgfchangelog.pc libgfdb.pc \
|
|
+ contrib/umountd/Makefile
|
|
|
|
gitclean: distclean
|
|
find . -name Makefile.in -exec rm -f {} \;
|
|
diff --git a/configure.ac b/configure.ac
|
|
index baa811a..633e850 100644
|
|
--- a/configure.ac
|
|
+++ b/configure.ac
|
|
@@ -30,6 +30,7 @@ AC_CONFIG_HEADERS([config.h site.h])
|
|
AC_CONFIG_FILES([Makefile
|
|
libglusterfs/Makefile
|
|
libglusterfs/src/Makefile
|
|
+ libglusterfs/src/gfdb/Makefile
|
|
geo-replication/src/peer_gsec_create
|
|
geo-replication/src/peer_mountbroker
|
|
geo-replication/src/peer_mountbroker.py
|
|
@@ -121,6 +122,8 @@ AC_CONFIG_FILES([Makefile
|
|
xlators/features/changelog/src/Makefile
|
|
xlators/features/changelog/lib/Makefile
|
|
xlators/features/changelog/lib/src/Makefile
|
|
+ xlators/features/changetimerecorder/Makefile
|
|
+ xlators/features/changetimerecorder/src/Makefile
|
|
xlators/features/locks/Makefile
|
|
xlators/features/locks/src/Makefile
|
|
xlators/features/quota/Makefile
|
|
@@ -237,6 +240,7 @@ AC_CONFIG_FILES([Makefile
|
|
contrib/umountd/Makefile
|
|
glusterfs-api.pc
|
|
libgfchangelog.pc
|
|
+ libgfdb.pc
|
|
api/Makefile
|
|
api/src/Makefile
|
|
api/examples/Makefile
|
|
@@ -866,6 +870,33 @@ AM_CONDITIONAL([USE_FIREWALLD],test ["x${BUILD_FIREWALLD}" = "xyes"])
|
|
|
|
#endof firewald section
|
|
|
|
+# Data tiering requires sqlite
|
|
+AC_ARG_ENABLE([tiering],
|
|
+ AC_HELP_STRING([--disable-tiering],
|
|
+ [Disable data classification/tiering]),
|
|
+ [BUILD_GFDB="${enableval}"], [BUILD_GFDB="yes"])
|
|
+
|
|
+case $host_os in
|
|
+ darwin*)
|
|
+ SQLITE_LIBS="-lsqlite3"
|
|
+ AC_CHECK_HEADERS([sqlite3.h], AC_DEFINE(USE_GFDB, 1))
|
|
+ ;;
|
|
+ *)
|
|
+ if test "x${BUILD_GFDB}" = "xyes"; then
|
|
+ PKG_CHECK_MODULES([SQLITE], [sqlite3],
|
|
+ AC_DEFINE(USE_GFDB, 1),
|
|
+ AC_MSG_ERROR([pass --disable-tiering to build without sqlite]))
|
|
+ else
|
|
+ AC_DEFINE(USE_GFDB, 0, [no sqlite, gfdb is disabled])
|
|
+ fi
|
|
+ ;;
|
|
+esac
|
|
+
|
|
+AC_SUBST(SQLITE_CFLAGS)
|
|
+AC_SUBST(SQLITE_LIBS)
|
|
+AM_CONDITIONAL(BUILD_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes")
|
|
+AM_CONDITIONAL(USE_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes")
|
|
+
|
|
# xml-output
|
|
AC_ARG_ENABLE([xml-output],
|
|
AC_HELP_STRING([--disable-xml-output],
|
|
@@ -1544,6 +1575,8 @@ GFAPI_VERSION="7."${PACKAGE_VERSION}
|
|
LIBGFCHANGELOG_VERSION="0.0.1"
|
|
AC_SUBST(GFAPI_VERSION)
|
|
AC_SUBST(LIBGFCHANGELOG_VERSION)
|
|
+LIBGFDB_VERSION="0.0.1"
|
|
+AC_SUBST(LIBGFDB_VERSION)
|
|
|
|
dnl libtool versioning
|
|
LIBGFXDR_LT_VERSION="0:1:0"
|
|
@@ -1584,6 +1617,7 @@ echo "XML output : $BUILD_XML_OUTPUT"
|
|
echo "Unit Tests : $BUILD_UNITTEST"
|
|
echo "Track priv ports : $TRACK_PRIVPORTS"
|
|
echo "POSIX ACLs : $BUILD_POSIX_ACLS"
|
|
+echo "Data Classification : $BUILD_GFDB"
|
|
echo "firewalld-config : $BUILD_FIREWALLD"
|
|
echo "Events : $BUILD_EVENTS"
|
|
echo "EC dynamic support : $EC_DYNAMIC_SUPPORT"
|
|
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
|
|
index 2149f86..e0607ba 100644
|
|
--- a/glusterfs.spec.in
|
|
+++ b/glusterfs.spec.in
|
|
@@ -154,6 +154,7 @@
|
|
%global _without_events --disable-events
|
|
%global _without_georeplication --disable-georeplication
|
|
%global _with_gnfs %{nil}
|
|
+%global _without_tiering --disable-tiering
|
|
%global _without_ocf --without-ocf
|
|
%endif
|
|
|
|
@@ -287,6 +288,9 @@ BuildRequires: libuuid-devel
|
|
%if ( 0%{?_with_cmocka:1} )
|
|
BuildRequires: libcmocka-devel >= 1.0.1
|
|
%endif
|
|
+%if ( 0%{!?_without_tiering:1} )
|
|
+BuildRequires: sqlite-devel
|
|
+%endif
|
|
%if ( 0%{!?_without_georeplication:1} )
|
|
BuildRequires: libattr-devel
|
|
%endif
|
|
@@ -797,6 +801,7 @@ export LDFLAGS
|
|
%{?_without_rdma} \
|
|
%{?_without_server} \
|
|
%{?_without_syslog} \
|
|
+ %{?_without_tiering} \
|
|
%{?_with_ipv6default} \
|
|
%{?_without_libtirpc}
|
|
|
|
@@ -1232,9 +1237,15 @@ exit 0
|
|
%if ( 0%{?_without_server:1} )
|
|
%exclude %{_libdir}/pkgconfig/libgfchangelog.pc
|
|
%exclude %{_libdir}/libgfchangelog.so
|
|
+%if ( 0%{!?_without_tiering:1} )
|
|
+%{_libdir}/pkgconfig/libgfdb.pc
|
|
+%endif
|
|
%else
|
|
%{_libdir}/pkgconfig/libgfchangelog.pc
|
|
%{_libdir}/libgfchangelog.so
|
|
+%if ( 0%{!?_without_tiering:1} )
|
|
+%{_libdir}/pkgconfig/libgfdb.pc
|
|
+%endif
|
|
%endif
|
|
|
|
%files client-xlators
|
|
@@ -1330,6 +1341,10 @@ exit 0
|
|
%files libs
|
|
%{_libdir}/*.so.*
|
|
%exclude %{_libdir}/libgfapi.*
|
|
+%if ( 0%{!?_without_tiering:1} )
|
|
+# libgfdb is only needed server-side
|
|
+%exclude %{_libdir}/libgfdb.*
|
|
+%endif
|
|
|
|
%files -n python%{_pythonver}-gluster
|
|
# introducing glusterfs module in site packages.
|
|
@@ -1417,6 +1432,10 @@ exit 0
|
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
|
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
|
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so
|
|
+%if ( 0%{!?_without_tiering:1} )
|
|
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so
|
|
+ %{_libdir}/libgfdb.so.*
|
|
+%endif
|
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so
|
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so
|
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
|
|
diff --git a/libglusterfs/Makefile.am b/libglusterfs/Makefile.am
|
|
index d471a3f..7e72f61 100644
|
|
--- a/libglusterfs/Makefile.am
|
|
+++ b/libglusterfs/Makefile.am
|
|
@@ -1,3 +1,3 @@
|
|
-SUBDIRS = src
|
|
+SUBDIRS = src src/gfdb
|
|
|
|
-CLEANFILES =
|
|
+CLEANFILES =
|
|
diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h
|
|
index 832f68c..92730a9 100644
|
|
--- a/libglusterfs/src/glusterfs/mem-types.h
|
|
+++ b/libglusterfs/src/glusterfs/mem-types.h
|
|
@@ -138,6 +138,7 @@ enum gf_common_mem_types_ {
|
|
gf_common_volfile_t,
|
|
gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
|
|
gf_common_mt_server_cmdline_t, /* used only in one location */
|
|
+ gf_mt_gfdb_query_record_t,
|
|
gf_common_mt_end
|
|
};
|
|
#endif
|
|
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
|
|
index 56f1f2a..5532047 100644
|
|
--- a/xlators/cluster/dht/src/Makefile.am
|
|
+++ b/xlators/cluster/dht/src/Makefile.am
|
|
@@ -1,4 +1,7 @@
|
|
xlator_LTLIBRARIES = dht.la nufa.la switch.la
|
|
+if BUILD_GFDB
|
|
+ xlator_LTLIBRARIES += tier.la
|
|
+endif
|
|
|
|
AM_CFLAGS = -Wall $(GF_CFLAGS)
|
|
|
|
@@ -13,6 +16,7 @@ dht_la_SOURCES = $(dht_common_source) dht.c
|
|
|
|
nufa_la_SOURCES = $(dht_common_source) nufa.c
|
|
switch_la_SOURCES = $(dht_common_source) switch.c
|
|
+tier_la_SOURCES = $(dht_common_source) tier.c tier-common.c
|
|
|
|
dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
|
|
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
|
|
@@ -23,15 +27,21 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
|
|
switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
|
|
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
|
|
|
|
+tier_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL)
|
|
+tier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
|
|
+
|
|
noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \
|
|
- dht-lock.h $(top_builddir)/xlators/lib/src/libxlator.h
|
|
+ dht-lock.h tier-common.h tier.h \
|
|
+ $(top_builddir)/xlators/lib/src/libxlator.h
|
|
|
|
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
|
|
+ -I$(top_srcdir)/libglusterfs/src/gfdb \
|
|
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
|
|
-I$(top_srcdir)/rpc/rpc-lib/src \
|
|
-I$(top_srcdir)/xlators/lib/src \
|
|
-DDATADIR=\"$(localstatedir)\" \
|
|
- -DLIBDIR=\"$(libdir)\"
|
|
+ -DLIBDIR=\"$(libdir)\" \
|
|
+ -DLIBGFDB_VERSION=\"$(LIBGFDB_VERSION)\"
|
|
|
|
CLEANFILES =
|
|
|
|
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
|
|
index e0f25b1..efbe8a4 100644
|
|
--- a/xlators/cluster/dht/src/dht-rebalance.c
|
|
+++ b/xlators/cluster/dht/src/dht-rebalance.c
|
|
@@ -8,6 +8,7 @@
|
|
cases as published by the Free Software Foundation.
|
|
*/
|
|
|
|
+#include "tier.h"
|
|
#include "dht-common.h"
|
|
#include <glusterfs/xlator.h>
|
|
#include <glusterfs/syscall.h>
|
|
@@ -2134,6 +2135,17 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
}
|
|
}
|
|
|
|
+ /* store size of previous migrated file */
|
|
+ if (defrag && defrag->tier_conf.is_tier) {
|
|
+ if (from != TIER_HASHED_SUBVOL) {
|
|
+ defrag->tier_conf.st_last_promoted_size = stbuf.ia_size;
|
|
+ } else {
|
|
+ /* Don't delete the linkto file on the hashed subvol */
|
|
+ delete_src_linkto = _gf_false;
|
|
+ defrag->tier_conf.st_last_demoted_size = stbuf.ia_size;
|
|
+ }
|
|
+ }
|
|
+
|
|
/* The src file is being unlinked after this so we don't need
|
|
to clean it up */
|
|
clean_src = _gf_false;
|
|
diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c
|
|
new file mode 100644
|
|
index 0000000..b22f477
|
|
--- /dev/null
|
|
+++ b/xlators/cluster/dht/src/tier-common.c
|
|
@@ -0,0 +1,1199 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#include <glusterfs/glusterfs.h>
|
|
+#include <glusterfs/xlator.h>
|
|
+#include "libxlator.h"
|
|
+#include "dht-common.h"
|
|
+#include <glusterfs/defaults.h>
|
|
+#include "tier-common.h"
|
|
+#include "tier.h"
|
|
+
|
|
+int
|
|
+dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
|
|
+ int op_errno, inode_t *inode, struct iatt *stbuf,
|
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
|
|
+
|
|
+int
|
|
+tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
|
|
+ int op_errno, inode_t *inode, struct iatt *stbuf,
|
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
|
|
+{
|
|
+ dht_local_t *local = NULL;
|
|
+ loc_t *oldloc = NULL;
|
|
+ loc_t *newloc = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+
|
|
+ oldloc = &local->loc;
|
|
+ newloc = &local->loc2;
|
|
+
|
|
+ if (op_ret == -1) {
|
|
+ /* No continuation on DHT inode missing errors, as we should
|
|
+ * then have a good stbuf that states P2 happened. We would
|
|
+ * get inode missing if the file completed migration between
|
|
+ * the lookup and the link call */
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (local->call_cnt != 1) {
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ local->call_cnt = 2;
|
|
+
|
|
+ /* Do this on the hot tier now */
|
|
+
|
|
+ STACK_WIND(frame, tier_link_cbk, local->cached_subvol,
|
|
+ local->cached_subvol->fops->link, oldloc, newloc, xdata);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+out:
|
|
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
|
|
+
|
|
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
|
|
+ postparent, NULL);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
|
|
+ dict_t *xdata)
|
|
+{
|
|
+ xlator_t *cached_subvol = NULL;
|
|
+ xlator_t *hashed_subvol = NULL;
|
|
+ int op_errno = -1;
|
|
+ int ret = -1;
|
|
+ dht_local_t *local = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+
|
|
+ VALIDATE_OR_GOTO(frame, err);
|
|
+ VALIDATE_OR_GOTO(this, err);
|
|
+ VALIDATE_OR_GOTO(oldloc, err);
|
|
+ VALIDATE_OR_GOTO(newloc, err);
|
|
+
|
|
+ conf = this->private;
|
|
+
|
|
+ local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
|
|
+ if (!local) {
|
|
+ op_errno = ENOMEM;
|
|
+ goto err;
|
|
+ }
|
|
+ local->call_cnt = 1;
|
|
+
|
|
+ cached_subvol = local->cached_subvol;
|
|
+
|
|
+ if (!cached_subvol) {
|
|
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
|
|
+ oldloc->path);
|
|
+ op_errno = ENOENT;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ hashed_subvol = TIER_HASHED_SUBVOL;
|
|
+
|
|
+ ret = loc_copy(&local->loc2, newloc);
|
|
+ if (ret == -1) {
|
|
+ op_errno = ENOMEM;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ if (hashed_subvol == cached_subvol) {
|
|
+ STACK_WIND(frame, dht_link_cbk, cached_subvol,
|
|
+ cached_subvol->fops->link, oldloc, newloc, xdata);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ /* Create hardlinks to both the data file on the hot tier
|
|
+ and the linkto file on the cold tier */
|
|
+
|
|
+ gf_uuid_copy(local->gfid, oldloc->inode->gfid);
|
|
+
|
|
+ STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link,
|
|
+ oldloc, newloc, xdata);
|
|
+
|
|
+ return 0;
|
|
+err:
|
|
+ op_errno = (op_errno == -1) ? errno : op_errno;
|
|
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
|
|
+ xlator_t *this, int op_ret, int op_errno,
|
|
+ struct iatt *preparent,
|
|
+ struct iatt *postparent, dict_t *xdata)
|
|
+{
|
|
+ dht_local_t *local = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+
|
|
+ if (local->params) {
|
|
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
|
|
+ }
|
|
+
|
|
+ DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
|
|
+ NULL, NULL);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
|
|
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
|
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
|
|
+{
|
|
+ xlator_t *prev = NULL;
|
|
+ int ret = -1;
|
|
+ dht_local_t *local = NULL;
|
|
+ xlator_t *hashed_subvol = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+ conf = this->private;
|
|
+
|
|
+ hashed_subvol = TIER_HASHED_SUBVOL;
|
|
+
|
|
+ if (!local) {
|
|
+ op_ret = -1;
|
|
+ op_errno = EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (op_ret == -1) {
|
|
+ if (local->linked == _gf_true && local->xattr_req) {
|
|
+ local->op_errno = op_errno;
|
|
+ local->op_ret = op_ret;
|
|
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
|
|
+ local->xattr_req);
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
|
|
+ "Failed to set dictionary value to "
|
|
+ "unlink of migrating file");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk,
|
|
+ hashed_subvol, hashed_subvol->fops->unlink, &local->loc,
|
|
+ 0, local->xattr_req);
|
|
+ return 0;
|
|
+ }
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ prev = cookie;
|
|
+
|
|
+ if (local->loc.parent) {
|
|
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
|
|
+
|
|
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
|
|
+ }
|
|
+
|
|
+ ret = dht_layout_preset(this, prev, inode);
|
|
+ if (ret != 0) {
|
|
+ gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
|
|
+ prev->name);
|
|
+ op_ret = -1;
|
|
+ op_errno = EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ local->op_errno = op_errno;
|
|
+
|
|
+ if (local->linked == _gf_true) {
|
|
+ local->stbuf = *stbuf;
|
|
+ dht_linkfile_attr_heal(frame, this);
|
|
+ }
|
|
+out:
|
|
+ if (local) {
|
|
+ if (local->xattr_req) {
|
|
+ dict_del(local->xattr_req, TIER_LINKFILE_GFID);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
|
|
+
|
|
+ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
|
|
+ preparent, postparent, xdata);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
|
|
+ xlator_t *this, int32_t op_ret,
|
|
+ int32_t op_errno, inode_t *inode,
|
|
+ struct iatt *stbuf, struct iatt *preparent,
|
|
+ struct iatt *postparent, dict_t *xdata)
|
|
+{
|
|
+ dht_local_t *local = NULL;
|
|
+ xlator_t *cached_subvol = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+ int ret = -1;
|
|
+ unsigned char *gfid = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+ if (!local) {
|
|
+ op_errno = EINVAL;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ if (op_ret == -1) {
|
|
+ local->op_errno = op_errno;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ conf = this->private;
|
|
+ if (!conf) {
|
|
+ local->op_errno = EINVAL;
|
|
+ op_errno = EINVAL;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ cached_subvol = TIER_UNHASHED_SUBVOL;
|
|
+
|
|
+ if (local->params) {
|
|
+ dict_del(local->params, conf->link_xattr_name);
|
|
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * We will delete the linkfile if data file creation fails.
|
|
+ * When deleting this stale linkfile, there is a possibility
|
|
+ * for a race between this linkfile deletion and a stale
|
|
+ * linkfile deletion triggered by another lookup from different
|
|
+ * client.
|
|
+ *
|
|
+ * For eg:
|
|
+ *
|
|
+ * Client 1 Client 2
|
|
+ *
|
|
+ * 1 linkfile created for foo
|
|
+ *
|
|
+ * 2 data file creation failed
|
|
+ *
|
|
+ * 3 creating a file with same name
|
|
+ *
|
|
+ * 4 lookup before creation deleted
|
|
+ * the linkfile created by client1
|
|
+ * considering as a stale linkfile.
|
|
+ *
|
|
+ * 5 New linkfile created for foo
|
|
+ * with different gfid.
|
|
+ *
|
|
+ * 6 Trigger linkfile deletion as
|
|
+ * data file creation failed.
|
|
+ *
|
|
+ * 7 Linkfile deleted which is
|
|
+ * created by client2.
|
|
+ *
|
|
+ * 8 Data file created.
|
|
+ *
|
|
+ * With this race, we will end up having a file in a non-hashed subvol
|
|
+ * without a linkfile in hashed subvol.
|
|
+ *
|
|
+ * To avoid this, we store the gfid of linkfile created by client, So
|
|
+ * If we delete the linkfile , we validate gfid of existing file with
|
|
+ * stored value from posix layer.
|
|
+ *
|
|
+ * Storing this value in local->xattr_req as local->params was also used
|
|
+ * to create the data file. During the linkfile deletion we will use
|
|
+ * local->xattr_req dictionary.
|
|
+ */
|
|
+ if (!local->xattr_req) {
|
|
+ local->xattr_req = dict_new();
|
|
+ if (!local->xattr_req) {
|
|
+ local->op_errno = ENOMEM;
|
|
+ op_errno = ENOMEM;
|
|
+ goto err;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
|
|
+ if (!gfid) {
|
|
+ local->op_errno = ENOMEM;
|
|
+ op_errno = ENOMEM;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ gf_uuid_copy(gfid, stbuf->ia_gfid);
|
|
+ ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid,
|
|
+ sizeof(uuid_t));
|
|
+ if (ret) {
|
|
+ GF_FREE(gfid);
|
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
|
|
+ "Failed to set dictionary value"
|
|
+ " : key = %s",
|
|
+ TIER_LINKFILE_GFID);
|
|
+ }
|
|
+
|
|
+ STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol,
|
|
+ cached_subvol->fops->create, &local->loc, local->flags,
|
|
+ local->mode, local->umask, local->fd, local->params);
|
|
+
|
|
+ return 0;
|
|
+err:
|
|
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
|
|
+ NULL);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+gf_boolean_t
|
|
+tier_is_hot_tier_decommissioned(xlator_t *this)
|
|
+{
|
|
+ dht_conf_t *conf = NULL;
|
|
+ xlator_t *hot_tier = NULL;
|
|
+ int i = 0;
|
|
+
|
|
+ conf = this->private;
|
|
+ hot_tier = conf->subvolumes[1];
|
|
+
|
|
+ if (conf->decommission_subvols_cnt) {
|
|
+ for (i = 0; i < conf->subvolume_cnt; i++) {
|
|
+ if (conf->decommissioned_bricks[i] &&
|
|
+ conf->decommissioned_bricks[i] == hot_tier)
|
|
+ return _gf_true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return _gf_false;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
|
|
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
|
|
+{
|
|
+ int op_errno = -1;
|
|
+ dht_local_t *local = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+ xlator_t *hot_subvol = NULL;
|
|
+ xlator_t *cold_subvol = NULL;
|
|
+
|
|
+ VALIDATE_OR_GOTO(frame, err);
|
|
+ VALIDATE_OR_GOTO(this, err);
|
|
+ VALIDATE_OR_GOTO(loc, err);
|
|
+
|
|
+ conf = this->private;
|
|
+
|
|
+ dht_get_du_info(frame, this, loc);
|
|
+
|
|
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
|
|
+ if (!local) {
|
|
+ op_errno = ENOMEM;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ cold_subvol = TIER_HASHED_SUBVOL;
|
|
+ hot_subvol = TIER_UNHASHED_SUBVOL;
|
|
+
|
|
+ if (conf->subvolumes[0] != cold_subvol) {
|
|
+ hot_subvol = conf->subvolumes[0];
|
|
+ }
|
|
+ /*
|
|
+ * if hot tier full, write to cold.
|
|
+ * Also if hot tier is decommissioned, create in cold
|
|
+ */
|
|
+ if (dht_is_subvol_filled(this, hot_subvol) ||
|
|
+ tier_is_hot_tier_decommissioned(this)) {
|
|
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
|
|
+ cold_subvol->name);
|
|
+
|
|
+ STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol,
|
|
+ cold_subvol->fops->create, loc, flags, mode, umask,
|
|
+ fd, params);
|
|
+ } else {
|
|
+ local->params = dict_ref(params);
|
|
+ local->flags = flags;
|
|
+ local->mode = mode;
|
|
+ local->umask = umask;
|
|
+ local->cached_subvol = hot_subvol;
|
|
+ local->hashed_subvol = cold_subvol;
|
|
+
|
|
+ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path,
|
|
+ hot_subvol->name, cold_subvol->name);
|
|
+
|
|
+ dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this,
|
|
+ hot_subvol, cold_subvol, loc);
|
|
+
|
|
+ goto out;
|
|
+ }
|
|
+out:
|
|
+ return 0;
|
|
+
|
|
+err:
|
|
+
|
|
+ op_errno = (op_errno == -1) ? errno : op_errno;
|
|
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
|
|
+ NULL);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie,
|
|
+ xlator_t *this, int op_ret, int op_errno,
|
|
+ struct iatt *preparent,
|
|
+ struct iatt *postparent, dict_t *xdata)
|
|
+{
|
|
+ dht_local_t *local = NULL;
|
|
+ xlator_t *prev = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+ prev = cookie;
|
|
+
|
|
+ LOCK(&frame->lock);
|
|
+ {
|
|
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
|
|
+ local->op_errno = op_errno;
|
|
+ local->op_ret = op_ret;
|
|
+ gf_msg_debug(this->name, op_errno,
|
|
+ "Unlink link: subvolume %s"
|
|
+ " returned -1",
|
|
+ prev->name);
|
|
+ goto unlock;
|
|
+ }
|
|
+
|
|
+ local->op_ret = 0;
|
|
+ }
|
|
+unlock:
|
|
+ UNLOCK(&frame->lock);
|
|
+
|
|
+ if (local->op_ret == -1)
|
|
+ goto err;
|
|
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
|
|
+ &local->preparent, &local->postparent, NULL);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+err:
|
|
+ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
|
|
+ int op_ret, int op_errno, inode_t *inode,
|
|
+ struct iatt *preparent, dict_t *xdata,
|
|
+ struct iatt *postparent)
|
|
+{
|
|
+ dht_local_t *local = NULL;
|
|
+ xlator_t *prev = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+ xlator_t *hot_subvol = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+ prev = cookie;
|
|
+ conf = this->private;
|
|
+ hot_subvol = TIER_UNHASHED_SUBVOL;
|
|
+
|
|
+ if (!op_ret) {
|
|
+ /*
|
|
+ * linkfile present on hot tier. unlinking the linkfile
|
|
+ */
|
|
+ STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol,
|
|
+ hot_subvol, hot_subvol->fops->unlink, &local->loc,
|
|
+ local->flags, NULL);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ LOCK(&frame->lock);
|
|
+ {
|
|
+ if (op_errno == ENOENT) {
|
|
+ local->op_ret = 0;
|
|
+ local->op_errno = op_errno;
|
|
+ } else {
|
|
+ local->op_ret = op_ret;
|
|
+ local->op_errno = op_errno;
|
|
+ }
|
|
+ gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1",
|
|
+ prev->name);
|
|
+ }
|
|
+
|
|
+ UNLOCK(&frame->lock);
|
|
+
|
|
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
|
|
+ &local->preparent, &local->postparent, xdata);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
|
|
+ int op_ret, int op_errno, struct iatt *preparent,
|
|
+ struct iatt *postparent, dict_t *xdata)
|
|
+{
|
|
+ dht_local_t *local = NULL;
|
|
+ xlator_t *prev = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+ prev = cookie;
|
|
+
|
|
+ LOCK(&frame->lock);
|
|
+ {
|
|
+ /* Ignore EINVAL for tier to ignore error when the file
|
|
+ does not exist on the other tier */
|
|
+ if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) {
|
|
+ local->op_errno = op_errno;
|
|
+ local->op_ret = op_ret;
|
|
+ gf_msg_debug(this->name, op_errno,
|
|
+ "Unlink link: subvolume %s"
|
|
+ " returned -1",
|
|
+ prev->name);
|
|
+ goto unlock;
|
|
+ }
|
|
+
|
|
+ local->op_ret = 0;
|
|
+ }
|
|
+unlock:
|
|
+ UNLOCK(&frame->lock);
|
|
+
|
|
+ if (local->op_ret == -1)
|
|
+ goto err;
|
|
+
|
|
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
|
|
+ &local->preparent, &local->postparent, xdata);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+err:
|
|
+ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int32_t
|
|
+tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
|
|
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
|
|
+ dict_t *xdata)
|
|
+{
|
|
+ dht_local_t *local = NULL;
|
|
+ xlator_t *prev = NULL;
|
|
+ struct iatt *stbuf = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+ int ret = -1;
|
|
+ xlator_t *hot_tier = NULL;
|
|
+ xlator_t *cold_tier = NULL;
|
|
+
|
|
+ local = frame->local;
|
|
+ prev = cookie;
|
|
+ conf = this->private;
|
|
+
|
|
+ cold_tier = TIER_HASHED_SUBVOL;
|
|
+ hot_tier = TIER_UNHASHED_SUBVOL;
|
|
+
|
|
+ LOCK(&frame->lock);
|
|
+ {
|
|
+ if (op_ret == -1) {
|
|
+ if (op_errno == ENOENT) {
|
|
+ local->op_ret = 0;
|
|
+ } else {
|
|
+ local->op_ret = -1;
|
|
+ local->op_errno = op_errno;
|
|
+ }
|
|
+ gf_msg_debug(this->name, op_errno,
|
|
+ "Unlink: subvolume %s returned -1"
|
|
+ " with errno = %d",
|
|
+ prev->name, op_errno);
|
|
+ goto unlock;
|
|
+ }
|
|
+
|
|
+ local->op_ret = 0;
|
|
+
|
|
+ local->postparent = *postparent;
|
|
+ local->preparent = *preparent;
|
|
+
|
|
+ if (local->loc.parent) {
|
|
+ dht_inode_ctx_time_update(local->loc.parent, this,
|
|
+ &local->preparent, 0);
|
|
+ dht_inode_ctx_time_update(local->loc.parent, this,
|
|
+ &local->postparent, 1);
|
|
+ }
|
|
+ }
|
|
+unlock:
|
|
+ UNLOCK(&frame->lock);
|
|
+
|
|
+ if (local->op_ret)
|
|
+ goto out;
|
|
+
|
|
+ if (cold_tier != local->cached_subvol) {
|
|
+ /*
|
|
+ * File is present in hot tier, so there will be
|
|
+ * a link file on cold tier, deleting the linkfile
|
|
+ * from cold tier
|
|
+ */
|
|
+ STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier,
|
|
+ cold_tier->fops->unlink, &local->loc, local->flags,
|
|
+ xdata);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
|
|
+ if (!ret && stbuf &&
|
|
+ ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) {
|
|
+ /*
|
|
+ * File is migrating from cold to hot tier.
|
|
+ * Delete the destination linkfile.
|
|
+ */
|
|
+ STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier,
|
|
+ hot_tier->fops->lookup, &local->loc, NULL);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+out:
|
|
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
|
|
+ &local->preparent, &local->postparent, xdata);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
+ * Unlink fop of the tier translator.
+ *
+ * The unlink is wound to the subvolume that caches the file and is
+ * completed by tier_unlink_cbk.  For a regular file whose cached subvolume
+ * is also the hashed subvolume, DHT_IATT_IN_XDATA_KEY is added to the
+ * request xdata so that the callback can inspect the returned iatt.
+ *
+ * frame : call frame of the fop
+ * this  : tier xlator
+ * loc   : file to unlink
+ * xflag : unlink flags, forwarded unchanged and saved in local->flags
+ * xdata : optional request dictionary (may be NULL)
+ *
+ * Always returns 0; failures are reported by unwinding the frame with
+ * op_ret = -1.
+ */
+int
+tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+            dict_t *xdata)
+{
+    xlator_t *cached_subvol = NULL;
+    xlator_t *hashed_subvol = NULL;
+    dht_conf_t *conf = NULL;
+    int op_errno = -1;
+    dht_local_t *local = NULL;
+    int ret = -1;
+
+    VALIDATE_OR_GOTO(frame, err);
+    VALIDATE_OR_GOTO(this, err);
+    VALIDATE_OR_GOTO(loc, err);
+    /* same check tier_do_readdir() and tier_statfs() perform */
+    VALIDATE_OR_GOTO(this->private, err);
+
+    /* NOTE(review): TIER_HASHED_SUBVOL presumably expands in terms of the
+     * local variable 'conf' - keep the name. */
+    conf = this->private;
+
+    local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
+    if (!local) {
+        op_errno = ENOMEM;
+
+        goto err;
+    }
+
+    hashed_subvol = TIER_HASHED_SUBVOL;
+
+    cached_subvol = local->cached_subvol;
+    if (!cached_subvol) {
+        gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+                     loc->path);
+        op_errno = EINVAL;
+        goto err;
+    }
+
+    local->flags = xflag;
+    if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) {
+        /*
+         * File resides in cold tier. We need to stat
+         * the file to see if it is being promoted.
+         * If yes we need to delete the destination
+         * file as well.
+         *
+         * Currently we are doing this check only for
+         * regular files.
+         */
+        xdata = xdata ? dict_ref(xdata) : dict_new();
+        if (xdata) {
+            ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
+            if (ret) {
+                gf_msg_debug(this->name, 0, "Failed to set dictionary key %s",
+                             DHT_IATT_IN_XDATA_KEY);
+            }
+        }
+    } else if (xdata) {
+        /*
+         * Take our own reference here as well: the dict_unref() after the
+         * wind must only ever drop a reference owned by this function,
+         * never the one held by the caller.
+         */
+        dict_ref(xdata);
+    }
+
+    /*
+     * Delete the data file on the cached subvolume first; tier_unlink_cbk
+     * takes care of any linkfile that is left on the other tier.
+     */
+    STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol,
+                      cached_subvol->fops->unlink, loc, xflag, xdata);
+    if (xdata)
+        dict_unref(xdata);
+    return 0;
+err:
+    op_errno = (op_errno == -1) ? errno : op_errno;
+    DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * Callback of the readdir wound by tier_do_readdir().
+ *
+ * Copies name, offset, inode number, type and length of every entry in
+ * orig_entries into a private list and unwinds with the number of copied
+ * entries.  A negative op_ret from the child is reported upwards as 0.
+ */
+int
+tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                 int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+    gf_dirent_t entries;
+    gf_dirent_t *src = NULL;
+    gf_dirent_t *copy = NULL;
+    int copied = 0;
+
+    INIT_LIST_HEAD(&entries.list);
+
+    if (op_ret >= 0) {
+        list_for_each_entry(src, (&orig_entries->list), list)
+        {
+            copy = gf_dirent_for_name(src->d_name);
+            if (!copy) {
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+                       "Memory allocation failed ");
+                goto unwind;
+            }
+
+            copy->d_off = src->d_off;
+            copy->d_ino = src->d_ino;
+            copy->d_type = src->d_type;
+            copy->d_len = src->d_len;
+
+            list_add_tail(&copy->list, &entries.list);
+            copied++;
+        }
+        op_ret = copied;
+    }
+
+unwind:
+    /* errors from the child are not propagated as a failure */
+    if (op_ret < 0)
+        op_ret = 0;
+
+    DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
+
+    gf_dirent_free(&entries);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * Callback of the readdirp wound by tier_do_readdir() (and re-wound by
+ * this callback itself).
+ *
+ * Every entry with a valid stat is copied into a private list.  Linkfiles
+ * are passed through untouched; directories get their inode ctx time
+ * updated; for all other entries the layout is preset on the inode.
+ * If no entry could be used but the child reported a non-zero offset, the
+ * readdirp is wound again to the same subvolume from that offset.
+ * A negative op_ret is reported upwards as 0.
+ */
+int
+tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                  int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+    dht_local_t *local = frame->local;
+    /* keep the name 'conf': TIER_HASHED_SUBVOL and the validation message
+     * below refer to it */
+    dht_conf_t *conf = NULL;
+    xlator_t *prev = cookie;
+    xlator_t *next_subvol = NULL;
+    inode_table_t *itable = NULL;
+    inode_t *found = NULL;
+    gf_dirent_t entries;
+    gf_dirent_t *src = NULL;
+    gf_dirent_t *copy = NULL;
+    off_t next_offset = 0;
+    int count = 0;
+    int ret = 0;
+
+    INIT_LIST_HEAD(&entries.list);
+
+    if (local->fd)
+        itable = local->fd->inode->table;
+
+    conf = this->private;
+    GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
+
+    if (op_ret < 0)
+        goto done;
+
+    list_for_each_entry(src, (&orig_entries->list), list)
+    {
+        next_offset = src->d_off;
+
+        /* stat failed somewhere - ignore this entry */
+        if (IA_ISINVAL(src->d_stat.ia_type))
+            continue;
+
+        copy = gf_dirent_for_name(src->d_name);
+        if (!copy)
+            goto unwind;
+
+        copy->d_off = src->d_off;
+        copy->d_stat = src->d_stat;
+        copy->d_ino = src->d_ino;
+        copy->d_type = src->d_type;
+        copy->d_len = src->d_len;
+
+        if (src->dict)
+            copy->dict = dict_ref(src->dict);
+
+        /* linkfiles are listed as they are, without touching any inode */
+        if (!check_is_linkfile(NULL, (&src->d_stat), src->dict,
+                               conf->link_xattr_name)) {
+            if (IA_ISDIR(copy->d_stat.ia_type)) {
+                if (src->inode) {
+                    dht_inode_ctx_time_update(src->inode, this, &copy->d_stat,
+                                              1);
+                }
+            } else if (src->inode) {
+                ret = dht_layout_preset(this, prev, src->inode);
+                if (ret)
+                    gf_msg(this->name, GF_LOG_WARNING, 0,
+                           DHT_MSG_LAYOUT_SET_FAILED,
+                           "failed to link the layout in inode");
+
+                copy->inode = inode_ref(src->inode);
+            } else if (itable) {
+                /*
+                 * src->inode might be null if any upper
+                 * layer xlators below client set to null, to
+                 * force a lookup on the inode even if the inode
+                 * is present in the inode table. In that case
+                 * we just update the ctx to make sure we didn't
+                 * miss anything.
+                 */
+                found = inode_find(itable, src->d_stat.ia_gfid);
+                if (found) {
+                    ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, found);
+                    if (ret)
+                        gf_msg(this->name, GF_LOG_WARNING, 0,
+                               DHT_MSG_LAYOUT_SET_FAILED,
+                               "failed to link the layout in inode");
+                    inode_unref(found);
+                    found = NULL;
+                }
+            }
+        }
+
+        list_add_tail(&copy->list, &entries.list);
+        count++;
+    }
+    op_ret = count;
+
+done:
+    if (count == 0) {
+        /* a zero next_offset means EOF was hit on the current subvol */
+        if (next_offset == 0)
+            goto unwind;
+
+        next_subvol = prev;
+
+        STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol,
+                          next_subvol->fops->readdirp, local->fd, local->size,
+                          next_offset, local->xattr);
+        return 0;
+    }
+
+unwind:
+    if (op_ret < 0)
+        op_ret = 0;
+
+    DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+    gf_dirent_free(&entries);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * Common implementation of readdir and readdirp.
+ *
+ * whichop selects the fop (GF_FOP_READDIRP or anything else for plain
+ * readdir).  The request is always wound to TIER_HASHED_SUBVOL.  For
+ * readdirp the request dictionary additionally asks for the linkto xattr
+ * (conf->link_xattr_name).
+ *
+ * Always returns 0; failures unwind the frame with op_ret = -1.
+ */
+int
+tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                off_t yoff, int whichop, dict_t *dict)
+{
+    dht_local_t *local = NULL;
+    dht_conf_t *conf = NULL;
+    xlator_t *hashed_subvol = NULL;
+    int op_errno = -1;
+    int ret = 0;
+
+    VALIDATE_OR_GOTO(frame, err);
+    VALIDATE_OR_GOTO(this, err);
+    VALIDATE_OR_GOTO(fd, err);
+    VALIDATE_OR_GOTO(this->private, err);
+
+    conf = this->private;
+
+    local = dht_local_init(frame, NULL, NULL, whichop);
+    if (!local) {
+        op_errno = ENOMEM;
+        goto err;
+    }
+
+    local->fd = fd_ref(fd);
+    local->size = size;
+    if (dict)
+        local->xattr_req = dict_ref(dict);
+    else
+        local->xattr_req = NULL;
+
+    hashed_subvol = TIER_HASHED_SUBVOL;
+
+    /* TODO: do proper readdir */
+    if (whichop != GF_FOP_READDIRP) {
+        STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol,
+                          hashed_subvol->fops->readdir, fd, size, yoff,
+                          local->xattr);
+        return 0;
+    }
+
+    local->xattr = dict ? dict_ref(dict) : dict_new();
+
+    if (local->xattr) {
+        ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
+        if (ret)
+            gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+                   "Failed to set dictionary value : key = %s",
+                   conf->link_xattr_name);
+    }
+
+    STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol, hashed_subvol,
+                      hashed_subvol->fops->readdirp, fd, size, yoff,
+                      local->xattr);
+
+    return 0;
+
+err:
+    op_errno = (op_errno == -1) ? errno : op_errno;
+    DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * readdir fop.  Falls back to readdirp when conf->use_readdirp is set or
+ * when any entry of conf->subvolume_status is zero; otherwise a plain
+ * readdir is issued.  The work is done by tier_do_readdir().
+ */
+int
+tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+             off_t yoff, dict_t *xdata)
+{
+    dht_conf_t *conf = this->private;
+    int op = GF_FOP_READDIR;
+    int idx = 0;
+
+    if (conf) {
+        if (conf->use_readdirp) {
+            op = GF_FOP_READDIRP;
+        } else {
+            for (idx = 0; idx < conf->subvolume_cnt; idx++) {
+                if (!conf->subvolume_status[idx]) {
+                    op = GF_FOP_READDIRP;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* no request dictionary is passed on for readdir */
+    tier_do_readdir(frame, this, fd, size, yoff, op, NULL);
+    return 0;
+}
|
|
+
|
|
+/*
+ * readdirp fop: thin wrapper that hands the request, including the
+ * caller's dictionary, to tier_do_readdir() with GF_FOP_READDIRP.
+ */
+int
+tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+              off_t yoff, dict_t *dict)
+{
+    (void)tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * Callback of the statfs requests wound by tier_statfs(); it is invoked
+ * once per subvolume.
+ *
+ * Under frame->lock the reply is merged into local->statvfs and
+ * local->tier_statvfs:
+ *   - replies carrying "quota-deem-statfs" replace the aggregate (or are
+ *     compared against it and the larger usage wins); once such a reply
+ *     was seen, replies without the key are ignored;
+ *   - otherwise block/fragment sizes are normalised and the usage of the
+ *     hashed and the other subvolume is recorded separately.
+ * When the last reply arrives the free/available counters are derived
+ * from the recorded usage and the frame is unwound.
+ *
+ * Always returns 0.
+ */
+int
+tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                int op_errno, struct statvfs *statvfs, dict_t *xdata)
+{
+    gf_boolean_t event = _gf_false;
+    qdstatfs_action_t action = qdstatfs_action_OFF;
+    dht_local_t *local = NULL;
+    int this_call_cnt = 0;
+    int bsize = 0;
+    int frsize = 0;
+    GF_UNUSED int ret = 0;
+    unsigned long new_usage = 0;
+    unsigned long cur_usage = 0;
+    xlator_t *prev = NULL;
+    dht_conf_t *conf = NULL;
+    tier_statvfs_t *tier_stat = NULL;
+
+    prev = cookie;
+    local = frame->local;
+    GF_ASSERT(local);
+
+    /* NOTE(review): TIER_HASHED_SUBVOL presumably expands in terms of the
+     * local variable 'conf' - keep the name. */
+    conf = this->private;
+
+    if (xdata)
+        ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
+
+    tier_stat = &local->tier_statvfs;
+
+    LOCK(&frame->lock);
+    {
+        if (op_ret == -1) {
+            local->op_errno = op_errno;
+            goto unlock;
+        }
+        if (!statvfs) {
+            /*
+             * Record the error in local: the unwind below reports
+             * local->op_errno, the op_errno parameter is not read again.
+             */
+            local->op_errno = EINVAL;
+            local->op_ret = -1;
+            goto unlock;
+        }
+        local->op_ret = 0;
+
+        if (local->quota_deem_statfs) {
+            if (event == _gf_true) {
+                action = qdstatfs_action_COMPARE;
+            } else {
+                action = qdstatfs_action_NEGLECT;
+            }
+        } else {
+            if (event == _gf_true) {
+                action = qdstatfs_action_REPLACE;
+                local->quota_deem_statfs = _gf_true;
+            }
+        }
+
+        if (local->quota_deem_statfs) {
+            switch (action) {
+                case qdstatfs_action_NEGLECT:
+                    goto unlock;
+
+                case qdstatfs_action_REPLACE:
+                    local->statvfs = *statvfs;
+                    goto unlock;
+
+                case qdstatfs_action_COMPARE:
+                    new_usage = statvfs->f_blocks - statvfs->f_bfree;
+                    cur_usage = local->statvfs.f_blocks -
+                                local->statvfs.f_bfree;
+
+                    /* Take the max of the usage from subvols */
+                    if (new_usage >= cur_usage)
+                        local->statvfs = *statvfs;
+                    goto unlock;
+
+                default:
+                    break;
+            }
+        }
+
+        if (local->statvfs.f_bsize != 0) {
+            /* bring both sides to a common block/fragment size */
+            bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
+            frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
+            dht_normalize_stats(&local->statvfs, bsize, frsize);
+            dht_normalize_stats(statvfs, bsize, frsize);
+        } else {
+            /* first usable reply: adopt its sizes */
+            local->statvfs.f_bsize = statvfs->f_bsize;
+            local->statvfs.f_frsize = statvfs->f_frsize;
+        }
+
+        if (prev == TIER_HASHED_SUBVOL) {
+            /* totals are taken from the hashed subvolume only */
+            local->statvfs.f_blocks = statvfs->f_blocks;
+            local->statvfs.f_files = statvfs->f_files;
+            local->statvfs.f_fsid = statvfs->f_fsid;
+            local->statvfs.f_flag = statvfs->f_flag;
+            local->statvfs.f_namemax = statvfs->f_namemax;
+            tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree);
+            tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail);
+            tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree);
+            tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail);
+            tier_stat->hashed_fsid = statvfs->f_fsid;
+        } else {
+            tier_stat->unhashed_fsid = statvfs->f_fsid;
+            tier_stat->unhashed_blocks_used = (statvfs->f_blocks -
+                                               statvfs->f_bfree);
+            tier_stat->unhashed_pblocks_used = (statvfs->f_blocks -
+                                                statvfs->f_bavail);
+            tier_stat->unhashed_files_used = (statvfs->f_files -
+                                              statvfs->f_ffree);
+            tier_stat->unhashed_pfiles_used = (statvfs->f_files -
+                                               statvfs->f_favail);
+        }
+    }
+unlock:
+    UNLOCK(&frame->lock);
+
+    this_call_cnt = dht_frame_return(frame);
+    if (is_last_call(this_call_cnt)) {
+        /* usage of the other subvolume is added only when it reports a
+         * different fsid than the hashed one */
+        if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) {
+            tier_stat->blocks_used += tier_stat->unhashed_blocks_used;
+            tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used;
+            tier_stat->files_used += tier_stat->unhashed_files_used;
+            tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used;
+        }
+        local->statvfs.f_bfree = local->statvfs.f_blocks -
+                                 tier_stat->blocks_used;
+        local->statvfs.f_bavail = local->statvfs.f_blocks -
+                                  tier_stat->pblocks_used;
+        local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used;
+        local->statvfs.f_favail = local->statvfs.f_files -
+                                  tier_stat->pfiles_used;
+        DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
+                         &local->statvfs, xdata);
+    }
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * statfs fop: winds a statfs to every subvolume; the replies are merged
+ * by tier_statfs_cbk.
+ *
+ * When loc refers to a non-directory inode the request is issued on the
+ * root instead: the root inode is looked up in the inode table by its
+ * gfid (all zero except the last byte, which is 1) and a temporary loc is
+ * built from it.
+ *
+ * Always returns 0; failures unwind the frame with op_ret = -1.
+ */
+int
+tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    dht_local_t *local = NULL;
+    dht_conf_t *conf = NULL;
+    int op_errno = -1;
+    int i = -1;
+    inode_t *inode = NULL;
+    inode_table_t *itable = NULL;
+    uuid_t root_gfid = {
+        0,
+    };
+    loc_t newloc = {
+        0,
+    };
+
+    VALIDATE_OR_GOTO(frame, err);
+    VALIDATE_OR_GOTO(this, err);
+    VALIDATE_OR_GOTO(loc, err);
+    VALIDATE_OR_GOTO(this->private, err);
+
+    conf = this->private;
+
+    local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
+    if (!local) {
+        op_errno = ENOMEM;
+        goto err;
+    }
+
+    if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
+        itable = loc->inode->table;
+        if (!itable) {
+            op_errno = EINVAL;
+            goto err;
+        }
+
+        root_gfid[15] = 1;
+
+        /* inode_find() hands back a referenced inode; it is released
+         * below once the requests have been wound */
+        inode = inode_find(itable, root_gfid);
+        if (!inode) {
+            op_errno = EINVAL;
+            goto err;
+        }
+
+        dht_build_root_loc(inode, &newloc);
+        loc = &newloc;
+    }
+
+    local->call_cnt = conf->subvolume_cnt;
+
+    for (i = 0; i < conf->subvolume_cnt; i++) {
+        STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i],
+                          conf->subvolumes[i],
+                          conf->subvolumes[i]->fops->statfs, loc, xdata);
+    }
+
+    /*
+     * newloc lives on this stack frame, so nothing below may rely on it
+     * after we return; drop the reference taken by inode_find() to avoid
+     * leaking one inode ref per statfs on a non-directory.
+     */
+    if (inode)
+        inode_unref(inode);
+
+    return 0;
+
+err:
+    op_errno = (op_errno == -1) ? errno : op_errno;
+    DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
+
+    return 0;
+}
|
|
diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h
|
|
new file mode 100644
|
|
index 0000000..b1ebaa8
|
|
--- /dev/null
|
|
+++ b/xlators/cluster/dht/src/tier-common.h
|
|
@@ -0,0 +1,55 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#ifndef _TIER_COMMON_H_
+#define _TIER_COMMON_H_
+/*
+ * Function declarations of the tier fops.
+ *
+ * This header includes nothing itself: the includer has to provide the
+ * types used below (call_frame_t, xlator_t, loc_t, fd_t, dict_t, ...)
+ * before including it, as tier.c does by including dht-common.h first.
+ */
+int
+tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
+                                    xlator_t *this, int op_ret, int op_errno,
+                                    struct iatt *preparent,
+                                    struct iatt *postparent, dict_t *xdata);
+
+int
+tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+                struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+
+int
+tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+                                xlator_t *this, int32_t op_ret,
+                                int32_t op_errno, inode_t *inode,
+                                struct iatt *stbuf, struct iatt *preparent,
+                                struct iatt *postparent, dict_t *xdata);
+
+int
+tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+            mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
+
+/* return type spelled 'int' to match the definition */
+int
+tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+            dict_t *xdata);
+
+/* return type and parameter name match the definition */
+int
+tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+              off_t yoff, dict_t *dict);
+
+int
+tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+             off_t yoff, dict_t *xdata);
+
+int
+tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+          dict_t *xdata);
+
+int
+tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+#endif
|
|
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
|
|
new file mode 100644
|
|
index 0000000..94b4c63
|
|
--- /dev/null
|
|
+++ b/xlators/cluster/dht/src/tier.c
|
|
@@ -0,0 +1,3105 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#include <dlfcn.h>
|
|
+
|
|
+#include "dht-common.h"
|
|
+#include "tier.h"
|
|
+#include "tier-common.h"
|
|
+#include <glusterfs/syscall.h>
|
|
+#include <glusterfs/events.h>
|
|
+#include "tier-ctr-interface.h"
|
|
+
|
|
+/* Hard-coded DB type: SQLite3 */
+static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
+
+/* Mutex for updating the data movement stats */
+static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Path location of the promotion query files */
+static char *promotion_qfile;
+/* Path location of the demotion query files */
+static char *demotion_qfile;
+
+/* NOTE(review): presumably the dlopen() handle of the gfdb library
+ * (<dlfcn.h> is included above) - confirm against the code that sets it */
+static void *libhandle;
+/* Method table through which the gfdb routines are called, e.g.
+ * gfdb_methods.gfdb_read_query_record() */
+static gfdb_methods_t gfdb_methods;
+
+#define DB_QUERY_RECORD_SIZE 4096
|
|
+
|
|
+/*
+ * Closes every fd of the array that is still open (!= -1) and releases
+ * both the fd array and the qfile_array itself.  A NULL qfile_array is
+ * accepted.
+ */
+static void
+qfile_array_free(tier_qfile_array_t *qfile_array)
+{
+    ssize_t idx = 0;
+
+    if (qfile_array) {
+        for (idx = 0; qfile_array->fd_array && idx < qfile_array->array_size;
+             idx++) {
+            if (qfile_array->fd_array[idx] == -1)
+                continue;
+            sys_close(qfile_array->fd_array[idx]);
+        }
+        GF_FREE(qfile_array->fd_array);
+    }
+    GF_FREE(qfile_array);
+}
|
|
+
|
|
+/*
+ * Allocates a query file list with room for array_size fds.
+ *
+ * All fds start out as -1 and the list starts out "exhausted"
+ * (exhausted_count == array_size), i.e. empty.
+ *
+ * Returns the new list, or NULL if array_size is not positive or an
+ * allocation fails.
+ */
+static tier_qfile_array_t *
+qfile_array_new(ssize_t array_size)
+{
+    tier_qfile_array_t *list = NULL;
+    ssize_t idx = 0;
+
+    GF_VALIDATE_OR_GOTO("tier", (array_size > 0), fail);
+
+    list = GF_CALLOC(1, sizeof(tier_qfile_array_t), gf_tier_mt_qfile_array_t);
+    if (!list) {
+        gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to allocate memory for tier_qfile_array_t");
+        goto fail;
+    }
+
+    list->fd_array = GF_MALLOC(array_size * sizeof(int), gf_dht_mt_int32_t);
+    if (!list->fd_array) {
+        gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to allocate memory for tier_qfile_array_t->fd_array");
+        goto fail;
+    }
+
+    /* no fd is open yet */
+    for (idx = 0; idx < array_size; idx++)
+        list->fd_array[idx] = -1;
+
+    list->array_size = array_size;
+    list->next_index = 0;
+
+    /* the list is empty, so every slot counts as exhausted */
+    list->exhausted_count = array_size;
+
+    return list;
+
+fail:
+    qfile_array_free(list);
+    return NULL;
+}
|
|
+
|
|
+/*
+ * Tells whether the query file list is empty or totally exhausted, i.e.
+ * whether exhausted_count has reached array_size.
+ */
+static gf_boolean_t
+is_qfile_array_empty(tier_qfile_array_t *qfile_array)
+{
+    if (qfile_array->exhausted_count == qfile_array->array_size)
+        return _gf_true;
+
+    return _gf_false;
+}
|
|
+
|
|
+/*
+ * Advances next_index, wrapping around at the end of the array, until it
+ * points at an fd that is not -1 or until array_size steps were taken.
+ * Does nothing when the list is empty/exhausted.
+ */
+static void
+shift_next_index(tier_qfile_array_t *qfile_array)
+{
+    int fd = 0;
+    int steps = 0;
+
+    if (is_qfile_array_empty(qfile_array))
+        return;
+
+    for (;;) {
+        /* move next_index in a rotational manner */
+        if (qfile_array->next_index == (qfile_array->array_size - 1))
+            qfile_array->next_index = 0;
+        else
+            qfile_array->next_index++;
+
+        fd = qfile_array->fd_array[qfile_array->next_index];
+        steps++;
+
+        if ((fd != -1) || (steps >= qfile_array->array_size))
+            break;
+    }
+}
|
|
+
|
|
+/*
+ * This is a non-thread safe function to read query records
+ * from a list of query files in a Round-Robin manner.
+ * As and when the query files get exhausted they are closed; a query file
+ * whose read reports an error is closed and skipped in the same way.
+ * Returns:
+ * 0   if all the query records in all the query files of the list are
+ *     exhausted.
+ * > 0 if a query record is successfully read. Indicates the size of the
+ *     query record read.
+ * < 0 if qfile_array or its fd_array is NULL.
+ */
+static int
+read_query_record_list(tier_qfile_array_t *qfile_array,
+                       gfdb_query_record_t **query_record)
+{
+    int ret = -1;
+    int fd = 0;
+
+    GF_VALIDATE_OR_GOTO("tier", qfile_array, out);
+    GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out);
+
+    while (!is_qfile_array_empty(qfile_array)) {
+        fd = qfile_array->fd_array[qfile_array->next_index];
+        ret = gfdb_methods.gfdb_read_query_record(fd, query_record);
+        if (ret > 0) {
+            /* got a record: the next read starts at the next query file */
+            shift_next_index(qfile_array);
+            goto out;
+        }
+
+        /*
+         * EOF or error on this query file:
+         * 1. close the exhausted fd
+         * 2. increment the exhausted count
+         * 3. move on to the next query file
+         */
+        sys_close(fd);
+        qfile_array->fd_array[qfile_array->next_index] = -1;
+        qfile_array->exhausted_count++;
+        shift_next_index(qfile_array);
+    }
+
+    /* nothing left to read */
+    ret = 0;
+out:
+    return ret;
+}
|
|
+
|
|
+/* The watermark is checked and updated every WM_INTERVAL seconds.
+ * NOTE(review): WM_INTERVAL_EMERG is presumably a shorter interval for an
+ * emergency case - confirm against its users. */
+#define WM_INTERVAL 5
+#define WM_INTERVAL_EMERG 1
|
|
+
|
|
+/*
+ * Compares the node uuid stored in the GF_XATTR_NODE_UUID_KEY xattr of
+ * loc with defrag->node_uuid.
+ *
+ * Returns 0 when they are equal, 1 when they differ and -1 when an
+ * argument is NULL or the uuid cannot be fetched or parsed.
+ */
+static int
+tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
+{
+    int ret = -1;
+    dict_t *xattrs = NULL;
+    char *uuid_text = NULL;
+    uuid_t file_node_uuid = {
+        0,
+    };
+
+    GF_VALIDATE_OR_GOTO("tier", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, loc, out);
+    GF_VALIDATE_OR_GOTO(this->name, defrag, out);
+
+    if (syncop_getxattr(this, loc, &xattrs, GF_XATTR_NODE_UUID_KEY, NULL,
+                        NULL) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path);
+        goto out;
+    }
+
+    if (dict_get_str(xattrs, GF_XATTR_NODE_UUID_KEY, &uuid_text) < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to get node-uuids for %s", loc->path);
+        goto out;
+    }
+
+    if (gf_uuid_parse(uuid_text, file_node_uuid) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "uuid_parse failed for %s", loc->path);
+        goto out;
+    }
+
+    if (gf_uuid_compare(file_node_uuid, defrag->node_uuid) == 0) {
+        ret = 0;
+    } else {
+        gf_msg_debug(this->name, 0, "%s does not belong to this node",
+                     loc->path);
+        ret = 1;
+    }
+
+out:
+    if (xattrs)
+        dict_unref(xattrs);
+
+    return ret;
+}
|
|
+
|
|
+/*
+ * Runs statfs on conf->subvolumes[1] (the hot subvolume according to the
+ * original comment) for root_loc and stores block size, total blocks,
+ * used blocks and the fill percentage in defrag->tier_conf, under
+ * dm_stat_mutex.  The request carries GF_INTERNAL_IGNORE_DEEM_STATFS.
+ *
+ * Returns 0 on success, -1 on setup failures, or the non-zero result of
+ * syncop_statfs() when the statfs itself fails.
+ */
+int
+tier_get_fs_stat(xlator_t *this, loc_t *root_loc)
+{
+    int ret = -1;
+    dht_conf_t *conf = this->private;
+    gf_defrag_info_t *defrag = NULL;
+    gf_tier_conf_t *tier_conf = NULL;
+    dict_t *req = NULL;
+    struct statvfs fs_stat = {
+        0,
+    };
+
+    if (!conf) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
+               "conf is NULL");
+        goto exit;
+    }
+
+    defrag = conf->defrag;
+    if (!defrag) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
+               "defrag is NULL");
+        goto exit;
+    }
+
+    tier_conf = &defrag->tier_conf;
+
+    req = dict_new();
+    if (!req) {
+        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+               "failed to allocate dictionary");
+        goto exit;
+    }
+
+    if (dict_set_int8(req, GF_INTERNAL_IGNORE_DEEM_STATFS, 1)) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+               "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
+        goto exit;
+    }
+
+    /*
+     * Find how much space is used on the hot subvolume and stash the
+     * result in the tier_conf data structure; it is compared against the
+     * user defined watermarks elsewhere.
+     */
+    ret = syncop_statfs(conf->subvolumes[1], root_loc, &fs_stat, req, NULL);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS,
+               "Unable to obtain statfs.");
+        goto exit;
+    }
+
+    pthread_mutex_lock(&dm_stat_mutex);
+    {
+        tier_conf->block_size = fs_stat.f_bsize;
+        tier_conf->blocks_total = fs_stat.f_blocks;
+        tier_conf->blocks_used = fs_stat.f_blocks - fs_stat.f_bfree;
+
+        tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used,
+                                                fs_stat.f_blocks);
+    }
+    pthread_mutex_unlock(&dm_stat_mutex);
+
+exit:
+    if (req)
+        dict_unref(req);
+    return ret;
+}
|
|
+
|
|
+/*
+ * Emits the event that corresponds to a watermark transition
+ * old_wm -> new_wm for volume volname.  Transitions that do not change
+ * the level (LOW/NONE -> LOW, MID -> MID, HI -> HI) emit nothing.
+ */
+static void
+tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm,
+                          tier_watermark_op_t new_wm)
+{
+    /* an unset watermark is treated like LOW */
+    int was_low = (old_wm == TIER_WM_LOW) || (old_wm == TIER_WM_NONE);
+
+    if (new_wm == TIER_WM_MID) {
+        if (was_low) {
+            gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname);
+        } else if (old_wm == TIER_WM_HI) {
+            gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname);
+        }
+    } else if (new_wm == TIER_WM_HI) {
+        if (was_low || (old_wm == TIER_WM_MID)) {
+            gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
+        }
+    } else if (new_wm == TIER_WM_LOW) {
+        if (!was_low && ((old_wm == TIER_WM_MID) || (old_wm == TIER_WM_HI))) {
+            gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
+        }
+    }
+}
|
|
+
|
|
+/*
+ * Classifies tier_conf->percent_full against watermark_low/watermark_hi
+ * (below low -> LOW, below hi -> MID, otherwise HI).  When the level
+ * differs from watermark_last an event is sent, watermark_last is updated
+ * and the new level is logged.
+ *
+ * Returns 0 on success, -1 when conf or conf->defrag is NULL.
+ */
+int
+tier_check_watermark(xlator_t *this)
+{
+    dht_conf_t *conf = this->private;
+    gf_defrag_info_t *defrag = NULL;
+    gf_tier_conf_t *tier_conf = NULL;
+    tier_watermark_op_t wm = TIER_WM_NONE;
+
+    if (!conf)
+        return -1;
+
+    defrag = conf->defrag;
+    if (!defrag)
+        return -1;
+
+    tier_conf = &defrag->tier_conf;
+
+    if (tier_conf->percent_full < tier_conf->watermark_low)
+        wm = TIER_WM_LOW;
+    else if (tier_conf->percent_full < tier_conf->watermark_hi)
+        wm = TIER_WM_MID;
+    else
+        wm = TIER_WM_HI;
+
+    if (wm == tier_conf->watermark_last)
+        return 0;
+
+    tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last,
+                              wm);
+
+    tier_conf->watermark_last = wm;
+    gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+           "Tier watermark now %d", wm);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * _gf_true when tier_conf is non-NULL, is in watermark mode
+ * (TIER_MODE_WM) and the last recorded watermark is TIER_WM_HI;
+ * _gf_false otherwise.
+ */
+static gf_boolean_t
+is_hot_tier_full(gf_tier_conf_t *tier_conf)
+{
+    if (!tier_conf)
+        return _gf_false;
+
+    if (tier_conf->mode != TIER_MODE_WM)
+        return _gf_false;
+
+    if (tier_conf->watermark_last != TIER_WM_HI)
+        return _gf_false;
+
+    return _gf_true;
+}
|
|
+
|
|
+/*
+ * Decides whether a promotion (promote != 0) or demotion (promote == 0)
+ * should take place, based on the current watermark:
+ *   LOW : promote only
+ *   HI  : demote only
+ *   MID : random choice weighted by tier_conf->percent_full
+ *
+ * Returns 1 to migrate, 0 otherwise (also when conf/defrag is NULL or the
+ * watermark cannot be determined).
+ */
+int
+tier_do_migration(xlator_t *this, int promote)
+{
+    dht_conf_t *conf = this->private;
+    gf_defrag_info_t *defrag = NULL;
+    gf_tier_conf_t *tier_conf = NULL;
+    long roll = 0;
+    int migrate = 0;
+
+    if (!conf)
+        goto exit;
+
+    defrag = conf->defrag;
+    if (!defrag)
+        goto exit;
+
+    if (tier_check_watermark(this) != 0) {
+        gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to get watermark");
+        goto exit;
+    }
+
+    tier_conf = &defrag->tier_conf;
+
+    if (tier_conf->watermark_last == TIER_WM_LOW) {
+        migrate = (promote != 0);
+    } else if (tier_conf->watermark_last == TIER_WM_HI) {
+        migrate = (promote == 0);
+    } else if (tier_conf->watermark_last == TIER_WM_MID) {
+        /* coverity[DC.WEAK_CRYPTO] */
+        roll = random() % 100;
+        if (promote)
+            migrate = (roll > tier_conf->percent_full);
+        else
+            migrate = (roll <= tier_conf->percent_full);
+    }
+
+exit:
+    return migrate;
+}
|
|
+
|
|
+/*
+ * Migrates one file by issuing syncop_setxattr() with migrate_data on
+ * loc.  While the call is running the matching *_in_progress flag of
+ * tier_conf is set; the flag is changed under tier_conf->pause_mutex.
+ *
+ * Returns the result of syncop_setxattr().
+ */
+int
+tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc,
+             gf_tier_conf_t *tier_conf)
+{
+    int rc = -1;
+
+    /* mark the migration as running */
+    pthread_mutex_lock(&tier_conf->pause_mutex);
+    {
+        if (is_promotion) {
+            tier_conf->promote_in_progress = 1;
+        } else {
+            tier_conf->demote_in_progress = 1;
+        }
+    }
+    pthread_mutex_unlock(&tier_conf->pause_mutex);
+
+    /* Data migration */
+    rc = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL);
+
+    /* migration finished, whatever its outcome */
+    pthread_mutex_lock(&tier_conf->pause_mutex);
+    {
+        if (is_promotion) {
+            tier_conf->promote_in_progress = 0;
+        } else {
+            tier_conf->demote_in_progress = 0;
+        }
+    }
+    pthread_mutex_unlock(&tier_conf->pause_mutex);
+
+    return rc;
+}
|
|
+
|
|
+/*
+ * Checks whether a file may be promoted.
+ *
+ * this      : tier xlator (used for logging)
+ * file_name : name of the file (used for logging)
+ * current   : iatt of the file; ia_size and ia_gfid are read
+ * defrag    : defrag info holding the tier configuration and statistics
+ *
+ * returns _gf_true: if file can be promoted
+ * returns _gf_false: if file cannot be promoted, i.e. when
+ *   - it is larger than tier_max_promote_size (if that limit is set),
+ *   - in watermark mode, the hot tier statistics are still unset, or
+ *   - in watermark mode, the estimated block usage after the promotion
+ *     would reach the hi watermark.
+ */
+static gf_boolean_t
+tier_can_promote_file(xlator_t *this, char const *file_name,
+                      struct iatt *current, gf_defrag_info_t *defrag)
+{
+    gf_boolean_t ret = _gf_false;
+    fsblkcnt_t estimated_usage = 0;
+
+    if (defrag->tier_conf.tier_max_promote_size &&
+        (current->ia_size > defrag->tier_conf.tier_max_promote_size)) {
+        gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+               "File %s (gfid:%s) with size (%" PRIu64
+               ") exceeds maxsize "
+               "(%d) for promotion. File will not be promoted.",
+               file_name, uuid_utoa(current->ia_gfid), current->ia_size,
+               defrag->tier_conf.tier_max_promote_size);
+        goto err;
+    }
+
+    /* bypass further validations for TEST mode */
+    if (defrag->tier_conf.mode != TIER_MODE_WM) {
+        ret = _gf_true;
+        goto err;
+    }
+
+    /*
+     * block_size and blocks_total are divisors below.  They are zero
+     * until the hot tier statistics have been stored; without them no
+     * estimate is possible, so refuse the promotion instead of dividing
+     * by zero.
+     */
+    if (!defrag->tier_conf.block_size || !defrag->tier_conf.blocks_total) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOG_TIER_STATUS,
+               "Hot tier statistics are not available. "
+               "File %s (gfid:%s) will not be promoted.",
+               file_name, uuid_utoa(current->ia_gfid));
+        goto err;
+    }
+
+    /* convert the file size to blocks as per the block size of the
+     * destination tier
+     * NOTE: add (block_size - 1) to round up when the size is not a
+     * multiple of the block size
+     */
+    estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) /
+                       defrag->tier_conf.block_size) +
+                      defrag->tier_conf.blocks_used;
+
+    /* test if the estimated block usage goes above HI watermark */
+    if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >=
+        defrag->tier_conf.watermark_hi) {
+        gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+               "Estimated block count consumption on "
+               "hot tier (%" PRIu64
+               ") exceeds hi watermark (%d%%). "
+               "File will not be promoted.",
+               estimated_usage, defrag->tier_conf.watermark_hi);
+        goto err;
+    }
+    ret = _gf_true;
+err:
+    return ret;
+}
|
|
+
|
|
+/* Populate the dictionary sent with the migration setxattr.
+ *
+ * Three string keys are set, in order; the first failure stops the
+ * sequence.
+ *
+ * Returns 0 when all keys were set, otherwise the non-zero value returned
+ * by the failing dict_set_str() call.
+ */
+static int
+tier_set_migrate_data(dict_t *migrate_data)
+{
+    int rc = 1;
+
+    rc = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force");
+
+    /* Flag to suggest the xattr call is from migrator */
+    if (rc == 0) {
+        rc = dict_set_str(migrate_data, "from.migrator", "yes");
+    }
+
+    /* Flag to suggest it is a tiering migration.
+     * Promotions and demotions are multithreaded, so the original frame
+     * from gf_defrag_start() is not carried. syncop_setxattr() creates a
+     * new frame which does not have the frame->root->pid of the original
+     * one. This key-value is therefore passed along so that dht_setxattr()
+     * can set frame->root->pid to GF_CLIENT_PID_TIER_DEFRAG just before
+     * calling dht_start_rebalance_task() */
+    if (rc == 0) {
+        rc = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes");
+    }
+
+    return rc;
+}
|
|
+
|
|
+/* Look up the parent directory described by p_loc and return its path.
+ *
+ * The lookup is sent with GET_ANCESTRY_PATH_KEY set in the request xdata
+ * and the path is read back from the same key in the response xdata.
+ * par_stbuf receives the iatt filled in by the lookup.
+ *
+ * On failure NULL is returned. *per_link_status is set to 1 for a stale
+ * parent (-ESTALE) and to -1 for any other lookup failure or when the path
+ * is missing from the response; it is left untouched when building the
+ * request dict fails.
+ *
+ * NOTE(review): the returned pointer is obtained with dict_get_str() from
+ * the response dict, and that dict is unref'd before returning - confirm
+ * the string is still valid when the caller uses it.
+ */
+static char *
+tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf,
+                     int *per_link_status)
+{
+    int rc = -1;
+    char *path = NULL;
+    dict_t *request = NULL;
+    dict_t *response = NULL;
+
+    request = dict_new();
+    if (request == NULL) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to create xdata_request dict");
+        goto cleanup;
+    }
+
+    rc = dict_set_int32(request, GET_ANCESTRY_PATH_KEY, 42);
+    if (rc != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to set value to dict : key %s \n",
+               GET_ANCESTRY_PATH_KEY);
+        goto cleanup;
+    }
+
+    rc = syncop_lookup(this, p_loc, par_stbuf, NULL, request, &response);
+    if (rc != 0) {
+        if (rc == -ESTALE) {
+            /* The parent gfid can be stale when a huge folder is
+             * deleted while the files within it are being migrated.
+             * Report it as "ignore" (1) so that it does not stop the
+             * demotion process.
+             */
+            gf_msg(this->name, GF_LOG_WARNING, -rc, DHT_MSG_STALE_LOOKUP,
+                   "Stale entry in parent lookup for %s",
+                   uuid_utoa(p_loc->gfid));
+            *per_link_status = 1;
+        } else {
+            gf_msg(this->name, GF_LOG_ERROR, -rc, DHT_MSG_LOG_TIER_ERROR,
+                   "Error in parent lookup for %s", uuid_utoa(p_loc->gfid));
+            *per_link_status = -1;
+        }
+        goto cleanup;
+    }
+
+    rc = dict_get_str(response, GET_ANCESTRY_PATH_KEY, &path);
+    if (rc != 0 || path == NULL) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to get parent path for %s", uuid_utoa(p_loc->gfid));
+        *per_link_status = -1;
+    }
+
+cleanup:
+    if (request != NULL) {
+        dict_unref(request);
+    }
+
+    if (response != NULL) {
+        dict_unref(response);
+        response = NULL;
+    }
+
+    return path;
+}
|
|
+
|
|
+/* Fill in loc->name and loc->path for the file described by link_info.
+ *
+ * loc->name becomes a copy of link_info->file_name and loc->path is
+ * formatted as "<parent_path>/<name>".
+ *
+ * Returns 0 on success. On failure *per_link_status is set to -1 and the
+ * return value is -1 when the name could not be duplicated, or the
+ * negative result of gf_asprintf() when the path could not be built.
+ */
+static int
+tier_get_file_name_and_path(xlator_t *this, uuid_t gfid,
+                            gfdb_link_info_t *link_info,
+                            char const *parent_path, loc_t *loc,
+                            int *per_link_status)
+{
+    int rc = -1;
+
+    loc->name = gf_strdup(link_info->file_name);
+    if (loc->name == NULL) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Memory "
+               "allocation failed for %s",
+               uuid_utoa(gfid));
+        *per_link_status = -1;
+        return rc;
+    }
+
+    rc = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name);
+    if (rc < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to "
+               "construct file path for %s %s\n",
+               parent_path, loc->name);
+        *per_link_status = -1;
+        return rc;
+    }
+
+    return 0;
+}
|
|
+
|
|
+/* Look up the file described by loc and fill in its iatt (current).
+ *
+ * Returns 0 on success, otherwise the non-zero result of syncop_lookup().
+ * *per_link_status is set to 1 for -ESTALE and to -1 for any other
+ * failure.
+ *
+ * NOTE(review): the stale-lookup warning prints p_loc->gfid (the parent)
+ * although the lookup is done on loc - confirm this is intended.
+ */
+static int
+tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current,
+                 int *per_link_status)
+{
+    int rc = syncop_lookup(this, loc, current, NULL, NULL, NULL);
+
+    switch (rc) {
+        case 0:
+            break;
+
+        case -ESTALE:
+            /* The file may have been deleted even though the parent is
+             * still available. A stale entry would otherwise stop the
+             * migration, so mark the file as "ignore" and let the caller
+             * keep migrating.
+             */
+            gf_msg(this->name, GF_LOG_WARNING, -rc, DHT_MSG_STALE_LOOKUP,
+                   "Stale lookup for %s", uuid_utoa(p_loc->gfid));
+            *per_link_status = 1;
+            break;
+
+        default:
+            gf_msg(this->name, GF_LOG_ERROR, -rc, DHT_MSG_LOG_TIER_ERROR,
+                   "Failed to "
+                   "lookup file %s\n",
+                   loc->name);
+            *per_link_status = -1;
+            break;
+    }
+
+    return rc;
+}
|
|
+
|
|
+/* Tell whether the file needs no migration in the requested direction.
+ *
+ * Returns _gf_true, and sets *per_link_status to 1, when src_subvol is
+ * NULL, when promoting and the file is cached on conf->subvolumes[1], or
+ * when demoting and the file is cached on conf->subvolumes[0].
+ * Returns _gf_false otherwise, leaving *per_link_status untouched.
+ */
+static gf_boolean_t
+tier_is_file_already_at_destination(xlator_t *src_subvol,
+                                    query_cbk_args_t *query_cbk_args,
+                                    dht_conf_t *conf, int *per_link_status)
+{
+    if (src_subvol != NULL) {
+        /* subvolume the file would end up on for this direction */
+        xlator_t *destination = query_cbk_args->is_promotion
+                                    ? conf->subvolumes[1]
+                                    : conf->subvolumes[0];
+
+        if (src_subvol != destination) {
+            return _gf_false;
+        }
+    }
+
+    *per_link_status = 1;
+    return _gf_true;
+}
|
|
+
|
|
+/* Account for one successfully migrated file.
+ *
+ * Bumps the promoted/demoted file counter, adds the size of the last
+ * promoted/demoted file to *total_migrated_bytes, adjusts blocks_used by
+ * the same size under dm_stat_mutex, recomputes percent_full when
+ * blocks_total is non-zero, and increments *total_files.
+ */
+static void
+tier_update_migration_counters(query_cbk_args_t *query_cbk_args,
+                               gf_defrag_info_t *defrag,
+                               uint64_t *total_migrated_bytes, int *total_files)
+{
+    gf_tier_conf_t *tier_conf = &defrag->tier_conf;
+
+    if (!query_cbk_args->is_promotion) {
+        defrag->total_files_demoted++;
+        *total_migrated_bytes += tier_conf->st_last_demoted_size;
+
+        pthread_mutex_lock(&dm_stat_mutex);
+        tier_conf->blocks_used -= tier_conf->st_last_demoted_size;
+        pthread_mutex_unlock(&dm_stat_mutex);
+    } else {
+        defrag->total_files_promoted++;
+        *total_migrated_bytes += tier_conf->st_last_promoted_size;
+
+        pthread_mutex_lock(&dm_stat_mutex);
+        tier_conf->blocks_used += tier_conf->st_last_promoted_size;
+        pthread_mutex_unlock(&dm_stat_mutex);
+    }
+
+    /* skip the percentage when the total is unknown (zero) */
+    if (tier_conf->blocks_total != 0) {
+        pthread_mutex_lock(&dm_stat_mutex);
+        tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used,
+                                                tier_conf->blocks_total);
+        pthread_mutex_unlock(&dm_stat_mutex);
+    }
+
+    *total_files += 1;
+}
|
|
+
|
|
+/* Migrate the single hard link described by link_info.
+ *
+ * Steps: look up the parent and obtain its path, build and look up the
+ * file loc, apply the promotion checks, skip files that are already on
+ * the destination subvolume or that tier_check_same_node() rejects, and
+ * finally call tier_migrate() and update the migration counters.
+ *
+ * *per_link_status is set to -1 on failure and to 1 when the file is to
+ * be ignored; it is left as passed in otherwise.
+ *
+ * Returns 0 or a non-zero value from the failing step. Independently of
+ * the outcome, -1 is returned once *total_files reaches max_migrate_files
+ * or *total_migrated_bytes exceeds max_migrate_bytes.
+ */
+static int
+tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid,
+                  gfdb_link_info_t *link_info, gf_defrag_info_t *defrag,
+                  query_cbk_args_t *query_cbk_args, dict_t *migrate_data,
+                  int *per_link_status, int *total_files,
+                  uint64_t *total_migrated_bytes)
+{
+    int ret = -1;
+    struct iatt current = {
+        0,
+    };
+    struct iatt par_stbuf = {
+        0,
+    };
+    loc_t p_loc = {
+        0,
+    };
+    loc_t loc = {
+        0,
+    };
+    xlator_t *src_subvol = NULL;
+    inode_t *linked_inode = NULL;
+    char *parent_path = NULL;
+
+    /* Lookup for parent and get the path of parent */
+    gf_uuid_copy(p_loc.gfid, link_info->pargfid);
+    p_loc.inode = inode_new(defrag->root_inode->table);
+    if (!p_loc.inode) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to create reference to inode"
+               " for %s",
+               uuid_utoa(p_loc.gfid));
+
+        *per_link_status = -1;
+        goto err;
+    }
+
+    parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf,
+                                       per_link_status);
+    if (!parent_path) {
+        goto err;
+    }
+
+    linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf);
+    inode_unref(p_loc.inode);
+    p_loc.inode = linked_inode;
+    if (!p_loc.inode) {
+        /* loc.parent->gfid is written below, so a parent inode that
+         * could not be linked must stop the migration of this link */
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to link parent inode for %s", uuid_utoa(p_loc.gfid));
+        *per_link_status = -1;
+        goto err;
+    }
+
+    /* Preparing File Inode */
+    gf_uuid_copy(loc.gfid, gfid);
+    loc.inode = inode_new(defrag->root_inode->table);
+    if (!loc.inode) {
+        /* same handling as for the parent inode above */
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to create reference to inode"
+               " for %s",
+               uuid_utoa(loc.gfid));
+        *per_link_status = -1;
+        goto err;
+    }
+    gf_uuid_copy(loc.pargfid, link_info->pargfid);
+    loc.parent = inode_ref(p_loc.inode);
+
+    /* Get filename and Construct file path */
+    if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc,
+                                    per_link_status) != 0) {
+        goto err;
+    }
+    gf_uuid_copy(loc.parent->gfid, link_info->pargfid);
+
+    /* lookup file inode */
+    if (tier_lookup_file(this, &p_loc, &loc, &current, per_link_status) != 0) {
+        goto err;
+    }
+
+    if (query_cbk_args->is_promotion) {
+        if (!tier_can_promote_file(this, link_info->file_name, &current,
+                                   defrag)) {
+            /* not eligible: ignore the file rather than count a failure */
+            *per_link_status = 1;
+            goto err;
+        }
+    }
+
+    linked_inode = inode_link(loc.inode, NULL, NULL, &current);
+    inode_unref(loc.inode);
+    loc.inode = linked_inode;
+
+    /*
+     * Do not promote/demote if file already is where it
+     * should be. It means another brick moved the file
+     * so is not an error. So we set per_link_status = 1
+     * so that we ignore counting this.
+     */
+    src_subvol = dht_subvol_get_cached(this, loc.inode);
+
+    if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf,
+                                            per_link_status)) {
+        goto err;
+    }
+
+    gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s",
+                 (query_cbk_args->is_promotion ? "promote" : "demote"),
+                 src_subvol->name, loc.path);
+
+    ret = tier_check_same_node(this, &loc, defrag);
+    if (ret != 0) {
+        if (ret < 0) {
+            *per_link_status = -1;
+            goto err;
+        }
+        ret = 0;
+        /* By setting per_link_status to 1 we are
+         * ignoring this status and will not be counting
+         * this file for migration */
+        *per_link_status = 1;
+        goto err;
+    }
+
+    gf_uuid_copy(loc.gfid, loc.inode->gfid);
+
+    if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
+        gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+               "Tiering paused. "
+               "Exiting tier_migrate_link");
+        goto err;
+    }
+
+    ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc,
+                       &defrag->tier_conf);
+
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to "
+               "migrate %s ",
+               loc.path);
+        *per_link_status = -1;
+        goto err;
+    }
+
+    tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes,
+                                   total_files);
+
+    ret = 0;
+
+err:
+    GF_FREE((char *)loc.name);
+    loc.name = NULL;
+    loc_wipe(&loc);
+    loc_wipe(&p_loc);
+
+    /* the per-cycle limits are checked on every exit path */
+    if ((*total_files >= defrag->tier_conf.max_migrate_files) ||
+        (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) {
+        /* total_migrated_bytes is a uint64_t, hence PRIu64 */
+        gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+               "Reached cycle migration limit."
+               "migrated bytes %" PRIu64 " files %d",
+               *total_migrated_bytes, *total_files);
+        ret = -1;
+    }
+
+    return ret;
+}
|
|
+
|
|
+/* Migrate the files listed in the query files of query_cbk_args.
+ *
+ * _args is a query_cbk_args_t. Records are read one at a time from
+ * qfile_array and the first link of each record is handed to
+ * tier_migrate_link(). The loop stops when the records are exhausted, on a
+ * read error, when the defrag status is no longer
+ * GF_DEFRAG_STATUS_STARTED, when the cycle time (promote or demote
+ * frequency) is exceeded, when tiering is paused, when
+ * tier_get_fs_stat() fails, when the high watermark is crossed during
+ * promotion, or when an emergency demotion has brought the hot tier below
+ * full.
+ *
+ * Returns total_status, the sum of the per-file statuses.
+ */
+static int
+tier_migrate_using_query_file(void *_args)
+{
+    int ret = -1;
+    query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args;
+    xlator_t *this = NULL;
+    gf_defrag_info_t *defrag = NULL;
+    gfdb_query_record_t *query_record = NULL;
+    gfdb_link_info_t *link_info = NULL;
+    dict_t *migrate_data = NULL;
+    /*
+     * per_file_status and per_link_status
+     *  0  : success
+     * -1 : failure
+     *  1  : ignore the status and don't count for migration
+     * */
+    int per_file_status = 0;
+    int per_link_status = 0;
+    int total_status = 0;
+    dht_conf_t *conf = NULL;
+    uint64_t total_migrated_bytes = 0;
+    int total_files = 0;
+    loc_t root_loc = {0};
+    gfdb_time_t start_time = {0};
+    gfdb_time_t current_time = {0};
+    int total_time = 0;
+    int max_time = 0;
+    gf_boolean_t emergency_demote_mode = _gf_false;
+
+    GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
+    GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
+    this = query_cbk_args->this;
+    GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
+    GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+    conf = this->private;
+
+    defrag = query_cbk_args->defrag;
+    migrate_data = dict_new();
+    if (!migrate_data)
+        goto out;
+
+    /* a demotion cycle that starts with a full hot tier is an emergency */
+    emergency_demote_mode = (!query_cbk_args->is_promotion &&
+                             is_hot_tier_full(&defrag->tier_conf));
+
+    if (tier_set_migrate_data(migrate_data) != 0) {
+        goto out;
+    }
+
+    dht_build_root_loc(defrag->root_inode, &root_loc);
+
+    ret = gettimeofday(&start_time, NULL);
+    if (ret < 0) {
+        /* without a start time the cycle duration cannot be measured */
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Could not get current time.");
+        goto out;
+    }
+    if (query_cbk_args->is_promotion) {
+        max_time = defrag->tier_conf.tier_promote_frequency;
+    } else {
+        max_time = defrag->tier_conf.tier_demote_frequency;
+    }
+
+    /* Per file */
+    while ((ret = read_query_record_list(query_cbk_args->qfile_array,
+                                         &query_record)) != 0) {
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                   "Failed to fetch query record "
+                   "from query file");
+            goto out;
+        }
+
+        if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+            ret = -1;
+            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                   "Exiting tier migration as"
+                   "defrag status is not started");
+            goto out;
+        }
+
+        ret = gettimeofday(&current_time, NULL);
+        if (ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                   "Could not get current time.");
+            goto out;
+        }
+
+        total_time = current_time.tv_sec - start_time.tv_sec;
+        if (total_time > max_time) {
+            gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+                   "Max cycle time reached. Exiting migration.");
+            goto out;
+        }
+
+        per_file_status = 0;
+        per_link_status = 0;
+        /* link_info points into the record it was taken from; drop the
+         * pointer left over from the previous (already freed) record so
+         * that a record with an empty link list is skipped instead of
+         * reusing it */
+        link_info = NULL;
+
+        if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
+            gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+                   "Tiering paused. "
+                   "Exiting tier_migrate_using_query_file");
+            break;
+        }
+
+        if (defrag->tier_conf.mode == TIER_MODE_WM) {
+            ret = tier_get_fs_stat(this, &root_loc);
+            if (ret != 0) {
+                gfdb_methods.gfdb_query_record_free(query_record);
+                query_record = NULL;
+                gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
+                       "tier_get_fs_stat() FAILED ... "
+                       "skipping file migrations until next cycle");
+                break;
+            }
+
+            if (!tier_do_migration(this, query_cbk_args->is_promotion)) {
+                gfdb_methods.gfdb_query_record_free(query_record);
+                query_record = NULL;
+
+                /* We have crossed the high watermark. Stop processing
+                 * files if this is a promotion cycle so demotion gets
+                 * a chance to start if not already running*/
+
+                if (query_cbk_args->is_promotion &&
+                    is_hot_tier_full(&defrag->tier_conf)) {
+                    gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+                           "High watermark crossed during "
+                           "promotion. Exiting "
+                           "tier_migrate_using_query_file");
+                    break;
+                }
+                continue;
+            }
+        }
+
+        per_link_status = 0;
+
+        /* For now we only support single link migration. And we will
+         * ignore other hard links in the link info list of query record
+         * TODO: Multiple hard links migration */
+        if (!list_empty(&query_record->link_list)) {
+            link_info = list_first_entry(&query_record->link_list,
+                                         gfdb_link_info_t, list);
+        }
+        if (link_info != NULL) {
+            if (tier_migrate_link(this, conf, query_record->gfid, link_info,
+                                  defrag, query_cbk_args, migrate_data,
+                                  &per_link_status, &total_files,
+                                  &total_migrated_bytes) != 0) {
+                gf_msg(
+                    this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+                    "%s failed for %s(gfid:%s)",
+                    (query_cbk_args->is_promotion ? "Promotion" : "Demotion"),
+                    link_info->file_name, uuid_utoa(query_record->gfid));
+            }
+        }
+        per_file_status = per_link_status;
+
+        if (per_file_status < 0) { /* Failure */
+            pthread_mutex_lock(&dm_stat_mutex);
+            defrag->total_failures++;
+            pthread_mutex_unlock(&dm_stat_mutex);
+        } else if (per_file_status == 0) { /* Success */
+            pthread_mutex_lock(&dm_stat_mutex);
+            defrag->total_files++;
+            pthread_mutex_unlock(&dm_stat_mutex);
+        } else if (per_file_status == 1) { /* Ignore */
+            per_file_status = 0;
+            /* Since this attempt was ignored we
+             * decrement the lookup count*/
+            pthread_mutex_lock(&dm_stat_mutex);
+            defrag->num_files_lookedup--;
+            pthread_mutex_unlock(&dm_stat_mutex);
+        }
+        total_status = total_status + per_file_status;
+        per_link_status = 0;
+        per_file_status = 0;
+
+        gfdb_methods.gfdb_query_record_free(query_record);
+        query_record = NULL;
+
+        /* If we are demoting and the entry watermark was HI, then
+         * we are done with emergency demotions if the current
+         * watermark has fallen below hi-watermark level
+         */
+        if (emergency_demote_mode) {
+            if (tier_check_watermark(this) == 0) {
+                if (!is_hot_tier_full(&defrag->tier_conf)) {
+                    break;
+                }
+            }
+        }
+    }
+
+out:
+    if (migrate_data)
+        dict_unref(migrate_data);
+
+    /* releases a record left over from a break/goto inside the loop */
+    gfdb_methods.gfdb_query_record_free(query_record);
+    query_record = NULL;
+
+    return total_status;
+}
|
|
+
|
|
+/* Callback invoked per record/file returned from the database.
+ *
+ * Writes the record to the query file referenced by query_fd in _args (a
+ * query_cbk_args_t) and, on success, increments num_files_lookedup under
+ * dm_stat_mutex.
+ *
+ * Returns 0 on success, -1 when validation of the arguments fails, or the
+ * non-zero result of gfdb_write_query_record().
+ */
+static int
+tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args)
+{
+    int rc = -1;
+    query_cbk_args_t *cbk_args = _args;
+
+    GF_VALIDATE_OR_GOTO("tier", cbk_args, done);
+    GF_VALIDATE_OR_GOTO("tier", cbk_args->defrag, done);
+    GF_VALIDATE_OR_GOTO("tier", (cbk_args->query_fd > 0), done);
+
+    rc = gfdb_methods.gfdb_write_query_record(cbk_args->query_fd,
+                                              gfdb_query_record);
+    if (rc != 0) {
+        gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed writing query record to query file");
+        goto done;
+    }
+
+    /* one more file found by the query */
+    pthread_mutex_lock(&dm_stat_mutex);
+    cbk_args->defrag->num_files_lookedup++;
+    pthread_mutex_unlock(&dm_stat_mutex);
+
+    rc = 0;
+
+done:
+    return rc;
+}
|
|
+
|
|
+/* Create query file in tier process.
+ *
+ * Opens the brick database at local_brick->brick_db_path, runs the query
+ * that matches the cycle (promotion, demotion, or emergency demotion when
+ * watermark_last is TIER_WM_HI), writing each record to
+ * local_brick->qfile_path through tier_gf_query_callback(), and then asks
+ * CTR over IPC to clear the heat on the db entries.
+ *
+ * args is a gfdb_brick_info_t. Returns 0 on success and a non-zero value
+ * on failure.
+ */
+static int
+tier_process_self_query(tier_brick_list_t *local_brick, void *args)
+{
+    int ret = -1;
+    char *db_path = NULL;
+    query_cbk_args_t *query_cbk_args = NULL;
+    xlator_t *this = NULL;
+    gfdb_conn_node_t *conn_node = NULL;
+    dict_t *params_dict = NULL;
+    dict_t *ctr_ipc_dict = NULL;
+    gfdb_brick_info_t *gfdb_brick_info = args;
+
+    /*Init of all the essentials*/
+    GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
+    query_cbk_args = gfdb_brick_info->_query_cbk_args;
+
+    /* validate the callback args before anything is read through them */
+    GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
+
+    GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
+    this = query_cbk_args->this;
+
+    /* defrag is dereferenced unconditionally by the queries below */
+    GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
+
+    GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
+
+    GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
+
+    GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
+
+    db_path = local_brick->brick_db_path;
+
+    /*Preparing DB parameters before init_db i.e getting db connection*/
+    params_dict = dict_new();
+    if (!params_dict) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "DB Params cannot initialized");
+        goto out;
+    }
+    SET_DB_PARAM_TO_DICT(this->name, params_dict,
+                         (char *)gfdb_methods.get_db_path_key(), db_path, ret,
+                         out);
+
+    /*Get the db connection*/
+    conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
+    if (!conn_node) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "FATAL: Failed initializing db operations");
+        goto out;
+    }
+
+    /* Query for eligible files from db */
+    query_cbk_args->query_fd = open(local_brick->qfile_path,
+                                    O_WRONLY | O_CREAT | O_APPEND,
+                                    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+    if (query_cbk_args->query_fd < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
+               "Failed to open query file %s", local_brick->qfile_path);
+        goto out;
+    }
+    if (!gfdb_brick_info->_gfdb_promote) {
+        if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) {
+            /* emergency demotion mode */
+            ret = gfdb_methods.find_all(
+                conn_node, tier_gf_query_callback, (void *)query_cbk_args,
+                query_cbk_args->defrag->tier_conf.query_limit);
+        } else {
+            if (query_cbk_args->defrag->write_freq_threshold == 0 &&
+                query_cbk_args->defrag->read_freq_threshold == 0) {
+                ret = gfdb_methods.find_unchanged_for_time(
+                    conn_node, tier_gf_query_callback, (void *)query_cbk_args,
+                    gfdb_brick_info->time_stamp);
+            } else {
+                ret = gfdb_methods.find_unchanged_for_time_freq(
+                    conn_node, tier_gf_query_callback, (void *)query_cbk_args,
+                    gfdb_brick_info->time_stamp,
+                    query_cbk_args->defrag->write_freq_threshold,
+                    query_cbk_args->defrag->read_freq_threshold, _gf_false);
+            }
+        }
+    } else {
+        if (query_cbk_args->defrag->write_freq_threshold == 0 &&
+            query_cbk_args->defrag->read_freq_threshold == 0) {
+            ret = gfdb_methods.find_recently_changed_files(
+                conn_node, tier_gf_query_callback, (void *)query_cbk_args,
+                gfdb_brick_info->time_stamp);
+        } else {
+            ret = gfdb_methods.find_recently_changed_files_freq(
+                conn_node, tier_gf_query_callback, (void *)query_cbk_args,
+                gfdb_brick_info->time_stamp,
+                query_cbk_args->defrag->write_freq_threshold,
+                query_cbk_args->defrag->read_freq_threshold, _gf_false);
+        }
+    }
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "FATAL: query from db failed");
+        goto out;
+    }
+
+    /*Clear the heat on the DB entries*/
+    /*Preparing ctr_ipc_dict*/
+    ctr_ipc_dict = dict_new();
+    if (!ctr_ipc_dict) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "ctr_ipc_dict cannot initialized");
+        goto out;
+    }
+
+    SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
+                         GFDB_IPC_CTR_CLEAR_OPS, ret, out);
+
+    ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
+                     NULL);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed clearing the heat "
+               "on db %s error %d",
+               local_brick->brick_db_path, ret);
+        goto out;
+    }
+
+    ret = 0;
+out:
+    if (params_dict) {
+        dict_unref(params_dict);
+        params_dict = NULL;
+    }
+
+    if (ctr_ipc_dict) {
+        dict_unref(ctr_ipc_dict);
+        ctr_ipc_dict = NULL;
+    }
+
+    if (query_cbk_args && query_cbk_args->query_fd >= 0) {
+        sys_close(query_cbk_args->query_fd);
+        query_cbk_args->query_fd = -1;
+    }
+
+    /* nothing to tear down when the connection was never established */
+    if (conn_node) {
+        gfdb_methods.fini_db(conn_node);
+    }
+
+    return ret;
+}
|
|
+
|
|
+/* Ask CTR to create the query file.
+ *
+ * Packs the query parameters (direction, frequency thresholds, query
+ * limit, emergency-demote flag and time stamp) into a
+ * gfdb_ipc_ctr_params_t, sends them to the brick over IPC together with
+ * local_brick->qfile_path, and stores the record count returned by CTR in
+ * num_files_lookedup.
+ *
+ * args is a gfdb_brick_info_t. Returns 0 on success and a non-zero value
+ * on failure.
+ */
+static int
+tier_process_ctr_query(tier_brick_list_t *local_brick, void *args)
+{
+    int ret = -1;
+    query_cbk_args_t *query_cbk_args = NULL;
+    xlator_t *this = NULL;
+    dict_t *ctr_ipc_in_dict = NULL;
+    dict_t *ctr_ipc_out_dict = NULL;
+    gfdb_brick_info_t *gfdb_brick_info = args;
+    gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
+    int count = 0;
+
+    /*Init of all the essentials*/
+    GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
+    query_cbk_args = gfdb_brick_info->_query_cbk_args;
+
+    /* validate the callback args before anything is read through them */
+    GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
+
+    GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
+    this = query_cbk_args->this;
+
+    /* defrag is dereferenced unconditionally when filling the params */
+    GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
+
+    GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
+
+    GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
+
+    GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
+
+    /*Preparing ctr_ipc_in_dict*/
+    ctr_ipc_in_dict = dict_new();
+    if (!ctr_ipc_in_dict) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "ctr_ipc_in_dict cannot initialized");
+        goto out;
+    }
+
+    ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t),
+                               gf_tier_mt_ipc_ctr_params_t);
+    if (!ipc_ctr_params) {
+        goto out;
+    }
+
+    /* set all the query params*/
+    ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote;
+
+    ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag
+                                               ->write_freq_threshold;
+
+    ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag
+                                              ->read_freq_threshold;
+
+    ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit;
+
+    ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote &&
+                                        query_cbk_args->defrag->tier_conf
+                                                .watermark_last == TIER_WM_HI);
+
+    memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp,
+           sizeof(gfdb_time_t));
+
+    SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
+                         GFDB_IPC_CTR_QUERY_OPS, ret, out);
+
+    SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
+                         GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path,
+                         ret, out);
+
+    ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
+                       ipc_ctr_params, sizeof(*ipc_ctr_params));
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
+               "Failed setting %s to params dictionary",
+               GFDB_IPC_CTR_GET_QUERY_PARAMS);
+        /* ipc_ctr_params is released once, by the common cleanup at
+         * "out"; freeing it here as well would free it twice */
+        goto out;
+    }
+    /* presumably the dict owns the buffer from here on; clearing the
+     * pointer keeps the cleanup at "out" from freeing it */
+    ipc_ctr_params = NULL;
+
+    ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict,
+                     &ctr_ipc_out_dict);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR,
+               "Failed query on %s ret %d", local_brick->brick_db_path, ret);
+        goto out;
+    }
+
+    ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT,
+                         &count);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed getting count "
+               "of records on %s",
+               local_brick->brick_db_path);
+        goto out;
+    }
+
+    /* a negative count is treated as a failed query */
+    if (count < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Failed query on %s", local_brick->brick_db_path);
+        ret = -1;
+        goto out;
+    }
+
+    pthread_mutex_lock(&dm_stat_mutex);
+    query_cbk_args->defrag->num_files_lookedup = count;
+    pthread_mutex_unlock(&dm_stat_mutex);
+
+    ret = 0;
+out:
+
+    if (ctr_ipc_in_dict) {
+        dict_unref(ctr_ipc_in_dict);
+        ctr_ipc_in_dict = NULL;
+    }
+
+    if (ctr_ipc_out_dict) {
+        dict_unref(ctr_ipc_out_dict);
+        ctr_ipc_out_dict = NULL;
+    }
+
+    /* still non-NULL only when the buffer never made it into the dict */
+    GF_FREE(ipc_ctr_params);
+
+    return ret;
+}
|
|
+
|
|
+/* This is the call back function for each brick from hot/cold bricklist
+ * It picks up each bricks db and queries for eligible files for migration.
+ * The list of eligible files are populated in appropriate query files.
+ *
+ * For a GFDB_SQLITE3 database the brick is first asked over IPC for its
+ * "journal_mode". When the value starts with "wal" the query is run in
+ * this process (tier_process_self_query), otherwise CTR is asked to run
+ * it (tier_process_ctr_query). For any other database type the query is
+ * always run in this process.
+ *
+ * Returns 0 on success and a non-zero value on failure.
+ */
+static int
+tier_process_brick(tier_brick_list_t *local_brick, void *args)
+{
+    int ret = -1;
+    dict_t *ctr_ipc_in_dict = NULL;
+    dict_t *ctr_ipc_out_dict = NULL;
+    char *strval = NULL;
+
+    GF_VALIDATE_OR_GOTO("tier", local_brick, out);
+
+    GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
+
+    if (dht_tier_db_type == GFDB_SQLITE3) {
+        /*Preparing ctr_ipc_in_dict*/
+        ctr_ipc_in_dict = dict_new();
+        if (!ctr_ipc_in_dict) {
+            gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                   "ctr_ipc_in_dict cannot initialized");
+            goto out;
+        }
+
+        ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
+                           GFDB_IPC_CTR_GET_DB_PARAM_OPS);
+        if (ret) {
+            gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
+                   "Failed to set %s "
+                   "to params dictionary",
+                   GFDB_IPC_CTR_KEY);
+            goto out;
+        }
+
+        ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, "");
+        if (ret) {
+            gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
+                   "Failed to set %s "
+                   "to params dictionary",
+                   GFDB_IPC_CTR_GET_DB_PARAM_OPS);
+            goto out;
+        }
+
+        ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY,
+                           "journal_mode");
+        if (ret) {
+            gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
+                   "Failed to set %s "
+                   "to params dictionary",
+                   GFDB_IPC_CTR_GET_DB_KEY);
+            goto out;
+        }
+
+        ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR,
+                         ctr_ipc_in_dict, &ctr_ipc_out_dict);
+        if (ret || ctr_ipc_out_dict == NULL) {
+            gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                   "Failed to get "
+                   "journal_mode of sql db %s",
+                   local_brick->brick_db_path);
+            goto out;
+        }
+
+        ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval);
+        if (ret) {
+            /* strval was not filled in, so name the key that was
+             * requested instead of printing the (NULL) value */
+            gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED,
+                   "Failed to get %s "
+                   "from params dictionary",
+                   "journal_mode");
+            goto out;
+        }
+
+        /* "wal" prefix: query from this process; anything else: let CTR
+         * run the query */
+        if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) {
+            ret = tier_process_self_query(local_brick, args);
+        } else {
+            ret = tier_process_ctr_query(local_brick, args);
+        }
+        if (ret) {
+            goto out;
+        }
+    } else {
+        ret = tier_process_self_query(local_brick, args);
+        if (ret) {
+            goto out;
+        }
+    }
+
+    ret = 0;
+out:
+    if (ctr_ipc_in_dict)
+        dict_unref(ctr_ipc_in_dict);
+
+    if (ctr_ipc_out_dict)
+        dict_unref(ctr_ipc_out_dict);
+
+    return ret;
+}
|
|
+
|
|
+/* Build the per-brick query files for one promotion or demotion cycle.
+ *
+ * The query time stamp is "now - args->freq_time" seconds with the
+ * microseconds forced to 0. For every brick in args->brick_list the query
+ * file path is generated, any old file at that path is removed and
+ * tier_process_brick() is run. A failing brick is logged and the
+ * remaining bricks are still processed.
+ *
+ * Returns -1 when the current time cannot be read, 0 otherwise.
+ */
+static int
+tier_build_migration_qfile(migration_args_t *args,
+                           query_cbk_args_t *query_cbk_args,
+                           gf_boolean_t is_promotion)
+{
+    gfdb_time_t current_time;
+    gfdb_brick_info_t gfdb_brick_info;
+    gfdb_time_t time_in_past;
+    int ret = -1;
+    tier_brick_list_t *local_brick = NULL;
+    int i = 0;
+
+    ret = gettimeofday(&current_time, NULL);
+    if (ret == -1) {
+        gf_msg(args->this->name, GF_LOG_ERROR, errno,
+               DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
+        goto out;
+    }
+    time_in_past.tv_sec = current_time.tv_sec - args->freq_time;
+
+    /* The migration daemon may run a varying number of usec after the */
+    /* sleep call triggers. A file may be registered in CTR some number */
+    /* of usec X after the daemon started and missed in the subsequent */
+    /* cycle if the daemon starts Y usec after the period in seconds */
+    /* where Y>X. Normalize away this problem by always setting usec */
+    /* to 0. */
+    time_in_past.tv_usec = 0;
+
+    gfdb_brick_info.time_stamp = &time_in_past;
+    gfdb_brick_info._gfdb_promote = is_promotion;
+    gfdb_brick_info._query_cbk_args = query_cbk_args;
+
+    list_for_each_entry(local_brick, args->brick_list, list)
+    {
+        /* Construct query file path for this brick
+         * i.e
+         * /var/run/gluster/xlator_name/
+         * {promote/demote}-brickname-indexinbricklist
+         * So that no two query files will have same path even
+         * bricks have the same name
+         * */
+        snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
+                 GET_QFILE_PATH(gfdb_brick_info._gfdb_promote),
+                 local_brick->brick_name, i);
+
+        /* Delete any old query files for this brick */
+        sys_unlink(local_brick->qfile_path);
+
+        /* a failing brick is only logged; the others are still queried */
+        ret = tier_process_brick(local_brick, &gfdb_brick_info);
+        if (ret) {
+            gf_msg(args->this->name, GF_LOG_ERROR, 0,
+                   DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n",
+                   local_brick->brick_db_path);
+        }
+        i++;
+    }
+    ret = 0;
+out:
+    return ret;
+}
|
|
+
|
|
+static int
|
|
+tier_migrate_files_using_qfile(migration_args_t *comp,
|
|
+ query_cbk_args_t *query_cbk_args)
|
|
+{
|
|
+ int ret = -1;
|
|
+ tier_brick_list_t *local_brick = NULL;
|
|
+ tier_brick_list_t *temp = NULL;
|
|
+ gfdb_time_t current_time = {
|
|
+ 0,
|
|
+ };
|
|
+ ssize_t qfile_array_size = 0;
|
|
+ int count = 0;
|
|
+ int temp_fd = 0;
|
|
+ gf_tier_conf_t *tier_conf = NULL;
|
|
+
|
|
+ tier_conf = &(query_cbk_args->defrag->tier_conf);
|
|
+
|
|
+ /* Time for error query files */
|
|
+ gettimeofday(¤t_time, NULL);
|
|
+
|
|
+ /* Build the qfile list */
|
|
+ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
|
|
+ {
|
|
+ qfile_array_size++;
|
|
+ }
|
|
+ query_cbk_args->qfile_array = qfile_array_new(qfile_array_size);
|
|
+ if (!query_cbk_args->qfile_array) {
|
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Failed to create new "
|
|
+ "qfile_array");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ /*Open all qfiles*/
|
|
+ count = 0;
|
|
+ query_cbk_args->qfile_array->exhausted_count = 0;
|
|
+ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
|
|
+ {
|
|
+ temp_fd = query_cbk_args->qfile_array->fd_array[count];
|
|
+ temp_fd = open(local_brick->qfile_path, O_RDONLY,
|
|
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
|
|
+ if (temp_fd < 0) {
|
|
+ gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Failed to open "
|
|
+ "%s to the query file",
|
|
+ local_brick->qfile_path);
|
|
+ query_cbk_args->qfile_array->exhausted_count++;
|
|
+ }
|
|
+ query_cbk_args->qfile_array->fd_array[count] = temp_fd;
|
|
+ count++;
|
|
+ }
|
|
+
|
|
+ /* Moving the query file index to the next, so that we won't the same
|
|
+ * query file every cycle as the first one */
|
|
+ query_cbk_args->qfile_array
|
|
+ ->next_index = (query_cbk_args->is_promotion)
|
|
+ ? tier_conf->last_promote_qfile_index
|
|
+ : tier_conf->last_demote_qfile_index;
|
|
+ shift_next_index(query_cbk_args->qfile_array);
|
|
+ if (query_cbk_args->is_promotion) {
|
|
+ tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array
|
|
+ ->next_index;
|
|
+ } else {
|
|
+ tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array
|
|
+ ->next_index;
|
|
+ }
|
|
+
|
|
+ /* Migrate files using query file list */
|
|
+ ret = tier_migrate_using_query_file((void *)query_cbk_args);
|
|
+out:
|
|
+ qfile_array_free(query_cbk_args->qfile_array);
|
|
+
|
|
+ /* If there is an error rename all the query files to .err files
|
|
+ * with a timestamp for better debugging */
|
|
+ if (ret) {
|
|
+ struct tm tm = {
|
|
+ 0,
|
|
+ };
|
|
+ char time_str[128] = {
|
|
+ 0,
|
|
+ };
|
|
+ char query_file_path_err[PATH_MAX] = {
|
|
+ 0,
|
|
+ };
|
|
+ int32_t len = 0;
|
|
+
|
|
+ /* Time format for error query files */
|
|
+ gmtime_r(¤t_time.tv_sec, &tm);
|
|
+ strftime(time_str, sizeof(time_str), "%F-%T", &tm);
|
|
+
|
|
+ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
|
|
+ {
|
|
+ /* rename error qfile*/
|
|
+ len = snprintf(query_file_path_err, sizeof(query_file_path_err),
|
|
+ "%s-%s.err", local_brick->qfile_path, time_str);
|
|
+ if ((len >= 0) && (len < sizeof(query_file_path_err))) {
|
|
+ if (sys_rename(local_brick->qfile_path, query_file_path_err) ==
|
|
+ -1)
|
|
+ gf_msg_debug("tier", 0,
|
|
+ "rename "
|
|
+ "failed");
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ query_cbk_args->qfile_array = NULL;
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_demote(migration_args_t *demotion_args)
|
|
+{
|
|
+ query_cbk_args_t query_cbk_args;
|
|
+ int ret = -1;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", demotion_args, out);
|
|
+ GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out);
|
|
+ GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list,
|
|
+ out);
|
|
+ GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out);
|
|
+
|
|
+ THIS = demotion_args->this;
|
|
+
|
|
+ query_cbk_args.this = demotion_args->this;
|
|
+ query_cbk_args.defrag = demotion_args->defrag;
|
|
+ query_cbk_args.is_promotion = 0;
|
|
+
|
|
+ /*Build the query file using bricklist*/
|
|
+ ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false);
|
|
+ if (ret)
|
|
+ goto out;
|
|
+
|
|
+ /* Migrate files using the query file */
|
|
+ ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args);
|
|
+ if (ret)
|
|
+ goto out;
|
|
+
|
|
+out:
|
|
+ demotion_args->return_value = ret;
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_promote(migration_args_t *promotion_args)
|
|
+{
|
|
+ int ret = -1;
|
|
+ query_cbk_args_t query_cbk_args;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out);
|
|
+ GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list,
|
|
+ out);
|
|
+ GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag,
|
|
+ out);
|
|
+
|
|
+ THIS = promotion_args->this;
|
|
+
|
|
+ query_cbk_args.this = promotion_args->this;
|
|
+ query_cbk_args.defrag = promotion_args->defrag;
|
|
+ query_cbk_args.is_promotion = 1;
|
|
+
|
|
+ /*Build the query file using bricklist*/
|
|
+ ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true);
|
|
+ if (ret)
|
|
+ goto out;
|
|
+
|
|
+ /* Migrate files using the query file */
|
|
+ ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args);
|
|
+ if (ret)
|
|
+ goto out;
|
|
+
|
|
+out:
|
|
+ promotion_args->return_value = ret;
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Command the CTR on a brick to compact the local database using an IPC
|
|
+ */
|
|
+static int
|
|
+tier_process_self_compact(tier_brick_list_t *local_brick, void *args)
|
|
+{
|
|
+ int ret = -1;
|
|
+ char *db_path = NULL;
|
|
+ query_cbk_args_t *query_cbk_args = NULL;
|
|
+ xlator_t *this = NULL;
|
|
+ gfdb_conn_node_t *conn_node = NULL;
|
|
+ dict_t *params_dict = NULL;
|
|
+ dict_t *ctr_ipc_dict = NULL;
|
|
+ gfdb_brick_info_t *gfdb_brick_info = args;
|
|
+
|
|
+ /*Init of all the essentials*/
|
|
+ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
|
|
+ query_cbk_args = gfdb_brick_info->_query_cbk_args;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
|
|
+ this = query_cbk_args->this;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
|
|
+
|
|
+ db_path = local_brick->brick_db_path;
|
|
+
|
|
+ /*Preparing DB parameters before init_db i.e getting db connection*/
|
|
+ params_dict = dict_new();
|
|
+ if (!params_dict) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "DB Params cannot initialized");
|
|
+ goto out;
|
|
+ }
|
|
+ SET_DB_PARAM_TO_DICT(this->name, params_dict,
|
|
+ (char *)gfdb_methods.get_db_path_key(), db_path, ret,
|
|
+ out);
|
|
+
|
|
+ /*Get the db connection*/
|
|
+ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
|
|
+ if (!conn_node) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "FATAL: Failed initializing db operations");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ ret = 0;
|
|
+
|
|
+ /*Preparing ctr_ipc_dict*/
|
|
+ ctr_ipc_dict = dict_new();
|
|
+ if (!ctr_ipc_dict) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "ctr_ipc_dict cannot initialized");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ ret = dict_set_int32(ctr_ipc_dict, "compact_active",
|
|
+ query_cbk_args->defrag->tier_conf.compact_active);
|
|
+
|
|
+ if (ret) {
|
|
+ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
|
|
+ "Failed to set %s "
|
|
+ "to params dictionary",
|
|
+ "compact_active");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ ret = dict_set_int32(
|
|
+ ctr_ipc_dict, "compact_mode_switched",
|
|
+ query_cbk_args->defrag->tier_conf.compact_mode_switched);
|
|
+
|
|
+ if (ret) {
|
|
+ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
|
|
+ "Failed to set %s "
|
|
+ "to params dictionary",
|
|
+ "compact_mode_switched");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
|
|
+ GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out);
|
|
+
|
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "Starting Compaction IPC");
|
|
+
|
|
+ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
|
|
+ NULL);
|
|
+
|
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "Ending Compaction IPC");
|
|
+
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Failed compaction "
|
|
+ "on db %s error %d",
|
|
+ local_brick->brick_db_path, ret);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "SUCCESS: %s Compaction", local_brick->brick_name);
|
|
+
|
|
+ ret = 0;
|
|
+out:
|
|
+ if (params_dict) {
|
|
+ dict_unref(params_dict);
|
|
+ params_dict = NULL;
|
|
+ }
|
|
+
|
|
+ if (ctr_ipc_dict) {
|
|
+ dict_unref(ctr_ipc_dict);
|
|
+ ctr_ipc_dict = NULL;
|
|
+ }
|
|
+
|
|
+ gfdb_methods.fini_db(conn_node);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This is the call back function for each brick from hot/cold bricklist.
|
|
+ * It determines the database type on each brick and calls the corresponding
|
|
+ * function to prepare the compaction IPC.
|
|
+ */
|
|
+static int
|
|
+tier_compact_db_brick(tier_brick_list_t *local_brick, void *args)
|
|
+{
|
|
+ int ret = -1;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", local_brick, out);
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
|
|
+
|
|
+ ret = tier_process_self_compact(local_brick, args);
|
|
+ if (ret) {
|
|
+ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "Brick %s did not compact", local_brick->brick_name);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ ret = 0;
|
|
+
|
|
+out:
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int
|
|
+tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args)
|
|
+{
|
|
+ gfdb_time_t current_time;
|
|
+ gfdb_brick_info_t gfdb_brick_info;
|
|
+ gfdb_time_t time_in_past;
|
|
+ int ret = -1;
|
|
+ tier_brick_list_t *local_brick = NULL;
|
|
+
|
|
+ time_in_past.tv_sec = args->freq_time;
|
|
+ time_in_past.tv_usec = 0;
|
|
+
|
|
+ ret = gettimeofday(¤t_time, NULL);
|
|
+ if (ret == -1) {
|
|
+ gf_msg(args->this->name, GF_LOG_ERROR, errno,
|
|
+ DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
|
|
+ goto out;
|
|
+ }
|
|
+ time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
|
|
+
|
|
+ /* The migration daemon may run a varying numberof usec after the sleep
|
|
+ call triggers. A file may be registered in CTR some number of usec X
|
|
+ after the daemon started and missed in the subsequent cycle if the
|
|
+ daemon starts Y usec after the period in seconds where Y>X. Normalize
|
|
+ away this problem by always setting usec to 0. */
|
|
+ time_in_past.tv_usec = 0;
|
|
+
|
|
+ gfdb_brick_info.time_stamp = &time_in_past;
|
|
+
|
|
+ /* This is meant to say we are always compacting at this point */
|
|
+ /* We simply borrow the promotion flag to do this */
|
|
+ gfdb_brick_info._gfdb_promote = 1;
|
|
+
|
|
+ gfdb_brick_info._query_cbk_args = query_cbk_args;
|
|
+
|
|
+ list_for_each_entry(local_brick, args->brick_list, list)
|
|
+ {
|
|
+ gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "Start compaction for %s", local_brick->brick_name);
|
|
+
|
|
+ ret = tier_compact_db_brick(local_brick, &gfdb_brick_info);
|
|
+ if (ret) {
|
|
+ gf_msg(args->this->name, GF_LOG_ERROR, 0,
|
|
+ DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n",
|
|
+ local_brick->brick_db_path);
|
|
+ }
|
|
+
|
|
+ gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "End compaction for %s", local_brick->brick_name);
|
|
+ }
|
|
+ ret = 0;
|
|
+out:
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int
|
|
+tier_compact(void *args)
|
|
+{
|
|
+ int ret = -1;
|
|
+ query_cbk_args_t query_cbk_args;
|
|
+ migration_args_t *compaction_args = args;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out);
|
|
+ GF_VALIDATE_OR_GOTO(compaction_args->this->name,
|
|
+ compaction_args->brick_list, out);
|
|
+ GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag,
|
|
+ out);
|
|
+
|
|
+ THIS = compaction_args->this;
|
|
+
|
|
+ query_cbk_args.this = compaction_args->this;
|
|
+ query_cbk_args.defrag = compaction_args->defrag;
|
|
+ query_cbk_args.is_compaction = 1;
|
|
+
|
|
+ /* Send the compaction pragma out to all the bricks on the bricklist. */
|
|
+ /* tier_get_bricklist ensures all bricks on the list are local to */
|
|
+ /* this node. */
|
|
+ ret = tier_send_compact(compaction_args, &query_cbk_args);
|
|
+ if (ret)
|
|
+ goto out;
|
|
+
|
|
+ ret = 0;
|
|
+out:
|
|
+ compaction_args->return_value = ret;
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int
|
|
+tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head)
|
|
+{
|
|
+ xlator_list_t *child = NULL;
|
|
+ char *rv = NULL;
|
|
+ char *rh = NULL;
|
|
+ char *brickname = NULL;
|
|
+ char db_name[PATH_MAX] = "";
|
|
+ int ret = 0;
|
|
+ tier_brick_list_t *local_brick = NULL;
|
|
+ int32_t len = 0;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", xl, out);
|
|
+ GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out);
|
|
+
|
|
+ /*
|
|
+ * This function obtains remote subvolumes and filters out only
|
|
+ * those running on the same node as the tier daemon.
|
|
+ */
|
|
+ if (strcmp(xl->type, "protocol/client") == 0) {
|
|
+ ret = dict_get_str(xl->options, "remote-host", &rh);
|
|
+ if (ret < 0)
|
|
+ goto out;
|
|
+
|
|
+ if (gf_is_local_addr(rh)) {
|
|
+ local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t),
|
|
+ gf_tier_mt_bricklist_t);
|
|
+ if (!local_brick) {
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ ret = dict_get_str(xl->options, "remote-subvolume", &rv);
|
|
+ if (ret < 0)
|
|
+ goto out;
|
|
+
|
|
+ brickname = strrchr(rv, '/') + 1;
|
|
+ snprintf(db_name, sizeof(db_name), "%s.db", brickname);
|
|
+
|
|
+ local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char);
|
|
+ if (!local_brick->brick_db_path) {
|
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "Failed to allocate memory for"
|
|
+ " bricklist.");
|
|
+ ret = -1;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv,
|
|
+ GF_HIDDEN_PATH, db_name);
|
|
+ if ((len < 0) || (len >= PATH_MAX)) {
|
|
+ gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "DB path too long");
|
|
+ ret = -1;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ local_brick->xlator = xl;
|
|
+
|
|
+ snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname);
|
|
+
|
|
+ list_add_tail(&(local_brick->list), local_bricklist_head);
|
|
+
|
|
+ ret = 0;
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ for (child = xl->children; child; child = child->next) {
|
|
+ ret = tier_get_bricklist(child->xlator, local_bricklist_head);
|
|
+ if (ret) {
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ ret = 0;
|
|
+out:
|
|
+
|
|
+ if (ret) {
|
|
+ if (local_brick) {
|
|
+ GF_FREE(local_brick->brick_db_path);
|
|
+ }
|
|
+ GF_FREE(local_brick);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_get_freq_demote(gf_tier_conf_t *tier_conf)
|
|
+{
|
|
+ if ((tier_conf->mode == TIER_MODE_WM) &&
|
|
+ (tier_conf->watermark_last == TIER_WM_HI))
|
|
+ return DEFAULT_DEMOTE_DEGRADED;
|
|
+ else
|
|
+ return tier_conf->tier_demote_frequency;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_get_freq_promote(gf_tier_conf_t *tier_conf)
|
|
+{
|
|
+ return tier_conf->tier_promote_frequency;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf)
|
|
+{
|
|
+ return tier_conf->tier_compact_hot_frequency;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf)
|
|
+{
|
|
+ return tier_conf->tier_compact_cold_frequency;
|
|
+}
|
|
+
|
|
+static int
|
|
+tier_check_demote(gfdb_time_t current_time, int freq)
|
|
+{
|
|
+ return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
|
|
+}
|
|
+
|
|
+static gf_boolean_t
|
|
+tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
|
|
+ int freq)
|
|
+{
|
|
+ if ((tier_conf->mode == TIER_MODE_WM) &&
|
|
+ (tier_conf->watermark_last == TIER_WM_HI))
|
|
+ return _gf_false;
|
|
+
|
|
+ else
|
|
+ return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
|
|
+}
|
|
+
|
|
+static gf_boolean_t
|
|
+tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
|
|
+ int freq_compact)
|
|
+{
|
|
+ if (!(tier_conf->compact_active || tier_conf->compact_mode_switched))
|
|
+ return _gf_false;
|
|
+
|
|
+ return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false;
|
|
+}
|
|
+
|
|
+void
|
|
+clear_bricklist(struct list_head *brick_list)
|
|
+{
|
|
+ tier_brick_list_t *local_brick = NULL;
|
|
+ tier_brick_list_t *temp = NULL;
|
|
+
|
|
+ if (list_empty(brick_list)) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ list_for_each_entry_safe(local_brick, temp, brick_list, list)
|
|
+ {
|
|
+ list_del(&local_brick->list);
|
|
+ GF_FREE(local_brick->brick_db_path);
|
|
+ GF_FREE(local_brick);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void
|
|
+set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold)
|
|
+{
|
|
+ tier_brick_list_t *local_brick = NULL;
|
|
+ int i = 0;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", brick_list, out);
|
|
+
|
|
+ list_for_each_entry(local_brick, brick_list, list)
|
|
+ {
|
|
+ /* Construct query file path for this brick
|
|
+ * i.e
|
|
+ * /var/run/gluster/xlator_name/
|
|
+ * {promote/demote}-brickname-indexinbricklist
|
|
+ * So that no two query files will have same path even
|
|
+ * bricks have the same name
|
|
+ * */
|
|
+ snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
|
|
+ GET_QFILE_PATH(is_cold), local_brick->brick_name, i);
|
|
+ i++;
|
|
+ }
|
|
+out:
|
|
+ return;
|
|
+}
|
|
+
|
|
+static int
|
|
+tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time)
|
|
+{
|
|
+ xlator_t *this = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+ gf_defrag_info_t *defrag = NULL;
|
|
+ gf_tier_conf_t *tier_conf = NULL;
|
|
+ gf_boolean_t is_hot_tier = args->is_hot_tier;
|
|
+ int freq = 0;
|
|
+ int ret = -1;
|
|
+ const char *tier_type = is_hot_tier ? "hot" : "cold";
|
|
+
|
|
+ this = args->this;
|
|
+
|
|
+ conf = this->private;
|
|
+
|
|
+ defrag = conf->defrag;
|
|
+
|
|
+ tier_conf = &defrag->tier_conf;
|
|
+
|
|
+ freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf)
|
|
+ : tier_get_freq_compact_cold(tier_conf);
|
|
+
|
|
+ defrag->tier_conf.compact_mode_switched =
|
|
+ is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot
|
|
+ : defrag->tier_conf.compact_mode_switched_cold;
|
|
+
|
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "Compact mode %i", defrag->tier_conf.compact_mode_switched);
|
|
+
|
|
+ if (tier_check_compact(tier_conf, current_time, freq)) {
|
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "Start compaction on %s tier", tier_type);
|
|
+
|
|
+ args->freq_time = freq;
|
|
+ ret = tier_compact(args);
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Compaction failed on "
|
|
+ "%s tier",
|
|
+ tier_type);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "End compaction on %s tier", tier_type);
|
|
+
|
|
+ if (is_hot_tier) {
|
|
+ defrag->tier_conf.compact_mode_switched_hot = _gf_false;
|
|
+ } else {
|
|
+ defrag->tier_conf.compact_mode_switched_cold = _gf_false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+out:
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int
|
|
+tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm)
|
|
+{
|
|
+ if (mode == TIER_MODE_WM && wm == TIER_WM_HI)
|
|
+ return WM_INTERVAL_EMERG;
|
|
+
|
|
+ return WM_INTERVAL;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Main tiering loop. This is called from the promotion and the
|
|
+ * demotion threads spawned in tier_start().
|
|
+ *
|
|
+ * Every second, wake from sleep to perform tasks.
|
|
+ * 1. Check trigger to migrate data.
|
|
+ * 2. Check for state changes (pause, unpause, stop).
|
|
+ */
|
|
+static void *
|
|
+tier_run(void *in_args)
|
|
+{
|
|
+ dht_conf_t *conf = NULL;
|
|
+ gfdb_time_t current_time = {0};
|
|
+ int freq = 0;
|
|
+ int ret = 0;
|
|
+ xlator_t *any = NULL;
|
|
+ xlator_t *xlator = NULL;
|
|
+ gf_tier_conf_t *tier_conf = NULL;
|
|
+ loc_t root_loc = {0};
|
|
+ int check_watermark = 0;
|
|
+ gf_defrag_info_t *defrag = NULL;
|
|
+ xlator_t *this = NULL;
|
|
+ migration_args_t *args = in_args;
|
|
+ GF_VALIDATE_OR_GOTO("tier", args, out);
|
|
+ GF_VALIDATE_OR_GOTO("tier", args->brick_list, out);
|
|
+
|
|
+ this = args->this;
|
|
+ GF_VALIDATE_OR_GOTO("tier", this, out);
|
|
+
|
|
+ conf = this->private;
|
|
+ GF_VALIDATE_OR_GOTO("tier", conf, out);
|
|
+
|
|
+ defrag = conf->defrag;
|
|
+ GF_VALIDATE_OR_GOTO("tier", defrag, out);
|
|
+
|
|
+ if (list_empty(args->brick_list)) {
|
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Brick list for tier is empty. Exiting.");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
|
|
+ tier_conf = &defrag->tier_conf;
|
|
+
|
|
+ dht_build_root_loc(defrag->root_inode, &root_loc);
|
|
+
|
|
+ while (1) {
|
|
+ /*
|
|
+ * Check if a graph switch occurred. If so, stop migration
|
|
+ * thread. It will need to be restarted manually.
|
|
+ */
|
|
+ any = THIS->ctx->active->first;
|
|
+ xlator = xlator_search_by_name(any, this->name);
|
|
+
|
|
+ if (xlator != this) {
|
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
|
|
+ "Detected graph switch. Exiting migration "
|
|
+ "daemon.");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ gf_defrag_check_pause_tier(tier_conf);
|
|
+
|
|
+ sleep(1);
|
|
+
|
|
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
|
|
+ ret = 1;
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "defrag->defrag_status != "
|
|
+ "GF_DEFRAG_STATUS_STARTED");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
|
|
+ defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
|
|
+ ret = 0;
|
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
|
|
+ gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "defrag->defrag_cmd == "
|
|
+ "GF_DEFRAG_CMD_START_DETACH_TIER");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)
|
|
+ continue;
|
|
+
|
|
+ /* To have proper synchronization amongst all
|
|
+ * brick holding nodes, so that promotion and demotions
|
|
+ * start atomically w.r.t promotion/demotion frequency
|
|
+ * period, all nodes should have their system time
|
|
+ * in-sync with each other either manually set or
|
|
+ * using a NTP server*/
|
|
+ ret = gettimeofday(¤t_time, NULL);
|
|
+ if (ret == -1) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, errno,
|
|
+ DHT_MSG_SYS_CALL_GET_TIME_FAILED,
|
|
+ "Failed to get current time");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ check_watermark++;
|
|
+
|
|
+ /* emergency demotion requires frequent watermark monitoring */
|
|
+ if (check_watermark >=
|
|
+ tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) {
|
|
+ check_watermark = 0;
|
|
+ if (tier_conf->mode == TIER_MODE_WM) {
|
|
+ ret = tier_get_fs_stat(this, &root_loc);
|
|
+ if (ret != 0) {
|
|
+ continue;
|
|
+ }
|
|
+ ret = tier_check_watermark(this);
|
|
+ if (ret != 0) {
|
|
+ gf_msg(this->name, GF_LOG_CRITICAL, errno,
|
|
+ DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark");
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (args->is_promotion) {
|
|
+ freq = tier_get_freq_promote(tier_conf);
|
|
+
|
|
+ if (tier_check_promote(tier_conf, current_time, freq)) {
|
|
+ args->freq_time = freq;
|
|
+ ret = tier_promote(args);
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Promotion failed");
|
|
+ }
|
|
+ }
|
|
+ } else if (args->is_compaction) {
|
|
+ tier_prepare_compact(args, current_time);
|
|
+ } else {
|
|
+ freq = tier_get_freq_demote(tier_conf);
|
|
+
|
|
+ if (tier_check_demote(current_time, freq)) {
|
|
+ args->freq_time = freq;
|
|
+ ret = tier_demote(args);
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Demotion failed");
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Check the statfs immediately after the processing threads
|
|
+ return */
|
|
+ check_watermark = WM_INTERVAL;
|
|
+ }
|
|
+
|
|
+ ret = 0;
|
|
+out:
|
|
+
|
|
+ args->return_value = ret;
|
|
+
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+int
|
|
+tier_start(xlator_t *this, gf_defrag_info_t *defrag)
|
|
+{
|
|
+ pthread_t promote_thread;
|
|
+ pthread_t demote_thread;
|
|
+ pthread_t hot_compact_thread;
|
|
+ pthread_t cold_compact_thread;
|
|
+ int ret = -1;
|
|
+ struct list_head bricklist_hot = {0};
|
|
+ struct list_head bricklist_cold = {0};
|
|
+ migration_args_t promotion_args = {0};
|
|
+ migration_args_t demotion_args = {0};
|
|
+ migration_args_t hot_compaction_args = {0};
|
|
+ migration_args_t cold_compaction_args = {0};
|
|
+ dht_conf_t *conf = NULL;
|
|
+
|
|
+ INIT_LIST_HEAD((&bricklist_hot));
|
|
+ INIT_LIST_HEAD((&bricklist_cold));
|
|
+
|
|
+ conf = this->private;
|
|
+
|
|
+ tier_get_bricklist(conf->subvolumes[1], &bricklist_hot);
|
|
+ set_brick_list_qpath(&bricklist_hot, _gf_false);
|
|
+
|
|
+ demotion_args.this = this;
|
|
+ demotion_args.brick_list = &bricklist_hot;
|
|
+ demotion_args.defrag = defrag;
|
|
+ demotion_args.is_promotion = _gf_false;
|
|
+ demotion_args.is_compaction = _gf_false;
|
|
+
|
|
+ ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args,
|
|
+ "tierdem");
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Failed to start demotion thread.");
|
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
|
|
+ goto cleanup;
|
|
+ }
|
|
+
|
|
+ tier_get_bricklist(conf->subvolumes[0], &bricklist_cold);
|
|
+ set_brick_list_qpath(&bricklist_cold, _gf_true);
|
|
+
|
|
+ promotion_args.this = this;
|
|
+ promotion_args.brick_list = &bricklist_cold;
|
|
+ promotion_args.defrag = defrag;
|
|
+ promotion_args.is_promotion = _gf_true;
|
|
+
|
|
+ ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args,
|
|
+ "tierpro");
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Failed to start promotion thread.");
|
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
|
|
+ goto waitforspawned;
|
|
+ }
|
|
+
|
|
+ hot_compaction_args.this = this;
|
|
+ hot_compaction_args.brick_list = &bricklist_hot;
|
|
+ hot_compaction_args.defrag = defrag;
|
|
+ hot_compaction_args.is_promotion = _gf_false;
|
|
+ hot_compaction_args.is_compaction = _gf_true;
|
|
+ hot_compaction_args.is_hot_tier = _gf_true;
|
|
+
|
|
+ ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run,
|
|
+ &hot_compaction_args, "tierhcom");
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Failed to start compaction thread.");
|
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
|
|
+ goto waitforspawnedpromote;
|
|
+ }
|
|
+
|
|
+ cold_compaction_args.this = this;
|
|
+ cold_compaction_args.brick_list = &bricklist_cold;
|
|
+ cold_compaction_args.defrag = defrag;
|
|
+ cold_compaction_args.is_promotion = _gf_false;
|
|
+ cold_compaction_args.is_compaction = _gf_true;
|
|
+ cold_compaction_args.is_hot_tier = _gf_false;
|
|
+
|
|
+ ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run,
|
|
+ &cold_compaction_args, "tierccom");
|
|
+ if (ret) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Failed to start compaction thread.");
|
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
|
|
+ goto waitforspawnedhotcompact;
|
|
+ }
|
|
+ pthread_join(cold_compact_thread, NULL);
|
|
+
|
|
+waitforspawnedhotcompact:
|
|
+ pthread_join(hot_compact_thread, NULL);
|
|
+
|
|
+waitforspawnedpromote:
|
|
+ pthread_join(promote_thread, NULL);
|
|
+
|
|
+waitforspawned:
|
|
+ pthread_join(demote_thread, NULL);
|
|
+
|
|
+cleanup:
|
|
+ clear_bricklist(&bricklist_cold);
|
|
+ clear_bricklist(&bricklist_hot);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int32_t
|
|
+tier_migration_needed(xlator_t *this)
|
|
+{
|
|
+ gf_defrag_info_t *defrag = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+ int ret = 0;
|
|
+
|
|
+ conf = this->private;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
|
|
+ GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out);
|
|
+
|
|
+ defrag = conf->defrag;
|
|
+
|
|
+ if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
|
|
+ (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER))
|
|
+ ret = 1;
|
|
+out:
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int32_t
|
|
+tier_migration_get_dst(xlator_t *this, dht_local_t *local)
|
|
+{
|
|
+ dht_conf_t *conf = NULL;
|
|
+ int32_t ret = -1;
|
|
+ gf_defrag_info_t *defrag = NULL;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", this, out);
|
|
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
|
|
+
|
|
+ conf = this->private;
|
|
+
|
|
+ defrag = conf->defrag;
|
|
+
|
|
+ if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
|
|
+ local->rebalance.target_node = conf->subvolumes[0];
|
|
+
|
|
+ } else if (conf->subvolumes[0] == local->cached_subvol)
|
|
+ local->rebalance.target_node = conf->subvolumes[1];
|
|
+ else
|
|
+ local->rebalance.target_node = conf->subvolumes[0];
|
|
+
|
|
+ if (local->rebalance.target_node)
|
|
+ ret = 0;
|
|
+
|
|
+out:
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+xlator_t *
|
|
+tier_search(xlator_t *this, dht_layout_t *layout, const char *name)
|
|
+{
|
|
+ xlator_t *subvol = NULL;
|
|
+ dht_conf_t *conf = NULL;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("tier", this, out);
|
|
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
|
|
+
|
|
+ conf = this->private;
|
|
+
|
|
+ subvol = TIER_HASHED_SUBVOL;
|
|
+
|
|
+out:
|
|
+ return subvol;
|
|
+}
|
|
+
|
|
+static int
|
|
+tier_load_externals(xlator_t *this)
|
|
+{
|
|
+ int ret = -1;
|
|
+ char *libpathfull = (LIBDIR "/libgfdb.so.0");
|
|
+ get_gfdb_methods_t get_gfdb_methods;
|
|
+
|
|
+ GF_VALIDATE_OR_GOTO("this", this, out);
|
|
+
|
|
+ libhandle = dlopen(libpathfull, RTLD_NOW);
|
|
+ if (!libhandle) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Error loading libgfdb.so %s\n", dlerror());
|
|
+ ret = -1;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods");
|
|
+ if (!get_gfdb_methods) {
|
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
|
|
+ "Error loading get_gfdb_methods()");
|
|
+ ret = -1;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ get_gfdb_methods(&gfdb_methods);
|
|
+
|
|
+ ret = 0;
|
|
+
|
|
+out:
|
|
+ if (ret && libhandle)
|
|
+ dlclose(libhandle);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
+ * Translate a tier mode string into a tier_mode_t.
+ *
+ * "test" selects TIER_MODE_TEST; every other string selects TIER_MODE_WM.
+ * mode is handed straight to strcmp(), so it must not be NULL.
+ */
+static tier_mode_t
+tier_validate_mode(char *mode)
+{
+    return (strcmp(mode, "test") == 0) ? TIER_MODE_TEST : TIER_MODE_WM;
+}
|
|
+
|
|
+/*
+ * Translate a compaction mode string into a boolean.
+ *
+ * Returns _gf_true only for the exact string "on", _gf_false otherwise.
+ * Both the input and the result are traced at INFO level; the result trace
+ * is not an error condition and therefore must not be logged as one.
+ * mode is handed straight to strcmp(), so it must not be NULL.
+ */
+static gf_boolean_t
+tier_validate_compact_mode(char *mode)
+{
+    gf_boolean_t ret = _gf_false;
+
+    gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+           "tier_validate_compact_mode: mode = %s", mode);
+
+    if (strcmp(mode, "on") == 0)
+        ret = _gf_true;
+
+    gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+           "tier_validate_compact_mode: ret = %i", ret);
+
+    return ret;
+}
|
|
+
|
|
+/*
+ * Install the tier-specific entries in the DHT method table.
+ *
+ * Sets migration_get_dst_subvol, migration_other, migration_needed and
+ * layout_search in conf->methods to their tier implementations.
+ *
+ * Returns 0 on success, -1 when this or this->private is NULL.
+ */
+int
+tier_init_methods(xlator_t *this)
+{
+    int ret = -1;
+    dht_conf_t *conf = NULL;
+    dht_methods_t *methods = NULL;
+
+    GF_VALIDATE_OR_GOTO("tier", this, err);
+    /* conf is dereferenced below; reject a missing private area up front. */
+    GF_VALIDATE_OR_GOTO(this->name, this->private, err);
+
+    conf = this->private;
+    methods = &(conf->methods);
+
+    methods->migration_get_dst_subvol = tier_migration_get_dst;
+    methods->migration_other = tier_start;
+    methods->migration_needed = tier_migration_needed;
+    methods->layout_search = tier_search;
+
+    ret = 0;
+err:
+    return ret;
+}
|
|
+
|
|
+/*
+ * Record the volume name in defrag->tier_conf.volname.
+ *
+ * The name is this->name up to (not including) the first occurrence of
+ * "-tier-dht", or all of this->name when that marker is absent, limited to
+ * GD_VOLUME_NAME_MAX characters and then NUL-terminated.
+ */
+static void
+tier_save_vol_name(xlator_t *this)
+{
+    dht_conf_t *conf = this->private;
+    gf_defrag_info_t *defrag = conf->defrag;
+    char *marker = strstr(this->name, "-tier-dht");
+    int len = marker ? (int)(marker - this->name) : (int)strlen(this->name);
+
+    if (len > GD_VOLUME_NAME_MAX)
+        len = GD_VOLUME_NAME_MAX;
+
+    strncpy(defrag->tier_conf.volname, this->name, len);
+    defrag->tier_conf.volname[len] = 0;
+}
|
|
+
|
|
+/*
+ * Fetch a 32-bit integer option, returning defval when dict_get_int32()
+ * reports a failure for key.
+ */
+static int
+tier_get_int32_option(dict_t *options, char *key, int defval)
+{
+    int value = 0;
+
+    if (dict_get_int32(options, key, &value))
+        return defval;
+
+    return value;
+}
+
+/*
+ * Translator init entry point for tier.
+ *
+ * Runs dht_init(), installs the tier method table and checks that exactly
+ * two subvolumes are configured. When conf->defrag is NULL nothing more is
+ * done. Otherwise libgfdb is loaded, the tier options are read from
+ * this->options into defrag / defrag->tier_conf (each falling back to its
+ * DEFAULT_* value), the pause state is initialised, the per-translator
+ * directory under DEFAULT_VAR_RUN_DIRECTORY is created and the promote /
+ * demote query-file paths are built.
+ *
+ * Returns 0 on success and a non-zero value on any failure, including an
+ * invalid subvolume count.
+ */
+int
+tier_init(xlator_t *this)
+{
+    int ret = -1;
+    dht_conf_t *conf = NULL;
+    gf_defrag_info_t *defrag = NULL;
+    char *voldir = NULL;
+    char *mode = NULL;
+    char *paused = NULL;
+    gf_boolean_t compact_mode = _gf_false;
+
+    ret = dht_init(this);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "tier_init failed");
+        goto out;
+    }
+
+    conf = this->private;
+
+    ret = tier_init_methods(this);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "tier_init_methods failed");
+        goto out;
+    }
+
+    if (conf->subvolume_cnt != 2) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Invalid number of subvolumes %d", conf->subvolume_cnt);
+        /* ret is still 0 from tier_init_methods(); report the failure. */
+        ret = -1;
+        goto out;
+    }
+
+    /* if instantiated from client side initialization is complete. */
+    if (!conf->defrag) {
+        ret = 0;
+        goto out;
+    }
+
+    /* if instantiated from server side, load db libraries */
+    ret = tier_load_externals(this);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+               "Could not load externals. Aborting");
+        goto out;
+    }
+
+    defrag = conf->defrag;
+
+    defrag->tier_conf.last_demote_qfile_index = 0;
+    defrag->tier_conf.last_promote_qfile_index = 0;
+
+    defrag->tier_conf.is_tier = 1;
+    defrag->this = this;
+
+    /* Integer options: each one falls back to its default when unset. */
+    defrag->tier_conf.tier_max_promote_size = tier_get_int32_option(
+        this->options, "tier-max-promote-file-size", 0);
+    defrag->tier_conf.tier_promote_frequency = tier_get_int32_option(
+        this->options, "tier-promote-frequency", DEFAULT_PROMOTE_FREQ_SEC);
+    defrag->tier_conf.tier_demote_frequency = tier_get_int32_option(
+        this->options, "tier-demote-frequency", DEFAULT_DEMOTE_FREQ_SEC);
+    defrag->tier_conf.tier_compact_hot_frequency = tier_get_int32_option(
+        this->options, "tier-hot-compact-frequency",
+        DEFAULT_HOT_COMPACT_FREQ_SEC);
+    defrag->tier_conf.tier_compact_cold_frequency = tier_get_int32_option(
+        this->options, "tier-cold-compact-frequency",
+        DEFAULT_COLD_COMPACT_FREQ_SEC);
+    defrag->tier_conf.watermark_hi = tier_get_int32_option(
+        this->options, "watermark-hi", DEFAULT_WM_HI);
+    defrag->tier_conf.watermark_low = tier_get_int32_option(
+        this->options, "watermark-low", DEFAULT_WM_LOW);
+    defrag->write_freq_threshold = tier_get_int32_option(
+        this->options, "write-freq-threshold", DEFAULT_WRITE_FREQ_SEC);
+    defrag->read_freq_threshold = tier_get_int32_option(
+        this->options, "read-freq-threshold", DEFAULT_READ_FREQ_SEC);
+
+    /* The option is expressed in MB; widen before scaling to bytes. */
+    defrag->tier_conf.max_migrate_bytes =
+        (uint64_t)tier_get_int32_option(this->options, "tier-max-mb",
+                                        DEFAULT_TIER_MAX_MIGRATE_MB) *
+        1024 * 1024;
+
+    defrag->tier_conf.max_migrate_files = tier_get_int32_option(
+        this->options, "tier-max-files", DEFAULT_TIER_MAX_MIGRATE_FILES);
+    defrag->tier_conf.query_limit = tier_get_int32_option(
+        this->options, "tier-query-limit", DEFAULT_TIER_QUERY_LIMIT);
+
+    if (dict_get_str(this->options, "tier-compact", &mode)) {
+        defrag->tier_conf.compact_active = DEFAULT_COMP_MODE;
+    } else {
+        compact_mode = tier_validate_compact_mode(mode);
+        /* If compaction is now active, we need to inform the bricks on
+           the hot and cold tier of this. See dht-common.h for more. */
+        defrag->tier_conf.compact_active = compact_mode;
+        if (compact_mode) {
+            defrag->tier_conf.compact_mode_switched_hot = _gf_true;
+            defrag->tier_conf.compact_mode_switched_cold = _gf_true;
+        }
+    }
+
+    if (dict_get_str(this->options, "tier-mode", &mode))
+        defrag->tier_conf.mode = DEFAULT_TIER_MODE;
+    else
+        defrag->tier_conf.mode = tier_validate_mode(mode);
+
+    pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0);
+
+    gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
+
+    /* The return value is not needed: paused is only acted on when it is
+     * non-NULL. */
+    (void)dict_get_str(this->options, "tier-pause", &paused);
+    if (paused && strcmp(paused, "on") == 0)
+        gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
+
+    ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name);
+    if (ret < 0)
+        goto out;
+
+    ret = mkdir_p(voldir, 0777, _gf_true);
+    if (ret == -1 && errno != EEXIST) {
+        /* Pass errno and the path so the log says what actually failed. */
+        gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
+               "tier_init failed: could not create directory %s", voldir);
+
+        GF_FREE(voldir);
+        goto out;
+    }
+
+    GF_FREE(voldir);
+
+    ret = gf_asprintf(&promotion_qfile, "%s/%s/promote",
+                      DEFAULT_VAR_RUN_DIRECTORY, this->name);
+    if (ret < 0)
+        goto out;
+
+    ret = gf_asprintf(&demotion_qfile, "%s/%s/demote",
+                      DEFAULT_VAR_RUN_DIRECTORY, this->name);
+    if (ret < 0) {
+        GF_FREE(promotion_qfile);
+        /* The global outlives this call; do not leave it dangling. */
+        promotion_qfile = NULL;
+        goto out;
+    }
+
+    gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+           "Promote/demote frequency %d/%d "
+           "Write/Read freq thresholds %d/%d",
+           defrag->tier_conf.tier_promote_frequency,
+           defrag->tier_conf.tier_demote_frequency,
+           defrag->write_freq_threshold, defrag->read_freq_threshold);
+
+    tier_save_vol_name(this);
+
+    ret = 0;
+
+out:
+
+    return ret;
+}
|
|
+
|
|
+/*
+ * Completion callback for the pause task: logs op_ret at INFO level and
+ * returns it unchanged. sync_frame and data are not used.
+ */
+int
+tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data)
+{
+    gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
+           "Migrate file paused with op_ret %d", op_ret);
+    return op_ret;
+}
|
|
+
|
|
+/*
+ * Task body that pauses tiering.
+ *
+ * data is the tier xlator_t. Returns -1 when the translator has no private
+ * configuration or no defrag context; otherwise calls
+ * gf_defrag_pause_tier() and returns 0 (the callee's result is not
+ * inspected).
+ */
+int
+tier_cli_pause(void *data)
+{
+    xlator_t *this = data;
+    dht_conf_t *conf = this->private;
+    gf_defrag_info_t *defrag = NULL;
+    int ret = -1;
+
+    GF_VALIDATE_OR_GOTO(this->name, conf, done);
+
+    defrag = conf->defrag;
+    GF_VALIDATE_OR_GOTO(this->name, defrag, done);
+
+    gf_defrag_pause_tier(this, defrag);
+    ret = 0;
+
+done:
+    return ret;
+}
|
|
+
|
|
+/*
+ * Apply changed options to a running tier translator.
+ *
+ * When a defrag context exists, every tier option is re-read from options
+ * into defrag / defrag->tier_conf, a change of "tier-compact" raises the
+ * compact_mode_switched_* flags, and "tier-pause" either schedules
+ * tier_cli_pause() as a synctask or resumes tiering. A failure while
+ * reading any option skips the remaining tier options.
+ *
+ * In every case the function finishes by delegating to dht_reconfigure()
+ * and returns its result; pause/resume failures are only logged.
+ */
+int
+tier_reconfigure(xlator_t *this, dict_t *options)
+{
+    dht_conf_t *conf = NULL;
+    gf_defrag_info_t *defrag = NULL;
+    char *mode = NULL;
+    int migrate_mb = 0;
+    gf_boolean_t req_pause = _gf_false;
+    int ret = 0;
+    call_frame_t *frame = NULL;
+    gf_boolean_t last_compact_setting = _gf_false;
+
+    conf = this->private;
+    defrag = conf->defrag;
+
+    /* No defrag context: only the DHT options are reconfigured. */
+    if (!defrag)
+        goto out;
+
+    GF_OPTION_RECONF("tier-max-promote-file-size",
+                     defrag->tier_conf.tier_max_promote_size, options, int32,
+                     out);
+
+    GF_OPTION_RECONF("tier-promote-frequency",
+                     defrag->tier_conf.tier_promote_frequency, options, int32,
+                     out);
+
+    GF_OPTION_RECONF("tier-demote-frequency",
+                     defrag->tier_conf.tier_demote_frequency, options, int32,
+                     out);
+
+    GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold,
+                     options, int32, out);
+
+    GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold,
+                     options, int32, out);
+
+    GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi, options,
+                     int32, out);
+
+    GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low, options,
+                     int32, out);
+
+    /* Remember the old value so a switch can be detected afterwards. */
+    last_compact_setting = defrag->tier_conf.compact_active;
+
+    GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active, options,
+                     bool, out);
+
+    if (last_compact_setting != defrag->tier_conf.compact_active) {
+        defrag->tier_conf.compact_mode_switched_hot = _gf_true;
+        defrag->tier_conf.compact_mode_switched_cold = _gf_true;
+        gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
+               "compact mode switched");
+    }
+
+    GF_OPTION_RECONF("tier-hot-compact-frequency",
+                     defrag->tier_conf.tier_compact_hot_frequency, options,
+                     int32, out);
+
+    GF_OPTION_RECONF("tier-cold-compact-frequency",
+                     defrag->tier_conf.tier_compact_cold_frequency, options,
+                     int32, out);
+
+    GF_OPTION_RECONF("tier-mode", mode, options, str, out);
+    /* tier_validate_mode() passes mode to strcmp(); never hand it NULL. */
+    if (mode)
+        defrag->tier_conf.mode = tier_validate_mode(mode);
+
+    GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out);
+    defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 * 1024;
+
+    GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files,
+                     options, int32, out);
+
+    GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit,
+                     options, int32, out);
+
+    GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out);
+
+    if (req_pause == _gf_true) {
+        frame = create_frame(this, this->ctx->pool);
+        if (!frame) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                   "pause tier failed on reconfigure: could not create frame");
+            goto out;
+        }
+
+        frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+        ret = synctask_new(this->ctx->env, tier_cli_pause, tier_cli_pause_done,
+                           frame, this);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                   "pause tier failed on reconfigure");
+        }
+    } else {
+        ret = gf_defrag_resume_tier(this, defrag);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                   "resume tier failed on reconfigure");
+        }
+    }
+
+out:
+    return dht_reconfigure(this, options);
+}
|
|
+
|
|
+/*
+ * Translator fini entry point for tier.
+ *
+ * Closes the libgfdb handle if one is open, frees the two query-file path
+ * strings and then runs dht_fini(). Each file-level pointer is reset to
+ * NULL after it is released so that no stale value survives this call.
+ */
+void
+tier_fini(xlator_t *this)
+{
+    if (libhandle) {
+        dlclose(libhandle);
+        libhandle = NULL;
+    }
+
+    GF_FREE(demotion_qfile);
+    demotion_qfile = NULL;
+
+    GF_FREE(promotion_qfile);
+    promotion_qfile = NULL;
+
+    dht_fini(this);
+}
|
|
+
|
|
+/*
+ * File-operation table for the tier translator. Six operations point at
+ * tier_* functions; all remaining entries point at the dht_* function of
+ * the same name.
+ */
+struct xlator_fops fops = {
+    /* Operations routed to tier_* functions */
+    .create = tier_create,
+    .statfs = tier_statfs,
+    .readdir = tier_readdir,
+    .readdirp = tier_readdirp,
+    .unlink = tier_unlink,
+    .link = tier_link,
+
+    /* Namespace and directory operations */
+    .lookup = dht_lookup,
+    .mknod = dht_mknod,
+    .open = dht_open,
+    .opendir = dht_opendir,
+    .fsyncdir = dht_fsyncdir,
+    .symlink = dht_symlink,
+    .mkdir = dht_mkdir,
+    .rmdir = dht_rmdir,
+    .rename = dht_rename,
+    .entrylk = dht_entrylk,
+    .fentrylk = dht_fentrylk,
+
+    /* Inode read operations */
+    .stat = dht_stat,
+    .fstat = dht_fstat,
+    .access = dht_access,
+    .readlink = dht_readlink,
+    .getxattr = dht_getxattr,
+    .fgetxattr = dht_fgetxattr,
+    .readv = dht_readv,
+    .flush = dht_flush,
+    .fsync = dht_fsync,
+    .inodelk = dht_inodelk,
+    .finodelk = dht_finodelk,
+    .lk = dht_lk,
+
+    /* Inode write operations */
+    .fremovexattr = dht_fremovexattr,
+    .removexattr = dht_removexattr,
+    .setxattr = dht_setxattr,
+    .fsetxattr = dht_fsetxattr,
+    .truncate = dht_truncate,
+    .ftruncate = dht_ftruncate,
+    .writev = dht_writev,
+    .xattrop = dht_xattrop,
+    .fxattrop = dht_fxattrop,
+    .setattr = dht_setattr,
+    .fsetattr = dht_fsetattr,
+    .fallocate = dht_fallocate,
+    .discard = dht_discard,
+    .zerofill = dht_zerofill,
+};
|
|
+
|
|
+/* Callback table: release and forget both use the DHT functions. */
+struct xlator_cbks cbks = {
+    .release = dht_release,
+    .forget = dht_forget,
+};
|
|
+
|
|
+extern int32_t
|
|
+mem_acct_init(xlator_t *this);
|
|
+
|
|
+extern struct volume_options dht_options[];
|
|
+
|
|
+/*
+ * Translator descriptor for "tier". init, fini and reconfigure are the
+ * tier_* functions; notify and the option table come from DHT.
+ */
+xlator_api_t xlator_api = {
+    /* Identity */
+    .identifier = "tier",
+    .category = GF_MAINTAINED,
+    .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */
+
+    /* Lifecycle entry points */
+    .init = tier_init,
+    .fini = tier_fini,
+    .reconfigure = tier_reconfigure,
+    .notify = dht_notify,
+    .mem_acct_init = mem_acct_init,
+
+    /* Tables */
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = dht_options,
+};
|
|
+
|
|
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
|
|
new file mode 100644
|
|
index 0000000..a20b1db
|
|
--- /dev/null
|
|
+++ b/xlators/cluster/dht/src/tier.h
|
|
@@ -0,0 +1,110 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#ifndef _TIER_H_
|
|
+#define _TIER_H_
|
|
+
|
|
+/******************************************************************************/
|
|
+/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */
|
|
+#include "dht-common.h"
|
|
+#include <glusterfs/xlator.h>
|
|
+#include <signal.h>
|
|
+#include <fnmatch.h>
|
|
+#include <signal.h>
|
|
+
|
|
+/*
|
|
+ * Size of timer wheel. We would not promote or demote less
|
|
+ * frequently than this number.
|
|
+ */
|
|
+#define TIMER_SECS 3600
|
|
+
|
|
+#include "gfdb_data_store.h"
|
|
+#include <ctype.h>
|
|
+#include <sys/stat.h>
|
|
+
|
|
+#define PROMOTION_QFILE "promotequeryfile"
|
|
+#define DEMOTION_QFILE "demotequeryfile"
|
|
+
|
|
+#define TIER_HASHED_SUBVOL conf->subvolumes[0]
|
|
+#define TIER_UNHASHED_SUBVOL conf->subvolumes[1]
|
|
+
|
|
+/*
+ * Select the promotion or demotion query-file path. The whole expansion is
+ * parenthesized so the conditional cannot be split by an operator in the
+ * expression that surrounds a use of the macro.
+ */
+#define GET_QFILE_PATH(is_promotion)                                           \
+    ((is_promotion) ? promotion_qfile : demotion_qfile)
|
|
+
|
|
+typedef struct tier_qfile_array { /* A set of file descriptors with iteration bookkeeping */
|
|
+ int *fd_array; /* the file descriptors */
|
|
+ ssize_t array_size; /* presumably the number of entries in fd_array - TODO confirm */
|
|
+ ssize_t next_index; /* presumably the next fd_array slot to use - TODO confirm */
|
|
+ /* Number of exhausted FDs */
|
|
+ ssize_t exhausted_count;
|
|
+} tier_qfile_array_t;
|
|
+
|
|
+typedef struct _query_cbk_args { /* Argument bundle; judging by the name, for a query callback - TODO confirm */
|
|
+ xlator_t *this;
|
|
+ gf_defrag_info_t *defrag;
|
|
+ /* Used for writing */
|
|
+ int query_fd;
|
|
+ int is_promotion; /* presumably non-zero for promotion, zero for demotion - TODO confirm */
|
|
+ int is_compaction;
|
|
+ /* Used for reading */
|
|
+ tier_qfile_array_t *qfile_array;
|
|
+} query_cbk_args_t;
|
|
+
|
|
+int
|
|
+gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag);
|
|
+
|
|
+typedef struct gfdb_brick_info { /* Groups a timestamp, a promote flag and the query callback arguments */
|
|
+ gfdb_time_t *time_stamp;
|
|
+ gf_boolean_t _gfdb_promote; /* NOTE(review): looks like true = promote, false = demote - confirm */
|
|
+ query_cbk_args_t *_query_cbk_args;
|
|
+} gfdb_brick_info_t;
|
|
+
|
|
+typedef struct brick_list { /* One brick entry, chained to others through the list member */
|
|
+ xlator_t *xlator;
|
|
+ char *brick_db_path; /* presumably the path of this brick's database - TODO confirm */
|
|
+ char brick_name[NAME_MAX]; /* fixed-size buffer of NAME_MAX bytes */
|
|
+ char qfile_path[PATH_MAX]; /* fixed-size buffer of PATH_MAX bytes */
|
|
+ struct list_head list; /* list linkage */
|
|
+} tier_brick_list_t;
|
|
+
|
|
+typedef struct _dm_thread_args { /* Argument bundle typedef'd as migration_args_t */
|
|
+ xlator_t *this;
|
|
+ gf_defrag_info_t *defrag;
|
|
+ struct list_head *brick_list; /* presumably a list of tier_brick_list_t entries - TODO confirm */
|
|
+ int freq_time;
|
|
+ int return_value; /* presumably where the migration result is reported - TODO confirm */
|
|
+ int is_promotion;
|
|
+ int is_compaction;
|
|
+ gf_boolean_t is_hot_tier;
|
|
+} migration_args_t;
|
|
+
|
|
+typedef enum tier_watermark_op_ { /* Watermark classification; values run 0..3 in declaration order */
|
|
+ TIER_WM_NONE = 0,
|
|
+ TIER_WM_LOW,
|
|
+ TIER_WM_HI,
|
|
+ TIER_WM_MID /* note: declared after HI, so MID == 3 */
|
|
+} tier_watermark_op_t;
|
|
+
|
|
+#define DEFAULT_PROMOTE_FREQ_SEC 120
|
|
+#define DEFAULT_DEMOTE_FREQ_SEC 120
|
|
+#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800
|
|
+#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800
|
|
+#define DEFAULT_DEMOTE_DEGRADED 1
|
|
+#define DEFAULT_WRITE_FREQ_SEC 0
|
|
+#define DEFAULT_READ_FREQ_SEC 0
|
|
+#define DEFAULT_WM_LOW 75
|
|
+#define DEFAULT_WM_HI 90
|
|
+#define DEFAULT_TIER_MODE TIER_MODE_TEST
|
|
+#define DEFAULT_COMP_MODE _gf_true
|
|
+#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
|
|
+#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
|
|
+#define DEFAULT_TIER_QUERY_LIMIT 100
|
|
+
|
|
+#endif
|
|
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
|
|
index 194634b..545c02b 100644
|
|
--- a/xlators/features/Makefile.am
|
|
+++ b/xlators/features/Makefile.am
|
|
@@ -5,6 +5,6 @@ endif
|
|
SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
|
|
compress changelog gfid-access snapview-client snapview-server trash \
|
|
shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
|
|
- utime
|
|
+ utime changetimerecorder
|
|
|
|
CLEANFILES =
|
|
diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/changetimerecorder/Makefile.am
|
|
new file mode 100644
|
|
index 0000000..a985f42
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/Makefile.am
|
|
@@ -0,0 +1,3 @@
|
|
+SUBDIRS = src
|
|
+
|
|
+CLEANFILES =
|
|
diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am
|
|
new file mode 100644
|
|
index 0000000..620017e
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/Makefile.am
|
|
@@ -0,0 +1,26 @@
|
|
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
|
|
+
|
|
+# changetimerecorder can only be built when libgfdb is enabled
|
|
+if BUILD_GFDB
|
|
+ xlator_LTLIBRARIES = changetimerecorder.la
|
|
+endif
|
|
+
|
|
+changetimerecorder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
|
|
+
|
|
+changetimerecorder_la_SOURCES = changetimerecorder.c \
|
|
+ ctr-helper.c ctr-xlator-ctx.c
|
|
+
|
|
+changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
|
|
+ $(top_builddir)/libglusterfs/src/gfdb/libgfdb.la
|
|
+
|
|
+noinst_HEADERS = ctr-messages.h changetimerecorder.h ctr_mem_types.h \
|
|
+ ctr-helper.h ctr-xlator-ctx.h
|
|
+
|
|
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
|
|
+ -I$(top_srcdir)/libglusterfs/src/gfdb \
|
|
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
|
|
+ -DDATADIR=\"$(localstatedir)\"
|
|
+
|
|
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS)
|
|
+
|
|
+CLEANFILES =
|
|
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
|
|
new file mode 100644
|
|
index 0000000..f2aa4a9
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.c
|
|
@@ -0,0 +1,2371 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+#include <ctype.h>
|
|
+#include <sys/uio.h>
|
|
+
|
|
+#include "gfdb_sqlite3.h"
|
|
+#include "ctr-helper.h"
|
|
+#include "ctr-messages.h"
|
|
+#include <glusterfs/syscall.h>
|
|
+
|
|
+#include "changetimerecorder.h"
|
|
+#include "tier-ctr-interface.h"
|
|
+
|
|
+/*******************************inode forget***********************************/
|
|
+/*
+ * Inode-forget callback: passes the inode to fini_ctr_xlator_ctx() and
+ * always returns 0.
+ */
+int
+ctr_forget(xlator_t *this, inode_t *inode)
+{
+    fini_ctr_xlator_ctx(this, inode);
+
+    return 0;
+}
|
|
+
|
|
+/************************** Look up heal **************************************/
|
|
+/*
|
|
+Problem: The CTR xlator records file meta (heat/hardlinks)
|
|
+into the data. This works fine for files which are created
|
|
+after ctr xlator is switched ON. But for files which were
|
|
+created before CTR xlator is ON, CTR xlator is not able to
|
|
+record either of the meta i.e heat or hardlinks. Thus making
|
|
+those files immune to promotions/demotions.
|
|
+
|
|
+Solution: The solution that is implemented in this patch is
|
|
+to do a ctr-db heal of all those pre-existent files, using named lookup.
|
|
+For this purpose we use the inode-xlator context variable option
|
|
+in gluster.
|
|
+The inode-xlator context variable for ctr xlator will have the
|
|
+following,
|
|
+ a. A Lock for the context variable
|
|
+ b. A hardlink list: This list represents the successful looked
|
|
+ up hardlinks.
|
|
+These are the scenarios when the hardlink list is updated:
|
|
+1) Named-Lookup: Whenever a named lookup happens on a file, in the
|
|
+ wind path we copy all required hardlink and inode information to
|
|
+ ctr_db_record structure, which resides in the frame->local variable.
|
|
+ We don't update the database in wind. During the unwind, we read the
|
|
+ information from the ctr_db_record and:
|
|
+ Check if the inode context variable is created, if not we create it.
|
|
+ Check if the hard link is there in the hardlink list.
|
|
+ If its not there we add it to the list and send a update to the
|
|
+ database using libgfdb.
|
|
+ Please note: The database transaction can fail(and we ignore) as there
|
|
+ already might be a record in the db. This update to the db is to heal
|
|
+ if its not there.
|
|
+ If its there in the list we ignore it.
|
|
+2) Inode Forget: Whenever an inode forget hits we clear the hardlink list in
|
|
+ the inode context variable and delete the inode context variable.
|
|
+ Please note: An inode forget may happen for two reasons:
|
|
+ a. when the inode is deleted.
|
|
+ b. the in-memory inode is evicted from the inode table due to cache limits.
|
|
+3) create: whenever a create happens we create the inode context variable and
|
|
+ add the hardlink. The database updation is done as usual by ctr.
|
|
+4) link: whenever a hardlink is created for the inode, we create the inode
|
|
+ context variable, if not present, and add the hardlink to the list.
|
|
+5) unlink: whenever a unlink happens we delete the hardlink from the list.
|
|
+6) mknod: same as create.
|
|
+7) rename: whenever a rename happens we update the hardlink in list. if the
|
|
+ hardlink was not present for updation, we add the hardlink to the list.
|
|
+
|
|
+What is pending:
|
|
+1) This solution will only work for named lookups.
|
|
+2) We don't track afr-self-heal/dht-rebalancer traffic for healing.
|
|
+
|
|
+*/
|
|
+
|
|
+/*
+ * Wind-side half of the lookup heal. Nothing is written to the db here:
+ * the function only creates frame->local and fills in its ctr_db_record
+ * from ctr_inode_cx.
+ *
+ * Returns 0 on success, including the case where recording on wind is off
+ * or the inode is a directory and nothing is prepared. Returns -1 on
+ * failure, in which case the local is freed and frame->local is set to NULL.
+ */
+static int
+ctr_lookup_wind(call_frame_t *frame, xlator_t *this,
+                gf_ctr_inode_context_t *ctr_inode_cx)
+{
+    int ret = -1;
+    gf_ctr_private_t *_priv = NULL;
+    gf_ctr_local_t *ctr_local = NULL;
+
+    GF_ASSERT(frame);
+    GF_ASSERT(frame->root);
+    GF_ASSERT(this);
+    IS_CTR_INODE_CX_SANE(ctr_inode_cx);
+
+    _priv = this->private;
+    GF_ASSERT(_priv);
+
+    /* Wind recording off, or a directory: there is nothing to prepare. */
+    if (!_priv->ctr_record_wind || ctr_inode_cx->ia_type == IA_IFDIR) {
+        ret = 0;
+        goto out;
+    }
+
+    frame->local = init_ctr_local_t(this);
+    if (!frame->local) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
+               "WIND: Error while creating ctr local");
+        goto out;
+    }
+    ctr_local = frame->local;
+
+    /* Internal fops never get this far. */
+    ctr_local->is_internal_fop = _gf_false;
+    /* A lookup records neither counters nor times. */
+    CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
+    CTR_DB_REC(ctr_local).do_record_times = _gf_false;
+
+    /* gfid of the inode */
+    gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
+
+    /* libgfdb decides how to insert the record from these two fields. */
+    CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
+    CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
+
+    /* Hard link: parent gfid plus base name; truncation is an error. */
+    gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
+                 *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
+    if (snprintf(CTR_DB_REC(ctr_local).file_name,
+                 sizeof(CTR_DB_REC(ctr_local).file_name), "%s",
+                 NEW_LINK_CX(ctr_inode_cx)->basename) >=
+        sizeof(CTR_DB_REC(ctr_local).file_name)) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
+               "WIND: Error copying filename of ctr local");
+        goto out;
+    }
+
+    /* Heal attempts from lookup may fail repeatedly when the record is
+     * already in the db; ignore those errors rather than bloat the log. */
+    ctr_local->gfdb_db_record.ignore_errors = _gf_true;
+
+    ret = 0;
+
+out:
+    if (ret) {
+        free_ctr_local(ctr_local);
+        frame->local = NULL;
+    }
+
+    return ret;
+}
|
|
+
|
|
+/*
+ * Unwind-side half of the lookup heal: inserts the ctr_db_record prepared
+ * by ctr_lookup_wind() into the db, unless there is no local or the inode
+ * is a directory. frame->local is always freed and reset to NULL.
+ *
+ * Returns -1 only when insert_record() returns -1, otherwise 0.
+ */
+static int
+ctr_lookup_unwind(call_frame_t *frame, xlator_t *this)
+{
+    int ret = 0;
+    gf_ctr_private_t *_priv = NULL;
+    gf_ctr_local_t *ctr_local = NULL;
+
+    GF_ASSERT(frame);
+    GF_ASSERT(this);
+
+    _priv = this->private;
+    GF_ASSERT(_priv);
+    GF_ASSERT(_priv->_db_conn);
+
+    ctr_local = frame->local;
+    if (!ctr_local || ctr_local->ia_inode_type == IA_IFDIR)
+        goto out;
+
+    if (insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record) == -1) {
+        gf_msg(this->name,
+               _gfdb_log_level(GF_LOG_ERROR,
+                               ctr_local->gfdb_db_record.ignore_errors),
+               0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
+               "UNWIND: Error filling ctr local");
+        ret = -1;
+    }
+
+out:
+    free_ctr_local(ctr_local);
+    frame->local = NULL;
+    return ret;
+}
|
|
+
|
|
+/******************************************************************************
|
|
+ *
|
|
+ * FOPS HANDLING BELOW
|
|
+ *
|
|
+ * ***************************************************************************/
|
|
+
|
|
+/****************************LOOKUP********************************************/
|
|
+
|
|
+/*
+ * Lookup callback: decides whether the looked-up entry needs a db heal and,
+ * if so, inserts the record prepared on the wind path.
+ *
+ * Nothing is recorded when CTR is disabled, the lookup failed, the inode is
+ * a directory, no frame->local was set up by ctr_lookup(), the entry is a
+ * dht link file, or either gfid is null. Whatever happens, frame->local is
+ * released and the lookup result is unwound to the caller unchanged.
+ */
+int32_t
+ctr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, inode_t *inode,
+               struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
+    gf_ctr_local_t *ctr_local = NULL;
+    ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR;
+    gf_boolean_t _is_heal_needed = _gf_false;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+
+    /* A failed lookup is passed through untouched. */
+    if (op_ret == -1) {
+        gf_msg_trace(this->name, 0, "lookup failed with %s",
+                     strerror(op_errno));
+        goto out;
+    }
+
+    /* Directories are ignored, and so are lookups for which ctr_lookup()
+     * did not set up a frame local. */
+    if (inode->ia_type == IA_IFDIR || !frame->local)
+        goto out;
+
+    /* dht link files are not recorded. */
+    if (dht_is_linkfile(buf, dict)) {
+        gf_msg_trace(this->name, 0, "Ignoring Lookup for dht link file");
+        goto out;
+    }
+
+    ctr_local = frame->local;
+    /* The wind path assumed a regular file; store the real inode type. */
+    ctr_local->ia_inode_type = inode->ia_type;
+
+    /* Take the gfid straight from the resolved inode. */
+    gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid);
+
+    if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) ||
+        gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) {
+        gf_msg_trace(this->name, 0, "Invalid GFID");
+        goto out;
+    }
+
+    /* No inode context yet means this is the first entry: mark the record
+     * as a create so that both file and hard link get written to the db. */
+    ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode);
+    if (!ctr_xlator_ctx) {
+        CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
+        _is_heal_needed = _gf_true;
+    }
+
+    /* Copy the correct gfid from resolved inode */
+    gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid);
+
+    /* Add the hard link to the list and see what it asks us to heal. */
+    ret_val = add_hard_link_ctx(frame, this, inode);
+    if (ret_val == CTR_CTX_ERROR) {
+        gf_msg_trace(this->name, 0, "Failed adding hardlink to list");
+        goto out;
+    }
+
+    if (ret_val & CTR_TRY_INODE_HEAL) {
+        /* Inode heal: the hard link is healed along with it. */
+        CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
+        _is_heal_needed = _gf_true;
+    } else if (ret_val & CTR_TRY_HARDLINK_HEAL) {
+        _is_heal_needed = _gf_true;
+    }
+
+    /* Heal: insert the record populated on the wind path. This call also
+     * destroys frame->local. */
+    if (_is_heal_needed && ctr_lookup_unwind(frame, this)) {
+        gf_msg_trace(this->name, 0, "Failed healing/inserting link");
+    }
+
+out:
+    free_ctr_local((gf_ctr_local_t *)frame->local);
+    frame->local = NULL;
+
+    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict,
+                        postparent);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * Lookup fop: for named lookups, prepares the frame local that
+ * ctr_lookup_cbk() later uses for healing, then winds the lookup to the
+ * first child. The lookup is wound even when CTR is disabled, the fop is
+ * internal, the lookup is nameless, or the preparation fails.
+ */
+int32_t
+ctr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    gf_ctr_link_context_t ctr_link_cx;
+    gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
+    gf_ctr_inode_context_t ctr_inode_cx;
+    gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    GF_ASSERT(frame);
+    GF_ASSERT(frame->root);
+
+    /* Nameless lookups are not handled. */
+    if (!loc->parent || !loc->name)
+        goto out;
+
+    /* Link context: parent gfid and base name. */
+    FILL_CTR_LINK_CX(_link_cx, loc->parent->gfid, loc->name, out);
+
+    /* Inode context. The type is assumed to be a regular file here and
+     * loc->gfid is only a placeholder; the callback fills in the real
+     * inode type and gfid once the lookup has succeeded. */
+    FILL_CTR_INODE_CONTEXT(_inode_cx, IA_IFREG, loc->gfid, _link_cx, NULL,
+                           GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
+
+    /* Set up frame->local and its ctr_db_record; no db write yet. */
+    if (ctr_lookup_wind(frame, this, _inode_cx)) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
+               "Failed to insert link wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_lookup_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->lookup, loc, xdata);
+    return 0;
+}
|
|
+
|
|
+/****************************WRITEV********************************************/
|
|
+/*
+ * writev callback.
+ *
+ * Unless a CTR_* guard macro jumps to "out", the GFDB_FOP_INODE_WRITE unwind
+ * is recorded through ctr_insert_unwind(); a failure there is only logged.
+ * ctr_free_frame_local() is then called on the frame and the fop results are
+ * unwound unchanged.
+ */
+int32_t
+ctr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+               struct iatt *postbuf, dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_UNWIND_FAILED,
+               "Failed to insert writev unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+                        xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * writev fop entry point.
+ *
+ * Fills an inode context for fd->inode (GFDB_FOP_INODE_WRITE on the wind
+ * path) and records it with ctr_insert_wind(); a failure is logged only.
+ * The write is wound to the first child in every case.
+ */
+int32_t
+ctr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+           int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+           dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
+                           NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
+               "Failed to insert writev wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_writev_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags,
+               iobref, xdata);
+    return 0;
+}
|
|
+
|
|
+/******************************setattr*****************************************/
|
|
+
|
|
+/*
+ * setattr callback.
+ *
+ * Records the GFDB_FOP_INODE_WRITE unwind via ctr_insert_unwind() unless a
+ * CTR_* guard macro jumps to "out"; an insert failure is only logged. The
+ * frame local is passed to ctr_free_frame_local() and the results are
+ * unwound as received.
+ */
+int32_t
+ctr_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf,
+                struct iatt *postop_stbuf, dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
+               "Failed to insert setattr unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop_stbuf,
+                        postop_stbuf, xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * setattr fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for loc->inode with ctr_insert_wind()
+ * unless one of the guard macros (CTR disabled, internal fop, metadata heat
+ * recording disabled - going by the macro names) jumps to "out". The fop is
+ * wound to the first child regardless.
+ */
+int32_t
+ctr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+            int32_t valid, dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+    CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, loc->inode->ia_type, loc->inode->gfid,
+                           NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
+               "Failed to insert setattr wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_setattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+    return 0;
+}
|
|
+
|
|
+/*************************** fsetattr ***************************************/
|
|
+/*
+ * fsetattr callback.
+ *
+ * Same shape as the setattr callback: record the GFDB_FOP_INODE_WRITE unwind
+ * unless a CTR_* guard macro jumps to "out", log (only) on insert failure,
+ * call ctr_free_frame_local() and unwind the results unchanged.
+ */
+int32_t
+ctr_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf,
+                 struct iatt *postop_stbuf, dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
+               "Failed to insert fsetattr unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, preop_stbuf,
+                        postop_stbuf, xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * fsetattr fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for fd->inode with ctr_insert_wind()
+ * unless a guard macro jumps to "out", then winds fsetattr to the first
+ * child. A failed insert is logged and otherwise ignored.
+ */
+int32_t
+ctr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+             int32_t valid, dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+    CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
+                           NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
+               "Failed to insert fsetattr wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_fsetattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+    return 0;
+}
|
|
+/****************************fremovexattr************************************/
|
|
+
|
|
+/*
+ * fremovexattr callback.
+ *
+ * Records the GFDB_FOP_INODE_WRITE unwind unless a CTR_* guard macro jumps
+ * to "out"; an insert failure is logged only. ctr_free_frame_local() is
+ * called and the result is unwound unchanged.
+ */
+int32_t
+ctr_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
+               "Failed to insert fremovexattr unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * fremovexattr fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for fd->inode with ctr_insert_wind()
+ * unless a guard macro jumps to "out", then winds fremovexattr to the first
+ * child. A failed insert is logged and otherwise ignored.
+ */
+int32_t
+ctr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+    CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
+                           NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
+               "Failed to insert fremovexattr wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_fremovexattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+    return 0;
+}
|
|
+
|
|
+/****************************removexattr*************************************/
|
|
+
|
|
+/*
+ * removexattr callback.
+ *
+ * Records the GFDB_FOP_INODE_WRITE unwind unless a CTR_* guard macro jumps
+ * to "out"; an insert failure is logged only. ctr_free_frame_local() is
+ * called and the result is unwound unchanged.
+ *
+ * NOTE(review): this callback also applies the internal-fop guard, using the
+ * xdata it received in the callback; the sibling callbacks do not - confirm
+ * whether that is intentional.
+ */
+int32_t
+ctr_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
+               "Failed to insert removexattr unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * removexattr fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for loc->inode with ctr_insert_wind()
+ * unless a guard macro jumps to "out", then winds removexattr to the first
+ * child. A failed insert is logged and otherwise ignored.
+ */
+int32_t
+ctr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+    CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, loc->inode->ia_type, loc->inode->gfid,
+                           NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
+               "Failed to insert removexattr wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_removexattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+    return 0;
+}
|
|
+
|
|
+/****************************truncate****************************************/
|
|
+
|
|
+/*
+ * truncate callback.
+ *
+ * Records the GFDB_FOP_INODE_WRITE unwind unless a CTR_* guard macro jumps
+ * to "out"; an insert failure is logged only. ctr_free_frame_local() is
+ * called and the results are unwound unchanged.
+ */
+int32_t
+ctr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
+               "Failed to insert truncate unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+                        xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * truncate fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for loc->inode with ctr_insert_wind()
+ * unless a guard macro jumps to "out", then winds truncate to the first
+ * child. A failed insert is logged and otherwise ignored.
+ */
+int32_t
+ctr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+             dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, loc->inode->ia_type, loc->inode->gfid,
+                           NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_TRUNCATE_WIND_FAILED,
+               "Failed to insert truncate wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_truncate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+    return 0;
+}
|
|
+
|
|
+/****************************ftruncate***************************************/
|
|
+
|
|
+/*
+ * ftruncate callback.
+ *
+ * Records the GFDB_FOP_INODE_WRITE unwind unless a CTR_* guard macro jumps
+ * to "out"; an insert failure is logged only. ctr_free_frame_local() is
+ * called and the results are unwound unchanged.
+ */
+int32_t
+ctr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
+               "Failed to insert ftruncate unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+                        xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * ftruncate fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for fd->inode with ctr_insert_wind()
+ * unless a guard macro jumps to "out", then winds ftruncate to the first
+ * child. A failed insert is logged and otherwise ignored.
+ */
+int32_t
+ctr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
+                           NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED,
+               "Failed to insert ftruncate wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_ftruncate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+    return 0;
+}
|
|
+
|
|
+/****************************rename******************************************/
|
|
+/*
+ * rename callback.
+ *
+ * 1. Unless a CTR_* guard macro jumps to "out", record the
+ *    GFDB_FOP_DENTRY_WRITE unwind with ctr_insert_unwind().
+ * 2. If the response xdata carries GF_RESPONSE_LINK_COUNT_XDATA (per the
+ *    original note, only set when the rename overwrote a hard link), delete
+ *    the db record of the overwritten link: gfid = old_gfid, parent =
+ *    pargfid, name = file_name of the frame-local db record. The link count
+ *    selects GFDB_FOP_UNDEL (> 1) or GFDB_FOP_UNDEL_ALL (== 1); 0 is
+ *    rejected as invalid.
+ *
+ * Every failure above is only logged: ctr_free_frame_local() is always
+ * called and the rename results are always unwound unchanged.
+ */
+int32_t
+ctr_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *buf,
+               struct iatt *preoldparent, struct iatt *postoldparent,
+               struct iatt *prenewparent, struct iatt *postnewparent,
+               dict_t *xdata)
+{
+    int ret = -1;
+    uint32_t remaining_links = -1;
+    gf_ctr_local_t *ctr_local = NULL;
+    gfdb_fop_type_t fop_type = GFDB_FOP_INVALID_OP;
+    gfdb_fop_path_t fop_path = GFDB_FOP_INVALID;
+
+    GF_ASSERT(frame);
+    GF_ASSERT(this);
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
+                            GFDB_FOP_UNWIND);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
+               "Failed to insert rename unwind");
+        goto out;
+    }
+
+    if (!xdata)
+        goto out;
+
+    /* Extract GF_RESPONSE_LINK_COUNT_XDATA sent back by the posix xlator.
+     * It is only set when the rename overwrote a hard link. */
+    ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
+                          &remaining_links);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
+               "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
+        goto out;
+    }
+
+    ctr_local = frame->local;
+    if (!ctr_local) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_NULL_LOCAL,
+               "ctr_local is NULL.");
+        goto out;
+    }
+
+    if (remaining_links > 1) {
+        /* This is not the only link */
+        fop_type = GFDB_FOP_DENTRY_WRITE;
+        fop_path = GFDB_FOP_UNDEL;
+    } else if (remaining_links == 1) {
+        /* Last link that was deleted */
+        fop_type = GFDB_FOP_DENTRY_WRITE;
+        fop_path = GFDB_FOP_UNDEL_ALL;
+    } else {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
+               "Invalid link count from posix");
+        goto out;
+    }
+
+    ret = ctr_delete_hard_link_from_db(
+        this, CTR_DB_REC(ctr_local).old_gfid, CTR_DB_REC(ctr_local).pargfid,
+        CTR_DB_REC(ctr_local).file_name, fop_type, fop_path);
+    if (ret) {
+        /* Report the name that was actually passed to the delete above;
+         * the original logged old_file_name, which is a different field. */
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
+               "Failed to delete records of %s",
+               CTR_DB_REC(ctr_local).file_name);
+    }
+
+out:
+    ctr_free_frame_local(frame);
+
+    STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent,
+                        postoldparent, prenewparent, postnewparent, xdata);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * rename fop entry point.
+ *
+ * Records a GFDB_FOP_DENTRY_WRITE wind for oldloc->inode carrying both the
+ * new and the old link, then updates the hard link kept in the inode context
+ * (done on the wind path because the rename callback does not receive the
+ * inode).
+ *
+ * When newloc already has an inode the rename overwrites a hard link, e.g.
+ *     rename("file1", "file2")
+ * with file1 a link of gfid A and file2 a link of gfid B. file2's gfid is
+ * saved in old_gfid so ctr_rename_cbk can delete its db entry on success,
+ * the link is dropped from that inode's ctr context, and the link count is
+ * requested from posix via set_posix_link_request().
+ *
+ * All failures are logged at most; the rename is always wound to the first
+ * child. If set_posix_link_request() reports (return value 1) that it
+ * created the xdata dict, the dict is unref'd after the wind.
+ */
+int32_t
+ctr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+           dict_t *xdata)
+{
+    gf_ctr_link_context_t new_link_buf;
+    gf_ctr_link_context_t old_link_buf;
+    gf_ctr_link_context_t *new_link = &new_link_buf;
+    gf_ctr_link_context_t *old_link = &old_link_buf;
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+    ctr_xlator_ctx_t *target_ctx = NULL;
+    int is_dict_created = 0;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    FILL_CTR_LINK_CX(old_link, oldloc->pargfid, oldloc->name, out);
+    FILL_CTR_LINK_CX(new_link, newloc->pargfid, newloc->name, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, oldloc->inode->ia_type,
+                           oldloc->inode->gfid, new_link, old_link,
+                           GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
+
+    /* GFID from which the newloc hard link will be unlinked. */
+    if (newloc->inode != NULL) {
+        inode_cx->old_gfid = &newloc->inode->gfid;
+    }
+
+    /* rename counts as a metadata fop */
+    inode_cx->is_metadata_fop = _gf_true;
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_WIND_FAILED,
+               "Failed to insert rename wind");
+        goto out;
+    }
+
+    if (update_hard_link_ctx(frame, this, oldloc->inode) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED,
+               "Failed updating hard link in ctr inode context");
+        goto out;
+    }
+
+    /* Nothing is being overwritten: done. */
+    if (newloc->inode == NULL)
+        goto out;
+
+    /* ctr inode context of the inode whose hard link gets taken over; with
+     * no context there is nothing more to do. */
+    target_ctx = get_ctr_xlator_ctx(this, newloc->inode);
+    if (target_ctx == NULL)
+        goto out;
+
+    if (ctr_delete_hard_link(this, target_ctx, newloc->pargfid,
+                             newloc->name) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
+               "Failed to delete hard link");
+        goto out;
+    }
+
+    /* Ask posix for the number of hard links on the newloc inode. A -1
+     * result needs no extra handling: only the value 1 is acted on below. */
+    is_dict_created = set_posix_link_request(this, &xdata);
+
+out:
+    STACK_WIND(frame, ctr_rename_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+
+    if (is_dict_created == 1) {
+        dict_unref(xdata);
+    }
+
+    return 0;
+}
|
|
+
|
|
+/****************************unlink******************************************/
|
|
+/*
+ * unlink callback.
+ *
+ * Reads GF_RESPONSE_LINK_COUNT_XDATA from the response xdata and records the
+ * GFDB_FOP_DENTRY_WRITE unwind with path GFDB_FOP_UNDEL_ALL when the count
+ * is exactly 1 (last link deleted), GFDB_FOP_UNDEL otherwise - including
+ * when the count could not be read. Nothing is recorded when xdata is NULL
+ * or a CTR_* guard macro jumps to "out". ctr_free_frame_local() is always
+ * called and the unlink results are unwound unchanged.
+ */
+int32_t
+ctr_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+    uint32_t remaining_links = -1;
+    gfdb_fop_path_t undel_path = GFDB_FOP_UNDEL;
+    int rc = -1;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (xdata == NULL)
+        goto out;
+
+    /* Link count reported back by the posix xlator. */
+    rc = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
+                         &remaining_links);
+    if (rc != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
+               "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
+        remaining_links = -1;
+    }
+
+    /* A count of 1 means the last link was deleted; anything else is
+     * treated as "other links remain". */
+    if (remaining_links == 1)
+        undel_path = GFDB_FOP_UNDEL_ALL;
+
+    rc = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, undel_path);
+    if (rc != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
+               "Failed to insert unlink unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+                        xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * unlink fop entry point.
+ *
+ * Records a GFDB_FOP_DENTRY_WRITE / GFDB_FOP_WDEL wind for loc->inode,
+ * removes the hard link from the inode's ctr context, and adds
+ * GF_REQUEST_LINK_COUNT_XDATA to xdata so that posix sends the link count
+ * back on the unwind path. Internal fops on dht link files are not
+ * recorded.
+ *
+ * xdata ownership: if the caller passed no xdata, a dict is created here and
+ * released exactly once, after STACK_WIND. Error paths must therefore not
+ * unref it themselves - the dict is still handed to STACK_WIND at "out".
+ *
+ * All failures are logged at most; the unlink is always wound to the first
+ * child.
+ */
+int32_t
+ctr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+           dict_t *xdata)
+{
+    int ret = -1;
+    gf_ctr_inode_context_t ctr_inode_cx;
+    gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+    gf_ctr_link_context_t ctr_link_cx;
+    gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
+    gf_boolean_t is_xdata_created = _gf_false;
+    struct iatt dummy_stat = {0};
+
+    GF_ASSERT(frame);
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+
+    /* Fill link context */
+    FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
+
+    /* Fill ctr inode context */
+    FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
+                           _link_cx, NULL, GFDB_FOP_DENTRY_WRITE,
+                           GFDB_FOP_WDEL);
+
+    /* Internal FOP */
+    _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
+
+    /* Is a metadata FOP */
+    _inode_cx->is_metadata_fop = _gf_true;
+
+    /* If it is an internal FOP on a dht link file, do not record */
+    if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
+        goto out;
+    }
+
+    /* Record into the database */
+    ret = ctr_insert_wind(frame, this, _inode_cx);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
+               "Failed to insert unlink wind");
+    } else {
+        /* The hard link is deleted from the inode context on the wind path
+         * because the unlink callback does not receive the inode. */
+        ret = delete_hard_link_ctx(frame, this, loc->inode);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
+                   "Failed "
+                   "deleting hard link from ctr inode context");
+        }
+    }
+
+    /* Send GF_REQUEST_LINK_COUNT_XDATA to the posix xlator so that it
+     * returns the link count on the unwind path. Create xdata if NULL. */
+    if (!xdata) {
+        xdata = dict_new();
+        is_xdata_created = (xdata) ? _gf_true : _gf_false;
+    }
+    if (!xdata) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
+               "xdata is NULL :Cannot send "
+               "GF_REQUEST_LINK_COUNT_XDATA to posix");
+        goto out;
+    }
+
+    ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+    if (ret) {
+        /* Do NOT unref xdata here: it is still passed to STACK_WIND below
+         * and a locally created dict is released once after the wind.
+         * Unref'ing here as well was a use-after-free plus double unref. */
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
+               "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
+        goto out;
+    }
+
+out:
+    STACK_WIND(frame, ctr_unlink_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+
+    if (is_xdata_created)
+        dict_unref(xdata);
+
+    return 0;
+}
|
|
+
|
|
+/****************************fsync******************************************/
|
|
+/*
+ * fsync callback.
+ *
+ * Records the GFDB_FOP_INODE_WRITE unwind unless a CTR_* guard macro jumps
+ * to "out"; an insert failure is logged only. ctr_free_frame_local() is
+ * called and the results are unwound unchanged.
+ */
+int32_t
+ctr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+              dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
+               "Failed to insert fsync unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * fsync fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for fd->inode with ctr_insert_wind()
+ * unless a guard macro jumps to "out", then winds fsync to the first child.
+ * A failed insert is logged and otherwise ignored.
+ */
+int32_t
+ctr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+          dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
+                           NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
+               "Failed to insert fsync wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_fsync_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+    return 0;
+}
|
|
+
|
|
+/****************************setxattr****************************************/
|
|
+
|
|
+/*
+ * setxattr callback.
+ *
+ * Records the GFDB_FOP_INODE_WRITE unwind via ctr_insert_unwind() unless a
+ * CTR_* guard macro jumps to "out"; an insert failure is logged only.
+ * ctr_free_frame_local() is always called and the result is unwound
+ * unchanged.
+ *
+ * The CTR_IF_FOP_FAILED_THEN_GOTO guard was missing here, unlike in every
+ * other callback of this file (e.g. ctr_fsetxattr_cbk), so a failed
+ * setxattr was still recorded.
+ */
+int
+ctr_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    int ret = -1;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
+    if (ret) {
+        /* NOTE(review): reuses the fsync message id, as the original did;
+         * no setxattr-specific id is visible from this part of the file. */
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
+               "Failed to insert setxattr unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+
+    STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * setxattr fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for loc->inode with ctr_insert_wind()
+ * unless a guard macro jumps to "out", then winds setxattr to the first
+ * child. A failed insert is logged and otherwise ignored.
+ */
+int
+ctr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+             int flags, dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+    CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, loc->inode->ia_type, loc->inode->gfid,
+                           NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
+               "Failed to insert setxattr wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_setxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata);
+    return 0;
+}
|
|
+/**************************** fsetxattr *************************************/
|
|
+/*
+ * fsetxattr callback.
+ *
+ * Records the GFDB_FOP_INODE_WRITE unwind unless a CTR_* guard macro jumps
+ * to "out"; an insert failure is logged only. ctr_free_frame_local() is
+ * called and the result is unwound unchanged.
+ */
+int32_t
+ctr_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
+               "Failed to insert fsetxattr unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * fsetxattr fop entry point.
+ *
+ * Records a GFDB_FOP_INODE_WRITE wind for fd->inode with ctr_insert_wind()
+ * unless a guard macro jumps to "out", then winds fsetxattr to the first
+ * child. A failed insert is logged and otherwise ignored.
+ */
+int32_t
+ctr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+              int32_t flags, dict_t *xdata)
+{
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+    CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
+                           NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
+               "Failed to insert fsetxattr wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_fsetxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+    return 0;
+}
|
|
+/****************************mknod*******************************************/
|
|
+
|
|
+/*
+ * mknod callback.
+ *
+ * Unless a CTR_* guard macro jumps to "out", adds the new hard link to the
+ * inode's ctr context (a CTR_CTX_ERROR result is only traced) and records
+ * the GFDB_FOP_CREATE_WRITE unwind (failure only logged).
+ * ctr_free_frame_local() is always called and the results are unwound
+ * unchanged.
+ */
+int32_t
+ctr_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, inode_t *inode, struct iatt *buf,
+              struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+    ctr_heal_ret_val_t link_status = CTR_CTX_ERROR;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    /* Remember the hard link in the inode context. */
+    link_status = add_hard_link_ctx(frame, this, inode);
+    if (link_status == CTR_CTX_ERROR) {
+        gf_msg_trace(this->name, 0, "Failed adding hard link");
+    }
+
+    if (ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE,
+                          GFDB_FOP_UNWIND) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
+               "Failed to insert mknod unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+    STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+                        postparent, xdata);
+    return 0;
+}
|
|
+
|
|
+/*
+ * mknod fop entry point.
+ *
+ * The gfid of the node being created is read from the "gfid-req" key of
+ * xdata; if it is not available nothing is recorded. Otherwise a
+ * GFDB_FOP_CREATE_WRITE wind with the parent/basename link is recorded via
+ * ctr_insert_wind() (failure only logged). mknod is wound to the first child
+ * in every case.
+ */
+int
+ctr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          dev_t rdev, mode_t umask, dict_t *xdata)
+{
+    gf_ctr_link_context_t link_cx_buf;
+    gf_ctr_link_context_t *link_cx = &link_cx_buf;
+    gf_ctr_inode_context_t inode_cx_buf;
+    gf_ctr_inode_context_t *inode_cx = &inode_cx_buf;
+    uuid_t gfid = {
+        0,
+    };
+    /* The gfid is handed to FILL_CTR_INODE_CONTEXT through this pointer,
+     * exactly as the original code did. */
+    uuid_t *ptr_gfid = &gfid;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    GF_ASSERT(frame);
+    GF_ASSERT(frame->root);
+
+    /* gfid requested for the new node */
+    if (dict_get_gfuuid(xdata, "gfid-req", &gfid) != 0) {
+        gf_msg_debug(this->name, 0, "failed to get gfid from dict");
+        goto out;
+    }
+
+    FILL_CTR_LINK_CX(link_cx, loc->pargfid, loc->name, out);
+
+    FILL_CTR_INODE_CONTEXT(inode_cx, loc->inode->ia_type, *ptr_gfid, link_cx,
+                           NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
+
+    /* Record the wind in the database. */
+    if (ctr_insert_wind(frame, this, inode_cx) != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_WIND_FAILED,
+               "Failed to insert mknod wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_mknod_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+    return 0;
+}
|
|
+
|
|
+/****************************create******************************************/
|
|
+/* create callback.
+ * When the macros below do not jump to out, adds the hard link context for
+ * the inode and records the unwind. In every case it calls
+ * ctr_free_frame_local() and unwinds with the values it was handed. */
+int
+ctr_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+               int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+               struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+    int ret = -1;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    /* failures below are logged only; the unwind is not affected */
+    ret = add_hard_link_ctx(frame, this, inode);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_ADD_HARDLINK_FAILED,
+               "Failed adding hard link");
+    }
+
+    ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE,
+                            GFDB_FOP_UNWIND);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
+               "Failed to insert create unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+
+    STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf,
+                        preparent, postparent, xdata);
+
+    return 0;
+}
|
|
+
|
|
+/* create fop entry point.
+ * Records the wind of a create-type operation and winds create to the
+ * first child. Nothing is recorded when "gfid-req" is missing from xdata,
+ * or when the fop is internal and dht_is_linkfile() reports a link file;
+ * the fop is wound in every case. */
+int
+ctr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+           mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+    struct iatt dummy_stat = {0};
+    uuid_t gfid = {
+        0,
+    };
+    uuid_t *ptr_gfid = &gfid;
+    gf_ctr_link_context_t ctr_link_cx;
+    gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
+    gf_ctr_inode_context_t ctr_inode_cx;
+    gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+    int ret = -1;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+
+    GF_ASSERT(frame);
+    GF_ASSERT(frame->root);
+
+    /* the gfid requested for the new file travels in xdata */
+    ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
+               "failed to get gfid from dict");
+        goto out;
+    }
+
+    /* link context: parent gfid + basename of the new file */
+    FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
+
+    /* inode context for a create/write fop on the wind path */
+    FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx,
+                           NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
+
+    _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
+
+    /* an internal fop creating a dht link file is not recorded */
+    if (_inode_cx->is_internal_fop) {
+        if (dht_is_linkfile(&dummy_stat, xdata)) {
+            goto out;
+        }
+    }
+
+    /* a failed insert is only logged */
+    ret = ctr_insert_wind(frame, this, _inode_cx);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_WIND_FAILED,
+               "Failed to insert create wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_create_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+               xdata);
+    return 0;
+}
|
|
+
|
|
+/****************************link********************************************/
|
|
+
|
|
+/* link callback.
+ * When the macros below do not jump to out, adds the hard link context for
+ * the inode and records the unwind of the dentry write. In every case it
+ * calls ctr_free_frame_local() and unwinds with the values it was handed. */
+int
+ctr_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+             int op_errno, inode_t *inode, struct iatt *stbuf,
+             struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+    int ret = -1;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    /* Add hard link to the list */
+    ret = add_hard_link_ctx(frame, this, inode);
+    if (ret) {
+        gf_msg_trace(this->name, 0, "Failed adding hard link");
+    }
+
+    ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
+                            GFDB_FOP_UNWIND);
+    if (ret) {
+        /* The message id is the one this code has always used here; only
+         * the text is corrected so the log names the fop that failed. */
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
+               "Failed to insert link unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+
+    STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+                        postparent, xdata);
+    return 0;
+}
|
|
+
|
|
+/* link fop entry point.
+ * Records the wind of a dentry write (new name newloc for the inode of
+ * oldloc), flagged as a metadata fop, then winds link to the first child.
+ * An internal fop for which dht_is_linkfile() is true is not recorded. */
+int
+ctr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+         dict_t *xdata)
+{
+    struct iatt dummy_stat = {0};
+    gf_ctr_link_context_t ctr_link_cx;
+    gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
+    gf_ctr_inode_context_t ctr_inode_cx;
+    gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+    int ret = -1;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+
+    GF_ASSERT(frame);
+    GF_ASSERT(frame->root);
+
+    /* link context: parent gfid + basename of the new name */
+    FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, out);
+
+    /* inode context: type and gfid come from the existing inode */
+    FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type,
+                           oldloc->inode->gfid, _link_cx, NULL,
+                           GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
+
+    _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
+    _inode_cx->is_metadata_fop = _gf_true;
+
+    /* an internal fop on a dht link file is not recorded */
+    if (_inode_cx->is_internal_fop) {
+        if (dht_is_linkfile(&dummy_stat, xdata)) {
+            goto out;
+        }
+    }
+
+    /* a failed insert is only logged */
+    ret = ctr_insert_wind(frame, this, _inode_cx);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
+               "Failed to insert link wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_link_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+    return 0;
+}
|
|
+
|
|
+/******************************readv*****************************************/
|
|
+/* readv callback.
+ * When the macros below do not jump to out, records the unwind of the
+ * inode read. In every case it calls ctr_free_frame_local() and unwinds
+ * with the values it was handed. */
+int
+ctr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+              int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
+              struct iobref *iobref, dict_t *xdata)
+{
+    int ret = -1;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
+
+    ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ, GFDB_FOP_UNWIND);
+    if (ret) {
+        /* The message id is the one this code has always used here; only
+         * the text is corrected so the log names the fop that failed. */
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
+               "Failed to insert readv unwind");
+    }
+
+out:
+    ctr_free_frame_local(frame);
+
+    STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
+                        iobref, xdata);
+    return 0;
+}
|
|
+
|
|
+/* readv fop entry point.
+ * Records the wind of an inode read for fd's inode (no link context) and
+ * winds readv to the first child. The fop is wound in every case. */
+int
+ctr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off,
+          uint32_t flags, dict_t *xdata)
+{
+    gf_ctr_inode_context_t ctr_inode_cx;
+    gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
+    int ret = -1;
+
+    CTR_IS_DISABLED_THEN_GOTO(this, out);
+    CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
+
+    /* inode context: type and gfid come from the open fd's inode */
+    FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
+                           NULL, GFDB_FOP_INODE_READ, GFDB_FOP_WIND);
+
+    /* a failed insert is only logged */
+    ret = ctr_insert_wind(frame, this, _inode_cx);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_READV_WIND_FAILED,
+               "Failed to insert readv wind");
+    }
+
+out:
+    STACK_WIND(frame, ctr_readv_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readv, fd, size, off, flags, xdata);
+    return 0;
+}
|
|
+
|
|
+/*******************************ctr_ipc****************************************/
|
|
+
|
|
+/* Per-record callback handed to the database query functions.
+ * Writes the record to args->query_fd and increments args->count.
+ * Returns 0 on success; non-zero when args is NULL (-1) or when the write
+ * fails (the write's own return value). */
+static int
+ctr_db_query_callback(gfdb_query_record_t *gfdb_query_record, void *args)
+{
+    ctr_query_cbk_args_t *query_cbk_args = args;
+    int ret = -1;
+
+    GF_VALIDATE_OR_GOTO("ctr", query_cbk_args, out);
+
+    ret = gfdb_write_query_record(query_cbk_args->query_fd, gfdb_query_record);
+    if (ret != 0) {
+        gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+               "Failed to write to query file");
+        goto out;
+    }
+
+    /* ret is 0 here, which is also the success value */
+    query_cbk_args->count++;
+
+out:
+    return ret;
+}
|
|
+
|
|
+/* This function does all the db queries related to tiering and
+ * generates/populates new/existing query file: the query file is opened
+ * for append (created if missing), one of the find_* queries selected by
+ * ipc_ctr_params is run with ctr_db_query_callback, and the file heat is
+ * then cleared in the database.
+ * inputs:
+ * xlator_t *this : CTR Translator
+ * void *conn_node : Database connection
+ * char *query_file: the query file that needs to be updated
+ * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters
+ * Return:
+ *      On success the number of records counted by the query callback
+ *      On failure -1
+ * */
+int
+ctr_db_query(xlator_t *this, void *conn_node, char *query_file,
+             gfdb_ipc_ctr_params_t *ipc_ctr_params)
+{
+    int ret = -1;
+    /* query_fd must start out invalid: the cleanup at out: closes any
+     * descriptor >= 0, and a zero-initialised fd would make an early
+     * validation failure close descriptor 0. */
+    ctr_query_cbk_args_t query_cbk_args = {.query_fd = -1};
+    gf_boolean_t no_freq_threshold = _gf_false;
+
+    GF_VALIDATE_OR_GOTO("ctr", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, conn_node, out);
+    GF_VALIDATE_OR_GOTO(this->name, query_file, out);
+    GF_VALIDATE_OR_GOTO(this->name, ipc_ctr_params, out);
+
+    /*Query for eligible files from db*/
+    query_cbk_args.query_fd = open(query_file, O_WRONLY | O_CREAT | O_APPEND,
+                                   S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+    if (query_cbk_args.query_fd < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, errno, CTR_MSG_FATAL_ERROR,
+               "Failed to open query file %s", query_file);
+        goto out;
+    }
+
+    /* with both thresholds at 0 the time-only queries are used */
+    no_freq_threshold = (ipc_ctr_params->write_freq_threshold == 0 &&
+                         ipc_ctr_params->read_freq_threshold == 0);
+
+    if (ipc_ctr_params->is_promote) {
+        if (no_freq_threshold) {
+            ret = find_recently_changed_files(conn_node, ctr_db_query_callback,
+                                              (void *)&query_cbk_args,
+                                              &ipc_ctr_params->time_stamp);
+        } else {
+            ret = find_recently_changed_files_freq(
+                conn_node, ctr_db_query_callback, (void *)&query_cbk_args,
+                &ipc_ctr_params->time_stamp,
+                ipc_ctr_params->write_freq_threshold,
+                ipc_ctr_params->read_freq_threshold, _gf_false);
+        }
+    } else if (ipc_ctr_params->emergency_demote) {
+        /* emergency demotion mode */
+        ret = find_all(conn_node, ctr_db_query_callback,
+                       (void *)&query_cbk_args, ipc_ctr_params->query_limit);
+    } else if (no_freq_threshold) {
+        ret = find_unchanged_for_time(conn_node, ctr_db_query_callback,
+                                      (void *)&query_cbk_args,
+                                      &ipc_ctr_params->time_stamp);
+    } else {
+        ret = find_unchanged_for_time_freq(
+            conn_node, ctr_db_query_callback, (void *)&query_cbk_args,
+            &ipc_ctr_params->time_stamp, ipc_ctr_params->write_freq_threshold,
+            ipc_ctr_params->read_freq_threshold, _gf_false);
+    }
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+               "FATAL: query from db failed");
+        goto out;
+    }
+
+    ret = clear_files_heat(conn_node);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+               "FATAL: Failed to clear db entries");
+        goto out;
+    }
+
+    ret = 0;
+out:
+    if (query_cbk_args.query_fd >= 0) {
+        sys_close(query_cbk_args.query_fd);
+        query_cbk_args.query_fd = -1;
+    }
+
+    /* Any non-zero status is reported as -1 so a failure can never be
+     * mistaken for a record count. */
+    return ret ? -1 : query_cbk_args.count;
+}
|
|
+
|
|
+/* Thread body for database compaction; args is the CTR xlator.
+ * Snapshots the connection and the two compaction flags from the private
+ * data, runs compact_db(), then clears both flags under compact_lock.
+ * Always returns NULL. */
+void *
+ctr_compact_thread(void *args)
+{
+    xlator_t *this = (xlator_t *)args;
+    gf_ctr_private_t *priv = NULL;
+    void *db_conn = NULL;
+    gf_boolean_t compact_active = _gf_false;
+    gf_boolean_t compact_mode_switched = _gf_false;
+    int ret = -1;
+
+    GF_VALIDATE_OR_GOTO("ctr", this, out);
+
+    priv = this->private;
+
+    db_conn = priv->_db_conn;
+    compact_active = priv->compact_active;
+    compact_mode_switched = priv->compact_mode_switched;
+
+    gf_msg("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET, "Starting compaction");
+
+    /* a failed compaction is logged; the flags are still cleared below */
+    ret = compact_db(db_conn, compact_active, compact_mode_switched);
+    if (ret != 0) {
+        gf_msg("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET,
+               "Failed to perform the compaction");
+    }
+
+    ret = pthread_mutex_lock(&priv->compact_lock);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+               "Failed to acquire lock");
+        goto out;
+    }
+
+    /* We are done compaction on this brick. Set all flags to false */
+    priv->compact_active = _gf_false;
+    priv->compact_mode_switched = _gf_false;
+
+    ret = pthread_mutex_unlock(&priv->compact_lock);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+               "Failed to release lock");
+    }
+
+out:
+    return NULL;
+}
|
|
+
|
|
+/* Dispatches one CTR IPC request.
+ * The operation name is read from in_dict under GFDB_IPC_CTR_KEY and is
+ * matched, in order, against: clear heat, query (+ clear), get db version,
+ * get db parameter, and start compaction. Replies, where there are any,
+ * are stored in out_dict.
+ * Returns 0 on success and non-zero on failure; an unrecognised operation
+ * returns with ret as left by the dictionary lookup. */
+int
+ctr_ipc_helper(xlator_t *this, dict_t *in_dict, dict_t *out_dict)
+{
+    int ret = -1;
+    char *ctr_ipc_ops = NULL;
+    gf_ctr_private_t *priv = NULL;
+    char *db_version = NULL;
+    char *db_param_key = NULL;
+    char *db_param = NULL;
+    char *query_file = NULL;
+    gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
+    int result = 0;
+    int compact_active_req = 0;
+    int compact_mode_switched_req = 0;
+    pthread_t compact_thread;
+
+    GF_VALIDATE_OR_GOTO("ctr", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+    priv = this->private;
+    GF_VALIDATE_OR_GOTO(this->name, priv->_db_conn, out);
+    GF_VALIDATE_OR_GOTO(this->name, in_dict, out);
+    GF_VALIDATE_OR_GOTO(this->name, out_dict, out);
+
+    GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY, ctr_ipc_ops,
+                           out);
+
+    /*if its a db clear operation */
+    if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS,
+                SLEN(GFDB_IPC_CTR_CLEAR_OPS)) == 0) {
+        ret = clear_files_heat(priv->_db_conn);
+        if (ret)
+            goto out;
+
+    } /* if its a query operation, in which case its query + clear db*/
+    else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS,
+                     SLEN(GFDB_IPC_CTR_QUERY_OPS)) == 0) {
+        ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_QFILE_PATH, &query_file);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed extracting query file path");
+            goto out;
+        }
+
+        ret = dict_get_bin(in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
+                           (void *)&ipc_ctr_params);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed extracting query parameters");
+            goto out;
+        }
+
+        /* The query result (record count, or a negative value on failure)
+         * is handed back through out_dict; ret then only reflects whether
+         * the reply could be stored. */
+        ret = ctr_db_query(this, priv->_db_conn, query_file, ipc_ctr_params);
+
+        ret = dict_set_int32(out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ret);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed setting query reply");
+            goto out;
+        }
+
+    } /* if its a query for db version */
+    else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS,
+                     SLEN(GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) {
+        ret = get_db_version(priv->_db_conn, &db_version);
+        if (ret == -1 || !db_version) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed extracting db version ");
+            goto out;
+        }
+
+        SET_DB_PARAM_TO_DICT(this->name, out_dict, GFDB_IPC_CTR_RET_DB_VERSION,
+                             db_version, ret, error);
+
+    } /* if its a query for a db setting */
+    else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS,
+                     SLEN(GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) {
+        ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_DB_KEY, &db_param_key);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed extracting db param key");
+            goto out;
+        }
+
+        ret = get_db_params(priv->_db_conn, db_param_key, &db_param);
+        if (ret == -1 || !db_param) {
+            goto out;
+        }
+
+        SET_DB_PARAM_TO_DICT(this->name, out_dict, db_param_key, db_param, ret,
+                             error);
+    } /* if its an attempt to compact the database */
+    else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA,
+                     SLEN(GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) {
+        ret = pthread_mutex_lock(&priv->compact_lock);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed to acquire lock for compaction");
+            goto out;
+        }
+
+        if ((priv->compact_active || priv->compact_mode_switched)) {
+            /* Compaction in progress. LEAVE */
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Compaction already in progress.");
+            pthread_mutex_unlock(&priv->compact_lock);
+            goto out;
+        }
+        /* At this point, we should be the only one on the brick */
+        /* compacting */
+
+        /* Read both arguments before touching the flags, and release the
+         * lock on every failure path: leaving with the lock held, or with
+         * a flag already raised, would block all later compactions. */
+        ret = dict_get_int32(in_dict, "compact_active", &result);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed to get compaction type");
+            pthread_mutex_unlock(&priv->compact_lock);
+            goto out;
+        }
+        compact_active_req = result;
+
+        ret = dict_get_int32(in_dict, "compact_mode_switched", &result);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed to see if compaction switched");
+            pthread_mutex_unlock(&priv->compact_lock);
+            goto out;
+        }
+        compact_mode_switched_req = result;
+
+        if (compact_active_req) {
+            priv->compact_active = _gf_true;
+        }
+
+        if (compact_mode_switched_req) {
+            priv->compact_mode_switched = _gf_true;
+            gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
+                   "Pre-thread: Compact mode switch is true");
+        } else {
+            gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
+                   "Pre-thread: Compact mode switch is false");
+        }
+
+        ret = pthread_mutex_unlock(&priv->compact_lock);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed to release lock for compaction");
+            goto out;
+        }
+
+        ret = gf_thread_create(&compact_thread, NULL, ctr_compact_thread,
+                               (void *)this, "ctrcomp");
+
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+                   "Failed to spawn compaction thread");
+            /* No thread exists to clear the flags, so undo them here. */
+            if (pthread_mutex_lock(&priv->compact_lock) == 0) {
+                priv->compact_active = _gf_false;
+                priv->compact_mode_switched = _gf_false;
+                pthread_mutex_unlock(&priv->compact_lock);
+            }
+            goto out;
+        }
+
+        /* The handle is a local that is never joined; detach the thread so
+         * its resources are released when it finishes. */
+        (void)pthread_detach(compact_thread);
+
+        goto out;
+    } /* default case */
+    else {
+        goto out;
+    }
+
+    ret = 0;
+    goto out;
+error:
+    /* db_param_key is not freed here: it was obtained with dict_get_str()
+     * and so presumably still belongs to in_dict (verify against dict.h). */
+    GF_FREE(db_param);
+    GF_FREE(db_version);
+out:
+    return ret;
+}
|
|
+
|
|
+/* ipc fop entry point.
+ * A request whose op is GF_IPC_TARGET_CTR (and whose in_dict is not NULL)
+ * is answered here through ctr_ipc_helper() and unwound with a fresh
+ * reply dictionary; anything else is wound to the first child. */
+int32_t
+ctr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *in_dict)
+{
+    dict_t *out_dict = NULL;
+    gf_ctr_private_t *priv = NULL;
+    int ret = -1;
+
+    GF_ASSERT(this);
+    priv = this->private;
+    GF_ASSERT(priv);
+    GF_ASSERT(priv->_db_conn);
+    GF_VALIDATE_OR_GOTO(this->name, in_dict, wind);
+
+    /* not addressed to CTR: pass it down untouched */
+    if (op != GF_IPC_TARGET_CTR) {
+        goto wind;
+    }
+
+    out_dict = dict_new();
+    if (out_dict == NULL) {
+        goto out;
+    }
+
+    ret = ctr_ipc_helper(this, in_dict, out_dict);
+    if (ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
+               "Failed in ctr_ipc_helper");
+    }
+
+out:
+    STACK_UNWIND_STRICT(ipc, frame, ret, 0, out_dict);
+
+    if (out_dict != NULL) {
+        dict_unref(out_dict);
+    }
+
+    return 0;
+
+wind:
+    STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->ipc, op, in_dict);
+
+    return 0;
+}
|
|
+
|
|
+/* Call to initialize db for ctr xlator while ctr is enabled.
+ * Resets the compaction state, initialises compact_lock, extracts the db
+ * parameters, creates the xlator's local memory pool and opens the
+ * database connection (stored in priv->_db_conn).
+ *
+ * Returns 0 on success and -1 on failure. On failure the local pool is
+ * destroyed. A priv that is not yet installed as this->private (the init()
+ * path) is released here, as before. A priv that already is this->private
+ * (the reconfigure() path) is left alone, because freeing it would leave
+ * the xlator holding a dangling pointer. */
+int32_t
+initialize_ctr_resource(xlator_t *this, gf_ctr_private_t *priv)
+{
+    int ret_db = -1;
+    dict_t *params_dict = NULL;
+    gf_boolean_t lock_ready = _gf_false;
+
+    if (!priv)
+        goto error;
+
+    /* For compaction */
+    priv->compact_active = _gf_false;
+    priv->compact_mode_switched = _gf_false;
+    ret_db = pthread_mutex_init(&priv->compact_lock, NULL);
+
+    if (ret_db) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+               "FATAL: Failed initializing compaction mutex");
+        goto error;
+    }
+    lock_ready = _gf_true;
+
+    params_dict = dict_new();
+    if (!params_dict) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INIT_DB_PARAMS_FAILED,
+               "DB Params cannot initialized!");
+        goto error;
+    }
+
+    /*Extract db params options*/
+    ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type);
+    if (ret_db) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
+               "Failed extracting db params options");
+        goto error;
+    }
+
+    /*Create a memory pool for ctr xlator*/
+    this->local_pool = mem_pool_new(gf_ctr_local_t, 64);
+    if (!this->local_pool) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
+               "failed to create local memory pool");
+        goto error;
+    }
+
+    /*Initialize Database Connection*/
+    priv->_db_conn = init_db(params_dict, priv->gfdb_db_type);
+    if (!priv->_db_conn) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+               "FATAL: Failed initializing data base");
+        goto error;
+    }
+
+    ret_db = 0;
+    goto out;
+
+error:
+    if (this) {
+        mem_pool_destroy(this->local_pool);
+        /* fini() destroys local_pool too; do not leave a stale pointer */
+        this->local_pool = NULL;
+    }
+
+    /* Only release a priv the xlator does not own yet. In the retained
+     * case the mutex is left initialised, since fini() destroys it. */
+    if (priv && (!this || this->private != priv)) {
+        if (lock_ready)
+            pthread_mutex_destroy(&priv->compact_lock);
+        GF_FREE(priv->ctr_db_path);
+        GF_FREE(priv);
+    }
+    ret_db = -1;
+out:
+    if (params_dict)
+        dict_unref(params_dict);
+
+    return ret_db;
+}
|
|
+
|
|
+/******************************************************************************/
|
|
+/* Applies changed volume options to a running CTR xlator.
+ * Re-reads "ctr-enabled" first and stops there when CTR is disabled. If
+ * CTR is enabled but has no database connection yet, the resources are
+ * initialised now. The remaining boolean/timeout options are then re-read
+ * and, for sqlite3, the wal_autocheckpoint and cache_size settings are
+ * pushed to the database (failures there are only logged). */
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+    gf_ctr_private_t *priv = this->private;
+    char *temp_str = NULL;
+    int ret = 0;
+
+    if (dict_get_str(options, "changetimerecorder.frequency", &temp_str) != 0) {
+        gf_msg(this->name, GF_LOG_TRACE, 0, CTR_MSG_SET, "set");
+    }
+
+    GF_OPTION_RECONF("ctr-enabled", priv->enabled, options, bool, out);
+    if (!priv->enabled) {
+        gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
+               "CTR Xlator is not enabled so skip ctr reconfigure");
+        goto out;
+    }
+
+    /* CTR is enabled from here on. If init() skipped the resource setup
+       because CTR was disabled then, allocate the resources now. */
+    if (!priv->_db_conn) {
+        ret = initialize_ctr_resource(this, priv);
+        if (ret != 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+                   "FATAL: Failed ctr initialize resource");
+            goto out;
+        }
+    }
+
+    GF_OPTION_RECONF("record-counters", priv->ctr_record_counter, options, bool,
+                     out);
+
+    GF_OPTION_RECONF("ctr-record-metadata-heat", priv->ctr_record_metadata_heat,
+                     options, bool, out);
+
+    GF_OPTION_RECONF("ctr_link_consistency", priv->ctr_link_consistency,
+                     options, bool, out);
+
+    GF_OPTION_RECONF("ctr_lookupheal_inode_timeout",
+                     priv->ctr_lookupheal_inode_timeout, options, uint64, out);
+
+    GF_OPTION_RECONF("ctr_lookupheal_link_timeout",
+                     priv->ctr_lookupheal_link_timeout, options, uint64, out);
+
+    GF_OPTION_RECONF("record-exit", priv->ctr_record_unwind, options, bool,
+                     out);
+
+    GF_OPTION_RECONF("record-entry", priv->ctr_record_wind, options, bool, out);
+
+    /* sqlite-only settings; each is applied only when present in options */
+    if (priv->gfdb_db_type == GFDB_SQLITE3) {
+        /* AUTOCHECKPOINT */
+        if (!dict_get_str(options, GFDB_SQL_PARAM_WAL_AUTOCHECK, &temp_str)) {
+            if (set_db_params(priv->_db_conn, "wal_autocheckpoint", temp_str)) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
+                       "Failed to set %s", GFDB_SQL_PARAM_WAL_AUTOCHECK);
+            }
+        }
+
+        /* CACHE_SIZE */
+        if (!dict_get_str(options, GFDB_SQL_PARAM_CACHE_SIZE, &temp_str)) {
+            if (set_db_params(priv->_db_conn, "cache_size", temp_str)) {
+                gf_msg(this->name, GF_LOG_ERROR, 0,
+                       CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
+                       "Failed to set %s", GFDB_SQL_PARAM_CACHE_SIZE);
+            }
+        }
+    }
+
+    ret = 0;
+
+out:
+    return ret;
+}
|
|
+
|
|
+/****************************init********************************************/
|
|
+
|
|
+/* Xlator init: checks the graph position (exactly one child), allocates
+ * the private data with its defaults, extracts the CTR options and, only
+ * when CTR is enabled, initialises the database resources. The private
+ * data is installed in this->private on success. Returns 0 or -1. */
+int32_t
+init(xlator_t *this)
+{
+    gf_ctr_private_t *priv = NULL;
+    int ret_db = -1;
+
+    if (!this) {
+        gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+               "FATAL: ctr this is not initialized");
+        return -1;
+    }
+
+    if (!this->children || this->children->next) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+               "FATAL: ctr should have exactly one child");
+        return -1;
+    }
+
+    if (!this->parents) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DANGLING_VOLUME,
+               "dangling volume. check volfile ");
+    }
+
+    priv = GF_CALLOC(1, sizeof(*priv), gf_ctr_mt_private_t);
+    if (!priv) {
+        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
+               "Calloc did not work!!!");
+        return -1;
+    }
+
+    /* defaults, possibly overridden by extract_ctr_options() below */
+    priv->_db_conn = NULL;
+    priv->gfdb_db_type = GFDB_SQLITE3;
+    priv->gfdb_sync_type = GFDB_DB_SYNC;
+    priv->ctr_record_wind = _gf_true;
+    priv->ctr_record_unwind = _gf_false;
+    priv->ctr_hot_brick = _gf_false;
+    priv->ctr_lookupheal_link_timeout = CTR_DEFAULT_HARDLINK_EXP_PERIOD;
+    priv->ctr_lookupheal_inode_timeout = CTR_DEFAULT_INODE_EXP_PERIOD;
+
+    /*Extract ctr xlator options*/
+    ret_db = extract_ctr_options(this, priv);
+    if (ret_db != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
+               "Failed extracting ctr xlator options");
+        GF_FREE(priv);
+        return -1;
+    }
+
+    if (priv->enabled) {
+        /* on failure priv is not freed here; the callee handles it */
+        ret_db = initialize_ctr_resource(this, priv);
+        if (ret_db != 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
+                   "FATAL: Failed ctr initialize resource");
+            return -1;
+        }
+    } else {
+        gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
+               "CTR Xlator is not enabled so skip ctr init");
+    }
+
+    this->private = (void *)priv;
+    return 0;
+}
|
|
+
|
|
+/* Forwards the event to default_notify(); returns 0 without forwarding
+ * while this->private is NULL. */
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+    gf_ctr_private_t *priv = this->private;
+
+    if (priv == NULL) {
+        return 0;
+    }
+
+    return default_notify(this, event, data);
+}
|
|
+
|
|
+/* Sets up memory accounting for the xlator with gf_ctr_mt_end + 1 types.
+ * Returns the result of xlator_mem_acct_init(), or -1 when this is NULL. */
+int32_t
+mem_acct_init(xlator_t *this)
+{
+    int ret = -1;
+
+    GF_VALIDATE_OR_GOTO("ctr", this, out);
+
+    ret = xlator_mem_acct_init(this, gf_ctr_mt_end + 1);
+
+    if (ret != 0) {
+        /* The two original literals concatenated to "...initfailed"; the
+         * message is now a single literal with the missing space. */
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_MEM_ACC_INIT_FAILED,
+               "Memory accounting init failed");
+    }
+out:
+    return ret;
+}
|
|
+
|
|
+/* Xlator teardown. When CTR is enabled it closes the database connection,
+ * frees the db path and destroys the compaction mutex (failures are
+ * logged as warnings). It then frees the private data and destroys the
+ * local memory pool, clearing both pointers on the xlator. */
+void
+fini(xlator_t *this)
+{
+    gf_ctr_private_t *priv = NULL;
+
+    priv = this->private;
+
+    if (priv && priv->enabled) {
+        if (fini_db(priv->_db_conn)) {
+            gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED,
+                   "Failed closing db connection");
+        }
+
+        priv->_db_conn = NULL;
+
+        GF_FREE(priv->ctr_db_path);
+        if (pthread_mutex_destroy(&priv->compact_lock)) {
+            gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED,
+                   "Failed to destroy the compaction mutex");
+        }
+    }
+    GF_FREE(priv);
+    /* priv is gone: do not leave this->private pointing at freed memory */
+    this->private = NULL;
+    mem_pool_destroy(this->local_pool);
+    this->local_pool = NULL;
+
+    return;
+}
|
|
+
|
|
+/* File operations intercepted by CTR. */
+struct xlator_fops fops = {
+    /* lookup */
+    .lookup = ctr_lookup,
+
+    /* read fops */
+    .readv = ctr_readv,
+
+    /* write fops */
+    .create = ctr_create,
+    .mknod = ctr_mknod,
+    .link = ctr_link,
+    .unlink = ctr_unlink,
+    .rename = ctr_rename,
+    .writev = ctr_writev,
+    .truncate = ctr_truncate,
+    .ftruncate = ctr_ftruncate,
+    .setattr = ctr_setattr,
+    .fsetattr = ctr_fsetattr,
+    .setxattr = ctr_setxattr,
+    .fsetxattr = ctr_fsetxattr,
+    .removexattr = ctr_removexattr,
+    .fremovexattr = ctr_fremovexattr,
+
+    /* IPC call */
+    .ipc = ctr_ipc,
+};
|
|
+
|
|
+/* Callbacks registered by CTR: only forget is handled. */
+struct xlator_cbks cbks = {
+    .forget = ctr_forget,
+};
|
|
+
|
|
+/* Volume options understood by CTR. The table ends with the NULL-key
+ * entry; the order of the entries is unchanged. */
+struct volume_options options[] = {
+    {
+        .key = {"ctr-enabled"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .value = {"on", "off"},
+        .default_value = "off",
+        .flags = OPT_FLAG_SETTABLE,
+        .description = "Enables the CTR",
+    },
+    {
+        .key = {"record-entry"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .value = {"on", "off"},
+        .default_value = "on",
+    },
+    {
+        .key = {"record-exit"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .value = {"on", "off"},
+        .default_value = "off",
+    },
+    {
+        .key = {"record-counters"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .value = {"on", "off"},
+        .default_value = "off",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {
+        .key = {"ctr-record-metadata-heat"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .value = {"on", "off"},
+        .default_value = "off",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {
+        .key = {"ctr_link_consistency"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .value = {"on", "off"},
+        .default_value = "off",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {
+        .key = {"ctr_lookupheal_link_timeout"},
+        .type = GF_OPTION_TYPE_INT,
+        .default_value = "300",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_2},
+        .tags = {},
+    },
+    {
+        .key = {"ctr_lookupheal_inode_timeout"},
+        .type = GF_OPTION_TYPE_INT,
+        .default_value = "300",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_2},
+        .tags = {},
+    },
+    {
+        .key = {"hot-brick"},
+        .type = GF_OPTION_TYPE_BOOL,
+        .value = {"on", "off"},
+        .default_value = "off",
+    },
+    {
+        .key = {"db-type"},
+        .type = GF_OPTION_TYPE_STR,
+        .value = {"hashfile", "rocksdb", "changelog", "sqlite3", "hyperdex"},
+        .default_value = "sqlite3",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {
+        .key = {"db-sync"},
+        .type = GF_OPTION_TYPE_STR,
+        .value = {"sync", "async"},
+        .default_value = "sync",
+    },
+    {
+        .key = {"db-path"},
+        .type = GF_OPTION_TYPE_PATH,
+    },
+    {
+        .key = {"db-name"},
+        .type = GF_OPTION_TYPE_STR,
+    },
+    {
+        .key = {GFDB_SQL_PARAM_SYNC},
+        .type = GF_OPTION_TYPE_STR,
+        .value = {"off", "normal", "full"},
+        .default_value = "normal",
+    },
+    {
+        .key = {GFDB_SQL_PARAM_JOURNAL_MODE},
+        .type = GF_OPTION_TYPE_STR,
+        .value = {"delete", "truncate", "persist", "memory", "wal", "off"},
+        .default_value = "wal",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {
+        .key = {GFDB_SQL_PARAM_AUTO_VACUUM},
+        .type = GF_OPTION_TYPE_STR,
+        .value = {"off", "full", "incr"},
+        .default_value = "off",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {
+        .key = {GFDB_SQL_PARAM_WAL_AUTOCHECK},
+        .type = GF_OPTION_TYPE_INT,
+        .default_value = "25000",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {
+        .key = {GFDB_SQL_PARAM_CACHE_SIZE},
+        .type = GF_OPTION_TYPE_INT,
+        .default_value = "12500",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {
+        .key = {GFDB_SQL_PARAM_PAGE_SIZE},
+        .type = GF_OPTION_TYPE_INT,
+        .default_value = "4096",
+        .flags = OPT_FLAG_SETTABLE,
+        .op_version = {GD_OP_VERSION_3_7_0},
+        .tags = {},
+    },
+    {.key = {NULL}},
+};
|
|
+
|
|
+/* Entry-point table exported by the changetimerecorder xlator. */
+xlator_api_t xlator_api = {
+    .identifier = "changetimerecorder",
+    .category = GF_MAINTAINED,
+    .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */
+
+    /* lifecycle */
+    .init = init,
+    .fini = fini,
+    .reconfigure = reconfigure,
+    .notify = notify,
+    .mem_acct_init = mem_acct_init,
+
+    /* tables defined in this file */
+    .fops = &fops,
+    .cbks = &cbks,
+    .options = options,
+};
|
|
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h
|
|
new file mode 100644
|
|
index 0000000..0150a1c
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.h
|
|
@@ -0,0 +1,21 @@
|
|
+/*
|
|
+ Copyright (c) 2006-2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#ifndef __CTR_H
|
|
+#define __CTR_H
|
|
+
|
|
+#include <glusterfs/glusterfs.h>
|
|
+#include <glusterfs/xlator.h>
|
|
+#include <glusterfs/logging.h>
|
|
+#include <glusterfs/common-utils.h>
|
|
+#include "ctr_mem_types.h"
|
|
+#include "ctr-helper.h"
|
|
+
|
|
+#endif /* __CTR_H */
|
|
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c
|
|
new file mode 100644
|
|
index 0000000..e1e6573
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/ctr-helper.c
|
|
@@ -0,0 +1,293 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#include "gfdb_sqlite3.h"
|
|
+#include "ctr-helper.h"
|
|
+#include "ctr-messages.h"
|
|
+
|
|
+/*******************************************************************************
|
|
+ *
|
|
+ * Fill unwind into db record
|
|
+ *
|
|
+ ******************************************************************************/
|
|
+int
+fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
+                          gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path)
+{
+    gf_ctr_private_t *priv = NULL;
+    gfdb_time_t *unwind_time = NULL;
+
+    GF_ASSERT(this);
+    priv = this->private;
+    GF_ASSERT(priv);
+
+    GF_ASSERT(ctr_local);
+
+    /* This helper only handles the unwind leg of a fop. */
+    if (!isunwindpath(fop_path)) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
+               "Wrong fop_path. Should be unwind");
+        return -1;
+    }
+
+    CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path;
+    CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type;
+
+    /* Stamp the record with the current wall-clock time. */
+    unwind_time = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time;
+    if (gettimeofday(unwind_time, NULL) == -1) {
+        gf_msg(this->name, GF_LOG_ERROR, errno,
+               CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
+               "Error filling unwind time record %s", strerror(errno));
+        return -1;
+    }
+
+    /* A create/mknod issued by the tier rebalancer on a brick that is not
+     * flagged as hot is recorded with an all-zero unwind time. */
+    if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG &&
+        !priv->ctr_hot_brick && isdentrycreatefop(fop_type)) {
+        memset(unwind_time, 0, sizeof(*unwind_time));
+    }
+
+    return 0;
+}
|
|
+
|
|
+/*******************************************************************************
+ *
+ *                      Fill wind into db record
+ *
+ * Copies the fop path/type, link-consistency flag, wind time, gfid(s) and,
+ * for dentry fops, the parent gfid and basename of the new and old links from
+ * ctr_inode_cx into the db record held by ctr_local.
+ *
+ * Returns 0 on success. Returns non-zero when the fop path is not a wind
+ * path, when gettimeofday() fails, or when a link basename is NULL or does
+ * not fit into the record's name buffer. On a -1 return the record is reset
+ * with CLEAR_CTR_DB_RECORD().
+ *
+ ******************************************************************************/
+int
+fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
+                        gf_ctr_inode_context_t *ctr_inode_cx)
+{
+    int ret = -1;
+    int name_len = 0;
+    gfdb_time_t *ctr_wtime = NULL;
+    gf_ctr_private_t *_priv = NULL;
+
+    GF_ASSERT(this);
+    _priv = this->private;
+    GF_ASSERT(_priv);
+    GF_ASSERT(ctr_local);
+    IS_CTR_INODE_CX_SANE(ctr_inode_cx);
+
+    /*if not wind path error!*/
+    if (!iswindpath(ctr_inode_cx->fop_path)) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
+               "Wrong fop_path. Should be wind");
+        goto out;
+    }
+
+    ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time;
+    CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
+    CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
+    CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency;
+
+    ret = gettimeofday(ctr_wtime, NULL);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, errno,
+               CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
+               "Error filling wind time record %s", strerror(errno));
+        goto out;
+    }
+
+    /* Special case i.e if its a tier rebalance
+     * + cold tier brick
+     * + its a create/mknod FOP
+     * we record wind time as zero */
+    if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG &&
+        (!_priv->ctr_hot_brick) && isdentrycreatefop(ctr_inode_cx->fop_type)) {
+        memset(ctr_wtime, 0, sizeof(*ctr_wtime));
+    }
+
+    /* Copy gfid into db record */
+    gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
+
+    /* Copy older gfid if any */
+    if (ctr_inode_cx->old_gfid &&
+        (!gf_uuid_is_null(*(ctr_inode_cx->old_gfid)))) {
+        gf_uuid_copy(CTR_DB_REC(ctr_local).old_gfid, *(ctr_inode_cx->old_gfid));
+    }
+
+    /*Hard Links*/
+    if (isdentryfop(ctr_inode_cx->fop_type)) {
+        /*new link fop*/
+        if (NEW_LINK_CX(ctr_inode_cx)) {
+            /* A NULL name cannot be recorded; fail instead of
+             * dereferencing it. */
+            if (!NEW_LINK_CX(ctr_inode_cx)->basename) {
+                ret = -1;
+                goto out;
+            }
+            gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
+                         *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
+            /* Bounded copy: an over-long basename must not overrun the
+             * fixed-size name buffer of the record. */
+            name_len = snprintf(CTR_DB_REC(ctr_local).file_name,
+                                sizeof(CTR_DB_REC(ctr_local).file_name), "%s",
+                                NEW_LINK_CX(ctr_inode_cx)->basename);
+            if (name_len < 0 ||
+                (size_t)name_len >= sizeof(CTR_DB_REC(ctr_local).file_name)) {
+                ret = -1;
+                goto out;
+            }
+        }
+        /*rename fop*/
+        if (OLD_LINK_CX(ctr_inode_cx)) {
+            if (!OLD_LINK_CX(ctr_inode_cx)->basename) {
+                ret = -1;
+                goto out;
+            }
+            gf_uuid_copy(CTR_DB_REC(ctr_local).old_pargfid,
+                         *((OLD_LINK_CX(ctr_inode_cx))->pargfid));
+            name_len = snprintf(CTR_DB_REC(ctr_local).old_file_name,
+                                sizeof(CTR_DB_REC(ctr_local).old_file_name),
+                                "%s", OLD_LINK_CX(ctr_inode_cx)->basename);
+            if (name_len < 0 ||
+                (size_t)name_len >=
+                    sizeof(CTR_DB_REC(ctr_local).old_file_name)) {
+                ret = -1;
+                goto out;
+            }
+        }
+    }
+
+    ret = 0;
+out:
+    /*On error roll back and clean the record*/
+    if (ret == -1) {
+        CLEAR_CTR_DB_RECORD(ctr_local);
+    }
+    return ret;
+}
|
|
+
|
|
+/******************************************************************************
|
|
+ *
|
|
+ * CTR xlator init related functions
|
|
+ *
|
|
+ *
|
|
+ * ****************************************************************************/
|
|
+/*
+ * Build the full path of the sqlite database from the "db-path" and "db-name"
+ * xlator options, store it in params_dict under GFDB_SQL_PARAM_DBPATH, and
+ * then let gfdb_set_sql_params() copy the remaining SQL options across.
+ *
+ * Returns 0 on success and a non-zero value on failure. A failure of
+ * gfdb_set_sql_params() is reported to the caller instead of being logged
+ * and then discarded.
+ */
+static int
+extract_sql_params(xlator_t *this, dict_t *params_dict)
+{
+    int ret = -1;
+    char *db_path = NULL;
+    char *db_name = NULL;
+    char *db_full_path = NULL;
+
+    GF_ASSERT(this);
+    GF_ASSERT(params_dict);
+
+    /*Extract the path of the db*/
+    db_path = NULL;
+    GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path",
+                                   db_path, "/var/run/gluster/");
+
+    /*Extract the name of the db*/
+    db_name = NULL;
+    GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name",
+                                   db_name, "gf_ctr_db.db");
+
+    /*Construct full path of the db*/
+    ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name);
+    if (ret < 0) {
+        gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
+               CTR_MSG_CONSTRUCT_DB_PATH_FAILED,
+               "Construction of full db path failed!");
+        goto out;
+    }
+
+    /*Setting the SQL DB Path*/
+    SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH,
+                         db_full_path, ret, out);
+
+    /* NOTE(review): from here on params_dict presumably references the path
+     * string (the original code never released it after a successful set).
+     * Drop the local pointer so the error path below cannot free memory the
+     * dict may still be using. */
+    db_full_path = NULL;
+
+    /*Extract rest of the sql params*/
+    ret = gfdb_set_sql_params(this->name, this->options, params_dict);
+    if (ret) {
+        gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
+               CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
+               "Failed setting values to sql param dict!");
+        goto out;
+    }
+
+    ret = 0;
+
+out:
+    if (ret)
+        GF_FREE(db_full_path);
+    return ret;
+}
|
|
+
|
|
+/* Fill params_dict with the connection parameters for the given db type.
+ * Only GFDB_SQLITE3 is handled; the other enumerated types yield -1. */
+int
+extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type)
+{
+    GF_ASSERT(this);
+    GF_ASSERT(params_dict);
+
+    switch (db_type) {
+        case GFDB_SQLITE3:
+            /* Result of the sqlite extraction is the result of this call. */
+            return extract_sql_params(this, params_dict);
+        case GFDB_ROCKS_DB:
+        case GFDB_HYPERDEX:
+        case GFDB_HASH_FILE_STORE:
+        case GFDB_INVALID_DB:
+        case GFDB_DB_END:
+            return -1;
+    }
+
+    /* Value outside the enumerators listed above. */
+    return 0;
+}
|
|
+
|
|
+/* Read the CTR xlator options into _priv. When the xlator is disabled only
+ * the "ctr-enabled" flag is read and 0 is returned. Returns -1 when any
+ * option fails to initialise. */
+int
+extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv)
+{
+    int ret = -1;
+    char *db_type_name = NULL;
+    char *db_sync_name = NULL;
+
+    GF_ASSERT(this);
+    GF_ASSERT(_priv);
+
+    /* The translator is off unless the option says otherwise. */
+    _priv->enabled = _gf_false;
+    GF_OPTION_INIT("ctr-enabled", _priv->enabled, bool, out);
+    if (!_priv->enabled) {
+        gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
+               "CTR Xlator is disabled.");
+        ret = 0;
+        goto out;
+    }
+
+    /* Database backend */
+    GF_OPTION_INIT("db-type", db_type_name, str, out);
+    _priv->gfdb_db_type = gf_string2gfdbdbtype(db_type_name);
+
+    /* What gets recorded: wind, unwind, counters, metadata heat */
+    GF_OPTION_INIT("record-entry", _priv->ctr_record_wind, bool, out);
+    GF_OPTION_INIT("record-exit", _priv->ctr_record_unwind, bool, out);
+    GF_OPTION_INIT("record-counters", _priv->ctr_record_counter, bool, out);
+    GF_OPTION_INIT("ctr-record-metadata-heat", _priv->ctr_record_metadata_heat,
+                   bool, out);
+
+    /* Link consistency and lookup-heal timeouts */
+    GF_OPTION_INIT("ctr_link_consistency", _priv->ctr_link_consistency, bool,
+                   out);
+    GF_OPTION_INIT("ctr_lookupheal_inode_timeout",
+                   _priv->ctr_lookupheal_inode_timeout, uint64, out);
+    GF_OPTION_INIT("ctr_lookupheal_link_timeout",
+                   _priv->ctr_lookupheal_link_timeout, uint64, out);
+
+    /* Hot tier brick flag */
+    GF_OPTION_INIT("hot-brick", _priv->ctr_hot_brick, bool, out);
+
+    /* Database sync mode */
+    GF_OPTION_INIT("db-sync", db_sync_name, str, out);
+    _priv->gfdb_sync_type = gf_string2gfdbdbsync(db_sync_name);
+
+    ret = 0;
+
+out:
+    return ret;
+}
|
|
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h
|
|
new file mode 100644
|
|
index 0000000..517fbb0
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/ctr-helper.h
|
|
@@ -0,0 +1,854 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#ifndef __CTR_HELPER_H
|
|
+#define __CTR_HELPER_H
|
|
+
|
|
+#include <glusterfs/xlator.h>
|
|
+#include "ctr_mem_types.h"
|
|
+#include <glusterfs/iatt.h>
|
|
+#include <glusterfs/glusterfs.h>
|
|
+#include <glusterfs/xlator.h>
|
|
+#include <glusterfs/defaults.h>
|
|
+#include <glusterfs/logging.h>
|
|
+#include <glusterfs/common-utils.h>
|
|
+#include <time.h>
|
|
+#include <sys/time.h>
|
|
+#include <pthread.h>
|
|
+
|
|
+#include "gfdb_data_store.h"
|
|
+#include "ctr-xlator-ctx.h"
|
|
+#include "ctr-messages.h"
|
|
+
|
|
+#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */
|
|
+#define CTR_DEFAULT_INODE_EXP_PERIOD 300 /* Five mins */
|
|
+
|
|
+typedef struct ctr_query_cbk_args {
|
|
+ int query_fd;
|
|
+ int count;
|
|
+} ctr_query_cbk_args_t;
|
|
+
|
|
+/*CTR Xlator Private structure*/
|
|
+typedef struct gf_ctr_private {
|
|
+ gf_boolean_t enabled;
|
|
+ char *ctr_db_path;
|
|
+ gf_boolean_t ctr_hot_brick;
|
|
+ gf_boolean_t ctr_record_wind;
|
|
+ gf_boolean_t ctr_record_unwind;
|
|
+ gf_boolean_t ctr_record_counter;
|
|
+ gf_boolean_t ctr_record_metadata_heat;
|
|
+ gf_boolean_t ctr_link_consistency;
|
|
+ gfdb_db_type_t gfdb_db_type;
|
|
+ gfdb_sync_type_t gfdb_sync_type;
|
|
+ gfdb_conn_node_t *_db_conn;
|
|
+ uint64_t ctr_lookupheal_link_timeout;
|
|
+ uint64_t ctr_lookupheal_inode_timeout;
|
|
+ gf_boolean_t compact_active;
|
|
+ gf_boolean_t compact_mode_switched;
|
|
+ pthread_mutex_t compact_lock;
|
|
+} gf_ctr_private_t;
|
|
+
|
|
+/*
|
|
+ * gf_ctr_local_t is the ctr xlator local data structure that is stored in
|
|
+ * the call_frame of each FOP.
|
|
+ *
|
|
+ * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is
|
|
+ * used by the insert_record() api from the libgfdb. The gfdb_db_record object
|
|
+ * will contain all the inode and hardlink(only for dentry fops: create,
|
|
+ * mknod, link, unlink, rename). The ctr_local is kept alive till the unwind
|
|
+ * call and will be released during the unwind. The same gfdb_db_record will
|
|
+ * be used for the unwind insert_record() api, to record unwind in the database.
|
|
+ *
|
|
+ * ia_inode_type in gf_ctr_local will tell the type of the inode. This is
|
|
+ * important during the unwind path, as we will not have the inode during
|
|
+ * the unwind path. We would have included this in the gfdb_db_record itself
|
|
+ * but currently we record only file inode information.
|
|
+ *
|
|
+ * is_internal_fop in gf_ctr_local will tell us if this is a internal fop and
|
|
+ * take special/no action. We don't record change/access times or increment
|
|
+ * heat counter for internal fops from rebalancer.
|
|
+ * */
|
|
+typedef struct gf_ctr_local {
|
|
+ gfdb_db_record_t gfdb_db_record;
|
|
+ ia_type_t ia_inode_type;
|
|
+ gf_boolean_t is_internal_fop;
|
|
+ gf_special_pid_t client_pid;
|
|
+} gf_ctr_local_t;
|
|
+/*
|
|
+ * Easy access of gfdb_db_record of ctr_local
|
|
+ * */
|
|
+#define CTR_DB_REC(ctr_local) (ctr_local->gfdb_db_record)
|
|
+
|
|
+/* Clear db record.
+ * Resets the fop path/type, both change times, the current and the old
+ * gfid/pargfid and both file names of ctr_local's db record, and marks the
+ * cached inode type as invalid. The argument is parenthesized so that any
+ * pointer expression can be passed. */
+#define CLEAR_CTR_DB_RECORD(ctr_local)                                         \
+    do {                                                                       \
+        (ctr_local)->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID;          \
+        memset(&((ctr_local)->gfdb_db_record.gfdb_wind_change_time), 0,        \
+               sizeof(gfdb_time_t));                                           \
+        memset(&((ctr_local)->gfdb_db_record.gfdb_unwind_change_time), 0,      \
+               sizeof(gfdb_time_t));                                           \
+        gf_uuid_clear((ctr_local)->gfdb_db_record.gfid);                       \
+        gf_uuid_clear((ctr_local)->gfdb_db_record.pargfid);                    \
+        /* the rename-related ids are part of the record as well */           \
+        gf_uuid_clear((ctr_local)->gfdb_db_record.old_gfid);                   \
+        gf_uuid_clear((ctr_local)->gfdb_db_record.old_pargfid);                \
+        memset((ctr_local)->gfdb_db_record.file_name, 0, GF_NAME_MAX + 1);     \
+        memset((ctr_local)->gfdb_db_record.old_file_name, 0,                   \
+               GF_NAME_MAX + 1);                                               \
+        (ctr_local)->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP;       \
+        (ctr_local)->ia_inode_type = IA_INVAL;                                 \
+    } while (0)
|
|
+
|
|
+/* Take a zeroed gf_ctr_local_t from this->local_pool and reset its db
+ * record. Returns NULL (after logging) when the pool allocation fails. */
+static gf_ctr_local_t *
+init_ctr_local_t(xlator_t *this)
+{
+    gf_ctr_local_t *local = NULL;
+
+    GF_ASSERT(this);
+
+    local = mem_get0(this->local_pool);
+    if (local) {
+        CLEAR_CTR_DB_RECORD(local);
+    } else {
+        gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
+               CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
+               "Error while creating ctr local");
+    }
+
+    return local;
+}
|
|
+
|
|
+/* Hand ctr_local back with mem_put(); a NULL pointer is ignored. */
+static void
+free_ctr_local(gf_ctr_local_t *ctr_local)
+{
+    if (!ctr_local)
+        return;
+
+    mem_put(ctr_local);
+}
|
|
+
|
|
+/******************************************************************************
|
|
+ *
|
|
+ *
|
|
+ * Context Carrier Structures
|
|
+ *
|
|
+ *
|
|
+ * ****************************************************************************/
|
|
+
|
|
+/*
|
|
+ * Context Carrier structures are used to carry relevant information about
|
|
+ * inodes and links from the fops calls to the ctr_insert_wind.
|
|
+ * These structures just have pointers to the original data and do not
|
|
+ * do a deep copy of any data. This info is deep copied to
|
|
+ * ctr_local->gfdb_db_record and passed to insert_record() api of libgfdb. This
|
|
+ * info remains persistent for the unwind in ctr_local->gfdb_db_record
|
|
+ * and once used will be destroyed.
|
|
+ *
|
|
+ * gf_ctr_link_context_t : Context structure for hard links
|
|
+ * gf_ctr_inode_context_t : Context structure for inodes
|
|
+ *
|
|
+ * */
|
|
+
|
|
+/*Context Carrier Structure for hard links*/
|
|
+typedef struct gf_ctr_link_context {
|
|
+ uuid_t *pargfid;
|
|
+ const char *basename;
|
|
+} gf_ctr_link_context_t;
|
|
+
|
|
+/*Context Carrier Structure for inodes*/
|
|
+typedef struct gf_ctr_inode_context {
|
|
+ ia_type_t ia_type;
|
|
+ uuid_t *gfid;
|
|
+ uuid_t *old_gfid;
|
|
+ gf_ctr_link_context_t *new_link_cx;
|
|
+ gf_ctr_link_context_t *old_link_cx;
|
|
+ gfdb_fop_type_t fop_type;
|
|
+ gfdb_fop_path_t fop_path;
|
|
+ gf_boolean_t is_internal_fop;
|
|
+ /* Indicating metadata fops */
|
|
+ gf_boolean_t is_metadata_fop;
|
|
+} gf_ctr_inode_context_t;
|
|
+
|
|
+/*******************Util Macros for Context Carrier Structures*****************/
|
|
+
|
|
+/*Checks if ctr_link_cx is sane!*/
|
|
+#define IS_CTR_LINK_CX_SANE(ctr_link_cx) \
|
|
+ do { \
|
|
+ if (ctr_link_cx) { \
|
|
+ if (ctr_link_cx->pargfid) \
|
|
+ GF_ASSERT(*(ctr_link_cx->pargfid)); \
|
|
+ GF_ASSERT(ctr_link_cx->basename); \
|
|
+ }; \
|
|
+ } while (0)
|
|
+
|
|
+/*Clear and fill the ctr_link_context with values*/
|
|
+#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, label) \
|
|
+ do { \
|
|
+ GF_VALIDATE_OR_GOTO("ctr", ctr_link_cx, label); \
|
|
+ GF_VALIDATE_OR_GOTO("ctr", _pargfid, label); \
|
|
+ GF_VALIDATE_OR_GOTO("ctr", _basename, label); \
|
|
+ memset(ctr_link_cx, 0, sizeof(*ctr_link_cx)); \
|
|
+ ctr_link_cx->pargfid = &_pargfid; \
|
|
+ ctr_link_cx->basename = _basename; \
|
|
+ } while (0)
|
|
+
|
|
+#define NEW_LINK_CX(ctr_inode_cx) ctr_inode_cx->new_link_cx
|
|
+
|
|
+#define OLD_LINK_CX(ctr_inode_cx) ctr_inode_cx->old_link_cx
|
|
+
|
|
+/*Checks if ctr_inode_cx is sane!*/
|
|
+#define IS_CTR_INODE_CX_SANE(ctr_inode_cx) \
|
|
+ do { \
|
|
+ GF_ASSERT(ctr_inode_cx); \
|
|
+ GF_ASSERT(ctr_inode_cx->gfid); \
|
|
+ GF_ASSERT(*(ctr_inode_cx->gfid)); \
|
|
+ GF_ASSERT(ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP); \
|
|
+ GF_ASSERT(ctr_inode_cx->fop_path != GFDB_FOP_INVALID); \
|
|
+ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \
|
|
+ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \
|
|
+ } while (0)
|
|
+
|
|
+/*Clear and fill the ctr_inode_context with values*/
|
|
+#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx, _ia_type, _gfid, _new_link_cx, \
|
|
+ _old_link_cx, _fop_type, _fop_path) \
|
|
+ do { \
|
|
+ GF_ASSERT(ctr_inode_cx); \
|
|
+ GF_ASSERT(_gfid); \
|
|
+ GF_ASSERT(_fop_type != GFDB_FOP_INVALID_OP); \
|
|
+ GF_ASSERT(_fop_path != GFDB_FOP_INVALID); \
|
|
+ memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx)); \
|
|
+ ctr_inode_cx->ia_type = _ia_type; \
|
|
+ ctr_inode_cx->gfid = &_gfid; \
|
|
+ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \
|
|
+ if (_new_link_cx) \
|
|
+ NEW_LINK_CX(ctr_inode_cx) = _new_link_cx; \
|
|
+ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \
|
|
+ if (_old_link_cx) \
|
|
+ OLD_LINK_CX(ctr_inode_cx) = _old_link_cx; \
|
|
+ ctr_inode_cx->fop_type = _fop_type; \
|
|
+ ctr_inode_cx->fop_path = _fop_path; \
|
|
+ } while (0)
|
|
+
|
|
+/******************************************************************************
|
|
+ *
|
|
+ * Util functions or macros used by
|
|
+ * insert wind and insert unwind
|
|
+ *
|
|
+ * ****************************************************************************/
|
|
+/* Free ctr frame local */
|
|
+static inline void
|
|
+ctr_free_frame_local(call_frame_t *frame)
|
|
+{
|
|
+ if (frame) {
|
|
+ free_ctr_local((gf_ctr_local_t *)frame->local);
|
|
+ frame->local = NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Setting GF_REQUEST_LINK_COUNT_XDATA in dict
+ * that has to be sent to POSIX Xlator to send
+ * link count in unwind path.
+ *
+ * When *xdata is NULL a new dict is created and stored in *xdata.
+ *
+ * return 0 for success without creation of dict
+ * return 1 for success with creation of dict
+ * return -1 for failure; a dict created by this call is released again and
+ *           *xdata is reset to NULL so the caller never sees a stale pointer.
+ * */
+static inline int
+set_posix_link_request(xlator_t *this, dict_t **xdata)
+{
+    int ret = -1;
+    gf_boolean_t is_created = _gf_false;
+
+    GF_VALIDATE_OR_GOTO("ctr", this, out);
+    GF_VALIDATE_OR_GOTO(this->name, xdata, out);
+
+    /*create xdata if NULL*/
+    if (!*xdata) {
+        *xdata = dict_new();
+        if (!*xdata) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
+                   "xdata is NULL :Cannot send "
+                   "GF_REQUEST_LINK_COUNT_XDATA to posix");
+            goto out;
+        }
+        is_created = _gf_true;
+    }
+
+    if (dict_set_int32(*xdata, GF_REQUEST_LINK_COUNT_XDATA, 1)) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
+               "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
+        goto out;
+    }
+
+    /* Tell the caller whether it now owns a dict created here. */
+    ret = is_created ? 1 : 0;
+out:
+    /* is_created is only ever true when xdata itself was valid, so testing
+     * it first keeps the dereference below safe on the validation path. */
+    if (ret == -1 && is_created) {
+        dict_unref(*xdata);
+        *xdata = NULL;
+    }
+    return ret;
+}
|
|
+
|
|
+/*
|
|
+ * If a bitrot fop
|
|
+ * */
|
|
+#define BITROT_FOP(frame) \
|
|
+ (frame->root->pid == GF_CLIENT_PID_BITD || \
|
|
+ frame->root->pid == GF_CLIENT_PID_SCRUB)
|
|
+
|
|
+/*
|
|
+ * If a rebalancer fop
|
|
+ * */
|
|
+#define REBALANCE_FOP(frame) (frame->root->pid == GF_CLIENT_PID_DEFRAG)
|
|
+
|
|
+/*
|
|
+ * If its a tiering rebalancer fop
|
|
+ * */
|
|
+#define TIER_REBALANCE_FOP(frame) \
|
|
+ (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)
|
|
+
|
|
+/*
|
|
+ * If its a AFR SELF HEAL
|
|
+ * */
|
|
+#define AFR_SELF_HEAL_FOP(frame) (frame->root->pid == GF_CLIENT_PID_SELF_HEALD)
|
|
+
|
|
+/*
|
|
+ * if a rebalancer fop goto
|
|
+ * */
|
|
+#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label) \
|
|
+ do { \
|
|
+ if (REBALANCE_FOP(frame)) \
|
|
+ goto label; \
|
|
+ } while (0)
|
|
+
|
|
+/*
|
|
+ * Internal fop
|
|
+ *
|
|
+ * */
|
|
+/* Decide whether the fop carried by frame is an internal one, based on the
+ * client pid of the frame and on marker keys found in xdata (may be NULL). */
+static inline gf_boolean_t
+is_internal_fop(call_frame_t *frame, dict_t *xdata)
+{
+    gf_boolean_t internal = _gf_false;
+
+    GF_ASSERT(frame);
+    GF_ASSERT(frame->root);
+
+    /* Self-heal and bitrot clients are always internal. */
+    if (AFR_SELF_HEAL_FOP(frame) || BITROT_FOP(frame))
+        internal = _gf_true;
+
+    /* Rebalance clients are internal unless the request carries the
+     * CTR_ATTACH_TIER_LOOKUP key. */
+    if (REBALANCE_FOP(frame) || TIER_REBALANCE_FOP(frame)) {
+        internal = (xdata && dict_get(xdata, CTR_ATTACH_TIER_LOOKUP))
+                       ? _gf_false
+                       : _gf_true;
+    }
+
+    /* An explicit internal-fop key overrides everything above. */
+    if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY))
+        internal = _gf_true;
+
+    return internal;
+}
|
|
+
|
|
+#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \
|
|
+ do { \
|
|
+ if (is_internal_fop(frame, dict)) \
|
|
+ goto label; \
|
|
+ } while (0)
|
|
+
|
|
+/* if fop has failed exit */
|
|
+#define CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, label) \
|
|
+ do { \
|
|
+ if (op_ret == -1) { \
|
|
+ gf_msg_trace(this->name, 0, "Failed fop with %s", \
|
|
+ strerror(op_errno)); \
|
|
+ goto label; \
|
|
+ }; \
|
|
+ } while (0)
|
|
+
|
|
+/*
|
|
+ * IS CTR Xlator is disabled then goto to label
|
|
+ * */
|
|
+#define CTR_IS_DISABLED_THEN_GOTO(this, label) \
|
|
+ do { \
|
|
+ gf_ctr_private_t *_priv = NULL; \
|
|
+ GF_ASSERT(this); \
|
|
+ GF_ASSERT(this->private); \
|
|
+ _priv = this->private; \
|
|
+ if (!_priv->_db_conn) \
|
|
+ goto label; \
|
|
+ } while (0)
|
|
+
|
|
+/*
|
|
+ * IS CTR record metadata heat is disabled then goto to label
|
|
+ * */
|
|
+#define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label) \
|
|
+ do { \
|
|
+ gf_ctr_private_t *_priv = NULL; \
|
|
+ GF_ASSERT(this); \
|
|
+ GF_ASSERT(this->private); \
|
|
+ _priv = this->private; \
|
|
+ if (!_priv->ctr_record_metadata_heat) \
|
|
+ goto label; \
|
|
+ } while (0)
|
|
+
|
|
+int
|
|
+fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
|
|
+ gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path);
|
|
+
|
|
+int
|
|
+fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
|
|
+ gf_ctr_inode_context_t *ctr_inode_cx);
|
|
+
|
|
+/*******************************************************************************
|
|
+ * CTR INSERT WIND
|
|
+ * *****************************************************************************
|
|
+ * Function used to insert/update record into the database during a wind fop
|
|
+ * This function creates ctr_local structure into the frame of the fop
|
|
+ * call.
|
|
+ * ****************************************************************************/
|
|
+
|
|
+static inline int
+ctr_insert_wind(call_frame_t *frame, xlator_t *this,
+                gf_ctr_inode_context_t *ctr_inode_cx)
+{
+    int ret = -1;
+    gf_ctr_private_t *priv = NULL;
+    gf_ctr_local_t *local = NULL;
+    gf_boolean_t heat_eligible = _gf_false;
+
+    GF_ASSERT(frame);
+    GF_ASSERT(frame->root);
+    GF_ASSERT(this);
+    IS_CTR_INODE_CX_SANE(ctr_inode_cx);
+
+    priv = this->private;
+    GF_ASSERT(priv);
+
+    GF_ASSERT(priv->_db_conn);
+
+    /* Nothing is recorded when wind recording is off or the inode is a
+     * directory; frame->local is left untouched in that case. */
+    if (!priv->ctr_record_wind || ctr_inode_cx->ia_type == IA_IFDIR)
+        return 0;
+
+    frame->local = init_ctr_local_t(this);
+    if (!frame->local) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
+               "WIND: Error while creating ctr local");
+        goto fail;
+    }
+
+    local = frame->local;
+    local->client_pid = frame->root->pid;
+    local->is_internal_fop = ctr_inode_cx->is_internal_fop;
+
+    /* A fop contributes heat when it is not a metadata fop, or when it is
+     * one and metadata heat recording is switched on. */
+    heat_eligible = (!ctr_inode_cx->is_metadata_fop ||
+                     priv->ctr_record_metadata_heat);
+
+    /* Counters: only when enabled, for non-internal, heat-eligible fops. */
+    CTR_DB_REC(local).do_record_counters = (priv->ctr_record_counter &&
+                                            !local->is_internal_fop &&
+                                            heat_eligible)
+                                               ? _gf_true
+                                               : _gf_false;
+
+    /* Times: internal fops record them only when the dentry is being
+     * created; other fops follow the wind/unwind recording options. */
+    if (local->is_internal_fop) {
+        CTR_DB_REC(local).do_record_times = (isdentrycreatefop(
+                                                ctr_inode_cx->fop_type))
+                                                ? _gf_true
+                                                : _gf_false;
+    } else if (heat_eligible) {
+        CTR_DB_REC(local).do_record_times = (priv->ctr_record_wind ||
+                                             priv->ctr_record_unwind);
+    } else {
+        CTR_DB_REC(local).do_record_times = _gf_false;
+    }
+
+    /* Populate the record from the inode context ... */
+    ret = fill_db_record_for_wind(this, local, ctr_inode_cx);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
+               "WIND: Error filling ctr local");
+        goto fail;
+    }
+
+    /* ... and push it to the database. */
+    ret = insert_record(priv->_db_conn, &local->gfdb_db_record);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RECORD_WIND_FAILED,
+               "WIND: Inserting of record failed!");
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    /* Release whatever local was obtained and detach it from the frame. */
+    free_ctr_local(local);
+    frame->local = NULL;
+    return ret;
+}
|
|
+
|
|
+/*******************************************************************************
+ *                              CTR INSERT UNWIND
+ * *****************************************************************************
+ * Function used to insert/update record into the database during a unwind fop.
+ * It reuses the ctr_local stored in frame->local by the wind path; when no
+ * ctr_local is present nothing is recorded and 0 is returned.
+ * The ctr_local is not released here: this function only reads frame->local.
+ *
+ * Returns 0 on success (or when there is nothing to record) and -1 when
+ * filling or inserting the record fails.
+ * ****************************************************************************/
+static inline int
+ctr_insert_unwind(call_frame_t *frame, xlator_t *this, gfdb_fop_type_t fop_type,
+                  gfdb_fop_path_t fop_path)
+{
+    int ret = -1;
+    gf_ctr_private_t *_priv = NULL;
+    gf_ctr_local_t *ctr_local = NULL;
+
+    GF_ASSERT(frame);
+    GF_ASSERT(this);
+
+    _priv = this->private;
+    GF_ASSERT(_priv);
+
+    GF_ASSERT(_priv->_db_conn);
+
+    ctr_local = frame->local;
+
+    /* Record when unwind recording is on, or always for dentry fops, but
+     * never for directories. */
+    if (ctr_local && (_priv->ctr_record_unwind || isdentryfop(fop_type)) &&
+        (ctr_local->ia_inode_type != IA_IFDIR)) {
+        CTR_DB_REC(ctr_local).do_record_uwind_time = _priv->ctr_record_unwind;
+
+        ret = fill_db_record_for_unwind(this, ctr_local, fop_type, fop_path);
+        if (ret == -1) {
+            gf_msg(this->name, GF_LOG_ERROR, 0,
+                   CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
+                   "UNWIND: Error filling ctr local");
+            goto out;
+        }
+
+        ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
+        if (ret == -1) {
+            /* Distinct text so an insert failure is not mistaken for a
+             * fill failure in the logs. */
+            gf_msg(this->name, GF_LOG_ERROR, 0,
+                   CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
+                   "UNWIND: Inserting of record failed!");
+            goto out;
+        }
+    }
+    ret = 0;
+out:
+    return ret;
+}
|
|
+
|
|
+/******************************************************************************
+ *                          Delete file/flink record/s from db
+ *
+ * Builds a temporary db record for the hard link (gfid, pargfid, basename)
+ * and sends it to the database through insert_record().
+ *
+ * fop_type must be GFDB_FOP_DENTRY_WRITE and fop_path must be either
+ * GFDB_FOP_UNDEL or GFDB_FOP_UNDEL_ALL.
+ *
+ * Returns 0 on success; -1 when an argument fails validation or basename
+ * does not fit into the record; otherwise the non-zero result of
+ * insert_record().
+ * ****************************************************************************/
+static inline int
+ctr_delete_hard_link_from_db(xlator_t *this, uuid_t gfid, uuid_t pargfid,
+                             char *basename, gfdb_fop_type_t fop_type,
+                             gfdb_fop_path_t fop_path)
+{
+    int ret = -1;
+    int name_len = 0;
+    gfdb_db_record_t gfdb_db_record;
+    gf_ctr_private_t *_priv = NULL;
+
+    GF_VALIDATE_OR_GOTO("ctr", this, out);
+    _priv = this->private;
+    GF_VALIDATE_OR_GOTO(this->name, _priv, out);
+    GF_VALIDATE_OR_GOTO(this->name, basename, out);
+    GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(gfid)), out);
+    GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(pargfid)), out);
+    GF_VALIDATE_OR_GOTO(this->name, (fop_type == GFDB_FOP_DENTRY_WRITE), out);
+    /* Both alternatives must be compared against fop_path; a bare constant
+     * on the right of || would not test fop_path at all. */
+    GF_VALIDATE_OR_GOTO(
+        this->name,
+        (fop_path == GFDB_FOP_UNDEL || fop_path == GFDB_FOP_UNDEL_ALL), out);
+
+    /* Set gfdb_db_record to 0 */
+    memset(&gfdb_db_record, 0, sizeof(gfdb_db_record));
+
+    /* Copy basename; bound by the real buffer size so a name of the maximum
+     * legal length is still accepted while longer ones are rejected. */
+    name_len = snprintf(gfdb_db_record.file_name,
+                        sizeof(gfdb_db_record.file_name), "%s", basename);
+    if (name_len < 0 || (size_t)name_len >= sizeof(gfdb_db_record.file_name))
+        goto out;
+
+    /* Copy gfid into db record */
+    gf_uuid_copy(gfdb_db_record.gfid, gfid);
+
+    /* Copy pargid into db record */
+    gf_uuid_copy(gfdb_db_record.pargfid, pargfid);
+
+    gfdb_db_record.gfdb_fop_path = fop_path;
+    gfdb_db_record.gfdb_fop_type = fop_type;
+
+    /*send delete request to db*/
+    ret = insert_record(_priv->_db_conn, &gfdb_db_record);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RECORD_WIND_FAILED,
+               "Failed to delete record. %s", basename);
+        goto out;
+    }
+
+    ret = 0;
+out:
+    return ret;
+}
|
|
+
|
|
+/******************************* Hard link function ***************************/
|
|
+
|
|
+/* True when the seconds elapsed between the inode's recorded heal period and
+ * current_time have reached the configured inode lookup-heal timeout. */
+static inline gf_boolean_t
+__is_inode_expired(ctr_xlator_ctx_t *ctr_xlator_ctx, gf_ctr_private_t *_priv,
+                   gfdb_time_t *current_time)
+{
+    uint64_t elapsed = 0;
+
+    GF_ASSERT(ctr_xlator_ctx);
+    GF_ASSERT(_priv);
+    GF_ASSERT(current_time);
+
+    elapsed = current_time->tv_sec - ctr_xlator_ctx->inode_heal_period;
+
+    if (elapsed >= _priv->ctr_lookupheal_inode_timeout)
+        return _gf_true;
+
+    return _gf_false;
+}
|
|
+
|
|
+/* True when the seconds elapsed between the hard link's recorded heal period
+ * and current_time have reached the configured link lookup-heal timeout. */
+static inline gf_boolean_t
+__is_hardlink_expired(ctr_hard_link_t *ctr_hard_link, gf_ctr_private_t *_priv,
+                      gfdb_time_t *current_time)
+{
+    uint64_t elapsed = 0;
+
+    GF_ASSERT(ctr_hard_link);
+    GF_ASSERT(_priv);
+    GF_ASSERT(current_time);
+
+    elapsed = current_time->tv_sec - ctr_hard_link->hardlink_heal_period;
+
+    if (elapsed >= _priv->ctr_lookupheal_link_timeout)
+        return _gf_true;
+
+    return _gf_false;
+}
|
|
+
|
|
+/* Result of add_hard_link_ctx().
+ *
+ * CTR_TRY_HARDLINK_HEAL and CTR_TRY_INODE_HEAL are distinct bits:
+ * add_hard_link_ctx() ORs them into its result, so a returned value can be
+ * the combination of both rather than exactly one enumerator. */
+typedef enum ctr_heal_ret_val {
+    CTR_CTX_ERROR = -1,        /* Failure while handling the inode context */
+    CTR_TRY_NO_HEAL = 0,       /* No healing required */
+    CTR_TRY_HARDLINK_HEAL = 1, /* Try healing hard link */
+    CTR_TRY_INODE_HEAL = 2,    /* Try healing inode */
+} ctr_heal_ret_val_t;
|
|
+
|
|
+/**
+ * @brief Record the hard link described by frame->local in the inode's CTR
+ *        context, or decide whether an already recorded link is due for a
+ *        heal. The inode context maintains an in-memory list of hard links
+ *        which is used for smart healing of the database.
+ * @param frame of the FOP; frame->local supplies the parent GFID and the
+ *        file name of the hard link
+ * @param this is the xlator instance
+ * @param inode whose CTR context is looked up (and created when missing)
+ * @return CTR_TRY_NO_HEAL when the link was newly added, when it is already
+ *         known and nothing has expired, when frame->local is NULL, or when
+ *         the inode context cannot be obtained;
+ *         CTR_TRY_HARDLINK_HEAL and/or CTR_TRY_INODE_HEAL (ORed together)
+ *         when the link is already known and the respective heal period
+ *         has expired (the expired timestamps are reset to "now");
+ *         CTR_CTX_ERROR when the current time cannot be read or the link
+ *         cannot be added to the list.
+ */
+
+static inline ctr_heal_ret_val_t
+add_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+    ctr_heal_ret_val_t ret_val = CTR_TRY_NO_HEAL;
+    int ret = -1;
+    gf_ctr_local_t *ctr_local = NULL;
+    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
+    ctr_hard_link_t *ctr_hard_link = NULL;
+    gf_ctr_private_t *_priv = NULL;
+    gfdb_time_t current_time = {0};
+
+    GF_ASSERT(frame);
+    GF_ASSERT(this);
+    GF_ASSERT(inode);
+    GF_ASSERT(this->private);
+
+    _priv = this->private;
+
+    ctr_local = frame->local;
+    if (!ctr_local) {
+        goto out;
+    }
+
+    ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
+    if (!ctr_xlator_ctx) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
+               "Failed accessing ctr inode context");
+        goto out;
+    }
+
+    LOCK(&ctr_xlator_ctx->lock);
+
+    /* Check if the hard link already exists
+     * in the ctr inode context */
+    ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx,
+                                             CTR_DB_REC(ctr_local).pargfid,
+                                             CTR_DB_REC(ctr_local).file_name);
+    /* Already tracked: nothing to add, only check the heal timers */
+    if (ctr_hard_link) {
+        ret = gettimeofday(&current_time, NULL);
+        if (ret == -1) {
+            gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
+            ret_val = CTR_CTX_ERROR;
+            goto unlock;
+        }
+
+        /* Each expired timer is restarted from "now" and reported to the
+         * caller as a bit in the return value */
+        if (__is_hardlink_expired(ctr_hard_link, _priv, &current_time)) {
+            ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
+            ret_val = ret_val | CTR_TRY_HARDLINK_HEAL;
+        }
+
+        if (__is_inode_expired(ctr_xlator_ctx, _priv, &current_time)) {
+            ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
+            ret_val = ret_val | CTR_TRY_INODE_HEAL;
+        }
+
+        goto unlock;
+    }
+
+    /* Add the hard link to the list */
+    ret = ctr_add_hard_link(this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
+                            CTR_DB_REC(ctr_local).file_name);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED,
+               "Failed to add hardlink to the ctr inode context");
+        ret_val = CTR_CTX_ERROR;
+        goto unlock;
+    }
+
+    ret_val = CTR_TRY_NO_HEAL;
+unlock:
+    UNLOCK(&ctr_xlator_ctx->lock);
+out:
+    return ret_val;
+}
|
|
+
|
|
+/* Drop the hard link described by frame->local from the inode's CTR context.
+ *
+ * Returns -1 when frame->local is NULL, 0 when the inode has no CTR context
+ * or the link was removed, and otherwise the non-zero value returned by
+ * ctr_delete_hard_link(). */
+static inline int
+delete_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+    int ret = -1;
+    gf_ctr_local_t *ctr_local = NULL;
+    ctr_xlator_ctx_t *inode_ctx = NULL;
+
+    GF_ASSERT(frame);
+    GF_ASSERT(this);
+    GF_ASSERT(inode);
+
+    ctr_local = frame->local;
+    if (!ctr_local)
+        return ret;
+
+    inode_ctx = get_ctr_xlator_ctx(this, inode);
+    if (!inode_ctx) {
+        /* No ctr inode context, hence no list to remove the link from */
+        return 0;
+    }
+
+    ret = ctr_delete_hard_link(this, inode_ctx, CTR_DB_REC(ctr_local).pargfid,
+                               CTR_DB_REC(ctr_local).file_name);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
+               "Failed to delete hard link");
+        return ret;
+    }
+
+    return 0;
+}
|
|
+
|
|
+/* Replace the old (parent GFID, name) pair of a hard link with the new pair
+ * in the inode's CTR context; both pairs are taken from frame->local.
+ *
+ * The inode context is created when it does not exist yet.
+ *
+ * Returns 0 on success, -1 when frame->local is NULL or the inode context
+ * cannot be obtained, and otherwise the non-zero value returned by
+ * ctr_update_hard_link(). */
+static inline int
+update_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+    int ret = -1;
+    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
+    gf_ctr_local_t *ctr_local = NULL;
+
+    GF_ASSERT(frame);
+    GF_ASSERT(this);
+    GF_ASSERT(inode);
+
+    ctr_local = frame->local;
+    if (!ctr_local) {
+        goto out;
+    }
+
+    ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
+    if (!ctr_xlator_ctx) {
+        gf_msg(this->name, GF_LOG_ERROR, 0,
+               CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
+               "Failed accessing ctr inode context");
+        goto out;
+    }
+
+    ret = ctr_update_hard_link(
+        this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
+        CTR_DB_REC(ctr_local).file_name, CTR_DB_REC(ctr_local).old_pargfid,
+        CTR_DB_REC(ctr_local).old_file_name);
+    if (ret) {
+        /* Report this as an update failure; the message ID and text used
+         * to be those of the delete path. */
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED,
+               "Failed to update hard link");
+        goto out;
+    }
+
+    ret = 0;
+
+out:
+    return ret;
+}
|
|
+
|
|
+/******************************************************************************
|
|
+ *
|
|
+ * CTR xlator init related functions
|
|
+ *
|
|
+ *
|
|
+ * ****************************************************************************/
|
|
+int
|
|
+extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type);
|
|
+
|
|
+int
|
|
+extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv);
|
|
+
|
|
+#endif
|
|
diff --git a/xlators/features/changetimerecorder/src/ctr-messages.h b/xlators/features/changetimerecorder/src/ctr-messages.h
|
|
new file mode 100644
|
|
index 0000000..23adf0a
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/ctr-messages.h
|
|
@@ -0,0 +1,61 @@
|
|
+/*
|
|
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+ */
|
|
+
|
|
+#ifndef _CTR_MESSAGES_H_
|
|
+#define _CTR_MESSAGES_H_
|
|
+
|
|
+#include <glusterfs/glfs-message-id.h>
|
|
+
|
|
+/* To add new message IDs, append new identifiers at the end of the list.
|
|
+ *
|
|
+ * Never remove a message ID. If it's not used anymore, you can rename it or
|
|
+ * leave it as it is, but not delete it. This is to prevent reutilization of
|
|
+ * IDs by other messages.
|
|
+ *
|
|
+ * The component name must match one of the entries defined in
|
|
+ * glfs-message-id.h.
|
|
+ */
|
|
+
|
|
+GLFS_MSGID(
|
|
+ CTR, CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
|
|
+ CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
|
|
+ CTR_MSG_INSERT_LINK_WIND_FAILED, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
|
|
+ CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
|
|
+ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
|
|
+ CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
|
|
+ CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
|
|
+ CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
|
|
+ CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
|
|
+ CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
|
|
+ CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
|
|
+ CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, CTR_MSG_INSERT_RENAME_WIND_FAILED,
|
|
+ CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
|
|
+ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, CTR_MSG_ADD_HARDLINK_FAILED,
|
|
+ CTR_MSG_DELETE_HARDLINK_FAILED, CTR_MSG_UPDATE_HARDLINK_FAILED,
|
|
+ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
|
|
+ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
|
|
+ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, CTR_MSG_INSERT_UNLINK_WIND_FAILED,
|
|
+ CTR_MSG_XDATA_NULL, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
|
|
+ CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
|
|
+ CTR_MSG_INSERT_MKNOD_WIND_FAILED, CTR_MSG_INSERT_CREATE_WIND_FAILED,
|
|
+ CTR_MSG_INSERT_CREATE_UNWIND_FAILED, CTR_MSG_INSERT_RECORD_WIND_FAILED,
|
|
+ CTR_MSG_INSERT_READV_WIND_FAILED, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
|
|
+ CTR_MSG_SET, CTR_MSG_FATAL_ERROR, CTR_MSG_DANGLING_VOLUME,
|
|
+ CTR_MSG_CALLOC_FAILED, CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
|
|
+ CTR_MSG_INIT_DB_PARAMS_FAILED, CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
|
|
+ CTR_MSG_MEM_ACC_INIT_FAILED, CTR_MSG_CLOSE_DB_CONN_FAILED,
|
|
+ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, CTR_MSG_WRONG_FOP_PATH,
|
|
+ CTR_MSG_CONSTRUCT_DB_PATH_FAILED, CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
|
|
+ CTR_MSG_XLATOR_DISABLED, CTR_MSG_HARDLINK_MISSING_IN_LIST,
|
|
+ CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, CTR_MSG_INIT_LOCK_FAILED,
|
|
+ CTR_MSG_COPY_FAILED, CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
|
|
+ CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, CTR_MSG_NULL_LOCAL);
|
|
+
|
|
+#endif /* !_CTR_MESSAGES_H_ */
|
|
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
|
|
new file mode 100644
|
|
index 0000000..b6b66d5
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
|
|
@@ -0,0 +1,362 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#include "ctr-xlator-ctx.h"
|
|
+#include "ctr-messages.h"
|
|
+#include <time.h>
|
|
+#include <sys/time.h>
|
|
+
|
|
+/* Non-zero when the list node embedded in ctr_hard_link has identical next
+ * and prev pointers. The argument is parenthesized so that any pointer
+ * expression can be passed; note that it is evaluated twice. */
+#define IS_THE_ONLY_HARDLINK(ctr_hard_link)                                    \
+    ((ctr_hard_link)->list.next == (ctr_hard_link)->list.prev)
|
|
+
|
|
+/* Release a hard link object and its base name, then reset the caller's
+ * pointer to NULL.
+ *
+ * ctr_hard_link must not be NULL; *ctr_hard_link may be NULL, in which case
+ * nothing is done. The object is expected to be already unlinked from any
+ * list by the caller. */
+static void
+fini_ctr_hard_link(ctr_hard_link_t **ctr_hard_link)
+{
+    GF_ASSERT(ctr_hard_link);
+
+    /* Nothing to free. The test used to be inverted: it returned early for
+     * every valid object (leaking it) and dereferenced NULL otherwise. */
+    if (!*ctr_hard_link)
+        return;
+
+    GF_FREE((*ctr_hard_link)->base_name);
+    GF_FREE(*ctr_hard_link);
+    *ctr_hard_link = NULL;
+}
|
|
+
|
|
+/* Linear search of the inode context's hard link list for the entry whose
+ * parent GFID equals pgfid and whose base name equals base_name.
+ *
+ * Returns the matching entry, or NULL when there is none or when pgfid or
+ * base_name is NULL.
+ *
+ * Please lock the ctr_xlator_ctx before using this function */
+ctr_hard_link_t *
+ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
+                         uuid_t pgfid, const char *base_name)
+{
+    ctr_hard_link_t *entry = NULL;
+
+    GF_ASSERT(this);
+    GF_ASSERT(ctr_xlator_ctx);
+
+    if (!pgfid || !base_name)
+        return NULL;
+
+    list_for_each_entry(entry, &ctr_xlator_ctx->hardlink_list, list)
+    {
+        if (gf_uuid_compare(entry->pgfid, pgfid) != 0)
+            continue;
+        /* Entries without a name can never match */
+        if (!entry->base_name)
+            continue;
+        if (strcmp(entry->base_name, base_name) == 0)
+            return entry;
+    }
+
+    return NULL;
+}
|
|
+
|
|
+/* Append a new hard link (pgfid, base_name) to the inode context's list and
+ * stamp it with the current time as the start of its heal period.
+ *
+ * No check for an existing identical entry is made here.
+ *
+ * Returns 0 on success and a negative value when pgfid or base_name is NULL,
+ * when memory cannot be allocated, or when the current time cannot be read.
+ * On failure nothing is added to the list and nothing is leaked.
+ *
+ * Please lock the ctr_xlator_ctx before using this function */
+int
+ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
+                  uuid_t pgfid, const char *base_name)
+{
+    int ret = -1;
+    ctr_hard_link_t *ctr_hard_link = NULL;
+    struct timeval current_time = {0};
+
+    GF_ASSERT(this);
+    GF_ASSERT(ctr_xlator_ctx);
+
+    if (pgfid == NULL || base_name == NULL)
+        goto out;
+
+    ctr_hard_link = GF_CALLOC(1, sizeof(*ctr_hard_link), gf_ctr_mt_hard_link_t);
+    if (!ctr_hard_link) {
+        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
+               "Failed allocating "
+               "ctr_hard_link");
+        goto out;
+    }
+
+    /* Initialize the ctr_hard_link object and
+     * assign the values: parent GFID and basename */
+    INIT_LIST_HEAD(&ctr_hard_link->list);
+    gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
+    ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
+               "Failed copying basename"
+               "to ctr_hard_link");
+        goto error;
+    }
+
+    ret = gettimeofday(&current_time, NULL);
+    if (ret == -1) {
+        gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
+        goto error;
+    }
+
+    /* Add the hard link to the list */
+    list_add_tail(&ctr_hard_link->list, &ctr_xlator_ctx->hardlink_list);
+
+    ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
+
+    ret = 0;
+    goto out;
+error:
+    /* base_name is still NULL (from GF_CALLOC) when the copy itself failed,
+     * but it is allocated when gettimeofday() failed; release it in both
+     * cases so the name is not leaked together with the object. */
+    GF_FREE(ctr_hard_link->base_name);
+    GF_FREE(ctr_hard_link);
+out:
+    return ret;
+}
|
|
+
|
|
+/* Unlink *ctr_hard_link from the list it is on and hand it over to
+ * fini_ctr_hard_link() for release. Neither ctr_hard_link nor
+ * *ctr_hard_link may be NULL. */
+static void
+__delete_hard_link_from_list(ctr_hard_link_t **ctr_hard_link)
+{
+    ctr_hard_link_t *victim = NULL;
+
+    GF_ASSERT(ctr_hard_link);
+    GF_ASSERT(*ctr_hard_link);
+
+    victim = *ctr_hard_link;
+
+    /* Detach first, release afterwards */
+    list_del(&victim->list);
+    fini_ctr_hard_link(ctr_hard_link);
+}
|
|
+
|
|
+/* Remove the hard link (pgfid, base_name) from the inode context's list.
+ *
+ * Takes and releases ctr_xlator_ctx->lock itself.
+ *
+ * Returns 0 when the link was found and removed, -1 when it is not in the
+ * list (an error is logged in that case). */
+int
+ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
+                     uuid_t pgfid, const char *base_name)
+{
+    int ret = -1;
+    ctr_hard_link_t *found = NULL;
+
+    GF_ASSERT(this);
+    GF_ASSERT(ctr_xlator_ctx);
+
+    LOCK(&ctr_xlator_ctx->lock);
+    {
+        found = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, pgfid,
+                                         base_name);
+        if (found) {
+            __delete_hard_link_from_list(&found);
+            found = NULL;
+            ret = 0;
+        } else {
+            gf_msg(this->name, GF_LOG_ERROR, 0,
+                   CTR_MSG_HARDLINK_MISSING_IN_LIST,
+                   "Hard link doesn't exist in the list");
+        }
+    }
+    UNLOCK(&ctr_xlator_ctx->lock);
+
+    return ret;
+}
|
|
+
|
|
+/* Change a tracked hard link from (old_pgfid, old_base_name) to
+ * (pgfid, base_name) and restart its heal period.
+ *
+ * When the old link is not in the list, the new link is simply added.
+ * When the new base name cannot be copied, the entry is removed from the
+ * list altogether. A failure to read the current time is logged and the
+ * heal period is set to 0, but the update itself still succeeds.
+ *
+ * Takes and releases ctr_xlator_ctx->lock itself.
+ *
+ * Returns 0 on success, otherwise the non-zero value of the failing step
+ * (ctr_add_hard_link() or gf_asprintf()). */
+int
+ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
+                     uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
+                     const char *old_base_name)
+{
+    int ret = -1;
+    ctr_hard_link_t *ctr_hard_link = NULL;
+    struct timeval current_time = {0};
+
+    GF_ASSERT(this);
+    GF_ASSERT(ctr_xlator_ctx);
+
+    LOCK(&ctr_xlator_ctx->lock);
+
+    /* Check if the hard link is present */
+    ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, old_pgfid,
+                                             old_base_name);
+    if (!ctr_hard_link) {
+        gf_msg_trace(this->name, 0,
+                     "Hard link doesn't exist"
+                     " in the list");
+        /* Since the hard link is not present in the list
+         * we add it to the list */
+        ret = ctr_add_hard_link(this, ctr_xlator_ctx, pgfid, base_name);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, 0,
+                   CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED,
+                   "Failed adding hard link to the list");
+            goto out;
+        }
+        ret = 0;
+        goto out;
+    }
+
+    /* update the hard link */
+    gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
+    GF_FREE(ctr_hard_link->base_name);
+    /* Do not keep the freed pointer around: if the copy below fails, the
+     * entry is handed to the cleanup path, which must not see (and free
+     * again) a stale base_name. */
+    ctr_hard_link->base_name = NULL;
+    ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
+               "Failed copying basename"
+               "to ctr_hard_link");
+        /* delete the corrupted entry */
+        __delete_hard_link_from_list(&ctr_hard_link);
+        ctr_hard_link = NULL;
+        goto out;
+    }
+
+    ret = gettimeofday(&current_time, NULL);
+    if (ret == -1) {
+        gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
+        ctr_hard_link->hardlink_heal_period = 0;
+    } else {
+        ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
+    }
+
+    ret = 0;
+
+out:
+    UNLOCK(&ctr_xlator_ctx->lock);
+
+    return ret;
+}
|
|
+
|
|
+/* Remove every hard link from the inode context's list.
+ *
+ * Takes and releases ctr_xlator_ctx->lock itself. Always returns 0. */
+static int
+ctr_delete_all_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx)
+{
+    ctr_hard_link_t *entry = NULL;
+    ctr_hard_link_t *next = NULL;
+
+    GF_ASSERT(ctr_xlator_ctx);
+
+    LOCK(&ctr_xlator_ctx->lock);
+    {
+        /* The _safe variant is needed because entries are unlinked
+         * while walking the list */
+        list_for_each_entry_safe(entry, next, &ctr_xlator_ctx->hardlink_list,
+                                 list)
+        {
+            __delete_hard_link_from_list(&entry);
+        }
+    }
+    UNLOCK(&ctr_xlator_ctx->lock);
+
+    return 0;
+}
|
|
+
|
|
+/* Fetch the CTR context stored for this xlator in the inode.
+ *
+ * Returns the context, or NULL when __inode_ctx_get() fails or the stored
+ * value is 0.
+ *
+ * Please lock the inode before using this function */
+static ctr_xlator_ctx_t *
+__get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
+{
+    uint64_t stored = 0;
+
+    GF_ASSERT(this);
+    GF_ASSERT(inode);
+
+    if (__inode_ctx_get(inode, this, &stored) < 0)
+        return NULL;
+
+    if (stored == 0)
+        return NULL;
+
+    return (ctr_xlator_ctx_t *)(long)stored;
+}
|
|
+
|
|
+/* Return the CTR context of the inode, creating and registering it when the
+ * inode does not have one yet.
+ *
+ * A new context gets its lock and empty hard link list initialised and its
+ * inode heal period stamped with the current time. It is stored in the
+ * inode only after all of that succeeded, so a context that is about to be
+ * freed on an error path is never left registered in the inode.
+ *
+ * The whole operation runs under inode->lock.
+ *
+ * Returns the context, or NULL when allocation, lock initialisation,
+ * reading the current time or storing the context in the inode fails. */
+ctr_xlator_ctx_t *
+init_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
+{
+    int ret = -1;
+    uint64_t _addr = 0;
+    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
+    struct timeval current_time = {0};
+    gf_boolean_t lock_ready = _gf_false;
+
+    GF_ASSERT(this);
+    GF_ASSERT(inode);
+
+    LOCK(&inode->lock);
+    {
+        /* Reuse the context when the inode already has one */
+        ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
+        if (ctr_xlator_ctx) {
+            ret = 0;
+            goto out;
+        }
+        ctr_xlator_ctx = GF_CALLOC(1, sizeof(*ctr_xlator_ctx),
+                                   gf_ctr_mt_xlator_ctx);
+        if (!ctr_xlator_ctx)
+            goto out;
+
+        ret = LOCK_INIT(&ctr_xlator_ctx->lock);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, ret, CTR_MSG_INIT_LOCK_FAILED,
+                   "Failed init lock %s", strerror(ret));
+            goto out;
+        }
+        lock_ready = _gf_true;
+
+        INIT_LIST_HEAD(&ctr_xlator_ctx->hardlink_list);
+
+        ret = gettimeofday(&current_time, NULL);
+        if (ret == -1) {
+            gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
+            goto out;
+        }
+
+        ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
+
+        /* Publish last: from here on the inode owns the context */
+        _addr = (uint64_t)(uintptr_t)ctr_xlator_ctx;
+
+        ret = __inode_ctx_set(inode, this, &_addr);
+        if (ret) {
+            goto out;
+        }
+    }
+    ret = 0;
+out:
+    if (ret) {
+        /* Only a freshly allocated, unpublished context can reach this
+         * point; tear down whatever was set up before freeing it */
+        if (lock_ready)
+            LOCK_DESTROY(&ctr_xlator_ctx->lock);
+        GF_FREE(ctr_xlator_ctx);
+        ctr_xlator_ctx = NULL;
+    }
+
+    UNLOCK(&inode->lock);
+
+    return ctr_xlator_ctx;
+}
|
|
+
|
|
+/* Detach the CTR context from the inode and destroy it: all tracked hard
+ * links are removed, the context lock is destroyed and the context is
+ * freed. Does nothing when the inode has no CTR context. */
+void
+fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
+{
+    uint64_t stored = 0;
+    ctr_xlator_ctx_t *ctx = NULL;
+
+    inode_ctx_del(inode, this, &stored);
+    if (stored == 0)
+        return;
+
+    ctx = (ctr_xlator_ctx_t *)(long)stored;
+
+    if (ctr_delete_all_hard_link(this, ctx)) {
+        gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
+               "Failed deleting all "
+               "hard links from inode context");
+    }
+
+    LOCK_DESTROY(&ctx->lock);
+    GF_FREE(ctx);
+}
|
|
+
|
|
+/* Locked wrapper around __get_ctr_xlator_ctx(): looks up the inode's CTR
+ * context while holding inode->lock. Returns NULL when there is none. */
+ctr_xlator_ctx_t *
+get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
+{
+    ctr_xlator_ctx_t *found = NULL;
+
+    LOCK(&inode->lock);
+    {
+        found = __get_ctr_xlator_ctx(this, inode);
+    }
+    UNLOCK(&inode->lock);
+
+    return found;
+}
|
|
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
|
|
new file mode 100644
|
|
index 0000000..4e3bf7e
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
|
|
@@ -0,0 +1,68 @@
|
|
+/*
|
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#ifndef __CTR_XLATOR_CTX_H
|
|
+#define __CTR_XLATOR_CTX_H
|
|
+
|
|
+#include <glusterfs/xlator.h>
|
|
+#include "ctr_mem_types.h"
|
|
+#include <glusterfs/iatt.h>
|
|
+#include <glusterfs/glusterfs.h>
|
|
+#include <glusterfs/xlator.h>
|
|
+#include <glusterfs/logging.h>
|
|
+#include <glusterfs/locking.h>
|
|
+#include <glusterfs/common-utils.h>
|
|
+#include <time.h>
|
|
+#include <sys/time.h>
|
|
+
|
|
+/* One hard link of an inode, identified by parent GFID and base name. */
+typedef struct ctr_hard_link {
+    /* Parent GFID of the link */
+    uuid_t pgfid;
+    /* Name of the link; allocated with gf_asprintf(), freed with GF_FREE() */
+    char *base_name;
+    /* Hardlink expiry: tv_sec timestamp from which the expiry period is
+     * measured; once the period has passed a database heal is attempted. */
+    uint64_t hardlink_heal_period;
+    /* Node in ctr_xlator_ctx_t.hardlink_list */
+    struct list_head list;
+} ctr_hard_link_t;
|
|
+
|
|
+/* Per-inode context kept by the CTR xlator. */
+typedef struct ctr_xlator_ctx {
+    /* List of ctr_hard_link_t: the hard links that were looked up.
+     * NOTE: This doesn't represent all physical hardlinks of the inode */
+    struct list_head hardlink_list;
+    /* tv_sec timestamp from which the inode heal timeout is measured */
+    uint64_t inode_heal_period;
+    /* Protects hardlink_list and the heal timestamps */
+    gf_lock_t lock;
+} ctr_xlator_ctx_t;
|
|
+
|
|
+ctr_hard_link_t *
|
|
+ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
|
|
+ uuid_t pgfid, const char *base_name);
|
|
+
|
|
+int
|
|
+ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
|
|
+ uuid_t pgfid, const char *base_name);
|
|
+
|
|
+int
|
|
+ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
|
|
+ uuid_t pgfid, const char *base_name);
|
|
+
|
|
+int
|
|
+ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
|
|
+ uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
|
|
+ const char *old_base_name);
|
|
+
|
|
+ctr_xlator_ctx_t *
|
|
+get_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
|
|
+
|
|
+ctr_xlator_ctx_t *
|
|
+init_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
|
|
+
|
|
+void
|
|
+fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
|
|
+
|
|
+#endif
|
|
diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/changetimerecorder/src/ctr_mem_types.h
|
|
new file mode 100644
|
|
index 0000000..7b8f531
|
|
--- /dev/null
|
|
+++ b/xlators/features/changetimerecorder/src/ctr_mem_types.h
|
|
@@ -0,0 +1,22 @@
|
|
+/*
|
|
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
|
|
+ This file is part of GlusterFS.
|
|
+
|
|
+ This file is licensed to you under your choice of the GNU Lesser
|
|
+ General Public License, version 3 or any later version (LGPLv3 or
|
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all
|
|
+ cases as published by the Free Software Foundation.
|
|
+*/
|
|
+
|
|
+#ifndef __CTR_MEM_TYPES_H__
|
|
+#define __CTR_MEM_TYPES_H__
|
|
+
|
|
+#include "gfdb_mem-types.h"
|
|
+
|
|
+/* Memory accounting types of the CTR xlator; numbering continues right
+ * after the last gfdb type (gfdb_mt_end). */
+enum gf_ctr_mem_types_ {
+    /* presumably the xlator private data (gf_ctr_private_t) - TODO confirm */
+    gf_ctr_mt_private_t = gfdb_mt_end + 1,
+    /* ctr_xlator_ctx_t allocations */
+    gf_ctr_mt_xlator_ctx,
+    /* ctr_hard_link_t allocations */
+    gf_ctr_mt_hard_link_t,
+    /* end marker, keep last */
+    gf_ctr_mt_end
+};
|
|
+#endif
|
|
--
|
|
1.8.3.1
|
|
|