Compare commits

...

No commits in common. "c8s" and "c8" have entirely different histories.
c8s ... c8

615 changed files with 18 additions and 24090 deletions

1
.gitignore vendored
View File

@ -1,2 +1 @@
SOURCES/glusterfs-6.0.tar.gz SOURCES/glusterfs-6.0.tar.gz
/glusterfs-6.0.tar.gz

1
.glusterfs.metadata Normal file
View File

@ -0,0 +1 @@
c9d75f37e00502a10f64cd4ba9aafb17552e0800 SOURCES/glusterfs-6.0.tar.gz

View File

@ -1,86 +0,0 @@
From 3eaf937e69fe4219738c93d39af1cc909b1ee3f8 Mon Sep 17 00:00:00 2001
From: Rinku Kothiya <rkothiya@redhat.com>
Date: Fri, 23 Apr 2021 09:30:35 +0000
Subject: [PATCH 544/584] tests: avoid empty paths in environment variables
Many variables containing paths in env.rc.in are defined in a way
that leaves a trailing ':' in the variable when the previous value
was empty or undefined.
In the particular case of the 'LD_LIBRARY_PATH' variable, this causes
the system to look for dynamic libraries in the current working
directory. When this directory is inside a Gluster mount point, a
significant delay is caused each time a program is run (and testing
framework can run lots of programs for each test).
This patch prevents variables containing paths from ending with
a trailing ':'.
Backport of :
>Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2349
>Fixes: #2348
>Change-Id: I669f5a78e14f176c0a58824ba577330989d84769
>Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
>Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
Change-Id: Ie903ca443aa4789553ac4687818a7f69c113af41
Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
---
tests/env.rc.in | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/tests/env.rc.in b/tests/env.rc.in
index 1f0ca88..2d8ff0e 100644
--- a/tests/env.rc.in
+++ b/tests/env.rc.in
@@ -2,34 +2,31 @@ prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
-PATH=@sbindir@:$PATH
+PATH=@bindir@:@sbindir@${PATH:+:${PATH}}
export PATH
GLUSTERD_PIDFILEDIR=@localstatedir@/run/gluster
export GLUSTERD_PIDFILEDIR
-LD_LIBRARY_PATH=@libdir@:$LD_LIBRARY_PATH
+LD_LIBRARY_PATH=@libdir@${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
export LD_LIBRARY_PATH
-LIBRARY_PATH=@libdir@:$LIBRARY_PATH
+LIBRARY_PATH=@libdir@${LIBRARY_PATH:+:${LIBRARY_PATH}}
export LIBRARY_PATH
-CPATH=@includedir@:$CPATH
+CPATH=@includedir@${CPATH:+:${CPATH}}
export CPATH
GLUSTERD_WORKDIR=@GLUSTERD_WORKDIR@
export GLUSTERD_WORKDIR
-PKG_CONFIG_PATH=@pkgconfigdir@:$PKG_CONFIG_PATH
+PKG_CONFIG_PATH=@pkgconfigdir@${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}
export PKG_CONFIG_PATH
-PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
-export PYTHONPATH
-
PYTHON=@PYTHON@
export PYTHON
-PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
+PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@${PYTHONPATH:+:${PYTHONPATH}}
export PYTHONPATH
GLUSTER_CMD_DIR=@sbindir@
@@ -42,4 +39,4 @@ RUN_NFS_TESTS=@BUILD_GNFS@
export RUN_NFS_TESTS
GLUSTER_XLATOR_DIR=@libdir@/glusterfs/@PACKAGE_VERSION@/xlator
-export GLUSTER_XLATOR_DIR
\ No newline at end of file
+export GLUSTER_XLATOR_DIR
--
1.8.3.1

View File

@ -1,32 +0,0 @@
From 6b340470e01dc177767fae990cf19037202140b7 Mon Sep 17 00:00:00 2001
From: Tamar Shacked <tshacked@redhat.com>
Date: Mon, 31 May 2021 21:27:41 +0300
Subject: [PATCH 545/584] tests: Excluded tests for unsupported components
Quota and Tier are deprecated from RHGS-3.5.5.
Stop running regression tests for them.
Label: DOWNSTREAM ONLY
Signed-off-by: Tamar Shacked <tshacked@redhat.com>
Change-Id: I3ca1aacba9a31129f5e68fcffdd80e69e51f7bcc
---
run-tests.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/run-tests.sh b/run-tests.sh
index c835d93..5cc18b0 100755
--- a/run-tests.sh
+++ b/run-tests.sh
@@ -349,7 +349,7 @@ function run_tests()
fi
for t in $(find ${regression_testsdir}/tests -name '*.t' \
- | LC_COLLATE=C sort) ; do
+ | egrep -v "tier|quota" | LC_COLLATE=C sort) ; do
old_cores=$(ls /*-*.core 2> /dev/null | wc -l)
total_tests=$((total_tests+1))
if match $t "$@" ; then
--
1.8.3.1

View File

@ -1,36 +0,0 @@
From 6ff3314f24687c8224a5520f9c4d2b3c39e730b7 Mon Sep 17 00:00:00 2001
From: Tamar Shacked <tshacked@redhat.com>
Date: Tue, 1 Jun 2021 13:02:24 +0300
Subject: [PATCH 546/584] Update rfc.sh to rhgs-3.5.5
Signed-off-by: Tamar Shacked <tshacked@redhat.com>
Change-Id: Iff543dc77174f983dd39f9fb7cc5005b49594750
---
rfc.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/rfc.sh b/rfc.sh
index c0559b9..daeff32 100755
--- a/rfc.sh
+++ b/rfc.sh
@@ -18,7 +18,7 @@ done
shift $((OPTIND-1))
-branch="rhgs-3.5.4";
+branch="rhgs-3.5.5";
set_hooks_commit_msg()
{
@@ -315,7 +315,7 @@ main()
if [ -z "${reference}" ]; then
$drier git push $ORIGIN HEAD:refs/for/$branch/rfc;
else
- $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference};
+ $drier git push $ORIGIN HEAD:refs/for/$branch;
fi
}
--
1.8.3.1

View File

@ -1,47 +0,0 @@
From 08c57926118b1ab8fa1fcd5b16913ff22d97d065 Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Wed, 25 Sep 2019 19:50:27 +0530
Subject: [PATCH 547/584] perf/write-behind: Clear frame->local on conflict
error
WB saves the wb_inode in frame->local for the truncate and
ftruncate fops. This value is not cleared in case of error
on a conflicting write request. FRAME_DESTROY finds a non-null
frame->local and tries to free it using mem_put. However,
wb_inode is allocated using GF_CALLOC, causing the
process to crash.
credit: vpolakis@gmail.com
Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/23485/
>Change-Id: I217f61470445775e05145aebe44c814731c1b8c5
>Fixes: bz#1753592
>Signed-off-by: N Balachandran <nbalacha@redhat.com>
BUG: 1917488
Change-Id: I217f61470445775e05145aebe44c814731c1b8c5
Signed-off-by: Sunil Kumar H G <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244277
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
xlators/performance/write-behind/src/write-behind.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index 90a0bcf..31ab723 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -1523,6 +1523,10 @@ __wb_handle_failed_conflict(wb_request_t *req, wb_request_t *conflict,
*/
req->op_ret = -1;
req->op_errno = conflict->op_errno;
+ if ((req->stub->fop == GF_FOP_TRUNCATE) ||
+ (req->stub->fop == GF_FOP_FTRUNCATE)) {
+ req->stub->frame->local = NULL;
+ }
list_del_init(&req->todo);
list_add_tail(&req->winds, tasks);
--
1.8.3.1

View File

@ -1,49 +0,0 @@
From cb7e72bce8b6a46605753b72919c1c839ecb4cc9 Mon Sep 17 00:00:00 2001
From: root <root@sacharya.remote.csb>
Date: Thu, 3 Jun 2021 12:08:24 +0530
Subject: [PATCH 548/584] Add tar as dependency to geo-rep rpm for RHEL 8.3 and
above
Reason: from RHEL 8.3, tar is not bundled by default
>Fixes: #1849
>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
>Change-Id: Ic1424e0550cef6a78e3e9e7b42665ab01016436f
Upstream Patch: https://github.com/gluster/glusterfs/pull/1850
BUG: 1901468
Change-Id: Ic1424e0550cef6a78e3e9e7b42665ab01016436f
Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244896
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Srijan Sivakumar <ssivakum@redhat.com>
---
glusterfs.spec.in | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 2be7677..424f4ab 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -521,6 +521,9 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release}
Requires: rsync
Requires: util-linux
Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+%if ( 0%{?rhel} && ( ( 0%{?rhel} == 8 && 0%{?rhel_minor_version} >= 3 ) || 0%{?rhel} >= 9 ) )
+Requires: tar
+%endif
# required for setting selinux bools
%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
Requires(post): policycoreutils-python-utils
@@ -1982,6 +1985,8 @@ fi
%endif
%changelog
+* Thu Nov 26 2020 Shwetha K Acharya <sacharya@redhat.com>
+- Add tar as dependency to georeplication rpm for RHEL version >= 8.3
* Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
- added requires policycoreutils-python-utils on rhel8 for geo-replication
--
1.8.3.1

View File

@ -1,45 +0,0 @@
From f90c13912a9c64e4479b55fee4ba4ac50e509302 Mon Sep 17 00:00:00 2001
From: schaffung <ssivakum@redhat.com>
Date: Sat, 9 Jan 2021 15:41:15 +0530
Subject: [PATCH 549/584] geo-rep : Change in attribute for getting function
name in py 3 (#1900)
Issue: The schedule_geo-rep script uses `func_name` to obtain
the name of the function being referred to but from python3
onwards, the attribute has been changed to `__name__`.
Code Change:
Changing `func_name` to `__name__`.
>Fixes: #1898
>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
>Change-Id: I4ed69a06cffed9db17c8f8949b8000c74be1d717
Upstream Patch : https://github.com/gluster/glusterfs/pull/1900
BUG: 1903911
Change-Id: I4ed69a06cffed9db17c8f8949b8000c74be1d717
Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244570
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Shwetha Acharya <sacharya@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/geo-rep/schedule_georep.py.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in
index ac93716..9bb3df5 100644
--- a/extras/geo-rep/schedule_georep.py.in
+++ b/extras/geo-rep/schedule_georep.py.in
@@ -102,7 +102,7 @@ def cache_output_with_args(func):
"""
def wrapper(*args, **kwargs):
global cache_data
- key = "_".join([func.func_name] + list(args))
+ key = "_".join([func.__name__] + list(args))
if cache_data.get(key, None) is None:
cache_data[key] = func(*args, **kwargs)
--
1.8.3.1

View File

@ -1,184 +0,0 @@
From 053bb9c7356eae82b1089582bb2844388ae4df57 Mon Sep 17 00:00:00 2001
From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
Date: Wed, 2 Jun 2021 07:49:12 -0400
Subject: [PATCH 550/584] common-ha: stability fixes for ganesha_grace and
ganesha_mon RAs
Include fixes suggested by ClusterHA devs.
1) It turns out that crm_attribute attrs and attrd_updater attrs really
are one and the same, despite what I was told years ago.
attrs created with crm_attribute ... --lifetime=reboot ... or
attrd_updater are one and the same. As per ClusterHA devs, having an attr
created with crm_attribute ... --lifetime=forever and also
creating/updating the same attr with attrd_updater is a recipe for
weird things to happen that will be difficult to debug.
2) using hostname -s or hostname for node names in crm_attribute and
attrd_updater potentially could use the wrong name if the host has
been renamed; use ocf_local_nodename() (in ocf-shellfuncs) instead.
https://github.com/gluster/glusterfs/issues/2276
https://github.com/gluster/glusterfs/pull/2283
commit 9bd2c697686ec40e2c4f711df961860c8a735baa
Change-Id:If572d396fae9206628714fb2ce00f72e94f2258f
BUG: 1945143
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244593
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/ganesha/ocf/ganesha_grace | 28 +++++++++---------------
extras/ganesha/ocf/ganesha_mon | 47 ++++++++++++++--------------------------
2 files changed, 26 insertions(+), 49 deletions(-)
diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
index 825f716..edc6fa2 100644
--- a/extras/ganesha/ocf/ganesha_grace
+++ b/extras/ganesha/ocf/ganesha_grace
@@ -94,25 +94,21 @@ esac
ganesha_grace_start()
{
local rc=${OCF_ERR_GENERIC}
- local host=$(hostname -s)
+ local host=$(ocf_local_nodename)
- ocf_log debug "ganesha_grace_start()"
- # give ganesha_mon RA a chance to set the crm_attr first
+ ocf_log debug "ganesha_grace_start ${host}"
+ # give ganesha_mon RA a chance to set the attr first
# I mislike the sleep, but it's not clear that looping
# with a small sleep is necessarily better
# start has a 40sec timeout, so a 5sec sleep here is okay
sleep 5
- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
if [ $? -ne 0 ]; then
- host=$(hostname)
- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null )
- if [ $? -ne 0 ]; then
- ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
- fi
+ ocf_log info "grace start: attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
fi
# Three possibilities:
- # 1. There is no attribute at all and attr_updater returns
+ # 1. There is no attribute at all and attrd_updater returns
# a zero length string. This happens when
# ganesha_mon::monitor hasn't run at least once to set
# the attribute. The assumption here is that the system
@@ -164,17 +160,13 @@ ganesha_grace_notify()
ganesha_grace_monitor()
{
- local host=$(hostname -s)
+ local host=$(ocf_local_nodename)
- ocf_log debug "monitor"
+ ocf_log debug "ganesha_grace monitor ${host}"
- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
if [ $? -ne 0 ]; then
- host=$(hostname)
- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
- if [ $? -ne 0 ]; then
- ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
- fi
+ ocf_log info "attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
fi
# if there is no attribute (yet), maybe it's because
diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
index 2b4a9d6..7fbbf70 100644
--- a/extras/ganesha/ocf/ganesha_mon
+++ b/extras/ganesha/ocf/ganesha_mon
@@ -124,7 +124,6 @@ ganesha_mon_stop()
ganesha_mon_monitor()
{
- local host=$(hostname -s)
local pid_file="/var/run/ganesha.pid"
local rhel6_pid_file="/var/run/ganesha.nfsd.pid"
local proc_pid="/proc/"
@@ -141,31 +140,27 @@ ganesha_mon_monitor()
if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then
- attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1
+ attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1
if [ $? -ne 0 ]; then
- ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed"
+ ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1 failed"
fi
# ganesha_grace (nfs-grace) RA follows grace-active attr
# w/ constraint location
- attrd_updater -n ${OCF_RESKEY_grace_active} -v 1
+ attrd_updater --name ${OCF_RESKEY_grace_active} -v 1
if [ $? -ne 0 ]; then
- ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed"
+ ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_grace_active} -v 1 failed"
fi
# ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace)
- # track grace-active crm_attr (attr != crm_attr)
- # we can't just use the attr as there's no way to query
- # its value in RHEL6 pacemaker
-
- crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
- if [ $? -ne 0 ]; then
- host=$(hostname)
- crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
- if [ $? -ne 0 ]; then
- ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed"
- fi
- fi
+ # track grace-active attr.
+ #
+ # Originally we were told that attrs set with attrd_updater
+ # are different/distinct than attrs set with crm_attribute.
+ # Now, years later, we are told that they are the same and
+ # that the values of attrs set with attrd_updater can be
+ # retrieved with crm_attribute. Or with attrd_updater -Q
+ # now that we no longer have to deal with rhel6.
return ${OCF_SUCCESS}
fi
@@ -182,26 +177,16 @@ ganesha_mon_monitor()
# the remaining ganesha.nfsds into grace before
# initiating the VIP fail-over.
- attrd_updater -D -n ${OCF_RESKEY_grace_active}
- if [ $? -ne 0 ]; then
- ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed"
- fi
-
- host=$(hostname -s)
- crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
+ attrd_updater --delete --name ${OCF_RESKEY_grace_active}
if [ $? -ne 0 ]; then
- host=$(hostname)
- crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
- if [ $? -ne 0 ]; then
- ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed"
- fi
+ ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_grace_active} failed"
fi
sleep ${OCF_RESKEY_grace_delay}
- attrd_updater -D -n ${OCF_RESKEY_ganesha_active}
+ attrd_updater --delete --name ${OCF_RESKEY_ganesha_active}
if [ $? -ne 0 ]; then
- ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed"
+ ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_ganesha_active} failed"
fi
return ${OCF_SUCCESS}
--
1.8.3.1

View File

@ -1,52 +0,0 @@
From fcfd40132624df5e888d53b4a8c4ce1cf7087413 Mon Sep 17 00:00:00 2001
From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
Date: Wed, 2 Jun 2021 07:40:04 -0400
Subject: [PATCH 551/584] common-ha: ensure shared_storage is mounted before
setup (#2296)
If gluster shared-storage isn't mounted, ganesha will fail to start
commit a249b9020d281d0482db0aeb52e8856acd931e02
https://github.com/gluster/glusterfs/issues/2278
https://github.com/gluster/glusterfs/pull/2296
Change-Id: I6ed7044ea6b6c61b013ebe17088bfde311b109b7
BUG: 1918018
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244592
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/ganesha/scripts/ganesha-ha.sh | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index 491c61d..012084f 100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -195,9 +195,22 @@ setup_cluster()
local servers=${3}
local unclean=""
local quorum_policy="stop"
+ local dfresult=""
logger "setting up cluster ${name} with the following ${servers}"
+ # check that shared_storage is mounted
+ dfresult=$(df -T ${HA_VOL_MNT})
+ if [[ -z "${dfresult}" ]]; then
+ logger "gluster shared_storage is not mounted, exiting..."
+ exit 1
+ fi
+
+ if [[ "${dfresult}" != *"fuse.glusterfs"* ]]; then
+ logger "gluster shared_storage is not mounted, exiting..."
+ exit 1
+ fi
+
# pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
if [ $? -ne 0 ]; then
--
1.8.3.1

View File

@ -1,67 +0,0 @@
From e9e1b0bc6e2deaf44190636ab6826065ed3c0392 Mon Sep 17 00:00:00 2001
From: Pranith Kumar Karampuri <pranith.karampuri@phonepe.com>
Date: Wed, 3 Feb 2021 18:10:40 +0530
Subject: [PATCH 552/584] cluster/afr: Change default self-heal-window-size to
1MB (#2068)
At the moment self-heal-window-size is 128KB. This leads to healing data
in 128KB chunks. With the growth of data and the avg file sizes
nowadays, 1MB seems like a better default.
Upstream patch details:
> https://github.com/gluster/glusterfs/pull/2111
> Change-Id: I70c42c83b16c7adb53d6b5762969e878477efb5c
> Fixes: #2067
> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
BUG: 1946171
Change-Id: Icd6a5c02ca16a1a6095f7bc10feed8ddc2505f41
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244557
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/afr/src/afr-self-heal-data.c | 6 ++++++
xlators/cluster/afr/src/afr.c | 6 +++---
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index b97c66b..156cb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -337,6 +337,12 @@ afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
}
block = 128 * 1024 * priv->data_self_heal_window_size;
+ if (HAS_HOLES((&replies[source].poststat))) {
+ /*Reduce the possibility of data-block allocations in case of files
+ * with holes. Correct way to fix it would be to use seek fop while
+ * healing data*/
+ block = 128 * 1024;
+ }
type = afr_data_self_heal_type_get(priv, healed_sinks, source, replies);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 33fe4d8..0956e5a 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -910,12 +910,12 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_INT,
.min = 1,
.max = 1024,
- .default_value = "1",
+ .default_value = "8",
.op_version = {1},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.tags = {"replicate"},
- .description = "Maximum number blocks per file for which self-heal "
- "process would be applied simultaneously."},
+ .description = "Maximum number of 128KB blocks per file for which "
+ "self-heal process would be applied simultaneously."},
{.key = {"metadata-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",
--
1.8.3.1

View File

@ -1,46 +0,0 @@
From 1fa01865eb9bf6a1113669c262fc526ef11f61f2 Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@users.noreply.github.com>
Date: Sat, 6 Feb 2021 01:53:28 +0100
Subject: [PATCH 553/584] cluster/ec: Change self-heal-window-size to 4MiB by
default (#2071)
The current block size used for self-heal by default is 128 KiB. This
requires a significant amount of management requests for a very small
portion of data healed.
With this patch the block size is increased to 4 MiB. For a standard
EC volume configuration of 4+2, this means that each healed block of
a file will update 1 MiB on each brick.
Upstream patch details:
> https://github.com/gluster/glusterfs/pull/2071
> Change-Id: Ifeec4a2d54988017d038085720513c121b03445b
> Updates: #2067
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
BUG: 1946171
Change-Id: I9e3eed2d83c9de54242e6161b2e3951c2f6f8000
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244558
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/ec/src/ec.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 4118c3b..a930089 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -1644,7 +1644,7 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_INT,
.min = 1,
.max = 1024,
- .default_value = "1",
+ .default_value = "32",
.op_version = {GD_OP_VERSION_3_11_0},
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
.tags = {"disperse"},
--
1.8.3.1

View File

@ -1,245 +0,0 @@
From 2cb90b7798fa469f2d7d938ae88733eb1962d63d Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@gmail.com>
Date: Fri, 9 Apr 2021 18:13:30 +0200
Subject: [PATCH 554/584] dht: fix rebalance of sparse files
Current implementation of rebalance for sparse files has a bug that,
in some cases, causes a read of 0 bytes from the source subvolume.
Posix xlator doesn't allow 0 byte reads and fails them with EINVAL,
which causes rebalance to abort the migration.
This patch implements a more robust way of finding data segments in
a sparse file that avoids 0 byte reads, allowing the file to be
migrated successfully.
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2318
> Fixes: #2317
> Change-Id: Iff168dda2fb0f2edf716b21eb04cc2cc8ac3915c
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
BUG: 1957641
Change-Id: Iff168dda2fb0f2edf716b21eb04cc2cc8ac3915c
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244551
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/distribute/issue-2317.t | 29 ++++++++
tests/volume.rc | 4 ++
xlators/cluster/dht/src/dht-rebalance.c | 116 +++++++++++++++++---------------
3 files changed, 93 insertions(+), 56 deletions(-)
create mode 100755 tests/bugs/distribute/issue-2317.t
diff --git a/tests/bugs/distribute/issue-2317.t b/tests/bugs/distribute/issue-2317.t
new file mode 100755
index 0000000..e29d003
--- /dev/null
+++ b/tests/bugs/distribute/issue-2317.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=126
+
+cleanup
+
+TEST glusterd
+TEST ${CLI} volume create ${V0} replica 3 ${H0}:/$B0/${V0}_{0..2}
+TEST ${CLI} volume start ${V0}
+
+TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
+
+# Create several files to make sure that at least some of them should be
+# migrated by rebalance.
+for i in {0..63}; do
+ TEST dd if=/dev/urandom of=${M0}/file.${i} bs=4k count=1
+ TEST dd if=/dev/urandom of=${M0}/file.${i} bs=4k count=1 seek=128
+done
+
+TEST ${CLI} volume add-brick ${V0} ${H0}:${B0}/${V0}_{3..5}
+TEST ${CLI} volume rebalance ${V0} start force
+EXPECT_WITHIN ${REBALANCE_TIMEOUT} "completed" rebalance_status_field "${V0}"
+
+EXPECT "^0$" rebalance_failed_field "${V0}"
+
+cleanup
diff --git a/tests/volume.rc b/tests/volume.rc
index 9a002d9..f5dd0b1 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -75,6 +75,10 @@ function rebalance_status_field {
$CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p
}
+function rebalance_failed_field {
+ $CLI volume rebalance $1 status | awk '{print $5}' | sed -n 3p
+}
+
function fix-layout_status_field {
#The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4.
#But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00').
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 072896d..eab7558 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1024,6 +1024,46 @@ out:
return ret;
}
+static int32_t
+dht_rebalance_sparse_segment(xlator_t *subvol, fd_t *fd, off_t *offset,
+ size_t *size)
+{
+ off_t hole;
+ int32_t ret;
+
+ do {
+ ret = syncop_seek(subvol, fd, *offset, GF_SEEK_DATA, NULL, offset);
+ if (ret >= 0) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole. After a data segment there should always be a
+ * hole, since EOF is considered a hole. */
+ ret = syncop_seek(subvol, fd, *offset, GF_SEEK_HOLE, NULL, &hole);
+ }
+
+ if (ret < 0) {
+ if (ret == -ENXIO) {
+ /* This can happen if there are no more data segments (i.e.
+ * the offset is at EOF), or there was a data segment but the
+ * file has been truncated to a smaller size between both
+ * seek requests. In both cases we are done. The file doesn't
+ * contain more data. */
+ ret = 0;
+ }
+ return ret;
+ }
+
+ /* It could happen that at the same offset we detected data in the
+ * first seek, there could be a hole in the second seek if user is
+ * modifying the file concurrently. In this case we need to find a
+ * new data segment to migrate. */
+ } while (hole <= *offset);
+
+ /* Calculate the total size of the current data block */
+ *size = hole - *offset;
+
+ return 1;
+}
+
static int
__dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
@@ -1032,8 +1072,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
int ret = 0;
int count = 0;
off_t offset = 0;
- off_t data_offset = 0;
- off_t hole_offset = 0;
struct iovec *vector = NULL;
struct iobref *iobref = NULL;
uint64_t total = 0;
@@ -1048,71 +1086,36 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
while (total < ia_size) {
/* This is a regular file - read it sequentially */
if (!hole_exists) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
- ? DHT_REBALANCE_BLKSIZE
- : (ia_size - total));
+ data_block_size = ia_size - total;
} else {
/* This is a sparse file - read only the data segments in the file
*/
/* If the previous data block is fully copied, find the next data
- * segment
- * starting at the offset of the last read and written byte, */
+ * segment starting at the offset of the last read and written
+ * byte. */
if (data_block_size <= 0) {
- ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
- &data_offset);
- if (ret) {
- if (ret == -ENXIO)
- ret = 0; /* No more data segments */
- else
- *fop_errno = -ret; /* Error occurred */
-
+ ret = dht_rebalance_sparse_segment(from, src, &offset,
+ &data_block_size);
+ if (ret <= 0) {
+ *fop_errno = -ret;
break;
}
-
- /* If the position of the current data segment is greater than
- * the position of the next hole, find the next hole in order to
- * calculate the length of the new data segment */
- if (data_offset > hole_offset) {
- /* Starting at the offset of the last data segment, find the
- * next hole */
- ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
- NULL, &hole_offset);
- if (ret) {
- /* If an error occurred here it's a real error because
- * if the seek for a data segment was successful then
- * necessarily another hole must exist (EOF is a hole)
- */
- *fop_errno = -ret;
- break;
- }
-
- /* Calculate the total size of the current data block */
- data_block_size = hole_offset - data_offset;
- }
- } else {
- /* There is still data in the current segment, move the
- * data_offset to the position of the last written byte */
- data_offset = offset;
}
-
- /* Calculate how much data needs to be read and written. If the data
- * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
- * write DHT_REBALANCE_BLKSIZE data length and the rest in the
- * next iteration(s) */
- read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
- ? DHT_REBALANCE_BLKSIZE
- : data_block_size);
-
- /* Calculate the remaining size of the data block - maybe there's no
- * need to seek for data in the next iteration */
- data_block_size -= read_size;
-
- /* Set offset to the offset of the data segment so read and write
- * will have the correct position */
- offset = data_offset;
}
+ /* Calculate how much data needs to be read and written. If the data
+ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+ * next iteration(s) */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
&iobref, NULL, NULL, NULL);
@@ -1177,6 +1180,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
iobref = NULL;
vector = NULL;
}
+
if (iobref)
iobref_unref(iobref);
GF_FREE(vector);
--
1.8.3.1

View File

@ -1,79 +0,0 @@
From f2d3866e617d25ea62cda01afddc81ef0db3356e Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Tue, 4 May 2021 22:39:03 +0200
Subject: [PATCH 555/584] geo-rep: Improve handling of gfid mismatches
In some circumstances geo-replication can detect mismatching gfids
between primary and secondary. These entries are fixed in an iterative
way, assuming that after a fix, a previously failing entry could
succeed.
Previous code was trying to fix them in a loop that can be executed
up to 10 times. If some entry cannot be fixed after 10 attempts, it's
discarded. These fixes are very slow, so trying to do them many times
causes geo-replication to get out of sync.
To minimize the number of iterations done, this patch checks if the
number of entries and failures remains constant after each iteration.
If they are constant, it means that nothing else can be fixed, so it
makes no sense to do more iterations. This reduces the number of
iterations to 2 or 3 in most of the cases, improving geo-replication
performance.
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2389
> Fixes: #2388
> Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
BUG: 1957191
Change-Id: I6d9a623a60045694e1a832195e1dc1fb9e88ae54
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244550
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
geo-replication/syncdaemon/master.py | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 98637e7..aef9373 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -1224,9 +1224,11 @@ class GMasterChangelogMixin(GMasterCommon):
if gconf.get("gfid-conflict-resolution"):
count = 0
+ num_entries = len(entries)
+ num_failures = len(failures)
if failures:
logging.info(lf('Entry ops failed with gfid mismatch',
- count=len(failures)))
+ count=num_failures))
while failures and count < self.MAX_OE_RETRIES:
count += 1
self.handle_entry_failures(failures, entries)
@@ -1237,6 +1239,20 @@ class GMasterChangelogMixin(GMasterCommon):
"gfid mismatch")
break
+ # If this iteration has not removed any entry or reduced
+ # the number of failures compared to the previous one, we
+ # don't need to keep iterating because we'll get the same
+ # result in all other attempts.
+ if ((num_entries == len(entries)) and
+ (num_failures == len(failures))):
+ logging.info(lf("No more gfid mismatches can be fixed",
+ entries=num_entries,
+ failures=num_failures))
+ break
+
+ num_entries = len(entries)
+ num_failures = len(failures)
+
self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
self.status.dec_value("entry", len(entries))
--
1.8.3.1

View File

@ -1,52 +0,0 @@
From a7f6ad0c617a36414c8232cb692471703923b16d Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@users.noreply.github.com>
Date: Tue, 19 Jan 2021 18:03:33 +0100
Subject: [PATCH 556/584] dht: don't ignore xdata in fgetxattr
DHT was passing NULL for xdata in fgetxattr() request, ignoring any
data sent by upper xlators.
This patch fixes the issue by sending the received xdata to lower
xlators, as it's currently done for getxattr().
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2020
> Fixes: #1991
> Change-Id: If3d3f1f2ce6215f3b1acc46480e133cb4294eaec
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
BUG: 1919132
Change-Id: If3d3f1f2ce6215f3b1acc46480e133cb4294eaec
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244538
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 7425c1a..0773092 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -5262,7 +5262,7 @@ dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
if (!ret && key && local->mds_subvol && dht_match_xattr(key)) {
STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol,
- local->mds_subvol->fops->fgetxattr, fd, key, NULL);
+ local->mds_subvol->fops->fgetxattr, fd, key, xdata);
return 0;
}
@@ -5274,7 +5274,7 @@ dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
for (i = 0; i < cnt; i++) {
subvol = layout->list[i].xlator;
STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr, fd,
- key, NULL);
+ key, xdata);
}
return 0;
--
1.8.3.1

View File

@ -1,306 +0,0 @@
From ba57b043db1e19196cf860baeeeb1acfc9985cd2 Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@users.noreply.github.com>
Date: Wed, 24 Feb 2021 15:04:23 +0100
Subject: [PATCH 557/584] cluster/dht: Fix stack overflow in readdir(p)
When parallel-readdir is enabled, readdir(p) requests sent by DHT can be
immediately processed and answered in the same thread before the call to
STACK_WIND_COOKIE() completes.
This means that the readdir(p) cbk is processed synchronously. In some
cases it may decide to send another readdir(p) request, which causes a
recursive call.
When some special conditions happen and the directories are big, it's
possible that the number of nested calls is so high that the process
crashes because of a stack overflow.
This patch fixes this by not allowing nested readdir(p) calls. When a
nested call is detected, it's queued instead of being sent. The queued
request is processed when the current call finishes by the top level
stack function.
Backport of 3 patches:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2170
> Fixes: #2169
> Change-Id: Id763a8a51fb3c3314588ec7c162f649babf33099
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2202
> Updates: #2169
> Change-Id: I97e73c0aae74fc5d80c975f56f2f7a64e3e1ae95
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2242
> Fixes: #2239
> Change-Id: I6b2e48e87c85de27fad67a12d97abd91fa27c0c1
> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
BUG: 1798897
Change-Id: Id763a8a51fb3c3314588ec7c162f649babf33099
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244549
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/distribute/issue-2169.t | 33 +++++++++
xlators/cluster/dht/src/dht-common.c | 134 ++++++++++++++++++++++++++++++++---
xlators/cluster/dht/src/dht-common.h | 5 ++
3 files changed, 162 insertions(+), 10 deletions(-)
create mode 100755 tests/bugs/distribute/issue-2169.t
diff --git a/tests/bugs/distribute/issue-2169.t b/tests/bugs/distribute/issue-2169.t
new file mode 100755
index 0000000..91fa72a
--- /dev/null
+++ b/tests/bugs/distribute/issue-2169.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST ${CLI} volume create ${V0} ${H0}:/$B0/${V0}_0
+TEST ${CLI} volume set ${V0} readdir-ahead on
+TEST ${CLI} volume set ${V0} parallel-readdir on
+TEST ${CLI} volume start ${V0}
+
+TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
+
+TEST mkdir -p ${M0}/d/d.{000..999}
+
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
+
+TEST ${CLI} volume add-brick ${V0} ${H0}:${B0}/${V0}_{1..7}
+
+TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
+
+ls -l ${M0}/d/ | wc -l
+
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
+TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
+
+ls -l ${M0}/d/ | wc -l
+
+TEST ls ${M0}/d
+
+cleanup
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 0773092..ce0fbbf 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -24,8 +24,15 @@
#include <libgen.h>
#include <signal.h>
+#include <urcu/uatomic.h>
+
int run_defrag = 0;
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
int
dht_link2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
@@ -6681,6 +6688,94 @@ out:
return;
}
+/* Execute a READDIR request if no other request is in progress. Otherwise
+ * queue it to be executed when the current one finishes.
+ *
+ * When parallel-readdir is enabled and directory contents are cached, the
+ * callback of a readdirp will be called before returning from STACK_WIND.
+ * If the returned contents are not useful for DHT, and the buffer is not
+ * yet full, a nested readdirp request will be sent. This means that there
+ * will be many recursive calls. In the worst case there might be a stack
+ * overflow.
+ *
+ * To avoid this, we only wind a request if no other request is being wound.
+ * If there's another request, we simply store the values for the next call.
+ * When the thread processing the current wind completes it, it will take
+ * the new arguments and send the request from the top level stack. */
+static void
+dht_queue_readdir(call_frame_t *frame, xlator_t *xl, off_t offset,
+ fop_readdir_cbk_t cbk)
+{
+ dht_local_t *local;
+ int32_t queue;
+ xlator_t *this = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ local->queue_xl = xl;
+ local->queue_offset = offset;
+
+ if (uatomic_add_return(&local->queue, 1) == 1) {
+ /* If we are here it means that we are the first one to send a
+ * readdir request. Any attempt to send more readdir requests will
+ * find local->queue > 1, so it won't do anything. The needed data
+ * to send the request has been stored into local->queue_*.
+ *
+ * Note: this works because we will only have 1 additional request
+ * at most (the one called by the cbk function) while we are
+ * processing another readdir. */
+ do {
+ STACK_WIND_COOKIE(frame, cbk, local->queue_xl, local->queue_xl,
+ local->queue_xl->fops->readdir, local->fd,
+ local->size, local->queue_offset, local->xattr);
+
+ /* If a new readdirp request has been added before returning
+ * from winding, we process it. */
+ } while ((queue = uatomic_sub_return(&local->queue, 1)) > 0);
+
+ if (queue < 0) {
+ /* A negative value means that an unwind has been called before
+ * returning from the previous wind. This means that 'local' is
+ * not needed anymore and must be destroyed. */
+ dht_local_wipe(this, local);
+ }
+ }
+}
+
+/* Execute a READDIRP request if no other request is in progress. Otherwise
+ * queue it to be executed when the current one finishes. */
+static void
+dht_queue_readdirp(call_frame_t *frame, xlator_t *xl, off_t offset,
+ fop_readdirp_cbk_t cbk)
+{
+ dht_local_t *local;
+ int32_t queue;
+ xlator_t *this = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ local->queue_xl = xl;
+ local->queue_offset = offset;
+
+ /* Check dht_queue_readdir() comments for an explanation of this. */
+ if (uatomic_add_return(&local->queue, 1) == 1) {
+ do {
+ STACK_WIND_COOKIE(frame, cbk, local->queue_xl, local->queue_xl,
+ local->queue_xl->fops->readdirp, local->fd,
+ local->size, local->queue_offset, local->xattr);
+ } while ((queue = uatomic_sub_return(&local->queue, 1)) > 0);
+
+ if (queue < 0) {
+ /* A negative value means that an unwind has been called before
+ * returning from the previous wind. This means that 'local' is
+ * not needed anymore and must be destroyed. */
+ dht_local_wipe(this, local);
+ }
+ }
+}
+
/* Posix returns op_errno = ENOENT to indicate that there are no more
* entries
*/
@@ -6950,9 +7045,8 @@ done:
}
}
- STACK_WIND_COOKIE(frame, dht_readdirp_cbk, next_subvol, next_subvol,
- next_subvol->fops->readdirp, local->fd, local->size,
- next_offset, local->xattr);
+ dht_queue_readdirp(frame, next_subvol, next_offset, dht_readdirp_cbk);
+
return 0;
}
@@ -6970,6 +7064,17 @@ unwind:
if (prev != dht_last_up_subvol(this))
op_errno = 0;
+ /* If we are inside a recursive call (or not inside a recursive call but
+ * the cbk is completed before the wind returns), local->queue will be 1.
+ * In this case we cannot destroy 'local' because it will be needed by
+ * the caller of STACK_WIND. In this case, we decrease the value to let
+ * the caller know that the operation has terminated and it must destroy
+ * 'local'. If local->queue is 0, we can destroy it here because there are
+ * no other users. */
+ if (uatomic_sub_return(&local->queue, 1) >= 0) {
+ frame->local = NULL;
+ }
+
DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
gf_dirent_free(&entries);
@@ -7071,9 +7176,8 @@ done:
goto unwind;
}
- STACK_WIND_COOKIE(frame, dht_readdir_cbk, next_subvol, next_subvol,
- next_subvol->fops->readdir, local->fd, local->size,
- next_offset, NULL);
+ dht_queue_readdir(frame, next_subvol, next_offset, dht_readdir_cbk);
+
return 0;
}
@@ -7089,6 +7193,17 @@ unwind:
if (prev != dht_last_up_subvol(this))
op_errno = 0;
+ /* If we are inside a recursive call (or not inside a recursive call but
+ * the cbk is completed before the wind returns), local->queue will be 1.
+ * In this case we cannot destroy 'local' because it will be needed by
+ * the caller of STACK_WIND. In this case, we decrease the value to let
+ * the caller know that the operation has terminated and it must destroy
+ * 'local'. If local->queue is 0, we can destroy it here because there are
+ * no other users. */
+ if (uatomic_sub_return(&local->queue, 1) >= 0) {
+ frame->local = NULL;
+ }
+
if (!skip_hashed_check) {
DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
gf_dirent_free(&entries);
@@ -7096,6 +7211,7 @@ unwind:
} else {
DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, orig_entries, NULL);
}
+
return 0;
}
@@ -7172,11 +7288,9 @@ dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
}
- STACK_WIND_COOKIE(frame, dht_readdirp_cbk, xvol, xvol,
- xvol->fops->readdirp, fd, size, yoff, local->xattr);
+ dht_queue_readdirp(frame, xvol, yoff, dht_readdirp_cbk);
} else {
- STACK_WIND_COOKIE(frame, dht_readdir_cbk, xvol, xvol,
- xvol->fops->readdir, fd, size, yoff, local->xattr);
+ dht_queue_readdir(frame, xvol, yoff, dht_readdir_cbk);
}
return 0;
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 92f1b89..132b3b3 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -369,6 +369,11 @@ struct dht_local {
dht_dir_transaction_t lock[2], *current;
+ /* for nested readdirs */
+ xlator_t *queue_xl;
+ off_t queue_offset;
+ int32_t queue;
+
/* inodelks during filerename for backward compatibility */
dht_lock_t **rename_inodelk_backward_compatible;
int rename_inodelk_bc_count;
--
1.8.3.1

View File

@ -1,238 +0,0 @@
From 9bf6986f8ea3edd9de3d2629404f7ab11c1597de Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Tue, 9 Mar 2021 00:24:07 +0100
Subject: [PATCH 558/584] afr: fix directory entry count
AFR may hide some existing entries from a directory when reading it
because they are generated internally for private management. However
the returned number of entries from readdir() function is not updated
accordingly. So it may return a number higher than the real entries
present in the gf_dirent list.
This may cause unexpected behavior of clients, including gfapi which
incorrectly assumes that there was an entry when the list was actually
empty.
This patch also makes the check in gfapi more robust to avoid similar
issues that could appear in the future.
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2233
> Fixes: #2232
> Change-Id: I81ba3699248a53ebb0ee4e6e6231a4301436f763
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
BUG: 1927411
Change-Id: I81ba3699248a53ebb0ee4e6e6231a4301436f763
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244535
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
api/src/glfs-fops.c | 3 +-
tests/bugs/replicate/issue-2232.c | 85 ++++++++++++++++++++++++++++++++++
tests/bugs/replicate/issue-2232.t | 34 ++++++++++++++
xlators/cluster/afr/src/afr-dir-read.c | 11 +++--
4 files changed, 129 insertions(+), 4 deletions(-)
create mode 100644 tests/bugs/replicate/issue-2232.c
create mode 100644 tests/bugs/replicate/issue-2232.t
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
index 6dc3b66..821d250 100644
--- a/api/src/glfs-fops.c
+++ b/api/src/glfs-fops.c
@@ -3748,8 +3748,9 @@ glfd_entry_refresh(struct glfs_fd *glfd, int plus)
errno = 0;
}
- if (ret > 0)
+ if ((ret > 0) && !list_empty(&glfd->entries)) {
glfd->next = list_entry(glfd->entries.next, gf_dirent_t, list);
+ }
gf_dirent_free(&old);
out:
diff --git a/tests/bugs/replicate/issue-2232.c b/tests/bugs/replicate/issue-2232.c
new file mode 100644
index 0000000..df547c2
--- /dev/null
+++ b/tests/bugs/replicate/issue-2232.c
@@ -0,0 +1,85 @@
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <glusterfs/api/glfs.h>
+
+int main(int argc, char **argv)
+{
+ char log[128];
+ struct dirent entry;
+ struct dirent *ent;
+ glfs_xreaddirp_stat_t *xstat;
+ int ret, flags;
+
+ if (argc != 3) {
+ fprintf(stderr, "Syntax: %s <hostname> <volume>\n", argv[0]);
+ exit(1);
+ }
+ char *hostname = argv[1];
+ char *volname = argv[2];
+
+ glfs_t *fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new() failed\n");
+ exit(1);
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server() failed\n");
+ return ret;
+ }
+
+ sprintf(log, "/tmp/logs-%d.log", getpid());
+ ret = glfs_set_logging(fs, log, 9);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging() failed\n");
+ return ret;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init() failed\n");
+ return ret;
+ }
+
+ glfs_fd_t *fd = glfs_opendir(fs, "/");
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_opendir() failed\n");
+ return 1;
+ }
+
+ flags = GFAPI_XREADDIRP_STAT | GFAPI_XREADDIRP_HANDLE;
+ xstat = NULL;
+ while ((ret = glfs_xreaddirplus_r(fd, flags, &xstat, &entry, &ent)) > 0) {
+ if (xstat != NULL) {
+ glfs_free(xstat);
+ }
+ if ((strcmp(ent->d_name, ".") == 0) ||
+ (strcmp(ent->d_name, "..") == 0)) {
+ xstat = NULL;
+ continue;
+ }
+ if ((xstat == NULL) || ((ret & GFAPI_XREADDIRP_HANDLE) == 0)) {
+ fprintf(stderr, "glfs_xreaddirplus_r() failed: %s\n",
+ strerror(errno));
+ return 1;
+ }
+
+ xstat = NULL;
+ }
+
+ if (ret < 0) {
+ fprintf(stderr, "glfs_xreaddirplus_r() failed\n");
+ return ret;
+ }
+
+ glfs_close(fd);
+
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/bugs/replicate/issue-2232.t b/tests/bugs/replicate/issue-2232.t
new file mode 100644
index 0000000..66a41e0
--- /dev/null
+++ b/tests/bugs/replicate/issue-2232.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname "${0}")/../../include.rc
+. $(dirname "${0}")/../../volume.rc
+
+cleanup;
+TEST gcc $(dirname "${0}")/issue-2232.c -o $(dirname "${0}")/issue-2232 -lgfapi
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create ${V0} replica 3 ${H0}:${B0}/${V0}{0..2}
+
+# Create a fake .glusterfs-anonymous-inode-... entry
+ANONINO=".glusterfs-anonymous-inode-aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
+TEST mkdir ${B0}/${V0}{0..2}/${ANONINO}
+gfid="$(uuidgen)"
+hex="0x$(echo "${gfid}" | tr -d '-')"
+TEST assign_gfid "${hex}" "${B0}/${V0}0/${ANONINO}"
+TEST assign_gfid "${hex}" "${B0}/${V0}1/${ANONINO}"
+TEST assign_gfid "${hex}" "${B0}/${V0}2/${ANONINO}"
+TEST mkdir -p "${B0}/${V0}0/.glusterfs/${gfid:0:2}/${gfid:2:2}"
+TEST mkdir -p "${B0}/${V0}1/.glusterfs/${gfid:0:2}/${gfid:2:2}"
+TEST mkdir -p "${B0}/${V0}2/.glusterfs/${gfid:0:2}/${gfid:2:2}"
+TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}0/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}"
+TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}1/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}"
+TEST ln -s "../../00/00/00000000-0000-0000-0000-000000000001/${ANONINO}" "${B0}/${V0}2/.glusterfs/${gfid:0:2}/${gfid:2:2}/${gfid}"
+
+TEST $CLI volume start ${V0}
+
+TEST $(dirname "${0}")/issue-2232 ${H0} ${V0}
+
+TEST rm -f $(dirname $0)/issue-2232
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index d64b6a9..a98f8df 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -157,7 +157,7 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
return 0;
}
-static void
+static int32_t
afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
int subvol, gf_dirent_t *entries, fd_t *fd)
{
@@ -168,6 +168,7 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
afr_private_t *priv = NULL;
gf_boolean_t need_heal = _gf_false;
gf_boolean_t validate_subvol = _gf_false;
+ int32_t count = 0;
this = THIS;
priv = this->private;
@@ -184,6 +185,7 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
list_del_init(&entry->list);
list_add_tail(&entry->list, &entries->list);
+ count++;
if (!validate_subvol)
continue;
@@ -197,6 +199,8 @@ afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
}
}
}
+
+ return count;
}
int32_t
@@ -222,8 +226,9 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0)
- afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
- &entries, local->fd);
+ op_ret = afr_readdir_transform_entries(frame, subvol_entries,
+ (long)cookie, &entries,
+ local->fd);
AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
--
1.8.3.1

View File

@ -1,163 +0,0 @@
From 2b6e6c234dffa72c9f2af747908b1e1f29080698 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Thu, 25 Mar 2021 11:52:13 +0530
Subject: [PATCH 559/584] afr: make fsync post-op aware of inodelk count
(#2273)
Problem:
Since commit bd540db1e, eager-locking was enabled for fsync. But on
certain VM workloads with sharding enabled, shard xlator keeps sending
fsync on the base shard. This can cause blocked inodelks from other
clients (including shd) to time out due to call bail.
Fix:
Make afr fsync aware of inodelk count and not delay post-op + unlock
when inodelk count > 1, just like writev.
Code is restructured so that any fd based AFR_DATA_TRANSACTION can be made
aware by setting GLUSTERFS_INODELK_DOM_COUNT in xdata request.
Note: We do not know yet why VMs go into a paused state because of the
blocked inodelks but this patch should be a first step in reducing the
occurrence.
Upstream patch details:
> https://github.com/gluster/glusterfs/pull/2273/
> Updates: #2198
> Change-Id: Ib91ebdd3101d590c326e69c829cf9335003e260b
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
BUG: 1943467
Change-Id: Id407ca54007e3bbb206a1d9431ebaf89a2167f74
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244516
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/afr/src/afr-inode-write.c | 40 ++++++++++++++++++-------------
xlators/features/locks/src/posix.c | 1 +
2 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index df82b6e..962a7b1 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -42,6 +42,7 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
struct iatt *stbuf = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ afr_lock_t *lock = NULL;
afr_read_subvol_args_t args = {
0,
};
@@ -50,6 +51,12 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
+ if (local->update_num_inodelks &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ lock->num_inodelks = local->num_inodelks;
+ }
+
/*This code needs to stay till DHT sends fops on linked
* inodes*/
if (!inode_is_linked(local->inode)) {
@@ -134,6 +141,7 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int num_inodelks = 0;
local = frame->local;
priv = this->private;
@@ -146,8 +154,16 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
local->replies[child_index].op_ret = op_ret;
local->replies[child_index].op_errno = op_errno;
- if (xdata)
+ if (xdata) {
local->replies[child_index].xdata = dict_ref(xdata);
+ if (dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
+ &num_inodelks) == 0) {
+ if (num_inodelks > local->num_inodelks) {
+ local->num_inodelks = num_inodelks;
+ local->update_num_inodelks = _gf_true;
+ }
+ }
+ }
if (op_ret >= 0) {
if (prebuf)
@@ -284,7 +300,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
afr_local_t *local = frame->local;
uint32_t open_fd_count = 0;
uint32_t write_is_append = 0;
- int32_t num_inodelks = 0;
LOCK(&frame->lock);
{
@@ -306,15 +321,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
local->open_fd_count = open_fd_count;
local->update_open_fd_count = _gf_true;
}
-
- ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
- &num_inodelks);
- if (ret < 0)
- goto unlock;
- if (num_inodelks > local->num_inodelks) {
- local->num_inodelks = num_inodelks;
- local->update_num_inodelks = _gf_true;
- }
}
unlock:
UNLOCK(&frame->lock);
@@ -324,7 +330,6 @@ void
afr_process_post_writev(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_lock_t *lock = NULL;
local = frame->local;
@@ -343,11 +348,6 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
if (local->update_open_fd_count)
local->inode_ctx->open_fd_count = local->open_fd_count;
- if (local->update_num_inodelks &&
- local->transaction.type == AFR_DATA_TRANSACTION) {
- lock = &local->inode_ctx->lock[local->transaction.type];
- lock->num_inodelks = local->num_inodelks;
- }
}
int
@@ -2516,6 +2516,12 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local->xdata_req)
goto out;
+ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
+ this->name)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
local->fd = fd_ref(fd);
ret = afr_set_inode_local(this, local, fd->inode);
if (ret)
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index cdd1ff7..22ef5b8 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -4943,6 +4943,7 @@ struct xlator_fops fops = {
.rchecksum = pl_rchecksum,
.statfs = pl_statfs,
.fsyncdir = pl_fsyncdir,
+ .fsync = pl_fsync,
.readdir = pl_readdir,
.symlink = pl_symlink,
.link = pl_link,
--
1.8.3.1

View File

@ -1,73 +0,0 @@
From e56605d5808b41335026a5470fa10f5e5b5389f3 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawal@redhat.com>
Date: Mon, 6 Apr 2020 21:58:03 +0530
Subject: [PATCH 560/584] posix: Avoid dict_del logs in posix_is_layout_stale
while key is NULL
Problem: The key "GF_PREOP_PARENT_KEY" is populated by dht, so for a
non-distribute volume such as 1x3 the key is not populated and
posix_is_layout_stale logs a message whenever a file is created
Solution: To avoid the log, add a condition before deleting the key
Upstream patch details:
> https://review.gluster.org/#/c/glusterfs/+/24297/
> Change-Id: I813ee7960633e7f9f5e9ad2f42f288053d9eb71f
> Fixes: #1150
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
BUG: 1942816
Change-Id: I746a2619989265f3bc9bb648c4b8e4bbefaedc56
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244925
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/glusterd/brick-mux-validation.t | 4 ++--
xlators/storage/posix/src/posix-helpers.c | 5 +++--
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/tests/bugs/glusterd/brick-mux-validation.t b/tests/bugs/glusterd/brick-mux-validation.t
index 03a4768..61b0455 100644
--- a/tests/bugs/glusterd/brick-mux-validation.t
+++ b/tests/bugs/glusterd/brick-mux-validation.t
@@ -24,7 +24,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}
TEST $CLI volume start $V0
EXPECT 1 count_brick_processes
-EXPECT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
pkill gluster
@@ -101,4 +101,4 @@ TEST $CLI_IGNORE_PARTITION volume reset-brick $V1 $H0:$B0/${V1}1 $H0:$B0/${V1}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
EXPECT 1 count_brick_processes
-cleanup;
\ No newline at end of file
+cleanup;
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 110d383..16351d8 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -3596,13 +3596,14 @@ posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this)
op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name);
if (xattr_name == NULL) {
op_ret = 0;
- goto out;
+ return is_stale;
}
arg_data = dict_get(xdata, xattr_name);
if (!arg_data) {
op_ret = 0;
- goto out;
+ dict_del_sizen(xdata, GF_PREOP_PARENT_KEY);
+ return is_stale;
}
size = sys_lgetxattr(par_path, xattr_name, value_buf,
--
1.8.3.1

View File

@ -1,202 +0,0 @@
From 488a5aa4932842334e2749224e9c39f8b6fd379c Mon Sep 17 00:00:00 2001
From: Ashish Pandey <aspandey@redhat.com>
Date: Wed, 20 May 2020 11:30:17 +0530
Subject: [PATCH 561/584] cluster/ec: Inform failure when some bricks are
unavailable.
Provide proper information about failure when a fop
fails on some of the bricks.
Also provide information about parent fop and
the map of the bricks on which it is failing.
Upstream patch details:
>Change-Id: If812739617df65cd146c8e667fbacff653717248
>updates #1434
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
>https://review.gluster.org/#/c/glusterfs/+/24858/
Change-Id: I3549d637e7345f05f21ac1c0e8106973c69d1be9
BUG: 1908635
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244926
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/ec/src/ec-common.c | 76 +++++++++++++++++++++++---------------
xlators/cluster/ec/src/ec.c | 14 ++++++-
2 files changed, 58 insertions(+), 32 deletions(-)
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index e3f8769..a9624d8 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -316,17 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
}
}
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s, %s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
- ec->nodes),
- ec_msg_str(fop));
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
@@ -614,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
loc_t *loc2 = NULL;
char gfid1[64] = {0};
char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
if (fop->errstr)
return fop->errstr;
-
if (!fop->use_fd) {
loc1 = &fop->loc[0];
loc2 = &fop->loc[1];
@@ -625,23 +627,45 @@ ec_msg_str(ec_fop_data_t *fop)
if (fop->id == GF_FOP_RENAME) {
gf_asprintf(&fop->errstr,
"FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively",
+ "%s and %s respectively. Parent FOP: %s",
ec_fop_name(fop->id), loc1->path, loc2->path,
uuid_utoa_r(loc1->gfid, gfid1),
- uuid_utoa_r(loc2->gfid, gfid2));
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name(fop->id), loc1->path,
- uuid_utoa_r(loc1->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name(fop->id),
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
return fop->errstr;
}
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
static int32_t
ec_child_select(ec_fop_data_t *fop)
{
@@ -699,11 +723,7 @@ ec_child_select(ec_fop_data_t *fop)
ec_trace("SELECT", fop, "");
if ((num < fop->minimum) && (num < ec->fragments)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
@@ -711,11 +731,7 @@ ec_child_select(ec_fop_data_t *fop)
(fop->locks[0].update[EC_DATA_TXN] ||
fop->locks[0].update[EC_METADATA_TXN])) {
if (ec->quorum_count && (num < ec->quorum_count)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, ec->quorum_count, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
return 0;
}
}
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index a930089..047cdd8 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -325,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
void
ec_up(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 1;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -339,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
void
ec_down(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 0;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
--
1.8.3.1

File diff suppressed because it is too large Load Diff

View File

@ -1,318 +0,0 @@
From a19fa252942938a308ffa655fca3814d0660c6e2 Mon Sep 17 00:00:00 2001
From: Vinayakswami Hariharmath <vharihar@redhat.com>
Date: Wed, 3 Jun 2020 18:58:56 +0530
Subject: [PATCH 563/584] features/shard: Use fd lookup post file open
Issue:
When a process holds an open fd and the same file is
unlinked in the middle of its operations, a path-based
lookup fails with ENOENT or a stale file handle error.
Solution:
When the file is already open and an fd is available, use fstat
to get the file attributes.
Backport of:
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24528/
> Change-Id: I0e83aee9f11b616dcfe13769ebfcda6742e4e0f4
> Fixes: #1281
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
BUG: 1925425
Change-Id: I0e83aee9f11b616dcfe13769ebfcda6742e4e0f4
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244957
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/shard/issue-1281.t | 34 +++++++++++
xlators/features/shard/src/shard.c | 119 +++++++++++++++++++++++--------------
2 files changed, 110 insertions(+), 43 deletions(-)
create mode 100644 tests/bugs/shard/issue-1281.t
diff --git a/tests/bugs/shard/issue-1281.t b/tests/bugs/shard/issue-1281.t
new file mode 100644
index 0000000..9704caa
--- /dev/null
+++ b/tests/bugs/shard/issue-1281.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+#Open a file and store descriptor in fd = 5
+exec 5>$M0/foo
+
+#Unlink the same file which is opened in prev step
+TEST unlink $M0/foo
+
+#Write something on the file using the open fd = 5
+echo "issue-1281" >&5
+
+#Write on the descriptor should be successful
+EXPECT 0 echo $?
+
+#Close the fd = 5
+exec 5>&-
+
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index c5cc224..2ba4528 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -1653,26 +1653,24 @@ err:
}
int
-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
+shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
int ret = -1;
int32_t mask = SHARD_INODE_WRITE_MASK;
- shard_local_t *local = NULL;
+ shard_local_t *local = frame->local;
shard_inode_ctx_t ctx = {
0,
};
- local = frame->local;
-
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_ERROR, op_errno,
SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
"Lookup on base file"
" failed : %s",
- loc_gfid_utoa(&(local->loc)));
+ uuid_utoa(inode->gfid));
local->op_ret = op_ret;
local->op_errno = op_errno;
goto unwind;
@@ -1706,18 +1704,57 @@ unwind:
}
int
-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
- shard_post_fop_handler_t handler)
+shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ shard_local_t *local = frame->local;
+
+ shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret,
+ op_errno, buf, xdata);
+ return 0;
+}
+
+int
+shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ /* In case of op_ret < 0, inode passed to this function will be NULL
+ ex: in case of op_errno = ENOENT. So refer prefilled inode data
+ which is part of local.
+ Note: Reassigning/overriding the inode passed to this cbk with inode
+ which is part of *struct shard_local_t* won't cause any issue as
+ both inodes have same reference/address as of the inode passed */
+ inode = ((shard_local_t *)frame->local)->loc.inode;
+
+ shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno,
+ buf, xdata);
+ return 0;
+}
+
+/* This function decides whether to make file based lookup or
+ * fd based lookup (fstat) depending on the 3rd and 4th arg.
+ * If fd != NULL and loc == NULL then call is for fstat
+ * If fd == NULL and loc != NULL then call is for file based
+ * lookup. Please pass args based on the requirement.
+ */
+int
+shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, shard_post_fop_handler_t handler)
{
int ret = -1;
+ inode_t *inode = NULL;
shard_local_t *local = NULL;
dict_t *xattr_req = NULL;
gf_boolean_t need_refresh = _gf_false;
local = frame->local;
local->handler = handler;
+ inode = fd ? fd->inode : loc->inode;
- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
+ ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf,
&need_refresh);
/* By this time, inode ctx should have been created either in create,
* mknod, readdirp or lookup. If not it is a bug!
@@ -1726,7 +1763,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_msg_debug(this->name, 0,
"Skipping lookup on base file: %s"
"Serving prebuf off the inode ctx cache",
- uuid_utoa(loc->gfid));
+ uuid_utoa(inode->gfid));
goto out;
}
@@ -1737,10 +1774,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out);
- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ if (fd)
+ STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xattr_req);
+ else
+ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
dict_unref(xattr_req);
return 0;
@@ -2718,8 +2759,8 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
local->resolver_base_inode = loc->inode;
GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_truncate_handler);
return 0;
err:
@@ -2774,8 +2815,8 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->resolver_base_inode = fd->inode;
GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_truncate_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
@@ -2919,8 +2960,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!local->xattr_req)
goto err;
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_link_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_link_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
@@ -4249,8 +4290,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
switch (local->fop) {
case GF_FOP_UNLINK:
case GF_FOP_RENAME:
- shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
- shard_post_lookup_base_shard_rm_handler);
+ shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL,
+ shard_post_lookup_base_shard_rm_handler);
break;
default:
gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
@@ -4505,8 +4546,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->block_size) {
local->tmp_loc.inode = inode_new(this->itable);
gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
- shard_lookup_base_file(frame, this, &local->tmp_loc,
- shard_post_rename_lookup_handler);
+ shard_refresh_base_file(frame, this, &local->tmp_loc, NULL,
+ shard_post_rename_lookup_handler);
} else {
shard_rename_cbk(frame, this);
}
@@ -5242,8 +5283,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_readv_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_readv_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
@@ -6046,8 +6087,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_fsync_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_fsync_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
@@ -6420,12 +6461,8 @@ shard_common_remove_xattr(call_frame_t *frame, xlator_t *this,
if (xdata)
local->xattr_req = dict_ref(xdata);
- /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is
- * on an fd. This comes under a generic class of bugs in shard tracked by
- * bz #1782428.
- */
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_remove_xattr_handler);
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_remove_xattr_handler);
return 0;
err:
shard_common_failure_unwind(fop, frame, -1, op_errno);
@@ -6662,12 +6699,8 @@ shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
if (xdata)
local->xattr_rsp = dict_ref(xdata);
- /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is
- * on an fd. This comes under a generic class of bugs in shard tracked by
- * bz #1782428.
- */
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_set_xattr_handler);
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_set_xattr_handler);
return 0;
err:
shard_common_failure_unwind(fop, frame, -1, op_errno);
@@ -6951,8 +6984,8 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_common_inode_write_post_lookup_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_common_inode_write_post_lookup_handler);
return 0;
out:
shard_common_failure_unwind(fop, frame, -1, ENOMEM);
--
1.8.3.1

View File

@ -1,215 +0,0 @@
From a7a56c079df2eb0253efdd53e1538656c0ce9095 Mon Sep 17 00:00:00 2001
From: Yaniv Kaul <ykaul@redhat.com>
Date: Mon, 25 Nov 2019 15:37:46 +0200
Subject: [PATCH 564/584] store.c/glusterd-store.c: remove sys_stat calls
Instead of querying for the file size and allocating a char array
according to its size, let's just use a fixed size.
Those calls are not really needed, and are either expensive or
cached anyway. Since we do dynamic allocation/free, let's just use
a fixed array instead.
I'll see if there are other sys_stat() calls that are not really
useful and try to eliminate them in separate patches.
Backport of:
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/23752/
> Change-Id: I76b40e78a52ab38f613fc0cdef4be60e6253bf20
> updates: bz#1193929
> Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
BUG: 1925425
Change-Id: I76b40e78a52ab38f613fc0cdef4be60e6253bf20
Signed-off-by: Yaniv Kaul <ykaul@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244958
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/glusterfs/store.h | 4 +-
libglusterfs/src/store.c | 71 ++++--------------------------
xlators/mgmt/glusterd/src/glusterd-store.c | 5 +--
3 files changed, 12 insertions(+), 68 deletions(-)
diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
index 3b3a24c..f63bd05 100644
--- a/libglusterfs/src/glusterfs/store.h
+++ b/libglusterfs/src/glusterfs/store.h
@@ -59,8 +59,8 @@ int32_t
gf_store_unlink_tmppath(gf_store_handle_t *shandle);
int
-gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
- char **iter_val, gf_store_op_errno_t *store_errno);
+gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+ gf_store_op_errno_t *store_errno);
int32_t
gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value);
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
index cdf0aea..fa3649b 100644
--- a/libglusterfs/src/store.c
+++ b/libglusterfs/src/store.c
@@ -184,8 +184,8 @@ out:
}
int
-gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
- char **iter_val, gf_store_op_errno_t *store_errno)
+gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+ gf_store_op_errno_t *store_errno)
{
int32_t ret = -1;
char *savetok = NULL;
@@ -193,15 +193,15 @@ gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
char *value = NULL;
char *temp = NULL;
size_t str_len = 0;
+ char str[8192];
GF_ASSERT(file);
- GF_ASSERT(str);
GF_ASSERT(iter_key);
GF_ASSERT(iter_val);
GF_ASSERT(store_errno);
retry:
- temp = fgets(str, size, file);
+ temp = fgets(str, 8192, file);
if (temp == NULL || feof(file)) {
ret = -1;
*store_errno = GD_STORE_EOF;
@@ -241,13 +241,8 @@ int32_t
gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
{
int32_t ret = -1;
- char *scan_str = NULL;
char *iter_key = NULL;
char *iter_val = NULL;
- char *free_str = NULL;
- struct stat st = {
- 0,
- };
gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
GF_ASSERT(handle);
@@ -279,32 +274,9 @@ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
} else {
fseek(handle->read, 0, SEEK_SET);
}
- ret = sys_fstat(handle->fd, &st);
- if (ret < 0) {
- gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
- "stat on file %s failed", handle->path);
- ret = -1;
- store_errno = GD_STORE_STAT_FAILED;
- goto out;
- }
-
- /* "st.st_size + 1" is used as we are fetching each
- * line of a file using fgets, fgets will append "\0"
- * to the end of the string
- */
- scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char);
-
- if (scan_str == NULL) {
- ret = -1;
- store_errno = GD_STORE_ENOMEM;
- goto out;
- }
-
- free_str = scan_str;
-
do {
- ret = gf_store_read_and_tokenize(handle->read, scan_str, st.st_size + 1,
- &iter_key, &iter_val, &store_errno);
+ ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val,
+ &store_errno);
if (ret < 0) {
gf_msg_trace("", 0,
"error while reading key '%s': "
@@ -334,8 +306,6 @@ out:
sys_close(handle->fd);
}
- GF_FREE(free_str);
-
return ret;
}
@@ -561,40 +531,16 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
gf_store_op_errno_t *op_errno)
{
int32_t ret = -1;
- char *scan_str = NULL;
char *iter_key = NULL;
char *iter_val = NULL;
- struct stat st = {
- 0,
- };
gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
GF_ASSERT(iter);
GF_ASSERT(key);
GF_ASSERT(value);
- ret = sys_stat(iter->filepath, &st);
- if (ret < 0) {
- gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
- "stat on file failed");
- ret = -1;
- store_errno = GD_STORE_STAT_FAILED;
- goto out;
- }
-
- /* "st.st_size + 1" is used as we are fetching each
- * line of a file using fgets, fgets will append "\0"
- * to the end of the string
- */
- scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char);
- if (!scan_str) {
- ret = -1;
- store_errno = GD_STORE_ENOMEM;
- goto out;
- }
-
- ret = gf_store_read_and_tokenize(iter->file, scan_str, st.st_size + 1,
- &iter_key, &iter_val, &store_errno);
+ ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val,
+ &store_errno);
if (ret < 0) {
goto out;
}
@@ -619,7 +565,6 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
ret = 0;
out:
- GF_FREE(scan_str);
if (ret) {
GF_FREE(*key);
GF_FREE(*value);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 4fa8116..da63c03 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -4092,7 +4092,6 @@ out:
int32_t
glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
{
- char buf[PATH_MAX] = "";
char path[PATH_MAX] = "";
char *snap_vol_id = NULL;
char *missed_node_info = NULL;
@@ -4129,8 +4128,8 @@ glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
}
do {
- ret = gf_store_read_and_tokenize(
- fp, buf, sizeof(buf), &missed_node_info, &value, &store_errno);
+ ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value,
+ &store_errno);
if (ret) {
if (store_errno == GD_STORE_EOF) {
gf_msg_debug(this->name, 0, "EOF for missed_snap_list");
--
1.8.3.1

View File

@ -1,124 +0,0 @@
From d491843640658e91a77f15647cefd1c00422c731 Mon Sep 17 00:00:00 2001
From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
Date: Wed, 21 Oct 2020 16:14:29 +0530
Subject: [PATCH 565/584] libglusterfs/coverity: pointer to local outside the
scope
issue: gf_store_read_and_tokenize() returns the address
of a function-local string buffer.
fix: have the caller pass a buffer to gf_store_read_and_tokenize()
and use it for tokenizing.
CID: 1430143
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/1675
> Updates: #1060
> Change-Id: Ifc346540c263f58f4014ba2ba8c1d491c20ac609
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
BUG: 1925425
Change-Id: Ifc346540c263f58f4014ba2ba8c1d491c20ac609
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244959
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/glusterfs/store.h | 3 ++-
libglusterfs/src/store.c | 13 ++++++++-----
xlators/mgmt/glusterd/src/glusterd-store.c | 3 ++-
3 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
index f63bd05..68a20ad 100644
--- a/libglusterfs/src/glusterfs/store.h
+++ b/libglusterfs/src/glusterfs/store.h
@@ -60,7 +60,8 @@ gf_store_unlink_tmppath(gf_store_handle_t *shandle);
int
gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
- gf_store_op_errno_t *store_errno);
+ gf_store_op_errno_t *store_errno, char *str,
+ size_t buf_size);
int32_t
gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value);
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
index fa3649b..3af627a 100644
--- a/libglusterfs/src/store.c
+++ b/libglusterfs/src/store.c
@@ -185,7 +185,8 @@ out:
int
gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
- gf_store_op_errno_t *store_errno)
+ gf_store_op_errno_t *store_errno, char *str,
+ size_t buf_size)
{
int32_t ret = -1;
char *savetok = NULL;
@@ -193,7 +194,6 @@ gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
char *value = NULL;
char *temp = NULL;
size_t str_len = 0;
- char str[8192];
GF_ASSERT(file);
GF_ASSERT(iter_key);
@@ -201,7 +201,7 @@ gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
GF_ASSERT(store_errno);
retry:
- temp = fgets(str, 8192, file);
+ temp = fgets(str, buf_size, file);
if (temp == NULL || feof(file)) {
ret = -1;
*store_errno = GD_STORE_EOF;
@@ -275,8 +275,9 @@ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
fseek(handle->read, 0, SEEK_SET);
}
do {
+ char buf[8192];
ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val,
- &store_errno);
+ &store_errno, buf, 8192);
if (ret < 0) {
gf_msg_trace("", 0,
"error while reading key '%s': "
@@ -533,6 +534,8 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
int32_t ret = -1;
char *iter_key = NULL;
char *iter_val = NULL;
+ char buf[8192];
+
gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
GF_ASSERT(iter);
@@ -540,7 +543,7 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
GF_ASSERT(value);
ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val,
- &store_errno);
+ &store_errno, buf, 8192);
if (ret < 0) {
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index da63c03..a8651d8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -4128,8 +4128,9 @@ glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
}
do {
+ char buf[8192];
ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value,
- &store_errno);
+ &store_errno, buf, 8192);
if (ret) {
if (store_errno == GD_STORE_EOF) {
gf_msg_debug(this->name, 0, "EOF for missed_snap_list");
--
1.8.3.1

View File

@ -1,236 +0,0 @@
From e66ab728426e147bf4fc594109137ebfb1f2dda6 Mon Sep 17 00:00:00 2001
From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
Date: Mon, 23 Nov 2020 08:09:44 +0530
Subject: [PATCH 566/584] enhancement/debug: Option to generate core dump
without killing the process
Comments and idea proposed by: Xavi Hernandez(jahernan@redhat.com):
On production systems sometimes we see a log message saying that an assertion
has failed. But it's hard to track why it failed without additional information
(on debug builds, a GF_ASSERT() generates a core dump and kills the process,
so it can be used to debug the issue, but many times we are only able to
reproduce assertion failures on production systems, where GF_ASSERT() only logs
a message and continues).
In other cases we may have a core dump caused by a bug, but the core dump doesn't
necessarily happen when the bug has happened. Sometimes the crash happens so much
later that the causes that triggered the bug are lost. In these cases we can add
more assertions to the places that touch the potential candidates to cause the bug,
but the only thing we'll get is a log message, which may not be enough.
One solution would be to always generate a core dump in case of assertion failure,
but this was already discussed and it was decided that it was too drastic. If a
core dump was really needed, a new macro was created to do so: GF_ABORT(),
but GF_ASSERT() would continue to not kill the process on production systems.
I'm proposing to modify GF_ASSERT() on production builds so that it conditionally
triggers a signal when a debugger is attached. When this happens, the debugger
will generate a core dump and continue the process as if nothing had happened.
If there's no debugger attached, GF_ASSERT() will behave as always.
The idea I have is to use SIGCONT to do that. This signal is harmless, so we can
unmask it (we currently mask all unneeded signals) and raise it inside a GF_ASSERT()
when some global variable is set to true.
To produce the core dump, run the script extras/debug/gfcore.py in another
terminal. gdb stops and produces a core dump when a GF_ASSERT is hit.
The script is copied from #1810 which is written by Xavi Hernandez(jahernan@redhat.com)
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/1814
> Fixes: #1810
> Change-Id: I6566ca2cae15501d8835c36f56be4c6950cb2a53
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
BUG: 1927640
Change-Id: I6566ca2cae15501d8835c36f56be4c6950cb2a53
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244960
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/debug/gfcore.py | 77 +++++++++++++++++++++++++++++++
libglusterfs/src/common-utils.c | 11 +++++
libglusterfs/src/glusterfs/common-utils.h | 10 +++-
libglusterfs/src/libglusterfs.sym | 16 +++++++
4 files changed, 112 insertions(+), 2 deletions(-)
create mode 100755 extras/debug/gfcore.py
diff --git a/extras/debug/gfcore.py b/extras/debug/gfcore.py
new file mode 100755
index 0000000..9f097f0
--- /dev/null
+++ b/extras/debug/gfcore.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+def launch():
+ if len(sys.argv) < 3:
+ sys.stderr.write("Syntax: {} <pid> <count> [<dir>]\n".format(os.path.basename(sys.argv[0])))
+ sys.exit(1)
+
+ pid = int(sys.argv[1])
+ count = int(sys.argv[2])
+ base = os.getcwd()
+ if len(sys.argv) > 3:
+ base = sys.argv[3]
+ base = os.path.realpath(base)
+
+ subprocess.run([
+ "gdb", "-batch",
+ "-p", str(pid),
+ "-ex", "py arg_count = {}".format(count),
+ "-ex", "py arg_dir = '{}'".format(base),
+ "-x", __file__
+ ])
+
+class GFCore(object):
+ def __init__(self, count, base):
+ self.count = count
+ self.base = base
+ gdb.execute('set pagination off')
+ gdb.execute('set gf_signal_on_assert = 1')
+ gdb.events.stop.connect(self.gf_stop)
+
+ self.cont()
+
+ def cont(self, quit = False):
+ if not(quit) and (self.count > 0):
+ gdb.execute('continue')
+ else:
+ gdb.execute('set gf_signal_on_assert = 0')
+ gdb.execute('quit')
+
+ def gf_stop(self, event):
+ quit = False
+
+ if isinstance(event, gdb.SignalEvent):
+ if event.stop_signal == 'SIGCONT':
+ now = datetime.utcnow().isoformat()
+ pid = gdb.selected_inferior().pid
+ name = "{}/gfcore.{}.{}".format(self.base, pid, now)
+ print("Generating coredump '{}'".format(name))
+ gdb.execute('gcore {}'.format(name))
+ self.count -= 1
+
+ elif event.stop_signal == 'SIGINT':
+ print("SIGINT received. Exiting")
+ quit = True
+
+ else:
+ print("Ignoring signal {}".format(event.stop_signal))
+ else:
+ print("Unexpected event {}".format(type(event)))
+
+ self.cont(quit)
+
+# Module 'gdb' is not available when running outside gdb.
+try:
+ import gdb
+ from datetime import datetime
+
+ GFCore(arg_count, arg_dir)
+except ModuleNotFoundError:
+ import sys
+ import os
+ import subprocess
+
+ try:
+ launch()
+ except KeyboardInterrupt:
+ pass
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 70d5d21..d351b93 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -77,9 +77,19 @@ char *vol_type_str[] = {
"Distributed-Disperse",
};
+gf_boolean_t gf_signal_on_assert = false;
+
typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size);
typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size);
+void gf_assert(void)
+{
+ if (gf_signal_on_assert) {
+ raise(SIGCONT);
+ }
+
+}
+
void
gf_xxh64_wrapper(const unsigned char *data, size_t const len,
unsigned long long const seed, char *xxh64)
@@ -4021,6 +4031,7 @@ gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr,
sigdelset(&set, SIGSYS);
sigdelset(&set, SIGFPE);
sigdelset(&set, SIGABRT);
+ sigdelset(&set, SIGCONT);
pthread_sigmask(SIG_BLOCK, &set, &old);
diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
index f0a0a41..604afd0 100644
--- a/libglusterfs/src/glusterfs/common-utils.h
+++ b/libglusterfs/src/glusterfs/common-utils.h
@@ -25,6 +25,7 @@
#include <limits.h>
#include <fnmatch.h>
#include <uuid/uuid.h>
+#include <urcu/compiler.h>
#ifndef ffsll
#define ffsll(x) __builtin_ffsll(x)
@@ -431,14 +432,19 @@ BIT_VALUE(unsigned char *array, unsigned int index)
#define GF_FILE_CONTENT_REQUESTED(_xattr_req, _content_limit) \
(dict_get_uint64(_xattr_req, "glusterfs.content", _content_limit) == 0)
+void gf_assert(void);
+
#ifdef DEBUG
#define GF_ASSERT(x) assert(x);
#else
#define GF_ASSERT(x) \
do { \
- if (!(x)) { \
+ if (caa_unlikely(!(x))) { \
+ gf_assert(); \
gf_msg_callingfn("", GF_LOG_ERROR, 0, LG_MSG_ASSERTION_FAILED, \
- "Assertion failed: " #x); \
+ "Assertion failed: To attach gdb and coredump," \
+ " Run the script under " \
+ "\"glusterfs/extras/debug/gfcore.py\""); \
} \
} while (0)
#endif
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index 0a0862e..9072afa 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -1167,3 +1167,19 @@ gf_changelog_register_generic
gf_gfid_generate_from_xxh64
find_xlator_option_in_cmd_args_t
gf_d_type_from_ia_type
+glusterfs_graph_fini
+glusterfs_process_svc_attach_volfp
+glusterfs_mux_volfile_reconfigure
+glusterfs_process_svc_detach
+mgmt_is_multiplexed_daemon
+xlator_is_cleanup_starting
+gf_nanosleep
+gf_syncfs
+graph_total_client_xlator
+get_xattrs_to_heal
+gf_latency_statedump_and_reset
+gf_latency_new
+gf_latency_reset
+gf_latency_update
+gf_frame_latency_update
+gf_assert
--
1.8.3.1

View File

@ -1,86 +0,0 @@
From 5c81d813c8b1f494d31d54c1ab09a3f0153ebfd4 Mon Sep 17 00:00:00 2001
From: Amar Tumballi <amarts@redhat.com>
Date: Sat, 9 Feb 2019 13:13:47 +0530
Subject: [PATCH 567/584] inode: create inode outside locked region
Only linking of inode to the table, and inserting it in
a list needs to be in locked region.
Backport of:
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/22183/
> Updates: bz#1670031
> Change-Id: I6ea7e956b80cf2765c2233d761909c4bf9c7253c
> Signed-off-by: Amar Tumballi <amarts@redhat.com>
BUG: 1927640
Change-Id: I6ea7e956b80cf2765c2233d761909c4bf9c7253c
Signed-off-by: Amar Tumballi <amarts@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244961
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/inode.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index 98f8ea6..46db04f 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -620,7 +620,7 @@ out:
}
static inode_t *
-__inode_create(inode_table_t *table)
+inode_create(inode_table_t *table)
{
inode_t *newi = NULL;
@@ -647,11 +647,7 @@ __inode_create(inode_table_t *table)
goto out;
}
- list_add(&newi->list, &table->lru);
- table->lru_size++;
-
out:
-
return newi;
}
@@ -668,14 +664,16 @@ inode_new(inode_table_t *table)
return NULL;
}
- pthread_mutex_lock(&table->lock);
- {
- inode = __inode_create(table);
- if (inode != NULL) {
+ inode = inode_create(table);
+ if (inode) {
+ pthread_mutex_lock(&table->lock);
+ {
+ list_add(&inode->list, &table->lru);
+ table->lru_size++;
__inode_ref(inode, false);
}
+ pthread_mutex_unlock(&table->lock);
}
- pthread_mutex_unlock(&table->lock);
return inode;
}
@@ -1613,7 +1611,10 @@ __inode_table_init_root(inode_table_t *table)
if (!table)
return;
- root = __inode_create(table);
+ root = inode_create(table);
+
+ list_add(&root->list, &table->lru);
+ table->lru_size++;
iatt.ia_gfid[15] = 1;
iatt.ia_ino = 1;
--
1.8.3.1

View File

@ -1,131 +0,0 @@
From 2640ee56201d320b838909f95608abe07e3ff9b0 Mon Sep 17 00:00:00 2001
From: mohit84 <moagrawa@redhat.com>
Date: Tue, 24 Nov 2020 15:29:58 +0530
Subject: [PATCH 568/584] core: tcmu-runner process continuous growing logs
lru_size showing -1
* core: tcmu-runner process continuous growing logs lru_size showing -1
When inode_table_prune is called, it checks whether the current lru_size
is greater than lru_limit; if it is but lru_list is empty, it logs the message
"Empty inode lru list found but with (%d) lru_size". From reading the code,
it seems lru_size is out of sync with the actual number of inodes in
lru_list. Because the error message is logged continuously, the entire disk
fills up and the user has to restart the tcmu-runner process to use
the volumes. The log message was introduced by the patch
https://review.gluster.org/#/c/glusterfs/+/15087/.
Solution: Introduce a flag in_lru_list to track whether an inode is
part of lru_list or not.
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/1776
> Fixes: #1775
> Change-Id: I4b836bebf4b5db65fbf88ff41c6c88f4a7ac55c1
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
BUG: 1927640
Change-Id: I4b836bebf4b5db65fbf88ff41c6c88f4a7ac55c1
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244962
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/glusterfs/inode.h | 1 +
libglusterfs/src/inode.c | 14 ++++++++++++++
2 files changed, 15 insertions(+)
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
index 62c093d..17d0340 100644
--- a/libglusterfs/src/glusterfs/inode.h
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -110,6 +110,7 @@ struct _inode {
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
bool in_invalidate_list; /* Set if inode is in table invalidate list */
bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
+ bool in_lru_list; /* Set if inode is in table lru list */
};
#define UUID0_STR "00000000-0000-0000-0000-000000000000"
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index 46db04f..8e91197 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -417,8 +417,10 @@ __inode_passivate(inode_t *inode)
dentry_t *dentry = NULL;
dentry_t *t = NULL;
+ GF_ASSERT(!inode->in_lru_list);
list_move_tail(&inode->list, &inode->table->lru);
inode->table->lru_size++;
+ inode->in_lru_list = _gf_true;
list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
{
@@ -531,7 +533,10 @@ __inode_ref(inode_t *inode, bool is_invalidate)
inode->in_invalidate_list = false;
inode->table->invalidate_size--;
} else {
+ GF_ASSERT(inode->table->lru_size > 0);
+ GF_ASSERT(inode->in_lru_list);
inode->table->lru_size--;
+ inode->in_lru_list = _gf_false;
}
if (is_invalidate) {
inode->in_invalidate_list = true;
@@ -670,6 +675,8 @@ inode_new(inode_table_t *table)
{
list_add(&inode->list, &table->lru);
table->lru_size++;
+ GF_ASSERT(!inode->in_lru_list);
+ inode->in_lru_list = _gf_true;
__inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1533,6 +1540,7 @@ inode_table_prune(inode_table_t *table)
lru_size = table->lru_size;
while (lru_size > (table->lru_limit)) {
if (list_empty(&table->lru)) {
+ GF_ASSERT(0);
gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
LG_MSG_INVALID_INODE_LIST,
"Empty inode lru list found"
@@ -1543,6 +1551,7 @@ inode_table_prune(inode_table_t *table)
lru_size--;
entry = list_entry(table->lru.next, inode_t, list);
+ GF_ASSERT(entry->in_lru_list);
/* The logic of invalidation is required only if invalidator_fn
is present */
if (table->invalidator_fn) {
@@ -1560,6 +1569,7 @@ inode_table_prune(inode_table_t *table)
}
table->lru_size--;
+ entry->in_lru_list = _gf_false;
__inode_retire(entry);
ret++;
}
@@ -1615,6 +1625,7 @@ __inode_table_init_root(inode_table_t *table)
list_add(&root->list, &table->lru);
table->lru_size++;
+ root->in_lru_list = _gf_true;
iatt.ia_gfid[15] = 1;
iatt.ia_ino = 1;
@@ -1873,8 +1884,11 @@ inode_table_destroy(inode_table_t *inode_table)
while (!list_empty(&inode_table->lru)) {
trav = list_first_entry(&inode_table->lru, inode_t, list);
inode_forget_atomic(trav, 0);
+ GF_ASSERT(inode_table->lru_size > 0);
+ GF_ASSERT(trav->in_lru_list);
__inode_retire(trav);
inode_table->lru_size--;
+ trav->in_lru_list = _gf_false;
}
/* Same logic for invalidate list */
--
1.8.3.1

View File

@ -1,200 +0,0 @@
From 1b86a4bda540ff4cf307c7f38d3041318636ecb7 Mon Sep 17 00:00:00 2001
From: Vinayakswami Hariharmath <vharihar@redhat.com>
Date: Thu, 6 Aug 2020 14:39:59 +0530
Subject: [PATCH 569/584] features/shard: optimization over shard lookup in
case of prealloc
Assume that we are preallocating a VM of size 1TB with a shard
block size of 64MB then there will be ~16k shards.
This creation happens in 2 steps in the shard_fallocate() path, i.e.
1. lookup for the shards, if any are already present, and
2. mknod over those shards that do not exist.
But in the case of fresh creation, we don't have to look up all the
shards which are not present, as the file size will be 0.
Through this, we can save lookups on all shards which are not
present. This optimization is quite useful in the case of
preallocating a big VM.
Also, if the file is already present and the call is to
extend it to a bigger size, then we need not look up non-
existent shards. Just look up preexisting shards, populate
the inodes and issue mknod on the extended size.
Backport of:
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24813/
> Fixes: #1425
> Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
BUG: 1925425
Change-Id: I60036fe8302c696e0ca80ff11ab0ef5bcdbd7880
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244963
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/shard/issue-1425.t | 45 +++++++++++++++++++++++++++++++++++++
xlators/features/shard/src/shard.c | 46 ++++++++++++++++++++++++++++++++------
2 files changed, 84 insertions(+), 7 deletions(-)
create mode 100644 tests/bugs/shard/issue-1425.t
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
new file mode 100644
index 0000000..bbe82c0
--- /dev/null
+++ b/tests/bugs/shard/issue-1425.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 20M $M0/foo
+gfid_new=$(get_gfid_string $M0/foo)
+
+# Check for the base shard
+TEST stat $M0/foo
+TEST stat $B0/${V0}0/foo
+
+# There should be 4 associated shards
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
+
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+
+# Delete the base shard and check shards get cleaned up
+TEST unlink $M0/foo
+
+TEST ! stat $M0/foo
+TEST ! stat $B0/${V0}0/foo
+
+# There should be no shards now
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 2ba4528..a6ad1b8 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -995,6 +995,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
}
int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+ xlator_t *this);
+
+int
shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
shard_post_resolve_fop_handler_t post_res_handler)
{
@@ -1011,21 +1015,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
shard_local_t *local = NULL;
+ uint64_t resolve_count = 0;
priv = this->private;
local = frame->local;
local->call_count = 0;
shard_idx_iter = local->first_block;
res_inode = local->resolver_base_inode;
+
+ if ((local->op_ret < 0) || (local->resolve_not))
+ goto out;
+
+ /* If this prealloc FOP is for fresh file creation, then the size of the
+ * file will be 0. Then there will be no shards associated with this file.
+ * So we can skip the lookup process for the shards which do not exists
+ * and directly issue mknod to crete shards.
+ *
+ * In case the prealloc fop is to extend the preallocated file to bigger
+ * size then just lookup and populate inodes of existing shards and
+ * update the create count
+ */
+ if (local->fop == GF_FOP_FALLOCATE) {
+ if (!local->prebuf.ia_size) {
+ local->inode_list[0] = inode_ref(res_inode);
+ local->create_count = local->last_block;
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
+ return 0;
+ }
+ if (local->prebuf.ia_size < local->total_size)
+ local->create_count = local->last_block -
+ ((local->prebuf.ia_size - 1) /
+ local->block_size);
+ }
+
+ resolve_count = local->last_block - local->create_count;
+
if (res_inode)
gf_uuid_copy(gfid, res_inode->gfid);
else
gf_uuid_copy(gfid, local->base_gfid);
- if ((local->op_ret < 0) || (local->resolve_not))
- goto out;
-
- while (shard_idx_iter <= local->last_block) {
+ while (shard_idx_iter <= resolve_count) {
i++;
if (shard_idx_iter == 0) {
local->inode_list[i] = inode_ref(res_inode);
@@ -2434,7 +2464,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
int count = 0;
int call_count = 0;
int32_t shard_idx_iter = 0;
- int last_block = 0;
+ int lookup_count = 0;
char path[PATH_MAX] = {
0,
};
@@ -2454,7 +2484,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
local = frame->local;
count = call_count = local->call_count;
shard_idx_iter = local->first_block;
- last_block = local->last_block;
+ lookup_count = local->last_block - local->create_count;
local->pls_fop_handler = handler;
if (local->lookup_shards_barriered)
local->barrier.waitfor = local->call_count;
@@ -2464,7 +2494,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
else
gf_uuid_copy(gfid, local->base_gfid);
- while (shard_idx_iter <= last_block) {
+ while (shard_idx_iter <= lookup_count) {
if (local->inode_list[i]) {
i++;
shard_idx_iter++;
@@ -5651,6 +5681,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
shard_common_lookup_shards(
frame, this, local->resolver_base_inode,
shard_common_inode_write_post_lookup_shards_handler);
+ } else if (local->create_count) {
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
} else {
shard_common_inode_write_do(frame, this);
}
--
1.8.3.1

View File

@ -1,340 +0,0 @@
From 1a8b001a121ada4d3d338b52b312896f1790f2bb Mon Sep 17 00:00:00 2001
From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
Date: Mon, 11 Jan 2021 12:34:55 +0530
Subject: [PATCH 570/584] features/shard: avoid repetitive calls to
gf_uuid_unparse()
The issue is shard_make_block_abspath() calls gf_uuid_unparse()
every time while constructing shard path. The gfid can be parsed
and saved once and passed while constructing the path. Thus
we can avoid calling gf_uuid_unparse().
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/1689
> Fixes: #1423
> Change-Id: Ia26fbd5f09e812bbad9e5715242f14143c013c9c
> Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com
BUG: 1925425
Change-Id: Ia26fbd5f09e812bbad9e5715242f14143c013c9c
Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244964
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/shard/issue-1425.t | 9 ++-
xlators/features/shard/src/shard.c | 119 ++++++++++++++++++-------------------
2 files changed, 65 insertions(+), 63 deletions(-)
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
index bbe82c0..8b77705 100644
--- a/tests/bugs/shard/issue-1425.t
+++ b/tests/bugs/shard/issue-1425.t
@@ -21,7 +21,13 @@ TEST $CLI volume profile $V0 start
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+$CLI volume profile $V0 info clear
+
TEST fallocate -l 20M $M0/foo
+
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
+EXPECT "5" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+
gfid_new=$(get_gfid_string $M0/foo)
# Check for the base shard
@@ -31,9 +37,6 @@ TEST stat $B0/${V0}0/foo
# There should be 4 associated shards
EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
-# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
-EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
-
# Delete the base shard and check shards get cleaned up
TEST unlink $M0/foo
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index a6ad1b8..d1d7d7a 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -16,6 +16,8 @@
#include <glusterfs/defaults.h>
#include <glusterfs/statedump.h>
+#define SHARD_PATH_MAX (sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE + 16)
+
static gf_boolean_t
__is_shard_dir(uuid_t gfid)
{
@@ -49,15 +51,19 @@ shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len)
snprintf(buf, len, "%s.%d", gfid_str, block_num);
}
-void
-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len)
+static int
+shard_make_base_path(char *path, uuid_t gfid)
{
- char gfid_str[GF_UUID_BUF_SIZE] = {
- 0,
- };
+ strcpy(path, "/" GF_SHARD_DIR "/");
+ uuid_utoa_r(gfid, path + sizeof(GF_SHARD_DIR) + 1);
+ return (sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE);
+}
- gf_uuid_unparse(gfid, gfid_str);
- snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
+static inline void
+shard_append_index(char *path, int path_size, int prefix_len,
+ int shard_idx_iter)
+{
+ snprintf(path + prefix_len, path_size - prefix_len, ".%d", shard_idx_iter);
}
int
@@ -1004,9 +1010,8 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
{
int i = -1;
uint32_t shard_idx_iter = 0;
- char path[PATH_MAX] = {
- 0,
- };
+ int prefix_len = 0;
+ char path[SHARD_PATH_MAX];
uuid_t gfid = {
0,
};
@@ -1055,6 +1060,9 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
else
gf_uuid_copy(gfid, local->base_gfid);
+ /* Build base shard path before appending index of the shard */
+ prefix_len = shard_make_base_path(path, gfid);
+
while (shard_idx_iter <= resolve_count) {
i++;
if (shard_idx_iter == 0) {
@@ -1062,16 +1070,13 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
shard_idx_iter++;
continue;
}
-
- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
-
+ shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter);
inode = NULL;
inode = inode_resolve(this->itable, path);
if (inode) {
gf_msg_debug(this->name, 0,
- "Shard %d already "
- "present. gfid=%s. Saving inode for future.",
- shard_idx_iter, uuid_utoa(inode->gfid));
+ "Shard %s already present. Saving inode for future.",
+ path);
local->inode_list[i] = inode;
/* Let the ref on the inodes that are already present
* in inode table still be held so that they don't get
@@ -2153,9 +2158,8 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
int call_count = 0;
uint32_t cur_block = 0;
uint32_t last_block = 0;
- char path[PATH_MAX] = {
- 0,
- };
+ int prefix_len = 0;
+ char path[SHARD_PATH_MAX];
char *bname = NULL;
loc_t loc = {
0,
@@ -2216,6 +2220,10 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
return 0;
}
+ /* Build base shard path before appending index of the shard */
+ prefix_len = shard_make_base_path(path, inode->gfid);
+ bname = path + sizeof(GF_SHARD_DIR) + 1;
+
SHARD_SET_ROOT_FS_ID(frame, local);
while (cur_block <= last_block) {
if (!local->inode_list[i]) {
@@ -2229,15 +2237,12 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
goto next;
}
- shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
- bname = strrchr(path, '/') + 1;
+ shard_append_index(path, SHARD_PATH_MAX, prefix_len, cur_block);
loc.parent = inode_ref(priv->dot_shard_inode);
ret = inode_path(loc.parent, bname, (char **)&(loc.path));
if (ret < 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
- "Inode path failed"
- " on %s. Base file gfid = %s",
- bname, uuid_utoa(inode->gfid));
+ "Inode path failed on %s.", bname);
local->op_ret = -1;
local->op_errno = ENOMEM;
loc_wipe(&loc);
@@ -2465,13 +2470,8 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
int call_count = 0;
int32_t shard_idx_iter = 0;
int lookup_count = 0;
- char path[PATH_MAX] = {
- 0,
- };
+ char path[SHARD_PATH_MAX];
char *bname = NULL;
- uuid_t gfid = {
- 0,
- };
loc_t loc = {
0,
};
@@ -2489,10 +2489,16 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
if (local->lookup_shards_barriered)
local->barrier.waitfor = local->call_count;
+ /* Build base shard path before appending index of the shard */
+ strcpy(path, "/" GF_SHARD_DIR "/");
+
if (inode)
- gf_uuid_copy(gfid, inode->gfid);
+ uuid_utoa_r(inode->gfid, path + sizeof(GF_SHARD_DIR) + 1);
else
- gf_uuid_copy(gfid, local->base_gfid);
+ uuid_utoa_r(local->base_gfid, path + sizeof(GF_SHARD_DIR) + 1);
+
+ int prefix_len = sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE;
+ bname = path + sizeof(GF_SHARD_DIR) + 1;
while (shard_idx_iter <= lookup_count) {
if (local->inode_list[i]) {
@@ -2508,18 +2514,14 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
goto next;
}
- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
-
- bname = strrchr(path, '/') + 1;
+ shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter);
loc.inode = inode_new(this->itable);
loc.parent = inode_ref(priv->dot_shard_inode);
gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
ret = inode_path(loc.parent, bname, (char **)&(loc.path));
if (ret < 0 || !(loc.inode)) {
gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
- "Inode path failed"
- " on %s, base file gfid = %s",
- bname, uuid_utoa(gfid));
+ "Inode path failed on %s", bname);
local->op_ret = -1;
local->op_errno = ENOMEM;
loc_wipe(&loc);
@@ -3168,12 +3170,7 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
uint32_t cur_block = 0;
uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */
char *bname = NULL;
- char path[PATH_MAX] = {
- 0,
- };
- uuid_t gfid = {
- 0,
- };
+ char path[SHARD_PATH_MAX];
loc_t loc = {
0,
};
@@ -3184,10 +3181,16 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
priv = this->private;
local = frame->local;
+ /* Build base shard path before appending index of the shard */
+ strcpy(path, "/" GF_SHARD_DIR "/");
+
if (inode)
- gf_uuid_copy(gfid, inode->gfid);
+ uuid_utoa_r(inode->gfid, path + sizeof(GF_SHARD_DIR) + 1);
else
- gf_uuid_copy(gfid, local->base_gfid);
+ uuid_utoa_r(local->base_gfid, path + sizeof(GF_SHARD_DIR) + 1);
+
+ int prefix_len = sizeof(GF_SHARD_DIR) + GF_UUID_BUF_SIZE;
+ bname = path + sizeof(GF_SHARD_DIR) + 1;
for (i = 0; i < local->num_blocks; i++) {
if (!local->inode_list[i])
@@ -3203,7 +3206,7 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
gf_msg_debug(this->name, 0,
"All shards that need to be "
"unlinked are non-existent: %s",
- uuid_utoa(gfid));
+ path);
return 0;
}
@@ -3221,15 +3224,12 @@ shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
goto next;
}
- shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
- bname = strrchr(path, '/') + 1;
+ shard_append_index(path, SHARD_PATH_MAX, prefix_len, cur_block);
loc.parent = inode_ref(priv->dot_shard_inode);
ret = inode_path(loc.parent, bname, (char **)&(loc.path));
if (ret < 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
- "Inode path failed"
- " on %s, base file gfid = %s",
- bname, uuid_utoa(gfid));
+ "Inode path failed on %s", bname);
local->op_ret = -1;
local->op_errno = ENOMEM;
loc_wipe(&loc);
@@ -4971,9 +4971,8 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
int last_block = 0;
int ret = 0;
int call_count = 0;
- char path[PATH_MAX] = {
- 0,
- };
+ int prefix_len = 0;
+ char path[SHARD_PATH_MAX];
mode_t mode = 0;
char *bname = NULL;
shard_priv_t *priv = NULL;
@@ -4996,6 +4995,10 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
call_count = local->call_count = local->create_count;
local->post_mknod_handler = post_mknod_handler;
+ /* Build base shard path before appending index of the shard */
+ prefix_len = shard_make_base_path(path, fd->inode->gfid);
+ bname = path + sizeof(GF_SHARD_DIR) + 1;
+
SHARD_SET_ROOT_FS_ID(frame, local);
ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
@@ -5022,10 +5025,7 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
-1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
goto next;
}
-
- shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
- sizeof(path));
-
+ shard_append_index(path, SHARD_PATH_MAX, prefix_len, shard_idx_iter);
xattr_req = shard_create_gfid_dict(local->xattr_req);
if (!xattr_req) {
local->op_ret = -1;
@@ -5036,7 +5036,6 @@ shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
goto next;
}
- bname = strrchr(path, '/') + 1;
loc.inode = inode_new(this->itable);
loc.parent = inode_ref(priv->dot_shard_inode);
ret = inode_path(loc.parent, bname, (char **)&(loc.path));
--
1.8.3.1

View File

@ -1,98 +0,0 @@
From 2c0d11bb406e50fb515abf0c5a4006e1b362ac8e Mon Sep 17 00:00:00 2001
From: Emmanuel Dreyfus <manu@netbsd.org>
Date: Tue, 30 Jun 2020 16:42:36 +0200
Subject: [PATCH 571/584] NetBSD build fixes
- Make sure -largp is used at link time
- PTHREAD_MUTEX_ADAPTIVE_NP is not available, use PTHREAD_MUTEX_DEFAULT instead
- Avoid non POSIX [[ ]] in scripts
- Do not check if lock.spinlock is NULL since it is not a pointer
(it is not a pointer on Linux either)
Backport of:
> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24648/
> Change-Id: I5e04a7c552d24f8a473c2b837828d1bddfa7e128
> Fixes: #1347
> Type: Bug
> Signed-off-by: Emmanuel Dreyfus <manu@netbsd.org>
BUG: 1925425
Change-Id: I5e04a7c552d24f8a473c2b837828d1bddfa7e128
Signed-off-by: Emmanuel Dreyfus <manu@netbsd.org>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245040
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
configure.ac | 3 +++
rpc/rpc-lib/src/rpcsvc.c | 4 ++++
tools/gfind_missing_files/gfind_missing_files.sh | 2 +-
xlators/performance/write-behind/src/write-behind.c | 4 ++--
4 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/configure.ac b/configure.ac
index 327733e..6138a59 100644
--- a/configure.ac
+++ b/configure.ac
@@ -998,6 +998,9 @@ case $host_os in
CFLAGS="${CFLAGS} -isystem /usr/local/include"
ARGP_LDADD=-largp
;;
+ *netbsd*)
+ ARGP_LDADD=-largp
+ ;;
esac
dnl argp-standalone does not provide a pkg-config file
AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index 3f184bf..b031d93 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -46,6 +46,10 @@
#include "xdr-rpcclnt.h"
#include <glusterfs/glusterfs-acl.h>
+#ifndef PTHREAD_MUTEX_ADAPTIVE_NP
+#define PTHREAD_MUTEX_ADAPTIVE_NP PTHREAD_MUTEX_DEFAULT
+#endif
+
struct rpcsvc_program gluster_dump_prog;
#define rpcsvc_alloc_request(svc, request) \
diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
index f42fe7b..e7aaa0b 100644
--- a/tools/gfind_missing_files/gfind_missing_files.sh
+++ b/tools/gfind_missing_files/gfind_missing_files.sh
@@ -61,7 +61,7 @@ mount_slave()
parse_cli()
{
- if [[ $# -ne 4 ]]; then
+ if [ "$#" -ne 4 ]; then
echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>"
exit 1
else
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index 31ab723..76d257f 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -2490,7 +2490,7 @@ wb_mark_readdirp_start(xlator_t *this, inode_t *directory)
wb_directory_inode = wb_inode_create(this, directory);
- if (!wb_directory_inode || !wb_directory_inode->lock.spinlock)
+ if (!wb_directory_inode)
return;
LOCK(&wb_directory_inode->lock);
@@ -2510,7 +2510,7 @@ wb_mark_readdirp_end(xlator_t *this, inode_t *directory)
wb_directory_inode = wb_inode_ctx_get(this, directory);
- if (!wb_directory_inode || !wb_directory_inode->lock.spinlock)
+ if (!wb_directory_inode)
return;
LOCK(&wb_directory_inode->lock);
--
1.8.3.1

View File

@ -1,183 +0,0 @@
From 1491b33007e84be0a0a74354e89deca8a21ed198 Mon Sep 17 00:00:00 2001
From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
Date: Tue, 19 Jan 2021 15:39:35 +0530
Subject: [PATCH 572/584] locks: remove unused conditional switch to spin_lock
code
The use of spin_locks depends on the variable use_spinlocks,
but the same is commented out in the current code base through
https://review.gluster.org/#/c/glusterfs/+/14763/. So it is
of no use to have conditional switching between spin_lock and
mutex. Remove the dead code as part of this patch.
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2007
> Fixes: #1996
> Change-Id: Ib005dd86969ce33d3409164ef3e1011bb3169129
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
BUG: 1925425
Change-Id: Ib005dd86969ce33d3409164ef3e1011bb3169129
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244965
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
configure.ac | 7 -----
libglusterfs/src/Makefile.am | 2 +-
libglusterfs/src/common-utils.c | 5 ----
libglusterfs/src/glusterfs/locking.h | 51 ------------------------------------
libglusterfs/src/locking.c | 27 -------------------
5 files changed, 1 insertion(+), 91 deletions(-)
delete mode 100644 libglusterfs/src/locking.c
diff --git a/configure.ac b/configure.ac
index 6138a59..3d99f6a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -585,13 +585,6 @@ AC_CHECK_HEADERS([linux/falloc.h])
AC_CHECK_HEADERS([linux/oom.h], AC_DEFINE(HAVE_LINUX_OOM_H, 1, [have linux/oom.h]))
-dnl Mac OS X does not have spinlocks
-AC_CHECK_FUNC([pthread_spin_init], [have_spinlock=yes])
-if test "x${have_spinlock}" = "xyes"; then
- AC_DEFINE(HAVE_SPINLOCK, 1, [define if found spinlock])
-fi
-AC_SUBST(HAVE_SPINLOCK)
-
dnl some os may not have GNU defined strnlen function
AC_CHECK_FUNC([strnlen], [have_strnlen=yes])
if test "x${have_strnlen}" = "xyes"; then
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 970f4b7..830a0c3 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -35,7 +35,7 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
strfd.c parse-utils.c $(CONTRIBDIR)/mount/mntent.c \
$(CONTRIBDIR)/libexecinfo/execinfo.c quota-common-utils.c rot-buffs.c \
$(CONTRIBDIR)/timer-wheel/timer-wheel.c \
- $(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \
+ $(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c \
$(CONTRIBDIR)/xxhash/xxhash.c \
compound-fop-utils.c throttle-tbf.c monitoring.c
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index d351b93..c2dfe28 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -860,11 +860,6 @@ gf_dump_config_flags()
gf_msg_plain_nomem(GF_LOG_ALERT, "setfsid 1");
#endif
-/* define if found spinlock */
-#ifdef HAVE_SPINLOCK
- gf_msg_plain_nomem(GF_LOG_ALERT, "spinlock 1");
-#endif
-
/* Define to 1 if you have the <sys/epoll.h> header file. */
#ifdef HAVE_SYS_EPOLL_H
gf_msg_plain_nomem(GF_LOG_ALERT, "epoll.h 1");
diff --git a/libglusterfs/src/glusterfs/locking.h b/libglusterfs/src/glusterfs/locking.h
index 43cc877..63097bb 100644
--- a/libglusterfs/src/glusterfs/locking.h
+++ b/libglusterfs/src/glusterfs/locking.h
@@ -22,55 +22,6 @@
#define pthread_spin_init(l, v) (*l = v)
#endif
-#if defined(HAVE_SPINLOCK)
-
-typedef union {
- pthread_spinlock_t spinlock;
- pthread_mutex_t mutex;
-} gf_lock_t;
-
-#if !defined(LOCKING_IMPL)
-extern int use_spinlocks;
-
-/*
- * Using a dispatch table would be unpleasant because we're dealing with two
- * different types. If the dispatch contains direct pointers to pthread_xx
- * or mutex_xxx then we have to hope that every possible union alternative
- * starts at the same address as the union itself. I'm old enough to remember
- * compilers where this was not the case (for alignment reasons) so I'm a bit
- * paranoid about that. Also, I don't like casting arguments through "void *"
- * which we'd also have to do to avoid type errors. The other alternative would
- * be to define actual functions which pick out the right union member, and put
- * those in the dispatch tables. Now we have a pointer dereference through the
- * dispatch table plus a function call, which is likely to be worse than the
- * branching here from the ?: construct. If it were a clear win it might be
- * worth the extra complexity, but for now this way seems preferable.
- */
-
-#define LOCK_INIT(x) \
- (use_spinlocks ? pthread_spin_init(&((x)->spinlock), 0) \
- : pthread_mutex_init(&((x)->mutex), 0))
-
-#define LOCK(x) \
- (use_spinlocks ? pthread_spin_lock(&((x)->spinlock)) \
- : pthread_mutex_lock(&((x)->mutex)))
-
-#define TRY_LOCK(x) \
- (use_spinlocks ? pthread_spin_trylock(&((x)->spinlock)) \
- : pthread_mutex_trylock(&((x)->mutex)))
-
-#define UNLOCK(x) \
- (use_spinlocks ? pthread_spin_unlock(&((x)->spinlock)) \
- : pthread_mutex_unlock(&((x)->mutex)))
-
-#define LOCK_DESTROY(x) \
- (use_spinlocks ? pthread_spin_destroy(&((x)->spinlock)) \
- : pthread_mutex_destroy(&((x)->mutex)))
-
-#endif
-
-#else
-
typedef pthread_mutex_t gf_lock_t;
#define LOCK_INIT(x) pthread_mutex_init(x, 0)
@@ -79,6 +30,4 @@ typedef pthread_mutex_t gf_lock_t;
#define UNLOCK(x) pthread_mutex_unlock(x)
#define LOCK_DESTROY(x) pthread_mutex_destroy(x)
-#endif /* HAVE_SPINLOCK */
-
#endif /* _LOCKING_H */
diff --git a/libglusterfs/src/locking.c b/libglusterfs/src/locking.c
deleted file mode 100644
index 7577054..0000000
--- a/libglusterfs/src/locking.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#if defined(HAVE_SPINLOCK)
-/* None of this matters otherwise. */
-
-#include <pthread.h>
-#include <unistd.h>
-
-#define LOCKING_IMPL
-#include "glusterfs/locking.h"
-
-int use_spinlocks = 0;
-
-static void __attribute__((constructor)) gf_lock_setup(void)
-{
- // use_spinlocks = (sysconf(_SC_NPROCESSORS_ONLN) > 1);
-}
-
-#endif
--
1.8.3.1

View File

@ -1,148 +0,0 @@
From 0e453ede1f248a004965d0d368e2c4beb83f2ce1 Mon Sep 17 00:00:00 2001
From: Vinayakswami Hariharmath <vharihar@redhat.com>
Date: Mon, 25 Jan 2021 17:32:14 +0530
Subject: [PATCH 573/584] features/shard: unlink fails due to nospace to mknod
marker file
When we hit the max capacity of the storage space, shard_unlink()
starts failing if there is no space left on the brick to create a
marker file.
shard_unlink() happens in below steps:
1. create a marker file in the name of gfid of the base file under
BRICK_PATH/.shard/.remove_me
2. unlink the base file
3. shard_delete_shards() deletes the shards in background by
picking the entries in BRICK_PATH/.shard/.remove_me
If creation of the marker file fails then we can't really delete the
shards, which eventually becomes a problem for a user who is looking
to make space by deleting unwanted data.
Solution:
Create the marker file by marking xdata = GLUSTERFS_INTERNAL_FOP_KEY
which is considered to be internal op and allowed to create under
reserved space.
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/2057
> Fixes: #2038
> Change-Id: I7facebab940f9aeee81d489df429e00ef4fb7c5d
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
BUG: 1891403
Change-Id: I7facebab940f9aeee81d489df429e00ef4fb7c5d
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244966
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/shard/issue-2038.t | 56 ++++++++++++++++++++++++++++++++++++++
xlators/features/shard/src/shard.c | 20 ++++++++++++++
2 files changed, 76 insertions(+)
create mode 100644 tests/bugs/shard/issue-2038.t
diff --git a/tests/bugs/shard/issue-2038.t b/tests/bugs/shard/issue-2038.t
new file mode 100644
index 0000000..fc3e7f9
--- /dev/null
+++ b/tests/bugs/shard/issue-2038.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+TEST verify_lvm_version
+TEST glusterd
+TEST pidof glusterd
+TEST init_n_bricks 1
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+$CLI volume info
+
+TEST $CLI volume set $V0 features.shard on
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+#Setting the size in percentage
+TEST $CLI volume set $V0 storage.reserve 40
+
+#wait 5s to reset disk_space_full flag
+sleep 5
+
+TEST touch $M0/test
+TEST unlink $M0/test
+
+TEST dd if=/dev/zero of=$M0/a bs=80M count=1
+TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+gfid_new=$(get_gfid_string $M0/a)
+
+# Wait 5s to update disk_space_full flag because thread check disk space
+# after every 5s
+
+sleep 5
+# setup_lvm create lvm partition of 150M and 40M are reserve so after
+# consuming more than 110M next unlink should not fail
+# Delete the base shard and check shards get cleaned up
+TEST unlink $M0/a
+TEST ! stat $M0/a
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index d1d7d7a..8d4a970 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -4078,6 +4078,16 @@ shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this,
SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
local->prebuf.ia_size, 0, err);
+ /* Mark this as an internal operation, so that in case of disk full,
+ * the marker file will be created as part of reserve space */
+ ret = dict_set_int32_sizen(xattr_req, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set key: %s on path %s", GLUSTERFS_INTERNAL_FOP_KEY,
+ local->newloc.path);
+ goto err;
+ }
+
STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
&local->newloc, 0, 0, 0644, xattr_req);
@@ -5843,6 +5853,16 @@ shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
SHARD_SET_ROOT_FS_ID(frame, local);
+ /* Mark this as an internal operation, so that in case of disk full
+ * the internal dir will be created as part of reserve space */
+ ret = dict_set_int32_sizen(xattr_req, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set key: %s on path %s", GLUSTERFS_INTERNAL_FOP_KEY,
+ loc->path);
+ goto err;
+ }
+
STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
0755, 0, xattr_req);
--
1.8.3.1

View File

@ -1,712 +0,0 @@
From cb0d240004e6d40f8d7f30d177d5970ebc8e25fb Mon Sep 17 00:00:00 2001
From: Vinayak hariharmath <65405035+VHariharmath-rh@users.noreply.github.com>
Date: Wed, 3 Feb 2021 17:04:25 +0530
Subject: [PATCH 574/584] features/shard: delay unlink of a file that has
fd_count > 0
When there are multiple processes working on a file and if any
process unlinks that file then unlink operation shouldn't harm
other processes working on it. This is POSIX-compliant
behavior and it should also be supported when the shard
feature is enabled.
Problem description:
Let's consider 2 clients C1 and C2 working on a file F1 with 5
shards on gluster mount and gluster server has 4 bricks
B1, B2, B3, B4.
Assume that base file/shard is present on B1, 1st, 2nd shards
on B2, 3rd and 4th shards on B3 and 5th shard falls on B4. C1
has opened the F1 in append mode and is writing to it. The
write FOP goes to 5th shard in this case. So the
inode->fd_count = 1 on B1(base file) and B4 (5th shard).
C2 at the same time issued unlink to F1. On the server, the
base file has fd_count = 1 (since C1 has opened the file),
the base file is renamed under .glusterfs/unlink and
returned to C2. Then unlink will be sent to shards on all
bricks and shards on B2 and B3 will be deleted which have
no open reference yet. C1 starts getting errors while
accessing the remaining shards though it has open references
for the file.
This is one such undefined behavior. Likewise, we will
encounter many such undefined behaviors as we don't have one
global lock to access all shards as one. Of course, having such
a global lock would lead to a performance hit as it reduces the
window for parallel access of shards.
Solution:
The above undefined behavior can be addressed by delaying the
unlink of a file when there are open references on it.
File unlink happens in 2 steps.
step 1: client creates marker file under .shard/.remove_me and
sends unlink on base file to the server
step 2: on return from the server, the associated shards will
be cleaned up and finally marker file will be removed.
In step 2, the background deletion process does a nameless
lookup using the marker file name (the marker file is named after
the gfid of the base file) in the .glusterfs/unlink dir. If the
nameless lookup is successful then the gfid still has open
fds and deletion of the shards has to be delayed. If the nameless
lookup fails then the gfid is unlinked and there are no
open fds on that file (the gfid path is unlinked during the final
close on the file). The shards on which deletion is delayed
are unlinked once all the open fds are closed, and this is
done through a thread which wakes up every 10 mins.
Also removed active_fd_count from inode structure and
referring fd_count wherever active_fd_count was used.
Backport of:
> Upstream-patch: https://github.com/gluster/glusterfs/pull/1563
> Fixes: #1358
> Change-Id: I8985093386e26215e0b0dce294c534a66f6ca11c
> Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
BUG: 1782428
Change-Id: I8985093386e26215e0b0dce294c534a66f6ca11c
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244967
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/glusterfs/glusterfs.h | 1 +
tests/bugs/shard/issue-1358.t | 100 +++++++++++++
tests/bugs/shard/unlinks-and-renames.t | 5 +
xlators/features/shard/src/shard.c | 199 ++++++++++++++++++++++++-
xlators/features/shard/src/shard.h | 11 ++
xlators/storage/posix/src/posix-entry-ops.c | 36 +++++
xlators/storage/posix/src/posix-inode-fd-ops.c | 64 +++++---
7 files changed, 391 insertions(+), 25 deletions(-)
create mode 100644 tests/bugs/shard/issue-1358.t
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index d3400bf..4401cf6 100644
--- a/libglusterfs/src/glusterfs/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -261,6 +261,7 @@ enum gf_internal_fop_indicator {
#define GF_XATTROP_PURGE_INDEX "glusterfs.xattrop-purge-index"
#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
+#define GF_UNLINKED_LOOKUP "unlinked-lookup"
/* replace-brick and pump related internal xattrs */
#define RB_PUMP_CMD_START "glusterfs.pump.start"
#define RB_PUMP_CMD_PAUSE "glusterfs.pump.pause"
diff --git a/tests/bugs/shard/issue-1358.t b/tests/bugs/shard/issue-1358.t
new file mode 100644
index 0000000..1838e06
--- /dev/null
+++ b/tests/bugs/shard/issue-1358.t
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST dd if=/dev/urandom of=$M0/dir/foo bs=4M count=5
+gfid_new=$(get_gfid_string $M0/dir/foo)
+
+# Ensure its shards dir is created now.
+TEST stat $B0/${V0}0/.shard/$gfid_new.1
+TEST stat $B0/${V0}1/.shard/$gfid_new.1
+TEST stat $B0/${V0}0/.shard/$gfid_new.2
+TEST stat $B0/${V0}1/.shard/$gfid_new.2
+
+# Open a file and store descriptor in fd = 5
+exec 5>$M0/dir/foo
+
+# Write something on the file using the open fd = 5
+echo "issue-1358" >&5
+
+# Write on the descriptor should be successful
+EXPECT 0 echo $?
+
+# Unlink the same file which is opened in prev step
+TEST unlink $M0/dir/foo
+
+# Check the base file
+TEST ! stat $M0/dir/foo
+TEST ! stat $B0/${V0}0/foo
+TEST ! stat $B0/${V0}1/foo
+
+# Write something on the file using the open fd = 5
+echo "issue-1281" >&5
+
+# Write on the descriptor should be successful
+EXPECT 0 echo $?
+
+# Check ".shard/.remove_me"
+EXPECT_WITHIN $FILE_COUNT_TIME 1 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 1 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new
+
+# Close the fd = 5
+exec 5>&-
+
+###### To see the shards deleted, wait for 10 mins or repeat the same steps i.e open a file #####
+###### write something to it, unlink it and close it. This will wake up the thread that is ######
+###### responsible to delete the shards
+
+TEST touch $M0/dir/new
+exec 6>$M0/dir/new
+echo "issue-1358" >&6
+EXPECT 0 echo $?
+TEST unlink $M0/dir/new
+exec 6>&-
+
+# Now check the ".shard/.remove_me" and the gfid will not be there
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new
+
+# check for the absence of shards
+TEST ! stat $B0/${V0}0/.shard/$gfid_new.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_new.1
+TEST ! stat $B0/${V0}0/.shard/$gfid_new.2
+TEST ! stat $B0/${V0}1/.shard/$gfid_new.2
+
+#### Create the file with same name and check creation and deletion works fine ######
+TEST dd if=/dev/urandom of=$M0/dir/foo bs=4M count=5
+gfid_new=$(get_gfid_string $M0/dir/foo)
+
+# Ensure its shards dir is created now.
+TEST stat $B0/${V0}0/.shard/$gfid_new.1
+TEST stat $B0/${V0}1/.shard/$gfid_new.1
+TEST stat $B0/${V0}0/.shard/$gfid_new.2
+TEST stat $B0/${V0}1/.shard/$gfid_new.2
+
+TEST unlink $M0/dir/foo
+cleanup
+
diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t
index 990ca69..3280fcb 100644
--- a/tests/bugs/shard/unlinks-and-renames.t
+++ b/tests/bugs/shard/unlinks-and-renames.t
@@ -24,6 +24,11 @@ TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
TEST $CLI volume set $V0 features.shard on
TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.write-behind off
+
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 8d4a970..b828ff9 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -1242,7 +1242,8 @@ out:
static inode_t *
shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode,
- struct iatt *buf, shard_internal_dir_type_t type)
+ xlator_t *this, struct iatt *buf,
+ shard_internal_dir_type_t type)
{
inode_t *linked_inode = NULL;
shard_priv_t *priv = NULL;
@@ -1250,7 +1251,7 @@ shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode,
inode_t **priv_inode = NULL;
inode_t *parent = NULL;
- priv = THIS->private;
+ priv = this->private;
switch (type) {
case SHARD_INTERNAL_DIR_DOT_SHARD:
@@ -1294,7 +1295,7 @@ shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
/* To-Do: Fix refcount increment per call to
* shard_link_internal_dir_inode().
*/
- linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+ linked_inode = shard_link_internal_dir_inode(local, inode, this, buf, type);
shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
out:
shard_common_resolve_shards(frame, this, local->post_res_handler);
@@ -1383,7 +1384,7 @@ shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
}
- link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+ link_inode = shard_link_internal_dir_inode(local, inode, this, buf, type);
if (link_inode != inode) {
shard_refresh_internal_dir(frame, this, type);
} else {
@@ -3586,7 +3587,8 @@ shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
"Lookup on %s failed, exiting", bname);
goto err;
} else {
- shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
+ shard_link_internal_dir_inode(local, loc->inode, this, &stbuf,
+ type);
}
}
ret = 0;
@@ -3633,6 +3635,45 @@ err:
return ret;
}
+static int
+shard_nameless_lookup_base_file(xlator_t *this, char *gfid)
+{
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ goto out;
+ }
+
+ loc.inode = inode_new(this->itable);
+ if (loc.inode == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_uuid_parse(gfid, loc.gfid);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_set_uint32(xattr_req, GF_UNLINKED_LOOKUP, 1);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, xattr_req, NULL);
+ if (ret < 0)
+ goto out;
+
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ loc_wipe(&loc);
+
+ return ret;
+}
+
int
shard_delete_shards(void *opaque)
{
@@ -3734,6 +3775,11 @@ shard_delete_shards(void *opaque)
if (ret < 0)
continue;
}
+
+ ret = shard_nameless_lookup_base_file(this, entry->d_name);
+ if (!ret)
+ continue;
+
link_inode = inode_link(entry->inode, local->fd->inode,
entry->d_name, &entry->d_stat);
@@ -4105,6 +4151,9 @@ err:
int
shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+static int
+shard_unlink_handler_spawn(xlator_t *this);
+
int
shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@@ -4126,7 +4175,7 @@ shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (xdata)
local->xattr_rsp = dict_ref(xdata);
if (local->cleanup_required)
- shard_start_background_deletion(this);
+ shard_unlink_handler_spawn(this);
}
if (local->entrylk_frame) {
@@ -5785,7 +5834,7 @@ shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
}
- link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+ link_inode = shard_link_internal_dir_inode(local, inode, this, buf, type);
if (link_inode != inode) {
shard_refresh_internal_dir(frame, this, type);
} else {
@@ -7098,6 +7147,132 @@ shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
return 0;
}
+static void
+shard_unlink_wait(shard_unlink_thread_t *ti)
+{
+ struct timespec wait_till = {
+ 0,
+ };
+
+ pthread_mutex_lock(&ti->mutex);
+ {
+ /* shard_unlink_handler() runs every 10 mins of interval */
+ wait_till.tv_sec = time(NULL) + 600;
+
+ while (!ti->rerun) {
+ if (pthread_cond_timedwait(&ti->cond, &ti->mutex, &wait_till) ==
+ ETIMEDOUT)
+ break;
+ }
+ ti->rerun = _gf_false;
+ }
+ pthread_mutex_unlock(&ti->mutex);
+}
+
+static void *
+shard_unlink_handler(void *data)
+{
+ shard_unlink_thread_t *ti = data;
+ xlator_t *this = ti->this;
+
+ THIS = this;
+
+ while (!ti->stop) {
+ shard_start_background_deletion(this);
+ shard_unlink_wait(ti);
+ }
+ return NULL;
+}
+
+static int
+shard_unlink_handler_spawn(xlator_t *this)
+{
+ int ret = 0;
+ shard_priv_t *priv = this->private;
+ shard_unlink_thread_t *ti = &priv->thread_info;
+
+ ti->this = this;
+
+ pthread_mutex_lock(&ti->mutex);
+ {
+ if (ti->running) {
+ pthread_cond_signal(&ti->cond);
+ } else {
+ ret = gf_thread_create(&ti->thread, NULL, shard_unlink_handler, ti,
+ "shard_unlink");
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to create \"shard_unlink\" thread");
+ goto unlock;
+ }
+ ti->running = _gf_true;
+ }
+
+ ti->rerun = _gf_true;
+ }
+unlock:
+ pthread_mutex_unlock(&ti->mutex);
+ return ret;
+}
+
+static int
+shard_unlink_handler_init(shard_unlink_thread_t *ti)
+{
+ int ret = 0;
+ xlator_t *this = THIS;
+
+ ret = pthread_mutex_init(&ti->mutex, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to init mutex for \"shard_unlink\" thread");
+ goto out;
+ }
+
+ ret = pthread_cond_init(&ti->cond, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to init cond var for \"shard_unlink\" thread");
+ pthread_mutex_destroy(&ti->mutex);
+ goto out;
+ }
+
+ ti->running = _gf_false;
+ ti->rerun = _gf_false;
+ ti->stop = _gf_false;
+
+out:
+ return -ret;
+}
+
+static void
+shard_unlink_handler_fini(shard_unlink_thread_t *ti)
+{
+ int ret = 0;
+ xlator_t *this = THIS;
+ if (!ti)
+ return;
+
+ pthread_mutex_lock(&ti->mutex);
+ if (ti->running) {
+ ti->rerun = _gf_true;
+ ti->stop = _gf_true;
+ pthread_cond_signal(&ti->cond);
+ }
+ pthread_mutex_unlock(&ti->mutex);
+
+ if (ti->running) {
+ ret = pthread_join(ti->thread, NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "Failed to clean up shard unlink thread.");
+ ti->running = _gf_false;
+ }
+ ti->thread = 0;
+
+ pthread_cond_destroy(&ti->cond);
+ pthread_mutex_destroy(&ti->mutex);
+}
+
int32_t
mem_acct_init(xlator_t *this)
{
@@ -7164,6 +7339,14 @@ init(xlator_t *this)
this->private = priv;
LOCK_INIT(&priv->lock);
INIT_LIST_HEAD(&priv->ilist_head);
+
+ ret = shard_unlink_handler_init(&priv->thread_info);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to initialize resources for \"shard_unlink\" thread");
+ goto out;
+ }
+
ret = 0;
out:
if (ret) {
@@ -7188,6 +7371,8 @@ fini(xlator_t *this)
if (!priv)
goto out;
+ shard_unlink_handler_fini(&priv->thread_info);
+
this->private = NULL;
LOCK_DESTROY(&priv->lock);
GF_FREE(priv);
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 4fe181b..3dcb112 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -207,6 +207,16 @@ typedef enum {
/* rm = "remove me" */
+typedef struct shard_unlink_thread {
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t thread;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
+ gf_boolean_t stop;
+ xlator_t *this;
+} shard_unlink_thread_t;
+
typedef struct shard_priv {
uint64_t block_size;
uuid_t dot_shard_gfid;
@@ -220,6 +230,7 @@ typedef struct shard_priv {
shard_bg_deletion_state_t bg_del_state;
gf_boolean_t first_lookup_done;
uint64_t lru_limit;
+ shard_unlink_thread_t thread_info;
} shard_priv_t;
typedef struct {
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
index b3a5381..1511e68 100644
--- a/xlators/storage/posix/src/posix-entry-ops.c
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -183,6 +183,11 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
struct posix_private *priv = NULL;
posix_inode_ctx_t *ctx = NULL;
int ret = 0;
+ uint32_t lookup_unlink_dir = 0;
+ char *unlink_path = NULL;
+ struct stat lstatbuf = {
+ 0,
+ };
VALIDATE_OR_GOTO(frame, out);
VALIDATE_OR_GOTO(this, out);
@@ -208,7 +213,36 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
op_ret = -1;
if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) {
/* nameless lookup */
+ op_ret = op_errno = errno = 0;
MAKE_INODE_HANDLE(real_path, this, loc, &buf);
+
+ /* The gfid will be renamed to ".glusterfs/unlink" in case
+ * there are any open fds on the file in posix_unlink path.
+ * So client can request server to do nameless lookup with
+ * xdata = GF_UNLINKED_LOOKUP in ".glusterfs/unlink"
+ * dir if a client wants to know the status of the all open fds
+ * on the unlinked file. If the file still present in the
+ * ".glusterfs/unlink" dir then it indicates there still
+ * open fds present on the file and the file is still under
+ * unlink process */
+ if (op_ret < 0 && errno == ENOENT) {
+ ret = dict_get_uint32(xdata, GF_UNLINKED_LOOKUP,
+ &lookup_unlink_dir);
+ if (!ret && lookup_unlink_dir) {
+ op_ret = op_errno = errno = 0;
+ POSIX_GET_FILE_UNLINK_PATH(priv->base_path, loc->gfid,
+ unlink_path);
+ ret = sys_lstat(unlink_path, &lstatbuf);
+ if (ret) {
+ op_ret = -1;
+ op_errno = errno;
+ } else {
+ iatt_from_stat(&buf, &lstatbuf);
+ buf.ia_nlink = 0;
+ }
+ goto nameless_lookup_unlink_dir_out;
+ }
+ }
} else {
MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &buf);
if (!real_path || !par_path) {
@@ -328,6 +362,8 @@ out:
if (op_ret == 0)
op_errno = 0;
+
+nameless_lookup_unlink_dir_out:
STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno,
(loc) ? loc->inode : NULL, &buf, xattr, &postparent);
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 761e018..4c2983a 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -2504,6 +2504,39 @@ out:
return 0;
}
+static int
+posix_unlink_renamed_file(xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+ char *unlink_path = NULL;
+ uint64_t ctx_uint = 0;
+ posix_inode_ctx_t *ctx = NULL;
+ struct posix_private *priv = this->private;
+
+ ret = inode_ctx_get(inode, this, &ctx_uint);
+
+ if (ret < 0)
+ goto out;
+
+ ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ if (ctx->unlink_flag == GF_UNLINK_TRUE) {
+ POSIX_GET_FILE_UNLINK_PATH(priv->base_path, inode->gfid, unlink_path);
+ if (!unlink_path) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED,
+ "Failed to remove gfid :%s", uuid_utoa(inode->gfid));
+ ret = -1;
+ } else {
+ ret = sys_unlink(unlink_path);
+ if (!ret)
+ ctx->unlink_flag = GF_UNLINK_FALSE;
+ }
+ }
+
+out:
+ return ret;
+}
+
int32_t
posix_release(xlator_t *this, fd_t *fd)
{
@@ -2514,6 +2547,9 @@ posix_release(xlator_t *this, fd_t *fd)
VALIDATE_OR_GOTO(this, out);
VALIDATE_OR_GOTO(fd, out);
+ if (fd->inode->active_fd_count == 0)
+ posix_unlink_renamed_file(this, fd->inode);
+
ret = fd_ctx_del(fd, this, &tmp_pfd);
if (ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
@@ -5881,41 +5917,33 @@ posix_forget(xlator_t *this, inode_t *inode)
uint64_t ctx_uint1 = 0;
uint64_t ctx_uint2 = 0;
posix_inode_ctx_t *ctx = NULL;
- posix_mdata_t *mdata = NULL;
- struct posix_private *priv_posix = NULL;
-
- priv_posix = (struct posix_private *)this->private;
- if (!priv_posix)
- return 0;
+ struct posix_private *priv = this->private;
ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2);
+
+ if (ctx_uint2)
+ GF_FREE((posix_mdata_t *)(uintptr_t)ctx_uint2);
+
if (!ctx_uint1)
- goto check_ctx2;
+ return 0;
ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1;
if (ctx->unlink_flag == GF_UNLINK_TRUE) {
- POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid,
- unlink_path);
+ POSIX_GET_FILE_UNLINK_PATH(priv->base_path, inode->gfid, unlink_path);
if (!unlink_path) {
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED,
"Failed to remove gfid :%s", uuid_utoa(inode->gfid));
ret = -1;
- goto ctx_free;
+ } else {
+ ret = sys_unlink(unlink_path);
}
- ret = sys_unlink(unlink_path);
}
-ctx_free:
+
pthread_mutex_destroy(&ctx->xattrop_lock);
pthread_mutex_destroy(&ctx->write_atomic_lock);
pthread_mutex_destroy(&ctx->pgfid_lock);
GF_FREE(ctx);
-check_ctx2:
- if (ctx_uint2) {
- mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2;
- }
-
- GF_FREE(mdata);
return ret;
}
--
1.8.3.1

View File

@ -1,160 +0,0 @@
From 59e69ae1c7ccda74a8cbf8c9b2ae37bc74cbf612 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Fri, 4 Jun 2021 10:55:37 +0530
Subject: [PATCH 575/584] libglusterfs: add functions to calculate time
difference
Add gf_tvdiff() and gf_tsdiff() to calculate the difference
between 'struct timeval' and 'struct timespec' values, use
them where appropriate.
Upstream patch details:
> https://github.com/gluster/glusterfs/commit/ba7f24b1cedf2549394c21b3f0df1661227cefae
> Change-Id: I172be06ee84e99a1da76847c15e5ea3fbc059338
> Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
> Updates: #1002
BUG: 1928676
Change-Id: I723ab9555b0f8caef108742acc2cb63d6a32eb96
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245294
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
glusterfsd/src/glusterfsd-mgmt.c | 4 ++--
libglusterfs/src/glusterfs/common-utils.h | 32 +++++++++++++++++++++++++++++++
libglusterfs/src/latency.c | 3 +--
xlators/cluster/dht/src/dht-rebalance.c | 6 ++----
xlators/debug/io-stats/src/io-stats.c | 8 ++------
5 files changed, 39 insertions(+), 14 deletions(-)
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 61d1b21..a51dd9e 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -534,7 +534,7 @@ glusterfs_volume_top_write_perf(uint32_t blk_size, uint32_t blk_count,
}
gettimeofday(&end, NULL);
- *time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
+ *time = gf_tvdiff(&begin, &end);
*throughput = total_blks / *time;
gf_log("glusterd", GF_LOG_INFO,
"Throughput %.2f Mbps time %.2f secs "
@@ -653,7 +653,7 @@ glusterfs_volume_top_read_perf(uint32_t blk_size, uint32_t blk_count,
}
gettimeofday(&end, NULL);
- *time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
+ *time = gf_tvdiff(&begin, &end);
*throughput = total_blks / *time;
gf_log("glusterd", GF_LOG_INFO,
"Throughput %.2f Mbps time %.2f secs "
diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
index 604afd0..bd48b6f 100644
--- a/libglusterfs/src/glusterfs/common-utils.h
+++ b/libglusterfs/src/glusterfs/common-utils.h
@@ -1090,4 +1090,36 @@ find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args);
int
gf_d_type_from_ia_type(ia_type_t type);
+/* Return delta value in microseconds. */
+
+static inline double
+gf_tvdiff(struct timeval *start, struct timeval *end)
+{
+ struct timeval t;
+
+ if (start->tv_usec > end->tv_usec)
+ t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e6 +
+ (double)(t.tv_usec - start->tv_usec);
+}
+
+/* Return delta value in nanoseconds. */
+
+static inline double
+gf_tsdiff(struct timespec *start, struct timespec *end)
+{
+ struct timespec t;
+
+ if (start->tv_nsec > end->tv_nsec)
+ t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e9 +
+ (double)(t.tv_nsec - start->tv_nsec);
+}
+
#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index e1e6de7..ce61399 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -33,8 +33,7 @@ gf_update_latency(call_frame_t *frame)
if (!(begin->tv_sec && end->tv_sec))
goto out;
- elapsed = (end->tv_sec - begin->tv_sec) * 1e9 +
- (end->tv_nsec - begin->tv_nsec);
+ elapsed = gf_tsdiff(begin, end);
if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index eab7558..e07dec0 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2927,8 +2927,7 @@ gf_defrag_migrate_single_file(void *opaque)
if (defrag->stats == _gf_true) {
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
+ elapsed = gf_tvdiff(&start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration of "
"file:%s size:%" PRIu64
@@ -3529,8 +3528,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
+ elapsed = gf_tvdiff(&dir_start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration operation on dir %s took "
"%.2f secs",
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 9b34895..8ad96fb 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -281,9 +281,7 @@ is_fop_latency_started(call_frame_t *frame)
begin = &frame->begin; \
end = &frame->end; \
\
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + \
- (end->tv_nsec - begin->tv_nsec)) / \
- 1000; \
+ elapsed = gf_tsdiff(begin, end) / 1000.0; \
throughput = op_ret / elapsed; \
\
conf = this->private; \
@@ -1774,9 +1772,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame,
begin = &frame->begin;
end = &frame->end;
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 +
- (end->tv_nsec - begin->tv_nsec)) /
- 1000;
+ elapsed = gf_tsdiff(begin, end) / 1000.0;
update_ios_latency_stats(&conf->cumulative, elapsed, op);
update_ios_latency_stats(&conf->incremental, elapsed, op);
--
1.8.3.1

View File

@ -1,573 +0,0 @@
From f2b9d3a089cc9ff9910da0075defe306851aca5c Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Fri, 4 Jun 2021 12:27:57 +0530
Subject: [PATCH 576/584] rpcsvc: Add latency tracking for rpc programs
Added latency tracking to the rpc-handling code. With this change we
should be able to monitor the amount of time the rpc-handling code
consumes for each rpc call.
Upstream patch details:
> https://review.gluster.org/#/c/glusterfs/+/24955/
> fixes: #1466
> Change-Id: I04fc7f3b12bfa5053c0fc36885f271cb78f581cd
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
BUG: 1928676
Change-Id: Ibcedddb5db3ff4906607050cf9f7ea3ebb266cc5
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245295
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/glusterfs/latency.h | 22 +++++---
libglusterfs/src/glusterfs/mem-types.h | 1 +
libglusterfs/src/glusterfs/stack.h | 7 +--
libglusterfs/src/glusterfs/statedump.h | 2 +
libglusterfs/src/glusterfs/xlator.h | 2 +-
libglusterfs/src/latency.c | 93 +++++++++++++++-------------------
libglusterfs/src/libglusterfs.sym | 5 ++
libglusterfs/src/monitoring.c | 8 +--
libglusterfs/src/statedump.c | 38 +++++++++++++-
libglusterfs/src/xlator.c | 5 ++
rpc/rpc-lib/src/libgfrpc.sym | 1 +
rpc/rpc-lib/src/rpcsvc.c | 72 +++++++++++++++++++++++++-
rpc/rpc-lib/src/rpcsvc.h | 5 ++
xlators/protocol/server/src/server.c | 2 +
14 files changed, 193 insertions(+), 70 deletions(-)
diff --git a/libglusterfs/src/glusterfs/latency.h b/libglusterfs/src/glusterfs/latency.h
index ed47b1f..4d601bb 100644
--- a/libglusterfs/src/glusterfs/latency.h
+++ b/libglusterfs/src/glusterfs/latency.h
@@ -11,13 +11,23 @@
#ifndef __LATENCY_H__
#define __LATENCY_H__
-#include "glusterfs/glusterfs.h"
+#include <inttypes.h>
+#include <time.h>
-typedef struct fop_latency {
- double min; /* min time for the call (microseconds) */
- double max; /* max time for the call (microseconds) */
- double total; /* total time (microseconds) */
+typedef struct _gf_latency {
+ uint64_t min; /* min time for the call (nanoseconds) */
+ uint64_t max; /* max time for the call (nanoseconds) */
+ uint64_t total; /* total time (nanoseconds) */
uint64_t count;
-} fop_latency_t;
+} gf_latency_t;
+gf_latency_t *
+gf_latency_new(size_t n);
+
+void
+gf_latency_reset(gf_latency_t *lat);
+
+void
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end);
#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h
index 92730a9..970b9ff 100644
--- a/libglusterfs/src/glusterfs/mem-types.h
+++ b/libglusterfs/src/glusterfs/mem-types.h
@@ -139,6 +139,7 @@ enum gf_common_mem_types_ {
gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
gf_common_mt_server_cmdline_t, /* used only in one location */
gf_mt_gfdb_query_record_t,
+ gf_common_mt_latency_t,
gf_common_mt_end
};
#endif
diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h
index bd466d8..536a330 100644
--- a/libglusterfs/src/glusterfs/stack.h
+++ b/libglusterfs/src/glusterfs/stack.h
@@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame,
xlator_t *this, int32_t op_ret, int32_t op_errno,
...);
+void
+gf_frame_latency_update(call_frame_t *frame);
+
struct call_pool {
union {
struct list_head all_frames;
@@ -149,8 +152,6 @@ struct _call_stack {
} while (0);
struct xlator_fops;
-void
-gf_update_latency(call_frame_t *frame);
static inline void
FRAME_DESTROY(call_frame_t *frame)
@@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame)
void *local = NULL;
if (frame->root->ctx->measure_latency)
- gf_update_latency(frame);
+ gf_frame_latency_update(frame);
list_del_init(&frame->frames);
if (frame->local) {
diff --git a/libglusterfs/src/glusterfs/statedump.h b/libglusterfs/src/glusterfs/statedump.h
index 89d04f9..ce08270 100644
--- a/libglusterfs/src/glusterfs/statedump.h
+++ b/libglusterfs/src/glusterfs/statedump.h
@@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd);
void
gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd);
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat);
#endif /* STATEDUMP_H */
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
index 273039a..ecb9fa4 100644
--- a/libglusterfs/src/glusterfs/xlator.h
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -808,7 +808,7 @@ struct _xlator {
struct {
/* for latency measurement */
- fop_latency_t latencies[GF_FOP_MAXVALUE];
+ gf_latency_t latencies[GF_FOP_MAXVALUE];
/* for latency measurement */
fop_metrics_t metrics[GF_FOP_MAXVALUE];
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index ce61399..ce4b0e8 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -14,39 +14,34 @@
*/
#include "glusterfs/glusterfs.h"
-#include "glusterfs/xlator.h"
-#include "glusterfs/common-utils.h"
#include "glusterfs/statedump.h"
-#include "glusterfs/libglusterfs-messages.h"
-void
-gf_update_latency(call_frame_t *frame)
+gf_latency_t *
+gf_latency_new(size_t n)
{
- double elapsed;
- struct timespec *begin, *end;
-
- fop_latency_t *lat;
-
- begin = &frame->begin;
- end = &frame->end;
+ int i = 0;
+ gf_latency_t *lat = NULL;
- if (!(begin->tv_sec && end->tv_sec))
- goto out;
+ lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t);
+ if (!lat)
+ return NULL;
- elapsed = gf_tsdiff(begin, end);
+ for (i = 0; i < n; i++) {
+ gf_latency_reset(lat + i);
+ }
+ return lat;
+}
- if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
- gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
- frame->op);
+void
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end)
+{
+ if (!(begin->tv_sec && end->tv_sec)) {
+ /*Measure latency might have been enabled/disabled during the op*/
return;
}
- /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
- set it right anyways for those frames */
- if (!frame->op)
- frame->op = frame->root->op;
-
- lat = &frame->this->stats.interval.latencies[frame->op];
+ double elapsed = gf_tsdiff(begin, end);
if (lat->max < elapsed)
lat->max = elapsed;
@@ -56,40 +51,34 @@ gf_update_latency(call_frame_t *frame)
lat->total += elapsed;
lat->count++;
-out:
- return;
}
void
-gf_proc_dump_latency_info(xlator_t *xl)
+gf_latency_reset(gf_latency_t *lat)
{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i;
-
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
- gf_proc_dump_add_section("%s", key_prefix);
-
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
-
- fop_latency_t *lat = &xl->stats.interval.latencies[i];
+ if (!lat)
+ return;
+ memset(lat, 0, sizeof(*lat));
+ lat->min = ULLONG_MAX;
+ /* make sure 'min' is set to high value, so it would be
+ properly set later */
+}
- /* Doesn't make sense to continue if there are no fops
- came in the given interval */
- if (!lat->count)
- continue;
+void
+gf_frame_latency_update(call_frame_t *frame)
+{
+ gf_latency_t *lat;
+ /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
+ set it right anyways for those frames */
+ if (!frame->op)
+ frame->op = frame->root->op;
- gf_proc_dump_write(key, "%.03f,%" PRId64 ",%.03f",
- (lat->total / lat->count), lat->count, lat->total);
+ if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
+ gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
+ frame->op);
+ return;
}
- memset(xl->stats.interval.latencies, 0,
- sizeof(xl->stats.interval.latencies));
-
- /* make sure 'min' is set to high value, so it would be
- properly set later */
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- xl->stats.interval.latencies[i].min = 0xffffffff;
- }
+ lat = &frame->this->stats.interval.latencies[frame->op];
+ gf_latency_update(lat, &frame->begin, &frame->end);
}
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index 9072afa..4f968e1 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -1183,3 +1183,8 @@ gf_latency_reset
gf_latency_update
gf_frame_latency_update
gf_assert
+gf_latency_statedump_and_reset
+gf_latency_new
+gf_latency_reset
+gf_latency_update
+gf_frame_latency_update
diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c
index 6d9bfb1..20b7f52 100644
--- a/libglusterfs/src/monitoring.c
+++ b/libglusterfs/src/monitoring.c
@@ -113,15 +113,15 @@ dump_latency_and_count(xlator_t *xl, int fd)
dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,
gf_fop_list[index], cbk);
}
- if (xl->stats.interval.latencies[index].count != 0.0) {
+ if (xl->stats.interval.latencies[index].count != 0) {
dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,
gf_fop_list[index],
- (xl->stats.interval.latencies[index].total /
+ (((double)xl->stats.interval.latencies[index].total) /
xl->stats.interval.latencies[index].count));
- dprintf(fd, "%s.interval.%s.max %lf\n", xl->name,
+ dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name,
gf_fop_list[index],
xl->stats.interval.latencies[index].max);
- dprintf(fd, "%s.interval.%s.min %lf\n", xl->name,
+ dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name,
gf_fop_list[index],
xl->stats.interval.latencies[index].min);
}
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index d18b50f..4bf4cc2 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -201,6 +201,40 @@ gf_proc_dump_write(char *key, char *value, ...)
return ret;
}
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat)
+{
+ /* Doesn't make sense to continue if there are no fops
+ came in the given interval */
+ if (!lat || !lat->count)
+ return;
+ gf_proc_dump_write(key,
+ "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64
+ " MAX:%" PRIu64,
+ (((double)lat->total) / lat->count), lat->count,
+ lat->total, lat->min, lat->max);
+ gf_latency_reset(lat);
+}
+
+void
+gf_proc_dump_xl_latency_info(xlator_t *xl)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
+
+ gf_latency_t *lat = &xl->stats.interval.latencies[i];
+
+ gf_latency_statedump_and_reset(key, lat);
+ }
+}
+
static void
gf_proc_dump_xlator_mem_info(xlator_t *xl)
{
@@ -487,7 +521,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav)
return;
if (ctx->measure_latency)
- gf_proc_dump_latency_info(trav);
+ gf_proc_dump_xl_latency_info(trav);
gf_proc_dump_xlator_mem_info(trav);
@@ -1024,7 +1058,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd)
{
gf_dump_strfd = strfd;
- gf_proc_dump_latency_info(this);
+ gf_proc_dump_xl_latency_info(this);
gf_dump_strfd = NULL;
}
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 36cc32c..b9ad411 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -246,6 +246,7 @@ xlator_dynload_apis(xlator_t *xl)
void *handle = NULL;
volume_opt_list_t *vol_opt = NULL;
xlator_api_t *xlapi = NULL;
+ int i = 0;
handle = xl->dlhandle;
@@ -343,6 +344,10 @@ xlator_dynload_apis(xlator_t *xl)
memcpy(xl->op_version, xlapi->op_version,
sizeof(uint32_t) * GF_MAX_RELEASES);
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_latency_reset(&xl->stats.interval.latencies[i]);
+ }
+
ret = 0;
out:
return ret;
diff --git a/rpc/rpc-lib/src/libgfrpc.sym b/rpc/rpc-lib/src/libgfrpc.sym
index f3544e3..a1757cc 100644
--- a/rpc/rpc-lib/src/libgfrpc.sym
+++ b/rpc/rpc-lib/src/libgfrpc.sym
@@ -66,3 +66,4 @@ rpc_transport_unix_options_build
rpc_transport_unref
rpc_clnt_mgmt_pmap_signout
rpcsvc_autoscale_threads
+rpcsvc_statedump
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index b031d93..855b512 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -25,6 +25,7 @@
#include <glusterfs/syncop.h>
#include "rpc-drc.h"
#include "protocol-common.h"
+#include <glusterfs/statedump.h>
#include <errno.h>
#include <pthread.h>
@@ -377,6 +378,10 @@ rpcsvc_program_actor(rpcsvc_request_t *req)
goto err;
}
+ if (svc->xl->ctx->measure_latency) {
+ timespec_now(&req->begin);
+ }
+
req->ownthread = program->ownthread;
req->synctask = program->synctask;
@@ -1526,10 +1531,18 @@ rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
size_t hdrlen = 0;
char new_iobref = 0;
rpcsvc_drc_globals_t *drc = NULL;
+ gf_latency_t *lat = NULL;
if ((!req) || (!req->trans))
return -1;
+ if (req->prog && req->begin.tv_sec) {
+ if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) {
+ timespec_now(&req->end);
+ lat = &req->prog->latencies[req->procnum];
+ gf_latency_update(lat, &req->begin, &req->end);
+ }
+ }
trans = req->trans;
for (i = 0; i < hdrcount; i++) {
@@ -1860,6 +1873,15 @@ rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
iobref);
}
+void
+rpcsvc_program_destroy(rpcsvc_program_t *program)
+{
+ if (program) {
+ GF_FREE(program->latencies);
+ GF_FREE(program);
+ }
+}
+
int
rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
{
@@ -1917,8 +1939,7 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
ret = 0;
out:
- if (prog)
- GF_FREE(prog);
+ rpcsvc_program_destroy(prog);
if (ret == -1) {
if (program) {
@@ -2303,6 +2324,11 @@ rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
}
memcpy(newprog, program, sizeof(*program));
+ newprog->latencies = gf_latency_new(program->numactors);
+ if (!newprog->latencies) {
+ rpcsvc_program_destroy(newprog);
+ goto out;
+ }
INIT_LIST_HEAD(&newprog->program);
pthread_mutexattr_init(&thr_attr);
@@ -3240,6 +3266,48 @@ out:
return ret;
}
+void
+rpcsvc_program_dump(rpcsvc_program_t *prog)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_build_key(key, key_prefix, "program-number");
+ gf_proc_dump_write(key, "%d", prog->prognum);
+
+ gf_proc_dump_build_key(key, key_prefix, "program-version");
+ gf_proc_dump_write(key, "%d", prog->progver);
+
+ strncat(key_prefix, ".latency",
+ sizeof(key_prefix) - strlen(key_prefix) - 1);
+
+ for (i = 0; i < prog->numactors; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname);
+ gf_latency_statedump_and_reset(key, &prog->latencies[i]);
+ }
+}
+
+void
+rpcsvc_statedump(rpcsvc_t *svc)
+{
+ rpcsvc_program_t *prog = NULL;
+ int ret = 0;
+ ret = pthread_rwlock_tryrdlock(&svc->rpclock);
+ if (ret)
+ return;
+ {
+ list_for_each_entry(prog, &svc->programs, program)
+ {
+ rpcsvc_program_dump(prog);
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+}
+
rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = {
[GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, 0, DRC_NA},
[GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, 0, DRC_NA},
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
index a51edc7..e336d00 100644
--- a/rpc/rpc-lib/src/rpcsvc.h
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -275,6 +275,8 @@ struct rpcsvc_request {
gf_boolean_t ownthread;
gf_boolean_t synctask;
+ struct timespec begin; /*req handling start time*/
+ struct timespec end; /*req handling end time*/
};
#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
@@ -431,6 +433,7 @@ struct rpcsvc_program {
/* Program specific state handed to actors */
void *private;
+ gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/
/* This upcall is provided by the program during registration.
* It is used to notify the program about events like connection being
@@ -696,4 +699,6 @@ rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr);
extern int
rpcsvc_destroy(rpcsvc_t *svc);
+void
+rpcsvc_statedump(rpcsvc_t *svc);
#endif
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 54d9c0f..90eb3ff 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -267,6 +267,8 @@ server_priv(xlator_t *this)
gf_proc_dump_build_key(key, "server", "total-bytes-write");
gf_proc_dump_write(key, "%" PRIu64, total_write);
+ rpcsvc_statedump(conf->rpc);
+
ret = 0;
out:
if (ret)
--
1.8.3.1

View File

@ -1,472 +0,0 @@
From d7665cf3249310c5faf87368f395b4e25cb86b48 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Thu, 15 Apr 2021 10:29:06 +0530
Subject: [PATCH 577/584] protocol/client: don't reopen fds on which POSIX
locks are held after a reconnect
XXXXXXXXXXXXXXXXXXX
IMPORTANT:
XXXXXXXXXXXXXXXXXXX
As a best practice, with this patch we are bumping up the op-version
from GD_OP_VERSION_7_1 to GD_OP_VERSION_7_2 since it introduces a
new volume option. Enabling the new option will have effect only
after all the servers and clients are upgraded to this version.
----------------------------------------------------------------------
Bricks clean up any granted locks after a client disconnects, and
currently these locks are not healed after a reconnect. This means
post reconnect a competing process could be granted a lock even though
the first process which was granted locks has not unlocked. By not
re-opening fds, subsequent operations on such fds will fail forcing
the application to close the current fd and reopen a new one. This way
we prevent any silent corruption.
A new option "client.strict-locks" is introduced to control this
behaviour. This option is set to "off" by default.
> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22712/
> Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e
> Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
> updates: bz#1694920
BUG: 1689375
Change-Id: Ieed545efea466cb5e8f5a36199aa26380c301b9e
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244909
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
---
libglusterfs/src/glusterfs/globals.h | 4 +-
tests/bugs/bug-1694920.t | 63 ++++++++++++++++++++++++
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 14 ++++++
xlators/protocol/client/src/client-handshake.c | 3 +-
xlators/protocol/client/src/client-helpers.c | 5 +-
xlators/protocol/client/src/client-lk.c | 2 +-
xlators/protocol/client/src/client-rpc-fops.c | 45 ++++++++++++++++-
xlators/protocol/client/src/client-rpc-fops_v2.c | 32 +++++++++++-
xlators/protocol/client/src/client.c | 13 +++++
xlators/protocol/client/src/client.h | 16 ++++++
10 files changed, 190 insertions(+), 7 deletions(-)
create mode 100644 tests/bugs/bug-1694920.t
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
index 33fb023..ce2d110 100644
--- a/libglusterfs/src/glusterfs/globals.h
+++ b/libglusterfs/src/glusterfs/globals.h
@@ -50,7 +50,7 @@
1 /* MIN is the fresh start op-version, mostly \
should not change */
#define GD_OP_VERSION_MAX \
- GD_OP_VERSION_7_1 /* MAX VERSION is the maximum \
+ GD_OP_VERSION_7_2 /* MAX VERSION is the maximum \
count in VME table, should \
keep changing with \
introduction of newer \
@@ -140,6 +140,8 @@
#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
+#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */
+
#include "glusterfs/xlator.h"
#include "glusterfs/options.h"
diff --git a/tests/bugs/bug-1694920.t b/tests/bugs/bug-1694920.t
new file mode 100644
index 0000000..5bf93c9
--- /dev/null
+++ b/tests/bugs/bug-1694920.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should succeed by default
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_write $fd1 "data"
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should fail when client.strict-locks is on
+TEST $CLI volume set $V0 client.strict-locks on
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST ! fd_write $fd1 "data"
+TEST fd_close $fd1
+
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index c1ca190..01f3912 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2022,6 +2022,20 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.value = "9",
.flags = VOLOPT_FLAG_CLIENT_OPT},
+ {.key = "client.strict-locks",
+ .voltype = "protocol/client",
+ .option = "strict-locks",
+ .value = "off",
+ .op_version = GD_OP_VERSION_7_2,
+ .validate_fn = validate_boolean,
+ .type = GLOBAL_DOC,
+ .description = "When set, doesn't reopen saved fds after reconnect "
+ "if POSIX locks are held on them. Hence subsequent "
+ "operations on these fds will fail. This is "
+ "necessary for stricter lock complaince as bricks "
+ "cleanup any granted locks when a client "
+ "disconnects."},
+
/* Server xlator options */
{.key = "network.tcp-window-size",
.voltype = "protocol/server",
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index 6b20d92..a12472b 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -910,7 +910,8 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
{
list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
{
- if (fdctx->remote_fd != -1)
+ if (fdctx->remote_fd != -1 ||
+ (!list_empty(&fdctx->lock_list) && conf->strict_locks))
continue;
fdctx->reopen_done = client_child_up_reopen_done;
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index 53b4484..6543100 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -410,6 +410,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
{
clnt_fd_ctx_t *fdctx = NULL;
clnt_conf_t *conf = NULL;
+ gf_boolean_t locks_held = _gf_false;
GF_VALIDATE_OR_GOTO(this->name, fd, out);
GF_VALIDATE_OR_GOTO(this->name, remote_fd, out);
@@ -431,11 +432,13 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
*remote_fd = -1;
else
*remote_fd = fdctx->remote_fd;
+
+ locks_held = !list_empty(&fdctx->lock_list);
}
}
pthread_spin_unlock(&conf->fd_lock);
- if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1))
+ if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held))
*remote_fd = GF_ANON_FD_NO;
return 0;
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
index 679e198..c1fb055 100644
--- a/xlators/protocol/client/src/client-lk.c
+++ b/xlators/protocol/client/src/client-lk.c
@@ -351,7 +351,7 @@ delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner)
list_for_each_entry_safe(lock, tmp, &fdctx->lock_list, list)
{
- if (!is_same_lkowner(&lock->owner, owner)) {
+ if (is_same_lkowner(&lock->owner, owner)) {
list_del_init(&lock->list);
list_add_tail(&lock->list, &delete_list);
count++;
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
index 1c8b31b..3110c78 100644
--- a/xlators/protocol/client/src/client-rpc-fops.c
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -22,8 +22,18 @@ int32_t
client3_getspec(call_frame_t *frame, xlator_t *this, void *data);
rpc_clnt_prog_t clnt3_3_fop_prog;
-/* CBK */
+int
+client_is_setlk(int32_t cmd)
+{
+ if ((cmd == F_SETLK) || (cmd == F_SETLK64) || (cmd == F_SETLKW) ||
+ (cmd == F_SETLKW64)) {
+ return 1;
+ }
+ return 0;
+}
+
+/* CBK */
int
client3_3_symlink_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
@@ -816,7 +826,8 @@ client3_3_flush_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) {
+ if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) &&
+ !fd_is_anonymous(local->fd)) {
/* Delete all saved locks of the owner issuing flush */
ret = delete_granted_locks_owner(local->fd, &local->owner);
gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d",
@@ -2388,10 +2399,12 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = 0;
xlator_t *this = NULL;
dict_t *xdata = NULL;
+ clnt_local_t *local = NULL;
this = THIS;
frame = myframe;
+ local = frame->local;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
@@ -2412,6 +2425,18 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = client_post_lk(this, &rsp, &lock, &xdata);
if (ret < 0)
goto out;
+
+ /* Save the lock to the client lock cache to be able
+ to recover in the case of server reboot.*/
+
+ if (client_is_setlk(local->cmd)) {
+ ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner,
+ local->cmd);
+ if (ret < 0) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ }
+ }
}
out:
@@ -4263,8 +4288,16 @@ client3_3_flush(call_frame_t *frame, xlator_t *this, void *data)
ret = client_pre_flush(this, &req, args->fd, args->xdata);
if (ret) {
op_errno = -ret;
+ if (op_errno == EBADF) {
+ ret = delete_granted_locks_owner(local->fd, &local->owner);
+ gf_msg_trace(this->name, 0,
+ "deleting locks of owner (%s) returned %d",
+ lkowner_utoa(&local->owner), ret);
+ }
+
goto unwind;
}
+
ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
client3_3_flush_cbk, NULL,
(xdrproc_t)xdr_gfs3_flush_req);
@@ -5199,8 +5232,16 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
args->xdata);
if (ret) {
op_errno = -ret;
+
+ if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) &&
+ client_is_setlk(local->cmd)) {
+ client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
+ local->cmd);
+ }
+
goto unwind;
}
+
ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
client3_3_lk_cbk, NULL,
(xdrproc_t)xdr_gfs3_lk_req);
diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
index 613dda8..954fc58 100644
--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
+++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
@@ -723,7 +723,8 @@ client4_0_flush_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- if (rsp.op_ret >= 0 && !fd_is_anonymous(local->fd)) {
+ if ((rsp.op_ret >= 0 || (rsp.op_errno == ENOTCONN)) &&
+ !fd_is_anonymous(local->fd)) {
/* Delete all saved locks of the owner issuing flush */
ret = delete_granted_locks_owner(local->fd, &local->owner);
gf_msg_trace(this->name, 0, "deleting locks of owner (%s) returned %d",
@@ -2193,10 +2194,12 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = 0;
xlator_t *this = NULL;
dict_t *xdata = NULL;
+ clnt_local_t *local = NULL;
this = THIS;
frame = myframe;
+ local = frame->local;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
@@ -2217,6 +2220,18 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = client_post_lk_v2(this, &rsp, &lock, &xdata);
if (ret < 0)
goto out;
+
+ /* Save the lock to the client lock cache to be able
+ to recover in the case of server reboot.*/
+
+ if (client_is_setlk(local->cmd)) {
+ ret = client_add_lock_for_recovery(local->fd, &lock, &local->owner,
+ local->cmd);
+ if (ret < 0) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ }
+ }
}
out:
@@ -3998,6 +4013,13 @@ client4_0_flush(call_frame_t *frame, xlator_t *this, void *data)
ret = client_pre_flush_v2(this, &req, args->fd, args->xdata);
if (ret) {
op_errno = -ret;
+ if (op_errno == EBADF) {
+ ret = delete_granted_locks_owner(local->fd, &local->owner);
+ gf_msg_trace(this->name, 0,
+ "deleting locks of owner (%s) returned %d",
+ lkowner_utoa(&local->owner), ret);
+ }
+
goto unwind;
}
ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH,
@@ -4771,8 +4793,16 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
args->xdata);
if (ret) {
op_errno = -ret;
+
+ if ((op_errno == EBADF) && (args->flock->l_type == F_UNLCK) &&
+ client_is_setlk(local->cmd)) {
+ client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
+ local->cmd);
+ }
+
goto unwind;
}
+
ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK,
client4_0_lk_cbk, NULL,
(xdrproc_t)xdr_gfx_lk_req);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index ed855ca..63c90ea 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -2491,6 +2491,7 @@ build_client_config(xlator_t *this, clnt_conf_t *conf)
GF_OPTION_INIT("filter-O_DIRECT", conf->filter_o_direct, bool, out);
GF_OPTION_INIT("send-gids", conf->send_gids, bool, out);
+ GF_OPTION_INIT("strict-locks", conf->strict_locks, bool, out);
conf->client_id = glusterfs_leaf_position(this);
@@ -2676,6 +2677,7 @@ reconfigure(xlator_t *this, dict_t *options)
out);
GF_OPTION_RECONF("send-gids", conf->send_gids, options, bool, out);
+ GF_OPTION_RECONF("strict-locks", conf->strict_locks, options, bool, out);
ret = 0;
out:
@@ -3032,6 +3034,17 @@ struct volume_options options[] = {
" power. Range 1-32 threads.",
.op_version = {GD_OP_VERSION_RHS_3_0},
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"strict-locks"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_7_2},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "When set, doesn't reopen saved fds after reconnect "
+ "if POSIX locks are held on them. Hence subsequent "
+ "operations on these fds will fail. This is "
+ "necessary for stricter lock complaince as bricks "
+ "cleanup any granted locks when a client "
+ "disconnects."},
{.key = {NULL}},
};
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f12fa61..bde3d1a 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -235,6 +235,15 @@ typedef struct clnt_conf {
* up, disconnects can be
* logged
*/
+
+ gf_boolean_t strict_locks; /* When set, doesn't reopen saved fds after
+ reconnect if POSIX locks are held on them.
+ Hence subsequent operations on these fds will
+ fail. This is necessary for stricter lock
+ complaince as bricks cleanup any granted
+ locks when a client disconnects.
+ */
+
} clnt_conf_t;
typedef struct _client_fd_ctx {
@@ -513,4 +522,11 @@ compound_request_cleanup_v2(gfx_compound_req *req);
void
client_compound_rsp_cleanup_v2(gfx_compound_rsp *rsp, int len);
+int
+client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
+ gf_lkowner_t *owner, int32_t cmd);
+
+int
+client_is_setlk(int32_t cmd);
+
#endif /* !_CLIENT_H */
--
1.8.3.1

View File

@ -1,46 +0,0 @@
From ffb4085b3e04878e85bf505a541203aa2ee71e9c Mon Sep 17 00:00:00 2001
From: l17zhou <cynthia.zhou@nokia-sbell.com>
Date: Fri, 6 Mar 2020 03:54:02 +0200
Subject: [PATCH 578/584] protocol/client: fallback to anonymous fd for fsync
> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24203/
> Change-Id: I32f801206ce7fbd05aa693f44c2f140304f2e275
> Fixes: bz#1810842
BUG: 1689375
Change-Id: I32f801206ce7fbd05aa693f44c2f140304f2e275
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245538
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/protocol/client/src/client-common.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c
index 64db98d..1417a60 100644
--- a/xlators/protocol/client/src/client-common.c
+++ b/xlators/protocol/client/src/client-common.c
@@ -449,7 +449,8 @@ client_pre_fsync(xlator_t *this, gfs3_fsync_req *req, fd_t *fd, int32_t flags,
int64_t remote_fd = -1;
int op_errno = 0;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+ out);
req->fd = remote_fd;
req->data = flags;
@@ -2641,7 +2642,8 @@ client_pre_fsync_v2(xlator_t *this, gfx_fsync_req *req, fd_t *fd, int32_t flags,
int64_t remote_fd = -1;
int op_errno = 0;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
+ out);
req->fd = remote_fd;
req->data = flags;
--
1.8.3.1

View File

@ -1,168 +0,0 @@
From 96c4c3c47c914aced8864e7d178a4d57f7fced05 Mon Sep 17 00:00:00 2001
From: Tamar Shacked <tshacked@redhat.com>
Date: Sun, 6 Jun 2021 14:26:18 +0300
Subject: [PATCH 579/584] cli: changing rebal task ID to "None" in case status
is being reset
Rebalance status is being reset during replace/reset-brick operations.
This causes 'volume status' to show rebalance as "not started".
Fix:
change rebalance-status to "reset due to (replace|reset)-brick"
Backport of:
> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/1869
> Change-Id: Ia73a8bea3dcd8e51acf4faa6434c3cb0d09856d0
> Signed-off-by: Tamar Shacked <tshacked@redhat.com>
> Fixes: #1717
BUG: 1889966
Signed-off-by: Tamar Shacked <tshacked@redhat.com>
Change-Id: Ia73a8bea3dcd8e51acf4faa6434c3cb0d09856d0
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245402
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
cli/src/cli-rpc-ops.c | 15 ++++++-
rpc/xdr/src/cli1-xdr.x | 2 +
tests/bugs/glusterd/reset-rebalance-state.t | 46 ++++++++++++++++++++++
xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 4 +-
xlators/mgmt/glusterd/src/glusterd-reset-brick.c | 3 +-
5 files changed, 65 insertions(+), 5 deletions(-)
create mode 100644 tests/bugs/glusterd/reset-rebalance-state.t
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 51b5447..4167c68 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -72,6 +72,8 @@ char *cli_vol_task_status_str[] = {"not started",
"fix-layout stopped",
"fix-layout completed",
"fix-layout failed",
+ "reset due to replace-brick",
+ "reset due to reset-brick",
"unknown"};
int32_t
@@ -8357,12 +8359,21 @@ cli_print_volume_status_tasks(dict_t *dict)
ret = dict_get_str(dict, key, &task_id_str);
if (ret)
return;
- cli_out("%-20s : %-20s", "ID", task_id_str);
snprintf(key, sizeof(key), "task%d.status", i);
ret = dict_get_int32(dict, key, &status);
- if (ret)
+ if (ret) {
+ cli_out("%-20s : %-20s", "ID", task_id_str);
return;
+ }
+
+ if (!strcmp(op, "Rebalance") &&
+ (status == GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC ||
+ status == GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC)) {
+ task_id_str = "None";
+ }
+
+ cli_out("%-20s : %-20s", "ID", task_id_str);
snprintf(task, sizeof(task), "task%d", i);
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 777cb00..17d96f1 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -45,6 +45,8 @@
GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+ GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC,
+ GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC,
GF_DEFRAG_STATUS_MAX
};
diff --git a/tests/bugs/glusterd/reset-rebalance-state.t b/tests/bugs/glusterd/reset-rebalance-state.t
new file mode 100644
index 0000000..829d2b1
--- /dev/null
+++ b/tests/bugs/glusterd/reset-rebalance-state.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+
+get_rebalance_status() {
+ $CLI volume status $V0 | egrep ^"Status " | awk '{print $3}'
+}
+
+run_rebal_check_status() {
+ TEST $CLI volume rebalance $V0 start
+ EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+ REBAL_STATE=$(get_rebalance_status)
+ TEST [ $REBAL_STATE == "completed" ]
+}
+
+replace_brick_check_status() {
+ TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_replace commit force
+ REBAL_STATE=$(get_rebalance_status)
+ TEST [ $REBAL_STATE == "reset" ]
+}
+
+reset_brick_check_status() {
+ TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}2 start
+ TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}2 commit force
+ REBAL_STATE=$(get_rebalance_status)
+ TEST [ $REBAL_STATE == "reset" ]
+}
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume info;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..6} force;
+TEST $CLI volume start $V0;
+
+run_rebal_check_status;
+replace_brick_check_status;
+reset_brick_check_status;
+
+cleanup;
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 0615081..80b80e4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -548,8 +548,8 @@ glusterd_op_replace_brick(dict_t *dict, dict_t *rsp_dict)
(void)glusterd_svcs_manager(volinfo);
goto out;
}
-
- volinfo->rebal.defrag_status = 0;
+ if (volinfo->rebal.defrag_status != GF_DEFRAG_STATUS_NOT_STARTED)
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC;
ret = glusterd_svcs_manager(volinfo);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
index cf04ce8..19d7549 100644
--- a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
@@ -342,7 +342,8 @@ glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict)
goto out;
}
- volinfo->rebal.defrag_status = 0;
+ if (volinfo->rebal.defrag_status != GF_DEFRAG_STATUS_NOT_STARTED)
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_RESET_DUE_RESET_BRC;
ret = glusterd_svcs_manager(volinfo);
if (ret) {
--
1.8.3.1

View File

@ -1,138 +0,0 @@
From a5da8bb830e86b6dd77a06cd59d220052e80b21c Mon Sep 17 00:00:00 2001
From: Tamar Shacked <tshacked@redhat.com>
Date: Sun, 6 Jun 2021 11:57:06 +0300
Subject: [PATCH 580/584] cluster/dht: suppress file migration error for node
not supposed to migrate file
A rebalance process does a lookup for every file in the dir it is processing
before checking if it is supposed to migrate the file.
In this issue there are two rebalance processes running on a replica subvol:
R1 is migrating the FILE.
R2 is not supposed to migrate the FILE, but it does a lookup and
finds a stale linkfile which is mostly due to a stale layout.
Then, it tries to unlink the stale linkfile and gets EBUSY
as the linkfile fd is open due to R1's migration.
As a result, a misleading error msg about FILE migration failure
due to EBUSY is logged in R2's logfile.
Fix:
suppress the error in case it occurred on a node that
is not supposed to migrate the file.
Backport of:
> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24712/
> fixes: #1371
> Change-Id: I37832b404e2b0cc40ac5caf45f14c32c891e71f3
> Signed-off-by: Tamar Shacked <tshacked@redhat.com>
BUG: 1815462
Signed-off-by: Tamar Shacked <tshacked@redhat.com>
Change-Id: I915ee8e7470d85a849b198bfa7d58d368a246aae
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245401
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-rebalance.c | 38 ++++++++++++++++++++++-----------
1 file changed, 25 insertions(+), 13 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index e07dec0..cc0f2c9 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2604,10 +2604,10 @@ out:
* all hardlinks.
*/
-int
+gf_boolean_t
gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = 0;
+ gf_boolean_t ret = _gf_false;
int i = local_subvol_index;
char *str = NULL;
uint32_t hashval = 0;
@@ -2629,12 +2629,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
}
str = uuid_utoa_r(gfid, buf);
- ret = dht_hash_compute(this, 0, str, &hashval);
- if (ret == 0) {
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
index = (hashval % entry->count);
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
@@ -2647,12 +2646,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
/* None of the bricks in the subvol are up.
* CHILD_DOWN will kill the process soon */
- return 0;
+ return _gf_false;
}
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
}
@@ -2701,6 +2700,7 @@ gf_defrag_migrate_single_file(void *opaque)
struct iatt *iatt_ptr = NULL;
gf_boolean_t update_skippedcount = _gf_true;
int i = 0;
+ gf_boolean_t should_i_migrate = 0;
rebal_entry = (struct dht_container *)opaque;
if (!rebal_entry) {
@@ -2754,11 +2754,29 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
+
gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.pargfid, loc->gfid);
ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+
+ if (!should_i_migrate) {
+ /* this node isn't supposed to migrate the file. suppressing any
+ * potential error from lookup as this file is under migration by
+ * another node */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s lookup failed", entry_loc.path);
@@ -2779,12 +2797,6 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
- entry->d_stat.ia_gfid)) {
- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
- goto out;
- }
-
iatt_ptr = &iatt;
hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
--
1.8.3.1

File diff suppressed because it is too large Load Diff

View File

@ -1,501 +0,0 @@
From adeec3d5d85baad8b50d203f34a47ad5360d7cd7 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 7 Jun 2021 18:36:11 +0530
Subject: [PATCH 582/584] protocol/client: Fix lock memory leak
Problem-1:
When an overlapping lock is issued the merged lock is not assigned the
owner. When flush is issued on the fd, this particular lock is not freed
leading to a memory leak.
Fix-1:
Assign the owner while merging the locks.
Problem-2:
On fd-destroy, lock structs could be present in fdctx. For some reason,
running the flock -x command and then closing the bash fd leads to this code
path, which leaks the lock structs.
Fix-2:
When fdctx is being destroyed in client, make sure to cleanup any lock
structs.
> Upstream patch: https://github.com/gluster/glusterfs/pull/2338/commits/926402f639471d2664bf00c6692221ba297c525f
> fixes: gluster#2337
> Change-Id: I298124213ce5a1cf2b1f1756d5e8a9745d9c0a1c
> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
BUG: 1689375
Change-Id: I298124213ce5a1cf2b1f1756d5e8a9745d9c0a1c
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245603
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/client/issue-2337-lock-mem-leak.c | 52 ++++++++++++++++++
tests/bugs/client/issue-2337-lock-mem-leak.t | 42 ++++++++++++++
tests/bugs/replicate/do-not-reopen-fd.t | 65 ++++++++++++++--------
tests/volume.rc | 8 +++
xlators/protocol/client/src/client-helpers.c | 10 ++++
xlators/protocol/client/src/client-lk.c | 82 ++++++++++++++++++----------
xlators/protocol/client/src/client.h | 8 ++-
7 files changed, 213 insertions(+), 54 deletions(-)
create mode 100644 tests/bugs/client/issue-2337-lock-mem-leak.c
create mode 100644 tests/bugs/client/issue-2337-lock-mem-leak.t
diff --git a/tests/bugs/client/issue-2337-lock-mem-leak.c b/tests/bugs/client/issue-2337-lock-mem-leak.c
new file mode 100644
index 0000000..d4e02a7
--- /dev/null
+++ b/tests/bugs/client/issue-2337-lock-mem-leak.c
@@ -0,0 +1,52 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1;
+ char *filename = NULL;
+ struct flock lock = {
+ 0,
+ };
+ int i = 0;
+ int ret = -1;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <filename> ", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_len = 2;
+
+ while (i < 100) {
+ lock.l_start = i;
+ ret = fcntl(fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ i++;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/client/issue-2337-lock-mem-leak.t b/tests/bugs/client/issue-2337-lock-mem-leak.t
new file mode 100644
index 0000000..64132a2
--- /dev/null
+++ b/tests/bugs/client/issue-2337-lock-mem-leak.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+#Test that lock fop is not leaking any memory for overlapping regions
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+LOCK_TEST=$(dirname $0)/issue-2337-lock-mem-leak
+build_tester $(dirname $0)/issue-2337-lock-mem-leak.c -o ${LOCK_TEST}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+#Guard against flush-behind
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/a
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+statedump=$(generate_mount_statedump $V0 $M0)
+EXPECT_NOT "^nostatedump$" echo $statedump
+#Making sure no one changes this mem-tracker name
+TEST grep gf_client_mt_clnt_lock_t $statedump
+TEST fd_close $fd1
+
+statedump=$(generate_mount_statedump $V0 $M0)
+EXPECT_NOT "^nostatedump$" echo $statedump
+TEST ! grep gf_client_mt_clnt_lock_t $statedump
+
+TEST ${LOCK_TEST} $M0/a
+
+statedump=$(generate_mount_statedump $V0 $M0)
+EXPECT_NOT "^nostatedump$" echo $statedump
+TEST ! grep gf_client_mt_clnt_lock_t $statedump
+TEST cleanup_mount_statedump $V0
+TEST rm ${LOCK_TEST}
+cleanup
diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
index 76d8e70..13b5218 100644
--- a/tests/bugs/replicate/do-not-reopen-fd.t
+++ b/tests/bugs/replicate/do-not-reopen-fd.t
@@ -45,13 +45,17 @@ EXPECT "data-2" cat $B0/${V0}2/a
gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
TEST fd2=`fd_available`
TEST fd_open $fd2 'rw' $M1/a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
# Kill 2nd brick and try writing to the file. The write should fail due to
# quorum failure.
TEST kill_brick $V0 $H0 $B0/${V0}1
@@ -66,6 +70,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
TEST ! fd_write $fd1 "data-4"
TEST ! fd_cat $fd1
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
# Enable heal and check the files will have same content on all the bricks after
# the heal is completed.
@@ -89,7 +96,9 @@ TEST ! fd_write $fd1 "data-5"
# At this point only one brick will have the lock. Try taking the lock again on
# the bad fd, which should also fail with EBADFD.
-TEST ! flock -x $fd1
+# TODO: At the moment quorum failure in lk leads to unlock on the bricks where
+# lock succeeds. This will change lock state on 3rd brick, commenting for now
+#TEST ! flock -x $fd1
# Kill the only brick that is having lock and try taking lock on another client
# which should succeed.
@@ -97,15 +106,25 @@ TEST kill_brick $V0 $H0 $B0/${V0}2
EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
TEST flock -x $fd2
TEST fd_write $fd2 "data-6"
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+
# Bring the brick up and try writing & reading on the old fd, which should still
# fail and operations on the 2nd fd should succeed.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
TEST ! fd_write $fd1 "data-7"
TEST ! fd_cat $fd1
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
TEST fd_cat $fd2
# Close both the fds which will release the locks and then re-open and take lock
@@ -113,17 +132,15 @@ TEST fd_cat $fd2
TEST fd_close $fd1
TEST fd_close $fd2
-TEST ! ls /proc/$$/fd/$fd1
-TEST ! ls /proc/$$/fd/$fd2
-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
TEST fd1=`fd_available`
TEST fd_open $fd1 'rw' $M0/a
-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
TEST flock -x $fd1
TEST fd_write $fd1 "data-8"
@@ -134,6 +151,10 @@ EXPECT "data-8" head -n 1 $B0/${V0}1/a
EXPECT "data-8" head -n 1 $B0/${V0}2/a
TEST fd_close $fd1
+EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
# Heal the volume
TEST $CLI volume heal $V0 enable
@@ -152,9 +173,9 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replica
TEST fd1=`fd_available`
TEST fd_open $fd1 'rw' $M0/a
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
# Restart the brick and then write. Now fd should get re-opened and write should
# succeed on the previously down brick as well since there are no locks held on
@@ -163,7 +184,7 @@ TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
TEST fd_write $fd1 "data-10"
-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
EXPECT "data-10" head -n 1 $B0/${V0}0/a
EXPECT "data-10" head -n 1 $B0/${V0}1/a
@@ -177,9 +198,9 @@ TEST fd1=`fd_available`
TEST fd_open $fd1 'rw' $M0/a
TEST flock -x $fd1
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
# Kill & restart another brick so that it will return EBADFD
TEST kill_brick $V0 $H0 $B0/${V0}1
@@ -194,9 +215,9 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
TEST ! fd_write $fd1 "data-11"
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
EXPECT "data-10" head -n 1 $B0/${V0}0/a
EXPECT "data-10" head -n 1 $B0/${V0}1/a
diff --git a/tests/volume.rc b/tests/volume.rc
index f5dd0b1..17c3835 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -407,6 +407,14 @@ function gf_check_file_opened_in_brick {
fi
}
+function gf_open_file_count_in_brick {
+ vol=$1
+ host=$2
+ brick=$3
+ realpath=$4
+ ls -l /proc/$(get_brick_pid $vol $host $brick)/fd | grep "${realpath}$" | wc -l
+}
+
function gf_get_gfid_backend_file_path {
brickpath=$1
filepath_in_brick=$2
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index 48b6448..a80f303 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -3156,11 +3156,14 @@ client_fdctx_destroy(xlator_t *this, clnt_fd_ctx_t *fdctx)
int32_t ret = -1;
char parent_down = 0;
fd_lk_ctx_t *lk_ctx = NULL;
+ gf_lkowner_t null_owner = {0};
+ struct list_head deleted_list;
GF_VALIDATE_OR_GOTO("client", this, out);
GF_VALIDATE_OR_GOTO(this->name, fdctx, out);
conf = (clnt_conf_t *)this->private;
+ INIT_LIST_HEAD(&deleted_list);
if (fdctx->remote_fd == -1) {
gf_msg_debug(this->name, 0, "not a valid fd");
@@ -3174,6 +3177,13 @@ client_fdctx_destroy(xlator_t *this, clnt_fd_ctx_t *fdctx)
pthread_mutex_unlock(&conf->lock);
lk_ctx = fdctx->lk_ctx;
fdctx->lk_ctx = NULL;
+ pthread_spin_lock(&conf->fd_lock);
+ {
+ __delete_granted_locks_owner_from_fdctx(fdctx, &null_owner,
+ &deleted_list);
+ }
+ pthread_spin_unlock(&conf->fd_lock);
+ destroy_client_locks_from_list(&deleted_list);
if (lk_ctx)
fd_lk_ctx_unref(lk_ctx);
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
index c1fb055..cb4e894 100644
--- a/xlators/protocol/client/src/client-lk.c
+++ b/xlators/protocol/client/src/client-lk.c
@@ -253,6 +253,7 @@ __insert_and_merge(clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
sum = add_locks(lock, conf);
sum->fd = lock->fd;
+ sum->owner = conf->owner;
__delete_client_lock(conf);
__destroy_client_lock(conf);
@@ -320,56 +321,77 @@ destroy_client_lock(client_posix_lock_t *lock)
GF_FREE(lock);
}
-int32_t
-delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner)
+void
+destroy_client_locks_from_list(struct list_head *deleted)
{
- clnt_fd_ctx_t *fdctx = NULL;
client_posix_lock_t *lock = NULL;
client_posix_lock_t *tmp = NULL;
- xlator_t *this = NULL;
- clnt_conf_t *conf = NULL;
-
- struct list_head delete_list;
- int ret = 0;
+ xlator_t *this = THIS;
int count = 0;
- INIT_LIST_HEAD(&delete_list);
- this = THIS;
- conf = this->private;
+ list_for_each_entry_safe(lock, tmp, deleted, list)
+ {
+ list_del_init(&lock->list);
+ destroy_client_lock(lock);
+ count++;
+ }
- pthread_spin_lock(&conf->fd_lock);
+ /* FIXME: Need to actually print the locks instead of count */
+ gf_msg_trace(this->name, 0, "Number of locks cleared=%d", count);
+}
- fdctx = this_fd_get_ctx(fd, this);
- if (!fdctx) {
- pthread_spin_unlock(&conf->fd_lock);
+void
+__delete_granted_locks_owner_from_fdctx(clnt_fd_ctx_t *fdctx,
+ gf_lkowner_t *owner,
+ struct list_head *deleted)
+{
+ client_posix_lock_t *lock = NULL;
+ client_posix_lock_t *tmp = NULL;
- gf_msg(this->name, GF_LOG_WARNING, EINVAL, PC_MSG_FD_CTX_INVALID,
- "fdctx not valid");
- ret = -1;
- goto out;
+ gf_boolean_t is_null_lkowner = _gf_false;
+
+ if (is_lk_owner_null(owner)) {
+ is_null_lkowner = _gf_true;
}
list_for_each_entry_safe(lock, tmp, &fdctx->lock_list, list)
{
- if (is_same_lkowner(&lock->owner, owner)) {
+ if (is_null_lkowner || is_same_lkowner(&lock->owner, owner)) {
list_del_init(&lock->list);
- list_add_tail(&lock->list, &delete_list);
- count++;
+ list_add_tail(&lock->list, deleted);
}
}
+}
- pthread_spin_unlock(&conf->fd_lock);
+int32_t
+delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner)
+{
+ clnt_fd_ctx_t *fdctx = NULL;
+ xlator_t *this = NULL;
+ clnt_conf_t *conf = NULL;
+ int ret = 0;
+ struct list_head deleted_locks;
- if (!list_empty(&delete_list)) {
- list_for_each_entry_safe(lock, tmp, &delete_list, list)
- {
- list_del_init(&lock->list);
- destroy_client_lock(lock);
+ this = THIS;
+ conf = this->private;
+ INIT_LIST_HEAD(&deleted_locks);
+
+ pthread_spin_lock(&conf->fd_lock);
+ {
+ fdctx = this_fd_get_ctx(fd, this);
+ if (!fdctx) {
+ pthread_spin_unlock(&conf->fd_lock);
+
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, PC_MSG_FD_CTX_INVALID,
+ NULL);
+ ret = -1;
+ goto out;
}
+ __delete_granted_locks_owner_from_fdctx(fdctx, owner, &deleted_locks);
}
+ pthread_spin_unlock(&conf->fd_lock);
- /* FIXME: Need to actually print the locks instead of count */
- gf_msg_trace(this->name, 0, "Number of locks cleared=%d", count);
+ destroy_client_locks_from_list(&deleted_locks);
out:
return ret;
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index 2a50625..f952aea 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -406,8 +406,12 @@ int
client_attempt_lock_recovery(xlator_t *this, clnt_fd_ctx_t *fdctx);
int32_t
delete_granted_locks_owner(fd_t *fd, gf_lkowner_t *owner);
-int32_t
-delete_granted_locks_fd(clnt_fd_ctx_t *fdctx);
+void
+__delete_granted_locks_owner_from_fdctx(clnt_fd_ctx_t *fdctx,
+ gf_lkowner_t *owner,
+ struct list_head *deleted);
+void
+destroy_client_locks_from_list(struct list_head *deleted);
int32_t
client_cmd_to_gf_cmd(int32_t cmd, int32_t *gf_cmd);
void
--
1.8.3.1

View File

@ -1,138 +0,0 @@
From f114ba25fab57d1ab9a51fc1f101f2b5571f167a Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 7 Jun 2021 19:24:55 +0530
Subject: [PATCH 583/584] protocol/client: Initialize list head to prevent NULL
de-reference
> Upstream patch: https://github.com/gluster/glusterfs/pull/2456/commits/00761df0cd14833ff256b69dba7cf8e2b699554c
> fixes: #2443
> Change-Id: I86ef0270d41d6fb924db97fde3196d7c98c8b564
> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
BUG: 1689375
Change-Id: I86ef0270d41d6fb924db97fde3196d7c98c8b564
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245613
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/locks/issue-2443-crash.c | 67 +++++++++++++++++++++++++++++++++
tests/bugs/locks/issue-2443-crash.t | 18 +++++++++
xlators/protocol/client/src/client-lk.c | 1 +
3 files changed, 86 insertions(+)
create mode 100644 tests/bugs/locks/issue-2443-crash.c
create mode 100644 tests/bugs/locks/issue-2443-crash.t
diff --git a/tests/bugs/locks/issue-2443-crash.c b/tests/bugs/locks/issue-2443-crash.c
new file mode 100644
index 0000000..5f580bf
--- /dev/null
+++ b/tests/bugs/locks/issue-2443-crash.c
@@ -0,0 +1,67 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/* Reproducer for issue #2443: on a single fd of <filename>, take a read lock
+ * on bytes [0,2), a write lock on bytes [2,4) and then a read lock on bytes
+ * [0,4). Returns the last fcntl() result, or -1 on any earlier failure. */
+int
+main(int argc, char *argv[])
+{
+    int fd = -1;
+    char *filename = NULL;
+    struct flock lock = {0};
+    int ret = -1;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+        goto out;
+    }
+
+    filename = argv[1];
+
+    fd = open(filename, O_RDWR | O_CREAT, 0);
+    if (fd < 0) {
+        fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+        goto out;
+    }
+    /* 1st lock: read lock on bytes [0,2) */
+    lock.l_start = 0;
+    lock.l_type = F_RDLCK;
+    lock.l_whence = SEEK_SET;
+    lock.l_len = 2;
+
+    ret = fcntl(fd, F_SETLK, &lock);
+    if (ret < 0) {
+        fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+        goto out;
+    }
+    /* 2nd lock: write lock on the adjacent bytes [2,4) */
+    lock.l_start = 2;
+    lock.l_type = F_WRLCK;
+    lock.l_whence = SEEK_SET;
+    lock.l_len = 2;
+
+    ret = fcntl(fd, F_SETLK, &lock);
+    if (ret < 0) {
+        fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+        goto out;
+    }
+    /* 3rd lock: read lock on [0,4), overlapping both ranges locked above */
+    lock.l_start = 0;
+    lock.l_type = F_RDLCK;
+    lock.l_whence = SEEK_SET;
+    lock.l_len = 4;
+
+    ret = fcntl(fd, F_SETLK, &lock);
+    if (ret < 0) {
+        fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+        goto out;
+    }
+out:
+    return ret;
+}
diff --git a/tests/bugs/locks/issue-2443-crash.t b/tests/bugs/locks/issue-2443-crash.t
new file mode 100644
index 0000000..162a4d7
--- /dev/null
+++ b/tests/bugs/locks/issue-2443-crash.t
@@ -0,0 +1,18 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+# Start glusterd, create and start a single-brick volume, then mount it on $M0
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+# build_tester is expected to leave an issue-2443-crash binary next to the .c
+build_tester $(dirname $0)/issue-2443-crash.c
+TEST mv $(dirname $0)/issue-2443-crash $M0
+cd $M0
+TEST ./issue-2443-crash a
+# Return to the previous working directory before cleaning up
+cd -
+cleanup;
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
index cb4e894..37c1d35 100644
--- a/xlators/protocol/client/src/client-lk.c
+++ b/xlators/protocol/client/src/client-lk.c
@@ -101,6 +101,7 @@ add_locks(client_posix_lock_t *l1, client_posix_lock_t *l2)
sum = GF_CALLOC(1, sizeof(*sum), gf_client_mt_clnt_lock_t);
if (!sum)
return NULL;
+ INIT_LIST_HEAD(&sum->list);
sum->fl_start = min(l1->fl_start, l2->fl_start);
sum->fl_end = max(l1->fl_end, l2->fl_end);
--
1.8.3.1

View File

@ -1,429 +0,0 @@
From 2c6c4ad77ba5511a62846af932840deb5bc389ae Mon Sep 17 00:00:00 2001
From: Tamar Shacked <tshacked@redhat.com>
Date: Mon, 7 Jun 2021 12:25:57 +0300
Subject: [PATCH 584/584] dht - fixing xattr inconsistency
The scenario of setting an xattr to a dir, killing one of the bricks,
removing the xattr, bringing back the brick results in xattr
inconsistency - The downed brick will still have the xattr, but the rest
won't.
This patch adds a mechanism that removes the extra xattrs during
lookup.
Backport of:
> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24687/
> fixes: #1324
> Change-Id: Ifec0b7aea6cd40daa8b0319b881191cf83e031d1
> Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
BUG: 1600379
Change-Id: I588f69b283e5354cd362d74486d6ec6d226ecc96
Signed-off-by: Tamar Shacked <tshacked@redhat.com>
Signed-off-by: srijan-sivakumar <ssivakum@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245560
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/common-utils.c | 20 +++++++-
libglusterfs/src/glusterfs/common-utils.h | 6 +++
tests/bugs/distribute/bug-1600379.t | 54 ++++++++++++++++++++
xlators/cluster/dht/src/dht-common.c | 14 ++----
xlators/cluster/dht/src/dht-common.h | 4 --
xlators/cluster/dht/src/dht-helper.c | 4 ++
xlators/cluster/dht/src/dht-selfheal.c | 11 ++++
xlators/storage/posix/src/posix-helpers.c | 19 +++++++
xlators/storage/posix/src/posix-inode-fd-ops.c | 69 ++++++++++++++++++++++++++
xlators/storage/posix/src/posix.h | 3 ++
10 files changed, 189 insertions(+), 15 deletions(-)
create mode 100644 tests/bugs/distribute/bug-1600379.t
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index c2dfe28..d8b7c6e 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -54,6 +54,7 @@
#include "xxhash.h"
#include <ifaddrs.h>
#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/glusterfs-acl.h"
#include "protocol-common.h"
#ifdef __FreeBSD__
#include <pthread_np.h>
@@ -82,12 +83,21 @@ gf_boolean_t gf_signal_on_assert = false;
typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size);
typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size);
-void gf_assert(void)
+char *xattrs_to_heal[] = {"user.",
+ POSIX_ACL_ACCESS_XATTR,
+ POSIX_ACL_DEFAULT_XATTR,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ GF_SELINUX_XATTR_KEY,
+ GF_XATTR_MDATA_KEY,
+ NULL};
+
+void
+gf_assert(void)
{
if (gf_signal_on_assert) {
raise(SIGCONT);
}
-
}
void
@@ -5430,3 +5440,9 @@ gf_d_type_from_ia_type(ia_type_t type)
return DT_UNKNOWN;
}
}
+/* Accessor for the NULL-terminated xattrs_to_heal[] list of xattr keys */
+char **
+get_xattrs_to_heal(void)
+{
+    return xattrs_to_heal;
+}
diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
index bd48b6f..8439bb6 100644
--- a/libglusterfs/src/glusterfs/common-utils.h
+++ b/libglusterfs/src/glusterfs/common-utils.h
@@ -183,6 +183,12 @@ enum _gf_xlator_ipc_targets {
typedef enum _gf_special_pid gf_special_pid_t;
typedef enum _gf_xlator_ipc_targets _gf_xlator_ipc_targets_t;
+/* Array to hold custom xattr keys */
+extern char *xattrs_to_heal[];
+
+char **
+get_xattrs_to_heal();
+
/* The DHT file rename operation is not a straightforward rename.
* It involves creating linkto and linkfiles, and can unlink or rename the
* source file depending on the hashed and cached subvols for the source
diff --git a/tests/bugs/distribute/bug-1600379.t b/tests/bugs/distribute/bug-1600379.t
new file mode 100644
index 0000000..8d2f615
--- /dev/null
+++ b/tests/bugs/distribute/bug-1600379.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+# A dir xattr removed while a brick was down must be healed away on lookup
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+#------------------------------------------------------------
+
+# Test case - Remove xattr from killed brick on lookup
+#------------------------------------------------------------
+# Create a dir and set custom xattr
+TEST mkdir $M0/testdir
+TEST setfattr -n user.attr -v val $M0/testdir
+# The xattr, with its value, must be present on the backend of the 2nd brick
+EXPECT "^val$" getfattr --absolute-names --only-values -n user.attr $B0/${V0}2/testdir
+
+# Kill 2nd brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+
+# Remove custom xattr
+TEST setfattr -x user.attr $M0/testdir
+
+# Bring up the killed brick process
+TEST $CLI volume start $V0 force
+
+# Perform lookup
+sleep 5
+TEST ls $M0/testdir
+
+# Check brick xattrs: the stale xattr must be gone from the restarted brick
+# (getfattr -n exits non-zero when the named attribute does not exist)
+TEST ! getfattr --absolute-names -n user.attr $B0/${V0}2/testdir
+
+cleanup;
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index ce0fbbf..edfc6e7 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -19,6 +19,7 @@
#include <glusterfs/byte-order.h>
#include <glusterfs/quota-common-utils.h>
#include <glusterfs/upcall-utils.h>
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
@@ -127,15 +128,6 @@ dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf)
int
dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-char *xattrs_to_heal[] = {"user.",
- POSIX_ACL_ACCESS_XATTR,
- POSIX_ACL_DEFAULT_XATTR,
- QUOTA_LIMIT_KEY,
- QUOTA_LIMIT_OBJECTS_KEY,
- GF_SELINUX_XATTR_KEY,
- GF_XATTR_MDATA_KEY,
- NULL};
-
char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
/* Return true if key exists in array
@@ -143,6 +135,8 @@ char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
static gf_boolean_t
dht_match_xattr(const char *key)
{
+ char **xattrs_to_heal = get_xattrs_to_heal();
+
return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
}
@@ -5399,11 +5393,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int call_cnt = 0;
dht_local_t *local = NULL;
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
if (!gf_uuid_is_null(local->gfid)) {
gf_uuid_unparse(local->gfid, gfid_local);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 132b3b3..b856c68 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -54,10 +54,6 @@
#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-/* Array to hold custom xattr keys
- */
-extern char *xattrs_to_heal[];
-
/* Rebalance nodeuuid flags */
#define REBAL_NODEUUID_MINE 0x01
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 4f7370d..4c3940a 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -2289,6 +2289,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
int luret = -1;
int luflag = -1;
int i = 0;
+ char **xattrs_to_heal;
if (!src || !dst) {
gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
@@ -2305,6 +2306,9 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
and set it to dst dict, here index start from 1 because
user xattr already checked in previous statement
*/
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
for (i = 1; xattrs_to_heal[i]; i++) {
keyval = dict_get(src, xattrs_to_heal[i]);
if (keyval) {
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index f4e17d1..8af7301 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -2315,6 +2315,15 @@ dht_dir_heal_xattrs(void *data)
if (subvol == mds_subvol)
continue;
if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
NULL);
if (ret) {
@@ -2325,6 +2334,8 @@ dht_dir_heal_xattrs(void *data)
"user xattr on path %s on "
"subvol %s, gfid = %s ",
local->loc.path, subvol->name, gfid);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
}
}
}
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 16351d8..40a9ee4 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -3656,3 +3656,22 @@ out:
return is_stale;
}
+
+/* dict_foreach_fnmatch() callback: remove xattr key 'k' from the backend file
+ * whose path is passed in 'data', then drop 'k' from 'dict' whether or not
+ * the removal worked. 'v' is unused. Returns the sys_lremovexattr() result. */
+int
+posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
+{
+    char *real_path = data;
+    int ret = sys_lremovexattr(real_path, k);
+
+    if (ret) {
+        /* gf_msg() takes the errno value first, then the message id */
+        gf_msg("posix-helpers", GF_LOG_ERROR, errno, P_MSG_XATTR_NOT_REMOVED,
+               "removexattr failed. key %s path %s", k, real_path);
+    }
+
+    dict_del(dict, k);
+    return ret;
+}
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 4c2983a..be22c5e 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -62,6 +62,7 @@
#include <glusterfs/events.h>
#include "posix-gfid-path.h"
#include <glusterfs/compat-uuid.h>
+#include <glusterfs/common-utils.h>
extern char *marker_xattrs[];
#define ALIGN_SIZE 4096
@@ -2733,6 +2734,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int32_t ret = 0;
ssize_t acl_size = 0;
dict_t *xattr = NULL;
+ dict_t *subvol_xattrs = NULL;
posix_xattr_filler_t filler = {
0,
};
@@ -2748,6 +2750,10 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
struct mdata_iatt mdata_iatt = {
0,
};
+ int8_t sync_backend_xattrs = _gf_false;
+ data_pair_t *custom_xattrs;
+ data_t *keyval = NULL;
+ char **xattrs_to_heal = get_xattrs_to_heal();
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID(frame->root->uid, frame->root->gid);
@@ -2930,6 +2936,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
goto out;
}
+ ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs");
+ }
+
+ if (sync_backend_xattrs) {
+ /* List all custom xattrs */
+ subvol_xattrs = dict_new();
+ if (!subvol_xattrs)
+ goto out;
+
+ ret = dict_set_int32_sizen(xdata, "list-xattr", 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "Unable to set list-xattr in dict ");
+ goto out;
+ }
+
+ subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ NULL);
+
+ /* Remove all user xattrs from the file */
+ dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr,
+ real_path);
+
+ /* Remove all custom xattrs from the file */
+ for (i = 1; xattrs_to_heal[i]; i++) {
+ keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]);
+ if (keyval) {
+ ret = sys_lremovexattr(real_path, xattrs_to_heal[i]);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED,
+ errno, "removexattr failed. key %s path %s",
+ xattrs_to_heal[i], loc->path);
+ goto out;
+ }
+
+ dict_del(subvol_xattrs, xattrs_to_heal[i]);
+ keyval = NULL;
+ }
+ }
+
+ /* Set custom xattrs based on info provided by DHT */
+ custom_xattrs = dict->members_list;
+
+ while (custom_xattrs != NULL) {
+ ret = sys_lsetxattr(real_path, custom_xattrs->key,
+ custom_xattrs->value->data,
+ custom_xattrs->value->len, flags);
+ if (ret) {
+ op_errno = errno;
+ gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d",
+ custom_xattrs->key, ret);
+ goto out;
+ }
+
+ custom_xattrs = custom_xattrs->next;
+ }
+ }
+
xattr = dict_new();
if (!xattr)
goto out;
@@ -3037,6 +3103,9 @@ out:
if (xattr)
dict_unref(xattr);
+ if (subvol_xattrs)
+ dict_unref(subvol_xattrs);
+
return 0;
}
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 4be979c..b357d34 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -686,4 +686,7 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata);
gf_boolean_t
posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this);
+int
+posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data);
+
#endif /* _POSIX_H */
--
1.8.3.1

View File

@ -1,77 +0,0 @@
From ba399a083a56963bb7414535ede6eff6afcd1a0a Mon Sep 17 00:00:00 2001
From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
Date: Mon, 14 Jun 2021 12:32:06 -0400
Subject: [PATCH 585/585] ganesha_ha: ganesha_grace RA fails in start() and/or
fails in monitor () (#2523)
The shell [[ ]] string comparison fails to match the returned attr against
the pattern, so the RA returns a status of "not running"; as a result,
dependencies such as the IPaddr (cluster_ip) RA do not start.
Change-Id: I2c8d6f5c4cf0480672d52d8aa0d9226950441dc9
commit 8ec66a43eedd505ec0b40f55c05f13a77fe8074e
PR: https://github.com/gluster/glusterfs/pull/2523
issue: https://github.com/gluster/glusterfs/issues/2522
BUG: 1945143
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/247613
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/ganesha/ocf/ganesha_grace | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
index edc6fa2..ca219af 100644
--- a/extras/ganesha/ocf/ganesha_grace
+++ b/extras/ganesha/ocf/ganesha_grace
@@ -122,15 +122,18 @@ ganesha_grace_start()
# case 1
if [[ -z "${attr}" ]]; then
+ ocf_log debug "grace start: returning success case 1"
return ${OCF_SUCCESS}
fi
# case 2
- if [[ "${attr}" = *"value=1" ]]; then
+ if [[ "${attr}" = *"host=\"${host}\" value=\"1\"" ]]; then
+ ocf_log debug "grace start: returning success case 2"
return ${OCF_SUCCESS}
fi
# case 3
+ ocf_log info "grace start returning: not running case 3 (${attr})"
return ${OCF_NOT_RUNNING}
}
@@ -162,7 +165,7 @@ ganesha_grace_monitor()
{
local host=$(ocf_local_nodename)
- ocf_log debug "ganesha_grace monitor ${host}"
+ ocf_log debug "ganesha_grace_monitor ${host}"
attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
if [ $? -ne 0 ]; then
@@ -174,13 +177,16 @@ ganesha_grace_monitor()
# chance to create it. In which case we'll pretend
# everything is okay this time around
if [[ -z "${attr}" ]]; then
+ ocf_log debug "grace monitor: returning success case 1"
return ${OCF_SUCCESS}
fi
- if [[ "${attr}" = *"value=1" ]]; then
+ if [[ "${attr}" = *"host=\"${host}\" value=\"1\"" ]]; then
+ ocf_log debug "grace monitor: returning success case 2"
return ${OCF_SUCCESS}
fi
+ ocf_log info "grace monitor: returning not running case 3 (${attr})"
return ${OCF_NOT_RUNNING}
}
--
1.8.3.1

View File

@ -1,298 +0,0 @@
From e431321f1348b5d51733a6b6c5e046fd8c6e28cc Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 5 Jul 2021 10:52:10 +0530
Subject: [PATCH 586/586] protocol/client: Do not reopen fd post handshake if
posix lock is held
Problem:
With client.strict-locks enabled, in some cases where the posix lock is
taken after a brick gets disconnected, the fd is getting reopened when
the brick gets reconnected to the client as part of client_post_handshake.
In such cases the saved fdctx's lock_list may not have the latest
information.
Fix:
Check the lock information in the fdctx->lk_ctx as well post handshake
which will have the latest information on the locks.
Also check for this field in other places as well to prevent writes
happening with anonymous fd even without re-opening the fd on the
restarted brick.
> Upstream patch: https://github.com/gluster/glusterfs/pull/2582
> Fixes: #2581
> Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
> Signed-off-by: karthik-us ksubrahm@redhat.com
BUG: 1689375
Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/252588
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/replicate/do-not-reopen-fd.t | 76 ++++++++++++++++++--------
xlators/protocol/client/src/client-handshake.c | 2 +-
xlators/protocol/client/src/client-helpers.c | 11 +++-
xlators/protocol/client/src/client.c | 2 +-
xlators/protocol/client/src/client.h | 3 +
5 files changed, 67 insertions(+), 27 deletions(-)
diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
index 13b5218..f346709 100644
--- a/tests/bugs/replicate/do-not-reopen-fd.t
+++ b/tests/bugs/replicate/do-not-reopen-fd.t
@@ -20,10 +20,41 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
TEST touch $M0/a
+gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
+gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
+
+
+# Open fd from a client, check for open fd on all the bricks.
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Kill a brick and take lock on the fd
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST flock -x $fd1
+
+# Restart the brick and check for no open fd on the restarted brick.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Write on the fd. It should fail on the restarted brick.
+TEST fd_write $fd1 "data-0"
+EXPECT "" cat $B0/${V0}0/a
+EXPECT "data-0" cat $B0/${V0}1/a
+EXPECT "data-0" cat $B0/${V0}2/a
+
+TEST fd_close $fd1
# Kill one brick and take lock on the fd and do a write.
TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
TEST fd1=`fd_available`
TEST fd_open $fd1 'rw' $M0/a
@@ -34,7 +65,7 @@ TEST fd_write $fd1 "data-1"
# should still succeed as there were no quorum disconnects.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
TEST fd_write $fd1 "data-2"
EXPECT "" cat $B0/${V0}0/a
EXPECT "data-2" cat $B0/${V0}1/a
@@ -42,9 +73,6 @@ EXPECT "data-2" cat $B0/${V0}2/a
# Check there is no fd opened on the 1st brick by checking for the gfid inside
# /proc/pid-of-brick/fd/ directory
-gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
-gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
-
EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
@@ -59,7 +87,7 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
# Kill 2nd brick and try writing to the file. The write should fail due to
# quorum failure.
TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 1
TEST ! fd_write $fd1 "data-3"
TEST ! fd_cat $fd1
@@ -67,7 +95,7 @@ TEST ! fd_cat $fd1
# which were down previously, will return EBADFD now.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
TEST ! fd_write $fd1 "data-4"
TEST ! fd_cat $fd1
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -79,9 +107,9 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
TEST $CLI volume heal $V0 enable
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -103,7 +131,7 @@ TEST ! fd_write $fd1 "data-5"
# Kill the only brick that is having lock and try taking lock on another client
# which should succeed.
TEST kill_brick $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 2
TEST flock -x $fd2
TEST fd_write $fd2 "data-6"
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -114,17 +142,17 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
# fail and operations on the 2nd fd should succeed.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M1 $V0-replicate-0 2
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
TEST ! fd_write $fd1 "data-7"
TEST ! fd_cat $fd1
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
TEST fd_cat $fd2
# Close both the fds which will release the locks and then re-open and take lock
@@ -159,9 +187,9 @@ EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0
# Heal the volume
TEST $CLI volume heal $V0 enable
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -169,7 +197,7 @@ TEST $CLI volume heal $V0 disable
# Kill one brick and open a fd.
TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
TEST fd1=`fd_available`
TEST fd_open $fd1 'rw' $M0/a
@@ -182,7 +210,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
# any of the bricks.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
TEST fd_write $fd1 "data-10"
EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -193,7 +221,7 @@ TEST fd_close $fd1
# Kill one brick, open and take lock on a fd.
TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
TEST fd1=`fd_available`
TEST fd_open $fd1 'rw' $M0/a
TEST flock -x $fd1
@@ -204,7 +232,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
# Kill & restart another brick so that it will return EBADFD
TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" brick_up_status $V0 $H0 $B0/${V0}1
# Restart the bricks and then write. Now fd should not get re-opened since lock
# is still held on one brick and write should also fail as there is no quorum.
@@ -212,8 +240,8 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
TEST ! fd_write $fd1 "data-11"
EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index a12472b..20e03d8 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -911,7 +911,7 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
{
if (fdctx->remote_fd != -1 ||
- (!list_empty(&fdctx->lock_list) && conf->strict_locks))
+ (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks))
continue;
fdctx->reopen_done = client_child_up_reopen_done;
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index a80f303..b4a7294 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -15,6 +15,15 @@
#include <glusterfs/compat-errno.h>
#include <glusterfs/common-utils.h>
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
+{
+ if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
+ return _gf_true;
+
+ return _gf_false;
+}
+
int
client_fd_lk_list_empty(fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock)
{
@@ -441,7 +450,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
*remote_fd = fdctx->remote_fd;
}
- locks_involved = !list_empty(&fdctx->lock_list);
+ locks_involved = !fdctx_lock_lists_empty(fdctx);
}
}
pthread_spin_unlock(&conf->fd_lock);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 35a5340..6df2ed1 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -881,7 +881,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
pthread_spin_lock(&conf->fd_lock);
{
fdctx = this_fd_get_ctx(fd, this);
- if (fdctx && !list_empty(&fdctx->lock_list)) {
+ if (fdctx && !fdctx_lock_lists_empty(fdctx)) {
ret = -1;
op_errno = EBADFD;
}
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f952aea..799fe6e 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -535,4 +535,7 @@ client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
int
client_is_setlk(int32_t cmd);
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx);
+
#endif /* !_CLIENT_H */
--
1.8.3.1

View File

@ -1,26 +0,0 @@
From f72780b560ea8efe1508aa9ddc574e6dc066bf9a Mon Sep 17 00:00:00 2001
From: Csaba Henk <chenk@redhat.com>
Date: Wed, 29 Sep 2021 10:44:37 +0200
Subject: [PATCH 587/610] Update rfc.sh to rhgs-3.5.6
Signed-off-by: Csaba Henk <chenk@redhat.com>
---
rfc.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/rfc.sh b/rfc.sh
index daeff32..67798cb 100755
--- a/rfc.sh
+++ b/rfc.sh
@@ -18,7 +18,7 @@ done
shift $((OPTIND-1))
-branch="rhgs-3.5.5";
+branch="rhgs-3.5.6";
set_hooks_commit_msg()
{
--
1.8.3.1

View File

@ -1,388 +0,0 @@
From e3813685237dbdf8dc7cf28726fff2caf2288706 Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Mon, 19 Jul 2021 15:37:02 +0200
Subject: [PATCH 588/610] locks: Fix null gfid in lock contention notifications
This patch fixes 3 problems:
First problem:
After commit c0bd592e, the pl_inode_t object was also created in the
cbk of lookup requests. Lookup requests are a bit different than any
other request because the inode received may not be completely
initialized. In particular, inode->gfid may be null.
As a result, the gfid stored in the pl_inode_t object was null in
some cases. This gfid is used mostly for logs, but also to send lock
contention notifications. This meant that some notifications could be
sent with a null gfid, making it impossible for the client xlator to
correctly identify the contending inode, so the lock was not released
immediately when eager-lock was also enabled.
Second problem:
The feature introduced by c0bd592e needed to track the number of
hardlinks of each inode to detect when it was deleted. However it
was done using the 'get-link-count' special xattr on lookup, while
posix only implements it for unlink and rename.
Also, the number of hardlinks was not incremented for mkdir, mknod,
rename, ..., so it didn't work correctly for directories.
Third problem:
When the last hardlink of an open file is deleted, all locks will be
denied with ESTALE error, but that's not correct. Access to the open
fd must succeed.
The first problem is fixed by avoiding creating pl_inode_t objects
during lookup. Second and third problems are fixed by completely
ignoring if the file has been deleted or not. Even if we grant a
lock on a non-existing file, the next operation done by the client
inside the lock will return the correct error, which should be enough.
Upstream patch:
> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2553
> Fixes: #2551
> Change-Id: Ic73e82f6b725b838c1600b6a128ea36a75f13253
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
BUG: 1962972
Change-Id: Ic73e82f6b725b838c1600b6a128ea36a75f13253
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279192
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/locks/issue-2551.t | 58 ++++++++++++++++++
xlators/features/locks/src/common.c | 31 +++-------
xlators/features/locks/src/locks.h | 2 -
xlators/features/locks/src/posix.c | 118 +++---------------------------------
4 files changed, 74 insertions(+), 135 deletions(-)
create mode 100644 tests/bugs/locks/issue-2551.t
diff --git a/tests/bugs/locks/issue-2551.t b/tests/bugs/locks/issue-2551.t
new file mode 100644
index 0000000..a32af02
--- /dev/null
+++ b/tests/bugs/locks/issue-2551.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function check_time() {
+ local max="${1}"
+ local start="$(date +"%s")"
+
+ shift
+
+ if "${@}"; then
+ if [[ $(($(date +"%s") - ${start})) -lt ${max} ]]; then
+ return 0
+ fi
+ fi
+
+ return 1
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/brick{0..2}
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 30
+TEST $CLI volume set $V0 features.locks-notify-contention on
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
+
+TEST mkdir $M0/dir
+TEST dd if=/dev/zero of=$M0/dir/test bs=4k count=1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M1
+
+TEST dd if=/dev/zero of=$M0/dir/test bs=4k count=1 conv=notrunc
+TEST check_time 5 dd if=/dev/zero of=$M1/dir/test bs=4k count=1 conv=notrunc
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index cddbfa6..5403086 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -468,9 +468,7 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
pl_inode->check_mlock_info = _gf_true;
pl_inode->mlock_enforced = _gf_false;
- /* -2 means never looked up. -1 means something went wrong and link
- * tracking is disabled. */
- pl_inode->links = -2;
+ pl_inode->remove_running = 0;
ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
if (ret) {
@@ -1403,11 +1401,6 @@ pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
pthread_mutex_lock(&pl_inode->mutex);
- if (pl_inode->removed) {
- error = ESTALE;
- goto unlock;
- }
-
if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
error = -1;
/* We skip the unlock here because the caller must create a stub when
@@ -1420,7 +1413,6 @@ pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
pl_inode->is_locked = _gf_true;
pl_inode->remove_running++;
-unlock:
pthread_mutex_unlock(&pl_inode->mutex);
done:
@@ -1490,20 +1482,18 @@ pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
pthread_mutex_lock(&pl_inode->mutex);
- if (error == 0) {
- if (pl_inode->links >= 0) {
- pl_inode->links--;
- }
- if (pl_inode->links == 0) {
- pl_inode->removed = _gf_true;
- }
- }
-
pl_inode->remove_running--;
if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
pl_inode->is_locked = _gf_false;
+ /* At this point it's possible that the inode has been deleted, but
+ * there could be open fd's still referencing it, so we can't prevent
+ * pending locks from being granted. If the file has really been
+ * deleted, whatever the client does once the lock is granted will
+ * fail with the appropriate error, so we don't need to worry about
+ * it here. */
+
list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
{
__grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
@@ -1555,11 +1545,6 @@ pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
pl_dom_list_t *dom;
pl_inode_lock_t *ilock;
- /* If the inode has been deleted, we won't allow any lock. */
- if (pl_inode->removed) {
- return -ESTALE;
- }
-
/* We only synchronize with locks made for regular operations coming from
* the user. Locks done for internal purposes are hard to control and could
* lead to long delays or deadlocks quite easily. */
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 6666feb..2406dcd 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -202,10 +202,8 @@ struct __pl_inode {
int fop_wind_count;
pthread_cond_t check_fop_wind_count;
- int32_t links; /* Number of hard links the inode has. */
uint32_t remove_running; /* Number of remove operations running. */
gf_boolean_t is_locked; /* Regular locks will be blocked. */
- gf_boolean_t removed; /* The inode has been deleted. */
};
typedef struct __pl_inode pl_inode_t;
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 22ef5b8..d5effef 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -2975,104 +2975,24 @@ out:
return ret;
}
-static int32_t
-pl_request_link_count(dict_t **pxdata)
-{
- dict_t *xdata;
-
- xdata = *pxdata;
- if (xdata == NULL) {
- xdata = dict_new();
- if (xdata == NULL) {
- return ENOMEM;
- }
- } else {
- dict_ref(xdata);
- }
-
- if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
- dict_unref(xdata);
- return ENOMEM;
- }
-
- *pxdata = xdata;
-
- return 0;
-}
-
-static int32_t
-pl_check_link_count(dict_t *xdata)
-{
- int32_t count;
-
- /* In case we are unable to read the link count from xdata, we take a
- * conservative approach and return -2, which will prevent the inode from
- * being considered deleted. In fact it will cause link tracking for this
- * inode to be disabled completely to avoid races. */
-
- if (xdata == NULL) {
- return -2;
- }
-
- if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
- return -2;
- }
-
- return count;
-}
-
int32_t
pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
- pl_inode_t *pl_inode;
-
- if (op_ret >= 0) {
- pl_inode = pl_inode_get(this, inode, NULL);
- if (pl_inode == NULL) {
- PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
- NULL);
- return 0;
- }
-
- pthread_mutex_lock(&pl_inode->mutex);
-
- /* We only update the link count if we previously didn't know it.
- * Doing it always can lead to races since lookup is not executed
- * atomically most of the times. */
- if (pl_inode->links == -2) {
- pl_inode->links = pl_check_link_count(xdata);
- if (buf->ia_type == IA_IFDIR) {
- /* Directories have at least 2 links. To avoid special handling
- * for directories, we simply decrement the value here to make
- * them equivalent to regular files. */
- pl_inode->links--;
- }
- }
-
- pthread_mutex_unlock(&pl_inode->mutex);
- }
-
PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
+
return 0;
}
int32_t
pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t error;
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
- error = pl_request_link_count(&xdata);
- if (error == 0) {
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- dict_unref(xdata);
- } else {
- STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
- }
return 0;
}
@@ -3881,9 +3801,7 @@ unlock:
__dump_posixlks(pl_inode);
}
- gf_proc_dump_write("links", "%d", pl_inode->links);
gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
- gf_proc_dump_write("removed", "%u", pl_inode->removed);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -4508,21 +4426,9 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- pl_inode_t *pl_inode = (pl_inode_t *)cookie;
-
- if (op_ret >= 0) {
- pthread_mutex_lock(&pl_inode->mutex);
-
- /* TODO: can happen pl_inode->links == 0 ? */
- if (pl_inode->links >= 0) {
- pl_inode->links++;
- }
-
- pthread_mutex_unlock(&pl_inode->mutex);
- }
-
PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
preparent, postparent, xdata);
+
return 0;
}
@@ -4530,18 +4436,10 @@ int
pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- pl_inode_t *pl_inode;
-
- pl_inode = pl_inode_get(this, oldloc->inode, NULL);
- if (pl_inode == NULL) {
- STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
- }
-
PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
- STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+
return 0;
}
--
1.8.3.1

View File

@ -1,63 +0,0 @@
From 0bb71e1492b1ad442758399eb8dcb5f087d77f12 Mon Sep 17 00:00:00 2001
From: Nikhil Ladha <nladha@redhat.com>
Date: Wed, 28 Apr 2021 02:14:27 +0530
Subject: [PATCH 589/610] extras: fix for postscript failure on logrotation of
snapd logs (#2310)
Issue:
On executing the logrotate command, the postscript runs as a separate process,
and when we do a grep for the snapd process it returns the PID of that
short-term process as well, and executing a kill on that throws the error.
A similar error can be reproduced by replacing the killall used for brick
log rotation with a for loop over the PIDs.
Fix:
Use the killall command on the list of snapd processes instead of
using the kill command to individually kill them.
>Fixes: #2360
>Change-Id: I1ad6e3e4d74128706e71900d02e715635294ff72
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream patch: https://github.com/gluster/glusterfs/pull/2310
BUG: 1668303
Change-Id: I59910fc3660e11e131b1aa813848c2e19cbffefd
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279533
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/glusterfs-logrotate | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate
index 75f700e..2b9028b 100644
--- a/extras/glusterfs-logrotate
+++ b/extras/glusterfs-logrotate
@@ -45,3 +45,22 @@
compress
delaycompress
}
+
+# Rotate snapd log
+/var/log/glusterfs/snaps/*/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ /usr/bin/killall -HUP `pgrep -f "glusterfs.*snapd"` > /dev/null 2>&1 || true
+ endscript
+}
--
1.8.3.1

View File

@ -1,128 +0,0 @@
From 87138f86b8cb98d1c9d1a4c9a2393e7978d20b1d Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Tue, 5 Oct 2021 12:33:01 +0530
Subject: [PATCH 590/610] cluster/afr: Don't check for stale entry-index
Problem:
In every entry index heal there is a check to see if the
index is stale or not.
1. If a file is created when the brick is down this
will lead to an extra index lookup because the name is not stale.
2. If a file is deleted when the brick is down this will also lead to
an extra index lookup because the name is not stale.
3. If a file is created and deleted when the brick is down then the
index is stale and this will save entry-heal i.e. 2 entrylks and 2 lookups
Since 1, 2 happen significantly more than 3, this is a bad tradeoff.
Fix:
Let stale index be removed as part of normal entry heal detecting 'the
name is already deleted' code path.
> Upstream patch: https://github.com/gluster/glusterfs/pull/2612
> fixes: gluster#2611
> Change-Id: I29bcc07f2480877a83b30dbd7e2e5631a74df8e8
> Signed-off-by: Pranith Kumar K <pranith.karampuri@phonepe.com>
BUG: 1994593
Change-Id: I29bcc07f2480877a83b30dbd7e2e5631a74df8e8
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279606
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/afr/src/afr-self-heal-entry.c | 46 +++++++--------------------
1 file changed, 11 insertions(+), 35 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index a17dd93..14b7417 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -933,37 +933,8 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
loc_t *parent, void *data)
{
int ret = 0;
- loc_t loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
afr_granular_esh_args_t *args = data;
- /* Look up the actual inode associated with entry. If the lookup returns
- * ESTALE or ENOENT, then it means we have a stale index. Remove it.
- * This is analogous to the check in afr_shd_index_heal() except that
- * here it is achieved through LOOKUP and in afr_shd_index_heal() through
- * a GETXATTR.
- */
-
- loc.inode = inode_new(args->xl->itable);
- loc.parent = inode_ref(args->heal_fd->inode);
- gf_uuid_copy(loc.pargfid, loc.parent->gfid);
- loc.name = entry->d_name;
-
- ret = syncop_lookup(args->xl, &loc, &iatt, NULL, NULL, NULL);
- if ((ret == -ENOENT) || (ret == -ESTALE)) {
- /* The name indices under the pgfid index dir are guaranteed
- * to be regular files. Hence the hardcoding.
- */
- afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
- ret = 0;
- goto out;
- }
- /* TBD: afr_shd_zero_xattrop? */
-
ret = afr_selfheal_entry_dirent(args->frame, args->xl, args->heal_fd,
entry->d_name, parent->inode, subvol,
_gf_false);
@@ -974,8 +945,6 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
if (ret == -1)
args->mismatch = _gf_true;
-out:
- loc_wipe(&loc);
return ret;
}
@@ -1050,7 +1019,9 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
local = frame->local;
gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
- "performing entry selfheal on %s", uuid_utoa(fd->inode->gfid));
+ "performing %s entry selfheal on %s",
+ (local->need_full_crawl ? "full" : "granular"),
+ uuid_utoa(fd->inode->gfid));
for (i = 0; i < priv->child_count; i++) {
/* Expunge */
@@ -1112,6 +1083,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
gf_boolean_t did_sh = _gf_true;
+ char *heal_type = "granular entry";
priv = this->private;
local = frame->local;
@@ -1194,11 +1166,15 @@ postop_unlock:
afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL,
postop_lock, NULL);
out:
- if (did_sh)
- afr_log_selfheal(fd->inode->gfid, this, ret, "entry", source, sources,
+ if (did_sh) {
+ if (local->need_full_crawl) {
+ heal_type = "full entry";
+ }
+ afr_log_selfheal(fd->inode->gfid, this, ret, heal_type, source, sources,
healed_sinks);
- else
+ } else {
ret = 1;
+ }
if (locked_replies)
afr_replies_wipe(locked_replies, priv->child_count);
--
1.8.3.1

View File

@ -1,44 +0,0 @@
From 19460ebc988795eeabaeb8e25d6eba9a3cf2864b Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 4 Oct 2021 12:44:21 +0530
Subject: [PATCH 591/610] afr: check for valid iatt
Problem:
If the entry being processed by afr_shd_anon_inode_cleaner() is no
longer present, gfid lookup fails with ENOENT on all bricks and iatt
will never be assigned, causing a crash due to null dereference.
Fix:
Add a null-check for iatt.
> Upstream patch: https://github.com/gluster/glusterfs/pull/2660
> Fixes: gluster#2659
> Change-Id: I6abfc8063677861ce9388ca4efdf491ec956dc74
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
BUG: 1995029
Change-Id: I6abfc8063677861ce9388ca4efdf491ec956dc74
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279529
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/afr/src/afr-self-heald.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 18aed93..bc720cf 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -870,7 +870,7 @@ afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
}
/*Inode is deleted from subvol*/
- if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ if (count == 1 || (iatt && iatt->ia_type != IA_IFDIR && multiple_links)) {
gf_msg(healer->this->name, GF_LOG_WARNING, 0,
AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
priv->anon_inode_name, entry->d_name, subvol->name);
--
1.8.3.1

View File

@ -1,119 +0,0 @@
From be3448ed5d9d59752cff4df8325ee67eb7d41531 Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Mon, 19 Jul 2021 06:56:18 +0200
Subject: [PATCH 592/610] md-cache: fix integer signedness mismatch
md-cache uses a mechanism based on a generation number to detect
modifications made by other clients to the entries and invalidate
the cached data.
This generation number is a 32 bit integer. When it overflows,
special management is done to avoid problems. This overflow condition
is tracked with a single bit.
For many fops, when they are received, the overflow bit and the
current generation number are recorded in a single 64-bit value
which is used later in the cbk.
This is the problematic function:
uint64_t
__mdc_get_generation(xlator_t *this, struct md_cache *mdc)
{
uint64_t gen = 0, rollover;
struct mdc_conf *conf = NULL;
conf = this->private;
gen = GF_ATOMIC_INC(conf->generation);
if (gen == 0) {
gf_log("MDC", GF_LOG_NOTICE, "%p Reset 1", mdc);
mdc->gen_rollover = !mdc->gen_rollover;
gen = GF_ATOMIC_INC(conf->generation);
mdc->ia_time = 0;
mdc->generation = 0;
mdc->invalidation_time = gen - 1;
}
rollover = mdc->gen_rollover;
gen |= (rollover << 32);
return gen;
}
'conf->generation' is declared as an atomic signed 32-bit integer,
and 'gen' is an unsigned 64-bit value. When 'gen' is assigned from
a signed int, the sign bit is extended to fill the high 32 bits of
'gen'. If the counter has overflown the maximum signed positive
value, it will become negative (sign bit = 1).
In this case, when 'rollover' is later combined with 'gen', all the
high bits remain at '1'.
This value is used later in 'mdc_inode_iatt_set_validate' during
callback processing. The overflow condition and generation numbers
from when the operation was received are recovered this way:
rollover = incident_time >> 32;
incident_time = (incident_time & 0xffffffff);
('incident_time' is the saved value from '__mdc_get_generation').
So here rollover will be 0xffffffff, when it's expected to be 0
or 1 only. When this is compared later with the cached overflow
bit, it doesn't match, which prevents updating the cached info.
This is bad in general, but it's even worse when an entry is not
cached and 'rollover' is 0xffffffff the first time. When md-cache
doesn't have cached data it assumes everything is 0. This causes
a mismatch, which sends an invalidation request to the kernel, but
since the 'rollover' doesn't match, the cached data is not updated.
So the next time the cached data is checked, it will also send an
invalidation to the kernel, indefinitely.
This patch fixes two things:
1. The 'generation' field is made unsigned to avoid sign extension.
2. Invalidation requests are only sent if we already had valid cached
data. Otherwise it doesn't make sense to send an invalidation.
Upstream patch:
> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2619
> Fixes: #2617
> Change-Id: Ie40e68288cf143e1bc1a40f46da98f51bb2d6864
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
BUG: 1904137
Change-Id: Ie40e68288cf143e1bc1a40f46da98f51bb2d6864
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/279188
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/performance/md-cache/src/md-cache.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index bbbee3b..e0256d6 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -79,7 +79,7 @@ struct mdc_conf {
gf_boolean_t cache_statfs;
struct mdc_statfs_cache statfs_cache;
char *mdc_xattr_str;
- gf_atomic_int32_t generation;
+ gf_atomic_uint32_t generation;
};
struct mdc_local;
@@ -537,7 +537,7 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
(iatt->ia_mtime_nsec != mdc->md_mtime_nsec) ||
(iatt->ia_ctime != mdc->md_ctime) ||
(iatt->ia_ctime_nsec != mdc->md_ctime_nsec)) {
- if (conf->global_invalidation &&
+ if (conf->global_invalidation && mdc->valid &&
(!prebuf || (prebuf->ia_mtime != mdc->md_mtime) ||
(prebuf->ia_mtime_nsec != mdc->md_mtime_nsec) ||
(prebuf->ia_ctime != mdc->md_ctime) ||
--
1.8.3.1

View File

@ -1,58 +0,0 @@
From 76c9faf5c750428e5eb69462b82ee0c12cbdabc0 Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Fri, 25 Sep 2020 18:39:51 +0530
Subject: [PATCH 593/610] dht: explicit null dereference
Added a null check for uuid_list_copy, to avoid
null dereference in strtok_r() in case of strdup()
failure.
CID: 1325612
CID: 1274223
>Updates: #1060
>Change-Id: I641a5068cd76d7b2ed92eccf39e7f97d6f7b2480
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://review.gluster.org/c/glusterfs/+/25046
BUG: 1997447
Change-Id: I576b4ce610948bdb84eb30377a684c54df718bdc
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280063
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 2 ++
xlators/cluster/dht/src/dht-shared.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index edfc6e7..e6a16ff 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -4296,6 +4296,8 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
index = conf->local_subvols_cnt;
uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
uuid_str = next_uuid_str) {
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 58e3339..cca272a 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -567,6 +567,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
pattern_str = strtok_r(data, ",", &tmp_str);
while (pattern_str) {
dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
if (!pattern_list) {
goto out;
--
1.8.3.1

View File

@ -1,52 +0,0 @@
From 663df92f9b4b9f35ae10f84487494829987e2f58 Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Fri, 25 Sep 2020 17:56:19 +0530
Subject: [PATCH 594/610] glusterd: resource leaks
Issue:
iobref was not freed before exiting the function.
Fix:
Modified the code to free iobref before exiting.
CID: 1430107
>Updates: #1060
>Change-Id: I89351b3aa645792eb8dda6292d1e559057b02d8b
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://review.gluster.org/c/glusterfs/+/25042
BUG: 1997447
Change-Id: Iea56afca015a7c0f15ab32f490ea27f5ea323a07
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280066
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 6d40be5..c037933 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -6042,7 +6042,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path,
GF_ATOMIC_INC(conf->blockers);
ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
iobref, frame, NULL, 0, NULL, 0, NULL);
- return ret;
free_iobref:
iobref_unref(iobref);
@@ -6051,7 +6050,7 @@ maybe_free_iobuf:
iobuf_unref(iobuf);
}
err:
- return -1;
+ return ret;
}
extern size_t
--
1.8.3.1

View File

@ -1,51 +0,0 @@
From 025718f1734655c411475ea338cee1659d96763e Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Thu, 3 Sep 2020 15:42:45 +0530
Subject: [PATCH 595/610] glusterd: use after free (coverity issue)
Issue:
dict_unref is called on the same dict again,
in the out label of the code, which causes the
use after free issue.
Fix:
Set the dict to NULL after unref, to avoid
use after free issue.
CID: 1430127
>Updates: #1060
>Change-Id: Ide9a5cbc5f496705c671e72b0260da6d4c06f16d
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://review.gluster.org/c/glusterfs/+/24946
BUG: 1997447
Change-Id: Id1e58cd6226b9329ad49bd5b75ee96a3a5ec5ab7
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280067
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index 386eed2..b0fa490 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -2039,8 +2039,9 @@ glusterd_update_snaps_synctask(void *opaque)
"Failed to remove snap %s", snap->snapname);
goto out;
}
- if (dict)
- dict_unref(dict);
+
+ dict_unref(dict);
+ dict = NULL;
}
snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
ret = dict_get_int32(peer_data, buf, &val);
--
1.8.3.1

View File

@ -1,43 +0,0 @@
From 099fcac6fecef6fc367d8fcae8442195f3f174db Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Fri, 25 Sep 2020 18:19:39 +0530
Subject: [PATCH 596/610] locks: null dereference
Added a null check before executing the strtok_r()
to avoid null dereference in case of strdup() failure.
CID: 1407938
>Updates: #1060
>Change-Id: Iec6e72ae8cb54f6d0a287615c43756325b2026ec
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://review.gluster.org/c/glusterfs/+/25045
BUG: 1997447
Change-Id: I47e6e2402badaf4103607b4164f19142a99a2f71
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280065
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/features/locks/src/posix.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index d5effef..03c4907 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -494,6 +494,9 @@ pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
char *save_ptr = NULL;
tmp_key = gf_strdup(key);
+ if (!tmp_key)
+ return -1;
+
strtok_r(tmp_key, ":", &save_ptr);
if (!*save_ptr) {
gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL,
--
1.8.3.1

View File

@ -1,163 +0,0 @@
From 59c05230c0df58765e30553c66bbcc0c9965d362 Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Tue, 11 Aug 2020 23:12:26 +0530
Subject: [PATCH 597/610] glusterd: memory deallocated twice
Issue:
If the pointer tmpiter is destroyed in the function
code, it is still checked in the out label, which
then tries to destroy the same pointer again.
Fix:
So, instead of passing the ptr by value, we
pass it by reference. Then, on setting the ptr to NULL
in the function, the value will persist in the calling
function, and the next time gf_store_iter_destroy()
is called it won't try to free the ptr again.
CID: 1430122
>Updates: #1060
>Change-Id: I019cea8e301c7cc87be792c03b58722fc96f04ef
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://review.gluster.org/c/glusterfs/+/24855
BUG: 1997447
Change-Id: Ib403efd08d47a69d25f291ae61c9cbfcaaa05da8
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280076
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/glusterfs/store.h | 2 +-
libglusterfs/src/store.c | 12 +++++++-----
xlators/mgmt/glusterd/src/glusterd-store.c | 16 ++++++++--------
3 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
index 68a20ad..76af2df 100644
--- a/libglusterfs/src/glusterfs/store.h
+++ b/libglusterfs/src/glusterfs/store.h
@@ -93,7 +93,7 @@ int32_t
gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value);
int32_t
-gf_store_iter_destroy(gf_store_iter_t *iter);
+gf_store_iter_destroy(gf_store_iter_t **iter);
char *
gf_store_strerror(gf_store_op_errno_t op_errno);
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
index 3af627a..e4931bf 100644
--- a/libglusterfs/src/store.c
+++ b/libglusterfs/src/store.c
@@ -606,23 +606,25 @@ out:
}
int32_t
-gf_store_iter_destroy(gf_store_iter_t *iter)
+gf_store_iter_destroy(gf_store_iter_t **iter)
{
int32_t ret = -1;
- if (!iter)
+ if (!(*iter))
return 0;
/* gf_store_iter_new will not return a valid iter object with iter->file
* being NULL*/
- ret = fclose(iter->file);
+ ret = fclose((*iter)->file);
if (ret)
gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
"Unable"
" to close file: %s, ret: %d",
- iter->filepath, ret);
+ (*iter)->filepath, ret);
+
+ GF_FREE(*iter);
+ *iter = NULL;
- GF_FREE(iter);
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index a8651d8..e027575 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -2576,7 +2576,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -2895,13 +2895,13 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(tmpiter)) {
+ if (gf_store_iter_destroy(&tmpiter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
}
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3067,7 +3067,7 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3379,7 +3379,7 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -3574,7 +3574,7 @@ glusterd_store_retrieve_options(xlator_t *this)
goto out;
ret = 0;
out:
- (void)gf_store_iter_destroy(iter);
+ (void)gf_store_iter_destroy(&iter);
gf_store_handle_destroy(shandle);
return ret;
}
@@ -4026,7 +4026,7 @@ glusterd_store_update_snap(glusterd_snap_t *snap)
ret = 0;
out:
- if (gf_store_iter_destroy(iter)) {
+ if (gf_store_iter_destroy(&iter)) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
"Failed to destroy store iter");
ret = -1;
@@ -4774,7 +4774,7 @@ glusterd_store_retrieve_peers(xlator_t *this)
is_ok = _gf_true;
next:
- (void)gf_store_iter_destroy(iter);
+ (void)gf_store_iter_destroy(&iter);
if (!is_ok) {
gf_log(this->name, GF_LOG_WARNING,
--
1.8.3.1

View File

@ -1,51 +0,0 @@
From 84aaaded4e958a10c7492233c053e3c681f2d575 Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Thu, 2 Jul 2020 18:10:32 +0530
Subject: [PATCH 598/610] glusterd: null dereference
Issue:
There has been either an explicit null
dereference or a dereference after null
check in some cases.
Fix:
Added the proper condition for null check
and fixed null dereferencing.
CID: 1430106 : Dereference after null check
CID: 1430120 : Explicit null dereferenced
CID: 1430132 : Dereference after null check
CID: 1430134 : Dereference after null check
>Change-Id: I7e795cf9f7146a633097c26a766f16b159881fa3
>Updates: #1060
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://review.gluster.org/c/glusterfs/+/24664
BUG: 1997447
Change-Id: I2b2632c93094d0e7b9fbd65a2ca2b0eaf6212d79
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280083
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 05c9e11..f1807cd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -1797,7 +1797,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
pending_node = NULL;
ret = 0;
out:
- if (pending_node)
+ if (pending_node && pending_node->node)
glusterd_pending_node_put_rpc(pending_node);
if (rsp_dict)
--
1.8.3.1

View File

@ -1,59 +0,0 @@
From 4186f81596a481a5c0c5a707fc9b2358ee8f49f0 Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Fri, 3 Jul 2020 17:18:33 +0530
Subject: [PATCH 599/610] afr: null dereference & negative value
Added a check for NULL before dereferencing
the object as it may be NULL in few cases
inside the function. Also, added a check for
the negative value of gfid_idx.
CID: 1430140
CID: 1430145
>Change-Id: Ib7d23459b48bbc471dbcccab6d20572261882d11
>Updates: #1060
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://review.gluster.org/c/glusterfs/+/24671
BUG: 1997447
Change-Id: I7e705a106d97001b67f5cde8589413c0c24ee507
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280085
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/afr/src/afr-self-heal-common.c | 2 +-
xlators/cluster/afr/src/afr-self-heal-name.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 0954d2c..cbd5117 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -140,7 +140,7 @@ heal:
}
}
out:
- if (gfid_idx && (*gfid_idx == -1) && (ret == 0)) {
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
ret = -afr_final_errno(local, priv);
}
loc_wipe(&loc);
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 9ec2066..c5ab8d7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -353,7 +353,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
replies, gfid, locked_on, source, sources,
is_gfid_absent, &gfid_idx);
- if (ret)
+ if (ret || (gfid_idx < 0))
return ret;
ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
--
1.8.3.1

View File

@ -1,161 +0,0 @@
From 1cd16553d436fa703f5e18d71c35108d0e179e8b Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Thu, 9 Apr 2020 11:36:34 +0530
Subject: [PATCH 600/610] dht xlator: integer handling issue
Issue: The ret value is passed to the function
instead of the proper errno value
Fix: Passing the errno generated to
the log function
CID: 1415824 : Improper use of negative value
CID: 1420205 : Improper use of negative value
>Change-Id: Iaa7407ebd03eda46a2c027695e6bf0f598b371b2
>Updates: #1060
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://review.gluster.org/c/glusterfs/+/24314
BUG: 1997447
Change-Id: Ibb7f432dbcc9ffd8dff6be6f984a6705894d6bef
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280086
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 12 ++++++++----
xlators/cluster/dht/src/dht-common.h | 2 +-
xlators/cluster/dht/src/dht-helper.c | 9 ++++++---
xlators/cluster/dht/src/dht-selfheal.c | 8 +++++---
4 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index e6a16ff..5eaaa1e 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -672,13 +672,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
if (local->need_xattr_heal && !heal_path) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"xattr heal failed for "
"directory gfid is %s ",
gfid_local);
+ }
}
}
@@ -1205,7 +1206,7 @@ dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
to non hashed subvol
*/
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
{
dht_local_t *copy_local = NULL;
call_frame_t *copy = NULL;
@@ -1217,6 +1218,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"No gfid exists for path %s "
"so healing xattr is not possible",
local->loc.path);
+ *op_errno = EIO;
goto out;
}
@@ -1230,6 +1232,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Memory allocation failed "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
} else {
copy_local->stbuf = local->stbuf;
@@ -1244,6 +1247,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Synctask creation failed to heal xattr "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
}
}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index b856c68..1cb1c0c 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1493,7 +1493,7 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
dict_t *src, int *uret, int *uflag);
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local);
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
int32_t
dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 4c3940a..d3444b3 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -2105,6 +2105,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_local_t *local = NULL;
xlator_t *this = NULL;
int ret = -1;
+ int op_errno = 0;
local = heal_frame->local;
main_frame = local->main_frame;
@@ -2114,10 +2115,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_set_fixed_dir_stat(&local->postparent);
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
"xattr heal failed for directory %s ", local->loc.path);
+ }
}
DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 8af7301..2da9817 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1471,6 +1471,7 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
{
int missing_dirs = 0;
int i = 0;
+ int op_errno = 0;
int ret = -1;
dht_local_t *local = NULL;
xlator_t *this = NULL;
@@ -1493,13 +1494,14 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"%s:xattr heal failed for "
"directory (gfid = %s)",
local->loc.path, local->gfid);
+ }
} else {
if (!gf_uuid_is_null(local->gfid))
gf_uuid_copy(loc->gfid, local->gfid);
--
1.8.3.1

View File

@ -1,99 +0,0 @@
From 6d7049a19029331266f70f68d860bbccef01a35d Mon Sep 17 00:00:00 2001
From: Nikhil Ladha <nladha@redhat.com>
Date: Thu, 8 Jul 2021 11:26:54 +0530
Subject: [PATCH 601/610] coverity: resource leak (#2321)
Issue:
Variable `arg` is not freed before the function exits,
and leads to resource leak.
Fix:
Free the arg variable if the status of function call
`glusterd_compare_friend_volume` is
`GLUSTERD_VOL_COMP_UPDATE_REQ`, or if the `glusterd_launch_synctask`
fails to start the process.
And, added a check for return value on calling
`glusterd_launch_synctask` function and exit if the
thread creation fails.
CID: 1401716
>Updates: #1060
>Change-Id: I4abd621771f88853d8d01e9039cdee2f3d862c4f
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://github.com/gluster/glusterfs/pull/2321
BUG: 1997447
Change-Id: Ida81dfcd58c5ef45d3ae036d6bd6b36dc6693538
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280090
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-utils.c | 10 +++++++---
xlators/mgmt/glusterd/src/glusterd-utils.h | 2 +-
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index c037933..cec9c20 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -5371,6 +5371,7 @@ glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status,
if (GLUSTERD_VOL_COMP_RJT == *status) {
ret = 0;
+ update = _gf_false;
goto out;
}
if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) {
@@ -5385,11 +5386,12 @@ glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status,
* first brick to come up before attaching the subsequent bricks
* in case brick multiplexing is enabled
*/
- glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg);
+ ret = glusterd_launch_synctask(glusterd_import_friend_volumes_synctask,
+ arg);
}
out:
- if (ret && arg) {
+ if ((ret || !update) && arg) {
dict_unref(arg->peer_data);
dict_unref(arg->peer_ver_data);
GF_FREE(arg);
@@ -13115,7 +13117,7 @@ gd_default_synctask_cbk(int ret, call_frame_t *frame, void *opaque)
return ret;
}
-void
+int
glusterd_launch_synctask(synctask_fn_t fn, void *opaque)
{
xlator_t *this = NULL;
@@ -13131,6 +13133,8 @@ glusterd_launch_synctask(synctask_fn_t fn, void *opaque)
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SPAWN_SVCS_FAIL,
"Failed to spawn bricks"
" and other volume related services");
+
+ return ret;
}
/*
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 4541471..3f4f3b8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -681,7 +681,7 @@ int32_t
glusterd_take_lvm_snapshot(glusterd_brickinfo_t *brickinfo,
char *origin_brick_path);
-void
+int
glusterd_launch_synctask(synctask_fn_t fn, void *opaque);
int
--
1.8.3.1

View File

@ -1,87 +0,0 @@
From 2ff83650a5f05e3f06853df6d79d3b18f88dfb23 Mon Sep 17 00:00:00 2001
From: Nikhil Ladha <nladha@redhat.com>
Date: Thu, 6 May 2021 10:45:46 +0530
Subject: [PATCH 602/610] coverity: null dereference (#2395)
Fix:
Updated the code to make it more readable and fixed
the NULL dereferencing.
CID: 1234622
>Updates: #1060
>Change-Id: I05bd203bc46fe84be86398bd664a3485409c3bfe
>Signed-off-by: nik-redhat <nladha@redhat.com>
Upstream link: https://github.com/gluster/glusterfs/pull/2395
BUG: 1997447
Change-Id: If39cc85115de673a83b6c97137ea8d1f0f825245
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280093
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-lock.c | 32 +++++++++++++++-----------------
1 file changed, 15 insertions(+), 17 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
index f9bac4f..6474dfa 100644
--- a/xlators/cluster/dht/src/dht-lock.c
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -914,37 +914,35 @@ dht_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dht_local_t *local = NULL;
int lk_index = 0, call_cnt = 0;
char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_ilock_wrap_t *my_layout;
local = frame->local;
lk_index = (long)cookie;
+ my_layout = &(local->lock[0].layout.my_layout);
+
if (op_ret == -1) {
- local->lock[0].layout.my_layout.op_ret = -1;
- local->lock[0].layout.my_layout.op_errno = op_errno;
-
- if (local && local->lock[0].layout.my_layout.locks[lk_index]) {
- uuid_utoa_r(local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->loc.inode->gfid,
- gfid);
-
- gf_msg_debug(
- this->name, op_errno,
- "inodelk failed on gfid: %s "
- "subvolume: %s",
- gfid,
- local->lock[0].layout.my_layout.locks[lk_index]->xl->name);
+ my_layout->op_ret = -1;
+ my_layout->op_errno = op_errno;
+
+ if (my_layout->locks[lk_index]) {
+ uuid_utoa_r(my_layout->locks[lk_index]->loc.inode->gfid, gfid);
+
+ gf_msg_debug(this->name, op_errno,
+ "inodelk failed on gfid: %s "
+ "subvolume: %s",
+ gfid, my_layout->locks[lk_index]->xl->name);
}
goto out;
}
- local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true;
+ my_layout->locks[lk_index]->locked = _gf_true;
out:
call_cnt = dht_frame_return(frame);
if (is_last_call(call_cnt)) {
- if (local->lock[0].layout.my_layout.op_ret < 0) {
+ if (my_layout->op_ret < 0) {
dht_inodelk_cleanup(frame);
return 0;
}
--
1.8.3.1

View File

@ -1,51 +0,0 @@
From 015e6cac71b0a0c330f1e4792f9d60214b191f45 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Thu, 7 Oct 2021 21:07:46 +0530
Subject: [PATCH 603/610] Coverity: Resource leak fix (CID: 1356547)
Issue:
In function gf_svc_readdirp() there is a chance that 'local' will be allocated
memory but not released in the failure path.
Fix:
Assign 'local' to 'frame->local' immediately after the successful allocation, so
it will be released by the existing failure path code itself.
> Upstream patch: https://github.com/gluster/glusterfs/pull/2362/
> Change-Id: I4474dc4d4be5432d169cb7d434728f211054997e
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
> Updates: gluster#1060
BUG: 1997447
Change-Id: I4474dc4d4be5432d169cb7d434728f211054997e
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280100
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/features/snapview-client/src/snapview-client.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c
index 9c789ae..e97db89 100644
--- a/xlators/features/snapview-client/src/snapview-client.c
+++ b/xlators/features/snapview-client/src/snapview-client.c
@@ -2156,6 +2156,7 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
"failed to allocate local");
goto out;
}
+ frame->local = local;
/*
* This is mainly for samba shares (or windows clients). As part of
@@ -2184,7 +2185,6 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->subvolume = subvolume;
local->fd = fd_ref(fd);
- frame->local = local;
STACK_WIND(frame, gf_svc_readdirp_cbk, subvolume, subvolume->fops->readdirp,
fd, size, off, xdata);
--
1.8.3.1

View File

@ -1,50 +0,0 @@
From dee1c932df22ee12fe4568b40e58a475309e62fd Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Thu, 7 Oct 2021 21:18:49 +0530
Subject: [PATCH 604/610] Coverity: Fix dereference before null check (CID:
1391415)
Problem:
In function gf_client_dump_inodes_to_dict() there is a null check for
a variable which is already dereferenced in the previous line. This
means that there could be a chance that this variable is null. But it
is not being validated for null before dereferencing it in the first
place.
Fix:
Added null check before dereferencing the variable at the first place.
> Upstream patch: https://github.com/gluster/glusterfs/pull/2369/
> Change-Id: I988b0e93542782353a8059e33db1522b6a5e55f8
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
> Updates: gluster#1060
BUG: 1997447
Change-Id: I988b0e93542782353a8059e33db1522b6a5e55f8
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280103
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/client_t.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
index e875c8b..216900a 100644
--- a/libglusterfs/src/client_t.c
+++ b/libglusterfs/src/client_t.c
@@ -828,8 +828,9 @@ gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict)
clienttable->cliententries[count].next_free)
continue;
client = clienttable->cliententries[count].client;
- if (!strcmp(client->bound_xl->name, this->name)) {
- if (client->bound_xl && client->bound_xl->itable) {
+ if (client->bound_xl &&
+ !strcmp(client->bound_xl->name, this->name)) {
+ if (client->bound_xl->itable) {
/* Presently every brick contains only
* one bound_xl for all connections.
* This will lead to duplicating of
--
1.8.3.1

View File

@ -1,53 +0,0 @@
From 25fc2530f7ee6d7267e2ccc1b75a47a3ae539dff Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Thu, 7 Oct 2021 21:29:27 +0530
Subject: [PATCH 605/610] Coverity: Fix copy into fixed size buffer (CID:
1325542)
Problem:
In __mnt3_fresh_lookup() mres->resolveloc.path is being copied into
a fixed size string mres->remainingdir, with strncpy without checking
the size of the source string. This could lead to string overflow.
Fix:
Copy only till the destination string length and check whether the
source string overflows. If so, log an error message and return.
> Upstream patch: https://github.com/gluster/glusterfs/pull/2474/
> Change-Id: I26dd0653d2636c667ad4e356d12d3d51956c77c3
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
> Updates: gluster#1060
BUG: 1997447
Change-Id: I26dd0653d2636c667ad4e356d12d3d51956c77c3
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280106
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/nfs/server/src/mount3.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
index 734453c..3951b9e 100644
--- a/xlators/nfs/server/src/mount3.c
+++ b/xlators/nfs/server/src/mount3.c
@@ -1104,8 +1104,13 @@ __mnt3_fresh_lookup(mnt3_resolve_t *mres)
{
inode_unlink(mres->resolveloc.inode, mres->resolveloc.parent,
mres->resolveloc.name);
- strncpy(mres->remainingdir, mres->resolveloc.path,
- strlen(mres->resolveloc.path));
+ if (snprintf(mres->remainingdir, sizeof(mres->remainingdir), "%s",
+ mres->resolveloc.path) >= sizeof(mres->remainingdir)) {
+ gf_msg(GF_MNT, GF_LOG_ERROR, EFAULT, NFS_MSG_RESOLVE_INODE_FAIL,
+ "Failed to copy resolve path: %s", mres->resolveloc.path);
+ nfs_loc_wipe(&mres->resolveloc);
+ return -EFAULT;
+ }
nfs_loc_wipe(&mres->resolveloc);
return __mnt3_resolve_subdir(mres);
}
--
1.8.3.1

View File

@ -1,69 +0,0 @@
From a6ba95b73469ad81d8c5a27293f8d09cc26928a3 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Fri, 18 Dec 2020 16:28:29 +0530
Subject: [PATCH 606/610] dht: handle DHT_SUBVOL_STATUS_KEY in dht_pt_getxattr
(#1934)
In non distribute volumes (plain replicate, ec), DHT uses pass-through
FOPs (dht_pt_getxattr) instead of the usual FOPS (dht_getxattr). The
pass through FOP was not handling the DHT_SUBVOL_STATUS_KEY virtual
xattr because of which geo-rep session was going into a faulty state.
Fixing it now.
> updates: #1925
> Change-Id: I766b5b5c047c954a9957ab78aca680eedef1ff1f
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Upstream patch: https://github.com/gluster/glusterfs/pull/1934
BUG: 2006205
Change-Id: I766b5b5c047c954a9957ab78aca680eedef1ff1f
Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280112
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 5eaaa1e..c8980e5 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -11584,9 +11584,33 @@ int
dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key, dict_t *xdata)
{
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_GETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (key &&
+ strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
+ dht_vgetxattr_subvol_status(frame, this, key);
+ return 0;
+ }
+
STACK_WIND(frame, dht_pt_getxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
static int
--
1.8.3.1

View File

@ -1,121 +0,0 @@
From 4b65ff0d1a3d70fcf3cfa8ab769135ae12f529d8 Mon Sep 17 00:00:00 2001
From: nik-redhat <nladha@redhat.com>
Date: Thu, 7 Oct 2021 22:02:32 +0530
Subject: [PATCH 607/610] SELinux: Fix boolean management
Remove %triggerun ganesha
This trigger shouldn't be needed to begin with since removing
selinux-policy-targeted means that the user is switching SELinux off, or
is switching the policy (to "mls" or "minimum"). In either case the
current boolean setting is not going to be used any more. The last
option, removal of glusterfs-ganesha, is covered by '%postun ganesha'.
But more importantly, the trigger is called every time
selinux-policy-targeted is updated (which can be avoided).
%triggerun is executed after %triggerin -
https://docs.fedoraproject.org/en-US/packaging-guidelines/Scriptlets/#ordering
So when selinux-policy-targeted is updated, the new version is installed
first triggering `semanage boolean -m ganesha_use_fusefs --on`,
and then the old version is uninstalled triggering
`semanage boolean -m ganesha_use_fusefs --off`.
* use selinux_[un]set_booleans instead of "semanage boolean"
The macro pair properly manages SELinux stores and doesn't disable the
boolean in case it was enabled before ${name}-ganesha was installed.
* Only change booleans when the package is first installed or
uninstalled
Updating ${name}-ganesha would disable the boolean because %postun is
called after %post (same issue as with the triggers).
Signed-off-by: Vit Mojzis <vmojzis@redhat.com>
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Change-Id: Ibb926ffbe00c9f000bd740708c0a4b3435ee7871
PR: https://github.com/gluster/glusterfs/pull/2833
Issue: https://github.com/gluster/glusterfs/issues/2522
Resolves: rhbz#1973566
Resolves: rhbz#1975400
BUG: 1973566
Change-Id: Idef6cbd6bce35151518d6f76e5b74774e5756fc9
Signed-off-by: nik-redhat <nladha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280114
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
---
glusterfs.spec.in | 34 +++++++++++++++++++++-------------
1 file changed, 21 insertions(+), 13 deletions(-)
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 424f4ab..a9a83b1 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -954,7 +954,10 @@ exit 0
%if ( 0%{!?_without_server:1} )
%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
%post ganesha
-semanage boolean -m ganesha_use_fusefs --on
+# first install
+if [ $1 -eq 1 ]; then
+ %selinux_set_booleans ganesha_use_fusefs=1
+fi
exit 0
%endif
%endif
@@ -962,7 +965,9 @@ exit 0
%if ( 0%{!?_without_georeplication:1} )
%post geo-replication
%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
-%selinux_set_booleans %{selinuxbooleans}
+if [ $1 -eq 1 ]; then
+ %selinux_set_booleans %{selinuxbooleans}
+fi
%endif
if [ $1 -ge 1 ]; then
%systemd_postun_with_restart glusterd
@@ -1089,29 +1094,32 @@ exit 0
%if ( 0%{!?_without_server:1} )
%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
%postun ganesha
-semanage boolean -m ganesha_use_fusefs --off
+if [ $1 -eq 0 ]; then
+ # use the value of ganesha_use_fusefs from before glusterfs-ganesha was installed
+ %selinux_unset_booleans ganesha_use_fusefs=1
+fi
exit 0
%endif
%endif
-##-----------------------------------------------------------------------------
-## All %%trigger should be placed here and keep them sorted
-##
-%if ( 0%{!?_without_server:1} )
-%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
-%trigger ganesha -- selinux-policy-targeted
-semanage boolean -m ganesha_use_fusefs --on
+%if ( 0%{!?_without_georeplication:1} )
+%postun geo-replication
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+if [ $1 -eq 0 ]; then
+ %selinux_unset_booleans %{selinuxbooleans}
+fi
exit 0
%endif
%endif
##-----------------------------------------------------------------------------
-## All %%triggerun should be placed here and keep them sorted
+## All %%trigger should be placed here and keep them sorted
##
%if ( 0%{!?_without_server:1} )
%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
-%triggerun ganesha -- selinux-policy-targeted
-semanage boolean -m ganesha_use_fusefs --off
+# ensure ganesha_use_fusefs is on in case of policy mode switch (eg. mls->targeted)
+%triggerin ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --on -S targeted
exit 0
%endif
%endif
--
1.8.3.1

View File

@ -1,143 +0,0 @@
From d806760f1d4c78a2519b01f1c2d07aba0c533755 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Fri, 28 Aug 2020 16:03:54 +0530
Subject: [PATCH 608/610] cluster/ec: Track heal statistics in shd
With this change we should be able to inspect number of heals
attempted and completed by each shd.
> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24926/
> fixes: #1453
> Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
BUG: 1853631
Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39
Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280208
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/ec/src/ec-heald.c | 49 ++++++++++++++++++++++++++++++++++++++-
xlators/cluster/ec/src/ec-types.h | 5 ++++
xlators/cluster/ec/src/ec.c | 6 +++++
3 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 4f4b6aa..cd4d3ad 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -152,15 +152,58 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completes, if we do strchr for '0' it should be present after
+ * 'Bad:' i.e. strRchr for ':'
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
dict_t *xdata = NULL;
+ dict_t *dict = NULL;
uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata);
if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
* other index entries have appeared to be healed. */
@@ -179,6 +222,10 @@ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
dict_unref(xdata);
}
+ if (dict) {
+ dict_unref(dict);
+ }
+
return ret;
}
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 700dc39..ef7a7fe 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -626,6 +626,11 @@ struct _ec_statistics {
requests. (Basically memory allocation
errors). */
} stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals completed on files/directories*/
+ } shd;
};
struct _ec {
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 047cdd8..24de9e8 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -649,6 +649,8 @@ ec_statistics_init(ec_t *ec)
GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
}
int32_t
@@ -1445,6 +1447,10 @@ ec_dump_private(xlator_t *this)
GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
--
1.8.3.1

View File

@ -1,43 +0,0 @@
From 89cdfb40264c12105a1b4990fa9b45290aa6cef0 Mon Sep 17 00:00:00 2001
From: Vinayakswami Hariharmath <vharihar@redhat.com>
Date: Fri, 8 Oct 2021 09:40:41 +0530
Subject: [PATCH 609/610] feature/shard: wrong dname results in dentry not
found error
A wrong dname was passed to inode_unlink in
shard_evicted_inode_fsync_cbk(), resulting in a "dentry not found"
error.
This patch addresses the issue.
> upstream patch: https://github.com/gluster/glusterfs/pull/2475
> Fixes: #2470
> Change-Id: I6c479980ae3fa7ba558327055a9e5e5c2d2a850f
> Signed-off-by: Vinayakswami Hariharmath vharihar@redhat.com
BUG: 1911665
Change-Id: I96aa5f57303b69a08990de039ddeecad7e7ae6af
Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280202
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/features/shard/src/shard.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index b828ff9..882373f 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -950,7 +950,7 @@ shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
__shard_inode_ctx_get(shard_inode, this, &ctx);
if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
- shard_make_block_bname(ctx->block_num, shard_inode->gfid,
+ shard_make_block_bname(ctx->block_num, ctx->base_gfid,
block_bname, sizeof(block_bname));
inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
/* The following unref corresponds to the ref held by
--
1.8.3.1

View File

@ -1,51 +0,0 @@
From b3e86a66de224107f6760157a7cb692227e42954 Mon Sep 17 00:00:00 2001
From: Shwetha Acharya <sacharya@redhat.com>
Date: Mon, 30 Aug 2021 18:54:15 +0530
Subject: [PATCH 610/610] glusterfs.spec.in: remove conditionals from tar
dependency (#2734)
* glusterfs.spec.in: remove conditionals from tar dependency
The conditional on rhel minor version fails and tar is not
marked as required.
Because there is no universal macro for specifying the
minor release, remove the conditionals around the
"Requires: tar" statement.
With this change, tar is marked as required for geo-rep
regardless of the RHEL version.
> Change-Id: Id1e3320a0b1a245fc9cd8c7acb09cc119fca18b8
> Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
Upstream patch: https://github.com/gluster/glusterfs/pull/2734
BUG: 1901468
Change-Id: Id1e3320a0b1a245fc9cd8c7acb09cc119fca18b8
Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280116
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
glusterfs.spec.in | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index a9a83b1..8b6646f 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -521,9 +521,8 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release}
Requires: rsync
Requires: util-linux
Requires: %{name}-libs%{?_isa} = %{version}-%{release}
-%if ( 0%{?rhel} && ( ( 0%{?rhel} == 8 && 0%{?rhel_minor_version} >= 3 ) || 0%{?rhel} >= 9 ) )
Requires: tar
-%endif
+
# required for setting selinux bools
%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
Requires(post): policycoreutils-python-utils
--
1.8.3.1

View File

@ -1,54 +0,0 @@
From 5ad4711f40c0e8ab7c196ac1c9025bf78b8b94e0 Mon Sep 17 00:00:00 2001
From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
Date: Thu, 18 Nov 2021 09:21:56 -0500
Subject: [PATCH 611/611] SELinux: Fix boolean management, again
When upgrading from a version of the package that does not include
the previous fix, the flawed scriptlet is still executed,
undoing the setting of the boolean.
For the fix to take effect, the boolean needs to be set in %posttrans. This is
a temporary change that can (or should) be removed in the next version
of RHGS, i.e. 3.5.7.
Issue: https://github.com/gluster/glusterfs/issues/2522
Resolves: rhbz#1973566
Resolves: rhbz#1975400
Label: DOWNSTREAM ONLY
BUG: 1973566
Change-Id: Ida39a3ee5e6b4b0d3255bfef95601890afd80709
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/292189
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
glusterfs.spec.in | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 8b6646f..87176c9 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1123,6 +1123,17 @@ exit 0
%endif
%endif
+%if ( 0%{!?_without_server:1} )
+%if ( ( 0%{?fedora} && 0%{?fedora} > 25 ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+# temporary fix to be removed in the next version (i.e. RHGS 3.5.7). This
+# is only needed when upgrading from the flawed versions (e.g. RHGS 3.5.5
+# and earlier.)
+%posttrans ganesha
+semanage boolean -m ganesha_use_fusefs --on -S targeted
+exit 0
+%endif
+%endif
+
##-----------------------------------------------------------------------------
## All %%files should be placed here and keep them grouped
##
--
1.8.3.1

Some files were not shown because too many files have changed in this diff Show More