autobuild v3.12.2-6

Resolves: bz#1491785 bz#1518710 bz#1523599 bz#1528733 bz#1550474
Resolves: bz#1550982 bz#1551186 bz#1552360 bz#1552414 bz#1552425
Resolves: bz#1554255 bz#1554905 bz#1555261 bz#1556895 bz#1557297
Resolves: bz#1559084 bz#1559788
Signed-off-by: Milind Changire <mchangir@redhat.com>
This commit is contained in:
Milind Changire 2018-03-26 06:38:12 -04:00
parent 44012ad580
commit 772c9f37aa
22 changed files with 7615 additions and 1 deletions

View File

@ -0,0 +1,55 @@
From ee1c4f7d1303c61725f73870f32afa1bc4f68854 Mon Sep 17 00:00:00 2001
From: Atin Mukherjee <amukherj@redhat.com>
Date: Thu, 4 Jan 2018 22:07:54 +0530
Subject: [PATCH 181/201] glusterd: get-state memory leak fix
>upstream mainline patch : https://review.gluster.org/#/c/19139/
Change-Id: Ic4fcf2087f295d3dade944efb8fd08f7e2d7d516
BUG: 1528733
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/132079
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-handler.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index cf280a7..81926a8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5180,6 +5180,8 @@ glusterd_print_gsync_status_by_vol (FILE *fp, glusterd_volinfo_t *volinfo)
if (ret)
goto out;
out:
+ if (gsync_rsp_dict)
+ dict_unref (gsync_rsp_dict);
return ret;
}
@@ -5495,9 +5497,19 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
if (odir[odirlen-1] != '/')
strcat (odir, "/");
- gf_asprintf (&ofilepath, "%s%s", odir, filename);
+ ret = gf_asprintf (&ofilepath, "%s%s", odir, filename);
+ if (ret < 0) {
+ GF_FREE (odir);
+ GF_FREE (filename);
+ gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to get the output path");
+ ret = -1;
+ goto out;
+ }
+ GF_FREE (odir);
+ GF_FREE (filename);
- ret = dict_set_str (dict, "ofilepath", ofilepath);
+ ret = dict_set_dynstr (dict, "ofilepath", ofilepath);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_DICT_SET_FAILED, "Unable to set output path");
--
1.8.3.1

View File

@ -0,0 +1,141 @@
From 224af5bc6ea27a617f222cd83da871df85c6b7a1 Mon Sep 17 00:00:00 2001
From: Samikshan Bairagya <samikshan@gmail.com>
Date: Thu, 9 Nov 2017 15:15:37 +0530
Subject: [PATCH 182/201] glusterd: Fix coverity issues in glusterd-handler.c
Fixes get-state CLI related coverity issues 477, 511, 515, 523,
526 and 527 from the report at [1]
[1] https://download.gluster.org/pub/gluster/glusterfs/static-analysis/master/glusterfs-coverity/2017-10-30-9aa574a5/html/
>upstream mainline patch : https://review.gluster.org/#/c/18706/
Change-Id: Ieb6f64c9035b4d9338d9515de003d607b7a4e9bc
BUG: 1528733
Signed-off-by: Samikshan Bairagya <samikshan@gmail.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/132080
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-handler.c | 34 ++++++++++++++++++++--------
1 file changed, 24 insertions(+), 10 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 81926a8..16a3773 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -5426,6 +5426,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
char *odir = NULL;
char *filename = NULL;
char *ofilepath = NULL;
+ char *tmp_str = NULL;
int count = 0;
int count_bkp = 0;
int odirlen = 0;
@@ -5435,6 +5436,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
uint64_t memtotal = 0;
uint64_t memfree = 0;
int start_index = 0;
+ char id_str[64] = {0,};
char *vol_type_str = NULL;
char *hot_tier_type_str = NULL;
@@ -5453,12 +5455,14 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
GF_VALIDATE_OR_GOTO (this->name, dict, out);
- ret = dict_get_str (dict, "odir", &odir);
+ ret = dict_get_str (dict, "odir", &tmp_str);
if (ret) {
gf_asprintf (&odir, "%s", "/var/run/gluster/");
gf_msg (this->name, GF_LOG_INFO, 0,
GD_MSG_DICT_GET_FAILED,
"Default output directory: %s", odir);
+ } else {
+ gf_asprintf (&odir, "%s", tmp_str);
}
dp = sys_opendir (odir);
@@ -5481,7 +5485,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
goto out;
}
- ret = dict_get_str (dict, "filename", &filename);
+ ret = dict_get_str (dict, "filename", &tmp_str);
if (ret) {
now = time (NULL);
strftime (timestamp, sizeof (timestamp),
@@ -5491,6 +5495,8 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
gf_msg (this->name, GF_LOG_INFO, 0,
GD_MSG_DICT_GET_FAILED,
"Default filename: %s", filename);
+ } else {
+ gf_asprintf (&filename, "%s", tmp_str);
}
odirlen = strlen (odir);
@@ -5509,7 +5515,7 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
GF_FREE (odir);
GF_FREE (filename);
- ret = dict_set_dynstr (dict, "ofilepath", ofilepath);
+ ret = dict_set_str (dict, "ofilepath", ofilepath);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_DICT_SET_FAILED, "Unable to set output path");
@@ -5564,7 +5570,9 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
fprintf (fp, "[Global]\n");
- fprintf (fp, "MYUUID: %s\n", gf_strdup (uuid_utoa (priv->uuid)));
+ uuid_utoa_r (priv->uuid, id_str);
+ fprintf (fp, "MYUUID: %s\n", id_str);
+
fprintf (fp, "op-version: %d\n", priv->op_version);
fprintf (fp, "\n[Global options]\n");
@@ -5656,8 +5664,10 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
}
fprintf (fp, "Volume%d.name: %s\n", ++count, volinfo->volname);
- fprintf (fp, "Volume%d.id: %s\n", count,
- gf_strdup (uuid_utoa (volinfo->volume_id)));
+
+ uuid_utoa_r (volinfo->volume_id, id_str);
+ fprintf (fp, "Volume%d.id: %s\n", count, id_str);
+
fprintf (fp, "Volume%d.type: %s\n", count, vol_type_str);
fprintf (fp, "Volume%d.transport_type: %s\n", count,
transport_type_str);
@@ -5777,8 +5787,11 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
fprintf (fp, "Volume%d.snapd_svc.inited: %s\n", count,
volinfo->snapd.svc.inited ? "True" : "False");
- fprintf (fp, "Volume%d.rebalance.id: %s\n", count,
- gf_strdup (uuid_utoa (volinfo->rebal.rebalance_id)));
+ uuid_utoa_r (volinfo->rebal.rebalance_id, id_str);
+ char *rebal_data = gf_uint64_2human_readable (
+ volinfo->rebal.rebalance_data);
+
+ fprintf (fp, "Volume%d.rebalance.id: %s\n", count, id_str);
fprintf (fp, "Volume%d.rebalance.status: %s\n", count,
rebal_status_str);
fprintf (fp, "Volume%d.rebalance.failures: %"PRIu64"\n", count,
@@ -5789,11 +5802,12 @@ glusterd_get_state (rpcsvc_request_t *req, dict_t *dict)
volinfo->rebal.lookedup_files);
fprintf (fp, "Volume%d.rebalance.files: %"PRIu64"\n", count,
volinfo->rebal.rebalance_files);
- fprintf (fp, "Volume%d.rebalance.data: %s\n", count,
- gf_uint64_2human_readable (volinfo->rebal.rebalance_data));
+ fprintf (fp, "Volume%d.rebalance.data: %s\n", count, rebal_data);
fprintf (fp, "Volume%d.time_left: %"PRIu64"\n", count,
volinfo->rebal.time_left);
+ GF_FREE (rebal_data);
+
if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
ret = glusterd_volume_get_hot_tier_type_str (
volinfo, &hot_tier_type_str);
--
1.8.3.1

View File

@ -0,0 +1,144 @@
From 37897f0b72617e442e4799b35ebda94294218e05 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Wed, 28 Feb 2018 17:58:31 +0530
Subject: [PATCH 183/201] cluster/afr: Fix dict-leak in pre-op
At the time of pre-op, pre_op_xdata is populated with the xattrs we get from the
disk, and at the time of post-op it gets overwritten without unreffing the
previously stored value, leading to a leak.
This is a regression we missed in
https://review.gluster.org/#/q/ba149bac92d169ae2256dbc75202dc9e5d06538e
>BUG: 1550078
>Change-Id: I0456f9ad6f77ce6248b747964a037193af3a3da7
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
>Upstream: https://review.gluster.org/19647
BUG: 1552360
Change-Id: I0456f9ad6f77ce6248b747964a037193af3a3da7
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/131936
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/afr/src/afr-common.c | 16 ++++++++--------
xlators/cluster/afr/src/afr-transaction.c | 20 ++++++++++----------
xlators/cluster/afr/src/afr.h | 4 ++--
3 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 6e6f5fa..855e568 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1039,7 +1039,7 @@ afr_readables_fill (call_frame_t *frame, xlator_t *this, inode_t *inode,
xdata = replies[i].xdata;
ia_type = replies[i].poststat.ia_type;
} else {/* pre-op xattrop */
- xdata = local->transaction.pre_op_xdata[i];
+ xdata = local->transaction.changelog_xdata[i];
ia_type = inode->ia_type;
}
@@ -1757,13 +1757,13 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
GF_FREE (local->transaction.pre_op);
GF_FREE (local->transaction.pre_op_sources);
- if (local->transaction.pre_op_xdata) {
+ if (local->transaction.changelog_xdata) {
for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op_xdata[i])
+ if (!local->transaction.changelog_xdata[i])
continue;
- dict_unref (local->transaction.pre_op_xdata[i]);
+ dict_unref (local->transaction.changelog_xdata[i]);
}
- GF_FREE (local->transaction.pre_op_xdata);
+ GF_FREE (local->transaction.changelog_xdata);
}
GF_FREE (local->transaction.eager_lock);
@@ -5531,10 +5531,10 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->transaction.pre_op)
goto out;
- local->transaction.pre_op_xdata =
- GF_CALLOC (sizeof (*local->transaction.pre_op_xdata),
+ local->transaction.changelog_xdata =
+ GF_CALLOC (sizeof (*local->transaction.changelog_xdata),
priv->child_count, gf_afr_mt_dict_t);
- if (!local->transaction.pre_op_xdata)
+ if (!local->transaction.changelog_xdata)
goto out;
if (priv->arbiter_count == 1) {
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 19740e1..97f9dd4 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -276,9 +276,9 @@ afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this)
matrix = ALLOC_MATRIX (priv->child_count, int);
for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op_xdata[i])
+ if (!local->transaction.changelog_xdata[i])
continue;
- xdata = local->transaction.pre_op_xdata[i];
+ xdata = local->transaction.changelog_xdata[i];
afr_selfheal_fill_matrix (this, matrix, i, idx, xdata);
}
@@ -295,13 +295,6 @@ afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this)
for (j = 0; j < priv->child_count; j++)
if (matrix[i][j] != 0)
local->transaction.pre_op_sources[j] = 0;
-
- /*We don't need the xattrs any more. */
- for (i = 0; i < priv->child_count; i++)
- if (local->transaction.pre_op_xdata[i]) {
- dict_unref (local->transaction.pre_op_xdata[i]);
- local->transaction.pre_op_xdata[i] = NULL;
- }
}
gf_boolean_t
@@ -1173,7 +1166,7 @@ afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (xattr)
- local->transaction.pre_op_xdata[child_index] = dict_ref (xattr);
+ local->transaction.changelog_xdata[child_index] = dict_ref (xattr);
call_count = afr_frame_return (frame);
@@ -1605,6 +1598,13 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
local = frame->local;
priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.changelog_xdata[i]) {
+ dict_unref (local->transaction.changelog_xdata[i]);
+ local->transaction.changelog_xdata[i] = NULL;
+ }
+ }
+
ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
op, &xdata, &newloc_xdata);
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 96fefb1..c822221 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -748,8 +748,8 @@ typedef struct _afr_local {
unsigned char *pre_op;
- /* For arbiter configuration only. */
- dict_t **pre_op_xdata;
+ /* Changelog xattr dict for [f]xattrop*/
+ dict_t **changelog_xdata;
unsigned char *pre_op_sources;
/* @failed_subvols: subvolumes on which a pre-op or a
--
1.8.3.1

View File

@ -0,0 +1,65 @@
From 0b7fa3bdd3334c70d99d1a1b99c3e37d49fc66e3 Mon Sep 17 00:00:00 2001
From: Atin Mukherjee <amukherj@redhat.com>
Date: Mon, 12 Mar 2018 19:47:11 +0530
Subject: [PATCH 184/201] cli/glusterfsd: remove copyright information
There's no point in dumping upstream copyright information in --version.
Label: DOWNSTREAM ONLY
Change-Id: I3a10e30878698e1d53082936bbf22bca560a3896
BUG: 1550474
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/132445
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Milind Changire <mchangir@redhat.com>
---
cli/src/cli.c | 10 +---------
glusterfsd/src/glusterfsd.c | 10 +---------
2 files changed, 2 insertions(+), 18 deletions(-)
diff --git a/cli/src/cli.c b/cli/src/cli.c
index ce06366..52c1b67 100644
--- a/cli/src/cli.c
+++ b/cli/src/cli.c
@@ -66,15 +66,7 @@ extern int connected;
/* using argp for command line parsing */
const char *argp_program_version = "" \
- PACKAGE_NAME" "PACKAGE_VERSION \
- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
- "Copyright (c) 2006-2016 Red Hat, Inc. " \
- "<https://www.gluster.org/>\n" \
- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
- "It is licensed to you under your choice of the GNU Lesser\n" \
- "General Public License, version 3 or any later version (LGPLv3\n" \
- "or later), or the GNU General Public License, version 2 (GPLv2),\n" \
- "in all cases as published by the Free Software Foundation.";
+ PACKAGE_NAME" "PACKAGE_VERSION;
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
struct rpc_clnt *global_quotad_rpc;
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index eeffdc5..38b863c 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -87,15 +87,7 @@ static char gf_doc[] = "";
static char argp_doc[] = "--volfile-server=SERVER [MOUNT-POINT]\n" \
"--volfile=VOLFILE [MOUNT-POINT]";
const char *argp_program_version = "" \
- PACKAGE_NAME" "PACKAGE_VERSION \
- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
- "Copyright (c) 2006-2016 Red Hat, Inc. " \
- "<https://www.gluster.org/>\n" \
- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
- "It is licensed to you under your choice of the GNU Lesser\n" \
- "General Public License, version 3 or any later version (LGPLv3\n" \
- "or later), or the GNU General Public License, version 2 (GPLv2),\n" \
- "in all cases as published by the Free Software Foundation.";
+ PACKAGE_NAME" "PACKAGE_VERSION;
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
static error_t parse_opts (int32_t key, char *arg, struct argp_state *_state);
--
1.8.3.1

View File

@ -0,0 +1,51 @@
From 96146ccae552c648f33a19783fad824cf8101790 Mon Sep 17 00:00:00 2001
From: Milind Changire <mchangir@redhat.com>
Date: Tue, 13 Mar 2018 12:03:56 +0530
Subject: [PATCH 185/201] rpcsvc: correct event-thread scaling
Problem:
Auto thread count derived from the number of attaches and detaches
was reset to 1 when server_reconfigure() was called.
Solution:
Avoid auto-thread-count reset to 1.
mainline:
> BUG: 1547888
> Reviewed-on: https://review.gluster.org/19689
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
> Signed-off-by: Milind Changire <mchangir@redhat.com>
(cherry picked from commit 0c3d984287d91d3fe1ffeef297252d912c08a410)
Change-Id: Ic00e86adb81ba3c828e354a6ccb638209ae58b3e
BUG: 1554255
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/132509
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/protocol/server/src/server.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 4627ea0..89fde39 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -978,10 +978,11 @@ do_rpc:
}
/*
- * Let the event subsystem know that we're auto-scaling, with an
- * initial count of one.
+ * Update:
+ * We don't need to reset auto_thread_count since it has been derived
+ * out of the total bricks attached. We can reconfigure event threads
+ * but not auto threads.
*/
- ((struct event_pool *)(this->ctx->event_pool))->auto_thread_count = 1;
GF_OPTION_RECONF ("event-threads", new_nthread, options, int32, out);
ret = server_check_event_threads (this, conf, new_nthread);
--
1.8.3.1

View File

@ -0,0 +1,39 @@
From 51b684d93e2a36dbf7cbded1e117994fddf2a6a9 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Thu, 15 Mar 2018 12:56:02 +0530
Subject: [PATCH 186/201] cli: Remove upstream doc reference
...that is displayed while creating replica 2 volumes.
Label: DOWNSTREAM ONLY
Change-Id: I16b45c8ad3a33cdd2a464d84f51d006d8f568b23
BUG: 1554905
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/132744
Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
cli/src/cli-cmd-parser.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 54bd57f..e071b7d 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -533,9 +533,8 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
question = "Replica 2 volumes are prone"
" to split-brain. Use "
"Arbiter or Replica 3 to "
- "avoid this. See: "
- "http://docs.gluster.org/en/latest/Administrator%20Guide/Split%20brain%20and%20ways%20to%20deal%20with%20it/."
- "\nDo you still want to "
+ "avoid this.\n"
+ "Do you still want to "
"continue?\n";
answer = cli_cmd_get_confirmation (state,
question);
--
1.8.3.1

View File

@ -0,0 +1,55 @@
From c4ce2bb15a9df0a1b6a999612ccd053e67dc7083 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 19 Mar 2018 12:09:18 +0530
Subject: [PATCH 187/201] features/shard: Do list_del_init() while list memory
is valid
Problem:
shard_post_lookup_fsync_handler() goes over the list of inode-ctx that need to
be fsynced, and in the cbk it removes each inode-ctx from the list. When the
first member of the list is removed, it tries to modify the list head's memory
with the latest next/prev, and when this happens there is no guarantee that the
list head, which lives in the stack memory of shard_post_lookup_fsync_handler(),
is still valid.
Fix:
Do list_del_init() in the loop before winding fsync.
>BUG: 1557876
>Change-Id: If429d3634219e1a435bd0da0ed985c646c59c2ca
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Upstream-patch: https://review.gluster.org/19737
BUG: 1556895
Change-Id: If429d3634219e1a435bd0da0ed985c646c59c2ca
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/133241
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/features/shard/src/shard.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index a661345..945458e 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -4521,7 +4521,6 @@ out:
if (op_ret == 0)
ctx->fsync_needed -= fsync_count;
GF_ASSERT (ctx->fsync_needed >= 0);
- list_del_init (&ctx->to_fsync_list);
if (ctx->fsync_needed != 0) {
list_add_tail (&ctx->to_fsync_list,
&base_ictx->to_fsync_list);
@@ -4596,6 +4595,7 @@ shard_post_lookup_fsync_handler (call_frame_t *frame, xlator_t *this)
anon_fd = NULL;
list_for_each_entry_safe (iter, tmp, &copy, to_fsync_list) {
+ list_del_init (&iter->to_fsync_list);
fsync_count = 0;
shard_inode_ctx_get_fsync_count (iter->inode, this,
&fsync_count);
--
1.8.3.1

View File

@ -0,0 +1,70 @@
From 60575494a3b1ef52ea6374d62654693b6ee0d9bd Mon Sep 17 00:00:00 2001
From: Sunny Kumar <sunkumar@redhat.com>
Date: Fri, 16 Mar 2018 17:11:09 +0530
Subject: [PATCH 188/201] georep : Pause/Resume of geo-replication with wrong
user
Performing pause/resume on geo-replication with the wrong user
(a user other than the one the session was set up with) always returns
success, which in turn leads to snapshot creation failure because an
active geo-replication session is still detected.
upstream patch : https://review.gluster.org/#/c/19658/
>BUG: 1550936
>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Change-Id: I6e96e8dd3e861348b057475387f0093cb903ae88
BUG: 1557297
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/132890
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index 93d4516..dff8065 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -3641,6 +3641,18 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
if (path_list)
ret = -1;
}
+
+ /* Check for geo-rep session is active or not for
+ * configured user.*/
+ ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+ if (ret) {
+ snprintf (errmsg, sizeof(errmsg),
+ "Geo-replication session between %s "
+ "and %s does not exist.",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
}
break;
@@ -3665,6 +3677,17 @@ glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
}
}
+ /* Check for geo-rep session is active or not
+ * for configured user.*/
+ ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+ if (ret) {
+ snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+ " session between %s and %s does not exist.",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
+
if (!is_force) {
ret = gd_pause_resume_validation (type, volinfo, slave,
statefile, op_errstr);
--
1.8.3.1

View File

@ -0,0 +1,73 @@
From 2be2ed1e0da026c4ae932daa263c1215d23342a9 Mon Sep 17 00:00:00 2001
From: Csaba Henk <csaba@redhat.com>
Date: Mon, 5 Mar 2018 13:02:09 +0100
Subject: [PATCH 189/201] fuse: enable proper "fgetattr"-like semantics
GETATTR FUSE message can carry a file handle
reference in which case it serves as a hint
for the FUSE server that the stat data is
preferably acquired in context of the given
filehandle (which we call '"fgetattr"-like
semantics').
So far FUSE ignored the GETATTR-provided
file handle and grabbed a file handle
heuristically. This caused confusion in the
caching layers, which has been tracked down
as one of the causes of the referred BUG.
With respect to the BUG, this is just a partial fix.
> BUG: 1512691
> Change-Id: I67eebbf5407ca725ed111fbda4181ead10d03f6d
> Reviewed-on: https://review.gluster.org/19673
> Signed-off-by: Csaba Henk <csaba@redhat.com>
BUG: 1518710
Change-Id: I67eebbf5407ca725ed111fbda4181ead10d03f6d
Signed-off-by: Csaba Henk <csaba@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/133419
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mount/fuse/src/fuse-bridge.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 03d26eb..3e31eca 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -905,7 +905,10 @@ fuse_getattr_resume (fuse_state_t *state)
}
if (!IA_ISDIR (state->loc.inode->ia_type)) {
- state->fd = fd_lookup (state->loc.inode, 0);
+ if (state->fd == NULL)
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+ if (state->fd == NULL)
+ state->fd = fd_lookup (state->loc.inode, 0);
}
if (!state->fd) {
@@ -931,9 +934,18 @@ fuse_getattr_resume (fuse_state_t *state)
static void
fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ struct fuse_getattr_in *fgi = msg;
+ fuse_private_t *priv = NULL;
+#endif
fuse_state_t *state;
GET_STATE (this, finh, state);
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ priv = this->private;
+ if (priv->proto_minor >= 9 && fgi->getattr_flags & FUSE_GETATTR_FH)
+ state->fd = fd_ref ((fd_t *)fgi->fh);
+#endif
fuse_resolve_inode_init (state, &state->resolve, state->finh->nodeid);
--
1.8.3.1

View File

@ -0,0 +1,157 @@
From 0f2adea7ae377ea2efbab388f3af7e2a048f5f68 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Wed, 17 Jan 2018 17:30:06 +0530
Subject: [PATCH 190/201] cluster/afr: Adding option to take full file lock
Problem:
In replica 3 volumes there is a possibility of ending up in a split-brain
scenario when multiple clients write data to the same file
at non-overlapping regions in parallel.
Scenario:
- Initially all the copies are good and all the clients get the value
of data readables as all good.
- Client C0 performs write W1 which fails on brick B0 and succeeds on
other two bricks.
- C1 performs write W2 which fails on B1 and succeeds on other two bricks.
- C2 performs write W3 which fails on B2 and succeeds on other two bricks.
- All the 3 writes above happen in parallel and fall on different ranges
so afr takes granular locks and all the writes are performed in parallel.
Since each client had data-readables as good, it does not see
file going into split-brain in the in_flight_split_brain check, hence
performs the post-op marking the pending xattrs. Now all the bricks
are being blamed by each other, ending up in split-brain.
Fix:
Have an option to take either full lock or range lock on files while
doing data transactions, to prevent the possibility of ending up in
split brains. With this change, by default the files will take full
lock while doing IO. If you want to make use of the old range lock
change the value of "cluster.full-lock" to "no".
Upstream patch: https://review.gluster.org/#/c/19218/
> Change-Id: I7893fa33005328ed63daa2f7c35eeed7c5218962
> BUG: 1535438
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
Change-Id: I4d8b1c90bfff8f597cf7f7e49a71f5f6eb19f986
BUG: 1552414
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/131966
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
libglusterfs/src/globals.h | 4 +++-
xlators/cluster/afr/src/afr-transaction.c | 2 +-
xlators/cluster/afr/src/afr.c | 8 ++++++++
xlators/cluster/afr/src/afr.h | 5 +++--
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 +++++++
5 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 6bbe3e6..8fd3318 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -43,7 +43,7 @@
*/
#define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly
should not change */
-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_1 /* MAX VERSION is the maximum
+#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_2 /* MAX VERSION is the maximum
count in VME table, should
keep changing with
introduction of newer
@@ -107,6 +107,8 @@
#define GD_OP_VERSION_3_13_1 31301 /* Op-version for GlusterFS 3.13.1 */
+#define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */
+
#include "xlator.h"
/* THIS */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 97f9dd4..1c80c6b 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1991,7 +1991,7 @@ afr_set_transaction_flock (xlator_t *this, afr_local_t *local)
inodelk = afr_get_inodelk (int_lock, int_lock->domain);
priv = this->private;
- if (priv->arbiter_count &&
+ if ((priv->arbiter_count || priv->full_lock) &&
local->transaction.type == AFR_DATA_TRANSACTION) {
/*Lock entire file to avoid network split brains.*/
inodelk->flock.l_len = 0;
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index d3aee77..9493fbb 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -244,6 +244,7 @@ reconfigure (xlator_t *this, dict_t *options)
out);
GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,
out);
+ GF_OPTION_RECONF ("full-lock", priv->full_lock, options, bool, out);
GF_OPTION_RECONF ("use-compound-fops", priv->use_compound_fops,
options, bool,
out);
@@ -534,6 +535,7 @@ init (xlator_t *this)
GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out);
+ GF_OPTION_INIT ("full-lock", priv->full_lock, bool, out);
GF_OPTION_INIT ("use-compound-fops", priv->use_compound_fops,
bool, out);
GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out);
@@ -1084,6 +1086,12 @@ struct volume_options options[] = {
"stop being compatible with afr-v1, which helps afr "
"be more granular while self-healing",
},
+ { .key = {"full-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .description = "If this option is disabled, then the IOs will take "
+ "range locks same as versions till 3.13.1."
+ },
{ .key = {"granular-entry-heal"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "no",
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index c822221..b6f5388 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -178,9 +178,10 @@ typedef struct _afr_private {
void *pump_private;
gf_boolean_t use_afr_in_pump;
char *locking_scheme;
- gf_boolean_t esh_granular;
+ gf_boolean_t full_lock;
+ gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
- gf_boolean_t use_compound_fops;
+ gf_boolean_t use_compound_fops;
} afr_private_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index b603c7f..8d3407d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1507,6 +1507,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "cluster.full-lock",
+ .voltype = "cluster/replicate",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_13_2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
/* stripe xlator options */
{ .key = "cluster.stripe-block-size",
.voltype = "cluster/stripe",
--
1.8.3.1

View File

@ -0,0 +1,362 @@
From 9f670a342ffed3eee7cb91a67dcc2f2a27600983 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Fri, 23 Feb 2018 15:12:19 +0530
Subject: [PATCH 191/201] cluster/afr: Make afr_fsync a transaction
Upstream patch: https://review.gluster.org/#/c/19621/
Change-Id: I713401feb96393f668efb074f2d5b870d19e6fda
BUG: 1552425
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/131942
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
xlators/cluster/afr/src/afr-common.c | 163 ------------------------------
xlators/cluster/afr/src/afr-inode-write.c | 108 ++++++++++++++++++++
xlators/cluster/afr/src/afr-inode-write.h | 4 +
xlators/cluster/afr/src/afr.c | 2 +-
xlators/cluster/afr/src/afr.h | 4 +
5 files changed, 117 insertions(+), 164 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 855e568..a790402 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3435,169 +3435,6 @@ out:
return 0;
}
-/* }}} */
-
-
-/* {{{ fsync */
-
-int
-afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
-}
-
-int
-afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_subvol = 0;
- call_stub_t *stub = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- local->replies[child_index].valid = 1;
- local->replies[child_index].op_ret = op_ret;
- local->replies[child_index].op_errno = op_errno;
- if (op_ret == 0) {
- if (prebuf)
- local->replies[child_index].prestat = *prebuf;
- if (postbuf)
- local->replies[child_index].poststat = *postbuf;
- if (xdata)
- local->replies[child_index].xdata =
- dict_ref (xdata);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->op_ret = -1;
- local->op_errno = afr_final_errno (local, priv);
- read_subvol = afr_data_subvol_get (local->inode, this, NULL,
- local->readable, NULL, NULL);
- /* Pick a reply that is valid and readable, with a preference
- * given to read_subvol. */
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
- if (local->replies[i].op_ret != 0)
- continue;
- if (!local->readable[i])
- continue;
- local->op_ret = local->replies[i].op_ret;
- local->op_errno = local->replies[i].op_errno;
- local->cont.inode_wfop.prebuf =
- local->replies[i].prestat;
- local->cont.inode_wfop.postbuf =
- local->replies[i].poststat;
- if (local->replies[i].xdata) {
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
- local->xdata_rsp =
- dict_ref (local->replies[i].xdata);
- }
- if (i == read_subvol)
- break;
- }
-
- /* Make a stub out of the frame, and register it
- with the waking up post-op. When the call-stub resumes,
- we are guaranteed that there was no post-op pending
- (i.e changelogs were unset in the server). This is an
- essential "guarantee", that fsync() returns only after
- completely finishing EVERYTHING, including the delayed
- post-op. This guarantee is expected by FUSE graph switching
- for example.
- */
- stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
- local->op_ret, local->op_errno,
- &local->cont.inode_wfop.prebuf,
- &local->cont.inode_wfop.postbuf,
- local->xdata_rsp);
- if (!stub) {
- AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
- return 0;
- }
-
- /* If no new unstable writes happened between the
- time we cleared the unstable write witness flag in afr_fsync
- and now, calling afr_delayed_changelog_wake_up() should
- wake up and skip over the fsync phase and go straight to
- afr_changelog_post_op_now()
- */
- afr_delayed_changelog_wake_resume (this, local->fd, stub);
- }
-
- return 0;
-}
-
-
-int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = ENOMEM;
-
- priv = this->private;
-
- local = AFR_FRAME_INIT (frame, op_errno);
- if (!local)
- goto out;
-
- local->op = GF_FOP_FSYNC;
- if (!afr_is_consistent_io_possible (local, priv, &op_errno))
- goto out;
-
- local->fd = fd_ref (fd);
-
- if (afr_fd_has_witnessed_unstable_write (this, fd)) {
- /* don't care. we only wanted to CLEAR the bit */
- }
-
- local->inode = inode_ref (fd->inode);
-
- call_count = local->call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fsync_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync, xdata);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-out:
- AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fsync */
int
afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index f0231b7..0e50443 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -2539,3 +2539,111 @@ out:
AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
return 0;
}
+
+
+int
+afr_fsync_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ main_frame = afr_transaction_detach_fop_frame (frame);
+ if (!main_frame)
+ return 0;
+
+ AFR_STACK_UNWIND (fsync, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+
+ return 0;
+}
+
+
+int
+afr_fsync_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, NULL, xdata);
+}
+
+
+int
+afr_fsync_wind (call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_fsync_wind_cbk, (void *)(long) subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsync,
+ local->fd, local->cont.fsync.datasync,
+ local->xdata_req);
+ return 0;
+}
+
+int
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int32_t op_errno = ENOMEM;
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT (transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref (xdata, NULL);
+ else
+ local->xdata_req = dict_new ();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->fd = fd_ref (fd);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_FSYNC;
+ local->cont.fsync.datasync = datasync;
+
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
+
+ local->transaction.wind = afr_fsync_wind;
+ local->transaction.fop = __afr_txn_write_fop;
+ local->transaction.done = __afr_txn_write_done;
+ local->transaction.unwind = afr_fsync_unwind;
+
+ local->transaction.main_frame = frame;
+
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index e174cc2..1e8bb5c 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -87,4 +87,8 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
int32_t
afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 9493fbb..aa48e76 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -664,7 +664,6 @@ struct xlator_fops fops = {
.lk = afr_lk,
.flush = afr_flush,
.statfs = afr_statfs,
- .fsync = afr_fsync,
.fsyncdir = afr_fsyncdir,
.inodelk = afr_inodelk,
.finodelk = afr_finodelk,
@@ -696,6 +695,7 @@ struct xlator_fops fops = {
.zerofill = afr_zerofill,
.xattrop = afr_xattrop,
.fxattrop = afr_fxattrop,
+ .fsync = afr_fsync,
/*inode open*/
.opendir = afr_opendir,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index b6f5388..11278fb 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -725,6 +725,10 @@ typedef struct _afr_local {
gf_seek_what_t what;
} seek;
+ struct {
+ int32_t datasync;
+ } fsync;
+
} cont;
struct {
--
1.8.3.1

View File

@ -0,0 +1,631 @@
From 18093998f1e5178e707055904b32149c52acc774 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Fri, 2 Mar 2018 10:13:20 +0530
Subject: [PATCH 192/201] cluster/afr: Remove compound-fops usage in afr
Compound fops are not giving much of a performance improvement in afr, so
remove the feature so that it doesn't need to be maintained anymore.
> Fixes: #414
Upstream-patch: https://review.gluster.org/19655
BUG: 1559788
Change-Id: Ic7969b151544daf2547bd262a9fa03f575626411
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/131943
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
tests/basic/afr/compounded-write-txns.t | 37 ----
xlators/cluster/afr/src/afr-common.c | 43 ----
xlators/cluster/afr/src/afr-transaction.c | 333 +-----------------------------
xlators/cluster/afr/src/afr-transaction.h | 4 +-
xlators/cluster/afr/src/afr.c | 10 +-
xlators/cluster/afr/src/afr.h | 13 --
6 files changed, 7 insertions(+), 433 deletions(-)
delete mode 100644 tests/basic/afr/compounded-write-txns.t
diff --git a/tests/basic/afr/compounded-write-txns.t b/tests/basic/afr/compounded-write-txns.t
deleted file mode 100644
index 7cecd87..0000000
--- a/tests/basic/afr/compounded-write-txns.t
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-
-cleanup
-
-TEST glusterd
-TEST pidof glusterd
-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
-TEST $CLI volume set $V0 write-behind off
-TEST $CLI volume set $V0 client-io-threads off
-TEST $CLI volume start $V0
-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
-
-# Create and generate data into a src file
-
-TEST `printf %1024s |tr " " "1" > /tmp/source`
-TEST `printf %1024s |tr " " "2" >> /tmp/source`
-
-TEST dd if=/tmp/source of=$M0/file bs=1024 count=2 2>/dev/null
-md5sum_file=$(md5sum $M0/file | awk '{print $1}')
-
-TEST $CLI volume set $V0 cluster.use-compound-fops on
-
-TEST dd if=$M0/file of=$M0/file-copy bs=1024 count=2 2>/dev/null
-
-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
-
-EXPECT "$md5sum_file" echo `md5sum $M0/file-copy | awk '{print $1}'`
-
-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
-TEST $CLI volume stop $V0
-TEST $CLI volume delete $V0
-
-TEST rm -f /tmp/source
-cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index a790402..04c8613 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -43,7 +43,6 @@
#include "afr-self-heal.h"
#include "afr-self-heald.h"
#include "afr-messages.h"
-#include "compound-fop-utils.h"
int32_t
afr_quorum_errno (afr_private_t *priv)
@@ -5246,7 +5245,6 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
local->need_full_crawl = _gf_false;
- local->compound = _gf_false;
INIT_LIST_HEAD (&local->healer);
return 0;
out:
@@ -5393,7 +5391,6 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->pending)
goto out;
- local->compound = _gf_false;
INIT_LIST_HEAD (&local->transaction.eager_locked);
ret = 0;
@@ -6142,46 +6139,6 @@ afr_get_msg_id (char *op_type)
return -1;
}
-gf_boolean_t
-afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop)
-{
- if (priv->arbiter_count != 0)
- return _gf_false;
-
- if (!priv->use_compound_fops)
- return _gf_false;
-
- switch (fop) {
- case GF_FOP_WRITE:
- return _gf_true;
- default:
- return _gf_false;
- }
-}
-
-afr_compound_cbk_t
-afr_pack_fop_args (call_frame_t *frame, compound_args_t *args,
- glusterfs_fop_t fop, int index)
-{
- afr_local_t *local = frame->local;
-
- switch (fop) {
- case GF_FOP_WRITE:
- COMPOUND_PACK_ARGS (writev, GF_FOP_WRITE,
- args, index,
- local->fd, local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.flags,
- local->cont.writev.iobref,
- local->xdata_req);
- return afr_pre_op_writev_cbk;
- default:
- break;
- }
- return NULL;
-}
-
int
afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame,
void *opaque)
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 1c80c6b..02fb10a 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -17,7 +17,6 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
-#include "compound-fop-utils.h"
#include <signal.h>
@@ -38,10 +37,6 @@ afr_changelog_call_count (afr_transaction_type type,
unsigned char *failed_subvols,
unsigned int child_count);
int
-afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
- afr_changelog_resume_t changelog_resume,
- afr_xattrop_type_t op);
-int
afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
afr_changelog_resume_t changelog_resume,
afr_xattrop_type_t op);
@@ -847,12 +842,10 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
afr_private_t *priv = this->private;
afr_local_t *local = NULL;
dict_t *xattr = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
int i = 0;
int ret = 0;
int idx = 0;
int nothing_failed = 1;
- gf_boolean_t compounded_unlock = _gf_true;
gf_boolean_t need_undirty = _gf_false;
afr_handle_quorum (frame);
@@ -918,36 +911,8 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
goto out;
}
- if (local->compound && local->fd) {
- LOCK (&local->fd->lock);
- {
- fd_ctx = __afr_fd_ctx_get (local->fd, this);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i] &&
- local->transaction.eager_lock[i]) {
- if (fd_ctx->lock_piggyback[i])
- compounded_unlock = _gf_false;
- else if (fd_ctx->lock_acquired[i])
- compounded_unlock = _gf_false;
- }
- if (compounded_unlock == _gf_false)
- break;
- }
- }
- UNLOCK (&local->fd->lock);
- }
-
- /* Do not compound if any brick got piggybacked lock as
- * unlock should not be done for that. */
- if (local->compound && compounded_unlock) {
- afr_post_op_unlock_do (frame, this, xattr,
- afr_changelog_post_op_done,
- AFR_TRANSACTION_POST_OP);
- } else {
- afr_changelog_do (frame, this, xattr,
- afr_changelog_post_op_done,
- AFR_TRANSACTION_POST_OP);
- }
+ afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
out:
if (xattr)
dict_unref (xattr);
@@ -1277,66 +1242,6 @@ out:
}
int
-afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- void *data, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- call_frame_t *fop_frame = NULL;
- default_args_cbk_t *write_args_cbk = NULL;
- compound_args_cbk_t *args_cbk = data;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- child_index = (long) cookie;
-
- if (local->pre_op_compat)
- afr_changelog_pre_op_update (frame, this);
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- afr_transaction_fop_failed (frame, this, child_index);
- }
-
- /* If the compound fop failed due to saved_frame_unwind(), then
- * protocol/client fails it even before args_cbk is allocated.
- * Handle that case by passing the op_ret, op_errno values explicitly.
- */
- if ((op_ret == -1) && (args_cbk == NULL)) {
- afr_inode_write_fill (frame, this, child_index, op_ret,
- op_errno, NULL, NULL, NULL);
- } else {
- write_args_cbk = &args_cbk->rsp_list[1];
- afr_inode_write_fill (frame, this, child_index,
- write_args_cbk->op_ret,
- write_args_cbk->op_errno,
- &write_args_cbk->prestat,
- &write_args_cbk->poststat,
- write_args_cbk->xdata);
- }
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- compound_args_cleanup (local->c_args);
- local->c_args = NULL;
- afr_process_post_writev (frame, this);
- if (!afr_txn_nothing_failed (frame, this)) {
- /* Don't unwind until post-op is complete */
- local->transaction.resume (frame, this);
- } else {
- /* frame change, place frame in post-op delay and unwind */
- fop_frame = afr_transaction_detach_fop_frame (frame);
- afr_writev_copy_outvars (frame, fop_frame);
- local->transaction.resume (frame, this);
- afr_writev_unwind (fop_frame, this);
- }
- }
- return 0;
-}
-
-int
afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count,
afr_changelog_resume_t changelog_resume,
afr_xattrop_type_t op, dict_t **xdata,
@@ -1366,223 +1271,6 @@ afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count,
}
int
-afr_pre_op_fop_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
- afr_changelog_resume_t changelog_resume,
- afr_xattrop_type_t op)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xdata = NULL;
- dict_t *newloc_xdata = NULL;
- compound_args_t *args = NULL;
- int i = 0, call_count = 0;
- afr_compound_cbk_t compound_cbk;
- int ret = 0;
- int op_errno = ENOMEM;
-
- local = frame->local;
- priv = this->private;
-
- /* If lock failed on all, just unlock and unwind */
- ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
- op, &xdata, &newloc_xdata);
-
- if (ret)
- return 0;
-
- local->call_count = call_count;
-
- afr_save_lk_owner (frame);
- frame->root->lk_owner =
- local->transaction.main_frame->root->lk_owner;
-
- args = compound_fop_alloc (2, GF_CFOP_XATTROP_WRITEV, NULL);
-
- if (!args)
- goto err;
-
- /* pack pre-op part */
- i = 0;
- COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP,
- args, i,
- local->fd, GF_XATTROP_ADD_ARRAY,
- xattr, xdata);
- i++;
- /* pack whatever fop needs to be packed
- * @compound_cbk holds the cbk that would need to be called
- */
- compound_cbk = afr_pack_fop_args (frame, args, local->op, i);
-
- local->c_args = args;
-
- for (i = 0; i < priv->child_count; i++) {
- /* Means lock did not succeed on this brick */
- if (!local->transaction.pre_op[i] ||
- local->transaction.failed_subvols[i])
- continue;
-
- STACK_WIND_COOKIE (frame, compound_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->compound,
- args,
- NULL);
- if (!--call_count)
- break;
- }
-
- if (xdata)
- dict_unref (xdata);
- if (newloc_xdata)
- dict_unref (newloc_xdata);
- return 0;
-err:
- local->internal_lock.lock_cbk = local->transaction.done;
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- afr_restore_lk_owner (frame);
- afr_unlock (frame, this);
-
- if (xdata)
- dict_unref (xdata);
- if (newloc_xdata)
- dict_unref (newloc_xdata);
- return 0;
-}
-
-int
-afr_post_op_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- void *data, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- int call_count = -1;
- afr_internal_lock_t *int_lock = NULL;
- int32_t child_index = (long)cookie;
-
- local = frame->local;
- child_index = (long) cookie;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_update_uninodelk (local, int_lock, child_index);
-
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- compound_args_cleanup (local->c_args);
- local->c_args = NULL;
- if (local->transaction.resume_stub) {
- call_resume (local->transaction.resume_stub);
- local->transaction.resume_stub = NULL;
- }
- gf_msg_trace (this->name, 0,
- "All internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
- afr_changelog_resume_t changelog_resume,
- afr_xattrop_type_t op)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xdata = NULL;
- dict_t *newloc_xdata = NULL;
- compound_args_t *args = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
- int i = 0;
- int call_count = 0;
- struct gf_flock flock = {0,};
- int ret = 0;
-
- local = frame->local;
- priv = this->private;
- int_lock = &local->internal_lock;
-
- if (afr_is_inodelk_transaction(local)) {
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
-
- flock.l_start = inodelk->flock.l_start;
- flock.l_len = inodelk->flock.l_len;
- flock.l_type = F_UNLCK;
- }
-
- ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
- op, &xdata, &newloc_xdata);
-
- if (ret)
- return 0;
-
- int_lock->lk_call_count = call_count;
-
- int_lock->lock_cbk = local->transaction.done;
-
- args = compound_fop_alloc (2, GF_CFOP_XATTROP_UNLOCK, NULL);
-
- if (!args) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- afr_changelog_post_op_done (frame, this);
- goto out;
- }
-
- i = 0;
- COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP,
- args, i,
- local->fd, GF_XATTROP_ADD_ARRAY,
- xattr, xdata);
- i++;
-
- if (afr_is_inodelk_transaction(local)) {
- if (local->fd) {
- COMPOUND_PACK_ARGS (finodelk, GF_FOP_FINODELK,
- args, i,
- int_lock->domain, local->fd,
- F_SETLK, &flock, NULL);
- } else {
- COMPOUND_PACK_ARGS (inodelk, GF_FOP_INODELK,
- args, i,
- int_lock->domain, &local->loc,
- F_SETLK, &flock, NULL);
- }
- }
-
- local->c_args = args;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i] ||
- local->transaction.failed_subvols[i])
- continue;
- STACK_WIND_COOKIE (frame, afr_post_op_unlock_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->compound,
- args,
- NULL);
- if (!--call_count)
- break;
- }
-out:
- if (xdata)
- dict_unref (xdata);
- if (newloc_xdata)
- dict_unref (newloc_xdata);
- return 0;
-}
-
-int
afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
afr_changelog_resume_t changelog_resume,
afr_xattrop_type_t op)
@@ -1793,21 +1481,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
goto next;
}
- /* Till here we have already decided if pre-op needs to be done,
- * based on various criteria. The only thing that needs to be checked
- * now on is whether compound-fops is enabled or not.
- * If it is, then perform pre-op and fop together for writev op.
- */
- if (afr_can_compound_pre_op_and_op (priv, local->op)) {
- local->compound = _gf_true;
- afr_pre_op_fop_do (frame, this, xdata_req,
- afr_transaction_perform_fop,
- AFR_TRANSACTION_PRE_OP);
- } else {
- afr_changelog_do (frame, this, xdata_req,
- afr_transaction_perform_fop,
- AFR_TRANSACTION_PRE_OP);
- }
+ afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
if (xdata_req)
dict_unref (xdata_req);
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index dd19e5b..d01e144 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -58,7 +58,5 @@ afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
inode_t *inode1, unsigned char *readable1,
inode_t *inode2, unsigned char *readable2);
int
-afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- void *data, dict_t *xdata);
+afr_transaction_resume (call_frame_t *frame, xlator_t *this);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index aa48e76..9ed0481 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -245,9 +245,6 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,
out);
GF_OPTION_RECONF ("full-lock", priv->full_lock, options, bool, out);
- GF_OPTION_RECONF ("use-compound-fops", priv->use_compound_fops,
- options, bool,
- out);
GF_OPTION_RECONF ("granular-entry-heal", priv->esh_granular, options,
bool, out);
@@ -536,8 +533,6 @@ init (xlator_t *this)
GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out);
GF_OPTION_INIT ("full-lock", priv->full_lock, bool, out);
- GF_OPTION_INIT ("use-compound-fops", priv->use_compound_fops,
- bool, out);
GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out);
GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
@@ -1121,9 +1116,8 @@ struct volume_options options[] = {
{ .key = {"use-compound-fops"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "no",
- .description = "Use compound fops framework to modify afr "
- "transaction such that network roundtrips are "
- "reduced, thus improving the performance.",
+ .description = "this option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"
},
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 11278fb..a5b0d3b 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -44,10 +44,6 @@ typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int
typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
-typedef int (*afr_compound_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- void *data, dict_t *xdata);
-
#define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;})
#define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];})
#define AFR_CMP(a1,a2,len) ({int __cmp = 0; int __i; for (__i = 0; __i < len; __i++) if (a1[__i] != a2[__i]) { __cmp = 1; break;} __cmp;})
@@ -181,7 +177,6 @@ typedef struct _afr_private {
gf_boolean_t full_lock;
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
- gf_boolean_t use_compound_fops;
} afr_private_t;
@@ -843,9 +838,7 @@ typedef struct _afr_local {
call_frame_t *heal_frame;
gf_boolean_t need_full_crawl;
- gf_boolean_t compound;
afr_fop_lock_state_t fop_lock_state;
- compound_args_t *c_args;
gf_boolean_t is_read_txn;
afr_inode_ctx_t *inode_ctx;
@@ -1252,12 +1245,6 @@ afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame);
void
afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock,
int32_t child_index);
-gf_boolean_t
-afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop);
-
-afr_compound_cbk_t
-afr_pack_fop_args (call_frame_t *frame, compound_args_t *args,
- glusterfs_fop_t fop, int index);
int
afr_is_inodelk_transaction(afr_local_t *local);
--
1.8.3.1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,226 @@
From 0ce89d9d2bb0b162ecd4dc47c663569815acdb7b Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 19 Mar 2018 15:12:14 +0530
Subject: [PATCH 195/201] storage/posix: Add active-fd-count option in gluster
Problem:
When dd runs on a sharded replicate volume, all the writes on shards happen
through anon-fds. When the writes don't come quickly enough, the old anon-fd
closes and a new fd gets created to serve the new writes. open-fd-count is
decremented only after the fd is closed as part of fd_destroy(). So even while
one fd is on the way to being closed, a new fd will be created, and during this
short period it appears as though there are multiple fds opened on the file.
AFR thinks another application opened the same file and switches off
eager-lock, leading to extra latency.
Fix:
Have a different option called active-fd-count whose life cycle starts at
fd_bind() and ends just before fd_destroy().
>BUG: 1557932
Upstream-patch: https://review.gluster.org/19740
BUG: 1491785
Change-Id: I2e221f6030feeedf29fbb3bd6554673b8a5b9c94
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/133659
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
libglusterfs/src/fd.c | 2 ++
libglusterfs/src/glusterfs.h | 1 +
libglusterfs/src/inode.c | 2 ++
libglusterfs/src/inode.h | 1 +
tests/volume.rc | 14 ++++++++-
xlators/storage/posix/src/posix-helpers.c | 52 ++++++++++++-------------------
xlators/storage/posix/src/posix.c | 12 +++++++
7 files changed, 51 insertions(+), 33 deletions(-)
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index a824db7..45b0d32 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -557,6 +557,7 @@ fd_unref (fd_t *fd)
if (refcount == 0) {
if (!list_empty (&fd->inode_list)) {
list_del_init (&fd->inode_list);
+ fd->inode->active_fd_count--;
bound = _gf_true;
}
}
@@ -578,6 +579,7 @@ __fd_bind (fd_t *fd)
list_del_init (&fd->inode_list);
list_add (&fd->inode_list, &fd->inode->fd_list);
fd->inode->fd_count++;
+ fd->inode->active_fd_count++;
return fd;
}
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index c8835d9..5abfafa 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -164,6 +164,7 @@
#define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
#define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index b7b5ac6..ffba1bf 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -2344,6 +2344,8 @@ inode_dump (inode_t *inode, char *prefix)
gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
gf_proc_dump_write("fd-count", "%u", inode->fd_count);
+ gf_proc_dump_write("active-fd-count", "%u",
+ inode->active_fd_count);
gf_proc_dump_write("ref", "%u", inode->ref);
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
if (inode->_ctx) {
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
index b82b6ba..7a87748 100644
--- a/libglusterfs/src/inode.h
+++ b/libglusterfs/src/inode.h
@@ -93,6 +93,7 @@ struct _inode {
gf_lock_t lock;
uint64_t nlookup;
uint32_t fd_count; /* Open fd count */
+ uint32_t active_fd_count; /* Active open fd count */
uint32_t ref; /* reference count on this inode */
ia_type_t ia_type; /* what kind of file */
struct list_head fd_list; /* list of open files on this inode */
diff --git a/tests/volume.rc b/tests/volume.rc
index a15c8e5..d57aa93 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -804,7 +804,19 @@ function get_fd_count {
local fname=$4
local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
local statedump=$(generate_brick_statedump $vol $host $brick)
- local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w fd-count | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $count
+}
+
+function get_active_fd_count {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local fname=$4
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
+ local statedump=$(generate_brick_statedump $vol $host $brick)
+ local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
rm -f $statedump
echo $count
}
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index bc97206..ba1d8c3 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -388,27 +388,6 @@ _get_filler_inode (posix_xattr_filler_t *filler)
}
static int
-_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key)
-{
- inode_t *inode = NULL;
- int ret = -1;
-
- inode = _get_filler_inode (filler);
- if (!inode || gf_uuid_is_null (inode->gfid))
- goto out;
-
- ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
- if (ret < 0) {
- gf_msg (filler->this->name, GF_LOG_WARNING, 0,
- P_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value for %s", key);
- goto out;
- }
-out:
- return ret;
-}
-
-static int
_posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
void *xattrargs)
{
@@ -416,11 +395,11 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
int ret = -1;
char *databuf = NULL;
int _fd = -1;
- loc_t *loc = NULL;
ssize_t req_size = 0;
int32_t list_offset = 0;
ssize_t remaining_size = 0;
char *xattr = NULL;
+ inode_t *inode = NULL;
if (posix_xattr_ignorable (key))
goto out;
@@ -496,16 +475,25 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
GF_FREE (databuf);
}
} else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
- ret = _posix_filler_get_openfd_count (filler, key);
- loc = filler->loc;
- if (loc) {
- ret = dict_set_uint32 (filler->xattr, key,
- loc->inode->fd_count);
- if (ret < 0)
- gf_msg (filler->this->name, GF_LOG_WARNING, 0,
- P_MSG_XDATA_GETXATTR,
- "Failed to set dictionary value for %s",
- key);
+ inode = _get_filler_inode (filler);
+ if (!inode || gf_uuid_is_null (inode->gfid))
+ goto out;
+ ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
+ if (ret < 0) {
+ gf_msg (filler->this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value for %s", key);
+ }
+ } else if (!strcmp (key, GLUSTERFS_ACTIVE_FD_COUNT)) {
+ inode = _get_filler_inode (filler);
+ if (!inode || gf_uuid_is_null (inode->gfid))
+ goto out;
+ ret = dict_set_uint32 (filler->xattr, key,
+ inode->active_fd_count);
+ if (ret < 0) {
+ gf_msg (filler->this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value for %s", key);
}
} else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
/* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index a412e6d..6856e5e 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -3554,6 +3554,18 @@ _fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
}
}
+ if (dict_get (xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
+ fd->inode->active_fd_count);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_DICT_SET_FAILED, "%s: Failed to set "
+ "dictionary value for %s",
+ uuid_utoa (fd->inode->gfid),
+ GLUSTERFS_ACTIVE_FD_COUNT);
+ }
+ }
+
if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
is_append);
--
1.8.3.1

View File

@ -0,0 +1,83 @@
From 5b32d4317968581f217c82e893822852d63eeae8 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 19 Mar 2018 15:26:40 +0530
Subject: [PATCH 196/201] cluster/afr: Switch to active-fd-count for open-fd
checks
>BUG: 1557932
Upstream-patch: https://review.gluster.org/19741
BUG: 1491785
Change-Id: I3783e41b3812267bc10c0d05d062a31396ce135b
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/133660
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
tests/basic/afr/afr-no-fsync.t | 20 ++++++++++++++++++++
xlators/cluster/afr/src/afr-inode-write.c | 16 ++++++++--------
2 files changed, 28 insertions(+), 8 deletions(-)
create mode 100644 tests/basic/afr/afr-no-fsync.t
diff --git a/tests/basic/afr/afr-no-fsync.t b/tests/basic/afr/afr-no-fsync.t
new file mode 100644
index 0000000..0966d9b
--- /dev/null
+++ b/tests/basic/afr/afr-no-fsync.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+#Tests that sequential write workload doesn't lead to FSYNCs
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/a bs=1M count=500
+TEST ! "$CLI volume profile $V0 info incremental | grep FSYNC"
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index b52b6ca..9e6ba35 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -314,10 +314,10 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
if (ret || !write_is_append)
local->append_write = _gf_false;
- ret = dict_get_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
- &open_fd_count);
- if (ret == -1)
- goto unlock;
+ ret = dict_get_uint32 (xdata, GLUSTERFS_ACTIVE_FD_COUNT,
+ &open_fd_count);
+ if (ret < 0)
+ goto unlock;
if (open_fd_count > local->open_fd_count) {
local->open_fd_count = open_fd_count;
local->update_open_fd_count = _gf_true;
@@ -529,10 +529,10 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (ret)
goto out;
- if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (dict_set_uint32 (local->xdata_req, GLUSTERFS_ACTIVE_FD_COUNT, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
op_errno = ENOMEM;
--
1.8.3.1

View File

@ -0,0 +1,72 @@
From 80810495522f2e0f484c2b593d66318e34bfabb9 Mon Sep 17 00:00:00 2001
From: Jiffin Tony Thottan <jthottan@redhat.com>
Date: Wed, 14 Mar 2018 12:01:30 +0530
Subject: [PATCH 197/201] glusterd/ganesha : create/remove export file only
from the node which performs ganesha.enable
As part of volume set ganesha.enable on the ganesha export configuration file will be created/removed
using "create-export-ganesha.sh". This is performed from the nodes which are part of the ganesha cluster.
But this is not needed since the file is saved in shared storage and consumed by the nodes in the ganesha cluster.
Label: BACKPORT FROM UPSTREAM 3.10
Upstream Reference :
>patch link : https://review.gluster.org/#/c/19716/
>Change-Id: I2583899972b47d451a90711940293004a3af4690
>BUG: 1555195
>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
Change-Id: I2583899972b47d451a90711940293004a3af4690
BUG: 1551186
Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/133592
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +-
xlators/mgmt/glusterd/src/glusterd-op-sm.c | 2 +-
xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
index 7ba25ee..b130d5e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
@@ -543,7 +543,7 @@ ganesha_manage_export (dict_t *dict, char *value,
* Create the export file from the node where ganesha.enable "on"
* is executed
* */
- if (option) {
+ if (option && is_origin_glusterd (dict)) {
ret = manage_export_config (volname, "on", op_errstr);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 0a21e02..a02a0b3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2364,7 +2364,7 @@ glusterd_op_reset_volume (dict_t *dict, char **op_rspstr)
}
if (!strcmp(key, "ganesha.enable") || !strcmp (key, "all")) {
- if (glusterd_check_ganesha_export (volinfo)) {
+ if (glusterd_check_ganesha_export (volinfo) && is_origin_glusterd (dict)) {
ret = manage_export_config (volname, "off", op_rspstr);
if (ret)
gf_msg (this->name, GF_LOG_WARNING, 0,
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 414f9ba..e34d58a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2858,7 +2858,7 @@ glusterd_op_delete_volume (dict_t *dict)
volname);
goto out;
}
- if (glusterd_check_ganesha_export (volinfo)) {
+ if (glusterd_check_ganesha_export (volinfo) && is_origin_glusterd (dict)) {
ret = manage_export_config (volname, "off", NULL);
if (ret)
gf_msg (this->name, GF_LOG_WARNING, 0, 0,
--
1.8.3.1

View File

@ -0,0 +1,81 @@
From 8217d00a0a54457961e7ec7d3afb24e953923c7d Mon Sep 17 00:00:00 2001
From: Ashish Pandey <aspandey@redhat.com>
Date: Tue, 13 Mar 2018 14:03:20 +0530
Subject: [PATCH 198/201] cluster/ec: Change default read policy to gfid-hash
Problem:
Whenever we read data from a file over NFS, NFS reads
more data than requested and caches it. Based on the
stat information it determines whether the cached/pre-read
data is still valid.
Consider a 4 + 2 EC volume where all the bricks are on
different nodes.
In EC, with round-robin read policy, reads are sent to
different sets of data bricks. This way, it balances the
read fops across all the bricks and avoids heating up
(overloading) the same set of bricks.
Due to small differences in clock speed, it is possible
that we get minor differences in atime, mtime or ctime
across different bricks. That might cause a different stat
to be returned to NFS, based on which NFS will discard
cached/pre-read data which has actually not changed and
could have been used.
Solution:
Change the default read policy for EC to gfid-hash. That will force
all the reads to go to the same set of bricks.
>Change-Id: I825441cc519e94bf3dc3aa0bd4cb7c6ae6392c84
>BUG: 1554743
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
upstream patch: https://review.gluster.org/#/c/19703/
Change-Id: I43e95717980ca52c228fdcb7863c58bd4d14151c
BUG: 1559084
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/133746
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/basic/ec/ec-read-policy.t | 7 +++----
xlators/cluster/ec/src/ec.c | 2 +-
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/tests/basic/ec/ec-read-policy.t b/tests/basic/ec/ec-read-policy.t
index e4390aa..fe6fe65 100644
--- a/tests/basic/ec/ec-read-policy.t
+++ b/tests/basic/ec/ec-read-policy.t
@@ -20,10 +20,9 @@ TEST $CLI volume start $V0
TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
#TEST volume operations work fine
-EXPECT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
-TEST $CLI volume set $V0 disperse.read-policy gfid-hash
-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
-TEST $CLI volume reset $V0 disperse.read-policy
+
+EXPECT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
+TEST $CLI volume set $V0 disperse.read-policy round-robin
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
#TEST if the option gives the intended behavior. The way we perform this test
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 13ce7fb..bfdca64 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -1447,7 +1447,7 @@ struct volume_options options[] =
{ .key = {"read-policy" },
.type = GF_OPTION_TYPE_STR,
.value = {"round-robin", "gfid-hash"},
- .default_value = "round-robin",
+ .default_value = "gfid-hash",
.description = "inode-read fops happen only on 'k' number of bricks in"
" n=k+m disperse subvolume. 'round-robin' selects the read"
" subvolume using round-robin algo. 'gfid-hash' selects read"
--
1.8.3.1

View File

@ -0,0 +1,383 @@
From 09698d53b91786c990a0f7bc067e5c13551b0b12 Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <jahernan@redhat.com>
Date: Wed, 21 Feb 2018 17:47:37 +0100
Subject: [PATCH 199/201] cluster/ec: avoid delays in self-heal
Self-heal creates a thread per brick to sweep the index looking for
files that need to be healed. These threads are started before the
volume comes online, so nothing is done but waiting for the next
sweep. This happens once per minute.
When a replace brick command is executed, the new graph is loaded and
all index sweeper threads started. When all bricks have reported, a
getxattr request is sent to the root directory of the volume. This
causes a heal on it (because the new brick doesn't have good data),
and marks its contents as pending to be healed. This is done by the
index sweeper thread on the next round, one minute later.
This patch solves this problem by waking all index sweeper threads
after a successful check on the root directory.
Additionally, the index sweep thread scans the index directory
sequentially, but it might happen that after healing a directory entry
more index entries are created but skipped by the current directory
scan. This causes the remaining entries to be processed on the next
round, one minute later. The same can happen in the next round, so
the heal is running in bursts and taking a long time to finish, especially
on volumes with many directory levels.
This patch solves this problem by immediately restarting the index
sweep if a directory has been healed.
> Upstream patch: https://review.gluster.org/19718
> master patch: https://review.gluster.org/#/c/19609/
Change-Id: I58d9ab6ef17b30f704dc322e1d3d53b904e5f30e
BUG: 1555261
Signed-off-by: Xavi Hernandez <jahernan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/133570
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
Tested-by: Ashish Pandey <aspandey@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
tests/bugs/ec/bug-1547662.t | 41 ++++++++++++++++
xlators/cluster/ec/src/ec-heal.c | 9 ++++
xlators/cluster/ec/src/ec-heald.c | 27 +++++++---
xlators/cluster/ec/src/ec-heald.h | 4 +-
xlators/cluster/ec/src/ec.c | 101 ++++++++++++++++++++++----------------
5 files changed, 134 insertions(+), 48 deletions(-)
create mode 100644 tests/bugs/ec/bug-1547662.t
diff --git a/tests/bugs/ec/bug-1547662.t b/tests/bugs/ec/bug-1547662.t
new file mode 100644
index 0000000..5748218
--- /dev/null
+++ b/tests/bugs/ec/bug-1547662.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# Immediately after replace-brick, trusted.ec.version will be absent, so if it
+# is present we can assume that heal was started on root
+function root_heal_attempted {
+ if [ -z $(get_hex_xattr trusted.ec.version $1) ]; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST ${CLI} volume create ${V0} disperse 6 redundancy 2 ${H0}:${B0}/${V0}{0..5}
+TEST ${CLI} volume start ${V0}
+TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count ${V0} 0
+
+TEST mkdir ${M0}/base
+TEST mkdir ${M0}/base/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+
+TEST ${CLI} volume replace-brick ${V0} ${H0}:${B0}/${V0}5 ${H0}:${B0}/${V0}6 commit force
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count ${V0} 0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "Y" glustershd_up_status
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count_shd ${V0} 0
+EXPECT_WITHIN ${HEAL_TIMEOUT} "Y" root_heal_attempted ${B0}/${V0}6
+EXPECT_WITHIN ${HEAL_TIMEOUT} "^0$" get_pending_heal_count ${V0}
+EXPECT "^127$" echo $(find ${B0}/${V0}6/base -type d | wc -l)
+
+cleanup;
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index b8518d6..8e02986 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -25,6 +25,7 @@
#include "ec-combine.h"
#include "ec-method.h"
#include "ec-fops.h"
+#include "ec-heald.h"
#define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr; })
#define EC_COUNT(array, max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res; })
@@ -2752,6 +2753,14 @@ ec_replace_heal (ec_t *ec, inode_t *inode)
gf_msg_debug (ec->xl->name, 0,
"Heal failed for replace brick ret = %d", ret);
+ /* Once the root inode has been checked, it might have triggered a
+ * self-heal on it after a replace brick command or for some other
+ * reason. It can also happen that the volume already had damaged
+ * files in the index, even if the heal on the root directory failed.
+ * In both cases we need to wake all index healers to continue
+ * healing remaining entries that are marked as dirty. */
+ ec_shd_index_healer_wake(ec);
+
loc_wipe (&loc);
return ret;
}
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index b4fa6f8..a703379 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -184,8 +184,19 @@ ec_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name)
int
ec_shd_selfheal (struct subvol_healer *healer, int child, loc_t *loc)
{
- return syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL, NULL,
- NULL);
+ int32_t ret;
+
+ ret = syncop_getxattr (healer->this, loc, NULL, EC_XATTR_HEAL, NULL,
+ NULL);
+ if ((ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+ /* If we have just healed a directory, it's possible that
+ * other index entries have appeared to be healed. We put a
+ * mark so that we can check it later and restart a scan
+ * without delay. */
+ healer->rerun = _gf_true;
+ }
+
+ return ret;
}
@@ -472,11 +483,15 @@ ec_shd_index_healer_spawn (xlator_t *this, int subvol)
}
void
-ec_selfheal_childup (ec_t *ec, int child)
+ec_shd_index_healer_wake(ec_t *ec)
{
- if (!ec->shd.iamshd)
- return;
- ec_shd_index_healer_spawn (ec->xl, child);
+ int32_t i;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (((ec->xl_up >> i) & 1) != 0) {
+ ec_shd_index_healer_spawn(ec->xl, i);
+ }
+ }
}
int
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 4ae02e2..2a84881 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -20,6 +20,8 @@ ec_xl_op (xlator_t *this, dict_t *input, dict_t *output);
int
ec_selfheal_daemon_init (xlator_t *this);
-void ec_selfheal_childup (ec_t *ec, int child);
+
+void
+ec_shd_index_healer_wake(ec_t *ec);
#endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index bfdca64..956b45b 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -322,7 +322,7 @@ ec_get_event_from_state (ec_t *ec)
/* If ec is up but some subvolumes are yet to notify, give
* grace time for other subvols to notify to prevent start of
* I/O which may result in self-heals */
- if (ec->timer && ec->xl_notify_count < ec->nodes)
+ if (ec->xl_notify_count < ec->nodes)
return GF_EVENT_MAXVAL;
return GF_EVENT_CHILD_UP;
@@ -344,8 +344,8 @@ ec_up (xlator_t *this, ec_t *ec)
}
ec->up = 1;
- gf_msg (this->name, GF_LOG_INFO, 0,
- EC_MSG_EC_UP, "Going UP");
+ gf_msg (this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+
gf_event (EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -358,8 +358,8 @@ ec_down (xlator_t *this, ec_t *ec)
}
ec->up = 0;
- gf_msg (this->name, GF_LOG_INFO, 0,
- EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg (this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+
gf_event (EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
@@ -383,31 +383,38 @@ ec_notify_cbk (void *data)
gf_timer_call_cancel (ec->xl->ctx, ec->timer);
ec->timer = NULL;
+ /* The timeout has expired, so any subvolume that has not
+ * already reported its state, will be considered to be down.
+ * We mark as if all bricks had reported. */
+ ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
+ ec->xl_notify_count = ec->nodes;
+
+ /* Since we have marked all subvolumes as notified, it's
+ * guaranteed that ec_get_event_from_state() will return
+ * CHILD_UP or CHILD_DOWN, but not MAXVAL. */
event = ec_get_event_from_state (ec);
- /* If event is still MAXVAL then enough subvolumes didn't
- * notify, treat it as CHILD_DOWN. */
- if (event == GF_EVENT_MAXVAL) {
- event = GF_EVENT_CHILD_DOWN;
- ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
- ec->xl_notify_count = ec->nodes;
- } else if (event == GF_EVENT_CHILD_UP) {
- /* Rest of the bricks are still not coming up,
- * notify that ec is up. Files/directories will be
- * healed as in when they come up. */
+ if (event == GF_EVENT_CHILD_UP) {
+ /* We are ready to bring the volume up. If there are
+ * still bricks DOWN, they will be healed when they
+ * come up. */
ec_up (ec->xl, ec);
}
- /* CHILD_DOWN should not come here as no grace period is given
- * for notifying CHILD_DOWN. */
-
propagate = _gf_true;
}
unlock:
UNLOCK(&ec->lock);
if (propagate) {
+ if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
+ /* We have just brought the volume UP, so we trigger
+ * a self-heal check on the root directory. */
+ ec_launch_replace_heal (ec);
+ }
+
default_notify (ec->xl, event, NULL);
}
+
}
void
@@ -442,7 +449,7 @@ ec_pending_fops_completed(ec_t *ec)
}
}
-static void
+static gf_boolean_t
ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
{
uintptr_t current_state = 0;
@@ -455,23 +462,28 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
if (current_state != new_state) {
ec->xl_up ^= index_mask;
ec->xl_up_count += (current_state ? -1 : 1);
+
+ return _gf_true;
}
+
+ return _gf_false;
}
int32_t
ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
{
- ec_t *ec = this->private;
- int32_t idx = 0;
- int32_t error = 0;
- glusterfs_event_t old_event = GF_EVENT_MAXVAL;
- dict_t *input = NULL;
- dict_t *output = NULL;
- gf_boolean_t propagate = _gf_true;
- int32_t orig_event = event;
+ ec_t *ec = this->private;
+ int32_t idx = 0;
+ int32_t error = 0;
+ glusterfs_event_t old_event = GF_EVENT_MAXVAL;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t propagate = _gf_true;
+ gf_boolean_t needs_shd_check = _gf_false;
+ int32_t orig_event = event;
struct gf_upcall *up_data = NULL;
struct gf_upcall_cache_invalidation *up_ci = NULL;
- uintptr_t mask = 0;
+ uintptr_t mask = 0;
gf_msg_trace (this->name, 0, "NOTIFY(%d): %p, %p",
event, data, data2);
@@ -498,8 +510,6 @@ ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
for (idx = 0; idx < ec->nodes; idx++) {
if (ec->xl_list[idx] == data) {
- if (event == GF_EVENT_CHILD_UP)
- ec_selfheal_childup (ec, idx);
break;
}
}
@@ -525,17 +535,27 @@ ec_notify (xlator_t *this, int32_t event, void *data, void *data2)
mask = 1ULL << idx;
if (event == GF_EVENT_CHILD_UP) {
- ec_set_up_state(ec, mask, mask);
+ /* We need to trigger a selfheal if a brick changes
+ * to UP state. */
+ needs_shd_check = ec_set_up_state(ec, mask, mask);
} else if (event == GF_EVENT_CHILD_DOWN) {
- ec_set_up_state(ec, mask, 0);
+ ec_set_up_state(ec, mask, 0);
}
event = ec_get_event_from_state (ec);
- if (event == GF_EVENT_CHILD_UP && !ec->up) {
- ec_up (this, ec);
- } else if (event == GF_EVENT_CHILD_DOWN && ec->up) {
- ec_down (this, ec);
+ if (event == GF_EVENT_CHILD_UP) {
+ if (!ec->up) {
+ ec_up (this, ec);
+ }
+ } else {
+ /* If the volume is not UP, it's irrelevant if one
+ * brick has come up. We cannot heal anything. */
+ needs_shd_check = _gf_false;
+
+ if ((event == GF_EVENT_CHILD_DOWN) && ec->up) {
+ ec_down (this, ec);
+ }
}
if (event != GF_EVENT_MAXVAL) {
@@ -554,14 +574,13 @@ unlock:
done:
if (propagate) {
+ if (needs_shd_check && ec->shd.iamshd) {
+ ec_launch_replace_heal (ec);
+ }
+
error = default_notify (this, event, data);
}
- if (ec->shd.iamshd &&
- ec->xl_notify_count == ec->nodes &&
- event == GF_EVENT_CHILD_UP) {
- ec_launch_replace_heal (ec);
- }
out:
return error;
}
--
1.8.3.1

View File

@ -0,0 +1,331 @@
From 2c8b94fb5359424a17dc0380b86cb17058f07bf6 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Wed, 14 Feb 2018 10:36:27 +0530
Subject: [PATCH 200/201] quick-read: Discard cache for fallocate, zerofill and
discard ops
The fallocate, zerofill and discard modify file data on the server thus
rendering stale any cache held by the xlator on the client.
mainline:
> BUG: 1524252
> Reviewed-on: https://review.gluster.org/19018
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
> Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
(cherry picked from commit 429f2436b33793136836042ccc43ce4cfd7f89f3)
BUG: 1523599
Change-Id: I432146c6390a0cd5869420c373f598da43915f3f
Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/130229
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/quick-read/bz1523599/bz1523599.t | 32 ++++
tests/bugs/quick-read/bz1523599/test_bz1523599.c | 196 +++++++++++++++++++++++
xlators/performance/quick-read/src/quick-read.c | 40 ++++-
3 files changed, 267 insertions(+), 1 deletion(-)
create mode 100755 tests/bugs/quick-read/bz1523599/bz1523599.t
create mode 100644 tests/bugs/quick-read/bz1523599/test_bz1523599.c
diff --git a/tests/bugs/quick-read/bz1523599/bz1523599.t b/tests/bugs/quick-read/bz1523599/bz1523599.t
new file mode 100755
index 0000000..5027efe
--- /dev/null
+++ b/tests/bugs/quick-read/bz1523599/bz1523599.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/test_bz1523599.c -lgfapi -o $(dirname $0)/test_bz1523599
+TEST ./$(dirname $0)/test_bz1523599 0 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 1 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 0 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 2 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+
+cleanup_tester $(dirname $0)/test_bz1523599
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
diff --git a/tests/bugs/quick-read/bz1523599/test_bz1523599.c b/tests/bugs/quick-read/bz1523599/test_bz1523599.c
new file mode 100644
index 0000000..f0166e1
--- /dev/null
+++ b/tests/bugs/quick-read/bz1523599/test_bz1523599.c
@@ -0,0 +1,196 @@
+/*
+ * ./test_bz1523599 0 vm140-111 gv0 test211 log
+ * ./test_bz1523599 1 vm140-111 gv0 test211 log
+ * Open - Discard - Read - Then check read information to see if the initial TEST_STR_LEN/2 bytes read zero
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <sys/uio.h>
+
+#define TEST_STR_LEN 2048
+
+enum fallocate_flag {
+ TEST_WRITE,
+ TEST_DISCARD,
+ TEST_ZEROFILL,
+};
+
+void print_str(char *str, int len)
+{
+ int i, addr;
+
+ printf("%07x\t", 0);
+ for (i = 0; i < len; i++) {
+ printf("%02x", str[i]);
+ if (i) {
+ if ((i + 1) % 16 == 0)
+ printf("\n%07x\t", i+1);
+ else if ((i + 1) % 4 == 0)
+ printf(" ");
+ }
+ }
+ printf("\n");
+}
+
+int
+test_read(char *str, int total_length, int len_zero)
+{
+ int i;
+ int ret = 0;
+
+ for (i = 0; i < len_zero; i++) {
+ if (str[i]) {
+ fprintf(stderr, "char at position %d not zeroed out\n",
+ i);
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ for (i = len_zero; i < total_length; i++) {
+ if (str[i] != 0x11) {
+ fprintf(stderr,
+ "char at position %d does not contain pattern\n",
+ i);
+ ret = -EIO;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int opcode;
+ char *host_name, *volume_name, *file_path, *glfs_log_path;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ off_t offset = 0;
+ size_t len_zero = TEST_STR_LEN / 2;
+ char writestr[TEST_STR_LEN];
+ char readstr[TEST_STR_LEN];
+ struct iovec iov = {&readstr, TEST_STR_LEN};
+ int i;
+ int ret = 1;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+ for (i = 0; i < TEST_STR_LEN; i++)
+ readstr[i] = 0x22;
+
+ if (argc != 6) {
+ fprintf(stderr,
+ "Syntax: %s <test type> <host> <volname> <file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ opcode = atoi(argv[1]);
+ host_name = argv[2];
+ volume_name = argv[3];
+ file_path = argv[4];
+ glfs_log_path = argv[5];
+
+ fs = glfs_new(volume_name);
+ if (!fs) {
+ perror("glfs_new");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", host_name, 24007);
+ if (ret != 0) {
+ perror("glfs_set_volfile_server");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, glfs_log_path, 7);
+ if (ret != 0) {
+ perror("glfs_set_logging");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ perror("glfs_init");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, file_path, O_RDWR, 0777);
+ if (fd == NULL) {
+ perror("glfs_creat");
+ ret = -1;
+ goto out;
+ }
+
+ switch (opcode) {
+ case TEST_WRITE:
+ fprintf(stderr, "Test Write\n");
+ ret = glfs_write(fd, writestr, TEST_STR_LEN, 0);
+ if (ret < 0) {
+ perror("glfs_write");
+ goto out;
+ } else if (ret != TEST_STR_LEN) {
+ fprintf(stderr, "insufficient data written %d \n", ret);
+ ret = -EIO;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ case TEST_DISCARD:
+ fprintf(stderr, "Test Discard\n");
+ ret = glfs_discard(fd, offset, len_zero);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Operation not supported\n");
+ ret = 0;
+ goto out;
+ }
+ perror("glfs_discard");
+ goto out;
+ }
+ goto test_read;
+ case TEST_ZEROFILL:
+ fprintf(stderr, "Test Zerofill\n");
+ ret = glfs_zerofill(fd, offset, len_zero);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Operation not supported\n");
+ ret = 0;
+ goto out;
+ }
+ perror("glfs_zerofill");
+ goto out;
+ }
+ goto test_read;
+ default:
+ ret = -1;
+ fprintf(stderr, "Incorrect test code %d\n", opcode);
+ goto out;
+ }
+
+test_read:
+ ret = glfs_readv(fd, &iov, 1, 0);
+ if (ret < 0) {
+ perror("glfs_readv");
+ goto out;
+ }
+
+ /* printf("Read str\n"); print_str(readstr, TEST_STR_LEN); printf("\n"); */
+ ret = test_read(readstr, TEST_STR_LEN, len_zero);
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+
+ if (ret)
+ return -1;
+
+ return 0;
+}
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
index 92b2f82..61232c1 100644
--- a/xlators/performance/quick-read/src/quick-read.c
+++ b/xlators/performance/quick-read/src/quick-read.c
@@ -668,6 +668,41 @@ qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
return 0;
}
+static int
+qr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ qr_inode_prune (this, fd->inode);
+
+ STACK_WIND (frame, default_fallocate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fallocate,
+ fd, keep_size, offset, len, xdata);
+ return 0;
+}
+
+static int
+qr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ qr_inode_prune (this, fd->inode);
+
+ STACK_WIND (frame, default_discard_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->discard,
+ fd, offset, len, xdata);
+ return 0;
+}
+
+static int
+qr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ qr_inode_prune (this, fd->inode);
+
+ STACK_WIND (frame, default_zerofill_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->zerofill,
+ fd, offset, len, xdata);
+ return 0;
+}
int
qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
@@ -1128,7 +1163,10 @@ struct xlator_fops fops = {
.readv = qr_readv,
.writev = qr_writev,
.truncate = qr_truncate,
- .ftruncate = qr_ftruncate
+ .ftruncate = qr_ftruncate,
+ .fallocate = qr_fallocate,
+ .discard = qr_discard,
+ .zerofill = qr_zerofill
};
struct xlator_cbks cbks = {
--
1.8.3.1

View File

@ -0,0 +1,62 @@
From 78918cf18ead4637c5aea20025c319d845518733 Mon Sep 17 00:00:00 2001
From: moagrawa <moagrawa@redhat.com>
Date: Mon, 26 Mar 2018 11:00:22 +0530
Subject: [PATCH 201/201] posix: After set storage.reserve limit df does not
show correct output
Problem: After the storage.reserve limit is set, df does not show the
correct output on the client.
Solution: Update posix_statfs so that the reserved space is reflected
correctly in the disk usage reported to the client.
> BUG: 1533736
> Change-Id: I2c5feda0303d0f4abe5af22fac903011792b2dc8
> Reviewed on https://review.gluster.org/#/c/19186/
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> (cherry pick from commit c494445c886e16ddc6a960b9074a68fe9621ee09)
BUG: 1550982
Change-Id: I5444fad40f2df2fdcf4ab80c2641fc9cc56b18fe
Signed-off-by: moagrawa <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/133745
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/storage/posix/src/posix.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 6856e5e..56a2ca9 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -3751,6 +3751,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
struct posix_private * priv = NULL;
int shared_by = 1;
int percent = 0;
+ uint64_t reserved_blocks = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -3776,7 +3777,17 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
}
percent = priv->disk_reserve;
- buf.f_bfree = (buf.f_bfree - ((buf.f_blocks * percent) / 100));
+ reserved_blocks = (buf.f_blocks * percent) / 100;
+
+ if (buf.f_bfree > reserved_blocks) {
+ buf.f_bfree = (buf.f_bfree - reserved_blocks);
+ if (buf.f_bavail > buf.f_bfree) {
+ buf.f_bavail = buf.f_bfree;
+ }
+ } else {
+ buf.f_bfree = 0;
+ buf.f_bavail = 0;
+ }
shared_by = priv->shared_brick_count;
if (shared_by > 1) {
--
1.8.3.1

View File

@ -192,7 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
%else
Name: glusterfs
Version: 3.12.2
Release: 5%{?dist}
Release: 6%{?dist}
%endif
License: GPLv2 or LGPLv3+
Group: System Environment/Base
@ -445,6 +445,27 @@ Patch0177: 0177-hooks-fix-workdir-in-S13create-subdir-mounts.sh.patch
Patch0178: 0178-cluster-ec-Do-lock-conflict-check-correctly-for-wait.patch
Patch0179: 0179-packaging-adding-missed-part-from-5eed664-while-back.patch
Patch0180: 0180-packaging-adding-missed-part-from-5eed664-while-back.patch
Patch0181: 0181-glusterd-get-state-memory-leak-fix.patch
Patch0182: 0182-glusterd-Fix-coverity-issues-in-glusterd-handler.c.patch
Patch0183: 0183-cluster-afr-Fix-dict-leak-in-pre-op.patch
Patch0184: 0184-cli-glusterfsd-remove-copyright-information.patch
Patch0185: 0185-rpcsvc-correct-event-thread-scaling.patch
Patch0186: 0186-cli-Remove-upstream-doc-reference.patch
Patch0187: 0187-features-shard-Do-list_del_init-while-list-memory-is.patch
Patch0188: 0188-georep-Pause-Resume-of-geo-replication-with-wrong-us.patch
Patch0189: 0189-fuse-enable-proper-fgetattr-like-semantics.patch
Patch0190: 0190-cluster-afr-Adding-option-to-take-full-file-lock.patch
Patch0191: 0191-cluster-afr-Make-afr_fsync-a-transaction.patch
Patch0192: 0192-cluster-afr-Remove-compound-fops-usage-in-afr.patch
Patch0193: 0193-cluster-afr-Remove-unused-code-paths.patch
Patch0194: 0194-cluster-afr-Make-AFR-eager-locking-similar-to-EC.patch
Patch0195: 0195-storage-posix-Add-active-fd-count-option-in-gluster.patch
Patch0196: 0196-cluster-afr-Switch-to-active-fd-count-for-open-fd-ch.patch
Patch0197: 0197-glusterd-ganesha-create-remove-export-file-only-from.patch
Patch0198: 0198-cluster-ec-Change-default-read-policy-to-gfid-hash.patch
Patch0199: 0199-cluster-ec-avoid-delays-in-self-heal.patch
Patch0200: 0200-quick-read-Discard-cache-for-fallocate-zerofill-and-.patch
Patch0201: 0201-posix-After-set-storage.reserve-limit-df-does-not-sh.patch
%description
GlusterFS is a distributed file-system capable of scaling to several
@ -2388,6 +2409,11 @@ fi
%endif
%changelog
* Mon Mar 26 2018 Milind Changire <mchangir@redhat.com> - 3.12.2-6
- fixes bugs bz#1491785 bz#1518710 bz#1523599 bz#1528733 bz#1550474
bz#1550982 bz#1551186 bz#1552360 bz#1552414 bz#1552425 bz#1554255 bz#1554905
bz#1555261 bz#1556895 bz#1557297 bz#1559084 bz#1559788
* Wed Mar 07 2018 Milind Changire <mchangir@redhat.com> - 3.12.2-5
- fixes bugs bz#1378371 bz#1384983 bz#1472445 bz#1493085 bz#1508999
bz#1516638 bz#1518260 bz#1529072 bz#1530519 bz#1537357 bz#1540908 bz#1541122