autobuild v3.12.2-2

Resolves: bz#1264911 bz#1277924 bz#1286820 bz#1360331 bz#1401969
Resolves: bz#1410719 bz#1419438 bz#1426042 bz#1444820 bz#1459101
Resolves: bz#1464150 bz#1464350 bz#1466122 bz#1466129 bz#1467903
Resolves: bz#1468972 bz#1476876 bz#1484446 bz#1492591 bz#1498391
Resolves: bz#1498730 bz#1499865 bz#1500704 bz#1501345 bz#1505570
Resolves: bz#1507361 bz#1507394 bz#1509102 bz#1509191 bz#1509810
Resolves: bz#1509833 bz#1511766 bz#1512470 bz#1512496 bz#1512963
Resolves: bz#1515051 bz#1519076 bz#1519740 bz#1534253 bz#1534530
Signed-off-by: Milind Changire <mchangir@redhat.com>
This commit is contained in:
Milind Changire 2018-01-17 02:21:37 -05:00
parent ee817adf44
commit cf62f1947f
55 changed files with 13722 additions and 44 deletions

View File

@ -0,0 +1,749 @@
From fb84f6c69385e35f3a62504dfebc11b21ff4082a Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Mon, 6 Nov 2017 09:30:54 +0530
Subject: [PATCH 075/128] cli: gluster help changes
gluster cli help now shows only the top level
help commands. gluster <component> help will now show
help commands for <component>.
> BUG: 1474768
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> BUG: 1509786
> https://review.gluster.org/#/c/18666/
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
(cherry picked from commit 89dc54f50c9f800ca4446ea8fe736e4860588845)
Change-Id: I263f53a0870d80ef4cfaad455fdaa47e2ac4423b
BUG: 1498730
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123525
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
cli/src/cli-cmd-global.c | 3 +
cli/src/cli-cmd-misc.c | 77 ++++++++--
cli/src/cli-cmd-parser.c | 20 ++-
cli/src/cli-cmd-peer.c | 9 +-
cli/src/cli-cmd-snapshot.c | 5 +
cli/src/cli-cmd-volume.c | 347 ++++++++++++++++++++++++++++++++++++++-------
cli/src/cli.c | 2 +-
7 files changed, 387 insertions(+), 76 deletions(-)
diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c
index 881506b..1f9cb54 100644
--- a/cli/src/cli-cmd-global.c
+++ b/cli/src/cli-cmd-global.c
@@ -68,11 +68,14 @@ cli_cmd_global_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
count = (sizeof (global_cmds) / sizeof (struct cli_cmd));
cli_cmd_sort (cmd, count);
+ cli_out ("\ngluster global commands");
+ cli_out ("========================\n");
for (global_cmd = cmd; global_cmd->pattern; global_cmd++)
if (_gf_false == global_cmd->disable)
cli_out ("%s - %s", global_cmd->pattern,
global_cmd->desc);
+ cli_out ("\n");
GF_FREE (cmd);
return 0;
}
diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c
index 9f8c159..c887515 100644
--- a/cli/src/cli-cmd-misc.c
+++ b/cli/src/cli-cmd-misc.c
@@ -23,6 +23,9 @@ extern struct rpc_clnt *global_rpc;
extern rpc_clnt_prog_t *cli_rpc_prog;
extern struct cli_cmd volume_cmds[];
+extern struct cli_cmd bitrot_cmds[];
+extern struct cli_cmd quota_cmds[];
+extern struct cli_cmd tier_cmds[];
extern struct cli_cmd cli_probe_cmds[];
extern struct cli_cmd cli_log_cmds[];
extern struct cli_cmd cli_system_cmds[];
@@ -38,37 +41,76 @@ cli_cmd_quit_cbk (struct cli_state *state, struct cli_cmd_word *word,
exit (0);
}
+
+static gf_boolean_t
+cli_is_help_command (const char *pattern)
+{
+ /* FixFixFix
+ * This is not the best way to determine whether
+ * this is a help command
+ */
+ if (strstr (pattern, "help"))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+
int
cli_cmd_display_help (struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
- struct cli_cmd *cmd[] = {volume_cmds, cli_probe_cmds,
- cli_misc_cmds, snapshot_cmds,
- global_cmds, NULL};
- struct cli_cmd *cmd_ind = NULL;
- int i = 0;
+ struct cli_cmd *cmd[] = {cli_misc_cmds, cli_probe_cmds,
+ volume_cmds, bitrot_cmds, quota_cmds,
+#if !defined(__NetBSD__)
+ tier_cmds,
+#endif
+ snapshot_cmds, global_cmds, NULL};
+ struct cli_cmd *cmd_ind = NULL;
+ int i = 0;
+ gf_boolean_t list_all = _gf_false;
/* cli_system_cmds commands for internal usage
they are not exposed
*/
- for (i=0; cmd[i]!=NULL; i++)
- for (cmd_ind = cmd[i]; cmd_ind->pattern; cmd_ind++)
- if (_gf_false == cmd_ind->disable)
- cli_out ("%s - %s", cmd_ind->pattern,
- cmd_ind->desc);
+ /* If "help all" */
+ if (wordcount == 2)
+ list_all = _gf_true;
+
+ for (i = 0; cmd[i] != NULL; i++) {
+ for (cmd_ind = cmd[i]; cmd_ind->pattern; cmd_ind++) {
+ if ((_gf_false == cmd_ind->disable) &&
+ cli_is_help_command (cmd_ind->pattern)) {
+ if (list_all && (cmd_ind->cbk)) {
+ cmd_ind->cbk (state, in_word, words,
+ wordcount);
+ } else {
+ cli_out (" %-25s- %s", cmd_ind->pattern,
+ cmd_ind->desc);
+ }
+ }
+ }
+ }
+
+ cli_out ("\n");
return 0;
}
+
+struct cli_cmd cli_help_cmds[] = {
+ { "help [all]",
+ cli_cmd_display_help,
+ "display help for command classes"},
+
+ { NULL, NULL, NULL }
+};
+
+
struct cli_cmd cli_misc_cmds[] = {
{ "quit",
cli_cmd_quit_cbk,
"quit"},
-
- { "help",
- cli_cmd_display_help,
- "display command options"},
-
{ "exit",
cli_cmd_quit_cbk,
"exit"},
@@ -84,7 +126,12 @@ cli_cmd_misc_register (struct cli_state *state)
struct cli_cmd *cmd = NULL;
for (cmd = cli_misc_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+ for (cmd = cli_help_cmds; cmd->pattern; cmd++) {
ret = cli_cmd_register (&state->tree, cmd);
if (ret)
goto out;
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index a35fc74..c95b262 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -1189,8 +1189,13 @@ cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
goto out;
}
- if (wordcount < 4)
+ if (wordcount < 4) {
+
+ if ((wordcount == 3) && !(strcmp (words[2], "help"))) {
+ ret = 1;
+ }
goto out;
+ }
volname = (char *)words[2];
if (!volname) {
@@ -5588,15 +5593,22 @@ cli_cmd_bitrot_parse (const char **words, int wordcount, dict_t **options)
GF_ASSERT (words);
GF_ASSERT (options);
- dict = dict_new ();
- if (!dict)
- goto out;
+
+ /* Hack to print out bitrot help properly */
+ if ((wordcount == 3) && !(strcmp (words[2], "help"))) {
+ ret = 1;
+ return ret;
+ }
if (wordcount < 4 || wordcount > 5) {
gf_log ("cli", GF_LOG_ERROR, "Invalid syntax");
goto out;
}
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
volname = (char *)words[2];
if (!volname) {
ret = -1;
diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c
index 4802f71..7df60bc 100644
--- a/cli/src/cli-cmd-peer.c
+++ b/cli/src/cli-cmd-peer.c
@@ -264,7 +264,7 @@ struct cli_cmd cli_probe_cmds[] = {
{ "peer help",
cli_cmd_peer_help_cbk,
- "Help command for peer "},
+ "display help for peer commands"},
{ "pool list",
cli_cmd_pool_list_cbk,
@@ -281,17 +281,20 @@ cli_cmd_peer_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
struct cli_cmd *probe_cmd = NULL;
int count = 0;
+ cli_out ("\ngluster peer commands");
+ cli_out ("======================\n");
+
cmd = GF_CALLOC (1, sizeof (cli_probe_cmds), cli_mt_cli_cmd);
memcpy (cmd, cli_probe_cmds, sizeof (cli_probe_cmds));
count = (sizeof (cli_probe_cmds) / sizeof (struct cli_cmd));
cli_cmd_sort (cmd, count);
-
-
for (probe_cmd = cmd; probe_cmd->pattern; probe_cmd++)
cli_out ("%s - %s", probe_cmd->pattern, probe_cmd->desc);
GF_FREE (cmd);
+
+ cli_out ("\n");
return 0;
}
diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c
index e79128c..88b4737 100644
--- a/cli/src/cli-cmd-snapshot.c
+++ b/cli/src/cli-cmd-snapshot.c
@@ -140,9 +140,14 @@ cli_cmd_snapshot_help_cbk (struct cli_state *state,
count = (sizeof (snapshot_cmds) / sizeof (struct cli_cmd));
cli_cmd_sort (cmd, count);
+ cli_out ("\ngluster snapshot commands");
+ cli_out ("=========================\n");
+
for (snap_cmd = cmd; snap_cmd->pattern; snap_cmd++)
if (_gf_false == snap_cmd->disable)
cli_out ("%s - %s", snap_cmd->pattern, snap_cmd->desc);
+ cli_out ("\n");
+
GF_FREE (cmd);
return 0;
}
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index ca9da0a..a1e5c51 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -36,7 +36,19 @@ extern rpc_clnt_prog_t cli_quotad_clnt;
int
cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
+ const char **words, int wordcount);
+
+int
+cli_cmd_bitrot_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_quota_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_tier_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
int
cli_cmd_volume_info_cbk (struct cli_state *state, struct cli_cmd_word *word,
@@ -1293,9 +1305,12 @@ cli_cmd_volume_tier_cbk (struct cli_state *state,
if (wordcount < 4) {
- cli_usage_out (word->pattern);
- if (wordcount == 3 && !strcmp(words[2], "help"))
+ if (wordcount == 3 && !strcmp(words[2], "help")) {
+ cli_cmd_tier_help_cbk (state, word, words, wordcount);
ret = 0;
+ } else {
+ cli_usage_out (word->pattern);
+ }
goto out;
}
@@ -1719,6 +1734,8 @@ out:
return ret;
}
+
+
int
cli_cmd_bitrot_cbk (struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
@@ -1746,6 +1763,13 @@ cli_cmd_bitrot_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ if (ret == 1) {
+ /* this is 'volume bitrot help' */
+ cli_cmd_bitrot_help_cbk (state, word, words, wordcount);
+ ret = 0;
+ goto out2;
+ }
+
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame) {
ret = -1;
@@ -1834,7 +1858,7 @@ out:
#endif
CLI_STACK_DESTROY (frame);
-
+out2:
return ret;
}
@@ -1866,6 +1890,12 @@ cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
}
} else {
ret = cli_cmd_quota_parse (words, wordcount, &options);
+
+ if (ret == 1) {
+ cli_cmd_quota_help_cbk (state, word, words, wordcount);
+ ret = 0;
+ goto out;
+ }
if (ret < 0) {
cli_usage_out (word->pattern);
parse_err = 1;
@@ -3157,7 +3187,159 @@ out:
return ret;
}
+
+/* This is a bit of a hack to display the help. The current bitrot cmd
+ * format does not work well when registering the cmds.
+ * Ideally the should have been of the form
+ * gluster volume bitrot <subcommand> <volumename> ...
+ */
+
+struct cli_cmd bitrot_cmds[] = {
+
+ {"volume bitrot help",
+ cli_cmd_bitrot_help_cbk,
+ "display help for volume bitrot commands"
+ },
+
+ {"volume bitrot <VOLNAME> {enable|disable}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Enable/disable bitrot for volume <VOLNAME>"
+ },
+
+ {"volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Set the speed of the scrubber for volume <VOLNAME>"
+ },
+
+ {"volume bitrot <VOLNAME> scrub-frequency {hourly|daily|weekly|biweekly"
+ "|monthly}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Set the frequency of the scrubber for volume <VOLNAME>"
+ },
+
+ {"volume bitrot <VOLNAME> scrub {pause|resume|status|ondemand}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Pause/resume the scrubber for <VOLNAME>. Status displays the status of "
+ "the scrubber. ondemand starts the scrubber immediately."
+ },
+
+ {"volume bitrot <VOLNAME> {enable|disable}\n"
+ "volume bitrot <volname> scrub-throttle {lazy|normal|aggressive}\n"
+ "volume bitrot <volname> scrub-frequency {hourly|daily|weekly|biweekly"
+ "|monthly}\n"
+ "volume bitrot <volname> scrub {pause|resume|status|ondemand}",
+ cli_cmd_bitrot_cbk,
+ NULL
+ },
+
+ { NULL, NULL, NULL }
+};
+
+
+struct cli_cmd quota_cmds[] = {
+
+ /* Quota commands */
+ {"volume quota help",
+ cli_cmd_quota_help_cbk,
+ "display help for volume quota commands"
+ },
+
+ {"volume quota <VOLNAME> {enable|disable|list [<path> ...]| "
+ "list-objects [<path> ...] | remove <path>| remove-objects <path> | "
+ "default-soft-limit <percent>}",
+ cli_cmd_quota_cbk,
+ "Enable/disable and configure quota for <VOLNAME>"
+ },
+
+ {"volume quota <VOLNAME> {limit-usage <path> <size> [<percent>]}",
+ cli_cmd_quota_cbk,
+ "Set maximum size for <path> for <VOLNAME>"
+ },
+
+ {"volume quota <VOLNAME> {limit-objects <path> <number> [<percent>]}",
+ cli_cmd_quota_cbk,
+ "Set the maximum number of entries allowed in <path> for <VOLNAME>"
+ },
+
+ {"volume quota <VOLNAME> {alert-time|soft-timeout|hard-timeout} {<time>}",
+ cli_cmd_quota_cbk,
+ "Set quota timeout for <VOLNAME>"
+ },
+
+ { "volume inode-quota <VOLNAME> enable",
+ cli_cmd_quota_cbk,
+ "Enable/disable inode-quota for <VOLNAME>"
+ },
+
+ { "volume quota <VOLNAME> {enable|disable|list [<path> ...]| "
+ "list-objects [<path> ...] | remove <path>| remove-objects <path> | "
+ "default-soft-limit <percent>}\n"
+ "volume quota <VOLNAME> {limit-usage <path> <size> [<percent>]}\n"
+ "volume quota <VOLNAME> {limit-objects <path> <number> [<percent>]}\n"
+ "volume quota <VOLNAME> {alert-time|soft-timeout|hard-timeout} {<time>}",
+ cli_cmd_quota_cbk,
+ NULL
+ },
+
+ { NULL, NULL, NULL }
+};
+
+struct cli_cmd tier_cmds[] = {
+
+ { "volume tier help",
+ cli_cmd_tier_help_cbk,
+ "display help for volume tier commands"},
+
+ { "volume tier <VOLNAME> status",
+ cli_cmd_volume_tier_cbk,
+ "Display tier status for <VOLNAME>"},
+
+ { "volume tier <VOLNAME> start [force]",
+ cli_cmd_volume_tier_cbk,
+ "Start the tier service for <VOLNAME>"},
+
+ { "volume tier <VOLNAME> stop [force]",
+ cli_cmd_volume_tier_cbk,
+ "Stop the tier service for <VOLNAME>"},
+
+ { "volume tier <VOLNAME> attach [<replica COUNT>] <NEW-BRICK>... [force]",
+ cli_cmd_volume_tier_cbk,
+ "Attach a hot tier to <VOLNAME>"},
+
+ { "volume tier <VOLNAME> detach <start|stop|status|commit|[force]>",
+ cli_cmd_volume_tier_cbk,
+ "Detach the hot tier from <VOLNAME>"},
+
+ { "volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...",
+ cli_cmd_volume_tier_cbk,
+ "NOTE: this is old syntax, will be deprecated in next release. "
+ "Please use gluster volume tier <vol> attach "
+ "[<replica COUNT>] <NEW-BRICK>..."},
+
+ { "volume detach-tier <VOLNAME> "
+ "<start|stop|status|commit|force>",
+ cli_cmd_volume_tier_cbk,
+ "NOTE: this is old syntax, will be deprecated in next release. "
+ "Please use gluster volume tier <vol> detach "
+ "{start|stop|commit} [force]"},
+
+ { "volume tier <VOLNAME> status\n"
+ "volume tier <VOLNAME> start [force]\n"
+ "volume tier <VOLNAME> stop\n"
+ "volume tier <VOLNAME> attach [<replica COUNT>] <NEW-BRICK>... [force]\n"
+ "volume tier <VOLNAME> detach <start|stop|status|commit|[force]>\n",
+ cli_cmd_volume_tier_cbk,
+ NULL },
+
+ {NULL, NULL, NULL}
+
+ };
+
struct cli_cmd volume_cmds[] = {
+ { "volume help",
+ cli_cmd_volume_help_cbk,
+ "display help for volume commands"},
+
{ "volume info [all|<VOLNAME>]",
cli_cmd_volume_info_cbk,
"list information of all volumes"},
@@ -3190,29 +3372,6 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_rename_cbk,
"rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
-#if !defined(__NetBSD__)
- { "volume tier <VOLNAME> status\n"
- "volume tier <VOLNAME> start [force]\n"
- "volume tier <VOLNAME> stop\n"
- "volume tier <VOLNAME> attach [<replica COUNT>] <NEW-BRICK>... [force]\n"
- "volume tier <VOLNAME> detach <start|stop|status|commit|[force]>\n",
- cli_cmd_volume_tier_cbk,
- "Tier translator specific operations."},
-
- { "volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...",
- cli_cmd_volume_tier_cbk,
- "NOTE: this is old syntax, will be depreciated in next release. "
- "Please use gluster volume tier <vol> attach "
- "[<replica COUNT>] <NEW-BRICK>..."},
-
- { "volume detach-tier <VOLNAME> "
- " <start|stop|status|commit|force>",
- cli_cmd_volume_tier_cbk,
- "NOTE: this is old syntax, will be depreciated in next release. "
- "Please use gluster volume tier <vol> detach "
- "{start|stop|commit} [force]"},
-#endif
-
{ "volume add-brick <VOLNAME> [<stripe|replica> <COUNT> "
"[arbiter <COUNT>]] <NEW-BRICK> ... [force]",
cli_cmd_volume_add_brick_cbk,
@@ -3240,9 +3399,6 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_set_cbk,
"set options for volume <VOLNAME>"},
- { "volume help",
- cli_cmd_volume_help_cbk,
- "display help for the volume command"},
{ "volume log <VOLNAME> rotate [BRICK]",
cli_cmd_log_rotate_cbk,
@@ -3273,19 +3429,6 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_profile_cbk,
"volume profile operations"},
- { "volume quota <VOLNAME> {enable|disable|list [<path> ...]| "
- "list-objects [<path> ...] | remove <path>| remove-objects <path> | "
- "default-soft-limit <percent>} |\n"
- "volume quota <VOLNAME> {limit-usage <path> <size> [<percent>]} |\n"
- "volume quota <VOLNAME> {limit-objects <path> <number> [<percent>]} |\n"
- "volume quota <VOLNAME> {alert-time|soft-timeout|hard-timeout} {<time>}",
- cli_cmd_quota_cbk,
- "quota translator specific operations"},
-
- { "volume inode-quota <VOLNAME> enable",
- cli_cmd_quota_cbk,
- "quota translator specific operations"},
-
{ "volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] |\n"
"volume top <VOLNAME> {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>]",
cli_cmd_volume_top_cbk,
@@ -3329,26 +3472,99 @@ struct cli_cmd volume_cmds[] = {
" or all option. gluster volume get all all is to get all global "
"options"
},
- {"volume bitrot <VOLNAME> {enable|disable} |\n"
- "volume bitrot <volname> scrub-throttle {lazy|normal|aggressive} |\n"
- "volume bitrot <volname> scrub-frequency {hourly|daily|weekly|biweekly"
- "|monthly} |\n"
- "volume bitrot <volname> scrub {pause|resume|status|ondemand}",
- cli_cmd_bitrot_cbk,
- "Bitrot translator specific operation. For more information about "
- "bitrot command type 'man gluster'"
- },
+
{ "volume reset-brick <VOLNAME> <SOURCE-BRICK> {{start} |"
" {<NEW-BRICK> commit}}",
cli_cmd_volume_reset_brick_cbk,
"reset-brick operations"},
+
{ NULL, NULL, NULL }
};
int
+cli_cmd_quota_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *quota_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_CALLOC (1, sizeof (quota_cmds), cli_mt_cli_cmd);
+ memcpy (cmd, quota_cmds, sizeof (quota_cmds));
+ count = (sizeof (quota_cmds) / sizeof (struct cli_cmd));
+ cli_cmd_sort (cmd, count);
+
+ cli_out ("\ngluster quota commands");
+ cli_out ("=======================\n");
+
+ for (quota_cmd = cmd; quota_cmd->pattern; quota_cmd++)
+ if ((_gf_false == quota_cmd->disable) && (quota_cmd->desc))
+ cli_out ("%s - %s", quota_cmd->pattern,
+ quota_cmd->desc);
+
+ cli_out ("\n");
+ GF_FREE (cmd);
+
+ return 0;
+}
+
+int
+cli_cmd_bitrot_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *bitrot_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_CALLOC (1, sizeof (bitrot_cmds), cli_mt_cli_cmd);
+ memcpy (cmd, bitrot_cmds, sizeof (bitrot_cmds));
+ count = (sizeof (bitrot_cmds) / sizeof (struct cli_cmd));
+ cli_cmd_sort (cmd, count);
+
+ cli_out ("\ngluster bitrot commands");
+ cli_out ("========================\n");
+
+ for (bitrot_cmd = cmd; bitrot_cmd->pattern; bitrot_cmd++)
+ if ((_gf_false == bitrot_cmd->disable) && (bitrot_cmd->desc))
+ cli_out ("%s - %s", bitrot_cmd->pattern,
+ bitrot_cmd->desc);
+
+ cli_out ("\n");
+ GF_FREE (cmd);
+
+ return 0;
+}
+
+int
+cli_cmd_tier_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *tier_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_CALLOC (1, sizeof (tier_cmds), cli_mt_cli_cmd);
+ memcpy (cmd, tier_cmds, sizeof (tier_cmds));
+ count = (sizeof (tier_cmds) / sizeof (struct cli_cmd));
+ cli_cmd_sort (cmd, count);
+
+ cli_out ("\ngluster tier commands");
+ cli_out ("======================\n");
+
+ for (tier_cmd = cmd; tier_cmd->pattern; tier_cmd++) {
+ if ((_gf_false == tier_cmd->disable) && tier_cmd->desc) {
+ cli_out ("%s - %s", tier_cmd->pattern, tier_cmd->desc);
+ }
+ }
+ cli_out ("\n");
+ GF_FREE (cmd);
+ return 0;
+}
+
+int
cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount)
+ const char **words, int wordcount)
{
struct cli_cmd *cmd = NULL;
struct cli_cmd *vol_cmd = NULL;
@@ -3359,10 +3575,14 @@ cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
count = (sizeof (volume_cmds) / sizeof (struct cli_cmd));
cli_cmd_sort (cmd, count);
+ cli_out ("\ngluster volume commands");
+ cli_out ("========================\n");
+
for (vol_cmd = cmd; vol_cmd->pattern; vol_cmd++)
if (_gf_false == vol_cmd->disable)
cli_out ("%s - %s", vol_cmd->pattern, vol_cmd->desc);
+ cli_out ("\n");
GF_FREE (cmd);
return 0;
}
@@ -3374,11 +3594,32 @@ cli_cmd_volume_register (struct cli_state *state)
struct cli_cmd *cmd = NULL;
for (cmd = volume_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+
+ for (cmd = bitrot_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+ for (cmd = quota_cmds; cmd->pattern; cmd++) {
ret = cli_cmd_register (&state->tree, cmd);
if (ret)
goto out;
}
+
+#if !defined(__NetBSD__)
+ for (cmd = tier_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+
+#endif
+
out:
return ret;
}
diff --git a/cli/src/cli.c b/cli/src/cli.c
index 18ca5c8..ce06366 100644
--- a/cli/src/cli.c
+++ b/cli/src/cli.c
@@ -515,7 +515,7 @@ cli_usage_out (const char *usage)
if (!usage || usage[0] == '\0')
return -1;
- cli_err ("Usage: %s", usage);
+ cli_err ("\nUsage:\n%s\n", usage);
return 0;
}
--
1.8.3.1

View File

@ -0,0 +1,376 @@
From 6c176a6f9743ab0518619f784a1fc5ac9562b991 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Tue, 18 Jul 2017 18:39:01 +0530
Subject: [PATCH 076/128] cluster/ec: Handle parallel get_size_version
upstream patch: https://review.gluster.org/#/c/17820/
>Updates #251
>Change-Id: I6244014dbc90af3239d63d75a064ae22ec12a054
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
BUG: 1459101
Change-Id: I6244014dbc90af3239d63d75a064ae22ec12a054
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123551
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
---
xlators/cluster/ec/src/ec-common.c | 151 +++++++++++++++++++++++--------------
xlators/cluster/ec/src/ec-common.h | 8 +-
xlators/cluster/ec/src/ec-types.h | 3 +-
3 files changed, 103 insertions(+), 59 deletions(-)
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 732d422..6963907 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -21,6 +21,10 @@
#include "ec.h"
#include "ec-messages.h"
+#define EC_XATTROP_ALL_WAITING_FLAGS (EC_FLAG_WAITING_XATTROP |\
+ EC_FLAG_WAITING_DATA_DIRTY |\
+ EC_FLAG_WAITING_METADATA_DIRTY)
+
uint32_t
ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop)
{
@@ -882,11 +886,11 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags)
}
gf_boolean_t
-ec_config_check (ec_fop_data_t *fop, ec_config_t *config)
+ec_config_check (xlator_t *xl, ec_config_t *config)
{
ec_t *ec;
- ec = fop->xl->private;
+ ec = xl->private;
if ((config->version != EC_CONFIG_VERSION) ||
(config->algorithm != EC_CONFIG_ALGORITHM) ||
(config->gf_word_size != EC_GF_BITS) ||
@@ -911,11 +915,11 @@ ec_config_check (ec_fop_data_t *fop, ec_config_t *config)
!ec_is_power_of_2(config->gf_word_size) ||
((config->chunk_size * 8) % (config->gf_word_size * data_bricks)
!= 0)) {
- gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
EC_MSG_INVALID_CONFIG,
"Invalid or corrupted config");
} else {
- gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ gf_msg (xl->name, GF_LOG_ERROR, EINVAL,
EC_MSG_INVALID_CONFIG,
"Unsupported config "
"(V=%u, A=%u, W=%u, "
@@ -962,24 +966,28 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
{
struct list_head list;
ec_fop_data_t *fop = cookie, *parent, *tmp;
- ec_lock_link_t *link = fop->data;
+ ec_lock_link_t *parent_link = fop->data;
+ ec_lock_link_t *link = NULL;
ec_lock_t *lock = NULL;
ec_inode_t *ctx;
gf_boolean_t release = _gf_false;
+ uint64_t waiting_flags = 0;
+ uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
- lock = link->lock;
- parent = link->fop;
+ lock = parent_link->lock;
+ parent = parent_link->fop;
ctx = lock->ctx;
INIT_LIST_HEAD(&list);
+ waiting_flags = parent_link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
LOCK(&lock->loc.inode->lock);
list_for_each_entry(link, &lock->owners, owner_list) {
- if ((link->fop->flags & EC_FLAG_WAITING_XATTROP) != 0) {
- link->fop->flags ^= EC_FLAG_WAITING_XATTROP;
-
- list_add_tail(&link->fop->cbk_list, &list);
+ if ((link->waiting_flags & waiting_flags) != 0) {
+ link->waiting_flags ^= (link->waiting_flags & waiting_flags);
+ if ((link->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS) == 0)
+ list_add_tail(&link->fop->cbk_list, &list);
}
}
@@ -991,8 +999,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
goto unlock;
}
- if (parent->flags & EC_FLAG_QUERY_METADATA) {
- parent->flags ^= EC_FLAG_QUERY_METADATA;
+ if (waiting_flags & EC_FLAG_WAITING_XATTROP) {
op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
ctx->pre_version,
EC_VERSION_SIZE);
@@ -1036,7 +1043,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
goto unlock;
}
} else {
- if (!ec_config_check(parent, &ctx->config)) {
+ if (!ec_config_check(parent->xl, &ctx->config)) {
gf_msg (this->name, GF_LOG_ERROR, EINVAL,
EC_MSG_CONFIG_XATTR_INVALID,
"Invalid config xattr");
@@ -1051,12 +1058,22 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
ctx->have_info = _gf_true;
}
- ec_set_dirty_flag (fop->data, ctx, ctx->dirty);
+ ec_set_dirty_flag (fop->data, ctx, dirty);
+ if (dirty[EC_METADATA_TXN] &&
+ (waiting_flags & EC_FLAG_WAITING_METADATA_DIRTY)) {
+ GF_ASSERT (!ctx->dirty[EC_METADATA_TXN]);
+ ctx->dirty[EC_METADATA_TXN] = 1;
+ }
+
+ if (dirty[EC_DATA_TXN] &&
+ (waiting_flags & EC_FLAG_WAITING_DATA_DIRTY)) {
+ GF_ASSERT (!ctx->dirty[EC_DATA_TXN]);
+ ctx->dirty[EC_DATA_TXN] = 1;
+ }
op_errno = 0;
unlock:
- lock->getting_xattr = _gf_false;
- UNLOCK(&lock->loc.inode->lock);
+ lock->waiting_flags ^= waiting_flags;
if (op_errno == 0) {
/* If the fop fails on any of the good bricks, it is important to mark
@@ -1066,33 +1083,24 @@ unlock:
release = _gf_true;
}
- /* lock->release is a critical field that is checked and modified most
- * of the time inside a locked region. This use here is safe because we
- * are in a modifying fop and we currently don't allow two modifying
- * fops to be processed concurrently, so no one else could be checking
- * or modifying it.*/
- if (link->update[0] && !link->dirty[0]) {
+ if (parent_link->update[0] && !parent_link->dirty[0]) {
lock->release |= release;
}
- if (link->update[1] && !link->dirty[1]) {
+ if (parent_link->update[1] && !parent_link->dirty[1]) {
lock->release |= release;
}
/* We don't allow the main fop to be executed on bricks that have not
* succeeded the initial xattrop. */
- parent->mask &= fop->good;
ec_lock_update_good (lock, fop);
/*As of now only data healing marks bricks as healing*/
lock->healing |= fop->healing;
- if (ec_is_data_fop (parent->id)) {
- parent->healing |= fop->healing;
- }
- } else {
- ec_fop_set_error(parent, op_errno);
}
+ UNLOCK(&lock->loc.inode->lock);
+
while (!list_empty(&list)) {
tmp = list_entry(list.next, ec_fop_data_t, cbk_list);
list_del_init(&tmp->cbk_list);
@@ -1104,16 +1112,50 @@ unlock:
if (ec_is_data_fop (tmp->id)) {
tmp->healing |= fop->healing;
}
- } else {
- ec_fop_set_error(tmp, op_errno);
}
- ec_resume(tmp, 0);
+ ec_resume(tmp, op_errno);
}
return 0;
}
+static uint64_t
+ec_set_xattrop_flags_and_params (ec_lock_t *lock, ec_lock_link_t *link,
+ uint64_t *dirty)
+{
+ uint64_t oldflags = 0;
+ uint64_t newflags = 0;
+ ec_inode_t *ctx = lock->ctx;
+
+ oldflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
+
+ if (lock->query && !ctx->have_info) {
+ lock->waiting_flags |= EC_FLAG_WAITING_XATTROP;
+ link->waiting_flags |= EC_FLAG_WAITING_XATTROP;
+ }
+
+ if (dirty[EC_DATA_TXN]) {
+ if (oldflags & EC_FLAG_WAITING_DATA_DIRTY) {
+ dirty[EC_DATA_TXN] = 0;
+ } else {
+ lock->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
+ }
+ link->waiting_flags |= EC_FLAG_WAITING_DATA_DIRTY;
+ }
+
+ if (dirty[EC_METADATA_TXN]) {
+ if (oldflags & EC_FLAG_WAITING_METADATA_DIRTY) {
+ dirty[EC_METADATA_TXN] = 0;
+ } else {
+ lock->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
+ }
+ link->waiting_flags |= EC_FLAG_WAITING_METADATA_DIRTY;
+ }
+ newflags = lock->waiting_flags & EC_XATTROP_ALL_WAITING_FLAGS;
+ return oldflags ^ newflags;
+}
+
void ec_get_size_version(ec_lock_link_t *link)
{
loc_t loc;
@@ -1124,7 +1166,6 @@ void ec_get_size_version(ec_lock_link_t *link)
dict_t *xdata = NULL;
ec_t *ec = NULL;
int32_t error = 0;
- gf_boolean_t getting_xattr;
gf_boolean_t set_dirty = _gf_false;
uint64_t allzero[EC_VERSION_SIZE] = {0, 0};
uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
@@ -1132,6 +1173,7 @@ void ec_get_size_version(ec_lock_link_t *link)
ctx = lock->ctx;
fop = link->fop;
ec = fop->xl->private;
+ uint64_t changed_flags = 0;
if (ec->optimistic_changelog &&
!(ec->node_mask & ~link->lock->good_mask) && !ec_is_data_fop (fop->id))
@@ -1159,19 +1201,20 @@ void ec_get_size_version(ec_lock_link_t *link)
LOCK(&lock->loc.inode->lock);
- getting_xattr = lock->getting_xattr;
- lock->getting_xattr = _gf_true;
- if (getting_xattr) {
- fop->flags |= EC_FLAG_WAITING_XATTROP;
-
- ec_sleep(fop);
+ changed_flags = ec_set_xattrop_flags_and_params (lock, link, dirty);
+ if (link->waiting_flags) {
+ /* This fop needs to wait until all its flags are cleared which
+ * potentially can be cleared by other xattrops that are already
+ * wound*/
+ ec_sleep(fop);
+ } else {
+ GF_ASSERT (!changed_flags);
}
UNLOCK(&lock->loc.inode->lock);
- if (getting_xattr) {
+ if (!changed_flags)
goto out;
- }
dict = dict_new();
if (dict == NULL) {
@@ -1179,17 +1222,7 @@ void ec_get_size_version(ec_lock_link_t *link)
goto out;
}
- if (lock->loc.inode->ia_type == IA_IFREG ||
- lock->loc.inode->ia_type == IA_INVAL) {
- xdata = dict_new();
- if (xdata == NULL || dict_set_int32 (xdata, GF_GET_SIZE, 1)) {
- error = -ENOMEM;
- goto out;
- }
- }
-
- if (lock->query && !ctx->have_info) {
- fop->flags |= EC_FLAG_QUERY_METADATA;
+ if (changed_flags & EC_FLAG_WAITING_XATTROP) {
/* Once we know that an xattrop will be needed,
* we try to get all available information in a
* single call. */
@@ -1208,9 +1241,17 @@ void ec_get_size_version(ec_lock_link_t *link)
if (error != 0) {
goto out;
}
+
+ xdata = dict_new();
+ if (xdata == NULL || dict_set_int32 (xdata, GF_GET_SIZE, 1)) {
+ error = -ENOMEM;
+ goto out;
+ }
+
}
}
- if (set_dirty) {
+
+ if (memcmp (allzero, dirty, sizeof (allzero))) {
error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty,
EC_VERSION_SIZE);
if (error != 0) {
@@ -1943,7 +1984,7 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
ctx->have_size = _gf_true;
}
if ((ec_dict_del_config(xdata, EC_XATTR_CONFIG, &ctx->config) == 0) &&
- ec_config_check(fop->parent, &ctx->config)) {
+ ec_config_check(fop->xl, &ctx->config)) {
ctx->have_config = _gf_true;
}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index a03a590..8f5d20a 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -27,9 +27,11 @@ typedef enum {
#define EC_CONFIG_ALGORITHM 0
-#define EC_FLAG_LOCK_SHARED 0x0001
-#define EC_FLAG_WAITING_XATTROP 0x0002
-#define EC_FLAG_QUERY_METADATA 0x0004
+#define EC_FLAG_LOCK_SHARED 0x0001
+
+#define EC_FLAG_WAITING_XATTROP 0x0001
+#define EC_FLAG_WAITING_DATA_DIRTY 0x0002
+#define EC_FLAG_WAITING_METADATA_DIRTY 0x0004
#define EC_SELFHEAL_BIT 62
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 3e93a1a..5601f96 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -227,8 +227,8 @@ struct _ec_lock {
uintptr_t healing;
uint32_t refs_owners; /* Refs for fops owning the lock */
uint32_t refs_pending; /* Refs assigned to fops being prepared */
+ uint32_t waiting_flags; /*Track xattrop/dirty marking*/
gf_boolean_t acquired;
- gf_boolean_t getting_xattr;
gf_boolean_t unlock_now;
gf_boolean_t release;
gf_boolean_t query;
@@ -250,6 +250,7 @@ struct _ec_lock_link {
gf_boolean_t optimistic_changelog;
loc_t *base;
uint64_t size;
+ uint32_t waiting_flags;
};
struct _ec_fop_data {
--
1.8.3.1

View File

@ -0,0 +1,343 @@
From c3161248afdb42d1bf5e06a32041180cc4be457d Mon Sep 17 00:00:00 2001
From: Xavier Hernandez <jahernan@redhat.com>
Date: Fri, 6 Oct 2017 10:39:58 +0200
Subject: [PATCH 077/128] cluster/ec: add functions for stripe alignment
This patch removes old functions to align offsets and sizes
to stripe size boundaries and adds new ones to offer more
possibilities.
The new functions are:
* ec_adjust_offset_down()
Aligns a given offset to a multiple of the stripe size
equal or smaller than the initial one. It returns the
size of the gap between the aligned offset and the given
one.
* ec_adjust_offset_up()
Aligns a given offset to a multiple of the stripe size
equal or greater than the initial one. It returns the
size of the skipped region between the given offset and
the aligned one. If an overflow happens, the returned
value has negative sign (but correct value) and the
offset is set to the maximum value (not aligned).
* ec_adjust_size_down()
Aligns the given size to a multiple of the stripe size
equal or smaller than the initial one. It returns the
size of the missed region between the aligned size and
the given one.
* ec_adjust_size_up()
Aligns the given size to a multiple of the stripe size
equal or greater than the initial one. It returns the
size of the gap between the given size and the aligned
one. If an overflow happens, the returned value has
negative sign (but correct value) and the size is set
to the maximum value (not aligned).
These functions have been defined in ec-helpers.h as static
inline since they are very small and compilers can optimize
them (specially the 'scale' argument).
upstream patch: https://review.gluster.org/#/c/18440/
>Change-Id: I4c91009ad02f76c73772034dfde27ee1c78a80d7
>Signed-off-by: Xavier Hernandez <jahernan@redhat.com>
BUG: 1499865
Change-Id: I4c91009ad02f76c73772034dfde27ee1c78a80d7
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123556
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Javier Hernandez Juan <jahernan@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/cluster/ec/src/ec-heal.c | 8 ++-
xlators/cluster/ec/src/ec-helpers.c | 29 ---------
xlators/cluster/ec/src/ec-helpers.h | 108 +++++++++++++++++++++++++++++++-
xlators/cluster/ec/src/ec-inode-read.c | 10 +--
xlators/cluster/ec/src/ec-inode-write.c | 13 ++--
xlators/cluster/ec/src/ec-locks.c | 8 +--
6 files changed, 129 insertions(+), 47 deletions(-)
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index a6de3ee..bc25015 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -1670,7 +1670,8 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,
* well*/
if (check_ondisksize) {
- source_size = ec_adjust_size (ec, size[source], 1);
+ source_size = size[source];
+ ec_adjust_size_up (ec, &source_size, _gf_true);
for (i = 0; i < ec->nodes; i++) {
if (sources[i]) {
@@ -1983,7 +1984,7 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
heal->fd = fd_ref (fd);
heal->xl = ec->xl;
heal->data = &barrier;
- size = ec_adjust_size (ec, size, 0);
+ ec_adjust_size_up (ec, &size, _gf_false);
heal->total_size = size;
heal->size = (128 * GF_UNIT_KB * (ec->self_heal_window_size));
/* We need to adjust the size to a multiple of the stripe size of the
@@ -2038,7 +2039,8 @@ __ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec,
ret = 0;
goto out;
}
- trim_offset = ec_adjust_size (ec, size, 1);
+ trim_offset = size;
+ ec_adjust_offset_up (ec, &trim_offset, _gf_true);
ret = cluster_ftruncate (ec->xl_list, trim, ec->nodes, replies, output,
frame, ec->xl, fd, trim_offset, NULL);
for (i = 0; i < ec->nodes; i++) {
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index 64b010f..0c66948 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -799,35 +799,6 @@ ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl)
return ctx;
}
-uint32_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale)
-{
- off_t head, tmp;
-
- tmp = *offset;
- head = tmp % ec->stripe_size;
- tmp -= head;
- if (scale)
- {
- tmp /= ec->fragments;
- }
-
- *offset = tmp;
-
- return head;
-}
-
-uint64_t ec_adjust_size(ec_t * ec, uint64_t size, int32_t scale)
-{
- size += ec->stripe_size - 1;
- size -= size % ec->stripe_size;
- if (scale)
- {
- size /= ec->fragments;
- }
-
- return size;
-}
-
gf_boolean_t
ec_is_internal_xattr (dict_t *dict, char *key, data_t *value, void *data)
{
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index 4d2145c..cfd7daa 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -55,8 +55,112 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl);
ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl);
ec_fd_t * ec_fd_get(fd_t * fd, xlator_t * xl);
-uint32_t ec_adjust_offset(ec_t * ec, off_t * offset, int32_t scale);
-uint64_t ec_adjust_size(ec_t * ec, uint64_t size, int32_t scale);
+static inline uint32_t
+ec_adjust_size_down(ec_t *ec, uint64_t *value, gf_boolean_t scale)
+{
+ uint64_t head, tmp;
+
+ tmp = *value;
+ head = tmp % ec->stripe_size;
+ tmp -= head;
+
+ if (scale) {
+ tmp /= ec->fragments;
+ }
+
+ *value = tmp;
+
+ return (uint32_t)head;
+}
+
+/* This function can cause an overflow if the passed value is too near to the
+ * uint64_t limit. If this happens, it returns the tail in negative form and
+ * the value is set to UINT64_MAX. */
+static inline int32_t
+ec_adjust_size_up(ec_t *ec, uint64_t *value, gf_boolean_t scale)
+{
+ uint64_t tmp;
+ int32_t tail;
+
+ tmp = *value;
+ /* We first adjust the value down. This never causes overflow. */
+ tail = ec_adjust_size_down(ec, &tmp, scale);
+
+ /* If the value was already aligned, tail will be 0 and nothing else
+ * needs to be done. */
+ if (tail != 0) {
+ /* Otherwise, we need to compute the real tail and adjust the
+ * returned value to the next stripe. */
+ tail = ec->stripe_size - tail;
+ if (scale) {
+ tmp += ec->fragment_size;
+ } else {
+ tmp += ec->stripe_size;
+ /* If no scaling is requested there's a posibility of
+ * overflow. */
+ if (tmp < ec->stripe_size) {
+ tmp = UINT64_MAX;
+ tail = -tail;
+ }
+ }
+ }
+
+ *value = tmp;
+
+ return tail;
+}
+
+/* This function is equivalent to ec_adjust_size_down() but with a potentially
+ * different parameter size (off_t vs uint64_t). */
+static inline uint32_t
+ec_adjust_offset_down(ec_t *ec, off_t *value, gf_boolean_t scale)
+{
+ off_t head, tmp;
+
+ tmp = *value;
+ head = tmp % ec->stripe_size;
+ tmp -= head;
+
+ if (scale) {
+ tmp /= ec->fragments;
+ }
+
+ *value = tmp;
+
+ return (uint32_t)head;
+}
+
+/* This function is equivalent to ec_adjust_size_up() but with a potentially
+ * different parameter size (off_t vs uint64_t). */
+static inline int32_t
+ec_adjust_offset_up(ec_t *ec, off_t *value, gf_boolean_t scale)
+{
+ uint64_t tail, tmp;
+
+ /* An offset is a signed type that can only have positive values, so
+ * we take advantage of this to avoid overflows. We simply convert it
+ * to an unsigned integer and operate normally. This won't cause an
+ * overflow. Overflow is only checked when converting back to an
+ * off_t. */
+ tmp = *value;
+ tail = ec->stripe_size;
+ tail -= (tmp + tail - 1) % tail + 1;
+ tmp += tail;
+ if (scale) {
+ /* If we are scaling, we'll never get an overflow. */
+ tmp /= ec->fragments;
+ } else {
+ /* Check if there has been an overflow. */
+ if ((off_t)tmp < 0) {
+ tmp = (1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL;
+ tail = -tail;
+ }
+ }
+
+ *value = (off_t)tmp;
+
+ return (int32_t)tail;
+}
static inline int32_t ec_is_power_of_2(uint32_t value)
{
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index d925e82..829f47f 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -1356,9 +1356,10 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
fop->user_size = fop->size;
- fop->head = ec_adjust_offset(fop->xl->private, &fop->offset, 1);
- fop->size = ec_adjust_size(fop->xl->private, fop->size + fop->head,
- 1);
+ fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
+ _gf_true);
+ fop->size += fop->head;
+ ec_adjust_size_up(fop->xl->private, &fop->size, _gf_true);
/* Fall through */
@@ -1561,7 +1562,8 @@ int32_t ec_manager_seek(ec_fop_data_t *fop, int32_t state)
switch (state) {
case EC_STATE_INIT:
fop->user_size = fop->offset;
- fop->head = ec_adjust_offset(fop->xl->private, &fop->offset, 1);
+ fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
+ _gf_true);
/* Fall through */
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index 68bea1a..3ed9b2a 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -870,8 +870,10 @@ int32_t ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
return EC_STATE_REPORT;
}
fop->user_size = fop->offset + fop->size;
- fop->head = ec_adjust_offset (fop->xl->private, &fop->offset, 1);
- fop->size = ec_adjust_size (fop->xl->private, fop->head + fop->size, 1);
+ fop->head = ec_adjust_offset_down (fop->xl->private, &fop->offset,
+ _gf_true);
+ fop->size += fop->head;
+ ec_adjust_size_up (fop->xl->private, &fop->size, _gf_true);
/* Fall through */
@@ -1145,7 +1147,7 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
fop->user_size = fop->offset;
- fop->offset = ec_adjust_size(fop->xl->private, fop->offset, 1);
+ ec_adjust_offset_up(fop->xl->private, &fop->offset, _gf_true);
/* Fall through */
@@ -1508,8 +1510,9 @@ ec_writev_prepare_buffers(ec_t *ec, ec_fop_data_t *fop)
int32_t err;
fop->user_size = iov_length(fop->vector, fop->int32);
- fop->head = ec_adjust_offset(ec, &fop->offset, 0);
- fop->size = ec_adjust_size(ec, fop->user_size + fop->head, 0);
+ fop->head = ec_adjust_offset_down(ec, &fop->offset, _gf_false);
+ fop->size = fop->user_size + fop->head;
+ ec_adjust_size_up(ec, &fop->size, _gf_false);
if ((fop->int32 != 1) || (fop->head != 0) ||
(fop->size > fop->user_size) ||
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
index ff09852..996035d 100644
--- a/xlators/cluster/ec/src/ec-locks.c
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -572,10 +572,10 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state)
switch (state)
{
case EC_STATE_INIT:
- fop->flock.l_len += ec_adjust_offset(fop->xl->private,
- &fop->flock.l_start, 1);
- fop->flock.l_len = ec_adjust_size(fop->xl->private,
- fop->flock.l_len, 1);
+ fop->flock.l_len += ec_adjust_offset_down(fop->xl->private,
+ &fop->flock.l_start,
+ _gf_true);
+ ec_adjust_offset_up(fop->xl->private, &fop->flock.l_len, _gf_true);
if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK))
{
fop->uint32 = EC_LOCK_MODE_ALL;
--
1.8.3.1

View File

@ -0,0 +1,76 @@
From 1bcb5bbd42a4187ed385853f0364a3941bea9846 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Fri, 17 Nov 2017 17:29:36 +0530
Subject: [PATCH 078/128] cluster/afr: Honor default timeout of 5min for
analyzing split-brain files
Problem:
After setting split-brain-choice option to analyze the file to resolve
the split brain using the command
"setfattr -n replica.split-brain-choice -v "choiceX" <path-to-file>"
should allow to access the file from mount for default timeout of 5mins.
But the timeout was not honored, and the file remained accessible even after
the timeout.
Fix:
Call the inode_invalidate() in afr_set_split_brain_choice_cbk() so that
it will trigger the cache invalidation after resetting the timer and the
split brain choice. So the next calls to access the file will fail with EIO.
Upstream patch: https://review.gluster.org/#/c/18546/
> Change-Id: I698cb833676b22ff3e4c6daf8b883a0958f51a64
> BUG: 1503519
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
Change-Id: I698cb833676b22ff3e4c6daf8b883a0958f51a64
BUG: 1360331
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123560
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/cluster/afr/src/afr-common.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2925a1d..9c96056 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -730,6 +730,7 @@ afr_set_split_brain_choice_cbk (void *data)
xlator_t *this = THIS;
afr_spb_choice_timeout_cancel (this, inode);
+ inode_invalidate (inode);
inode_unref (inode);
return;
}
@@ -749,6 +750,7 @@ afr_set_split_brain_choice (int ret, call_frame_t *frame, void *opaque)
gf_boolean_t timer_set = _gf_false;
gf_boolean_t timer_cancelled = _gf_false;
gf_boolean_t timer_reset = _gf_false;
+ gf_boolean_t need_invalidate = _gf_true;
int old_spb_choice = -1;
frame = data->frame;
@@ -861,6 +863,7 @@ set_timer:
timer_set = _gf_true;
if (timer_reset && !ctx->timer)
timer_cancelled = _gf_true;
+ need_invalidate = _gf_false;
}
unlock:
UNLOCK(&inode->lock);
@@ -873,7 +876,8 @@ unlock:
* reads from an older cached value despite a change in spb_choice to
* a new value.
*/
- inode_invalidate (inode);
+ if (need_invalidate)
+ inode_invalidate (inode);
out:
if (data)
GF_FREE (data);
--
1.8.3.1

View File

@ -0,0 +1,959 @@
From c098fa2192eedbfaad7ac850d0fb152695a3becf Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Sun, 25 Jun 2017 16:34:01 +0530
Subject: [PATCH 079/128] cluster/ec: Allow parallel writes in EC if possible
Problem:
Ec at the moment sends one modification fop after another, so if some of
the disks become slow for a while, then the wait time for the writes that
are waiting in the queue becomes really bad.
Fix:
Allow parallel writes when possible. For this we need to make 3 changes.
1) Each fop now has range parameters they will be updating.
2) Xattrop is changed to handle parallel xattrop requests where some
would be modifying just dirty xattr.
3) Fops that refer to size now take locks and update the locks.
upstream patch: https://review.gluster.org/#/c/17625/
>Fixes #251
>Change-Id: Ibc3c15372f91bbd6fb617f0d99399b3149fa64b2
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Note:
There is a delta compared to upstream patch:
For "disperse.parallel-writes" key we have reverted the flags
to reflect old type. Added New OP_VERSION for 3.13.0 in globals.h.
BUG: 1459101
Change-Id: Ibc3c15372f91bbd6fb617f0d99399b3149fa64b2
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123561
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
---
libglusterfs/src/globals.h | 4 +-
xlators/cluster/ec/src/ec-common.c | 191 ++++++++++++++++--------
xlators/cluster/ec/src/ec-common.h | 10 +-
xlators/cluster/ec/src/ec-dir-read.c | 6 +-
xlators/cluster/ec/src/ec-generic.c | 12 +-
xlators/cluster/ec/src/ec-inode-read.c | 22 ++-
xlators/cluster/ec/src/ec-inode-write.c | 124 +++++++++------
xlators/cluster/ec/src/ec-types.h | 8 +-
xlators/cluster/ec/src/ec.c | 51 ++++---
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 +
10 files changed, 291 insertions(+), 143 deletions(-)
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index bd7cffe..c627cfe 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -43,7 +43,7 @@
*/
#define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly
should not change */
-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_12_2 /* MAX VERSION is the maximum
+#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_0 /* MAX VERSION is the maximum
count in VME table, should
keep changing with
introduction of newer
@@ -101,6 +101,8 @@
#define GD_OP_VERSION_3_12_2 31202 /* Op-version for GlusterFS 3.12.2 */
+#define GD_OP_VERSION_3_13_0 31300 /* Op-version for GlusterFS 3.13.0 */
+
#include "xlator.h"
/* THIS */
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 6963907..f86ecf8 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -25,6 +25,40 @@
EC_FLAG_WAITING_DATA_DIRTY |\
EC_FLAG_WAITING_METADATA_DIRTY)
+off_t
+ec_range_end_get (off_t fl_start, size_t fl_size)
+{
+ off_t fl_end = 0;
+ switch (fl_size) {
+ case 0:
+ return fl_start;
+ case LLONG_MAX: /*Infinity*/
+ return LLONG_MAX;
+ default:
+ fl_end = fl_start + fl_size - 1;
+ if (fl_end < 0) /*over-flow*/
+ return LLONG_MAX;
+ else
+ return fl_end;
+ }
+}
+
+static gf_boolean_t
+ec_is_range_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
+{
+ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
+}
+
+static gf_boolean_t
+ec_lock_conflict (ec_lock_link_t *l1, ec_lock_link_t *l2)
+{
+ if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
+ (l2->fop->flags & EC_FLAG_LOCK_SHARED))
+ return _gf_false;
+
+ return ec_is_range_conflict (l1, l2);
+}
+
uint32_t
ec_select_first_by_read_policy (ec_t *ec, ec_fop_data_t *fop)
{
@@ -724,7 +758,7 @@ int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2)
}
void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags,
- loc_t *base)
+ loc_t *base, off_t fl_start, size_t fl_size)
{
ec_lock_link_t *link;
@@ -758,12 +792,15 @@ void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags,
link->update[EC_DATA_TXN] = (flags & EC_UPDATE_DATA) != 0;
link->update[EC_METADATA_TXN] = (flags & EC_UPDATE_META) != 0;
link->base = base;
+ link->fl_start = fl_start;
+ link->fl_end = ec_range_end_get (fl_start, fl_size);
lock->refs_pending++;
}
void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
- uint32_t flags, loc_t *base)
+ uint32_t flags, loc_t *base,
+ off_t fl_start, size_t fl_size)
{
ec_lock_t *lock = NULL;
ec_inode_t *ctx;
@@ -824,16 +861,17 @@ void ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc,
ctx->inode_lock = lock;
insert:
- ec_lock_insert(fop, lock, flags, base);
+ ec_lock_insert(fop, lock, flags, base, fl_start, fl_size);
update_query:
lock->query |= (flags & EC_QUERY_INFO) != 0;
unlock:
UNLOCK(&loc->inode->lock);
}
-void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags)
+void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ off_t fl_start, size_t fl_size)
{
- ec_lock_prepare_inode_internal(fop, loc, flags, NULL);
+ ec_lock_prepare_inode_internal(fop, loc, flags, NULL, fl_start, fl_size);
}
void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
@@ -859,12 +897,13 @@ void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
base = NULL;
}
- ec_lock_prepare_inode_internal(fop, &tmp, flags, base);
+ ec_lock_prepare_inode_internal(fop, &tmp, flags, base, 0, LLONG_MAX);
loc_wipe(&tmp);
}
-void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags)
+void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags,
+ off_t fl_start, size_t fl_size)
{
loc_t loc;
int32_t err;
@@ -880,7 +919,7 @@ void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags)
return;
}
- ec_lock_prepare_inode_internal(fop, &loc, flags, NULL);
+ ec_lock_prepare_inode_internal(fop, &loc, flags, NULL, fl_start, fl_size);
loc_wipe(&loc);
}
@@ -1314,17 +1353,16 @@ out:
}
}
-gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
- uint64_t *size)
+gf_boolean_t
+__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t *size)
{
ec_inode_t *ctx;
gf_boolean_t found = _gf_false;
- LOCK(&inode->lock);
-
ctx = __ec_inode_get(inode, fop->xl);
if (ctx == NULL) {
- goto unlock;
+ goto out;
}
if (ctx->have_size) {
@@ -1332,23 +1370,35 @@ gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
found = _gf_true;
}
-unlock:
+out:
+ return found;
+}
+
+gf_boolean_t
+ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t *size)
+{
+ gf_boolean_t found = _gf_false;
+
+ LOCK(&inode->lock);
+ {
+ found = __ec_get_inode_size (fop, inode, size);
+ }
UNLOCK(&inode->lock);
return found;
}
-gf_boolean_t ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
- uint64_t size)
+gf_boolean_t
+__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t size)
{
ec_inode_t *ctx;
gf_boolean_t found = _gf_false;
- LOCK(&inode->lock);
-
ctx = __ec_inode_get(inode, fop->xl);
if (ctx == NULL) {
- goto unlock;
+ goto out;
}
/* Normal fops always have ctx->have_size set. However self-heal calls this
@@ -1363,8 +1413,21 @@ gf_boolean_t ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
found = _gf_true;
-unlock:
- UNLOCK(&inode->lock);
+out:
+ return found;
+}
+
+gf_boolean_t
+ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t size)
+{
+ gf_boolean_t found = _gf_false;
+
+ LOCK (&inode->lock);
+ {
+ found = __ec_set_inode_size (fop, inode, size);
+ }
+ UNLOCK (&inode->lock);
return found;
}
@@ -1471,34 +1534,47 @@ ec_lock_update_fd(ec_lock_t *lock, ec_fop_data_t *fop)
}
}
+static gf_boolean_t
+ec_link_has_lock_conflict (ec_lock_link_t *link, struct list_head *owners)
+{
+ ec_lock_link_t *owner_link = NULL;
+ ec_t *ec = link->fop->xl->private;
+
+ if (!ec->parallel_writes)
+ return _gf_true;
+
+ list_for_each_entry (owner_link, owners, owner_list) {
+ if (ec_lock_conflict (owner_link, link))
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
static void
ec_lock_wake_shared(ec_lock_t *lock, struct list_head *list)
{
ec_fop_data_t *fop;
ec_lock_link_t *link;
- gf_boolean_t exclusive = _gf_false;
+ gf_boolean_t conflict = _gf_false;
- while (!exclusive && !list_empty(&lock->waiting)) {
+ while (!conflict && !list_empty(&lock->waiting)) {
link = list_entry(lock->waiting.next, ec_lock_link_t, wait_list);
fop = link->fop;
/* If lock is not acquired, at most one fop can be assigned as owner.
* The following fops will need to wait in the lock->waiting queue
* until the lock has been fully acquired. */
- exclusive = !lock->acquired;
+ conflict = !lock->acquired;
/* If the fop is not shareable, only this fop can be assigned as owner.
* Other fops will need to wait until this one finishes. */
- if ((fop->flags & EC_FLAG_LOCK_SHARED) == 0) {
- exclusive = _gf_true;
-
- /* Avoid other requests to be assigned as owners. */
- lock->exclusive = 1;
+ if (ec_link_has_lock_conflict (link, &lock->owners)) {
+ conflict = _gf_true;
}
/* If only one fop is allowed, it can be assigned as the owner of the
* lock only if there weren't any other owner. */
- if (exclusive && !list_empty(&lock->owners)) {
+ if (conflict && !list_empty(&lock->owners)) {
break;
}
@@ -1565,9 +1641,7 @@ void ec_lock_acquired(ec_lock_link_t *link)
lock->acquired = _gf_true;
ec_lock_update_fd(lock, fop);
- if ((fop->flags & EC_FLAG_LOCK_SHARED) != 0) {
- ec_lock_wake_shared(lock, &list);
- }
+ ec_lock_wake_shared(lock, &list);
UNLOCK(&lock->loc.inode->lock);
@@ -1678,11 +1752,11 @@ ec_lock_assign_owner(ec_lock_link_t *link)
/* We are trying to acquire a lock that has an unlock timer active.
* This means that the lock must be idle, i.e. no fop can be in the
* owner, waiting or frozen lists. It also means that the lock cannot
- * have been marked as being released (this is done without timers)
- * and it must not be exclusive. There should only be one owner
- * reference, but it's possible that some fops are being prepared to
- * use this lock. */
- GF_ASSERT ((lock->exclusive == 0) && (lock->refs_owners == 1) &&
+ * have been marked as being released (this is done without timers).
+ * There should only be one owner reference, but it's possible that
+ * some fops are being prepared to use this lock.
+ */
+ GF_ASSERT ((lock->refs_owners == 1) &&
list_empty(&lock->owners) && list_empty(&lock->waiting));
/* We take the timer_link before cancelling the timer, since a
@@ -1730,13 +1804,15 @@ ec_lock_assign_owner(ec_lock_link_t *link)
lock->timer = NULL;
}
- lock->exclusive |= (fop->flags & EC_FLAG_LOCK_SHARED) == 0;
-
if (!list_empty(&lock->owners)) {
/* There are other owners of this lock. We can only take ownership if
- * the lock is already acquired and can be shared. Otherwise we need
- * to wait. */
- if (!lock->acquired || (lock->exclusive != 0)) {
+ * the lock is already acquired and doesn't have conflict with existing
+ * owners, or waiters(to prevent starvation).
+ * Otherwise we need to wait.
+ */
+ if (!lock->acquired ||
+ ec_link_has_lock_conflict (link, &lock->owners) ||
+ ec_link_has_lock_conflict (link, &lock->waiting)) {
ec_trace("LOCK_QUEUE_WAIT", fop, "lock=%p", lock);
list_add_tail(&link->wait_list, &lock->waiting);
@@ -1814,10 +1890,7 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
}
ec_lock_update_good(lock, fop);
- lock->exclusive -= (fop->flags & EC_FLAG_LOCK_SHARED) == 0;
- if (list_empty(&lock->owners)) {
- ec_lock_wake_shared(lock, &list);
- }
+ ec_lock_wake_shared(lock, &list);
UNLOCK(&lock->loc.inode->lock);
@@ -1871,11 +1944,11 @@ ec_lock_unfreeze(ec_lock_link_t *link)
lock->acquired = _gf_false;
/* We are unfreezing a lock. This means that the lock has already been
- * released. In this state it shouldn't be exclusive nor have a pending
- * timer nor have any owner, and the waiting list should be empty. Only
- * the frozen list can contain some fop. */
- GF_ASSERT((lock->exclusive == 0) && (lock->timer == NULL) &&
- list_empty(&lock->waiting) && list_empty(&lock->owners));
+ * released. In this state it shouldn't have a pending timer nor have any
+ * owner, and the waiting list should be empty. Only the frozen list can
+ * contain some fop. */
+ GF_ASSERT((lock->timer == NULL) && list_empty(&lock->waiting) &&
+ list_empty(&lock->owners));
/* We move all frozen fops to the waiting list. */
list_splice_init(&lock->frozen, &lock->waiting);
@@ -2008,7 +2081,7 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
ec_fop_data_t *fop;
ec_lock_t *lock;
ec_inode_t *ctx;
- dict_t * dict;
+ dict_t *dict = NULL;
uintptr_t update_on = 0;
int32_t err = -ENOMEM;
@@ -2198,12 +2271,12 @@ ec_unlock_timer_del(ec_lock_link_t *link)
ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock);
/* The unlock timer has expired without anyone cancelling it.
- * This means that it shouldn't have any owner, and the
- * waiting and frozen lists should be empty. It shouldn't have
- * been marked as release nor be exclusive either. It must have
- * only one owner reference, but there can be fops being
- * prepared though. */
- GF_ASSERT(!lock->release && (lock->exclusive == 0) &&
+ * This means that it shouldn't have any owner, and the waiting
+ * and frozen lists should be empty. It must have only one
+ * owner reference, but there can be fops being prepared
+ * though.
+ * */
+ GF_ASSERT(!lock->release &&
(lock->refs_owners == 1) &&
list_empty(&lock->owners) &&
list_empty(&lock->waiting) &&
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 8f5d20a..1a947cc 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -91,18 +91,24 @@ ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro);
gf_boolean_t
ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro);
-void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags);
+void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ off_t fl_start, size_t fl_size);
void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
uint32_t flags);
-void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags);
+void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags,
+ off_t fl_start, size_t fl_size);
void ec_lock(ec_fop_data_t * fop);
void ec_lock_reuse(ec_fop_data_t *fop);
void ec_unlock(ec_fop_data_t * fop);
gf_boolean_t ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
uint64_t *size);
+gf_boolean_t __ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t *size);
gf_boolean_t ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
uint64_t size);
+gf_boolean_t __ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode,
+ uint64_t size);
void ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode);
void ec_flush_size_version(ec_fop_data_t * fop);
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index 4fe82e3..48afe54 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -141,7 +141,8 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
- ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO);
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ LLONG_MAX);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -432,7 +433,8 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
}
fop->mask &= 1ULL << idx;
} else {
- ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0,
+ LLONG_MAX);
ec_lock(fop);
}
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
index ddb90ce..a5f986e 100644
--- a/xlators/cluster/ec/src/ec-generic.c
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -85,7 +85,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock_prepare_fd(fop, fop->fd, 0, 0, LLONG_MAX);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -300,7 +300,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0, LLONG_MAX);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -501,7 +501,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, 0);
+ ec_lock_prepare_fd(fop, fop->fd, 0, 0, LLONG_MAX);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -1220,9 +1220,11 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
case EC_STATE_INIT:
case EC_STATE_LOCK:
if (fop->fd == NULL) {
- ec_lock_prepare_inode(fop, &fop->loc[0], EC_UPDATE_META);
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_UPDATE_META, 0,
+ LLONG_MAX);
} else {
- ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META);
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META, 0,
+ LLONG_MAX);
}
ec_lock(fop);
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index 829f47f..33fd7f5 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -72,7 +72,8 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state)
switch (state) {
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO);
+ ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ LLONG_MAX);
ec_lock (fop);
return EC_STATE_DISPATCH;
@@ -311,9 +312,11 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
(strncmp(fop->str[0], GF_XATTR_CLRLK_CMD,
strlen(GF_XATTR_CLRLK_CMD)) != 0)) {
if (fop->fd == NULL) {
- ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO);
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO,
+ 0, LLONG_MAX);
} else {
- ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0,
+ LLONG_MAX);
}
ec_lock(fop);
}
@@ -1029,7 +1032,8 @@ int32_t ec_manager_readlink(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
- ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO);
+ ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ LLONG_MAX);
ec_lock (fop);
return EC_STATE_DISPATCH;
@@ -1364,7 +1368,8 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset,
+ fop->size);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -1568,7 +1573,7 @@ int32_t ec_manager_seek(ec_fop_data_t *fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
- ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset, LLONG_MAX);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -1788,9 +1793,10 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
case EC_STATE_INIT:
case EC_STATE_LOCK:
if (fop->fd == NULL) {
- ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO);
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ LLONG_MAX);
} else {
- ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0, LLONG_MAX);
}
ec_lock(fop);
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index 3ed9b2a..e6a67cf 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -127,10 +127,12 @@ ec_manager_xattr (ec_fop_data_t *fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL) {
ec_lock_prepare_inode(fop, &fop->loc[0],
- EC_UPDATE_META | EC_QUERY_INFO);
+ EC_UPDATE_META | EC_QUERY_INFO,
+ 0, LLONG_MAX);
} else {
ec_lock_prepare_fd(fop, fop->fd,
- EC_UPDATE_META | EC_QUERY_INFO);
+ EC_UPDATE_META | EC_QUERY_INFO,
+ 0, LLONG_MAX);
}
ec_lock(fop);
@@ -369,10 +371,11 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL) {
ec_lock_prepare_inode(fop, &fop->loc[0],
- EC_UPDATE_META | EC_QUERY_INFO);
+ EC_UPDATE_META | EC_QUERY_INFO,
+ 0, LLONG_MAX);
} else {
- ec_lock_prepare_fd(fop, fop->fd,
- EC_UPDATE_META | EC_QUERY_INFO);
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO,
+ 0, LLONG_MAX);
}
ec_lock(fop);
@@ -879,8 +882,8 @@ int32_t ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
case EC_STATE_LOCK:
ec_lock_prepare_fd(fop, fop->fd,
- EC_UPDATE_DATA | EC_UPDATE_META |
- EC_QUERY_INFO);
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ fop->offset, fop->size);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -898,24 +901,28 @@ int32_t ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
cbk->count);
/* This shouldn't fail because we have the inode locked. */
- GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
- &cbk->iatt[0].ia_size));
+ LOCK(&fop->locks[0].lock->loc.inode->lock);
+ {
+ GF_ASSERT(__ec_get_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
- /*If mode has FALLOC_FL_KEEP_SIZE keep the size */
- if (fop->int32 & FALLOC_FL_KEEP_SIZE) {
- cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
- } else if (fop->user_size > cbk->iatt[0].ia_size) {
- cbk->iatt[1].ia_size = fop->user_size;
-
- /* This shouldn't fail because we have the inode
- * locked. */
- GF_ASSERT(ec_set_inode_size(fop,
- fop->locks[0].lock->loc.inode,
- cbk->iatt[1].ia_size));
- } else {
- cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ /*If mode has FALLOC_FL_KEEP_SIZE keep the size */
+ if (fop->int32 & FALLOC_FL_KEEP_SIZE) {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ } else if (fop->user_size > cbk->iatt[0].ia_size) {
+ cbk->iatt[1].ia_size = fop->user_size;
+
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(__ec_set_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ cbk->iatt[1].ia_size));
+ } else {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
}
-
+ UNLOCK(&fop->locks[0].lock->loc.inode->lock);
}
return EC_STATE_REPORT;
@@ -1155,11 +1162,11 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
if (fop->id == GF_FOP_TRUNCATE) {
ec_lock_prepare_inode(fop, &fop->loc[0],
EC_UPDATE_DATA | EC_UPDATE_META |
- EC_QUERY_INFO);
+ EC_QUERY_INFO, fop->offset, LLONG_MAX);
} else {
ec_lock_prepare_fd(fop, fop->fd,
EC_UPDATE_DATA | EC_UPDATE_META |
- EC_QUERY_INFO);
+ EC_QUERY_INFO, fop->offset, LLONG_MAX);
}
ec_lock(fop);
@@ -1179,6 +1186,9 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
cbk->count);
/* This shouldn't fail because we have the inode locked. */
+ /* Inode size doesn't need to be updated under locks, because
+ * conflicting operations won't be in-flight
+ */
GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
&cbk->iatt[0].ia_size));
cbk->iatt[1].ia_size = fop->user_size;
@@ -1582,6 +1592,9 @@ void ec_writev_start(ec_fop_data_t *fop)
ctx = ec_fd_get(fop->fd, fop->xl);
if (ctx != NULL) {
if ((ctx->flags & O_APPEND) != 0) {
+ /* Appending writes take full locks so size won't change because
+ * of any parallel operations
+ */
fop->offset = current;
}
}
@@ -1601,6 +1614,10 @@ void ec_writev_start(ec_fop_data_t *fop)
}
tail = fop->size - fop->user_size - fop->head;
if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
+ /* Current locking scheme will make sure the 'current' below will
+ * never decrease while the fop is in progress, so the checks will
+ * work as expected
+ */
if (current > fop->offset + fop->head + fop->user_size) {
if (ec_make_internal_fop_xdata (&xdata)) {
err = -ENOMEM;
@@ -1678,14 +1695,32 @@ ec_writev_encode(ec_fop_data_t *fop)
int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state)
{
ec_cbk_data_t *cbk;
+ ec_fd_t *ctx = NULL;
+ ec_t *ec = fop->xl->private;
+ off_t fl_start = 0;
+ size_t fl_size = LLONG_MAX;
switch (state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ if ((ctx->flags & O_APPEND) == 0) {
+ off_t user_size = 0;
+ off_t head = 0;
+
+ fl_start = fop->offset;
+ user_size = iov_length(fop->vector, fop->int32);
+ head = ec_adjust_offset_down(ec, &fl_start,
+ _gf_true);
+ fl_size = user_size + head;
+ ec_adjust_size_up(ec, &fl_size, _gf_true);
+ }
+ }
ec_lock_prepare_fd(fop, fop->fd,
EC_UPDATE_DATA | EC_UPDATE_META |
- EC_QUERY_INFO);
+ EC_QUERY_INFO, fl_start, fl_size);
ec_lock(fop);
return EC_STATE_DISPATCH;
@@ -1717,23 +1752,28 @@ int32_t ec_manager_writev(ec_fop_data_t *fop, int32_t state)
cbk->count);
/* This shouldn't fail because we have the inode locked. */
- GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode,
- &cbk->iatt[0].ia_size));
- cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
- size = fop->offset + fop->head + fop->user_size;
- if (size > cbk->iatt[0].ia_size) {
- /* Only update inode size if this is a top level fop.
- * Otherwise this is an internal write and the top
- * level fop should take care of the real inode size.
- */
- if (fop->parent == NULL) {
- /* This shouldn't fail because we have the inode
- * locked. */
- GF_ASSERT(ec_set_inode_size(fop, fop->fd->inode,
- size));
- }
- cbk->iatt[1].ia_size = size;
+ LOCK(&fop->fd->inode->lock);
+ {
+ GF_ASSERT(__ec_get_inode_size(fop, fop->fd->inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ size = fop->offset + fop->head + fop->user_size;
+ if (size > cbk->iatt[0].ia_size) {
+ /* Only update inode size if this is a top level fop.
+ * Otherwise this is an internal write and the top
+ * level fop should take care of the real inode size.
+ */
+ if (fop->parent == NULL) {
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(__ec_set_inode_size(fop,
+ fop->fd->inode, size));
+ }
+ cbk->iatt[1].ia_size = size;
+ }
}
+ UNLOCK(&fop->fd->inode->lock);
+
if (fop->error == 0) {
cbk->op_ret *= ec->fragments;
if (cbk->op_ret < fop->head) {
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 5601f96..354b4ed 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -211,8 +211,8 @@ struct _ec_lock {
struct list_head owners;
/* List of fops waiting to be an owner of the lock. Fops are added to this
- * list when the current owner has an incompatible access (shared vs
- * exclusive) or the lock is not acquired yet. */
+ * list when the current owner has an incompatible access (conflicting lock)
+ * or the lock is not acquired yet. */
struct list_head waiting;
/* List of fops that will wait until the next unlock/lock cycle. This
@@ -221,7 +221,6 @@ struct _ec_lock {
* after the lock is reacquired. */
struct list_head frozen;
- int32_t exclusive;
uintptr_t mask;
uintptr_t good_mask;
uintptr_t healing;
@@ -251,6 +250,8 @@ struct _ec_lock_link {
loc_t *base;
uint64_t size;
uint32_t waiting_flags;
+ off_t fl_start;
+ off_t fl_end;
};
struct _ec_fop_data {
@@ -564,6 +565,7 @@ struct _ec {
gf_boolean_t shutdown;
gf_boolean_t eager_lock;
gf_boolean_t optimistic_changelog;
+ gf_boolean_t parallel_writes;
uint32_t background_heals;
uint32_t heal_wait_qlen;
uint32_t self_heal_window_size; /* max size of read/writes */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index c32f4ef..856d60c 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -295,6 +295,8 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("optimistic-change-log", ec->optimistic_changelog,
options, bool, failed);
+ GF_OPTION_RECONF ("parallel-writes", ec->parallel_writes,
+ options, bool, failed);
ret = 0;
if (ec_assign_read_policy (ec, read_policy)) {
ret = -1;
@@ -665,6 +667,7 @@ init (xlator_t *this)
GF_OPTION_INIT ("shd-max-threads", ec->shd.max_threads, uint32, failed);
GF_OPTION_INIT ("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed);
GF_OPTION_INIT ("optimistic-change-log", ec->optimistic_changelog, bool, failed);
+ GF_OPTION_INIT ("parallel-writes", ec->parallel_writes, bool, failed);
this->itable = inode_table_new (EC_SHD_INODE_LRU_LIMIT, this);
if (!this->itable)
@@ -1466,28 +1469,34 @@ struct volume_options options[] =
"galois field computations."
},
{ .key = {"self-heal-window-size"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 1024,
- .default_value = "1",
- .description = "Maximum number blocks(128KB) per file for which "
- "self-heal process would be applied simultaneously."
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .description = "Maximum number blocks(128KB) per file for which "
+ "self-heal process would be applied simultaneously."
},
- { .key = {"optimistic-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- .description = "Set/Unset dirty flag for every update fop at the start"
- "of the fop. If OFF, this option impacts performance of"
- "entry operations or metadata operations as it will"
- "set dirty flag at the start and unset it at the end of"
- "ALL update fop. If ON and all the bricks are good,"
- "dirty flag will be set at the start only for file fops"
- "For metadata and entry fops dirty flag will not be set"
- "at the start, if all the bricks are good. This does"
- "not impact performance for metadata operations and"
- "entry operation but has a very small window to miss"
- "marking entry as dirty in case it is required to be"
- "healed"
+ { .key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Set/Unset dirty flag for every update fop at the start"
+ "of the fop. If OFF, this option impacts performance of"
+ "entry operations or metadata operations as it will"
+ "set dirty flag at the start and unset it at the end of"
+ "ALL update fop. If ON and all the bricks are good,"
+ "dirty flag will be set at the start only for file fops"
+ "For metadata and entry fops dirty flag will not be set"
+ "at the start, if all the bricks are good. This does"
+ "not impact performance for metadata operations and"
+ "entry operation but has a very small window to miss"
+ "marking entry as dirty in case it is required to be"
+ "healed"
+ },
+ { .key = {"parallel-writes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This controls if writes can be wound in parallel as long "
+ "as it doesn't modify same stripes"
+ },
{ .key = {NULL} }
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 7fe76e5..b15a5af 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -3510,6 +3510,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_12_0,
.validate_fn = validate_boolean
},
+ { .key = "disperse.parallel-writes",
+ .voltype = "cluster/disperse",
+ .type = NO_DOC,
+ .op_version = GD_OP_VERSION_3_13_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = NULL
}
};
--
1.8.3.1

View File

@ -0,0 +1,607 @@
From dd892d811ec66841b274f5ed6e22040cbdc003b0 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 20 Nov 2017 11:46:59 +0530
Subject: [PATCH 080/128] heal: New feature heal info summary to list the
status of brick and count of entries to be healed
Command output:
Brick 192.168.2.8:/brick/1
Status: Connected
Total Number of entries: 363
Number of entries in heal pending: 362
Number of entries in split-brain: 0
Number of entries possibly healing: 1
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cliOutput>
<healInfo>
<bricks>
<brick hostUuid="9105dd4b-eca8-4fdb-85b2-b81cdf77eda3">
<name>192.168.2.8:/brick/1</name>
<status>Connected</status>
<totalNumberOfEntries>363</totalNumberOfEntries>
<numberOfEntriesInHealPending>362</numberOfEntriesInHealPending>
<numberOfEntriesInSplitBrain>0</numberOfEntriesInSplitBrain>
<numberOfEntriesPossiblyHealing>1</numberOfEntriesPossiblyHealing>
</brick>
</bricks>
</healInfo>
<opRet>0</opRet>
<opErrno>0</opErrno>
<opErrstr/>
</cliOutput>
> Change-Id: I40cb6f77a14131c9e41b292f4901b41a228863d7
> BUG: 1261463
> Signed-off-by: Mohamed Ashiq Liyazudeen <mliyazud@redhat.com>
> Reviewed-on: https://review.gluster.org/12154
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Tested-by: Karthik U S <ksubrahm@redhat.com>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Ravishankar N <ravishankar@redhat.com>
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Change-Id: I40cb6f77a14131c9e41b292f4901b41a228863d7
BUG: 1286820
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123640
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
cli/src/cli-cmd-parser.c | 5 +
cli/src/cli-cmd-volume.c | 9 +-
cli/src/cli-rpc-ops.c | 3 +-
heal/src/glfs-heal.c | 218 ++++++++++++++++++++----
rpc/rpc-lib/src/protocol-common.h | 1 +
xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 1 +
6 files changed, 205 insertions(+), 32 deletions(-)
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index c95b262..764f420 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -3952,6 +3952,11 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
GF_SHD_OP_SPLIT_BRAIN_FILES);
goto done;
}
+ if (!strcmp (words[4], "summary")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_SHD_OP_HEAL_SUMMARY);
+ goto done;
+ }
}
if (!strcmp (words[3], "statistics")) {
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index a1e5c51..7110145 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -2803,7 +2803,8 @@ cli_print_brick_status (cli_volume_status_t *status)
(op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) || \
(op == GF_SHD_OP_INDEX_SUMMARY) || \
(op == GF_SHD_OP_SPLIT_BRAIN_FILES) || \
- (op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE))
+ (op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || \
+ (op == GF_SHD_OP_HEAL_SUMMARY))
int
cli_launch_glfs_heal (int heal_op, dict_t *options)
@@ -2856,6 +2857,12 @@ cli_launch_glfs_heal (int heal_op, dict_t *options)
case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE:
runner_add_args (&runner, "granular-entry-heal-op", NULL);
break;
+ case GF_SHD_OP_HEAL_SUMMARY:
+ runner_add_args (&runner, "info-summary", NULL);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ runner_add_args (&runner, "xml", NULL);
+ }
+ break;
default:
ret = -1;
}
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 67e29a0..b91400b 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -9112,11 +9112,12 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
heal_op_str = "count of entries to be healed per replica";
break;
- /* The below 3 cases are never hit; they're coded only to make
+ /* The below 4 cases are never hit; they're coded only to make
* compiler warnings go away.*/
case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ case GF_SHD_OP_HEAL_SUMMARY:
break;
case GF_SHD_OP_INVALID:
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index 27115f3..27a9624 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -40,18 +40,25 @@ xmlDocPtr glfsh_doc = NULL;
ret = 0; \
} while (0) \
-typedef int (*print_status) (dict_t *, char *, uuid_t, uint64_t *,
+typedef struct num_entries {
+ uint64_t num_entries;
+ uint64_t pending_entries;
+ uint64_t spb_entries;
+ uint64_t possibly_healing_entries;
+} num_entries_t;
+
+typedef int (*print_status) (dict_t *, char *, uuid_t, num_entries_t *,
gf_boolean_t flag);
int glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol,
loc_t *rootloc, char *file, dict_t *xattr_req);
-
typedef struct glfs_info {
int (*init)(void);
int (*print_brick_from_xl)(xlator_t *xl, loc_t *rootloc);
int (*print_heal_op_status)(int ret, uint64_t num_entries,
char *fmt_str);
+ int (*print_heal_op_summary)(int ret, num_entries_t *num_entries);
void (*print_heal_status)(char *path, uuid_t gfid, char *status);
void (*print_spb_status)(char *path, uuid_t gfid, char *status);
int (*end) (int op_ret, char *op_errstr);
@@ -64,7 +71,7 @@ int32_t is_xml;
#define USAGE_STR "Usage: %s <VOLNAME> [bigger-file <FILE> | "\
"latest-mtime <FILE> | "\
"source-brick <HOSTNAME:BRICKNAME> [<FILE>] | "\
- "split-brain-info]\n"
+ "split-brain-info | info-summary]\n"
typedef enum {
GLFSH_MODE_CONTINUE_ON_ERROR = 1,
@@ -259,6 +266,54 @@ out:
return ret;
}
+int
+glfsh_print_xml_heal_op_summary (int ret, num_entries_t *num_entries)
+{
+ if (ret < 0 && num_entries->num_entries == 0) {
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"status",
+ "%s", strerror (-ret));
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"totalNumberOfEntries", "-");
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"numberOfEntriesInHealPending", "-");
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"numberOfEntriesInSplitBrain", "-");
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"numberOfEntriesPossiblyHealing",
+ "-");
+ goto out;
+ } else if (ret == 0) {
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"status",
+ "%s", "Connected");
+ }
+
+ if (ret < 0) {
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"status", "Failed to process entries"
+ " completely. "
+ "(%s)totalNumberOfEntries%"PRIu64"",
+ strerror (-ret), num_entries->num_entries);
+ } else {
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"totalNumberOfEntries",
+ "%"PRIu64"", num_entries->num_entries);
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"numberOfEntriesInHealPending",
+ "%"PRIu64"", num_entries->pending_entries);
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"numberOfEntriesInSplitBrain",
+ "%"PRIu64"", num_entries->spb_entries);
+ xmlTextWriterWriteFormatElement (glfsh_writer,
+ (xmlChar *)"numberOfEntriesPossiblyHealing",
+ "%"PRIu64"",
+ num_entries->possibly_healing_entries);
+ }
+out:
+ return xmlTextWriterEndElement (glfsh_writer);
+}
+
void
glfsh_print_xml_file_status (char *path, uuid_t gfid, char *status)
{
@@ -338,6 +393,39 @@ glfsh_no_print_hr_heal_op_status (int ret, uint64_t num_entries, char *fmt_str)
}
int
+glfsh_print_hr_heal_op_summary (int ret, num_entries_t *num_entries)
+{
+ if (ret < 0 && num_entries->num_entries == 0) {
+ printf ("Status: %s\n", strerror (-ret));
+ printf ("Total Number of entries: -\n");
+ printf ("Number of entries in heal pending: -\n");
+ printf ("Number of entries in split-brain: -\n");
+ printf ("Number of entries possibly healing: -\n");
+ goto out;
+ } else if (ret == 0) {
+ printf ("Status: Connected\n");
+ }
+
+ if (ret < 0) {
+ printf ("Status: Failed to process entries completely. "
+ "(%s)\nTotal Number of entries: %"PRIu64"\n",
+ strerror (-ret), num_entries->num_entries);
+ } else {
+ printf ("Total Number of entries: %"PRIu64"\n",
+ num_entries->num_entries);
+ printf ("Number of entries in heal pending: %"PRIu64"\n",
+ num_entries->pending_entries);
+ printf ("Number of entries in split-brain: %"PRIu64"\n",
+ num_entries->spb_entries);
+ printf ("Number of entries possibly healing: %"PRIu64"\n",
+ num_entries->possibly_healing_entries);
+ }
+out:
+ printf ("\n");
+ return 0;
+}
+
+int
glfsh_print_hr_heal_op_status (int ret, uint64_t num_entries, char *fmt_str)
{
if (ret < 0 && num_entries == 0) {
@@ -364,6 +452,13 @@ out:
}
int
+glfsh_print_info_summary (int ret, num_entries_t *num_entries)
+{
+ return glfsh_output->print_heal_op_summary (ret, num_entries);
+
+}
+
+int
glfsh_print_heal_op_status (int ret, uint64_t num_entries,
gf_xl_afr_op_t heal_op)
{
@@ -430,7 +525,8 @@ _get_ancestor (xlator_t *xl, gf_xl_afr_op_t heal_op)
NULL};
char **ancestors = NULL;
- if (heal_op == GF_SHD_OP_INDEX_SUMMARY)
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY ||
+ heal_op == GF_SHD_OP_HEAL_SUMMARY)
ancestors = heal_xls;
else
ancestors = replica_xl;
@@ -465,8 +561,35 @@ glfsh_index_purge (xlator_t *subvol, inode_t *inode, char *name)
}
int
+glfsh_print_summary_status (dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t flag)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ ret = dict_get_str (dict, "heal-info", &value);
+ if (ret)
+ goto out;
+
+ if ((!strcmp (value, "heal")) || (!strcmp (value, "heal-pending"))) {
+ (num_entries->pending_entries)++;
+ } else if ((!strcmp (value, "split-brain")) ||
+ (!strcmp (value, "split-brain-pending"))) {
+ (num_entries->spb_entries)++;
+ } else if ((!strcmp (value, "possibly-healing-pending")) ||
+ (!strcmp (value, "possibly-healing"))) {
+ (num_entries->possibly_healing_entries)++;
+ } else {
+ goto out;
+ }
+ (num_entries->num_entries)++;
+out:
+ return ret;
+}
+
+int
glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid,
- uint64_t *num_entries, gf_boolean_t flag)
+ num_entries_t *num_entries, gf_boolean_t flag)
{
int ret = 0;
gf_boolean_t pending = _gf_false;
@@ -492,7 +615,7 @@ glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid,
*/
if (split_b) {
if (!flag || (flag && !pending)) {
- (*num_entries)++;
+ (num_entries->num_entries)++;
glfsh_output->print_spb_status (path ? path :
uuid_utoa_r (gfid, gfid_str),
gfid, NULL);
@@ -503,7 +626,8 @@ glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid,
int
glfsh_print_heal_status (dict_t *dict, char *path, uuid_t gfid,
- uint64_t *num_entries, gf_boolean_t ignore_dirty)
+ num_entries_t *num_entries,
+ gf_boolean_t ignore_dirty)
{
int ret = 0;
gf_boolean_t pending = _gf_false;
@@ -562,7 +686,7 @@ out:
if (ret == -1)
status = NULL;
- (*num_entries)++;
+ (num_entries->num_entries)++;
glfsh_output->print_heal_status (path ? path :
uuid_utoa_r (gfid, gfid_str),
gfid,
@@ -574,7 +698,7 @@ out:
int
glfsh_heal_status_boolean (dict_t *dict, char *path, uuid_t gfid,
- uint64_t *num_entries, gf_boolean_t ignore_dirty)
+ num_entries_t *num_entries, gf_boolean_t ignore_dirty)
{
int ret = 0;
char *value = NULL;
@@ -589,7 +713,7 @@ glfsh_heal_status_boolean (dict_t *dict, char *path, uuid_t gfid,
static int
glfsh_heal_entries (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
gf_dirent_t *entries, uint64_t *offset,
- uint64_t *num_entries, dict_t *xattr_req) {
+ num_entries_t *num_entries, dict_t *xattr_req) {
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
@@ -607,7 +731,7 @@ glfsh_heal_entries (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
xattr_req);
if (ret)
continue;
- (*num_entries)++;
+ (num_entries->num_entries)++;
}
return ret;
@@ -615,7 +739,7 @@ glfsh_heal_entries (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
static int
glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
- uint64_t *offset, uint64_t *num_entries,
+ uint64_t *offset, num_entries_t *num_entries,
print_status glfsh_print_status,
gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode)
{
@@ -687,7 +811,7 @@ out:
static int
glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
xlator_t *readdir_xl, fd_t *fd, loc_t *loc,
- dict_t *xattr_req, uint64_t *num_entries,
+ dict_t *xattr_req, num_entries_t *num_entries,
gf_boolean_t ignore)
{
int ret = 0;
@@ -732,6 +856,14 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
ignore, mode);
if (ret < 0)
goto out;
+ } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) {
+ ret = glfsh_process_entries (readdir_xl, fd,
+ &entries, &offset,
+ num_entries,
+ glfsh_print_summary_status,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
} else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
ret = glfsh_heal_entries (fs, top_subvol, rootloc,
&entries, &offset,
@@ -786,7 +918,7 @@ int
glfsh_print_pending_heals_type (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
xlator_t *xl, gf_xl_afr_op_t heal_op,
dict_t *xattr_req, char *vgfid,
- uint64_t *num_entries)
+ num_entries_t *num_entries)
{
int ret = 0;
loc_t dirloc = {0};
@@ -827,7 +959,8 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
is_parent_replicate)
{
int ret = 0;
- uint64_t count = 0, total = 0;
+ num_entries_t num_entries = {0, };
+ num_entries_t total = {0, };
dict_t *xattr_req = NULL;
@@ -851,13 +984,20 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
ret = glfsh_print_pending_heals_type (fs, top_subvol, rootloc, xl,
heal_op, xattr_req,
- GF_XATTROP_INDEX_GFID, &count);
+ GF_XATTROP_INDEX_GFID,
+ &num_entries);
if (ret < 0 && heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
goto out;
- total += count;
- count = 0;
+ total.num_entries += num_entries.num_entries;
+ total.pending_entries += num_entries.pending_entries;
+ total.spb_entries += num_entries.spb_entries;
+ total.possibly_healing_entries += num_entries.possibly_healing_entries;
+ num_entries.num_entries = 0;
+ num_entries.pending_entries = 0;
+ num_entries.spb_entries = 0;
+ num_entries.possibly_healing_entries = 0;
if (ret == -ENOTCONN)
goto out;
@@ -866,13 +1006,20 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
rootloc, xl,
heal_op, xattr_req,
GF_XATTROP_DIRTY_GFID,
- &count);
- total += count;
+ &num_entries);
+ total.num_entries += num_entries.num_entries;
+ total.pending_entries += num_entries.pending_entries;
+ total.spb_entries += num_entries.spb_entries;
+ total.possibly_healing_entries += num_entries.possibly_healing_entries;
}
out:
if (xattr_req)
dict_unref (xattr_req);
- glfsh_print_heal_op_status (ret, total, heal_op);
+ if (heal_op == GF_SHD_OP_HEAL_SUMMARY) {
+ glfsh_print_info_summary (ret, &total);
+ } else {
+ glfsh_print_heal_op_status (ret, total.num_entries, heal_op);
+ }
return ret;
}
@@ -1193,7 +1340,7 @@ int
glfsh_heal_from_brick_type (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
char *hostname, char *brickpath, xlator_t *client,
dict_t *xattr_req, char *vgfid,
- uint64_t *num_entries)
+ num_entries_t *num_entries)
{
fd_t *fd = NULL;
loc_t dirloc = {0};
@@ -1229,9 +1376,10 @@ glfsh_heal_from_brick (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
char *hostname, char *brickpath, char *file)
{
int ret = -1;
- uint64_t count = 0, total = 0;
dict_t *xattr_req = NULL;
xlator_t *client = NULL;
+ num_entries_t num_entries = {0, };
+ num_entries_t total = {0, };
xattr_req = dict_new();
if (!xattr_req)
@@ -1258,9 +1406,9 @@ glfsh_heal_from_brick (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
hostname, brickpath,
client, xattr_req,
GF_XATTROP_INDEX_GFID,
- &count);
- total += count;
- count = 0;
+ &num_entries);
+ total.num_entries += num_entries.num_entries;
+ num_entries.num_entries = 0;
if (ret == -ENOTCONN)
goto out;
@@ -1268,8 +1416,8 @@ glfsh_heal_from_brick (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
hostname, brickpath,
client, xattr_req,
GF_XATTROP_DIRTY_GFID,
- &count);
- total += count;
+ &num_entries);
+ total.num_entries += num_entries.num_entries;
if (ret < 0)
goto out;
}
@@ -1277,7 +1425,7 @@ out:
if (xattr_req)
dict_unref (xattr_req);
if (!file)
- glfsh_print_heal_op_status (ret, total,
+ glfsh_print_heal_op_status (ret, total.num_entries,
GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
return ret;
@@ -1326,6 +1474,7 @@ glfsh_info_t glfsh_human_readable = {
.init = glfsh_init,
.print_brick_from_xl = glfsh_print_brick_from_xl,
.print_heal_op_status = glfsh_print_hr_heal_op_status,
+ .print_heal_op_summary = glfsh_print_hr_heal_op_summary,
.print_heal_status = glfsh_print_hr_heal_status,
.print_spb_status = glfsh_print_hr_spb_status,
.end = glfsh_end
@@ -1345,6 +1494,7 @@ glfsh_info_t glfsh_xml_output = {
.init = glfsh_xml_init,
.print_brick_from_xl = glfsh_print_xml_brick_from_xl,
.print_heal_op_status = glfsh_print_xml_heal_op_status,
+ .print_heal_op_summary = glfsh_print_xml_heal_op_summary,
.print_heal_status = glfsh_print_xml_file_status,
.print_spb_status = glfsh_print_xml_file_status,
.end = glfsh_xml_end
@@ -1385,6 +1535,8 @@ main (int argc, char **argv)
is_xml = 1;
} else if (!strcmp (argv[2], "granular-entry-heal-op")) {
heal_op = GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE;
+ } else if (!strcmp (argv[2], "info-summary")) {
+ heal_op = GF_SHD_OP_HEAL_SUMMARY;
} else {
printf (USAGE_STR, argv[0]);
ret = -1;
@@ -1396,6 +1548,10 @@ main (int argc, char **argv)
&& (!strcmp (argv[3], "xml"))) {
heal_op = GF_SHD_OP_SPLIT_BRAIN_FILES;
is_xml = 1;
+ } else if ((!strcmp (argv[2], "info-summary"))
+ && (!strcmp (argv[3], "xml"))) {
+ heal_op = GF_SHD_OP_HEAL_SUMMARY;
+ is_xml = 1;
} else if (!strcmp (argv[2], "bigger-file")) {
heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE;
file = argv[3];
@@ -1517,7 +1673,8 @@ main (int argc, char **argv)
if (ret < 0) {
ret = -EINVAL;
gf_asprintf (&op_errstr, "Volume %s is not of type %s", volname,
- (heal_op == GF_SHD_OP_INDEX_SUMMARY) ?
+ (heal_op == GF_SHD_OP_INDEX_SUMMARY ||
+ heal_op == GF_SHD_OP_HEAL_SUMMARY) ?
"replicate/disperse":"replicate");
goto out;
}
@@ -1528,6 +1685,7 @@ main (int argc, char **argv)
case GF_SHD_OP_INDEX_SUMMARY:
case GF_SHD_OP_SPLIT_BRAIN_FILES:
case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE:
+ case GF_SHD_OP_HEAL_SUMMARY:
ret = glfsh_gather_heal_info (fs, top_subvol, &rootloc,
heal_op);
break;
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 510817c..80e1f6b 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -265,6 +265,7 @@ typedef enum {
GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME,
GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE,
GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE,
+ GF_SHD_OP_HEAL_SUMMARY,
} gf_xl_afr_op_t ;
struct gf_gsync_detailed_status_ {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 725d194..222d5f4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -1867,6 +1867,7 @@ glusterd_handle_heal_cmd (xlator_t *this, glusterd_volinfo_t *volinfo,
case GF_SHD_OP_HEAL_DISABLE:/* This op should be handled in volume-set*/
case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE: /* This op should be handled in volume-set */
case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE: /* This op should be handled in volume-set */
+ case GF_SHD_OP_HEAL_SUMMARY:/*glfsheal cmd*/
case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:/*glfsheal cmd*/
case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:/*glfsheal cmd*/
case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:/*glfsheal cmd*/
--
1.8.3.1

View File

@ -0,0 +1,59 @@
From 98bd66d16730964b2c6d14d5430e23dd16138193 Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Wed, 22 Nov 2017 08:47:47 +0530
Subject: [PATCH 081/128] cluster/dht: Don't set ACLs on linkto file
The trusted.SGI_ACL_FILE appears to set posix
ACLs on the linkto file that is a target of
file migration. This can mess up file permissions
and cause linkto identification to fail.
Now we remove all ACL xattrs from the results of
the listxattr call on the source before setting them
on the target.
> BUG: 1515042
> https://review.gluster.org/#/c/18807/
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
Change-Id: I56802dbaed783a16e3fb90f59f4ce849f8a4a9b4
BUG: 1515051
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123870
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/cluster/dht/src/dht-rebalance.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 941e982..ae367d7 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -163,6 +163,14 @@ dht_send_rebalance_event (xlator_t *this, int cmd, gf_defrag_status_t status)
}
+static void
+dht_strip_out_acls (dict_t *dict)
+{
+ if (dict) {
+ dict_del (dict, "trusted.SGI_ACL_FILE");
+ dict_del (dict, "POSIX_ACL_ACCESS_XATTR");
+ }
+}
@@ -1624,6 +1632,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
loc->path, from->name);
}
+ /* Copying posix acls to the linkto file messes up the permissions*/
+ dht_strip_out_acls (xattr);
+
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file (this, to, from, loc, &stbuf,
&dst_fd, xattr, fop_errno);
--
1.8.3.1

View File

@ -0,0 +1,41 @@
From 931b40987fa4e2d4f7ddf561fd413663129e9072 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 27 Nov 2017 11:54:25 +0530
Subject: [PATCH 082/128] cluster/afr: Print heal info summary output in stream
fashion
Problem:
The heal info summary was printing the output at the end after
crawling for pending heal entries completes on all the bricks.
Fix:
Printing the output immediately after the crawl on individual brick
completes, so that it won't give the impression of CLI being hung.
Upstream patch: https://review.gluster.org/#/c/18832/
Change-Id: Ieaf5718736a7ee6837bac02bd30a95836e605dab
BUG: 1419438
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124283
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
heal/src/glfs-heal.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index 27a9624..22c9395 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -422,6 +422,7 @@ glfsh_print_hr_heal_op_summary (int ret, num_entries_t *num_entries)
}
out:
printf ("\n");
+ fflush (stdout);
return 0;
}
--
1.8.3.1

View File

@ -0,0 +1,55 @@
From 9c31bcd6c4234b6a9e4e04410caf98d2cd3d4946 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Fri, 17 Nov 2017 18:23:23 +0530
Subject: [PATCH 083/128] cluster/afr: Print heal info split-brain output in
stream fashion
Problem:
When we trigger the heal info split-brain command the o/p is not
streamed as it is received, but dumped at the end for all the bricks
together. This gives a perception that the command is hung.
Fix:
When we get a split brain entry while crawling through the pending
heal entries, flush that immediately so that it prints the output
in a stream fashion and doesn't look like the cli is hung.
Upstream patch: https://review.gluster.org/#/c/18570/
> Change-Id: I7547e86b83202d66616749b8b31d4d0dff0abf07
> BUG: 1506104
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
Change-Id: I7547e86b83202d66616749b8b31d4d0dff0abf07
BUG: 1419438
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123563
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
heal/src/glfs-heal.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index 22c9395..532b6f9 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -120,6 +120,7 @@ void
glfsh_print_hr_spb_status (char *path, uuid_t gfid, char *status)
{
printf ("%s\n", path);
+ fflush (stdout);
return;
}
@@ -133,6 +134,7 @@ void
glfsh_print_hr_heal_status (char *path, uuid_t gfid, char *status)
{
printf ("%s%s\n", path, status);
+ fflush (stdout);
}
#if (HAVE_LIB_XML)
--
1.8.3.1

View File

@ -0,0 +1,237 @@
From 5579f616c2c21a2a2cd2ef70b58149df85550db7 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 27 Nov 2017 12:51:16 +0530
Subject: [PATCH 084/128] cluster/afr: Fix for arbiter becoming source
Problem:
When eager-lock is on, and two writes happen in parallel on a FD
we were observing the following behaviour:
- First write fails on one data brick
- Since the post-op is not yet happened, the inode refresh will get
both the data bricks as readable and set it in the inode context
- In flight split brain check see both the data bricks as readable
and allows the second write
- Second write fails on the other data brick
- Now the post-op happens and marks both the data bricks as bad and
arbiter will become source for healing
Fix:
Adding one more variable called write_subvol in inode context and it
will have the in memory representation of the writable subvols. Inode
refresh will not update this value and its lifetime is pre-op through
unlock in the afr transaction. Initially the pre-op will set this
value same as read_subvol in inode context and then in the in flight
split brain check we will use this value instead of read_subvol.
After all the checks we will update the value of this and set the
read_subvol same as this to avoid having incorrect value in that.
Upstream patch: https://review.gluster.org/#/c/18049/
> Change-Id: I2ef6904524ab91af861d59690974bbc529ab1af3
> BUG: 1482064
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
Change-Id: I91cd21e378a7ae3757c2209fcb91a613d73e09ee
BUG: 1401969
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124292
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/cluster/afr/src/afr-common.c | 76 ++++++++++++++++++++++++++++++-
xlators/cluster/afr/src/afr-lk-common.c | 18 ++++++--
xlators/cluster/afr/src/afr-transaction.c | 4 ++
xlators/cluster/afr/src/afr.h | 10 ++++
4 files changed, 102 insertions(+), 6 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 9c96056..a8ba5a0 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -149,6 +149,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
}
tmp_ctx->spb_choice = -1;
tmp_ctx->read_subvol = 0;
+ tmp_ctx->write_subvol = 0;
} else {
tmp_ctx = (afr_inode_ctx_t *) ctx_int;
}
@@ -216,7 +217,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
if (ret < 0)
return ret;
- val = ctx->read_subvol;
+ val = ctx->write_subvol;
metadatamap_old = metadatamap = (val & 0x000000000000ffff);
datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
@@ -276,6 +277,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
(((uint64_t) datamap) << 16) |
(((uint64_t) event) << 32);
+ ctx->write_subvol = val;
ctx->read_subvol = val;
return ret;
@@ -6421,3 +6423,75 @@ afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
out:
return ret;
}
+
+int
+afr_write_subvol_set (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ uint64_t val = 0;
+ uint64_t val1 = 0;
+ int ret = -1;
+
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get (this, local->inode, &ctx);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_GET_FAILED,
+ "ERROR GETTING INODE CTX");
+ UNLOCK(&local->inode->lock);
+ return ret;
+ }
+
+ val = ctx->write_subvol;
+ /*
+ * We need to set the value of write_subvol to read_subvol in 2
+ * cases:
+ * 1. Initially when the value is 0. i.e., it's the first lock
+ * request.
+ * 2. If it's a metadata transaction. If metadata transactions
+ * comes in between data transactions and we have a brick
+ * disconnect, the next metadata transaction won't get the
+ * latest value of readables, since we do resetting of
+ * write_subvol in unlock code path only if it's a data
+ * transaction. To handle those scenarios we need to set the
+ * value of write_subvol to read_subvol in case of metadata
+ * transactions.
+ */
+ if (val == 0 ||
+ local->transaction.type == AFR_METADATA_TRANSACTION) {
+ val1 = ctx->read_subvol;
+ ctx->write_subvol = val1;
+ }
+ }
+ UNLOCK (&local->inode->lock);
+
+ return 0;
+}
+
+int
+afr_write_subvol_reset (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get (this, local->inode, &ctx);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_GET_FAILED,
+ "ERROR GETTING INODE CTX");
+ UNLOCK(&local->inode->lock);
+ return ret;
+ }
+ ctx->write_subvol = 0;
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 1f2a117..c17f60f 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -613,12 +613,16 @@ static int32_t
afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
+ priv = this->private;
LOCK (&frame->lock);
{
@@ -629,11 +633,15 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0) {
gf_msg_trace (this->name, 0,
"All internal locks unlocked");
-
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (0 == AFR_COUNT (fd_ctx->lock_acquired, priv->child_count))
+ ret = afr_write_subvol_reset (frame, this);
+ }
int_lock->lock_cbk (frame, this);
}
- return 0;
+ return ret;
}
void
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 35621d9..91c4f78 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1791,6 +1791,10 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
if (pre_nop)
goto next;
+ ret = afr_write_subvol_set (frame, this);
+ if (ret)
+ goto err;
+
if (!local->pre_op_compat) {
dict_copy (xdata_req, local->xdata_req);
goto next;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index c4ceb66..672d053 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -837,6 +837,7 @@ typedef struct _afr_local {
typedef struct _afr_inode_ctx {
uint64_t read_subvol;
+ uint64_t write_subvol;
int spb_choice;
gf_timer_t *timer;
gf_boolean_t need_refresh;
@@ -1262,4 +1263,13 @@ int
afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
char *buf, const char *default_str,
int32_t *serz_len, char delimiter);
+
+int
+__afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx);
+
+int
+afr_write_subvol_set (call_frame_t *frame, xlator_t *this);
+
+int
+afr_write_subvol_reset (call_frame_t *frame, xlator_t *this);
#endif /* __AFR_H__ */
--
1.8.3.1

View File

@ -0,0 +1,625 @@
From cbdc0b38c18583852fc9b2ca79ea5fdfa92c6ed5 Mon Sep 17 00:00:00 2001
From: Sunny Kumar <sunkumar@redhat.com>
Date: Mon, 27 Nov 2017 14:24:55 +0530
Subject: [PATCH 085/128] snapshot: Issue with other processes accessing the
mounted brick
Added code for unmount of activated snapshot brick during snapshot
deactivation process which make sense as mount point for deactivated
bricks should not exist.
Removed code for mounting newly created snapshot, as newly created
snapshots should not mount until it is activated.
Added code for mount point creation and snapshot mount during snapshot
activation.
Added validation during glusterd init for mounting only those snapshot
whose status is either STARTED or RESTORED.
During snapshot restore, mount point for stopped snap should exist as
it is required to set extended attribute.
During handshake, after getting updates from friend mount point for
activated snapshot should exist and should not for deactivated
snapshot.
While getting snap status we should show relevant information for
deactivated snapshots; after this patch the 'gluster snap status' command
will show output like-
Snap Name : snap1
Snap UUID : snap-uuid
Brick Path : server1:/run/gluster/snaps/snap-vol-name/brick
Volume Group : N/A (Deactivated Snapshot)
Brick Running : No
Brick PID : N/A
Data Percentage : N/A
LV Size : N/A
Fixes: #276
>BUG: 1482023
>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
upstream patch : https://review.gluster.org/18047
Change-Id: I65783488e35fac43632615ce1b8ff7b8e84834dc
BUG: 1464150
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124305
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
.../bug-1322772-real-path-fix-for-snapshot.t | 1 +
...e-with-other-processes-accessing-mounted-path.t | 114 ++++++++++++++++
.../mgmt/glusterd/src/glusterd-snapshot-utils.c | 85 ++++++++++++
.../mgmt/glusterd/src/glusterd-snapshot-utils.h | 3 +
xlators/mgmt/glusterd/src/glusterd-snapshot.c | 145 ++++++++++++++++-----
xlators/mgmt/glusterd/src/glusterd-store.c | 25 ++--
xlators/mgmt/glusterd/src/glusterd-store.h | 4 +
7 files changed, 333 insertions(+), 44 deletions(-)
create mode 100644 tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
diff --git a/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t b/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t
index bf625ec..488bd46 100644
--- a/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t
+++ b/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t
@@ -26,6 +26,7 @@ EXPECT 'Started' volinfo_field $V0 'Status'
TEST $CLI volume start $V1
EXPECT 'Started' volinfo_field $V1 'Status'
+TEST $CLI snapshot config activate-on-create enable
TEST $CLI snapshot create ${V0}_snap $V0 no-timestamp
TEST $CLI snapshot create ${V1}_snap $V1 no-timestamp
diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
new file mode 100644
index 0000000..c5a0088
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function create_snapshots() {
+ $CLI_1 snapshot create ${V0}_snap ${V0} no-timestamp &
+ PID_1=$!
+
+ $CLI_1 snapshot create ${V1}_snap ${V1} no-timestamp &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function activate_snapshots() {
+ $CLI_1 snapshot activate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot activate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function deactivate_snapshots() {
+ $CLI_1 snapshot deactivate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot deactivate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+cleanup;
+
+TEST verify_lvm_version;
+# Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+# Create volumes
+TEST $CLI_1 volume create $V0 $H1:$L1
+TEST $CLI_2 volume create $V1 $H2:$L2 $H3:$L3
+
+# Start volumes
+TEST $CLI_1 volume start $V0
+TEST $CLI_2 volume start $V1
+
+TEST $CLI_1 snapshot config activate-on-create enable
+
+# Snapshot Operations
+create_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+deactivate_snapshots
+
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
+activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+# This Function will get snap id form snap info command and will
+# check for mount point in system against snap id.
+function mounted_snaps
+{
+ snap_id=`$CLI_1 snap info $1_snap | grep "Snap Volume Name" |
+ awk -F ":" '{print $2}'`
+ echo `mount | grep $snap_id | wc -l`
+}
+
+EXPECT "1" mounted_snaps ${V0}
+EXPECT "2" mounted_snaps ${V1}
+
+deactivate_snapshots
+
+EXPECT "0" mounted_snaps ${V0}
+EXPECT "0" mounted_snaps ${V1}
+
+# This part of test is designed to validate that updates are properly being
+# handled during handshake.
+
+activate_snapshots
+kill_glusterd 2
+deactivate_snapshots
+TEST start_glusterd 2
+
+# Updates form friend should reflect as snap was deactivated while glusterd
+# process was inactive and mount point should also not exist.
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V1}
+
+kill_glusterd 2
+activate_snapshots
+TEST start_glusterd 2
+
+# Updates form friend should reflect as snap was activated while glusterd
+# process was inactive and mount point should exist.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" mounted_snaps ${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" mounted_snaps ${V1}
+
+cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index 2a0d321..3f03d2b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -1678,7 +1678,21 @@ glusterd_import_friend_snap (dict_t *peer_data, int32_t snap_count,
"for snap %s", peer_snap_name);
goto out;
}
+ /* During handshake, after getting updates from friend mount
+ * point for activated snapshot should exist and should not
+ * for deactivated snapshot.
+ */
if (glusterd_is_volume_started (snap_vol)) {
+ ret = glusterd_recreate_vol_brick_mounts (this,
+ snap_vol);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRK_MNT_RECREATE_FAIL,
+ "Failed to recreate brick mounts"
+ " for %s", snap->snapname);
+ goto out;
+ }
+
(void) glusterd_start_bricks (snap_vol);
ret = glusterd_store_volinfo
(snap_vol,
@@ -1692,6 +1706,13 @@ glusterd_import_friend_snap (dict_t *peer_data, int32_t snap_count,
}
} else {
(void) glusterd_stop_bricks(snap_vol);
+ ret = glusterd_snap_unmount(this, snap_vol);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ "Failed to unmounts for %s",
+ snap->snapname);
+ }
}
ret = glusterd_import_quota_conf (peer_data, i,
@@ -3347,6 +3368,70 @@ out:
return ret;
}
+/* This function will do unmount for snaps.
+ */
+int32_t
+glusterd_snap_unmount (xlator_t *this, glusterd_volinfo_t *volinfo)
+{
+ char *brick_mount_path = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t ret = -1;
+ int retry_count = 0;
+
+ GF_ASSERT (this);
+ GF_ASSERT (volinfo);
+
+ cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ /* If the brick is not of this node, we continue */
+ if (gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+ continue;
+ }
+ /* If snapshot is pending, we continue */
+ if (brickinfo->snap_status == -1) {
+ continue;
+ }
+
+ /* Fetch the brick mount path from the brickinfo->path */
+ ret = glusterd_get_brick_root (brickinfo->path,
+ &brick_mount_path);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ GD_MSG_BRICK_PATH_UNMOUNTED,
+ "Failed to find brick_mount_path for %s",
+ brickinfo->path);
+ /* There is chance that brick path is already
+ * unmounted. */
+ ret = 0;
+ goto out;
+ }
+ /* unmount cannot be done when the brick process is still in
+ * the process of shutdown, so give three re-tries
+ */
+ retry_count = 0;
+ while (retry_count <= 2) {
+ retry_count++;
+ /* umount2 system call doesn't cleanup mtab entry
+ * after un-mount, using external umount command.
+ */
+ ret = glusterd_umount(brick_mount_path);
+ if (!ret)
+ break;
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTERD_UMOUNT_FAIL, "umount failed "
+ "for path %s (brick: %s): %s. Retry(%d)",
+ brick_mount_path, brickinfo->path,
+ strerror (errno), retry_count);
+ sleep (3);
+ }
+ }
+
+out:
+ if (brick_mount_path)
+ GF_FREE(brick_mount_path);
+
+ return ret;
+}
+
int32_t
glusterd_umount (const char *path)
{
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
index e050166..814bf4a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
@@ -76,6 +76,9 @@ int32_t
glusterd_umount (const char *path);
int32_t
+glusterd_snap_unmount (xlator_t *this, glusterd_volinfo_t *volinfo);
+
+int32_t
glusterd_add_snapshots_to_export_dict (dict_t *peer_data);
int32_t
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index c38d2ff..275abe3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -872,6 +872,17 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
}
+ /* During snapshot restore, mount point for stopped snap
+ * should exist as it is required to set extended attribute.
+ */
+ ret = glusterd_recreate_vol_brick_mounts (this, snap_volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRK_MNT_RECREATE_FAIL,
+ "Failed to recreate brick mounts for %s",
+ snap->snapname);
+ goto out;
+ }
ret = gd_restore_snap_volume (dict, rsp_dict, parent_volinfo,
snap_volinfo, volcount);
@@ -5195,13 +5206,17 @@ glusterd_take_brick_snapshot (dict_t *dict, glusterd_volinfo_t *snap_vol,
char *origin_brick_path = NULL;
char key[PATH_MAX] = "";
int32_t ret = -1;
+ gf_boolean_t snap_activate = _gf_false;
xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
this = THIS;
+ priv = this->private;
GF_ASSERT (this);
GF_ASSERT (dict);
GF_ASSERT (snap_vol);
GF_ASSERT (brickinfo);
+ GF_ASSERT (priv);
if (strlen(brickinfo->device_path) == 0) {
gf_msg (this->name, GF_LOG_ERROR, EINVAL,
@@ -5245,16 +5260,23 @@ glusterd_take_brick_snapshot (dict_t *dict, glusterd_volinfo_t *snap_vol,
*/
}
- /* create the complete brick here */
- ret = glusterd_snap_brick_create (snap_vol, brickinfo,
- brick_count, clone);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_BRICK_CREATION_FAIL, "not able to"
- " create the brick for the snap %s"
- ", volume %s", snap_vol->snapshot->snapname,
- snap_vol->volname);
- goto out;
+ /* create the complete brick here in case of clone and
+ * activate-on-create configuration.
+ */
+ snap_activate = dict_get_str_boolean (priv->opts,
+ GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+ _gf_false);
+ if (clone || snap_activate) {
+ ret = glusterd_snap_brick_create (snap_vol, brickinfo,
+ brick_count, clone);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_CREATION_FAIL, "not able to "
+ "create the brick for the snap %s, volume %s",
+ snap_vol->snapshot->snapname,
+ snap_vol->volname);
+ goto out;
+ }
}
out:
@@ -6126,8 +6148,10 @@ glusterd_snapshot_activate_commit (dict_t *dict, char **op_errstr,
char *snapname = NULL;
glusterd_snap_t *snap = NULL;
glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
xlator_t *this = NULL;
- int flags = 0;
+ int flags = 0;
+ int brick_count = -1;
this = THIS;
GF_ASSERT (this);
@@ -6178,6 +6202,24 @@ glusterd_snapshot_activate_commit (dict_t *dict, char **op_errstr,
goto out;
}
+ /* create the complete brick here */
+ cds_list_for_each_entry (brickinfo, &snap_volinfo->bricks,
+ brick_list) {
+ brick_count++;
+ if (gf_uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+ ret = glusterd_snap_brick_create (snap_volinfo, brickinfo,
+ brick_count, _gf_false);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_CREATION_FAIL, "not able to "
+ "create the brick for the snap %s, volume %s",
+ snap_volinfo->snapshot->snapname,
+ snap_volinfo->volname);
+ goto out;
+ }
+ }
+
ret = glusterd_start_volume (snap_volinfo, flags, _gf_true);
if (ret) {
@@ -6263,6 +6305,13 @@ glusterd_snapshot_deactivate_commit (dict_t *dict, char **op_errstr,
goto out;
}
+ ret = glusterd_snap_unmount(this, snap_volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ "Failed to unmounts for %s", snap->snapname);
+ }
+
ret = dict_set_dynstr_with_alloc (rsp_dict, "snapuuid",
uuid_utoa (snap->snap_id));
if (ret) {
@@ -6907,6 +6956,7 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
int64_t i = 0;
int64_t volcount = 0;
int32_t snap_activate = 0;
+ int32_t flags = 0;
char *snapname = NULL;
char *volname = NULL;
char *tmp_name = NULL;
@@ -6915,7 +6965,6 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
glusterd_snap_t *snap = NULL;
glusterd_volinfo_t *origin_vol = NULL;
glusterd_volinfo_t *snap_vol = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
glusterd_conf_t *priv = NULL;
this = THIS;
@@ -7054,30 +7103,21 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
goto out;
}
- cds_list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
- cds_list_for_each_entry (brickinfo, &snap_vol->bricks,
- brick_list) {
- ret = glusterd_brick_start (snap_vol, brickinfo,
- _gf_false);
- if (ret) {
- gf_msg (this->name, GF_LOG_WARNING, 0,
- GD_MSG_BRICK_DISCONNECTED, "starting "
- "the brick %s:%s for the snap %s "
- "(volume: %s) failed",
- brickinfo->hostname, brickinfo->path,
- snap_vol->snapshot->snapname,
- snap_vol->volname);
- goto out;
- }
- }
+ /* Activate created bricks in case of activate-on-create config. */
+ ret = dict_get_int32 (dict, "flags", &flags);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_GET_FAILED, "Unable to get flags");
+ goto out;
+ }
- snap_vol->status = GLUSTERD_STATUS_STARTED;
- ret = glusterd_store_volinfo (snap_vol,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ cds_list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
+ ret = glusterd_start_volume (snap_vol, flags, _gf_true);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_VOLINFO_SET_FAIL, "Failed to store "
- "snap volinfo %s", snap_vol->volname);
+ GD_MSG_SNAP_ACTIVATE_FAIL,
+ "Failed to activate snap volume %s of the "
+ "snap %s", snap_vol->volname, snap->snapname);
goto out;
}
}
@@ -7619,6 +7659,30 @@ glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
if (ret < 0) {
goto out;
}
+ /* While getting snap status we should show relevent information
+ * for deactivated snaps.
+ */
+ if (snap_volinfo->status == GLUSTERD_STATUS_STOPPED) {
+ /* Setting vgname as "Deactivated Snapshot" */
+ value = gf_strdup ("N/A (Deactivated Snapshot)");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.brick%d.vgname",
+ keyprefix, index);
+ ret = dict_set_dynstr (rsp_dict, key, value);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Could not save vgname ");
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
ret = glusterd_get_brick_lvm_details (rsp_dict, brickinfo,
snap_volinfo->volname,
@@ -9200,6 +9264,19 @@ glusterd_snapshot_restore_postop (dict_t *dict, int32_t op_ret,
snap->snapname);
goto out;
}
+
+ /* After restore fails, we have to remove mount point for
+ * deactivated snaps which was created at start of restore op.
+ */
+ if (volinfo->status == GLUSTERD_STATUS_STOPPED) {
+ ret = glusterd_snap_unmount(this, volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ "Failed to unmounts for %s",
+ snap->snapname);
+ }
+ }
}
ret = 0;
@@ -9965,7 +10042,7 @@ gd_restore_snap_volume (dict_t *dict, dict_t *rsp_dict,
glusterd_conf_t *conf = NULL;
glusterd_volinfo_t *temp_volinfo = NULL;
glusterd_volinfo_t *voliter = NULL;
- gf_boolean_t conf_present = _gf_false;
+ gf_boolean_t conf_present = _gf_false;
this = THIS;
GF_ASSERT (this);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 42bb8ce..e35fcde 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -3519,7 +3519,7 @@ out:
return ret;
}
-static int32_t
+int32_t
glusterd_recreate_vol_brick_mounts (xlator_t *this,
glusterd_volinfo_t *volinfo)
{
@@ -4501,17 +4501,22 @@ glusterd_recreate_all_snap_brick_mounts (xlator_t *this)
}
}
- /* Recreate bricks of snapshot volumes */
+ /* Recreate bricks of snapshot volumes
+ * We are not creating brick mounts for stopped snaps.
+ */
cds_list_for_each_entry (snap, &priv->snapshots, snap_list) {
cds_list_for_each_entry (volinfo, &snap->volumes, vol_list) {
- ret = glusterd_recreate_vol_brick_mounts (this,
- volinfo);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_BRK_MNT_RECREATE_FAIL,
- "Failed to recreate brick mounts "
- "for %s", snap->snapname);
- goto out;
+ if (volinfo->status != GLUSTERD_STATUS_STOPPED) {
+ ret = glusterd_recreate_vol_brick_mounts
+ (this, volinfo);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRK_MNT_RECREATE_FAIL,
+ "Failed to recreate brick "
+ "mounts for %s",
+ snap->snapname);
+ goto out;
+ }
}
}
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index bf504e0..383a475 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -203,4 +203,8 @@ glusterd_quota_conf_write_header (int fd);
int32_t
glusterd_quota_conf_write_gfid (int fd, void *buf, char type);
+int32_t
+glusterd_recreate_vol_brick_mounts (xlator_t *this,
+ glusterd_volinfo_t *volinfo);
+
#endif
--
1.8.3.1

View File

@ -0,0 +1,59 @@
From 164e0384d6ae0ac9be713b4167d2b7151a027dc4 Mon Sep 17 00:00:00 2001
From: Sunny Kumar <sunkumar@redhat.com>
Date: Tue, 28 Nov 2017 18:48:31 +0530
Subject: [PATCH 086/128] snapshot: lvm cleanup during snapshot remove
Problem : During snapshot remove lvm cleanup was skipped for deactivated
snapshots by assuming that its mount point is not present.
Fix : Do not skip lvm cleanup by checking active mount point.
Upstream Patch : https://review.gluster.org/18654
>BUG: 1509254
>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
BUG: 1467903
Change-Id: I856d2d647c75db8b37b7f430277daef6eb7580a8
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124534
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-snapshot.c | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
index 275abe3..5bdf27f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -2984,13 +2984,19 @@ glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol)
continue;
}
- ret = sys_lstat (brick_mount_path, &stbuf);
- if (ret) {
- gf_msg_debug (this->name, 0,
- "Brick %s:%s already deleted.",
- brickinfo->hostname, brickinfo->path);
- ret = 0;
- continue;
+ /* As deactivated snapshot have no active mount point we
+ * check only for activated snapshot.
+ */
+ if (snap_vol->status == GLUSTERD_STATUS_STARTED) {
+ ret = sys_lstat (brick_mount_path, &stbuf);
+ if (ret) {
+ gf_msg_debug (this->name, 0,
+ "Brick %s:%s already deleted.",
+ brickinfo->hostname,
+ brickinfo->path);
+ ret = 0;
+ continue;
+ }
}
if (brickinfo->snap_status == -1) {
--
1.8.3.1

View File

@ -0,0 +1,134 @@
From df02eac6436c86e75aed23f8ba61a061d8db9f35 Mon Sep 17 00:00:00 2001
From: Kotresh HR <khiremat@redhat.com>
Date: Tue, 17 Oct 2017 11:28:43 -0400
Subject: [PATCH 087/128] glusterd: Validate changelog on geo-rep volume
If geo-rep is configured on volume, don't allow
to disable changelog.
Backport of:
> Upstream Patch: https://review.gluster.org/18540
> Change-Id: I7d1ba8b2939c8fe6ee6c59fb923d9aa1bdab553c
> BUG: 1503227
> Signed-off-by: Kotresh HR <khiremat@redhat.com>
Change-Id: I7d1ba8b2939c8fe6ee6c59fb923d9aa1bdab553c
BUG: 1498391
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124533
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-messages.h | 9 ++++-
xlators/mgmt/glusterd/src/glusterd-volgen.c | 53 +++++++++++++++++++++++++++
2 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index fb2079f..225d59b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -41,7 +41,7 @@
#define GLUSTERD_COMP_BASE GLFS_MSGID_GLUSTERD
-#define GLFS_NUM_MESSAGES 613
+#define GLFS_NUM_MESSAGES 614
#define GLFS_MSGID_END (GLUSTERD_COMP_BASE + GLFS_NUM_MESSAGES + 1)
/* Messaged with message IDs */
@@ -4961,6 +4961,12 @@
*/
#define GD_MSG_PORTS_EXHAUSTED (GLUSTERD_COMP_BASE + 612)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
#define GD_MSG_GANESHA_NOT_RUNNING (GLUSTERD_COMP_BASE + 613)
/*!
* @messageid
@@ -4968,6 +4974,7 @@
* @recommendedaction
*
*/
+#define GD_MSG_CHANGELOG_GET_FAIL (GLUSTERD_COMP_BASE + 614)
/*------------*/
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 97049ac..8ff76d6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1209,6 +1209,56 @@ loglevel_option_handler (volgen_graph_t *graph,
}
static int
+server_check_changelog_off (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t enabled = _gf_false;
+ int ret = 0;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (vme);
+
+ if (strcmp (vme->option, "changelog") != 0)
+ return 0;
+
+ ret = gf_string2boolean (vme->value, &enabled);
+ if (ret || enabled)
+ goto out;
+
+ ret = glusterd_volinfo_get_boolean (volinfo, VKEY_CHANGELOG);
+ if (ret < 0) {
+ gf_msg ("glusterd", GF_LOG_WARNING, 0,
+ GD_MSG_CHANGELOG_GET_FAIL,
+ "failed to get the changelog status");
+ ret = -1;
+ goto out;
+ }
+
+ if (ret) {
+ enabled = _gf_false;
+ glusterd_check_geo_rep_configured (volinfo, &enabled);
+
+ if (enabled) {
+ gf_msg ("glusterd", GF_LOG_WARNING, 0,
+ GD_MSG_XLATOR_SET_OPT_FAIL,
+ GEOREP" sessions active"
+ "for the volume %s, cannot disable changelog ",
+ volinfo->volname);
+ set_graph_errstr (graph,
+ VKEY_CHANGELOG" cannot be disabled "
+ "while "GEOREP" sessions exist");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+ out:
+ gf_msg_debug ("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
server_check_marker_off (volgen_graph_t *graph, struct volopt_map_entry *vme,
glusterd_volinfo_t *volinfo)
{
@@ -1424,6 +1474,9 @@ server_spec_option_handler (volgen_graph_t *graph,
ret = server_check_marker_off (graph, vme, volinfo);
if (!ret)
+ ret = server_check_changelog_off (graph, vme, volinfo);
+
+ if (!ret)
ret = loglevel_option_handler (graph, vme, "brick");
if (!ret)
--
1.8.3.1

View File

@ -0,0 +1,731 @@
From 8b596eacd72527b55ccff6a26a44014b6cf76b48 Mon Sep 17 00:00:00 2001
From: Sunil Kumar Acharya <sheggodu@redhat.com>
Date: Wed, 14 Jun 2017 16:28:40 +0530
Subject: [PATCH 088/128] cluster/ec: Implement DISCARD FOP for EC
Updates #254
This code change implements DISCARD FOP support for
EC.
>BUG: 1461018
>Change-Id: I09a9cb2aa9d91ec27add4f422dc9074af5b8b2db
>Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Upstream Patch: https://review.gluster.org/#/c/17777/
BUG: 1499865
Change-Id: I09a9cb2aa9d91ec27add4f422dc9074af5b8b2db
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/123694
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
tests/basic/ec/ec-discard.t | 197 +++++++++++++++++
tests/include.rc | 7 +
xlators/cluster/ec/src/ec-common.h | 3 +-
xlators/cluster/ec/src/ec-fops.h | 4 +
xlators/cluster/ec/src/ec-helpers.h | 5 +-
xlators/cluster/ec/src/ec-inode-write.c | 365 ++++++++++++++++++++++++++++----
xlators/cluster/ec/src/ec.c | 3 +-
7 files changed, 536 insertions(+), 48 deletions(-)
create mode 100644 tests/basic/ec/ec-discard.t
diff --git a/tests/basic/ec/ec-discard.t b/tests/basic/ec/ec-discard.t
new file mode 100644
index 0000000..4a44cec
--- /dev/null
+++ b/tests/basic/ec/ec-discard.t
@@ -0,0 +1,197 @@
+#!/bin/bash
+#
+# Test discard functionality
+#
+# Test that basic discard (hole punch) functionality works via the fallocate
+# command line tool. Hole punch deallocates a region of a file, creating a hole
+# and a zero-filled data region. We verify that hole punch works, frees blocks
+# and that subsequent reads do not read stale data (caches are invalidated).
+#
+# NOTE: fuse fallocate is known to be broken with regard to cache invalidation
+# up to 3.9.0 kernels. Therefore, FOPEN_KEEP_CACHE is not used in this
+# test (opens will invalidate the fuse cache).
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fallocate.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+#create and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Check for fallocate and hole punch support
+require_fallocate -l 1m $M0/file
+require_fallocate -p -l 512k $M0/file && rm -f $M0/file
+
+#Write some data, punch a hole and verify the file content changes
+TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -p -o 512k -l 128k $M0/file
+TEST ! cmp $M0/file.copy.pre $M0/file
+TEST rm -f $M0/file $M0/file.copy.pre
+
+#Allocate some blocks, punch a hole and verify block allocation
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %B $M0/file`
+nblks=`stat -c %b $M0/file`
+TEST [ $(($blksz * $nblks)) -ge 1048576 ]
+TEST fallocate -p -o 512k -l 128k $M0/file
+nblks=`stat -c %b $M0/file`
+TEST [ $(($blksz * $nblks)) -lt $((933889)) ]
+TEST unlink $M0/file
+
+###Punch hole test cases without fallocate
+##With write
+#Touching starting boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 0 -l 500 $B0/test_file
+TEST fallocate -p -o 0 -l 500 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 500 -l 1548 $B0/test_file
+TEST fallocate -p -o 500 -l 1548 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Not touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 500 -l 1000 $B0/test_file
+TEST fallocate -p -o 500 -l 1000 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Over boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 1500 -l 1000 $B0/test_file
+TEST fallocate -p -o 1500 -l 1000 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+###Punch hole test cases with fallocate
+##Without write
+
+#Zero size
+TEST dd if=/dev/urandom of=$M0/test_file bs=1024 count=8
+TEST ! fallocate -p -o 1500 -l 0 $M0/test_file
+
+#Negative size
+TEST ! fallocate -p -o 1500 -l -100 $M0/test_file
+TEST rm -f $M0/test_file
+
+#Touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 2048 $B0/test_file
+TEST fallocate -p -o 2048 -l 2048 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Touching boundary,multiple stripe
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 4096 $B0/test_file
+TEST fallocate -p -o 2048 -l 4096 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+##With write
+
+#Size ends in boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 600 -l 3496 $B0/test_file
+TEST fallocate -p -o 600 -l 3496 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Offset at boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 3072 $B0/test_file
+TEST fallocate -p -o 2048 -l 3072 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Offset and Size not at boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 1000 -l 3072 $B0/test_file
+TEST fallocate -p -o 1000 -l 3072 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+#TEST rm -f $B0/test_file $M0/test_file
+
+#Data Corruption Tests
+#Kill brick1 and brick2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+#Bring up the bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Kill brick3 and brick4
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+#Bring up the bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Kill brick5 and brick6
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+cleanup
diff --git a/tests/include.rc b/tests/include.rc
index 7470ea1..45392e0 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -1229,3 +1229,10 @@ function STAT_INO()
echo 0
fi
}
+
+function get_md5_sum()
+{
+ local file=$1;
+ md5_sum=$(md5sum $file | awk '{print $1}');
+ echo $md5_sum
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 1a947cc..0f7a252 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -85,6 +85,8 @@ void ec_update_good(ec_fop_data_t *fop, uintptr_t good);
void ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+void __ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+
ec_cbk_data_t *
ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro);
@@ -133,5 +135,4 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
gf_boolean_t *need_heal);
int32_t
ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
-
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
index fab22d8..4a926cf 100644
--- a/xlators/cluster/ec/src/ec-fops.h
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -172,6 +172,10 @@ void ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
int32_t mode, off_t offset, size_t len, dict_t *xdata);
+void ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
void ec_truncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_truncate_cbk_t func, void *data,
loc_t * loc, off_t offset, dict_t * xdata);
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index cfd7daa..a8f153a 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -178,8 +178,5 @@ ec_is_data_fop (glusterfs_fop_t fop);
int32_t
ec_launch_replace_heal (ec_t *ec);
-/*
-gf_boolean_t
-ec_is_metadata_fop (glusterfs_fop_t fop);
-*/
+
#endif /* __EC_HELPERS_H__ */
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index e6a67cf..ae51202 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -19,6 +19,97 @@
#include "ec-method.h"
#include "ec-fops.h"
+int32_t
+ec_update_writev_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_cbk_data_t *cbk = NULL;
+ ec_fop_data_t *parent = fop->parent;
+ int i = 0;
+
+ ec_trace("UPDATE_WRITEV_CBK", cookie, "ret=%d, errno=%d, parent-fop=%s",
+ op_ret, op_errno, ec_fop_name (parent->id));
+
+ if (op_ret < 0) {
+ ec_fop_set_error (parent, op_errno);
+ goto out;
+ }
+ cbk = ec_cbk_data_allocate (parent->frame, this, parent,
+ parent->id, 0, op_ret, op_errno);
+ if (!cbk) {
+ ec_fop_set_error (parent, ENOMEM);
+ goto out;
+ }
+
+ if (xdata)
+ cbk->xdata = dict_ref (xdata);
+
+ if (prebuf)
+ cbk->iatt[i++] = *prebuf;
+
+ if (postbuf)
+ cbk->iatt[i++] = *postbuf;
+
+ LOCK (&parent->lock);
+ {
+ parent->good &= fop->good;
+
+ if (gf_bits_count (parent->good) < parent->minimum) {
+ __ec_fop_set_error (parent, EIO);
+ } else if (fop->error == 0 && parent->answer == NULL) {
+ parent->answer = cbk;
+ }
+ }
+ UNLOCK (&parent->lock);
+out:
+ return 0;
+}
+
+int32_t ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset,
+ size_t size)
+{
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec vector;
+ int32_t err = -ENOMEM;
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+ iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
+ if (iobuf == NULL) {
+ goto out;
+ }
+ err = iobref_add(iobref, iobuf);
+ if (err != 0) {
+ goto out;
+ }
+
+ vector.iov_base = iobuf->ptr;
+ vector.iov_len = size;
+ memset(vector.iov_base, 0, vector.iov_len);
+
+ ec_writev(fop->frame, fop->xl, mask, fop->minimum,
+ ec_update_writev_cbk, NULL, fop->fd, &vector, 1,
+ offset, 0, iobref, NULL);
+
+ err = 0;
+
+out:
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ return err;
+}
+
int
ec_inode_write_cbk (call_frame_t *frame, xlator_t *this, void *cookie,
int op_ret, int op_errno, struct iatt *prestat,
@@ -1034,62 +1125,252 @@ out:
}
}
-int32_t
-ec_truncate_writev_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
+/*********************************************************************
+ *
+ * File Operation : Discard
+ *
+ *********************************************************************/
+void ec_update_discard_write(ec_fop_data_t *fop, uintptr_t mask)
{
- ec_fop_data_t *fop = cookie;
+ ec_t *ec = fop->xl->private;
+ off_t off_head = 0;
+ off_t off_tail = 0;
+ size_t size_head = 0;
+ size_t size_tail = 0;
+ int error = 0;
+
+ off_head = fop->offset * ec->fragments - fop->int32;
+ if (fop->size == 0) {
+ error = ec_update_write (fop, mask, off_head, fop->user_size);
+ } else {
+ size_head = fop->int32;
+ size_tail = (fop->user_size - fop->int32) % ec->stripe_size;
+ off_tail = off_head + fop->user_size - size_tail;
+ if (size_head) {
+ error = ec_update_write (fop, mask, off_head, size_head);
+ goto out;
+ }
+ if (size_tail) {
+ error = ec_update_write (fop, mask, off_tail, size_tail);
+ }
+ }
+out:
+ if (error)
+ ec_fop_set_error (fop, -error);
+}
- fop->parent->good &= fop->good;
- ec_trace("TRUNCATE_WRITEV_CBK", cookie, "ret=%d, errno=%d",
- op_ret, op_errno);
- return 0;
+void ec_discard_adjust_offset_size(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+
+ fop->user_size = fop->size;
+ /* If discard length covers atleast a fragment on brick, we will
+ * perform discard operation(when fop->size is non-zero) else we just
+ * write zeros.
+ */
+ fop->int32 = ec_adjust_offset_up(ec, &fop->offset, _gf_true);
+ if (fop->size < fop->int32) {
+ fop->size = 0;
+ } else {
+ fop->size -= fop->int32;
+ ec_adjust_size_down(ec, &fop->size, _gf_true);
+ }
}
-int32_t ec_truncate_write(ec_fop_data_t * fop, uintptr_t mask)
+int32_t ec_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ec_t * ec = fop->xl->private;
- struct iobref * iobref = NULL;
- struct iobuf * iobuf = NULL;
- struct iovec vector;
- int32_t err = -ENOMEM;
+ return ec_inode_write_cbk (frame, this, cookie, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+}
- iobref = iobref_new();
- if (iobref == NULL) {
- goto out;
- }
- iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
- if (iobuf == NULL) {
- goto out;
+void ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_discard_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->discard,
+ fop->fd, fop->offset, fop->size, fop->xdata);
+}
+
+int32_t ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+ off_t fl_start = 0;
+ size_t fl_size = 0;
+
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if ((fop->size <= 0) || (fop->offset < 0)) {
+ ec_fop_set_error(fop, EINVAL);
+ return EC_STATE_REPORT;
+ }
+ /* Because of the head/tail writes, "discard" happens on the remaining
+ * regions, but we need to compute region including head/tail writes
+ * so compute them separately*/
+ fl_start = fop->offset;
+ fl_size = fop->size;
+ fl_size += ec_adjust_offset_down (fop->xl->private, &fl_start,
+ _gf_true);
+ ec_adjust_size_up (fop->xl->private, &fl_size, _gf_true);
+
+ ec_discard_adjust_offset_size(fop);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META |
+ EC_QUERY_INFO, fl_start, fl_size);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+
+ /* Dispatch discard fop only if we have whole fragment
+ * to deallocate */
+ if (fop->size) {
+ ec_dispatch_all(fop);
+ return EC_STATE_DELAYED_START;
+ } else {
+ /*Assume discard to have succeeded on mask*/
+ fop->good = fop->mask;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DELAYED_START:
+
+ if (fop->size) {
+ if (fop->answer && fop->answer->op_ret == 0)
+ ec_update_discard_write (fop, fop->answer->mask);
+ } else {
+ ec_update_discard_write (fop, fop->mask);
+ }
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2,
+ cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.discard != NULL) {
+ fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_DELAYED_START:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.discard != NULL) {
+ fop->cbks.discard(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg (fop->xl->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s",
+ state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
}
- err = iobref_add(iobref, iobuf);
- if (err != 0) {
+}
+
+void ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ ec_cbk_t callback = { .discard = func };
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace ("ec", 0, "EC(DISCARD) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
+ minimum, ec_wind_discard, ec_manager_discard,
+ callback, data);
+ if (fop == NULL) {
goto out;
}
- vector.iov_base = iobuf->ptr;
- vector.iov_len = fop->offset * ec->fragments - fop->user_size;
- memset(vector.iov_base, 0, vector.iov_len);
+ fop->use_fd = 1;
+ fop->offset = offset;
+ fop->size = len;
- iobuf_unref (iobuf);
- iobuf = NULL;
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ }
- ec_writev(fop->frame, fop->xl, mask, fop->minimum, ec_truncate_writev_cbk,
- NULL, fop->fd, &vector, 1, fop->user_size, 0, iobref, NULL);
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ }
- err = 0;
+ error = 0;
out:
- if (iobuf != NULL) {
- iobuf_unref(iobuf);
- }
- if (iobref != NULL) {
- iobref_unref(iobref);
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
}
+}
- return err;
+/*********************************************************************
+ *
+ * File Operation : truncate
+ *
+ *********************************************************************/
+
+int32_t ec_update_truncate_write (ec_fop_data_t *fop, uintptr_t mask)
+{
+ ec_t *ec = fop->xl->private;
+ size_t size = fop->offset * ec->fragments - fop->user_size;
+ return ec_update_write (fop, mask, fop->user_size, size);
}
int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie,
@@ -1102,9 +1383,9 @@ int32_t ec_truncate_open_cbk(call_frame_t * frame, void * cookie,
fop->parent->good &= fop->good;
if (op_ret >= 0) {
fd_bind (fd);
- err = ec_truncate_write(fop->parent, fop->answer->mask);
+ err = ec_update_truncate_write (fop->parent, fop->answer->mask);
if (err != 0) {
- fop->error = -err;
+ ec_fop_set_error (fop->parent, -err);
}
}
@@ -1125,7 +1406,7 @@ int32_t ec_truncate_clean(ec_fop_data_t * fop)
return 0;
} else {
- return ec_truncate_write(fop, fop->answer->mask);
+ return ec_update_truncate_write (fop, fop->answer->mask);
}
}
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 856d60c..09c5fa8 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -729,7 +729,8 @@ int32_t ec_gf_create(call_frame_t * frame, xlator_t * this, loc_t * loc,
int32_t ec_gf_discard(call_frame_t * frame, xlator_t * this, fd_t * fd,
off_t offset, size_t len, dict_t * xdata)
{
- default_discard_failure_cbk(frame, ENOTSUP);
+ ec_discard(frame, this, -1, EC_MINIMUM_MIN, default_discard_cbk,
+ NULL, fd, offset, len, xdata);
return 0;
}
--
1.8.3.1

View File

@ -0,0 +1,43 @@
From 9034056db634852d74423b8324c78ed058d028ba Mon Sep 17 00:00:00 2001
From: Kotresh HR <khiremat@redhat.com>
Date: Tue, 10 Oct 2017 10:27:01 -0400
Subject: [PATCH 089/128] geo-rep: Filter out volume-mark xattr
The volume-mark xattr, maintained at brick root
of slave volume is specific to geo-replication
and should be filtered out for all other clients.
It should also be filtered out from list getxattr
from all mounts including geo-rep mount as it
might cause rsync to read and set.
Backport of:
> Change-Id: If9eb5a3af18051083c853e70d93b2819e8eea222
> BUG: 1500433
> Patch: https://review.gluster.org/18479
> Signed-off-by: Kotresh HR <khiremat@redhat.com>
BUG: 1476876
Change-Id: If9eb5a3af18051083c853e70d93b2819e8eea222
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124531
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/storage/posix/src/posix-helpers.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 18999f0..f97c90b 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -111,6 +111,7 @@ posix_handle_georep_xattrs (call_frame_t *frame, const char *name,
static const char *georep_xattr[] = { "*.glusterfs.*.stime",
"*.glusterfs.*.xtime",
"*.glusterfs.*.entry_stime",
+ "*.glusterfs.volume-mark.*",
NULL
};
if (frame && frame->root) {
--
1.8.3.1

View File

@ -0,0 +1,75 @@
From 16aae2303e2cd70405a3ab5a41c719256dd2db7a Mon Sep 17 00:00:00 2001
From: Sanoj Unnikrishnan <sunnikri@redhat.com>
Date: Tue, 24 Oct 2017 16:10:23 +0530
Subject: [PATCH 090/128] Quota: Adding man page for quota
> Change-Id: I95365c443705f56561cc10138318eb96db3b941e
> BUG: 1505660
> https://review.gluster.org/#/c/18561/
Change-Id: I95365c443705f56561cc10138318eb96db3b941e
BUG: 1501345
Signed-off-by: Sanoj Unnikrishnan <sunnikri@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124658
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
doc/gluster.8 | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/doc/gluster.8 b/doc/gluster.8
index 316a3d2..4c20307 100644
--- a/doc/gluster.8
+++ b/doc/gluster.8
@@ -123,6 +123,48 @@ Check status of data movement from the hot to cold tier.
.TP
\fB\ volume tier <VOLNAME> detach stop\fR
Stop detaching the hot tier from the volume.
+
+.SS "Quota Commands"
+.TP
+\fB\ volume quota <VOLNAME> enable \fR
+Enable quota on the specified volume. This will cause all the directories in the filesystem hierarchy to be accounted and updated thereafter on each operation in the filesystem. To kick start this accounting, a crawl is done over the hierarchy with an auxiliary client.
+.TP
+\fB\ volume quota <VOLNAME> disable \fR
+Disable quota on the volume. This will disable enforcement and accounting in the filesystem. Any configured limits will be lost.
+.TP
+\fB\ volume quota <VOLNAME> limit-usage <PATH> <SIZE> [<PERCENT>] \fR
+Set a usage limit on the given path. Any previously set limit is overridden to the new value. The soft limit can optionally be specified (as a percentage of hard limit). If soft limit percentage is not provided the default soft limit value for the volume is used to decide the soft limit.
+.TP
+\fB\ volume quota <VOLNAME> limit-objects <PATH> <SIZE> [<PERCENT>] \fR
+Set an inode limit on the given path. Any previously set limit is overridden to the new value. The soft limit can optionally be specified (as a percentage of hard limit). If soft limit percentage is not provided the default soft limit value for the volume is used to decide the soft limit.
+.TP
+NOTE: valid units of SIZE are : B, KB, MB, GB, TB, PB. If no unit is specified, the unit defaults to bytes.
+.TP
+\fB\ volume quota <VOLNAME> remove <PATH> \fR
+Remove any usage limit configured on the specified directory. Note that if any limit is configured on the ancestors of this directory (previous directories along the path), they will still be honored and enforced.
+.TP
+\fB\ volume quota <VOLNAME> remove-objects <PATH> \fR
+Remove any inode limit configured on the specified directory. Note that if any limit is configured on the ancestors of this directory (previous directories along the path), they will still be honored and enforced.
+.TP
+\fB\ volume quota <VOLNAME> list <PATH> \fR
+Lists the usage and limits configured on directory(s). If a path is given only the limit that has been configured on the directory (if any) is displayed along with the directory's usage. If no path is given, usage and limits are displayed for all directories that have limits configured.
+.TP
+\fB\ volume quota <VOLNAME> list-objects <PATH> \fR
+Lists the inode usage and inode limits configured on directory(s). If a path is given only the limit that has been configured on the directory (if any) is displayed along with the directory's inode usage. If no path is given, usage and limits are displayed for all directories that have limits configured.
+.TP
+\fB\ volume quota <VOLNAME> default-soft-limit <PERCENT> \fR
+Set the percentage value for default soft limit for the volume.
+.TP
+\fB\ volume quota <VOLNAME> soft-timeout <TIME> \fR
+Set the soft timeout for the volume. The interval in which limits are retested before the soft limit is breached.
+.TP
+\fB\ volume quota <VOLNAME> hard-timeout <TIME> \fR
+Set the hard timeout for the volume. The interval in which limits are retested after the soft limit is breached.
+.TP
+\fB\ volume quota <VOLNAME> alert-time <TIME> \fR
+Set the frequency in which warning messages need to be logged (in the brick logs) once soft limit is breached.
+.TP
+NOTE: valid units of time and their symbols are : hours(h/hr), minutes(m/min), seconds(s/sec), weeks(w/wk), Days(d/days).
.SS "Geo-replication Commands"
.TP
\fI\ Note\fR: password-less ssh, from the master node (where these commands are executed) to the slave node <SLAVE_HOST>, is a prerequisite for the geo-replication commands.
--
1.8.3.1

View File

@ -0,0 +1,353 @@
From ea20e0a38c9f150d9e96076e04f4b77109e41663 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Wed, 27 Sep 2017 11:37:28 +0530
Subject: [PATCH 091/128] extras: scripts to control CPU/MEMORY for any gluster
daemon during runtime
Problem: Sometime gluster daemons like glustershd can consume a lot of cpu and/
or memory if there is a large amount of data/ entries to be healed.
Solution: Until we have some form of throttling/ QoS mechanisms built into
gluster, we can use control groups for regulating cpu and memory of any gluster
daemon using control-cpu-load.sh and control-mem.sh scripts respectively.
Test: To test the control-cpu-load.sh script follow below procedure:
1) Setup distribute replica environment
2) Selfheal daemon off
3) Down one node from replica nodes
4) Create millions of files from mount point
5) Start down node
6) Check cpu usage for shd process in top command
7) Run script after provide shd pid with CPU quota value
8) Check again cpu usage for shd process in top command
Note: control-mem.sh script can cap the memory usage of the process to the set
limit, beyond which the process gets blocked. It resumes either when the memory
usage comes down or if the limit is increased.
> BUG: 1496335
> Change-Id: Id73c36b73ca600fa9f7905d84053d1e8633c996f
> Reviewed on https://review.gluster.org/#/c/18404
> (cherry picked from commit 2c066c4c365e77421d1009851144efae0b028628)
BUG: 1484446
Change-Id: Id73c36b73ca600fa9f7905d84053d1e8633c996f
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124875
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
extras/Makefile.am | 6 ++-
extras/control-cpu-load.sh | 116 ++++++++++++++++++++++++++++++++++++++++
extras/control-mem.sh | 128 +++++++++++++++++++++++++++++++++++++++++++++
glusterfs.spec.in | 5 ++
4 files changed, 254 insertions(+), 1 deletion(-)
create mode 100755 extras/control-cpu-load.sh
create mode 100755 extras/control-mem.sh
diff --git a/extras/Makefile.am b/extras/Makefile.am
index 2812a4c..d9572ac 100644
--- a/extras/Makefile.am
+++ b/extras/Makefile.am
@@ -19,6 +19,10 @@ vol_DATA = glusterd.vol
scriptsdir = $(datadir)/glusterfs/scripts
scripts_SCRIPTS = post-upgrade-script-for-quota.sh \
pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh
+if USE_SYSTEMD
+scripts_SCRIPTS += control-cpu-load.sh
+scripts_SCRIPTS += control-mem.sh
+endif
EXTRA_DIST = $(conf_DATA) specgen.scm glusterfs-mode.el glusterfs.vim \
migrate-unify-to-distribute.sh backend-xattr-sanitize.sh backend-cleanup.sh \
@@ -26,7 +30,7 @@ EXTRA_DIST = $(conf_DATA) specgen.scm glusterfs-mode.el glusterfs.vim \
post-upgrade-script-for-quota.sh pre-upgrade-script-for-quota.sh \
command-completion/gluster.bash command-completion/Makefile \
command-completion/README stop-all-gluster-processes.sh clang-checker.sh \
- mount-shared-storage.sh
+ mount-shared-storage.sh control-cpu-load.sh control-mem.sh
install-data-local:
if [ -n "$(tmpfilesdir)" ]; then \
diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh
new file mode 100755
index 0000000..b739c82
--- /dev/null
+++ b/extras/control-cpu-load.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+USAGE="This script provides a utility to control CPU utilization for any
+gluster daemon.In this, we use cgroup framework to configure CPU quota
+for a process(like selfheal daemon). Before running this script, make
+sure that daemon is running.Every time daemon restarts, it is required
+to rerun this command to set CPU quota on new daemon process id.
+User can enter any value between 10 to 100 for CPU quota.
+Recommended value of quota period is 25. 25 means, kernel will allocate
+25 ms period to this group of tasks in every 100 ms period. This 25ms
+could be considered as the maximum percentage of CPU quota daemon can take.
+This value will be reflected on CPU usage of "top" command.If provided pid
+is the only process and no other process is in competition to get CPU, more
+ than 25% could be allocated to daemon to speed up the process."
+
+if [ $# -ge 1 ]; then
+ case $1 in
+ -h|--help) echo " " "$USAGE" | sed -r -e 's/^[ ]+//g'
+ exit 0;
+ ;;
+ *) echo "Please Provide correct input for script."
+ echo "For help correct options are -h or --help."
+ exit 1;
+ ;;
+ esac
+fi
+
+DIR_EXIST=0
+LOC="/sys/fs/cgroup/cpu,cpuacct/system.slice/glusterd.service"
+echo "Enter gluster daemon pid for which you want to control CPU."
+read daemon_pid
+
+if expr ${daemon_pid} + 0 > /dev/null 2>&1 ;then
+ CHECK_PID=$(pgrep -f gluster | grep ${daemon_pid})
+ if [ -z "${CHECK_PID}" ]; then
+ echo "No daemon is running or pid ${daemon_pid} does not match."
+ echo "with running gluster processes."
+ exit 1
+ fi
+else
+ echo "Entered daemon_pid is not numeric so Rerun the script."
+ exit 1
+fi
+
+
+if [ -f ${LOC}/tasks ];then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ echo "pid ${daemon_pid} is attached with glusterd.service cgroup."
+ fi
+fi
+
+cgroup_name=cgroup_gluster_${daemon_pid}
+if [ -f ${LOC}/${cgroup_name}/tasks ]; then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/${cgroup_name}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ val=`cat ${LOC}/${cgroup_name}/cpu.cfs_quota_us`
+ qval=$((val / 1000))
+ echo "pid ${daemon_pid} is already attached ${cgroup_name} with quota value ${qval}."
+ echo "Press n if you don't want to reassign ${daemon_pid} with new quota value."
+ DIR_EXIST=1
+ else
+ echo "pid ${daemon_pid} is not attached with ${cgroup_name}."
+ fi
+fi
+
+read -p "If you want to continue the script to attach ${daemon_pid} with new ${cgroup_name} cgroup Press (y/n)?" choice
+case "$choice" in
+ y|Y ) echo "yes";;
+ n|N ) echo "no";exit;;
+ * ) echo "invalid";exit;;
+esac
+
+systemctl set-property glusterd.service CPUShares=1024
+
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Creating child cgroup directory '${cgroup_name} cgroup' for glusterd.service."
+ mkdir -p ${LOC}/${cgroup_name}
+ if [ ! -f ${LOC}/${cgroup_name}/tasks ];then
+ echo "Not able to create ${cgroup_name} directory so exit."
+ exit 1
+ fi
+fi
+
+echo "Enter quota value in range [10,100]: "
+
+read quota_value
+if expr ${quota_value} + 0 > /dev/null 2>&1 ;then
+ if [ ${quota_value} -lt 10 ] || [ ${quota_value} -gt 100 ]; then
+ echo "Entered quota value is not correct,it should be in the range ."
+ echo "10-100. Ideal value is 25."
+ echo "Rerun the sript with correct value."
+ exit 1
+ else
+ echo "Entered quota value is $quota_value"
+ fi
+else
+ echo "Entered quota value is not numeric so Rerun the script."
+ exit 1
+fi
+
+quota_value=$((quota_value * 1000))
+echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup."
+echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us
+
+if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+ for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
+ do
+ echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+ done
+ if cat /proc/${daemon_pid}/cgroup | grep -w ${cgroup_name} > /dev/null; then
+ echo "Tasks are attached successfully specific to ${daemon_pid} to ${cgroup_name}."
+ else
+ echo "Tasks are not attached successfully."
+ fi
+fi
diff --git a/extras/control-mem.sh b/extras/control-mem.sh
new file mode 100755
index 0000000..38aa2a0
--- /dev/null
+++ b/extras/control-mem.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+USAGE="This commands provides a utility to control MEMORY utilization for any
+gluster daemon.In this, we use cgroup framework to configure MEMORY limit for
+a process. Before running this script, make sure that daemon is running.Every
+time daemon restarts, it is required to rerun this command to set memory limit
+(in bytes) on new daemon process id.User can enter any value between 100
+(in Mega bytes) to 8000000000000 for Memory limit in Mega bytes.
+Memory limit value is depends on how much maximum memory user wants to restrict
+for specific daemon process.If a process will try to consume memore more than
+configured value then cgroup will hang/sleep this task and to resume the task
+rerun the script with new increase memory limit value ."
+
+if [ $# -ge 1 ]; then
+ case $1 in
+ -h|--help) echo " " "$USAGE" | sed -r -e 's/^[ ]+//g'
+ exit 0;
+ ;;
+ *) echo "Please Provide correct input for script."
+ echo "For help correct options are -h of --help."
+ exit 1;
+ ;;
+ esac
+fi
+
+DIR_EXIST=0
+LOC="/sys/fs/cgroup/memory/system.slice/glusterd.service"
+echo "Enter Any gluster daemon pid for that you want to control MEMORY."
+read daemon_pid
+
+if expr ${daemon_pid} + 0 > /dev/null 2>&1 ;then
+ CHECK_PID=$(pgrep -f gluster | grep ${daemon_pid})
+ if [ -z "${CHECK_PID}" ]; then
+ echo "No daemon is running or pid ${daemon_pid} does not match."
+ echo "with running gluster processes."
+ exit 1
+ fi
+else
+ echo "Entered daemon_pid is not numeric so Rerun the script."
+ exit 1
+fi
+
+
+if [ -f ${LOC}/tasks ]; then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/tasks)
+ if [ ${CHECK_CGROUP} ] ;then
+ echo "pid ${daemon_pid} is attached with default glusterd.service cgroup."
+ fi
+fi
+
+cgroup_name=cgroup_gluster_${daemon_pid}
+if [ -f ${LOC}/${cgroup_name}/tasks ];then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/${cgroup_name}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ val=`cat ${LOC}/${cgroup_name}/memory.limit_in_bytes`
+ mval=$((val / 1024 / 1024))
+ echo "pid ${daemon_pid} is already attached ${cgroup_name} with mem value ${mval}."
+ echo "Press n if you don't want to reassign ${daemon_pid} with new mem value."
+ DIR_EXIST=1
+ else
+ echo "pid ${daemon_pid} is not attached with ${cgroup_name}."
+ fi
+fi
+
+read -p "If you want to continue the script to attach daeomon with new cgroup. Press (y/n)?" choice
+case "$choice" in
+ y|Y ) echo "yes";;
+ n|N ) echo "no";exit;;
+ * ) echo "invalid";exit;;
+esac
+
+systemctl set-property glusterd.service CPUShares=1024
+
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Creating child cgroup directory '${cgroup_name} cgroup' for glusterd.service."
+ mkdir -p ${LOC}/${cgroup_name}
+ if [ ! -f ${LOC}/${cgroup_name}/tasks ];then
+ echo "Not able to create ${LOC}/${cgroup_name} directory so exit."
+ exit 1
+ fi
+fi
+
+echo "Enter Memory value in Mega bytes [100,8000000000000]: "
+
+read mem_value
+if expr ${mem_value} + 0 > /dev/null 2>&1 ;then
+ if [ ${mem_value} -lt 100 ] || [ ${mem_value} -gt 8000000000000 ]; then
+ echo "Entered memory value is not correct,it should be in the range ."
+ echo "100-8000000000000, Rerun the script with correct value ."
+ exit 1
+ else
+ echo "Entered memory limit value is ${mem_value}."
+ fi
+else
+ echo "Entered memory value is not numeric so Rerun the script."
+ exit 1
+fi
+
+mem_value=$(($mem_value * 1024 * 1024))
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Setting ${mem_value} to memory.limit_in_bytes for ${LOC}/${cgroup_name}."
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.limit_in_bytes
+ #Set memory value to memory.memsw.limit_in_bytes
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.memsw.limit_in_bytes
+ # disable oom_control so that kernel will not send kill signal to the
+ # task once limit has reached
+ echo 1 > ${LOC}/${cgroup_name}/memory.oom_control
+else
+ #Increase mem_value to memory.memsw.limit_in_bytes
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.memsw.limit_in_bytes
+ echo "Increase ${mem_value} to memory.limit_in_bytes for ${LOC}/${cgroup_name}."
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.limit_in_bytes
+ # disable oom_control so that kernel will not send kill signal to the
+ # task once limit has reached
+ echo 1 > ${LOC}/${cgroup_name}/memory.oom_control
+fi
+
+if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+ for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
+ do
+ echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+ done
+ if cat /proc/${daemon_pid}/cgroup | grep -iw ${cgroup_name} > /dev/null; then
+ echo "Tasks are attached successfully specific to ${daemon_pid} to ${cgroup_name}."
+ else
+ echo "Tasks are not attached successfully."
+ fi
+fi
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index da8a3e5..56a62a9 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1553,6 +1553,8 @@ exit 0
%{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
%if ( 0%{?_with_systemd:1} )
%{_libexecdir}/glusterfs/mount-shared-storage.sh
+ %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+ %{_datadir}/glusterfs/scripts/control-mem.sh
%endif
# Incrementalapi
@@ -2178,6 +2180,9 @@ fi
%endif
%changelog
+* Fri Dec 01 2017 Mohit Agrawal <moagrawa@redhat.com>
+- Added control-cpu-load.sh and control-mem.sh scripts to glusterfs-server section(#1484446)
+
* Mon Nov 13 2017 Jiffin Tony Thottan <jthottan@redhat.com>
- Adding ganesha bits back in gluster repository #1499784
--
1.8.3.1

View File

@ -0,0 +1,533 @@
From 3b1c45188c7260ae3dda4bcedd7cb81566f1f2ea Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Sat, 15 Jul 2017 17:55:14 +0530
Subject: [PATCH 092/128] posix: Needs to reserve disk space to prevent the
brick from getting full
Problem: Currently there is no option available at posix xlator to save the
disk from getting full
Solution: Introduce a new option storage.reserve at posix xlator to
configure disk threshold.posix xlator spawn a thread to update the
disk space status in posix private structure and same flag is checked
by every posix fop before start operation.If flag value is 1 then
it sets op_errno to ENOSPC and goto out from the fop.
> BUG: 1471366
> Change-Id: I98287cd409860f4c754fc69a332e0521bfb1b67e
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> Reviewed-on: https://review.gluster.org/17780
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
BUG: 1464350
Change-Id: I98287cd409860f4c754fc69a332e0521bfb1b67e
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124629
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 +
xlators/storage/posix/src/posix-aio.c | 1 +
xlators/storage/posix/src/posix-helpers.c | 115 ++++++++++++++++++++++++
xlators/storage/posix/src/posix-messages.h | 11 ++-
xlators/storage/posix/src/posix.c | 80 +++++++++++++++--
xlators/storage/posix/src/posix.h | 19 ++++
6 files changed, 222 insertions(+), 8 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index b15a5af..a57eb9e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2805,6 +2805,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "storage/posix",
.op_version = GD_OP_VERSION_3_12_0,
},
+ { .key = "storage.reserve",
+ .voltype = "storage/posix",
+ .op_version = GD_OP_VERSION_3_13_0,
+ },
{ .key = "storage.bd-aio",
.voltype = "storage/bd",
.op_version = GD_OP_VERSION_RHS_3_0
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
index b5ac1b9..2adafeb 100644
--- a/xlators/storage/posix/src/posix-aio.c
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -330,6 +330,7 @@ posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (fd, err);
priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_errno, op_errno, err);
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index f97c90b..826441f 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1977,6 +1977,121 @@ unlock:
UNLOCK (&priv->lock);
}
+void
+posix_disk_space_check (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ char *subvol_path = NULL;
+ int op_ret = 0;
+ int percent = 0;
+ struct statvfs buf = {0};
+ uint64_t totsz = 0;
+ uint64_t freesz = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("posix-helpers", priv, out);
+
+ subvol_path = priv->base_path;
+ percent = priv->disk_threshhold;
+
+ op_ret = sys_statvfs (subvol_path, &buf);
+
+ if (op_ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+ "statvfs failed on %s", subvol_path);
+ goto out;
+ }
+ totsz = (buf.f_blocks * buf.f_bsize);
+ freesz = (buf.f_bfree * buf.f_bsize);
+
+ if (freesz <= ((totsz * percent) / 100)) {
+ priv->disk_space_full = 1;
+ } else {
+ priv->disk_space_full = 0;
+ }
+out:
+ return;
+}
+
+
+static void *
+posix_disk_space_check_thread_proc (void *data)
+{
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ uint32_t interval = 0;
+ int ret = -1;
+
+ this = data;
+ priv = this->private;
+
+ interval = 5;
+ gf_msg_debug (this->name, 0, "disk-space thread started, "
+ "interval = %d seconds", interval);
+ while (1) {
+ /* aborting sleep() is a request to exit this thread, sleep()
+ * will normally not return when cancelled */
+ ret = sleep (interval);
+ if (ret > 0)
+ break;
+ /* prevent thread errors while doing the health-check(s) */
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+
+ /* Do the disk-check.*/
+ posix_disk_space_check (this);
+ if (!priv->disk_space_check_active)
+ goto out;
+ pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+ }
+
+out:
+ gf_msg_debug (this->name, 0, "disk space check thread exiting");
+ LOCK (&priv->lock);
+ {
+ priv->disk_space_check_active = _gf_false;
+ }
+ UNLOCK (&priv->lock);
+
+
+ return NULL;
+}
+
+void
+posix_spawn_disk_space_check_thread (xlator_t *xl)
+{
+ struct posix_private *priv = NULL;
+ int ret = -1;
+
+ priv = xl->private;
+
+ LOCK (&priv->lock);
+ {
+ /* cancel the running thread */
+ if (priv->disk_space_check_active == _gf_true) {
+ pthread_cancel (priv->disk_space_check);
+ priv->disk_space_check_active = _gf_false;
+ }
+
+ ret = gf_thread_create (&priv->disk_space_check, NULL,
+ posix_disk_space_check_thread_proc,
+ xl, "posix_reserve");
+ if (ret < 0) {
+ priv->disk_space_check_active = _gf_false;
+ gf_msg (xl->name, GF_LOG_ERROR, errno,
+ P_MSG_DISK_SPACE_CHECK_FAILED,
+ "unable to setup disk space check thread");
+ goto unlock;
+ }
+
+ /* run the thread detached, resources will be freed on exit */
+ pthread_detach (priv->disk_space_check);
+ priv->disk_space_check_active = _gf_true;
+ }
+unlock:
+ UNLOCK (&priv->lock);
+}
+
int
posix_fsyncer_pick (xlator_t *this, struct list_head *head)
{
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index ee06d6f..20cf1f0 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -45,7 +45,7 @@
*/
#define POSIX_COMP_BASE GLFS_MSGID_COMP_POSIX
-#define GLFS_NUM_MESSAGES 110
+#define GLFS_NUM_MESSAGES 111
#define GLFS_MSGID_END (POSIX_COMP_BASE + GLFS_NUM_MESSAGES + 1)
/* Messaged with message IDs */
#define glfs_msg_start_x POSIX_COMP_BASE, "Invalid: Start of messages"
@@ -955,6 +955,15 @@
*/
+#define P_MSG_DISK_SPACE_CHECK_FAILED (POSIX_COMP_BASE + 112)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index d858878..1cb0fef 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -766,6 +766,7 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct posix_fd *pfd = NULL;
gf_boolean_t locked = _gf_false;
posix_inode_ctx_t *ctx = NULL;
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -775,6 +776,9 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, ret, ret, out);
+
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd);
@@ -1073,20 +1077,27 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int32_t ret = 0;
struct iatt statpre = {0,};
struct iatt statpost = {0,};
+ struct posix_private *priv = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
ret = posix_do_zerofill (frame, this, fd, offset, len,
&statpre, &statpost, xdata);
- if (ret < 0) {
- goto err;
- }
+ if (ret < 0)
+ goto out;
STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
return 0;
-err:
- STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL);
+out:
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL);
return 0;
-
}
static int32_t
@@ -1354,6 +1365,7 @@ posix_mknod (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
@@ -1574,6 +1586,7 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
if (!real_path || !par_path) {
@@ -2400,6 +2413,7 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
@@ -2558,6 +2572,7 @@ posix_rename (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL);
@@ -2840,6 +2855,7 @@ posix_link (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf);
@@ -3049,6 +3065,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
@@ -3236,6 +3253,9 @@ posix_open (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
+ if (flags & O_CREAT)
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
if (!real_path) {
op_ret = -1;
@@ -3559,6 +3579,7 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
@@ -3698,6 +3719,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
struct statvfs buf = {0, };
struct posix_private * priv = NULL;
int shared_by = 1;
+ int percent = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -3722,6 +3744,9 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
goto out;
}
+ percent = priv->disk_threshhold;
+ buf.f_bfree = (buf.f_bfree - ((buf.f_blocks * percent) / 100));
+
shared_by = priv->shared_brick_count;
if (shared_by > 1) {
buf.f_blocks /= shared_by;
@@ -3879,6 +3904,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
#endif
priv = this->private;
+
if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
posix_batch_fsync (frame, this, fd, datasync, xdata);
return 0;
@@ -3983,6 +4009,7 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
ssize_t acl_size = 0;
dict_t *xattr = NULL;
posix_xattr_filler_t filler = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -3992,6 +4019,9 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
VALIDATE_OR_GOTO (dict, out);
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
MAKE_INODE_HANDLE (real_path, this, loc, NULL);
if (!real_path) {
op_ret = -1;
@@ -5346,6 +5376,7 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
struct iatt stbuf = {0,};
dict_t *xattr = NULL;
posix_xattr_filler_t filler = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -5355,6 +5386,9 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd, out);
VALIDATE_OR_GOTO (dict, out);
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
@@ -6018,11 +6052,17 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
dict_t *xattr_rsp = NULL;
dict_t *xdata_rsp = NULL;
struct iatt stbuf = {0};
+ struct posix_private *priv = NULL;
+
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (xattr, out);
VALIDATE_OR_GOTO (this, out);
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
+
if (fd) {
op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (op_ret < 0) {
@@ -6120,7 +6160,6 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this,
return 0;
}
-
int
posix_access (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t mask, dict_t *xdata)
@@ -6944,6 +6983,11 @@ notify (xlator_t *this,
pthread_cancel (priv->health_check);
priv->health_check = 0;
}
+ if (priv->disk_space_check) {
+ priv->disk_space_check_active = _gf_false;
+ pthread_cancel (priv->disk_space_check);
+ priv->disk_space_check = 0;
+ }
if (priv->janitor) {
(void) gf_thread_cleanup_xint (priv->janitor);
priv->janitor = 0;
@@ -7140,6 +7184,11 @@ reconfigure (xlator_t *this, dict_t *options)
" fallback to <hostname>:<export>");
}
+ GF_OPTION_RECONF ("reserve", priv->disk_threshhold,
+ options, uint32, out);
+ if (priv->disk_threshhold)
+ posix_spawn_disk_space_check_thread (this);
+
GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
options, uint32, out);
posix_spawn_health_check_thread (this);
@@ -7738,6 +7787,13 @@ init (xlator_t *this)
" fallback to <hostname>:<export>");
}
+ _private->disk_space_check_active = _gf_false;
+ _private->disk_space_full = 0;
+ GF_OPTION_INIT ("reserve",
+ _private->disk_threshhold, uint32, out);
+ if (_private->disk_threshhold)
+ posix_spawn_disk_space_check_thread (this);
+
_private->health_check_active = _gf_false;
GF_OPTION_INIT ("health-check-interval",
_private->health_check_interval, uint32, out);
@@ -7940,6 +7996,16 @@ struct volume_options options[] = {
.description = "Interval in seconds for a filesystem health check, "
"set to 0 to disable"
},
+ {
+ .key = {"reserve"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "1",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Value in percentage in integer form required "
+ "to set reserve disk, "
+ "set to 0 to disable"
+ },
{ .key = {"batch-fsync-mode"},
.type = GF_OPTION_TYPE_STR,
.default_value = "reverse-fsync",
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index c2dcfda..21c7d36 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -63,6 +63,18 @@
#define GF_UNLINK_TRUE 0x0000000000000001
#define GF_UNLINK_FALSE 0x0000000000000000
+#define DISK_SPACE_CHECK_AND_GOTO(frame, priv, op_ret, op_errno, out) do { \
+ if (frame->root->pid >= 0 && priv->disk_space_full) { \
+ op_ret = -1; \
+ op_errno = ENOSPC; \
+ gf_msg_debug ("posix", ENOSPC, \
+ "disk space utilization reached limits" \
+ " for path %s ", priv->base_path); \
+ goto out; \
+ } \
+ } while (0)
+
+
/**
* posix_fd - internal structure common to file and directory fd's
*/
@@ -197,6 +209,11 @@ struct posix_private {
pthread_t health_check;
gf_boolean_t health_check_active;
+ uint32_t disk_threshhold;
+ uint32_t disk_space_full;
+ pthread_t disk_space_check;
+ gf_boolean_t disk_space_check_active;
+
#ifdef GF_DARWIN_HOST_OS
enum {
XATTR_NONE = 0,
@@ -304,6 +321,8 @@ __posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
off_t offset, size_t size);
void posix_spawn_health_check_thread (xlator_t *this);
+void posix_spawn_disk_space_check_thread (xlator_t *this);
+
void *posix_fsyncer (void *);
int
posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
--
1.8.3.1

View File

@ -0,0 +1,324 @@
From 9ced91d6064b0cdea9090fe6ebddbf36a492b585 Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Sat, 15 Jul 2017 17:55:14 +0530
Subject: [PATCH 093/128] posix: Ignore disk space reserve check for internal
FOPS
Problem: Currently the disk space reserve check is applicable to internal FOPs
as well; it needs to be ignored for internal FOPs.
Solution: Update the DISK_SPACE_CHECK_AND_GOTO macro at posix component.
Macro will call only while key "GLUSTERFS_INTERNAL_FOP_KEY"
exists in xdata.
> BUG: 1506083
> Change-Id: I2b0840bbf4fa14bc247855b024ca136773d68d16
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> Reviewed on https://review.gluster.org/#/c/18567
> (cherry picked from commit a320f2021ee4dcab85483dbe10d85e797bd6b3b4)
BUG: 1464350
Change-Id: I2b0840bbf4fa14bc247855b024ca136773d68d16
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124878
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/storage/posix/src/posix-aio.c | 2 +-
xlators/storage/posix/src/posix-helpers.c | 12 +++----
xlators/storage/posix/src/posix-messages.h | 2 +-
xlators/storage/posix/src/posix.c | 52 ++++++++++++++++--------------
xlators/storage/posix/src/posix.h | 7 ++--
5 files changed, 38 insertions(+), 37 deletions(-)
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
index 2adafeb..47460bc 100644
--- a/xlators/storage/posix/src/posix-aio.c
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -330,7 +330,7 @@ posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (fd, err);
priv = this->private;
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_errno, op_errno, err);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_errno, op_errno, err);
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 826441f..77affc4 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1990,10 +1990,10 @@ posix_disk_space_check (xlator_t *this)
GF_VALIDATE_OR_GOTO (this->name, this, out);
priv = this->private;
- GF_VALIDATE_OR_GOTO ("posix-helpers", priv, out);
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
subvol_path = priv->base_path;
- percent = priv->disk_threshhold;
+ percent = priv->disk_reserve;
op_ret = sys_statvfs (subvol_path, &buf);
@@ -2073,9 +2073,9 @@ posix_spawn_disk_space_check_thread (xlator_t *xl)
priv->disk_space_check_active = _gf_false;
}
- ret = gf_thread_create (&priv->disk_space_check, NULL,
- posix_disk_space_check_thread_proc,
- xl, "posix_reserve");
+ ret = gf_thread_create_detached (&priv->disk_space_check,
+ posix_disk_space_check_thread_proc,
+ xl, "posix_reserve");
if (ret < 0) {
priv->disk_space_check_active = _gf_false;
gf_msg (xl->name, GF_LOG_ERROR, errno,
@@ -2084,8 +2084,6 @@ posix_spawn_disk_space_check_thread (xlator_t *xl)
goto unlock;
}
- /* run the thread detached, resources will be freed on exit */
- pthread_detach (priv->disk_space_check);
priv->disk_space_check_active = _gf_true;
}
unlock:
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index 20cf1f0..fbae4d8 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -45,7 +45,7 @@
*/
#define POSIX_COMP_BASE GLFS_MSGID_COMP_POSIX
-#define GLFS_NUM_MESSAGES 111
+#define GLFS_NUM_MESSAGES 112
#define GLFS_MSGID_END (POSIX_COMP_BASE + GLFS_NUM_MESSAGES + 1)
/* Messaged with message IDs */
#define glfs_msg_start_x POSIX_COMP_BASE, "Invalid: Start of messages"
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 1cb0fef..d0433ec 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -777,7 +777,7 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (fd, out);
priv = this->private;
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, ret, ret, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, ret, ret, out);
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
@@ -824,6 +824,8 @@ out:
locked = _gf_false;
}
SET_TO_OLD_FS_ID ();
+ if (ret == ENOSPC)
+ ret = -ENOSPC;
return ret;
}
@@ -1079,18 +1081,21 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
struct iatt statpost = {0,};
struct posix_private *priv = NULL;
int op_ret = -1;
- int op_errno = -1;
+ int op_errno = -EINVAL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
priv = this->private;
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
ret = posix_do_zerofill (frame, this, fd, offset, len,
&statpre, &statpost, xdata);
- if (ret < 0)
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
goto out;
+ }
STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
return 0;
@@ -1365,13 +1370,12 @@ posix_mknod (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
-
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
gid = frame->root->gid;
SET_FS_ID (frame->root->uid, gid);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
if (!real_path || !par_path) {
op_ret = -1;
@@ -1586,7 +1590,7 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
if (!real_path || !par_path) {
@@ -2413,7 +2417,7 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
@@ -2572,7 +2576,7 @@ posix_rename (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL);
@@ -2855,7 +2859,7 @@ posix_link (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf);
@@ -3065,7 +3069,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
@@ -3254,7 +3258,7 @@ posix_open (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
if (flags & O_CREAT)
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
if (!real_path) {
@@ -3579,7 +3583,7 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
@@ -3744,7 +3748,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
goto out;
}
- percent = priv->disk_threshhold;
+ percent = priv->disk_reserve;
buf.f_bfree = (buf.f_bfree - ((buf.f_blocks * percent) / 100));
shared_by = priv->shared_brick_count;
@@ -4020,7 +4024,7 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (dict, out);
priv = this->private;
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
MAKE_INODE_HANDLE (real_path, this, loc, NULL);
if (!real_path) {
@@ -5387,7 +5391,7 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (dict, out);
priv = this->private;
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
@@ -6060,8 +6064,7 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
VALIDATE_OR_GOTO (this, out);
priv = this->private;
- DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
-
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out);
if (fd) {
op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
@@ -7184,9 +7187,9 @@ reconfigure (xlator_t *this, dict_t *options)
" fallback to <hostname>:<export>");
}
- GF_OPTION_RECONF ("reserve", priv->disk_threshhold,
+ GF_OPTION_RECONF ("reserve", priv->disk_reserve,
options, uint32, out);
- if (priv->disk_threshhold)
+ if (priv->disk_reserve)
posix_spawn_disk_space_check_thread (this);
GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
@@ -7790,8 +7793,8 @@ init (xlator_t *this)
_private->disk_space_check_active = _gf_false;
_private->disk_space_full = 0;
GF_OPTION_INIT ("reserve",
- _private->disk_threshhold, uint32, out);
- if (_private->disk_threshhold)
+ _private->disk_reserve, uint32, out);
+ if (_private->disk_reserve)
posix_spawn_disk_space_check_thread (this);
_private->health_check_active = _gf_false;
@@ -8002,9 +8005,8 @@ struct volume_options options[] = {
.min = 0,
.default_value = "1",
.validate = GF_OPT_VALIDATE_MIN,
- .description = "Value in percentage in integer form required "
- "to set reserve disk, "
- "set to 0 to disable"
+ .description = "Percentage of disk space to be reserved."
+ " Set to 0 to disable"
},
{ .key = {"batch-fsync-mode"},
.type = GF_OPTION_TYPE_STR,
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 21c7d36..777adac 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -63,8 +63,9 @@
#define GF_UNLINK_TRUE 0x0000000000000001
#define GF_UNLINK_FALSE 0x0000000000000000
-#define DISK_SPACE_CHECK_AND_GOTO(frame, priv, op_ret, op_errno, out) do { \
- if (frame->root->pid >= 0 && priv->disk_space_full) { \
+#define DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out) do { \
+ if (frame->root->pid >= 0 && priv->disk_space_full && \
+ !dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { \
op_ret = -1; \
op_errno = ENOSPC; \
gf_msg_debug ("posix", ENOSPC, \
@@ -209,7 +210,7 @@ struct posix_private {
pthread_t health_check;
gf_boolean_t health_check_active;
- uint32_t disk_threshhold;
+ uint32_t disk_reserve;
uint32_t disk_space_full;
pthread_t disk_space_check;
gf_boolean_t disk_space_check_active;
--
1.8.3.1

View File

@ -0,0 +1,675 @@
From 46a4c05ce998a72a006f79ddac4e1ad2384e66bb Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 4 Sep 2017 16:57:25 +0530
Subject: [PATCH 094/128] cluster/afr: Fail open on split-brain
Problem:
Append on a file with split-brain succeeds. Open is intercepted by open-behind,
when write comes on the file, open-behind does open+write. Open succeeds
because afr doesn't fail it. Then write succeeds because write-behind
intercepts it. Flush is also intercepted by write-behind, so the application
never gets to know that the write failed.
Fix:
Fail open on split-brain, so that when open-behind does open+write open fails
which leads to write failure. Application will know about this failure.
> Change-Id: I4bff1c747c97bb2925d6987f4ced5f1ce75dbc15
> BUG: 1294051
> Upstream-patch: https://review.gluster.org/13075
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Change-Id: I4bff1c747c97bb2925d6987f4ced5f1ce75dbc15
BUG: 1277924
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124882
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
---
tests/basic/afr/split-brain-open.t | 38 ++++++++++
tests/bugs/nfs/bug-974972.t | 1 +
xlators/cluster/afr/src/afr-common.c | 77 ++++++++++++++++++--
xlators/cluster/afr/src/afr-inode-write.c | 2 +-
xlators/cluster/afr/src/afr-open.c | 93 +++++++++++++++++-------
xlators/cluster/afr/src/afr-self-heal-common.c | 11 ++-
xlators/cluster/afr/src/afr-self-heal-data.c | 58 ++++++++++++++-
xlators/cluster/afr/src/afr-self-heal-metadata.c | 4 +-
xlators/cluster/afr/src/afr-self-heal-name.c | 2 +-
xlators/cluster/afr/src/afr-self-heal.h | 2 +-
xlators/cluster/afr/src/afr-self-heald.c | 6 +-
xlators/cluster/afr/src/afr-transaction.c | 43 +----------
xlators/cluster/afr/src/afr.h | 6 +-
13 files changed, 248 insertions(+), 95 deletions(-)
create mode 100644 tests/basic/afr/split-brain-open.t
diff --git a/tests/basic/afr/split-brain-open.t b/tests/basic/afr/split-brain-open.t
new file mode 100644
index 0000000..9b2f285
--- /dev/null
+++ b/tests/basic/afr/split-brain-open.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume heal $V0 disable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST touch $M0/data-split-brain.txt
+
+#Create data split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+`echo "brick1_alive" > $M0/data-split-brain.txt`
+TEST [ $? == 0 ];
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+`echo "brick0_alive" > $M0/data-split-brain.txt`
+TEST [ $? == 0 ];
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+echo "all-alive" >> $M0/data-split-brain.txt
+TEST [ $? != 0 ];
+
+cleanup;
diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t
index d05e7df..7047825 100755
--- a/tests/bugs/nfs/bug-974972.t
+++ b/tests/bugs/nfs/bug-974972.t
@@ -11,6 +11,7 @@ TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.eager-lock off
TEST $CLI volume set $V0 nfs.disable false
TEST $CLI volume start $V0
EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index a8ba5a0..692f198 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -254,8 +254,9 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
local->transaction.in_flight_sb = _gf_true;
metadatamap |= (1 << index);
}
- if (metadatamap_old != metadatamap)
+ if (metadatamap_old != metadatamap) {
event = 0;
+ }
break;
case AFR_DATA_TRANSACTION:
@@ -283,19 +284,71 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
return ret;
}
-int
-afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, inode_t *inode)
+gf_boolean_t
+afr_is_symmetric_error (call_frame_t *frame, xlator_t *this)
{
- int ret = -1;
+ afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ int op_errno = 0;
+ int i_errno = 0;
+ gf_boolean_t matching_errors = _gf_true;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != -1) {
+ /* Operation succeeded on at least one subvol,
+ so it is not a failed-everywhere situation.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+ i_errno = local->replies[i].op_errno;
+
+ if (i_errno == ENOTCONN) {
+ /* ENOTCONN is not a symmetric error. We do not
+ know if the operation was performed on the
+ backend or not.
+ */
+ matching_errors = _gf_false;
+ break;
+ }
+
+ if (!op_errno) {
+ op_errno = i_errno;
+ } else if (op_errno != i_errno) {
+ /* Mismatching op_errno's */
+ matching_errors = _gf_false;
+ break;
+ }
+ }
+
+ return matching_errors;
+}
+
+int
+afr_set_in_flight_sb_status (xlator_t *this, call_frame_t *frame,
+ inode_t *inode)
+{
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
priv = this->private;
+ local = frame->local;
/* If this transaction saw no failures, then exit. */
if (AFR_COUNT (local->transaction.failed_subvols,
priv->child_count) == 0)
return 0;
+ if (afr_is_symmetric_error (frame, this))
+ return 0;
+
LOCK (&inode->lock);
{
ret = __afr_set_in_flight_sb_status (this, local, inode);
@@ -548,8 +601,9 @@ afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
}
} else {
/* For files, abort in case of data/metadata split-brain. */
- if (!data_count || !metadata_count)
+ if (!data_count || !metadata_count) {
return -EIO;
+ }
}
if (type == AFR_METADATA_TRANSACTION && readable)
@@ -1958,6 +2012,11 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
GF_FREE (local->cont.opendir.checksum);
}
+ { /* open */
+ if (local->cont.open.fd)
+ fd_unref (local->cont.open.fd);
+ }
+
{ /* readdirp */
if (local->cont.readdir.dict)
dict_unref (local->cont.readdir.dict);
@@ -2535,9 +2594,11 @@ afr_lookup_metadata_heal_check (call_frame_t *frame, xlator_t *this)
if (!afr_can_start_metadata_self_heal (frame, this))
goto out;
- heal = afr_frame_create (this);
- if (!heal)
+ heal = afr_frame_create (this, &ret);
+ if (!heal) {
+ ret = -ret;
goto out;
+ }
ret = synctask_new (this->ctx->env, afr_lookup_sh_metadata_wrap,
afr_refresh_selfheal_done, heal, frame);
@@ -2630,7 +2691,7 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
}
if (need_heal) {
- heal = afr_frame_create (this);
+ heal = afr_frame_create (this, NULL);
if (!heal)
goto metadata_heal;
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 6651e92..97397f9 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -131,7 +131,7 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
}
}
- afr_set_in_flight_sb_status (this, local, local->inode);
+ afr_set_in_flight_sb_status (this, frame, local->inode);
}
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 7a62835..6c625cc 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -66,16 +66,15 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-
int
afr_open_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
fd_t *fd, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
local = frame->local;
fd_ctx = local->fd_ctx;
@@ -103,24 +102,62 @@ afr_open_cbk (call_frame_t *frame, void *cookie,
fd, 0, NULL);
} else {
AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd,
- local->xdata_rsp);
+ local->op_errno, local->cont.open.fd,
+ local->xdata_rsp);
}
}
return 0;
}
+
+int
+afr_open_continue (call_frame_t *frame, xlator_t *this, int err)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err) {
+ AFR_STACK_UNWIND (open, frame, -1, -err, NULL, NULL);
+ } else {
+ local->call_count = AFR_COUNT (local->child_up,
+ priv->child_count);
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_open_cbk,
+ (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->open,
+ &local->loc,
+ (local->cont.open.flags & ~O_TRUNC),
+ local->cont.open.fd,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
int
afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int spb_choice = 0;
+ int event_generation = 0;
+ int ret = 0;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
//We can't let truncation to happen outside transaction.
@@ -140,23 +177,27 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (!afr_is_consistent_io_possible (local, priv, &op_errno))
goto out;
- local->fd = fd_ref (fd);
+ local->inode = inode_ref (loc->inode);
+ loc_copy (&local->loc, loc);
local->fd_ctx = fd_ctx;
fd_ctx->flags = flags;
-
- call_count = local->call_count;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
local->cont.open.flags = flags;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- loc, (flags & ~O_TRUNC), fd, xdata);
- if (!--call_count)
- break;
- }
+ local->cont.open.fd = fd_ref (fd);
+
+ ret = afr_inode_get_readable (frame, local->inode, this,
+ NULL, &event_generation,
+ AFR_DATA_TRANSACTION);
+ if ((ret < 0) &&
+ (afr_inode_split_brain_choice_get (local->inode,
+ this, &spb_choice) == 0) &&
+ spb_choice < 0) {
+ afr_inode_refresh (frame, this, local->inode,
+ local->inode->gfid, afr_open_continue);
+ } else {
+ afr_open_continue (frame, this, 0);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 20e81dd..26d3860 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -66,9 +66,9 @@ afr_lookup_and_heal_gfid (xlator_t *this, inode_t *parent, const char *name,
goto out;
}
- frame = afr_frame_create (this);
+ frame = afr_frame_create (this, &ret);
if (!frame) {
- ret = -ENOMEM;
+ ret = -ret;
goto out;
}
@@ -2349,18 +2349,17 @@ afr_inode_find (xlator_t *this, uuid_t gfid)
call_frame_t *
-afr_frame_create (xlator_t *this)
+afr_frame_create (xlator_t *this, int32_t *op_errno)
{
call_frame_t *frame = NULL;
afr_local_t *local = NULL;
- int op_errno = 0;
pid_t pid = GF_CLIENT_PID_SELF_HEALD;
frame = create_frame (this, this->ctx->pool);
if (!frame)
return NULL;
- local = AFR_FRAME_INIT (frame, op_errno);
+ local = AFR_FRAME_INIT (frame, (*op_errno));
if (!local) {
STACK_DESTROY (frame->root);
return NULL;
@@ -2490,7 +2489,7 @@ afr_selfheal (xlator_t *this, uuid_t gfid)
call_frame_t *frame = NULL;
afr_local_t *local = NULL;
- frame = afr_frame_create (this);
+ frame = afr_frame_create (this, NULL);
if (!frame)
return ret;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 2c254e8..8cf43f2 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -776,13 +776,37 @@ out:
return ret;
}
+int
+afr_selfheal_data_open_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int i = (long) cookie;
+
+ local = frame->local;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+
+ syncbarrier_wake (&local->barrier);
+
+ return 0;
+}
int
afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd)
{
- int ret = 0;
- fd_t *fd_tmp = NULL;
- loc_t loc = {0,};
+ int ret = 0;
+ fd_t *fd_tmp = NULL;
+ loc_t loc = {0,};
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
fd_tmp = fd_create (inode, 0);
if (!fd_tmp)
@@ -791,7 +815,31 @@ afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd)
loc.inode = inode_ref (inode);
gf_uuid_copy (loc.gfid, inode->gfid);
- ret = syncop_open (this, &loc, O_RDWR|O_LARGEFILE, fd_tmp, NULL, NULL);
+ frame = afr_frame_create (this, &ret);
+ if (!frame) {
+ ret = -ret;
+ fd_unref (fd_tmp);
+ goto out;
+ }
+ local = frame->local;
+
+ AFR_ONLIST (local->child_up, frame, afr_selfheal_data_open_cbk, open,
+ &loc, O_RDWR|O_LARGEFILE, fd_tmp, NULL);
+
+ ret = -ENOTCONN;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret < 0) {
+ ret = -local->replies[i].op_errno;
+ continue;
+ }
+
+ ret = 0;
+ break;
+ }
+
if (ret < 0) {
fd_unref (fd_tmp);
goto out;
@@ -802,6 +850,8 @@ afr_selfheal_data_open (xlator_t *this, inode_t *inode, fd_t **fd)
*fd = fd_tmp;
out:
loc_wipe (&loc);
+ if (frame)
+ AFR_STACK_DESTROY (frame);
return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index f23cf8e..199f896 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -486,9 +486,9 @@ afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf)
goto out;
}
- frame = afr_frame_create (this);
+ frame = afr_frame_create (this, &ret);
if (!frame) {
- ret = -ENOMEM;
+ ret = -ret;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 352d151..556d14b 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -670,7 +670,7 @@ afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname,
if (!parent)
goto out;
- frame = afr_frame_create (this);
+ frame = afr_frame_create (this, NULL);
if (!frame)
goto out;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index a1da433..188a334 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -209,7 +209,7 @@ afr_selfheal_post_op (call_frame_t *frame, xlator_t *this, inode_t *inode,
int subvol, dict_t *xattr, dict_t *xdata);
call_frame_t *
-afr_frame_create (xlator_t *this);
+afr_frame_create (xlator_t *this, int32_t *op_errno);
inode_t *
afr_inode_find (xlator_t *this, uuid_t gfid);
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 74c9bb6..19cde88 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -260,7 +260,7 @@ afr_shd_zero_xattrop (xlator_t *this, uuid_t gfid)
int raw[AFR_NUM_CHANGE_LOGS] = {0};
priv = this->private;
- frame = afr_frame_create (this);
+ frame = afr_frame_create (this, NULL);
if (!frame)
goto out;
inode = afr_inode_find (this, gfid);
@@ -457,9 +457,9 @@ afr_shd_index_sweep (struct subvol_healer *healer, char *vgfid)
priv = healer->this->private;
subvol = priv->children[healer->subvol];
- frame = afr_frame_create (healer->this);
+ frame = afr_frame_create (healer->this, &ret);
if (!frame) {
- ret = -ENOMEM;
+ ret = -ret;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 91c4f78..a04636f 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -626,51 +626,10 @@ afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
return _gf_true;
}
-
void
afr_handle_symmetric_errors (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int op_errno = 0;
- int i_errno = 0;
- gf_boolean_t matching_errors = _gf_true;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
- if (local->replies[i].op_ret != -1) {
- /* Operation succeeded on at least on subvol,
- so it is not a failed-everywhere situation.
- */
- matching_errors = _gf_false;
- break;
- }
- i_errno = local->replies[i].op_errno;
-
- if (i_errno == ENOTCONN) {
- /* ENOTCONN is not a symmetric error. We do not
- know if the operation was performed on the
- backend or not.
- */
- matching_errors = _gf_false;
- break;
- }
-
- if (!op_errno) {
- op_errno = i_errno;
- } else if (op_errno != i_errno) {
- /* Mismatching op_errno's */
- matching_errors = _gf_false;
- break;
- }
- }
-
- if (matching_errors)
+ if (afr_is_symmetric_error (frame, this))
__mark_all_success (frame, this);
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 672d053..0a06eb6 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -519,6 +519,7 @@ typedef struct _afr_local {
struct {
int32_t flags;
+ fd_t *fd;
} open;
struct {
@@ -1214,7 +1215,7 @@ int
afr_get_msg_id (char *op_type);
int
-afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
+afr_set_in_flight_sb_status (xlator_t *this, call_frame_t *frame,
inode_t *inode);
int32_t
@@ -1272,4 +1273,7 @@ afr_write_subvol_set (call_frame_t *frame, xlator_t *this);
int
afr_write_subvol_reset (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_is_symmetric_error (call_frame_t *frame, xlator_t *this);
#endif /* __AFR_H__ */
--
1.8.3.1

View File

@ -0,0 +1,309 @@
From 5c8fd80edb0133cad6ae10c2a6dc23b660b9fa38 Mon Sep 17 00:00:00 2001
From: Anoop C S <anoopcs@redhat.com>
Date: Wed, 11 Oct 2017 13:29:13 +0530
Subject: [PATCH 095/128] extras/hooks: Fix errors reported via shellcheck
utility
> Change-Id: I217c6b2a39955f1709bb3452b00d33c2dcb60faa
> BUG: 1500649
> Upstream: https://review.gluster.org/#/c/18485/
Change-Id: I217c6b2a39955f1709bb3452b00d33c2dcb60faa
BUG: 1444820
Signed-off-by: Anoop C S <anoopcs@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124640
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
.../add-brick/post/disabled-quota-root-xattr-heal.sh | 2 +-
.../add-brick/pre/S28Quota-enable-root-xattr-heal.sh | 8 ++++----
extras/hook-scripts/create/post/S10selinux-label-brick.sh | 4 ++--
extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh | 7 ++-----
extras/hook-scripts/set/post/S30samba-set.sh | 12 ++++++------
.../set/post/S32gluster_enable_shared_storage.sh | 4 ++--
extras/hook-scripts/start/post/S29CTDBsetup.sh | 4 ++--
extras/hook-scripts/start/post/S30samba-start.sh | 10 +++++-----
extras/hook-scripts/stop/pre/S29CTDB-teardown.sh | 4 ++--
extras/hook-scripts/stop/pre/S30samba-stop.sh | 8 ++++----
10 files changed, 30 insertions(+), 33 deletions(-)
diff --git a/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh b/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
index bde7249..ce81816 100755
--- a/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
+++ b/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
@@ -71,7 +71,7 @@ get_and_set_xattr ()
##------------------------------------------
## Parse the arguments
##------------------------------------------
-ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true;
diff --git a/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh b/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
index 348f34e..38af73f 100755
--- a/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+++ b/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
@@ -29,7 +29,7 @@ VERSION=
ENABLED_NAME="S28Quota-root-xattr-heal.sh"
DISABLED_NAME="disabled-quota-root-xattr-heal.sh"
-enable ()
+activate ()
{
ln -sf $DISABLED_STATE $1;
}
@@ -37,7 +37,7 @@ enable ()
##------------------------------------------
## Parse the arguments
##------------------------------------------
-ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true;
@@ -92,9 +92,9 @@ FLAG=`cat $GLUSTERD_WORKDIR/vols/$VOL_NAME/info | grep "^status=" \
| awk -F'=' '{print $NF}'`;
if [ "$FLAG" != "1" ]
then
- enable $ENABLED_STATE_START;
+ activate $ENABLED_STATE_START;
exit $?
fi
-enable $ENABLED_STATE_ADD_BRICK;
+activate $ENABLED_STATE_ADD_BRICK;
exit $?
diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
index f38555c..94c624d 100755
--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh
+++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
@@ -14,7 +14,7 @@ OPTSPEC="volname:"
VOL=
parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true; do
@@ -53,7 +53,7 @@ set_brick_labels()
SELINUX_STATE=$(which getenforce && getenforce)
[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
-parse_args $@
+parse_args "$@"
[ -z "$VOL" ] && exit 1
set_brick_labels $VOL
diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
index 2c83331..7851e70 100755
--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
+++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
@@ -13,12 +13,9 @@
PROGNAME="Sselinux"
OPTSPEC="volname:"
VOL=
-CONFIGFILE=
-LOGFILEBASE=
-PIDDIR=
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true; do
@@ -53,7 +50,7 @@ function delete_brick_fcontext()
SELINUX_STATE=$(which getenforce && getenforce)
[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
-parse_args $@
+parse_args "$@"
[ -z "$VOL" ] && exit 1
delete_brick_fcontext $VOL
diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh
index 97d067f..c21cfb5 100755
--- a/extras/hook-scripts/set/post/S30samba-set.sh
+++ b/extras/hook-scripts/set/post/S30samba-set.sh
@@ -28,7 +28,7 @@ USERSMB_SET=""
USERCIFS_SET=""
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC --name $PROGNAME -o "o:" -- $@)
+ ARGS=$(getopt -l $OPTSPEC --name $PROGNAME -o "o:" -- "$@")
eval set -- "$ARGS"
while true; do
@@ -123,23 +123,23 @@ function get_smb () {
usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\
cut -d"=" -f2)
- if [[ $usercifsvalue = "disable" || $usersmbvalue = "disable" ]]; then
+ if [ $usercifsvalue = "disable" ] || [ $usersmbvalue = "disable" ]; then
uservalue="disable"
fi
echo "$uservalue"
}
-parse_args $@
-if [ "0" = $(is_volume_started "$VOL") ]; then
+parse_args "$@"
+if [ "0" = "$(is_volume_started "$VOL")" ]; then
exit 0
fi
-if [[ "$USERCIFS_SET" = "YES" || "$USERSMB_SET" = "YES" ]]; then
+if [ "$USERCIFS_SET" = "YES" ] || [ "$USERSMB_SET" = "YES" ]; then
#Find smb.conf, smbd pid directory and smbd logfile path
find_config_info
- if [ $(get_smb "$VOL") = "disable" ]; then
+ if [ "$(get_smb "$VOL")" = "disable" ]; then
del_samba_share $VOL
sighup_samba
else
diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
index c0aa735..885ed03 100755
--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
@@ -104,7 +104,7 @@ function check_volume_status()
echo $status
}
-mount_cmd="mount -t glusterfs "$local_node_hostname":/gluster_shared_storage \
+mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
/var/run/gluster/shared_storage"
if [ "$option" == "enable" ]; then
@@ -117,7 +117,7 @@ if [ "$option" == "enable" ]; then
if [ "$retry" == 3 ]; then
break;
fi
- status = check_volume_status;
+ status=$(check_volume_status)
done
# Mount the volume on all the nodes
umount /var/run/gluster/shared_storage
diff --git a/extras/hook-scripts/start/post/S29CTDBsetup.sh b/extras/hook-scripts/start/post/S29CTDBsetup.sh
index 4265cba..330ce74 100755
--- a/extras/hook-scripts/start/post/S29CTDBsetup.sh
+++ b/extras/hook-scripts/start/post/S29CTDBsetup.sh
@@ -21,7 +21,7 @@ VOL=
META="all"
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true; do
@@ -55,7 +55,7 @@ function add_fstab_entry () {
fi
}
-parse_args $@
+parse_args "$@"
if [ "$META" = "$VOL" ]
then
mkdir -p $CTDB_MNT
diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh
index 3e0f257..d6b94e6 100755
--- a/extras/hook-scripts/start/post/S30samba-start.sh
+++ b/extras/hook-scripts/start/post/S30samba-start.sh
@@ -29,7 +29,7 @@ PIDDIR=
GLUSTERD_WORKDIR=
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true; do
@@ -57,8 +57,8 @@ function find_config_info () {
echo "Samba is not installed"
exit 1
fi
- CONFIGFILE=`echo $cmdout | awk {'print $2'}`
- PIDDIR=`smbd -b | grep PIDDIR | awk {'print $2'}`
+ CONFIGFILE=`echo $cmdout | awk '{print $2}'`
+ PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'`
LOGFILEBASE=`smbd -b | grep 'LOGFILEBASE' | awk '{print $2}'`
}
@@ -95,13 +95,13 @@ function get_smb () {
usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\
cut -d"=" -f2)
- if [[ $usercifsvalue = "disable" || $usersmbvalue = "disable" ]]; then
+ if [ $usercifsvalue = "disable" ] || [ $usersmbvalue = "disable" ]; then
uservalue="disable"
fi
echo "$uservalue"
}
-parse_args $@
+parse_args "$@"
if [ "$(get_smb "$VOL")" = "disable" ]; then
exit 0
fi
diff --git a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
index 5fb49bd..e9116c8 100755
--- a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
+++ b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
@@ -12,7 +12,7 @@ VOL=
META="all"
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true; do
@@ -51,7 +51,7 @@ function remove_fstab_entry () {
fi
}
-parse_args $@
+parse_args "$@"
if [ "$META" = "$VOL" ]
then
umount "$CTDB_MNT"
diff --git a/extras/hook-scripts/stop/pre/S30samba-stop.sh b/extras/hook-scripts/stop/pre/S30samba-stop.sh
index 62cf7d1..6e542da 100755
--- a/extras/hook-scripts/stop/pre/S30samba-stop.sh
+++ b/extras/hook-scripts/stop/pre/S30samba-stop.sh
@@ -22,7 +22,7 @@ CONFIGFILE=
PIDDIR=
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true; do
@@ -46,8 +46,8 @@ function find_config_info () {
echo "Samba is not installed"
exit 1
fi
- CONFIGFILE=`echo $cmdout | awk {'print $2'}`
- PIDDIR=`smbd -b | grep PIDDIR | awk {'print $2'}`
+ CONFIGFILE=`echo $cmdout | awk '{print $2}'`
+ PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'`
}
function del_samba_share () {
@@ -65,7 +65,7 @@ function sighup_samba () {
fi
}
-parse_args $@
+parse_args "$@"
find_config_info
del_samba_share $VOL
sighup_samba
--
1.8.3.1

View File

@ -0,0 +1,225 @@
From 281f33b36d3ac39869c313e5c6ba4909ae2d74e0 Mon Sep 17 00:00:00 2001
From: Anoop C S <anoopcs@redhat.com>
Date: Wed, 25 Oct 2017 12:21:12 +0530
Subject: [PATCH 096/128] extras/hooks: Honour all input arguments to scripts
Some of the hook scripts were not honouring the arguments with which
they are invoked during various volume operations. So make sure that
we consider everything while parsing the command line arguments to
avoid following warnings:
. . .
ame: unrecognized option '--first=no'
ame: unrecognized option '--version=1'
ame: unrecognized option '--volume-op=start'
. . .
> Change-Id: I5b08e5e7f32908c8509e97098a042096b507783e
> BUG: 1503983
> Upstream: https://review.gluster.org/#/c/18569/
Change-Id: I5b08e5e7f32908c8509e97098a042096b507783e
BUG: 1444820
Signed-off-by: Anoop C S <anoopcs@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124642
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/hook-scripts/start/post/S29CTDBsetup.sh | 32 ++++++++++++++++++------
extras/hook-scripts/start/post/S30samba-start.sh | 18 ++++++++++++-
extras/hook-scripts/stop/pre/S29CTDB-teardown.sh | 18 +++++++------
extras/hook-scripts/stop/pre/S30samba-stop.sh | 30 +++++++++++++---------
4 files changed, 70 insertions(+), 28 deletions(-)
diff --git a/extras/hook-scripts/start/post/S29CTDBsetup.sh b/extras/hook-scripts/start/post/S29CTDBsetup.sh
index 330ce74..54a4c0c 100755
--- a/extras/hook-scripts/start/post/S29CTDBsetup.sh
+++ b/extras/hook-scripts/start/post/S29CTDBsetup.sh
@@ -9,10 +9,14 @@ CTDB_MNT=/gluster/lock
# Make sure ping-timeout is not default for CTDB volume
PING_TIMEOUT_SECS=10
PROGNAME="ctdb"
-OPTSPEC="volname:"
+OPTSPEC="volname:,gd-workdir:,version:,volume-op:,first:"
HOSTNAME=`hostname`
MNTOPTS="_netdev,transport=tcp,xlator-option=*client*.ping-timeout=${PING_TIMEOUT_SECS}"
VOL=
+GLUSTERD_WORKDIR=
+VERSION=
+VOLUME_OP=
+FIRST=
# $META is the volume that will be used by CTDB as a shared filesystem.
# It is not desirable to use this volume for storing 'data' as well.
# META is set to 'all' (viz. a keyword and hence not a legal volume name)
@@ -29,13 +33,27 @@ function parse_args () {
--volname)
shift
VOL=$1
- ;;
-
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ --first)
+ shift
+ FIRST=$1
+ ;;
*)
- shift
- break
- ;;
-
+ shift
+ break
+ ;;
esac
shift
diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh
index d6b94e6..a448dbd 100755
--- a/extras/hook-scripts/start/post/S30samba-start.sh
+++ b/extras/hook-scripts/start/post/S30samba-start.sh
@@ -21,12 +21,15 @@
#volume.
PROGNAME="Ssamba-start"
-OPTSPEC="volname:,gd-workdir:"
+OPTSPEC="volname:,gd-workdir:,version:,volume-op:,first:"
VOL=
CONFIGFILE=
LOGFILEBASE=
PIDDIR=
GLUSTERD_WORKDIR=
+VERSION=
+VOLUME_OP=
+FIRST=
function parse_args () {
ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
@@ -42,11 +45,24 @@ function parse_args () {
shift
GLUSTERD_WORKDIR=$1
;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ --first)
+ shift
+ FIRST=$1
+ ;;
*)
shift
break
;;
esac
+
shift
done
}
diff --git a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
index e9116c8..12f49da 100755
--- a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
+++ b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
@@ -2,8 +2,9 @@
CTDB_MNT=/gluster/lock
PROGNAME="ctdb"
-OPTSPEC="volname:"
+OPTSPEC="volname:,last:"
VOL=
+LAST=
# $META is the volume that will be used by CTDB as a shared filesystem.
# It is not desirable to use this volume for storing 'data' as well.
# META is set to 'all' (viz. a keyword and hence not a legal volume name)
@@ -20,15 +21,16 @@ function parse_args () {
--volname)
shift
VOL=$1
- ;;
-
+ ;;
+ --last)
+ shift
+ LAST=$1
+ ;;
*)
- shift
- break
- ;;
-
+ shift
+ break
+ ;;
esac
-
shift
done
}
diff --git a/extras/hook-scripts/stop/pre/S30samba-stop.sh b/extras/hook-scripts/stop/pre/S30samba-stop.sh
index 6e542da..a5c8dd5 100755
--- a/extras/hook-scripts/stop/pre/S30samba-stop.sh
+++ b/extras/hook-scripts/stop/pre/S30samba-stop.sh
@@ -16,27 +16,33 @@
#event by removing the volume related entries(if any) in smb.conf file.
PROGNAME="Ssamba-stop"
-OPTSPEC="volname:"
+OPTSPEC="volname:,last:"
VOL=
CONFIGFILE=
PIDDIR=
+LAST=
function parse_args () {
ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
eval set -- "$ARGS"
while true; do
- case $1 in
- --volname)
- shift
- VOL=$1
- ;;
- *)
- shift
- break
- ;;
- esac
- shift
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --last)
+ shift
+ LAST=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+
+ shift
done
}
--
1.8.3.1

View File

@ -0,0 +1,164 @@
From ca93504c637b922cd633f0f2422b18b828982332 Mon Sep 17 00:00:00 2001
From: Anoop C S <anoopcs@redhat.com>
Date: Thu, 19 Oct 2017 13:40:35 +0530
Subject: [PATCH 097/128] extras/hooks: Fix getopt usage
`getopt` does not have an optional argument as '-name'. It should
be either '-n' or '--name'(see man getopt(1)). This wrong usage
resulted in setting the script name as 'ame' instead of $PROGNAME
in most of the hook-scripts.
Additionally the following line from DESCRIPTION given for `getopt`
shell command expects short options for almost every kind of usage
mentioned in SYNOPSIS:
. . .
If no '-o' or '--options' option is found in the first part, the
first parameter of the second part is used as the short options string.
. . .
Refer http://man7.org/linux/man-pages/man1/getopt.1.html for more
clarity on its usage.
> Change-Id: I95baf5fa8c99025e66b2d83656dd838d4f6048ce
> BUG: 1503983
> Upstream: https://review.gluster.org/#/c/18548/
Change-Id: I95baf5fa8c99025e66b2d83656dd838d4f6048ce
BUG: 1444820
Signed-off-by: Anoop C S <anoopcs@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124641
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh | 2 +-
extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh | 2 +-
extras/hook-scripts/create/post/S10selinux-label-brick.sh | 2 +-
extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh | 2 +-
extras/hook-scripts/set/post/S30samba-set.sh | 2 +-
extras/hook-scripts/start/post/S29CTDBsetup.sh | 2 +-
extras/hook-scripts/start/post/S30samba-start.sh | 2 +-
extras/hook-scripts/stop/pre/S29CTDB-teardown.sh | 2 +-
extras/hook-scripts/stop/pre/S30samba-stop.sh | 2 +-
9 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh b/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
index ce81816..49ab0a6 100755
--- a/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
+++ b/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
@@ -71,7 +71,7 @@ get_and_set_xattr ()
##------------------------------------------
## Parse the arguments
##------------------------------------------
-ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
+ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true;
diff --git a/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh b/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
index 38af73f..17ae4b4 100755
--- a/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+++ b/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
@@ -37,7 +37,7 @@ activate ()
##------------------------------------------
## Parse the arguments
##------------------------------------------
-ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
+ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true;
diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
index 94c624d..6be4072 100755
--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh
+++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
@@ -14,7 +14,7 @@ OPTSPEC="volname:"
VOL=
parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
index 7851e70..6eba66f 100755
--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
+++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
@@ -15,7 +15,7 @@ OPTSPEC="volname:"
VOL=
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh
index c21cfb5..b93415b 100755
--- a/extras/hook-scripts/set/post/S30samba-set.sh
+++ b/extras/hook-scripts/set/post/S30samba-set.sh
@@ -28,7 +28,7 @@ USERSMB_SET=""
USERCIFS_SET=""
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC --name $PROGNAME -o "o:" -- "$@")
+ ARGS=$(getopt -o 'o:' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
diff --git a/extras/hook-scripts/start/post/S29CTDBsetup.sh b/extras/hook-scripts/start/post/S29CTDBsetup.sh
index 54a4c0c..69a0d89 100755
--- a/extras/hook-scripts/start/post/S29CTDBsetup.sh
+++ b/extras/hook-scripts/start/post/S29CTDBsetup.sh
@@ -25,7 +25,7 @@ FIRST=
META="all"
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh
index a448dbd..92ddaf4 100755
--- a/extras/hook-scripts/start/post/S30samba-start.sh
+++ b/extras/hook-scripts/start/post/S30samba-start.sh
@@ -32,7 +32,7 @@ VOLUME_OP=
FIRST=
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
diff --git a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
index 12f49da..0975a00 100755
--- a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
+++ b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
@@ -13,7 +13,7 @@ LAST=
META="all"
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
diff --git a/extras/hook-scripts/stop/pre/S30samba-stop.sh b/extras/hook-scripts/stop/pre/S30samba-stop.sh
index a5c8dd5..5e87845 100755
--- a/extras/hook-scripts/stop/pre/S30samba-stop.sh
+++ b/extras/hook-scripts/stop/pre/S30samba-stop.sh
@@ -23,7 +23,7 @@ PIDDIR=
LAST=
function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME "$@")
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
eval set -- "$ARGS"
while true; do
--
1.8.3.1

View File

@ -0,0 +1,138 @@
From b69a36e8c61a0e1d45b3def6436d223bd14e76ef Mon Sep 17 00:00:00 2001
From: Sunny Kumar <sunkumar@redhat.com>
Date: Tue, 28 Nov 2017 13:37:43 +0530
Subject: [PATCH 098/128] snapshot : snapshot creation failed after brick
reset/replace
Problem : snapshot creation was failing after brick reset/replace
Fix : changed code to set mount_dir value in rsp_dict during prerequisites
phase i.e glusterd_brick_op_prerequisites call and removed form prevalidate
phase.
Upstream patch : https://review.gluster.org/c/18730/
>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
>BUG: 1512451
BUG: 1507394
Change-Id: Ief5d0fafe882a7eb1a7da8535b7c7ce6f011604c
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124467
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
...51-snapshot-creation-failed-after-brick-reset.t | 39 ++++++++++++++++++++++
xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 8 -----
xlators/mgmt/glusterd/src/glusterd-reset-brick.c | 10 ------
xlators/mgmt/glusterd/src/glusterd-utils.c | 9 +++++
4 files changed, 48 insertions(+), 18 deletions(-)
create mode 100644 tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t
diff --git a/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t b/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t
new file mode 100644
index 0000000..0624a5d
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1/B1 $H2:$L2/B1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 snapshot delete ${V0}_snap1
+TEST ! snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 volume reset-brick $V0 $H1:$L1/B1 start
+TEST $CLI_1 volume reset-brick $V0 $H1:$L1/B1 $H1:$L1/B1 commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $L1/B1
+
+TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 snapshot delete ${V0}_snap1
+TEST ! snapshot_exists 1 ${V0}_snap1
+
+cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 18fc741..0e28608 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -319,14 +319,6 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
goto out;
}
- ret = dict_set_dynstr_with_alloc (rsp_dict, "brick1.mount_dir",
- dst_brickinfo->mount_dir);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SET_FAILED,
- "Failed to set brick1.mount_dir");
- goto out;
- }
ret = dict_set_int32 (rsp_dict, "brick_count", 1);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
index abb44e0..10ee6f4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
@@ -208,16 +208,6 @@ glusterd_reset_brick_prevalidate (dict_t *dict, char **op_errstr,
goto out;
}
- ret = dict_set_dynstr_with_alloc (rsp_dict,
- "brick1.mount_dir",
- dst_brickinfo->mount_dir);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SET_FAILED,
- "Failed to set brick1.mount_dir");
- goto out;
- }
-
ret = dict_set_int32 (rsp_dict, "brick_count", 1);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index f611fbb..23fc6e9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -13648,6 +13648,15 @@ glusterd_brick_op_prerequisites (dict_t *dict,
(*src_brickinfo)->port);
}
}
+ /* setting mount_dir */
+ ret = dict_set_dynstr_with_alloc (rsp_dict, "brick1.mount_dir",
+ (*src_brickinfo)->mount_dir);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set brick1.mount_dir");
+ goto out;
+ }
v = *volinfo;
b = *src_brickinfo;
--
1.8.3.1

View File

@ -0,0 +1,676 @@
From c793a7c0a3672cfab9bd927ae493bc24be8bbc09 Mon Sep 17 00:00:00 2001
From: hari gowtham <hgowtham@redhat.com>
Date: Fri, 24 Nov 2017 11:47:01 +0530
Subject: [PATCH 099/128] Tier: Stop tierd for detach start
back-port of: https://review.gluster.org/#/c/17137/
Problem: tierd was stopped only after detach commit
This makes the detach take a longer time. The detach
demotes the files to the cold brick and if the promotion
frequency is hit, then the tierd starts to promote files to
hot tier again.
Fix: stop tierd after detach start so the files get
demoted faster.
Note: the is_tier_enabled was not maintained properly.
That has been fixed too. some code clean up has been done.
>BUG: 1446381
>Change-Id: I532f7410cea04fbb960105483810ea3560ca149b
>Signed-off-by: hari gowtham <hgowtham@redhat.com>
Signed-off-by: hari gowtham <hgowtham@redhat.com>
Change-Id: I532f7410cea04fbb960105483810ea3560ca149b
BUG: 1509191
Reviewed-on: https://code.engineering.redhat.com/gerrit/125081
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/basic/tier/new-tier-cmds.t | 22 ++-
xlators/cluster/dht/src/dht-common.h | 13 +-
xlators/mgmt/glusterd/src/glusterd-messages.h | 8 +
xlators/mgmt/glusterd/src/glusterd-mgmt.c | 2 -
xlators/mgmt/glusterd/src/glusterd-tier.c | 224 +++++++++---------------
xlators/mgmt/glusterd/src/glusterd-tierd-svc.c | 65 ++++---
xlators/mgmt/glusterd/src/glusterd-utils.c | 10 ++
xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +
xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 17 +-
9 files changed, 182 insertions(+), 182 deletions(-)
diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t
index d341e62..2c48e02 100644
--- a/tests/basic/tier/new-tier-cmds.t
+++ b/tests/basic/tier/new-tier-cmds.t
@@ -14,9 +14,9 @@ function check_peers {
}
function create_dist_tier_vol () {
- TEST $CLI_1 volume create $V0 $H1:$B1/${V0} $H2:$B2/${V0} $H3:$B3/${V0}
+ TEST $CLI_1 volume create $V0 disperse 6 redundancy 2 $H1:$B1/${V0}_b1 $H2:$B2/${V0}_b2 $H3:$B3/${V0}_b3 $H1:$B1/${V0}_b4 $H2:$B2/${V0}_b5 $H3:$B3/${V0}_b6
TEST $CLI_1 volume start $V0
- TEST $CLI_1 volume tier $V0 attach $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3
+ TEST $CLI_1 volume tier $V0 attach replica 2 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3 $H1:$B1/${V0}_h4 $H2:$B2/${V0}_h5 $H3:$B3/${V0}_h6
}
function tier_daemon_status {
@@ -59,8 +59,19 @@ EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status
EXPECT "0" detach_xml_status
-#after starting detach tier the detach tier status should display the status
+#kill a node
+TEST kill_node 2
+#check if we have the rest of the node available printed in the output of detach status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status_node_down
+
+TEST $glusterd_2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
+
+#after starting detach tier the detach tier status should display the status
+sleep 2
+$CLI_1 volume status
TEST $CLI_1 volume tier $V0 detach start
EXPECT "1" detach_xml_status
@@ -73,14 +84,11 @@ TEST kill_node 2
#check if we have the rest of the node available printed in the output of detach status
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status_node_down
-#check if we have the rest of the node available printed in the output of tier status
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status_node_down
-
TEST $glusterd_2;
EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
# Make sure we check that the *bricks* are up and not just the node. >:-(
-EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 brick_up_status_1 $V0 $H2 $B2/${V0}
+EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 brick_up_status_1 $V0 $H2 $B2/${V0}_b2
EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 brick_up_status_1 $V0 $H2 $B2/${V0}_h2
# Parsing normal output doesn't work because of line-wrap issues on our
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 6056060..e2afd6c 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -393,10 +393,17 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
- GF_DEFRAG_CMD_DETACH_START = 1 + 12,
- GF_DEFRAG_CMD_DETACH_STOP = 1 + 13,
+ GF_DEFRAG_CMD_STOP_TIER = 1 + 12,
+ GF_DEFRAG_CMD_DETACH_START = 1 + 13,
+ GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
+ GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
+ GF_DEFRAG_CMD_DETACH_STOP = 1 + 16,
/* new labels are used so it will help
- * while removing old labels by easily differentiating
+ * while removing old labels by easily differentiating.
+ * A few labels are added so that the count remains same
+ * between this enum and the ones on the xdr file.
+ * different values for the same enum cause errors and
+ * confusion.
*/
};
typedef enum gf_defrag_type gf_defrag_type;
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
index 225d59b..4ccf299 100644
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -4976,6 +4976,14 @@
*/
#define GD_MSG_CHANGELOG_GET_FAIL (GLUSTERD_COMP_BASE + 614)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+#define GD_MSG_MANAGER_FUNCTION_FAILED (GLUSTERD_COMP_BASE + 614)
+
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
index 13a4526..8bc1f1b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -539,8 +539,6 @@ gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict,
goto out;
}
- volinfo->is_tier_enabled = _gf_true;
-
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, errno,
GD_MSG_DICT_SET_FAILED, "dict set "
diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c
index 28f02e75..15c4808 100644
--- a/xlators/mgmt/glusterd/src/glusterd-tier.c
+++ b/xlators/mgmt/glusterd/src/glusterd-tier.c
@@ -244,116 +244,6 @@ glusterd_handle_tier (rpcsvc_request_t *req)
return glusterd_big_locked_handler (req, __glusterd_handle_tier);
}
-
-static int
-glusterd_manage_tier (glusterd_volinfo_t *volinfo, int opcode)
-{
- int ret = -1;
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO (THIS->name, this, out);
- GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO (this->name, priv, out);
-
- switch (opcode) {
- case GF_DEFRAG_CMD_START_TIER:
- case GF_DEFRAG_CMD_STOP_TIER:
- ret = volinfo->tierd.svc.manager (&(volinfo->tierd.svc),
- volinfo, PROC_START_NO_WAIT);
- break;
- default:
- ret = 0;
- break;
- }
-
-out:
- return ret;
-
-}
-
-static int
-glusterd_tier_enable (glusterd_volinfo_t *volinfo, char **op_errstr)
-{
- int32_t ret = -1;
- xlator_t *this = NULL;
- int32_t tier_online = -1;
- char pidfile[PATH_MAX] = {0};
- int32_t pid = -1;
- glusterd_conf_t *priv = NULL;
-
- this = THIS;
-
- GF_VALIDATE_OR_GOTO (THIS->name, this, out);
- GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
- GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO (this->name, priv, out);
-
- if (glusterd_is_volume_started (volinfo) == 0) {
- *op_errstr = gf_strdup ("Volume is stopped, start volume "
- "to enable tier.");
- ret = -1;
- goto out;
- }
-
- GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
- tier_online = gf_is_service_running (pidfile, &pid);
-
- if (tier_online) {
- *op_errstr = gf_strdup ("tier is already enabled");
- ret = -1;
- goto out;
- }
-
- volinfo->is_tier_enabled = _gf_true;
-
- ret = 0;
-out:
- if (ret && op_errstr && !*op_errstr)
- gf_asprintf (op_errstr, "Enabling tier on volume %s has been "
- "unsuccessful", volinfo->volname);
- return ret;
-}
-
-static int
-glusterd_tier_disable (glusterd_volinfo_t *volinfo, char **op_errstr)
-{
- int32_t ret = -1;
- xlator_t *this = NULL;
- int32_t tier_online = -1;
- char pidfile[PATH_MAX] = {0};
- int32_t pid = -1;
- glusterd_conf_t *priv = NULL;
-
- this = THIS;
-
- GF_VALIDATE_OR_GOTO (THIS->name, this, out);
- GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
- GF_VALIDATE_OR_GOTO (this->name, op_errstr, out);
- priv = this->private;
-
- GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
- tier_online = gf_is_service_running (pidfile, &pid);
-
- if (!tier_online) {
- *op_errstr = gf_strdup ("tier is already disabled");
- ret = -1;
- goto out;
- }
-
- volinfo->is_tier_enabled = _gf_false;
-
- ret = 0;
-out:
- if (ret && op_errstr && !*op_errstr)
- gf_asprintf (op_errstr, "Disabling tier volume %s has "
- "been unsuccessful", volinfo->volname);
- return ret;
-}
-
int
glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
@@ -455,6 +345,19 @@ glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
continue;
brickinfo->decommissioned = 0;
}
+ volinfo->tier.op = GD_OP_DETACH_NOT_STARTED;
+ ret = volinfo->tierd.svc.manager (&(volinfo->tierd.svc),
+ volinfo,
+ PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MANAGER_FUNCTION_FAILED,
+ "Calling manager for tier "
+ "failed on volume: %s for "
+ "detach stop", volinfo->volname);
+ goto out;
+ }
+
ret = glusterd_create_volfiles_and_notify_services
(volinfo);
@@ -473,22 +376,24 @@ glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"failed to store volinfo");
goto out;
}
- ret = glusterd_tierdsvc_restart ();
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_TIERD_START_FAIL,
- "Couldn't restart tierd for "
- "vol: %s", volinfo->volname);
- goto out;
- }
-
- volinfo->tier.op = GD_OP_DETACH_NOT_STARTED;
ret = 0;
goto out;
case GF_DEFRAG_CMD_DETACH_START:
+ volinfo->tier.op = GD_OP_DETACH_TIER;
+ svc = &(volinfo->tierd.svc);
+ ret = svc->manager (svc, volinfo,
+ PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MANAGER_FUNCTION_FAILED,
+ "calling manager for tier "
+ "failed on volume: %s for "
+ "detach start", volname);
+ goto out;
+ }
ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY,
&task_id_str);
if (ret) {
@@ -504,8 +409,6 @@ glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
force = 0;
- volinfo->tier.op = GD_OP_DETACH_TIER;
- volinfo->tier.defrag_status = GF_DEFRAG_STATUS_STARTED;
break;
case GF_DEFRAG_CMD_DETACH_COMMIT:
@@ -522,6 +425,19 @@ glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
+ if (cmd == GF_DEFRAG_CMD_DETACH_COMMIT_FORCE) {
+ svc = &(volinfo->tierd.svc);
+ ret = svc->manager (svc, volinfo,
+ PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MANAGER_FUNCTION_FAILED,
+ "calling manager for tier "
+ "failed on volume: %s for "
+ "commit force", volname);
+ goto out;
+ }
+ }
glusterd_op_perform_detach_tier (volinfo);
detach_commit = 1;
@@ -700,11 +616,6 @@ glusterd_op_remove_tier_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (cmd == GF_DEFRAG_CMD_DETACH_START &&
volinfo->status == GLUSTERD_STATUS_STARTED) {
- svc = &(volinfo->tierd.svc);
- ret = svc->reconfigure (volinfo);
- if (ret)
- goto out;
-
ret = glusterd_svcs_reconfigure ();
if (ret) {
gf_msg (this->name, GF_LOG_WARNING, 0,
@@ -773,6 +684,7 @@ glusterd_op_tier_start_stop (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
glusterd_conf_t *priv = NULL;
int32_t pid = -1;
char pidfile[PATH_MAX] = {0};
+ int is_force = 0;
this = THIS;
GF_VALIDATE_OR_GOTO (THIS->name, this, out);
@@ -814,24 +726,48 @@ glusterd_op_tier_start_stop (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (!retval)
goto out;
+ if (glusterd_is_volume_started (volinfo) == 0) {
+ *op_errstr = gf_strdup ("Volume is stopped, start "
+ "volume to enable/disable tier.");
+ ret = -1;
+ goto out;
+ }
+
+ GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
+
switch (cmd) {
case GF_DEFRAG_CMD_START_TIER:
- GLUSTERD_GET_TIER_PID_FILE(pidfile, volinfo, priv);
/* we check if its running and skip so that we dont get a
* failure during force start
*/
- if (gf_is_service_running (pidfile, &pid))
- goto out;
- ret = glusterd_tier_enable (volinfo, op_errstr);
- if (ret < 0)
- goto out;
- glusterd_store_perform_node_state_store (volinfo);
+ ret = dict_get_int32 (dict, "force", &is_force);
+ if (ret) {
+ gf_msg_debug (this->name, 0, "Unable to get is_force"
+ " from dict");
+ }
+ ret = dict_set_int32 (volinfo->dict, "force", is_force);
+ if (ret) {
+ gf_msg_debug (this->name, errno, "Unable to set"
+ " is_force to dict");
+ }
+
+ if (!is_force) {
+ if (gf_is_service_running (pidfile, &pid)) {
+ gf_asprintf (op_errstr, "Tier is already "
+ "enabled on volume %s." ,
+ volinfo->volname);
+ goto out;
+ }
+ }
+
break;
case GF_DEFRAG_CMD_STOP_TIER:
- ret = glusterd_tier_disable (volinfo, op_errstr);
- if (ret < 0)
+ if (!gf_is_service_running (pidfile, &pid)) {
+ gf_asprintf (op_errstr, "Tier is alreaady disabled on "
+ "volume %s.", volinfo->volname);
goto out;
+ }
break;
default:
gf_asprintf (op_errstr, "tier command failed. Invalid "
@@ -840,7 +776,8 @@ glusterd_op_tier_start_stop (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- ret = glusterd_manage_tier (volinfo, cmd);
+ ret = volinfo->tierd.svc.manager (&(volinfo->tierd.svc),
+ volinfo, PROC_START_NO_WAIT);
if (ret)
goto out;
@@ -977,6 +914,19 @@ glusterd_op_stage_tier (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"start validate failed");
goto out;
}
+ if (volinfo->tier.op == GD_OP_DETACH_TIER) {
+ snprintf (msg, sizeof (msg), "A detach tier task "
+ "exists for volume %s. Either commit it"
+ " or stop it before starting a new task.",
+ volinfo->volname);
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_OLD_REMOVE_BRICK_EXISTS,
+ "Earlier detach-tier"
+ " task exists for volume %s.",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
break;
case GF_DEFRAG_CMD_STOP_TIER:
diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
index c75b378..a2876ae 100644
--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c
@@ -72,6 +72,8 @@ glusterd_tierdsvc_init (void *data)
notify = glusterd_svc_common_rpc_notify;
glusterd_store_perform_node_state_store (volinfo);
+ volinfo->type = GF_CLUSTER_TYPE_TIER;
+
glusterd_svc_build_tierd_rundir (volinfo, rundir, sizeof (rundir));
glusterd_svc_create_rundir (rundir);
@@ -150,6 +152,7 @@ glusterd_tierdsvc_manager (glusterd_svc_t *svc, void *data, int flags)
int ret = 0;
xlator_t *this = THIS;
glusterd_volinfo_t *volinfo = NULL;
+ int is_force = 0;
volinfo = data;
GF_VALIDATE_OR_GOTO (this->name, data, out);
@@ -169,25 +172,29 @@ glusterd_tierdsvc_manager (glusterd_svc_t *svc, void *data, int flags)
}
}
- ret = glusterd_is_tierd_enabled (volinfo);
- if (ret == -1) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_VOLINFO_GET_FAIL, "Failed to read volume "
- "options");
- goto out;
+ ret = dict_get_int32 (volinfo->dict, "force", &is_force);
+ if (ret) {
+ gf_msg_debug (this->name, errno, "Unable to get"
+ " is_force from dict");
}
+ if (is_force)
+ ret = 1;
+ else
+ ret = (glusterd_is_tierd_supposed_to_be_enabled (volinfo));
+
if (ret) {
if (!glusterd_is_volume_started (volinfo)) {
if (glusterd_proc_is_running (&svc->proc)) {
ret = svc->stop (svc, SIGTERM);
if (ret)
gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_TIERD_STOP_FAIL,
+ GD_MSG_SNAPD_STOP_FAIL,
"Couldn't stop tierd for "
"volume: %s",
volinfo->volname);
} else {
+ /* Since tierd is not running set ret to 0 */
ret = 0;
}
goto out;
@@ -209,6 +216,7 @@ glusterd_tierdsvc_manager (glusterd_svc_t *svc, void *data, int flags)
"tierd for volume: %s", volinfo->volname);
goto out;
}
+ volinfo->is_tier_enabled = _gf_true;
glusterd_volinfo_ref (volinfo);
ret = glusterd_conn_connect (&(svc->conn));
@@ -216,16 +224,19 @@ glusterd_tierdsvc_manager (glusterd_svc_t *svc, void *data, int flags)
glusterd_volinfo_unref (volinfo);
goto out;
}
-
- } else if (glusterd_proc_is_running (&svc->proc)) {
- ret = svc->stop (svc, SIGTERM);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_TIERD_STOP_FAIL,
- "Couldn't stop tierd for volume: %s",
- volinfo->volname);
- goto out;
+ } else {
+ if (glusterd_proc_is_running (&svc->proc)) {
+ ret = svc->stop (svc, SIGTERM);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TIERD_STOP_FAIL,
+ "Couldn't stop tierd for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+ volinfo->is_tier_enabled = _gf_false;
}
+ ret = 0;
}
out:
@@ -361,7 +372,6 @@ out:
return ret;
}
-
int
glusterd_tierdsvc_restart ()
{
@@ -379,15 +389,18 @@ glusterd_tierdsvc_restart ()
cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
/* Start per volume tierd svc */
if (volinfo->status == GLUSTERD_STATUS_STARTED &&
- glusterd_is_tierd_enabled (volinfo)) {
+ volinfo->type == GF_CLUSTER_TYPE_TIER) {
svc = &(volinfo->tierd.svc);
- ret = svc->manager (svc, volinfo, PROC_START_NO_WAIT);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_TIERD_START_FAIL,
- "Couldn't restart tierd for "
- "vol: %s", volinfo->volname);
- goto out;
+ if (volinfo->tier.op != GD_OP_DETACH_TIER) {
+ ret = svc->manager (svc, volinfo,
+ PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TIERD_START_FAIL,
+ "Couldn't restart tierd for "
+ "vol: %s", volinfo->volname);
+ goto out;
+ }
}
}
}
@@ -418,7 +431,7 @@ glusterd_tierdsvc_reconfigure (void *data)
this = THIS;
GF_VALIDATE_OR_GOTO (THIS->name, this, out);
- if (glusterd_is_tierd_enabled (volinfo))
+ if (!glusterd_is_tierd_enabled (volinfo))
goto manager;
/*
* Check both OLD and NEW volfiles, if they are SAME by size
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 23fc6e9..504e5af 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -11964,6 +11964,16 @@ glusterd_is_volume_inode_quota_enabled (glusterd_volinfo_t *volinfo)
}
int
+glusterd_is_tierd_supposed_to_be_enabled (glusterd_volinfo_t *volinfo)
+{
+ if ((volinfo->type != GF_CLUSTER_TYPE_TIER) ||
+ (volinfo->tier.op == GD_OP_DETACH_TIER))
+ return _gf_false;
+ else
+ return _gf_true;
+}
+
+int
glusterd_is_tierd_enabled (glusterd_volinfo_t *volinfo)
{
return volinfo->is_tier_enabled;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index b802f6c..abaec4b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -629,6 +629,9 @@ int
glusterd_is_tierd_enabled (glusterd_volinfo_t *volinfo);
int
+glusterd_is_tierd_supposed_to_be_enabled (glusterd_volinfo_t *volinfo);
+
+int
glusterd_is_volume_quota_enabled (glusterd_volinfo_t *volinfo);
int
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 222d5f4..de97e6a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2757,6 +2757,16 @@ glusterd_stop_volume (glusterd_volinfo_t *volinfo)
}
}
+ /* call tier manager before the voluem status is set as stopped
+ * as tier uses that as a check in the manager
+ * */
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ svc = &(volinfo->tierd.svc);
+ ret = svc->manager (svc, volinfo, PROC_START_NO_WAIT);
+ if (ret)
+ goto out;
+ }
+
glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED);
ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
@@ -2774,13 +2784,6 @@ glusterd_stop_volume (glusterd_volinfo_t *volinfo)
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- svc = &(volinfo->tierd.svc);
- ret = svc->manager (svc, volinfo, PROC_START_NO_WAIT);
- if (ret)
- goto out;
- }
-
ret = glusterd_svcs_manager (volinfo);
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, 0,
--
1.8.3.1

View File

@ -0,0 +1,292 @@
From dbf993f9c56c7ee995e9054b09c5552f88253bb3 Mon Sep 17 00:00:00 2001
From: Ashish Pandey <aspandey@redhat.com>
Date: Mon, 31 Jul 2017 12:45:21 +0530
Subject: [PATCH 100/128] cluster/ec: Improve heal info command to handle
obvious cases
Problem:
1 - If a brick is down and we see an index entry in
.glusterfs/indices, we should show it in heal info
output as it most certainly needs heal.
2 - The first problem is also not getting handled after
ec_heal_inspect. Even if in ec_heal_inspect, lookup will
mark need_heal as true, we don't handle it properly in
ec_get_heal_info and continue with locked inspect which
takes lot of time.
Solution:
1 - In first case we need not to do any further invstigation.
As soon as we see that a brick is down, we should say that
this index entry needs heal for sure.
2 - In second case, if we have need_heal as _gf_true after
ec_heal_inspect, we should show it as heal requires.
>Change-Id: Ibe7f9d7602cc0b382ba53bddaf75a2a2c3326aa6
>BUG: 1476668
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Upstream Patch: https://review.gluster.org/#/c/17923/
BUG: 1509833
Change-Id: Ibe7f9d7602cc0b382ba53bddaf75a2a2c3326aa6
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/125194
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
xlators/cluster/ec/src/ec-common.h | 2 +-
xlators/cluster/ec/src/ec-heal.c | 54 ++++++++++++++++++++++----------------
xlators/cluster/ec/src/ec-types.h | 9 +++++++
3 files changed, 41 insertions(+), 24 deletions(-)
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 0f7a252..dec81ca 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -132,7 +132,7 @@ int32_t
ec_heal_inspect (call_frame_t *frame, ec_t *ec,
inode_t *inode, unsigned char *locked_on,
gf_boolean_t self_locked, gf_boolean_t thorough,
- gf_boolean_t *need_heal);
+ ec_heal_need_t *need_heal);
int32_t
ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index bc25015..fd8c902 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2421,9 +2421,9 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
intptr_t bad = 0;
ec_fop_data_t *fop = data;
gf_boolean_t blocking = _gf_false;
- gf_boolean_t need_heal = _gf_false;
+ ec_heal_need_t need_heal = EC_HEAL_NONEED;
unsigned char *up_subvols = NULL;
- char up_bricks[32];
+ char up_bricks[32];
ec = this->private;
@@ -2470,7 +2470,8 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
* triggers heals periodically which need not be thorough*/
ec_heal_inspect (frame, ec, loc->inode, up_subvols, _gf_false,
!ec->shd.iamshd, &need_heal);
- if (!need_heal) {
+
+ if (need_heal == EC_HEAL_NONEED) {
gf_msg (ec->xl->name, GF_LOG_DEBUG, 0,
EC_MSG_HEAL_FAIL, "Heal is not required for : %s ",
uuid_utoa(loc->gfid));
@@ -2776,18 +2777,18 @@ out:
static int32_t
_need_heal_calculate (ec_t *ec, uint64_t *dirty, unsigned char *sources,
gf_boolean_t self_locked, int32_t lock_count,
- gf_boolean_t *need_heal)
+ ec_heal_need_t *need_heal)
{
int i = 0;
int source_count = 0;
source_count = EC_COUNT (sources, ec->nodes);
if (source_count == ec->nodes) {
- *need_heal = _gf_false;
+ *need_heal = EC_HEAL_NONEED;
if (self_locked || lock_count == 0) {
for (i = 0; i < ec->nodes; i++) {
if (dirty[i]) {
- *need_heal = _gf_true;
+ *need_heal = EC_HEAL_MUST;
goto out;
}
}
@@ -2799,13 +2800,13 @@ _need_heal_calculate (ec_t *ec, uint64_t *dirty, unsigned char *sources,
* set and this indicates a problem in the
* inode.*/
if (dirty[i] > 1) {
- *need_heal = _gf_true;
+ *need_heal = EC_HEAL_MUST;
goto out;
}
}
}
} else {
- *need_heal = _gf_true;
+ *need_heal = EC_HEAL_MUST;
}
out:
@@ -2815,7 +2816,7 @@ out:
static int32_t
ec_need_metadata_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
int32_t lock_count, gf_boolean_t self_locked,
- gf_boolean_t thorough, gf_boolean_t *need_heal)
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
{
uint64_t *dirty = NULL;
unsigned char *sources = NULL;
@@ -2836,10 +2837,10 @@ ec_need_metadata_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
ret = _need_heal_calculate (ec, dirty, sources, self_locked, lock_count,
need_heal);
- if (ret == ec->nodes && !(*need_heal)) {
+ if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
for (i = 1; i < ec->nodes; i++) {
if (meta_versions[i] != meta_versions[0]) {
- *need_heal = _gf_true;
+ *need_heal = EC_HEAL_MUST;
goto out;
}
}
@@ -2851,7 +2852,7 @@ out:
static int32_t
ec_need_data_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
int32_t lock_count, gf_boolean_t self_locked,
- gf_boolean_t thorough, gf_boolean_t *need_heal)
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
{
uint64_t *dirty = NULL;
unsigned char *sources = NULL;
@@ -2888,7 +2889,7 @@ out:
static int32_t
ec_need_entry_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
int32_t lock_count, gf_boolean_t self_locked,
- gf_boolean_t thorough, gf_boolean_t *need_heal)
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
{
uint64_t *dirty = NULL;
unsigned char *sources = NULL;
@@ -2916,7 +2917,7 @@ out:
static int32_t
ec_need_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
int32_t lock_count, gf_boolean_t self_locked,
- gf_boolean_t thorough, gf_boolean_t *need_heal)
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
{
int ret = 0;
@@ -2926,7 +2927,7 @@ ec_need_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
if (ret < 0)
goto out;
- if (*need_heal)
+ if (*need_heal == EC_HEAL_MUST)
goto out;
if (inode->ia_type == IA_IFREG) {
@@ -2945,7 +2946,7 @@ int32_t
ec_heal_inspect (call_frame_t *frame, ec_t *ec,
inode_t *inode, unsigned char *locked_on,
gf_boolean_t self_locked, gf_boolean_t thorough,
- gf_boolean_t *need_heal)
+ ec_heal_need_t *need_heal)
{
loc_t loc = {0};
int i = 0;
@@ -2989,7 +2990,7 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
if (ret != ec->nodes) {
ret = ec->nodes;
- *need_heal = _gf_true;
+ *need_heal = EC_HEAL_MUST;
goto out;
}
@@ -3009,6 +3010,9 @@ need_heal:
ret = ec_need_heal (ec, inode, replies, lock_count,
self_locked, thorough, need_heal);
+ if (!self_locked && *need_heal == EC_HEAL_MUST) {
+ *need_heal = EC_HEAL_MAYBE;
+ }
out:
cluster_replies_wipe (replies, ec->nodes);
loc_wipe (&loc);
@@ -3020,7 +3024,7 @@ out:
int32_t
ec_heal_locked_inspect (call_frame_t *frame, ec_t *ec, inode_t *inode,
- gf_boolean_t *need_heal)
+ ec_heal_need_t *need_heal)
{
unsigned char *locked_on = NULL;
unsigned char *up_subvols = NULL;
@@ -3038,7 +3042,7 @@ ec_heal_locked_inspect (call_frame_t *frame, ec_t *ec, inode_t *inode,
replies, locked_on, frame, ec->xl,
ec->xl->name, inode, 0, 0);
if (ret != ec->nodes) {
- *need_heal = _gf_true;
+ *need_heal = EC_HEAL_MUST;
goto unlock;
}
ret = ec_heal_inspect (frame, ec, inode, locked_on, _gf_true, _gf_true,
@@ -3055,7 +3059,7 @@ int32_t
ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
{
int ret = -ENOMEM;
- gf_boolean_t need_heal = _gf_false;
+ ec_heal_need_t need_heal = EC_HEAL_NONEED;
call_frame_t *frame = NULL;
ec_t *ec = NULL;
unsigned char *up_subvols = NULL;
@@ -3068,6 +3072,10 @@ ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
up_subvols = alloca0(ec->nodes);
ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes);
+ if (EC_COUNT (up_subvols, ec->nodes) != ec->nodes) {
+ need_heal = EC_HEAL_MUST;
+ goto set_heal;
+ }
frame = create_frame (this, this->ctx->pool);
if (!frame) {
goto out;
@@ -3092,16 +3100,16 @@ ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
ret = ec_heal_inspect (frame, ec, loc.inode, up_subvols, _gf_false,
_gf_false, &need_heal);
- if (ret == ec->nodes && !need_heal) {
+ if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
goto set_heal;
}
- need_heal = _gf_false;
+ need_heal = EC_HEAL_NONEED;
ret = ec_heal_locked_inspect (frame, ec, loc.inode,
&need_heal);
if (ret < 0)
goto out;
set_heal:
- if (need_heal) {
+ if (need_heal == EC_HEAL_MUST) {
ret = ec_set_heal_info (dict_rsp, "heal");
} else {
ret = ec_set_heal_info (dict_rsp, "no-heal");
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 354b4ed..a891ff5 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -17,6 +17,9 @@
#define EC_GF_MAX_REGS 16
+enum _ec_heal_need;
+typedef enum _ec_heal_need ec_heal_need_t;
+
enum _ec_read_policy;
typedef enum _ec_read_policy ec_read_policy_t;
@@ -115,6 +118,12 @@ enum _ec_read_policy {
EC_READ_POLICY_MAX
};
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST
+};
+
struct _ec_config {
uint32_t version;
uint8_t algorithm;
--
1.8.3.1

View File

@ -0,0 +1,188 @@
From a4f3087ecbd1979525add83a149acaf2443d8e59 Mon Sep 17 00:00:00 2001
From: Xavier Hernandez <jahernan@redhat.com>
Date: Wed, 22 Nov 2017 11:10:32 +0100
Subject: [PATCH 101/128] cluster/ec: Prevent self-heal to work after
PARENT_DOWN
When the volume is being stopped, PARENT_DOWN event is received.
This instructs EC to wait until all pending operations are completed
before declaring itself down. However heal operations are ignored
and allowed to continue even after having said it was down.
This may cause unexpected results and crashes.
To solve this, heal operations are considered exactly equal as any
other operation and EC won't propagate PARENT_DOWN until all
operations, including healing, are complete. To avoid big delays
if this happens in the middle of a big heal, a check has been
added to quit current heal if shutdown is detected.
>Change-Id: I26645e236ebd115eb22c7ad4972461111a2d2034
>BUG: 1515266
>Signed-off-by: Xavier Hernandez <jahernan@redhat.com>
Upstream Patch: https://review.gluster.org/#/c/18840/
BUG: 1505570
Change-Id: I26645e236ebd115eb22c7ad4972461111a2d2034
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/125199
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
xlators/cluster/ec/src/ec-data.c | 21 ++------------
xlators/cluster/ec/src/ec-heal.c | 59 +++++++++++++++++++++++++++++++++-------
2 files changed, 52 insertions(+), 28 deletions(-)
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index 28bf988..54c708a 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -103,19 +103,6 @@ void ec_cbk_data_destroy(ec_cbk_data_t * cbk)
mem_put(cbk);
}
-/* PARENT_DOWN will be notified to children only after these fops are complete
- * when graph switch happens. We do not want graph switch to be waiting on
- * heal to complete as healing big file/directory could take a while. Which
- * will lead to hang on the mount.
- */
-static gf_boolean_t
-ec_needs_graceful_completion (ec_fop_data_t *fop)
-{
- if ((fop->id != EC_FOP_HEAL) && (fop->id != EC_FOP_FHEAL))
- return _gf_true;
- return _gf_false;
-}
-
ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
int32_t id, uint32_t flags,
uintptr_t target, int32_t minimum,
@@ -202,13 +189,11 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
fop->parent = parent;
}
- if (ec_needs_graceful_completion (fop)) {
- LOCK(&ec->lock);
+ LOCK(&ec->lock);
- list_add_tail(&fop->pending_list, &ec->pending_fops);
+ list_add_tail(&fop->pending_list, &ec->pending_fops);
- UNLOCK(&ec->lock);
- }
+ UNLOCK(&ec->lock);
return fop;
}
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index fd8c902..b8518d6 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -1418,6 +1418,12 @@ ec_name_heal_handler (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int i = 0;
int ret = 0;
+ if (ec->shutdown) {
+ gf_msg_debug(this->name, 0, "Cancelling directory heal "
+ "because EC is stopping.");
+ return -ENOTCONN;
+ }
+
memcpy (name_on, name_data->participants, ec->nodes);
ret = ec_heal_name (name_data->frame, ec, parent->inode,
entry->d_name, name_on);
@@ -1439,6 +1445,7 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode,
int j = 0;
loc_t loc = {0};
struct ec_name_data name_data = {0};
+ int ret = 0;
loc.inode = inode_ref (inode);
gf_uuid_copy (loc.gfid, inode->gfid);
@@ -1449,18 +1456,23 @@ ec_heal_names (call_frame_t *frame, ec_t *ec, inode_t *inode,
for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
continue;
- syncop_dir_scan (ec->xl_list[i], &loc,
- GF_CLIENT_PID_SELF_HEALD, &name_data,
- ec_name_heal_handler);
+ ret = syncop_dir_scan (ec->xl_list[i], &loc,
+ GF_CLIENT_PID_SELF_HEALD, &name_data,
+ ec_name_heal_handler);
+ if (ret < 0) {
+ break;
+ }
for (j = 0; j < ec->nodes; j++)
if (name_data.failed_on[j])
participants[j] = 0;
- if (EC_COUNT (participants, ec->nodes) <= ec->fragments)
- return -ENOTCONN;
+ if (EC_COUNT (participants, ec->nodes) <= ec->fragments) {
+ ret = -ENOTCONN;
+ break;
+ }
}
loc_wipe (&loc);
- return 0;
+ return ret;
}
int
@@ -1999,6 +2011,17 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
for (heal->offset = 0; (heal->offset < size) && !heal->done;
heal->offset += heal->size) {
+ /* We immediately abort any heal if a shutdown request has been
+ * received to avoid delays. The healing of this file will be
+ * restarted by another SHD or other client that accesses the
+ * file. */
+ if (ec->shutdown) {
+ gf_msg_debug(ec->xl->name, 0, "Cancelling heal because "
+ "EC is stopping.");
+ ret = -ENOTCONN;
+ break;
+ }
+
gf_msg_debug (ec->xl->name, 0, "%s: sources: %d, sinks: "
"%d, offset: %"PRIu64" bsize: %"PRIu64,
uuid_utoa (fd->inode->gfid),
@@ -2595,16 +2618,32 @@ ec_handle_healers_done (ec_fop_data_t *fop)
return;
LOCK (&ec->lock);
- {
- list_del_init (&fop->healer);
+
+ list_del_init (&fop->healer);
+
+ do {
ec->healers--;
heal_fop = __ec_dequeue_heals (ec);
- }
+
+ if ((heal_fop != NULL) && ec->shutdown) {
+ /* This will prevent ec_handle_healers_done() to be
+ * called recursively. That would be problematic if
+ * the queue is too big. */
+ list_del_init(&heal_fop->healer);
+
+ UNLOCK(&ec->lock);
+
+ ec_fop_set_error(fop, ENOTCONN);
+ ec_heal_fail(ec, heal_fop);
+
+ LOCK(&ec->lock);
+ }
+ } while ((heal_fop != NULL) && ec->shutdown);
+
UNLOCK (&ec->lock);
if (heal_fop)
ec_launch_heal (ec, heal_fop);
-
}
void
--
1.8.3.1

View File

@ -0,0 +1,261 @@
From b72f9f0d7da441db7e144f58459d98aa4838d032 Mon Sep 17 00:00:00 2001
From: Csaba Henk <csaba@redhat.com>
Date: Fri, 15 Dec 2017 08:02:30 +0100
Subject: [PATCH 102/128] libglusterfs: fix the call_stack_set_group() function
- call_stack_set_group() will take the ownership of passed
buffer from caller;
- to indicate the change, its signature is changed from
including the buffer directly to take a pointer to it;
- either the content of the buffer is copied to the
groups_small embedded buffer of the call stack, or
the buffer is set as groups_large member of the call
stack;
- the groups member of the call stack is set to,
respectively, groups_small or groups_large, according
to the memory management conventions of the call stack;
- the buffer address is overwritten with junk to effectively
prevent the caller from using it further on.
Also move call_stack_set_group to stack.c from stack.h
to prevent "defined but not used [-Wunused-function]"
warnings (not using it anymore in call_stack_alloc_group()
implementation, which saved us from this so far).
protocol/server: refactor gid_resolve()
In gid_resolve there are two cases:
either the gid_cache_lookup() call returns
a value or not. The result is caputured in
the agl variable, and throughout the function,
each particular stage of the implementation
comes with an agl and a no-agl variant.
In most cases this is explicitly indicated
via an
if (agl) {
...
} else {
...
}
but some of this branching are expressed via
goto constructs (obfuscating the fact we stated
above, that is, each particular stage having
an agl/no-agl variant).
In the current refactor, we bring the agl
conditional to the top, and present the
agl/non-agl implementations sequentially.
Also we take the opportunity to clean up and
fix the agl case:
- remove the spurious
gl.gl_list = agl->gl_list;
setting, as gl is not used in the agl caae
- populate the group list of call stack from
agl, fixing thus referred BUG.
Also fixes BUG: 1513920
> Change-Id: I61f4574ba21969f7661b9ff0c9dce202b874025d
> BUG: 1513928
> Signed-off-by: Csaba Henk <csaba@redhat.com>
> Reviewed-on: https://review.gluster.org/18789
Change-Id: I61f4574ba21969f7661b9ff0c9dce202b874025d
BUG: 1512963
Signed-off-by: Csaba Henk <csaba@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/125931
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
libglusterfs/src/stack.c | 20 +++++++++
libglusterfs/src/stack.h | 14 +++---
xlators/mount/fuse/src/fuse-helpers.c | 2 +-
xlators/protocol/server/src/server-helpers.c | 65 +++++++++++++---------------
4 files changed, 57 insertions(+), 44 deletions(-)
diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c
index 6977814..d64ac8a 100644
--- a/libglusterfs/src/stack.c
+++ b/libglusterfs/src/stack.c
@@ -65,6 +65,26 @@ create_frame (xlator_t *xl, call_pool_t *pool)
}
void
+call_stack_set_groups (call_stack_t *stack, int ngrps, gid_t **groupbuf_p)
+{
+ /* We take the ownership of the passed group buffer. */
+
+ if (ngrps <= SMALL_GROUP_COUNT) {
+ memcpy (stack->groups_small, *groupbuf_p,
+ sizeof (gid_t) * ngrps);
+ stack->groups = stack->groups_small;
+ GF_FREE (*groupbuf_p);
+ } else {
+ stack->groups_large = *groupbuf_p;
+ stack->groups = stack->groups_large;
+ }
+
+ stack->ngrps = ngrps;
+ /* Set a canary. */
+ *groupbuf_p = (void *)0xdeadf00d;
+}
+
+void
gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...)
{
diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h
index eb5848e..50a6fc7 100644
--- a/libglusterfs/src/stack.h
+++ b/libglusterfs/src/stack.h
@@ -357,26 +357,21 @@ STACK_RESET (call_stack_t *stack)
} while (0)
-static void
-call_stack_set_groups (call_stack_t *stack, int ngrps, gid_t *groupbuf)
-{
- stack->groups = groupbuf;
- stack->ngrps = ngrps;
-}
-
static inline int
call_stack_alloc_groups (call_stack_t *stack, int ngrps)
{
if (ngrps <= SMALL_GROUP_COUNT) {
- call_stack_set_groups (stack, ngrps, stack->groups_small);
+ stack->groups = stack->groups_small;
} else {
stack->groups_large = GF_CALLOC (ngrps, sizeof (gid_t),
gf_common_mt_groups_t);
if (!stack->groups_large)
return -1;
- call_stack_set_groups (stack, ngrps, stack->groups_large);
+ stack->groups = stack->groups_large;
}
+ stack->ngrps = ngrps;
+
return 0;
}
@@ -465,6 +460,7 @@ copy_frame (call_frame_t *frame)
return newframe;
}
+void call_stack_set_groups (call_stack_t *stack, int ngrps, gid_t **groupbuf_p);
void gf_proc_dump_pending_frames(call_pool_t *call_pool);
void gf_proc_dump_pending_frames_to_dict (call_pool_t *call_pool,
dict_t *dict);
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
index 3fc6b16..c59ff77 100644
--- a/xlators/mount/fuse/src/fuse-helpers.c
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -181,7 +181,7 @@ frame_fill_groups (call_frame_t *frame)
return;
}
- call_stack_set_groups (frame->root, ngroups, mygroups);
+ call_stack_set_groups (frame->root, ngroups, &mygroups);
} else {
ret = snprintf (filename, sizeof filename, "/proc/%d/status",
frame->root->pid);
diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
index 51eb491..c8f5382 100644
--- a/xlators/protocol/server/src/server-helpers.c
+++ b/xlators/protocol/server/src/server-helpers.c
@@ -31,13 +31,24 @@ gid_resolve (server_conf_t *conf, call_stack_t *root)
struct passwd *result;
gid_t *mygroups;
gid_list_t gl;
- const gid_list_t *agl;
int ngroups;
+ const gid_list_t *agl;
agl = gid_cache_lookup (&conf->gid_cache, root->uid, 0, 0);
if (agl) {
root->ngrps = agl->gl_count;
- goto fill_groups;
+
+ if (root->ngrps > 0) {
+ ret = call_stack_alloc_groups (root, agl->gl_count);
+ if (ret == 0) {
+ memcpy (root->groups, agl->gl_list,
+ sizeof (gid_t) * agl->gl_count);
+ }
+ }
+
+ gid_cache_release (&conf->gid_cache, agl);
+
+ return ret;
}
ret = getpwuid_r (root->uid, &mypw, mystrs, sizeof(mystrs), &result);
@@ -66,42 +77,28 @@ gid_resolve (server_conf_t *conf, call_stack_t *root)
}
root->ngrps = (uint16_t) ngroups;
-fill_groups:
- if (agl) {
- /* the gl is not complete, we only use gl.gl_list later on */
- gl.gl_list = agl->gl_list;
- } else {
- /* setup a full gid_list_t to add it to the gid_cache */
- gl.gl_id = root->uid;
- gl.gl_uid = root->uid;
- gl.gl_gid = root->gid;
- gl.gl_count = root->ngrps;
-
- gl.gl_list = GF_MALLOC (root->ngrps * sizeof(gid_t),
- gf_common_mt_groups_t);
- if (gl.gl_list)
- memcpy (gl.gl_list, mygroups,
- sizeof(gid_t) * root->ngrps);
- else {
- GF_FREE (mygroups);
- return -1;
- }
+ /* setup a full gid_list_t to add it to the gid_cache */
+ gl.gl_id = root->uid;
+ gl.gl_uid = root->uid;
+ gl.gl_gid = root->gid;
+ gl.gl_count = root->ngrps;
+
+ gl.gl_list = GF_MALLOC (root->ngrps * sizeof(gid_t),
+ gf_common_mt_groups_t);
+ if (gl.gl_list)
+ memcpy (gl.gl_list, mygroups,
+ sizeof(gid_t) * root->ngrps);
+ else {
+ GF_FREE (mygroups);
+ return -1;
}
- if (root->ngrps == 0) {
- ret = 0;
- goto out;
+ if (root->ngrps > 0) {
+ call_stack_set_groups (root, root->ngrps, &mygroups);
}
- call_stack_set_groups (root, root->ngrps, mygroups);
-
-out:
- if (agl) {
- gid_cache_release (&conf->gid_cache, agl);
- } else {
- if (gid_cache_add (&conf->gid_cache, &gl) != 1)
- GF_FREE (gl.gl_list);
- }
+ if (gid_cache_add (&conf->gid_cache, &gl) != 1)
+ GF_FREE (gl.gl_list);
return ret;
}
--
1.8.3.1

View File

@ -0,0 +1,116 @@
From 09c73d2c7793d2316c62837a80b41904a2a5b15f Mon Sep 17 00:00:00 2001
From: Xavier Hernandez <jahernan@redhat.com>
Date: Mon, 20 Nov 2017 10:51:09 +0100
Subject: [PATCH 103/128] features/locks: Fix memory leaks
> Upstream patch: https://review.gluster.org/18812
Change-Id: Ic1d2e17a7d14389b6734d1b88bd28c0a2907bbd6
BUG: 1507361
Signed-off-by: Xavier Hernandez <jahernan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/125955
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/features/locks/src/clear.c | 2 +-
xlators/features/locks/src/common.c | 1 +
xlators/features/locks/src/entrylk.c | 3 ++-
xlators/features/locks/src/inodelk.c | 7 ++++---
xlators/features/locks/src/posix.c | 3 +++
5 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index d7c210f..22c03b5 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -184,7 +184,7 @@ clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
} else {
gcount++;
}
- GF_FREE (plock);
+ __destroy_lock(plock);
}
}
pthread_mutex_unlock (&pl_inode->mutex);
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 4b63c43..7311126 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -438,6 +438,7 @@ pl_inode_get (xlator_t *this, inode_t *inode)
ret = __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
if (ret) {
+ pthread_mutex_destroy (&pl_inode->mutex);
GF_FREE (pl_inode);
pl_inode = NULL;
goto unlock;
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 4231d76..8e9008b 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -784,7 +784,7 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this,
break;
default:
- inode_unref (pinode->inode);
+ need_inode_unref = _gf_true;
gf_log (this->name, GF_LOG_ERROR,
"Unexpected case in entrylk (cmd=%d). Please file"
"a bug report at http://bugs.gluster.com", cmd);
@@ -960,6 +960,7 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx)
__pl_entrylk_unref (l);
}
pthread_mutex_unlock (&pinode->mutex);
+
inode_unref (pinode->inode);
}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index e1702c7..4aebac6 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -721,9 +721,6 @@ out:
if (ctx)
pthread_mutex_unlock (&ctx->lock);
- if (need_inode_unref)
- inode_unref (pl_inode->inode);
-
/* The following (extra) unref corresponds to the ref that
* was done at the time the lock was granted.
*/
@@ -732,6 +729,10 @@ out:
grant_blocked_inode_locks (this, pl_inode, dom);
}
+ if (need_inode_unref) {
+ inode_unref (pl_inode->inode);
+ }
+
return ret;
}
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index ff9a3da..a158227 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -2472,6 +2472,7 @@ pl_forget (xlator_t *this,
list) {
list_del (&rw_req->list);
+ call_stub_destroy(rw_req->stub);
GF_FREE (rw_req);
}
}
@@ -2555,6 +2556,8 @@ pl_forget (xlator_t *this,
}
+ pthread_mutex_destroy(&pl_inode->mutex);
+
GF_FREE (pl_inode);
return 0;
--
1.8.3.1

View File

@ -0,0 +1,61 @@
From ea71f09cd6ea45a2d1525519843fc553f0e46bec Mon Sep 17 00:00:00 2001
From: Zhang Huan <zhanghuan@open-fs.com>
Date: Tue, 5 Sep 2017 11:36:25 +0800
Subject: [PATCH 104/128] cluster/dht: fix crash when deleting directories
In DHT, after locks on all subvolumes are acquired, it would perform the
following steps sequentially,
1. send remove dir on all other subvolumes except the hashed one in a loop;
2. wait for all pending rmdir to be done
3. remove dir on the hashed subvolume
The problem is that in step 1 there is a check to skip hashed subvolume
in the loop. If the last subvolume to check is actually the
hashed one, and step 3 is quickly done before the last and hashed
subvolume is checked, accessing shared context data that was destroyed in
step 3 would cause a crash.
Fix by saving shared data in a local variable to access later in the
loop.
> BUG: 1490642
> Signed-off-by: Zhang Huan <zhanghuan@open-fs.com>
(cherry picked from commit 206120126d455417a81a48ae473d49be337e9463)
Change-Id: I8db7cf7cb262d74efcb58eb00f02ea37df4be4e2
BUG: 1519076
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124755
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index c6944b2..f611278 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -8094,6 +8094,7 @@ dht_rmdir_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
int i = 0;
+ xlator_t *hashed_subvol;
VALIDATE_OR_GOTO (this->private, err);
@@ -8111,9 +8112,10 @@ dht_rmdir_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto err;
}
+ hashed_subvol = local->hashed_subvol;
for (i = 0; i < conf->subvolume_cnt; i++) {
- if (local->hashed_subvol &&
- (local->hashed_subvol == conf->subvolumes[i]))
+ if (hashed_subvol &&
+ (hashed_subvol == conf->subvolumes[i]))
continue;
STACK_WIND_COOKIE (frame, dht_rmdir_cbk, conf->subvolumes[i],
--
1.8.3.1

View File

@ -0,0 +1,105 @@
From ad7ea067e2f7f9e7fb533ddf67e9c1f3c70e222f Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Thu, 7 Dec 2017 10:32:05 +0530
Subject: [PATCH 105/128] glusterd : Fix glusterd mem leaks
Problem: glusterd eats a huge amount of memory during volume set/stop/start.
Solution: At the time of compare graph topology create a graph and populate
key values in the dictionary, after finished graph comparison we
do destroy the new graph.At the time of construct graph we don't take
any reference and for server xlators we do take reference in
server_setvolume so in glusterd we do take reference after prepare
a new graph while we do create a graph to compare graph topology.
> BUG: 1520245
> Change-Id: I573133d57771b7dc431a04422c5001a06b7dda9a
> Reviewed on https://review.gluster.org/#/c/18915/
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> (cherry pick from commit e016bcaf8171373cbc327faf42a6b2f2c5449b0e)
BUG: 1512470
Change-Id: Id9aa37146f3ae887f4d06492edad6dedcafc6681
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126229
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-handshake.c | 3 +++
xlators/mgmt/glusterd/src/glusterd-utils.c | 34 ++++++++++++++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index 8dfb528..35aeca3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -1256,6 +1256,9 @@ out:
if (rsp.hndsk.hndsk_val)
GF_FREE (rsp.hndsk.hndsk_val);
+ if (args_dict)
+ dict_unref (args_dict);
+
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 504e5af..1434d64 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -8971,6 +8971,36 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
return ret;
}
+/*
+ The function is required to take dict ref for every xlator at graph.
+ At the time of compare graph topology create a graph and populate
+ key values in the dictionary, after finished graph comparison we do destroy
+ the new graph.At the time of construct graph we don't take any reference
+ so to avoid leak due to ref counter underflow we need to call dict_ref here.
+
+*/
+
+void
+glusterd_graph_take_reference (xlator_t *tree)
+{ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND,
+ "Translator tree not found");
+ return;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ if (prev->options)
+ dict_ref (prev->options);
+ prev = trav;
+ }
+ return;
+}
+
+
int
glusterd_check_topology_identical (const char *filename1,
@@ -9018,11 +9048,15 @@ glusterd_check_topology_identical (const char *filename1,
if (grph1 == NULL)
goto out;
+ glusterd_graph_take_reference (grph1->first);
+
/* create the graph for filename2 */
grph2 = glusterfs_graph_construct(fp2);
if (grph2 == NULL)
goto out;
+ glusterd_graph_take_reference (grph2->first);
+
/* compare the graph topology */
*identical = is_graph_topology_equal(grph1, grph2);
ret = 0; /* SUCCESS */
--
1.8.3.1

View File

@ -0,0 +1,42 @@
From 86db0fb0570a0a0f2b55280c9d1be581f16ab312 Mon Sep 17 00:00:00 2001
From: moagrawa <moagrawa@redhat.com>
Date: Fri, 15 Dec 2017 17:21:59 +0530
Subject: [PATCH 106/128] glusterd: Free up svc->conn on volume delete
Daemons snapd/tierd are maintained on a per-volume basis, and on a volume
delete we should destroy the rpc connection established for the same.
Change-Id: Id1440e39da07b990fdb9b207df18da04b1ca8014
> BUG: 1522775
> Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
> Reviewed on https://review.gluster.org/18957
> (Cherry pick from commit 36ce4c614a3391043a3417aa061d0aa16e60b2d3)
BUG: 1512470
Signed-off-by: moagrawa <moagrawa@redhat.com>
Change-Id: I21426893bc1c326bf51cb83a68b0ae3c31ea5b9a
Reviewed-on: https://code.engineering.redhat.com/gerrit/126230
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-utils.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 1434d64..4b53898 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1029,6 +1029,10 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo)
if (volinfo->rebal.dict)
dict_unref (volinfo->rebal.dict);
+ /* Destroy the connection object for per volume svc daemons */
+ glusterd_conn_term (&volinfo->snapd.svc.conn);
+ glusterd_conn_term (&volinfo->tierd.svc.conn);
+
gf_store_handle_destroy (volinfo->quota_conf_shandle);
gf_store_handle_destroy (volinfo->shandle);
gf_store_handle_destroy (volinfo->node_state_shandle);
--
1.8.3.1

View File

@ -0,0 +1,165 @@
From d2d4750a70b30174a52f63a39b174f48ce0879e9 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Mon, 11 Dec 2017 16:14:03 +0530
Subject: [PATCH 107/128] feature/bitrot: remove internal xattrs from lookup
cbk
Backport of https://review.gluster.org/#/c/19021/
Problem:
afr requests all xattrs in lookup via the list-xattr key. If bitrot is
enabled and later disabled, or if the bitrot xattrs were present due to
an older version of bitrot which used to create the xattrs without
enabling the feature, the xattrs (trusted.bit-rot.version in particular)
was not getting filtered and ended up reaching the client stack. AFR, on
noticing different values of the xattr across bricks of the replica,
started triggering spurious metadata heals.
Fix:
Filter all internal xattrs in bitrot xlator before unwinding lookup,
(f)getxattr.
Thanks to Kotresh for the help in RCA'ing.
Change-Id: I5bc70e4b901359c3daefc67b8e4fa6ddb47f046c
BUG: 1519740
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126154
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/features/bit-rot/src/stub/bit-rot-stub.c | 23 ++++++++++++++++-------
xlators/features/bit-rot/src/stub/bit-rot-stub.h | 5 +++++
2 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index fb187a3..4be7caa 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -1585,6 +1585,7 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t ret = 0;
size_t totallen = 0;
size_t signaturelen = 0;
+ br_stub_private_t *priv = NULL;
br_version_t *obuf = NULL;
br_signature_t *sbuf = NULL;
br_isignature_out_t *sign = NULL;
@@ -1592,9 +1593,15 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
br_stub_local_t *local = NULL;
inode_t *inode = NULL;
gf_boolean_t bad_object = _gf_false;
+ gf_boolean_t ver_enabled = _gf_false;
+
+ BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+ priv = this->private;
if (op_ret < 0)
goto unwind;
+ BR_STUB_VER_COND_GOTO (priv, (!ver_enabled), delkeys);
+
if (cookie != (void *) BR_STUB_REQUEST_COOKIE)
goto unwind;
@@ -1740,8 +1747,7 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this,
goto unwind;
priv = this->private;
- if (!priv->do_versioning)
- goto wind;
+ BR_STUB_VER_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
/**
* If xattr is node-uuid and the inode is marked bad, return EIO.
@@ -1762,6 +1768,7 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this,
strlen (GLUSTERFS_GET_BR_STUB_INIT_TIME)) == 0)
&& ((gf_uuid_compare (loc->gfid, rootgfid) == 0)
|| (gf_uuid_compare (loc->inode->gfid, rootgfid) == 0))) {
+ BR_STUB_RESET_LOCAL_NULL (frame);
br_stub_send_stub_init_time (frame, this);
return 0;
}
@@ -1792,6 +1799,7 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this,
FIRST_CHILD (this)->fops->getxattr, loc, name, xdata);
return 0;
unwind:
+ BR_STUB_RESET_LOCAL_NULL (frame);
STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, NULL, NULL);
return 0;
}
@@ -1809,6 +1817,7 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
br_stub_private_t *priv = NULL;
rootgfid[15] = 1;
+ priv = this->private;
if (!name) {
cbk = br_stub_listxattr_cbk;
@@ -1818,9 +1827,7 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
if (br_stub_is_internal_xattr (name))
goto unwind;
- priv = this->private;
- if (!priv->do_versioning)
- goto wind;
+ BR_STUB_VER_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
/**
* If xattr is node-uuid and the inode is marked bad, return EIO.
@@ -1840,6 +1847,7 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
&& (strncmp (name, GLUSTERFS_GET_BR_STUB_INIT_TIME,
strlen (GLUSTERFS_GET_BR_STUB_INIT_TIME)) == 0)
&& (gf_uuid_compare (fd->inode->gfid, rootgfid) == 0)) {
+ BR_STUB_RESET_LOCAL_NULL (frame);
br_stub_send_stub_init_time (frame, this);
return 0;
}
@@ -1870,6 +1878,7 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata);
return 0;
unwind:
+ BR_STUB_RESET_LOCAL_NULL (frame);
STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, NULL, NULL);
return 0;
}
@@ -2867,13 +2876,14 @@ br_stub_lookup_cbk (call_frame_t *frame, void *cookie,
BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
priv = this->private;
- BR_STUB_VER_COND_GOTO (priv, (!ver_enabled), unwind);
if (op_ret < 0) {
(void) br_stub_handle_lookup_error (this, inode, op_errno);
goto unwind;
}
+ BR_STUB_VER_COND_GOTO (priv, (!ver_enabled), delkey);
+
if (!IA_ISREG (stbuf->ia_type))
goto unwind;
@@ -2892,7 +2902,6 @@ br_stub_lookup_cbk (call_frame_t *frame, void *cookie,
op_errno = EIO;
goto unwind;
}
-
goto delkey;
}
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
index 433fa68..8f1b185 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
@@ -54,6 +54,11 @@
frame->local = NULL; \
} while (0)
+#define BR_STUB_RESET_LOCAL_NULL(frame) do { \
+ if (frame->local == (void *)0x1) \
+ frame->local = NULL; \
+ } while (0)
+
typedef int (br_stub_version_cbk) (call_frame_t *, void *,
xlator_t *, int32_t, int32_t, dict_t *);
--
1.8.3.1

View File

@ -0,0 +1,89 @@
From b1a9673560ec20df2c7944fce55a64f7b5913f77 Mon Sep 17 00:00:00 2001
From: Raghavendra G <rgowdapp@redhat.com>
Date: Tue, 7 Nov 2017 16:09:37 +0530
Subject: [PATCH 108/128] mount/fuse: use fstat in getattr implementation if
any opened fd is available
The restriction of using fds opened by the same Pid means fds cannot
be shared across threads of multithreaded application. Note that fops
from kernel have different Pid for different threads. Imagine
following sequence of operations:
* Turn off performance.open-behind
* Thread t1 opens an fd - fd1 - on file "file". Let's assume nodeid of
"file" is "nodeid-file".
* Thread t2 does RENAME ("newfile", "file"). Let's assume nodeid of
"newfile" as "nodeid-newfile".
* t2 proceeds to do fstat (fd1)
The above set of operations can sometimes result in ESTALE/ENOENT
errors. RENAME overwrites "file" with "newfile" changing its nodeid
from "nodeid-file" to "nodeid-newfile" and post RENAME, "nodeid-file" is
removed from the backend. If fstat carries nodeid-file as argument,
which can happen if lookup has not refreshed the nodeid of "file" and
since t2 doesn't have an fd opened, fuse_getattr_resume uses STAT
which will fail as "nodeid-file" no longer exists.
Since the above set of operations and sharing of fds across
multiple threads are valid, this is a bug.
The fix is to use any fd opened on the inode. In this specific example
fuse_getattr_resume will find fd1 and winds down the call as fstat
(fd1) which won't fail.
Cross-checked with "Miklos Szeredi" <mszeredi.at.redhat.dot.com> for
any security issues with this solution and he approves the solution.
Thanks to "Miklos Szeredi" <mszeredi.at.redhat.dot.com> for all the
pointers and discussions.
Change-Id: I88dd29b3607cd2594eee9d72a1637b5346c8d49c
BUG: 1492591
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
Upstream patch: https://review.gluster.org/#/c/18681/
Reviewed-on: https://code.engineering.redhat.com/gerrit/126511
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
tests/bugs/replicate/bug-913051.t | 11 -----------
xlators/mount/fuse/src/fuse-bridge.c | 2 +-
2 files changed, 1 insertion(+), 12 deletions(-)
diff --git a/tests/bugs/replicate/bug-913051.t b/tests/bugs/replicate/bug-913051.t
index 43d1330..6794995 100644
--- a/tests/bugs/replicate/bug-913051.t
+++ b/tests/bugs/replicate/bug-913051.t
@@ -37,17 +37,6 @@ TEST fd_open $rfd "r" $M0/dir/b
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
-#check that the files are not opned on brick-0
-TEST stat $M0/dir/a
-realpatha=$(gf_get_gfid_backend_file_path $B0/${V0}0 "dir/a")
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpatha"
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/a
-
-TEST stat $M0/dir/b
-realpathb=$(gf_get_gfid_backend_file_path $B0/${V0}0 "dir/b")
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpathb"
-EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/b
-
#attempt self-heal so that the files are created on brick-0
TEST dd if=$M0/dir/a of=/dev/null bs=1024k
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 34a0dbb..74b59b8 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -908,7 +908,7 @@ fuse_getattr_resume (fuse_state_t *state)
}
if (!IA_ISDIR (state->loc.inode->ia_type)) {
- state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+ state->fd = fd_lookup (state->loc.inode, 0);
}
if (!state->fd) {
--
1.8.3.1

View File

@ -0,0 +1,41 @@
From b777482ac36960537be213c54490aec9e1077a1f Mon Sep 17 00:00:00 2001
From: Raghavendra G <rgowdapp@redhat.com>
Date: Fri, 13 Oct 2017 20:00:47 +0530
Subject: [PATCH 109/128] mount/fuse: never fail open(dir) with ENOENT
open(dir) being an operation on inode should never fail with
ENOENT. If gfid is not present, the appropriate error is ESTALE. This
will enable kernel to retry open after a revalidate lookup.
Change-Id: I8d07d2ebb5a0da6c3ea478317442cb42f1797a4b
BUG: 1492591
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
upstream patch: https://review.gluster.org/#/c/18521/
Reviewed-on: https://code.engineering.redhat.com/gerrit/126510
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mount/fuse/src/fuse-bridge.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 74b59b8..a6dfd66 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -1069,6 +1069,13 @@ fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_bind (fd);
} else {
err:
+ /* OPEN(DIR) being an operation on inode should never fail with
+ * ENOENT. If gfid is not present, the appropriate error is
+ * ESTALE.
+ */
+ if (op_errno == ENOENT)
+ op_errno = ESTALE;
+
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"%"PRIu64": %s() %s => -1 (%s)", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path,
--
1.8.3.1

View File

@ -0,0 +1,57 @@
From de1ee9800f8e20d0c0a8adb1994f208d2dc8ae09 Mon Sep 17 00:00:00 2001
From: Raghavendra G <rgowdapp@redhat.com>
Date: Tue, 10 Oct 2017 11:29:04 +0530
Subject: [PATCH 110/128] Revert "mount/fuse: report ESTALE as ENOENT"
This reverts commit 26d16b90ec7f8acbe07e56e8fe1baf9c9fa1519e.
Consider rename (index.new, store.idx) and open (store.idx) being
executed in parallel. When we break down operations following sequence
is possible.
* lookup (store.idx) - as part of open(store.idx) returns gfid1 as the
result.
* rename (index.new, store.idx) changes gfid of store.idx to
gfid2. Note that gfid2 was the nodeid of index.new. Since rename is
successful, gfid2 is associated with store.idx.
* open (store.idx) resumes and issues open fop to glusterfs with
gfid1. open in glusterfs fails as gfid1 doesn't exist and the error
returned by glusterfs to kernel-fuse is ENOENT.
* kernel passes back the same error to application as a result to
open.
This error could've been prevented if kernel retries open with
gfid2. Interestingly kernel do retry open when it receives ESTALE
error. Even though failure to find gfid resulted in ESTALE error,
commit 26d16b90ec7f8acb converted that error to ENOENT while sending
an error reply to kernel. This prevented kernel from retrying open
resulting in error.
Change-Id: I2e752ca60dd8af1b989dd1d29c7b002ee58440b4
BUG: 1492591
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
upstream patch: https://review.gluster.org/#/c/18463/
Reviewed-on: https://code.engineering.redhat.com/gerrit/126509
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mount/fuse/src/fuse-bridge.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index a6dfd66..03d26eb 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -421,9 +421,6 @@ send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)
struct iovec iov_out;
inode_t *inode = NULL;
- if (error == ESTALE)
- error = ENOENT;
-
fouh.error = -error;
iov_out.iov_base = &fouh;
--
1.8.3.1

View File

@ -0,0 +1,86 @@
From 712e62721d7d95c05d87510eb1fbe5d12381e1ab Mon Sep 17 00:00:00 2001
From: Raghavendra G <rgowdapp@redhat.com>
Date: Mon, 18 Sep 2017 16:01:34 +0530
Subject: [PATCH 111/128] cluster/dht: don't overfill the buffer in readdir(p)
Superflous dentries that cannot be fit in the buffer size provided by
kernel are thrown away by fuse-bridge. This means,
* the next readdir(p) seen by readdir-ahead would have an offset of a
dentry returned in a previous readdir(p) response. When readdir-ahead
detects non-monotonic offset it turns itself off which can result in
poor readdir performance.
* readdirp can be cpu-intensive on brick and there is no point to read
all those dentries just to be thrown away by fuse-bridge.
So, the best strategy would be to fill the buffer optimally - neither
overfill nor underfill.
Change-Id: Idb3d85dd4c08fdc4526b2df801d49e69e439ba84
BUG: 1264911
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
upstream patch: https://review.gluster.org/18312
Reviewed-on: https://code.engineering.redhat.com/gerrit/126504
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index f611278..b55cb36 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -5238,6 +5238,13 @@ list:
}
done:
+ if ((op_ret == 0) && op_errno != ENOENT) {
+ /* remaining buffer size is not enough to hold even one
+ * dentry
+ */
+ goto unwind;
+ }
+
if ((count == 0) || (local && (local->filled < local->size))) {
if ((next_offset == 0) || (op_errno == ENOENT)) {
next_offset = 0;
@@ -5268,8 +5275,8 @@ done:
STACK_WIND_COOKIE (frame, dht_readdirp_cbk, next_subvol,
next_subvol, next_subvol->fops->readdirp,
- local->fd, local->size, next_offset,
- local->xattr);
+ local->fd, (local->size - local->filled),
+ next_offset, local->xattr);
return 0;
}
@@ -5359,6 +5366,13 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
done:
+ if ((op_ret == 0) && op_errno != ENOENT) {
+ /* remaining buffer size is not enough to hold even one
+ * dentry
+ */
+ goto unwind;
+ }
+
if ((count == 0) || (local && (local->filled < local->size))) {
if ((op_ret <= 0) || (op_errno == ENOENT)) {
next_subvol = dht_subvol_next (this, prev);
@@ -5372,7 +5386,8 @@ done:
STACK_WIND_COOKIE (frame, dht_readdir_cbk, next_subvol,
next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset, NULL);
+ local->fd, (local->size - local->filled),
+ next_offset, NULL);
return 0;
}
--
1.8.3.1

View File

@ -0,0 +1,79 @@
From 1425f0c723c7f3811a4104efdf3cb55f0bb02731 Mon Sep 17 00:00:00 2001
From: Csaba Henk <csaba@redhat.com>
Date: Fri, 15 Dec 2017 08:22:21 +0100
Subject: [PATCH 112/128] write-behind: Allow trickling-writes to be
configurable
This is the undisputed/trivial part of Shreyas' patch
he attached to https://bugzilla.redhat.com/1364740 (of
which the current bug is a clone).
We need more evaluation for the page_size and window_size
bits before taking them on.
> Change-Id: Iaa0b9a69d35e522b77a52a09acef47460e8ae3e9
> BUG: 1428060
> Co-authored-by: Shreyas Siravara <sshreyas@fb.com>
> Signed-off-by: Csaba Henk <csaba@redhat.com>
> Reviewed-on: https://review.gluster.org/18719
Change-Id: Iaa0b9a69d35e522b77a52a09acef47460e8ae3e9
BUG: 1426042
Co-authored-by: Shreyas Siravara <sshreyas@fb.com>
Signed-off-by: Csaba Henk <csaba@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/125932
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
libglusterfs/src/globals.h | 4 +++-
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 12 ++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index c627cfe..692d49d 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -43,7 +43,7 @@
*/
#define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly
should not change */
-#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_0 /* MAX VERSION is the maximum
+#define GD_OP_VERSION_MAX GD_OP_VERSION_3_13_1 /* MAX VERSION is the maximum
count in VME table, should
keep changing with
introduction of newer
@@ -103,6 +103,8 @@
#define GD_OP_VERSION_3_13_0 31300 /* Op-version for GlusterFS 3.13.0 */
+#define GD_OP_VERSION_3_13_1 31301 /* Op-version for GlusterFS 3.13.1 */
+
#include "xlator.h"
/* THIS */
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index a57eb9e..c255be0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1762,6 +1762,18 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_RHS_3_0,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "performance.write-behind-trickling-writes",
+ .voltype = "performance/write-behind",
+ .option = "trickling-writes",
+ .op_version = GD_OP_VERSION_3_13_1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.nfs.write-behind-trickling-writes",
+ .voltype = "performance/write-behind",
+ .option = "trickling-writes",
+ .op_version = GD_OP_VERSION_3_13_1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = "performance.lazy-open",
.voltype = "performance/open-behind",
.option = "lazy-open",
--
1.8.3.1

View File

@ -0,0 +1,460 @@
From baa8ec64e180a342a431344af0a362c5838c06f1 Mon Sep 17 00:00:00 2001
From: Soumya Koduri <skoduri@redhat.com>
Date: Tue, 17 Oct 2017 16:12:06 +0530
Subject: [PATCH 113/128] gfapi: set lkowner in glfd
We need a provision to be able to set lkowner (which is
used to distinguish locks maintained by server) in gfapi.
Since the same lk_owner need to be used to be able to
flush the lock while closing the fd, store the lkowner
in the glfd structure itself.
A new API has been added to be able to set lkowner in glfd.
Upstream reference :
1.) This is backport of below mainline fix -
https://review.gluster.org/#/c/18429
https://review.gluster.org/#/c/18522/
2.) 3.12 branch
https://review.gluster.org/#/c/18524/
>Change-Id: I67591d6b9a89c20b9617d52616513ff9e6c06b47
>BUG: 1501956
>Signed-off-by: Soumya Koduri <skoduri@redhat.com>
Change-Id: I67591d6b9a89c20b9617d52616513ff9e6c06b47
BUG: 1500704
Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126599
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
api/src/gfapi.aliases | 1 +
api/src/gfapi.map | 7 +-
api/src/glfs-fops.c | 51 ++++++++++
api/src/glfs-internal.h | 1 +
api/src/glfs.h | 29 ++++++
tests/basic/gfapi/glfd-lkowner.c | 212 +++++++++++++++++++++++++++++++++++++++
tests/basic/gfapi/glfd-lkowner.t | 27 +++++
7 files changed, 327 insertions(+), 1 deletion(-)
create mode 100644 tests/basic/gfapi/glfd-lkowner.c
create mode 100755 tests/basic/gfapi/glfd-lkowner.t
diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
index b0facb7..85e8448 100644
--- a/api/src/gfapi.aliases
+++ b/api/src/gfapi.aliases
@@ -156,6 +156,7 @@ _pub_glfs_upcall_inode_get_oldpstat _glfs_upcall_inode_get_oldpstat$GFAPI_3.7.16
_pub_glfs_realpath _glfs_realpath$GFAPI_3.7.17
_pub_glfs_sysrq _glfs_sysrq$GFAPI_3.10.0
+_pub_glfs_fd_set_lkowner _glfs_fd_set_lkowner$GFAPI_3.10.7
_pub_glfs_xreaddirplus_r _glfs_xreaddirplus_r$GFAPI_3.11.0
_pub_glfs_xreaddirplus_r_get_stat _glfs_xreaddirplus_r_get_stat$GFAPI_3.11.0
diff --git a/api/src/gfapi.map b/api/src/gfapi.map
index 7f19e1e..8867300 100644
--- a/api/src/gfapi.map
+++ b/api/src/gfapi.map
@@ -198,12 +198,17 @@ GFAPI_3.10.0 {
glfs_sysrq;
} GFAPI_3.7.17;
+GFAPI_3.10.7 {
+ global:
+ glfs_fd_set_lkowner;
+} GFAPI_3.10.0;
+
GFAPI_3.11.0 {
glfs_xreaddirplus_r;
glfs_xreaddirplus_r_get_stat;
glfs_xreaddirplus_r_get_object;
glfs_object_copy;
-} GFAPI_3.10.0;
+} GFAPI_3.10.7;
GFAPI_PRIVATE_3.12.0 {
global:
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
index c8ddeea..7fb86fc 100644
--- a/api/src/glfs-fops.c
+++ b/api/src/glfs-fops.c
@@ -267,6 +267,12 @@ pub_glfs_close (struct glfs_fd *glfd)
goto out;
}
+ if (glfd->lk_owner.len != 0) {
+ ret = syncopctx_setfslkowner (&glfd->lk_owner);
+ if (ret)
+ goto out;
+ }
+
ret = syncop_flush (subvol, fd, NULL, NULL);
DECODE_SYNCOP_ERR (ret);
out:
@@ -4272,6 +4278,14 @@ pub_glfs_posix_lock (struct glfs_fd *glfd, int cmd, struct flock *flock)
gf_flock_from_flock (&gf_flock, flock);
gf_flock_from_flock (&saved_flock, flock);
+
+ if (glfd->lk_owner.len != 0) {
+ ret = syncopctx_setfslkowner (&glfd->lk_owner);
+
+ if (ret)
+ goto out;
+ }
+
ret = syncop_lk (subvol, fd, cmd, &gf_flock, NULL, NULL);
DECODE_SYNCOP_ERR (ret);
gf_flock_to_flock (&gf_flock, flock);
@@ -4294,6 +4308,43 @@ invalid_fs:
GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0);
+int
+pub_glfs_fd_set_lkowner (glfs_fd_t *glfd, void *data, int len)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD (glfd, invalid_fs);
+
+ if (!GF_REF_GET (glfd)) {
+ goto invalid_fs;
+ }
+
+ GF_VALIDATE_OR_GOTO (THIS->name, data, out);
+
+ if ((len <= 0) || (len > GFAPI_MAX_LOCK_OWNER_LEN)) {
+ errno = EINVAL;
+ gf_msg (THIS->name, GF_LOG_ERROR, errno,
+ LG_MSG_INVALID_ARG,
+ "Invalid lk_owner len (%d)", len);
+ goto out;
+ }
+
+ glfd->lk_owner.len = len;
+
+ memcpy (glfd->lk_owner.data, data, len);
+
+ ret = 0;
+out:
+ if (glfd)
+ GF_REF_PUT (glfd);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7);
struct glfs_fd *
pub_glfs_dup (struct glfs_fd *glfd)
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
index 1809818..c94fcd9 100644
--- a/api/src/glfs-internal.h
+++ b/api/src/glfs-internal.h
@@ -215,6 +215,7 @@ struct glfs_fd {
struct list_head entries;
gf_dirent_t *next;
struct dirent *readdirbuf;
+ gf_lkowner_t lk_owner;
};
/* glfs object handle introduced for the alternate gfapi implementation based
diff --git a/api/src/glfs.h b/api/src/glfs.h
index 5420a1d..26b15e0 100644
--- a/api/src/glfs.h
+++ b/api/src/glfs.h
@@ -855,6 +855,35 @@ glfs_xreaddirplus_r (struct glfs_fd *glfd, uint32_t flags,
struct dirent *ext, struct dirent **res);
GFAPI_PUBLIC(glfs_xreaddirplus_r, 3.11.0);
+#define GFAPI_MAX_LOCK_OWNER_LEN 255
+
+/*
+ *
+ * DESCRIPTION
+ *
+ * This API allows application to set lk_owner on a fd.
+ * A glfd can be associated with only single lk_owner. In case if there
+ * is need to set another lk_owner, applications can make use of
+ * 'glfs_dup' to get duplicate glfd and set new lk_owner on that second
+ * glfd.
+ *
+ * Also its not recommended to override or clear lk_owner value as the
+ * same shall be used to flush any outstanding locks while closing the fd.
+ *
+ * PARAMETERS
+ *
+ * INPUT:
+ * @glfd: GFAPI file descriptor
+ * @len: Size of lk_owner buffer. Max value can be GFAPI_MAX_LOCK_OWNER_LEN
+ * @data: lk_owner data buffer.
+ *
+ * OUTPUT:
+ * 0: SUCCESS
+ * -1: FAILURE
+ */
+int glfs_fd_set_lkowner (glfs_fd_t *glfd, void *data, int len);
+ GFAPI_PUBLIC(glfs_fd_set_lkowner, 3.10.7);
+
__END_DECLS
#endif /* !_GLFS_H */
diff --git a/tests/basic/gfapi/glfd-lkowner.c b/tests/basic/gfapi/glfd-lkowner.c
new file mode 100644
index 0000000..031a076
--- /dev/null
+++ b/tests/basic/gfapi/glfd-lkowner.c
@@ -0,0 +1,212 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) do { \
+ if (ret != 0) { \
+ fprintf (stderr, "%s : returned error %d (%s)\n", \
+ func, ret, strerror (errno)); \
+ goto out; \
+ } else { \
+ fprintf (stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+char lownera[8] = "lownera", lownerb[8] = "lownerb";
+char lownerc[8] = "lownerc";
+
+int lock_test (glfs_fd_t *glfd1, glfs_fd_t *glfd2, bool should_fail,
+ int l1_start, int l1_len, char *l1_owner, int lo1_len,
+ int l2_start, int l2_len, char *l2_owner, int lo2_len)
+{
+ int ret = -1, f_ret = -1;
+ struct flock lock1 = {0, }, lock2 = {0, };
+
+lock1:
+ if (!glfd1)
+ goto lock2;
+
+ /* lock on glfd1 */
+ lock1.l_type = F_WRLCK;
+ lock1.l_whence = SEEK_SET;
+ lock1.l_start = l1_start;
+ lock1.l_len = l1_len;
+
+ ret = glfs_fd_set_lkowner (glfd1, l1_owner, lo1_len);
+ LOG_ERR ("glfs_fd_set_lkowner on glfd1", ret);
+
+ ret = glfs_posix_lock (glfd1, F_SETLK, &lock1);
+ LOG_ERR ("glfs_posix_lock on glfd1", ret);
+
+lock2:
+ if (!glfd2)
+ goto out;
+
+ /* lock on glfd2 */
+ lock2.l_type = F_WRLCK;
+ lock2.l_whence = SEEK_SET;
+ lock2.l_start = l2_start;
+ lock2.l_len = l2_len;
+
+ ret = glfs_fd_set_lkowner (glfd2, l2_owner, lo2_len);
+ LOG_ERR ("glfs_fd_set_lkowner on glfd2", ret);
+
+ ret = glfs_posix_lock (glfd2, F_SETLK, &lock2);
+
+ if (should_fail && ret) {
+ f_ret = 0;
+ } else if (!ret && !should_fail) {
+ f_ret = 0;
+ } else {
+ f_ret = -1;
+ }
+out:
+ fprintf (stderr, "Lock test on glfd1 (start(%d), len(%d),"
+ " lk_owner(%s)) and glfd2 (start(%d), len(%d), "
+ "lk_owner(%s)) - expected(%s) - result(%s)\n",
+ l1_start, l1_len, l1_owner, l2_start, l2_len, l2_owner,
+ (should_fail ? "FAIL" : "SUCCESS"),
+ (ret ? "FAIL" : "SUCCESS"));
+ return f_ret;
+}
+
+int
+main (int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i, status = 0;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd3 = NULL;
+ char *filename = "file_tmp";
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf (stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new (volname);
+ if (!fs) {
+ fprintf (stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server (fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging (fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init (fs);
+ LOG_ERR("glfs_init", ret);
+
+ fd1 = glfs_creat(fs, filename, O_RDWR|O_SYNC, 0644);
+ if (fd1 <= 0) {
+ ret = -1;
+ LOG_ERR ("glfs_creat", ret);
+ }
+ fprintf (stderr, "glfs-create fd1 - %d\n", fd1);
+
+ fd2 = glfs_dup(fd1);
+ fprintf (stderr, "glfs-dup fd2 - %d\n", fd2);
+
+ fd3 = glfs_open(fs, filename, O_RDWR|O_SYNC);
+ if (fd2 <= 0) {
+ ret = -1;
+ LOG_ERR ("glfs_open", ret);
+ }
+ fprintf (stderr, "glfs-open fd3 - %d\n", fd3);
+
+ /* TEST 1: Conflicting ranges, same lk_owner
+ * lock1 (0, 10, lownera)
+ * lock2 (5, 10, lownera)
+ * Expected: should not fail but get merged
+ */
+ ret = lock_test (fd1, fd2, false, 0, 10, lownera, 8,
+ 5, 10, lownera, 8);
+ LOG_ERR ("==== glfs_lock_test_1", ret);
+
+ /* TEST 2: Conflicting ranges, different lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (5, 10, lownerb)
+ * Expected: should fail and not get merged
+ */
+ ret = lock_test (NULL, fd2, true, 0, 10, lownera, 8,
+ 5, 10, lownerb, 8);
+ LOG_ERR ("==== glfs_lock_test_2", ret);
+
+ /* TEST 3: Different ranges, same lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (30, 10, lownera)
+ * Expected: should not fail
+ */
+ ret = lock_test (NULL, fd2, false, 0, 10, lownera, 8,
+ 30, 10, lownera, 8);
+ LOG_ERR ("==== glfs_lock_test_3", ret);
+
+ /* TEST 4: Conflicting ranges, different lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (50, 10, lownerb)
+ * Expected: should not fail
+ */
+ ret = lock_test (NULL, fd2, false, 0, 10, lownera, 8,
+ 50, 10, lownerb, 8);
+ LOG_ERR ("==== glfs_lock_test_4", ret);
+
+ /* TEST 5: Close fd1 & retry TEST2
+ * lock1 (not applicable)
+ * lock2 (5, 10, lownerb)
+ * Expected: should succeed now
+ */
+ ret = glfs_close(fd1);
+ LOG_ERR ("glfs_close", ret);
+
+ ret = lock_test (NULL, fd2, false, 0, 10, lownera, 8,
+ 5, 10, lownerb, 8);
+ LOG_ERR ("==== glfs_lock_test_5", ret);
+
+ /* TEST 6: Check closing fd1 doesn't flush fd2 locks
+ * retry TEST 4 but with fd2 and fd3.
+ * lock1 (50, 10, lownerb) - already taken
+ * lock2 (55, 10, lownerc)
+ * Expected: should fail
+ */
+ ret = lock_test (NULL, fd3, true, 50, 10, lownerb, 8,
+ 55, 10, lownerc, 8);
+ LOG_ERR ("==== glfs_lock_test_6", ret);
+
+err:
+ ret = glfs_close(fd2);
+ LOG_ERR ("glfs_close", ret);
+
+ ret = glfs_close(fd3);
+ LOG_ERR ("glfs_close", ret);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf (stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/glfd-lkowner.t b/tests/basic/gfapi/glfd-lkowner.t
new file mode 100755
index 0000000..ad7b026
--- /dev/null
+++ b/tests/basic/gfapi/glfd-lkowner.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfd-lkowner.c -lgfapi
+
+TEST ./$(dirname $0)/glfd-lkowner $H0 $V0 $logdir/glfd-lkowner.log
+
+cleanup_tester $(dirname $0)/glfd-lkowner
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
--
1.8.3.1

View File

@ -0,0 +1,275 @@
From 21154511978486010405a1d3a826d46dd8b9d324 Mon Sep 17 00:00:00 2001
From: Aravinda VK <avishwan@redhat.com>
Date: Mon, 18 Sep 2017 14:34:54 +0530
Subject: [PATCH 114/128] eventsapi: Add JWT signing support
New argument added to accept secret to generate JWT token. This patch
does not affect the backward compatibility.
Usage:
gluster-eventsapi webhook-add <url> [-t <TOKEN>] \
[-s SECRET]
With `--token` argument, Token header will be added as is.
Authorization: Bearer <TOKEN>
In case of shared secret, Gluster will generate JWT token using the
secret and then add it to Authorization header.
Authorization: Bearer <GENERATED_TOKEN>
Secret/Token can be updated using `webhook-mod` command.
Generated token will include the following payload,
{
"iss": "gluster",
"exp": EXPIRY_TIME,
"sub": EVENT_TYPE,
"iat": EVENT_TIME
}
Where: iss - Issuer, exp - Expiry Time, sub - Event Type
used as Subject, iat - Event Time used as Issue Time
>upstream mainline patch : https://review.gluster.org/#/c/18405
BUG: 1466129
Change-Id: Ib6b6fab23fb212d7f5e9bbc9e1416a9e9813ab1b
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126551
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
events/src/peer_eventsapi.py | 40 ++++++++++++++++++++++++++++++++++-----
events/src/utils.py | 45 +++++++++++++++++++++++++++++++++++---------
glusterfs.spec.in | 4 ++--
3 files changed, 73 insertions(+), 16 deletions(-)
diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py
index 59808ad..3a6a0eb 100644
--- a/events/src/peer_eventsapi.py
+++ b/events/src/peer_eventsapi.py
@@ -18,6 +18,7 @@ import fcntl
from errno import EACCES, EAGAIN
import signal
import sys
+import time
import requests
from prettytable import PrettyTable
@@ -26,7 +27,7 @@ from gluster.cliutils import (Cmd, node_output_ok, node_output_notok,
sync_file_to_peers, GlusterCmdException,
output_error, execute_in_peers, runcli,
set_common_args_func)
-from events.utils import LockedOpen
+from events.utils import LockedOpen, get_jwt_token
from events.eventsapiconf import (WEBHOOKS_FILE_TO_SYNC,
WEBHOOKS_FILE,
@@ -307,6 +308,8 @@ class WebhookAddCmd(Cmd):
parser.add_argument("url", help="URL of Webhook")
parser.add_argument("--bearer_token", "-t", help="Bearer Token",
default="")
+ parser.add_argument("--secret", "-s",
+ help="Secret to add JWT Bearer Token", default="")
def run(self, args):
create_webhooks_file_if_not_exists(args)
@@ -318,7 +321,8 @@ class WebhookAddCmd(Cmd):
errcode=ERROR_WEBHOOK_ALREADY_EXISTS,
json_output=args.json)
- data[args.url] = args.bearer_token
+ data[args.url] = {"token": args.bearer_token,
+ "secret": args.secret}
file_content_overwrite(WEBHOOKS_FILE, data)
sync_to_peers(args)
@@ -331,6 +335,8 @@ class WebhookModCmd(Cmd):
parser.add_argument("url", help="URL of Webhook")
parser.add_argument("--bearer_token", "-t", help="Bearer Token",
default="")
+ parser.add_argument("--secret", "-s",
+ help="Secret to add JWT Bearer Token", default="")
def run(self, args):
create_webhooks_file_if_not_exists(args)
@@ -342,7 +348,16 @@ class WebhookModCmd(Cmd):
errcode=ERROR_WEBHOOK_NOT_EXISTS,
json_output=args.json)
- data[args.url] = args.bearer_token
+ if isinstance(data[args.url], str) or \
+ isinstance(data[args.url], unicode):
+ data[args.url]["token"] = data[args.url]
+
+ if args.bearer_token != "":
+ data[args.url]["token"] = args.bearer_token
+
+ if args.secret != "":
+ data[args.url]["secret"] = args.secret
+
file_content_overwrite(WEBHOOKS_FILE, data)
sync_to_peers(args)
@@ -376,11 +391,19 @@ class NodeWebhookTestCmd(Cmd):
def args(self, parser):
parser.add_argument("url")
parser.add_argument("bearer_token")
+ parser.add_argument("secret")
def run(self, args):
http_headers = {}
+ hashval = ""
if args.bearer_token != ".":
- http_headers["Authorization"] = "Bearer " + args.bearer_token
+ hashval = args.bearer_token
+
+ if args.secret != ".":
+ hashval = get_jwt_token(args.secret, "TEST", int(time.time()))
+
+ if hashval:
+ http_headers["Authorization"] = "Bearer " + hashval
try:
resp = requests.post(args.url, headers=http_headers)
@@ -401,16 +424,23 @@ class WebhookTestCmd(Cmd):
def args(self, parser):
parser.add_argument("url", help="URL of Webhook")
parser.add_argument("--bearer_token", "-t", help="Bearer Token")
+ parser.add_argument("--secret", "-s",
+ help="Secret to generate Bearer Token")
def run(self, args):
url = args.url
bearer_token = args.bearer_token
+ secret = args.secret
+
if not args.url:
url = "."
if not args.bearer_token:
bearer_token = "."
+ if not args.secret:
+ secret = "."
- out = execute_in_peers("node-webhook-test", [url, bearer_token])
+ out = execute_in_peers("node-webhook-test", [url, bearer_token,
+ secret])
if not args.json:
table = PrettyTable(["NODE", "NODE STATUS", "WEBHOOK STATUS"])
diff --git a/events/src/utils.py b/events/src/utils.py
index 2a77b13..5130720 100644
--- a/events/src/utils.py
+++ b/events/src/utils.py
@@ -13,10 +13,11 @@ import json
import os
import logging
import fcntl
-from errno import ESRCH, EBADF
+from errno import EBADF
from threading import Thread
import multiprocessing
from Queue import Queue
+from datetime import datetime, timedelta
from eventsapiconf import (LOG_FILE,
WEBHOOKS_FILE,
@@ -183,15 +184,33 @@ def autoload_webhooks():
load_webhooks()
-def publish_to_webhook(url, token, message_queue):
+def get_jwt_token(secret, event_type, event_ts, jwt_expiry_time_seconds=60):
+ import jwt
+ payload = {
+ "exp": datetime.utcnow() + timedelta(seconds=jwt_expiry_time_seconds),
+ "iss": "gluster",
+ "sub": event_type,
+ "iat": event_ts
+ }
+ return jwt.encode(payload, secret, algorithm='HS256')
+
+
+def publish_to_webhook(url, token, secret, message_queue):
# Import requests here since not used in any other place
import requests
http_headers = {"Content-Type": "application/json"}
while True:
- message_json = message_queue.get()
+ hashval = ""
+ event_type, event_ts, message_json = message_queue.get()
if token != "" and token is not None:
- http_headers["Authorization"] = "Bearer " + token
+ hashval = token
+
+ if secret != "" and secret is not None:
+ hashval = get_jwt_token(secret, event_type, event_ts)
+
+ if hashval:
+ http_headers["Authorization"] = "Bearer " + hashval
try:
resp = requests.post(url, headers=http_headers, data=message_json)
@@ -218,7 +237,7 @@ def publish_to_webhook(url, token, message_queue):
def plugin_webhook(message):
message_json = json.dumps(message, sort_keys=True)
logger.debug("EVENT: {0}".format(message_json))
- webhooks_pool.send(message_json)
+ webhooks_pool.send(message["event"], message["ts"], message_json)
class LockedOpen(object):
@@ -298,9 +317,17 @@ class PidFile(object):
def webhook_monitor(proc_queue, webhooks):
queues = {}
- for url, token in webhooks.items():
+ for url, data in webhooks.items():
+ if isinstance(data, str) or isinstance(data, unicode):
+ token = data
+ secret = None
+ else:
+ token = data["token"]
+ secret = data["secret"]
+
queues[url] = Queue()
- t = Thread(target=publish_to_webhook, args=(url, token, queues[url]))
+ t = Thread(target=publish_to_webhook, args=(url, token, secret,
+ queues[url]))
t.start()
# Get the message sent to Process queue and distribute to all thread queues
@@ -329,8 +356,8 @@ class WebhookThreadPool(object):
self.proc.terminate()
self.start()
- def send(self, message):
- self.queue.put(message)
+ def send(self, event_type, event_ts, message):
+ self.queue.put((event_type, event_ts, message))
def init_webhook_pool():
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 56a62a9..29329fa 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -671,9 +671,9 @@ Requires: %{name}-server%{?_isa} = %{version}-%{release}
Requires: python2 python-prettytable
Requires: python2-gluster = %{version}-%{release}
%if ( 0%{?rhel} )
-Requires: python-requests
+Requires: python-requests python-jwt
%else
-Requires: python2-requests
+Requires: python2-requests python2-jwt
%endif
%if ( 0%{?rhel} && 0%{?rhel} < 7 )
Requires: python-argparse
--
1.8.3.1

View File

@ -0,0 +1,84 @@
From c55511be71a6181788067fa018b5f0deaca10e61 Mon Sep 17 00:00:00 2001
From: Aravinda VK <avishwan@redhat.com>
Date: Thu, 28 Dec 2017 14:04:50 +0530
Subject: [PATCH 115/128] eventsapi: JWT signing without external dependency
Added support for JWT signing without using python-jwt since it is not
available in all the distributions.
>upstream mainline patch : https://review.gluster.org/19102
BUG: 1466129
Change-Id: I95699055442fbf9da15249f5defe8a8b287010f1
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126619
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
events/src/utils.py | 20 +++++++++++++++++---
glusterfs.spec.in | 4 ++--
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/events/src/utils.py b/events/src/utils.py
index 5130720..f24d64d 100644
--- a/events/src/utils.py
+++ b/events/src/utils.py
@@ -18,6 +18,10 @@ from threading import Thread
import multiprocessing
from Queue import Queue
from datetime import datetime, timedelta
+import base64
+import hmac
+from hashlib import sha256
+from calendar import timegm
from eventsapiconf import (LOG_FILE,
WEBHOOKS_FILE,
@@ -184,15 +188,25 @@ def autoload_webhooks():
load_webhooks()
+def base64_urlencode(inp):
+ return base64.urlsafe_b64encode(inp).replace("=", "").strip()
+
+
def get_jwt_token(secret, event_type, event_ts, jwt_expiry_time_seconds=60):
- import jwt
+ exp = datetime.utcnow() + timedelta(seconds=jwt_expiry_time_seconds)
payload = {
- "exp": datetime.utcnow() + timedelta(seconds=jwt_expiry_time_seconds),
+ "exp": timegm(exp.utctimetuple()),
"iss": "gluster",
"sub": event_type,
"iat": event_ts
}
- return jwt.encode(payload, secret, algorithm='HS256')
+ header = '{"alg":"HS256","typ":"JWT"}'
+ payload = json.dumps(payload, separators=(',', ':'), sort_keys=True)
+ msg = base64_urlencode(header) + "." + base64_urlencode(payload)
+ return "%s.%s" % (
+ msg,
+ base64_urlencode(hmac.HMAC(secret, msg, sha256).digest())
+ )
def publish_to_webhook(url, token, secret, message_queue):
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 29329fa..56a62a9 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -671,9 +671,9 @@ Requires: %{name}-server%{?_isa} = %{version}-%{release}
Requires: python2 python-prettytable
Requires: python2-gluster = %{version}-%{release}
%if ( 0%{?rhel} )
-Requires: python-requests python-jwt
+Requires: python-requests
%else
-Requires: python2-requests python2-jwt
+Requires: python2-requests
%endif
%if ( 0%{?rhel} && 0%{?rhel} < 7 )
Requires: python-argparse
--
1.8.3.1

View File

@ -0,0 +1,242 @@
From cee93742430f0ecd3defb65e5ca62ef37f581703 Mon Sep 17 00:00:00 2001
From: Aravinda VK <avishwan@redhat.com>
Date: Tue, 17 Oct 2017 12:50:48 +0530
Subject: [PATCH 116/128] eventsapi: HTTPS support for Webhooks
First it tries to call URL with verify=True without specifying the cert
path, it succeeds if a webhook is HTTP or HTTPS with CA trusted
certificates(for example https://github..).
If above call fails with SSL error then it tries to get the server
certificate and calls URL again. If call fails with SSL error even after
using the certificate, then verification will be disabled and logged in
the log file.
All other errors will be caught and logged as usual.
>upstream mainline patch : https://review.gluster.org/18578
BUG: 1466122
Change-Id: I86a3390ed48b75dffdc7848022af23a1e1d7f076
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126618
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
events/src/eventsapiconf.py.in | 1 +
events/src/peer_eventsapi.py | 48 +++++++++++++++++----
events/src/utils.py | 94 ++++++++++++++++++++++++++++++++----------
3 files changed, 114 insertions(+), 29 deletions(-)
diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in
index 08a3602..687eaa3 100644
--- a/events/src/eventsapiconf.py.in
+++ b/events/src/eventsapiconf.py.in
@@ -26,6 +26,7 @@ UUID_FILE = "@GLUSTERD_WORKDIR@/glusterd.info"
PID_FILE = "@localstatedir@/run/glustereventsd.pid"
AUTO_BOOL_ATTRIBUTES = ["force", "push-pem", "no-verify"]
AUTO_INT_ATTRIBUTES = ["ssh-port"]
+CERTS_DIR = "@GLUSTERD_WORKDIR@/events"
# Errors
ERROR_SAME_CONFIG = 2
diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py
index 3a6a0eb..d72fdbe 100644
--- a/events/src/peer_eventsapi.py
+++ b/events/src/peer_eventsapi.py
@@ -27,7 +27,7 @@ from gluster.cliutils import (Cmd, node_output_ok, node_output_notok,
sync_file_to_peers, GlusterCmdException,
output_error, execute_in_peers, runcli,
set_common_args_func)
-from events.utils import LockedOpen, get_jwt_token
+from events.utils import LockedOpen, get_jwt_token, save_https_cert
from events.eventsapiconf import (WEBHOOKS_FILE_TO_SYNC,
WEBHOOKS_FILE,
@@ -47,7 +47,8 @@ from events.eventsapiconf import (WEBHOOKS_FILE_TO_SYNC,
ERROR_PARTIAL_SUCCESS,
ERROR_ALL_NODES_STATUS_NOT_OK,
ERROR_SAME_CONFIG,
- ERROR_WEBHOOK_SYNC_FAILED)
+ ERROR_WEBHOOK_SYNC_FAILED,
+ CERTS_DIR)
def handle_output_error(err, errcode=1, json_output=False):
@@ -405,12 +406,43 @@ class NodeWebhookTestCmd(Cmd):
if hashval:
http_headers["Authorization"] = "Bearer " + hashval
- try:
- resp = requests.post(args.url, headers=http_headers)
- except requests.ConnectionError as e:
- node_output_notok("{0}".format(e))
- except requests.exceptions.InvalidSchema as e:
- node_output_notok("{0}".format(e))
+ urldata = requests.utils.urlparse(args.url)
+ parts = urldata.netloc.split(":")
+ domain = parts[0]
+ # Default https port if not specified
+ port = 443
+ if len(parts) == 2:
+ port = int(parts[1])
+
+ cert_path = os.path.join(CERTS_DIR, args.url.replace("/", "_").strip())
+ verify = True
+ while True:
+ try:
+ resp = requests.post(args.url, headers=http_headers,
+ verify=verify)
+ # Successful webhook push
+ break
+ except requests.exceptions.SSLError as e:
+ # If verify is equal to cert path, but still failed with
+ # SSLError, Looks like some issue with custom downloaded
+ # certificate, Try with verify = false
+ if verify == cert_path:
+ verify = False
+ continue
+
+ # If verify is instance of bool and True, then custom cert
+ # is required, download the cert and retry
+ try:
+ save_https_cert(domain, port, cert_path)
+ verify = cert_path
+ except Exception:
+ verify = False
+
+ # Done with collecting cert, continue
+ continue
+ except Exception as e:
+ node_output_notok("{0}".format(e))
+ break
if resp.status_code != 200:
node_output_notok("{0}".format(resp.status_code))
diff --git a/events/src/utils.py b/events/src/utils.py
index f24d64d..f405e44 100644
--- a/events/src/utils.py
+++ b/events/src/utils.py
@@ -27,7 +27,8 @@ from eventsapiconf import (LOG_FILE,
WEBHOOKS_FILE,
DEFAULT_CONFIG_FILE,
CUSTOM_CONFIG_FILE,
- UUID_FILE)
+ UUID_FILE,
+ CERTS_DIR)
import eventtypes
@@ -209,11 +210,33 @@ def get_jwt_token(secret, event_type, event_ts, jwt_expiry_time_seconds=60):
)
+def save_https_cert(domain, port, cert_path):
+ import ssl
+
+ # Cert file already available for this URL
+ if os.path.exists(cert_path):
+ return
+
+ cert_data = ssl.get_server_certificate((domain, port))
+ with open(cert_path, "w") as f:
+ f.write(cert_data)
+
+
def publish_to_webhook(url, token, secret, message_queue):
# Import requests here since not used in any other place
import requests
http_headers = {"Content-Type": "application/json"}
+ urldata = requests.utils.urlparse(url)
+ parts = urldata.netloc.split(":")
+ domain = parts[0]
+ # Default https port if not specified
+ port = 443
+ if len(parts) == 2:
+ port = int(parts[1])
+
+ cert_path = os.path.join(CERTS_DIR, url.replace("/", "_").strip())
+
while True:
hashval = ""
event_type, event_ts, message_json = message_queue.get()
@@ -226,26 +249,55 @@ def publish_to_webhook(url, token, secret, message_queue):
if hashval:
http_headers["Authorization"] = "Bearer " + hashval
- try:
- resp = requests.post(url, headers=http_headers, data=message_json)
- except requests.ConnectionError as e:
- logger.warn("Event push failed to URL: {url}, "
- "Event: {event}, "
- "Status: {error}".format(
- url=url,
- event=message_json,
- error=e))
- continue
- finally:
- message_queue.task_done()
-
- if resp.status_code != 200:
- logger.warn("Event push failed to URL: {url}, "
- "Event: {event}, "
- "Status Code: {status_code}".format(
- url=url,
- event=message_json,
- status_code=resp.status_code))
+ verify = True
+ while True:
+ try:
+ resp = requests.post(url, headers=http_headers,
+ data=message_json,
+ verify=verify)
+ # Successful webhook push
+ message_queue.task_done()
+ if resp.status_code != 200:
+ logger.warn("Event push failed to URL: {url}, "
+ "Event: {event}, "
+ "Status Code: {status_code}".format(
+ url=url,
+ event=message_json,
+ status_code=resp.status_code))
+ break
+ except requests.exceptions.SSLError as e:
+ # If verify is equal to cert path, but still failed with
+ # SSLError, Looks like some issue with custom downloaded
+ # certificate, Try with verify = false
+ if verify == cert_path:
+ logger.warn("Event push failed with certificate, "
+ "ignoring verification url={0} "
+ "Error={1}".format(url, e))
+ verify = False
+ continue
+
+ # If verify is instance of bool and True, then custom cert
+ # is required, download the cert and retry
+ try:
+ save_https_cert(domain, port, cert_path)
+ verify = cert_path
+ except Exception as ex:
+ verify = False
+ logger.warn("Unable to get Server certificate, "
+ "ignoring verification url={0} "
+ "Error={1}".format(url, ex))
+
+ # Done with collecting cert, continue
+ continue
+ except Exception as e:
+ logger.warn("Event push failed to URL: {url}, "
+ "Event: {event}, "
+ "Status: {error}".format(
+ url=url,
+ event=message_json,
+ error=e))
+ message_queue.task_done()
+ break
def plugin_webhook(message):
--
1.8.3.1

View File

@ -0,0 +1,105 @@
From 8216bddb4f563823a2e7cf046646b3b92239924d Mon Sep 17 00:00:00 2001
From: Aravinda VK <avishwan@redhat.com>
Date: Thu, 28 Dec 2017 15:17:34 +0530
Subject: [PATCH 117/128] geo-rep: Log message improvements
>upstream mainline patch: https://review.gluster.org/19103
BUG: 1468972
Change-Id: If4775ed9886990c0e1bcf4e44c7dfef95cc4f0c3
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126634
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
geo-replication/syncdaemon/gsyncd.py | 2 +-
geo-replication/syncdaemon/master.py | 8 ++++----
geo-replication/syncdaemon/repce.py | 2 +-
geo-replication/syncdaemon/resource.py | 4 ++--
geo-replication/syncdaemon/syncdutils.py | 2 +-
5 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index 932e37d..d77b90f 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -796,7 +796,7 @@ def main_i():
remote.connect_remote(go_daemon='done')
local.connect()
if ffd:
- logging.info("Closing feedback fd, waking up the monitor")
+ logging.info("Worker spawn successful. Acknowledging back to monitor")
os.close(ffd)
local.service_loop(*[r for r in [remote] if r])
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 4c1a529..e484692 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -1639,8 +1639,8 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
if isinstance(xtr_root, int):
if xtr_root != ENOENT:
logging.warn(lf("slave cluster not returning the "
- "correct xtime for root",
- xtime=xtr_root))
+ "xtime for root",
+ error=xtr_root))
xtr_root = self.minus_infinity
xtl = self.xtime(path)
if isinstance(xtl, int):
@@ -1649,9 +1649,9 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
if isinstance(xtr, int):
if xtr != ENOENT:
logging.warn(lf("slave cluster not returning the "
- "correct xtime",
+ "xtime for dir",
path=path,
- xtime=xtr))
+ error=xtr))
xtr = self.minus_infinity
xtr = max(xtr, xtr_root)
zero_zero = (0, 0)
diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
index 0ac1449..8db7d9e 100644
--- a/geo-replication/syncdaemon/repce.py
+++ b/geo-replication/syncdaemon/repce.py
@@ -203,7 +203,7 @@ class RepceClient(object):
meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
exc, res = rjob.wait()
if exc:
- logging.error(lf('call failed on peer',
+ logging.error(lf('call failed',
call=repr(rjob),
method=meth,
error=str(type(res).__name__)))
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index 0ca023c..22aaf85 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -834,8 +834,8 @@ class Server(object):
except OSError as e:
if e.errno == ENOTEMPTY:
logging.error(
- lf("Unable to delete directory"
- ", Both Old and New"
+ lf("Directory Rename failed. "
+ "Both Old and New"
" directories exists",
old=entry,
new=en))
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 2187ecd..269f301 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -321,7 +321,7 @@ def log_raise_exception(excont):
gconf.transport.terminate_geterr()
elif isinstance(exc, OSError) and exc.errno in (ENOTCONN,
ECONNABORTED):
- logging.error(lf('glusterfs session went down',
+ logging.error(lf('Gluster Mount process exited',
error=errorcode[exc.errno]))
else:
logtag = "FAIL"
--
1.8.3.1

View File

@ -0,0 +1,133 @@
From 85d0f78a989304cfe4ee74840f22bacf6685a659 Mon Sep 17 00:00:00 2001
From: Sunny Kumar <sunkumar@redhat.com>
Date: Tue, 2 Jan 2018 12:25:33 +0530
Subject: [PATCH 118/128] snapshot : after brick reset/replace snapshot
creation fails
Problem : after brick reset/replace snapshot creation fails
Solution : During brick reset/replace when we validate and aggregate
dictionary data from another node it was rewriting
'mount_dir' value to NULL which is critical for snapshot
creation.
Upstream patch : https://review.gluster.org/c/19049/
>BUG: 1512451
>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Change-Id: Iabefbfcef7d8ac4cbd2a241e821c0e51492c093e
BUG: 1507394
Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126712
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 30 +++++++++++++++-------
xlators/mgmt/glusterd/src/glusterd-reset-brick.c | 27 ++++++++++++-------
xlators/mgmt/glusterd/src/glusterd-utils.c | 9 -------
3 files changed, 39 insertions(+), 27 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 0e28608..b11adf1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -309,16 +309,28 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
/* A bricks mount dir is required only by snapshots which were
* introduced in gluster-3.6.0
*/
- ret = glusterd_get_brick_mount_dir (dst_brickinfo->path,
- dst_brickinfo->hostname,
- dst_brickinfo->mount_dir);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
- "Failed to get brick mount_dir");
- goto out;
- }
+ if (!(gf_uuid_compare (dst_brickinfo->uuid, MY_UUID))) {
+ ret = glusterd_get_brick_mount_dir
+ (dst_brickinfo->path,
+ dst_brickinfo->hostname,
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+ "Failed to get brick mount_dir");
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc
+ (rsp_dict, "brick1.mount_dir",
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set brick.mount_dir");
+ goto out;
+ }
+ }
ret = dict_set_int32 (rsp_dict, "brick_count", 1);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
index 10ee6f4..c1de043 100644
--- a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
@@ -197,15 +197,24 @@ glusterd_reset_brick_prevalidate (dict_t *dict, char **op_errstr,
}
- ret = glusterd_get_brick_mount_dir
- (dst_brickinfo->path,
- dst_brickinfo->hostname,
- dst_brickinfo->mount_dir);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
- "Failed to get brick mount_dir.");
- goto out;
+ if (!(gf_uuid_compare (dst_brickinfo->uuid, MY_UUID))) {
+ ret = glusterd_get_brick_mount_dir (dst_brickinfo->path,
+ dst_brickinfo->hostname,
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+ "Failed to get brick mount_dir");
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc (rsp_dict, "brick1.mount_dir",
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set brick.mount_dir");
+ goto out;
+ }
}
ret = dict_set_int32 (rsp_dict, "brick_count", 1);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 4b53898..1b2cc43 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -13696,15 +13696,6 @@ glusterd_brick_op_prerequisites (dict_t *dict,
(*src_brickinfo)->port);
}
}
- /* setting mount_dir */
- ret = dict_set_dynstr_with_alloc (rsp_dict, "brick1.mount_dir",
- (*src_brickinfo)->mount_dir);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SET_FAILED,
- "Failed to set brick1.mount_dir");
- goto out;
- }
v = *volinfo;
b = *src_brickinfo;
--
1.8.3.1

View File

@ -0,0 +1,77 @@
From a47d863ea4501d3d0daceacb194c9f900cefe1a7 Mon Sep 17 00:00:00 2001
From: Kotresh HR <khiremat@redhat.com>
Date: Mon, 13 Nov 2017 05:27:50 -0500
Subject: [PATCH 119/128] geo-rep: Fix data sync issue during hardlink, rename
Problem:
The data is not getting synced if master witnessed
IO as below.
1. echo "test_data" > f1
2. ln f1 f2
3. mv f2 f3
4. unlink f1
On master, 'f3' exists with data "test_data" but on
slave, only f3 exists with zero byte file without
backend gfid link.
Cause:
On master, since 'f2' no longer exists, the hardlink
is skipped during processing. Later, on trying to sync
rename, since source ('f2') doesn't exist, dst ('f3')
is created with same gfid. But in this use case, it
succeeds but backend gfid would not have linked as 'f1'
exists with the same gfid. So, rsync would fail with
ENOENT as backend gfid is not linked with 'f3' and 'f1'
is unlinked.
Fix:
On processing rename, if src doesn't exist on slave,
don't blindly create dst with same gfid. The gfid
needs to be checked, if it exists, hardlink needs
to be created instead of mknod.
Thanks Aravinda for helping in RCA :)
Upstream Reference:
> Patch: https://review.gluster.org/18731
> BUG: 1512483
Change-Id: I5af4f99798ed1bcb297598a4bc796b701d1e0130
BUG: 1512496
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126728
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
geo-replication/syncdaemon/resource.py | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index 22aaf85..5ad5b97 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -814,8 +814,17 @@ class Server(object):
elif not matching_disk_gfid(gfid, en):
collect_failure(e, EEXIST, True)
else:
- (pg, bname) = entry2pb(en)
- blob = entry_pack_reg_stat(gfid, bname, e['stat'])
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_reg_stat(gfid, bname,
+ e['stat'])
+ else:
+ cmd_ret = errno_wrap(os.link, [slink, en],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret)
else:
st1 = lstat(en)
if isinstance(st1, int):
--
1.8.3.1

View File

@ -0,0 +1,225 @@
From 4bf98e63a481aea6143e8f404aa4650f7a80e317 Mon Sep 17 00:00:00 2001
From: Atin Mukherjee <amukherj@redhat.com>
Date: Wed, 3 Jan 2018 14:29:51 +0530
Subject: [PATCH 120/128] glusterd: connect to an existing brick process when
 quorum status is NOT_APPLICABLE_QUORUM
First of all, this patch reverts commit 635c1c3 as the same is causing a
regression with bricks not coming up on time when a node is rebooted.
This patch tries to fix the problem in a different way by just trying to
connect to an existing running brick when quorum status is not
applicable.
> upstream patch : https://review.gluster.org/#/c/19134/
Change-Id: I0efb5901832824b1c15dcac529bffac85173e097
BUG: 1509102
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126996
Tested-by: RHGS Build Bot <nigelb@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 2 +-
xlators/mgmt/glusterd/src/glusterd-handshake.c | 2 +-
xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 +
xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 3 ++-
xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 27 ++++++++++++++++++----
xlators/mgmt/glusterd/src/glusterd-utils.c | 13 +++++++----
xlators/mgmt/glusterd/src/glusterd-utils.h | 3 ++-
xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 3 ++-
8 files changed, 40 insertions(+), 14 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index e88fa3f..416412e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1554,7 +1554,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
}
}
ret = glusterd_brick_start (volinfo, brickinfo,
- _gf_true);
+ _gf_true, _gf_false);
if (ret)
goto out;
i++;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index 35aeca3..3d1dfb2 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -658,7 +658,7 @@ glusterd_create_missed_snap (glusterd_missed_snap_info *missed_snapinfo,
}
brickinfo->snap_status = 0;
- ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false);
+ ret = glusterd_brick_start (snap_vol, brickinfo, _gf_false, _gf_false);
if (ret) {
gf_msg (this->name, GF_LOG_WARNING, 0,
GD_MSG_BRICK_DISCONNECTED, "starting the "
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 86f18f0..b1a6e06 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2437,6 +2437,7 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
pthread_mutex_lock (&brickinfo->restart_mutex);
{
ret = glusterd_brick_start (volinfo, brickinfo,
+ _gf_false,
_gf_false);
}
pthread_mutex_unlock (&brickinfo->restart_mutex);
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index b11adf1..a037323 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -429,7 +429,8 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo,
goto out;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false);
+ ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false,
+ _gf_false);
if (ret)
goto out;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
index 995a568..b01bfaa 100644
--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -314,6 +314,7 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo = NULL;
gd_quorum_status_t quorum_status = NOT_APPLICABLE_QUORUM;
gf_boolean_t follows_quorum = _gf_false;
+ gf_boolean_t quorum_status_unchanged = _gf_false;
if (volinfo->status != GLUSTERD_STATUS_STARTED) {
volinfo->quorum_status = NOT_APPLICABLE_QUORUM;
@@ -341,9 +342,10 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
* the bricks that are down are brought up again. In this process it
* also brings up the brick that is purposefully taken down.
*/
- if (quorum_status != NOT_APPLICABLE_QUORUM &&
- volinfo->quorum_status == quorum_status)
+ if (volinfo->quorum_status == quorum_status) {
+ quorum_status_unchanged = _gf_true;
goto out;
+ }
if (quorum_status == MEETS_QUORUM) {
gf_msg (this->name, GF_LOG_CRITICAL, 0,
@@ -368,9 +370,10 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
if (!brickinfo->start_triggered) {
pthread_mutex_lock (&brickinfo->restart_mutex);
{
- glusterd_brick_start (volinfo,
- brickinfo,
- _gf_false);
+ ret = glusterd_brick_start (volinfo,
+ brickinfo,
+ _gf_false,
+ _gf_false);
}
pthread_mutex_unlock (&brickinfo->restart_mutex);
}
@@ -392,6 +395,20 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
}
}
out:
+ if (quorum_status_unchanged) {
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (!glusterd_is_local_brick (this, volinfo, brickinfo))
+ continue;
+ ret = glusterd_brick_start (volinfo, brickinfo,
+ _gf_false, _gf_true);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_DISCONNECTED, "Failed to "
+ "connect to %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ }
+ }
+ }
return;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 1b2cc43..f1b365f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -5796,7 +5796,8 @@ glusterd_get_sock_from_brick_pid (int pid, char *sockpath, size_t len)
int
glusterd_brick_start (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
- gf_boolean_t wait)
+ gf_boolean_t wait,
+ gf_boolean_t only_connect)
{
int ret = -1;
xlator_t *this = NULL;
@@ -5847,7 +5848,9 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
ret = 0;
goto out;
}
- brickinfo->start_triggered = _gf_true;
+ if (!only_connect)
+ brickinfo->start_triggered = _gf_true;
+
GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, conf);
if (gf_is_service_running (pidfile, &pid)) {
if (brickinfo->status != GF_BRICK_STARTING &&
@@ -5905,6 +5908,8 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
}
return 0;
}
+ if (only_connect)
+ return 0;
run:
ret = _mk_rundir_p (volinfo);
@@ -6032,7 +6037,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
{
glusterd_brick_start
(volinfo, brickinfo,
- _gf_false);
+ _gf_false, _gf_false);
}
pthread_mutex_unlock
(&brickinfo->restart_mutex);
@@ -6081,7 +6086,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
{
glusterd_brick_start
(volinfo, brickinfo,
- _gf_false);
+ _gf_false, _gf_false);
}
pthread_mutex_unlock
(&brickinfo->restart_mutex);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index abaec4b..9194da0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -277,7 +277,8 @@ glusterd_all_volume_cond_check (glusterd_condition_func func, int status,
int
glusterd_brick_start (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
- gf_boolean_t wait);
+ gf_boolean_t wait,
+ gf_boolean_t only_connect);
int
glusterd_brick_stop (glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo,
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index de97e6a..414f9ba 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -2564,7 +2564,8 @@ glusterd_start_volume (glusterd_volinfo_t *volinfo, int flags,
if (flags & GF_CLI_FLAG_OP_FORCE) {
brickinfo->start_triggered = _gf_false;
}
- ret = glusterd_brick_start (volinfo, brickinfo, wait);
+ ret = glusterd_brick_start (volinfo, brickinfo, wait,
+ _gf_false);
/* If 'force' try to start all bricks regardless of success or
* failure
*/
--
1.8.3.1

View File

@ -0,0 +1,572 @@
From cd8c116ba97432f585408de509280a501816d3a5 Mon Sep 17 00:00:00 2001
From: Sunil Kumar Acharya <sheggodu@redhat.com>
Date: Thu, 23 Mar 2017 12:50:41 +0530
Subject: [PATCH 121/128] cluster/ec: OpenFD heal implementation for EC
Existing EC code doesn't try to heal the OpenFD to
avoid unnecessary healing of the data later.
Fix implements the healing of open FDs before
carrying out file operations on them by making an
attempt to open the FDs on required up nodes.
>BUG: 1431955
>Change-Id: Ib696f59c41ffd8d5678a484b23a00bb02764ed15
>Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Upstream Patch: https://review.gluster.org/17077
3.13 Patch: https://review.gluster.org/19176
BUG: 1509810
Change-Id: Ib696f59c41ffd8d5678a484b23a00bb02764ed15
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/127271
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Javier Hernandez Juan <jahernan@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
tests/basic/ec/ec-fix-openfd.t | 109 +++++++++++++++++++++++++++++++
tests/bugs/core/bug-908146.t | 12 +---
tests/volume.rc | 12 ++++
xlators/cluster/ec/src/ec-common.c | 113 +++++++++++++++++++++++++++++++++
xlators/cluster/ec/src/ec-common.h | 4 ++
xlators/cluster/ec/src/ec-dir-read.c | 8 ++-
xlators/cluster/ec/src/ec-dir-write.c | 1 +
xlators/cluster/ec/src/ec-helpers.c | 29 +++++----
xlators/cluster/ec/src/ec-inode-read.c | 3 +
xlators/cluster/ec/src/ec-types.h | 59 +++++++++++------
10 files changed, 307 insertions(+), 43 deletions(-)
create mode 100644 tests/basic/ec/ec-fix-openfd.t
diff --git a/tests/basic/ec/ec-fix-openfd.t b/tests/basic/ec/ec-fix-openfd.t
new file mode 100644
index 0000000..b62fbf4
--- /dev/null
+++ b/tests/basic/ec/ec-fix-openfd.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+# This test checks for open fd heal on EC
+
+#Create Volume
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.read-after-open yes
+TEST $CLI volume set $V0 performance.lazy-open no
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Touch a file
+TEST touch "$M0/test_file"
+
+#Kill a brick
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Open the file in write mode
+TEST fd=`fd_available`
+TEST fd_open $fd 'rw' "$M0/test_file"
+
+#Bring up the killed brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Test the fd count
+EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}1 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}2 test_file
+
+#Write to file
+dd iflag=fullblock if=/dev/random bs=1024 count=2 >&$fd 2>/dev/null
+
+#Test the fd count
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+
+#Close fd
+TEST fd_close $fd
+
+#Stop the volume
+TEST $CLI volume stop $V0
+
+#Start the volume
+TEST $CLI volume start $V0
+
+#Kill brick1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum0=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum1=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick3
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum2=`get_md5_sum "$M0/test_file"`
+
+#compare the md5sum
+EXPECT "$md5sum0" echo $md5sum1
+EXPECT "$md5sum0" echo $md5sum2
+EXPECT "$md5sum1" echo $md5sum2
+
+cleanup
diff --git a/tests/bugs/core/bug-908146.t b/tests/bugs/core/bug-908146.t
index bf34992..327be6e 100755
--- a/tests/bugs/core/bug-908146.t
+++ b/tests/bugs/core/bug-908146.t
@@ -2,18 +2,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
-function get_fd_count {
- local vol=$1
- local host=$2
- local brick=$3
- local fname=$4
- local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
- local statedump=$(generate_brick_statedump $vol $host $brick)
- local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
- rm -f $statedump
- echo $count
-}
cleanup;
TEST glusterd
diff --git a/tests/volume.rc b/tests/volume.rc
index 1cee648..1ca17ab 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -796,3 +796,15 @@ function count_sh_entries()
{
ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l
}
+
+function get_fd_count {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local fname=$4
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
+ local statedump=$(generate_brick_statedump $vol $host $brick)
+ local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $count
+}
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index f86ecf8..18ed274 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -25,6 +25,114 @@
EC_FLAG_WAITING_DATA_DIRTY |\
EC_FLAG_WAITING_METADATA_DIRTY)
+void
+ec_update_fd_status (fd_t *fd, xlator_t *xl, int idx,
+ int32_t ret_status)
+{
+ ec_fd_t *fd_ctx;
+
+ if (fd == NULL)
+ return;
+
+ LOCK (&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ if (ret_status >= 0)
+ fd_ctx->fd_status[idx] = EC_FD_OPENED;
+ else
+ fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK (&fd->lock);
+}
+
+static int
+ec_fd_ctx_need_open (fd_t *fd, xlator_t *this, uintptr_t *need_open)
+{
+ int i = 0;
+ int count = 0;
+ ec_t *ec = NULL;
+ ec_fd_t *fd_ctx = NULL;
+
+ ec = this->private;
+ *need_open = 0;
+
+ fd_ctx = ec_fd_get (fd, this);
+ if (!fd_ctx)
+ return count;
+
+ LOCK (&fd->lock);
+ {
+ for (i = 0; i < ec->nodes; i++) {
+ if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+ (ec->xl_up & (1<<i))) {
+ fd_ctx->fd_status[i] = EC_FD_OPENING;
+ *need_open |= (1<<i);
+ count++;
+ }
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ /* If fd needs to open on minimum number of nodes
+ * then ignore fixing the fd as it has been
+ * requested from heal operation.
+ */
+ if (count >= ec->fragments)
+ count = 0;
+
+ return count;
+}
+
+static gf_boolean_t
+ec_is_fd_fixable (fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (gf_uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+static void
+ec_fix_open (ec_fop_data_t *fop)
+{
+ int call_count = 0;
+ uintptr_t need_open = 0;
+ int ret = 0;
+ loc_t loc = {0, };
+
+ if (!ec_is_fd_fixable (fop->fd))
+ goto out;
+
+ /* Evaluate how many remote fd's to be opened */
+ call_count = ec_fd_ctx_need_open (fop->fd, fop->xl, &need_open);
+ if (!call_count)
+ goto out;
+
+ loc.inode = inode_ref (fop->fd->inode);
+ gf_uuid_copy (loc.gfid, fop->fd->inode->gfid);
+ ret = loc_path (&loc, NULL);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (IA_IFDIR == fop->fd->inode->ia_type) {
+ ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
+ NULL, NULL, &fop->loc[0], fop->fd, NULL);
+ } else{
+ ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE,
+ NULL, NULL, &loc, fop->fd->flags, fop->fd, NULL);
+ }
+
+out:
+ loc_wipe (&loc);
+}
+
off_t
ec_range_end_get (off_t fl_start, size_t fl_size)
{
@@ -1647,6 +1755,11 @@ void ec_lock_acquired(ec_lock_link_t *link)
ec_lock_apply(link);
+ if (fop->use_fd &&
+ (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
+ ec_fix_open(fop);
+ }
+
ec_lock_resume_shared(&list);
}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index dec81ca..c0ad604 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -135,4 +135,8 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
ec_heal_need_t *need_heal);
int32_t
ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
+
+void
+ec_update_fd_status (fd_t *fd, xlator_t *xl,
+ int child_index, int32_t ret_status);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index 48afe54..b44bb42 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -19,7 +19,11 @@
#include "ec-method.h"
#include "ec-fops.h"
-/* FOP: opendir */
+/****************************************************************
+ *
+ * File Operation: opendir
+ *
+ ***************************************************************/
int32_t ec_combine_opendir(ec_fop_data_t * fop, ec_cbk_data_t * dst,
ec_cbk_data_t * src)
@@ -88,6 +92,8 @@ int32_t ec_opendir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
ec_combine(cbk, ec_combine_opendir);
+
+ ec_update_fd_status (fd, this, idx, op_ret);
}
out:
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index 150dc66..7779d48 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -71,6 +71,7 @@ ec_dir_write_cbk (call_frame_t *frame, xlator_t *this,
out:
if (cbk)
ec_combine (cbk, ec_combine_write);
+
if (fop)
ec_complete (fop);
return 0;
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index 0c66948..d54340c 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -751,27 +751,32 @@ ec_inode_t * ec_inode_get(inode_t * inode, xlator_t * xl)
ec_fd_t * __ec_fd_get(fd_t * fd, xlator_t * xl)
{
+ int i = 0;
ec_fd_t * ctx = NULL;
uint64_t value = 0;
+ ec_t *ec = xl->private;
- if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0))
- {
- ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_fd_t);
- if (ctx != NULL)
- {
+ if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) {
+ ctx = GF_MALLOC(sizeof(*ctx) + (sizeof (ec_fd_status_t) * ec->nodes),
+ ec_mt_ec_fd_t);
+ if (ctx != NULL) {
memset(ctx, 0, sizeof(*ctx));
- value = (uint64_t)(uintptr_t)ctx;
- if (__fd_ctx_set(fd, xl, value) != 0)
- {
- GF_FREE(ctx);
+ for (i = 0; i < ec->nodes; i++) {
+ if (fd_is_anonymous (fd)) {
+ ctx->fd_status[i] = EC_FD_OPENED;
+ } else {
+ ctx->fd_status[i] = EC_FD_NOT_OPENED;
+ }
+ }
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__fd_ctx_set(fd, xl, value) != 0) {
+ GF_FREE (ctx);
return NULL;
}
}
- }
- else
- {
+ } else {
ctx = (ec_fd_t *)(uintptr_t)value;
}
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index 33fd7f5..24fcdb9 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -739,6 +739,9 @@ int32_t ec_open_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
}
ec_combine(cbk, ec_combine_open);
+
+ ec_update_fd_status (fd, this, idx, op_ret);
+
}
out:
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index a891ff5..3129586 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -124,6 +124,13 @@ enum _ec_heal_need {
EC_HEAL_MUST
};
+/* Enumartions to indicate FD status. */
+typedef enum {
+ EC_FD_NOT_OPENED,
+ EC_FD_OPENED,
+ EC_FD_OPENING
+} ec_fd_status_t;
+
struct _ec_config {
uint32_t version;
uint8_t algorithm;
@@ -137,6 +144,7 @@ struct _ec_fd {
loc_t loc;
uintptr_t open;
int32_t flags;
+ ec_fd_status_t fd_status[0];
};
struct _ec_inode {
@@ -263,17 +271,21 @@ struct _ec_lock_link {
off_t fl_end;
};
+/* EC xlator data structure to collect all the data required to perform
+ * the file operation.*/
struct _ec_fop_data {
- int32_t id;
+ int32_t id; /* ID of the file operation */
int32_t refs;
int32_t state;
- int32_t minimum;
+ int32_t minimum; /* Mininum number of successful
+ operation required to conclude a
+ fop as successful */
int32_t expected;
int32_t winds;
int32_t jobs;
int32_t error;
ec_fop_data_t *parent;
- xlator_t *xl;
+ xlator_t *xl; /* points to EC xlator */
call_frame_t *req_frame; /* frame of the calling xlator */
call_frame_t *frame; /* frame used by this fop */
struct list_head cbk_list; /* sorted list of groups of answers */
@@ -299,10 +311,10 @@ struct _ec_fop_data {
uid_t uid;
gid_t gid;
- ec_wind_f wind;
- ec_handler_f handler;
+ ec_wind_f wind; /* Function to wind to */
+ ec_handler_f handler; /* FOP manager function */
ec_resume_f resume;
- ec_cbk_t cbks;
+ ec_cbk_t cbks; /* Callback function for this FOP */
void *data;
ec_heal_t *heal;
struct list_head healer;
@@ -310,7 +322,8 @@ struct _ec_fop_data {
uint64_t user_size;
uint32_t head;
- int32_t use_fd;
+ int32_t use_fd; /* Indicates whether this FOP uses FD or
+ not */
dict_t *xdata;
dict_t *dict;
@@ -324,10 +337,12 @@ struct _ec_fop_data {
gf_xattrop_flags_t xattrop_flags;
dev_t dev;
inode_t *inode;
- fd_t *fd;
+ fd_t *fd; /* FD of the file on which FOP is
+ being carried upon */
struct iatt iatt;
char *str[2];
- loc_t loc[2];
+ loc_t loc[2]; /* Holds the location details for
+ the file */
struct gf_flock flock;
struct iovec *vector;
struct iobref *buffers;
@@ -555,18 +570,24 @@ struct _ec {
xlator_t *xl;
int32_t healers;
int32_t heal_waiters;
- int32_t nodes;
+ int32_t nodes; /* Total number of bricks(n) */
int32_t bits_for_nodes;
- int32_t fragments;
- int32_t redundancy;
- uint32_t fragment_size;
- uint32_t stripe_size;
- int32_t up;
+ int32_t fragments; /* Data bricks(k) */
+ int32_t redundancy; /* Redundant bricks(m) */
+ uint32_t fragment_size; /* Size of fragment/chunk on a
+ brick. */
+ uint32_t stripe_size; /* (fragment_size * fragments)
+ maximum size of user data
+ stored in one stripe. */
+ int32_t up; /* Represents whether EC volume is
+ up or not. */
uint32_t idx;
- uint32_t xl_up_count;
- uintptr_t xl_up;
- uint32_t xl_notify_count;
- uintptr_t xl_notify;
+ uint32_t xl_up_count; /* Number of UP bricks. */
+ uintptr_t xl_up; /* Bit flag representing UP
+ bricks */
+ uint32_t xl_notify_count; /* Number of notifications. */
+ uintptr_t xl_notify; /* Bit flag representing
+ notification for bricks. */
uintptr_t node_mask;
xlator_t **xl_list;
gf_lock_t lock;
--
1.8.3.1

View File

@ -0,0 +1,42 @@
From b13aa33a408b8ca85b306be9c8dbadaff4ed7c64 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Mon, 8 Jan 2018 11:53:17 +0530
Subject: [PATCH 122/128] tests: Use /dev/urandom instead of /dev/random for dd
If there's not enough entropy in the system then reading /dev/random would take
a significant time since it would take a long time for the /dev/random buffers
to get full as is desired in this dd run.
Milind found that this test file takes almost 1000 seconds or more to pass
instead of just a minute because of this.
>BUG: 1431955
>Change-Id: I9145b17f77f09d0ab71816ae249c69b8fe14c1a5
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Upstream Patch: https://review.gluster.org/#/c/19160/
BUG: 1509810
Change-Id: I9145b17f77f09d0ab71816ae249c69b8fe14c1a5
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/127405
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
tests/basic/ec/ec-fix-openfd.t | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/basic/ec/ec-fix-openfd.t b/tests/basic/ec/ec-fix-openfd.t
index b62fbf4..c32f933 100644
--- a/tests/basic/ec/ec-fix-openfd.t
+++ b/tests/basic/ec/ec-fix-openfd.t
@@ -43,7 +43,7 @@ EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}1 test_file
EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}2 test_file
#Write to file
-dd iflag=fullblock if=/dev/random bs=1024 count=2 >&$fd 2>/dev/null
+dd iflag=fullblock if=/dev/urandom bs=1024 count=2 >&$fd 2>/dev/null
#Test the fd count
EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 test_file
--
1.8.3.1

View File

@ -0,0 +1,155 @@
From a9310d6e4c9990020ff76f5b815bc99b5703f17e Mon Sep 17 00:00:00 2001
From: Sanoj Unnikrishnan <sunnikri@redhat.com>
Date: Wed, 8 Nov 2017 16:18:56 +0530
Subject: [PATCH 123/128] quota: fixes issue in quota.conf when setting large
number of limits
Problem: It was not possible to configure more than 7712 quota limits.
This was because a stack buffer of size 131072 was used to read from
quota.conf file. In the new format of quota.conf file each gfid entry
takes 17bytes (16byte gfid + 1 byte type). So, the buf_size was not a
multiple of gfid entry size and as per code this was considered as
corruption.
Solution: make buf size a multiple of gfid entry size
> Change-Id: Id036225505a47a4f6fa515a572ee7b0c958f30ed
> BUG: 1510940
> Patch: https://review.gluster.org/#/c/18695/
BUG: 1511766
Change-Id: Id036225505a47a4f6fa515a572ee7b0c958f30ed
Signed-off-by: Sanoj Unnikrishnan <sunnikri@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124647
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-quota.c | 45 ++++++++++++++++++++++--------
1 file changed, 33 insertions(+), 12 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c
index fc34042..1c3a801 100644
--- a/xlators/mgmt/glusterd/src/glusterd-quota.c
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.c
@@ -907,24 +907,31 @@ out:
}
/* The function glusterd_copy_to_tmp_file() reads the "remaining" bytes from
- * the source fd and writes them to destination fd, at the rate of 128K bytes
- * of read+write at a time.
+ * the source fd and writes them to destination fd, at the rate of 1000 entries
+ * a time (qconf_line_sz is the size of an entry)
*/
static int
-glusterd_copy_to_tmp_file (int src_fd, int dst_fd)
+glusterd_copy_to_tmp_file (int src_fd, int dst_fd, int qconf_line_sz)
{
int ret = 0;
- size_t entry_sz = 131072;
ssize_t bytes_read = 0;
- unsigned char buf[131072] = {0,};
xlator_t *this = NULL;
+ unsigned char *buf = 0;
+ int buf_sz = qconf_line_sz * 1000;
this = THIS;
GF_ASSERT (this);
+ GF_ASSERT (buf_sz > 0);
- while ((bytes_read = sys_read (src_fd, (void *)&buf, entry_sz)) > 0) {
- if (bytes_read % 16 != 0) {
+ buf = GF_CALLOC(buf_sz, 1, gf_common_mt_char);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+
+ while ((bytes_read = sys_read (src_fd, buf, buf_sz)) > 0) {
+ if (bytes_read % qconf_line_sz != 0) {
gf_msg (this->name, GF_LOG_ERROR, 0,
GD_MSG_QUOTA_CONF_CORRUPT, "quota.conf "
"corrupted");
@@ -942,6 +949,8 @@ glusterd_copy_to_tmp_file (int src_fd, int dst_fd)
ret = 0;
out:
+ if (buf)
+ GF_FREE(buf);
return ret;
}
@@ -1034,7 +1043,6 @@ glusterd_store_quota_config (glusterd_volinfo_t *volinfo, char *path,
int conf_fd = -1;
ssize_t bytes_read = 0;
size_t bytes_to_write = 0;
- unsigned char buf[131072] = {0,};
uuid_t gfid = {0,};
xlator_t *this = NULL;
gf_boolean_t found = _gf_false;
@@ -1045,6 +1053,8 @@ glusterd_store_quota_config (glusterd_volinfo_t *volinfo, char *path,
float version = 0.0f;
char type = 0;
int quota_conf_line_sz = 16;
+ unsigned char *buf = 0;
+ int buf_sz = 0;
this = THIS;
GF_ASSERT (this);
@@ -1098,6 +1108,14 @@ glusterd_store_quota_config (glusterd_volinfo_t *volinfo, char *path,
if (conf->op_version >= GD_OP_VERSION_3_7_0)
quota_conf_line_sz++;
+ buf_sz = quota_conf_line_sz * 1000;
+
+ buf = GF_CALLOC(buf_sz, 1, gf_common_mt_char);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+
fd = gf_store_mkstemp (volinfo->quota_conf_shandle);
if (fd < 0) {
ret = -1;
@@ -1129,7 +1147,7 @@ glusterd_store_quota_config (glusterd_volinfo_t *volinfo, char *path,
type = GF_QUOTA_CONF_TYPE_USAGE;
for (;;) {
- bytes_read = sys_read (conf_fd, (void *)&buf, sizeof (buf));
+ bytes_read = sys_read (conf_fd, buf, buf_sz);
if (bytes_read <= 0) {
/*The flag @is_first_read is TRUE when the loop is
* entered, and is set to false if the first read
@@ -1166,7 +1184,8 @@ glusterd_store_quota_config (glusterd_volinfo_t *volinfo, char *path,
* Else continue with the search.
*/
if (found) {
- ret = glusterd_copy_to_tmp_file (conf_fd, fd);
+ ret = glusterd_copy_to_tmp_file (conf_fd, fd,
+ quota_conf_line_sz);
if (ret)
goto out;
break;
@@ -1238,6 +1257,9 @@ out:
sys_close (conf_fd);
}
+ if (buf)
+ GF_FREE(buf);
+
if (ret && (fd > 0)) {
gf_store_unlink_tmppath (volinfo->quota_conf_shandle);
} else if (!ret && GF_QUOTA_OPTION_TYPE_UPGRADE != opcode) {
@@ -1260,8 +1282,7 @@ out:
"store quota version and cksum");
}
}
-
- return ret;
+ return ret;
}
int32_t
--
1.8.3.1

View File

@ -0,0 +1,48 @@
From 319f93e9f83681d754e76db104b63a4a5200aad7 Mon Sep 17 00:00:00 2001
From: Milind Changire <mchangir@redhat.com>
Date: Wed, 20 Dec 2017 14:02:17 +0530
Subject: [PATCH 124/128] build: remove ExclusiveArch from spec file
ExclusiveArch directive limits builds for specific architectures.
This gets in the way of building for non-x86_64 architectures.
So, removing the directive keeps the sources open to be built
for non-x86_64 architectures as well.
Label: DOWNSTREAM ONLY
Change-Id: Id178e30658b03a48acfa97c0e7556c1f5a8b533c
BUG: 1534253
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/126251
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
glusterfs.spec.in | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 56a62a9..2ac9a60b0 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -193,7 +193,6 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
Name: @PACKAGE_NAME@
Version: @PACKAGE_VERSION@
Release: @PACKAGE_RELEASE@%{?dist}
-ExclusiveArch: x86_64 aarch64
%endif
License: GPLv2 or LGPLv3+
Group: System Environment/Base
@@ -2180,6 +2179,9 @@ fi
%endif
%changelog
+* Wed Dec 20 2017 Milind Changire <mchangir@redhat.com>
+- Remove ExclusiveArch directive to help building on non-x86_64 arches (#1527772)
+
* Fri Dec 01 2017 Mohit Agrawal <moagrawa@redhat.com>
- Added control-cpu-load.sh and control-mem.sh scripts to glusterfs-server section(#1484446)
--
1.8.3.1

View File

@ -0,0 +1,903 @@
From 3a4682ccd935744a0c5346bae23658ff08d65343 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 15 Jan 2018 12:48:54 +0530
Subject: [PATCH 125/128] cluster/afr: Fixing the flaws in arbiter becoming
source patch
Problem:
Setting the write_subvol value to read_subvol in case of metadata
transaction during pre-op (commit 19f9bcff4aada589d4321356c2670ed283f02c03)
might lead to the original problem of arbiter becoming source.
Scenario:
1) All bricks are up and good
2) 2 writes w1 and w2 are in progress in parallel
3) ctx->read_subvol is good for all the subvolumes
4) w1 succeeds on brick0 and fails on brick1, yet to do post-op on
the disk
5) read/lookup comes on the same file and refreshes read_subvols back
to all good
6) metadata transaction happens which makes ctx->write_subvol to be
assigned with ctx->read_subvol which is all good
7) w2 succeeds on brick1 and fails on brick0 and this will update the
brick in reverse order leading to arbiter becoming source
Fix:
Instead of setting the ctx->write_subvol to ctx->read_subvol in the
pre-op stage, if there is a metadata transaction, check in the
function __afr_set_in_flight_sb_status() if it is a data/metadata
transaction. Use the value of ctx->write_subvol if it is a data
transaction and ctx->read_subvol value for other transactions.
With this patch we assign the value of ctx->write_subvol in the
afr_transaction_perform_fop() with the on disk value, instead of
assigning it in the afr_changelog_pre_op() with the in memory value.
Upstream Patch: https://review.gluster.org/#/c/19045/
> Change-Id: Id2025a7e965f0578af35b1abaac793b019c43cc4
> BUG: 1482064
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
Change-Id: Ie5d6745703fa5024d27e413093f7dfd08992e1df
BUG: 1401969
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/127644
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Tested-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
---
xlators/cluster/afr/src/afr-common.c | 266 +++++++++++++++++-------------
xlators/cluster/afr/src/afr-dir-write.c | 16 +-
xlators/cluster/afr/src/afr-inode-write.c | 57 +++++--
xlators/cluster/afr/src/afr-lk-common.c | 42 +++--
xlators/cluster/afr/src/afr-messages.h | 9 +-
xlators/cluster/afr/src/afr-transaction.c | 45 ++---
xlators/cluster/afr/src/afr.h | 22 ++-
7 files changed, 277 insertions(+), 180 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 692f198..6e6f5fa 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -150,6 +150,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
tmp_ctx->spb_choice = -1;
tmp_ctx->read_subvol = 0;
tmp_ctx->write_subvol = 0;
+ tmp_ctx->lock_count = 0;
} else {
tmp_ctx = (afr_inode_ctx_t *) ctx_int;
}
@@ -195,7 +196,6 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
inode_t *inode)
{
int i = 0;
- int ret = -1;
int txn_type = 0;
int count = 0;
int index = -1;
@@ -208,16 +208,14 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
uint32_t event = 0;
uint64_t val = 0;
afr_private_t *priv = NULL;
- afr_inode_ctx_t *ctx = NULL;
priv = this->private;
txn_type = local->transaction.type;
- ret = __afr_inode_ctx_get (this, inode, &ctx);
- if (ret < 0)
- return ret;
-
- val = ctx->write_subvol;
+ if (txn_type == AFR_DATA_TRANSACTION)
+ val = local->inode_ctx->write_subvol;
+ else
+ val = local->inode_ctx->read_subvol;
metadatamap_old = metadatamap = (val & 0x000000000000ffff);
datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
@@ -278,10 +276,11 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
(((uint64_t) datamap) << 16) |
(((uint64_t) event) << 32);
- ctx->write_subvol = val;
- ctx->read_subvol = val;
+ if (txn_type == AFR_DATA_TRANSACTION)
+ local->inode_ctx->write_subvol = val;
+ local->inode_ctx->read_subvol = val;
- return ret;
+ return 0;
}
gf_boolean_t
@@ -1001,6 +1000,81 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
}
int
+afr_readables_fill (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *data_accused,
+ unsigned char *metadata_accused,
+ unsigned char *data_readable,
+ unsigned char *metadata_readable,
+ struct afr_reply *replies)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+ int ret = 0;
+ ia_type_t ia_type = IA_INVAL;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
+ if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) {
+ data_readable[ARBITER_BRICK_INDEX] = 0;
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies) {/* Lookup */
+ if (!replies[i].valid || replies[i].op_ret == -1 ||
+ (replies[i].xdata && dict_get (replies[i].xdata,
+ GLUSTERFS_BAD_INODE))) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ xdata = replies[i].xdata;
+ ia_type = replies[i].poststat.ia_type;
+ } else {/* pre-op xattrop */
+ xdata = local->transaction.pre_op_xdata[i];
+ ia_type = inode->ia_type;
+ }
+
+ afr_accused_fill (this, xdata, data_accused,
+ (ia_type == IA_IFDIR) ?
+ AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
+
+ afr_accused_fill (this, xdata,
+ metadata_accused, AFR_METADATA_TRANSACTION);
+ }
+
+ if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR &&
+ /* We want to accuse small files only when we know for
+ * sure that there is no IO happening. Otherwise, the
+ * ia_sizes obtained in post-refresh replies may
+ * mismatch due to a race between inode-refresh and
+ * ongoing writes, causing spurious heal launches*/
+ !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this)) {
+ afr_accuse_smallfiles (this, replies, data_accused);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+ return ret;
+}
+
+int
afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
gf_boolean_t *start_heal)
{
@@ -1025,62 +1099,9 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
metadata_accused = alloca0 (priv->child_count);
metadata_readable = alloca0 (priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- data_readable[i] = 1;
- metadata_readable[i] = 1;
- }
- if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) {
- data_readable[ARBITER_BRICK_INDEX] = 0;
- metadata_readable[ARBITER_BRICK_INDEX] = 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid) {
- data_readable[i] = 0;
- metadata_readable[i] = 0;
- continue;
- }
-
- if (replies[i].op_ret == -1) {
- data_readable[i] = 0;
- metadata_readable[i] = 0;
- continue;
- }
-
- if (replies[i].xdata &&
- dict_get (replies[i].xdata, GLUSTERFS_BAD_INODE)) {
- data_readable[i] = 0;
- metadata_readable[i] = 0;
- continue;
- }
-
- afr_accused_fill (this, replies[i].xdata, data_accused,
- (replies[i].poststat.ia_type == IA_IFDIR) ?
- AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
-
- afr_accused_fill (this, replies[i].xdata,
- metadata_accused, AFR_METADATA_TRANSACTION);
-
- }
-
- if ((inode->ia_type != IA_IFDIR) &&
- /* We want to accuse small files only when we know for sure that
- * there is no IO happening. Otherwise, the ia_sizes obtained in
- * post-refresh replies may mismatch due to a race between inode-
- * refresh and ongoing writes, causing spurious heal launches*/
- !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this))
- afr_accuse_smallfiles (this, replies, data_accused);
-
- for (i = 0; i < priv->child_count; i++) {
- if (data_accused[i]) {
- data_readable[i] = 0;
- ret = 1;
- }
- if (metadata_accused[i]) {
- metadata_readable[i] = 0;
- ret = 1;
- }
- }
+ ret = afr_readables_fill (frame, this, inode, data_accused,
+ metadata_accused, data_readable,
+ metadata_readable, replies);
for (i = 0; i < priv->child_count; i++) {
if (start_heal && priv->child_up[i] &&
@@ -5510,13 +5531,13 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->transaction.pre_op)
goto out;
- if (priv->arbiter_count == 1) {
- local->transaction.pre_op_xdata =
- GF_CALLOC (sizeof (*local->transaction.pre_op_xdata),
- priv->child_count, gf_afr_mt_dict_t);
- if (!local->transaction.pre_op_xdata)
- goto out;
+ local->transaction.pre_op_xdata =
+ GF_CALLOC (sizeof (*local->transaction.pre_op_xdata),
+ priv->child_count, gf_afr_mt_dict_t);
+ if (!local->transaction.pre_op_xdata)
+ goto out;
+ if (priv->arbiter_count == 1) {
local->transaction.pre_op_sources =
GF_CALLOC (sizeof (*local->transaction.pre_op_sources),
priv->child_count, gf_afr_mt_char);
@@ -6489,42 +6510,45 @@ int
afr_write_subvol_set (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
uint64_t val = 0;
- uint64_t val1 = 0;
- int ret = -1;
+ int event = 0;
+ int i = 0;
local = frame->local;
+ priv = this->private;
+ data_accused = alloca0 (priv->child_count);
+ metadata_accused = alloca0 (priv->child_count);
+ data_readable = alloca0 (priv->child_count);
+ metadata_readable = alloca0 (priv->child_count);
+ event = local->event_generation;
+
+ afr_readables_fill (frame, this, local->inode, data_accused,
+ metadata_accused, data_readable, metadata_readable,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_readable[i])
+ datamap |= (1 << i);
+ if (metadata_readable[i])
+ metadatamap |= (1 << i);
+ }
+
+ val = ((uint64_t) metadatamap) |
+ (((uint64_t) datamap) << 16) |
+ (((uint64_t) event) << 32);
+
LOCK(&local->inode->lock);
{
- ret = __afr_inode_ctx_get (this, local->inode, &ctx);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_DICT_GET_FAILED,
- "ERROR GETTING INODE CTX");
- UNLOCK(&local->inode->lock);
- return ret;
- }
-
- val = ctx->write_subvol;
- /*
- * We need to set the value of write_subvol to read_subvol in 2
- * cases:
- * 1. Initially when the value is 0. i.e., it's the first lock
- * request.
- * 2. If it's a metadata transaction. If metadata transactions
- * comes in between data transactions and we have a brick
- * disconnect, the next metadata transaction won't get the
- * latest value of readables, since we do resetting of
- * write_subvol in unlock code path only if it's a data
- * transaction. To handle those scenarios we need to set the
- * value of write_subvol to read_subvol in case of metadata
- * transactions.
- */
- if (val == 0 ||
- local->transaction.type == AFR_METADATA_TRANSACTION) {
- val1 = ctx->read_subvol;
- ctx->write_subvol = val1;
+ if (local->inode_ctx->write_subvol == 0 &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ local->inode_ctx->write_subvol = val;
}
}
UNLOCK (&local->inode->lock);
@@ -6536,23 +6560,37 @@ int
afr_write_subvol_reset (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
- afr_inode_ctx_t *ctx = NULL;
- int ret = -1;
local = frame->local;
LOCK(&local->inode->lock);
{
- ret = __afr_inode_ctx_get (this, local->inode, &ctx);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_DICT_GET_FAILED,
- "ERROR GETTING INODE CTX");
- UNLOCK(&local->inode->lock);
- return ret;
- }
- ctx->write_subvol = 0;
+ local->inode_ctx->lock_count--;
+
+ if (!local->inode_ctx->lock_count)
+ local->inode_ctx->write_subvol = 0;
}
UNLOCK(&local->inode->lock);
return 0;
}
+
+int
+afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode)
+{
+ int ret = 0;
+
+ local->inode = inode_ref (inode);
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get (this, local->inode,
+ &local->inode_ctx);
+ }
+ UNLOCK (&local->inode->lock);
+ if (ret < 0) {
+ gf_msg_callingfn (this->name, GF_LOG_ERROR, ENOMEM,
+ AFR_MSG_INODE_CTX_GET_FAILED,
+ "Error getting inode ctx %s",
+ uuid_utoa (local->inode->gfid));
+ }
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 9099b8c..e088ed6 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -477,7 +477,7 @@ afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (!local->fd_ctx)
goto out;
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
local->parent = inode_ref (loc->parent);
local->op = GF_FOP_CREATE;
@@ -609,7 +609,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto out;
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
local->parent = inode_ref (loc->parent);
local->op = GF_FOP_MKNOD;
@@ -740,7 +740,7 @@ afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto out;
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
local->parent = inode_ref (loc->parent);
local->cont.mkdir.mode = mode;
@@ -877,7 +877,7 @@ afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- local->inode = inode_ref (oldloc->inode);
+ local->inode = inode_ref (oldloc->inode);
local->parent = inode_ref (newloc->parent);
if (xdata)
@@ -1005,7 +1005,7 @@ afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
goto out;
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
local->parent = inode_ref (loc->parent);
local->cont.symlink.linkpath = gf_strdup (linkpath);
@@ -1142,7 +1142,7 @@ afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- local->inode = inode_ref (oldloc->inode);
+ local->inode = inode_ref (oldloc->inode);
local->parent = inode_ref (oldloc->parent);
local->parent2 = inode_ref (newloc->parent);
@@ -1295,7 +1295,7 @@ afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
loc_copy (&local->loc, loc);
local->xflag = xflag;
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
local->parent = inode_ref (loc->parent);
if (xdata)
@@ -1421,7 +1421,7 @@ afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ local->inode = inode_ref (loc->inode);
local->parent = inode_ref (loc->parent);
local->cont.rmdir.flags = flags;
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 97397f9..f0231b7 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -507,6 +507,7 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
{
afr_local_t *local = NULL;
int op_errno = ENOMEM;
+ int ret = -1;
local = AFR_FRAME_INIT (frame, op_errno);
if (!local)
@@ -529,7 +530,9 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto out;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {
op_errno = ENOMEM;
@@ -654,7 +657,9 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
local->transaction.unwind = afr_truncate_unwind;
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ ret = afr_set_inode_local (this, local, loc->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_TRUNCATE;
@@ -768,7 +773,9 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
goto out;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_FTRUNCATE;
@@ -886,7 +893,9 @@ afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
local->transaction.unwind = afr_setattr_unwind;
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ ret = afr_set_inode_local (this, local, loc->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_SETATTR;
@@ -991,7 +1000,9 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
local->transaction.unwind = afr_fsetattr_unwind;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_FSETATTR;
@@ -1633,7 +1644,9 @@ afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
local->transaction.unwind = afr_setxattr_unwind;
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ ret = afr_set_inode_local (this, local, loc->inode);
+ if (ret)
+ goto out;
local->transaction.main_frame = frame;
local->transaction.start = LLONG_MAX - 1;
@@ -1745,7 +1758,9 @@ afr_fsetxattr (call_frame_t *frame, xlator_t *this,
local->transaction.unwind = afr_fsetxattr_unwind;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_FSETXATTR;
@@ -1858,7 +1873,9 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
local->transaction.unwind = afr_removexattr_unwind;
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ ret = afr_set_inode_local (this, local, loc->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_REMOVEXATTR;
@@ -1965,7 +1982,9 @@ afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->transaction.unwind = afr_fremovexattr_unwind;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_FREMOVEXATTR;
@@ -2060,7 +2079,9 @@ afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
local->cont.fallocate.len = len;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
if (xdata)
local->xdata_req = dict_copy_with_ref (xdata, NULL);
@@ -2172,7 +2193,9 @@ afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->cont.discard.len = len;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
if (xdata)
local->xdata_req = dict_copy_with_ref (xdata, NULL);
@@ -2281,7 +2304,9 @@ afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->cont.zerofill.len = len;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
if (xdata)
local->xdata_req = dict_copy_with_ref (xdata, NULL);
@@ -2393,7 +2418,9 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->transaction.unwind = afr_xattrop_unwind;
loc_copy (&local->loc, loc);
- local->inode = inode_ref (loc->inode);
+ ret = afr_set_inode_local (this, local, loc->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_XATTROP;
@@ -2487,7 +2514,9 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->transaction.unwind = afr_fxattrop_unwind;
local->fd = fd_ref (fd);
- local->inode = inode_ref (fd->inode);
+ ret = afr_set_inode_local (this, local, fd->inode);
+ if (ret)
+ goto out;
local->op = GF_FOP_FXATTROP;
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index c17f60f..f50c7b6 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -615,14 +615,14 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- afr_private_t *priv = NULL;
int call_count = 0;
int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
- priv = this->private;
+
+ if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1)
+ ret = afr_write_subvol_reset (frame, this);
LOCK (&frame->lock);
{
@@ -633,11 +633,6 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (call_count == 0) {
gf_msg_trace (this->name, 0,
"All internal locks unlocked");
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (0 == AFR_COUNT (fd_ctx->lock_acquired, priv->child_count))
- ret = afr_write_subvol_reset (frame, this);
- }
int_lock->lock_cbk (frame, this);
}
@@ -947,6 +942,15 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
} else {
int_lock->locked_nodes[child_index] |= LOCKED_YES;
int_lock->lock_count++;
+
+ if (local->transaction.type ==
+ AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->lock_count++;
+ }
+ UNLOCK (&local->inode->lock);
+ }
}
}
afr_lock_blocking (frame, this, cky + 1);
@@ -1502,13 +1506,12 @@ int32_t
afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- afr_fd_ctx_t *fd_ctx = NULL;
-
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
local = frame->local;
int_lock = &local->internal_lock;
@@ -1553,6 +1556,15 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx->lock_acquired[child_index]++;
}
}
+
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ op_ret == 0) {
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->lock_count++;
+ }
+ UNLOCK (&local->inode->lock);
+ }
}
call_count = --int_lock->lk_call_count;
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index 02eb206..53ffcd8 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR
-#define GLFS_NUM_MESSAGES 42
+#define GLFS_NUM_MESSAGES 43
#define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)
#define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages"
@@ -369,5 +369,12 @@
*/
#define AFR_MSG_SBRAIN_FAV_CHILD_POLICY (GLFS_COMP_BASE_AFR + 42)
+/*!
+ * @messageid 108043
+ * @diagnosis
+ * @recommendation
+*/
+#define AFR_MSG_INODE_CTX_GET_FAILED (GLFS_COMP_BASE_AFR + 43)
+
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index a04636f..7e40bba 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -372,14 +372,27 @@ afr_txn_arbitrate_fop (call_frame_t *frame, xlator_t *this)
int
afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ int i = 0;
+ int ret = 0;
local = frame->local;
priv = this->private;
fd = local->fd;
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !local->transaction.inherited) {
+ ret = afr_write_subvol_set (frame, this);
+ if (ret) {
+ /*act as if operation failed on all subvols*/
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ for (i = 0; i < priv->child_count; i++)
+ local->transaction.failed_subvols[i] = 1;
+ }
+ }
/* Perform fops with the lk-owner from top xlator.
* Eg: lk-owner of posix-lk and flush should be same,
* flush cant clear the posix-lks without that lk-owner.
@@ -1116,32 +1129,28 @@ unlock:
int
afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int call_count = -1;
int child_index = -1;
local = frame->local;
- priv = this->private;
child_index = (long) cookie;
- if (op_ret == -1) {
+ if (op_ret == -1) {
local->op_errno = op_errno;
- afr_transaction_fop_failed (frame, this, child_index);
+ afr_transaction_fop_failed (frame, this, child_index);
}
- if (priv->arbiter_count == 1 && !op_ret) {
- if (xattr)
- local->transaction.pre_op_xdata[child_index] =
- dict_ref (xattr);
- }
+ if (xattr)
+ local->transaction.pre_op_xdata[child_index] = dict_ref (xattr);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0)
- local->transaction.changelog_resume (frame, this);
+ if (call_count == 0) {
+ local->transaction.changelog_resume (frame, this);
+ }
return 0;
}
@@ -1750,10 +1759,6 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
if (pre_nop)
goto next;
- ret = afr_write_subvol_set (frame, this);
- if (ret)
- goto err;
-
if (!local->pre_op_compat) {
dict_copy (xdata_req, local->xdata_req);
goto next;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 0a06eb6..96fefb1 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -377,6 +377,16 @@ typedef enum {
AFR_FOP_LOCK_QUORUM_FAILED,
} afr_fop_lock_state_t;
+typedef struct _afr_inode_ctx {
+ uint64_t read_subvol;
+ uint64_t write_subvol;
+ int lock_count;
+ int spb_choice;
+ gf_timer_t *timer;
+ gf_boolean_t need_refresh;
+} afr_inode_ctx_t;
+
+
typedef struct _afr_local {
glusterfs_fop_t op;
unsigned int call_count;
@@ -833,17 +843,10 @@ typedef struct _afr_local {
compound_args_t *c_args;
gf_boolean_t is_read_txn;
+ afr_inode_ctx_t *inode_ctx;
} afr_local_t;
-typedef struct _afr_inode_ctx {
- uint64_t read_subvol;
- uint64_t write_subvol;
- int spb_choice;
- gf_timer_t *timer;
- gf_boolean_t need_refresh;
-} afr_inode_ctx_t;
-
typedef struct afr_spbc_timeout {
call_frame_t *frame;
gf_boolean_t d_spb;
@@ -1274,6 +1277,9 @@ afr_write_subvol_set (call_frame_t *frame, xlator_t *this);
int
afr_write_subvol_reset (call_frame_t *frame, xlator_t *this);
+int
+afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode);
+
gf_boolean_t
afr_is_symmetric_error (call_frame_t *frame, xlator_t *this);
#endif /* __AFR_H__ */
--
1.8.3.1

View File

@ -0,0 +1,46 @@
From 80c028452000465c607cce054211075d44634996 Mon Sep 17 00:00:00 2001
From: moagrawa <moagrawa@redhat.com>
Date: Mon, 15 Jan 2018 18:21:27 +0530
Subject: [PATCH 126/128] spec: unpackaged files found for RHEL-7 client build
Problem: unpackaged files found for RHEL-7 client build
Solution: Update glusterfs.spec.in to exclude unpackaged files
Label: DOWNSTREAM ONLY
BUG: 1534530
Change-Id: I211a739fb5ce18011bc75fb36c6112cc802fccc9
Signed-off-by: moagrawa <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/127681
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
glusterfs.spec.in | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 2ac9a60b0..cb273f5 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1219,6 +1219,8 @@ exit 0
%exclude %{_libdir}/pkgconfig/libgfdb.pc
%exclude %{_sbindir}/gluster-setgfid2path
%exclude %{_mandir}/man8/gluster-setgfid2path.8*
+%exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+%exclude %{_datadir}/glusterfs/scripts/control-mem.sh
%endif
%files api
@@ -2179,6 +2181,9 @@ fi
%endif
%changelog
+* Tue Jan 16 2018 Mohit Agrawal <moagrawa@redhat.com>
+- Exclude control-cpu-load.sh and control-mem.sh from client builds
+
* Wed Dec 20 2017 Milind Changire <mchangir@redhat.com>
- Remove ExclusiveArch directive to help building on non-x86_64 arches (#1527772)
--
1.8.3.1

View File

@ -0,0 +1,51 @@
From 09a7493dcf0f88aa47a176ab0372289664578fb7 Mon Sep 17 00:00:00 2001
From: moagrawa <moagrawa@redhat.com>
Date: Mon, 15 Jan 2018 18:21:27 +0530
Subject: [PATCH 127/128] spec: unpackaged files found for RHEL-7 client build
Problem: unpackaged files found for RHEL-7 client build
Solution: Update glusterfs.spec.in to exclude unpackaged files
Label: DOWNSTREAM ONLY
BUG: 1534530
Change-Id: I761188a6a8447105b53bf3334ded963c645cab5b
Signed-off-by: moagrawa <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/127758
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
glusterfs.spec.in | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index cb273f5..abebb28 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1219,9 +1219,11 @@ exit 0
%exclude %{_libdir}/pkgconfig/libgfdb.pc
%exclude %{_sbindir}/gluster-setgfid2path
%exclude %{_mandir}/man8/gluster-setgfid2path.8*
+%if ( 0%{?_with_systemd:1} )
%exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh
%exclude %{_datadir}/glusterfs/scripts/control-mem.sh
%endif
+%endif
%files api
%exclude %{_libdir}/*.so
@@ -2181,8 +2183,8 @@ fi
%endif
%changelog
-* Tue Jan 16 2018 Mohit Agrawal <moagrawa@redhat.com>
-- Exclude control-cpu-load.sh and control-mem.sh from client builds
+* Wed Jan 17 2018 Mohit Agrawal <moagrawa@redhat.com>
+- Exclude control-cpu-load.sh and control-mem.sh for RHEL_7 client builds only (#1534530)
* Wed Dec 20 2017 Milind Changire <mchangir@redhat.com>
- Remove ExclusiveArch directive to help building on non-x86_64 arches (#1527772)
--
1.8.3.1

View File

@ -0,0 +1,87 @@
From ea80ad2022197bbc910a2a8426f968d3a657dab7 Mon Sep 17 00:00:00 2001
From: Milind Changire <mchangir@redhat.com>
Date: Thu, 30 Nov 2017 17:37:12 +0530
Subject: [PATCH 128/128] build: remove pretrans script for ganesha
pretrans script for ganesha gets in the way of package installation.
There's no hard requirement for gluster processes to be shut down
for package installation.
Label: DOWNSTREAM ONLY
BUG: 1410719
Change-Id: I3611cfa8eacbd8caa5560909b55d7705d2fc8678
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/127783
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
glusterfs.spec.in | 45 +++------------------------------------------
1 file changed, 3 insertions(+), 42 deletions(-)
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index abebb28..a230b24 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1875,48 +1875,6 @@ end
-%pretrans ganesha -p <lua>
-if not posix.access("/bin/bash", "x") then
- -- initial installation, no shell, no running glusterfsd
- return 0
-end
-
--- TODO: move this completely to a lua script
--- For now, we write a temporary bash script and execute that.
-
-script = [[#!/bin/sh
-pidof -c -o %PPID -x glusterfsd &>/dev/null
-
-if [ $? -eq 0 ]; then
- pushd . > /dev/null 2>&1
- for volume in /var/lib/glusterd/vols/*; do cd $volume;
- vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
- volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
- if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
- exit 1;
- fi
- done
-
- popd > /dev/null 2>&1
- exit 1;
-fi
-]]
-
--- Since we run pretrans scripts only for RPMs built for a server build,
--- we can now use os.tmpname() since it is available on RHEL6 and later
--- platforms which are server platforms.
-tmpname = os.tmpname()
-tmpfile = io.open(tmpname, "w")
-tmpfile:write(script)
-tmpfile:close()
-ok, how, val = os.execute("/bin/bash " .. tmpname)
-os.remove(tmpname)
-if not (ok == 0) then
- error("Detected running glusterfs processes", ok)
-end
-
-
-
%if ( 0%{!?_without_georeplication:1} )
%pretrans geo-replication -p <lua>
if not posix.access("/bin/bash", "x") then
@@ -2183,6 +2141,9 @@ fi
%endif
%changelog
+* Wed Jan 17 2018 Milind Changire <mchangir@redhat.com>
+- DOWNSTREAM ONLY - Removed pretrans script for glusterfs-ganesha - (#1410719)
+
* Wed Jan 17 2018 Mohit Agrawal <moagrawa@redhat.com>
- Exclude control-cpu-load.sh and control-mem.sh for RHEL_7 client builds only (#1534530)
--
1.8.3.1

View File

@ -192,8 +192,7 @@ Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
%else
Name: glusterfs
Version: 3.12.2
Release: 1%{?dist}
ExclusiveArch: x86_64 aarch64
Release: 2%{?dist}
%endif
License: GPLv2 or LGPLv3+
Group: System Environment/Base
@ -340,6 +339,60 @@ Patch0071: 0071-common-ha-enable-pacemaker-at-end-of-setup.patch
Patch0072: 0072-common-ha-Fix-an-incorrect-syntax-during-setup.patch
Patch0073: 0073-Fix-build-issues-related-to-nfs-ganesha-package.patch
Patch0074: 0074-build-make-var-run-available-on-RHEL-6.patch
Patch0075: 0075-cli-gluster-help-changes.patch
Patch0076: 0076-cluster-ec-Handle-parallel-get_size_version.patch
Patch0077: 0077-cluster-ec-add-functions-for-stripe-alignment.patch
Patch0078: 0078-cluster-afr-Honor-default-timeout-of-5min-for-analyz.patch
Patch0079: 0079-cluster-ec-Allow-parallel-writes-in-EC-if-possible.patch
Patch0080: 0080-heal-New-feature-heal-info-summary-to-list-the-statu.patch
Patch0081: 0081-cluster-dht-Don-t-set-ACLs-on-linkto-file.patch
Patch0082: 0082-cluster-afr-Print-heal-info-summary-output-in-stream.patch
Patch0083: 0083-cluster-afr-Print-heal-info-split-brain-output-in-st.patch
Patch0084: 0084-cluster-afr-Fix-for-arbiter-becoming-source.patch
Patch0085: 0085-snapshot-Issue-with-other-processes-accessing-the-mo.patch
Patch0086: 0086-snapshot-lvm-cleanup-during-snapshot-remove.patch
Patch0087: 0087-glusterd-Validate-changelog-on-geo-rep-volume.patch
Patch0088: 0088-cluster-ec-Implement-DISCARD-FOP-for-EC.patch
Patch0089: 0089-geo-rep-Filter-out-volume-mark-xattr.patch
Patch0090: 0090-Quota-Adding-man-page-for-quota.patch
Patch0091: 0091-extras-scripts-to-control-CPU-MEMORY-for-any-gluster.patch
Patch0092: 0092-posix-Needs-to-reserve-disk-space-to-prevent-the-bri.patch
Patch0093: 0093-posix-Ignore-disk-space-reserve-check-for-internal-F.patch
Patch0094: 0094-cluster-afr-Fail-open-on-split-brain.patch
Patch0095: 0095-extras-hooks-Fix-errors-reported-via-shellcheck-util.patch
Patch0096: 0096-extras-hooks-Honour-all-input-arguments-to-scripts.patch
Patch0097: 0097-extras-hooks-Fix-getopt-usage.patch
Patch0098: 0098-snapshot-snapshot-creation-failed-after-brick-reset-.patch
Patch0099: 0099-Tier-Stop-tierd-for-detach-start.patch
Patch0100: 0100-cluster-ec-Improve-heal-info-command-to-handle-obvio.patch
Patch0101: 0101-cluster-ec-Prevent-self-heal-to-work-after-PARENT_DO.patch
Patch0102: 0102-libglusterfs-fix-the-call_stack_set_group-function.patch
Patch0103: 0103-features-locks-Fix-memory-leaks.patch
Patch0104: 0104-cluster-dht-fix-crash-when-deleting-directories.patch
Patch0105: 0105-glusterd-Fix-glusterd-mem-leaks.patch
Patch0106: 0106-glusterd-Free-up-svc-conn-on-volume-delete.patch
Patch0107: 0107-feature-bitrot-remove-internal-xattrs-from-lookup-cb.patch
Patch0108: 0108-mount-fuse-use-fstat-in-getattr-implementation-if-an.patch
Patch0109: 0109-mount-fuse-never-fail-open-dir-with-ENOENT.patch
Patch0110: 0110-Revert-mount-fuse-report-ESTALE-as-ENOENT.patch
Patch0111: 0111-cluster-dht-don-t-overfill-the-buffer-in-readdir-p.patch
Patch0112: 0112-write-behind-Allow-trickling-writes-to-be-configurab.patch
Patch0113: 0113-gfapi-set-lkowner-in-glfd.patch
Patch0114: 0114-eventsapi-Add-JWT-signing-support.patch
Patch0115: 0115-eventsapi-JWT-signing-without-external-dependency.patch
Patch0116: 0116-eventsapi-HTTPS-support-for-Webhooks.patch
Patch0117: 0117-geo-rep-Log-message-improvements.patch
Patch0118: 0118-snapshot-after-brick-reset-replace-snapshot-creation.patch
Patch0119: 0119-geo-rep-Fix-data-sync-issue-during-hardlink-rename.patch
Patch0120: 0120-glusterd-connect-to-an-existing-brick-process-when-q.patch
Patch0121: 0121-cluster-ec-OpenFD-heal-implementation-for-EC.patch
Patch0122: 0122-tests-Use-dev-urandom-instead-of-dev-random-for-dd.patch
Patch0123: 0123-quota-fixes-issue-in-quota.conf-when-setting-large-n.patch
Patch0124: 0124-build-remove-ExclusiveArch-from-spec-file.patch
Patch0125: 0125-cluster-afr-Fixing-the-flaws-in-arbiter-becoming-sou.patch
Patch0126: 0126-spec-unpackaged-files-found-for-RHEL-7-client-build.patch
Patch0127: 0127-spec-unpackaged-files-found-for-RHEL-7-client-build.patch
Patch0128: 0128-build-remove-pretrans-script-for-ganesha.patch
%description
GlusterFS is a distributed file-system capable of scaling to several
@ -1345,6 +1398,10 @@ exit 0
%exclude %{_libdir}/pkgconfig/libgfdb.pc
%exclude %{_sbindir}/gluster-setgfid2path
%exclude %{_mandir}/man8/gluster-setgfid2path.8*
%if ( 0%{?_with_systemd:1} )
%exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh
%exclude %{_datadir}/glusterfs/scripts/control-mem.sh
%endif
%endif
%files api
@ -1678,6 +1735,8 @@ exit 0
%{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
%if ( 0%{?_with_systemd:1} )
%{_libexecdir}/glusterfs/mount-shared-storage.sh
%{_datadir}/glusterfs/scripts/control-cpu-load.sh
%{_datadir}/glusterfs/scripts/control-mem.sh
%endif
# Incrementalapi
@ -1995,48 +2054,6 @@ end
%pretrans ganesha -p <lua>
if not posix.access("/bin/bash", "x") then
-- initial installation, no shell, no running glusterfsd
return 0
end
-- TODO: move this completely to a lua script
-- For now, we write a temporary bash script and execute that.
script = [[#!/bin/sh
pidof -c -o %PPID -x glusterfsd &>/dev/null
if [ $? -eq 0 ]; then
pushd . > /dev/null 2>&1
for volume in /var/lib/glusterd/vols/*; do cd $volume;
vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
exit 1;
fi
done
popd > /dev/null 2>&1
exit 1;
fi
]]
-- Since we run pretrans scripts only for RPMs built for a server build,
-- we can now use os.tmpname() since it is available on RHEL6 and later
-- platforms which are server platforms.
tmpname = os.tmpname()
tmpfile = io.open(tmpname, "w")
tmpfile:write(script)
tmpfile:close()
ok, how, val = os.execute("/bin/bash " .. tmpname)
os.remove(tmpname)
if not (ok == 0) then
error("Detected running glusterfs processes", ok)
end
%if ( 0%{!?_without_georeplication:1} )
%pretrans geo-replication -p <lua>
if not posix.access("/bin/bash", "x") then
@ -2303,6 +2320,14 @@ fi
%endif
%changelog
* Wed Jan 17 2018 Milind Changire <mchangir@redhat.com> - 3.12.2-2
- fixes bugs bz#1264911 bz#1277924 bz#1286820 bz#1360331 bz#1401969
bz#1410719 bz#1419438 bz#1426042 bz#1444820 bz#1459101 bz#1464150 bz#1464350
bz#1466122 bz#1466129 bz#1467903 bz#1468972 bz#1476876 bz#1484446 bz#1492591
bz#1498391 bz#1498730 bz#1499865 bz#1500704 bz#1501345 bz#1505570 bz#1507361
bz#1507394 bz#1509102 bz#1509191 bz#1509810 bz#1509833 bz#1511766 bz#1512470
bz#1512496 bz#1512963 bz#1515051 bz#1519076 bz#1519740 bz#1534253 bz#1534530
* Wed Nov 15 2017 Milind Changire <mchangir@redhat.com> - 3.12.2-1
- rebase to upstream glusterfs at v3.12.2
- fixes bugs bz#1442983 bz#1474745 bz#1503244 bz#1505363 bz#1509102