cf62f1947f
Resolves: bz#1264911 bz#1277924 bz#1286820 bz#1360331 bz#1401969 Resolves: bz#1410719 bz#1419438 bz#1426042 bz#1444820 bz#1459101 Resolves: bz#1464150 bz#1464350 bz#1466122 bz#1466129 bz#1467903 Resolves: bz#1468972 bz#1476876 bz#1484446 bz#1492591 bz#1498391 Resolves: bz#1498730 bz#1499865 bz#1500704 bz#1501345 bz#1505570 Resolves: bz#1507361 bz#1507394 bz#1509102 bz#1509191 bz#1509810 Resolves: bz#1509833 bz#1511766 bz#1512470 bz#1512496 bz#1512963 Resolves: bz#1515051 bz#1519076 bz#1519740 bz#1534253 bz#1534530 Signed-off-by: Milind Changire <mchangir@redhat.com>
904 lines
34 KiB
Diff
904 lines
34 KiB
Diff
From 3a4682ccd935744a0c5346bae23658ff08d65343 Mon Sep 17 00:00:00 2001
|
|
From: karthik-us <ksubrahm@redhat.com>
|
|
Date: Mon, 15 Jan 2018 12:48:54 +0530
|
|
Subject: [PATCH 125/128] cluster/afr: Fixing the flaws in arbiter becoming
|
|
source patch
|
|
|
|
Problem:
|
|
Setting the write_subvol value to read_subvol in case of metadata
|
|
transaction during pre-op (commit 19f9bcff4aada589d4321356c2670ed283f02c03)
|
|
might lead to the original problem of arbiter becoming source.
|
|
|
|
Scenario:
|
|
1) All bricks are up and good
|
|
2) 2 writes w1 and w2 are in progress in parallel
|
|
3) ctx->read_subvol is good for all the subvolumes
|
|
4) w1 succeeds on brick0 and fails on brick1, yet to do post-op on
|
|
the disk
|
|
5) read/lookup comes on the same file and refreshes read_subvols back
|
|
to all good
|
|
6) a metadata transaction happens, which causes ctx->write_subvol to be
|
|
assigned with ctx->read_subvol which is all good
|
|
7) w2 succeeds on brick1 and fails on brick0 and this will update the
|
|
brick in reverse order leading to arbiter becoming source
|
|
|
|
Fix:
|
|
Instead of setting the ctx->write_subvol to ctx->read_subvol in the
|
|
pre-op stage, if there is a metadata transaction, check in the
|
|
function __afr_set_in_flight_sb_status() if it is a data/metadata
|
|
transaction. Use the value of ctx->write_subvol if it is a data
|
|
transaction and the ctx->read_subvol value for other transactions.
|
|
|
|
With this patch we assign the value of ctx->write_subvol in the
|
|
afr_transaction_perform_fop() with the on disk value, instead of
|
|
assigning it in the afr_changelog_pre_op() with the in memory value.
|
|
|
|
Upstream Patch: https://review.gluster.org/#/c/19045/
|
|
|
|
> Change-Id: Id2025a7e965f0578af35b1abaac793b019c43cc4
|
|
> BUG: 1482064
|
|
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
|
|
|
|
Change-Id: Ie5d6745703fa5024d27e413093f7dfd08992e1df
|
|
BUG: 1401969
|
|
Signed-off-by: karthik-us <ksubrahm@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/127644
|
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
|
|
Tested-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
|
|
---
|
|
xlators/cluster/afr/src/afr-common.c | 266 +++++++++++++++++-------------
|
|
xlators/cluster/afr/src/afr-dir-write.c | 16 +-
|
|
xlators/cluster/afr/src/afr-inode-write.c | 57 +++++--
|
|
xlators/cluster/afr/src/afr-lk-common.c | 42 +++--
|
|
xlators/cluster/afr/src/afr-messages.h | 9 +-
|
|
xlators/cluster/afr/src/afr-transaction.c | 45 ++---
|
|
xlators/cluster/afr/src/afr.h | 22 ++-
|
|
7 files changed, 277 insertions(+), 180 deletions(-)
|
|
|
|
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
index 692f198..6e6f5fa 100644
|
|
--- a/xlators/cluster/afr/src/afr-common.c
|
|
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
@@ -150,6 +150,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
|
|
tmp_ctx->spb_choice = -1;
|
|
tmp_ctx->read_subvol = 0;
|
|
tmp_ctx->write_subvol = 0;
|
|
+ tmp_ctx->lock_count = 0;
|
|
} else {
|
|
tmp_ctx = (afr_inode_ctx_t *) ctx_int;
|
|
}
|
|
@@ -195,7 +196,6 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
|
|
inode_t *inode)
|
|
{
|
|
int i = 0;
|
|
- int ret = -1;
|
|
int txn_type = 0;
|
|
int count = 0;
|
|
int index = -1;
|
|
@@ -208,16 +208,14 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
|
|
uint32_t event = 0;
|
|
uint64_t val = 0;
|
|
afr_private_t *priv = NULL;
|
|
- afr_inode_ctx_t *ctx = NULL;
|
|
|
|
priv = this->private;
|
|
txn_type = local->transaction.type;
|
|
|
|
- ret = __afr_inode_ctx_get (this, inode, &ctx);
|
|
- if (ret < 0)
|
|
- return ret;
|
|
-
|
|
- val = ctx->write_subvol;
|
|
+ if (txn_type == AFR_DATA_TRANSACTION)
|
|
+ val = local->inode_ctx->write_subvol;
|
|
+ else
|
|
+ val = local->inode_ctx->read_subvol;
|
|
|
|
metadatamap_old = metadatamap = (val & 0x000000000000ffff);
|
|
datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
|
|
@@ -278,10 +276,11 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
|
|
(((uint64_t) datamap) << 16) |
|
|
(((uint64_t) event) << 32);
|
|
|
|
- ctx->write_subvol = val;
|
|
- ctx->read_subvol = val;
|
|
+ if (txn_type == AFR_DATA_TRANSACTION)
|
|
+ local->inode_ctx->write_subvol = val;
|
|
+ local->inode_ctx->read_subvol = val;
|
|
|
|
- return ret;
|
|
+ return 0;
|
|
}
|
|
|
|
gf_boolean_t
|
|
@@ -1001,6 +1000,81 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
|
|
}
|
|
|
|
int
|
|
+afr_readables_fill (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
+ unsigned char *data_accused,
|
|
+ unsigned char *metadata_accused,
|
|
+ unsigned char *data_readable,
|
|
+ unsigned char *metadata_readable,
|
|
+ struct afr_reply *replies)
|
|
+{
|
|
+ afr_local_t *local = NULL;
|
|
+ afr_private_t *priv = NULL;
|
|
+ dict_t *xdata = NULL;
|
|
+ int i = 0;
|
|
+ int ret = 0;
|
|
+ ia_type_t ia_type = IA_INVAL;
|
|
+
|
|
+ local = frame->local;
|
|
+ priv = this->private;
|
|
+
|
|
+ for (i = 0; i < priv->child_count; i++) {
|
|
+ data_readable[i] = 1;
|
|
+ metadata_readable[i] = 1;
|
|
+ }
|
|
+ if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) {
|
|
+ data_readable[ARBITER_BRICK_INDEX] = 0;
|
|
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < priv->child_count; i++) {
|
|
+ if (replies) {/* Lookup */
|
|
+ if (!replies[i].valid || replies[i].op_ret == -1 ||
|
|
+ (replies[i].xdata && dict_get (replies[i].xdata,
|
|
+ GLUSTERFS_BAD_INODE))) {
|
|
+ data_readable[i] = 0;
|
|
+ metadata_readable[i] = 0;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ xdata = replies[i].xdata;
|
|
+ ia_type = replies[i].poststat.ia_type;
|
|
+ } else {/* pre-op xattrop */
|
|
+ xdata = local->transaction.pre_op_xdata[i];
|
|
+ ia_type = inode->ia_type;
|
|
+ }
|
|
+
|
|
+ afr_accused_fill (this, xdata, data_accused,
|
|
+ (ia_type == IA_IFDIR) ?
|
|
+ AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
|
|
+
|
|
+ afr_accused_fill (this, xdata,
|
|
+ metadata_accused, AFR_METADATA_TRANSACTION);
|
|
+ }
|
|
+
|
|
+ if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR &&
|
|
+ /* We want to accuse small files only when we know for
|
|
+ * sure that there is no IO happening. Otherwise, the
|
|
+ * ia_sizes obtained in post-refresh replies may
|
|
+ * mismatch due to a race between inode-refresh and
|
|
+ * ongoing writes, causing spurious heal launches*/
|
|
+ !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this)) {
|
|
+ afr_accuse_smallfiles (this, replies, data_accused);
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < priv->child_count; i++) {
|
|
+ if (data_accused[i]) {
|
|
+ data_readable[i] = 0;
|
|
+ ret = 1;
|
|
+ }
|
|
+ if (metadata_accused[i]) {
|
|
+ metadata_readable[i] = 0;
|
|
+ ret = 1;
|
|
+ }
|
|
+ }
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int
|
|
afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
gf_boolean_t *start_heal)
|
|
{
|
|
@@ -1025,62 +1099,9 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode,
|
|
metadata_accused = alloca0 (priv->child_count);
|
|
metadata_readable = alloca0 (priv->child_count);
|
|
|
|
- for (i = 0; i < priv->child_count; i++) {
|
|
- data_readable[i] = 1;
|
|
- metadata_readable[i] = 1;
|
|
- }
|
|
- if (AFR_IS_ARBITER_BRICK (priv, ARBITER_BRICK_INDEX)) {
|
|
- data_readable[ARBITER_BRICK_INDEX] = 0;
|
|
- metadata_readable[ARBITER_BRICK_INDEX] = 0;
|
|
- }
|
|
-
|
|
- for (i = 0; i < priv->child_count; i++) {
|
|
- if (!replies[i].valid) {
|
|
- data_readable[i] = 0;
|
|
- metadata_readable[i] = 0;
|
|
- continue;
|
|
- }
|
|
-
|
|
- if (replies[i].op_ret == -1) {
|
|
- data_readable[i] = 0;
|
|
- metadata_readable[i] = 0;
|
|
- continue;
|
|
- }
|
|
-
|
|
- if (replies[i].xdata &&
|
|
- dict_get (replies[i].xdata, GLUSTERFS_BAD_INODE)) {
|
|
- data_readable[i] = 0;
|
|
- metadata_readable[i] = 0;
|
|
- continue;
|
|
- }
|
|
-
|
|
- afr_accused_fill (this, replies[i].xdata, data_accused,
|
|
- (replies[i].poststat.ia_type == IA_IFDIR) ?
|
|
- AFR_ENTRY_TRANSACTION : AFR_DATA_TRANSACTION);
|
|
-
|
|
- afr_accused_fill (this, replies[i].xdata,
|
|
- metadata_accused, AFR_METADATA_TRANSACTION);
|
|
-
|
|
- }
|
|
-
|
|
- if ((inode->ia_type != IA_IFDIR) &&
|
|
- /* We want to accuse small files only when we know for sure that
|
|
- * there is no IO happening. Otherwise, the ia_sizes obtained in
|
|
- * post-refresh replies may mismatch due to a race between inode-
|
|
- * refresh and ongoing writes, causing spurious heal launches*/
|
|
- !afr_is_possibly_under_txn (AFR_DATA_TRANSACTION, local, this))
|
|
- afr_accuse_smallfiles (this, replies, data_accused);
|
|
-
|
|
- for (i = 0; i < priv->child_count; i++) {
|
|
- if (data_accused[i]) {
|
|
- data_readable[i] = 0;
|
|
- ret = 1;
|
|
- }
|
|
- if (metadata_accused[i]) {
|
|
- metadata_readable[i] = 0;
|
|
- ret = 1;
|
|
- }
|
|
- }
|
|
+ ret = afr_readables_fill (frame, this, inode, data_accused,
|
|
+ metadata_accused, data_readable,
|
|
+ metadata_readable, replies);
|
|
|
|
for (i = 0; i < priv->child_count; i++) {
|
|
if (start_heal && priv->child_up[i] &&
|
|
@@ -5510,13 +5531,13 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
|
|
if (!local->transaction.pre_op)
|
|
goto out;
|
|
|
|
- if (priv->arbiter_count == 1) {
|
|
- local->transaction.pre_op_xdata =
|
|
- GF_CALLOC (sizeof (*local->transaction.pre_op_xdata),
|
|
- priv->child_count, gf_afr_mt_dict_t);
|
|
- if (!local->transaction.pre_op_xdata)
|
|
- goto out;
|
|
+ local->transaction.pre_op_xdata =
|
|
+ GF_CALLOC (sizeof (*local->transaction.pre_op_xdata),
|
|
+ priv->child_count, gf_afr_mt_dict_t);
|
|
+ if (!local->transaction.pre_op_xdata)
|
|
+ goto out;
|
|
|
|
+ if (priv->arbiter_count == 1) {
|
|
local->transaction.pre_op_sources =
|
|
GF_CALLOC (sizeof (*local->transaction.pre_op_sources),
|
|
priv->child_count, gf_afr_mt_char);
|
|
@@ -6489,42 +6510,45 @@ int
|
|
afr_write_subvol_set (call_frame_t *frame, xlator_t *this)
|
|
{
|
|
afr_local_t *local = NULL;
|
|
- afr_inode_ctx_t *ctx = NULL;
|
|
+ afr_private_t *priv = NULL;
|
|
+ unsigned char *data_accused = NULL;
|
|
+ unsigned char *metadata_accused = NULL;
|
|
+ unsigned char *data_readable = NULL;
|
|
+ unsigned char *metadata_readable = NULL;
|
|
+ uint16_t datamap = 0;
|
|
+ uint16_t metadatamap = 0;
|
|
uint64_t val = 0;
|
|
- uint64_t val1 = 0;
|
|
- int ret = -1;
|
|
+ int event = 0;
|
|
+ int i = 0;
|
|
|
|
local = frame->local;
|
|
+ priv = this->private;
|
|
+ data_accused = alloca0 (priv->child_count);
|
|
+ metadata_accused = alloca0 (priv->child_count);
|
|
+ data_readable = alloca0 (priv->child_count);
|
|
+ metadata_readable = alloca0 (priv->child_count);
|
|
+ event = local->event_generation;
|
|
+
|
|
+ afr_readables_fill (frame, this, local->inode, data_accused,
|
|
+ metadata_accused, data_readable, metadata_readable,
|
|
+ NULL);
|
|
+
|
|
+ for (i = 0; i < priv->child_count; i++) {
|
|
+ if (data_readable[i])
|
|
+ datamap |= (1 << i);
|
|
+ if (metadata_readable[i])
|
|
+ metadatamap |= (1 << i);
|
|
+ }
|
|
+
|
|
+ val = ((uint64_t) metadatamap) |
|
|
+ (((uint64_t) datamap) << 16) |
|
|
+ (((uint64_t) event) << 32);
|
|
+
|
|
LOCK(&local->inode->lock);
|
|
{
|
|
- ret = __afr_inode_ctx_get (this, local->inode, &ctx);
|
|
- if (ret < 0) {
|
|
- gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
- AFR_MSG_DICT_GET_FAILED,
|
|
- "ERROR GETTING INODE CTX");
|
|
- UNLOCK(&local->inode->lock);
|
|
- return ret;
|
|
- }
|
|
-
|
|
- val = ctx->write_subvol;
|
|
- /*
|
|
- * We need to set the value of write_subvol to read_subvol in 2
|
|
- * cases:
|
|
- * 1. Initially when the value is 0. i.e., it's the first lock
|
|
- * request.
|
|
- * 2. If it's a metadata transaction. If metadata transactions
|
|
- * comes in between data transactions and we have a brick
|
|
- * disconnect, the next metadata transaction won't get the
|
|
- * latest value of readables, since we do resetting of
|
|
- * write_subvol in unlock code path only if it's a data
|
|
- * transaction. To handle those scenarios we need to set the
|
|
- * value of write_subvol to read_subvol in case of metadata
|
|
- * transactions.
|
|
- */
|
|
- if (val == 0 ||
|
|
- local->transaction.type == AFR_METADATA_TRANSACTION) {
|
|
- val1 = ctx->read_subvol;
|
|
- ctx->write_subvol = val1;
|
|
+ if (local->inode_ctx->write_subvol == 0 &&
|
|
+ local->transaction.type == AFR_DATA_TRANSACTION) {
|
|
+ local->inode_ctx->write_subvol = val;
|
|
}
|
|
}
|
|
UNLOCK (&local->inode->lock);
|
|
@@ -6536,23 +6560,37 @@ int
|
|
afr_write_subvol_reset (call_frame_t *frame, xlator_t *this)
|
|
{
|
|
afr_local_t *local = NULL;
|
|
- afr_inode_ctx_t *ctx = NULL;
|
|
- int ret = -1;
|
|
|
|
local = frame->local;
|
|
LOCK(&local->inode->lock);
|
|
{
|
|
- ret = __afr_inode_ctx_get (this, local->inode, &ctx);
|
|
- if (ret < 0) {
|
|
- gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
- AFR_MSG_DICT_GET_FAILED,
|
|
- "ERROR GETTING INODE CTX");
|
|
- UNLOCK(&local->inode->lock);
|
|
- return ret;
|
|
- }
|
|
- ctx->write_subvol = 0;
|
|
+ local->inode_ctx->lock_count--;
|
|
+
|
|
+ if (!local->inode_ctx->lock_count)
|
|
+ local->inode_ctx->write_subvol = 0;
|
|
}
|
|
UNLOCK(&local->inode->lock);
|
|
|
|
return 0;
|
|
}
|
|
+
|
|
+int
|
|
+afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode)
|
|
+{
|
|
+ int ret = 0;
|
|
+
|
|
+ local->inode = inode_ref (inode);
|
|
+ LOCK(&local->inode->lock);
|
|
+ {
|
|
+ ret = __afr_inode_ctx_get (this, local->inode,
|
|
+ &local->inode_ctx);
|
|
+ }
|
|
+ UNLOCK (&local->inode->lock);
|
|
+ if (ret < 0) {
|
|
+ gf_msg_callingfn (this->name, GF_LOG_ERROR, ENOMEM,
|
|
+ AFR_MSG_INODE_CTX_GET_FAILED,
|
|
+ "Error getting inode ctx %s",
|
|
+ uuid_utoa (local->inode->gfid));
|
|
+ }
|
|
+ return ret;
|
|
+}
|
|
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
|
|
index 9099b8c..e088ed6 100644
|
|
--- a/xlators/cluster/afr/src/afr-dir-write.c
|
|
+++ b/xlators/cluster/afr/src/afr-dir-write.c
|
|
@@ -477,7 +477,7 @@ afr_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
|
|
if (!local->fd_ctx)
|
|
goto out;
|
|
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ local->inode = inode_ref (loc->inode);
|
|
local->parent = inode_ref (loc->parent);
|
|
|
|
local->op = GF_FOP_CREATE;
|
|
@@ -609,7 +609,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
|
|
goto out;
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ local->inode = inode_ref (loc->inode);
|
|
local->parent = inode_ref (loc->parent);
|
|
|
|
local->op = GF_FOP_MKNOD;
|
|
@@ -740,7 +740,7 @@ afr_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
|
|
goto out;
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ local->inode = inode_ref (loc->inode);
|
|
local->parent = inode_ref (loc->parent);
|
|
|
|
local->cont.mkdir.mode = mode;
|
|
@@ -877,7 +877,7 @@ afr_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
|
|
loc_copy (&local->loc, oldloc);
|
|
loc_copy (&local->newloc, newloc);
|
|
|
|
- local->inode = inode_ref (oldloc->inode);
|
|
+ local->inode = inode_ref (oldloc->inode);
|
|
local->parent = inode_ref (newloc->parent);
|
|
|
|
if (xdata)
|
|
@@ -1005,7 +1005,7 @@ afr_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
|
|
goto out;
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ local->inode = inode_ref (loc->inode);
|
|
local->parent = inode_ref (loc->parent);
|
|
|
|
local->cont.symlink.linkpath = gf_strdup (linkpath);
|
|
@@ -1142,7 +1142,7 @@ afr_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
|
|
loc_copy (&local->loc, oldloc);
|
|
loc_copy (&local->newloc, newloc);
|
|
|
|
- local->inode = inode_ref (oldloc->inode);
|
|
+ local->inode = inode_ref (oldloc->inode);
|
|
local->parent = inode_ref (oldloc->parent);
|
|
local->parent2 = inode_ref (newloc->parent);
|
|
|
|
@@ -1295,7 +1295,7 @@ afr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
|
|
loc_copy (&local->loc, loc);
|
|
local->xflag = xflag;
|
|
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ local->inode = inode_ref (loc->inode);
|
|
local->parent = inode_ref (loc->parent);
|
|
|
|
if (xdata)
|
|
@@ -1421,7 +1421,7 @@ afr_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
|
|
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ local->inode = inode_ref (loc->inode);
|
|
local->parent = inode_ref (loc->parent);
|
|
|
|
local->cont.rmdir.flags = flags;
|
|
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
|
|
index 97397f9..f0231b7 100644
|
|
--- a/xlators/cluster/afr/src/afr-inode-write.c
|
|
+++ b/xlators/cluster/afr/src/afr-inode-write.c
|
|
@@ -507,6 +507,7 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
{
|
|
afr_local_t *local = NULL;
|
|
int op_errno = ENOMEM;
|
|
+ int ret = -1;
|
|
|
|
local = AFR_FRAME_INIT (frame, op_errno);
|
|
if (!local)
|
|
@@ -529,7 +530,9 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
goto out;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
if (dict_set_uint32 (local->xdata_req, GLUSTERFS_OPEN_FD_COUNT, 4)) {
|
|
op_errno = ENOMEM;
|
|
@@ -654,7 +657,9 @@ afr_truncate (call_frame_t *frame, xlator_t *this,
|
|
local->transaction.unwind = afr_truncate_unwind;
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ ret = afr_set_inode_local (this, local, loc->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_TRUNCATE;
|
|
|
|
@@ -768,7 +773,9 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
|
|
goto out;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_FTRUNCATE;
|
|
|
|
@@ -886,7 +893,9 @@ afr_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
|
|
local->transaction.unwind = afr_setattr_unwind;
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ ret = afr_set_inode_local (this, local, loc->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_SETATTR;
|
|
|
|
@@ -991,7 +1000,9 @@ afr_fsetattr (call_frame_t *frame, xlator_t *this,
|
|
local->transaction.unwind = afr_fsetattr_unwind;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_FSETATTR;
|
|
|
|
@@ -1633,7 +1644,9 @@ afr_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
|
|
local->transaction.unwind = afr_setxattr_unwind;
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ ret = afr_set_inode_local (this, local, loc->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->transaction.main_frame = frame;
|
|
local->transaction.start = LLONG_MAX - 1;
|
|
@@ -1745,7 +1758,9 @@ afr_fsetxattr (call_frame_t *frame, xlator_t *this,
|
|
local->transaction.unwind = afr_fsetxattr_unwind;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_FSETXATTR;
|
|
|
|
@@ -1858,7 +1873,9 @@ afr_removexattr (call_frame_t *frame, xlator_t *this,
|
|
local->transaction.unwind = afr_removexattr_unwind;
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ ret = afr_set_inode_local (this, local, loc->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_REMOVEXATTR;
|
|
|
|
@@ -1965,7 +1982,9 @@ afr_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
local->transaction.unwind = afr_fremovexattr_unwind;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_FREMOVEXATTR;
|
|
|
|
@@ -2060,7 +2079,9 @@ afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
|
|
local->cont.fallocate.len = len;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
if (xdata)
|
|
local->xdata_req = dict_copy_with_ref (xdata, NULL);
|
|
@@ -2172,7 +2193,9 @@ afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
|
|
local->cont.discard.len = len;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
if (xdata)
|
|
local->xdata_req = dict_copy_with_ref (xdata, NULL);
|
|
@@ -2281,7 +2304,9 @@ afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
|
|
local->cont.zerofill.len = len;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
if (xdata)
|
|
local->xdata_req = dict_copy_with_ref (xdata, NULL);
|
|
@@ -2393,7 +2418,9 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
|
|
local->transaction.unwind = afr_xattrop_unwind;
|
|
|
|
loc_copy (&local->loc, loc);
|
|
- local->inode = inode_ref (loc->inode);
|
|
+ ret = afr_set_inode_local (this, local, loc->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_XATTROP;
|
|
|
|
@@ -2487,7 +2514,9 @@ afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
local->transaction.unwind = afr_fxattrop_unwind;
|
|
|
|
local->fd = fd_ref (fd);
|
|
- local->inode = inode_ref (fd->inode);
|
|
+ ret = afr_set_inode_local (this, local, fd->inode);
|
|
+ if (ret)
|
|
+ goto out;
|
|
|
|
local->op = GF_FOP_FXATTROP;
|
|
|
|
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
|
|
index c17f60f..f50c7b6 100644
|
|
--- a/xlators/cluster/afr/src/afr-lk-common.c
|
|
+++ b/xlators/cluster/afr/src/afr-lk-common.c
|
|
@@ -615,14 +615,14 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
{
|
|
afr_local_t *local = NULL;
|
|
afr_internal_lock_t *int_lock = NULL;
|
|
- afr_fd_ctx_t *fd_ctx = NULL;
|
|
- afr_private_t *priv = NULL;
|
|
int call_count = 0;
|
|
int ret = 0;
|
|
|
|
local = frame->local;
|
|
int_lock = &local->internal_lock;
|
|
- priv = this->private;
|
|
+
|
|
+ if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1)
|
|
+ ret = afr_write_subvol_reset (frame, this);
|
|
|
|
LOCK (&frame->lock);
|
|
{
|
|
@@ -633,11 +633,6 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
if (call_count == 0) {
|
|
gf_msg_trace (this->name, 0,
|
|
"All internal locks unlocked");
|
|
- if (local->fd) {
|
|
- fd_ctx = afr_fd_ctx_get (local->fd, this);
|
|
- if (0 == AFR_COUNT (fd_ctx->lock_acquired, priv->child_count))
|
|
- ret = afr_write_subvol_reset (frame, this);
|
|
- }
|
|
int_lock->lock_cbk (frame, this);
|
|
}
|
|
|
|
@@ -947,6 +942,15 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
} else {
|
|
int_lock->locked_nodes[child_index] |= LOCKED_YES;
|
|
int_lock->lock_count++;
|
|
+
|
|
+ if (local->transaction.type ==
|
|
+ AFR_DATA_TRANSACTION) {
|
|
+ LOCK(&local->inode->lock);
|
|
+ {
|
|
+ local->inode_ctx->lock_count++;
|
|
+ }
|
|
+ UNLOCK (&local->inode->lock);
|
|
+ }
|
|
}
|
|
}
|
|
afr_lock_blocking (frame, this, cky + 1);
|
|
@@ -1502,13 +1506,12 @@ int32_t
|
|
afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
int32_t op_ret, int32_t op_errno, dict_t *xdata)
|
|
{
|
|
- afr_internal_lock_t *int_lock = NULL;
|
|
- afr_inodelk_t *inodelk = NULL;
|
|
- afr_local_t *local = NULL;
|
|
- int call_count = 0;
|
|
- int child_index = (long) cookie;
|
|
- afr_fd_ctx_t *fd_ctx = NULL;
|
|
-
|
|
+ afr_internal_lock_t *int_lock = NULL;
|
|
+ afr_inodelk_t *inodelk = NULL;
|
|
+ afr_local_t *local = NULL;
|
|
+ afr_fd_ctx_t *fd_ctx = NULL;
|
|
+ int call_count = 0;
|
|
+ int child_index = (long) cookie;
|
|
|
|
local = frame->local;
|
|
int_lock = &local->internal_lock;
|
|
@@ -1553,6 +1556,15 @@ afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
fd_ctx->lock_acquired[child_index]++;
|
|
}
|
|
}
|
|
+
|
|
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
|
|
+ op_ret == 0) {
|
|
+ LOCK(&local->inode->lock);
|
|
+ {
|
|
+ local->inode_ctx->lock_count++;
|
|
+ }
|
|
+ UNLOCK (&local->inode->lock);
|
|
+ }
|
|
}
|
|
|
|
call_count = --int_lock->lk_call_count;
|
|
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
|
|
index 02eb206..53ffcd8 100644
|
|
--- a/xlators/cluster/afr/src/afr-messages.h
|
|
+++ b/xlators/cluster/afr/src/afr-messages.h
|
|
@@ -40,7 +40,7 @@
|
|
*/
|
|
|
|
#define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR
|
|
-#define GLFS_NUM_MESSAGES 42
|
|
+#define GLFS_NUM_MESSAGES 43
|
|
#define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)
|
|
|
|
#define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages"
|
|
@@ -369,5 +369,12 @@
|
|
*/
|
|
#define AFR_MSG_SBRAIN_FAV_CHILD_POLICY (GLFS_COMP_BASE_AFR + 42)
|
|
|
|
+/*!
|
|
+ * @messageid 108043
|
|
+ * @diagnosis
|
|
+ * @recommendation
|
|
+*/
|
|
+#define AFR_MSG_INODE_CTX_GET_FAILED (GLFS_COMP_BASE_AFR + 43)
|
|
+
|
|
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
|
|
#endif /* !_AFR_MESSAGES_H_ */
|
|
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
|
|
index a04636f..7e40bba 100644
|
|
--- a/xlators/cluster/afr/src/afr-transaction.c
|
|
+++ b/xlators/cluster/afr/src/afr-transaction.c
|
|
@@ -372,14 +372,27 @@ afr_txn_arbitrate_fop (call_frame_t *frame, xlator_t *this)
|
|
int
|
|
afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
|
|
{
|
|
- afr_local_t *local = NULL;
|
|
- afr_private_t *priv = NULL;
|
|
- fd_t *fd = NULL;
|
|
+ afr_local_t *local = NULL;
|
|
+ afr_private_t *priv = NULL;
|
|
+ fd_t *fd = NULL;
|
|
+ int i = 0;
|
|
+ int ret = 0;
|
|
|
|
local = frame->local;
|
|
priv = this->private;
|
|
fd = local->fd;
|
|
|
|
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
|
|
+ !local->transaction.inherited) {
|
|
+ ret = afr_write_subvol_set (frame, this);
|
|
+ if (ret) {
|
|
+ /*act as if operation failed on all subvols*/
|
|
+ local->op_ret = -1;
|
|
+ local->op_errno = -ret;
|
|
+ for (i = 0; i < priv->child_count; i++)
|
|
+ local->transaction.failed_subvols[i] = 1;
|
|
+ }
|
|
+ }
|
|
/* Perform fops with the lk-owner from top xlator.
|
|
* Eg: lk-owner of posix-lk and flush should be same,
|
|
* flush cant clear the posix-lks without that lk-owner.
|
|
@@ -1116,32 +1129,28 @@ unlock:
|
|
|
|
int
|
|
afr_changelog_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
|
|
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
|
|
{
|
|
afr_local_t *local = NULL;
|
|
- afr_private_t *priv = NULL;
|
|
int call_count = -1;
|
|
int child_index = -1;
|
|
|
|
local = frame->local;
|
|
- priv = this->private;
|
|
child_index = (long) cookie;
|
|
|
|
- if (op_ret == -1) {
|
|
+ if (op_ret == -1) {
|
|
local->op_errno = op_errno;
|
|
- afr_transaction_fop_failed (frame, this, child_index);
|
|
+ afr_transaction_fop_failed (frame, this, child_index);
|
|
}
|
|
|
|
- if (priv->arbiter_count == 1 && !op_ret) {
|
|
- if (xattr)
|
|
- local->transaction.pre_op_xdata[child_index] =
|
|
- dict_ref (xattr);
|
|
- }
|
|
+ if (xattr)
|
|
+ local->transaction.pre_op_xdata[child_index] = dict_ref (xattr);
|
|
|
|
- call_count = afr_frame_return (frame);
|
|
+ call_count = afr_frame_return (frame);
|
|
|
|
- if (call_count == 0)
|
|
- local->transaction.changelog_resume (frame, this);
|
|
+ if (call_count == 0) {
|
|
+ local->transaction.changelog_resume (frame, this);
|
|
+ }
|
|
|
|
return 0;
|
|
}
|
|
@@ -1750,10 +1759,6 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
|
|
if (pre_nop)
|
|
goto next;
|
|
|
|
- ret = afr_write_subvol_set (frame, this);
|
|
- if (ret)
|
|
- goto err;
|
|
-
|
|
if (!local->pre_op_compat) {
|
|
dict_copy (xdata_req, local->xdata_req);
|
|
goto next;
|
|
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
index 0a06eb6..96fefb1 100644
|
|
--- a/xlators/cluster/afr/src/afr.h
|
|
+++ b/xlators/cluster/afr/src/afr.h
|
|
@@ -377,6 +377,16 @@ typedef enum {
|
|
AFR_FOP_LOCK_QUORUM_FAILED,
|
|
} afr_fop_lock_state_t;
|
|
|
|
+typedef struct _afr_inode_ctx {
|
|
+ uint64_t read_subvol;
|
|
+ uint64_t write_subvol;
|
|
+ int lock_count;
|
|
+ int spb_choice;
|
|
+ gf_timer_t *timer;
|
|
+ gf_boolean_t need_refresh;
|
|
+} afr_inode_ctx_t;
|
|
+
|
|
+
|
|
typedef struct _afr_local {
|
|
glusterfs_fop_t op;
|
|
unsigned int call_count;
|
|
@@ -833,17 +843,10 @@ typedef struct _afr_local {
|
|
compound_args_t *c_args;
|
|
|
|
gf_boolean_t is_read_txn;
|
|
+ afr_inode_ctx_t *inode_ctx;
|
|
} afr_local_t;
|
|
|
|
|
|
-typedef struct _afr_inode_ctx {
|
|
- uint64_t read_subvol;
|
|
- uint64_t write_subvol;
|
|
- int spb_choice;
|
|
- gf_timer_t *timer;
|
|
- gf_boolean_t need_refresh;
|
|
-} afr_inode_ctx_t;
|
|
-
|
|
typedef struct afr_spbc_timeout {
|
|
call_frame_t *frame;
|
|
gf_boolean_t d_spb;
|
|
@@ -1274,6 +1277,9 @@ afr_write_subvol_set (call_frame_t *frame, xlator_t *this);
|
|
int
|
|
afr_write_subvol_reset (call_frame_t *frame, xlator_t *this);
|
|
|
|
+int
|
|
+afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode);
|
|
+
|
|
gf_boolean_t
|
|
afr_is_symmetric_error (call_frame_t *frame, xlator_t *this);
|
|
#endif /* __AFR_H__ */
|
|
--
|
|
1.8.3.1
|
|
|