144 lines
5.8 KiB
Diff
144 lines
5.8 KiB
Diff
|
From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
|
||
|
From: Kotresh HR <khiremat@redhat.com>
|
||
|
Date: Mon, 29 Jul 2019 16:22:10 +0530
|
||
|
Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
|
||
|
|
||
|
Problem:
|
||
|
Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
|
||
|
if it's not present. In a multi client scenario, there is a race
|
||
|
which results in updating the ctime xattr to older value.
|
||
|
|
||
|
e.g. Let c1 and c2 be two clients and file1 be the file which
|
||
|
doesn't have the ctime xattr. Let the ctime of file1 be t1.
|
||
|
(from backend, ctime heals time attributes from backend when not present).
|
||
|
|
||
|
Now following operations are done on mount
|
||
|
c1 -> ls -l /mnt/file1 | c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
|
||
|
|
||
|
The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
|
||
|
so both of them tries to heal ctime to time 't1'. If c2 wins the race and
|
||
|
appends the file before c1 heals it, it sets the time to 't1' and updates
|
||
|
it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
|
||
|
which is incorrect.
|
||
|
|
||
|
Solution:
|
||
|
Compare the times during heal and only update the larger time. This is the
|
||
|
general approach used in ctime feature but got missed with healing legacy
|
||
|
files.
|
||
|
|
||
|
> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
|
||
|
|
||
|
>fixes: bz#1734299
|
||
|
>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
|
||
|
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
|
||
|
|
||
|
BUG: 1734305
|
||
|
Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
|
||
|
Signed-off-by: Kotresh HR <khiremat@redhat.com>
|
||
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
|
||
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
||
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
||
|
---
|
||
|
xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
|
||
|
1 file changed, 58 insertions(+), 18 deletions(-)
|
||
|
|
||
|
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
|
||
|
index 647c0bb..57791fa 100644
|
||
|
--- a/xlators/storage/posix/src/posix-metadata.c
|
||
|
+++ b/xlators/storage/posix/src/posix-metadata.c
|
||
|
@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
|
||
|
struct mdata_iatt *mdata_iatt, int *op_errno)
|
||
|
{
|
||
|
posix_mdata_t *mdata = NULL;
|
||
|
+ posix_mdata_t imdata = {
|
||
|
+ 0,
|
||
|
+ };
|
||
|
int ret = 0;
|
||
|
+ gf_boolean_t mdata_already_set = _gf_false;
|
||
|
|
||
|
GF_VALIDATE_OR_GOTO("posix", this, out);
|
||
|
GF_VALIDATE_OR_GOTO(this->name, inode, out);
|
||
|
|
||
|
LOCK(&inode->lock);
|
||
|
{
|
||
|
- mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
|
||
|
- if (!mdata) {
|
||
|
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
|
||
|
- "Could not allocate mdata. gfid: %s",
|
||
|
- uuid_utoa(inode->gfid));
|
||
|
- ret = -1;
|
||
|
- *op_errno = ENOMEM;
|
||
|
- goto unlock;
|
||
|
- }
|
||
|
+ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
|
||
|
+ if (ret == 0 && mdata) {
|
||
|
+ mdata_already_set = _gf_true;
|
||
|
+ } else if (ret == -1 || !mdata) {
|
||
|
+ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
|
||
|
+ if (!mdata) {
|
||
|
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
|
||
|
+ "Could not allocate mdata. gfid: %s",
|
||
|
+ uuid_utoa(inode->gfid));
|
||
|
+ ret = -1;
|
||
|
+ *op_errno = ENOMEM;
|
||
|
+ goto unlock;
|
||
|
+ }
|
||
|
+
|
||
|
+ ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
|
||
|
+ op_errno);
|
||
|
+ if (ret == 0) {
|
||
|
+ /* Got mdata from disk. This is a race, another client
|
||
|
+ * has healed the xattr during lookup. So set it in inode
|
||
|
+ * ctx */
|
||
|
+ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
|
||
|
+ mdata_already_set = _gf_true;
|
||
|
+ } else {
|
||
|
+ *op_errno = 0;
|
||
|
+ mdata->version = 1;
|
||
|
+ mdata->flags = 0;
|
||
|
+ mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
|
||
|
+ mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
|
||
|
+ mdata->atime.tv_sec = mdata_iatt->ia_atime;
|
||
|
+ mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
|
||
|
+ mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
|
||
|
+ mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
|
||
|
|
||
|
- mdata->version = 1;
|
||
|
- mdata->flags = 0;
|
||
|
- mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
|
||
|
- mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
|
||
|
- mdata->atime.tv_sec = mdata_iatt->ia_atime;
|
||
|
- mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
|
||
|
- mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
|
||
|
- mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
|
||
|
+ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
|
||
|
+ }
|
||
|
+ }
|
||
|
|
||
|
- __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
|
||
|
+ if (mdata_already_set) {
|
||
|
+ /* Compare and update the larger time */
|
||
|
+ imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
|
||
|
+ imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
|
||
|
+ imdata.atime.tv_sec = mdata_iatt->ia_atime;
|
||
|
+ imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
|
||
|
+ imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
|
||
|
+ imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
|
||
|
+
|
||
|
+ if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
|
||
|
+ mdata->ctime = imdata.ctime;
|
||
|
+ }
|
||
|
+ if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
|
||
|
+ mdata->mtime = imdata.mtime;
|
||
|
+ }
|
||
|
+ if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
|
||
|
+ mdata->atime = imdata.atime;
|
||
|
+ }
|
||
|
+ }
|
||
|
|
||
|
ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
|
||
|
if (ret) {
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|