c459b4cbb4
Resolves: bz#1569657 bz#1608352 bz#1609163 bz#1609724 bz#1610825 Resolves: bz#1611151 bz#1612098 bz#1615338 bz#1615440 Signed-off-by: Milind Changire <mchangir@redhat.com>
138 lines
7.4 KiB
Diff
138 lines
7.4 KiB
Diff
From 16b5177ca40cb5f8b37ea5331003ecc7dbb6d992 Mon Sep 17 00:00:00 2001
|
|
From: Atin Mukherjee <amukherj@redhat.com>
|
|
Date: Tue, 7 Aug 2018 10:25:49 +0530
|
|
Subject: [PATCH 342/351] glusterd: more stricter checks of if brick is running
|
|
in multiplex mode
|
|
|
|
While gf_attach () utility can help in detaching a brick instance from
|
|
the brick process which the kill_brick () function in tests/volume.rc
|
|
uses it has a caveat which is as follows:
|
|
1. It doesn't ensure the respective brick is marked as stopped which
|
|
glusterd does from glusterd_brick_stop
|
|
2. Sometimes if kill_brick () is executed just after a brick stack is
|
|
up, the mgmt_rpc_notify () can take some time before marking
|
|
priv->connected to 1 and before it if kill_brick () is executed, brick
|
|
will fail to initiate the pmap_signout which would inturn cleans up the
|
|
pidfile.
|
|
|
|
To avoid such possibilities, a more stricter check on if a brick is
|
|
running or not in brick multiplexing has been brought in now where it
|
|
not only checks for its pid's existance but checks if the respective
|
|
process has the brick instance associated with it before checking for
|
|
brick's status.
|
|
|
|
> Change-Id: I98b92df949076663b9686add7aab4ec2f24ad5ab
|
|
> Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
|
|
> (cherry pick from commit bb9b8f61501cc633e585593de4d9f2fe5494d5ce)
|
|
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/20651/)
|
|
|
|
Change-Id: I47b70927c839ca4828a0499006b5c1f604d3d6a4
|
|
BUG: 1612098
|
|
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
|
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/146874
|
|
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
Tested-by: Atin Mukherjee <amukherj@redhat.com>
|
|
---
|
|
xlators/mgmt/glusterd/src/glusterd-utils.c | 71 ++++++++++++++++--------------
|
|
1 file changed, 39 insertions(+), 32 deletions(-)
|
|
|
|
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
index 372d5f4..6f7c787 100644
|
|
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
@@ -6083,6 +6083,7 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
|
char *brickpath = NULL;
|
|
glusterd_volinfo_t *other_vol;
|
|
struct statvfs brickstat = {0,};
|
|
+ gf_boolean_t is_service_running = _gf_false;
|
|
|
|
this = THIS;
|
|
GF_ASSERT (this);
|
|
@@ -6149,8 +6150,39 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
|
brickinfo->path);
|
|
goto out;
|
|
}
|
|
-
|
|
- if (gf_is_service_running (pidfile, &pid)) {
|
|
+ is_service_running = gf_is_service_running (pidfile, &pid);
|
|
+ if (is_service_running) {
|
|
+ if (is_brick_mx_enabled ()) {
|
|
+ brickpath = search_brick_path_from_proc
|
|
+ (pid, brickinfo->path);
|
|
+ if (!brickpath) {
|
|
+ gf_log (this->name, GF_LOG_INFO,
|
|
+ "Either pid %d is not running or brick"
|
|
+ " path %s is not consumed so cleanup pidfile",
|
|
+ pid, brickinfo->path);
|
|
+ /* brick isn't running,so unlink stale pidfile
|
|
+ * if any.
|
|
+ */
|
|
+ if (sys_access (pidfile , R_OK) == 0) {
|
|
+ sys_unlink (pidfile);
|
|
+ }
|
|
+ goto run;
|
|
+ }
|
|
+ GF_FREE (brickpath);
|
|
+ ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
|
|
+ sizeof(socketpath));
|
|
+ if (ret) {
|
|
+ gf_log (this->name, GF_LOG_INFO,
|
|
+ "Either pid %d is not running or does "
|
|
+ "not match with any running brick "
|
|
+ "processes", pid);
|
|
+ /* Fetch unix socket is failed so unlink pidfile */
|
|
+ if (sys_access (pidfile , R_OK) == 0) {
|
|
+ sys_unlink (pidfile);
|
|
+ }
|
|
+ goto run;
|
|
+ }
|
|
+ }
|
|
if (brickinfo->status != GF_BRICK_STARTING &&
|
|
brickinfo->status != GF_BRICK_STARTED) {
|
|
gf_log (this->name, GF_LOG_INFO,
|
|
@@ -6168,36 +6200,11 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo,
|
|
* same port (on another brick) and re-use that.
|
|
* TBD: re-use RPC connection across bricks
|
|
*/
|
|
- if (is_brick_mx_enabled ()) {
|
|
- brickpath = search_brick_path_from_proc (pid, brickinfo->path);
|
|
- if (!brickpath) {
|
|
- gf_log (this->name, GF_LOG_INFO,
|
|
- "Either pid %d is not running or brick"
|
|
- " path %s is not consumed so cleanup pidfile",
|
|
- pid, brickinfo->path);
|
|
- /* search brick is failed so unlink pidfile */
|
|
- if (sys_access (pidfile , R_OK) == 0) {
|
|
- sys_unlink (pidfile);
|
|
- }
|
|
- goto run;
|
|
- }
|
|
- GF_FREE (brickpath);
|
|
- ret = glusterd_get_sock_from_brick_pid (pid, socketpath,
|
|
- sizeof(socketpath));
|
|
- if (ret) {
|
|
- gf_log (this->name, GF_LOG_INFO,
|
|
- "Either pid %d is not running or is not match"
|
|
- " with any running brick process ", pid);
|
|
- /* Fetch unix socket is failed so unlink pidfile */
|
|
- if (sys_access (pidfile , R_OK) == 0) {
|
|
- sys_unlink (pidfile);
|
|
- }
|
|
- goto run;
|
|
- }
|
|
- } else {
|
|
- glusterd_set_brick_socket_filepath (volinfo, brickinfo,
|
|
- socketpath,
|
|
- sizeof (socketpath));
|
|
+ if (!is_brick_mx_enabled ()) {
|
|
+ glusterd_set_brick_socket_filepath
|
|
+ (volinfo, brickinfo,
|
|
+ socketpath,
|
|
+ sizeof (socketpath));
|
|
}
|
|
gf_log (this->name, GF_LOG_DEBUG,
|
|
"Using %s as sockfile for brick %s of volume %s ",
|
|
--
|
|
1.8.3.1
|
|
|