From ac30968b7858d4ca3743d2b4d296eca543864fe2 Mon Sep 17 00:00:00 2001 From: Jiri Vymazal Date: Fri, 22 Nov 2019 14:25:59 +0100 Subject: [PATCH] Thorougher state-file renaming and cleaning Now checking if file-id changes and reanming - cleaning state file accordingly and always checking and cleaning old inode-only style state files. --- plugins/imfile/imfile.c | 66 +++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/plugins/imfile/imfile.c b/plugins/imfile/imfile.c index d9bf0fbb6d..9db2b47ac9 100644 --- a/plugins/imfile/imfile.c +++ b/plugins/imfile/imfile.c @@ -182,6 +182,7 @@ struct act_obj_s { time_t timeoutBase; /* what time to calculate the timeout against? */ /* file dynamic data */ char file_id[FILE_ID_HASH_SIZE]; /* file id for this entry, once we could obtain it */ + char file_id_prev[FILE_ID_HASH_SIZE]; /* previous file id for this entry, set if changed */ int in_move; /* workaround for inotify move: if set, state file must not be deleted */ ino_t ino; /* current inode nbr */ int fd; /* fd to file in order to obtain file_id (needs to be preserved across move) */ @@ -711,7 +712,7 @@ act_obj_add(fs_edge_t *const edge, const char *const name, const int is_file, if (is_file) { LogError(errno, RS_RET_ERR, "imfile: error accessing file '%s'", name); } else { /* reporting only in debug for dirs as higher lvl paths are likely blocked by selinux */ - DBGPRINTF("imfile: error accessing file '%s'", name); + DBGPRINTF("imfile: error accessing directory '%s'", name); } FINALIZE; } @@ -727,6 +728,7 @@ act_obj_add(fs_edge_t *const edge, const char *const name, const int is_file, act->ino = ino; act->fd = fd; act->file_id[0] = '\0'; + act->file_id_prev[0] = '\0'; act->is_symlink = is_symlink; if (source) { /* we are target of symlink */ CHKmalloc(act->source_name = strdup(source)); @@ -1256,17 +1258,15 @@ get_file_id_hash(const char *data, size_t lendata, static void ATTR_NONNULL(1) getFileID(act_obj_t *const act) { - if(act->file_id[0] != '\0') { - return; /* everything already done */ - } + /* save the old id for cleaning purposes */ + strncpy(act->file_id_prev, (const char*)act->file_id, FILE_ID_HASH_SIZE); + act->file_id[0] = '\0'; assert(act->fd >= 0); /* fd must have been opened at act_obj_t creation! */ char filedata[FILE_ID_SIZE]; + lseek(act->fd, 0, SEEK_SET); /* Seek to beginning of file so we have correct id */ const int r = read(act->fd, filedata, FILE_ID_SIZE); if(r == FILE_ID_SIZE) { get_file_id_hash(filedata, sizeof(filedata), act->file_id, sizeof(act->file_id)); - dbgprintf("file_id '%s' obtained, closing monitoring file handle\n", act->file_id); - close(act->fd); /* we will never go here! */ - act->fd = -1; } else { DBGPRINTF("getFileID partial or error read, ret %d\n", r); } @@ -1378,28 +1378,13 @@ openFileWithStateFile(act_obj_t *const act) if(fd < 0) { if(errno == ENOENT) { if(act->file_id[0] != '\0') { - const char *pszSFNamHash = strdup((const char*)pszSFNam); - CHKmalloc(pszSFNamHash); DBGPRINTF("state file %s for %s does not exist - trying to see if " "inode-only file exists\n", pszSFNam, act->name); getFullStateFileName(statefn, "", pszSFNam, sizeof(pszSFNam)); fd = open((char*)pszSFNam, O_CLOEXEC | O_NOCTTY | O_RDONLY, 0600); if(fd >= 0) { - dbgprintf("found inode-only state file, renaming it now that we " - "know the file_id, new name: %s\n", pszSFNamHash); - /* we now can use identify the file, so let's rename it */ - if(rename((const char*)pszSFNam, pszSFNamHash) != 0) { - LogError(errno, RS_RET_IO_ERROR, - "imfile error trying to rename state file for '%s' - " - "ignoring this error, usually this means a file no " - "longer file is left over, but this may also cause " - "some real trouble. Still the best we can do ", - act->name); - free((void*) pszSFNamHash); - ABORT_FINALIZE(RS_RET_IO_ERROR); - } + dbgprintf("found inode-only state file, will be renamed at next persist\n"); } - free((void*) pszSFNamHash); } if(fd < 0) { DBGPRINTF("state file %s for %s does not exist - trying to see if " @@ -2609,6 +2594,36 @@ atomicWriteStateFile(const char *fn, const char *content) RETiRet; } +/* This function should be called after any file ID change - that is if + * file grown from hash-only statefile, or was truncated, this will ensure + * we delete the old file so we do not make garbage in our working dir and + * there are no leftover statefiles which can in theory later bind to something + * and cause data loss. + * jvymazal 2019-11-27 + */ +static void +removeOldStatefile(const uchar *statefn, const char *hashToDelete) +{ + int ret; + uchar statefname[MAXFNAME]; + + getFullStateFileName(statefn, hashToDelete, statefname, sizeof(statefname)); + DBGPRINTF("removing old state file: '%s'\n", statefname); + ret = unlink((const char*)statefname); + if(ret != 0) { + if (errno != ENOENT) { + LogError(errno, RS_RET_IO_ERROR, + "imfile error trying to delete old state file: '%s' - ignoring this " + "error, usually this means a file no longer file is left over, but " + "this may also cause some real trouble. Still the best we can do ", + statefname); + } else { + DBGPRINTF("trying to delete no longer valid statefile '%s' which no " + "longer exists (probably already deleted)\n", statefname); + } + } +} + /* This function persists information for a specific file being monitored. * To do so, it simply persists the stream object. We do NOT abort on error @@ -2660,6 +2675,11 @@ persistStrmState(act_obj_t *const act) CHKiRet(atomicWriteStateFile((const char*)statefname, jstr)); json_object_put(json); + /* file-id changed remove the old statefile */ + if (strncmp((const char *)act->file_id_prev, (const char *)act->file_id, FILE_ID_HASH_SIZE)) { + removeOldStatefile(statefn, act->file_id_prev); + } + finalize_it: if(iRet != RS_RET_OK) { LogError(0, iRet, "imfile: could not persist state "