65700164d9
1. It looks like the at command does not verify that it has successfully written all data before it makes the job file executable (for example, on a full disk an empty file might get created).
2. After 60 minutes (run_time + CHECK_INTERVAL <= now), atd (approx. line 750) unlinks the lockfile and retries the execution of the job, leaving us with this unfortunate loop.

Reproducer:

    ATD_PID=$(ps -C atd -o pid=)
    QUEUE=a
    JOBNO=$(printf '%05x' 123)
    BAD_TIME=$(expr $(date +%s) / 60 - 61)
    CTM=$(printf '%08x' $BAD_TIME )
    FILENAME=/var/spool/at/${QUEUE}${JOBNO}${CTM}
    touch $FILENAME
    chmod 0700 $FILENAME
    kill -HUP ${ATD_PID}
    sleep 0.5
    ls -l $FILENAME
    rm -f $FILENAME

Thanks to: Anders Blomdell
24 lines
871 B
Diff
24 lines
871 B
Diff
diff -up at-3.1.14/atd.c.seg at-3.1.14/atd.c
|
|
--- at-3.1.14/atd.c.seg 2013-12-02 14:33:48.650769756 +0100
|
|
+++ at-3.1.14/atd.c 2013-12-02 14:52:49.057437721 +0100
|
|
@@ -752,14 +752,17 @@ run_loop()
|
|
/* Is the file already locked?
|
|
*/
|
|
if (buf.st_nlink > 1) {
|
|
- if (run_time + CHECK_INTERVAL <= now) {
|
|
-
|
|
+ if (buf.st_mtime + CHECK_INTERVAL <= now) {
|
|
/* Something went wrong the last time this was executed.
|
|
* Let's remove the lockfile and reschedule.
|
|
+ * We also change the timestamp to avoid rerunning the job more
|
|
+ * than once every CHECK_INTERVAL.
|
|
*/
|
|
strncpy(lock_name, dirent->d_name, sizeof(lock_name));
|
|
lock_name[sizeof(lock_name)-1] = '\0';
|
|
lock_name[0] = '=';
|
|
+ if (utime(lock_name, 0) < 0)
|
|
+ syslog(LOG_ERR, "utime couldn't be set for lock file %s\n", lock_name);
|
|
unlink(lock_name);
|
|
next_job = now;
|
|
nothing_to_do = 0;
|