common: Fix another race condition, and address the other one.

* common/dotlock.c (dotlock_take_unix): Do same when same PID process detects stale lockfile. Add comment. -- GnuPG-bug-id: 5884 Signed-off-by: NIIBE Yutaka <gniibe@fsij.org>
author: NIIBE Yutaka <gniibe@fsij.org> 2022-03-19 06:47:59 +0100
committer: NIIBE Yutaka <gniibe@fsij.org> 2022-03-19 06:47:59 +0100
commit: 2f1afc1296621624f34aa6a9db03ef29c741ba6c (patch)
tree: bac958e5d5f6b45fee06deaed070cb63f68d5da7 /common/dotlock.c
parent: gpg: Always use version >= 4 to generate signature. (diff)
download: gnupg2-2f1afc1296621624f34aa6a9db03ef29c741ba6c.tar.xz
gnupg2-2f1afc1296621624f34aa6a9db03ef29c741ba6c.zip
1 files changed, 34 insertions, 12 deletions
diff --git a/common/dotlock.c b/common/dotlock.c
index 3d948f718..80d80deec 100644
--- a/common/dotlock.c
+++ b/common/dotlock.c
@@ -1118,30 +1118,52 @@ dotlock_take_unix (dotlock_t h, long timeout)
       my_info_0 ("lockfile disappeared\n");
       goto again;
     }
-  else if ( pid == getpid() && same_node )
-    {
-      my_info_0 ("Oops: lock already held by us\n");
-      h->locked = 1;
-
-      close (fd);
-      return 0; /* okay */
-    }
-  else if ( same_node && kill (pid, 0) && errno == ESRCH )
+  else if ( (pid == getpid() && same_node)
+            || (same_node && kill (pid, 0) && errno == ESRCH) )
+    /* Stale lockfile is detected. */
     {
       struct stat sb;
 
       /* Check if it's unlocked during examining the lockfile.  */
       if (fstat (fd, &sb) || sb.st_nlink == 0)
         {
-          /* It's gone already.  */
+          /* It's gone already by another process.  */
           close (fd);
           goto again;
         }
 
-      close (fd);
+      /*
+       * Here, although it's quite _rare_, we have a race condition.
+       *
+       * When multiple processes race on a stale lockfile, detecting
+       * AND removing should be done atomically.  That is, to work
+       * correctly, the file to be removed should be the one which is
+       * examined for detection.
+       *
+       * But, when it's not atomic, consider the case for us where it
+       * takes some time between the detection and the removal of the
+       * lockfile.
+       *
+       * In this situation, it is possible that the file which was
+       * detected as stale is already removed by another process and
+       * then new lockfile is created (by that process or other one).
+       *
+       * And it is newly created valid lockfile which is going to be
+       * removed by us.
+       *
+       * Consider this long comment as it expresses possible (long)
+       * time between fstat above and unlink below; Meanwhile, the
+       * lockfile in question may be removed and there may be new
+       * valid one.
+       *
+       * In short, when you see the message of removing stale lockfile
+       * when there are multiple processes for the work, there is
+       * (very) little possibility something went wrong.
+       */
 
-      my_info_1 (_("removing stale lockfile (created by %d)\n"), pid);
       unlink (h->lockname);
+      my_info_1 (_("removing stale lockfile (created by %d)\n"), pid);
+      close (fd);
       goto again;
     }
author	NIIBE Yutaka <gniibe@fsij.org>	2022-03-19 06:47:59 +0100
committer	NIIBE Yutaka <gniibe@fsij.org>	2022-03-19 06:47:59 +0100
commit	2f1afc1296621624f34aa6a9db03ef29c741ba6c (patch)
tree	bac958e5d5f6b45fee06deaed070cb63f68d5da7 /common/dotlock.c
parent	gpg: Always use version >= 4 to generate signature. (diff)
download	gnupg2-2f1afc1296621624f34aa6a9db03ef29c741ba6c.tar.xz gnupg2-2f1afc1296621624f34aa6a9db03ef29c741ba6c.zip