author     Frank Ch. Eigler <fche@redhat.com>   2021-11-04 13:08:35 -0400
committer  Frank Ch. Eigler <fche@redhat.com>   2021-11-05 08:16:06 -0400
commit     c1e8c8c6b25cb2b5c16553609f19a9ed5dd4e146 (patch)
tree       5fabbc0e2aba69bcab73c33f43620767a127517f
parent     5350f549dcd310c62ddbac7c1f7852d88eabbb37 (diff)
download   elfutils-c1e8c8c6b25cb2b5c16553609f19a9ed5dd4e146.tar.gz
PR28514: debuginfod: limit groom operation times
For large databases and many stale files, it was possible for numerous groom "nuke" operations (database delete ops) to starve rescan operations. Under the theory that ingesting new data is at least as important as aging out old data, we now impose a rough deadline on groom queries.

In the process, we discovered that we were committing some undefined-behaviour sqlite ops (deleting rows while iterating), which may explain some previous heisenbug occurrences. So the groom nuke operations are now split into decision & action phases, with associated progress-tracking metrics.

Testing the timeout facility requires hand-testing beyond the testsuite (since it requires LARGE databases to show measurable query times), so this part was confirmed by hand.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
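The patch's two-phase approach can be summarized with a minimal sketch. This is an illustration only, not the code in debuginfod.cxx: next_stale_row, delete_stale_row, and groom_sketch are hypothetical stand-ins for the real sqlite_ps statements and the groom() loop, and rescan_s here plays the role of the -t rescan interval that bounds the time budget.

#include <cstdint>
#include <ctime>
#include <deque>
#include <utility>

// Hypothetical stand-ins for the real sqlite_ps prepared statements;
// stubbed out here so the sketch compiles on its own.
static bool next_stale_row (std::pair<int64_t,int64_t>& row)   // SELECT step
{ (void) row; return false; }
static void delete_stale_row (int64_t fileid, int64_t mtime)   // DELETE statements
{ (void) fileid; (void) mtime; }

static void groom_sketch (long rescan_s)
{
  const time_t start = time (NULL);
  auto out_of_time = [&] ()
    { return rescan_s > 0 && time (NULL) > start + rescan_s; };

  // Phase 1: DECISION - a read-only pass that merely collects the stale
  // (fileid, mtime) tuples, without modifying the tables being scanned.
  std::deque<std::pair<int64_t,int64_t>> stale;
  std::pair<int64_t,int64_t> row;
  while (next_stale_row (row))
    {
      if (out_of_time ())
        break;                  // give rescans a chance; resume next cycle
      stale.push_back (row);
    }

  // Phase 2: ACTION - only now run the deletes, so the SELECT above never
  // iterates over rows that are concurrently being removed (undefined
  // behaviour in sqlite).  The same rough deadline applies here too.
  while (! stale.empty ())
    {
      if (out_of_time ())
        break;
      std::pair<int64_t,int64_t> s = stale.front ();
      stale.pop_front ();
      delete_stale_row (s.first, s.second);
    }
}

An aborted pass in either phase simply leaves the remaining work for a later groom cycle; the actual patch additionally exposes the backlog through the thread_work_pending{role="groom"} metric.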
-rw-r--r--   NEWS                                    1
-rw-r--r--   debuginfod/ChangeLog                    7
-rw-r--r--   debuginfod/debuginfod.cxx              84
-rw-r--r--   tests/ChangeLog                         5
-rwxr-xr-x   tests/run-debuginfod-archive-groom.sh   2
5 files changed, 88 insertions(+), 11 deletions(-)
diff --git a/NEWS b/NEWS
index 9ab58422..b812b743 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@ debuginfod: Supply extra HTTP response headers, describing archive/file
Protect against loops in federated server configurations.
Add -r option to use -I/-X regexes for grooming stale files.
Protect against wasted CPU from duplicate concurrent requests.
+ Limit the duration of groom ops roughly to rescan (-t) times.
Several other performance improvements & prometheus metrics.
Version 0.185
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index e0616b0d..15b2ba40 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,10 @@
+2021-11-04 Frank Ch. Eigler <fche@redhat.com>
+
+ PR28514
+ * debuginfod.cxx (groom): Rework into separate decision/action
+ phases. Add new metrics to monitor progress. Limit indefinite
+ operation times to avoid starving rescan.
+
2021-10-23 Frank Ch. Eigler <fche@redhat.com>
PR28240
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index d22571ad..45981d8d 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -3431,16 +3431,30 @@ void groom()
clock_gettime (CLOCK_MONOTONIC, &ts_start);
// scan for files that have disappeared
- sqlite_ps files (db, "check old files", "select s.mtime, s.file, f.name from "
- BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
- "where f.id = s.file");
- sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
- sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
- sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
- "where file = ? and mtime = ?");
+ sqlite_ps files (db, "check old files",
+ "select distinct s.mtime, s.file, f.name from "
+ BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
+ "where f.id = s.file");
+ // NB: Because _file_mtime_scanned can contain both F and
+ // R records for the same file, this query would return duplicates if the
+ // DISTINCT qualifier were not there.
files.reset();
+
+ // DECISION TIME - we enumerate stale fileids/mtimes
+ deque<pair<int64_t,int64_t> > stale_fileid_mtime;
+
+ time_t time_start = time(NULL);
while(1)
{
+ // PR28514: limit grooming iteration to O(rescan time), to avoid
+ // slow filesystem tests over many files locking out rescans for
+ // too long.
+ if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
+ {
+ inc_metric("groomed_total", "decision", "aborted");
+ break;
+ }
+
if (interrupted) break;
int rc = files.step();
@@ -3458,19 +3472,67 @@ void groom()
if ( (regex_groom && reg_exclude && !reg_include) || rc < 0 || (mtime != (int64_t) s.st_mtime) )
{
if (verbose > 2)
- obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
- files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
- files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
- files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ obatched(clog) << "groom: stale file=" << filename << " mtime=" << mtime << endl;
+ stale_fileid_mtime.push_back(make_pair(fileid,mtime));
inc_metric("groomed_total", "decision", "stale");
+ set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
}
else
inc_metric("groomed_total", "decision", "fresh");
+
if (sigusr1 != forced_rescan_count) // stop early if scan triggered
break;
}
files.reset();
+ // ACTION TIME
+
+ // Now that we know which file/mtime tuples are stale, actually do
+ // the deletion from the database. Doing this during the SELECT
+ // iteration above results in undefined behaviour in sqlite, as per
+ // https://www.sqlite.org/isolation.html
+
+ // We could shuffle stale_fileid_mtime[] here. It'd let aborted
+ // sequences of nuke operations resume at random locations, instead
+ // of just starting over. But it doesn't matter much either way,
+ // as long as we make progress.
+
+ sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
+ sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
+ sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
+ "where file = ? and mtime = ?");
+
+ while (! stale_fileid_mtime.empty())
+ {
+ auto stale = stale_fileid_mtime.front();
+ stale_fileid_mtime.pop_front();
+ set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+
+ // PR28514: limit grooming iteration to O(rescan time), to avoid
+ // slow nuke_* queries over many files locking out rescans for too
+ // long. An aborted pass simply leaves the remaining stale entries
+ // for a later groom cycle to pick up.
+ if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
+ {
+ inc_metric("groomed_total", "action", "aborted");
+ break;
+ }
+
+ if (interrupted) break;
+
+ int64_t fileid = stale.first;
+ int64_t mtime = stale.second;
+ files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ inc_metric("groomed_total", "action", "cleaned");
+
+ if (sigusr1 != forced_rescan_count) // stop early if scan triggered
+ break;
+ }
+ stale_fileid_mtime.clear(); // no need for this any longer
+ set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+
// delete buildids with no references in _r_de or _f_de tables;
// cascades to _r_sref & _f_s records
sqlite_ps buildids_del (db, "nuke orphan buildids",
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 46302b56..b791cd7f 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,8 @@
+2021-11-04 Frank Ch. Eigler <fche@redhat.com>
+
+ PR28514
+ * run-debuginfod-archive-groom.sh: Look for new groom metric.
+
2021-10-23 Frank Ch. Eigler <fche@redhat.com>
PR28240
diff --git a/tests/run-debuginfod-archive-groom.sh b/tests/run-debuginfod-archive-groom.sh
index 7813ee28..030e0aa6 100755
--- a/tests/run-debuginfod-archive-groom.sh
+++ b/tests/run-debuginfod-archive-groom.sh
@@ -150,6 +150,8 @@ kill -USR2 $PID1 # groom cycle
wait_ready $PORT1 'thread_work_total{role="groom"}' 2
# Expect 4 rpms containing 2 buildids to be deleted by the groom
wait_ready $PORT1 'groomed_total{decision="stale"}' 4
+# Expect no more groom actions pending
+wait_ready $PORT1 'thread_work_pending{role="groom"}' 0
rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests