Merge pull request #30633 from mrc0mmand/cocci-shenanigans

coccinelle: rework how we run the Coccinelle transformations
author: Yu Watanabe <watanabe.yu+github@gmail.com> 2023-12-25 21:45:58 +0100
committer: GitHub <noreply@github.com> 2023-12-25 21:45:58 +0100
commit: fe3fcb94924877a362bc9799e0c8092be876c9a8 (patch)
tree: 1ba5d6cbdd09f936923c66ea542ec76f9df1863e /coccinelle/run-coccinelle.sh
parent: core/unit: don't log 0 values in unit_log_resources (diff)
parent: systemctl: use SYNTHETIC_ERRNO() (diff)
download: systemd-fe3fcb94924877a362bc9799e0c8092be876c9a8.tar.xz
systemd-fe3fcb94924877a362bc9799e0c8092be876c9a8.zip
1 files changed, 47 insertions, 4 deletions
diff --git a/coccinelle/run-coccinelle.sh b/coccinelle/run-coccinelle.sh
index cd951790b9..bb72a493f0 100755
--- a/coccinelle/run-coccinelle.sh
+++ b/coccinelle/run-coccinelle.sh
@@ -2,6 +2,14 @@
 # SPDX-License-Identifier: LGPL-2.1-or-later
 set -e
 
+# FIXME:
+#   - Coccinelle doesn't like our TEST() macros, which then causes name conflicts; i.e. Cocci can't process
+#     that TEST(xsetxattr) yields test_xsetxattr() and uses just xsetxattr() in this case, which then conflicts
+#     with the tested xsetxattr() function, leading to the whole test case getting skipped due to
+#     conflicting typedefs
+#   - something keeps pulling in src/boot/efi/*.h stuff, even though it's excluded
+#   - Coccinelle has issues with some of our more complex macros
+
 # Exclude following paths from the Coccinelle transformations
 EXCLUDED_PATHS=(
     "src/boot/efi/*"
@@ -10,13 +18,17 @@ EXCLUDED_PATHS=(
     # Symlinked to test-bus-vtable-cc.cc, which causes issues with the IN_SET macro
     "src/libsystemd/sd-bus/test-bus-vtable.c"
     "src/libsystemd/sd-journal/lookup3.c"
+    # Ignore man examples, as they redefine some macros we use internally, which makes Coccinelle complain
+    # and ignore code that tries to use the redefined stuff
+    "man/*"
 )
 
 TOP_DIR="$(git rev-parse --show-toplevel)"
+CACHE_DIR="$(dirname "$0")/.coccinelle-cache"
 ARGS=()
 
 # Create an array from files tracked by git...
-mapfile -t FILES < <(git ls-files ':/*.[ch]')
+mapfile -t FILES < <(git ls-files ':/*.c')
 # ...and filter everything that matches patterns from EXCLUDED_PATHS
 for excl in "${EXCLUDED_PATHS[@]}"; do
     # shellcheck disable=SC2206
@@ -37,12 +49,43 @@ fi
 
 [[ ${#@} -ne 0 ]] && SCRIPTS=("$@") || SCRIPTS=("$TOP_DIR"/coccinelle/*.cocci)
 
+mkdir -p "$CACHE_DIR"
+echo "--x-- Using Coccinelle cache directory: $CACHE_DIR"
+echo "--x--"
+echo "--x-- Note: running spatch for the first time without populated cache takes"
+echo "--x--       a _long_ time (15-30 minutes). Also, the cache is quite large"
+echo "--x--       (~15 GiB), so make sure you have enough free space."
+echo
+
 for script in "${SCRIPTS[@]}"; do
     echo "--x-- Processing $script --x--"
     TMPFILE="$(mktemp)"
     echo "+ spatch --sp-file $script ${ARGS[*]} ..."
-    parallel --halt now,fail=1 --keep-order --noswap --max-args=20 \
-             spatch --macro-file="$TOP_DIR/coccinelle/macros.h" --smpl-spacing --sp-file "$script" "${ARGS[@]}" ::: "${FILES[@]}" \
-             2>"$TMPFILE" || cat "$TMPFILE"
+    # A couple of notes:
+    #
+    # 1) Limit this to 10 files at once, as processing the ASTs is _very_ memory hungry - e.g. with 20 files
+    # at once one spatch process can take around 2.5 GiB of RAM, which can easily eat up all available RAM
+    # when paired together with parallel
+    #
+    # 2) Make sure spatch can find our includes via -I <dir>, similarly as we do when compiling stuff
+    #
+    # 3) Make sure to include includes from includes (--recursive-includes), but use them only to get type
+    # definitions (--include-headers-for-types) - otherwise we'd start formatting them as well, which might be
+    # unwanted, especially for includes we fetch verbatim from third-parties
+    #
+    # 4) Use cache, since generating the full AST is _very_ expensive, i.e. the uncached run takes 15 - 30
+    # minutes (for one rule(!)), vs 30 - 90 seconds when the cache is populated. One major downside of the
+    # cache is that it's quite big - at the time of writing the cache takes around 15 GiB, but the performance boost is
+    # definitely worth it
+    parallel --halt now,fail=1 --keep-order --noswap --max-args=10 \
+        spatch --cache-prefix "$CACHE_DIR" \
+               -I src \
+               --recursive-includes \
+               --include-headers-for-types \
+               --smpl-spacing \
+               --sp-file "$script" \
+               "${ARGS[@]}" ::: "${FILES[@]}" \
+               2>"$TMPFILE" || cat "$TMPFILE"
+    rm -f "$TMPFILE"
     echo -e "--x-- Processed $script --x--\n"
 done
author	Yu Watanabe <watanabe.yu+github@gmail.com>	2023-12-25 21:45:58 +0100
committer	GitHub <noreply@github.com>	2023-12-25 21:45:58 +0100
commit	fe3fcb94924877a362bc9799e0c8092be876c9a8 (patch)
tree	1ba5d6cbdd09f936923c66ea542ec76f9df1863e /coccinelle/run-coccinelle.sh
parent	core/unit: don't log 0 values in unit_log_resources (diff)
parent	systemctl: use SYNTHETIC_ERRNO() (diff)
download	systemd-fe3fcb94924877a362bc9799e0c8092be876c9a8.tar.xz systemd-fe3fcb94924877a362bc9799e0c8092be876c9a8.zip