diff options
author | Ryan Wilson <ryantimwilson@meta.com> | 2024-10-18 20:41:09 +0200 |
---|---|---|
committer | Ryan Wilson <ryantimwilson@meta.com> | 2024-10-28 16:37:36 +0100 |
commit | cd58b5a13537fc89b669ff9232ba2206214c9fa1 (patch) | |
tree | e5dd41b7cf691378b2023deb37042721dba70cfd /test/units/TEST-07-PID1.protect-control-groups.sh | |
parent | core: Refactor ProtectControlGroups= to use enum vs bool (diff) | |
download | systemd-cd58b5a13537fc89b669ff9232ba2206214c9fa1.tar.xz systemd-cd58b5a13537fc89b669ff9232ba2206214c9fa1.zip |
cgroup: Add support for ProtectControlGroups= private and strict
This commit adds two settings, private and strict, to
the ProtectControlGroups= property. Private will unshare the cgroup
namespace and mount a read-write private cgroup2 filesystem at /sys/fs/cgroup.
Strict does the same except the mount is read-only. Since the unit is
running in a cgroup namespace, the new root of /sys/fs/cgroup is the unit's
own cgroup.
We also add a new dbus property, ProtectControlGroupsEx, which accepts a string
instead of a boolean. This will allow users to use private/strict via dbus
and systemd-run in addition to service files.
Note that private and strict fall back to no and yes, respectively, if the kernel
doesn't support cgroup2 or the system is not using the unified hierarchy.
Fixes: #34634
Diffstat (limited to '')
-rwxr-xr-x | test/units/TEST-07-PID1.protect-control-groups.sh | 107 |
1 files changed, 107 insertions, 0 deletions
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
# shellcheck disable=SC2016
#
# Integration test for the ProtectControlGroupsEx= unit property.
#
# For each of the values no / yes / private / strict a handful of transient
# units are started with systemd-run, and from inside each unit we check:
#   * whether the cgroup namespace differs from the one this script runs in,
#   * what /proc/self/cgroup reports,
#   * where $MEMORY_PRESSURE_WATCH points (only when PSI is usable),
#   * the filesystem type and mount options of /sys/fs/cgroup,
#   * the values reported for ProtectControlGroupsEx / ProtectControlGroups.
set -eux -o pipefail

# shellcheck source=test/units/test-control.sh
source "$(dirname "$0")"/test-control.sh
# shellcheck source=test/units/util.sh
source "$(dirname "$0")"/util.sh

SLICE="system.slice"
UNIT_PREFIX="test-07-protect-control-groups"

READ_ONLY_MOUNT_FLAG="ro"
READ_WRITE_MOUNT_FLAG="rw"

# Cleanup hook: stop every transient unit created by this test and clear any
# failed state. Errors are tolerated here so cleanup always runs to the end.
at_exit() {
    set +e

    systemctl stop "$UNIT_PREFIX*.service"
    systemctl reset-failed
}

trap at_exit EXIT

# Cgroup namespace of the test script itself; units are compared against it.
ROOT_CGROUP_NS="$(readlink /proc/self/ns/cgroup)"

ENABLE_MEM_PRESSURE_TEST=true

# Print the given reason on stderr and switch off the MemoryPressureWatch=
# part of the test.
_disable_mem_pressure_test() {
    echo "$1" >&2
    ENABLE_MEM_PRESSURE_TEST=false
}

# We do not just test if the file exists, but try to read from it, since if
# CONFIG_PSI_DEFAULT_DISABLED is set in the kernel the file will exist and can
# be opened, but any read()s will fail with EOPNOTSUPP, which we want to
# detect.
if ! cat /proc/pressure/memory >/dev/null; then
    _disable_mem_pressure_test "Kernel too old, has no PSI, not running ProtectControlGroups= with MemoryPressureWatch= test."
fi

# The cgroup of the test service itself must expose memory.pressure as well.
if [[ ! -f "/sys/fs/cgroup/$(systemctl show TEST-07-PID1.service -P ControlGroup)/memory.pressure" ]]; then
    _disable_mem_pressure_test "No memory accounting/PSI delegated via cgroup, not running ProtectControlGroups= with MemoryPressureWatch= test."
fi

# Run all checks for one ProtectControlGroupsEx= value.
#
# Arguments:
#   $1 - value passed as ProtectControlGroupsEx= (also used in the unit names)
#   $2 - value expected from `systemctl show -P ProtectControlGroups`
#   $3 - "true" if the unit is expected to run in its own cgroup namespace
#   $4 - string expected to appear in the VFS options of /sys/fs/cgroup
test_basic() {
    local mode="$1"
    local expected_legacy="$2"
    local in_cgroup_ns="$3"
    local mount_flag="$4"

    local unit_base="$UNIT_PREFIX-$mode"
    local ns_op cgroup_line pressure_path
    # Arguments shared by every systemd-run invocation below.
    local -a run_cmd=(systemd-run -p "ProtectControlGroupsEx=$mode" --slice "$SLICE")

    case "$in_cgroup_ns" in
        true)
            # Inside a private cgroup namespace the unit sees itself at the root.
            ns_op="!="
            cgroup_line="0::/"
            pressure_path="/sys/fs/cgroup/memory.pressure"
            ;;
        *)
            ns_op="=="
            cgroup_line="0::/$SLICE/${unit_base}-1.service"
            pressure_path="/sys/fs/cgroup/$SLICE/${unit_base}-2.service/memory.pressure"
            ;;
    esac

    # Compare cgroup namespace to root namespace
    "${run_cmd[@]}" --wait \
        bash -xec "test \"\$(readlink /proc/self/ns/cgroup)\" $ns_op \"$ROOT_CGROUP_NS\""

    # Verify unit cgroup
    "${run_cmd[@]}" --wait --unit "${unit_base}-1" \
        bash -xec "test \"\$(cat /proc/self/cgroup)\" == \"$cgroup_line\""

    # Verify memory pressure watch points to correct file
    if [[ $ENABLE_MEM_PRESSURE_TEST == true ]]; then
        "${run_cmd[@]}" -p MemoryPressureWatch=yes --wait --unit "${unit_base}-2" \
            bash -xec "test \"\$MEMORY_PRESSURE_WATCH\" == \"$pressure_path\""
    fi

    # Verify /sys/fs/cgroup mount is read-only or read-write
    # NOTE(review): the doubled \$\$ presumably relies on systemd collapsing
    # "$$" to a single "$" in the unit's command line - confirm against
    # systemd.service(5) before touching the quoting.
    "${run_cmd[@]}" --wait \
        bash -xec "[[ \"\$\$(findmnt --mountpoint /sys/fs/cgroup --noheadings -o FSTYPE)\" == cgroup2 ]];
                   [[ \"\$\$(findmnt --mountpoint /sys/fs/cgroup --noheadings -o FS-OPTIONS)\" =~ nsdelegate ]];
                   [[ \"\$\$(findmnt --mountpoint /sys/fs/cgroup --noheadings -o VFS-OPTIONS)\" =~ noexec ]];
                   [[ \"\$\$(findmnt --mountpoint /sys/fs/cgroup --noheadings -o VFS-OPTIONS)\" =~ nosuid ]];
                   [[ \"\$\$(findmnt --mountpoint /sys/fs/cgroup --noheadings -o VFS-OPTIONS)\" =~ nodev ]];
                   [[ \"\$\$(findmnt --mountpoint /sys/fs/cgroup --noheadings -o VFS-OPTIONS)\" =~ \"$mount_flag\" ]];"

    # Verify dbus properties
    "${run_cmd[@]}" --remain-after-exit --unit "${unit_base}-3" true
    assert_eq "$(systemctl show -P ProtectControlGroupsEx "${unit_base}-3")" "$mode"
    assert_eq "$(systemctl show -P ProtectControlGroups "${unit_base}-3")" "$expected_legacy"
    systemctl stop "${unit_base}-3"
}

# "no": no cgroup namespace, /sys/fs/cgroup expected read-write.
testcase_basic_no() {
    test_basic "no" "no" false "$READ_WRITE_MOUNT_FLAG"
}

# "yes": no cgroup namespace, /sys/fs/cgroup expected read-only.
testcase_basic_yes() {
    test_basic "yes" "yes" false "$READ_ONLY_MOUNT_FLAG"
}

# "private": own cgroup namespace, /sys/fs/cgroup expected read-write.
testcase_basic_private() {
    test_basic "private" "yes" true "$READ_WRITE_MOUNT_FLAG"
}

# "strict": own cgroup namespace, /sys/fs/cgroup expected read-only.
testcase_basic_strict() {
    test_basic "strict" "yes" true "$READ_ONLY_MOUNT_FLAG"
}

run_testcases