diff options
author | Christian Hopps <chopps@gmail.com> | 2021-03-04 03:56:46 +0100 |
---|---|---|
committer | Christian Hopps <chopps@gmail.com> | 2021-03-19 17:45:37 +0100 |
commit | 3f950192fea01af89e17f9fed07ea261ce6f12b3 (patch) | |
tree | 231a87d6f9ed1014f56fe27133209c2f03a8bb74 | |
parent | Merge pull request #8129 from mjstapp/backup_recursives (diff) | |
download | frr-3f950192fea01af89e17f9fed07ea261ce6f12b3.tar.xz frr-3f950192fea01af89e17f9fed07ea261ce6f12b3.zip |
tests: add options for debugging topotest failures
Signed-off-by: Christian Hopps <chopps@labn.net>
-rw-r--r-- | doc/developer/topotests.rst | 79 | ||||
-rwxr-xr-x | tests/topotests/conftest.py | 189 | ||||
-rw-r--r-- | tests/topotests/lib/topotest.py | 137 |
3 files changed, 329 insertions, 76 deletions
diff --git a/doc/developer/topotests.rst b/doc/developer/topotests.rst index 7976a206f..e684b9c8a 100644 --- a/doc/developer/topotests.rst +++ b/doc/developer/topotests.rst @@ -232,6 +232,85 @@ for ``master`` branch: and create ``frr`` user and ``frrvty`` group as shown above. +Debugging Topotest Failures +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For the below debugging options which launch programs, if the topotest is run +within screen_ or tmux_, ``gdb``, the shell or ``vtysh`` will be launched using +that windowing program, otherwise mininet's ``xterm`` functionality will be used +to launch the given program. + +If you wish to force the use of ``xterm`` rather than ``tmux`` or ``screen``, or +wish to use ``gnome-terminal`` instead of ``xterm``, set the environment +variable ``FRR_TOPO_TERMINAL`` to either ``xterm`` or ``gnome-terminal``. + +.. _screen: https://www.gnu.org/software/screen/ +.. _tmux: https://github.com/tmux/tmux/wiki + +Spawning ``vtysh`` or Shells on Routers +""""""""""""""""""""""""""""""""""""""" + +Topotest can automatically launch a shell or ``vtysh`` for any or all routers in +a test. This is enabled by specifying 1 of 2 CLI arguments ``--shell`` or +``--vtysh``. Both of these options can be set to a single router value, multiple +comma-separated values, or ``all``. + +When either of these options is specified topotest will pause after each test +to allow for inspection of the router state. + +Here's an example of launching ``vtysh`` on routers ``rt1`` and ``rt2``. + +.. code:: shell + + pytest --vtysh=rt1,rt2 all-protocol-startup + +Spawning Mininet CLI, ``vtysh`` or Shells on Routers on Test Failure +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +Similar to the previous section one can have ``vtysh`` or a shell launched on +routers, but in this case only when a test fails. To launch the given process on +each router after a test failure specify one of ``--shell-on-error`` or +``--vtysh-on-error``. 
+ + +Here's an example of having ``vtysh`` launched on test failure. + +.. code:: shell + + pytest --vtysh-on-error all-protocol-startup + + +Additionally, one can have the mininet CLI invoked on test failures by +specifying the ``--mininet-on-error`` CLI option as shown in the example below. + +.. code:: shell + + pytest --mininet-on-error all-protocol-startup + +Debugging with GDB +"""""""""""""""""" + +Topotest can automatically launch any daemon with ``gdb``, possibly setting +breakpoints for any test run. This is enabled by specifying one or both of the CLI arguments +``--gdb-routers`` and ``--gdb-daemons``. Additionally ``--gdb-breakpoints`` can +be used to automatically set breakpoints in the launched ``gdb`` processes. + +Each of these options can be set to a single value, multiple comma-separated +values, or ``all``. If ``--gdb-routers`` is empty but ``--gdb-daemons`` is set +then the given daemons will be launched in ``gdb`` on all routers in the test. +Likewise if ``--gdb-routers`` is set, but ``--gdb-daemons`` is empty then all +daemons on the given routers will be launched in ``gdb``. + +Here's an example of launching ``zebra`` and ``bgpd`` inside ``gdb`` on router +``r1`` with a breakpoint set on ``nb_config_diff``. + +.. code:: shell + + pytest --gdb-routers=r1 \ + --gdb-daemons=bgpd,zebra \ + --gdb-breakpoints=nb_config_diff \ + all-protocol-startup + .. _topotests_docker: Running Tests with Docker diff --git a/tests/topotests/conftest.py b/tests/topotests/conftest.py index 04e9961f1..7ad5d8c9a 100755 --- a/tests/topotests/conftest.py +++ b/tests/topotests/conftest.py @@ -2,13 +2,14 @@ Topotest conftest.py file. 
""" +import os +import pdb +import pytest + from lib.topogen import get_topogen, diagnose_env from lib.topotest import json_cmp_result +from lib.topotest import g_extra_config as topotest_extra_config from lib.topolog import logger -import pytest - -topology_only = False - def pytest_addoption(parser): """ @@ -16,20 +17,72 @@ def pytest_addoption(parser): only run the setup_module() to setup the topology without running any tests. """ parser.addoption( + "--gdb-breakpoints", + metavar="SYMBOL[,SYMBOL...]", + help="Comma-separated list of functions to set gdb breakpoints on", + ) + + parser.addoption( + "--gdb-daemons", + metavar="DAEMON[,DAEMON...]", + help="Comma-separated list of daemons to spawn gdb on, or 'all'", + ) + + parser.addoption( + "--gdb-routers", + metavar="ROUTER[,ROUTER...]", + help="Comma-separated list of routers to spawn gdb on, or 'all'", + ) + + parser.addoption( + "--mininet-on-error", + action="store_true", + help="Mininet cli on test failure", + ) + + parser.addoption( + "--pause-after", + action="store_true", + help="Pause after each test", + ) + + parser.addoption( + "--shell", + metavar="ROUTER[,ROUTER...]", + help="Comma-separated list of routers to spawn shell on, or 'all'", + ) + + parser.addoption( + "--shell-on-error", + action="store_true", + help="Spawn shell on all routers on test failure", + ) + + parser.addoption( "--topology-only", action="store_true", help="Only set up this topology, don't run tests", ) + parser.addoption( + "--vtysh", + metavar="ROUTER[,ROUTER...]", + help="Comma-separated list of routers to spawn vtysh on, or 'all'", + ) + + parser.addoption( + "--vtysh-on-error", + action="store_true", + help="Spawn vtysh on all routers on test failure", + ) + def pytest_runtest_call(): """ This function must be run after setup_module(), it does standarized post setup routines. It is only being used for the 'topology-only' option. 
""" - global topology_only - - if topology_only: + if topotest_extra_config["topology_only"]: tgen = get_topogen() if tgen is not None: # Allow user to play with the setup. @@ -42,6 +95,8 @@ def pytest_assertrepr_compare(op, left, right): """ Show proper assertion error message for json_cmp results. """ + del op + json_result = left if not isinstance(json_result, json_cmp_result): json_result = right @@ -52,43 +107,105 @@ def pytest_assertrepr_compare(op, left, right): def pytest_configure(config): - "Assert that the environment is correctly configured." - - global topology_only + """ + Assert that the environment is correctly configured, and get extra config. + """ if not diagnose_env(): - pytest.exit("enviroment has errors, please read the logs") + pytest.exit("environment has errors, please read the logs") + + gdb_routers = config.getoption("--gdb-routers") + gdb_routers = gdb_routers.split(",") if gdb_routers else [] + topotest_extra_config["gdb_routers"] = gdb_routers + + gdb_daemons = config.getoption("--gdb-daemons") + gdb_daemons = gdb_daemons.split(",") if gdb_daemons else [] + topotest_extra_config["gdb_daemons"] = gdb_daemons + + gdb_breakpoints = config.getoption("--gdb-breakpoints") + gdb_breakpoints = gdb_breakpoints.split(",") if gdb_breakpoints else [] + topotest_extra_config["gdb_breakpoints"] = gdb_breakpoints + + mincli_on_error = config.getoption("--mininet-on-error") + topotest_extra_config["mininet_on_error"] = mincli_on_error - if config.getoption("--topology-only"): - topology_only = True + shell = config.getoption("--shell") + topotest_extra_config["shell"] = shell.split(",") if shell else [] + + pause_after = config.getoption("--pause-after") + + shell_on_error = config.getoption("--shell-on-error") + topotest_extra_config["shell_on_error"] = shell_on_error + + vtysh = config.getoption("--vtysh") + topotest_extra_config["vtysh"] = vtysh.split(",") if vtysh else [] + + vtysh_on_error = config.getoption("--vtysh-on-error") + 
topotest_extra_config["vtysh_on_error"] = vtysh_on_error + + topotest_extra_config["pause_after"] = ( + pause_after or shell or vtysh + ) + + topotest_extra_config["topology_only"] = config.getoption("--topology-only") def pytest_runtest_makereport(item, call): "Log all assert messages to default logger with error level" - # Nothing happened - if call.excinfo is None: - return - parent = item.parent - modname = parent.module.__name__ + # Nothing happened + if call.when == "call": + pause = topotest_extra_config["pause_after"] + else: + pause = False - # Treat skips as non errors - if call.excinfo.typename != "AssertionError": - logger.info( - 'assert skipped at "{}/{}": {}'.format( - modname, item.name, call.excinfo.value + if call.excinfo is None: + error = False + else: + parent = item.parent + modname = parent.module.__name__ + + # Treat skips as non errors, don't pause after + if call.excinfo.typename != "AssertionError": + pause = False + error = False + logger.info( + 'assert skipped at "{}/{}": {}'.format( + modname, item.name, call.excinfo.value + ) + ) + else: + error = True + # Handle assert failures + parent._previousfailed = item # pylint: disable=W0212 + logger.error( + 'assert failed at "{}/{}": {}'.format(modname, item.name, call.excinfo.value) ) - ) - return - - # Handle assert failures - parent._previousfailed = item - logger.error( - 'assert failed at "{}/{}": {}'.format(modname, item.name, call.excinfo.value) - ) - # (topogen) Set topology error to avoid advancing in the test. - tgen = get_topogen() - if tgen is not None: - # This will cause topogen to report error on `routers_have_failure`. - tgen.set_error("{}/{}".format(modname, item.name)) + # (topogen) Set topology error to avoid advancing in the test. + tgen = get_topogen() + if tgen is not None: + # This will cause topogen to report error on `routers_have_failure`. 
+ tgen.set_error("{}/{}".format(modname, item.name)) + + + if error and topotest_extra_config["shell_on_error"]: + for router in tgen.routers(): + pause = True + tgen.net[router].runInWindow(os.getenv("SHELL", "bash")) + + if error and topotest_extra_config["vtysh_on_error"]: + for router in tgen.routers(): + pause = True + tgen.net[router].runInWindow("vtysh") + + if error and topotest_extra_config["mininet_on_error"]: + tgen.mininet_cli() + + if pause: + try: + user = raw_input('Testing paused, "pdb" to debug, "Enter" to continue: ') + except NameError: + user = input('Testing paused, "pdb" to debug, "Enter" to continue: ') + if user.strip() == "pdb": + pdb.set_trace() diff --git a/tests/topotests/lib/topotest.py b/tests/topotests/lib/topotest.py index 5cc1a6981..7f768f5b8 100644 --- a/tests/topotests/lib/topotest.py +++ b/tests/topotests/lib/topotest.py @@ -50,7 +50,9 @@ from mininet.node import Node, OVSSwitch, Host from mininet.log import setLogLevel, info from mininet.cli import CLI from mininet.link import Intf +from mininet.term import makeTerm +g_extra_config = {} def gdb_core(obj, daemon, corefiles): gdbcmds = """ @@ -1303,6 +1305,37 @@ class Router(Node): logger.info("No daemon {} known".format(daemon)) # print "Daemons after:", self.daemons + # Run a command in a new window (gnome-terminal, screen, tmux, xterm) + def runInWindow(self, cmd, title=None): + topo_terminal = os.getenv("FRR_TOPO_TERMINAL") + if topo_terminal or ( + "TMUX" not in os.environ and "STY" not in os.environ + ): + term = topo_terminal if topo_terminal else "xterm" + makeTerm( + self, + title=title if title else cmd, + term=term, + cmd=cmd) + else: + nscmd = "sudo nsenter -m -n -t {} {}".format(self.pid, cmd) + if "TMUX" in os.environ: + self.cmd("tmux select-layout main-horizontal") + wcmd = "tmux split-window -h" + cmd = "{} {}".format(wcmd, nscmd) + elif "STY" in os.environ: + if os.path.exists( + "/run/screen/S-{}/{}".format( + os.environ['USER'], os.environ['STY'] + ) + ): + 
wcmd = "screen" + else: + wcmd = "sudo -u {} screen".format(os.environ["SUDO_USER"]) + cmd = "{} {}".format(wcmd, nscmd) + self.cmd(cmd) + + def startRouter(self, tgen=None): # Disable integrated-vtysh-config self.cmd( @@ -1355,6 +1388,14 @@ class Router(Node): return "LDP/MPLS Tests need mpls kernel modules" self.cmd("echo 100000 > /proc/sys/net/mpls/platform_labels") + shell_routers = g_extra_config["shell"] + if "all" in shell_routers or self.name in shell_routers: + self.runInWindow(os.getenv("SHELL", "bash")) + + vtysh_routers = g_extra_config["vtysh"] + if "all" in vtysh_routers or self.name in vtysh_routers: + self.runInWindow("vtysh") + if self.daemons["eigrpd"] == 1: eigrpd_path = os.path.join(self.daemondir, "eigrpd") if not os.path.isfile(eigrpd_path): @@ -1381,6 +1422,10 @@ class Router(Node): def startRouterDaemons(self, daemons=None): "Starts all FRR daemons for this router." + gdb_breakpoints = g_extra_config["gdb_breakpoints"] + gdb_daemons = g_extra_config["gdb_daemons"] + gdb_routers = g_extra_config["gdb_routers"] + bundle_data = "" if os.path.exists("/etc/frr/support_bundle_commands.conf"): @@ -1410,7 +1455,7 @@ class Router(Node): # If `daemons` was specified then some upper API called us with # specific daemons, otherwise just use our own configuration. daemons_list = [] - if daemons != None: + if daemons is not None: daemons_list = daemons else: # Append all daemons configured. 
@@ -1418,47 +1463,67 @@ class Router(Node): if self.daemons[daemon] == 1: daemons_list.append(daemon) - # Start Zebra first - if "zebra" in daemons_list: - zebra_path = os.path.join(self.daemondir, "zebra") - zebra_option = self.daemons_options["zebra"] - self.cmd( - "ASAN_OPTIONS=log_path=zebra.asan {0} {1} --log file:zebra.log --log-level debug -s 90000000 -d > zebra.out 2> zebra.err".format( - zebra_path, zebra_option + def start_daemon(daemon, extra_opts=None): + daemon_opts = self.daemons_options.get(daemon, "") + rediropt = " > {0}.out 2> {0}.err".format(daemon) + if daemon == "snmpd": + binary = "/usr/sbin/snmpd" + cmdenv = "" + cmdopt = "{} -C -c /etc/frr/snmpd.conf -p ".format( + daemon_opts + ) + "/var/run/{}/snmpd.pid -x /etc/frr/agentx".format(self.routertype) + else: + binary = os.path.join(self.daemondir, daemon) + cmdenv = "ASAN_OPTIONS=log_path={0}.asan".format(daemon) + cmdopt = "{} --log file:{}.log --log-level debug".format( + daemon_opts, daemon ) - ) - logger.debug("{}: {} zebra started".format(self, self.routertype)) + if extra_opts: + cmdopt += " " + extra_opts + + if ( + (gdb_routers or gdb_daemons) + and (not gdb_routers + or self.name in gdb_routers + or "all" in gdb_routers) + and (not gdb_daemons + or daemon in gdb_daemons + or "all" in gdb_daemons) + ): + if daemon == "snmpd": + cmdopt += " -f " + + cmdopt += rediropt + gdbcmd = "sudo -E gdb " + binary + if gdb_breakpoints: + gdbcmd += " -ex 'set breakpoint pending on'" + for bp in gdb_breakpoints: + gdbcmd += " -ex 'b {}'".format(bp) + gdbcmd += " -ex 'run {}'".format(cmdopt) + + self.runInWindow(gdbcmd, daemon) + else: + if daemon != "snmpd": + cmdopt += " -d " + cmdopt += rediropt + self.cmd(" ".join([cmdenv, binary, cmdopt])) + logger.info("{}: {} {} started".format(self, self.routertype, daemon)) - # Remove `zebra` so we don't attempt to start it again. 
+ + # Start Zebra first + if "zebra" in daemons_list: + start_daemon("zebra", "-s 90000000") while "zebra" in daemons_list: daemons_list.remove("zebra") # Start staticd next if required if "staticd" in daemons_list: - staticd_path = os.path.join(self.daemondir, "staticd") - staticd_option = self.daemons_options["staticd"] - self.cmd( - "ASAN_OPTIONS=log_path=staticd.asan {0} {1} --log file:staticd.log --log-level debug -d > staticd.out 2> staticd.err".format( - staticd_path, staticd_option - ) - ) - logger.debug("{}: {} staticd started".format(self, self.routertype)) - - # Remove `staticd` so we don't attempt to start it again. + start_daemon("staticd") while "staticd" in daemons_list: daemons_list.remove("staticd") if "snmpd" in daemons_list: - snmpd_path = "/usr/sbin/snmpd" - snmpd_option = self.daemons_options["snmpd"] - self.cmd( - "{0} {1} -C -c /etc/frr/snmpd.conf -p /var/run/{2}/snmpd.pid -x /etc/frr/agentx > snmpd.out 2> snmpd.err".format( - snmpd_path, snmpd_option, self.routertype - ) - ) - logger.info("{}: {} snmpd started".format(self, self.routertype)) - - # Remove `snmpd` so we don't attempt to start it again. + start_daemon("snmpd") while "snmpd" in daemons_list: daemons_list.remove("snmpd") @@ -1470,17 +1535,9 @@ class Router(Node): # Now start all the other daemons for daemon in daemons_list: - # Skip disabled daemons and zebra if self.daemons[daemon] == 0: continue - - daemon_path = os.path.join(self.daemondir, daemon) - self.cmd( - "ASAN_OPTIONS=log_path={2}.asan {0} {1} --log file:{2}.log --log-level debug -d > {2}.out 2> {2}.err".format( - daemon_path, self.daemons_options.get(daemon, ""), daemon - ) - ) - logger.debug("{}: {} {} started".format(self, self.routertype, daemon)) + start_daemon(daemon) # Check if daemons are running. rundaemons = self.cmd("ls -1 /var/run/%s/*.pid" % self.routertype) |