summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Lamparter <equinox@diac24.net>2020-05-04 17:30:24 +0200
committerDavid Lamparter <equinox@diac24.net>2020-05-05 14:39:12 +0200
commit0045c130676694aa95f9cbd2b2fa2c3835971077 (patch)
treea46724fa584f54f7ee2e1933fc5b4e9aac73690b
parentbuild: rework Makefile var extraction... again (diff)
downloadfrr-0045c130676694aa95f9cbd2b2fa2c3835971077.tar.xz
frr-0045c130676694aa95f9cbd2b2fa2c3835971077.zip
tools: frr-llvm-cg
This dumps call graph data from LLVM bitcode files into a JSON file. Specifically for FRR, it understands thread_add_*(), hook_*() and install_element() so it can provide extra information in these cases. As a general feature, it tries to track down function pointers as far as easily feasible. Signed-off-by: David Lamparter <equinox@diac24.net>
-rw-r--r--Makefile.am1
-rw-r--r--tools/frr-llvm-cg.c649
-rw-r--r--tools/subdir.am12
3 files changed, 662 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
index 1e3311fa7..f99f05ed5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -89,6 +89,7 @@ clippy-only: Makefile lib/clippy config.h
#AUTODERP# endif
EXTRA_DIST =
+EXTRA_PROGRAMS =
BUILT_SOURCES =
CLEANFILES =
DISTCLEANFILES =
diff --git a/tools/frr-llvm-cg.c b/tools/frr-llvm-cg.c
new file mode 100644
index 000000000..84a756a37
--- /dev/null
+++ b/tools/frr-llvm-cg.c
@@ -0,0 +1,649 @@
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+//
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// For more information, please refer to <http://unlicense.org/>
+
+/* based on example code: https://github.com/sheredom/llvm_bc_parsing_example
+ * which came under the above (un-)license. does not depend on any FRR
+ * pieces, so no reason to change the license.
+ *
+ * please note that while included in the FRR sources, this tool is in no way
+ * supported or maintained by the FRR community. it is provided as a
+ * "convenience"; while it worked at some point (using LLVM 8 / 9), it may
+ * easily break with a future LLVM version or any other factors.
+ *
+ * 2020-05-04, David Lamparter
+ */
+
+#include <string.h>
+#include <strings.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+
+#include <llvm-c/BitReader.h>
+#include <llvm-c/BitWriter.h>
+#include <llvm-c/Core.h>
+
+#include <json-c/json.h>
+
+/* if you want to use this without the special FRRouting defines,
+ * remove the following #define
+ */
+#define FRR_SPECIFIC
+
+static void dbgloc_add(struct json_object *jsobj, LLVMValueRef obj)
+{
+ unsigned file_len = 0;
+ const char *file = LLVMGetDebugLocFilename(obj, &file_len);
+ unsigned line = LLVMGetDebugLocLine(obj);
+
+ if (!file)
+ file = "???", file_len = 3;
+ else if (file[0] == '.' && file[1] == '/')
+ file += 2, file_len -= 2;
+
+ json_object_object_add(jsobj, "filename",
+ json_object_new_string_len(file, file_len));
+ json_object_object_add(jsobj, "line", json_object_new_int64(line));
+}
+
+static struct json_object *js_get_or_make(struct json_object *parent,
+ const char *key,
+ struct json_object *(*maker)(void))
+{
+ struct json_object *ret;
+
+ ret = json_object_object_get(parent, key);
+ if (ret)
+ return ret;
+ ret = maker();
+ json_object_object_add(parent, key, ret);
+ return ret;
+}
+
+static bool details_fptr_vars = false;
+static bool details_fptr_consts = true;
+
+enum called_fn {
+ FN_GENERIC = 0,
+ FN_NONAME,
+ FN_INSTALL_ELEMENT,
+ FN_THREAD_ADD,
+};
+
+static void walk_const_fptrs(struct json_object *js_call, LLVMValueRef value,
+ const char *prefix, bool *hdr_written)
+{
+ LLVMTypeRef type;
+ LLVMValueKind kind;
+
+ if (LLVMIsAGlobalVariable(value)) {
+ type = LLVMGlobalGetValueType(value);
+ value = LLVMGetInitializer(value);
+ } else {
+ type = LLVMTypeOf(value);
+ }
+
+ if (LLVMIsAFunction(value)) {
+ struct json_object *js_fptrs;
+
+ js_fptrs = js_get_or_make(js_call, "funcptrs",
+ json_object_new_array);
+
+ size_t fn_len;
+ const char *fn_name = LLVMGetValueName2(value, &fn_len);
+
+ size_t curlen = json_object_array_length(js_fptrs);
+ struct json_object *jsobj;
+ const char *s;
+
+ for (size_t i = 0; i < curlen; i++) {
+ jsobj = json_object_array_get_idx(js_fptrs, i);
+ s = json_object_get_string(jsobj);
+
+ if (s && !strcmp(s, fn_name))
+ return;
+ }
+
+ if (details_fptr_consts && !*hdr_written) {
+ fprintf(stderr,
+ "%s: calls function pointer from constant or global data\n",
+ prefix);
+ *hdr_written = true;
+ }
+ if (details_fptr_consts)
+ fprintf(stderr, "%s- constant: %.*s()\n",
+ prefix, (int)fn_len, fn_name);
+
+ json_object_array_add(js_fptrs,
+ json_object_new_string_len(fn_name,
+ fn_len));
+ return;
+ }
+
+ kind = LLVMGetValueKind(value);
+
+ unsigned len;
+ char *dump;
+
+ switch (kind) {
+ case LLVMUndefValueValueKind:
+ case LLVMConstantAggregateZeroValueKind:
+ case LLVMConstantPointerNullValueKind:
+ /* null pointer / array - ignore */
+ break;
+
+ case LLVMConstantIntValueKind:
+ /* integer - ignore */
+ break;
+
+ case LLVMConstantStructValueKind:
+ len = LLVMCountStructElementTypes(type);
+ for (unsigned i = 0; i < len; i++)
+ walk_const_fptrs(js_call, LLVMGetOperand(value, i),
+ prefix, hdr_written);
+ break;
+
+ case LLVMConstantArrayValueKind:
+ len = LLVMGetArrayLength(type);
+ for (unsigned i = 0; i < len; i++)
+ walk_const_fptrs(js_call, LLVMGetOperand(value, i),
+ prefix, hdr_written);
+ return;
+
+ default:
+ /* to help the user / development */
+ if (!*hdr_written) {
+ fprintf(stderr,
+ "%s: calls function pointer from constant or global data\n",
+ prefix);
+ *hdr_written = true;
+ }
+ dump = LLVMPrintValueToString(value);
+ fprintf(stderr,
+ "%s- value could not be processed:\n"
+ "%s- [kind=%d] %s\n",
+ prefix, prefix, kind, dump);
+ LLVMDisposeMessage(dump);
+ return;
+ }
+ return;
+}
+
+#ifdef FRR_SPECIFIC
+static bool is_thread_sched(const char *name, size_t len)
+{
+#define thread_prefix "funcname_"
+ static const char *const names[] = {
+ thread_prefix "thread_add_read_write",
+ thread_prefix "thread_add_timer",
+ thread_prefix "thread_add_timer_msec",
+ thread_prefix "thread_add_timer_tv",
+ thread_prefix "thread_add_event",
+ thread_prefix "thread_execute",
+ };
+ size_t i;
+
+ for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
+ if (strlen(names[i]) != len)
+ continue;
+ if (!memcmp(names[i], name, len))
+ return true;
+ }
+ return false;
+}
+#endif
+
+static void process_call(struct json_object *js_calls,
+ struct json_object *js_special,
+ LLVMValueRef instr,
+ LLVMValueRef function)
+{
+ struct json_object *js_call, *js_fptrs = NULL;
+
+ LLVMValueRef called = LLVMGetCalledValue(instr);
+
+ if (LLVMIsAConstantExpr(called)) {
+ LLVMOpcode opcode = LLVMGetConstOpcode(called);
+
+ if (opcode == LLVMBitCast) {
+ LLVMValueRef op0 = LLVMGetOperand(called, 0);
+
+ if (LLVMIsAFunction(op0))
+ called = op0;
+ }
+ }
+
+ size_t called_len = 0;
+ const char *called_name = LLVMGetValueName2(called, &called_len);
+ unsigned n_args = LLVMGetNumArgOperands(instr);
+
+ bool is_external = LLVMIsDeclaration(called);
+ enum called_fn called_type = FN_GENERIC;
+
+ js_call = json_object_new_object();
+ json_object_array_add(js_calls, js_call);
+ dbgloc_add(js_call, instr);
+ json_object_object_add(js_call, "is_external",
+ json_object_new_boolean(is_external));
+
+ if (!called_name || called_len == 0) {
+ called_type = FN_NONAME;
+ json_object_object_add(js_call, "type",
+ json_object_new_string("indirect"));
+
+ LLVMValueRef last = called;
+
+ size_t name_len = 0;
+ const char *name_c = LLVMGetValueName2(function, &name_len);
+
+#ifdef FRR_SPECIFIC
+ /* information for FRR hooks is dumped for the registration
+ * in _hook_typecheck; we can safely ignore the funcptr here
+ */
+ if (strncmp(name_c, "hook_call_", 10) == 0)
+ return;
+#endif
+
+ unsigned file_len = 0;
+ const char *file = LLVMGetDebugLocFilename(instr, &file_len);
+ unsigned line = LLVMGetDebugLocLine(instr);
+
+ char prefix[256];
+ snprintf(prefix, sizeof(prefix), "%.*s:%d:%.*s()",
+ (int)file_len, file, line, (int)name_len, name_c);
+
+ while (LLVMIsALoadInst(last) || LLVMIsAGetElementPtrInst(last))
+ /* skipping over details for GEP here, but meh. */
+ last = LLVMGetOperand(last, 0);
+
+ if (LLVMIsAAllocaInst(last)) {
+ /* "alloca" is just generically all variables on the
+ * stack, this does not refer to C alloca() calls
+ *
+ * looking at the control flow in the function can
+ * give better results here, it's just not implemented
+ * (yet?)
+ */
+ fprintf(stderr,
+ "%s: call to a function pointer variable\n",
+ prefix);
+
+ if (details_fptr_vars) {
+ char *dump = LLVMPrintValueToString(called);
+ printf("%s- %s\n", prefix, dump);
+ LLVMDisposeMessage(dump);
+ }
+
+ json_object_object_add(
+ js_call, "type",
+ json_object_new_string("stack_fptr"));
+ } else if (LLVMIsACallInst(last)) {
+ /* calling the a function pointer returned from
+ * another function.
+ */
+ struct json_object *js_indirect;
+
+ js_indirect = js_get_or_make(js_call, "return_of",
+ json_object_new_array);
+
+ process_call(js_indirect, js_special, last, function);
+ } else if (LLVMIsAConstant(last)) {
+ /* function pointer is a constant (includes loading
+ * from complicated constants like structs or arrays.)
+ */
+ bool hdr_written = false;
+ walk_const_fptrs(js_call, last, prefix, &hdr_written);
+ if (details_fptr_consts && !hdr_written)
+ fprintf(stderr,
+ "%s: calls function pointer from constant or global data, but no non-NULL function pointers found\n",
+ prefix);
+ } else {
+ char *dump = LLVMPrintValueToString(called);
+ printf("\t%s\n", dump);
+ LLVMDisposeMessage(dump);
+ }
+ return;
+#ifdef FRR_SPECIFIC
+ } else if (!strcmp(called_name, "install_element")) {
+ called_type = FN_INSTALL_ELEMENT;
+
+ LLVMValueRef param0 = LLVMGetOperand(instr, 0);
+ if (!LLVMIsAConstantInt(param0))
+ goto out_nonconst;
+
+ long long vty_node = LLVMConstIntGetSExtValue(param0);
+ json_object_object_add(js_call, "vty_node",
+ json_object_new_int64(vty_node));
+
+ LLVMValueRef param1 = LLVMGetOperand(instr, 1);
+ if (!LLVMIsAGlobalVariable(param1))
+ goto out_nonconst;
+
+ LLVMValueRef intlz = LLVMGetInitializer(param1);
+ assert(intlz && LLVMIsConstant(intlz));
+
+ LLVMValueKind intlzkind = LLVMGetValueKind(intlz);
+ assert(intlzkind == LLVMConstantStructValueKind);
+
+ LLVMValueRef funcptr = LLVMGetOperand(intlz, 4);
+ assert(LLVMIsAFunction(funcptr));
+
+ size_t target_len = 0;
+ const char *target;
+ target = LLVMGetValueName2(funcptr, &target_len);
+
+ json_object_object_add(
+ js_call, "type",
+ json_object_new_string("install_element"));
+ json_object_object_add(
+ js_call, "target",
+ json_object_new_string_len(target, target_len));
+ return;
+
+ out_nonconst:
+ json_object_object_add(
+ js_call, "target",
+ json_object_new_string("install_element"));
+ return;
+ } else if (is_thread_sched(called_name, called_len)) {
+ called_type = FN_THREAD_ADD;
+
+ json_object_object_add(js_call, "type",
+ json_object_new_string("thread_sched"));
+ json_object_object_add(
+ js_call, "subtype",
+ json_object_new_string_len(called_name, called_len));
+
+ LLVMValueRef fparam;
+ if (strstr(called_name, "_read_"))
+ fparam = LLVMGetOperand(instr, 2);
+ else
+ fparam = LLVMGetOperand(instr, 1);
+ assert(fparam);
+
+ size_t target_len = 0;
+ const char *target;
+ target = LLVMGetValueName2(fparam, &target_len);
+
+ json_object_object_add(js_call, "target",
+ !target_len ? NULL :
+ json_object_new_string_len(target, target_len));
+ if (!LLVMIsAFunction(fparam))
+ json_object_object_add(js_call, "target_unresolved",
+ json_object_new_boolean(true));
+ return;
+ } else if (!strncmp(called_name, "_hook_typecheck_",
+ strlen("_hook_typecheck_"))) {
+ struct json_object *js_hook, *js_this;
+ const char *hook_name;
+
+ hook_name = called_name + strlen("_hook_typecheck_");
+
+ json_object_object_add(js_call, "type",
+ json_object_new_string("hook"));
+
+ LLVMValueRef param0 = LLVMGetOperand(instr, 0);
+ if (!LLVMIsAFunction(param0))
+ return;
+
+ size_t target_len = 0;
+ const char *target;
+ target = LLVMGetValueName2(param0, &target_len);
+
+ js_hook = js_get_or_make(js_special, "hooks",
+ json_object_new_object);
+ js_hook = js_get_or_make(js_hook, hook_name,
+ json_object_new_array);
+
+ js_this = json_object_new_object();
+ json_object_array_add(js_hook, js_this);
+
+ dbgloc_add(js_this, instr);
+ json_object_object_add(
+ js_this, "target",
+ json_object_new_string_len(target, target_len));
+ return;
+
+ /* TODO (FRR specifics):
+ * - workqueues - not sure we can do much there
+ * - zclient->* ?
+ */
+#endif /* FRR_SPECIFIC */
+ } else {
+ json_object_object_add(
+ js_call, "target",
+ json_object_new_string_len(called_name, called_len));
+ }
+
+ for (unsigned argno = 0; argno < n_args; argno++) {
+ LLVMValueRef param = LLVMGetOperand(instr, argno);
+ size_t target_len;
+ const char *target_name;
+
+ if (LLVMIsAFunction(param)) {
+ js_fptrs = js_get_or_make(js_call, "funcptrs",
+ json_object_new_array);
+
+ target_name = LLVMGetValueName2(param, &target_len);
+
+ json_object_array_add(js_fptrs,
+ json_object_new_string_len(
+ target_name, target_len));
+ }
+ }
+}
+
+static void process_fn(struct json_object *funcs,
+ struct json_object *js_special,
+ LLVMValueRef function)
+{
+ struct json_object *js_func, *js_calls;
+
+ size_t name_len = 0;
+ const char *name_c = LLVMGetValueName2(function, &name_len);
+ char *name;
+
+ name = strndup(name_c, name_len);
+
+ js_func = json_object_object_get(funcs, name);
+ if (js_func) {
+ unsigned file_len = 0;
+ const char *file = LLVMGetDebugLocFilename(function, &file_len);
+ unsigned line = LLVMGetDebugLocLine(function);
+
+ fprintf(stderr, "%.*s:%d:%s(): duplicate definition!\n",
+ (int)file_len, file, line, name);
+ free(name);
+ return;
+ }
+
+ js_func = json_object_new_object();
+ json_object_object_add(funcs, name, js_func);
+ free(name);
+
+ js_calls = json_object_new_array();
+ json_object_object_add(js_func, "calls", js_calls);
+
+ dbgloc_add(js_func, function);
+
+ for (LLVMBasicBlockRef basicBlock = LLVMGetFirstBasicBlock(function);
+ basicBlock; basicBlock = LLVMGetNextBasicBlock(basicBlock)) {
+
+ for (LLVMValueRef instr = LLVMGetFirstInstruction(basicBlock);
+ instr; instr = LLVMGetNextInstruction(instr)) {
+
+ if (LLVMIsAIntrinsicInst(instr))
+ continue;
+
+ if (LLVMIsACallInst(instr) || LLVMIsAInvokeInst(instr))
+ process_call(js_calls, js_special, instr,
+ function);
+ }
+ }
+}
+
+static void help(int retcode)
+{
+ fprintf(stderr,
+ "FRR LLVM bitcode to callgraph analyzer\n"
+ "\n"
+ "usage: frr-llvm-cg [-q|-v] [-o <JSONOUTPUT>] BITCODEINPUT\n"
+ "\n"
+ "\t-o FILENAME\twrite JSON output to file instead of stdout\n"
+ "\t-v\t\tbe more verbose\n"
+ "\t-q\t\tbe quiet\n"
+ "\n"
+ "BITCODEINPUT must be a LLVM binary bitcode file (not text\n"
+ "representation.) Use - to read from stdin.\n"
+ "\n"
+ "Note it may be necessary to build this binary tool against\n"
+ "the specific LLVM version that created the bitcode file.\n");
+ exit(retcode);
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ const char *out = NULL;
+ const char *inp = NULL;
+ char v_or_q = '\0';
+
+ while ((opt = getopt(argc, argv, "hvqo:")) != -1) {
+ switch (opt) {
+ case 'o':
+ if (out)
+ help(1);
+ out = optarg;
+ break;
+ case 'v':
+ if (v_or_q && v_or_q != 'v')
+ help(1);
+ details_fptr_vars = true;
+ details_fptr_consts = true;
+ v_or_q = 'v';
+ break;
+ case 'q':
+ if (v_or_q && v_or_q != 'q')
+ help(1);
+ details_fptr_vars = false;
+ details_fptr_consts = false;
+ v_or_q = 'q';
+ break;
+ case 'h':
+ help(0);
+ return 0;
+ default:
+ help(1);
+ }
+ }
+
+ if (optind != argc - 1)
+ help(1);
+
+ inp = argv[optind];
+
+ LLVMMemoryBufferRef memoryBuffer;
+ char *message;
+ int ret;
+
+ // check if we are to read our input file from stdin
+ if (!strcmp(inp, "-")) {
+ inp = "<stdin>";
+ ret = LLVMCreateMemoryBufferWithSTDIN(&memoryBuffer, &message);
+ } else {
+ ret = LLVMCreateMemoryBufferWithContentsOfFile(
+ inp, &memoryBuffer, &message);
+ }
+
+ if (ret) {
+ fprintf(stderr, "failed to open %s: %s\n", inp, message);
+ free(message);
+ return 1;
+ }
+
+ // now create our module using the memorybuffer
+ LLVMModuleRef module;
+ if (LLVMParseBitcode2(memoryBuffer, &module)) {
+ fprintf(stderr, "%s: invalid bitcode\n", inp);
+ LLVMDisposeMemoryBuffer(memoryBuffer);
+ return 1;
+ }
+
+ // done with the memory buffer now, so dispose of it
+ LLVMDisposeMemoryBuffer(memoryBuffer);
+
+ struct json_object *js_root, *js_funcs, *js_special;
+
+ js_root = json_object_new_object();
+ js_funcs = json_object_new_object();
+ json_object_object_add(js_root, "functions", js_funcs);
+ js_special = json_object_new_object();
+ json_object_object_add(js_root, "special", js_special);
+
+ // loop through all the functions in the module
+ for (LLVMValueRef function = LLVMGetFirstFunction(module); function;
+ function = LLVMGetNextFunction(function)) {
+ if (LLVMIsDeclaration(function))
+ continue;
+
+ process_fn(js_funcs, js_special, function);
+ }
+
+ if (out) {
+ char tmpout[strlen(out) + 5];
+
+ snprintf(tmpout, sizeof(tmpout), "%s.tmp", out);
+ ret = json_object_to_file_ext(tmpout, js_root,
+ JSON_C_TO_STRING_PRETTY |
+ JSON_C_TO_STRING_PRETTY_TAB |
+ JSON_C_TO_STRING_NOSLASHESCAPE);
+ if (ret < 0) {
+ fprintf(stderr, "could not write JSON to file\n");
+ return 1;
+ }
+ if (rename(tmpout, out)) {
+ fprintf(stderr, "could not rename JSON output: %s\n",
+ strerror(errno));
+ unlink(tmpout);
+ return 1;
+ }
+ } else {
+ ret = json_object_to_fd(1, js_root,
+ JSON_C_TO_STRING_PRETTY |
+ JSON_C_TO_STRING_PRETTY_TAB |
+ JSON_C_TO_STRING_NOSLASHESCAPE);
+ if (ret < 0) {
+ fprintf(stderr, "could not write JSON to stdout\n");
+ return 1;
+ }
+ }
+
+ LLVMDisposeModule(module);
+
+ return 0;
+}
diff --git a/tools/subdir.am b/tools/subdir.am
index c637db6eb..69c696722 100644
--- a/tools/subdir.am
+++ b/tools/subdir.am
@@ -8,6 +8,10 @@ noinst_PROGRAMS += \
tools/gen_yang_deviations \
# end
+EXTRA_PROGRAMS += \
+ tools/frr-llvm-cg \
+ # end
+
sbin_PROGRAMS += tools/ssd
sbin_SCRIPTS += \
tools/frr-reload \
@@ -31,6 +35,14 @@ tools_gen_yang_deviations_LDADD = lib/libfrr.la $(LIBYANG_LIBS)
tools_ssd_SOURCES = tools/start-stop-daemon.c
+# don't bother autoconf'ing these for a simple optional tool
+llvm_config = llvm-config-$(shell echo __clang_major__ | $(CC) -xc -P -E -)
+tools_frr_llvm_cg_CFLAGS = $(AM_CFLAGS) `$(llvm_config) --cflags`
+tools_frr_llvm_cg_LDFLAGS = `$(llvm_config) --ldflags --libs`
+tools_frr_llvm_cg_SOURCES = \
+ tools/frr-llvm-cg.c \
+ # end
+
EXTRA_DIST += \
tools/etc \
tools/frr-reload \