diff options
author | Quentin Young <qlyoung@cumulusnetworks.com> | 2017-05-30 02:16:52 +0200 |
---|---|---|
committer | Quentin Young <qlyoung@cumulusnetworks.com> | 2017-07-02 01:18:06 +0200 |
commit | 4db0cff16a1b16671384102f1876bebb7f481d89 (patch) | |
tree | d5d003e63905b0f647cbc798f25fbf7255199f96 /lib/hash.c | |
parent | Merge pull request #771 from qlyoung/printf-madness (diff) | |
download | frr-4db0cff16a1b16671384102f1876bebb7f481d89.tar.xz frr-4db0cff16a1b16671384102f1876bebb7f481d89.zip |
lib: add statistics for hash tables
Adds a function that calculates various statistics on our implementation
of a hash table. These are useful for evaluating performance.
Signed-off-by: Quentin Young <qlyoung@cumulusnetworks.com>
Diffstat (limited to 'lib/hash.c')
-rw-r--r-- | lib/hash.c | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/lib/hash.c b/lib/hash.c index 553a137eb..210b006c5 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -19,14 +19,21 @@ */ #include <zebra.h> +#include <math.h> #include "hash.h" #include "memory.h" +#include "linklist.h" +#include "termtable.h" +#include "vty.h" +#include "command.h" DEFINE_MTYPE( LIB, HASH, "Hash") DEFINE_MTYPE( LIB, HASH_BACKET, "Hash Bucket") DEFINE_MTYPE_STATIC(LIB, HASH_INDEX, "Hash Index") +static struct list *_hashes; + /* Allocate a new hash. */ struct hash * hash_create_size (unsigned int size, unsigned int (*hash_key) (void *), @@ -43,6 +50,7 @@ hash_create_size (unsigned int size, unsigned int (*hash_key) (void *), hash->hash_key = hash_key; hash->hash_cmp = hash_cmp; hash->count = 0; + hash->name = NULL; return hash; } @@ -282,6 +290,148 @@ hash_clean (struct hash *hash, void (*free_func) (void *)) void hash_free (struct hash *hash) { + hash_unregister (hash); XFREE (MTYPE_HASH_INDEX, hash->index); XFREE (MTYPE_HASH, hash); } + +/** + * Calculates some statistics on the given hash table that can be used to + * evaluate performance. + * + * Summary statistics calculated are: + * + * - Load factor: This is the number of elements in the table divided by the + * number of buckets. Since this hash table implementation uses chaining, + * this value can be greater than 1. This number provides information on how + * 'full' the table is, but does not provide information on how evenly + * distributed the elements are. Notably, a load factor >= 1 does not imply + * that every bucket has an element; with a pathological hash function, all + * elements could be in a single bucket. + * + * - Std. Dev.: This is the standard deviation from the load factor. If the LF + * is the mean of number of elements per bucket, the standard deviation + * measures how much any particular bucket is likely to deviate from the + * mean. As a rule of thumb this number should be less than 2, and ideally + * less than 1 for optimal performance. A number larger than 3 generally + * indicates a poor hash function. + * + * - Max: Number of elements in the most overloaded bucket(s). + * - Min: Number of elements in the most underloaded bucket(s). + * + * - Empty: Number of empty buckets + * - Avg: average number of elements among the set of full buckets (like load factor but without empty buckets) + * + * Total number of buckets is precomputed and resides in h->size. + * Total number of elements is precomputed and resides in h->count. + */ +void +hash_stats (struct hash *h, double *lf, double *stddev, int *max, int *min, int *empty, double *avg) +{ + struct hash_backet *hb; // iteration pointer + struct hash_backet *next; // iteration pointer + unsigned int backets = 0; // total number of items in ht + int buckets[h->size]; // # items per bucket + unsigned int full; // # buckets with items + + *max = *min = *lf = *stddev = *avg = 0; + *empty = h->size; + + if (h->size == 0 || h->count == 0) + return; + + *empty = 0; + + memset (buckets, 0x00, h->size * sizeof (int)); + + /* collect some important info */ + for (unsigned int i = 0; i < h->size; i++) + { + for (hb = h->index[i]; hb; hb = next) + { + buckets[i]++; + next = hb->next; + backets++; + } + *max = MAX (buckets[i], *max); + *min = MIN (buckets[i], *min); + + if (buckets[i] == 0) + *empty += 1; + } + + assert (backets == h->count); + full = h->size - *empty; + + *lf = h->count / (double) h->size; + *avg = h->count / (double) full; + + if (h->count == 0) + return; + + /* compute population stddev */ + for (unsigned int i = 0; i < h->size; i++) { + if (buckets[i] > 0) + *stddev += pow(((double) buckets[i] - *avg), 2.0); + } + + *stddev = sqrt((1.0/h->size) * *stddev); +} + +void +hash_register (struct hash *h, const char *name) +{ + h->name = name; + listnode_add (_hashes, h); +} + +void +hash_unregister (struct hash *h) +{ + listnode_delete (_hashes, h); +} + +DEFUN(show_hash_stats, + show_hash_stats_cmd, + "show hashtable <statistics>", + SHOW_STR + "Statistics about critical hash tables\n" + "Statistics about critical hash tables\n") +{ + struct hash *h; + struct listnode *ln; + struct ttable *tt = ttable_new (&ttable_styles[TTSTYLE_BLANK]); + double lf, stddev, avg; + int max, min, empty; + + ttable_add_row (tt, "Hash table|Buckets|Entries|Empty|LF|Mean|SD|Max|Min"); + tt->style.cell.lpad = 1; + tt->style.cell.rpad = 2; + ttable_restyle (tt); + ttable_rowseps (tt, 0, BOTTOM, true, '-'); + + for (ALL_LIST_ELEMENTS_RO (_hashes, ln, h)) + { + if (h->name == NULL) + continue; + + hash_stats (h, &lf, &stddev, &max, &min, &empty, &avg); + ttable_add_row (tt, "%s|%d|%d|%.0f%%|%.2f|%.2f|%.2f|%d|%d", h->name, + h->size, h->count, (empty / (double) h->size)*100, lf, avg, stddev, + max, min); + } + + char *table = ttable_dump (tt, VTY_NEWLINE); + vty_out (vty, "%s%s%s", VTY_NEWLINE, table, VTY_NEWLINE); + XFREE (MTYPE_TMP, table); + ttable_del (tt); + + return CMD_SUCCESS; +} + +void +hash_cmd_init () +{ + _hashes = list_new(); + install_element (ENABLE_NODE, &show_hash_stats_cmd); +} |