diff options
author | Daniel Baumann <daniel@debian.org> | 2024-11-21 15:51:37 +0100 |
---|---|---|
committer | Daniel Baumann <daniel@debian.org> | 2024-11-21 15:51:37 +0100 |
commit | ebb64aabedd789b5affbf30f03e43fcf3a0561f4 (patch) | |
tree | ec4dd9937434be85039f900efcc48c75c182d81d /src/sql.cpp | |
parent | Initial commit. (diff) | |
download | packetq-ebb64aabedd789b5affbf30f03e43fcf3a0561f4.tar.xz packetq-ebb64aabedd789b5affbf30f03e43fcf3a0561f4.zip |
Adding upstream version 1.7.3+dfsg.upstream/1.7.3+dfsgupstream
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to 'src/sql.cpp')
-rw-r--r-- | src/sql.cpp | 2696 |
1 files changed, 2696 insertions, 0 deletions
diff --git a/src/sql.cpp b/src/sql.cpp new file mode 100644 index 0000000..94ad7ed --- /dev/null +++ b/src/sql.cpp @@ -0,0 +1,2696 @@ +/* + * Copyright (c) 2017-2024 OARC, Inc. + * Copyright (c) 2011-2017, IIS - The Internet Foundation in Sweden + * All rights reserved. + * + * This file is part of PacketQ. + * + * PacketQ is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PacketQ is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PacketQ. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "config.h" + +#include "sql.h" +#include "output.h" +#include "packet_handler.h" +#include "packetq.h" +#include "reader.h" + +#include <unordered_map> +#include <utility> +#include <vector> +#ifdef WIN32 +#include <windows.h> +#endif +#ifdef HAVE_LIBMAXMINDDB +#include <maxminddb.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +static MMDB_s* __cc_mmdb = 0; +static MMDB_s* __asn_mmdb = 0; +#endif + +namespace packetq { + +bool verbose = false; + +int g_allocs = 0; + +Column* Table::add_column(const char* name, const char* type, int id, bool hidden) +{ + if (!type) + return add_column(name, Coltype::_text, id, hidden); + else if (strcmp(type, "bool") == 0) + return add_column(name, Coltype::_bool, id, hidden); + else if (strcmp(type, "int") == 0) + return add_column(name, Coltype::_int, id, hidden); + else if (strcmp(type, "float") == 0) + return add_column(name, Coltype::_float, id, hidden); + else + return add_column(name, Coltype::_text, id, hidden); +} + +Column* Table::add_column(const char* name, Coltype::Type type, int id, bool hidden) +{ + Column* col = new Column(name, type, id, hidden); + col->m_offset = Table::align(m_curpos, col->m_def.m_align); + m_curpos = col->m_offset + col->m_def.m_size; + if (type == Coltype::_text) + m_text_column_offsets.push_back(col->m_offset); + m_cols.push_back(col); + return col; +} + +void Table::delete_row(Row* row) +{ + row->decref_text_columns(m_text_column_offsets); + m_row_allocator->deallocate(row); +} + +Row* Table::create_row() +{ + if (!m_row_allocator) { + m_rsize = sizeof(Row) - ROW_DUMMY_SIZE; // exclude the dummy + m_dsize = m_curpos; + m_row_allocator = new Allocator<Row>(m_rsize + m_dsize, 10000); + } + + Row* r = m_row_allocator->allocate(); + r->zero_text_columns(m_text_column_offsets); + return r; +} + +void Table::add_row(Row* row) +{ + m_rows.push_back(row); +} + +int g_comp = 0; + +void Ordering_terms::compile(const std::vector<Table*>& tables, const std::vector<int>& search_order, Query& q) +{ + for (auto it = m_terms.begin(); it != m_terms.end(); it++) { + OP* op = it->m_op; + it->m_op = op->compile(tables, search_order, q); + } +} + +class Sorter { +public: + Sorter(Ordering_terms& order) + : m_order(order) + { + } + bool operator()(Row* ia, Row* ib) + { + // this works under the assumption that the ordering terms have + // been compiled with only one table so the row index i is 0 + Row** ia_rows = &ia; + Row** ib_rows = &ib; + + for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) { + g_comp++; + + OP* op = it->m_op; + op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a); + op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b); + int res = m_a.cmp(m_b); + if (res < 0) + return true; + if (res > 0) + return false; + } + return false; + } + bool eq(Row* ia, Row* ib) + { + Row** ia_rows = &ia; + Row** ib_rows = &ib; + + for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) { + g_comp++; + + OP* op = it->m_op; + op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a); + op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b); + int res = m_a.cmp(m_b); + if (res != 0) + return false; + } + return true; + } + int cmp(Row* ia, Row* ib) + { + Row** ia_rows = &ia; + Row** ib_rows = &ib; + + for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) { + g_comp++; + + OP* op = it->m_op; + op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a); + op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b); + int res = m_a.cmp(m_b); + if (res != 0) + return res; + } + return 0; + } + Ordering_terms& m_order; + Variant m_a, m_b; +}; + +struct Stki { + int s; + int l; + int b; +}; + +struct Spkt { + Variant cache; + int row; +}; + +class Per_sort { +public: + struct Tlink { + Tlink() + : m_next(0) + , row(0) + , m_eq(0) + { + } + + Tlink* get_eq() { return m_eq; } + void reset() + { + m_next = 0; + row = 0; + m_eq = 0; + } + void add_eq(Tlink* o) + { + if (!o->m_eq) { + // add single + if (!m_eq) { + // as first + m_eq = o; + o->m_eq_last = o; + return; + } + // to list + m_eq->m_eq_last->m_eq = o; + m_eq->m_eq_last = o; + return; + } else { + // add list + if (!m_eq) { + // as first + m_eq = o; + o->m_eq_last = o->m_eq->m_eq_last; + return; + } + // to list + m_eq->m_eq_last->m_eq = o; + m_eq->m_eq_last = o->m_eq->m_eq_last; + } + } + union { + Tlink* m_next; + Tlink* m_eq_last; + }; + Row* row; + Variant cache; + + private: + Tlink* m_eq; + }; + struct List { + public: + void reset() + { + m_size = 0; + m_fl[0] = 0; + m_fl[1] = 0; + } + int m_size; + Tlink* m_fl[2]; + }; + Per_sort(Table& table, Ordering_terms& order) + : m_sorter(order) + , m_table(table) + { + m_escalate_sort = order.m_terms.size() > 1; + m_asc = order.m_terms.begin()->m_asc ? 1 : -1; + m_op = order.m_terms.begin()->m_op; + memset(m_groups, 0, sizeof(m_groups)); + m_current.reset(); + } + inline bool add_to_list(List& list, Tlink* t) + { + if (list.m_fl[0] == 0) { + list.m_fl[0] = list.m_fl[1] = t; + list.m_size = 1; + return true; + } + list.m_fl[1]->m_next = t; + list.m_fl[1] = t; + list.m_size++; + return true; + } + inline bool insert_into_list(List& list, Tlink* t) + { + if (list.m_fl[0] == 0) { + list.m_fl[0] = list.m_fl[1] = t; + list.m_size = 1; + return true; + } + int cmp0 = cmp(t, list.m_fl[0]); + if (cmp0 == 0) { + list.m_fl[0]->add_eq(t); + return true; + } + if (cmp0 < 0) { + t->m_next = list.m_fl[0]; + list.m_fl[0] = t; + list.m_size++; + return true; + } + int cmp1 = cmp(t, list.m_fl[1]); + if (cmp1 == 0) { + list.m_fl[1]->add_eq(t); + return true; + } + if (cmp1 > 0) { + list.m_fl[1]->m_next = t; + list.m_fl[1] = t; + list.m_size++; + return true; + } + return false; + } + int cmp(Tlink* a, Tlink* b) + { + int cmp = a->cache.cmp(b->cache) * m_asc; + if (cmp != 0 || !m_escalate_sort) + return cmp; + return m_sorter.cmp(a->row, b->row); + } + inline void add(Tlink* t) + { + if (!insert_into_list(m_current, t)) { + insert_list(m_current); + m_current.reset(); + insert_into_list(m_current, t); + } + } + inline void insert_list(List& l) + { + unsigned int size = l.m_size; + int offs = 0; + size >>= 1; + while (size != 0) { + offs++; + size >>= 1; + } + if (m_groups[offs].m_size != 0) { + List m = merge(m_groups[offs], l); + m_groups[offs].reset(); + insert_list(m); + } else + m_groups[offs] = l; + } + List merge(List& l1, List& l2) + { + List r; + r.reset(); + Tlink* a = l1.m_fl[0]; + Tlink* b = l2.m_fl[0]; + if (!a) + return l2; + if (!b) + return l1; + + int size = l1.m_size + l2.m_size; + + while (a && b) { + int c = cmp(a, b); + if (c == 0) { + Tlink* a2 = a; + Tlink* b2 = b; + a = a->m_next; + b = b->m_next; + a2->m_next = 0; + b2->m_next = 0; + a2->add_eq(b2); + add_to_list(r, a2); + size--; + } else if (c < 0) { + Tlink* a2 = a; + a = a->m_next; + a2->m_next = 0; + add_to_list(r, a2); + } else { + Tlink* b2 = b; + b = b->m_next; + b2->m_next = 0; + add_to_list(r, b2); + } + } + if (a) { + r.m_fl[1]->m_next = a; + r.m_fl[1] = l1.m_fl[1]; + } + if (b) { + r.m_fl[1]->m_next = b; + r.m_fl[1] = l2.m_fl[1]; + } + + l1.reset(); + l2.reset(); + r.m_size = size; + return r; + } + + void sort() + { + int table_size = (int)m_table.m_rows.size(); + if (table_size <= 1) + return; + auto it = m_table.m_rows.begin(); + Tlink* links = new Tlink[table_size]; + + int i; + for (i = 0; i < table_size; i++) { + Tlink& r = links[i]; + r.reset(); + r.row = *it; + // &row works under the assumption that m_op has been compiled with + // this table only so row index is 0 + m_op->evaluate(&r.row, r.cache); + it++; + add(&r); + } + if (m_current.m_size) + insert_list(m_current); + List result; + result.reset(); + for (i = 0; i < sizeof(m_groups) / sizeof(List); i++) { + if (m_groups[i].m_size > 0) { + result = merge(result, m_groups[i]); + m_groups[i].reset(); + } + } + Tlink* p = result.m_fl[0]; + it = m_table.m_rows.begin(); + while (p) { + *it++ = p->row; + Tlink* e = p->get_eq(); + while (e) { + *it++ = e->row; + e = e->get_eq(); + } + p = p->m_next; + }; + + delete[] links; + } + + OP* m_op; + Sorter m_sorter; + bool m_escalate_sort; + int m_asc; + Table& m_table; + List m_groups[32]; + List m_current; +}; + +void Table::per_sort(Ordering_terms& order) +{ + Per_sort sort(*this, order); + + sort.sort(); + + return; +} + +void Table::merge_sort(Ordering_terms& order) +{ + Sorter sorter(order); + + bool escalate_sort = order.m_terms.size() > 1; + int asc = order.m_terms.begin()->m_asc ? 1 : -1; + OP* op = order.m_terms.begin()->m_op; + + int table_size = (int)m_rows.size(); + if (table_size <= 1) + return; + Row** row_ptrs = new Row*[table_size]; + Spkt* spktpool = new Spkt[table_size * 2]; + Spkt* rows[2]; + rows[0] = spktpool; + rows[1] = &spktpool[table_size]; + auto it = m_rows.begin(); + + int i = 0; + Spkt* r = rows[0]; + for (i = 0; i < table_size; i += 2) { + + row_ptrs[i] = *it++; + r[i].row = i; + // &row works under the assumption that m_op has been compiled with + // this table only so row index is 0 + op->evaluate(&row_ptrs[r[i].row], r[i].cache); + if (i + 1 < table_size) { + row_ptrs[i + 1] = *it++; + r[i + 1].row = i + 1; + op->evaluate(&row_ptrs[r[i + 1].row], r[i + 1].cache); + } + } + int swap = 0; + + Stki stack[64]; + Stki* sp = stack; + sp->s = 0; + sp->l = 2; + sp->b = 1; + rows[1][0] = rows[0][1]; + rows[1][1] = rows[0][0]; + + sp--; + + int npos = 0; + while (true) { + int start, len; + if (sp > stack && sp->l == sp[-1].l) { + // two equal size -> merge + + len = sp->l <<= 1; + start = sp[-1].s; + swap = sp[-1].b; + sp--; + sp--; + } else { + start = npos; + npos += 2; + swap = 0; + len = 2; + } + + int cnt = start + len > table_size ? table_size - start : len; + Spkt* s = rows[swap]; + Spkt* d = rows[1 - swap]; + if (cnt > 0) { + int p1 = start; + int p2 = start + (len >> 1); + int l1 = len >> 1; + int l2 = l1; + if (p1 + l1 > table_size) + l1 = table_size - p1; + if (p2 + l2 > table_size) + l2 = table_size - p2; + + i = start; + while (cnt-- > 0) { + if (l1 <= 0) { + d[i++] = s[p2++]; + } else if (l2 <= 0) { + d[i++] = s[p1++]; + } else { + int cmp = s[p1].cache.cmp(s[p2].cache) * asc; + if (cmp < 0 || (cmp == 0 && escalate_sort && sorter(row_ptrs[s[p1].row], row_ptrs[s[p2].row]))) { + l1--; + d[i++] = s[p1++]; + } else { + l2--; + d[i++] = s[p2++]; + } + } + } + } + ++sp; + sp->l = len; + sp->s = start; + sp->b = 1 - swap; + if (len > table_size) { + for (it = m_rows.begin(); it != m_rows.end(); it++) { + *it = row_ptrs[(d++)->row]; + } + + break; + } + } + delete[] row_ptrs; + delete[] spktpool; + return; +} + +void Table::limit(int limit, int offset) +{ + int count = 0; + auto e = m_rows.end(); + for (auto it = m_rows.begin(); it != m_rows.end(); it++) { + if (e != m_rows.end()) { + delete_row(*e); + m_rows.erase(e); + e = m_rows.end(); + } + + int l = count++; + if (l < offset || l >= offset + limit) { + e = it; + } + } + if (e != m_rows.end()) { + delete_row(*e); + m_rows.erase(e); + } +} + +void printrep(int n, char c) +{ + if (n >= 3000) + return; + char buf[3000]; + int i; + for (i = 0; i < n; i++) + buf[i] = c; + buf[i] = 0; + printf("%s", buf); +} + +void Table::xml() +{ + g_output.reset(); + int cols = (int)m_cols.size(); + + g_output.add_string("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); + g_output.add_string("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"); + g_output.add_string("<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n<head>\n <title>"); + g_output.add_string(m_name.c_str()); + g_output.add_string("</title>\n"); + g_output.add_string("<style type=\"text/css\">\n"); + g_output.add_string(" th.int { color: #0F0C00; }\n"); + g_output.add_string(" th.float { color: #0F0900; }\n"); + g_output.add_string(" th.text { color: #0F0600; }\n"); + g_output.add_string(" th.bool { color: #0C0900; }\n"); + g_output.add_string("</style>\n"); + g_output.add_string("</head>\n"); + g_output.add_string("<body>\n"); + g_output.add_string("<table>\n"); + + g_output.add_string("<tr>"); + + for (int i = 0; i < cols; i++) { + if (m_cols[i]->m_hidden) + continue; + + const char* t = ""; + switch (m_cols[i]->m_type) { + case (Coltype::_float): + t = "float"; + break; + case (Coltype::_int): + t = "int"; + break; + case (Coltype::_text): + t = "text"; + break; + case (Coltype::_bool): + t = "bool"; + break; + } + g_output.add_string("<th class=\""); + g_output.add_string(t); + g_output.add_string("\">"); + g_output.add_string(m_cols[i]->m_name.c_str()); + g_output.add_string("</th>"); + } + g_output.add_string("</tr>\n"); + for (auto it = m_rows.begin(); it != m_rows.end(); it++) { + g_output.add_string("<tr>"); + Row* r = *it; + + Variant v; + for (int i = 0; i < cols; i++) { + Column* c = m_cols[i]; + + if (c->m_hidden) + continue; + + int offset = c->m_offset; + + static const int bufsize = 100; + char buf[bufsize]; + + g_output.add_string("<td>"); + switch (c->m_type) { + case Coltype::_bool: + g_output.add_string(r->access_column<bool_column>(offset) ? "1" : "0"); + break; + case Coltype::_int: + snprintf(buf, bufsize, "%i", r->access_column<int_column>(offset)); + g_output.add_string(buf); + break; + case Coltype::_float: + snprintf(buf, bufsize, "%g", r->access_column<float_column>(offset)); + g_output.add_string(buf); + break; + case Coltype::_text: + g_output.add_string(r->access_column<text_column>(offset)->data); + break; + } + g_output.add_string("</td> "); + } + + g_output.add_string("</tr>\n"); + } + g_output.add_string("</table>\n"); + g_output.add_string("</body>\n"); + g_output.add_string("</html>\n"); + g_output.print(); +} + +void Table::json(bool trailing_comma) +{ + g_output.reset(); + int cols = (int)m_cols.size(); + + g_output.add_string(" {\n "); + + g_output.add_q_string("table_name"); + g_output.add_string(": "); + g_output.add_q_string(m_name.c_str()); + g_output.add_string(",\n "); + + g_output.add_q_string("query"); + g_output.add_string(": "); + g_output.add_q_string(m_qstring.c_str()); + g_output.add_string(",\n "); + + g_output.add_q_string("head"); + g_output.add_string(": ["); + + bool append_comma = false; + for (int i = 0; i < cols; i++) { + if (m_cols[i]->m_hidden) + continue; + + if (append_comma) + g_output.add_string(",\n"); + else + g_output.add_string("\n"); + append_comma = true; + g_output.add_string(" { "); + g_output.add_q_string("name"); + g_output.add_string(": "); + g_output.add_q_string(m_cols[i]->m_name.c_str()); + g_output.add_string(","); + g_output.add_q_string("type"); + g_output.add_string(": "); + const char* t = ""; + switch (m_cols[i]->m_type) { + case (Coltype::_float): + t = "float"; + break; + case (Coltype::_int): + t = "int"; + break; + case (Coltype::_text): + t = "text"; + break; + case (Coltype::_bool): + t = "bool"; + break; + } + g_output.add_q_string(t); + g_output.add_string(" }"); + } + g_output.add_string("\n ],\n "); + g_output.add_q_string("data"); + g_output.add_string(": ["); + bool outer_comma = false; + for (auto it = m_rows.begin(); it != m_rows.end(); it++) { + if (outer_comma) + g_output.add_string(",\n ["); + else + g_output.add_string("\n ["); + outer_comma = true; + bool comma = false; + Row* r = *it; + + for (int i = 0; i < cols; i++) { + Column* c = m_cols[i]; + + if (c->m_hidden) + continue; + + if (comma) + g_output.add_string(","); + + comma = true; + + int offset = c->m_offset; + static const int bufsize = 100; + char buf[bufsize]; + + switch (c->m_type) { + case Coltype::_bool: + g_output.add_string(r->access_column<bool_column>(offset) ? "1" : "0"); + break; + case Coltype::_int: + snprintf(buf, bufsize, "%i", r->access_column<int_column>(offset)); + g_output.add_string(buf); + break; + case Coltype::_float: + snprintf(buf, bufsize, "%g", r->access_column<float_column>(offset)); + g_output.add_string(buf); + break; + case Coltype::_text: + g_output.add_q_string(r->access_column<text_column>(offset)->data); + break; + } + } + + g_output.add_string("]"); + } + g_output.add_string("\n ]\n"); + if (trailing_comma) { + g_output.add_string(" },\n"); + } else { + g_output.add_string(" }\n"); + } + g_output.print(); +} + +std::string csv_qoute_string(const std::string& s) +{ + std::string r = "\""; + int len = s.length(); + for (int i = 0; i < len; i++) { + if (s[i] == '"') { + r += '"'; + } + r += s[i]; + } + r += '"'; + return r; +} + +void Table::csv(bool format) +{ + int cols = (int)m_cols.size(); + std::vector<int> col_len(cols); + + for (int i = 0; i < cols; i++) + col_len[i] = 0; + int max = 0; + char* tmp = 0; + if (format) { + for (auto it = m_rows.begin(); it != m_rows.end(); it++) { + Row* r = *it; + + for (int i = 0; i < cols; i++) { + Column* c = m_cols[i]; + + if (c->m_hidden) + continue; + + int len = 0; + + int offset = c->m_offset; + static const int bufsize = 100; + char buf[bufsize]; + + switch (c->m_type) { + case Coltype::_bool: + len = 1; + break; + case Coltype::_int: + snprintf(buf, bufsize, "%i", r->access_column<int_column>(offset)); + len = strlen(buf); + break; + case Coltype::_float: + snprintf(buf, bufsize, "%g", r->access_column<float_column>(offset)); + len = strlen(buf); + break; + case Coltype::_text: + len = csv_qoute_string(r->access_column<text_column>(offset)->data).length(); + break; + } + len++; + if (len > col_len[i]) + col_len[i] = len; + if (len > max) + max = len; + } + } + for (int i = 0; i < cols; i++) { + if (m_cols[i]->m_hidden) + continue; + + int l = csv_qoute_string(m_cols[i]->m_name).length(); + l++; + if (l > col_len[i]) + col_len[i] = l; + if (l > max) + max = l; + } + + tmp = new char[max + 1]; + for (int i = 0; i < max; i++) + tmp[i] = 32; + tmp[max] = 0; + } + + for (int i = 0; i < cols; i++) { + if (m_cols[i]->m_hidden) + continue; + + printf("%s", csv_qoute_string(m_cols[i]->m_name).c_str()); + if (i < cols - 1) { + size_t pos = csv_qoute_string(m_cols[i]->m_name).length() + max - col_len[i] + 1; + if (format && pos < max) { + printf("%s,", &tmp[pos]); + } else { + printf(","); + } + } + } + printf("\n"); + for (auto it = m_rows.begin(); it != m_rows.end(); it++) { + Row* r = *it; + + for (int i = 0; i < cols; i++) { + Column* c = m_cols[i]; + + if (c->m_hidden) + continue; + + int offset = c->m_offset; + static const int bufsize = 100; + char buf[bufsize]; + + std::string out; + + switch (c->m_type) { + case Coltype::_bool: + out = r->access_column<bool_column>(offset) ? "1" : "0"; + break; + case Coltype::_int: + snprintf(buf, bufsize, "%i", r->access_column<int_column>(offset)); + out = buf; + break; + case Coltype::_float: + snprintf(buf, bufsize, "%g", r->access_column<float_column>(offset)); + out = buf; + break; + case Coltype::_text: + out = csv_qoute_string(r->access_column<text_column>(offset)->data); + break; + } + + fputs(out.c_str(), stdout); + if (i < cols - 1) { + if (format) { + printf("%s,", &tmp[out.length() + max - col_len[i] + 1]); + } else { + printf(","); + } + } + } + + printf("\n"); + } + delete[] tmp; +} + +void Table::dump() +{ + int cols = (int)m_cols.size(); + int width = 25; + char fmti[40]; + snprintf(fmti, sizeof(fmti) - 1, "%%%dd |", width); + fmti[39] = 0; + char fmtd[40]; + snprintf(fmtd, sizeof(fmtd) - 1, "%%%dg |", width); + fmtd[39] = 0; + char fmts[40]; + snprintf(fmts, sizeof(fmts) - 1, "%%%ds |", width); + fmts[39] = 0; + + printf("Table::dump() table:%s cols:%d\n", m_name.c_str(), cols); + printrep((width + 2) * cols + 1, '-'); + printf("\n"); + printf("|"); + for (int i = 0; i < cols; i++) + printf(fmti, m_cols[i]->m_type); + printf("\n"); + printf("|"); + for (int i = 0; i < cols; i++) + printf(fmts, m_cols[i]->m_name.c_str()); + printf("\n"); + printrep((width + 2) * cols + 1, '*'); + printf("\n"); + for (auto it = m_rows.begin(); it != m_rows.end(); it++) { + printf("|"); + Row* r = *it; + + for (int i = 0; i < cols; i++) { + Column* c = m_cols[i]; + int offset = c->m_offset; + + switch (c->m_type) { + case Coltype::_bool: + printf(fmts, r->access_column<bool_column>(offset) ? "1" : "0"); + break; + case Coltype::_int: + printf(fmti, r->access_column<int_column>(offset)); + break; + case Coltype::_float: + printf(fmtd, r->access_column<float_column>(offset)); + break; + case Coltype::_text: + printf(fmts, r->access_column<text_column>(offset)); + break; + } + } + + printf("\n"); + } + printrep((width + 2) * cols + 1, '-'); + printf("\n"); +} + +class Parser { +private: + Token::Type m_last; + +public: + std::list<Token> m_tokens; + typedef std::list<Token>::iterator Lit; + + Parser() + { + m_last = Token::_invalid; + } + + void push(Token::Type type, const char* string) + { + if (!(type == Token::_semicolon && m_last == Token::_semicolon)) + m_tokens.push_back(Token(type, string)); + m_last = type; + } + + void dump() + { + for (auto it = m_tokens.begin(); it != m_tokens.end(); it++) { + printf("Type %d: %s\n", it->get_type(), it->get_token()); + } + } + + bool analyze(Query& q) + { + auto it = m_tokens.begin(); + bool ok = true; + while (ok) { + ok = false; + if (get_sample_stmt(q, it)) + ok = true; + + if (get_select_stmt(q, it)) + ok = true; + } + + return true; + } + + bool get_sample_stmt(Query& q, Lit& i_iter) + { + Lit it = i_iter; + if (!is(it, Token::_label, "sample")) + return false; + it++; + if (!is(it, Token::_number)) + return false; + int sample = atoi(it->get_token()); + it++; + if (!is(it, Token::_semicolon)) + return false; + + it++; + q.m_sample = sample; + i_iter = it; + return true; + } + + bool get_select_stmt(Query& q, Lit& i_iter) + { + Lit it = i_iter; + if (!get_select_core(q, it)) { + return false; + } + + get_from(q, it); + get_where(q, it); + get_group_by(q, it); + get_order_by(q, it); + get_limit(q, it); + get_as(q, it); + if (!is(it, Token::_semicolon)) { + throw Error("Expected ';' but found '%s' !", it->get_token()); + } + it++; + i_iter = it; + return true; + } + + bool is(Lit& it, Token::Type type, const char* str = 0) + { + if (!str) + return (it->get_type() == type); + + return (it->get_type() == type && cmpi(it->get_token(), str)); + } + + OP* get_result_column(std::list<Token>::iterator& it) + { + OP* res = 0; + Lit save = it; + if (is(it, Token::_op, "*")) { + it++; + return new OP(Token(Token::_column, "*")); + } + if (it->get_type() == Token::_label) { + std::string table = it->get_token(); + it++; + if (is(it, Token::_op, ".")) { + it++; + if (is(it, Token::_op, "*")) { + it++; + std::string c = table + ".*"; + return new OP(Token(Token::_column, c.c_str())); + } + it = save; + return 0; + } + } + it = save; + if ((res = get_expr(it, 0))) { + save = it; + + if (is(it, Token::_label, "as")) { + it++; + if (is(it, Token::_label)) { + res->m_name = it->get_token(); + it++; + } else + it = save; + } + } + return res; + + // check for valid table + } + + bool get_select_core(Query& q, Lit& it) + { + Lit save = it; + if (!is(it, Token::_label, "select")) + return false; + it++; + bool again = true; + bool success = true; + while (again) { + OP* op; + if ((op = get_result_column(it))) { + q.m_select.push_back(op); + } else { + success = false; + break; + } + + if (is(it, Token::_op, ",")) + it++; + else + again = false; + } + if (success) + return true; + + it = save; + return false; + } + + bool get_ordering_terms(Ordering_terms& ordering, std::list<Token>::iterator& it) + { + OP* op; + while ((op = get_expr(it, 0))) { + bool asc = true; + if (it->get_type() == Token::_label) { + if (cmpi(it->get_token(), "asc")) { + // default + } else if (cmpi(it->get_token(), "desc")) { + asc = false; + } else if (cmpi(it->get_token(), "collate")) { + throw Error("unhandled option:collate"); + } else { + ordering.m_terms.push_back(Ordering_terms::OP_dir(op, asc)); + break; + } + + it++; + } + + ordering.m_terms.push_back(Ordering_terms::OP_dir(op, asc)); + + if (!is(it, Token::_op, ",")) + break; + it++; + } + return true; + } + bool get_group_by(Query& q, Lit& it) + { + if (!is(it, Token::_label, "group")) { + return true; + } + it++; + if (!is(it, Token::_label, "by")) + return false; + it++; + bool res = get_ordering_terms(q.m_group_by, it); + get_having(q, it); + return res; + } + bool get_as(Query& q, Lit& it) + { + if (!is(it, Token::_label, "as")) { + return true; + } + it++; + if (!is(it, Token::_label)) + return false; + q.m_result->m_name = it->get_token(); + it++; + return true; + } + + bool get_order_by(Query& q, Lit& it) + { + if (!is(it, Token::_label, "order")) { + return true; + } + it++; + if (!is(it, Token::_label, "by")) + return false; + it++; + return get_ordering_terms(q.m_order_by, it); + } + + bool get_limit(Query& q, Lit& it) + { + Lit save = it; + if (!is(it, Token::_label, "limit")) { + return true; + } + it++; + if (!is(it, Token::_number)) { + it = save; + throw Error("non numeric operand to limit"); + } + q.m_limit = atoi(it->get_token()); + it++; + save = it; + + if (!is(it, Token::_label, "offset")) { + return true; + } + it++; + if (!is(it, Token::_number)) { + it = save; + throw Error("non numeric operand to offset"); + } + q.m_offset = atoi(it->get_token()); + it++; + return true; + } + + bool get_having(Query& q, Lit& it) + { + Lit save = it; + if (!is(it, Token::_label, "having")) + return true; + it++; + OP* res = 0; + if ((res = get_expr(it, 0))) { + q.m_having = res; + return true; + } + it = save; + return false; + } + + bool get_where(Query& q, Lit& it) + { + Lit save = it; + if (!is(it, Token::_label, "where")) + return true; + it++; + OP* res = 0; + if ((res = get_expr(it, 0))) { + q.m_where = res; + return true; + } + it = save; + return false; + } + + bool get_from(Query& q, Lit& it) + { + if (!is(it, Token::_label, "from")) + return false; + it++; + if (it->get_type() == Token::_label) { + const char* name = it->get_token(); + if (get_packet_handler(name)) { + q.m_from_name = name; + it++; + return true; + } else + throw Error("Error in from statement, table '%s' not found", name); + } + throw Error("Error in from statement"); + } + + int get_stack_precedence(std::stack<OP*>& operator_stack) + { + int pre = 0; + if (!operator_stack.empty()) + pre = operator_stack.top()->m_precedence; + return pre; + } + + // using The shunting yard algorithm + OP* get_expr(Lit& it, int rec) + { + std::stack<OP*> operator_stack; + std::stack<OP*> operand_stack; + bool success = true; + bool expect_expr = true; + while (success) { + success = false; + Lit save = it; + Lit next = it; + next++; + + // match literal + if (expect_expr && is_literal(it)) { + OP* op = new OP(*it); + it++; + operand_stack.push(op); + success = true; + expect_expr = false; + continue; + } + // match literal + if (expect_expr && is_unary_op(it)) { + OP* op = new OP(*it); + op->set_type(Token::_uop); + it++; + if (!(op->m_right = get_expr(it, rec + 1))) + throw Error("Got unary '%s' but could not parse following expression", op->get_token()); + + operand_stack.push(op); + success = true; + expect_expr = false; + continue; + } + // match function-name ( + if (expect_expr && is(it, Token::_label) && is(next, Token::_paren, "(")) { + OP* func = new OP(*it); + func->set_type(Token::_function); + it++; + it++; + if (is(it, Token::_op, "*")) { + it->set_type(Token::_number); + it->set_token("1"); + } + func->m_param[0] = get_expr(it, rec + 1); + if (!func->m_param[0]) + throw Error("Missing operand to function"); + operand_stack.push(func); + int n = 1; + while (n < OP::max_param() && is(it, Token::_op, ",")) { + + it++; + func->m_param[n++] = get_expr(it, rec + 1); + } + if (!is(it, Token::_paren, ")")) + throw Error("Expected ) after %s", func->get_token()); + it++; + expect_expr = false; + success = true; + continue; + } + // match [[databasename .] table-name . ] column name + if (expect_expr && is(it, Token::_label)) { + OP* op = new OP(*it); + it++; + success = true; + op->set_type(Token::_column); + operand_stack.push(op); + expect_expr = false; + continue; + } + // match ( expr ) + if (!expect_expr && is(it, Token::_paren, ",")) { + break; + } + if (!expect_expr && is(it, Token::_paren, ")")) { + break; + } + if (expect_expr && is(it, Token::_paren, "(")) { + it++; + OP* op = 0; + if ((op = get_expr(it, rec + 1))) { + if (is(it, Token::_paren, ")")) { + it++; + operand_stack.push(op); + expect_expr = false; + success = true; + continue; + } + throw Error("Error in expression no )"); + } + it = save; + throw Error("Error in expression"); + } + // bin op + if (!expect_expr && is(it, Token::_op, "is") && is(next, Token::_op, "not")) { + it++; + it->set_token("is not"); + } + // bin op + if (!expect_expr && is(it, Token::_op, "not") && is(next, Token::_op, "like")) { + it++; + it->set_token("not like"); + } + if (!expect_expr && is(it, Token::_op) && OP::is_binary(it->get_token())) { + OP* bop = new OP(*it); + while (bop->m_precedence <= get_stack_precedence(operator_stack)) { + + OP* stack_op = operator_stack.top(); + operator_stack.pop(); + if (operand_stack.size() >= 2) { + + OP* stk1 = operand_stack.top(); + stack_op->m_right = stk1; + operand_stack.pop(); + OP* stk2 = operand_stack.top(); + stack_op->m_left = stk2; + operand_stack.pop(); + operand_stack.push(stack_op); + } + } + + operator_stack.push(bop); + it++; + success = true; + expect_expr = true; + continue; + } + } + while (!operator_stack.empty()) { + OP* bop = operator_stack.top(); + operator_stack.pop(); + if (bop) { + if (operand_stack.size() >= 2) { + OP* stk = operand_stack.top(); + bop->m_right = stk; + operand_stack.pop(); + OP* stk2 = operand_stack.top(); + bop->m_left = stk2; + operand_stack.pop(); + operand_stack.push(bop); + } + } + } + if (operand_stack.size() == 0) + return 0; + return operand_stack.top(); + } + + bool is_unary_op(Lit& it) + { + if (is(it, Token::_op, "-")) + return true; + if (is(it, Token::_op, "+")) + return true; + if (is(it, Token::_op, "~")) + return true; + if (is(it, Token::_op, "not")) + return true; + return false; + } + bool is_literal(Lit& it) + { + if (it->get_type() == Token::_number || it->get_type() == Token::_string) + return true; + if (it->get_type() == Token::_label && cmpi(it->get_token(), "null")) + return true; + return false; + } +}; + +class Lexer { +public: + Lexer(Parser& p) + : m_parser(p) + , num_state(_nan) + { + } + + Parser& m_parser; + + enum State { + _unknown, + _white, + _label, + _number, + _op, + _paren, + _string + }; + bool is_white(const char c) { return (c == ' ' || c == 9 || c == 10 || c == 13); } + bool is_char(const char c) { return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); } + bool is_num(const char c) { return ((c >= '0' && c <= '9')); } + bool is_op(const char c) { return (c == '*' || c == ',' || c == '=' || c == '<' || c == '>' || c == '/' || c == '|' || c == '%' || c == '+' || c == '-' || c == '&' || c == '~' || c == '!'); } + bool is_paren(const char c) { return (c == '(' || c == ')'); } + bool is_termination(const char c) { return (c == 0 || c == ';'); } + bool is_quote(const char c) { return (c == '\''); } + bool is_label_start(const char c) { return (is_char(c)); } + bool is_label_part(const char c) { return (is_num(c) || is_char(c) || c == '_'); } + + bool lex(const char* i_str) + { + const char* p = i_str; + State state = _white; + const char* strstart = 0; + bool is_escaped = false; + std::string str = ""; + + while (true) { + switch (state) { + case (_unknown): { + if (is_white(*p)) + state = _white; + else if (is_label_start(*p)) + state = _label; + else if (is_num(*p) || (*p == '.' && is_num(p[1]))) + state = _number; + else if (is_quote(*p)) + state = _string; + else if (is_paren(*p)) + state = _paren; + else if (is_op(*p)) + state = _op; + else if (is_termination(*p)) { + m_parser.push(Token::_semicolon, ";"); + if (*p++ == 0) { + m_parser.push(Token::_end, "END"); + return true; + } + } else { + m_parser.push(Token::_end, "END"); + printf("Unknown char %c (%d) at %d! in statement %s\n", *p, *p, int(p - i_str), i_str); + return false; + } + } break; + case (_white): { + if (is_white(*p)) + p++; + else { + state = _unknown; + } + } break; + case (_string): { + if ((!is_escaped) && is_quote(*p)) { + if (!strstart) { + strstart = p; + str = ""; + } else { + m_parser.push(Token::_string, str.c_str()); + + str = ""; + strstart = 0; + state = _unknown; + is_escaped = false; + } + p++; + } else { + if (is_escaped) { + if (!is_quote(*p)) + str += '\\'; + is_escaped = false; + } + + if (*p == 0) { + printf("Unexpected end of string in statement %s\n", strstart); + m_parser.push(Token::_end, "END"); + return false; + } + if (*p == '\\') + is_escaped = true; + else + str += (*p); + p++; + } + + } break; + case (_number): { + if (parse_num(*p)) { + if (!strstart) + strstart = p; + p++; + } else { + if (!strstart) + throw Error("Numeric problem"); + std::string label = strstart; + label = label.substr(0, p - strstart); + m_parser.push(Token::_number, label.c_str()); + + strstart = 0; + state = _unknown; + } + } break; + case (_label): { + if (!strstart) { + strstart = p++; + } else if (is_label_part(*p)) { + p++; + } else { + std::string label = strstart; + label = label.substr(0, p - strstart); + + Token::Type type = Token::_label; + if (cmpi(label, "is")) + type = Token::_op; + if (cmpi(label, "not")) + type = Token::_op; + if (cmpi(label, "in")) + type = Token::_op; + if (cmpi(label, "like")) + type = Token::_op; + if (cmpi(label, "glob")) + type = Token::_op; + if (cmpi(label, "match")) + type = Token::_op; + if (cmpi(label, "regexp")) + type = Token::_op; + if (cmpi(label, "and")) + type = Token::_op; + if (cmpi(label, "or")) + type = Token::_op; + + m_parser.push(type, label.c_str()); + + strstart = 0; + state = _unknown; + } + + } break; + case (_paren): { + std::string s; + s = *p; + m_parser.push(Token::_paren, s.c_str()); + p++; + state = _unknown; + } break; + case (_op): { + std::string s; + s = *p; + char n = p[1]; + switch (*p) { + case ('|'): + if (n == '|') { + p++; + s += *p; + } + break; + case ('>'): + if (n == '=') { + p++; + s += *p; + } + if (n == '>') { + p++; + s += *p; + } + break; + case ('<'): + if (n == '<') { + p++; + s += *p; + } + if (n == '=') { + p++; + s += *p; + } + if (n == '>') { + p++; + s += *p; + } + break; + case ('='): + if (n == '=') { + p++; + s += *p; + } + break; + case ('!'): + if (n == '=') { + p++; + s += *p; + } + break; + } + m_parser.push(Token::_op, s.c_str()); + p++; + state = _unknown; + } break; + default: + printf("Missing impl char %c at %d! in statement %s\n", *p, int(p - i_str), i_str); + m_parser.push(Token::_end, "END"); + return false; + } + } + } + bool parse_num(const char p) + { + switch (num_state) { + case (_nan): + num_state = _int; + if (p == '.') + num_state = _dot; + break; + case (_int): + if (p == '.') + num_state = _dot; + else if (!is_num(p)) { + num_state = _nan; + return false; + } + break; + case (_dot): + if (p == 'E' || p == 'e') + num_state = _e; + else if (!is_num(p)) { + num_state = _nan; + return false; + } + break; + case (_dec): + if (p == 'E' || p == 'e') + num_state = _e; + else if (!is_num(p)) { + num_state = _nan; + return false; + } + break; + case (_e): + if (p == '+' || p == '-') + num_state = _sign; + else if (is_num(p)) + num_state = _exp; + else { + throw Error("expected number digit after E"); + } + break; + case (_sign): + if (!is_num(p)) + throw Error("expected number digit after E"); + num_state = _exp; + break; + case (_exp): + if (!is_num(p)) { + num_state = _nan; + return false; + } + break; + } + return true; + } + enum Num_state { + _nan, + _int, + _dot, + _dec, + _e, + _sign, + _exp, + }; + + Num_state num_state; +}; + +void Query::parse() +{ + Parser p; + Lexer l(p); + l.lex(m_sql.c_str()); + // p.dump(); + if (!p.analyze(*this)) + throw Error("error parsing select statement"); +} + +// return column and index in tables, or 0 for column if column isn't found +std::pair<Column*, int> lookup_column_in_tables(const std::vector<Table*>& tables, + const std::vector<int>& search_order, + const char* name) +{ + if (strcmp(name, "*") == 0) + return std::pair<Column*, int>((Column*)0, 0); + + for (auto i = search_order.begin(); i != search_order.end(); ++i) { + Table* table = tables[*i]; + int col_index = table->get_col_index(name); + if (col_index >= 0) + return std::pair<Column*, int>(table->m_cols[col_index], *i); + } + + return std::pair<Column*, int>((Column*)0, 0); +} + +OP* OP::compile(const std::vector<Table*>& tables, const std::vector<int>& search_order, Query& q) +{ + OP* ret = 0; + for (int i = 0; i < max_param(); i++) { + if (m_param[i]) { + m_param[i] = m_param[i]->compile(tables, search_order, q); + if (m_param[i]->m_has_aggregate_function) + m_has_aggregate_function = true; + } + } + + if (m_left) { + m_left = m_left->compile(tables, search_order, q); + if (m_left->m_has_aggregate_function) + m_has_aggregate_function = true; + } + + if (m_right) { + m_right = m_right->compile(tables, search_order, q); + if (m_right->m_has_aggregate_function) + m_has_aggregate_function = true; + } + + // default to destination row + m_row_index = tables.size() - 1; + + if (get_type() == _column) { + auto lookup = lookup_column_in_tables(tables, search_order, get_token()); + Column* column = lookup.first; + m_row_index = lookup.second; + + if (!column) + throw Error("Column '%s' not found", get_token()); + + int offset = column->m_offset; + + m_t = column->m_type; + + switch (m_t) { + case Coltype::_int: + ret = new Column_access_int(*this, offset); + break; + case Coltype::_bool: + ret = new Column_access_bool(*this, offset); + break; + case Coltype::_float: + ret = new Column_access_float(*this, offset); + break; + case Coltype::_text: + ret = new Column_access_text(*this, offset); + break; + } + } else if (get_type() == _number) { + const char* p = get_token(); + bool integer = true; + while (*p != 0) { + if (*p < '0' || *p > '9') + integer = false; + p++; + } + if (integer) { + m_t = Coltype::_int; + ret = new Static_int(*this); + } else { + m_t = Coltype::_float; + ret = new Static_float(*this); + } + } else if (get_type() == _string) { + m_t = Coltype::_text; + ret = new Static_text(*this); + } else if ((get_type() == _function) && m_param[0]) { + Table* dest_table = tables[m_row_index]; + + m_t = Coltype::_int; + if (cmpi(get_token(), "if") && m_param[1] && m_param[2]) { + m_t = m_param[1]->m_t; + if (m_param[2]->m_t > m_t) + m_t = m_param[2]->m_t; + ret = new If_func(*this); + } else if (cmpi(get_token(), "name") && m_param[1]) { + m_t = Coltype::_text; + ret = new Name_func(*this); + } else if (cmpi(get_token(), "trim")) { + m_t = Coltype::_text; + ret = new Trim_func(*this); + } else if (cmpi(get_token(), "rsplit") && m_param[1]) { + m_t = Coltype::_text; + ret = new Rsplit_func(*this); + } else if (cmpi(get_token(), "netmask")) { + m_t = Coltype::_text; + ret = new Netmask_func(*this); + } else if (cmpi(get_token(), "cc")) { + m_t = Coltype::_text; + ret = new Cc_func(*this); + } else if (cmpi(get_token(), "asn")) { + m_t = Coltype::_int; + ret = new Asn_func(*this); + } else if (cmpi(get_token(), "count")) { + m_t = Coltype::_int; + ret = new Count_func(*this, dest_table); + } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "min")) { + m_t = Coltype::_float; + ret = new Min_func_float(*this, dest_table); + } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "max")) { + m_t = Coltype::_float; + ret = new Max_func_float(*this, dest_table); + } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "sum")) { + m_t = Coltype::_float; + ret = new Sum_func_float(*this, dest_table); + } else if (cmpi(get_token(), "min")) { + m_t = Coltype::_int; + ret = new Min_func_int(*this, dest_table); + } else if (cmpi(get_token(), "max")) { + m_t = Coltype::_int; + ret = new Max_func_int(*this, dest_table); + } else if (cmpi(get_token(), "sum")) { + m_t = Coltype::_int; + ret = new Sum_func_int(*this, dest_table); + } else if (cmpi(get_token(), "lower")) { + m_t = Coltype::_text; + ret = new Lower_func(*this); + } else if (cmpi(get_token(), "len")) { + m_t = Coltype::_int; + ret = new Len_func(*this); + } else if (cmpi(get_token(), "truncate")) { + m_t = Coltype::_int; + ret = new Truncate_func(*this); + } else if (cmpi(get_token(), "stdev")) { + m_t = Coltype::_float; + ret = new Stdev_func(*this, dest_table); + } else if (cmpi(get_token(), "avg")) { + m_t = Coltype::_float; + ret = new Avg_func(*this, dest_table); + } + } else if ((get_type() == _op) && m_left && m_right) { + if (cmpi(get_token(), "||")) { + m_t = Coltype::_text; + ret = new Bin_op_concatenate(*this); + } else if (cmpi(get_token(), "*")) { + if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) { + m_t = Coltype::_float; + ret = new Bin_op_mul_float(*this); + } else { + m_t = Coltype::_int; + ret = new Bin_op_mul(*this); + } + } else if (cmpi(get_token(), "/")) { + m_t = Coltype::_float; + ret = new Bin_op_div(*this); + } else if (cmpi(get_token(), "%")) { + m_t = Coltype::_float; + ret = new Bin_op_modulo(*this); + } else if (cmpi(get_token(), "+")) { + if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) { + m_t = Coltype::_float; + ret = new Bin_op_add_float(*this); + } else { + m_t = Coltype::_int; + ret = new Bin_op_add(*this); + } + } else if (cmpi(get_token(), "-")) { + if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) { + m_t = Coltype::_float; + ret = new Bin_op_sub_float(*this); + } else { + m_t = Coltype::_int; + ret = new Bin_op_sub(*this); + } + } else if (cmpi(get_token(), "<<")) { + m_t = Coltype::_int; + ret = new Bin_op_arithmetic_shift_left(*this); + } else if (cmpi(get_token(), ">>")) { + m_t = Coltype::_int; + ret = new Bin_op_arithmetic_shift_right(*this); + } else if (cmpi(get_token(), "&")) { + m_t = Coltype::_int; + ret = new Bin_op_bitwise_and(*this); + } else if (cmpi(get_token(), "|")) { + m_t = Coltype::_int; + ret = new Bin_op_bitwise_or(*this); + } else if (cmpi(get_token(), "<")) { + m_t = Coltype::_bool; + ret = new Bin_op_lt(*this); + } else if (cmpi(get_token(), "<=")) { + m_t = Coltype::_bool; + ret = new Bin_op_lteq(*this); + } else if (cmpi(get_token(), ">")) { + m_t = Coltype::_bool; + ret = new Bin_op_gt(*this); + } else if (cmpi(get_token(), ">=")) { + m_t = Coltype::_bool; + ret = new Bin_op_gteq(*this); + } else if (cmpi(get_token(), "=")) { + m_t = Coltype::_bool; + ret = new Bin_op_eq(*this); + } else if (cmpi(get_token(), "==")) { + m_t = Coltype::_bool; + ret = new Bin_op_eq(*this); + } else if (cmpi(get_token(), "like")) { + m_t = Coltype::_bool; + ret = new Bin_op_like(*this); + } else if (cmpi(get_token(), "not like")) { + m_t = Coltype::_bool; + ret = new Bin_op_not_like(*this); + } else if (cmpi(get_token(), "!=")) { + m_t = Coltype::_bool; + ret = new Bin_op_not_eq(*this); + } else if (cmpi(get_token(), "<>")) { + m_t = Coltype::_bool; + ret = new Bin_op_not_eq(*this); + } else if (cmpi(get_token(), "is")) { + m_t = Coltype::_bool; + ret = new Bin_op_eq(*this); + } else if (cmpi(get_token(), "is not")) { + m_t = Coltype::_bool; + ret = new Bin_op_not_eq(*this); + } else if (cmpi(get_token(), "and")) { + m_t = Coltype::_bool; + ret = new Bin_op_and(*this); + } else if (cmpi(get_token(), "or")) { + m_t = Coltype::_bool; + ret = new Bin_op_or(*this); + } + } else if ((get_type() == _uop) && m_right) { + if (cmpi(get_token(), "not")) { + m_t = Coltype::_bool; + ret = new Un_op_not(*this); + } else if (cmpi(get_token(), "+")) { + ret = m_right; + } else if (cmpi(get_token(), "-")) { + if (m_right->ret_type() == Coltype::_float) { + m_t = Coltype::_float; + ret = new Un_op_neg_float(*this); + } else { + m_t = Coltype::_int; + ret = new Un_op_neg(*this); + } + } else if (cmpi(get_token(), "~")) { + m_t = Coltype::_int; + ret = new Un_op_ones_complement(*this); + } + } + clear_ptr(); + if (!ret) + throw Error("Unknown operator error '%s' !", get_token()); + + delete this; + + return ret; +} + +void OP::evaluate_aggregate_operands(Row** rows) +{ + if (m_left) + m_left->evaluate_aggregate_operands(rows); + if (m_right) + m_right->evaluate_aggregate_operands(rows); + for (int i = 0; i < max_param(); ++i) + if (m_param[i]) + m_param[i]->evaluate_aggregate_operands(rows); +} + +void OP::combine_aggregate(Row* base_row, Row* other_row) +{ + if (m_left) + m_left->combine_aggregate(base_row, other_row); + if (m_right) + m_right->combine_aggregate(base_row, other_row); + for (int i = 0; i < max_param(); ++i) + if (m_param[i]) + m_param[i]->combine_aggregate(base_row, other_row); +} + +// return any column access ops found in given list of op trees - they don't +// have to be compiled beforehand; duplicate column tokens are skipped +std::vector<OP*> find_unique_column_ops(std::vector<OP*> ops) +{ + std::vector<OP*> res; + + while (!ops.empty()) { + OP* op = ops.back(); + ops.pop_back(); + + if (op->m_left) + ops.push_back(op->m_left); + if (op->m_right) + ops.push_back(op->m_right); + for (int i = 0; i < op->max_param(); ++i) + if (op->m_param[i]) + ops.push_back(op->m_param[i]); + + if (op->get_type() == Token::_column) { + bool found = false; + + for (auto i = res.begin(); i != res.end(); ++i) { + if (cmpii((*i)->get_token(), op->get_token())) { + found = true; + break; + } + } + + if (!found) + res.push_back(op); + } + } + + return res; +} + +void Query::replace_star_column_with_all_columns() +{ + bool found_star = false; + for (auto i = m_select.begin(); i != m_select.end(); ++i) { + if (strcmp((*i)->get_token(), "*") == 0) { + found_star = true; + break; + } + } + + if (found_star) { + for (auto i = m_select.begin(); i != m_select.end(); ++i) + delete *i; + m_select.clear(); + + if (!m_from_name.empty()) { + Packet_handler* handler = get_packet_handler(m_from_name); + + for (auto i = handler->packet_columns.begin(); i != handler->packet_columns.end(); ++i) + m_select.push_back(new OP(Token(Token::_column, i->name))); + } + } +} + +void Query::process_from() +{ + replace_star_column_with_all_columns(); + + if (m_from_name.empty()) + return; + + std::vector<OP*> all_ops; + all_ops.insert(all_ops.end(), m_select.begin(), m_select.end()); + if (m_where) + all_ops.push_back(m_where); + // skip m_having, it can't access source columns + for (auto i = m_order_by.m_terms.begin(); i != m_order_by.m_terms.end(); ++i) + all_ops.push_back(i->m_op); + for (auto i = m_group_by.m_terms.begin(); i != m_group_by.m_terms.end(); ++i) + all_ops.push_back(i->m_op); + + auto used_columns = find_unique_column_ops(all_ops); + + // add from table with used columns + Packet_handler* handler = get_packet_handler(m_from_name); + for (auto j = handler->packet_columns.begin(); j != handler->packet_columns.end(); ++j) + for (auto i = used_columns.begin(); i != used_columns.end(); ++i) + if (cmpii(j->name, (*i)->get_token())) + m_used_from_column_ids.push_back(j->id); + + m_from = handler->create_table(m_used_from_column_ids); +} + +void Query::process_select(Row** rows, Row* dest, GenericAccessor* dest_accessors) +{ + for (unsigned int i = 0, size = m_select.size(); i < size; ++i) { + OP* op = m_select[i]; + if (!op) + continue; + + if (op->m_has_aggregate_function) { + // defer evaluating aggregate functions, just eval their operands + op->evaluate_aggregate_operands(rows); + } else { + Variant v; + op->evaluate(rows, v); + dest_accessors[i].set(dest, v); + } + } +} + +void Query::combine_aggregates_in_select(Row* base_row, Row* other_row) +{ + for (unsigned int i = 0; i < m_select.size(); ++i) { + OP* op = m_select[i]; + if (op && op->m_has_aggregate_function) + op->combine_aggregate(base_row, other_row); + } +} + +void Query::process_aggregates_in_select(Row** rows, Row* dest, GenericAccessor dest_accessors[]) +{ + for (unsigned int i = 0; i < m_select.size(); ++i) { + OP* op = m_select[i]; + if (op && op->m_has_aggregate_function) { + Variant v; + op->evaluate(rows, v); + dest_accessors[i].set(dest, v); + } + } +} + +bool Query::process_where(Row** rows) +{ + if (!m_where) + return true; + + Variant v; + m_where->evaluate(rows, v); + return v.get_bool(); +} + +bool Query::process_having(Row** rows) +{ + if (!m_having) + return true; + + Variant v; + m_having->evaluate(rows, v); + return v.get_bool(); +} + +std::vector<Variant> process_group_by_key(Ordering_terms& group_by, Row** rows) +{ + int size = group_by.m_terms.size(); + + std::vector<Variant> res(size); + + for (int i = 0; i < size; ++i) + group_by.m_terms[i].m_op->evaluate(rows, res[i]); + + return res; +} + +bool Query::has_aggregate_functions() +{ + // this assumes the ops have been compiled + for (auto it = m_select.begin(); it != m_select.end(); it++) + if ((*it)->m_has_aggregate_function) + return true; + + return false; +} + +void Query::execute(Reader& reader) +{ + std::vector<Table*> tables; + std::vector<int> search_results_last, search_results_first, search_results_only; + + // set up tables + process_from(); + + if (m_from) + tables.push_back(m_from); + tables.push_back(m_result); + + for (int i = 0; i < int(tables.size()); ++i) + search_results_last.push_back(i); + for (int i = int(tables.size()) - 1; i >= 0; --i) + search_results_first.push_back(i); + + search_results_only.push_back(tables.size() - 1); + + std::vector<Row*> row_ptrs(tables.size()); + Row** rows = &row_ptrs[0]; + + std::vector<GenericAccessor> result_accessors_vector; + + // compile + for (auto i = m_select.begin(); i != m_select.end(); ++i) { + *i = (*i)->compile(tables, search_results_last, *this); + Column* col = m_result->add_column((*i)->get_name(), (*i)->ret_type()); + GenericAccessor a; + a.m_offset = col->m_offset; + a.m_type = col->m_type; + result_accessors_vector.push_back(a); + } + + if (m_where) + m_where = m_where->compile(tables, search_results_last, *this); + + if (m_having) + m_having = m_having->compile(tables, search_results_only, *this); + + if (m_group_by.exist()) + m_group_by.compile(tables, search_results_last, *this); + + if (m_order_by.exist()) { + // copy any missing columns to result table as hidden so we can + // order by them + std::vector<OP*> ops; + for (auto i = m_order_by.m_terms.begin(); i != m_order_by.m_terms.end(); ++i) + ops.push_back(i->m_op); + + std::vector<OP*> column_ops = find_unique_column_ops(ops); + + for (auto i = column_ops.begin(); i != column_ops.end(); ++i) { + const char* name = (*i)->get_token(); + auto lookup = lookup_column_in_tables(tables, search_results_first, name); + if (lookup.first and lookup.second < int(tables.size()) - 1) { + // found, but not in result table + OP* copying_op = new OP(**i); + copying_op = copying_op->compile(tables, search_results_last, *this); + m_select.push_back(copying_op); + Column* col = m_result->add_column(copying_op->get_name(), copying_op->ret_type(), -1, Column::HIDDEN); + GenericAccessor a; + a.m_offset = col->m_offset; + a.m_type = col->m_type; + result_accessors_vector.push_back(a); + } + } + + // we only provide access to result table for "order by"; in order + // to make the sort thing work correctly the result table currently + // has to be at index 0 + std::vector<Table*> tables_result_only = { m_result }; + std::vector<int> tables_result_only_search = { 0 }; + m_order_by.compile(tables_result_only, tables_result_only_search, *this); + } + + // execute + GenericAccessor* result_accessors = &result_accessors_vector[0]; + bool aggregate_functions = has_aggregate_functions(); + + int count = 0; + bool limiter = !m_order_by.exist() && !m_group_by.exist() && !aggregate_functions && m_limit >= 0; + + if (m_from) { + bool first_row = true; + Packet_handler* handler = get_packet_handler(m_from_name); + + reader.seek_to_start(); + + const int src_i = 0, dest_i = tables.size() - 1; + + rows[src_i] = m_from->create_row(); + + if (m_group_by.exist() || aggregate_functions) { + std::unordered_map<std::vector<Variant>, Row*> groups; + + rows[dest_i] = 0; + while (reader.read_next(handler, m_used_from_column_ids, *rows[src_i], first_row or m_sample == 0 ? 0 : m_sample - 1)) { + // fill in groups + if (rows[dest_i]) + rows[dest_i]->reset_text_columns(m_result->m_text_column_offsets); + else + rows[dest_i] = m_result->create_row(); + + process_select(rows, rows[dest_i], result_accessors); + if (process_where(rows)) { + auto key = process_group_by_key(m_group_by, rows); + Row*& entry = groups[key]; + if (entry) { + combine_aggregates_in_select(entry, rows[dest_i]); + } else { + entry = rows[dest_i]; + rows[dest_i] = 0; + } + } + + first_row = false; + rows[src_i]->reset_text_columns(m_from->m_text_column_offsets); + } + if (rows[dest_i]) + m_result->delete_row(rows[dest_i]); + + // put groups into result + for (auto i = groups.begin(); i != groups.end(); ++i) { + rows[dest_i] = i->second; + // propagate the aggregate results through the evaluation tree + process_aggregates_in_select(rows, rows[dest_i], result_accessors); + if (process_having(rows)) + m_result->add_row(rows[dest_i]); + else + m_result->delete_row(rows[dest_i]); + } + } else { + rows[dest_i] = m_result->create_row(); + while (reader.read_next(handler, m_used_from_column_ids, *rows[src_i], first_row or m_sample == 0 ? 0 : m_sample - 1)) { + // fill in result + process_select(rows, rows[dest_i], result_accessors); + if (process_where(rows)) { + bool commit = true; + if (limiter) { + int l = count++; + if (m_offset > 0) + l -= m_offset; + if (m_limit >= 0 && l >= m_limit) + break; + + if (l < 0) + commit = false; + } + + if (commit) { + m_result->add_row(rows[dest_i]); + rows[dest_i] = m_result->create_row(); + } + } + + first_row = false; + rows[src_i]->reset_text_columns(m_from->m_text_column_offsets); + } + m_result->delete_row(rows[dest_i]); + } + + m_from->delete_row(rows[src_i]); + } else { + const int dest_i = tables.size() - 1; + rows[dest_i] = m_result->create_row(); + process_select(rows, rows[dest_i], result_accessors); + if (process_where(rows)) + m_result->add_row(rows[dest_i]); + else + m_result->delete_row(rows[dest_i]); + } + + if (m_order_by.exist()) + m_result->per_sort(m_order_by); + + if (m_limit >= 0 && !limiter) + m_result->limit(m_limit, m_offset); +} + +DB::DB() +{ + Column::init_defs(); +} + +DB::~DB() +{ +} + +bool DB::query(const char* q) +{ + return false; +} + +Table* DB::get_table(const char* i_name) +{ + std::string name = lower(i_name); + Table* t = 0; + auto it = m_tables.find(name); + if (it != m_tables.end()) + t = it->second; + + return t; +} +Table* DB::create_or_use_table(const char* i_name) +{ + std::string name = lower(i_name); + Table* t = get_table(name.c_str()); + if (!t) + t = create_table(name.c_str()); + + return t; +} +Table* DB::create_table(const char* i_name) +{ + std::string name = lower(i_name); + Table* t = new Table(name.c_str()); + m_tables[std::string(name.c_str())] = t; + + return t; +} +Column::Column(const char* name, Coltype::Type type, int id, bool hidden) + : m_name(name) + , m_type(type) + , m_def(Column::m_coldefs[type]) + , m_id(id) + , m_offset(0) +{ + m_hidden = hidden; +} + +void Trim_func::evaluate(Row** rows, Variant& v) +{ + Variant str; + m_param[0]->evaluate(rows, str); + RefCountStringHandle str_handle(str.get_text()); + const char* s = (*str_handle)->data; + + const char* t; + RefCountStringHandle trim_handle; + if (m_param[1]) { + Variant trim; + m_param[1]->evaluate(rows, trim); + trim_handle.set(trim.get_text()); + t = (*trim_handle)->data; + } else + t = " "; + + int l = strlen(t); + if (l <= 0) { + v = *str_handle; + return; + } + + int slen = strlen(s); + int start = 0, end = slen; + + // left trim + while (end - start >= l && memcmp(s + start, t, l) == 0) + start += l; + + // right trim + while (end - start >= l && memcmp(s + end - l, t, l) == 0) + end -= l; + + if (start == 0 && end == slen) + v = *str_handle; + else { + RefCountStringHandle res(RefCountString::construct(s, start, end)); + v = *res; + } +} + +Cc_func::Cc_func(const OP& op) + : OP(op) +{ +#ifdef HAVE_LIBMAXMINDDB + if (__cc_mmdb) { + return; + } + + std::string db; + char* env = getenv("PACKETQ_MAXMIND_CC_DB"); + if (env) { + db = env; + } + + if (db.empty()) { + std::list<std::string> paths = { + "/var/lib/GeoIP", "/usr/share/GeoIP", "/usr/local/share/GeoIP" + }; + + if ((env = getenv("PACKETQ_MAXMIND_PATH"))) { + paths.push_front(std::string(env)); + } + + auto i = paths.begin(); + for (; i != paths.end(); i++) { + db = (*i) + "/GeoLite2-Country.mmdb"; + struct stat s; + if (!stat(db.c_str(), &s)) { + break; + } + } + if (i == paths.end()) { + return; + } + } + + MMDB_s* mmdb = new (std::nothrow) MMDB_s; + if (!mmdb) { + return; + } + + int ret = MMDB_open(db.c_str(), 0, mmdb); + if (ret != MMDB_SUCCESS) { + fprintf(stderr, "Warning: cannot open MaxMind CC database \"%s\": %s\n", db.c_str(), MMDB_strerror(ret)); + delete mmdb; + return; + } + + __cc_mmdb = mmdb; +#endif +} + +void Cc_func::evaluate(Row** rows, Variant& v) +{ +#ifdef HAVE_LIBMAXMINDDB + if (!__cc_mmdb) { + RefCountStringHandle res(RefCountString::construct("")); + v = *res; + return; + } + + Variant str; + m_param[0]->evaluate(rows, str); + RefCountStringHandle str_handle(str.get_text()); + + int gai_error, ret; + + MMDB_lookup_result_s mmdb_result = MMDB_lookup_string(__cc_mmdb, (*str_handle)->data, &gai_error, &ret); + + if (gai_error || ret != MMDB_SUCCESS || !mmdb_result.found_entry) { + RefCountStringHandle res(RefCountString::construct("")); + v = *res; + return; + } + + MMDB_entry_data_s entry_data; + ret = MMDB_get_value(&mmdb_result.entry, &entry_data, "country", "iso_code", NULL); + + if (ret != MMDB_SUCCESS || !entry_data.has_data || entry_data.type != MMDB_DATA_TYPE_UTF8_STRING) { + RefCountStringHandle res(RefCountString::construct("")); + v = *res; + return; + } + + RefCountStringHandle res(RefCountString::construct(entry_data.utf8_string, 0, entry_data.data_size)); + v = *res; +#else + RefCountStringHandle res(RefCountString::construct("")); + v = *res; +#endif +} + +Asn_func::Asn_func(const OP& op) + : OP(op) +{ +#ifdef HAVE_LIBMAXMINDDB + if (__asn_mmdb) { + return; + } + + std::string db; + char* env = getenv("PACKETQ_MAXMIND_ASN_DB"); + if (env) { + db = env; + } + + if (db.empty()) { + std::list<std::string> paths = { + "/var/lib/GeoIP", "/usr/share/GeoIP", "/usr/local/share/GeoIP" + }; + + if ((env = getenv("PACKETQ_MAXMIND_PATH"))) { + paths.push_front(std::string(env)); + } + + auto i = paths.begin(); + for (; i != paths.end(); i++) { + db = (*i) + "/GeoLite2-ASN.mmdb"; + struct stat s; + if (!stat(db.c_str(), &s)) { + break; + } + } + if (i == paths.end()) { + return; + } + } + + MMDB_s* mmdb = new (std::nothrow) MMDB_s; + if (!mmdb) { + return; + } + + int ret = MMDB_open(db.c_str(), 0, mmdb); + if (ret != MMDB_SUCCESS) { + fprintf(stderr, "Warning: cannot open MaxMind ASN database \"%s\": %s\n", db.c_str(), MMDB_strerror(ret)); + delete mmdb; + return; + } + + __asn_mmdb = mmdb; +#endif +} + +void Asn_func::evaluate(Row** rows, Variant& v) +{ +#ifdef HAVE_LIBMAXMINDDB + if (!__asn_mmdb) { + v = -1; + return; + } + + Variant str; + m_param[0]->evaluate(rows, str); + RefCountStringHandle str_handle(str.get_text()); + + int gai_error, ret; + + MMDB_lookup_result_s mmdb_result = MMDB_lookup_string(__asn_mmdb, (*str_handle)->data, &gai_error, &ret); + + if (gai_error || ret != MMDB_SUCCESS || !mmdb_result.found_entry) { + v = -1; + return; + } + + MMDB_entry_data_s entry_data; + ret = MMDB_get_value(&mmdb_result.entry, &entry_data, "autonomous_system_number", NULL); + + if (ret != MMDB_SUCCESS || !entry_data.has_data || entry_data.type != MMDB_DATA_TYPE_UINT32) { + v = -1; + return; + } + + v = (int_column)entry_data.uint32; +#else + v = -1; +#endif +} + +DB g_db; + +Coldef Column::m_coldefs[COLTYPE_MAX]; + +} // namespace packetq |