summaryrefslogtreecommitdiffstats
path: root/src/sql.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/sql.cpp')
-rw-r--r--src/sql.cpp2696
1 files changed, 2696 insertions, 0 deletions
diff --git a/src/sql.cpp b/src/sql.cpp
new file mode 100644
index 0000000..94ad7ed
--- /dev/null
+++ b/src/sql.cpp
@@ -0,0 +1,2696 @@
+/*
+ * Copyright (c) 2017-2024 OARC, Inc.
+ * Copyright (c) 2011-2017, IIS - The Internet Foundation in Sweden
+ * All rights reserved.
+ *
+ * This file is part of PacketQ.
+ *
+ * PacketQ is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PacketQ is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PacketQ. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include "sql.h"
+#include "output.h"
+#include "packet_handler.h"
+#include "packetq.h"
+#include "reader.h"
+
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#ifdef WIN32
+#include <windows.h>
+#endif
+#ifdef HAVE_LIBMAXMINDDB
+#include <maxminddb.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+static MMDB_s* __cc_mmdb = 0;
+static MMDB_s* __asn_mmdb = 0;
+#endif
+
+namespace packetq {
+
+bool verbose = false;
+
+int g_allocs = 0;
+
+Column* Table::add_column(const char* name, const char* type, int id, bool hidden)
+{
+ if (!type)
+ return add_column(name, Coltype::_text, id, hidden);
+ else if (strcmp(type, "bool") == 0)
+ return add_column(name, Coltype::_bool, id, hidden);
+ else if (strcmp(type, "int") == 0)
+ return add_column(name, Coltype::_int, id, hidden);
+ else if (strcmp(type, "float") == 0)
+ return add_column(name, Coltype::_float, id, hidden);
+ else
+ return add_column(name, Coltype::_text, id, hidden);
+}
+
+Column* Table::add_column(const char* name, Coltype::Type type, int id, bool hidden)
+{
+ Column* col = new Column(name, type, id, hidden);
+ col->m_offset = Table::align(m_curpos, col->m_def.m_align);
+ m_curpos = col->m_offset + col->m_def.m_size;
+ if (type == Coltype::_text)
+ m_text_column_offsets.push_back(col->m_offset);
+ m_cols.push_back(col);
+ return col;
+}
+
+void Table::delete_row(Row* row)
+{
+ row->decref_text_columns(m_text_column_offsets);
+ m_row_allocator->deallocate(row);
+}
+
+Row* Table::create_row()
+{
+ if (!m_row_allocator) {
+ m_rsize = sizeof(Row) - ROW_DUMMY_SIZE; // exclude the dummy
+ m_dsize = m_curpos;
+ m_row_allocator = new Allocator<Row>(m_rsize + m_dsize, 10000);
+ }
+
+ Row* r = m_row_allocator->allocate();
+ r->zero_text_columns(m_text_column_offsets);
+ return r;
+}
+
+void Table::add_row(Row* row)
+{
+ m_rows.push_back(row);
+}
+
+int g_comp = 0;
+
+void Ordering_terms::compile(const std::vector<Table*>& tables, const std::vector<int>& search_order, Query& q)
+{
+ for (auto it = m_terms.begin(); it != m_terms.end(); it++) {
+ OP* op = it->m_op;
+ it->m_op = op->compile(tables, search_order, q);
+ }
+}
+
+class Sorter {
+public:
+ Sorter(Ordering_terms& order)
+ : m_order(order)
+ {
+ }
+ bool operator()(Row* ia, Row* ib)
+ {
+ // this works under the assumption that the ordering terms have
+ // been compiled with only one table so the row index i is 0
+ Row** ia_rows = &ia;
+ Row** ib_rows = &ib;
+
+ for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) {
+ g_comp++;
+
+ OP* op = it->m_op;
+ op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a);
+ op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b);
+ int res = m_a.cmp(m_b);
+ if (res < 0)
+ return true;
+ if (res > 0)
+ return false;
+ }
+ return false;
+ }
+ bool eq(Row* ia, Row* ib)
+ {
+ Row** ia_rows = &ia;
+ Row** ib_rows = &ib;
+
+ for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) {
+ g_comp++;
+
+ OP* op = it->m_op;
+ op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a);
+ op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b);
+ int res = m_a.cmp(m_b);
+ if (res != 0)
+ return false;
+ }
+ return true;
+ }
+ int cmp(Row* ia, Row* ib)
+ {
+ Row** ia_rows = &ia;
+ Row** ib_rows = &ib;
+
+ for (auto it = m_order.m_terms.begin(); it != m_order.m_terms.end(); it++) {
+ g_comp++;
+
+ OP* op = it->m_op;
+ op->evaluate(it->m_asc ? ia_rows : ib_rows, m_a);
+ op->evaluate(it->m_asc ? ib_rows : ia_rows, m_b);
+ int res = m_a.cmp(m_b);
+ if (res != 0)
+ return res;
+ }
+ return 0;
+ }
+ Ordering_terms& m_order;
+ Variant m_a, m_b;
+};
+
+struct Stki {
+ int s;
+ int l;
+ int b;
+};
+
+struct Spkt {
+ Variant cache;
+ int row;
+};
+
+class Per_sort {
+public:
+ struct Tlink {
+ Tlink()
+ : m_next(0)
+ , row(0)
+ , m_eq(0)
+ {
+ }
+
+ Tlink* get_eq() { return m_eq; }
+ void reset()
+ {
+ m_next = 0;
+ row = 0;
+ m_eq = 0;
+ }
+ void add_eq(Tlink* o)
+ {
+ if (!o->m_eq) {
+ // add single
+ if (!m_eq) {
+ // as first
+ m_eq = o;
+ o->m_eq_last = o;
+ return;
+ }
+ // to list
+ m_eq->m_eq_last->m_eq = o;
+ m_eq->m_eq_last = o;
+ return;
+ } else {
+ // add list
+ if (!m_eq) {
+ // as first
+ m_eq = o;
+ o->m_eq_last = o->m_eq->m_eq_last;
+ return;
+ }
+ // to list
+ m_eq->m_eq_last->m_eq = o;
+ m_eq->m_eq_last = o->m_eq->m_eq_last;
+ }
+ }
+ union {
+ Tlink* m_next;
+ Tlink* m_eq_last;
+ };
+ Row* row;
+ Variant cache;
+
+ private:
+ Tlink* m_eq;
+ };
+ struct List {
+ public:
+ void reset()
+ {
+ m_size = 0;
+ m_fl[0] = 0;
+ m_fl[1] = 0;
+ }
+ int m_size;
+ Tlink* m_fl[2];
+ };
+ Per_sort(Table& table, Ordering_terms& order)
+ : m_sorter(order)
+ , m_table(table)
+ {
+ m_escalate_sort = order.m_terms.size() > 1;
+ m_asc = order.m_terms.begin()->m_asc ? 1 : -1;
+ m_op = order.m_terms.begin()->m_op;
+ memset(m_groups, 0, sizeof(m_groups));
+ m_current.reset();
+ }
+ inline bool add_to_list(List& list, Tlink* t)
+ {
+ if (list.m_fl[0] == 0) {
+ list.m_fl[0] = list.m_fl[1] = t;
+ list.m_size = 1;
+ return true;
+ }
+ list.m_fl[1]->m_next = t;
+ list.m_fl[1] = t;
+ list.m_size++;
+ return true;
+ }
+ inline bool insert_into_list(List& list, Tlink* t)
+ {
+ if (list.m_fl[0] == 0) {
+ list.m_fl[0] = list.m_fl[1] = t;
+ list.m_size = 1;
+ return true;
+ }
+ int cmp0 = cmp(t, list.m_fl[0]);
+ if (cmp0 == 0) {
+ list.m_fl[0]->add_eq(t);
+ return true;
+ }
+ if (cmp0 < 0) {
+ t->m_next = list.m_fl[0];
+ list.m_fl[0] = t;
+ list.m_size++;
+ return true;
+ }
+ int cmp1 = cmp(t, list.m_fl[1]);
+ if (cmp1 == 0) {
+ list.m_fl[1]->add_eq(t);
+ return true;
+ }
+ if (cmp1 > 0) {
+ list.m_fl[1]->m_next = t;
+ list.m_fl[1] = t;
+ list.m_size++;
+ return true;
+ }
+ return false;
+ }
+ int cmp(Tlink* a, Tlink* b)
+ {
+ int cmp = a->cache.cmp(b->cache) * m_asc;
+ if (cmp != 0 || !m_escalate_sort)
+ return cmp;
+ return m_sorter.cmp(a->row, b->row);
+ }
+ inline void add(Tlink* t)
+ {
+ if (!insert_into_list(m_current, t)) {
+ insert_list(m_current);
+ m_current.reset();
+ insert_into_list(m_current, t);
+ }
+ }
+ inline void insert_list(List& l)
+ {
+ unsigned int size = l.m_size;
+ int offs = 0;
+ size >>= 1;
+ while (size != 0) {
+ offs++;
+ size >>= 1;
+ }
+ if (m_groups[offs].m_size != 0) {
+ List m = merge(m_groups[offs], l);
+ m_groups[offs].reset();
+ insert_list(m);
+ } else
+ m_groups[offs] = l;
+ }
+ List merge(List& l1, List& l2)
+ {
+ List r;
+ r.reset();
+ Tlink* a = l1.m_fl[0];
+ Tlink* b = l2.m_fl[0];
+ if (!a)
+ return l2;
+ if (!b)
+ return l1;
+
+ int size = l1.m_size + l2.m_size;
+
+ while (a && b) {
+ int c = cmp(a, b);
+ if (c == 0) {
+ Tlink* a2 = a;
+ Tlink* b2 = b;
+ a = a->m_next;
+ b = b->m_next;
+ a2->m_next = 0;
+ b2->m_next = 0;
+ a2->add_eq(b2);
+ add_to_list(r, a2);
+ size--;
+ } else if (c < 0) {
+ Tlink* a2 = a;
+ a = a->m_next;
+ a2->m_next = 0;
+ add_to_list(r, a2);
+ } else {
+ Tlink* b2 = b;
+ b = b->m_next;
+ b2->m_next = 0;
+ add_to_list(r, b2);
+ }
+ }
+ if (a) {
+ r.m_fl[1]->m_next = a;
+ r.m_fl[1] = l1.m_fl[1];
+ }
+ if (b) {
+ r.m_fl[1]->m_next = b;
+ r.m_fl[1] = l2.m_fl[1];
+ }
+
+ l1.reset();
+ l2.reset();
+ r.m_size = size;
+ return r;
+ }
+
+ void sort()
+ {
+ int table_size = (int)m_table.m_rows.size();
+ if (table_size <= 1)
+ return;
+ auto it = m_table.m_rows.begin();
+ Tlink* links = new Tlink[table_size];
+
+ int i;
+ for (i = 0; i < table_size; i++) {
+ Tlink& r = links[i];
+ r.reset();
+ r.row = *it;
+ // &row works under the assumption that m_op has been compiled with
+ // this table only so row index is 0
+ m_op->evaluate(&r.row, r.cache);
+ it++;
+ add(&r);
+ }
+ if (m_current.m_size)
+ insert_list(m_current);
+ List result;
+ result.reset();
+ for (i = 0; i < sizeof(m_groups) / sizeof(List); i++) {
+ if (m_groups[i].m_size > 0) {
+ result = merge(result, m_groups[i]);
+ m_groups[i].reset();
+ }
+ }
+ Tlink* p = result.m_fl[0];
+ it = m_table.m_rows.begin();
+ while (p) {
+ *it++ = p->row;
+ Tlink* e = p->get_eq();
+ while (e) {
+ *it++ = e->row;
+ e = e->get_eq();
+ }
+ p = p->m_next;
+ };
+
+ delete[] links;
+ }
+
+ OP* m_op;
+ Sorter m_sorter;
+ bool m_escalate_sort;
+ int m_asc;
+ Table& m_table;
+ List m_groups[32];
+ List m_current;
+};
+
+void Table::per_sort(Ordering_terms& order)
+{
+ Per_sort sort(*this, order);
+
+ sort.sort();
+
+ return;
+}
+
+void Table::merge_sort(Ordering_terms& order)
+{
+ Sorter sorter(order);
+
+ bool escalate_sort = order.m_terms.size() > 1;
+ int asc = order.m_terms.begin()->m_asc ? 1 : -1;
+ OP* op = order.m_terms.begin()->m_op;
+
+ int table_size = (int)m_rows.size();
+ if (table_size <= 1)
+ return;
+ Row** row_ptrs = new Row*[table_size];
+ Spkt* spktpool = new Spkt[table_size * 2];
+ Spkt* rows[2];
+ rows[0] = spktpool;
+ rows[1] = &spktpool[table_size];
+ auto it = m_rows.begin();
+
+ int i = 0;
+ Spkt* r = rows[0];
+ for (i = 0; i < table_size; i += 2) {
+
+ row_ptrs[i] = *it++;
+ r[i].row = i;
+ // &row works under the assumption that m_op has been compiled with
+ // this table only so row index is 0
+ op->evaluate(&row_ptrs[r[i].row], r[i].cache);
+ if (i + 1 < table_size) {
+ row_ptrs[i + 1] = *it++;
+ r[i + 1].row = i + 1;
+ op->evaluate(&row_ptrs[r[i + 1].row], r[i + 1].cache);
+ }
+ }
+ int swap = 0;
+
+ Stki stack[64];
+ Stki* sp = stack;
+ sp->s = 0;
+ sp->l = 2;
+ sp->b = 1;
+ rows[1][0] = rows[0][1];
+ rows[1][1] = rows[0][0];
+
+ sp--;
+
+ int npos = 0;
+ while (true) {
+ int start, len;
+ if (sp > stack && sp->l == sp[-1].l) {
+ // two equal size -> merge
+
+ len = sp->l <<= 1;
+ start = sp[-1].s;
+ swap = sp[-1].b;
+ sp--;
+ sp--;
+ } else {
+ start = npos;
+ npos += 2;
+ swap = 0;
+ len = 2;
+ }
+
+ int cnt = start + len > table_size ? table_size - start : len;
+ Spkt* s = rows[swap];
+ Spkt* d = rows[1 - swap];
+ if (cnt > 0) {
+ int p1 = start;
+ int p2 = start + (len >> 1);
+ int l1 = len >> 1;
+ int l2 = l1;
+ if (p1 + l1 > table_size)
+ l1 = table_size - p1;
+ if (p2 + l2 > table_size)
+ l2 = table_size - p2;
+
+ i = start;
+ while (cnt-- > 0) {
+ if (l1 <= 0) {
+ d[i++] = s[p2++];
+ } else if (l2 <= 0) {
+ d[i++] = s[p1++];
+ } else {
+ int cmp = s[p1].cache.cmp(s[p2].cache) * asc;
+ if (cmp < 0 || (cmp == 0 && escalate_sort && sorter(row_ptrs[s[p1].row], row_ptrs[s[p2].row]))) {
+ l1--;
+ d[i++] = s[p1++];
+ } else {
+ l2--;
+ d[i++] = s[p2++];
+ }
+ }
+ }
+ }
+ ++sp;
+ sp->l = len;
+ sp->s = start;
+ sp->b = 1 - swap;
+ if (len > table_size) {
+ for (it = m_rows.begin(); it != m_rows.end(); it++) {
+ *it = row_ptrs[(d++)->row];
+ }
+
+ break;
+ }
+ }
+ delete[] row_ptrs;
+ delete[] spktpool;
+ return;
+}
+
+void Table::limit(int limit, int offset)
+{
+ int count = 0;
+ auto e = m_rows.end();
+ for (auto it = m_rows.begin(); it != m_rows.end(); it++) {
+ if (e != m_rows.end()) {
+ delete_row(*e);
+ m_rows.erase(e);
+ e = m_rows.end();
+ }
+
+ int l = count++;
+ if (l < offset || l >= offset + limit) {
+ e = it;
+ }
+ }
+ if (e != m_rows.end()) {
+ delete_row(*e);
+ m_rows.erase(e);
+ }
+}
+
+void printrep(int n, char c)
+{
+ if (n >= 3000)
+ return;
+ char buf[3000];
+ int i;
+ for (i = 0; i < n; i++)
+ buf[i] = c;
+ buf[i] = 0;
+ printf("%s", buf);
+}
+
+void Table::xml()
+{
+ g_output.reset();
+ int cols = (int)m_cols.size();
+
+ g_output.add_string("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+ g_output.add_string("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
+ g_output.add_string("<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n<head>\n <title>");
+ g_output.add_string(m_name.c_str());
+ g_output.add_string("</title>\n");
+ g_output.add_string("<style type=\"text/css\">\n");
+ g_output.add_string(" th.int { color: #0F0C00; }\n");
+ g_output.add_string(" th.float { color: #0F0900; }\n");
+ g_output.add_string(" th.text { color: #0F0600; }\n");
+ g_output.add_string(" th.bool { color: #0C0900; }\n");
+ g_output.add_string("</style>\n");
+ g_output.add_string("</head>\n");
+ g_output.add_string("<body>\n");
+ g_output.add_string("<table>\n");
+
+ g_output.add_string("<tr>");
+
+ for (int i = 0; i < cols; i++) {
+ if (m_cols[i]->m_hidden)
+ continue;
+
+ const char* t = "";
+ switch (m_cols[i]->m_type) {
+ case (Coltype::_float):
+ t = "float";
+ break;
+ case (Coltype::_int):
+ t = "int";
+ break;
+ case (Coltype::_text):
+ t = "text";
+ break;
+ case (Coltype::_bool):
+ t = "bool";
+ break;
+ }
+ g_output.add_string("<th class=\"");
+ g_output.add_string(t);
+ g_output.add_string("\">");
+ g_output.add_string(m_cols[i]->m_name.c_str());
+ g_output.add_string("</th>");
+ }
+ g_output.add_string("</tr>\n");
+ for (auto it = m_rows.begin(); it != m_rows.end(); it++) {
+ g_output.add_string("<tr>");
+ Row* r = *it;
+
+ Variant v;
+ for (int i = 0; i < cols; i++) {
+ Column* c = m_cols[i];
+
+ if (c->m_hidden)
+ continue;
+
+ int offset = c->m_offset;
+
+ static const int bufsize = 100;
+ char buf[bufsize];
+
+ g_output.add_string("<td>");
+ switch (c->m_type) {
+ case Coltype::_bool:
+ g_output.add_string(r->access_column<bool_column>(offset) ? "1" : "0");
+ break;
+ case Coltype::_int:
+ snprintf(buf, bufsize, "%i", r->access_column<int_column>(offset));
+ g_output.add_string(buf);
+ break;
+ case Coltype::_float:
+ snprintf(buf, bufsize, "%g", r->access_column<float_column>(offset));
+ g_output.add_string(buf);
+ break;
+ case Coltype::_text:
+ g_output.add_string(r->access_column<text_column>(offset)->data);
+ break;
+ }
+ g_output.add_string("</td> ");
+ }
+
+ g_output.add_string("</tr>\n");
+ }
+ g_output.add_string("</table>\n");
+ g_output.add_string("</body>\n");
+ g_output.add_string("</html>\n");
+ g_output.print();
+}
+
+void Table::json(bool trailing_comma)
+{
+ g_output.reset();
+ int cols = (int)m_cols.size();
+
+ g_output.add_string(" {\n ");
+
+ g_output.add_q_string("table_name");
+ g_output.add_string(": ");
+ g_output.add_q_string(m_name.c_str());
+ g_output.add_string(",\n ");
+
+ g_output.add_q_string("query");
+ g_output.add_string(": ");
+ g_output.add_q_string(m_qstring.c_str());
+ g_output.add_string(",\n ");
+
+ g_output.add_q_string("head");
+ g_output.add_string(": [");
+
+ bool append_comma = false;
+ for (int i = 0; i < cols; i++) {
+ if (m_cols[i]->m_hidden)
+ continue;
+
+ if (append_comma)
+ g_output.add_string(",\n");
+ else
+ g_output.add_string("\n");
+ append_comma = true;
+ g_output.add_string(" { ");
+ g_output.add_q_string("name");
+ g_output.add_string(": ");
+ g_output.add_q_string(m_cols[i]->m_name.c_str());
+ g_output.add_string(",");
+ g_output.add_q_string("type");
+ g_output.add_string(": ");
+ const char* t = "";
+ switch (m_cols[i]->m_type) {
+ case (Coltype::_float):
+ t = "float";
+ break;
+ case (Coltype::_int):
+ t = "int";
+ break;
+ case (Coltype::_text):
+ t = "text";
+ break;
+ case (Coltype::_bool):
+ t = "bool";
+ break;
+ }
+ g_output.add_q_string(t);
+ g_output.add_string(" }");
+ }
+ g_output.add_string("\n ],\n ");
+ g_output.add_q_string("data");
+ g_output.add_string(": [");
+ bool outer_comma = false;
+ for (auto it = m_rows.begin(); it != m_rows.end(); it++) {
+ if (outer_comma)
+ g_output.add_string(",\n [");
+ else
+ g_output.add_string("\n [");
+ outer_comma = true;
+ bool comma = false;
+ Row* r = *it;
+
+ for (int i = 0; i < cols; i++) {
+ Column* c = m_cols[i];
+
+ if (c->m_hidden)
+ continue;
+
+ if (comma)
+ g_output.add_string(",");
+
+ comma = true;
+
+ int offset = c->m_offset;
+ static const int bufsize = 100;
+ char buf[bufsize];
+
+ switch (c->m_type) {
+ case Coltype::_bool:
+ g_output.add_string(r->access_column<bool_column>(offset) ? "1" : "0");
+ break;
+ case Coltype::_int:
+ snprintf(buf, bufsize, "%i", r->access_column<int_column>(offset));
+ g_output.add_string(buf);
+ break;
+ case Coltype::_float:
+ snprintf(buf, bufsize, "%g", r->access_column<float_column>(offset));
+ g_output.add_string(buf);
+ break;
+ case Coltype::_text:
+ g_output.add_q_string(r->access_column<text_column>(offset)->data);
+ break;
+ }
+ }
+
+ g_output.add_string("]");
+ }
+ g_output.add_string("\n ]\n");
+ if (trailing_comma) {
+ g_output.add_string(" },\n");
+ } else {
+ g_output.add_string(" }\n");
+ }
+ g_output.print();
+}
+
+std::string csv_qoute_string(const std::string& s)
+{
+ std::string r = "\"";
+ int len = s.length();
+ for (int i = 0; i < len; i++) {
+ if (s[i] == '"') {
+ r += '"';
+ }
+ r += s[i];
+ }
+ r += '"';
+ return r;
+}
+
+void Table::csv(bool format)
+{
+ int cols = (int)m_cols.size();
+ std::vector<int> col_len(cols);
+
+ for (int i = 0; i < cols; i++)
+ col_len[i] = 0;
+ int max = 0;
+ char* tmp = 0;
+ if (format) {
+ for (auto it = m_rows.begin(); it != m_rows.end(); it++) {
+ Row* r = *it;
+
+ for (int i = 0; i < cols; i++) {
+ Column* c = m_cols[i];
+
+ if (c->m_hidden)
+ continue;
+
+ int len = 0;
+
+ int offset = c->m_offset;
+ static const int bufsize = 100;
+ char buf[bufsize];
+
+ switch (c->m_type) {
+ case Coltype::_bool:
+ len = 1;
+ break;
+ case Coltype::_int:
+ snprintf(buf, bufsize, "%i", r->access_column<int_column>(offset));
+ len = strlen(buf);
+ break;
+ case Coltype::_float:
+ snprintf(buf, bufsize, "%g", r->access_column<float_column>(offset));
+ len = strlen(buf);
+ break;
+ case Coltype::_text:
+ len = csv_qoute_string(r->access_column<text_column>(offset)->data).length();
+ break;
+ }
+ len++;
+ if (len > col_len[i])
+ col_len[i] = len;
+ if (len > max)
+ max = len;
+ }
+ }
+ for (int i = 0; i < cols; i++) {
+ if (m_cols[i]->m_hidden)
+ continue;
+
+ int l = csv_qoute_string(m_cols[i]->m_name).length();
+ l++;
+ if (l > col_len[i])
+ col_len[i] = l;
+ if (l > max)
+ max = l;
+ }
+
+ tmp = new char[max + 1];
+ for (int i = 0; i < max; i++)
+ tmp[i] = 32;
+ tmp[max] = 0;
+ }
+
+ for (int i = 0; i < cols; i++) {
+ if (m_cols[i]->m_hidden)
+ continue;
+
+ printf("%s", csv_qoute_string(m_cols[i]->m_name).c_str());
+ if (i < cols - 1) {
+ size_t pos = csv_qoute_string(m_cols[i]->m_name).length() + max - col_len[i] + 1;
+ if (format && pos < max) {
+ printf("%s,", &tmp[pos]);
+ } else {
+ printf(",");
+ }
+ }
+ }
+ printf("\n");
+ for (auto it = m_rows.begin(); it != m_rows.end(); it++) {
+ Row* r = *it;
+
+ for (int i = 0; i < cols; i++) {
+ Column* c = m_cols[i];
+
+ if (c->m_hidden)
+ continue;
+
+ int offset = c->m_offset;
+ static const int bufsize = 100;
+ char buf[bufsize];
+
+ std::string out;
+
+ switch (c->m_type) {
+ case Coltype::_bool:
+ out = r->access_column<bool_column>(offset) ? "1" : "0";
+ break;
+ case Coltype::_int:
+ snprintf(buf, bufsize, "%i", r->access_column<int_column>(offset));
+ out = buf;
+ break;
+ case Coltype::_float:
+ snprintf(buf, bufsize, "%g", r->access_column<float_column>(offset));
+ out = buf;
+ break;
+ case Coltype::_text:
+ out = csv_qoute_string(r->access_column<text_column>(offset)->data);
+ break;
+ }
+
+ fputs(out.c_str(), stdout);
+ if (i < cols - 1) {
+ if (format) {
+ printf("%s,", &tmp[out.length() + max - col_len[i] + 1]);
+ } else {
+ printf(",");
+ }
+ }
+ }
+
+ printf("\n");
+ }
+ delete[] tmp;
+}
+
+void Table::dump()
+{
+ int cols = (int)m_cols.size();
+ int width = 25;
+ char fmti[40];
+ snprintf(fmti, sizeof(fmti) - 1, "%%%dd |", width);
+ fmti[39] = 0;
+ char fmtd[40];
+ snprintf(fmtd, sizeof(fmtd) - 1, "%%%dg |", width);
+ fmtd[39] = 0;
+ char fmts[40];
+ snprintf(fmts, sizeof(fmts) - 1, "%%%ds |", width);
+ fmts[39] = 0;
+
+ printf("Table::dump() table:%s cols:%d\n", m_name.c_str(), cols);
+ printrep((width + 2) * cols + 1, '-');
+ printf("\n");
+ printf("|");
+ for (int i = 0; i < cols; i++)
+ printf(fmti, m_cols[i]->m_type);
+ printf("\n");
+ printf("|");
+ for (int i = 0; i < cols; i++)
+ printf(fmts, m_cols[i]->m_name.c_str());
+ printf("\n");
+ printrep((width + 2) * cols + 1, '*');
+ printf("\n");
+ for (auto it = m_rows.begin(); it != m_rows.end(); it++) {
+ printf("|");
+ Row* r = *it;
+
+ for (int i = 0; i < cols; i++) {
+ Column* c = m_cols[i];
+ int offset = c->m_offset;
+
+ switch (c->m_type) {
+ case Coltype::_bool:
+ printf(fmts, r->access_column<bool_column>(offset) ? "1" : "0");
+ break;
+ case Coltype::_int:
+ printf(fmti, r->access_column<int_column>(offset));
+ break;
+ case Coltype::_float:
+ printf(fmtd, r->access_column<float_column>(offset));
+ break;
+ case Coltype::_text:
+ printf(fmts, r->access_column<text_column>(offset));
+ break;
+ }
+ }
+
+ printf("\n");
+ }
+ printrep((width + 2) * cols + 1, '-');
+ printf("\n");
+}
+
+class Parser {
+private:
+ Token::Type m_last;
+
+public:
+ std::list<Token> m_tokens;
+ typedef std::list<Token>::iterator Lit;
+
+ Parser()
+ {
+ m_last = Token::_invalid;
+ }
+
+ void push(Token::Type type, const char* string)
+ {
+ if (!(type == Token::_semicolon && m_last == Token::_semicolon))
+ m_tokens.push_back(Token(type, string));
+ m_last = type;
+ }
+
+ void dump()
+ {
+ for (auto it = m_tokens.begin(); it != m_tokens.end(); it++) {
+ printf("Type %d: %s\n", it->get_type(), it->get_token());
+ }
+ }
+
+ bool analyze(Query& q)
+ {
+ auto it = m_tokens.begin();
+ bool ok = true;
+ while (ok) {
+ ok = false;
+ if (get_sample_stmt(q, it))
+ ok = true;
+
+ if (get_select_stmt(q, it))
+ ok = true;
+ }
+
+ return true;
+ }
+
+ bool get_sample_stmt(Query& q, Lit& i_iter)
+ {
+ Lit it = i_iter;
+ if (!is(it, Token::_label, "sample"))
+ return false;
+ it++;
+ if (!is(it, Token::_number))
+ return false;
+ int sample = atoi(it->get_token());
+ it++;
+ if (!is(it, Token::_semicolon))
+ return false;
+
+ it++;
+ q.m_sample = sample;
+ i_iter = it;
+ return true;
+ }
+
+ bool get_select_stmt(Query& q, Lit& i_iter)
+ {
+ Lit it = i_iter;
+ if (!get_select_core(q, it)) {
+ return false;
+ }
+
+ get_from(q, it);
+ get_where(q, it);
+ get_group_by(q, it);
+ get_order_by(q, it);
+ get_limit(q, it);
+ get_as(q, it);
+ if (!is(it, Token::_semicolon)) {
+ throw Error("Expected ';' but found '%s' !", it->get_token());
+ }
+ it++;
+ i_iter = it;
+ return true;
+ }
+
+ bool is(Lit& it, Token::Type type, const char* str = 0)
+ {
+ if (!str)
+ return (it->get_type() == type);
+
+ return (it->get_type() == type && cmpi(it->get_token(), str));
+ }
+
+ OP* get_result_column(std::list<Token>::iterator& it)
+ {
+ OP* res = 0;
+ Lit save = it;
+ if (is(it, Token::_op, "*")) {
+ it++;
+ return new OP(Token(Token::_column, "*"));
+ }
+ if (it->get_type() == Token::_label) {
+ std::string table = it->get_token();
+ it++;
+ if (is(it, Token::_op, ".")) {
+ it++;
+ if (is(it, Token::_op, "*")) {
+ it++;
+ std::string c = table + ".*";
+ return new OP(Token(Token::_column, c.c_str()));
+ }
+ it = save;
+ return 0;
+ }
+ }
+ it = save;
+ if ((res = get_expr(it, 0))) {
+ save = it;
+
+ if (is(it, Token::_label, "as")) {
+ it++;
+ if (is(it, Token::_label)) {
+ res->m_name = it->get_token();
+ it++;
+ } else
+ it = save;
+ }
+ }
+ return res;
+
+ // check for valid table
+ }
+
+ bool get_select_core(Query& q, Lit& it)
+ {
+ Lit save = it;
+ if (!is(it, Token::_label, "select"))
+ return false;
+ it++;
+ bool again = true;
+ bool success = true;
+ while (again) {
+ OP* op;
+ if ((op = get_result_column(it))) {
+ q.m_select.push_back(op);
+ } else {
+ success = false;
+ break;
+ }
+
+ if (is(it, Token::_op, ","))
+ it++;
+ else
+ again = false;
+ }
+ if (success)
+ return true;
+
+ it = save;
+ return false;
+ }
+
+ bool get_ordering_terms(Ordering_terms& ordering, std::list<Token>::iterator& it)
+ {
+ OP* op;
+ while ((op = get_expr(it, 0))) {
+ bool asc = true;
+ if (it->get_type() == Token::_label) {
+ if (cmpi(it->get_token(), "asc")) {
+ // default
+ } else if (cmpi(it->get_token(), "desc")) {
+ asc = false;
+ } else if (cmpi(it->get_token(), "collate")) {
+ throw Error("unhandled option:collate");
+ } else {
+ ordering.m_terms.push_back(Ordering_terms::OP_dir(op, asc));
+ break;
+ }
+
+ it++;
+ }
+
+ ordering.m_terms.push_back(Ordering_terms::OP_dir(op, asc));
+
+ if (!is(it, Token::_op, ","))
+ break;
+ it++;
+ }
+ return true;
+ }
+ bool get_group_by(Query& q, Lit& it)
+ {
+ if (!is(it, Token::_label, "group")) {
+ return true;
+ }
+ it++;
+ if (!is(it, Token::_label, "by"))
+ return false;
+ it++;
+ bool res = get_ordering_terms(q.m_group_by, it);
+ get_having(q, it);
+ return res;
+ }
+ bool get_as(Query& q, Lit& it)
+ {
+ if (!is(it, Token::_label, "as")) {
+ return true;
+ }
+ it++;
+ if (!is(it, Token::_label))
+ return false;
+ q.m_result->m_name = it->get_token();
+ it++;
+ return true;
+ }
+
+ bool get_order_by(Query& q, Lit& it)
+ {
+ if (!is(it, Token::_label, "order")) {
+ return true;
+ }
+ it++;
+ if (!is(it, Token::_label, "by"))
+ return false;
+ it++;
+ return get_ordering_terms(q.m_order_by, it);
+ }
+
+ bool get_limit(Query& q, Lit& it)
+ {
+ Lit save = it;
+ if (!is(it, Token::_label, "limit")) {
+ return true;
+ }
+ it++;
+ if (!is(it, Token::_number)) {
+ it = save;
+ throw Error("non numeric operand to limit");
+ }
+ q.m_limit = atoi(it->get_token());
+ it++;
+ save = it;
+
+ if (!is(it, Token::_label, "offset")) {
+ return true;
+ }
+ it++;
+ if (!is(it, Token::_number)) {
+ it = save;
+ throw Error("non numeric operand to offset");
+ }
+ q.m_offset = atoi(it->get_token());
+ it++;
+ return true;
+ }
+
+ bool get_having(Query& q, Lit& it)
+ {
+ Lit save = it;
+ if (!is(it, Token::_label, "having"))
+ return true;
+ it++;
+ OP* res = 0;
+ if ((res = get_expr(it, 0))) {
+ q.m_having = res;
+ return true;
+ }
+ it = save;
+ return false;
+ }
+
+ bool get_where(Query& q, Lit& it)
+ {
+ Lit save = it;
+ if (!is(it, Token::_label, "where"))
+ return true;
+ it++;
+ OP* res = 0;
+ if ((res = get_expr(it, 0))) {
+ q.m_where = res;
+ return true;
+ }
+ it = save;
+ return false;
+ }
+
+ bool get_from(Query& q, Lit& it)
+ {
+ if (!is(it, Token::_label, "from"))
+ return false;
+ it++;
+ if (it->get_type() == Token::_label) {
+ const char* name = it->get_token();
+ if (get_packet_handler(name)) {
+ q.m_from_name = name;
+ it++;
+ return true;
+ } else
+ throw Error("Error in from statement, table '%s' not found", name);
+ }
+ throw Error("Error in from statement");
+ }
+
+ int get_stack_precedence(std::stack<OP*>& operator_stack)
+ {
+ int pre = 0;
+ if (!operator_stack.empty())
+ pre = operator_stack.top()->m_precedence;
+ return pre;
+ }
+
+ // using The shunting yard algorithm
+ OP* get_expr(Lit& it, int rec)
+ {
+ std::stack<OP*> operator_stack;
+ std::stack<OP*> operand_stack;
+ bool success = true;
+ bool expect_expr = true;
+ while (success) {
+ success = false;
+ Lit save = it;
+ Lit next = it;
+ next++;
+
+ // match literal
+ if (expect_expr && is_literal(it)) {
+ OP* op = new OP(*it);
+ it++;
+ operand_stack.push(op);
+ success = true;
+ expect_expr = false;
+ continue;
+ }
+ // match literal
+ if (expect_expr && is_unary_op(it)) {
+ OP* op = new OP(*it);
+ op->set_type(Token::_uop);
+ it++;
+ if (!(op->m_right = get_expr(it, rec + 1)))
+ throw Error("Got unary '%s' but could not parse following expression", op->get_token());
+
+ operand_stack.push(op);
+ success = true;
+ expect_expr = false;
+ continue;
+ }
+ // match function-name (
+ if (expect_expr && is(it, Token::_label) && is(next, Token::_paren, "(")) {
+ OP* func = new OP(*it);
+ func->set_type(Token::_function);
+ it++;
+ it++;
+ if (is(it, Token::_op, "*")) {
+ it->set_type(Token::_number);
+ it->set_token("1");
+ }
+ func->m_param[0] = get_expr(it, rec + 1);
+ if (!func->m_param[0])
+ throw Error("Missing operand to function");
+ operand_stack.push(func);
+ int n = 1;
+ while (n < OP::max_param() && is(it, Token::_op, ",")) {
+
+ it++;
+ func->m_param[n++] = get_expr(it, rec + 1);
+ }
+ if (!is(it, Token::_paren, ")"))
+ throw Error("Expected ) after %s", func->get_token());
+ it++;
+ expect_expr = false;
+ success = true;
+ continue;
+ }
+ // match [[databasename .] table-name . ] column name
+ if (expect_expr && is(it, Token::_label)) {
+ OP* op = new OP(*it);
+ it++;
+ success = true;
+ op->set_type(Token::_column);
+ operand_stack.push(op);
+ expect_expr = false;
+ continue;
+ }
+ // match ( expr )
+ if (!expect_expr && is(it, Token::_paren, ",")) {
+ break;
+ }
+ if (!expect_expr && is(it, Token::_paren, ")")) {
+ break;
+ }
+ if (expect_expr && is(it, Token::_paren, "(")) {
+ it++;
+ OP* op = 0;
+ if ((op = get_expr(it, rec + 1))) {
+ if (is(it, Token::_paren, ")")) {
+ it++;
+ operand_stack.push(op);
+ expect_expr = false;
+ success = true;
+ continue;
+ }
+ throw Error("Error in expression no )");
+ }
+ it = save;
+ throw Error("Error in expression");
+ }
+ // bin op
+ if (!expect_expr && is(it, Token::_op, "is") && is(next, Token::_op, "not")) {
+ it++;
+ it->set_token("is not");
+ }
+ // bin op
+ if (!expect_expr && is(it, Token::_op, "not") && is(next, Token::_op, "like")) {
+ it++;
+ it->set_token("not like");
+ }
+ if (!expect_expr && is(it, Token::_op) && OP::is_binary(it->get_token())) {
+ OP* bop = new OP(*it);
+ while (bop->m_precedence <= get_stack_precedence(operator_stack)) {
+
+ OP* stack_op = operator_stack.top();
+ operator_stack.pop();
+ if (operand_stack.size() >= 2) {
+
+ OP* stk1 = operand_stack.top();
+ stack_op->m_right = stk1;
+ operand_stack.pop();
+ OP* stk2 = operand_stack.top();
+ stack_op->m_left = stk2;
+ operand_stack.pop();
+ operand_stack.push(stack_op);
+ }
+ }
+
+ operator_stack.push(bop);
+ it++;
+ success = true;
+ expect_expr = true;
+ continue;
+ }
+ }
+ while (!operator_stack.empty()) {
+ OP* bop = operator_stack.top();
+ operator_stack.pop();
+ if (bop) {
+ if (operand_stack.size() >= 2) {
+ OP* stk = operand_stack.top();
+ bop->m_right = stk;
+ operand_stack.pop();
+ OP* stk2 = operand_stack.top();
+ bop->m_left = stk2;
+ operand_stack.pop();
+ operand_stack.push(bop);
+ }
+ }
+ }
+ if (operand_stack.size() == 0)
+ return 0;
+ return operand_stack.top();
+ }
+
+ bool is_unary_op(Lit& it)
+ {
+ if (is(it, Token::_op, "-"))
+ return true;
+ if (is(it, Token::_op, "+"))
+ return true;
+ if (is(it, Token::_op, "~"))
+ return true;
+ if (is(it, Token::_op, "not"))
+ return true;
+ return false;
+ }
+ bool is_literal(Lit& it)
+ {
+ if (it->get_type() == Token::_number || it->get_type() == Token::_string)
+ return true;
+ if (it->get_type() == Token::_label && cmpi(it->get_token(), "null"))
+ return true;
+ return false;
+ }
+};
+
+class Lexer {
+public:
+ Lexer(Parser& p)
+ : m_parser(p)
+ , num_state(_nan)
+ {
+ }
+
+ Parser& m_parser;
+
+ enum State {
+ _unknown,
+ _white,
+ _label,
+ _number,
+ _op,
+ _paren,
+ _string
+ };
+ bool is_white(const char c) { return (c == ' ' || c == 9 || c == 10 || c == 13); }
+ bool is_char(const char c) { return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); }
+ bool is_num(const char c) { return ((c >= '0' && c <= '9')); }
+ bool is_op(const char c) { return (c == '*' || c == ',' || c == '=' || c == '<' || c == '>' || c == '/' || c == '|' || c == '%' || c == '+' || c == '-' || c == '&' || c == '~' || c == '!'); }
+ bool is_paren(const char c) { return (c == '(' || c == ')'); }
+ bool is_termination(const char c) { return (c == 0 || c == ';'); }
+ bool is_quote(const char c) { return (c == '\''); }
+ bool is_label_start(const char c) { return (is_char(c)); }
+ bool is_label_part(const char c) { return (is_num(c) || is_char(c) || c == '_'); }
+
+ bool lex(const char* i_str)
+ {
+ const char* p = i_str;
+ State state = _white;
+ const char* strstart = 0;
+ bool is_escaped = false;
+ std::string str = "";
+
+ while (true) {
+ switch (state) {
+ case (_unknown): {
+ if (is_white(*p))
+ state = _white;
+ else if (is_label_start(*p))
+ state = _label;
+ else if (is_num(*p) || (*p == '.' && is_num(p[1])))
+ state = _number;
+ else if (is_quote(*p))
+ state = _string;
+ else if (is_paren(*p))
+ state = _paren;
+ else if (is_op(*p))
+ state = _op;
+ else if (is_termination(*p)) {
+ m_parser.push(Token::_semicolon, ";");
+ if (*p++ == 0) {
+ m_parser.push(Token::_end, "END");
+ return true;
+ }
+ } else {
+ m_parser.push(Token::_end, "END");
+ printf("Unknown char %c (%d) at %d! in statement %s\n", *p, *p, int(p - i_str), i_str);
+ return false;
+ }
+ } break;
+ case (_white): {
+ if (is_white(*p))
+ p++;
+ else {
+ state = _unknown;
+ }
+ } break;
+ case (_string): {
+ if ((!is_escaped) && is_quote(*p)) {
+ if (!strstart) {
+ strstart = p;
+ str = "";
+ } else {
+ m_parser.push(Token::_string, str.c_str());
+
+ str = "";
+ strstart = 0;
+ state = _unknown;
+ is_escaped = false;
+ }
+ p++;
+ } else {
+ if (is_escaped) {
+ if (!is_quote(*p))
+ str += '\\';
+ is_escaped = false;
+ }
+
+ if (*p == 0) {
+ printf("Unexpected end of string in statement %s\n", strstart);
+ m_parser.push(Token::_end, "END");
+ return false;
+ }
+ if (*p == '\\')
+ is_escaped = true;
+ else
+ str += (*p);
+ p++;
+ }
+
+ } break;
+ case (_number): {
+ if (parse_num(*p)) {
+ if (!strstart)
+ strstart = p;
+ p++;
+ } else {
+ if (!strstart)
+ throw Error("Numeric problem");
+ std::string label = strstart;
+ label = label.substr(0, p - strstart);
+ m_parser.push(Token::_number, label.c_str());
+
+ strstart = 0;
+ state = _unknown;
+ }
+ } break;
+ case (_label): {
+ if (!strstart) {
+ strstart = p++;
+ } else if (is_label_part(*p)) {
+ p++;
+ } else {
+ std::string label = strstart;
+ label = label.substr(0, p - strstart);
+
+ Token::Type type = Token::_label;
+ if (cmpi(label, "is"))
+ type = Token::_op;
+ if (cmpi(label, "not"))
+ type = Token::_op;
+ if (cmpi(label, "in"))
+ type = Token::_op;
+ if (cmpi(label, "like"))
+ type = Token::_op;
+ if (cmpi(label, "glob"))
+ type = Token::_op;
+ if (cmpi(label, "match"))
+ type = Token::_op;
+ if (cmpi(label, "regexp"))
+ type = Token::_op;
+ if (cmpi(label, "and"))
+ type = Token::_op;
+ if (cmpi(label, "or"))
+ type = Token::_op;
+
+ m_parser.push(type, label.c_str());
+
+ strstart = 0;
+ state = _unknown;
+ }
+
+ } break;
+ case (_paren): {
+ std::string s;
+ s = *p;
+ m_parser.push(Token::_paren, s.c_str());
+ p++;
+ state = _unknown;
+ } break;
+ case (_op): {
+ std::string s;
+ s = *p;
+ char n = p[1];
+ switch (*p) {
+ case ('|'):
+ if (n == '|') {
+ p++;
+ s += *p;
+ }
+ break;
+ case ('>'):
+ if (n == '=') {
+ p++;
+ s += *p;
+ }
+ if (n == '>') {
+ p++;
+ s += *p;
+ }
+ break;
+ case ('<'):
+ if (n == '<') {
+ p++;
+ s += *p;
+ }
+ if (n == '=') {
+ p++;
+ s += *p;
+ }
+ if (n == '>') {
+ p++;
+ s += *p;
+ }
+ break;
+ case ('='):
+ if (n == '=') {
+ p++;
+ s += *p;
+ }
+ break;
+ case ('!'):
+ if (n == '=') {
+ p++;
+ s += *p;
+ }
+ break;
+ }
+ m_parser.push(Token::_op, s.c_str());
+ p++;
+ state = _unknown;
+ } break;
+ default:
+ printf("Missing impl char %c at %d! in statement %s\n", *p, int(p - i_str), i_str);
+ m_parser.push(Token::_end, "END");
+ return false;
+ }
+ }
+ }
+ bool parse_num(const char p)
+ {
+ switch (num_state) {
+ case (_nan):
+ num_state = _int;
+ if (p == '.')
+ num_state = _dot;
+ break;
+ case (_int):
+ if (p == '.')
+ num_state = _dot;
+ else if (!is_num(p)) {
+ num_state = _nan;
+ return false;
+ }
+ break;
+ case (_dot):
+ if (p == 'E' || p == 'e')
+ num_state = _e;
+ else if (!is_num(p)) {
+ num_state = _nan;
+ return false;
+ }
+ break;
+ case (_dec):
+ if (p == 'E' || p == 'e')
+ num_state = _e;
+ else if (!is_num(p)) {
+ num_state = _nan;
+ return false;
+ }
+ break;
+ case (_e):
+ if (p == '+' || p == '-')
+ num_state = _sign;
+ else if (is_num(p))
+ num_state = _exp;
+ else {
+ throw Error("expected number digit after E");
+ }
+ break;
+ case (_sign):
+ if (!is_num(p))
+ throw Error("expected number digit after E");
+ num_state = _exp;
+ break;
+ case (_exp):
+ if (!is_num(p)) {
+ num_state = _nan;
+ return false;
+ }
+ break;
+ }
+ return true;
+ }
+ enum Num_state {
+ _nan,
+ _int,
+ _dot,
+ _dec,
+ _e,
+ _sign,
+ _exp,
+ };
+
+ Num_state num_state;
+};
+
+void Query::parse()
+{
+ Parser p;
+ Lexer l(p);
+ l.lex(m_sql.c_str());
+ // p.dump();
+ if (!p.analyze(*this))
+ throw Error("error parsing select statement");
+}
+
+// return column and index in tables, or 0 for column if column isn't found
+std::pair<Column*, int> lookup_column_in_tables(const std::vector<Table*>& tables,
+ const std::vector<int>& search_order,
+ const char* name)
+{
+ if (strcmp(name, "*") == 0)
+ return std::pair<Column*, int>((Column*)0, 0);
+
+ for (auto i = search_order.begin(); i != search_order.end(); ++i) {
+ Table* table = tables[*i];
+ int col_index = table->get_col_index(name);
+ if (col_index >= 0)
+ return std::pair<Column*, int>(table->m_cols[col_index], *i);
+ }
+
+ return std::pair<Column*, int>((Column*)0, 0);
+}
+
+OP* OP::compile(const std::vector<Table*>& tables, const std::vector<int>& search_order, Query& q)
+{
+ OP* ret = 0;
+ for (int i = 0; i < max_param(); i++) {
+ if (m_param[i]) {
+ m_param[i] = m_param[i]->compile(tables, search_order, q);
+ if (m_param[i]->m_has_aggregate_function)
+ m_has_aggregate_function = true;
+ }
+ }
+
+ if (m_left) {
+ m_left = m_left->compile(tables, search_order, q);
+ if (m_left->m_has_aggregate_function)
+ m_has_aggregate_function = true;
+ }
+
+ if (m_right) {
+ m_right = m_right->compile(tables, search_order, q);
+ if (m_right->m_has_aggregate_function)
+ m_has_aggregate_function = true;
+ }
+
+ // default to destination row
+ m_row_index = tables.size() - 1;
+
+ if (get_type() == _column) {
+ auto lookup = lookup_column_in_tables(tables, search_order, get_token());
+ Column* column = lookup.first;
+ m_row_index = lookup.second;
+
+ if (!column)
+ throw Error("Column '%s' not found", get_token());
+
+ int offset = column->m_offset;
+
+ m_t = column->m_type;
+
+ switch (m_t) {
+ case Coltype::_int:
+ ret = new Column_access_int(*this, offset);
+ break;
+ case Coltype::_bool:
+ ret = new Column_access_bool(*this, offset);
+ break;
+ case Coltype::_float:
+ ret = new Column_access_float(*this, offset);
+ break;
+ case Coltype::_text:
+ ret = new Column_access_text(*this, offset);
+ break;
+ }
+ } else if (get_type() == _number) {
+ const char* p = get_token();
+ bool integer = true;
+ while (*p != 0) {
+ if (*p < '0' || *p > '9')
+ integer = false;
+ p++;
+ }
+ if (integer) {
+ m_t = Coltype::_int;
+ ret = new Static_int(*this);
+ } else {
+ m_t = Coltype::_float;
+ ret = new Static_float(*this);
+ }
+ } else if (get_type() == _string) {
+ m_t = Coltype::_text;
+ ret = new Static_text(*this);
+ } else if ((get_type() == _function) && m_param[0]) {
+ Table* dest_table = tables[m_row_index];
+
+ m_t = Coltype::_int;
+ if (cmpi(get_token(), "if") && m_param[1] && m_param[2]) {
+ m_t = m_param[1]->m_t;
+ if (m_param[2]->m_t > m_t)
+ m_t = m_param[2]->m_t;
+ ret = new If_func(*this);
+ } else if (cmpi(get_token(), "name") && m_param[1]) {
+ m_t = Coltype::_text;
+ ret = new Name_func(*this);
+ } else if (cmpi(get_token(), "trim")) {
+ m_t = Coltype::_text;
+ ret = new Trim_func(*this);
+ } else if (cmpi(get_token(), "rsplit") && m_param[1]) {
+ m_t = Coltype::_text;
+ ret = new Rsplit_func(*this);
+ } else if (cmpi(get_token(), "netmask")) {
+ m_t = Coltype::_text;
+ ret = new Netmask_func(*this);
+ } else if (cmpi(get_token(), "cc")) {
+ m_t = Coltype::_text;
+ ret = new Cc_func(*this);
+ } else if (cmpi(get_token(), "asn")) {
+ m_t = Coltype::_int;
+ ret = new Asn_func(*this);
+ } else if (cmpi(get_token(), "count")) {
+ m_t = Coltype::_int;
+ ret = new Count_func(*this, dest_table);
+ } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "min")) {
+ m_t = Coltype::_float;
+ ret = new Min_func_float(*this, dest_table);
+ } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "max")) {
+ m_t = Coltype::_float;
+ ret = new Max_func_float(*this, dest_table);
+ } else if (m_param[0]->ret_type() == Coltype::_float && cmpi(get_token(), "sum")) {
+ m_t = Coltype::_float;
+ ret = new Sum_func_float(*this, dest_table);
+ } else if (cmpi(get_token(), "min")) {
+ m_t = Coltype::_int;
+ ret = new Min_func_int(*this, dest_table);
+ } else if (cmpi(get_token(), "max")) {
+ m_t = Coltype::_int;
+ ret = new Max_func_int(*this, dest_table);
+ } else if (cmpi(get_token(), "sum")) {
+ m_t = Coltype::_int;
+ ret = new Sum_func_int(*this, dest_table);
+ } else if (cmpi(get_token(), "lower")) {
+ m_t = Coltype::_text;
+ ret = new Lower_func(*this);
+ } else if (cmpi(get_token(), "len")) {
+ m_t = Coltype::_int;
+ ret = new Len_func(*this);
+ } else if (cmpi(get_token(), "truncate")) {
+ m_t = Coltype::_int;
+ ret = new Truncate_func(*this);
+ } else if (cmpi(get_token(), "stdev")) {
+ m_t = Coltype::_float;
+ ret = new Stdev_func(*this, dest_table);
+ } else if (cmpi(get_token(), "avg")) {
+ m_t = Coltype::_float;
+ ret = new Avg_func(*this, dest_table);
+ }
+ } else if ((get_type() == _op) && m_left && m_right) {
+ if (cmpi(get_token(), "||")) {
+ m_t = Coltype::_text;
+ ret = new Bin_op_concatenate(*this);
+ } else if (cmpi(get_token(), "*")) {
+ if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) {
+ m_t = Coltype::_float;
+ ret = new Bin_op_mul_float(*this);
+ } else {
+ m_t = Coltype::_int;
+ ret = new Bin_op_mul(*this);
+ }
+ } else if (cmpi(get_token(), "/")) {
+ m_t = Coltype::_float;
+ ret = new Bin_op_div(*this);
+ } else if (cmpi(get_token(), "%")) {
+ m_t = Coltype::_float;
+ ret = new Bin_op_modulo(*this);
+ } else if (cmpi(get_token(), "+")) {
+ if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) {
+ m_t = Coltype::_float;
+ ret = new Bin_op_add_float(*this);
+ } else {
+ m_t = Coltype::_int;
+ ret = new Bin_op_add(*this);
+ }
+ } else if (cmpi(get_token(), "-")) {
+ if (m_left->ret_type() == Coltype::_float || m_right->ret_type() == Coltype::_float) {
+ m_t = Coltype::_float;
+ ret = new Bin_op_sub_float(*this);
+ } else {
+ m_t = Coltype::_int;
+ ret = new Bin_op_sub(*this);
+ }
+ } else if (cmpi(get_token(), "<<")) {
+ m_t = Coltype::_int;
+ ret = new Bin_op_arithmetic_shift_left(*this);
+ } else if (cmpi(get_token(), ">>")) {
+ m_t = Coltype::_int;
+ ret = new Bin_op_arithmetic_shift_right(*this);
+ } else if (cmpi(get_token(), "&")) {
+ m_t = Coltype::_int;
+ ret = new Bin_op_bitwise_and(*this);
+ } else if (cmpi(get_token(), "|")) {
+ m_t = Coltype::_int;
+ ret = new Bin_op_bitwise_or(*this);
+ } else if (cmpi(get_token(), "<")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_lt(*this);
+ } else if (cmpi(get_token(), "<=")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_lteq(*this);
+ } else if (cmpi(get_token(), ">")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_gt(*this);
+ } else if (cmpi(get_token(), ">=")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_gteq(*this);
+ } else if (cmpi(get_token(), "=")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_eq(*this);
+ } else if (cmpi(get_token(), "==")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_eq(*this);
+ } else if (cmpi(get_token(), "like")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_like(*this);
+ } else if (cmpi(get_token(), "not like")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_not_like(*this);
+ } else if (cmpi(get_token(), "!=")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_not_eq(*this);
+ } else if (cmpi(get_token(), "<>")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_not_eq(*this);
+ } else if (cmpi(get_token(), "is")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_eq(*this);
+ } else if (cmpi(get_token(), "is not")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_not_eq(*this);
+ } else if (cmpi(get_token(), "and")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_and(*this);
+ } else if (cmpi(get_token(), "or")) {
+ m_t = Coltype::_bool;
+ ret = new Bin_op_or(*this);
+ }
+ } else if ((get_type() == _uop) && m_right) {
+ if (cmpi(get_token(), "not")) {
+ m_t = Coltype::_bool;
+ ret = new Un_op_not(*this);
+ } else if (cmpi(get_token(), "+")) {
+ ret = m_right;
+ } else if (cmpi(get_token(), "-")) {
+ if (m_right->ret_type() == Coltype::_float) {
+ m_t = Coltype::_float;
+ ret = new Un_op_neg_float(*this);
+ } else {
+ m_t = Coltype::_int;
+ ret = new Un_op_neg(*this);
+ }
+ } else if (cmpi(get_token(), "~")) {
+ m_t = Coltype::_int;
+ ret = new Un_op_ones_complement(*this);
+ }
+ }
+ clear_ptr();
+ if (!ret)
+ throw Error("Unknown operator error '%s' !", get_token());
+
+ delete this;
+
+ return ret;
+}
+
+void OP::evaluate_aggregate_operands(Row** rows)
+{
+ if (m_left)
+ m_left->evaluate_aggregate_operands(rows);
+ if (m_right)
+ m_right->evaluate_aggregate_operands(rows);
+ for (int i = 0; i < max_param(); ++i)
+ if (m_param[i])
+ m_param[i]->evaluate_aggregate_operands(rows);
+}
+
+void OP::combine_aggregate(Row* base_row, Row* other_row)
+{
+ if (m_left)
+ m_left->combine_aggregate(base_row, other_row);
+ if (m_right)
+ m_right->combine_aggregate(base_row, other_row);
+ for (int i = 0; i < max_param(); ++i)
+ if (m_param[i])
+ m_param[i]->combine_aggregate(base_row, other_row);
+}
+
+// return any column access ops found in given list of op trees - they don't
+// have to be compiled beforehand; duplicate column tokens are skipped
+std::vector<OP*> find_unique_column_ops(std::vector<OP*> ops)
+{
+ std::vector<OP*> res;
+
+ while (!ops.empty()) {
+ OP* op = ops.back();
+ ops.pop_back();
+
+ if (op->m_left)
+ ops.push_back(op->m_left);
+ if (op->m_right)
+ ops.push_back(op->m_right);
+ for (int i = 0; i < op->max_param(); ++i)
+ if (op->m_param[i])
+ ops.push_back(op->m_param[i]);
+
+ if (op->get_type() == Token::_column) {
+ bool found = false;
+
+ for (auto i = res.begin(); i != res.end(); ++i) {
+ if (cmpii((*i)->get_token(), op->get_token())) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ res.push_back(op);
+ }
+ }
+
+ return res;
+}
+
+void Query::replace_star_column_with_all_columns()
+{
+ bool found_star = false;
+ for (auto i = m_select.begin(); i != m_select.end(); ++i) {
+ if (strcmp((*i)->get_token(), "*") == 0) {
+ found_star = true;
+ break;
+ }
+ }
+
+ if (found_star) {
+ for (auto i = m_select.begin(); i != m_select.end(); ++i)
+ delete *i;
+ m_select.clear();
+
+ if (!m_from_name.empty()) {
+ Packet_handler* handler = get_packet_handler(m_from_name);
+
+ for (auto i = handler->packet_columns.begin(); i != handler->packet_columns.end(); ++i)
+ m_select.push_back(new OP(Token(Token::_column, i->name)));
+ }
+ }
+}
+
+void Query::process_from()
+{
+ replace_star_column_with_all_columns();
+
+ if (m_from_name.empty())
+ return;
+
+ std::vector<OP*> all_ops;
+ all_ops.insert(all_ops.end(), m_select.begin(), m_select.end());
+ if (m_where)
+ all_ops.push_back(m_where);
+ // skip m_having, it can't access source columns
+ for (auto i = m_order_by.m_terms.begin(); i != m_order_by.m_terms.end(); ++i)
+ all_ops.push_back(i->m_op);
+ for (auto i = m_group_by.m_terms.begin(); i != m_group_by.m_terms.end(); ++i)
+ all_ops.push_back(i->m_op);
+
+ auto used_columns = find_unique_column_ops(all_ops);
+
+ // add from table with used columns
+ Packet_handler* handler = get_packet_handler(m_from_name);
+ for (auto j = handler->packet_columns.begin(); j != handler->packet_columns.end(); ++j)
+ for (auto i = used_columns.begin(); i != used_columns.end(); ++i)
+ if (cmpii(j->name, (*i)->get_token()))
+ m_used_from_column_ids.push_back(j->id);
+
+ m_from = handler->create_table(m_used_from_column_ids);
+}
+
+void Query::process_select(Row** rows, Row* dest, GenericAccessor* dest_accessors)
+{
+ for (unsigned int i = 0, size = m_select.size(); i < size; ++i) {
+ OP* op = m_select[i];
+ if (!op)
+ continue;
+
+ if (op->m_has_aggregate_function) {
+ // defer evaluating aggregate functions, just eval their operands
+ op->evaluate_aggregate_operands(rows);
+ } else {
+ Variant v;
+ op->evaluate(rows, v);
+ dest_accessors[i].set(dest, v);
+ }
+ }
+}
+
+void Query::combine_aggregates_in_select(Row* base_row, Row* other_row)
+{
+ for (unsigned int i = 0; i < m_select.size(); ++i) {
+ OP* op = m_select[i];
+ if (op && op->m_has_aggregate_function)
+ op->combine_aggregate(base_row, other_row);
+ }
+}
+
+void Query::process_aggregates_in_select(Row** rows, Row* dest, GenericAccessor dest_accessors[])
+{
+ for (unsigned int i = 0; i < m_select.size(); ++i) {
+ OP* op = m_select[i];
+ if (op && op->m_has_aggregate_function) {
+ Variant v;
+ op->evaluate(rows, v);
+ dest_accessors[i].set(dest, v);
+ }
+ }
+}
+
+bool Query::process_where(Row** rows)
+{
+ if (!m_where)
+ return true;
+
+ Variant v;
+ m_where->evaluate(rows, v);
+ return v.get_bool();
+}
+
+bool Query::process_having(Row** rows)
+{
+ if (!m_having)
+ return true;
+
+ Variant v;
+ m_having->evaluate(rows, v);
+ return v.get_bool();
+}
+
+std::vector<Variant> process_group_by_key(Ordering_terms& group_by, Row** rows)
+{
+ int size = group_by.m_terms.size();
+
+ std::vector<Variant> res(size);
+
+ for (int i = 0; i < size; ++i)
+ group_by.m_terms[i].m_op->evaluate(rows, res[i]);
+
+ return res;
+}
+
+bool Query::has_aggregate_functions()
+{
+ // this assumes the ops have been compiled
+ for (auto it = m_select.begin(); it != m_select.end(); it++)
+ if ((*it)->m_has_aggregate_function)
+ return true;
+
+ return false;
+}
+
+void Query::execute(Reader& reader)
+{
+ std::vector<Table*> tables;
+ std::vector<int> search_results_last, search_results_first, search_results_only;
+
+ // set up tables
+ process_from();
+
+ if (m_from)
+ tables.push_back(m_from);
+ tables.push_back(m_result);
+
+ for (int i = 0; i < int(tables.size()); ++i)
+ search_results_last.push_back(i);
+ for (int i = int(tables.size()) - 1; i >= 0; --i)
+ search_results_first.push_back(i);
+
+ search_results_only.push_back(tables.size() - 1);
+
+ std::vector<Row*> row_ptrs(tables.size());
+ Row** rows = &row_ptrs[0];
+
+ std::vector<GenericAccessor> result_accessors_vector;
+
+ // compile
+ for (auto i = m_select.begin(); i != m_select.end(); ++i) {
+ *i = (*i)->compile(tables, search_results_last, *this);
+ Column* col = m_result->add_column((*i)->get_name(), (*i)->ret_type());
+ GenericAccessor a;
+ a.m_offset = col->m_offset;
+ a.m_type = col->m_type;
+ result_accessors_vector.push_back(a);
+ }
+
+ if (m_where)
+ m_where = m_where->compile(tables, search_results_last, *this);
+
+ if (m_having)
+ m_having = m_having->compile(tables, search_results_only, *this);
+
+ if (m_group_by.exist())
+ m_group_by.compile(tables, search_results_last, *this);
+
+ if (m_order_by.exist()) {
+ // copy any missing columns to result table as hidden so we can
+ // order by them
+ std::vector<OP*> ops;
+ for (auto i = m_order_by.m_terms.begin(); i != m_order_by.m_terms.end(); ++i)
+ ops.push_back(i->m_op);
+
+ std::vector<OP*> column_ops = find_unique_column_ops(ops);
+
+ for (auto i = column_ops.begin(); i != column_ops.end(); ++i) {
+ const char* name = (*i)->get_token();
+ auto lookup = lookup_column_in_tables(tables, search_results_first, name);
+ if (lookup.first and lookup.second < int(tables.size()) - 1) {
+ // found, but not in result table
+ OP* copying_op = new OP(**i);
+ copying_op = copying_op->compile(tables, search_results_last, *this);
+ m_select.push_back(copying_op);
+ Column* col = m_result->add_column(copying_op->get_name(), copying_op->ret_type(), -1, Column::HIDDEN);
+ GenericAccessor a;
+ a.m_offset = col->m_offset;
+ a.m_type = col->m_type;
+ result_accessors_vector.push_back(a);
+ }
+ }
+
+ // we only provide access to result table for "order by"; in order
+ // to make the sort thing work correctly the result table currently
+ // has to be at index 0
+ std::vector<Table*> tables_result_only = { m_result };
+ std::vector<int> tables_result_only_search = { 0 };
+ m_order_by.compile(tables_result_only, tables_result_only_search, *this);
+ }
+
+ // execute
+ GenericAccessor* result_accessors = &result_accessors_vector[0];
+ bool aggregate_functions = has_aggregate_functions();
+
+ int count = 0;
+ bool limiter = !m_order_by.exist() && !m_group_by.exist() && !aggregate_functions && m_limit >= 0;
+
+ if (m_from) {
+ bool first_row = true;
+ Packet_handler* handler = get_packet_handler(m_from_name);
+
+ reader.seek_to_start();
+
+ const int src_i = 0, dest_i = tables.size() - 1;
+
+ rows[src_i] = m_from->create_row();
+
+ if (m_group_by.exist() || aggregate_functions) {
+ std::unordered_map<std::vector<Variant>, Row*> groups;
+
+ rows[dest_i] = 0;
+ while (reader.read_next(handler, m_used_from_column_ids, *rows[src_i], first_row or m_sample == 0 ? 0 : m_sample - 1)) {
+ // fill in groups
+ if (rows[dest_i])
+ rows[dest_i]->reset_text_columns(m_result->m_text_column_offsets);
+ else
+ rows[dest_i] = m_result->create_row();
+
+ process_select(rows, rows[dest_i], result_accessors);
+ if (process_where(rows)) {
+ auto key = process_group_by_key(m_group_by, rows);
+ Row*& entry = groups[key];
+ if (entry) {
+ combine_aggregates_in_select(entry, rows[dest_i]);
+ } else {
+ entry = rows[dest_i];
+ rows[dest_i] = 0;
+ }
+ }
+
+ first_row = false;
+ rows[src_i]->reset_text_columns(m_from->m_text_column_offsets);
+ }
+ if (rows[dest_i])
+ m_result->delete_row(rows[dest_i]);
+
+ // put groups into result
+ for (auto i = groups.begin(); i != groups.end(); ++i) {
+ rows[dest_i] = i->second;
+ // propagate the aggregate results through the evaluation tree
+ process_aggregates_in_select(rows, rows[dest_i], result_accessors);
+ if (process_having(rows))
+ m_result->add_row(rows[dest_i]);
+ else
+ m_result->delete_row(rows[dest_i]);
+ }
+ } else {
+ rows[dest_i] = m_result->create_row();
+ while (reader.read_next(handler, m_used_from_column_ids, *rows[src_i], first_row or m_sample == 0 ? 0 : m_sample - 1)) {
+ // fill in result
+ process_select(rows, rows[dest_i], result_accessors);
+ if (process_where(rows)) {
+ bool commit = true;
+ if (limiter) {
+ int l = count++;
+ if (m_offset > 0)
+ l -= m_offset;
+ if (m_limit >= 0 && l >= m_limit)
+ break;
+
+ if (l < 0)
+ commit = false;
+ }
+
+ if (commit) {
+ m_result->add_row(rows[dest_i]);
+ rows[dest_i] = m_result->create_row();
+ }
+ }
+
+ first_row = false;
+ rows[src_i]->reset_text_columns(m_from->m_text_column_offsets);
+ }
+ m_result->delete_row(rows[dest_i]);
+ }
+
+ m_from->delete_row(rows[src_i]);
+ } else {
+ const int dest_i = tables.size() - 1;
+ rows[dest_i] = m_result->create_row();
+ process_select(rows, rows[dest_i], result_accessors);
+ if (process_where(rows))
+ m_result->add_row(rows[dest_i]);
+ else
+ m_result->delete_row(rows[dest_i]);
+ }
+
+ if (m_order_by.exist())
+ m_result->per_sort(m_order_by);
+
+ if (m_limit >= 0 && !limiter)
+ m_result->limit(m_limit, m_offset);
+}
+
+DB::DB()
+{
+ Column::init_defs();
+}
+
+DB::~DB()
+{
+}
+
+bool DB::query(const char* q)
+{
+ return false;
+}
+
+Table* DB::get_table(const char* i_name)
+{
+ std::string name = lower(i_name);
+ Table* t = 0;
+ auto it = m_tables.find(name);
+ if (it != m_tables.end())
+ t = it->second;
+
+ return t;
+}
+Table* DB::create_or_use_table(const char* i_name)
+{
+ std::string name = lower(i_name);
+ Table* t = get_table(name.c_str());
+ if (!t)
+ t = create_table(name.c_str());
+
+ return t;
+}
+Table* DB::create_table(const char* i_name)
+{
+ std::string name = lower(i_name);
+ Table* t = new Table(name.c_str());
+ m_tables[std::string(name.c_str())] = t;
+
+ return t;
+}
+Column::Column(const char* name, Coltype::Type type, int id, bool hidden)
+ : m_name(name)
+ , m_type(type)
+ , m_def(Column::m_coldefs[type])
+ , m_id(id)
+ , m_offset(0)
+{
+ m_hidden = hidden;
+}
+
+void Trim_func::evaluate(Row** rows, Variant& v)
+{
+ Variant str;
+ m_param[0]->evaluate(rows, str);
+ RefCountStringHandle str_handle(str.get_text());
+ const char* s = (*str_handle)->data;
+
+ const char* t;
+ RefCountStringHandle trim_handle;
+ if (m_param[1]) {
+ Variant trim;
+ m_param[1]->evaluate(rows, trim);
+ trim_handle.set(trim.get_text());
+ t = (*trim_handle)->data;
+ } else
+ t = " ";
+
+ int l = strlen(t);
+ if (l <= 0) {
+ v = *str_handle;
+ return;
+ }
+
+ int slen = strlen(s);
+ int start = 0, end = slen;
+
+ // left trim
+ while (end - start >= l && memcmp(s + start, t, l) == 0)
+ start += l;
+
+ // right trim
+ while (end - start >= l && memcmp(s + end - l, t, l) == 0)
+ end -= l;
+
+ if (start == 0 && end == slen)
+ v = *str_handle;
+ else {
+ RefCountStringHandle res(RefCountString::construct(s, start, end));
+ v = *res;
+ }
+}
+
+Cc_func::Cc_func(const OP& op)
+ : OP(op)
+{
+#ifdef HAVE_LIBMAXMINDDB
+ if (__cc_mmdb) {
+ return;
+ }
+
+ std::string db;
+ char* env = getenv("PACKETQ_MAXMIND_CC_DB");
+ if (env) {
+ db = env;
+ }
+
+ if (db.empty()) {
+ std::list<std::string> paths = {
+ "/var/lib/GeoIP", "/usr/share/GeoIP", "/usr/local/share/GeoIP"
+ };
+
+ if ((env = getenv("PACKETQ_MAXMIND_PATH"))) {
+ paths.push_front(std::string(env));
+ }
+
+ auto i = paths.begin();
+ for (; i != paths.end(); i++) {
+ db = (*i) + "/GeoLite2-Country.mmdb";
+ struct stat s;
+ if (!stat(db.c_str(), &s)) {
+ break;
+ }
+ }
+ if (i == paths.end()) {
+ return;
+ }
+ }
+
+ MMDB_s* mmdb = new (std::nothrow) MMDB_s;
+ if (!mmdb) {
+ return;
+ }
+
+ int ret = MMDB_open(db.c_str(), 0, mmdb);
+ if (ret != MMDB_SUCCESS) {
+ fprintf(stderr, "Warning: cannot open MaxMind CC database \"%s\": %s\n", db.c_str(), MMDB_strerror(ret));
+ delete mmdb;
+ return;
+ }
+
+ __cc_mmdb = mmdb;
+#endif
+}
+
+void Cc_func::evaluate(Row** rows, Variant& v)
+{
+#ifdef HAVE_LIBMAXMINDDB
+ if (!__cc_mmdb) {
+ RefCountStringHandle res(RefCountString::construct(""));
+ v = *res;
+ return;
+ }
+
+ Variant str;
+ m_param[0]->evaluate(rows, str);
+ RefCountStringHandle str_handle(str.get_text());
+
+ int gai_error, ret;
+
+ MMDB_lookup_result_s mmdb_result = MMDB_lookup_string(__cc_mmdb, (*str_handle)->data, &gai_error, &ret);
+
+ if (gai_error || ret != MMDB_SUCCESS || !mmdb_result.found_entry) {
+ RefCountStringHandle res(RefCountString::construct(""));
+ v = *res;
+ return;
+ }
+
+ MMDB_entry_data_s entry_data;
+ ret = MMDB_get_value(&mmdb_result.entry, &entry_data, "country", "iso_code", NULL);
+
+ if (ret != MMDB_SUCCESS || !entry_data.has_data || entry_data.type != MMDB_DATA_TYPE_UTF8_STRING) {
+ RefCountStringHandle res(RefCountString::construct(""));
+ v = *res;
+ return;
+ }
+
+ RefCountStringHandle res(RefCountString::construct(entry_data.utf8_string, 0, entry_data.data_size));
+ v = *res;
+#else
+ RefCountStringHandle res(RefCountString::construct(""));
+ v = *res;
+#endif
+}
+
+Asn_func::Asn_func(const OP& op)
+ : OP(op)
+{
+#ifdef HAVE_LIBMAXMINDDB
+ if (__asn_mmdb) {
+ return;
+ }
+
+ std::string db;
+ char* env = getenv("PACKETQ_MAXMIND_ASN_DB");
+ if (env) {
+ db = env;
+ }
+
+ if (db.empty()) {
+ std::list<std::string> paths = {
+ "/var/lib/GeoIP", "/usr/share/GeoIP", "/usr/local/share/GeoIP"
+ };
+
+ if ((env = getenv("PACKETQ_MAXMIND_PATH"))) {
+ paths.push_front(std::string(env));
+ }
+
+ auto i = paths.begin();
+ for (; i != paths.end(); i++) {
+ db = (*i) + "/GeoLite2-ASN.mmdb";
+ struct stat s;
+ if (!stat(db.c_str(), &s)) {
+ break;
+ }
+ }
+ if (i == paths.end()) {
+ return;
+ }
+ }
+
+ MMDB_s* mmdb = new (std::nothrow) MMDB_s;
+ if (!mmdb) {
+ return;
+ }
+
+ int ret = MMDB_open(db.c_str(), 0, mmdb);
+ if (ret != MMDB_SUCCESS) {
+ fprintf(stderr, "Warning: cannot open MaxMind ASN database \"%s\": %s\n", db.c_str(), MMDB_strerror(ret));
+ delete mmdb;
+ return;
+ }
+
+ __asn_mmdb = mmdb;
+#endif
+}
+
+void Asn_func::evaluate(Row** rows, Variant& v)
+{
+#ifdef HAVE_LIBMAXMINDDB
+ if (!__asn_mmdb) {
+ v = -1;
+ return;
+ }
+
+ Variant str;
+ m_param[0]->evaluate(rows, str);
+ RefCountStringHandle str_handle(str.get_text());
+
+ int gai_error, ret;
+
+ MMDB_lookup_result_s mmdb_result = MMDB_lookup_string(__asn_mmdb, (*str_handle)->data, &gai_error, &ret);
+
+ if (gai_error || ret != MMDB_SUCCESS || !mmdb_result.found_entry) {
+ v = -1;
+ return;
+ }
+
+ MMDB_entry_data_s entry_data;
+ ret = MMDB_get_value(&mmdb_result.entry, &entry_data, "autonomous_system_number", NULL);
+
+ if (ret != MMDB_SUCCESS || !entry_data.has_data || entry_data.type != MMDB_DATA_TYPE_UINT32) {
+ v = -1;
+ return;
+ }
+
+ v = (int_column)entry_data.uint32;
+#else
+ v = -1;
+#endif
+}
+
+DB g_db;
+
+Coldef Column::m_coldefs[COLTYPE_MAX];
+
+} // namespace packetq