From e3caf3b12bb23003dc6c8f339e84e51e356bb019 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Wed, 22 Jan 2025 13:35:47 -0700 Subject: [PATCH] prefix column with column type and length when -u is passed in gufi_query and gufi_sqlite3 --- include/bf.h | 18 +++ include/dbutils.h | 2 + .../{validate_inputs.h => handle_sql.h} | 6 +- include/gufi_query/query.h | 2 +- include/print.h | 1 + src/CMakeLists.txt | 2 +- src/bf.c | 10 ++ src/dbutils.c | 27 ++++ src/gufi_query/aggregate.c | 14 +- .../{validate_inputs.c => handle_sql.c} | 44 +++++- src/gufi_query/main.c | 6 +- src/gufi_query/process_queries.c | 4 +- src/gufi_query/processdir.c | 4 +- src/gufi_query/query.c | 16 +- src/gufi_sqlite3.c | 32 +++- src/gufi_vt.c | 142 ++++++++++-------- src/print.c | 29 ++++ test/regression/gufi_query.expected | 1 + test/regression/gufi_sqlite3.expected | 1 + test/regression/gufi_vt.expected | 4 +- test/regression/gufi_vt.sh.in | 2 +- test/unit/googletest/PoolArgs.cpp.in | 1 + test/unit/googletest/print.cpp | 1 + 23 files changed, 278 insertions(+), 91 deletions(-) rename include/gufi_query/{validate_inputs.h => handle_sql.h} (96%) rename src/gufi_query/{validate_inputs.c => handle_sql.c} (80%) diff --git a/include/bf.h b/include/bf.h index 8ba0272f9..594d9362e 100644 --- a/include/bf.h +++ b/include/bf.h @@ -178,6 +178,24 @@ struct input { refstr_t fin; } sql; + /* + * if outputting to STDOUT or OUTFILE, get list of + * types of final output to prefix columns with + * + * set up by gufi_query but cleaned up by input_fini + */ + struct { + int prefix; + + /* set if not aggregating */ + int *tsum; + int *sum; + int *ent; + + /* set if aggregating */ + int *agg; + } types; + int printdir; int printing; int printheader; diff --git a/include/dbutils.h b/include/dbutils.h index 2bc981b83..34ca0ce3b 100644 --- a/include/dbutils.h +++ b/include/dbutils.h @@ -268,6 +268,8 @@ enum CheckRollupScore { int bottomup_collect_treesummary(sqlite3 *db, const char *dirname, sll_t *subdirs, const enum 
CheckRollupScore check_rollupscore); +int *get_col_types(sqlite3 *db, const refstr_t *sql, int *cols); + #ifdef __cplusplus } #endif diff --git a/include/gufi_query/validate_inputs.h b/include/gufi_query/handle_sql.h similarity index 96% rename from include/gufi_query/validate_inputs.h rename to include/gufi_query/handle_sql.h index e5781aecc..f29e2cc3d 100644 --- a/include/gufi_query/validate_inputs.h +++ b/include/gufi_query/handle_sql.h @@ -62,11 +62,11 @@ OF SUCH DAMAGE. -#ifndef GUFI_QUERY_VALIDATE_INPUTS_H -#define GUFI_QUERY_VALIDATE_INPUTS_H +#ifndef GUFI_QUERY_HANDLE_SQL_H +#define GUFI_QUERY_HANDLE_SQL_H #include "bf.h" -int validate_inputs(struct input *in); +int handle_sql(struct input *in); #endif diff --git a/include/gufi_query/query.h b/include/gufi_query/query.h index cd8afe743..308e30be8 100644 --- a/include/gufi_query/query.h +++ b/include/gufi_query/query.h @@ -71,7 +71,7 @@ OF SUCH DAMAGE. void querydb(struct work *work, const char *dbname, const size_t dbname_len, - sqlite3 *db, const char *query, + sqlite3 *db, const char *query, int *types, PoolArgs_t *pa, int id, int (*callback)(void *, int, char **, char**), int *rc); diff --git a/include/print.h b/include/print.h index 3542cfa83..7c60b438d 100644 --- a/include/print.h +++ b/include/print.h @@ -82,6 +82,7 @@ typedef struct PrintArgs { pthread_mutex_t *mutex; /* mutex for printing to stdout */ FILE *outfile; size_t rows; /* number of rows returned by the query */ + int *types; /* if types is set, prefix each column with a 1 char type (https://www.sqlite.org/c3ref/c_blob.html) and a 4 byte human readable length */ /* size_t printed; /\* number of records printed by the callback *\/ */ } PrintArgs_t; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5612f0e95..e3d847b3b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -200,10 +200,10 @@ add_library(gufi_query_lib OBJECT gufi_query/aggregate.c gufi_query/external.c gufi_query/gqw.c + gufi_query/handle_sql.c 
gufi_query/process_queries.c gufi_query/processdir.c gufi_query/query.c - gufi_query/validate_inputs.c ) add_dependencies(gufi_query_lib GUFI) diff --git a/src/bf.c b/src/bf.c index bc8844ca4..13d7824dc 100644 --- a/src/bf.c +++ b/src/bf.c @@ -124,6 +124,10 @@ struct input *input_init(struct input *in) { void input_fini(struct input *in) { if (in) { + free(in->types.agg); + free(in->types.ent); + free(in->types.sum); + free(in->types.tsum); sll_destroy(&in->external_attach, free); trie_free(in->skip); } @@ -156,6 +160,7 @@ void print_help(const char* prog_name, case 'd': printf(" -d delimiter (one char) [use 'x' for 0x%02X]", (uint8_t)fielddelim); break; case 'o': printf(" -o output file (one-per-thread, with thread-id suffix)"); break; case 'O': printf(" -O output DB"); break; + case 'u': printf(" -u prefix output with 1 byte type and 4 byte human readable length"); break; /* need to use text to avoid \x0a confusion */ case 'I': printf(" -I SQL init"); break; case 'T': printf(" -T SQL for tree-summary table"); break; case 'S': printf(" -S SQL for summary table"); break; @@ -208,6 +213,7 @@ void show_input(struct input* in, int retval) { printf("in.maxthreads = %zu\n", in->maxthreads); printf("in.delim = '%c'\n", in->delim); printf("in.andor = %d\n", (int) in->andor); + printf("in.types.prefix = %d\n", in->types.prefix); printf("in.process_xattrs = %d\n", in->process_xattrs); printf("in.nobody.uid = %" STAT_uid "\n", in->nobody.uid); printf("in.nobody.gid = %" STAT_gid "\n", in->nobody.gid); @@ -353,6 +359,10 @@ int parse_cmd_line(int argc, INSTALL_STR(&in->outname, optarg); break; + case 'u': + in->types.prefix = 1; + break; + case 'I': // SQL initializations INSTALL_STR(&in->sql.init, optarg); break; diff --git a/src/dbutils.c b/src/dbutils.c index 634f33df8..3a2ba63e7 100644 --- a/src/dbutils.c +++ b/src/dbutils.c @@ -1190,3 +1190,30 @@ int bottomup_collect_treesummary(sqlite3 *db, const char *dirname, sll_t *subdir return inserttreesumdb(dirname, db, &tsum, 0, 
0, 0); } + +int *get_col_types(sqlite3 *db, const refstr_t *sql, int *cols) { + /* parse sql */ + sqlite3_stmt *stmt = NULL; + const int rc = sqlite3_prepare_v2(db, sql->data, sql->len, &stmt, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr, "Error: Could not prepare '%s' for getting column types: %s (%d)\n", sql->data, sqlite3_errstr(rc), rc); + return NULL; + } + + /* get column count */ + *cols = sqlite3_column_count(stmt); + if (*cols == 0) { + fprintf(stderr, "Error: '%s' was detected to have 0 columns\n", sql->data); + sqlite3_finalize(stmt); + return NULL; + } + + /* get each column's type */ + int *types = malloc(*cols * sizeof(int)); + for(int i = 0; i < *cols; i++) { + types[i] = sqlite3_column_type(stmt, i); + } + + sqlite3_finalize(stmt); + return types; +} diff --git a/src/gufi_query/aggregate.c b/src/gufi_query/aggregate.c index c5531d69c..fe6593b1d 100644 --- a/src/gufi_query/aggregate.c +++ b/src/gufi_query/aggregate.c @@ -157,12 +157,14 @@ int aggregate_process(Aggregate_t *aggregate, struct input *in) { /* normally expect STDOUT/OUTFILE to have SQL to run, but OUTDB can have SQL to run as well */ if ((in->output != OUTDB) || in->sql.agg.len) { - PrintArgs_t pa; - pa.output_buffer = &aggregate->ob; - pa.delim = in->delim; - pa.mutex = NULL; - pa.outfile = aggregate->outfile; - pa.rows = 0; + PrintArgs_t pa = { + .output_buffer = &aggregate->ob, + .delim = in->delim, + .mutex = NULL, + .outfile = aggregate->outfile, + .rows = 0, + .types = in->types.agg, + }; char *err = NULL; if (sqlite3_exec(aggregate->db, in->sql.agg.data, print_parallel, &pa, &err) != SQLITE_OK) { diff --git a/src/gufi_query/validate_inputs.c b/src/gufi_query/handle_sql.c similarity index 80% rename from src/gufi_query/validate_inputs.c rename to src/gufi_query/handle_sql.c index 7d64831dd..1e609ec92 100644 --- a/src/gufi_query/validate_inputs.c +++ b/src/gufi_query/handle_sql.c @@ -63,10 +63,13 @@ OF SUCH DAMAGE. 
#include +#include -#include "gufi_query/validate_inputs.h" +#include "dbutils.h" +#include "gufi_query/handle_sql.h" +#include "template_db.h" -int validate_inputs(struct input *in) { +int handle_sql(struct input *in) { /* * - Leaves are final outputs * - OUTFILE/OUTDB + aggregation will create per thread and final aggregation files @@ -140,5 +143,42 @@ int validate_inputs(struct input *in) { } } + /* now that the SQL has been validated, generate types if necessary */ + if ((in->types.prefix == 1) && ((in->output == STDOUT) || (in->output == OUTFILE))) { + /* have to create temporary db since there is no guarantee of a db yet */ + sqlite3 *db = opendb(":memory:", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0, 0, create_dbdb_tables, NULL); + if (!db) { + return -1; + } + + int cols = 0; /* discarded */ + + /* if not aggregating, get types for T, S, and E */ + if (!in->sql.init_agg.len) { + if (in->sql.tsum.len) { + in->types.tsum = get_col_types(db, &in->sql.tsum, &cols); + } + if (in->sql.sum.len) { + in->types.sum = get_col_types(db, &in->sql.sum, &cols); + } + if (in->sql.ent.len) { + in->types.ent = get_col_types(db, &in->sql.ent, &cols); + } + } + /* types for G */ + else { + char *err = NULL; + if (sqlite3_exec(db, in->sql.init_agg.data, NULL, NULL, &err) != SQLITE_OK) { + fprintf(stderr, "Error: Init failed while getting column types: %s\n", err); + sqlite3_free(err); + closedb(db); + return -1; + } + in->types.agg = get_col_types(db, &in->sql.agg, &cols); + } + + closedb(db); + } + return 0; } diff --git a/src/gufi_query/main.c b/src/gufi_query/main.c index 4f79eb6dc..398e8cb85 100644 --- a/src/gufi_query/main.c +++ b/src/gufi_query/main.c @@ -78,8 +78,8 @@ OF SUCH DAMAGE. 
#include "gufi_query/aggregate.h" #include "gufi_query/gqw.h" +#include "gufi_query/handle_sql.h" #include "gufi_query/processdir.h" -#include "gufi_query/validate_inputs.h" static void sub_help(void) { printf("GUFI_index find GUFI index here\n"); @@ -93,9 +93,9 @@ int main(int argc, char *argv[]) /* Callers provide the options-string for get_opt(), which will */ /* control which options are parsed for each program. */ struct input in; - process_args_and_maybe_exit("hHvT:S:E:an:jo:d:O:I:F:y:z:J:K:G:mB:wxk:M:s:" COMPRESS_OPT "Q:", 1, "GUFI_index ...", &in); + process_args_and_maybe_exit("hHvT:S:E:an:jo:d:O:uI:F:y:z:J:K:G:mB:wxk:M:s:" COMPRESS_OPT "Q:", 1, "GUFI_index ...", &in); - if (validate_inputs(&in) != 0) { + if (handle_sql(&in) != 0) { input_fini(&in); return EXIT_FAILURE; } diff --git a/src/gufi_query/process_queries.c b/src/gufi_query/process_queries.c index cd11d5dfd..9ed62a8ee 100644 --- a/src/gufi_query/process_queries.c +++ b/src/gufi_query/process_queries.c @@ -255,7 +255,7 @@ int process_queries(PoolArgs_t *pa, if (in->sql.sum.len) { recs=1; /* set this to one record - if the sql succeeds it will set to 0 or 1 */ /* put in the path relative to the user's input */ - querydb(&gqw->work, dbname, dbname_len, db, in->sql.sum.data, pa, id, print_parallel, &recs); + querydb(&gqw->work, dbname, dbname_len, db, in->sql.sum.data, in->types.sum, pa, id, print_parallel, &recs); } else { recs = 1; } @@ -265,7 +265,7 @@ int process_queries(PoolArgs_t *pa, /* if we have recs (or are running an OR) query the entries table */ if (recs > 0) { if (in->sql.ent.len) { - querydb(&gqw->work, dbname, dbname_len, db, in->sql.ent.data, pa, id, print_parallel, &recs); /* recs is not used */ + querydb(&gqw->work, dbname, dbname_len, db, in->sql.ent.data, in->types.ent, pa, id, print_parallel, &recs); /* recs is not used */ } } } diff --git a/src/gufi_query/processdir.c b/src/gufi_query/processdir.c index 36a88cea8..eb9531640 100644 --- a/src/gufi_query/processdir.c +++ 
b/src/gufi_query/processdir.c @@ -214,14 +214,14 @@ int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) { if (in->andor == AND) { /* make sure the treesummary table exists */ querydb(&gqw->work, dbname, dbname_len, db, "SELECT name FROM " ATTACH_NAME ".sqlite_master " - "WHERE (type == 'table') AND (name == '" TREESUMMARY "');", + "WHERE (type == 'table') AND (name == '" TREESUMMARY "');", NULL, pa, id, count_rows, &recs); if (recs < 1) { recs = -1; } else { /* run in->sql.tsum */ - querydb(&gqw->work, dbname, dbname_len, db, in->sql.tsum.data, pa, id, print_parallel, &recs); + querydb(&gqw->work, dbname, dbname_len, db, in->sql.tsum.data, in->types.tsum, pa, id, print_parallel, &recs); } } /* this is an OR or we got a record back. go on to summary/entries */ diff --git a/src/gufi_query/query.c b/src/gufi_query/query.c index c7970c97c..209cd9ab3 100644 --- a/src/gufi_query/query.c +++ b/src/gufi_query/query.c @@ -69,16 +69,18 @@ OF SUCH DAMAGE. /* wrapper wround sqlite3_exec to pass arguments and check for errors */ void querydb(struct work *work, const char *dbname, const size_t dbname_len, - sqlite3 *db, const char *query, + sqlite3 *db, const char *query, int *types, PoolArgs_t *pa, int id, int (*callback)(void *, int, char **, char**), int *rc) { ThreadArgs_t *ta = &pa->ta[id]; - PrintArgs_t args; - args.output_buffer = &ta->output_buffer; - args.delim = pa->in->delim; - args.mutex = pa->stdout_mutex; - args.outfile = ta->outfile; - args.rows = 0; + PrintArgs_t args = { + .output_buffer = &ta->output_buffer, + .delim = pa->in->delim, + .mutex = pa->stdout_mutex, + .outfile = ta->outfile, + .rows = 0, + .types = types, + }; char *err = NULL; #ifdef SQL_EXEC diff --git a/src/gufi_sqlite3.c b/src/gufi_sqlite3.c index 4578ab8aa..f23011016 100644 --- a/src/gufi_sqlite3.c +++ b/src/gufi_sqlite3.c @@ -81,7 +81,7 @@ static void sub_help(void) { int main(int argc, char *argv[]) { struct input in; - process_args_and_maybe_exit("hvd:", 0, "[db 
[SQL]...]", &in); + process_args_and_maybe_exit("hvd:u", 0, "[db [SQL]...]", &in); const int args_left = argc - idx; const char *dbname = (args_left == 0)?SQLITE_MEMORY:argv[idx++]; @@ -95,7 +95,6 @@ int main(int argc, char *argv[]) { } addqueryfuncs(db); - addhistfuncs(db); /* no buffering */ struct OutputBuffer ob; @@ -107,6 +106,7 @@ int main(int argc, char *argv[]) { .mutex = NULL, .outfile = stdout, .rows = 0, + .types = NULL, }; char *err = NULL; @@ -116,19 +116,47 @@ int main(int argc, char *argv[]) { char *line = NULL; size_t len = 0; while (getline(&line, &len, stdin) != -1) { + if (in.types.prefix) { + refstr_t sql = { + .data = line, + .len = strlen(line), + }; + int cols = 0; + pa.types = get_col_types(db, &sql, &cols); + } + if (sqlite3_exec(db, line, print_parallel, &pa, &err) != SQLITE_OK) { sqlite_print_err_and_free(err, stderr, "Error: SQL error: %s\n", err); + free(pa.types); + pa.types = NULL; break; } + + free(pa.types); + pa.types = NULL; } free(line); } else { for(int i = idx; i < argc; i++) { + if (in.types.prefix) { + refstr_t sql = { + .data = argv[i], + .len = strlen(argv[i]), + }; + int cols = 0; + pa.types = get_col_types(db, &sql, &cols); + } + if (sqlite3_exec(db, argv[i], print_parallel, &pa, &err) != SQLITE_OK) { sqlite_print_err_and_free(err, stderr, "Error: SQL error: %s\n", err); + free(pa.types); + pa.types = NULL; break; } + + free(pa.types); + pa.types = NULL; } } diff --git a/src/gufi_vt.c b/src/gufi_vt.c index 1612b6e89..a894d4970 100644 --- a/src/gufi_vt.c +++ b/src/gufi_vt.c @@ -125,11 +125,6 @@ typedef struct gufi_query_sql { typedef struct gufi_vtab { sqlite3_vtab base; gq_sql_t sql; /* not const to allow for T and S to be modified */ - - /* TODO: track multiple aggregate result files to not rerun queries unnecessarily */ - /* hash(sql) -> file name */ - char *dbname; /* keep track of aggregate db file name to delete upon unload; */ - /* this is always freed */ } gufi_vtab; typedef struct gufi_vtab_cursor { @@ -177,15 
+172,16 @@ static int addvtfuncs(sqlite3 *db) { * everything to link dynamically */ static int gufi_query_aggregate_db(const char *indexroot, const char *threads, const gq_sql_t *sql, - FILE **output, char **errmsg) { - const char *argv[23] = { + FILE **output, char **errmsg) { + const char *argv[24] = { "gufi_query", + "-u", "-d", delim, }; #define set_argv(argc, argv, flag, value) if (value) { argv[argc++] = flag; argv[argc++] = value; } - int argc = 3; + int argc = 4; set_argv(argc, argv, "-n", threads); set_argv(argc, argv, "-I", sql->I); set_argv(argc, argv, "-T", sql->T); @@ -254,11 +250,11 @@ static int gufi_vtConnect(sqlite3 *db, } /* positional arguments to virtual table/table-valued function */ -#define GUFI_VT_ARGS_INDEXROOT 0 -#define GUFI_VT_ARGS_THREADS 1 -#define GUFI_VT_ARGS_T 2 -#define GUFI_VT_ARGS_S 3 -#define GUFI_VT_ARGS_COUNT 4 +#define GUFI_VT_ARGS_INDEXROOT 0 +#define GUFI_VT_ARGS_THREADS 1 +#define GUFI_VT_ARGS_T 2 +#define GUFI_VT_ARGS_S 3 +#define GUFI_VT_ARGS_COUNT 4 #define GUFI_VT_ARG_COLUMNS "indexroot TEXT HIDDEN, threads INT64 HIDDEN, " \ "T TEXT HIDDEN, S TEXT HIDDEN, " @@ -285,14 +281,14 @@ static int gufi_vtConnect(sqlite3 *db, #define SELECT_FROM(name, extra_sql) \ (const char *) ( \ "INSERT INTO " INTERMEDIATE " " \ - "SELECT " GUFI_VT_EXTRA_COLUMNS_SQL "*" \ + "SELECT " GUFI_VT_EXTRA_COLUMNS_SQL "* " \ "FROM " name ";" extra_sql \ ) #define SELECT_FROM_VR(name, extra_sql) \ (const char *) ( \ "INSERT INTO " INTERMEDIATE " " \ - "SELECT " GUFI_VT_EXTRA_COLUMNS_SQL_VR "*" \ + "SELECT " GUFI_VT_EXTRA_COLUMNS_SQL_VR "* " \ "FROM " name ";" extra_sql \ ) @@ -369,9 +365,6 @@ static int gufi_vtBestIndex(sqlite3_vtab *tab, } static int gufi_vtDisconnect(sqlite3_vtab *pVtab) { - gufi_vtab *vtab = (gufi_vtab *) pVtab; - remove(vtab->dbname); /* check for error? 
/*
 * Locate column idx inside one row of type/length prefixed output
 * (the format gufi_query produces when run with -u).
 *
 * Each column is laid out as
 *     1 byte type | 4 ASCII decimal digits of length | <length> bytes of value
 * and is followed by a single delimiter byte. Nothing is required after
 * the final column.
 *
 * str     - start of the row; may be NULL
 * len     - number of valid bytes at str
 * sep     - column delimiter; unused because columns are found through
 *           their length prefix rather than by scanning for the delimiter
 * idx     - zero based index of the wanted column
 * type    - receives the column's type byte
 * col     - receives a pointer to the first byte of the column's value
 *           (points into str; not NUL terminated)
 * col_len - receives the length of the column's value
 *
 * Returns 0 when the column was found. Returns 1 and sets *type to 0,
 * *col to NULL and *col_len to 0 when str is NULL, when the row has fewer
 * than idx + 1 columns, or when a header or value does not fit inside len
 * or has a length field that is not 4 decimal digits.
 */
static int find_col(const char *str, const size_t len, const char sep, const size_t idx,
                    int *type, const char **col, size_t *col_len) {
    (void) sep;

    enum {
        TYPE_BYTES   = 1,
        LENGTH_BYTES = 4,
        HEADER_BYTES = TYPE_BYTES + LENGTH_BYTES,
    };

    if (str) {
        size_t pos = 0; /* invariant: pos <= len, so len - pos never wraps */

        for(size_t i = 0; i <= idx; i++) {
            /* the type and length must both be inside the row before they are read */
            if ((len - pos) < (size_t) HEADER_BYTES) {
                break;
            }

            const int col_type = str[pos];
            const char *digits = str + pos + TYPE_BYTES;

            /* the length is text, not binary, so that 10 is not confused with '\x0a' */
            size_t value_len = 0;
            int valid = 1;
            for(size_t d = 0; d < (size_t) LENGTH_BYTES; d++) {
                if ((digits[d] < '0') || (digits[d] > '9')) {
                    valid = 0;
                    break;
                }
                value_len = (value_len * 10) + (size_t) (digits[d] - '0');
            }
            if (!valid) {
                break;
            }
            pos += HEADER_BYTES;

            /* never hand back or step over a value that runs past the end of the row */
            if (value_len > (len - pos)) {
                break;
            }

            if (i == idx) {
                *type = col_type;
                *col = str + pos;
                *col_len = value_len;
                return 0;
            }

            /* skip the value and the delimiter that follows it */
            pos += value_len;
            if (pos < len) {
                pos++;
            }
        }
    }

    *type = 0;
    *col = NULL;
    *col_len = 0;
    return 1;
}
gufi_vtab_cursor *pCur = (gufi_vtab_cursor *) cur; + int type = 0; const char *col = NULL; size_t len = 0; - find_col(pCur->row, strlen(pCur->row), delim[0], N - GUFI_VT_ARGS_COUNT, &col, &len); - if (col && len) { - sqlite3_result_text(ctx, col, len, SQLITE_TRANSIENT); + if (find_col(pCur->row, pCur->len, delim[0], N - GUFI_VT_ARGS_COUNT, &type, &col, &len) == 0) { + switch(type) { + case SQLITE_INTEGER: + { + int value = 0; + if (sscanf(col, "%d", &value) != 1) { + const int err = errno; + gufi_vtab *vtab = (gufi_vtab *) &pCur->base.pVtab; + vtab->base.zErrMsg = sqlite3_mprintf("Could not parse '%.*s' as a double: %s (%d)\n", + len, col, strerror(err), err); + return SQLITE_ERROR; + } + sqlite3_result_int(ctx, value); + break; + } + case SQLITE_FLOAT: + { + double value = 0; + if (sscanf(col, "%lf", &value) != 1) { + const int err = errno; + gufi_vtab *vtab = (gufi_vtab *) &pCur->base.pVtab; + vtab->base.zErrMsg = sqlite3_mprintf("Could not parse '%.*s' as a double: %s (%d)\n", + len, col, strerror(err), err); + return SQLITE_ERROR; + } + sqlite3_result_double(ctx, value); + break; + } + case SQLITE_TEXT: + case SQLITE_BLOB: + sqlite3_result_text(ctx, col, len, SQLITE_TRANSIENT); + break; + case SQLITE_NULL: + default: + sqlite3_result_text(ctx, col, len, SQLITE_TRANSIENT); + /* sqlite3_result_null(ctx); */ + break; + } } else { sqlite3_result_null(ctx); diff --git a/src/print.c b/src/print.c index 68e2fde8a..7230ae26f 100644 --- a/src/print.c +++ b/src/print.c @@ -62,6 +62,7 @@ OF SUCH DAMAGE. 
+#include #include #include @@ -72,6 +73,7 @@ int print_parallel(void *args, int count, char **data, char **columns) { PrintArgs_t *print = (PrintArgs_t *) args; struct OutputBuffer *ob = print->output_buffer; + const int *types = print->types; size_t *lens = malloc(count * sizeof(size_t)); size_t row_len = count - 1 + 1; /* one delimiter per column except last column + newline */ @@ -83,6 +85,10 @@ int print_parallel(void *args, int count, char **data, char **columns) { } } + if (types) { + row_len += count * 5; /* one byte type + 4 byte human readable length per column */ + } + /* if a row cannot fit the buffer for whatever reason, flush the existing buffer */ if ((ob->capacity - ob->filled) < row_len) { if (print->mutex) { @@ -102,12 +108,28 @@ int print_parallel(void *args, int count, char **data, char **columns) { } const int last = count - 1; for(int i = 0; i < last; i++) { + if (types) { + const char col_type = types[i]; + fwrite(&col_type, sizeof(char), sizeof(col_type), print->outfile); + + char buf[5]; + const size_t len = snprintf(buf, sizeof(buf), "%04zu", lens[i]); + fwrite(buf, sizeof(char), len, print->outfile); + } if (data[i]) { fwrite(data[i], sizeof(char), lens[i], print->outfile); } fwrite(&print->delim, sizeof(char), 1, print->outfile); } /* print last column with no follow up delimiter */ + if (types) { + const char col_type = types[last]; + fwrite(&col_type, sizeof(char), sizeof(col_type), print->outfile); + + char buf[5]; + const size_t len = snprintf(buf, sizeof(buf), "%04zu", lens[last]); + fwrite(buf, sizeof(char), len, print->outfile); + } fwrite(data[last], sizeof(char), lens[last], print->outfile); fwrite("\n", sizeof(char), 1, print->outfile); ob->count++; @@ -122,6 +144,13 @@ int print_parallel(void *args, int count, char **data, char **columns) { char *buf = ob->buf; size_t filled = ob->filled; for(int i = 0; i < count; i++) { + if (types) { + buf[filled] = types[i]; + filled++; + + const ssize_t len = snprintf(&buf[filled], 
ob->capacity - filled, "%04zu", lens[i]); + filled += len; + } if (data[i]) { memcpy(&buf[filled], data[i], lens[i]); filled += lens[i]; diff --git a/test/regression/gufi_query.expected b/test/regression/gufi_query.expected index b638e2e2c..828700554 100644 --- a/test/regression/gufi_query.expected +++ b/test/regression/gufi_query.expected @@ -14,6 +14,7 @@ options: -o output file (one-per-thread, with thread-id suffix) -d delimiter (one char) [use 'x' for 0x1E] -O output DB + -u prefix output with 1 byte type and 4 byte human readable length -I SQL init -F SQL cleanup -y minimum level to go down diff --git a/test/regression/gufi_sqlite3.expected b/test/regression/gufi_sqlite3.expected index bc2202536..cc7d96937 100644 --- a/test/regression/gufi_sqlite3.expected +++ b/test/regression/gufi_sqlite3.expected @@ -5,6 +5,7 @@ options: -h help -v version -d delimiter (one char) [use 'x' for 0x1E] + -u prefix output with 1 byte type and 4 byte human readable length db db file path SQL SQL statements to run diff --git a/test/regression/gufi_vt.expected b/test/regression/gufi_vt.expected index 8092f3ca5..d2483796c 100644 --- a/test/regression/gufi_vt.expected +++ b/test/regression/gufi_vt.expected @@ -4,13 +4,13 @@ $ gufi_treesummary_all "prefix" # Query treesummary $ ( echo ".load gufi_vt" - echo "SELECT minsize, maxsize, minmtime, maxmtime FROM gufi_vt_treesummary('prefix', 2) ORDER BY minsize ASC, maxsize ASC;" + echo "SELECT minsize, maxsize, minmtime, maxmtime FROM gufi_vt_treesummary('prefix', 2) ORDER BY CAST(minsize AS INT64) ASC, CAST(maxsize AS INT64) ASC;" ) | sqlite3 0|1048576|0|1048576 1|5|1|5 +5|5|4|5 11|12|11|12 15|15|15|15 -5|5|4|5 9223372036854775807|-9223372036854775808|9223372036854775807|-9223372036854775808 # Query summary diff --git a/test/regression/gufi_vt.sh.in b/test/regression/gufi_vt.sh.in index a3ffcd061..389940526 100755 --- a/test/regression/gufi_vt.sh.in +++ b/test/regression/gufi_vt.sh.in @@ -86,7 +86,7 @@ echo "# Generate treesummary tables 
in all directories" run_no_sort "${GUFI_TREESUMMARY_ALL} \"${INDEXROOT}\"" | sed '/^Started .*$/d' echo "# Query treesummary" -query_vt "SELECT minsize, maxsize, minmtime, maxmtime FROM gufi_vt_treesummary('${INDEXROOT}', ${THREADS}) ORDER BY minsize ASC, maxsize ASC;" +query_vt "SELECT minsize, maxsize, minmtime, maxmtime FROM gufi_vt_treesummary('${INDEXROOT}', ${THREADS}) ORDER BY CAST(minsize AS INT64) ASC, CAST(maxsize AS INT64) ASC;" for name in summary entries pentries vrsummary vrpentries do diff --git a/test/unit/googletest/PoolArgs.cpp.in b/test/unit/googletest/PoolArgs.cpp.in index 6c60e5ce6..557649606 100644 --- a/test/unit/googletest/PoolArgs.cpp.in +++ b/test/unit/googletest/PoolArgs.cpp.in @@ -124,6 +124,7 @@ void test_common(PoolArgs *pa) { print.mutex = nullptr; print.outfile = file; print.rows = 0; + print.types = nullptr; // read from the database being processed // no need for WHERE - there should only be 1 table diff --git a/test/unit/googletest/print.cpp b/test/unit/googletest/print.cpp index 2188848f4..9a021a5fa 100644 --- a/test/unit/googletest/print.cpp +++ b/test/unit/googletest/print.cpp @@ -103,6 +103,7 @@ static void print_parallel_mutex(pthread_mutex_t *mutex) { pa.mutex = mutex; pa.outfile = file; pa.rows = 0; + pa.types = nullptr; // A\n is buffered in OutputBuffer and takes up all available space {