Skip to content

Commit

Permalink
SQLite3 virtual tables/table-valued functions
Browse files Browse the repository at this point in the history
This exposes an entire GUFI tree as a single view, instead of as many
separate database files, through the SQLite virtual table interface.
Users can query gufi_vt_* virtual tables as though they have access
to that table across the entire GUFI tree in one database i.e.:

    SELECT name, size FROM gufi_vt_pentries('index root') WHERE size > 1024;

This is done by calling gufi_query through fork+exec, aggregating
all of the results into a single database file, and returning those
rows to the user. The aggregate database file should be automatically
cleaned up after running.

The behavior of gufi_query may be changed with positional
arguments. The first positional argument points to the starting
directory, and is required. The remaining arguments are listed in the
expected order and are optional: thread count, aggregate file root
directory, -I, -T, -S, -E, -K, -J, -G, -F. To skip an argument to the
left of an argument not being skipped, pass in None. After the final
unskipped argument, the remaining arguments may be skipped by not
passing in any value.
  • Loading branch information
calccrypto committed Jan 13, 2025
1 parent e057d94 commit b8fa841
Show file tree
Hide file tree
Showing 10 changed files with 1,707 additions and 577 deletions.
98 changes: 98 additions & 0 deletions include/addqueryfuncs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
This file is part of GUFI, which is part of MarFS, which is released
under the BSD license.
Copyright (c) 2017, Los Alamos National Security (LANS), LLC
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
From Los Alamos National Security, LLC:
LA-CC-15-039
Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved.
Copyright 2017. Los Alamos National Security, LLC. This software was produced
under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
the U.S. Department of Energy. The U.S. Government has rights to use,
reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is
modified to produce derivative works, such modified software should be
clearly marked, so as not to confuse it with the version available from
LANL.
THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.
*/



#ifndef ADDQUERYFUNCS_H
#define ADDQUERYFUNCS_H

#include <sqlite3.h>

#include "bf.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Callbacks that can be added to a SQLite3 db handle without any
 * user data/context.
 */

/* callbacks taking (context, argc, argv) */
void uidtouser(sqlite3_context *context, int argc, sqlite3_value **argv);
void gidtogroup(sqlite3_context *context, int argc, sqlite3_value **argv);
void modetotxt(sqlite3_context *context, int argc, sqlite3_value **argv);
void sqlite3_strftime(sqlite3_context *context, int argc, sqlite3_value **argv);
void blocksize(sqlite3_context *context, int argc, sqlite3_value **argv);
void human_readable_size(sqlite3_context *context, int argc, sqlite3_value **argv);
void sqlite_basename(sqlite3_context *context, int argc, sqlite3_value **argv);

/* stdev: one step callback, two final callbacks */
void stdev_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void stdevs_final(sqlite3_context *context);
void stdevp_final(sqlite3_context *context);

/* median: step callback and final callback */
void median_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void median_final(sqlite3_context *context);

/*
 * Entry points operating on a db handle.
 * NOTE(review): presumably these register the query functions on db and
 * return a status code - confirm against addqueryfuncs.c
 */
int addqueryfuncs(sqlite3 *db);
/* same, with a struct work (declared via bf.h) passed alongside the handle */
int addqueryfuncs_with_context(sqlite3 *db, struct work *work);

#ifdef __cplusplus
}
#endif

#endif /* ADDQUERYFUNCS_H */
24 changes: 17 additions & 7 deletions include/dbutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ OF SUCH DAMAGE.
#include <sqlite3.h>

#include "SinglyLinkedList.h"
#include "addqueryfuncs.h"
#include "template_db.h"
#include "utils.h"
#include "xattrs.h"
Expand All @@ -91,18 +92,24 @@ extern const char *SQLITE_MEMORY;
#define DROP_TABLE(name) "DROP TABLE IF EXISTS " #name ";"
#define DROP_VIEW(name) "DROP VIEW IF EXISTS " #name ";"

/* name of the readdirplus table */
#define READDIRPLUS "readdirplus"
/*
 * READDIRPLUS_SCHEMA(name) expands to the CREATE TABLE statement for a
 * table called name. name is spliced in by string literal concatenation,
 * so it must be a string literal (or a macro expanding to one).
 */
#define READDIRPLUS_SCHEMA(name) \
"CREATE TABLE " name "(path TEXT, type TEXT, inode TEXT PRIMARY KEY, pinode TEXT, suspect INT64);"
extern const char READDIRPLUS_CREATE[];
extern const char READDIRPLUS_INSERT[];

/* contains all file and link metadata for the current directory */
/* prefer pentries over entries */
#define ENTRIES "entries"
/*
 * ENTRIES_SCHEMA(name, extra_cols) expands to a CREATE TABLE statement
 * for a table called name. Both arguments are spliced in by string
 * literal concatenation, so they must be string literals (or macros
 * expanding to them). extra_cols is placed directly after the last
 * built-in column: pass "" for no extra columns, otherwise begin it
 * with a comma.
 */
#define ENTRIES_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT" extra_cols ");"
extern const char ENTRIES_CREATE[];
extern const char ENTRIES_INSERT[];

/* directory metadata + aggregate data */
#define SUMMARY "summary"
/*
 * SUMMARY_SCHEMA(name, extra_cols) expands to a CREATE TABLE statement
 * for a table called name. Both arguments are spliced in by string
 * literal concatenation, so they must be string literals (or macros
 * expanding to them). extra_cols is placed directly after the last
 * built-in column (rollupscore): pass "" for no extra columns,
 * otherwise begin it with a comma.
 */
#define SUMMARY_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, rectype INT64, pinode TEXT, isroot INT64, rollupscore INT64" extra_cols ");"
extern const char SUMMARY_CREATE[];

/* view of summary table with rollups */
Expand All @@ -111,22 +118,28 @@ extern const char VRSUMMARY_CREATE[];

/* pentries pulled from children */
#define PENTRIES_ROLLUP "pentries_rollup"
/*
 * PENTRIES_ROLLUP_SCHEMA(name) expands to a CREATE TABLE statement for a
 * table called name (a string literal, spliced in by concatenation).
 * The column list is fixed: the built-in columns of ENTRIES_SCHEMA
 * followed by pinode and ppinode. Unlike ENTRIES_SCHEMA there is no
 * extra_cols parameter.
 */
#define PENTRIES_ROLLUP_SCHEMA(name) \
"CREATE TABLE " name "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT, pinode TEXT, ppinode TEXT);"
extern const char PENTRIES_ROLLUP_CREATE[];
extern const char PENTRIES_ROLLUP_INSERT[];

/* (entries + summary.inode) UNION pentries_rollup */
#define PENTRIES "pentries"
extern const char PENTRIES_CREATE[];

/* vrentries is not created because rolled up entries tables are not correct */

/* view of pentries view with rollups */
#define VRPENTRIES "vrpentries"
extern const char VRPENTRIES_CREATE[];

/* aggregate data of tree starting at current directory */
#define TREESUMMARY "treesummary"
/*
 * TREESUMMARY_SCHEMA(name, extra_cols) expands to a CREATE TABLE
 * statement for a table called name. Both arguments are spliced in by
 * string literal concatenation, so they must be string literals (or
 * macros expanding to them). extra_cols is placed directly after the
 * last built-in column (gid): pass "" for no extra columns, otherwise
 * begin it with a comma.
 */
#define TREESUMMARY_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(inode TEXT, pinode TEXT, totsubdirs INT64, maxsubdirfiles INT64, maxsubdirlinks INT64, maxsubdirsize INT64, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, totextdbs INT64, rectype INT64, uid INT64, gid INT64" extra_cols ");"
/*
 * Drops any existing table, then recreates it with no extra columns.
 * This is the only definition of TREESUMMARY_CREATE: the schema text
 * lives in TREESUMMARY_SCHEMA so it is not duplicated here.
 *
 * DROP_TABLE stringifies its argument without macro-expanding it, so the
 * DROP statement names the identifier TREESUMMARY rather than
 * "treesummary"; presumably this relies on SQLite matching table names
 * case-insensitively - confirm before renaming the table.
 */
#define TREESUMMARY_CREATE \
DROP_TABLE(TREESUMMARY) \
TREESUMMARY_SCHEMA(TREESUMMARY, "")

extern const char TREESUMMARY_EXISTS[];

Expand Down Expand Up @@ -202,9 +215,6 @@ int insertsumdb(sqlite3 *sdb, const char *path, struct work *pwork, struct entry

int inserttreesumdb(const char *name, sqlite3 *sdb, struct sum *su, int rectype, int uid, int gid);

int addqueryfuncs(sqlite3 *db);
int addqueryfuncs_with_context(sqlite3 *db, struct work *work);

/* xattr db list item */
struct xattr_db {
long long int pinode;
Expand Down
24 changes: 24 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ set(GUFI_SOURCES
OutputBuffers.c
QueuePerThreadPool.c
SinglyLinkedList.c
addqueryfuncs.c
bf.c
compress.c
dbutils.c
Expand Down Expand Up @@ -212,6 +213,29 @@ build_and_install_one(${BIN} TRUE gufi_query
$<TARGET_OBJECTS:gufi_query_lib>
)

# build custom SQLite virtual tables
#
# gufi_vt is built as a MODULE library (a plugin that is loaded at
# runtime rather than linked against) and provides the gufi_vt_* virtual
# tables.
add_library(gufi_vt MODULE
  gufi_vt.c

  # have to recompile with -fPIC
  SinglyLinkedList.c
  addqueryfuncs.c
  histogram.c
  trie.c
  utils.c
)

# drop the default "lib" prefix so the output file is named gufi_vt.<suffix>
set_target_properties(gufi_vt PROPERTIES PREFIX "")

# file extension of the sqlite3 library found under DEP_INSTALL_PREFIX
if(APPLE)
  set(EXT "dylib")
elseif(CYGWIN)
  set(EXT "dll.a")
else()
  set(EXT "so")
endif()

# PRIVATE: nothing links against a MODULE library, so sqlite3 does not
# need to be part of this target's link interface
target_link_libraries(gufi_vt
  PRIVATE
    "${DEP_INSTALL_PREFIX}/sqlite3/lib/libsqlite3.${EXT}"
)

# order the build after install_dependencies
# (presumably the target that places sqlite3 under DEP_INSTALL_PREFIX - confirm)
add_dependencies(gufi_vt install_dependencies)

install(TARGETS gufi_vt DESTINATION ${LIB} COMPONENT Server)

# build binaries that do not need to link with GUFI
set(MISC_SOURCES tsmtime2epoch.c tsmepoch2time.c)
build_and_install(${BIN} FALSE ${MISC_SOURCES})
Expand Down
Loading

0 comments on commit b8fa841

Please sign in to comment.