Skip to content

Commit

Permalink
add where and several QueryMaker; v0.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
yymao committed Oct 26, 2021
1 parent 1d2176a commit fbd7a6d
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 18 deletions.
67 changes: 61 additions & 6 deletions easyquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
NumPy structured arrays, astropy Table, and Pandas DataFrame.
Project website: https://github.com/yymao/easyquery
The MIT License (MIT)
Copyright (c) 2017-2020 Yao-Yuan Mao (yymao)
Copyright (c) 2017-2021 Yao-Yuan Mao (yymao)
http://opensource.org/licenses/MIT
"""

Expand All @@ -20,7 +20,7 @@


__all__ = ['Query', 'QueryMaker']
__version__ = '0.1.6'
__version__ = '0.2.0'


def _is_string_like(obj):
Expand All @@ -42,7 +42,8 @@ class Query(object):
All of them operate on NumPy structured array and astropy Table:
- `filter` returns a new table that only has entries satisfying the query;
- `count` returns the number of entries satisfying the query;
- `mask` returns a bool array for masking the table.
- `mask` returns a bool array for masking the table;
- `where` returns an int array of the indices of the entries satisfying the query.
For most simple cases a Query object can be created with a numexpr string.
A Query object can also be created with a tuple, where the first element of
Expand All @@ -69,6 +70,8 @@ class Query(object):
1
>>> q.mask(t)
array([False, False, False, True], dtype=bool)
>>> q.where(t)
array([3], dtype=int64)
>>> q2 = (~q & Query('b > c'))
>>> q2.count(t)
Expand Down Expand Up @@ -216,7 +219,7 @@ def mask(self, table):
"""
if self._operator is None:
if self._operands is None:
return np.ones(self._get_table_len(table), dtype=np.bool)
return np.ones(self._get_table_len(table), dtype=bool)
else:
return self._create_mask(table, self._operands)

Expand All @@ -243,7 +246,7 @@ def filter(self, table, column_slice=None):
If `column_slice` is provided, also select on columns.
Equivalent to table[Query(...).mask(table)][column_slice]
but with more efficient implementaion.
but with more efficient implementation.
Parameters
----------
Expand Down Expand Up @@ -289,6 +292,24 @@ def count(self, table):

return np.count_nonzero(self.mask(table))

def where(self, table):
    """
    Return the indices of the rows in `table` that satisfy input queries.
    Equivalent to calling `np.flatnonzero(Query(...).mask(table))`.

    Parameters
    ----------
    table : NumPy structured array, astropy Table, etc.

    Returns
    -------
    indices : numpy int array
        Positions of the selected rows, in ascending order.
    """
    if self._operator is None and self._operands is None:
        # An empty query selects every row, so there is no need to build a
        # mask first. np.intp keeps the dtype identical to what
        # np.flatnonzero returns on the general path below.
        return np.arange(self._get_table_len(table), dtype=np.intp)

    return np.flatnonzero(self.mask(table))

def copy(self):
"""
Create a copy of the current Query object.
Expand Down Expand Up @@ -405,6 +426,24 @@ def mask(table, *queries):
return _query_class(*queries).mask(table)


def where(table, *queries):
    """
    Shortcut that returns the indices of the rows in `table` that satisfy
    the input `queries`.
    Same result as `Query(*queries).where(table)`.

    Parameters
    ----------
    table : NumPy structured array, astropy Table, etc.
    queries : string, tuple, callable

    Returns
    -------
    indices : numpy int array
    """
    combined_query = _query_class(*queries)
    return combined_query.where(table)


class QueryMaker():
"""
provides convenience functions to generate query objects
Expand All @@ -419,7 +458,7 @@ def isin(col_name, test_elements, assume_unique=False, invert=False):

@staticmethod
def vectorize(row_function, *col_names):
return _query_class((lambda *args: np.fromiter(map(row_function, *args), np.bool),) + tuple(col_names))
return _query_class((lambda *args: np.fromiter(map(row_function, *args), bool),) + tuple(col_names))

@staticmethod
def contains(col_name, test_value):
Expand Down Expand Up @@ -456,3 +495,19 @@ def startswith(col_name, prefix, start=0, end=None):
@staticmethod
def endswith(col_name, suffix, start=0, end=None):
return _query_class((functools.partial(np.char.endswith, suffix=suffix, start=start, end=end), col_name))

@staticmethod
def isfinite(col_name):
    """
    Build a query selecting the rows whose `col_name` value is finite
    (neither NaN nor infinite).

    Parameters
    ----------
    col_name : str
        Name of the column to test.

    Returns
    -------
    query : Query object
    """
    # np.isfinite already works on whole arrays, so pass it the column
    # directly instead of going through `vectorize`, which would call it
    # once per row from Python.
    return _query_class((np.isfinite, col_name))

@staticmethod
def isnan(col_name):
    """
    Build a query selecting the rows whose `col_name` value is NaN.

    Parameters
    ----------
    col_name : str
        Name of the column to test.

    Returns
    -------
    query : Query object
    """
    # np.isnan already works on whole arrays, so pass it the column
    # directly instead of going through `vectorize`, which would call it
    # once per row from Python.
    return _query_class((np.isnan, col_name))

@staticmethod
def isnotnan(col_name):
    """
    Build a query selecting the rows whose `col_name` value is not NaN,
    i.e. the negation of `QueryMaker.isnan(col_name)`.
    """
    nan_query = QueryMaker.isnan(col_name)
    return ~nan_query

@staticmethod
def isclose(col1_name, col2_name):
    """
    Build a query selecting the rows where the values of `col1_name` and
    `col2_name` are close to each other, as judged by `np.isclose` with
    its default tolerances.

    Parameters
    ----------
    col1_name : str
        Name of the first column to compare.
    col2_name : str
        Name of the second column to compare.

    Returns
    -------
    query : Query object
    """
    # np.isclose compares whole arrays element-wise, so pass it both
    # columns directly instead of going through `vectorize`, which would
    # call it once per row from Python.
    return _query_class((np.isclose, col1_name, col2_name))
40 changes: 28 additions & 12 deletions test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@ def test_valid_init():
"""
test valid Query object creation
"""
q1 = Query()
q2 = Query(None)
q1 = Query() # noqa: F841
q2 = Query(None) # noqa: F841
q3 = Query('x > 2')
q4 = Query(lambda t: t['x'] > 2)
q5 = Query((lambda c: c > 2, 'x'))
q6 = Query('x > 2', lambda t: t['x'] > 2, (lambda c: c > 2, 'x'))
q7 = Query(q3)
q8 = Query(q3, 'x > 2')
q4 = Query(lambda t: t['x'] > 2) # noqa: F841
q5 = Query((lambda c: c > 2, 'x')) # noqa: F841
q6 = Query('x > 2', lambda t: t['x'] > 2, (lambda c: c > 2, 'x')) # noqa: F841
q7 = Query(q3) # noqa: F841
q8 = Query(q3, 'x > 2') # noqa: F841


def check_invalid_init(*queries):
try:
q = Query(*queries)
q = Query(*queries) # noqa: F841
except ValueError:
pass
else:
Expand All @@ -34,29 +34,39 @@ def test_invalid_init():


def gen_test_table():
    """
    Build the small structured array used as the test table.

    Columns are 'a' and 'b' (int64), 'c' (float64) and 's' (4-character
    unicode string). The last row carries `np.inf` in 'c' so that a
    non-finite value is present in the table.

    Returns
    -------
    table : NumPy structured array with 5 rows
    """
    rows = [
        (1, 5, 4.5, "abcd"),
        (1, 1, 6.2, "pqrs"),
        (3, 2, 0.5, "asdf"),
        (5, 5, -3.5, "wxyz"),
        (-2, -5, np.inf, "fwmt"),
    ]
    row_dtype = np.dtype([('a', '<i8'), ('b', '<i8'), ('c', '<f8'), ('s', '<U4')])
    return np.array(rows, dtype=row_dtype)


def check_query_on_table(table, query_object, true_mask=None):
    """
    Assert that `filter`, `count`, `mask` and `where` of `query_object`
    all agree with `true_mask` when applied to `table`.

    Parameters
    ----------
    table : NumPy structured array (or anything supporting `len` and
        boolean-mask indexing)
    query_object : object exposing `filter`, `count`, `mask` and `where`
    true_mask : bool array, optional
        Expected selection. Defaults to selecting every row.

    Raises
    ------
    AssertionError
        If any of the four results disagrees with `true_mask`.
    """
    if true_mask is None:
        # Use the builtin `bool`; the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in 1.24.
        true_mask = np.ones(len(table), bool)

    assert (query_object.filter(table) == table[true_mask]).all(), 'filter not correct'
    assert query_object.count(table) == np.count_nonzero(true_mask), 'count not correct'
    assert (query_object.mask(table) == true_mask).all(), 'mask not correct'
    assert (query_object.where(table) == np.flatnonzero(true_mask)).all(), 'where not correct'


def check_query_on_dict_table(table, query_object, true_mask=None):
    """
    Assert that `filter`, `count`, `mask` and `where` of `query_object`
    all agree with `true_mask` when applied to a dict-of-arrays `table`.

    Parameters
    ----------
    table : dict mapping column name to array
    query_object : object exposing `filter`, `count`, `mask` and `where`
    true_mask : bool array, optional
        Expected selection. Defaults to selecting every row, with the row
        count taken from the first column of `table`.

    Raises
    ------
    AssertionError
        If any of the four results disagrees with `true_mask`.
    """
    if true_mask is None:
        # A dict view is iterable but not an iterator, so it must be
        # wrapped in iter() before next() can pull the first column.
        first_column = next(iter(table.values()))
        # Use the builtin `bool`; the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in 1.24.
        true_mask = np.ones(len(first_column), bool)

    ftable = query_object.filter(table)
    ftable_true = {k: table[k][true_mask] for k in table}
    assert set(ftable) == set(ftable_true), 'filter not correct'
    assert all((ftable[k] == ftable_true[k]).all() for k in ftable), 'filter not correct'
    assert query_object.count(table) == np.count_nonzero(true_mask), 'count not correct'
    assert (query_object.mask(table) == true_mask).all(), 'mask not correct'
    assert (query_object.where(table) == np.flatnonzero(true_mask)).all(), 'where not correct'


def test_simple_query():
Expand Down Expand Up @@ -170,8 +180,14 @@ def test_query_maker():
check_query_on_table(t, QueryMaker.contains("s", "a"), np.char.find(t["s"], "a") > -1)
check_query_on_table(t, QueryMaker.find("s", "a"), np.char.find(t["s"], "a") > -1)

check_query_on_table(t, QueryMaker.isfinite("c"), np.isfinite(t["c"]))
check_query_on_table(t, QueryMaker.isnan("c"), np.isnan(t["c"]))
check_query_on_table(t, QueryMaker.isnotnan("c"), ~np.isnan(t["c"]))
check_query_on_table(t, QueryMaker.isclose("a", "b"), np.isclose(t["a"], t["b"]))

assert QueryMaker.equal_columns("s", "s").mask(t).all()


if __name__ == '__main__':
test_valid_init()
test_invalid_init()
Expand Down

0 comments on commit fbd7a6d

Please sign in to comment.