Skip to content

Commit

Permalink
Add RecordArray (and Record) to Numba. (#26)
Browse files Browse the repository at this point in the history
RecordArray and Record objects can be used in Numba, and `FillableArray.beginrecord`, `field`, and `endrecord` have been extended to Numba.

* Start adding RecordArray (and Record) to Numba.

* Stub files for RecordArray in Numba.

* Access a RecordArray's 'lookup' dict in Python.

* Adding accessors to C++ that support Numba.

* Record's first attribute should be named 'array', not 'recordarray'.

* Record* made a round-trip to Numba.

* Compute 'length' once in iteration.

* Stubs for StringLiteral in slices.

* RecordArray.getitem_range.

* Finished 'getitem_str' for all array types.

* Record.getitem_str.

* Bring signatures up to date.

* Finished Record getitems.

* [skip ci] Started implementing getitem_tuple; broke everything.

* Fixed it: RecordArray.getitem_next(anything but field) should work.

* Finished RecordArray.getitem_next(*), need to do *.getitem_next(string).

* Finished *.getitem_next(str). Moving on to FillableArray.

* FillableArray.begintuple/index/endtuple works.

* FillableArray.beginrecord/field/endrecord works. Probably done with this PR.
  • Loading branch information
jpivarski authored Dec 3, 2019
1 parent 293184c commit 8581bdf
Show file tree
Hide file tree
Showing 32 changed files with 1,672 additions and 236 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit-
* [X] Test all (tested in mock [studies/fillable.py](tree/master/studies/fillable.py)).
* [X] JSON → Awkward via header-only [RapidJSON](https://rapidjson.org) and `awkward.fromiter`.
* [ ] Explicit broadcasting functions for jagged and non-jagged arrays and scalars.
* [ ] Structure-preserving ufunc-like operation on the C++ side that applies a lambda function to inner data. The Python `__array_ufunc__` implementation will _call_ this to preserve structure.
* [ ] ~~Structure-preserving ufunc-like operation on the C++ side that applies a lambda function to inner data. The Python `__array_ufunc__` implementation will _call_ this to preserve structure.~~
* [ ] Extend `__getitem__` to take jagged arrays of integers and booleans (same behavior as old).
* [ ] Full suite of array types:
* [X] `EmptyArray`: 1-dimensional array with length 0 and unknown type (result of `UnknownFillable`, compatible with all types of arrays).
Expand All @@ -75,7 +75,7 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit-
* [X] `ListOffsetArray`: the `JaggedArray` case with no unreachable data between reachable data (gaps).
* [X] `RegularArray`: for building rectilinear, N-dimensional arrays of arbitrary contents, e.g. putting jagged dimensions inside fixed dimensions.
* [X] `RecordArray`: the new `Table` _without_ lazy-slicing.
* [ ] Implement it in Numba as well.
* [X] Implement it in Numba as well.
* [ ] `MaskedArray`, `BitMaskedArray`, `IndexedMaskedArray`: same as the old versions.
* [ ] `UnionArray`: same as the old version; `SparseUnionArray`: the additional case found in Apache Arrow.
* [ ] `IndexedArray`: same as the old version.
Expand Down
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.25
0.1.26
1 change: 1 addition & 0 deletions awkward1/_numba/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@
import awkward1._numba.array.listoffsetarray
import awkward1._numba.array.emptyarray
import awkward1._numba.array.regulararray
import awkward1._numba.array.recordarray
3 changes: 3 additions & 0 deletions awkward1/_numba/array/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ def getitem_int(self):
def getitem_range(self):
return self

def getitem_str(self, key=None):
    """Reject a field-name lookup on an EmptyArray.

    ``key`` is accepted (and ignored) so this method can be called the same
    way as the list types' ``getitem_str(key)``, which delegate to
    ``contenttpe.getitem_str(key)``.  Without the parameter, such a delegated
    call fails with ``TypeError`` instead of the intended ``IndexError``.

    Args:
        key: the requested field name; unused.

    Raises:
        IndexError: always.
    """
    raise IndexError("cannot slice EmptyArray with str (Record field name)")

def getitem_tuple(self, wheretpe):
if len(wheretpe.types) == 0:
return self
Expand Down
33 changes: 33 additions & 0 deletions awkward1/_numba/array/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def getitem_int(self):
def getitem_range(self):
return self

def getitem_str(self, key):
    """Return the ListArrayType that results from selecting field ``key``.

    The starts, stops and id types are carried over unchanged; only the
    content type is replaced by ``self.contenttpe.getitem_str(key)``.
    """
    field_contenttpe = self.contenttpe.getitem_str(key)
    return ListArrayType(
        self.startstpe,
        self.stopstpe,
        field_contenttpe,
        self.idtpe,
    )

def getitem_tuple(self, wheretpe):
nexttpe = ListArrayType(util.index64tpe, util.index64tpe, self, numba.none)
outtpe = nexttpe.getitem_next(wheretpe, False)
Expand All @@ -65,6 +68,9 @@ def getitem_next(self, wheretpe, isadvanced):
contenttpe = self.contenttpe.carry().getitem_next(tailtpe, isadvanced)
return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)

elif isinstance(headtpe, numba.types.StringLiteral):
return self.getitem_str(headtpe.literal_value).getitem_next(tailtpe, isadvanced)

elif isinstance(headtpe, numba.types.EllipsisType):
raise NotImplementedError("ellipsis")

Expand Down Expand Up @@ -102,6 +108,10 @@ def lower_getitem_int(self):
def lower_getitem_range(self):
return lower_getitem_range

# Read-only accessor that returns the lower_getitem_str function defined in
# this file, so code holding this object can look up its string-key lowering
# routine as an attribute (e.g. ``contenttpe.lower_getitem_str``).
@property
def lower_getitem_str(self):
return lower_getitem_str

# Read-only accessor that returns the lower_getitem_next function defined in
# this file, so code holding this object can look up its lowering routine as
# an attribute.
@property
def lower_getitem_next(self):
return lower_getitem_next
Expand Down Expand Up @@ -224,6 +234,24 @@ def lower_getitem_range(context, builder, sig, args):
context.nrt.incref(builder, rettpe, out)
return out

@numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.StringLiteral)
def lower_getitem_str(context, builder, sig, args):
    """Lower ``array["field"]`` for a ListArrayType.

    Builds a new struct of the return type that reuses the input's ``starts``
    and ``stops`` (and ``id``, when the array type has one) and whose
    ``content`` is produced by the content type's own ``lower_getitem_str``.

    Args:
        context, builder: Numba lowering context and IR builder.
        sig: signature whose args are ``(array type, string-literal type)``.
        args: the corresponding lowered values ``(array, key)``.

    Returns:
        The lowered output struct value; it is incref'ed when NRT is enabled.
    """
    outtpe = sig.return_type
    arraytpe, keytpe = sig.args
    arrayval, keyval = args

    source = numba.cgutils.create_struct_proxy(arraytpe)(context, builder, value=arrayval)
    result = numba.cgutils.create_struct_proxy(outtpe)(context, builder)

    # The list structure is untouched by a field selection.
    result.starts = source.starts
    result.stops = source.stops

    # Only the content changes: delegate to the content type's lowering.
    contentsig = outtpe.contenttpe(arraytpe.contenttpe, keytpe)
    result.content = arraytpe.contenttpe.lower_getitem_str(
        context, builder, contentsig, (source.content, keyval)
    )

    if arraytpe.idtpe != numba.none:
        result.id = source.id

    outval = result._getvalue()
    if context.enable_nrt:
        context.nrt.incref(builder, outtpe, outval)
    return outval

# Lowering for indexing a ListArrayType with a tuple: forwards all arguments
# unchanged to content.lower_getitem_tuple and returns its result.
# NOTE(review): `content` is not defined in the visible hunks; presumably an
# imported module holding the shared implementation. Confirm.
@numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.BaseTuple)
def lower_getitem_tuple(context, builder, sig, args):
return content.lower_getitem_tuple(context, builder, sig, args)
Expand Down Expand Up @@ -364,6 +392,11 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
proxyout.id = proxyin.id
return proxyout._getvalue()

elif isinstance(headtpe, numba.types.StringLiteral):
nexttpe = arraytpe.getitem_str(headtpe.literal_value)
nextval = lower_getitem_str(context, builder, nexttpe(arraytpe, headtpe), (arrayval, headval))
return lower_getitem_next(context, builder, nexttpe, tailtpe, nextval, tailval, advanced)

elif isinstance(headtpe, numba.types.EllipsisType):
raise NotImplementedError("ListArray.getitem_next(ellipsis)")

Expand Down
32 changes: 32 additions & 0 deletions awkward1/_numba/array/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ def getitem_int(self):
def getitem_range(self):
return self

def getitem_str(self, key):
    """Return the ListOffsetArrayType that results from selecting field ``key``.

    The offsets and id types are carried over unchanged; only the content
    type is replaced by ``self.contenttpe.getitem_str(key)``.
    """
    field_contenttpe = self.contenttpe.getitem_str(key)
    return ListOffsetArrayType(
        self.offsetstpe,
        field_contenttpe,
        self.idtpe,
    )

def getitem_tuple(self, wheretpe):
import awkward1._numba.array.listarray
nexttpe = awkward1._numba.array.listarray.ListArrayType(util.index64tpe, util.index64tpe, self, numba.none)
Expand All @@ -60,6 +63,9 @@ def getitem_next(self, wheretpe, isadvanced):
contenttpe = self.contenttpe.carry().getitem_next(tailtpe, isadvanced)
return ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)

elif isinstance(headtpe, numba.types.StringLiteral):
return self.getitem_str(headtpe.literal_value).getitem_next(tailtpe, isadvanced)

elif isinstance(headtpe, numba.types.EllipsisType):
raise NotImplementedError("ellipsis")

Expand Down Expand Up @@ -98,6 +104,10 @@ def lower_getitem_int(self):
def lower_getitem_range(self):
return lower_getitem_range

# Read-only accessor that returns the lower_getitem_str function defined in
# this file, so code holding this object can look up its string-key lowering
# routine as an attribute (e.g. ``contenttpe.lower_getitem_str``).
@property
def lower_getitem_str(self):
return lower_getitem_str

# Read-only accessor that returns the lower_getitem_next function defined in
# this file, so code holding this object can look up its lowering routine as
# an attribute.
@property
def lower_getitem_next(self):
return lower_getitem_next
Expand Down Expand Up @@ -222,6 +232,23 @@ def lower_getitem_range(context, builder, sig, args):
context.nrt.incref(builder, rettpe, out)
return out

@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.StringLiteral)
def lower_getitem_str(context, builder, sig, args):
    """Lower ``array["field"]`` for a ListOffsetArrayType.

    Builds a new struct of the return type that reuses the input's
    ``offsets`` (and ``id``, when the array type has one) and whose
    ``content`` is produced by the content type's own ``lower_getitem_str``.

    Args:
        context, builder: Numba lowering context and IR builder.
        sig: signature whose args are ``(array type, string-literal type)``.
        args: the corresponding lowered values ``(array, key)``.

    Returns:
        The lowered output struct value; it is incref'ed when NRT is enabled.
    """
    outtpe = sig.return_type
    arraytpe, keytpe = sig.args
    arrayval, keyval = args

    source = numba.cgutils.create_struct_proxy(arraytpe)(context, builder, value=arrayval)
    result = numba.cgutils.create_struct_proxy(outtpe)(context, builder)

    # The list structure is untouched by a field selection.
    result.offsets = source.offsets

    # Only the content changes: delegate to the content type's lowering.
    contentsig = outtpe.contenttpe(arraytpe.contenttpe, keytpe)
    result.content = arraytpe.contenttpe.lower_getitem_str(
        context, builder, contentsig, (source.content, keyval)
    )

    if arraytpe.idtpe != numba.none:
        result.id = source.id

    outval = result._getvalue()
    if context.enable_nrt:
        context.nrt.incref(builder, outtpe, outval)
    return outval

# Lowering for indexing a ListOffsetArrayType with a tuple: forwards all
# arguments unchanged to content.lower_getitem_tuple and returns its result.
# NOTE(review): `content` is not defined in the visible hunks; presumably an
# imported module holding the shared implementation. Confirm.
@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.BaseTuple)
def lower_getitem_tuple(context, builder, sig, args):
return content.lower_getitem_tuple(context, builder, sig, args)
Expand Down Expand Up @@ -377,6 +404,11 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
proxyout.id = proxyin.id
return proxyout._getvalue()

elif isinstance(headtpe, numba.types.StringLiteral):
nexttpe = arraytpe.getitem_str(headtpe.literal_value)
nextval = lower_getitem_str(context, builder, nexttpe(arraytpe, headtpe), (arrayval, headval))
return lower_getitem_next(context, builder, nexttpe, tailtpe, nextval, tailval, advanced)

elif isinstance(headtpe, numba.types.EllipsisType):
raise NotImplementedError("ListOffsetArray.getitem_next(ellipsis)")

Expand Down
6 changes: 6 additions & 0 deletions awkward1/_numba/array/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ def getitem_int(self):
def getitem_range(self):
return self.getitem_tuple(numba.types.slice2_type)

def getitem_str(self, key=None):
    """Reject a field-name lookup on a NumpyArray.

    ``key`` is accepted (and ignored) so this method can be called the same
    way as the list types' ``getitem_str(key)``, which delegate to
    ``contenttpe.getitem_str(key)``.  Without the parameter, such a delegated
    call fails with ``TypeError`` instead of the intended ``IndexError``.

    Args:
        key: the requested field name; unused.

    Raises:
        IndexError: always.
    """
    raise IndexError("cannot slice NumpyArray with str (Record field name)")

def getitem_tuple(self, wheretpe):
outtpe = numba.typing.arraydecl.get_array_index_type(self.arraytpe, wheretpe).result
if isinstance(outtpe, numba.types.Array):
Expand All @@ -40,6 +43,9 @@ def getitem_tuple(self, wheretpe):
def getitem_next(self, wheretpe, isadvanced):
if len(wheretpe.types) > self.arraytpe.ndim:
raise IndexError("too many dimensions in slice")
if any(isinstance(x, numba.types.StringLiteral) for x in wheretpe):
raise IndexError("cannot slice NumpyArray with str (Record field name)")

if isadvanced:
numreduce = sum(1 if isinstance(x, (numba.types.Integer, numba.types.Array)) else 0 for x in wheretpe.types)
else:
Expand Down
Loading

0 comments on commit 8581bdf

Please sign in to comment.