Add RecordArray (and Record) to Numba. (#26)

RecordArray and Record objects can be used in Numba, and `FillableArray.beginrecord`, `field`, and `endrecord` have been extended to Numba. * Start adding RecordArray (and Record) to Numba. * Stub files for RecordArray in Numba. * Access a RecordArray's 'lookup' dict in Python. * Adding accessors to C++ that support Numba. * Record's first attribute should be named 'array', not 'recordarray'. * Record* made a round-trip to Numba. * Compute 'length' once in iteration. * Stubs for StringLiteral in slices. * RecordArray.getitem_range. * Finished 'getitem_str' for all array types. * Record.getitem_str. * Bring signatures up to date. * Finished Record getitems. * [skip ci] Started implementing getitem_tuple; broke everything. * Fixed it: RecordArray.getitem_next(anything but field) should work. * Finished RecordArray.getitem_next(*), need to do *.getitem_next(string). * Finished *.getitem_next(str). Moving on to FillableArray. * FillableArray.begintuple/index/endtuple works. * FillableArray.beginrecord/field/endrecord works. Probably done with this PR.
scikit-hep · Dec 3, 2019 · 8581bdf · 8581bdf
1 parent 293184c
commit 8581bdf
Show file tree

Hide file tree

Showing 32 changed files with 1,672 additions and 236 deletions.
diff --git a/README.md b/README.md
@@ -65,7 +65,7 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit-
       * [X] Test all (tested in mock [studies/fillable.py](tree/master/studies/fillable.py)).
    * [X] JSON → Awkward via header-only [RapidJSON](https://rapidjson.org) and `awkward.fromiter`.
    * [ ] Explicit broadcasting functions for jagged and non-jagged arrays and scalars.
-   * [ ] Structure-preserving ufunc-like operation on the C++ side that applies a lambda function to inner data. The Python `__array_ufunc__` implementation will _call_ this to preserve structure.
+   * [ ] ~~Structure-preserving ufunc-like operation on the C++ side that applies a lambda function to inner data. The Python `__array_ufunc__` implementation will _call_ this to preserve structure.~~
    * [ ] Extend `__getitem__` to take jagged arrays of integers and booleans (same behavior as old).
    * [ ] Full suite of array types:
       * [X] `EmptyArray`: 1-dimensional array with length 0 and unknown type (result of `UnknownFillable`, compatible with all types of arrays).
@@ -75,7 +75,7 @@ Completed items are ☑check-marked. See [closed PRs](https://github.com/scikit-
       * [X] `ListOffsetArray`: the `JaggedArray` case with no unreachable data between reachable data (gaps).
       * [X] `RegularArray`: for building rectilinear, N-dimensional arrays of arbitrary contents, e.g. putting jagged dimensions inside fixed dimensions.
       * [X] `RecordArray`: the new `Table` _without_ lazy-slicing.
-         * [ ] Implement it in Numba as well.
+         * [X] Implement it in Numba as well.
       * [ ] `MaskedArray`, `BitMaskedArray`, `IndexedMaskedArray`: same as the old versions.
       * [ ] `UnionArray`: same as the old version; `SparseUnionArray`: the additional case found in Apache Arrow.
       * [ ] `IndexedArray`: same as the old version.

diff --git a/VERSION_INFO b/VERSION_INFO
@@ -1 +1 @@
-0.1.25
+0.1.26
diff --git a/awkward1/_numba/__init__.py b/awkward1/_numba/__init__.py
@@ -18,3 +18,4 @@
     import awkward1._numba.array.listoffsetarray
     import awkward1._numba.array.emptyarray
     import awkward1._numba.array.regulararray
+    import awkward1._numba.array.recordarray
diff --git a/awkward1/_numba/array/emptyarray.py b/awkward1/_numba/array/emptyarray.py
@@ -27,6 +27,9 @@ def getitem_int(self):
     def getitem_range(self):
         return self
 
+    def getitem_str(self):
+        raise IndexError("cannot slice EmptyArray with str (Record field name)")
+
     def getitem_tuple(self, wheretpe):
         if len(wheretpe.types) == 0:
             return self

diff --git a/awkward1/_numba/array/listarray.py b/awkward1/_numba/array/listarray.py
@@ -46,6 +46,9 @@ def getitem_int(self):
     def getitem_range(self):
         return self
 
+    def getitem_str(self, key):  # type of self[key]: same starts/stops/id types, content type replaced by contenttpe.getitem_str(key)
+        return ListArrayType(self.startstpe, self.stopstpe, self.contenttpe.getitem_str(key), self.idtpe)
+
     def getitem_tuple(self, wheretpe):
         nexttpe = ListArrayType(util.index64tpe, util.index64tpe, self, numba.none)
         outtpe = nexttpe.getitem_next(wheretpe, False)
@@ -65,6 +68,9 @@ def getitem_next(self, wheretpe, isadvanced):
             contenttpe = self.contenttpe.carry().getitem_next(tailtpe, isadvanced)
             return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)
 
+        elif isinstance(headtpe, numba.types.StringLiteral):
+            return self.getitem_str(headtpe.literal_value).getitem_next(tailtpe, isadvanced)
+
         elif isinstance(headtpe, numba.types.EllipsisType):
             raise NotImplementedError("ellipsis")
 
@@ -102,6 +108,10 @@ def lower_getitem_int(self):
     def lower_getitem_range(self):
         return lower_getitem_range
 
+    @property
+    def lower_getitem_str(self):  # exposes the module-level lowering function so it can be reached as contenttpe.lower_getitem_str(...)
+        return lower_getitem_str
+
     @property
     def lower_getitem_next(self):
         return lower_getitem_next
@@ -224,6 +234,24 @@ def lower_getitem_range(context, builder, sig, args):
         context.nrt.incref(builder, rettpe, out)
     return out
 
+@numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.StringLiteral)
+def lower_getitem_str(context, builder, sig, args):  # lowering for ListArray[str literal]: keeps the list structure, applies the str index to the content
+    rettpe, (tpe, wheretpe) = sig.return_type, sig.args  # result type, then (array type, string-literal type)
+    val, whereval = args
+
+    proxyin = numba.cgutils.create_struct_proxy(tpe)(context, builder, value=val)
+    proxyout = numba.cgutils.create_struct_proxy(rettpe)(context, builder)
+    proxyout.starts = proxyin.starts  # starts and stops are carried over unchanged
+    proxyout.stops = proxyin.stops
+    proxyout.content = tpe.contenttpe.lower_getitem_str(context, builder, rettpe.contenttpe(tpe.contenttpe, wheretpe), (proxyin.content, whereval))  # delegate to the content type's lowering; the type call presumably builds the signature (confirm)
+    if tpe.idtpe != numba.none:  # id is copied only when the array type declares one
+        proxyout.id = proxyin.id
+
+    out = proxyout._getvalue()
+    if context.enable_nrt:  # incref the returned value when the Numba runtime (NRT) is enabled
+        context.nrt.incref(builder, rettpe, out)
+    return out
+    return out
+
 @numba.extending.lower_builtin(operator.getitem, ListArrayType, numba.types.BaseTuple)
 def lower_getitem_tuple(context, builder, sig, args):
     return content.lower_getitem_tuple(context, builder, sig, args)
@@ -364,6 +392,11 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
             proxyout.id = proxyin.id
         return proxyout._getvalue()
 
+    elif isinstance(headtpe, numba.types.StringLiteral):
+        nexttpe = arraytpe.getitem_str(headtpe.literal_value)
+        nextval = lower_getitem_str(context, builder, nexttpe(arraytpe, headtpe), (arrayval, headval))
+        return lower_getitem_next(context, builder, nexttpe, tailtpe, nextval, tailval, advanced)
+
     elif isinstance(headtpe, numba.types.EllipsisType):
         raise NotImplementedError("ListArray.getitem_next(ellipsis)")
 

diff --git a/awkward1/_numba/array/listoffsetarray.py b/awkward1/_numba/array/listoffsetarray.py
@@ -40,6 +40,9 @@ def getitem_int(self):
     def getitem_range(self):
         return self
 
+    def getitem_str(self, key):  # type of self[key]: same offsets/id types, content type replaced by contenttpe.getitem_str(key)
+        return ListOffsetArrayType(self.offsetstpe, self.contenttpe.getitem_str(key), self.idtpe)
+
     def getitem_tuple(self, wheretpe):
         import awkward1._numba.array.listarray
         nexttpe = awkward1._numba.array.listarray.ListArrayType(util.index64tpe, util.index64tpe, self, numba.none)
@@ -60,6 +63,9 @@ def getitem_next(self, wheretpe, isadvanced):
             contenttpe = self.contenttpe.carry().getitem_next(tailtpe, isadvanced)
             return ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)
 
+        elif isinstance(headtpe, numba.types.StringLiteral):
+            return self.getitem_str(headtpe.literal_value).getitem_next(tailtpe, isadvanced)
+
         elif isinstance(headtpe, numba.types.EllipsisType):
             raise NotImplementedError("ellipsis")
 
@@ -98,6 +104,10 @@ def lower_getitem_int(self):
     def lower_getitem_range(self):
         return lower_getitem_range
 
+    @property
+    def lower_getitem_str(self):  # exposes the module-level lowering function so it can be reached as contenttpe.lower_getitem_str(...)
+        return lower_getitem_str
+
     @property
     def lower_getitem_next(self):
         return lower_getitem_next
@@ -222,6 +232,23 @@ def lower_getitem_range(context, builder, sig, args):
         context.nrt.incref(builder, rettpe, out)
     return out
 
+@numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.StringLiteral)
+def lower_getitem_str(context, builder, sig, args):  # lowering for ListOffsetArray[str literal]: keeps the offsets, applies the str index to the content
+    rettpe, (tpe, wheretpe) = sig.return_type, sig.args  # result type, then (array type, string-literal type)
+    val, whereval = args
+
+    proxyin = numba.cgutils.create_struct_proxy(tpe)(context, builder, value=val)
+    proxyout = numba.cgutils.create_struct_proxy(rettpe)(context, builder)
+    proxyout.offsets = proxyin.offsets  # offsets are carried over unchanged
+    proxyout.content = tpe.contenttpe.lower_getitem_str(context, builder, rettpe.contenttpe(tpe.contenttpe, wheretpe), (proxyin.content, whereval))  # delegate to the content type's lowering; the type call presumably builds the signature (confirm)
+    if tpe.idtpe != numba.none:  # id is copied only when the array type declares one
+        proxyout.id = proxyin.id
+
+    out = proxyout._getvalue()
+    if context.enable_nrt:  # incref the returned value when the Numba runtime (NRT) is enabled
+        context.nrt.incref(builder, rettpe, out)
+    return out
+
 @numba.extending.lower_builtin(operator.getitem, ListOffsetArrayType, numba.types.BaseTuple)
 def lower_getitem_tuple(context, builder, sig, args):
     return content.lower_getitem_tuple(context, builder, sig, args)
@@ -377,6 +404,11 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
             proxyout.id = proxyin.id
         return proxyout._getvalue()
 
+    elif isinstance(headtpe, numba.types.StringLiteral):
+        nexttpe = arraytpe.getitem_str(headtpe.literal_value)
+        nextval = lower_getitem_str(context, builder, nexttpe(arraytpe, headtpe), (arrayval, headval))
+        return lower_getitem_next(context, builder, nexttpe, tailtpe, nextval, tailval, advanced)
+
     elif isinstance(headtpe, numba.types.EllipsisType):
         raise NotImplementedError("ListOffsetArray.getitem_next(ellipsis)")
 

diff --git a/awkward1/_numba/array/numpyarray.py b/awkward1/_numba/array/numpyarray.py
@@ -30,6 +30,9 @@ def getitem_int(self):
     def getitem_range(self):
         return self.getitem_tuple(numba.types.slice2_type)
 
+    def getitem_str(self):
+        raise IndexError("cannot slice NumpyArray with str (Record field name)")
+
     def getitem_tuple(self, wheretpe):
         outtpe = numba.typing.arraydecl.get_array_index_type(self.arraytpe, wheretpe).result
         if isinstance(outtpe, numba.types.Array):
@@ -40,6 +43,9 @@ def getitem_tuple(self, wheretpe):
     def getitem_next(self, wheretpe, isadvanced):
         if len(wheretpe.types) > self.arraytpe.ndim:
             raise IndexError("too many dimensions in slice")
+        if any(isinstance(x, numba.types.StringLiteral) for x in wheretpe):
+            raise IndexError("cannot slice NumpyArray with str (Record field name)")
+
         if isadvanced:
             numreduce = sum(1 if isinstance(x, (numba.types.Integer, numba.types.Array)) else 0 for x in wheretpe.types)
         else: