Skip to content

Commit

Permalink
Support unsigned index type for 32-bit. (#20)
Browse files Browse the repository at this point in the history
* start work

* Is Azure is up and running?

* Modify Azure test matrix for Python 3.8.

* Generalized some ListArray::setid code for U32.

* Keyword 'struct' not allowed on return type in Windows.

* ListArray::getitem_next and ::carry are no longer duplicated for each index type.

* Implemented ListArrayU32 and ListOffsetArrayU32 (no Python yet).

* Added tests for ListArrayU32 and ListOffsetArrayU32.

* Update signatures.

* Updated Numba and the PR is done.
  • Loading branch information
jpivarski authored Nov 11, 2019
1 parent 4d79a75 commit 8d49067
Show file tree
Hide file tree
Showing 28 changed files with 2,753 additions and 733 deletions.
34 changes: 23 additions & 11 deletions .ci/azure-buildtest-awkward.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ jobs:
"py27-32bit":
python.version: "2.7"
python.architecture: "x86"
numpy.version: "latest"
numpy.version: "1.16.5"
"py27-64bit":
python.version: "2.7"
python.architecture: "x64"
numpy.version: "latest"
numpy.version: "1.16.5"
"py35-32bit":
python.version: "3.5"
python.architecture: "x86"
Expand All @@ -57,6 +57,14 @@ jobs:
python.version: "3.7"
python.architecture: "x64"
numpy.version: "latest"
"py38-32bit":
python.version: "3.8"
python.architecture: "x86"
numpy.version: "latest"
"py38-64bit":
python.version: "3.8"
python.architecture: "x64"
numpy.version: "latest"

steps:
- checkout: self
Expand Down Expand Up @@ -90,11 +98,7 @@ jobs:
"py27":
python.version: "2.7"
python.architecture: "x64"
numpy.version: "latest"
"py34":
python.version: "3.4"
python.architecture: "x64"
numpy.version: "latest"
numpy.version: "1.16.5"
"py35":
python.version: "3.5"
python.architecture: "x64"
Expand All @@ -107,6 +111,10 @@ jobs:
python.version: "3.7"
python.architecture: "x64"
numpy.version: "latest"
"py38":
python.version: "3.8"
python.architecture: "x64"
numpy.version: "latest"

steps:
- checkout: self
Expand Down Expand Up @@ -141,18 +149,18 @@ jobs:
python.version: "2.7"
python.architecture: "x64"
numpy.version: "1.13.1"
"py27-np*":
"py27-np16":
python.version: "2.7"
python.architecture: "x64"
numpy.version: "latest"
numpy.version: "1.16.5"
"py34-np13":
python.version: "3.4"
python.architecture: "x64"
numpy.version: "1.13.1"
"py34-np*":
"py34-np16":
python.version: "3.4"
python.architecture: "x64"
numpy.version: "latest"
numpy.version: "1.16.5"
"py35-np13":
python.version: "3.5"
python.architecture: "x64"
Expand All @@ -177,6 +185,10 @@ jobs:
python.version: "3.7"
python.architecture: "x64"
numpy.version: "latest"
"py38-np*":
python.version: "3.8"
python.architecture: "x64"
numpy.version: "latest"

steps:
- checkout: self
Expand Down
25 changes: 20 additions & 5 deletions .ci/azure-deploy-awkward.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ jobs:
"py27-32bit":
python.version: "2.7"
python.architecture: "x86"
numpy.version: "latest"
numpy.version: "1.16.5"
"py27-64bit":
python.version: "2.7"
python.architecture: "x64"
numpy.version: "latest"
numpy.version: "1.16.5"
"py35-32bit":
python.version: "3.5"
python.architecture: "x86"
Expand All @@ -101,6 +101,14 @@ jobs:
python.version: "3.7"
python.architecture: "x64"
numpy.version: "latest"
"py38-32bit":
python.version: "3.8"
python.architecture: "x86"
numpy.version: "latest"
"py38-64bit":
python.version: "3.8"
python.architecture: "x64"
numpy.version: "latest"

steps:
- checkout: self
Expand Down Expand Up @@ -150,6 +158,10 @@ jobs:
python.version: "3.7"
python.architecture: "x64"
numpy.version: "latest"
"py38":
python.version: "3.8"
python.architecture: "x64"
numpy.version: "latest"

steps:
- checkout: self
Expand All @@ -164,13 +176,16 @@ jobs:
- script: |
case $(python.version) in
2.7)
FULL_VERSION=2.7.16
FULL_VERSION=2.7.17
;;
3.6)
FULL_VERSION=3.6.8
FULL_VERSION=3.6.9
;;
3.7)
FULL_VERSION=3.7.3
FULL_VERSION=3.7.5
;;
3.8)
FULL_VERSION=3.8.0
;;
esac
Expand Down
2 changes: 1 addition & 1 deletion VERSION_INFO
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.19
0.1.20
75 changes: 47 additions & 28 deletions awkward1/_numba/array/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
from ..._numba import cpu, util, content

@numba.extending.typeof_impl.register(awkward1.layout.ListArray32)
@numba.extending.typeof_impl.register(awkward1.layout.ListArrayU32)
@numba.extending.typeof_impl.register(awkward1.layout.ListArray64)
def typeof(val, c):
return ListArrayType(numba.typeof(numpy.asarray(val.starts)), numba.typeof(numpy.asarray(val.stops)), numba.typeof(val.content), numba.typeof(val.id))

class ListArrayType(content.ContentType):
def __init__(self, startstpe, stopstpe, contenttpe, idtpe):
assert startstpe == stopstpe
super(ListArrayType, self).__init__(name="ListArray{}Type({}, id={})".format(startstpe.dtype.bitwidth, contenttpe.name, idtpe.name))
super(ListArrayType, self).__init__(name="ListArray{}{}Type({}, id={})".format("" if startstpe.dtype.signed else "U", startstpe.dtype.bitwidth, contenttpe.name, idtpe.name))
self.startstpe = startstpe
self.contenttpe = contenttpe
self.idtpe = idtpe
Expand All @@ -31,6 +32,10 @@ def stopstpe(self):
def bitwidth(self):
return self.startstpe.dtype.bitwidth

@property
def indexname(self):
return ("" if self.startstpe.dtype.signed else "U") + str(self.startstpe.dtype.bitwidth)

@property
def ndim(self):
return 1 + self.contenttpe.ndim
Expand Down Expand Up @@ -58,7 +63,7 @@ def getitem_next(self, wheretpe, isadvanced):

elif isinstance(headtpe, numba.types.SliceType):
contenttpe = self.contenttpe.carry().getitem_next(tailtpe, isadvanced)
return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.bitwidth), contenttpe, self.idtpe)
return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)

elif isinstance(headtpe, numba.types.EllipsisType):
raise NotImplementedError("ellipsis")
Expand All @@ -71,7 +76,7 @@ def getitem_next(self, wheretpe, isadvanced):
raise NotImplementedError("array.ndim != 1")
contenttpe = self.contenttpe.carry().getitem_next(tailtpe, True)
if not isadvanced:
return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.bitwidth), contenttpe, self.idtpe)
return awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(self.indexname), contenttpe, self.idtpe)
else:
return contenttpe

Expand Down Expand Up @@ -138,14 +143,17 @@ def unbox(tpe, obj, c):

@numba.extending.box(ListArrayType)
def box(tpe, val, c):
if tpe.bitwidth == 32:
Index_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.Index32))
ListArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.ListArray32))
elif tpe.bitwidth == 64:
if tpe.indexname == "64":
Index_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.Index64))
ListArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.ListArray64))
elif tpe.indexname == "32":
Index_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.Index32))
ListArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.ListArray32))
elif tpe.indexname == "U32":
Index_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.IndexU32))
ListArray_obj = c.pyapi.unserialize(c.pyapi.serialize_object(awkward1.layout.ListArrayU32))
else:
raise AssertionError("unrecognized bitwidth")
raise AssertionError("unrecognized index type: {}".format(tpe.indexname))
proxyin = numba.cgutils.create_struct_proxy(tpe)(c.context, c.builder, value=val)
startsarray_obj = c.pyapi.from_native_value(tpe.startstpe, proxyin.starts, c.env_manager)
stopsarray_obj = c.pyapi.from_native_value(tpe.stopstpe, proxyin.stops, c.env_manager)
Expand Down Expand Up @@ -258,12 +266,14 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,

if isinstance(headtpe, numba.types.Integer):
assert advanced is None
if arraytpe.bitwidth == 64:
if arraytpe.indexname == "64":
kernel = cpu.kernels.awkward_listarray64_getitem_next_at_64
elif arraytpe.bitwidth == 32:
elif arraytpe.indexname == "32":
kernel = cpu.kernels.awkward_listarray32_getitem_next_at_64
elif arraytpe.indexname == "U32":
kernel = cpu.kernels.awkward_listarrayU32_getitem_next_at_64
else:
raise AssertionError("unrecognized bitwidth")
raise AssertionError("unrecognized index type: {}".format(arraytpe.indexname))

nextcarry = util.newindex64(context, builder, numba.int64, lenstarts)
util.call(context, builder, kernel,
Expand All @@ -282,18 +292,23 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
elif isinstance(headtpe, numba.types.SliceType):
proxyslicein = numba.cgutils.create_struct_proxy(headtpe)(context, builder, value=headval)

if arraytpe.bitwidth == 64:
if arraytpe.indexname == "64":
determine_carrylength = cpu.kernels.awkward_listarray64_getitem_next_range_carrylength
fill_carry = cpu.kernels.awkward_listarray64_getitem_next_range_64
determine_total = cpu.kernels.awkward_listarray64_getitem_next_range_counts_64
fill_nextadvanced = cpu.kernels.awkward_listarray64_getitem_next_range_spreadadvanced_64
elif arraytpe.bitwidth == 32:
elif arraytpe.indexname == "32":
determine_carrylength = cpu.kernels.awkward_listarray32_getitem_next_range_carrylength
fill_carry = cpu.kernels.awkward_listarray32_getitem_next_range_64
determine_total = cpu.kernels.awkward_listarray32_getitem_next_range_counts_64
fill_nextadvanced = cpu.kernels.awkward_listarray32_getitem_next_range_spreadadvanced_64
elif arraytpe.indexname == "U32":
determine_carrylength = cpu.kernels.awkward_listarrayU32_getitem_next_range_carrylength
fill_carry = cpu.kernels.awkward_listarrayU32_getitem_next_range_64
determine_total = cpu.kernels.awkward_listarrayU32_getitem_next_range_counts_64
fill_nextadvanced = cpu.kernels.awkward_listarrayU32_getitem_next_range_spreadadvanced_64
else:
raise AssertionError("unrecognized bitwidth")
raise AssertionError("unrecognized index type: {}".format(arraytpe.indexname))

carrylength = numba.cgutils.alloca_once(builder, context.get_value_type(numba.int64))
util.call(context, builder, determine_carrylength,
Expand All @@ -308,10 +323,10 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
util.cast(context, builder, numba.intp, numba.int64, proxyslicein.step)),
"in {}, indexing error".format(arraytpe.shortname))

nextoffsets = util.newindex(arraytpe.bitwidth, context, builder, numba.int64, builder.add(lenstarts, context.get_constant(numba.int64, 1)))
nextoffsets = util.newindex(arraytpe.indexname, context, builder, numba.int64, builder.add(lenstarts, context.get_constant(numba.int64, 1)))
nextcarry = util.newindex64(context, builder, numba.int64, builder.load(carrylength))
util.call(context, builder, fill_carry,
(util.arrayptr(context, builder, util.indextpe(arraytpe.bitwidth), nextoffsets),
(util.arrayptr(context, builder, util.indextpe(arraytpe.indexname), nextoffsets),
util.arrayptr(context, builder, util.index64tpe, nextcarry),
util.arrayptr(context, builder, arraytpe.startstpe, proxyin.starts),
util.arrayptr(context, builder, arraytpe.stopstpe, proxyin.stops),
Expand All @@ -334,22 +349,22 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
total = numba.cgutils.alloca_once(builder, context.get_value_type(numba.int64))
util.call(context, builder, determine_total,
(total,
util.arrayptr(context, builder, util.indextpe(arraytpe.bitwidth), nextoffsets),
util.arrayptr(context, builder, util.indextpe(arraytpe.indexname), nextoffsets),
lenstarts),
"in {}, indexing error".format(arraytpe.shortname))

nextadvanced = util.newindex64(context, builder, numba.int64, builder.load(total))
util.call(context, builder, fill_nextadvanced,
(util.arrayptr(context, builder, util.index64tpe, nextadvanced),
util.arrayptr(context, builder, util.index64tpe, advanced),
util.arrayptr(context, builder, util.indextpe(arraytpe.bitwidth), nextoffsets),
util.arrayptr(context, builder, util.indextpe(arraytpe.indexname), nextoffsets),
lenstarts),
"in {}, indexing error".format(arraytpe.shortname))

outcontenttpe = nextcontenttpe.getitem_next(tailtpe, True)
outcontentval = nextcontenttpe.lower_getitem_next(context, builder, nextcontenttpe, tailtpe, nextcontentval, tailval, nextadvanced)

outtpe = awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(arraytpe.bitwidth), outcontenttpe, arraytpe.idtpe)
outtpe = awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(arraytpe.indexname), outcontenttpe, arraytpe.idtpe)
proxyout = numba.cgutils.create_struct_proxy(outtpe)(context, builder)
proxyout.offsets = nextoffsets
proxyout.content = outcontentval
Expand All @@ -371,21 +386,23 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
lenflathead = util.arraylen(context, builder, util.index64tpe, flathead, totpe=numba.int64)

if advanced is None:
if arraytpe.bitwidth == 64:
if arraytpe.indexname == "64":
kernel = cpu.kernels.awkward_listarray64_getitem_next_array_64
elif arraytpe.bitwidth == 32:
elif arraytpe.indexname == "32":
kernel = cpu.kernels.awkward_listarray32_getitem_next_array_64
elif arraytpe.indexname == "U32":
kernel = cpu.kernels.awkward_listarrayU32_getitem_next_array_64
else:
raise AssertionError("unrecognized bitwidth")
raise AssertionError("unrecognized index type: {}".format(arraytpe.indexname))

lencarry = builder.mul(lenstarts, lenflathead)
lenoffsets = builder.add(lenstarts, context.get_constant(numba.int64, 1))

nextcarry = util.newindex64(context, builder, numba.int64, lencarry)
nextadvanced = util.newindex64(context, builder, numba.int64, lencarry)
nextoffsets = util.newindex(arraytpe.bitwidth, context, builder, numba.int64, lenoffsets)
nextoffsets = util.newindex(arraytpe.indexname, context, builder, numba.int64, lenoffsets)
util.call(context, builder, kernel,
(util.arrayptr(context, builder, util.indextpe(arraytpe.bitwidth), nextoffsets),
(util.arrayptr(context, builder, util.indextpe(arraytpe.indexname), nextoffsets),
util.arrayptr(context, builder, util.index64tpe, nextcarry),
util.arrayptr(context, builder, util.index64tpe, nextadvanced),
util.arrayptr(context, builder, arraytpe.startstpe, proxyin.starts),
Expand All @@ -404,7 +421,7 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
contenttpe = nexttpe.getitem_next(tailtpe, True)
contentval = nexttpe.lower_getitem_next(context, builder, nexttpe, tailtpe, nextval, tailval, nextadvanced)

outtpe = awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(arraytpe.bitwidth), contenttpe, arraytpe.idtpe)
outtpe = awkward1._numba.array.listoffsetarray.ListOffsetArrayType(util.indextpe(arraytpe.indexname), contenttpe, arraytpe.idtpe)
proxyout = numba.cgutils.create_struct_proxy(outtpe)(context, builder)
proxyout.offsets = nextoffsets
proxyout.content = contentval
Expand All @@ -413,12 +430,14 @@ def lower_getitem_next(context, builder, arraytpe, wheretpe, arrayval, whereval,
return proxyout._getvalue()

else:
if arraytpe.bitwidth == 64:
if arraytpe.indexname == "64":
kernel = cpu.kernels.awkward_listarray64_getitem_next_array_advanced_64
elif arraytpe.bitwidth == 32:
elif arraytpe.indexname == "32":
kernel = cpu.kernels.awkward_listarray32_getitem_next_array_advanced_64
elif arraytpe.indexname == "U32":
kernel = cpu.kernels.awkward_listarrayU32_getitem_next_array_advanced_64
else:
raise AssertionError("unrecognized bitwidth")
raise AssertionError("unrecognized index type: {}".format(arraytpe.indexname))

nextcarry = util.newindex64(context, builder, numba.int64, lenstarts)
nextadvanced = util.newindex64(context, builder, numba.int64, lenstarts)
Expand Down
Loading

0 comments on commit 8d49067

Please sign in to comment.