Skip to content

Commit

Permalink
Add float16 support with configurable conversion behavior (#49)
Browse files Browse the repository at this point in the history
* Enhance dtype support by adding float16 type and conversion methods

- Introduced a new dtype "<f2" for float16 with size 16 and Uint16Array as its array constructor.
- Implemented `float16ToFloat32Array` method to convert Uint16Array (float16) to Float32Array.
- Added static method `float16ToFloat32` for converting individual float16 values to float32.
- Updated the `Parsed` type to handle float16 conversion when necessary.

This update improves the handling of float16 data types in the npyjs library.

* feat: Add convertFloat16 option and improve test infrastructure

- Add `convertFloat16` constructor option to control float16 conversion behavior
- Fix test data generation script to use correct paths
- Update records.json format for better git diff visibility
- Fix float16 test timeout issues
- Ensure test data consistency between runs

The `convertFloat16` option (defaults to true) controls how float16 data is returned:
- true: automatically convert float16 to float32 (default)
- false: keep raw uint16 values for custom handling
  • Loading branch information
eek authored Jan 13, 2025
1 parent 0d530dd commit f03b25c
Show file tree
Hide file tree
Showing 10 changed files with 396 additions and 21 deletions.
28 changes: 24 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
<img alt="GitHub Workflow Status" src="https://img.shields.io/github/actions/workflow/status/aplbrain/npyjs/test-node.yml?label=Tests&style=for-the-badge">
</p>

Read .npy files from [numpy](https://numpy.org/doc/1.18/reference/generated/numpy.save.html) in Node/JS.

## Installation

Include npy.js in your project directly, or:
Expand All @@ -18,18 +20,17 @@ yarn add npyjs
```

## Import

```javascript
import npyjs from "npyjs";
```


## Usage

- Create a new npyjs object.

- Create a new npyjs object:
```javascript
let n = new npyjs();
// Or, instead of the line above, construct it with options:
let n = new npyjs({ convertFloat16: false }); // Disable float16 to float32 conversion
```

- This object can now be used to load .npy files. Arrays can be returned via a JavaScript callback, so usage looks like this:
Expand Down Expand Up @@ -66,6 +67,25 @@ const npyArray = ndarray(data, shape);
npyArray.get(10, 15)
```

## Supported Data Types
The library supports the following NumPy data types:
- `int8`, `uint8`
- `int16`, `uint16`
- `int32`, `uint32`
- `int64`, `uint64` (as BigInt)
- `float32`
- `float64`
- `float16` (converted to float32 by default)

### Float16 Support
By default, float16 arrays are automatically converted to float32 for compatibility, since JavaScript doesn't natively support float16. You can control this behavior with the constructor options:
```javascript
// Default behavior - float16 is converted to float32
const n1 = new npyjs();
// Keep float16 as raw uint16 values without conversion
const n2 = new npyjs({ convertFloat16: false });
```

Unless otherwise specified, all code inside of this repository is covered under the license in [LICENSE](LICENSE).

Please report bugs or contribute pull-requests on [GitHub](https://github.com/aplbrain/npyjs).
Expand Down
18 changes: 16 additions & 2 deletions index.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
type ValueOf<T> = T[keyof T];

/**
 * Options accepted by the `npyjs` constructor.
 */
export interface NpyjsOptions {
/**
 * Controls how float16 (`<f2`) arrays are returned by the parser.
 * When `true`, or when omitted (the constructor falls back to `true`),
 * float16 data is converted to a `Float32Array`. When `false`, the raw
 * 16-bit values are returned unconverted in a `Uint16Array`.
 */
convertFloat16?: boolean;
}

export type Dtypes = {
"<u1": {
name: "uint8";
Expand Down Expand Up @@ -56,20 +61,26 @@ export type Dtypes = {
size: 64;
arrayConstructor: typeof Float64Array;
};
"<f2": {
name: "float16";
size: 16;
arrayConstructor: typeof Uint16Array;
converter?: (array: Uint16Array) => Float32Array;
};
};

/**
 * Result of parsing a `.npy` buffer: a union with one variant per supported
 * dtype, pairing the dtype name with the typed array that holds its values.
 *
 * For float16 (`<f2`) the `data` array is a `Float32Array` when the instance
 * converts float16 (the constructor default), and the raw `Uint16Array` of
 * 16-bit patterns when the instance was created with
 * `convertFloat16: false`. Both possibilities are therefore part of the type.
 */
export type Parsed = ValueOf<{
  [K in keyof Dtypes]: {
    dtype: Dtypes[K]["name"];
    // Every dtype yields its own array constructor's instance; float16 may
    // additionally have been widened to Float32Array by the converter.
    data:
      | InstanceType<Dtypes[K]["arrayConstructor"]>
      | (K extends "<f2" ? Float32Array : never);
    shape: number[];
    fortranOrder: boolean;
  };
}>;

declare class npyjs {

constructor(opts?: never);
constructor(opts?: NpyjsOptions);

dtypes: Dtypes;

Expand All @@ -80,6 +91,9 @@ declare class npyjs {
callback?: (result?: Parsed) => any,
fetchArgs?: RequestInit
): Promise<Parsed>;

float16ToFloat32Array(float16Array: Uint16Array): Float32Array;
static float16ToFloat32(float16: number): number;
}

export default npyjs;
66 changes: 61 additions & 5 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ import fetch from 'cross-fetch';
class npyjs {

constructor(opts) {
if (opts) {
console.error([
"No arguments accepted to npyjs constructor.",
if (opts && !('convertFloat16' in opts)) {
console.warn([
"npyjs constructor now accepts {convertFloat16?: boolean}.",
"For usage, go to https://github.com/jhuapl-boss/npyjs."
].join(" "));
}

this.convertFloat16 = opts?.convertFloat16 ?? true;

this.dtypes = {
"<u1": {
name: "uint8",
Expand Down Expand Up @@ -66,9 +68,53 @@ class npyjs {
size: 64,
arrayConstructor: Float64Array
},
"<f2": {
name: "float16",
size: 16,
arrayConstructor: Uint16Array,
converter: this.convertFloat16 ? this.float16ToFloat32Array : undefined
},
};
}

float16ToFloat32Array(float16Array) {
const length = float16Array.length;
const float32Array = new Float32Array(length);

for (let i = 0; i < length; i++) {
float32Array[i] = npyjs.float16ToFloat32(float16Array[i]);
}

return float32Array;
}

static float16ToFloat32(float16) {
// Extract the parts of the float16
const sign = (float16 >> 15) & 0x1;
const exponent = (float16 >> 10) & 0x1f;
const fraction = float16 & 0x3ff;

// Handle special cases
if (exponent === 0) {
if (fraction === 0) {
// Zero
return sign ? -0 : 0;
}
// Denormalized number
return (sign ? -1 : 1) * Math.pow(2, -14) * (fraction / 0x400);
} else if (exponent === 0x1f) {
if (fraction === 0) {
// Infinity
return sign ? -Infinity : Infinity;
}
// NaN
return NaN;
}

// Normalized number
return (sign ? -1 : 1) * Math.pow(2, exponent - 15) * (1 + fraction / 0x400);
}

parse(arrayBufferContents) {
// const version = arrayBufferContents.slice(6, 8); // Uint8-encoded
const headerLength = new DataView(arrayBufferContents.slice(8, 10)).getUint8(0);
Expand All @@ -86,13 +132,23 @@ class npyjs {
);
const shape = header.shape;
const dtype = this.dtypes[header.descr];
const nums = new dtype["arrayConstructor"](

if (!dtype) {
console.error(`Unsupported dtype: ${header.descr}`);
return null;
}

const nums = new dtype.arrayConstructor(
arrayBufferContents,
offsetBytes
);

// Convert float16 to float32 if converter exists
const data = dtype.converter ? dtype.converter.call(this, nums) : nums;

return {
dtype: dtype.name,
data: nums,
data: data,
shape,
fortranOrder: header.fortran_order
};
Expand Down
Binary file added test/data/10-float16.npy
Binary file not shown.
Binary file added test/data/100x100x100-float16.npy
Binary file not shown.
Binary file added test/data/4x4x4x4x4-float16.npy
Binary file not shown.
Binary file added test/data/65x65-float16.npy
Binary file not shown.
44 changes: 38 additions & 6 deletions test/generate-test-data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,46 @@
import numpy as np
import json
import os
import pathlib
from pathlib import Path

records = {}
# Get the script's directory and create data directory relative to it
script_dir = Path(__file__).parent
data_dir = script_dir / "data"
data_dir.mkdir(parents=True, exist_ok=True)

# Load existing records if any
records_file = script_dir / "records.json"
if os.path.exists(records_file):
with open(records_file) as f:
print(f"Loading records from {records_file}")
records = json.load(f)
else:
records = {}

# Generate test data for each combination
for dimensions in [(10,), (65, 65), (100, 100, 100), (4, 4, 4, 4, 4)]:
for dtype in ["int8", "int16", "int64", "float32", "float64"]:
for dtype in ["int8", "int16", "int64", "float16", "float32", "float64"]:
name = f"./data/{'x'.join(str(i) for i in dimensions)}-{dtype}"

# Skip if file already exists
if name in records:
continue

data = np.random.randint(0, 255, dimensions).astype(dtype)
# Store the last 5 values consistently for all types
records[name] = data.ravel()[-5:].tolist()
np.save(name, data)
json.dump(
records, open("records.json", "w"),
)

# Save file using the correct path
file_path = script_dir / name.lstrip("./")
file_path.parent.mkdir(parents=True, exist_ok=True)
np.save(file_path, data)

# Save records in a pretty, sorted format
with open(records_file, 'w') as f:
json.dump(
records,
f,
indent=4,
sort_keys=True
)
Loading

0 comments on commit f03b25c

Please sign in to comment.