Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add float16 support with configurable conversion behavior #49

Merged
merged 2 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
<img alt="GitHub Workflow Status" src="https://img.shields.io/github/actions/workflow/status/aplbrain/npyjs/test-node.yml?label=Tests&style=for-the-badge">
</p>

Read .npy files from [numpy](https://numpy.org/doc/1.18/reference/generated/numpy.save.html) in Node/JS.

## Installation

Include npy.js in your project directly, or:
Expand All @@ -18,18 +20,17 @@ yarn add npyjs
```

## Import

```javascript
import npyjs from "npyjs";
```


## Usage

- Create a new npyjs object.

- Create a new npyjs object:
```javascript
let n = new npyjs();
// Or with options:
let n = new npyjs({ convertFloat16: false }); // Disable float16 to float32 conversion
```

- This object can now be used to load .npy files. Arrays can be returned via a JavaScript callback, so usage looks like this:
Expand Down Expand Up @@ -66,6 +67,25 @@ const npyArray = ndarray(data, shape);
npyArray.get(10, 15)
```

## Supported Data Types
The library supports the following NumPy data types:
- `int8`, `uint8`
- `int16`, `uint16`
- `int32`, `uint32`
- `int64`, `uint64` (as BigInt)
- `float32`
- `float64`
- `float16` (converted to float32 by default)

### Float16 Support
By default, float16 arrays are automatically converted to float32 for compatibility, since JavaScript doesn't natively support float16. You can control this behavior with the constructor options:
```javascript
// Default behavior - float16 is converted to float32
const n1 = new npyjs();
// Keep float16 as raw uint16 values without conversion
const n2 = new npyjs({ convertFloat16: false });
```

Unless otherwise specified, all code inside of this repository is covered under the license in [LICENSE](LICENSE).

Please report bugs or contribute pull-requests on [GitHub](https://github.com/aplbrain/npyjs).
Expand Down
18 changes: 16 additions & 2 deletions index.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
type ValueOf<T> = T[keyof T];

/**
 * Options accepted by the `npyjs` constructor.
 */
export interface NpyjsOptions {
/**
 * Controls how float16 (`"<f2"`) arrays are returned by the parser.
 *
 * When omitted, the constructor treats this as `true`: the raw 16-bit values
 * are run through the float16-to-float32 converter and returned as a
 * `Float32Array`. When `false`, no converter is installed and the data is
 * returned as the raw `Uint16Array` bit patterns.
 */
convertFloat16?: boolean;
}

export type Dtypes = {
"<u1": {
name: "uint8";
Expand Down Expand Up @@ -56,20 +61,26 @@ export type Dtypes = {
size: 64;
arrayConstructor: typeof Float64Array;
};
"<f2": {
name: "float16";
size: 16;
arrayConstructor: typeof Uint16Array;
converter?: (array: Uint16Array) => Float32Array;
};
};

export type Parsed = ValueOf<{
[K in keyof Dtypes]: {
dtype: Dtypes[K]["name"];
data: InstanceType<Dtypes[K]["arrayConstructor"]>;
data: K extends "<f2" ? Float32Array : InstanceType<Dtypes[K]["arrayConstructor"]>;
shape: number[];
fortranOrder: boolean;
};
}>;

declare class npyjs {

constructor(opts?: never);
constructor(opts?: NpyjsOptions);

dtypes: Dtypes;

Expand All @@ -80,6 +91,9 @@ declare class npyjs {
callback?: (result?: Parsed) => any,
fetchArgs?: RequestInit
): Promise<Parsed>;

float16ToFloat32Array(float16Array: Uint16Array): Float32Array;
static float16ToFloat32(float16: number): number;
}

export default npyjs;
66 changes: 61 additions & 5 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ import fetch from 'cross-fetch';
class npyjs {

constructor(opts) {
if (opts) {
console.error([
"No arguments accepted to npyjs constructor.",
if (opts && !('convertFloat16' in opts)) {
console.warn([
"npyjs constructor now accepts {convertFloat16?: boolean}.",
"For usage, go to https://github.com/jhuapl-boss/npyjs."
].join(" "));
}

this.convertFloat16 = opts?.convertFloat16 ?? true;

this.dtypes = {
"<u1": {
name: "uint8",
Expand Down Expand Up @@ -66,9 +68,53 @@ class npyjs {
size: 64,
arrayConstructor: Float64Array
},
"<f2": {
name: "float16",
size: 16,
arrayConstructor: Uint16Array,
converter: this.convertFloat16 ? this.float16ToFloat32Array : undefined
},
};
}

float16ToFloat32Array(float16Array) {
const length = float16Array.length;
const float32Array = new Float32Array(length);

for (let i = 0; i < length; i++) {
float32Array[i] = npyjs.float16ToFloat32(float16Array[i]);
}

return float32Array;
}

static float16ToFloat32(float16) {
// Extract the parts of the float16
const sign = (float16 >> 15) & 0x1;
const exponent = (float16 >> 10) & 0x1f;
const fraction = float16 & 0x3ff;

// Handle special cases
if (exponent === 0) {
if (fraction === 0) {
// Zero
return sign ? -0 : 0;
}
// Denormalized number
return (sign ? -1 : 1) * Math.pow(2, -14) * (fraction / 0x400);
} else if (exponent === 0x1f) {
if (fraction === 0) {
// Infinity
return sign ? -Infinity : Infinity;
}
// NaN
return NaN;
}

// Normalized number
return (sign ? -1 : 1) * Math.pow(2, exponent - 15) * (1 + fraction / 0x400);
}

parse(arrayBufferContents) {
// const version = arrayBufferContents.slice(6, 8); // Uint8-encoded
const headerLength = new DataView(arrayBufferContents.slice(8, 10)).getUint8(0);
Expand All @@ -86,13 +132,23 @@ class npyjs {
);
const shape = header.shape;
const dtype = this.dtypes[header.descr];
const nums = new dtype["arrayConstructor"](

if (!dtype) {
console.error(`Unsupported dtype: ${header.descr}`);
return null;
}

const nums = new dtype.arrayConstructor(
arrayBufferContents,
offsetBytes
);

// Convert float16 to float32 if converter exists
const data = dtype.converter ? dtype.converter.call(this, nums) : nums;

return {
dtype: dtype.name,
data: nums,
data: data,
shape,
fortranOrder: header.fortran_order
};
Expand Down
Binary file added test/data/10-float16.npy
Binary file not shown.
Binary file added test/data/100x100x100-float16.npy
Binary file not shown.
Binary file added test/data/4x4x4x4x4-float16.npy
Binary file not shown.
Binary file added test/data/65x65-float16.npy
Binary file not shown.
44 changes: 38 additions & 6 deletions test/generate-test-data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,46 @@
import numpy as np
import json
import os
import pathlib
from pathlib import Path

records = {}
# Anchor every path to the folder that contains this script, and create the
# data folder (including parents) if it is not there yet.
script_dir = Path(__file__).parent
data_dir = script_dir / "data"
data_dir.mkdir(parents=True, exist_ok=True)

# Start from the records saved by an earlier run when records.json exists;
# otherwise begin with an empty mapping.
records_file = script_dir / "records.json"
records = {}
if os.path.exists(records_file):
    with open(records_file) as saved:
        print(f"Loading records from {records_file}")
        records = json.load(saved)

# Generate test data for each combination
for dimensions in [(10,), (65, 65), (100, 100, 100), (4, 4, 4, 4, 4)]:
for dtype in ["int8", "int16", "int64", "float32", "float64"]:
for dtype in ["int8", "int16", "int64", "float16", "float32", "float64"]:
name = f"./data/{'x'.join(str(i) for i in dimensions)}-{dtype}"

# Skip if file already exists
if name in records:
continue

data = np.random.randint(0, 255, dimensions).astype(dtype)
# Store the last 5 values consistently for all types
records[name] = data.ravel()[-5:].tolist()
np.save(name, data)
json.dump(
records, open("records.json", "w"),
)

# Save file using the correct path
file_path = script_dir / name.lstrip("./")
file_path.parent.mkdir(parents=True, exist_ok=True)
np.save(file_path, data)

# Write the records back out as indented JSON with keys in sorted order.
with open(records_file, "w") as out:
    json.dump(records, out, indent=4, sort_keys=True)
Loading