Add float16 support with configurable conversion behavior (#49)

* Enhance dtype support by adding float16 type and conversion methods

  - Introduced a new dtype "<f2" for float16 with size 16 and Uint16Array as its array constructor.
  - Implemented `float16ToFloat32Array` method to convert Uint16Array (float16) to Float32Array.
  - Added static method `float16ToFloat32` for converting individual float16 values to float32.
  - Updated the `Parsed` type to handle float16 conversion when necessary.

  This update improves the handling of float16 data types in the npyjs library.

* feat: Add convertFloat16 option and improve test infrastructure

  - Add `convertFloat16` constructor option to control float16 conversion behavior
  - Fix test data generation script to use correct paths
  - Update records.json format for better git diff visibility
  - Fix float16 test timeout issues
  - Ensure test data consistency between runs

  The `convertFloat16` option (defaults to true) allows users to:
  - true: automatically convert float16 to float32 (default)
  - false: keep raw uint16 values for custom handling
aplbrain · Jan 13, 2025 · f03b25c · f03b25c
1 parent 0d530dd
commit f03b25c
Show file tree

Hide file tree

Showing 10 changed files with 396 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -8,6 +8,8 @@
     <img alt="GitHub Workflow Status" src="https://img.shields.io/github/actions/workflow/status/aplbrain/npyjs/test-node.yml?label=Tests&style=for-the-badge">
 </p>
 
+Read .npy files from [numpy](https://numpy.org/doc/1.18/reference/generated/numpy.save.html) in Node/JS.
+
 ## Installation
 
 Include npy.js in your project directly, or:
@@ -18,18 +20,17 @@ yarn add npyjs
 ```
 
 ## Import 
-
 ```javascript
 import npyjs from "npyjs";
 ```
 
 
 ## Usage
-
--   Create a new npyjs object.
-
+-   Create a new npyjs object:
 ```javascript
 let n = new npyjs();
+// Or with options:
+let n = new npyjs({ convertFloat16: false }); // Disable float16 to float32 conversion
 ```
 
 -   This object can now be used to load .npy files. Arrays can be returned via a JavaScript callback, so usage looks like this:
@@ -66,6 +67,25 @@ const npyArray = ndarray(data, shape);
 npyArray.get(10, 15)
 ```
 
+## Supported Data Types
+The library supports the following NumPy data types:
+- `int8`, `uint8`
+- `int16`, `uint16`
+- `int32`, `uint32`
+- `int64`, `uint64` (as BigInt)
+- `float32`
+- `float64`
+- `float16` (converted to float32 by default)
+
+### Float16 Support
+By default, float16 arrays are automatically converted to float32 for compatibility, since JavaScript doesn't natively support float16. You can control this behavior with the constructor options:
+```javascript
+// Default behavior - float16 is converted to float32
+const n1 = new npyjs();
+// Keep float16 as raw uint16 values without conversion
+const n2 = new npyjs({ convertFloat16: false });
+```
+
 Unless otherwise specified, all code inside of this repository is covered under the license in [LICENSE](LICENSE).
 
 Please report bugs or contribute pull-requests on [GitHub](https://github.com/aplbrain/npyjs).

diff --git a/index.d.ts b/index.d.ts
@@ -1,5 +1,10 @@
 type ValueOf<T> = T[keyof T];
 
+// Add constructor options type
+export interface NpyjsOptions {
+    convertFloat16?: boolean;
+}
+
 export type Dtypes = {
     "<u1": {
         name: "uint8";
@@ -56,20 +61,26 @@ export type Dtypes = {
         size: 64;
         arrayConstructor: typeof Float64Array;
     };
+    "<f2": {
+        name: "float16";
+        size: 16;
+        arrayConstructor: typeof Uint16Array;
+        converter?: (array: Uint16Array) => Float32Array;
+    };
 };
 
 export type Parsed = ValueOf<{
     [K in keyof Dtypes]: {
         dtype: Dtypes[K]["name"];
-        data: InstanceType<Dtypes[K]["arrayConstructor"]>;
+        data: K extends "<f2" ? Float32Array : InstanceType<Dtypes[K]["arrayConstructor"]>;
         shape: number[];
         fortranOrder: boolean;
     };
 }>;
 
 declare class npyjs {
 
-    constructor(opts?: never);
+    constructor(opts?: NpyjsOptions);
 
     dtypes: Dtypes;
 
@@ -80,6 +91,9 @@ declare class npyjs {
         callback?: (result?: Parsed) => any,
         fetchArgs?: RequestInit
     ): Promise<Parsed>;
+
+    float16ToFloat32Array(float16Array: Uint16Array): Float32Array;
+    static float16ToFloat32(float16: number): number;
 }
 
 export default npyjs;
diff --git a/index.js b/index.js
@@ -3,13 +3,15 @@ import fetch from 'cross-fetch';
 class npyjs {
 
     constructor(opts) {
-        if (opts) {
-            console.error([
-                "No arguments accepted to npyjs constructor.",
+        if (opts && !('convertFloat16' in opts)) {
+            console.warn([
+                "npyjs constructor now accepts {convertFloat16?: boolean}.",
                 "For usage, go to https://github.com/jhuapl-boss/npyjs."
             ].join(" "));
         }
 
+        this.convertFloat16 = opts?.convertFloat16 ?? true;
+
         this.dtypes = {
             "<u1": {
                 name: "uint8",
@@ -66,9 +68,53 @@ class npyjs {
                 size: 64,
                 arrayConstructor: Float64Array
             },
+            "<f2": {
+                name: "float16",
+                size: 16,
+                arrayConstructor: Uint16Array,
+                converter: this.convertFloat16 ? this.float16ToFloat32Array : undefined
+            },
         };
     }
 
+    float16ToFloat32Array(float16Array) {
+        const length = float16Array.length;
+        const float32Array = new Float32Array(length);
+
+        for (let i = 0; i < length; i++) {
+            float32Array[i] = npyjs.float16ToFloat32(float16Array[i]);
+        }
+
+        return float32Array;
+    }
+
+    static float16ToFloat32(float16) {
+        // Extract the parts of the float16
+        const sign = (float16 >> 15) & 0x1;
+        const exponent = (float16 >> 10) & 0x1f;
+        const fraction = float16 & 0x3ff;
+
+        // Handle special cases
+        if (exponent === 0) {
+            if (fraction === 0) {
+                // Zero
+                return sign ? -0 : 0;
+            }
+            // Denormalized number
+            return (sign ? -1 : 1) * Math.pow(2, -14) * (fraction / 0x400);
+        } else if (exponent === 0x1f) {
+            if (fraction === 0) {
+                // Infinity
+                return sign ? -Infinity : Infinity;
+            }
+            // NaN
+            return NaN;
+        }
+
+        // Normalized number
+        return (sign ? -1 : 1) * Math.pow(2, exponent - 15) * (1 + fraction / 0x400);
+    }
+
     parse(arrayBufferContents) {
         // const version = arrayBufferContents.slice(6, 8); // Uint8-encoded
         const headerLength = new DataView(arrayBufferContents.slice(8, 10)).getUint8(0);
@@ -86,13 +132,23 @@ class npyjs {
         );
         const shape = header.shape;
         const dtype = this.dtypes[header.descr];
-        const nums = new dtype["arrayConstructor"](
+
+        if (!dtype) {
+            console.error(`Unsupported dtype: ${header.descr}`);
+            return null;
+        }
+
+        const nums = new dtype.arrayConstructor(
             arrayBufferContents,
             offsetBytes
         );
+
+        // Convert float16 to float32 if converter exists
+        const data = dtype.converter ? dtype.converter.call(this, nums) : nums;
+
         return {
             dtype: dtype.name,
-            data: nums,
+            data: data,
             shape,
             fortranOrder: header.fortran_order
         };

diff --git a/test/data/10-float16.npy b/test/data/10-float16.npy
diff --git a/test/data/100x100x100-float16.npy b/test/data/100x100x100-float16.npy
diff --git a/test/data/4x4x4x4x4-float16.npy b/test/data/4x4x4x4x4-float16.npy
diff --git a/test/data/65x65-float16.npy b/test/data/65x65-float16.npy
diff --git a/test/generate-test-data.py b/test/generate-test-data.py
@@ -1,14 +1,46 @@
 import numpy as np
 import json
+import os
+import pathlib
+from pathlib import Path
 
-records = {}
+# Get the script's directory and create data directory relative to it
+script_dir = Path(__file__).parent
+data_dir = script_dir / "data"
+data_dir.mkdir(parents=True, exist_ok=True)
 
+# Load existing records if any
+records_file = script_dir / "records.json"
+if os.path.exists(records_file):
+    with open(records_file) as f:
+        print(f"Loading records from {records_file}")
+        records = json.load(f)
+else:
+    records = {}
+
+# Generate test data for each combination
 for dimensions in [(10,), (65, 65), (100, 100, 100), (4, 4, 4, 4, 4)]:
-    for dtype in ["int8", "int16", "int64", "float32", "float64"]:
+    for dtype in ["int8", "int16", "int64", "float16", "float32", "float64"]:
         name = f"./data/{'x'.join(str(i) for i in dimensions)}-{dtype}"
+
+        # Skip if this dataset already has an entry in records.json
+        if name in records:
+            continue
+
         data = np.random.randint(0, 255, dimensions).astype(dtype)
+        # Store the last 5 values consistently for all types
         records[name] = data.ravel()[-5:].tolist()
-        np.save(name, data)
-json.dump(
-    records, open("records.json", "w"),
-)
+
+        # Save file using the correct path
+        file_path = script_dir / name.lstrip("./")
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+        np.save(file_path, data)
+
+# Save records in a pretty, sorted format
+with open(records_file, 'w') as f:
+    json.dump(
+        records,
+        f,
+        indent=4,
+        sort_keys=True
+    )