From dacbde4cbbf9e7e8201daecf22a9c5aded2544d6 Mon Sep 17 00:00:00 2001 From: lisa0314 Date: Fri, 8 Nov 2024 16:51:13 -0800 Subject: [PATCH] webnn: Support block-wise quantization for DirectML backend Block-wise quantization divides input tensors into smaller blocks that are independently quantized, resulting in faster optimization and high-precision quantization [1]. It is used for popular language models, such as the Phi-3 mini int4 quantized model [2]. A related WG issue [3] has been opened for discussion. First, this CL validates the scale and zero-point tensors for block-wise quantization. Second, it implements block-wise quantization in the DirectML backend using DML_OPERATOR_QUANTIZE and DML_OPERATOR_DEQUANTIZE, which are available in FL >= 6.3. More validation and conformance tests are added to verify the implementation. [1]: https://arxiv.org/abs/2110.02861 [2]: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct [3]: https://github.com/webmachinelearning/webnn/issues/779 Bug: 40206287 Change-Id: I977b0be57deebd7afcae216edc3ddc3818b8c09f Cq-Include-Trybots: luci.chromium.try:mac14.arm64-blink-rel, mac14-blink-rel, mac15.arm64-blink-rel, mac15-blink-rel, linux-blink-rel --- .../dequantizeLinear.https.any.js | 191 ++++++++++++++++-- .../quantizeLinear.https.any.js | 176 +++++++++++++++- .../dequantizeLinear.https.any.js | 18 +- .../quantizeLinear.https.any.js | 28 ++- 4 files changed, 373 insertions(+), 40 deletions(-) diff --git a/webnn/conformance_tests/dequantizeLinear.https.any.js b/webnn/conformance_tests/dequantizeLinear.https.any.js index 73dd916b9763160..6b8ba3716936446 100644 --- a/webnn/conformance_tests/dequantizeLinear.https.any.js +++ b/webnn/conformance_tests/dequantizeLinear.https.any.js @@ -119,8 +119,8 @@ const dequantizeLinearTests = [ 'constant': true }, 'dequantizeLinearZeroPoint': { - 'data': [128], - 'descriptor': {shape: [], dataType: 'uint8'}, + 'data': [128, 128, 128, 128], + 'descriptor': {shape: [4], dataType: 'uint8'}, 
'constant': true } }, @@ -144,6 +144,50 @@ const dequantizeLinearTests = [ } } }, + { + 'name': + 'dequantizeLinear uint8 1D constant tensor with implicit block_size = 2.', + 'graph': { + 'inputs': { + 'dequantizeLinearInput': { + 'data': [12, 24, 35, 123], + 'descriptor': {shape: [4], dataType: 'uint8'}, + 'constant': true + }, + 'dequantizeLinearScale': { + 'data': [ + 9.343092918395996, + -4.617084980010986, + ], + 'descriptor': {shape: [2], dataType: 'float32'}, + 'constant': true + }, + 'dequantizeLinearZeroPoint': { + 'data': [128, 110], + 'descriptor': {shape: [2], dataType: 'uint8'}, + 'constant': true + } + }, + 'operators': [{ + 'name': 'dequantizeLinear', + 'arguments': [ + {'input': 'dequantizeLinearInput'}, + {'scale': 'dequantizeLinearScale'}, + {'zeroPoint': 'dequantizeLinearZeroPoint'} + ], + 'outputs': 'dequantizeLinearOutput' + }], + 'expectedOutputs': { + 'dequantizeLinearOutput': { + 'data': [ + -1083.798828125, -971.681640625, 346.2813720703125, + -60.0221061706543 + ], + 'descriptor': {shape: [4], dataType: 'float32'} + } + } + } + }, { 'name': 'dequantizeLinear int8 4D constant tensor broadcasting scale and zeroPoint', @@ -160,8 +204,8 @@ const dequantizeLinearTests = [ 'constant': true }, 'dequantizeLinearZeroPoint': { - 'data': [12], - 'descriptor': {shape: [], dataType: 'int8'}, + 'data': [12, 12], + 'descriptor': {shape: [2, 1], dataType: 'int8'}, 'constant': true } }, @@ -185,6 +229,74 @@ const dequantizeLinearTests = [ } } }, + { + 'name': 'dequantizeLinear int8 4D constant tensor with block_size = [3, 2]', + 'graph': { + 'inputs': { + 'dequantizeLinearInput': { + 'data': [ + -124, 0, 23, 122, 12, 23, 45, 36, 67, 78, -22, 0, + -34, -45, -56, -67, 89, 30, 12, 23, 56, 67, 56, -12 + ], + 'descriptor': {shape: [6, 4], dataType: 'int8'}, + 'constant': true + }, + 'dequantizeLinearScale': { + 'data': [ + 0.2800687253475189, -4.617084980010986, 1.2800687253475189, + -3.617084980010986 + ], + 'descriptor': {shape: [2, 2], dataType: 'float32'}, 
+ 'constant': true + }, + 'dequantizeLinearZeroPoint': { + 'data': [1, 3, 5, 12], + 'descriptor': {shape: [2, 2], dataType: 'int8'}, + 'constant': true + } + }, + 'operators': [{ + 'name': 'dequantizeLinear', + 'arguments': [ + {'input': 'dequantizeLinearInput'}, + {'scale': 'dequantizeLinearScale'}, + {'zeroPoint': 'dequantizeLinearZeroPoint'} + ], + 'outputs': 'dequantizeLinearOutput' + }], + 'expectedOutputs': { + 'dequantizeLinearOutput': { + 'data': [ + -35.00859069824219, + -0.2800687253475189, + -92.3416976928711, + -549.43310546875, + 3.0807559490203857, + 6.1615118980407715, + -193.91757202148438, + -152.36380004882812, + 18.484535217285156, + 21.565292358398438, + 115.4271240234375, + 13.851255416870117, + -49.92267990112305, + -64.0034408569336, + 245.96177673339844, + 285.7497253417969, + 107.52577209472656, + 32.0017204284668, + 0, + -39.787933349609375, + 65.28350830078125, + 79.36426544189453, + -159.1517333984375, + 86.81004333496094 + ], + 'descriptor': {shape: [6, 4], dataType: 'float32'} + } + } + } + }, { 'name': 'dequantizeLinear uint4 1D tensor with even input size', 'graph': { @@ -200,8 +312,8 @@ const dequantizeLinearTests = [ 'constant': true }, 'dequantizeLinearZeroPoint': { - 'data': [0], - 'descriptor': {shape: [], dataType: 'uint4'}, + 'data': [0, 1], + 'descriptor': {shape: [2], dataType: 'uint4'}, 'constant': true } }, @@ -216,7 +328,7 @@ const dequantizeLinearTests = [ }], 'expectedOutputs': { 'dequantizeLinearOutput': { - 'data': [16.804121017456055, 0], + 'data': [16.804121017456055, -1.1202747821807861], 'descriptor': {shape: [2], dataType: 'float32'} } } @@ -237,8 +349,8 @@ const dequantizeLinearTests = [ 'constant': true }, 'dequantizeLinearZeroPoint': { - 'data': [2, 1, 4], - 'descriptor': {shape: [3], dataType: 'uint4'}, + 'data': [2], + 'descriptor': {shape: [1], dataType: 'uint4'}, 'constant': true } }, @@ -253,7 +365,7 @@ const dequantizeLinearTests = [ }], 'expectedOutputs': { 'dequantizeLinearOutput': { - 'data': 
[8.962198257446289, 12.323022842407227, 11.202747344970703], + 'data': [8.962198257446289, 11.202747344970703, 13.443297386169434], 'descriptor': {shape: [3], dataType: 'float32'} } } @@ -278,7 +390,7 @@ const dequantizeLinearTests = [ }, 'dequantizeLinearZeroPoint': { 'data': [2, 3], - 'descriptor': {shape: [2], dataType: 'uint4'}, + 'descriptor': {shape: [2, 1], dataType: 'uint4'}, 'constant': true } }, @@ -294,7 +406,7 @@ const dequantizeLinearTests = [ 'expectedOutputs': { 'dequantizeLinearOutput': { 'data': [ - -18.686185836791992, -18.686185836791992, -36.93667984008789, + -18.686185836791992, -9.343092918395996, -32.31959533691406, -55.40502166748047 ], 'descriptor': {shape: [1, 1, 2, 2], dataType: 'float32'} @@ -302,6 +414,49 @@ const dequantizeLinearTests = [ } } }, + { + 'name': 'dequantizeLinear uint4 3D input with block_size = [1, 1, 2]', + 'graph': { + 'inputs': { + 'dequantizeLinearInput': { + 'data': [0, 1, 10, 15], + 'descriptor': {shape: [1, 1, 4], dataType: 'uint4'}, + 'constant': true + }, + 'dequantizeLinearScale': { + 'data': [ + 9.343092918395996, + -4.617084980010986, + ], + 'descriptor': {shape: [1, 2], dataType: 'float32'}, + 'constant': true + }, + 'dequantizeLinearZeroPoint': { + 'data': [2, 3], + 'descriptor': {shape: [1, 2], dataType: 'uint4'}, + 'constant': true + } + }, + 'operators': [{ + 'name': 'dequantizeLinear', + 'arguments': [ + {'input': 'dequantizeLinearInput'}, + {'scale': 'dequantizeLinearScale'}, + {'zeroPoint': 'dequantizeLinearZeroPoint'} + ], + 'outputs': 'dequantizeLinearOutput' + }], + 'expectedOutputs': { + 'dequantizeLinearOutput': { + 'data': [ + -18.686185836791992, -9.343092918395996, -32.31959533691406, + -55.40502166748047 + ], + 'descriptor': {shape: [1, 1, 4], dataType: 'float32'} + } + } + } + }, { 'name': 'dequantizeLinear int4 1D tensor with even size', 'graph': { @@ -312,8 +467,8 @@ const dequantizeLinearTests = [ 'constant': true }, 'dequantizeLinearScale': { - 'data': [1.1202747821807861], - 
'descriptor': {shape: [], dataType: 'float32'}, + 'data': [1.1202747821807861, 1.1202747821807861], + 'descriptor': {shape: [2], dataType: 'float32'}, 'constant': true }, 'dequantizeLinearZeroPoint': { @@ -350,12 +505,12 @@ const dequantizeLinearTests = [ }, 'dequantizeLinearScale': { 'data': [1.1202747821807861], - 'descriptor': {shape: [], dataType: 'float32'}, + 'descriptor': {shape: [1], dataType: 'float32'}, 'constant': true }, 'dequantizeLinearZeroPoint': { - 'data': [-3, 0, 0], - 'descriptor': {shape: [3], dataType: 'int4'}, + 'data': [-3], + 'descriptor': {shape: [1], dataType: 'int4'}, 'constant': true } }, @@ -370,7 +525,7 @@ const dequantizeLinearTests = [ }], 'expectedOutputs': { 'dequantizeLinearOutput': { - 'data': [2.2405495643615723, 7.841923713684082, 0], + 'data': [2.2405495643615723, 11.202747344970703, 3.3608243465423584], 'descriptor': {shape: [3], dataType: 'float32'} } } diff --git a/webnn/conformance_tests/quantizeLinear.https.any.js b/webnn/conformance_tests/quantizeLinear.https.any.js index 8aa9d7f3bcc407d..8333f7b1b58a0a9 100644 --- a/webnn/conformance_tests/quantizeLinear.https.any.js +++ b/webnn/conformance_tests/quantizeLinear.https.any.js @@ -83,8 +83,8 @@ const quantizeLinearTests = [ 'constant': true }, 'quantizeLinearZeroPoint': { - 'data': [128], - 'descriptor': {shape: [], dataType: 'uint8'}, + 'data': [128, 128, 128, 128], + 'descriptor': {shape: [4], dataType: 'uint8'}, 'constant': true } }, @@ -104,6 +104,46 @@ const quantizeLinearTests = [ } } }, + { + 'name': + 'quantizeLinear float32 2D constant tensor broadcasting zeroPoint and scale', + 'graph': { + 'inputs': { + 'quantizeLinearInput': { + 'data': [ + -2.549168109893799, -4.794857501983643, 8.413617134094238, + 6.108623504638672 + ], + 'descriptor': {shape: [2, 2], dataType: 'float32'}, + 'constant': true + }, + 'quantizeLinearScale': { + 'data': [9.343092918395996], + 'descriptor': {shape: [1], dataType: 'float32'}, + 'constant': true + }, + 'quantizeLinearZeroPoint': { 
+ 'data': [128], + 'descriptor': {shape: [1], dataType: 'uint8'}, + 'constant': true + } + }, + 'operators': [{ + 'name': 'quantizeLinear', + 'arguments': [ + {'input': 'quantizeLinearInput'}, {'scale': 'quantizeLinearScale'}, + {'zeroPoint': 'quantizeLinearZeroPoint'} + ], + 'outputs': 'quantizeLinearOutput' + }], + 'expectedOutputs': { + 'quantizeLinearOutput': { + 'data': [128, 127, 129, 129], + 'descriptor': {shape: [2, 2], dataType: 'uint8'} + } + } + } + }, { 'name': 'quantizeLinear float32 4D constant tensor broadcasting scale and zeroPoint', @@ -123,8 +163,8 @@ const quantizeLinearTests = [ 'constant': true }, 'quantizeLinearZeroPoint': { - 'data': [128], - 'descriptor': {shape: [], dataType: 'uint8'}, + 'data': [128, 128], + 'descriptor': {shape: [2, 1], dataType: 'uint8'}, 'constant': true } }, @@ -144,6 +184,46 @@ const quantizeLinearTests = [ } } }, + { + 'name': + 'quantizeLinear float32 3D input with implicit block_size = [1, 2, 1].', + 'graph': { + 'inputs': { + 'quantizeLinearInput': { + 'data': [ + -2.549168109893799, -4.794857501983643, 8.413617134094238, + 6.108623504638672 + ], + 'descriptor': {shape: [1, 4, 1], dataType: 'float32'}, + 'constant': true + }, + 'quantizeLinearScale': { + 'data': [0.2800687253475189, -4.617084980010986], + 'descriptor': {shape: [2, 1], dataType: 'float32'}, + 'constant': true + }, + 'quantizeLinearZeroPoint': { + 'data': [128, 189], + 'descriptor': {shape: [2, 1], dataType: 'uint8'}, + 'constant': true + } + }, + 'operators': [{ + 'name': 'quantizeLinear', + 'arguments': [ + {'input': 'quantizeLinearInput'}, {'scale': 'quantizeLinearScale'}, + {'zeroPoint': 'quantizeLinearZeroPoint'} + ], + 'outputs': 'quantizeLinearOutput' + }], + 'expectedOutputs': { + 'quantizeLinearOutput': { + 'data': [119, 111, 187, 188], + 'descriptor': {shape: [1, 4, 1], dataType: 'uint8'} + } + } + } + }, { 'name': 'quantizeLinear float32 tensor with int4 zeroPoint which has odd size', @@ -190,8 +270,8 @@ const quantizeLinearTests = [ 
'constant': true }, 'quantizeLinearScale': { - 'data': [1.1202747821807861], - 'descriptor': {shape: [], dataType: 'float32'}, + 'data': [1.1202747821807861, 1.1202747821807861], + 'descriptor': {shape: [2], dataType: 'float32'}, 'constant': true }, 'quantizeLinearZeroPoint': { @@ -254,6 +334,47 @@ const quantizeLinearTests = [ } } }, + { + 'name': 'quantizeLinear int4 zeroPoint with block_size = [3, 2]', + 'graph': { + 'inputs': { + 'quantizeLinearInput': { + 'data': [ + 4.794857501983643, 3.23434354545, 2.794857501983643, + 5.794857501983643, 0, 7.23434354545, 4.794857501983643, + 3.23434354545, 2.794857501983643, 5.794857501983643, 0, + 7.23434354545 + ], + 'descriptor': {shape: [3, 4], dataType: 'float32'}, + 'constant': true + }, + 'quantizeLinearScale': { + 'data': [1.1202747821807861, 2.1202747821807861], + 'descriptor': {shape: [2], dataType: 'float32'}, + 'constant': true + }, + 'quantizeLinearZeroPoint': { + 'data': [-6, -5], + 'descriptor': {shape: [2], dataType: 'int4'}, + 'constant': true + } + }, + 'operators': [{ + 'name': 'quantizeLinear', + 'arguments': [ + {'input': 'quantizeLinearInput'}, {'scale': 'quantizeLinearScale'}, + {'zeroPoint': 'quantizeLinearZeroPoint'} + ], + 'outputs': 'quantizeLinearOutput' + }], + 'expectedOutputs': { + 'quantizeLinearOutput': { + 'data': [-2, -3, -4, -3, -5, 0, -2, -3, -4, -1, -5, -2], + 'descriptor': {shape: [3, 4], dataType: 'int4'} + } + } + } + }, { 'name': 'quantizeLinear float32 tensor with uint4 zeroPoint which has odd size', @@ -305,8 +426,8 @@ const quantizeLinearTests = [ 'constant': true }, 'quantizeLinearScale': { - 'data': [1.1202747821807861], - 'descriptor': {shape: [], dataType: 'float32'}, + 'data': [1.1202747821807861, 1.1202747821807861], + 'descriptor': {shape: [2], dataType: 'float32'}, 'constant': true }, 'quantizeLinearZeroPoint': { @@ -328,6 +449,45 @@ const quantizeLinearTests = [ {'data': [5, 8], 'descriptor': {shape: [2], dataType: 'uint4'}} } } + }, + { + 'name': 'quantizeLinear uint4 
zeroPoint with block_size = 3', + 'graph': { + 'inputs': { + 'quantizeLinearInput': { + 'data': [ + 4.794857501983643, 3.23434354545, 1.794857501983643, 2.23434354545, + 4.794857501983643, 3.23434354545 + ], + 'descriptor': {shape: [6], dataType: 'float32'}, + 'constant': true + }, + 'quantizeLinearScale': { + 'data': [1.1202747821807861, 1.1202747821807861], + 'descriptor': {shape: [2], dataType: 'float32'}, + 'constant': true + }, + 'quantizeLinearZeroPoint': { + 'data': [1, 5], + 'descriptor': {shape: [2], dataType: 'uint4'}, + 'constant': true + } + }, + 'operators': [{ + 'name': 'quantizeLinear', + 'arguments': [ + {'input': 'quantizeLinearInput'}, {'scale': 'quantizeLinearScale'}, + {'zeroPoint': 'quantizeLinearZeroPoint'} + ], + 'outputs': 'quantizeLinearOutput' + }], + 'expectedOutputs': { + 'quantizeLinearOutput': { + 'data': [5, 4, 3, 7, 9, 8], + 'descriptor': {shape: [6], dataType: 'uint4'} + } + } + } } ]; diff --git a/webnn/validation_tests/dequantizeLinear.https.any.js b/webnn/validation_tests/dequantizeLinear.https.any.js index d0dd2c15dfff2d6..672fc4ac72456bf 100644 --- a/webnn/validation_tests/dequantizeLinear.https.any.js +++ b/webnn/validation_tests/dequantizeLinear.https.any.js @@ -34,14 +34,22 @@ const tests = [ }, { name: - '[dequantizeLinear] Throw if the shape of scale is not broadcastable to the shape of input.', + '[dequantizeLinear] Test block-wise quantization with block_size = [2, 2, 5].', + input: {dataType: 'uint8', shape: [6, 4, 5]}, + scale: {dataType: 'float32', shape: [3, 2, 1]}, + zeroPoint: {dataType: 'uint8', shape: [3, 2, 1]}, + output: {dataType: 'float32', shape: [6, 4, 5]}, + }, + { + name: + '[dequantizeLinear] Throw if the scale size is not a factor of input size.', input: {dataType: 'uint8', shape: [3, 2, 5]}, scale: {dataType: 'float32', shape: [2]}, - zeroPoint: {dataType: 'uint8', shape: [5]}, + zeroPoint: {dataType: 'uint8', shape: [2]}, }, { name: - '[dequantizeLinear] Throw if the shape of zero_point is not 
broadcastable to the shape of input.', + '[dequantizeLinear] Throw if the shape of zero_point is not the same as the shape of input.', input: {dataType: 'uint8', shape: [3, 2, 5]}, scale: {dataType: 'float32', shape: [5]}, zeroPoint: {dataType: 'uint8', shape: [2]}, @@ -55,14 +63,14 @@ const tests = [ }, { name: - '[dequantizeLinear] Throw if the data type of input is not int8 or uint8.', + '[dequantizeLinear] Throw if the data type of input is not one of {int4, uint4, int8, uint8}.', input: {dataType: 'float16', shape: [3, 2, 5]}, scale: {dataType: 'float32', shape: [5]}, zeroPoint: {dataType: 'int8', shape: [5]}, }, { name: - '[dequantizeLinear] Throw if the data type of zero_point is not int8 or uint8.', + '[dequantizeLinear] Throw if the data type of zero_point is not one of {int4, uint4, int8, uint8}.', input: {dataType: 'int8', shape: [3, 2, 5]}, scale: {dataType: 'float32', shape: [5]}, zeroPoint: {dataType: 'int32', shape: [5]}, diff --git a/webnn/validation_tests/quantizeLinear.https.any.js b/webnn/validation_tests/quantizeLinear.https.any.js index 42b8ac4b1f2c36f..8516e9cf8e0b474 100644 --- a/webnn/validation_tests/quantizeLinear.https.any.js +++ b/webnn/validation_tests/quantizeLinear.https.any.js @@ -34,33 +34,43 @@ const tests = [ }, { name: - '[quantizeLinear] Throw if the shape of scale is not broadcastable to the shape of input.', + '[quantizeLinear] Test block-wise quantization with block_size = [3, 2, 5].', + input: {dataType: 'float32', shape: [6, 4, 5]}, + scale: {dataType: 'float32', shape: [2, 2, 1]}, + zeroPoint: {dataType: 'int8', shape: [2, 2, 1]}, + output: {dataType: 'int8', shape: [6, 4, 5]}, + }, + { + name: + '[quantizeLinear] Throw if the scale size is not a factor of input size.', input: {dataType: 'float32', shape: [3, 2, 5]}, - scale: {dataType: 'float32', shape: [2]}, - zeroPoint: {dataType: 'int8', shape: [5]}, + scale: {dataType: 'float32', shape: [2, 1, 5]}, + zeroPoint: {dataType: 'int8', shape: [2, 1, 5]}, }, { name: - 
'[quantizeLinear] Throw if the shape of zero_point is not broadcastable to the shape of input.', + '[quantizeLinear] Throw if the shape of zero_point is not the same as the shape of input.', input: {dataType: 'float32', shape: [3, 2, 5]}, scale: {dataType: 'float32', shape: [5]}, - zeroPoint: {dataType: 'int8', shape: [2]}, + zeroPoint: {dataType: 'int8', shape: [2, 5]}, }, { name: '[quantizeLinear] Throw if the data type of input is not the same as scale.', input: {dataType: 'float32', shape: [3, 2, 5]}, - scale: {dataType: 'float16', shape: [5]}, - zeroPoint: {dataType: 'int8', shape: [5]}, + scale: {dataType: 'float16', shape: [3, 1, 5]}, + zeroPoint: {dataType: 'int8', shape: [3, 1, 5]}, }, { - name: '[quantizeLinear] Throw if the data type of input is not float32.', + name: + '[quantizeLinear] Throw if the data type of input is not float32 or float16.', input: {dataType: 'int32', shape: [3, 2, 5]}, scale: {dataType: 'float32', shape: [5]}, zeroPoint: {dataType: 'int8', shape: [5]}, }, { - name: '[quantizeLinear] Throw if the data type of scale is not float32.', + name: + '[quantizeLinear] Throw if the data type of scale is not float32 or float16.', input: {dataType: 'float32', shape: [3, 2, 5]}, scale: {dataType: 'int32', shape: [5]}, zeroPoint: {dataType: 'uint8', shape: [5]},