From d7a9066de529eb053f0cbd814f83e20a1e055c79 Mon Sep 17 00:00:00 2001
From: Vivek Khandelwal
Date: Mon, 9 Dec 2024 07:31:29 +0000
Subject: [PATCH 1/4] Add per-channel quantization for onnx.QLinearConv op

---
 .../TorchOnnxToTorch/DefaultDomainQtoZ.cpp    | 137 +++++++++++-------
 1 file changed, 86 insertions(+), 51 deletions(-)

diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
index 85b51ca7efaa..0df242074358 100644
--- a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
+++ b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
@@ -9,9 +9,11 @@
 
 #include "torch-mlir/Conversion/TorchOnnxToTorch/Patterns.h"
 #include "torch-mlir/Conversion/TorchOnnxToTorch/Utils.h"
+#include "torch-mlir/Dialect/Torch/IR/TorchTypes.h"
 #include "torch-mlir/Dialect/Torch/Utils/Utils.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace mlir;
 using namespace mlir::torch;
@@ -318,24 +320,25 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
              binder.tensorOperands(operands, 9)) ||
             binder.tensorResultType(resultType))
           return failure();
-        Value a = operands[0];
-        Value aScale = operands[1];
-        Value aZp = operands[2];
-        Value b = operands[3];
-        Value bScale = operands[4];
-        Value bZp = operands[5];
-        Value cScale = operands[6];
-        Value cZp = operands[7];
-        Value c = operands.size() == 9 ? operands[8] : nullptr;
-
-        auto check = [](Value v) {
-          auto vTy = cast<Torch::ValueTensorType>(v.getType());
-          return llvm::all_of(vTy.getSizes(), [](int64_t d) { return d == 1; });
-        };
-        if (!check(aScale) || !check(aZp) || !check(bScale) || !check(bZp) ||
-            !check(cScale) || !check(cScale))
-          return rewriter.notifyMatchFailure(
-              binder.op, "not supported for non per-tensor quantization");
+        Value input = operands[0];
+        Value inputScale = operands[1];
+        Value inputZp = operands[2];
+        Value weight = operands[3];
+        Value weightScale = operands[4];
+        Value weightZp = operands[5];
+        Value outputScale = operands[6];
+        Value outputZp = operands[7];
+        Value output = operands.size() == 9 ? operands[8] : nullptr;
+
+        // auto check = [](Value v) {
+        //   auto vTy = cast<Torch::ValueTensorType>(v.getType());
+        //   return llvm::all_of(vTy.getSizes(), [](int64_t d) { return d == 1;
+        //   });
+        // };
+        // if (!check(aScale) || !check(aZp) || !check(bScale) || !check(bZp) ||
+        //     !check(cScale) || !check(cScale))
+        //   return rewriter.notifyMatchFailure(
+        //       binder.op, "not supported for non per-tensor quantization");
 
         auto extract = [&rewriter, &binder](Value v) {
           auto vTy = cast<Torch::ValueTensorType>(v.getType());
           Type extractTy = rewriter.getType<Torch::FloatType>();
@@ -347,34 +350,64 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
                                                     v);
         };
 
-        aZp = extract(aZp);
-        bZp = extract(bZp);
-        cZp = extract(cZp);
-        aScale = extract(aScale);
-        bScale = extract(bScale);
-        cScale = extract(cScale);
-
-        auto make = [&rewriter, &binder](Value v, Value scale,
-                                         Value zp) -> Value {
+        inputZp = extract(inputZp);
+        outputZp = extract(outputZp);
+        inputScale = extract(inputScale);
+        outputScale = extract(outputScale);
+        auto makePerTensor = [&rewriter, &binder](Value v, Value scale,
+                                                  Value zp) -> Value {
           auto ty = cast<Torch::ValueTensorType>(v.getType());
           auto newTy = getQTorchTypeFromTorchIntType(ty);
           return rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
               binder.getLoc(), newTy, v, scale, zp);
         };
 
-        a = make(a, aScale, aZp);
-        b = make(b, bScale, bZp);
+        auto makePerChannel = [&rewriter, &binder](Value v, Value scale,
+                                                   Value zp,
+                                                   Value axis) -> Value {
+          auto ty = cast<Torch::ValueTensorType>(v.getType());
+          auto newTy = getQTorchTypeFromTorchIntType(ty);
+          return rewriter.create<Torch::Aten_MakePerChannelQuantizedTensorOp>(
+              binder.getLoc(), newTy, v, scale, zp, axis);
+        };
 
-        auto cTy = rewriter.getType<Torch::ValueTensorType>(
+        input = makePerTensor(input, inputScale, inputZp);
+        // ONNX's QLinearConv op supports per-channel quantization only for
+        // the weight tensor, along axis 0.
+        llvm::outs() << "I'm here\n";
+        auto weightTy = dyn_cast<Torch::ValueTensorType>(weight.getType());
+        auto weightScaleTy =
+            dyn_cast<Torch::ValueTensorType>(weightScale.getType());
+        if (!weightTy || !weightScaleTy || !weightTy.hasSizes() ||
+            !weightScaleTy.hasSizes())
+          return failure();
+        llvm::outs() << "I'm here 1\n";
+        auto weightShape = weightTy.getSizes();
+        auto weightScaleShape = weightScaleTy.getSizes();
+        Value weightScaleScalar = extract(weightScale);
+        if (weightScaleShape.size() == 1 &&
+            weightScaleShape[0] != Torch::kUnknownSize &&
+            weightScaleShape[0] == weightShape[0]) {
+          Value axis = rewriter.create<Torch::ConstantIntOp>(
+              binder.getLoc(), rewriter.getI64IntegerAttr(0));
+          weight = makePerChannel(weight, weightScale, weightZp, axis);
+        } else {
+          weightZp = extract(weightZp);
+          weight = makePerTensor(weight, weightScaleScalar, weightZp);
+        }
+        weight = weightScaleScalar;
+
+        auto outputTy = rewriter.getType<Torch::ValueTensorType>(
             resultType.getOptionalSizes(),
             rewriter.getIntegerType(32, /*issigned=*/true));
 
+        llvm::outs() << "I'm here 2\n";
         // TODO(suderman): insert convolution operator.
-        llvm::SmallVector<Value> newOperands = {a, b};
-        if (c)
-          newOperands.push_back(c);
+        llvm::SmallVector<Value> newOperands = {input, weight};
+        if (output)
+          newOperands.push_back(output);
 
-        cTy = rewriter.getType<Torch::ValueTensorType>(
+        outputTy = rewriter.getType<Torch::ValueTensorType>(
             resultType.getOptionalSizes(),
             rewriter.getType<Torch::QInt32Type>());
 
@@ -388,36 +421,38 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
           newAttributes.push_back(namedAttr);
         }
 
-        c = rewriter
-                .create<Torch::OperatorOp>(binder.getLoc(), cTy, newOperands,
-                                           newAttributes,
-                                           binder.op->getRegions().size())
-                .getResult(0);
+        output = rewriter
+                     .create<Torch::OperatorOp>(binder.getLoc(), outputTy,
+                                                newOperands, newAttributes,
+                                                binder.op->getRegions().size())
+                     .getResult(0);
 
         Value outScale = rewriter.create<Torch::AtenMulFloatOp>(
-            binder.getLoc(), rewriter.getType<Torch::FloatType>(), aScale,
-            bScale);
+            binder.getLoc(), rewriter.getType<Torch::FloatType>(), inputScale,
+            weightScale);
         Value outZp = rewriter.create<Torch::ConstantIntOp>(
             binder.getLoc(), rewriter.getType<Torch::IntType>(),
             rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
-        c = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
-            binder.getLoc(), cTy, c, outScale, outZp);
-        cTy = rewriter.getType<Torch::ValueTensorType>(
+        output = rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
+            binder.getLoc(), outputTy, output, outScale, outZp);
+        outputTy = rewriter.getType<Torch::ValueTensorType>(
             resultType.getOptionalSizes(), rewriter.getF32Type());
 
-        c = rewriter.create<Torch::AtenDequantizeSelfOp>(binder.getLoc(), cTy,
-                                                         c);
-        cTy = getQTorchTypeFromTorchIntType(resultType);
+        llvm::outs() << "I'm here 3\n";
+        output = rewriter.create<Torch::AtenDequantizeSelfOp>(binder.getLoc(),
+                                                              outputTy, output);
+        outputTy = getQTorchTypeFromTorchIntType(resultType);
         Value dtyVal = rewriter.create<Torch::ConstantIntOp>(
             binder.getLoc(), rewriter.getType<Torch::IntType>(),
             rewriter.getIntegerAttr(
                 rewriter.getIntegerType(64),
                 static_cast<int64_t>(
-                    Torch::getScalarTypeForType(cTy.getDtype()))));
-        c = rewriter.create<Torch::AtenQuantizePerTensorOp>(
-            binder.getLoc(), cTy, c, cScale, cZp, dtyVal);
+                    Torch::getScalarTypeForType(outputTy.getDtype()))));
+        output = rewriter.create<Torch::AtenQuantizePerTensorOp>(
+            binder.getLoc(), outputTy, output, outputScale, outputZp, dtyVal);
         rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
-                                                          c);
+                                                          output);
+        llvm::outs() << "I'm here 4\n";
         return success();
       });
   patterns.onOp(
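The distinction this first patch introduces, for reference: per-tensor quantization carries a single scale/zero-point pair for the whole tensor, while per-channel quantization carries one pair per output channel of the weight (axis 0 of its [C_out, C_in, KH, KW] layout). Below is a minimal standalone sketch of the two dequantization rules that the makePerTensor/makePerChannel paths ultimately encode — plain C++ for illustration only, with invented function names, not torch-mlir API:

```cpp
#include <cstdint>
#include <vector>

// Per-tensor: one scale/zero-point pair applies to every element.
float dequantPerTensor(uint8_t q, float scale, int32_t zp) {
  return scale * (static_cast<int32_t>(q) - zp);
}

// Per-channel: the element at flat index c * innerSize + i belongs to output
// channel c and uses scale[c]/zp[c]. For QLinearConv weights the channel
// axis is 0, so innerSize = C_in * KH * KW.
std::vector<float> dequantPerChannel(const std::vector<uint8_t> &q,
                                     const std::vector<float> &scale,
                                     const std::vector<int32_t> &zp,
                                     int64_t cOut, int64_t innerSize) {
  std::vector<float> out(q.size());
  for (int64_t c = 0; c < cOut; ++c)
    for (int64_t i = 0; i < innerSize; ++i) {
      int64_t idx = c * innerSize + i;
      out[idx] = scale[c] * (static_cast<int32_t>(q[idx]) - zp[c]);
    }
  return out;
}
```

This is why the pattern takes the per-channel path only when the weight scale is a rank-1 tensor whose static extent equals weightShape[0]: one scale per output channel is the only per-channel layout onnx.QLinearConv defines for weights.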
From ffffb0cb0ef8bb78f950546c3368b58506ee243a Mon Sep 17 00:00:00 2001
From: Vivek Khandelwal
Date: Thu, 12 Dec 2024 17:55:54 +0530
Subject: [PATCH 2/4] Fix weight scale assignment, remove debug code, and
 handle per-channel quantized weights in TorchToLinalg

---
 .../TorchOnnxToTorch/DefaultDomainQtoZ.cpp    | 17 +----------------
 lib/Conversion/TorchToLinalg/Linear.cpp       | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
index 0df242074358..f0a102f8df54 100644
--- a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
+++ b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
@@ -330,16 +330,6 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
         Value outputZp = operands[7];
         Value output = operands.size() == 9 ? operands[8] : nullptr;
 
-        // auto check = [](Value v) {
-        //   auto vTy = cast<Torch::ValueTensorType>(v.getType());
-        //   return llvm::all_of(vTy.getSizes(), [](int64_t d) { return d == 1;
-        //   });
-        // };
-        // if (!check(aScale) || !check(aZp) || !check(bScale) || !check(bZp) ||
-        //     !check(cScale) || !check(cScale))
-        //   return rewriter.notifyMatchFailure(
-        //       binder.op, "not supported for non per-tensor quantization");
-
         auto extract = [&rewriter, &binder](Value v) {
           auto vTy = cast<Torch::ValueTensorType>(v.getType());
           Type extractTy = rewriter.getType<Torch::FloatType>();
@@ -374,14 +364,12 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
         input = makePerTensor(input, inputScale, inputZp);
         // ONNX's QLinearConv op supports per-channel quantization only for
         // the weight tensor, along axis 0.
-        llvm::outs() << "I'm here\n";
         auto weightTy = dyn_cast<Torch::ValueTensorType>(weight.getType());
         auto weightScaleTy =
             dyn_cast<Torch::ValueTensorType>(weightScale.getType());
         if (!weightTy || !weightScaleTy || !weightTy.hasSizes() ||
             !weightScaleTy.hasSizes())
           return failure();
-        llvm::outs() << "I'm here 1\n";
         auto weightShape = weightTy.getSizes();
         auto weightScaleShape = weightScaleTy.getSizes();
         Value weightScaleScalar = extract(weightScale);
@@ -395,13 +383,12 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
           weightZp = extract(weightZp);
           weight = makePerTensor(weight, weightScaleScalar, weightZp);
         }
-        weight = weightScaleScalar;
+        weightScale = weightScaleScalar;
 
         auto outputTy = rewriter.getType<Torch::ValueTensorType>(
             resultType.getOptionalSizes(),
             rewriter.getIntegerType(32, /*issigned=*/true));
 
-        llvm::outs() << "I'm here 2\n";
         // TODO(suderman): insert convolution operator.
         llvm::SmallVector<Value> newOperands = {input, weight};
         if (output)
@@ -438,7 +425,6 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
         outputTy = rewriter.getType<Torch::ValueTensorType>(
             resultType.getOptionalSizes(), rewriter.getF32Type());
 
-        llvm::outs() << "I'm here 3\n";
         output = rewriter.create<Torch::AtenDequantizeSelfOp>(binder.getLoc(),
                                                               outputTy, output);
         outputTy = getQTorchTypeFromTorchIntType(resultType);
@@ -452,7 +438,6 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
             binder.getLoc(), outputTy, output, outputScale, outputZp, dtyVal);
         rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op, resultType,
                                                           output);
-        llvm::outs() << "I'm here 4\n";
         return success();
       });
   patterns.onOp(
diff --git a/lib/Conversion/TorchToLinalg/Linear.cpp b/lib/Conversion/TorchToLinalg/Linear.cpp
index 9ec7761704ea..7a16e463914a 100644
--- a/lib/Conversion/TorchToLinalg/Linear.cpp
+++ b/lib/Conversion/TorchToLinalg/Linear.cpp
@@ -779,6 +779,21 @@ class ConvertAtenConvolutionOp : public OpConversionPattern<AtenConvolutionOp> {
       weight = make.getSelf();
       weightZp = make.getZeroPoint();
 
+      weight = typeConverter->materializeTargetConversion(
+          rewriter, loc, typeConverter->convertType(weight.getType()), weight);
+      weightZp = typeConverter->materializeTargetConversion(
+          rewriter, loc, typeConverter->convertType(weightZp.getType()),
+          weightZp);
+      weightZp = rewriter.create<arith::TruncIOp>(loc, rewriter.getI32Type(),
+                                                  weightZp);
+      auto torchDtype = cast<ValueTensorType>(make.getType()).getDtype();
+      weightUnsigned = torch_to_linalg::isUnsignedTorchType(torchDtype);
+    } else if (auto make =
+                   op.getWeight()
+                       .getDefiningOp<Aten_MakePerChannelQuantizedTensorOp>()) {
+      weight = make.getSelf();
+      weightZp = make.getZeroPoint();
+
       weight = typeConverter->materializeTargetConversion(
           rewriter, loc, typeConverter->convertType(weight.getType()), weight);
       weightZp = typeConverter->materializeTargetConversion(
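A note on why the weightScale fix in the patch above matters: the rescale step of the lowering relies on the usual quantized-convolution identity. The int32 accumulator obtained by convolving the zero-point-corrected input and weight carries an effective scale of inputScale * weightScale and a zero point of 0 — exactly the outScale/outZp values the pattern attaches before the dequantize/quantize round trip to the output's parameters. A standalone sketch of that arithmetic for a single accumulator value (plain C++, illustrative only; assumes an unsigned 8-bit output as in the lit tests):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// acc is one element of the raw i32 convolution result computed on
// (q_input - inputZp) and (q_weight - weightZp).
uint8_t requantize(int32_t acc, float inputScale, float weightScale,
                   float outputScale, int32_t outputZp) {
  float accScale = inputScale * weightScale; // scale of the i32 accumulator
  float real = accScale * static_cast<float>(acc); // dequantize; acc zp is 0
  long q = std::lround(real / outputScale) + outputZp; // requantize
  return static_cast<uint8_t>(std::clamp(q, 0L, 255L)); // saturate to ui8
}
```

With per-channel weight scales the accumulator scale becomes per-output-channel, inputScale * weightScale[c]; the per-tensor scalar is used here to keep the sketch minimal.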
From 599a877343a45131e339a05e71bbffd31892c642 Mon Sep 17 00:00:00 2001
From: Vivek Khandelwal
Date: Fri, 13 Dec 2024 12:01:54 +0530
Subject: [PATCH 3/4] Remove unused includes and revert TorchToLinalg changes

---
 .../TorchOnnxToTorch/DefaultDomainQtoZ.cpp    |  2 --
 lib/Conversion/TorchToLinalg/Linear.cpp       | 15 ---------------
 2 files changed, 17 deletions(-)

diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
index f0a102f8df54..2a6f42a45c86 100644
--- a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
+++ b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
@@ -9,11 +9,9 @@
 
 #include "torch-mlir/Conversion/TorchOnnxToTorch/Patterns.h"
 #include "torch-mlir/Conversion/TorchOnnxToTorch/Utils.h"
-#include "torch-mlir/Dialect/Torch/IR/TorchTypes.h"
 #include "torch-mlir/Dialect/Torch/Utils/Utils.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace mlir;
 using namespace mlir::torch;
diff --git a/lib/Conversion/TorchToLinalg/Linear.cpp b/lib/Conversion/TorchToLinalg/Linear.cpp
index 7a16e463914a..9ec7761704ea 100644
--- a/lib/Conversion/TorchToLinalg/Linear.cpp
+++ b/lib/Conversion/TorchToLinalg/Linear.cpp
@@ -779,21 +779,6 @@ class ConvertAtenConvolutionOp : public OpConversionPattern<AtenConvolutionOp> {
       weight = make.getSelf();
       weightZp = make.getZeroPoint();
 
-      weight = typeConverter->materializeTargetConversion(
-          rewriter, loc, typeConverter->convertType(weight.getType()), weight);
-      weightZp = typeConverter->materializeTargetConversion(
-          rewriter, loc, typeConverter->convertType(weightZp.getType()),
-          weightZp);
-      weightZp = rewriter.create<arith::TruncIOp>(loc, rewriter.getI32Type(),
-                                                  weightZp);
-      auto torchDtype = cast<ValueTensorType>(make.getType()).getDtype();
-      weightUnsigned = torch_to_linalg::isUnsignedTorchType(torchDtype);
-    } else if (auto make =
-                   op.getWeight()
-                       .getDefiningOp<Aten_MakePerChannelQuantizedTensorOp>()) {
-      weight = make.getSelf();
-      weightZp = make.getZeroPoint();
-
       weight = typeConverter->materializeTargetConversion(
           rewriter, loc, typeConverter->convertType(weight.getType()), weight);
       weightZp = typeConverter->materializeTargetConversion(
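The lit-test update in the final patch below exercises both sides of the shape check from patch 1. That dispatch rule can be restated on its own (a standalone C++ sketch under the same assumptions as above — invented names, not the torch-mlir API):

```cpp
#include <cstdint>
#include <vector>

enum class QuantMode { PerTensor, PerChannel };

// Mirrors the pattern's test: a rank-1 weight scale with a static extent
// equal to the weight's output-channel extent selects the per-channel path;
// a rank-0 (scalar) scale, or any other shape, falls back to per-tensor.
QuantMode classifyWeightScale(const std::vector<int64_t> &weightShape,
                              const std::vector<int64_t> &scaleShape) {
  constexpr int64_t kUnknownSize = -1; // stands in for Torch::kUnknownSize
  if (scaleShape.size() == 1 && scaleShape[0] != kUnknownSize &&
      !weightShape.empty() && scaleShape[0] == weightShape[0])
    return QuantMode::PerChannel;
  return QuantMode::PerTensor;
}
```

In @test_qlinearconv_nobias the weight scale becomes !torch.vtensor<[],f32> (rank 0), keeping that test on the per-tensor path; in @test_qlinearconv_bias_weight_per_channel it stays !torch.vtensor<[1],f32> against a [1,1,1,1] weight, so scaleShape[0] == weightShape[0] and the per-channel builder appears in the CHECK lines.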
From 68ad21bc57ab7a35f78021b88976bc389fe89edf Mon Sep 17 00:00:00 2001
From: Vivek Khandelwal
Date: Fri, 13 Dec 2024 13:03:33 +0530
Subject: [PATCH 4/4] Update lit test

---
 .../TorchOnnxToTorch/simple_ops_q_to_z.mlir   | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/test/Conversion/TorchOnnxToTorch/simple_ops_q_to_z.mlir b/test/Conversion/TorchOnnxToTorch/simple_ops_q_to_z.mlir
index 16c86218dbc8..80cea34818aa 100644
--- a/test/Conversion/TorchOnnxToTorch/simple_ops_q_to_z.mlir
+++ b/test/Conversion/TorchOnnxToTorch/simple_ops_q_to_z.mlir
@@ -65,15 +65,15 @@ func.func @test_quantizelinear_f8(%arg0: !torch.vtensor<[6],f32>, %arg1: !torch.
// ----- // CHECK-LABEL: @test_qlinearconv_nobias -func.func @test_qlinearconv_nobias(%arg0: !torch.vtensor<[1,1,7,7],ui8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],ui8>, %arg3: !torch.vtensor<[1,1,1,1],ui8>, %arg4: !torch.vtensor<[1],f32>, %arg5: !torch.vtensor<[1],ui8>, %arg6: !torch.vtensor<[],f32>, %arg7: !torch.vtensor<[],ui8>) -> !torch.vtensor<[1,1,7,7],ui8> attributes {torch.onnx_meta.ir_version = 5 : si64, torch.onnx_meta.opset_version = 10 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} { - %0 = torch.operator "onnx.QLinearConv"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7) : (!torch.vtensor<[1,1,7,7],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>, !torch.vtensor<[1,1,1,1],ui8>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>) -> !torch.vtensor<[1,1,7,7],ui8> +func.func @test_qlinearconv_nobias(%arg0: !torch.vtensor<[1,1,7,7],ui8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],ui8>, %arg3: !torch.vtensor<[1,1,1,1],ui8>, %arg4: !torch.vtensor<[],f32>, %arg5: !torch.vtensor<[],ui8>, %arg6: !torch.vtensor<[],f32>, %arg7: !torch.vtensor<[],ui8>) -> !torch.vtensor<[1,1,7,7],ui8> attributes {torch.onnx_meta.ir_version = 5 : si64, torch.onnx_meta.opset_version = 10 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} { + %0 = torch.operator "onnx.QLinearConv"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7) : (!torch.vtensor<[1,1,7,7],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>, !torch.vtensor<[1,1,1,1],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>) -> !torch.vtensor<[1,1,7,7],ui8> // CHECK: %[[aZp:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],ui8> -> !torch.int - // CHECK: %[[bZp:.+]] = torch.aten.item %arg5 : !torch.vtensor<[1],ui8> -> !torch.int // CHECK: %[[cZp:.+]] = torch.aten.item %arg7 : !torch.vtensor<[],ui8> -> !torch.int // CHECK: %[[aScale:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float - // CHECK: %[[bScale:.+]] = torch.aten.item %arg4 : !torch.vtensor<[1],f32> -> !torch.float // CHECK: %[[cScale:.+]] = torch.aten.item %arg6 : !torch.vtensor<[],f32> -> !torch.float // CHECK: %[[A:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[aScale]], %[[aZp]] : !torch.vtensor<[1,1,7,7],ui8>, !torch.float, !torch.int -> !torch.vtensor<[1,1,7,7],!torch.quint8> + // CHECK: %[[bScale:.+]] = torch.aten.item %arg4 : !torch.vtensor<[],f32> -> !torch.float + // CHECK: %[[bZp:.+]] = torch.aten.item %arg5 : !torch.vtensor<[],ui8> -> !torch.int // CHECK: %[[B:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg3, %[[bScale]], %[[bZp]] : !torch.vtensor<[1,1,1,1],ui8>, !torch.float, !torch.int -> !torch.vtensor<[1,1,1,1],!torch.quint8> // CHECK: %[[INT0_0:.+]] = torch.constant.int 0 // CHECK: %[[INT0_1:.+]] = torch.constant.int 0 @@ -103,17 +103,17 @@ func.func @test_qlinearconv_nobias(%arg0: !torch.vtensor<[1,1,7,7],ui8>, %arg1: // ----- -// CHECK-LABEL: @test_qlinearconv_bias -func.func @test_qlinearconv_bias(%arg0: !torch.vtensor<[1,1,7,7],ui8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],ui8>, %arg3: !torch.vtensor<[1,1,1,1],ui8>, %arg4: !torch.vtensor<[1],f32>, %arg5: !torch.vtensor<[1],ui8>, %arg6: !torch.vtensor<[],f32>, %arg7: !torch.vtensor<[],ui8>, %arg8 : !torch.vtensor<[7],si32>) -> !torch.vtensor<[1,1,7,7],ui8> attributes {torch.onnx_meta.ir_version = 5 : si64, 
torch.onnx_meta.opset_version = 10 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} { +// CHECK-LABEL: @test_qlinearconv_bias_weight_per_channel +func.func @test_qlinearconv_bias_weight_per_channel(%arg0: !torch.vtensor<[1,1,7,7],ui8>, %arg1: !torch.vtensor<[],f32>, %arg2: !torch.vtensor<[],ui8>, %arg3: !torch.vtensor<[1,1,1,1],ui8>, %arg4: !torch.vtensor<[1],f32>, %arg5: !torch.vtensor<[1],ui8>, %arg6: !torch.vtensor<[],f32>, %arg7: !torch.vtensor<[],ui8>, %arg8 : !torch.vtensor<[7],si32>) -> !torch.vtensor<[1,1,7,7],ui8> attributes {torch.onnx_meta.ir_version = 5 : si64, torch.onnx_meta.opset_version = 10 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} { %0 = torch.operator "onnx.QLinearConv"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8) : (!torch.vtensor<[1,1,7,7],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>, !torch.vtensor<[1,1,1,1],ui8>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>, !torch.vtensor<[7],si32>) -> !torch.vtensor<[1,1,7,7],ui8> // CHECK: %[[aZp:.+]] = torch.aten.item %arg2 : !torch.vtensor<[],ui8> -> !torch.int - // CHECK: %[[bZp:.+]] = torch.aten.item %arg5 : !torch.vtensor<[1],ui8> -> !torch.int // CHECK: %[[cZp:.+]] = torch.aten.item %arg7 : !torch.vtensor<[],ui8> -> !torch.int // CHECK: %[[aScale:.+]] = torch.aten.item %arg1 : !torch.vtensor<[],f32> -> !torch.float - // CHECK: %[[bScale:.+]] = torch.aten.item %arg4 : !torch.vtensor<[1],f32> -> !torch.float // CHECK: %[[cScale:.+]] = torch.aten.item %arg6 : !torch.vtensor<[],f32> -> !torch.float // CHECK: %[[A:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg0, %[[aScale]], %[[aZp]] : !torch.vtensor<[1,1,7,7],ui8>, !torch.float, !torch.int -> !torch.vtensor<[1,1,7,7],!torch.quint8> - // CHECK: %[[B:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg3, %[[bScale]], %[[bZp]] : !torch.vtensor<[1,1,1,1],ui8>, !torch.float, !torch.int -> !torch.vtensor<[1,1,1,1],!torch.quint8> + // CHECK: %[[bScale:.+]] = torch.aten.item %arg4 : !torch.vtensor<[1],f32> -> !torch.float + // CHECK: %[[INT0:.+]] = torch.constant.int 0 + // CHECK: %[[B:.+]] = torch.aten._make_per_channel_quantized_tensor %arg3, %arg4, %arg5, %[[INT0]] : !torch.vtensor<[1,1,1,1],ui8>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],ui8>, !torch.int -> !torch.vtensor<[1,1,1,1],!torch.quint8> // CHECK: %[[INT0_0:.+]] = torch.constant.int 0 // CHECK: %[[INT0_1:.+]] = torch.constant.int 0 // CHECK: %[[PAD:.+]] = torch.prim.ListConstruct %[[INT0_0]], %[[INT0_1]]