From 250f6f44a1d5bbd60a0a6b457a8874f20281e9b1 Mon Sep 17 00:00:00 2001 From: Yizhuo Zhang Date: Thu, 9 Jan 2025 15:29:33 -0800 Subject: [PATCH] Add OutlineTensorRTOpPass --- .../TensorRTToExecutable/CMakeLists.txt | 4 + .../Compiler/TensorRTToExecutable/Passes.h | 8 +- .../Compiler/TensorRTToExecutable/Passes.td | 35 ++++ .../TensorRTToExecutable.h | 29 +-- .../TensorRTToExecutable/CMakeLists.txt | 3 +- .../Compiler/TensorRTToExecutable/Passes.cpp | 189 ++++++++++++++++++ .../TensorRTToExecutable.cpp | 77 +++++-- 7 files changed, 315 insertions(+), 30 deletions(-) create mode 100644 mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt create mode 100644 mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt new file mode 100644 index 000000000..e549a6d5c --- /dev/null +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/CMakeLists.txt @@ -0,0 +1,4 @@ +set(_TABLEGEN_ARGS ) +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name TensorRTToExecutable ${_TABLEGEN_ARGS}) +add_public_tablegen_target(MLIRTensorRTTensorRTToExecutableIncGen) diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h index 68a0ea7ca..53d6eb705 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h @@ -30,14 +30,18 @@ namespace mlirtrt::compiler { -// TODO: Does this also need Tablegen'd pass? 
+//===----------------------------------------------------------------------===// +// Add Tablegen'd pass declarations and registration methods. +//===----------------------------------------------------------------------===// +#define GEN_PASS_DECL +#define GEN_PASS_REGISTRATION +#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc" //===----------------------------------------------------------------------===// // Pipeline Registrations //===----------------------------------------------------------------------===// /// Register the TensorRT clustering and compilation pipelines. -// TODO (pranavm): How to do pipeline registration? void registerTensorRTToExecutablePipelines(); } // namespace mlirtrt::compiler diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td new file mode 100644 index 000000000..a49940af0 --- /dev/null +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.td @@ -0,0 +1,35 @@ +//===- Passes.td ----------------------------------------------------------===// +// +// SPDX-FileCopyrightText: Copyright 2024 NVIDIA CORPORATION & AFFILIATES. +// All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +#ifndef MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES +#define MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES + +include "mlir/Pass/PassBase.td" + +//===----------------------------------------------------------------------===// +// OutlineTensorRTOpPass +//===----------------------------------------------------------------------===// +// TODO: what are the dependent dialects? what are the options? + +def OutlineTensorRTOpPass : Pass<"outline-tensorrt-op", + "::mlir::ModuleOp"> { + let summary = "Outline all tensorrt ops into a tensorrt module"; +} + +#endif // MLIR_TENSORRT_COMPILER_TENSORRTTOEXECUTABLE_PASSES diff --git a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h index d6e97d0e3..c0d204940 100644 --- a/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h +++ b/mlir-tensorrt/compiler/include/mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h @@ -25,11 +25,16 @@ // won't need it. 
#ifdef MLIR_TRT_TARGET_TENSORRT #include "mlir-tensorrt-dialect/Target/TranslateToTensorRT.h" + +#include "mlir-executor/Runtime/API/API.h" +#include "mlir-executor/Support/Status.h" #include "mlir-tensorrt-dialect/Utils/Options.h" #include "mlir-tensorrt-dialect/Utils/OptionsBundle.h" #include "mlir-tensorrt/Compiler/Client.h" #include "mlir-tensorrt/Compiler/Extension.h" #include "mlir-tensorrt/Compiler/OptionsProviders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/PassManager.h" #include "mlir/Support/TypeID.h" namespace mlirtrt::compiler { @@ -38,12 +43,17 @@ namespace mlirtrt::compiler { // TensorRTToExecutableOptions //===----------------------------------------------------------------------===// +class TensorRTToExecutableTask; + // TODO (pranavm): Figure out a better way to reuse TRT translation options - // maybe move to options providers? -struct TensorRTOptions - : public mlirtrt::compiler::OptionsProvider { +struct TensorRTOptions : public OptionsProvider { +public: + using OptionsProvider::OptionsProvider; mlir::tensorrt::TensorRTTranslationOptions options; + TensorRTOptions(mlir::OptionsContext &ctx) : OptionsProvider(ctx) {} + void addToOptions(mlir::OptionsContext &context) { options.addToOptions(context); } @@ -52,12 +62,10 @@ struct TensorRTOptions struct TensorRTToExecutableOptions : public mlir::OptionsBundle { + // Default initialization does not require any extensions. + TensorRTToExecutableOptions() = default; TensorRTToExecutableOptions(TaskExtensionRegistry extensions); - - /// Initializes the options using a default extension set (TensorRT - /// extension). 
- StablehloToExecutableOptions(); Option entrypoint{this, "entrypoint", llvm::cl::init("main"), llvm::cl::desc("entrypoint function name")}; @@ -71,6 +79,8 @@ class TensorRTToExecutableTask : public CompilationTask { public: + TensorRTToExecutableTask(mlir::MLIRContext *ctx, + const TensorRTToExecutableOptions &options); /// Build the clustering pipeline that occurs on TensorRT Ops. static void @@ -84,13 +94,6 @@ class TensorRTToExecutableTask static void populatePassManager(mlir::PassManager &pm, const TensorRTToExecutableOptions &options); - - /// Compile a TensorRT module into a MLIR-TensorRT Runtime executable. - /// This is the "functional" entrypoint that will allocate a new PassManager - /// for a single run. - // static mlirtrt::StatusOr> - // compileTensorRTToExecutable(CompilerClient &client, mlir::ModuleOp module, - // const TensorRTToExecutableOptions &options); }; /// Register the task/options with the client's registry. diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt index a32b549b5..63ab3d933 100644 --- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt +++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/CMakeLists.txt @@ -1,12 +1,11 @@ add_mlir_tensorrt_library(MLIRTensorRTCompilerTensorRTToExecutable TensorRTToExecutable.cpp - TensorRTExtension.cpp Passes.cpp PARTIAL_SOURCES_INTENDED DEPENDS - MLIRTensorRTStablehloToExecutableIncGen + MLIRTensorRTTensorRTToExecutableIncGen LINK_LIBS PUBLIC MLIRIR diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp index 0eab83331..fd31cf3fe 100644 --- a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp +++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/Passes.cpp @@ -22,14 +22,203 @@ #include 
"mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h" #include "mlir-tensorrt/Conversion/Passes.h" #include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Pass/PassOptions.h" #ifdef MLIR_TRT_ENABLE_HLO +namespace mlirtrt::compiler { +#define GEN_PASS_DEF_OUTLINETENSORRTOPPASS +#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h.inc" +} // namespace mlirtrt::compiler + using namespace mlirtrt; using namespace mlirtrt::compiler; using namespace mlir; +namespace { + +//===----------------------------------------------------------------------===// +// OutlineTensorRTOpPass +//===----------------------------------------------------------------------===// + +/// ClusteringOpts that identifies groups of TensorRT operations and will be +/// clustered into one TensorRT function (which is eventually translated to a +/// engine). +static FailureOr +getTensorRTClusteringOptions(Operation *op) { + ClusteringOpts opts; + opts.mergeIndependentClusters = [](Operation *, ClusterRange, Operation *, + ClusterRange) { return true; }; + opts.clusterTarget = Attribute{}; + opts.isClusterableOp = [](Operation *op) { + if (llvm::isa(op->getDialect())) + return true; + return false; + }; + + return opts; +} + +/// Create a `func.func` operation that represents `regionOp` and inserts into +/// the `module` SymbolTable. The function is given a name starting with +/// `nameBase` but may have numbers appended in order to unique the name. The +/// created function has argument/result types as indicated by the parameters. +static FailureOr +createOutlinedFunc(RewriterBase &rewriter, Location loc, Operation *module, + StringRef nameBase, TypeRange funcArgTypes, + TypeRange funcResultTypes) { + OpBuilder::InsertionGuard g(rewriter); + + // Create the func for outlining the region body. 
+ FunctionType type = + FunctionType::get(rewriter.getContext(), funcArgTypes, funcResultTypes); + auto outlinedFunc = func::FuncOp::create(loc, nameBase, type, {}); + Block *funcBody = outlinedFunc.addEntryBlock(); + + // Add an empty terminator. + rewriter.setInsertionPointToEnd(funcBody); + rewriter.create(loc); + + // Insert into the module. + SymbolTable(module).insert(outlinedFunc, + module->getRegions().front().front().end()); + + // Tag the function with a UnitAttr for identifying the different kinds of + // functions based on the cluster type. + return cast(outlinedFunc.getOperation()); +} + +/// Given the `op`, find the closest ModuleOp and check if the module has a +/// `tensorrt.module` operation in it. If it does, then return the existing +/// `tensorrt.module` operation. Otherwise, create a new `tensorrt.module`. +static tensorrt::TensorRTModuleOp getOrCreateTensorRTModuleOp(Operation *op) { + auto moduleOp = op->getParentOfType(); + if (!moduleOp) + return nullptr; + SymbolTable symbolTable(moduleOp); + tensorrt::TensorRTModuleOp result = nullptr; + for (auto trtModuleOp : + moduleOp.getBody()->getOps()) { + result = trtModuleOp; + break; + } + if (result) + return result; + + // Create the function. Symbol name de-duplication occurs with insert into the + // symbol table. + result = tensorrt::TensorRTModuleOp::create(moduleOp.getLoc(), "trt_engines"); + symbolTable.insert(result, op->getParentOp() == moduleOp ? Block::iterator(op) + : Block::iterator{}); + return result; +} + +/// Helper function to call the `makeRegionIsolatedFromAbove` to capture all +/// required arguments into the InlineGroupOp region. 
+// static LogicalResult +// makeIsolatedFromAboveImpl(RewriterBase &rewriter, plan::InlineGroupOp regionOp, +// llvm::function_ref callBack) { +// Region &region = regionOp.getRegion(); +// SmallVector capturedValues = +// makeRegionIsolatedFromAbove(rewriter, region, callBack); +// SmallVector operands = regionOp.getOperands(); +// operands.append(capturedValues); +// auto isolatedRegionOp = +// rewriter.create(regionOp.getLoc(), operands); +// rewriter.inlineRegionBefore(region, isolatedRegionOp.getRegion(), +// isolatedRegionOp.getRegion().begin()); +// rewriter.eraseOp(regionOp); +// return success(); +// } + +static FailureOr +outlineOp(RewriterBase &rewriter, tensorrt::TensorRTModuleOp trtModule, plan::InlineGroupOp op) { + + // Make the region isolated from above. This captures the input operands. + SmallVector inputs = + makeRegionIsolatedFromAbove(rewriter, op.getRegion()); + + // Create the outlined function + FailureOr func = + createOutlinedFunc(rewriter, op.getLoc(), trtModule, + "tensorrt_cluster", TypeRange(inputs), op->getResultTypes()); + if (failed(func)) + return failure(); + + rewriter.setInsertionPoint(op); + auto callOp = rewriter.create( + op.getLoc(), op.getResultTypes(), inputs, + SymbolRefAttr::get(trtModule.getNameAttr(), + {FlatSymbolRefAttr::get(*func)})); + + // Populate the function entry block. + rewriter.eraseBlock(&func->getFunctionBody().front()); + + // Move region op operations to the func body. + Operation *regionYieldOp = op.getYield(); + rewriter.inlineRegionBefore(op.getRegion(), func->getFunctionBody(), + func->getFunctionBody().end()); + rewriter.setInsertionPoint(regionYieldOp); + rewriter.replaceOpWithNewOp(regionYieldOp, + regionYieldOp->getOperands()); + // replace the original region results.
+ rewriter.replaceOp(op, callOp); + + return callOp; +} + + +class OutlineTensorRTOpPass + : public compiler::impl::OutlineTensorRTOpPassBase< + OutlineTensorRTOpPass> { +public: + using Base::Base; + void runOnOperation() override { + ModuleOp module = getOperation(); + + SymbolTableCollection symbolTable; + IRRewriter rewriter(&getContext()); + // what are these? are they needed? + // DataFlowSolver solver; + // solver.load(); + // solver.load(); + // solver.load(symbolTable); + // if (failed(solver.initializeAndRun(module))) + // return signalPassFailure(); + + FailureOr opts = getTensorRTClusteringOptions(module); + if (failed(opts)) { + emitError(module.getLoc()) << "failed to create clustering options"; + return signalPassFailure(); + } + // What do they do here? + // patterns.add(*opts, createInlineGroupOp, isOpInClusterRegion, + // target.getClusterFilter(), + // PatternBenefit(target.getClusterBenefit())); + + // FailureOr> regionOps = + // rewrite->findClusterAndCreateRegionOp(module, rewriter); + // if (failed(regionOps)) { + // emitError(module.getLoc()) + // << "clustering rewrite " << rewrite->getTarget() << " failed "; + // return signalPassFailure(); + // } + + tensorrt::TensorRTModuleOp trtModuleOp = getOrCreateTensorRTModuleOp(module); + + SmallVector clusters; + module.walk( + [&](plan::InlineGroupOp cluster) { clusters.push_back(cluster); }); + + for (plan::InlineGroupOp cluster : clusters) { + if (failed(outlineOp(rewriter, trtModuleOp, cluster))) + return signalPassFailure(); + } + } +}; +} // namespace + //===----------------------------------------------------------------------===// // Pipeline Registrations //===----------------------------------------------------------------------===// diff --git a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp index 88dd9dea0..af39858d7 100644 --- 
a/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp +++ b/mlir-tensorrt/compiler/lib/Compiler/TensorRTToExecutable/TensorRTToExecutable.cpp @@ -19,12 +19,13 @@ //===----------------------------------------------------------------------===// #ifdef MLIR_TRT_TARGET_TENSORRT -#include "mlir-tensorrt/Compiler/TensorRTToExecutable.h" +#include "mlir-tensorrt/Compiler/TensorRTToExecutable/TensorRTToExecutable.h" #include "mlir-executor/Conversion/Passes.h" #include "mlir-executor/Executor/Transforms/Passes.h" #include "mlir-tensorrt-dialect/TensorRT/Transforms/Passes.h" +#include "mlir-tensorrt/Compiler/OptionsProviders.h" #include "mlir-tensorrt/Compiler/OptionsRegistry.h" -#include "mlir-tensorrt/Compiler/PassManagerUtils.h" +#include "mlir-tensorrt/Compiler/TensorRTToExecutable/Passes.h" #include "mlir-tensorrt/Conversion/Passes.h" #include "mlir-tensorrt/Dialect/Plan/Transforms/Passes.h" #include "mlir-tensorrt/Transforms/Passes.h" @@ -34,6 +35,10 @@ using namespace mlir; using namespace mlirtrt::compiler; +//===----------------------------------------------------------------------===// +// TensorRTToExecutableOptions +//===----------------------------------------------------------------------===// + TensorRTToExecutableOptions::TensorRTToExecutableOptions( TaskExtensionRegistry extensions) { // TODO (pranavm): We don't need extensions - remove from constructor and add @@ -41,19 +46,23 @@ TensorRTToExecutableOptions::TensorRTToExecutableOptions( assert(extensions.extensions.size() == 0); } - //===----------------------------------------------------------------------===// // TensorRTToExecutableTask //===----------------------------------------------------------------------===// +TensorRTToExecutableTask::TensorRTToExecutableTask( + MLIRContext *ctx, const TensorRTToExecutableOptions &options) + : CompilationTask(ctx, options) { + options.get().applyToPassManager(*this); +} + void 
TensorRTToExecutableTask::buildTensorRTClusteringPipeline( OpPassManager &pm, const TensorRTToExecutableOptions &opts) { - // TODO: add TRT clustering passes. - return; + pm.addPass(createOutlineTensorRTOpPass()); } void TensorRTToExecutableTask::buildPostClusteringPipeline( - OpPassManager &pm, const TensorRTToExecutableOptions &opts) { + OpPassManager &pm, const TensorRTToExecutableOptions &options) { // Post-clustering pm.addPass(createConvertTensorRTToTensorRTRuntimePass()); @@ -115,10 +124,6 @@ void TensorRTToExecutableTask::buildPostClusteringPipeline( void TensorRTToExecutableTask::populatePassManager( mlir::PassManager &pm, const TensorRTToExecutableOptions &options) { - pm.addPass(createPopulateDefaultBackendMetadataPass( - PopulateDefaultBackendMetadataPassOptions{ - options.disallowHostTensorsInTensorRTClusters, NV_TENSORRT_MAJOR})); - buildTensorRTClusteringPipeline(pm, options); buildPostClusteringPipeline(pm, options); @@ -130,9 +135,55 @@ void TensorRTToExecutableTask::populatePassManager( } void mlirtrt::compiler::registerTensorRTToExecutableTask() { - registerOption("tensorrt-to-executable", - optionsCreateFromArgs); + registerOption( + "tensorrt-to-executable", + [](MLIRContext *ctx, ArrayRef opts) + -> StatusOr> { + auto task = optionsCreateFromArgs(ctx, opts); + if (!task.isOk()) + return task.getStatus(); + return std::unique_ptr(std::move(*task)); + }); + + registerCompilationTask( + "tensorrt-to-executable", + [](CompilerClient &client, llvm::ArrayRef options) + -> StatusOr { + TensorRTToExecutableOptions result; + std::string err; + if (failed(result.parse(options, err))) + return getInvalidArgStatus( + "failed to parse options string \"{0:$[ ]}\" due to error {1}", + llvm::iterator_range(options), err); + + llvm::Error finalizeStatus = result.finalize(); + std::optional errMsg{}; + llvm::handleAllErrors(std::move(finalizeStatus), + [&errMsg](const llvm::StringError &err) { + errMsg = err.getMessage(); + }); + + if (errMsg) + return 
getInvalidArgStatus("failed to parse options due to error {0}", + errMsg); + + std::optional hashCode = result.getHash(); + if (!hashCode) + return getInvalidArgStatus("failed to hash options"); + + CompilationTaskBase *cached = client.lookupCachedCompilationTask( + mlir::TypeID::get(), *hashCode); + if (cached) + return cached; + + auto newPM = std::make_unique( + client.getContext(), result); + auto ptr = newPM.get(); + client.updateCachedCompilationTask( + *hashCode, std::move(newPM)); + return ptr; + }); } MLIR_DEFINE_EXPLICIT_TYPE_ID(mlirtrt::compiler::TensorRTToExecutableTask)