From 4cc985fa32a659abf757b5320db561715aaa6028 Mon Sep 17 00:00:00 2001
From: RandySheriffH <48490400+RandySheriffH@users.noreply.github.com>
Date: Thu, 31 Aug 2023 12:57:47 -0700
Subject: [PATCH] Update Azure Op doc for main (#554)

* Add details to Azure ops documentation (#552)

* doc ops

* typo

---------

Co-authored-by: Randy Shuai

* rename the input and output as example

---------

Co-authored-by: Randy Shuai
---
 docs/custom_ops.md | 198 +++++++++++----------------------------------
 1 file changed, 61 insertions(+), 137 deletions(-)

diff --git a/docs/custom_ops.md b/docs/custom_ops.md
index 8141ad45f..1634531a8 100644
--- a/docs/custom_ops.md
+++ b/docs/custom_ops.md
@@ -1357,6 +1357,27 @@ A byte array containing raw data from the audio file.
 
 #### Examples
 
+Note: the OpenAIAudioToText operator composes its request from the last segment of each input and output name, split by "/".
+
+This means the input names must be of the format:
+- auth_token: "whatever-name-you-want-to-use"
+- model_name: ".../.../.../model_name"
+- response_format: ".../.../.../response_format"
+- audio_blob: ".../.../.../file"
+
+and the output name must be of the format:
+- transcriptions: ".../.../.../transcriptions"
+
+Hence a model may contain multiple OpenAIAudioToText operators, each accepting different inputs and producing different outputs.
+
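+For example, with the "node_1" prefix used in the sample below, the request field is just the last "/"-separated segment of the name:
+
+```python
+field = "node_1/model_name".split("/")[-1]  # field == "model_name"
+```
+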
+See the sample code below for a complete illustration.
+
 ```python
@@ -1368,19 +1383,23 @@ from onnxruntime_extensions import PyOrtFunction, util, get_library_path
 from onnxruntime import *
 
+openai_model_uri = os.getenv('URI', '')  # read URI from env
+openai_auth_token = os.getenv('AUTH', '')  # read auth token from env
+
+
 def create_openai_audio_model():
     auth_token = helper.make_tensor_value_info('auth_token', TensorProto.STRING, [1])
-    model = helper.make_tensor_value_info('model_name', TensorProto.STRING, [1])
-    response_format = helper.make_tensor_value_info('response_format', TensorProto.STRING, [-1])
-    file = helper.make_tensor_value_info('file', TensorProto.UINT8, [-1])
-    transcriptions = helper.make_tensor_value_info('transcriptions', TensorProto.STRING, [-1])
+    model = helper.make_tensor_value_info('node_1/model_name', TensorProto.STRING, [1])
+    response_format = helper.make_tensor_value_info('node_1/response_format', TensorProto.STRING, [-1])
+    file = helper.make_tensor_value_info('node_1/file', TensorProto.UINT8, [-1])
+    transcriptions = helper.make_tensor_value_info('node_1/transcriptions', TensorProto.STRING, [-1])
     invoker = helper.make_node('OpenAIAudioToText',
-                               ['auth_token', 'model_name', 'response_format', 'file'],
-                               ['transcriptions'],
+                               ['auth_token', 'node_1/model_name', 'node_1/response_format', 'node_1/file'],  # names must follow the format described above
+                               ['node_1/transcriptions'],  # names must follow the format described above
                                domain='com.microsoft.extensions',
                                name='audio_invoker',
-                               model_uri='https://api.openai.com/v1/audio/transcriptions',
+                               model_uri=openai_model_uri,
                                audio_format='wav')
     graph = helper.make_graph([invoker], 'graph', [auth_token, model, response_format, file], [transcriptions])
@@ -1395,7 +1414,7 @@
 opt = SessionOptions()
 opt.register_custom_ops_library(get_library_path())
 sess = InferenceSession(os.path.join(test_data_dir, "openai_audio.onnx"), opt, providers=["CPUExecutionProvider", "AzureExecutionProvider"])
-auth_token = np.array([os.getenv('MYAUTH', '')])
+auth_token = np.array([openai_auth_token])
 model = np.array(['whisper-1'])
 response_format = np.array(['text'])
@@ -1403,9 +1422,17 @@ with open(os.path.join(test_data_dir, "test16.wav"), "rb") as _f:
     audio_blob = np.asarray(list(_f.read()), dtype=np.uint8)
 ort_inputs = {
     "auth_token": auth_token,
-    "model_name": model,
-    "response_format": response_format,
-    "file": audio_blob,
+    "node_1/model_name": model,
+    "node_1/response_format": response_format,
+    "node_1/file": audio_blob,
 }
 out = sess.run(None, ort_inputs)[0]
 ```
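+
+The sample reads the endpoint URI and auth token from the URI and AUTH environment variables; one way to provide them before running (placeholder token shown):
+
+```python
+import os
+os.environ['URI'] = 'https://api.openai.com/v1/audio/transcriptions'  # the OpenAI transcription endpoint
+os.environ['AUTH'] = '<your-auth-token>'  # placeholder - substitute a real token
+```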
@@ -1457,6 +1476,10 @@ from onnxruntime_extensions import PyOrtFunction, util, get_library_path
 from onnxruntime import *
 
+azure_model_uri = os.getenv('URI', '')  # read URI from env
+azure_auth_token = os.getenv('AUTH', '')  # read auth token from env
+
+
 def create_azure_chat_model():
     auth_token = helper.make_tensor_value_info('auth_token', TensorProto.STRING, [-1])
     chat = helper.make_tensor_value_info('chat', TensorProto.STRING, [-1])
@@ -1465,7 +1488,7 @@ def create_azure_chat_model():
     invoker = helper.make_node('AzureTextToText', ['auth_token', 'chat'], ['response'],
                                domain='com.microsoft.extensions', name='chat_invoker',
-                               model_uri='https://rashuai-aoai-test.openai.azure.com/openai/deployments/randysgpt/chat/completions?api-version=2023-05-15')
+                               model_uri=azure_model_uri)
     graph = helper.make_graph([invoker], 'graph', [auth_token, chat], [response])
     model = helper.make_model(graph,
@@ -1478,7 +1501,7 @@ create_azure_chat_model()
 opt = SessionOptions()
 opt.register_custom_ops_library(get_library_path())
 sess = InferenceSession(os.path.join(test_data_dir, "azure_chat.onnx"), opt, providers=["CPUExecutionProvider", "AzureExecutionProvider"])
-auth_token = np.array([os.getenv('MYAUTH', '')])
+auth_token = np.array([azure_auth_token])
 chat = np.array([r'{"messages":[{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},{"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},{"role": "user", "content": "Do other Azure AI services support this too?"}]}'])
 ort_inputs = {
     "auth_token": auth_token,
@@ -1540,6 +1563,10 @@ from onnxruntime_extensions import PyOrtFunction, util, get_library_path
 from onnxruntime import *
 
+triton_uri = os.getenv('URI', '')  # read URI from env
+triton_auth_token = os.getenv('AUTH', '')  # read auth token from env
+
+
 def createAddf():
     auth_token = helper.make_tensor_value_info('auth_token', TensorProto.STRING, [-1])
     X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [-1])
@@ -1547,7 +1574,7 @@ def createAddf():
     Z = helper.make_tensor_value_info('Z', TensorProto.FLOAT, [-1])
     invoker = helper.make_node('AzureTritonInvoker', ['auth_token', 'X', 'Y'], ['Z'],
                                domain='com.microsoft.extensions', name='triton_invoker',
-                               model_uri='https://endpoint-1.westus2.inference.ml.azure.com',
+                               model_uri=triton_uri,
                                model_name='addf', model_version='1')
     graph = helper.make_graph([invoker], 'graph', [auth_token, X, Y], [Z])
     model = helper.make_model(graph,
@@ -1555,72 +1582,12 @@ def createAddf():
     save(model, 'triton_addf.onnx')
 
 
-def createAddf8():
-    auth_token = helper.make_tensor_value_info('auth_token', TensorProto.STRING, [-1])
-    X = helper.make_tensor_value_info('X', TensorProto.DOUBLE, [-1])
-    Y = helper.make_tensor_value_info('Y', TensorProto.DOUBLE, [-1])
-    Z = helper.make_tensor_value_info('Z', TensorProto.DOUBLE, [-1])
-    invoker = helper.make_node('AzureTritonInvoker', ['auth_token', 'X', 'Y'], ['Z'],
-                               domain='com.microsoft.extensions', name='triton_invoker',
-                               model_uri='https://endpoint-2.westus2.inference.ml.azure.com',
-                               model_name='addf8', model_version='1')
-    graph = helper.make_graph([invoker], 'graph', [auth_token, X, Y], [Z])
-    model = helper.make_model(graph,
-                              opset_imports=[helper.make_operatorsetid('com.microsoft.extensions', 1)])
-    save(model, 'triton_addf8.onnx')
-
-
-def createAddi4():
-    auth_token = helper.make_tensor_value_info('auth_token', TensorProto.STRING, [-1])
-    X = helper.make_tensor_value_info('X', TensorProto.INT32, [-1])
-    Y = helper.make_tensor_value_info('Y', TensorProto.INT32, [-1])
-    Z = helper.make_tensor_value_info('Z', TensorProto.INT32, [-1])
-    invoker = helper.make_node('AzureTritonInvoker', ['auth_token', 'X', 'Y'], ['Z'],
-                               domain='com.microsoft.extensions', name='triton_invoker',
-                               model_uri='https://endpoint-3.westus2.inference.ml.azure.com',
-                               model_name='addi4', model_version='1')
-    graph = helper.make_graph([invoker], 'graph', [auth_token, X, Y], [Z])
-    model = helper.make_model(graph,
-                              opset_imports=[helper.make_operatorsetid('com.microsoft.extensions', 1)])
-    save(model, 'triton_addi4.onnx')
-
-
-def createAnd():
-    auth_token = helper.make_tensor_value_info('auth_token', TensorProto.STRING, [-1])
-    X = helper.make_tensor_value_info('X', TensorProto.BOOL, [-1])
-    Y = helper.make_tensor_value_info('Y', TensorProto.BOOL, [-1])
-    Z = helper.make_tensor_value_info('Z', TensorProto.BOOL, [-1])
-    invoker = helper.make_node('AzureTritonInvoker', ['auth_token', 'X', 'Y'], ['Z'],
-                               domain='com.microsoft.extensions', name='triton_invoker',
-                               model_uri='https://endpoint-4.westus2.inference.ml.azure.com',
-                               model_name='and', model_version='1')
-    graph = helper.make_graph([invoker], 'graph', [auth_token, X, Y], [Z])
-    model = helper.make_model(graph,
-                              opset_imports=[helper.make_operatorsetid('com.microsoft.extensions', 1)])
-    save(model, 'triton_and.onnx')
-
-
-def createStr():
-    auth_token = helper.make_tensor_value_info('auth_token', TensorProto.STRING, [-1])
-    str_in = helper.make_tensor_value_info('str_in', TensorProto.STRING, [-1])
-    str_out1 = helper.make_tensor_value_info('str_out1', TensorProto.STRING, [-1])
-    str_out2 = helper.make_tensor_value_info('str_out2', TensorProto.STRING, [-1])
-    invoker = helper.make_node('AzureTritonInvoker', ['auth_token', 'str_in'], ['str_out1','str_out2'],
-                               domain='com.microsoft.extensions', name='triton_invoker',
-                               model_uri='https://endpoint-5.westus2.inference.ml.azure.com',
-                               model_name='str', model_version='1')
-    graph = helper.make_graph([invoker], 'graph', [auth_token, str_in], [str_out1, str_out2])
-    model = helper.make_model(graph,
-                              opset_imports=[helper.make_operatorsetid('com.microsoft.extensions', 1)])
-    save(model, 'triton_str.onnx')
-
-
 def run_add_f():
     opt = SessionOptions()
     opt.register_custom_ops_library(get_library_path())
     sess = InferenceSession(os.path.join(test_data_dir, "triton_addf.onnx"),
                             opt, providers=["CPUExecutionProvider", "AzureExecutionProvider"])
-    auth_token = np.array([os.getenv('MYAUTH', '')])
+    auth_token = np.array([triton_auth_token])
     x = np.array([1,2,3,4]).astype(np.float32)
     y = np.array([4,3,2,1]).astype(np.float32)
     ort_inputs = {
@@ -1629,67 +1596,10 @@ def run_add_f():
         "Y": y
     }
     out = sess.run(None, ort_inputs)[0]
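+
+
+# Other element types follow the same pattern - only the tensor dtype and the deployed
+# model's model_name/model_uri change; e.g. an int32 add would declare X, Y and Z as
+# TensorProto.INT32 and feed np.int32 arrays.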
-
-
-def run_add_f8():
-    opt = SessionOptions()
-    opt.register_custom_ops_library(get_library_path())
-    sess = InferenceSession(os.path.join(test_data_dir, "triton_addf8.onnx"),
-                            opt, providers=["CPUExecutionProvider", "AzureExecutionProvider"])
-    auth_token = np.array([os.getenv('MYAUTH', '')])
-    x = np.array([1,2,3,4]).astype(np.double)
-    y = np.array([4,3,2,1]).astype(np.double)
-    ort_inputs = {
-        "auth_token": auth_token,
-        "X": x,
-        "Y": y
-    }
-    out = sess.run(None, ort_inputs)[0]
-
-
-def run_add_i4():
-    opt = SessionOptions()
-    opt.register_custom_ops_library(get_library_path())
-    sess = InferenceSession(os.path.join(test_data_dir, "triton_addi4.onnx"),
-                            opt, providers=["CPUExecutionProvider", "AzureExecutionProvider"])
-    auth_token = np.array([os.getenv('MYAUTH', '')])
-    x = np.array([1,2,3,4]).astype(np.int32)
-    y = np.array([4,3,2,1]).astype(np.int32)
-    ort_inputs = {
-        "auth_token": auth_token,
-        "X": x,
-        "Y": y
-    }
-    out = sess.run(None, ort_inputs)[0]
-
-
-def run_and():
-    opt = SessionOptions()
-    opt.register_custom_ops_library(get_library_path())
-    sess = InferenceSession(os.path.join(test_data_dir, "triton_and.onnx"),
-                            opt, providers=["CPUExecutionProvider", "AzureExecutionProvider"])
-    auth_token = np.array([os.getenv('MYAUTH', '')])
-    x = np.array([True, True])
-    y = np.array([True, False])
-    ort_inputs = {
-        "auth_token": auth_token,
-        "X": x,
-        "Y": y
-    }
-    out = sess.run(None, ort_inputs)[0]
-
-
-def run_str():
-    opt = SessionOptions()
-    opt.register_custom_ops_library(get_library_path())
-    sess = InferenceSession(os.path.join(test_data_dir, "triton_str.onnx"),
-                            self.__opt, providers=["CPUExecutionProvider", "AzureExecutionProvider"])
-    auth_token = np.array([os.getenv('MYAUTH', '')])
-    str_in = np.array(['this is the input'])
-    ort_inputs = {
-        "auth_token": auth_token,
-        "str_in": str_in
-    }
-    outs = sess.run(None, ort_inputs)
 ```
\ No newline at end of file