-
Notifications
You must be signed in to change notification settings - Fork 19
Add Canary support #55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: release/2.18.0
Are you sure you want to change the base?
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: MIT | ||
|
|
||
| import yaml | ||
| import logging | ||
|
|
||
|
|
||
def config_for_trtllm(model, artifacts, **kwargs):
    """Rewrite ``model_config.yaml`` inside *artifacts* into the TRT-LLM Canary layout.

    Only acts on NeMo ``EncDecMultiTaskModel`` checkpoints; for any other model
    class this is a no-op. Mutates ``artifacts`` in place and returns ``None``.

    Args:
        model: Loaded NeMo model object; only its class name is inspected here.
        artifacts: Mapping of artifact name -> dict with a ``'content'`` entry;
            must contain a YAML-parseable ``'model_config.yaml'``.
        **kwargs: Unused; accepted for a uniform converter signature.

    Raises:
        KeyError: If a required section is absent from the model config.
    """
    if model.__class__.__name__ == 'EncDecMultiTaskModel':
        model_config = yaml.safe_load(artifacts['model_config.yaml']['content'])

        keys_required = [
            'beam_search',
            'encoder',
            'head',
            'model_defaults',
            'prompt_format',
            'sample_rate',
            'target',
            'preprocessor',
        ]

        # Older checkpoints keep beam parameters under 'decoding' instead of a
        # top-level 'beam_search' section; synthesize one so the copy below
        # does not fail.
        if 'beam_search' not in model_config and 'decoding' in model_config:
            model_config['beam_search'] = model_config['decoding'].get(
                'beam',
                {'beam_size': 1, 'len_pen': 0.0, 'max_generation_delta': 50},
            )

        # Fail with a descriptive message instead of an anonymous KeyError
        # from the comprehension below.
        missing = [k for k in keys_required if k not in model_config]
        if missing:
            raise KeyError(
                f"model_config.yaml is missing required sections: {missing}")

        config = {k: model_config[k] for k in keys_required}
        config['decoder'] = {
            'transf_decoder': model_config['transf_decoder'],
            'transf_encoder': model_config['transf_encoder'],
            'vocabulary': make_vocabulary_file(model, artifacts),
            'num_classes': model_config['head']['num_classes'],
            'feat_in': model_config['model_defaults']['asr_enc_hidden'],
            'n_layers': model_config['transf_decoder']['config_dict']['num_layers'],
        }
        # Marker consumed downstream to select the TRT-LLM Canary pipeline.
        config['target'] = 'trtllm.canary'

        artifacts['model_config.yaml']['content'] = yaml.safe_dump(
            config, encoding='utf-8')
def make_vocabulary_file(model, artifacts, **kwargs):
    """Build a serializable vocabulary description for an ``EncDecMultiTaskModel``.

    Returns a dict with per-language token tables, the tokenizer's id offsets
    and size, and whichever special-token ids the tokenizer exposes. Returns
    ``None`` when *model* is not an ``EncDecMultiTaskModel``.

    Args:
        model: Loaded NeMo model; its ``tokenizer`` attribute is inspected.
        artifacts: Unused here; kept for a uniform converter signature.
        **kwargs: Unused.
    """
    if model.__class__.__name__ != 'EncDecMultiTaskModel':
        return None

    tokenizer = model.tokenizer
    tokenizer_vocab = {
        'tokens': {lang: {} for lang in tokenizer.langs},
        'offsets': tokenizer.token_id_offset,
        'size': tokenizer.vocab_size,
    }

    # Special-token ids are optional on some tokenizers; record the ones that
    # exist and warn (rather than fail) about the rest.
    for attr in ('bos_id', 'eos_id', 'nospeech_id', 'pad_id'):
        try:
            tokenizer_vocab[attr] = getattr(tokenizer, attr)
        except Exception:
            logging.warning("Tokenizer is missing %s. Could affect accuracy", attr)

    # Map every token id to its surface form, bucketed by language.
    for t_id in range(tokenizer.vocab_size):
        lang = tokenizer.ids_to_lang([t_id])
        tokenizer_vocab['tokens'][lang][t_id] = tokenizer.ids_to_tokens([t_id])[0]

    return tokenizer_vocab
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,7 @@ | |
|
|
||
| schema_dict = None | ||
|
|
||
| supported_formats = ["ONNX", "CKPT", "TS", "NEMO"] | ||
| supported_formats = ["ONNX", "CKPT", "TS", "NEMO", "PYTORCH", "STATE"] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to add a separate "PYTORCH" as a supported format, or can we use "CKPT" for it?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. PYTORCH is used in case we are just exporting part of the model or tensors. |
||
|
|
||
|
|
||
| @dataclass | ||
|
|
@@ -48,15 +48,27 @@ def get_export_config(export_obj, args): | |
| need_autocast = False | ||
| if export_obj: | ||
| conf.export_file = list(export_obj)[0] | ||
| attribs = export_obj[conf.export_file] | ||
| conf.export_subnet = attribs.get('export_subnet', None) | ||
| conf.is_onnx=attribs.get('onnx', False) | ||
|
|
||
| if not conf.is_onnx: | ||
| conf.states_only = attribs.get('states_only', False) | ||
| conf.is_torch = attribs.get('torch', False) | ||
|
|
||
| if conf.export_file.endswith('.onnx'): | ||
| conf.export_format = "ONNX" | ||
| elif conf.export_file.endswith('.ts'): | ||
| conf.export_format = "TS" | ||
| elif conf.export_file.endswith('.nemo'): | ||
| conf.export_format = "NEMO" | ||
| elif conf.is_torch: | ||
| if conf.states_only: | ||
| conf.export_format = "STATE" | ||
| else: | ||
| conf.export_format = "PYTORCH" | ||
| else: | ||
| conf.export_format = "CKPT" | ||
| attribs = export_obj[conf.export_file] | ||
| conf.autocast = attribs.get('autocast', False) | ||
| need_autocast = conf.autocast | ||
|
|
||
|
|
@@ -66,8 +78,6 @@ def get_export_config(export_obj, args): | |
| if conf.encryption and args.key is None: | ||
| raise Exception(f"{conf.export_file} requires encryption and no key was given") | ||
|
|
||
| conf.export_subnet = attribs.get('export_subnet', None) | ||
|
|
||
| if args.export_subnet: | ||
| if conf.export_subnet: | ||
| raise Exception("Can't combine schema's export_subnet and export-subnet argument!") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: MIT

# Define required metadata fields expected in the archive (optional).
metadata:
  - obj_cls: nemo.collections.asr.models.EncDecMultiTaskModel

# Define list of files that are expected (optional).
# NOTE(review): nesting reconstructed from a garbled scrape — confirm against
# the schema loader's expectations.
artifact_properties:
  # List of files. Per-file attributes select the export path:
  #   onnx: True        -> exported via ONNX
  #   torch: True       -> exported as a torch artifact
  #   states_only: True -> only the state dict (tensors) is exported
  - model_config.yaml
  - encoder.onnx:
      export_subnet: encoder
      onnx: True
  - decoder.pt:
      export_subnet: transf_decoder
      states_only: True
      torch: True
      onnx: False
  - log_softmax.pt:
      export_subnet: log_softmax
      states_only: True
      torch: True
      onnx: False

# Define list of files with expected content (optional).
# Functionality limited to yaml files (e.g. model_config.yaml).
artifact_content:
  # List of files.
  - model_config.yaml:
      # List of sections.subsections. ... that are required.
      # (Optional `: True` instructs to check the presence of the file
      # indicated as leaf in the archive.)
      - transf_decoder
      - transf_encoder
      - vocabulary
      - num_classes
      - feat_in
      - n_layers
      - target
      - beam_search
      - encoder
      - head
      - model_defaults
      - prompt_format
      - sample_rate
      # NOTE(review): 'target' is listed twice (also above) — presumably a
      # duplicate; verify and deduplicate upstream.
      - target
      - preprocessor
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,10 +2,11 @@ | |
| # SPDX-License-Identifier: MIT | ||
|
|
||
| nemo_toolkit>=1.6.0 | ||
| torch>=2.4.0 | ||
| nvidia-eff>=0.6.4 | ||
| nvidia-eff-tao-encryption>=0.1.8 | ||
| nvidia-pyindex==1.0.6 | ||
| onnx==1.14.1 | ||
| onnx==1.16.1 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would the same versions as the Riva container work here? torch 2.5.0 would be good to keep the same, to avoid any possible discrepancies due to version mismatches between NeMo and Riva. |
||
| onnxruntime==1.16.3 | ||
| onnxruntime-gpu==1.16.3 | ||
| onnx-graphsurgeon==0.3.27 | ||
Uh oh!
There was an error while loading. Please reload this page.