Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,8 @@ jobs:
- script: L2_Launch_models_qwen_quantization
- script: L2_Launch_models_qwen_vl
- script: L2_Launch_models_qwen35_vl
- script: L2_Launch_models_flux
- script: L2_Launch_models_wan
- script: L2_Launch_recipes_gemma_vl
- script: L2_Launch_recipes_gpt_oss
- script: L2_Launch_models_qwen_vl_quantization
Expand Down
3 changes: 3 additions & 0 deletions examples/diffusion/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Megatron Examples

Recipes and configuration overrides for Megatron training.
3 changes: 3 additions & 0 deletions examples/diffusion/override_configs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Override Configs

Parallelism configuration overrides for different CP/TP/SP sizes.
44 changes: 44 additions & 0 deletions examples/diffusion/override_configs/wan_pretrain_sample_data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# WAN Pretrain Mock Data Test Configuration
# Converted from L2_Function_Tests_GPU_Wan_Mock_Data.sh

# Model architecture and parallelism layout for the mock-data smoke test.
model:
  tensor_model_parallel_size: 1
  pipeline_model_parallel_size: 1
  context_parallel_size: 1
  crossattn_emb_size: 1536
  hidden_size: 1536
  ffn_hidden_size: 8960
  num_attention_heads: 12
  num_layers: 3  # shallow model — this is a CI functional test, not a real run
  qkv_format: thd
  seq_length: 2048

# Short training loop sized for CI: 10 iterations, no eval.
train:
  eval_iters: 0
  train_iters: 10
  global_batch_size: 2
  micro_batch_size: 1

optimizer:
  lr: 5.0e-6
  min_lr: 5.0e-6

scheduler:
  lr_decay_style: constant
  lr_warmup_iters: 0

# Checkpoint paths resolve from the CHECKPOINT_DIR env var (null if unset).
checkpoint:
  save: ${oc.env:CHECKPOINT_DIR,null}
  load: ${oc.env:CHECKPOINT_DIR,null}
  load_optim: false
  save_interval: 200  # larger than train_iters, so no checkpoint is written mid-run

# Dataset path resolves from DATASET_PATH (null if unset — mock data).
# seq_length / batch sizes mirror the model and train sections above.
dataset:
  path: ${oc.env:DATASET_PATH,null}
  seq_length: 2048
  global_batch_size: 2
  micro_batch_size: 1
  packing_buffer_size: 50

logger:
  log_interval: 1
3 changes: 3 additions & 0 deletions examples/diffusion/recipes/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Recipes

Training recipes for Wan2.1 pretraining, finetuning, and weight verification.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Example FLUX pretrain configuration override file
# This file shows common overrides for FLUX pretraining

# Model configuration
model:
  # Parallelism settings
  tensor_model_parallel_size: 4
  pipeline_model_parallel_size: 1
  context_parallel_size: 1
  sequence_parallel: false

  # FLUX architecture (FLUX-schnell defaults)
  num_joint_layers: 19
  num_single_layers: 38
  hidden_size: 3072
  num_attention_heads: 24
  in_channels: 64
  context_dim: 4096

  # For FLUX-dev, set guidance_embed: true
  guidance_embed: false
  guidance_scale: 3.5

# Training configuration
train:
  train_iters: 10000
  eval_interval: 2000
  eval_iters: 32
  global_batch_size: 64
  micro_batch_size: 1

# Optimizer configuration
optimizer:
  lr: 1.0e-4

# Checkpoint configuration
checkpoint:
  save_interval: 2000

# Logger configuration
logger:
  log_interval: 1
Loading
Loading