Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions examples/diffusion/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Megatron Examples

Recipes and configuration overrides for Megatron training.
3 changes: 3 additions & 0 deletions examples/diffusion/override_configs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Override Configs

Parallelism configuration overrides for different CP/TP/SP sizes.
44 changes: 44 additions & 0 deletions examples/diffusion/override_configs/wan_pretrain_sample_data.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# WAN Pretrain Mock Data Test Configuration
# Converted from L2_Function_Tests_GPU_Wan_Mock_Data.sh

# Model architecture and parallelism overrides for a small smoke-test model
# (3 layers, hidden size 1536).
model:
  tensor_model_parallel_size: 1
  pipeline_model_parallel_size: 1
  context_parallel_size: 1
  crossattn_emb_size: 1536
  hidden_size: 1536
  ffn_hidden_size: 8960
  num_attention_heads: 12
  num_layers: 3
  qkv_format: thd
  seq_length: 2048

# Short training run: 10 iterations, no evaluation.
train:
  eval_iters: 0
  train_iters: 10
  global_batch_size: 2
  micro_batch_size: 1

optimizer:
  lr: 5.0e-6
  min_lr: 5.0e-6

# Constant LR, no warmup (matches min_lr == lr above).
scheduler:
  lr_decay_style: constant
  lr_warmup_iters: 0

# Checkpoint paths resolve from the CHECKPOINT_DIR env var via the
# OmegaConf oc.env resolver; they fall back to null when it is unset.
checkpoint:
  save: ${oc.env:CHECKPOINT_DIR,null}
  load: ${oc.env:CHECKPOINT_DIR,null}
  load_optim: false
  save_interval: 200

# NOTE(review): seq_length / batch sizes are repeated here from the
# model/train sections — presumably the consumer reads them per-section;
# keep the values in sync.
dataset:
  path: ${oc.env:DATASET_PATH,null}
  seq_length: 2048
  global_batch_size: 2
  micro_batch_size: 1
  packing_buffer_size: 50

logger:
  log_interval: 1
3 changes: 3 additions & 0 deletions examples/diffusion/recipes/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Recipes

Training recipes for Wan2.1 pretraining, finetuning, and weight verification.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Example FLUX pretrain configuration override file
# This file shows common overrides for FLUX pretraining

# Model configuration
model:
  # Parallelism settings
  tensor_model_parallel_size: 4
  pipeline_model_parallel_size: 1
  context_parallel_size: 1
  sequence_parallel: false

  # FLUX architecture (FLUX-schnell defaults)
  num_joint_layers: 19
  num_single_layers: 38
  hidden_size: 3072
  num_attention_heads: 24
  in_channels: 64
  context_dim: 4096

  # For FLUX-dev, set guidance_embed: true
  guidance_embed: false
  guidance_scale: 3.5

# Training configuration
train:
  train_iters: 10000
  eval_interval: 2000
  eval_iters: 32
  global_batch_size: 64
  micro_batch_size: 1

# Optimizer configuration
optimizer:
  lr: 1.0e-4

# Checkpoint configuration
checkpoint:
  save_interval: 2000

# Logger configuration
logger:
  log_interval: 1
Loading
Loading