Skip to content
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions modules/util/triton_mm_8bit.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
)

@triton.jit
def __mm_kernel(
def _mm_kernel(
a_ptr, b_ptr, c_ptr,
M, N, K,
stride_am, stride_ak,
Expand Down Expand Up @@ -109,7 +109,7 @@ def mm_8bit(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:

def grid(META):
return (triton.cdiv(N, META['BLOCK_SIZE_N']) , triton.cdiv(M, META['BLOCK_SIZE_M']), )
__mm_kernel[grid](
_mm_kernel[grid](
a, b, c,
M, N, K,
a.stride(0), a.stride(1),
Expand Down
12 changes: 6 additions & 6 deletions requirements-cuda.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# pytorch
--extra-index-url https://download.pytorch.org/whl/cu128
torch==2.8.0+cu128
torchvision==0.23.0+cu128
onnxruntime-gpu==1.22.0
nvidia-nccl-cu12==2.27.3; sys_platform == "linux"
triton-windows==3.4.0.post20; sys_platform == "win32"
torch==2.9.1+cu128
torchvision==0.24.1+cu128
onnxruntime-gpu==1.23.2
nvidia-nccl-cu12==2.27.5; sys_platform == "linux"
triton-windows==3.5.1.post24; sys_platform == "win32"

# optimizers
bitsandbytes==0.46.0 # bitsandbytes for 8-bit optimizers and weight quantization
bitsandbytes==0.49.1 # bitsandbytes for 8-bit optimizers and weight quantization
6 changes: 3 additions & 3 deletions requirements-default.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# pytorch
torch==2.8.0
torchvision==0.23.0
onnxruntime==1.22.1
torch==2.9.1
torchvision==0.24.1
onnxruntime==1.23.2

# optimizers
# TODO
10 changes: 5 additions & 5 deletions requirements-global.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ yt-dlp #no pinned version, frequently updated for compatibility with sites
scenedetect==0.6.6

# pytorch
accelerate==1.7.0
safetensors==0.5.3
tensorboard==2.19.0
pytorch-lightning==2.5.1.post0
accelerate==1.12.0
safetensors==0.7.0
tensorboard==2.20.0
pytorch-lightning==2.6.0

# diffusion models
#Note: check whether Qwen bugs in diffusers have been fixed before upgrading diffusers (see BaseQwenSetup):
-e git+https://github.com/huggingface/diffusers.git@256e010#egg=diffusers
gguf==0.17.1
transformers==4.56.2
transformers==4.57.3
sentencepiece==0.2.1 # transitive dependency of transformers for tokenizer loading
omegaconf==2.3.0 # needed to load stable diffusion from single ckpt files
invisible-watermark==0.2.0 # needed for the SDXL pipeline
Expand Down
6 changes: 3 additions & 3 deletions requirements-rocm.txt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can add bitsandbytes here but not in requirements-global.txt or requirements-cpu.txt because macOS doesn't support 8-bit optimizers and that's like 90% of what we use bitsandbytes for.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, but that needs to be separate (and tested!), I don't have an AMD GPU ;).

Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

# pytorch
--extra-index-url https://download.pytorch.org/whl/rocm6.3
torch==2.7.1+rocm6.3 #intentionally not upgraded because of reported problems
torchvision==0.22.1+rocm6.3
onnxruntime==1.22.1
torch==2.9.1+rocm6.3
torchvision==0.24.1+rocm6.3
onnxruntime==1.23.2

# optimizers
# TODO