Skip to content
This repository was archived by the owner on Feb 18, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions python/triton/runtime/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,26 @@ def is_msvc(cc):
return cc == "cl" or cc == "cl.exe"


def is_clang_cl(cc):
    """Return True when *cc* names the clang-cl compiler driver.

    Only the final path component of *cc* is examined, and the comparison
    is case-insensitive, so both bare names and full paths are accepted.
    """
    executable = os.path.basename(cc).lower()
    return executable in ("clang-cl", "clang-cl.exe")


def is_clang(cc):
    """Return True when *cc* names the plain clang driver (not clang-cl).

    The directory part of *cc* is discarded and the remaining file name is
    lower-cased before it is compared against the accepted spellings.
    """
    executable = os.path.basename(cc).lower()
    return executable in ("clang", "clang.exe")


def _cc_cmd(cc: str, src: str, out: str, include_dirs: list[str], library_dirs: list[str], libraries: list[str],
ccflags: list[str]) -> list[str]:
if is_msvc(cc):
if is_msvc(cc) or is_clang_cl(cc):
out_base = os.path.splitext(out)[0]
cc_cmd = [cc, src, "/nologo", "/O2", "/LD", "/std:c11", "/wd4819"]
cc_cmd = [cc, src, "/nologo", "/O2", "/LD", "/wd4819"]
# clang-cl doesn't support /std:c11, use -std=c11 instead
if is_clang_cl(cc):
cc_cmd += ["-std=c11"]
else:
cc_cmd += ["/std:c11"]
cc_cmd += [f"/I{dir}" for dir in include_dirs if dir is not None]
cc_cmd += [f"/Fo{out_base + '.obj'}"]
cc_cmd += ["/link"]
Expand Down Expand Up @@ -110,7 +120,7 @@ def _build(name: str, src: str, srcdir: str, library_dirs: list[str], include_di
if sysconfig.get_config_var("Py_GIL_DISABLED"):
version += "t"
libraries = libraries + [f"python{version}"]
if is_msvc(cc):
if is_msvc(cc) or is_clang_cl(cc):
_, msvc_winsdk_inc_dirs, msvc_winsdk_lib_dirs = find_msvc_winsdk()
include_dirs = include_dirs + msvc_winsdk_inc_dirs
library_dirs = library_dirs + msvc_winsdk_lib_dirs
Expand Down
39 changes: 33 additions & 6 deletions third_party/amd/backend/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@
from typing import Any, Dict, Tuple
from types import ModuleType
import hashlib
import os
import platform
import tempfile
import re
import functools
import warnings
from pathlib import Path

def _is_windows():
    """Return True when ``platform.system()`` reports 'Windows'."""
    system_name = platform.system()
    return system_name == 'Windows'


def get_min_dot_size(target: GPUTarget):
# We fall back to using FMA and cast arguments if certain configurations are
Expand Down Expand Up @@ -437,13 +442,35 @@ def make_hsaco(src, metadata, options):
if knobs.compilation.enable_asan:
target_features = '+xnack'
hsaco = amd.assemble_amdgcn(src, options.arch, target_features)
with tempfile.NamedTemporaryFile() as tmp_out:
with tempfile.NamedTemporaryFile() as tmp_in:
with open(tmp_in.name, "wb") as fd_in:
fd_in.write(hsaco)
# On Windows, NamedTemporaryFile cannot be reopened while open, so we
# use delete=False and manually clean up.
if _is_windows():
tmp_in = tempfile.NamedTemporaryFile(delete=False, suffix='.o')
tmp_out = tempfile.NamedTemporaryFile(delete=False, suffix='.hsaco')
try:
tmp_in.write(hsaco)
tmp_in.close()
tmp_out.close()
amd.link_hsaco(tmp_in.name, tmp_out.name)
with open(tmp_out.name, "rb") as fd_out:
ret = fd_out.read()
with open(tmp_out.name, "rb") as fd_out:
ret = fd_out.read()
finally:
try:
os.unlink(tmp_in.name)
except OSError:
pass
try:
os.unlink(tmp_out.name)
except OSError:
pass
else:
with tempfile.NamedTemporaryFile() as tmp_out:
with tempfile.NamedTemporaryFile() as tmp_in:
with open(tmp_in.name, "wb") as fd_in:
fd_in.write(hsaco)
amd.link_hsaco(tmp_in.name, tmp_out.name)
with open(tmp_out.name, "rb") as fd_out:
ret = fd_out.read()
return ret

def add_stages(self, stages, options, language):
Expand Down
33 changes: 32 additions & 1 deletion third_party/amd/backend/driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,42 @@
#include <hip/hip_runtime_api.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <dlfcn.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef _WIN32
#include <windows.h>
// Windows compatibility layer for dlopen/dlsym/dlclose/dlerror
#define RTLD_NOW 0
#define RTLD_LAZY 0
#define RTLD_LOCAL 0
static char dlerror_buf[512];
// Stand-in for dlopen() built on LoadLibraryA. `flags` is accepted only for
// source compatibility and is ignored. Returns the module handle on success;
// on failure, records the GetLastError() code in dlerror_buf and returns NULL.
static inline void *dlopen(const char *filename, int flags) {
  (void)flags;
  HMODULE module = LoadLibraryA(filename);
  if (module != NULL) {
    return (void *)module;
  }
  snprintf(dlerror_buf, sizeof(dlerror_buf), "LoadLibrary failed with error %lu", GetLastError());
  return NULL;
}
// Stand-in for dlsym() built on GetProcAddress. Returns the symbol's address
// on success; on failure, records the symbol name and the GetLastError() code
// in dlerror_buf and returns NULL.
static inline void *dlsym(void *handle, const char *symbol) {
  void *address = (void *)GetProcAddress((HMODULE)handle, symbol);
  if (address != NULL) {
    return address;
  }
  snprintf(dlerror_buf, sizeof(dlerror_buf), "GetProcAddress failed for %s with error %lu", symbol, GetLastError());
  return NULL;
}
// Stand-in for dlclose() built on FreeLibrary. Translates the Win32 BOOL
// result into the dlclose() convention: 0 on success, -1 on failure.
static inline int dlclose(void *handle) {
  if (FreeLibrary((HMODULE)handle)) {
    return 0;
  }
  return -1;
}
// Stand-in for dlerror(). Returns the message recorded in dlerror_buf by the
// most recent failing call, or NULL when no message is pending.
//
// As with POSIX dlerror(), reading the message consumes it: the pending state
// is cleared so that a later call returns NULL unless a new failure has been
// recorded in between. Without this, one early failure would be reported on
// every subsequent call, even after later operations succeeded.
static inline const char *dlerror(void) {
  // Separate storage so the returned text stays valid after dlerror_buf is
  // reset below (and until the next call to dlerror()).
  static char reported[sizeof(dlerror_buf)];
  if (!dlerror_buf[0]) {
    return NULL;
  }
  snprintf(reported, sizeof(reported), "%s", dlerror_buf);
  dlerror_buf[0] = '\0';
  return reported;
}
#else
#include <dlfcn.h>
#endif

// The list of paths to search for the HIP runtime library. The caller Python
// code should substitute the search path placeholder.
static const char *hipLibSearchPaths[] = {"/*py_libhip_search_path*/"};
Expand Down
Loading