Instructions to use kernels-community/gemm with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Kernels
How to use kernels-community/gemm with Kernels:
# !pip install kernels from kernels import get_kernel kernel = get_kernel("kernels-community/gemm") - Notebooks
- Google Colab
- Kaggle
| import logging | |
| import os | |
| from shutil import which, move | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
| from setuptools import Extension, find_packages, setup | |
| from setuptools.command.build_ext import build_ext | |
| logger = logging.getLogger(__name__) | |
| def is_sccache_available() -> bool: | |
| return which("sccache") is not None | |
| def is_ccache_available() -> bool: | |
| return which("ccache") is not None | |
| def is_ninja_available() -> bool: | |
| return which("ninja") is not None | |
| class CMakeExtension(Extension): | |
| def __init__(self, name: str, sourcedir: str = "") -> None: | |
| super().__init__(name, sources=[], py_limited_api=True) | |
| self.sourcedir = os.fspath(Path(sourcedir).resolve()) | |
| class CMakeBuild(build_ext): | |
| def build_extension(self, ext: CMakeExtension) -> None: | |
| ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name) | |
| extdir = ext_fullpath.parent.resolve() | |
| debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug | |
| cfg = "Debug" if debug else "Release" | |
| cmake_generator = os.environ.get("CMAKE_GENERATOR", "") | |
| # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON | |
| # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code | |
| # from Python. | |
| cmake_args = [ | |
| f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}", | |
| f"-DPython_EXECUTABLE={sys.executable}", | |
| f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm | |
| ] | |
| build_args = [] | |
| if "CMAKE_ARGS" in os.environ: | |
| cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item] | |
| if not cmake_generator or cmake_generator == "Ninja": | |
| try: | |
| import ninja | |
| ninja_executable_path = Path(ninja.BIN_DIR) / "ninja" | |
| cmake_args += [ | |
| "-GNinja", | |
| f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}", | |
| ] | |
| except ImportError: | |
| pass | |
| if is_sccache_available(): | |
| cmake_args += [ | |
| "-DCMAKE_C_COMPILER_LAUNCHER=sccache", | |
| "-DCMAKE_CXX_COMPILER_LAUNCHER=sccache", | |
| "-DCMAKE_CUDA_COMPILER_LAUNCHER=sccache", | |
| "-DCMAKE_HIP_COMPILER_LAUNCHER=sccache", | |
| ] | |
| elif is_ccache_available(): | |
| cmake_args += [ | |
| "-DCMAKE_C_COMPILER_LAUNCHER=ccache", | |
| "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", | |
| "-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache", | |
| "-DCMAKE_HIP_COMPILER_LAUNCHER=ccache", | |
| ] | |
| num_jobs = os.getenv("MAX_JOBS", None) | |
| if num_jobs is not None: | |
| num_jobs = int(num_jobs) | |
| logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs) | |
| else: | |
| try: | |
| # os.sched_getaffinity() isn't universally available, so fall | |
| # back to os.cpu_count() if we get an error here. | |
| num_jobs = len(os.sched_getaffinity(0)) | |
| except AttributeError: | |
| num_jobs = os.cpu_count() | |
| nvcc_threads = os.getenv("NVCC_THREADS", None) | |
| if nvcc_threads is not None: | |
| nvcc_threads = int(nvcc_threads) | |
| logger.info( | |
| "Using NVCC_THREADS=%d as the number of nvcc threads.", nvcc_threads | |
| ) | |
| else: | |
| nvcc_threads = 1 | |
| num_jobs = max(1, num_jobs // nvcc_threads) | |
| build_args += [f"-j{num_jobs}"] | |
| if sys.platform == "win32": | |
| build_args += ["--config", cfg] | |
| if nvcc_threads: | |
| cmake_args += ["-DNVCC_THREADS={}".format(nvcc_threads)] | |
| build_temp = Path(self.build_temp) / ext.name | |
| if not build_temp.exists(): | |
| build_temp.mkdir(parents=True) | |
| subprocess.run( | |
| ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True | |
| ) | |
| subprocess.run( | |
| ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True | |
| ) | |
| if sys.platform == "win32": | |
| # Move the dylib one folder up for discovery. | |
| for filename in os.listdir(extdir / cfg): | |
| move(extdir / cfg / filename, extdir / filename) | |
| setup( | |
| name="gemm", | |
| # The version is just a stub, it's not used by the final build artefact. | |
| version="0.1.0", | |
| ext_modules=[CMakeExtension("gemm._gemm_926a158_dirty")], | |
| cmdclass={"build_ext": CMakeBuild}, | |
| packages=find_packages(where="torch-ext", include=["gemm*"]), | |
| package_dir={"": "torch-ext"}, | |
| zip_safe=False, | |
| install_requires=["torch"], | |
| python_requires=">=3.9", | |
| ) |