-
Notifications
You must be signed in to change notification settings - Fork 4
Add initial configurations for the fbgemm-xpu package #70
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 21 commits
efcf839
abd1972
317d08d
5eab912
0142efb
de487a1
af721e3
1fef4aa
4f59cec
42d229f
3dd2b18
5a0e9c3
6791a9d
2e75207
5ae127c
351b182
02a9b2c
a14c891
6e00bf5
4c870d2
a4d9124
ff9ee1f
2644774
2ecdfc9
33addcf
a3a82aa
f60cee6
e519374
20fc968
8990c27
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| cmake_minimum_required(VERSION 3.18) | ||
| project(${SKBUILD_PROJECT_NAME}) | ||
|
|
||
| add_subdirectory(src/fbgemm_xpu) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| BSD 3-Clause License | ||
|
|
||
| Copyright (c) 2026 Intel Corporation. All Rights Reserved. | ||
|
|
||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
|
|
||
| 1. Redistributions of source code must retain the above copyright notice, this | ||
| list of conditions and the following disclaimer. | ||
|
|
||
| 2. Redistributions in binary form must reproduce the above copyright notice, | ||
| this list of conditions and the following disclaimer in the documentation | ||
| and/or other materials provided with the distribution. | ||
|
|
||
| 3. Neither the name of the copyright holder nor the names of its | ||
| contributors may be used to endorse or promote products derived from | ||
| this software without specific prior written permission. | ||
|
|
||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
| FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| include LICENSE | ||
| include README.md | ||
| recursive-include src *.cpp *.h *.sycl *.py | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| # fbgemm-xpu | ||
|
|
||
| Intel XPU plugin package for FBGEMM operators. | ||
|
|
||
| ## Build from source | ||
|
|
||
| * Install [uv] | ||
|
|
||
| * Install Intel oneAPI (DPC++ compiler `icpx`), version 2025.3 or newer | ||
|
|
||
| * Clone the repository: | ||
|
|
||
| ```bash | ||
| git clone https://github.com/intel/torchlib-xpu.git && cd torchlib-xpu | ||
| ``` | ||
|
|
||
| * Create and activate a virtual environment: | ||
|
|
||
| ```bash | ||
| uv venv | ||
| source .venv/bin/activate | ||
| ``` | ||
|
|
||
| * Build and install `fbgemm-xpu`: | ||
|
|
||
| ```bash | ||
| uv pip install -e packages/fbgemm-xpu \ | ||
| --index https://download.pytorch.org/whl/xpu | ||
| ``` | ||
|
|
||
| * (Optional) Install test dependencies: | ||
|
|
||
| ```bash | ||
| uv pip install -e "packages/fbgemm-xpu[test]" \ | ||
| --index https://download.pytorch.org/whl/xpu | ||
| ``` | ||
|
|
||
| * Get installed package version: | ||
|
|
||
| ```bash | ||
| python -c "import fbgemm_xpu; print(fbgemm_xpu.__version__)" | ||
| ``` | ||
|
|
||
| [uv]: https://github.com/astral-sh/uv |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| [build-system] | ||
| requires = [ | ||
|
dvrogozh marked this conversation as resolved.
|
||
| "numpy~=2.0", | ||
| "pybind11", | ||
| "scikit-build-core>=0.10", | ||
| "torch~=2.11.0", | ||
|
dvrogozh marked this conversation as resolved.
Outdated
|
||
| ] | ||
| build-backend = "scikit_build_core.build" | ||
|
|
||
| [project] | ||
| name = "fbgemm-xpu" | ||
| description = "FBGEMM XPU operators for Intel GPUs" | ||
| readme = "README.md" | ||
| requires-python = ">=3.10" | ||
| license = { file = "LICENSE" } | ||
| authors = [ | ||
| { name = "Alberto Gallegos Muro", email = "alberto.gallegos.muro@intel.com" }, | ||
| { name = "Felipe Leza Alvarez", email = "felipe.leza.alvarez@intel.com" }, | ||
| { name = "Manuel Santana Castolo", email = "manuel.santana.castolo@intel.com" }, | ||
| ] | ||
| dynamic = ["version"] | ||
| dependencies = [ | ||
| "fbgemm-gpu-cpu==1.7.0", | ||
|
dvrogozh marked this conversation as resolved.
|
||
| "numpy~=2.0", | ||
| "torch~=2.11.0", | ||
| ] | ||
|
|
||
| [project.optional-dependencies] | ||
| test = [ | ||
| "pytest", | ||
| "hypothesis", | ||
| "expecttest" | ||
| ] | ||
|
|
||
| [project.urls] | ||
| GitHub = "https://github.com/intel/torchlib-xpu" | ||
|
|
||
| [tool.scikit-build] | ||
| cmake.version = ">=3.18" | ||
|
|
||
| [tool.scikit-build.cmake.define] | ||
| CMAKE_CXX_COMPILER = {env="CXX", default="icpx"} | ||
|
|
||
| [[tool.scikit-build.generate]] | ||
| path = "src/fbgemm_xpu/_version.py" | ||
| template = '__version__ = "${version}"' | ||
|
|
||
[tool.scikit-build.metadata.version]
provider = "scikit_build_core.metadata.regex"
input = "version.txt"
# Keep '-' last in the character class so it is a literal hyphen. Written as
# "+-_" it forms a range from '+' to '_' that also admits characters such as
# ',', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']' and '^'.
regex = "^v?(?P<value>[0-9a-zA-Z.+_-]+)"
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| cmake_minimum_required(VERSION 3.18) | ||
| set(CMAKE_CXX_STANDARD 17) | ||
| set(CMAKE_CXX_STANDARD_REQUIRED ON) | ||
|
|
||
set(PYBIND11_FINDPYTHON ON)
# Prefer the active virtual environment Python when available, but never
# override an interpreter the caller has already selected (for example via
# -DPython3_EXECUTABLE=... from a build frontend). Under build isolation the
# build requirements live in the frontend's environment, not in $VIRTUAL_ENV.
# NOTE(review): scikit-build-core is expected to pass Python3_EXECUTABLE
# itself — confirm against its documentation.
set(Python3_FIND_VIRTUALENV FIRST)
if(DEFINED ENV{VIRTUAL_ENV} AND NOT DEFINED Python3_EXECUTABLE)
  # Normalize path separators so the value is usable on every platform.
  file(TO_CMAKE_PATH "$ENV{VIRTUAL_ENV}" _venv_root)
  # Virtual environments keep the interpreter under Scripts/ on Windows.
  if(WIN32)
    set(_venv_python "${_venv_root}/Scripts/python.exe")
  else()
    set(_venv_python "${_venv_root}/bin/python")
  endif()
  # Only pin the interpreter when it actually exists; otherwise let
  # find_package(Python3) perform its normal search.
  if(EXISTS "${_venv_python}")
    set(Python3_ROOT_DIR "${_venv_root}")
    set(Python3_EXECUTABLE "${_venv_python}")
  endif()
  unset(_venv_python)
  unset(_venv_root)
endif()
|
|
||
| find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module) | ||
| find_package(pybind11 REQUIRED) | ||
| find_package(Torch REQUIRED) | ||
|
|
||
| # -------------------------------------------------------------------------- | ||
| # SYCL backend detection: enabled only when CXX is the Intel oneAPI compiler | ||
| # (icpx). Falls back to a host-only CPU build otherwise. Mirrors the previous | ||
| # setup.py behavior driven by USE_SYCL / torch.xpu.is_available(). | ||
| # -------------------------------------------------------------------------- | ||
| if(CMAKE_CXX_COMPILER MATCHES "icpx") | ||
| set(WITH_SYCL ON) | ||
| message(STATUS "Intel compiler detected: SYCL kernels enabled") | ||
| else() | ||
| set(WITH_SYCL OFF) | ||
| message(STATUS "Non-Intel compiler in use: SYCL kernels disabled") | ||
|
dvrogozh marked this conversation as resolved.
Outdated
|
||
| endif() | ||
|
|
||
| # -------------------------------------------------------------------------- | ||
| # XPU architecture list. Honors TORCH_XPU_ARCH_LIST env var, otherwise queries | ||
| # torch.xpu.get_arch_list(), otherwise defaults to "pvc". | ||
| # -------------------------------------------------------------------------- | ||
| if(NOT "$ENV{TORCH_XPU_ARCH_LIST}" STREQUAL "") | ||
| set(TORCH_XPU_ARCH_LIST "$ENV{TORCH_XPU_ARCH_LIST}") | ||
| else() | ||
| execute_process( | ||
| COMMAND ${Python3_EXECUTABLE} -c "import torch; print(','.join(torch.xpu.get_arch_list()))" | ||
| OUTPUT_VARIABLE TORCH_XPU_ARCH_LIST | ||
| OUTPUT_STRIP_TRAILING_WHITESPACE | ||
| RESULT_VARIABLE _arch_rc | ||
| ) | ||
| if(NOT _arch_rc EQUAL 0 OR TORCH_XPU_ARCH_LIST STREQUAL "") | ||
| set(TORCH_XPU_ARCH_LIST "pvc") | ||
|
flezaalv marked this conversation as resolved.
Outdated
|
||
| endif() | ||
| endif() | ||
| message(STATUS "Building for XPU architectures: ${TORCH_XPU_ARCH_LIST}") | ||
|
flezaalv marked this conversation as resolved.
|
||
|
|
||
| set(SYCL_TARGETS -fsycl-targets=spir64_gen,spir64) | ||
| set(SYCL_DEVICE_LIST -Xs "-device ${TORCH_XPU_ARCH_LIST} -options -cl-poison-unsupported-fp64-kernels") | ||
|
|
||
| # -------------------------------------------------------------------------- | ||
| # Sources | ||
| # | ||
| # host_sources lists host C++ files that live alongside this CMakeLists. | ||
| # SYCL kernels (.sycl) and companion .cpp files under sycl_kernels/ or | ||
| # fbgemm_utils/ are picked up automatically by the GLOB_RECURSE below. | ||
| # -------------------------------------------------------------------------- | ||
| # Auto-include all host C++ sources in this directory. | ||
| file(GLOB host_sources CONFIGURE_DEPENDS | ||
| *.cpp | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be honest I would prefer to have explicit list of files to build rather than wildcards. That's current consensus across the ecosystem that explicit is always better. |
||
| ) | ||
|
|
||
| file(GLOB_RECURSE sycl_sources CONFIGURE_DEPENDS | ||
| sycl_kernels/*.sycl | ||
| sycl_kernels/*.cpp | ||
| fbgemm_utils/*.sycl | ||
| fbgemm_utils/*.cpp | ||
| ) | ||
|
|
||
| # CMake does not recognize the .sycl extension as a C++ source. Mirror every | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we will just implement sycl kernels in the |
||
| # .sycl file into the build tree with a .cpp suffix so CMake schedules a | ||
| # proper compile step for it; the icpx driver then handles SYCL via -fsycl. | ||
| set(sycl_sources_cxx "") | ||
| foreach(_src IN LISTS sycl_sources) | ||
| if(_src MATCHES "\\.sycl$") | ||
| file(RELATIVE_PATH _rel ${CMAKE_CURRENT_SOURCE_DIR} ${_src}) | ||
| set(_mirrored ${CMAKE_CURRENT_BINARY_DIR}/${_rel}.cpp) | ||
| configure_file(${_src} ${_mirrored} COPYONLY) | ||
| list(APPEND sycl_sources_cxx ${_mirrored}) | ||
| else() | ||
| list(APPEND sycl_sources_cxx ${_src}) | ||
| endif() | ||
| endforeach() | ||
|
|
||
| if(WITH_SYCL) | ||
| set(all_sources ${host_sources} ${sycl_sources_cxx}) | ||
| else() | ||
| set(all_sources ${host_sources}) | ||
| endif() | ||
|
|
||
# --------------------------------------------------------------------------
# Extension module: fbgemm_xpu._C
#
# Skip target creation if no sources have been added yet — this lets the
# scaffolding configure cleanly before any code is written.
# --------------------------------------------------------------------------
if(NOT all_sources)
  message(WARNING "fbgemm_xpu._C: no sources defined yet; skipping extension target.")
  return()
endif()
|
|
||
| Python3_add_library(_C MODULE WITH_SOABI ${all_sources}) | ||
| set_target_properties(_C PROPERTIES PREFIX "") | ||
| set_target_properties(_C PROPERTIES CXX_STANDARD 17) | ||
|
|
||
| # The package root is needed because host sources include | ||
| # "fbgemm_utils/torch_library.h". sycl_kernels and fbgemm_utils are added so | ||
| # SYCL sources mirrored into the build tree can still resolve same-dir headers | ||
| # ("feature_gates.h", "utils.h", ...). | ||
| target_include_directories(_C PRIVATE | ||
| ${CMAKE_CURRENT_SOURCE_DIR} | ||
| ${CMAKE_CURRENT_SOURCE_DIR}/sycl_kernels | ||
| ${CMAKE_CURRENT_SOURCE_DIR}/fbgemm_utils | ||
| ${Python3_INCLUDE_DIRS} | ||
| ) | ||
|
|
||
| target_link_libraries(_C PRIVATE ${TORCH_LIBRARIES}) | ||
|
|
||
| target_compile_options(_C PRIVATE | ||
| -fdiagnostics-color=always | ||
| -Wno-c++11-narrowing | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed. |
||
| ) | ||
|
|
||
| if(WITH_SYCL) | ||
| target_compile_options(_C PRIVATE -fsycl ${SYCL_TARGETS}) | ||
| target_link_options(_C PRIVATE -fsycl ${SYCL_TARGETS} ${SYCL_DEVICE_LIST}) | ||
| endif() | ||
|
|
||
| install(TARGETS _C DESTINATION fbgemm_xpu) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| # Copyright (c) 2026 Intel Corporation. All Rights Reserved. | ||
| # | ||
| # Portions of this file are derived from FBGEMM | ||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| # SPDX-License-Identifier: BSD-3-Clause | ||
|
flezaalv marked this conversation as resolved.
|
||
|
|
||
| # Import the compiled C extension (_C) which contains the registered operators. | ||
| # If native dependencies (for example libtorch.so) are unavailable, keep import | ||
| # working so metadata like __version__ remains accessible. | ||
| try: | ||
| from . import _C as _C | ||
| except ImportError: | ||
| _C = None | ||
|
|
||
| from . import ops as ops | ||
|
|
||
| __all__ = ["_C", "ops", "__version__"] | ||
|
|
||
try:
    from ._version import __version__
except ModuleNotFoundError:
    # No _version module is present next to this file; fall back to the
    # installed distribution metadata instead.
    #
    # importlib.metadata is imported outside the inner try on purpose: if the
    # import lived inside it and failed, evaluating the except clause would
    # itself raise NameError because PackageNotFoundError would be unbound.
    from importlib import metadata as _metadata

    try:
        __version__ = _metadata.version("fbgemm-xpu")
    except _metadata.PackageNotFoundError:
        # The distribution is not installed (e.g. imported straight from a
        # checkout), so no version can be determined.
        __version__ = "unknown"
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| # Copyright (c) 2026 Intel Corporation. All Rights Reserved. | ||
| # | ||
| # Portions of this file are derived from FBGEMM | ||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| # SPDX-License-Identifier: BSD-3-Clause | ||
|
flezaalv marked this conversation as resolved.
|
||
|
|
||
| # Python wrapper functions for all custom operators under the fbgemm namespace | ||
| # This module provides user-friendly interfaces to the C++ operators | ||
|
|
||
| __all__ = [ | ||
| "dense_embedding_codegen_lookup_function", | ||
| ] | ||
|
|
||
def dense_embedding_codegen_lookup_function(*args, **kwargs):
    """Placeholder for the planned dense embedding lookup API.

    Accepts any positional and keyword arguments, ignores them, and always
    raises ``NotImplementedError``.
    """
    message = "dense_embedding_codegen_lookup_function is not implemented yet in src/fbgemm_xpu/ops.py"
    raise NotImplementedError(message)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| /* | ||
| * Copyright 2026 Intel Corporation | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Portions of this file are derived from FBGEMM | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * SPDX-License-Identifier: BSD-3-Clause | ||
| */ | ||
|
flezaalv marked this conversation as resolved.
|
||
|
|
||
| #include <Python.h> | ||
|
|
||
| #include <ATen/core/Tensor.h> | ||
| #include <torch/library.h> | ||
|
|
||
|
|
||
| extern "C" { | ||
| /** | ||
| * Creates a dummy empty _C module that can be imported from Python. | ||
| * | ||
| * When this module is imported from Python (via 'import fbgemm._C'), | ||
| * it loads the shared library (.so file) and runs all TORCH_LIBRARY | ||
| * static initializers to register the custom operators with PyTorch's | ||
| * dispatch system. | ||
| * | ||
| * @return PyObject* pointer to the created module | ||
| */ | ||
| PyObject* PyInit__C(void) | ||
| { | ||
| static struct PyModuleDef module_def = { | ||
| PyModuleDef_HEAD_INIT, | ||
| "_C", /* name of module - imported as fbgemm._C */ | ||
| NULL, /* module documentation, may be NULL */ | ||
| -1, /* size of per-interpreter state of the module, | ||
| or -1 if the module keeps state in global variables. */ | ||
| NULL, /* methods - no Python-callable methods needed */ | ||
| }; | ||
| return PyModule_Create(&module_def); | ||
| } | ||
| } | ||
| /** | ||
| * Central operator registry for ALL custom operators under the "fbgemm" namespace. | ||
| * | ||
| * Uses TORCH_LIBRARY_FRAGMENT so this can coexist with upstream fbgemm_gpu | ||
| * which may already own the "fbgemm" namespace via TORCH_LIBRARY(fbgemm, m). | ||
| * | ||
| * Operator schemas are declared here; device-specific implementations are | ||
| * registered separately via TORCH_LIBRARY_IMPL(fbgemm, <KEY>, m) in the | ||
| * respective .cpp / .sycl / .cu files. | ||
| */ | ||
| TORCH_LIBRARY_FRAGMENT(fbgemm, m) | ||
| { | ||
| m.def("dense_embedding_codegen_lookup_function(" | ||
| " Tensor dev_weights, " | ||
| " Tensor weights_offsets, " | ||
| " Tensor D_offsets, " | ||
| " SymInt total_D, " | ||
| " SymInt max_D, " | ||
| " Tensor hash_size_cumsum, " | ||
| " int total_hash_size_bits, " | ||
| " Tensor indices, " | ||
| " Tensor offsets, " | ||
| " int pooling_mode, " | ||
| " Tensor? indice_weights, " | ||
| " Tensor? feature_requires_grad, " | ||
| " int output_dtype=0, " | ||
| " Tensor? B_offsets=None, " | ||
| " Tensor? vbe_output_offsets_feature_rank=None, " | ||
| " Tensor? vbe_B_offsets_rank_per_feature=None, " | ||
| " SymInt max_B=-1, " | ||
| " SymInt max_B_feature_rank=-1, " | ||
| " SymInt vbe_output_size=-1, " | ||
| " bool mixed_D=True) -> Tensor"); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| 0.1.0 | ||
|
flezaalv marked this conversation as resolved.
Outdated
|
||
Uh oh!
There was an error while loading. Please reload this page.