intel · flezaalv · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
@@ -0,0 +1,4 @@
+# Top-level CMake entry point for the package build.
+cmake_minimum_required(VERSION 3.18)
+# The project name is taken from SKBUILD_PROJECT_NAME.
+# NOTE(review): presumably supplied by the scikit-build-core backend; a plain
+# `cmake` run without it would pass an empty name to project() - confirm.
+project(${SKBUILD_PROJECT_NAME})
+
+# All build targets are defined in the package source directory.
+add_subdirectory(src/fbgemm_xpu)
@@ -0,0 +1,28 @@
+BSD 3-Clause License
+
+Copyright (c) 2026 Intel Corporation. All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,74 @@
+# Intel XPU Plugin for FBGEMM
+
+## Overview
+
+[FBGEMM] is an optimized library for GEMMs and low-precision training. The Intel® XPU plugin for [FBGEMM] enables hardware acceleration for specific [FBGEMM] operators on Intel GPUs using SYCL kernels. Currently, acceleration is primarily targeted for DLRM v3 workloads.
+
+To use the Intel® XPU plugin for [FBGEMM], import it in your Python script and ensure that tensors are on the XPU device:
+
+```python
+import torch
+import fbgemm_xpu
+
+# Usage examples will be added as operators are integrated into this project
+```
+
+## Supported hardware
+
+Currently, this package has been tested only on Intel® Data Center GPU Max Series (Ponte Vecchio, [PVC]) GPUs.
+
+## Installation
+
+Pre-built wheels will be available on [PyPI](https://pypi.org) in the future.
+
+For now, build from source:
+
+* Install [uv]
+
+* Install Intel oneAPI (DPC++ compiler `icpx`), version 2025.3 or newer
+
+* Clone the repository:
+
+```bash
+git clone https://github.com/intel/torchlib-xpu.git && cd torchlib-xpu
+```
+
+* Create and activate a virtual environment:
+
+```bash
+uv venv
+source .venv/bin/activate
+```
+
+* Build and install `fbgemm-xpu`:
+
+```bash
+uv pip install -e packages/fbgemm-xpu \
+  --index https://download.pytorch.org/whl/xpu
+```
+
+* (Optional) Install test dependencies:
+
+```bash
+uv pip install -e "packages/fbgemm-xpu[test]" \
+  --index https://download.pytorch.org/whl/xpu
+```
+
+* Get installed package version:
+
+```bash
+python -c "import fbgemm_xpu; print(fbgemm_xpu.__version__)"
+```
+
+## Environment variables
+
+Environment variables will be added as new FBGEMM operators are integrated into this project.
+
+## Known limitations
+
+Known limitations will be documented as new FBGEMM operators are integrated into this project.
+
+[FBGEMM]: https://github.com/pytorch/FBGEMM
+[uv]: https://github.com/astral-sh/uv
+[PVC]: https://www.intel.com/content/www/us/en/ark/products/series/232874/intel-data-center-gpu-max-series.html
+
@@ -0,0 +1,51 @@
+[build-system]
+requires = [
+    "numpy~=2.0",
+    "pybind11",
+    "scikit-build-core>=0.10",
+    "torch~=2.12.0",
+]
+build-backend = "scikit_build_core.build"
+
+[project]
+name = "fbgemm-xpu"
+description = "FBGEMM XPU operators for Intel GPUs"
+readme = "README.md"
+requires-python = ">=3.10"
+license = { file = "LICENSE" }
+authors = [
+    { name = "Alberto Gallegos Muro", email = "alberto.gallegos.muro@intel.com" },
+    { name = "Felipe Leza Alvarez", email = "felipe.leza.alvarez@intel.com" },
+    { name = "Manuel Santana Castolo", email = "manuel.santana.castolo@intel.com" },
+]
+dynamic = ["version"]
+dependencies = [
+    "fbgemm-gpu-cpu==1.7.0",
+    "numpy~=2.0",
+    "torch~=2.12.0",
+]
+
+[project.optional-dependencies]
+test = [
+    "pytest",
+    "hypothesis",
+    "expecttest"
+]
+
+[project.urls]
+GitHub = "https://github.com/intel/torchlib-xpu"
+
+[tool.scikit-build]
+cmake.version = ">=3.18"
+
+[tool.scikit-build.cmake.define]
+CMAKE_CXX_COMPILER = {env="CXX", default="icpx"}
+
+[[tool.scikit-build.generate]]
+path = "src/fbgemm_xpu/_version.py"
+template = '__version__ = "${version}"'
+
+# The package version is read from version.txt: an optional leading "v"
+# followed by the version string, captured in the "value" group.
+# The hyphen is placed last in the character class so it is a literal "-";
+# written as "+-_" it would form a range from "+" to "_" and accept
+# unintended characters such as "/", ":", "<", "@" or "\".
+[tool.scikit-build.metadata.version]
+provider = "scikit_build_core.metadata.regex"
+input = "version.txt"
+regex = "^v?(?P<value>[0-9a-zA-Z.+_-]+)"
@@ -0,0 +1,104 @@
+cmake_minimum_required(VERSION 3.18)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+set(PYBIND11_FINDPYTHON ON)
+# Prefer the active virtual environment Python when available, but never
+# override an interpreter that was already selected by the caller (for
+# example via -DPython3_EXECUTABLE=... or by the build backend).
+set(Python3_FIND_VIRTUALENV FIRST)
+if(DEFINED ENV{VIRTUAL_ENV} AND NOT DEFINED Python3_EXECUTABLE)
+    # Virtual environments place the interpreter in Scripts/ on Windows and
+    # in bin/ elsewhere.
+    if(WIN32)
+        set(fbgemm_xpu_venv_python "$ENV{VIRTUAL_ENV}/Scripts/python.exe")
+    else()
+        set(fbgemm_xpu_venv_python "$ENV{VIRTUAL_ENV}/bin/python")
+    endif()
+    # Only pin the interpreter when it actually exists; otherwise fall back
+    # to the regular FindPython3 search.
+    if(EXISTS "${fbgemm_xpu_venv_python}")
+        set(Python3_ROOT_DIR "$ENV{VIRTUAL_ENV}")
+        set(Python3_EXECUTABLE "${fbgemm_xpu_venv_python}")
+    endif()
+    unset(fbgemm_xpu_venv_python)
+endif()
+
+find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module)
+find_package(pybind11 REQUIRED)
+find_package(Torch REQUIRED)
+
+# --------------------------------------------------------------------------
+# Compiler guard: every kernel in fbgemm-xpu is written in SYCL, so the build
+# only proceeds when the configured C++ compiler path contains "icpx"
+# (Intel oneAPI DPC++ driver). Any other compiler aborts configuration.
+# --------------------------------------------------------------------------
+if(CMAKE_CXX_COMPILER MATCHES "icpx")
+    message(STATUS "Intel compiler detected: SYCL kernels enabled")
+else()
+    message(FATAL_ERROR "fbgemm-xpu requires Intel oneAPI compiler (icpx) with SYCL support")
+endif()
+
+# --------------------------------------------------------------------------
+# Target XPU architectures. The TORCH_XPU_ARCH_LIST environment variable is
+# read at configure time; when it is unset or empty the build defaults to
+# "pvc" (Ponte Vecchio), the only architecture tested so far.
+# --------------------------------------------------------------------------
+set(TORCH_XPU_ARCH_LIST "$ENV{TORCH_XPU_ARCH_LIST}")
+if("${TORCH_XPU_ARCH_LIST}" STREQUAL "")
+    set(TORCH_XPU_ARCH_LIST "pvc")
+endif()
+message(STATUS "Building for XPU architectures: ${TORCH_XPU_ARCH_LIST}")
+
+# Flags consumed later by the extension target: the SYCL target triples and
+# the device list / backend options forwarded through -Xs.
+set(SYCL_TARGETS -fsycl-targets=spir64_gen,spir64)
+set(SYCL_DEVICE_LIST -Xs "-device ${TORCH_XPU_ARCH_LIST} -options -cl-poison-unsupported-fp64-kernels")
+
+# --------------------------------------------------------------------------
+# Build inputs are listed explicitly (no globbing) so that every new file is
+# visible in review. Host .cpp files go into host_sources; SYCL .sycl files
+# go into sycl_sources as operators are integrated.
+# --------------------------------------------------------------------------
+set(host_sources
+    ${CMAKE_CURRENT_SOURCE_DIR}/ops_registry.cpp
+)
+
+set(sycl_sources
+)
+
+# The .sycl extension is not recognized by CMake as C++. Each .sycl file is
+# therefore copied into the build tree under the same relative path with an
+# extra .cpp suffix, so a normal compile step is scheduled for it; icpx then
+# handles SYCL via -fsycl. Entries without a .sycl suffix are used as-is.
+set(sycl_sources_cxx "")
+foreach(src_path IN LISTS sycl_sources)
+    if(NOT src_path MATCHES "\\.sycl$")
+        list(APPEND sycl_sources_cxx ${src_path})
+        continue()
+    endif()
+    file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${src_path})
+    set(mirrored_path ${CMAKE_CURRENT_BINARY_DIR}/${rel_path}.cpp)
+    configure_file(${src_path} ${mirrored_path} COPYONLY)
+    list(APPEND sycl_sources_cxx ${mirrored_path})
+endforeach()
+
+# Full list of translation units for the extension module.
+set(all_sources ${host_sources} ${sycl_sources_cxx})
+
+# --------------------------------------------------------------------------
+# Extension module: fbgemm_xpu._C
+#
+# The module is installed into the fbgemm_xpu package, so its import path is
+# fbgemm_xpu._C. SYCL kernels are required. Skip target creation if no
+# sources have been added yet; this lets the scaffolding configure cleanly
+# before any code is written.
+# --------------------------------------------------------------------------
+if(NOT all_sources)
+    message(WARNING "fbgemm_xpu._C: no sources defined yet; skipping extension target.")
+    # Stop processing this directory's CMakeLists.txt; nothing below applies
+    # without a target.
+    return()
+endif()
+
+# Python extension module built from the collected host and SYCL sources.
+Python3_add_library(_C MODULE WITH_SOABI ${all_sources})
+set_target_properties(_C PROPERTIES
+    PREFIX ""
+    CXX_STANDARD 17
+)
+
+# Include paths:
+#  - the package root, because host sources include
+#    "fbgemm_utils/torch_library.h";
+#  - sycl_kernels and fbgemm_utils, so SYCL sources mirrored into the build
+#    tree can still resolve same-directory headers ("feature_gates.h",
+#    "utils.h", ...);
+#  - the Python headers.
+target_include_directories(_C PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}/sycl_kernels
+    ${CMAKE_CURRENT_SOURCE_DIR}/fbgemm_utils
+    ${Python3_INCLUDE_DIRS}
+)
+
+target_link_libraries(_C PRIVATE ${TORCH_LIBRARIES})
+
+# SYCL must be enabled for both compilation and linking; the device list is
+# only passed at link time.
+target_compile_options(_C PRIVATE
+    -fdiagnostics-color=always
+    -fsycl
+    ${SYCL_TARGETS}
+)
+target_link_options(_C PRIVATE
+    -fsycl
+    ${SYCL_TARGETS}
+    ${SYCL_DEVICE_LIST}
+)
+
+# Place the built module inside the fbgemm_xpu Python package.
+install(TARGETS _C DESTINATION fbgemm_xpu)
@@ -0,0 +1,24 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
+# Copyright (c) 2026 Intel Corporation. All Rights Reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# Import the compiled C extension (_C) which contains the registered operators.
+# If native dependencies (for example libtorch.so) are unavailable, keep import
+# working so metadata like __version__ remains accessible.
+try:
+    from . import _C as _C
+except ImportError:
+    _C = None
+
+from . import ops as ops
+
+__all__ = ["_C", "ops", "__version__"]
+
+try:
+    from ._version import __version__
+except ModuleNotFoundError:
+    try:
+        from importlib.metadata import PackageNotFoundError, version
+        __version__ = version("fbgemm-xpu")
+    except (ImportError, PackageNotFoundError):
+        __version__ = "unknown"
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
+# Copyright (c) 2026 Intel Corporation. All Rights Reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+# Python wrapper functions for all custom operators under the fbgemm namespace
+# This module provides user-friendly interfaces to the C++ operators
+
+__all__ = [
+	"dense_embedding_codegen_lookup_function",
+]
+
+def dense_embedding_codegen_lookup_function(*args, **kwargs):
+	"""Temporary stub for the planned dense embedding API."""
+	raise NotImplementedError(
+		"dense_embedding_codegen_lookup_function is not implemented yet in src/fbgemm_xpu/ops.py"
+	)
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
+ * Copyright (c) 2026 Intel Corporation. All Rights Reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+ #include <Python.h>
+
+#include <ATen/core/Tensor.h>
+#include <torch/library.h>
+
+
+/**
+ * Creates a dummy empty _C module that can be imported from Python.
+ *
+ * When this module is imported from Python (via 'import fbgemm_xpu._C'),
+ * it loads the shared library (.so file) and runs all TORCH_LIBRARY
+ * static initializers to register the custom operators with PyTorch's
+ * dispatch system.
+ *
+ * PyMODINIT_FUNC is used for the declaration: it supplies the return type
+ * together with the C linkage (and any platform-specific export
+ * declaration) that the interpreter needs in order to locate the
+ * initialization function.
+ *
+ * @return PyObject* pointer to the created module, or NULL on failure
+ */
+PyMODINIT_FUNC PyInit__C(void)
+{
+    static struct PyModuleDef module_def = {
+        PyModuleDef_HEAD_INIT,
+        "_C",   /* name of module - imported as fbgemm_xpu._C */
+        NULL,   /* module documentation, may be NULL */
+        -1,     /* size of per-interpreter state of the module,
+                   or -1 if the module keeps state in global variables. */
+        NULL,   /* methods - no Python-callable methods needed */
+    };
+    return PyModule_Create(&module_def);
+}
+/**
+ * Central operator registry for ALL custom operators under the "fbgemm" namespace.
+ *
+ * Uses TORCH_LIBRARY_FRAGMENT so this can coexist with upstream fbgemm_gpu
+ * which may already own the "fbgemm" namespace via TORCH_LIBRARY(fbgemm, m).
+ *
+ * Operator schemas are declared here; device-specific implementations are
+ * registered separately via TORCH_LIBRARY_IMPL(fbgemm, <KEY>, m) in the
+ * respective .cpp / .sycl / .cu files.
+ *
+ * Only the schema is declared in this block; no implementation is
+ * registered here.
+ * NOTE(review): if another loaded library defines the same operator schema
+ * in the "fbgemm" namespace, confirm that the two definitions do not clash.
+ */
+TORCH_LIBRARY_FRAGMENT(fbgemm, m)
+{
+    // The adjacent string literals below are concatenated by the compiler
+    // into a single schema string passed to m.def().
+    m.def("dense_embedding_codegen_lookup_function("
+          "    Tensor dev_weights, "
+          "    Tensor weights_offsets, "
+          "    Tensor D_offsets, "
+          "    SymInt total_D, "
+          "    SymInt max_D, "
+          "    Tensor hash_size_cumsum, "
+          "    int total_hash_size_bits, "
+          "    Tensor indices, "
+          "    Tensor offsets, "
+          "    int pooling_mode, "
+          "    Tensor? indice_weights, "
+          "    Tensor? feature_requires_grad, "
+          "    int output_dtype=0, "
+          "    Tensor? B_offsets=None, "
+          "    Tensor? vbe_output_offsets_feature_rank=None, "
+          "    Tensor? vbe_B_offsets_rank_per_feature=None, "
+          "    SymInt max_B=-1, "
+          "    SymInt max_B_feature_rank=-1, "
+          "    SymInt vbe_output_size=-1, "
+          "    bool mixed_D=True) -> Tensor");
+}
@@ -0,0 +1 @@
+0.7.0