# [CI] Disable ROCm wheel builds (tensordict is CPU-only) — PR #2938
# Workflow file for this run
name: Continuous Benchmark
# Triggers: pushes to main, all PRs, manual dispatch, and reusable workflow_call
# (with an opt-out flag for the gh-pages upload step).
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - "*"
  workflow_dispatch:
  workflow_call:
    inputs:
      skip-upload:
        description: 'Skip benchmark upload to gh-pages'
        type: boolean
        default: false
permissions:
  deployments: write
  contents: write
  pull-requests: write
concurrency:
  # Documentation suggests ${{ github.head_ref }}, but that's only available on
  # pull_request/pull_request_target triggers, so using ${{ github.ref }}.
  # On main, we want all builds to complete even if merging happens faster, to make
  # it easier to discover at which point something broke.
  group: benchmarks-${{ github.ref == 'refs/heads/main' && format('ci-master-{0}', github.sha) || format('ci-{0}', github.ref) }}
  cancel-in-progress: true
jobs:
  # Runs pytest-benchmark twice (CPU and GPU matrix legs) on a GPU runner,
  # then uploads each leg's output.json as an artifact for the upload job.
  benchmark:
    name: ${{ matrix.device }} Pytest benchmark
    runs-on: linux.g5.4xlarge.nvidia.gpu
    strategy:
      matrix:
        device: [CPU, GPU]
        python-version: ['3.12']
    defaults:
      run:
        shell: bash -l {0}
    container:
      image: nvidia/cuda:12.8.0-runtime-ubuntu22.04
      options: --gpus all
    steps:
      - name: Who triggered this?
        run: |
          echo "Action triggered by ${{ github.event.pull_request.html_url }}"
      - name: Install deps
        run: |
          export TZ=Europe/London
          export DEBIAN_FRONTEND=noninteractive  # tzdata bug
          apt-get update -y
          apt-get upgrade -y
          apt-get -y install gcc curl g++ unzip wget sudo git cmake
      - name: Check ldd --version
        run: ldd --version
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 50  # this is to make sure we obtain the target base commit
      - name: Python Setup
        uses: actions/setup-python@v4
        with:
          # Keep in sync with the matrix rather than hard-coding the version.
          python-version: ${{ matrix.python-version }}
      - name: Setup git
        run: |
          git config --global --add safe.directory /__w/tensordict/tensordict
          git config --global user.name "github-action-benchmark"
          git config --global user.email "github@users.noreply.github.com"
      - name: setup Path
        run: |
          echo /usr/local/bin >> $GITHUB_PATH
      - name: Setup Environment
        run: |
          set -e
          set -x
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> $GITHUB_PATH
          source $HOME/.local/bin/env
          # Create a local venv for the benchmark run
          uv venv .venv/local --python ${{ matrix.python-version }}
          source .venv/local/bin/activate
          echo "=== uv version ==="
          uv --version
          uv pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu128
          uv pip install "pybind11[global]" "setuptools" "wheel" "ninja"
          uv pip install pytest pytest-benchmark
          # Do not resolve runtime dependencies here: we want to keep the PyTorch build (nightly)
          # that was explicitly installed above, and avoid any solver replacing it.
          uv pip install -e . --no-deps
          uv run --active python -c "import torch; print(torch.__version__)"
          # Hide GPUs from the CPU matrix leg. An `export` here would not survive
          # this step, so persist the variable via GITHUB_ENV for later steps.
          if [ "${{ matrix.device }}" == "CPU" ]; then
            echo "CUDA_VISIBLE_DEVICES=" >> "$GITHUB_ENV"
          fi
      - name: check GPU presence
        if: matrix.device == 'GPU'
        run: |
          source .venv/local/bin/activate
          uv run --active python -c """import torch
          print(f'CUDA available: {torch.cuda.is_available()}')
          print(f'CUDA device count: {torch.cuda.device_count()}')
          print(f'CUDA version: {torch.version.cuda}')
          assert torch.cuda.is_available() and torch.cuda.device_count() > 0, 'CUDA is not available or no GPU devices found'
          """
      - name: Run benchmarks
        run: |
          set -e
          set -x
          source .venv/local/bin/activate
          cd benchmarks/
          # Check Python version to ensure compatibility with PyTorch Dynamo
          uv run --active python --version
          uv run --active python -c "import sys; actual_version = f'{sys.version_info.major}.{sys.version_info.minor}'; expected_version = '${{ matrix.python-version }}'; print(f'Expected: {expected_version}, Actual: {actual_version}'); exit(1) if actual_version != expected_version else None; exit(1) if sys.version_info >= (3, 13) else print(f'Python version check passed: {actual_version}')"
          export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1
          export TD_GET_DEFAULTS_TO_NONE=1
          uv run --active pytest -vvv --rank 0 --benchmark-json output.json
      # Upload benchmark results for main branch, manual dispatch, or PRs with 'benchmarks/upload' label
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: ${{ !inputs.skip-upload && (github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmarks/upload'))) }}
        with:
          name: ${{ matrix.device }}-benchmark-results
          path: benchmarks/output.json
# Upload benchmark results to gh-pages branch (only for main, manual dispatch, or PRs with 'benchmarks/upload' label)
benchmark-upload:
name: Upload benchmark results
runs-on: ubuntu-latest
needs: benchmark
if: ${{ !inputs.skip-upload && (github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'benchmarks/upload'))) }}
steps:
- name: Show upload trigger reason
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "Uploading benchmarks because this is the main branch"
elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
echo "Uploading benchmarks because of manual workflow dispatch"
elif [ "${{ github.event_name }}" == "pull_request" ] && [[ "${{ contains(github.event.pull_request.labels.*.name, 'benchmarks/upload') }}" == "true" ]]; then
echo "Uploading benchmarks because PR has 'benchmarks/upload' label"
else
echo "Uploading benchmarks for other reason"
fi
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Download CPU benchmark results
uses: actions/download-artifact@v4
with:
name: CPU-benchmark-results
path: cpu-results
- name: Download GPU benchmark results
uses: actions/download-artifact@v4
with:
name: GPU-benchmark-results
path: gpu-results
- name: Store CPU benchmark results
uses: benchmark-action/github-action-benchmark@v1
with:
name: CPU Benchmark Results
tool: 'pytest'
output-file-path: cpu-results/output.json
fail-on-alert: true
alert-threshold: '200%'
alert-comment-cc-users: '@vmoens'
# Disable PR comments to avoid permission issues with PR reviews
comment-on-alert: false
github-token: ${{ secrets.GITHUB_TOKEN }}
gh-pages-branch: gh-pages
auto-push: true
- name: Store GPU benchmark results
uses: benchmark-action/github-action-benchmark@v1
with:
name: GPU Benchmark Results
tool: 'pytest'
output-file-path: gpu-results/output.json
fail-on-alert: true
alert-threshold: '200%'
alert-comment-cc-users: '@vmoens'
# Disable PR comments to avoid permission issues with PR reviews
comment-on-alert: false
github-token: ${{ secrets.GITHUB_TOKEN }}
gh-pages-branch: gh-pages
auto-push: true