# Index Developer Docs #295

# Workflow name and triggers for the developer-docs indexing run.
name: Index Developer Docs

# `on` is read as a boolean key by generic YAML 1.1 parsers; GitHub's own
# loader treats it as the trigger key, so it stays unquoted.
on:
  schedule:
    # Daily at 02:00. GitHub evaluates cron schedules in UTC.
    - cron: '0 2 * * *'
  # Manual runs. The inputs below exist only for this trigger; on scheduled
  # runs the `inputs` context is empty.
  workflow_dispatch:
    inputs:
      # NOTE(review): not referenced by any step visible in this file;
      # presumably informational only. Confirm before removing.
      reason:
        description: 'Reason for running (optional)'
        required: false
        type: string
        default: 'Manual trigger'
      # Exported to the indexing script as DRY_RUN; also suppresses the
      # "Force reindex" step.
      dry_run:
        description: 'Dry run (validate extraction without uploading)'
        required: false
        type: boolean
        default: false
      # Enables the "Force reindex" step (unless dry_run is set).
      force_reindex:
        description: 'Force full reindex (calls processalldocuments after upload)'
        required: false
        type: boolean
        default: false
      # Exported to the indexing script as DISABLE_STALE_DELETION_CHECK.
      disable_stale_deletion_check:
        description: 'Bypass 20% stale-deletion safeguard (use for one-off cleanup runs only)'
        required: false
        type: boolean
        default: false
jobs:
  # Single job: check out the repo, build the site, run the indexing script,
  # and optionally request a full reindex afterwards.
  run-indexer:
    runs-on: ubuntu-latest
    # Hard cap so a hung build or upload cannot occupy a runner indefinitely.
    timeout-minutes: 45
    # Least privilege for GITHUB_TOKEN: the only steps that receive it
    # (checkout, mise-action) need read access to repository contents.
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # 0 = fetch full history instead of a shallow clone.
          # NOTE(review): presumably a later step needs git history; confirm.
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up mise
        uses: jdx/mise-action@v4
        with:
          cache: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Install site dependencies
        # --frozen-lockfile: fail rather than update pnpm-lock.yaml in CI.
        run: pnpm install --frozen-lockfile

      - name: Build site
        run: pnpm build

      - name: Install indexing dependencies
        working-directory: scripts/indexing
        run: uv sync

      - name: Run indexing script
        working-directory: scripts/indexing
        env:
          GLEAN_INDEXING_API_TOKEN: ${{ secrets.GLEAN_INDEXING_API_TOKEN }}
          GLEAN_INSTANCE: ${{ secrets.GLEAN_INSTANCE }}
          # Both values come from workflow_dispatch inputs; on scheduled runs
          # the `inputs` context is empty, so they expand to empty strings.
          DRY_RUN: ${{ inputs.dry_run }}
          DISABLE_STALE_DELETION_CHECK: ${{ inputs.disable_stale_deletion_check }}
        run: uv run main.py

      - name: Force reindex
        # Runs only when force_reindex was requested and this is not a dry run.
        if: inputs.force_reindex && inputs.dry_run != true
        working-directory: scripts/indexing
        env:
          GLEAN_INDEXING_API_TOKEN: ${{ secrets.GLEAN_INDEXING_API_TOKEN }}
          GLEAN_INSTANCE: ${{ secrets.GLEAN_INSTANCE }}
        # Inline Python: calls process_all for the 'devdocs' datasource, then
        # prints a confirmation line.
        run: |
          uv run python3 -c "
          from glean.indexing.common import api_client
          from glean.api_client.models import ProcessAllDocumentsRequest
          with api_client() as client:
              client.indexing.documents.process_all(request=ProcessAllDocumentsRequest(datasource='devdocs'))
          print('processalldocuments called for devdocs')
          "