evaleval · gbemike · May 20, 2026 · May 21, 2026 · May 21, 2026 · May 22, 2026
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
@@ -0,0 +1,51 @@
+name: Deploy Docs to GitHub Pages  # builds the Jekyll docs site and publishes it to Pages
+
+on:
+  push:
+    branches: [main]  # redeploy on every push to main
+
+  workflow_dispatch:  # also allow manual runs
+
+permissions:  # GITHUB_TOKEN scopes required for a Pages deployment
+  contents: read
+  pages: write
+  id-token: write
+
+concurrency:
+  group: "pages"  # every run shares this group, so deployments are serialized
+  cancel-in-progress: false  # never abort a deployment that is already running
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '3.2'  # quoted so YAML keeps it a string
+          bundler-cache: true  # installs the Gemfile's gems and caches them between runs
+
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+
+      - name: Build site
+        run: JEKYLL_ENV=production bundle exec jekyll build  # published build must not use the development default
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: ./_site  # Jekyll's default output directory
+
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}  # output of the Deploy step below
+    runs-on: ubuntu-latest
+    needs: build  # deploy only after the site artifact has been uploaded
+    steps:
+      - name: Deploy
+        id: deployment
+        uses: actions/deploy-pages@v5
diff --git a/.gitignore b/.gitignore
@@ -213,4 +213,14 @@ __marimo__/
 *.DS_Store*
 None/
 global-mmlu-lite/
-/data/
+/data/
+
+# Ignore folders generated by Bundler
+.bundle/
+vendor/
+
+# Ignore the default location of the built site, and caches and metadata generated by Jekyll
+_site/
+.sass-cache/
+.jekyll-cache/
+.jekyll-metadata
diff --git a/Gemfile b/Gemfile
@@ -0,0 +1,2 @@
+source "https://rubygems.org"
+gem "just-the-docs"
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -0,0 +1,91 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    addressable (2.9.0)
+      public_suffix (>= 2.0.2, < 8.0)
+    base64 (0.3.0)
+    bigdecimal (4.1.2)
+    colorator (1.1.0)
+    concurrent-ruby (1.3.6)
+    csv (3.3.5)
+    em-websocket (0.5.3)
+      eventmachine (>= 0.12.9)
+      http_parser.rb (~> 0)
+    eventmachine (1.2.7)
+    ffi (1.17.4-x86_64-linux-gnu)
+    forwardable-extended (2.6.0)
+    google-protobuf (4.35.0-x86_64-linux-gnu)
+      bigdecimal
+      rake (~> 13.3)
+    http_parser.rb (0.8.1)
+    i18n (1.14.8)
+      concurrent-ruby (~> 1.0)
+    jekyll (4.4.1)
+      addressable (~> 2.4)
+      base64 (~> 0.2)
+      colorator (~> 1.0)
+      csv (~> 3.0)
+      em-websocket (~> 0.5)
+      i18n (~> 1.0)
+      jekyll-sass-converter (>= 2.0, < 4.0)
+      jekyll-watch (~> 2.0)
+      json (~> 2.6)
+      kramdown (~> 2.3, >= 2.3.1)
+      kramdown-parser-gfm (~> 1.0)
+      liquid (~> 4.0)
+      mercenary (~> 0.3, >= 0.3.6)
+      pathutil (~> 0.9)
+      rouge (>= 3.0, < 5.0)
+      safe_yaml (~> 1.0)
+      terminal-table (>= 1.8, < 4.0)
+      webrick (~> 1.7)
+    jekyll-include-cache (0.2.1)
+      jekyll (>= 3.7, < 5.0)
+    jekyll-sass-converter (3.1.0)
+      sass-embedded (~> 1.75)
+    jekyll-seo-tag (2.9.0)
+      jekyll (>= 3.8, < 5.0)
+    jekyll-watch (2.2.1)
+      listen (~> 3.0)
+    json (2.19.5)
+    just-the-docs (0.12.0)
+      jekyll (>= 3.8.5)
+      jekyll-include-cache
+      jekyll-seo-tag (>= 2.0)
+      rake (>= 12.3.1)
+    kramdown (2.5.2)
+      rexml (>= 3.4.4)
+    kramdown-parser-gfm (1.1.0)
+      kramdown (~> 2.0)
+    liquid (4.0.4)
+    listen (3.10.0)
+      logger
+      rb-fsevent (~> 0.10, >= 0.10.3)
+      rb-inotify (~> 0.9, >= 0.9.10)
+    logger (1.7.0)
+    mercenary (0.4.0)
+    pathutil (0.16.2)
+      forwardable-extended (~> 2.6)
+    public_suffix (7.0.5)
+    rake (13.4.2)
+    rb-fsevent (0.11.2)
+    rb-inotify (0.11.1)
+      ffi (~> 1.0)
+    rexml (3.4.4)
+    rouge (4.7.0)
+    safe_yaml (1.0.5)
+    sass-embedded (1.99.0-x86_64-linux-gnu)
+      google-protobuf (~> 4.31)
+    terminal-table (3.0.2)
+      unicode-display_width (>= 1.1.1, < 3)
+    unicode-display_width (2.6.0)
+    webrick (1.9.2)
+
+PLATFORMS
+  x86_64-linux-gnu
+
+DEPENDENCIES
+  just-the-docs
+
+BUNDLED WITH
+   2.4.20
diff --git a/_config.yml b/_config.yml
@@ -0,0 +1,31 @@
+title: Every Eval Ever
+description: Documentation for the Every Eval Ever schema, CLI, and converters
+theme: just-the-docs  # gem-based theme; the gem is declared in the Gemfile
+color_scheme: light
+
+source: docs  # site content is read from docs/ rather than the repository root
+
+baseurl: ""  # no path prefix: the site is served from the root of the domain below
+url: "https://docs.evalevalai.com"
+repository: evaleval/every_eval_ever
+
+permalink: pretty  # directory-style URLs, e.g. /getting-started/
+
+search_enabled: true
+heading_anchors: true
+
+aux_links:  # extra header link(s): label mapped to a list holding the URL
+  "Every Eval Ever on GitHub":
+    - https://github.com/evaleval/every_eval_ever
+
+defaults:
+  - scope:
+      path: ""  # empty path applies these values to every file
+    values:
+      layout: default
+
+nav_sort: case_sensitive
+
+# Back to top link
+back_to_top: true
+back_to_top_text: "Back to top"
diff --git a/docs/contributing/index.md b/docs/contributing/index.md
@@ -0,0 +1,26 @@
+---
+layout: default
+title: Contributing
+nav_order: 5
+---
+
+# Contributing
+
+Data contributions land in the datastore, while validation gates run through the validator/EvalEvalBot workflow.
+
+To contribute evaluation data:
+
+1. Add files under `data/{benchmark}/{developer}/{model}/`
+2. Name aggregate files as `{uuid}.json`
+3. Optionally add instance-level `{uuid}_samples.jsonl`
+4. Validate before submission
+
+Datastore: [EEE Datastore](https://huggingface.co/datasets/evaleval/EEE_datastore)
+
+The validator checks datastore pull requests using core checks from this repository and additional checks that are being upstreamed.
+
+Before submitting, run:
+
+```bash
+uv run python -m every_eval_ever validate data/
+```
diff --git a/docs/data-structure/index.md b/docs/data-structure/index.md
@@ -0,0 +1,15 @@
+---
+layout: default
+title: Data Structure
+nav_order: 3
+has_children: true
+---
+
+# Data Structure
+
+Evaluation data is represented in two layers:
+
+- Aggregate JSON records (`{uuid}.json`)
+- Instance-level JSONL records (`{uuid}_samples.jsonl`)
+
+Use the child pages in this section for schema and validation details.
diff --git a/docs/data-structure/schema.md b/docs/data-structure/schema.md
@@ -0,0 +1,24 @@
+---
+layout: default
+title: Schema
+parent: Data Structure
+nav_order: 1
+---
+
+# Schema
+
+The canonical schemas are:
+
+- [Aggregate schema](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)
+- [Instance-level schema](https://github.com/evaleval/every_eval_ever/blob/main/instance_level_eval.schema.json)
+
+Both schema definitions are currently version `0.2.2`.
+
+The repository enforces schema compatibility by generating Pydantic models from JSON Schema and applying post-generation patches (`post_codegen.py`). This generation flow is automated in CI and can also be run manually.
+
+For aggregate records, keep these conventions:
+
+1. `evaluation_id` uses `{benchmark_name}/{model_id}/{retrieved_timestamp}`
+2. `source_metadata.source_type` is `documentation` or `evaluation_run`
+3. `source_data` is set per result (`url`, `hf_dataset`, or `other`)
+4. Level-based metrics use integer values plus `level_names`
diff --git a/docs/data-structure/validation.md b/docs/data-structure/validation.md
@@ -0,0 +1,24 @@
+---
+layout: default
+title: Validation
+parent: Data Structure
+nav_order: 2
+---
+
+# Validation
+
+Validate aggregate `.json` files and instance-level `.jsonl` files:
+
+```bash
+uv run python -m every_eval_ever validate data/
+```
+
+Output formats:
+
+```bash
+uv run python -m every_eval_ever validate --format rich data/
+uv run python -m every_eval_ever validate --format json data/
+uv run python -m every_eval_ever validate --format github data/
+```
+
+Exit code is `0` when all files pass and `1` when any file fails.
diff --git a/docs/eval-converters/index.md b/docs/eval-converters/index.md
@@ -0,0 +1,27 @@
+---
+layout: default
+title: Eval Converters
+nav_order: 4
+---
+
+# Eval Converters
+
+Supported conversion targets:
+
+- Inspect AI
+- HELM
+- lm-evaluation-harness
+
+These are the three main general-purpose converters expected to be supported in the core package.
+
+Example commands:
+
+```bash
+uv run python -m every_eval_ever convert inspect --log_path <path>
+uv run python -m every_eval_ever convert helm --log_path <path>
+uv run python -m every_eval_ever convert lm_eval --log_path <path>
+```
+
+Adapter source code lives under [every_eval_ever/converters](https://github.com/evaleval/every_eval_ever/tree/main/every_eval_ever/converters).
+
+One-off adapters also exist under [utils](https://github.com/evaleval/every_eval_ever/tree/main/utils) for source-specific parsing and business logic.
diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md
@@ -0,0 +1,33 @@
+---
+layout: default
+title: Getting Started
+nav_order: 2
+---
+
+# Getting Started
+
+Install the package:
+
+```bash
+pip install every-eval-ever
+```
+
+Optional converter dependencies:
+
+```bash
+pip install 'every-eval-ever[inspect]'
+pip install 'every-eval-ever[helm]'
+pip install 'every-eval-ever[all]'
+```
+
+## Run the CLI
+
+```bash
+uv run python -m every_eval_ever --help
+```
+
+## Continue
+
+- See [Data Structure](../data-structure/)
+- See [Eval Converters](../eval-converters/)
+- See [Contributing](../contributing/)
diff --git a/docs/index.md b/docs/index.md
@@ -0,0 +1,37 @@
+---
+layout: default
+title: Home
+nav_order: 1
+---
+
+# Every Eval Ever
+
+> [EvalEval Coalition](https://evalevalai.com) — "We are a researcher community developing scientifically grounded research outputs and robust deployment infrastructure for broader impact evaluations."
+
+**Every Eval Ever** is a shared schema and crowdsourced eval database. It defines a standardized metadata format for storing AI evaluation results — from leaderboard scrapes and research papers to local evaluation runs — so that results from different frameworks can be compared, reproduced, and reused. The three components that make it work:
+
+- 📋 **A metadata schema** ([eval.schema.json](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)) that defines the information needed for meaningful comparison of evaluation results, including [instance-level data](https://github.com/evaleval/every_eval_ever/blob/main/instance_level_eval.schema.json)
+- 🔧 **Validation** that checks data against the schema before it enters the repository
+- 🔌 **Converters** for [Inspect AI](https://github.com/evaleval/every_eval_ever/tree/main/every_eval_ever/converters/inspect), [HELM](https://github.com/evaleval/every_eval_ever/tree/main/every_eval_ever/converters/helm), and [lm-eval-harness](https://github.com/evaleval/every_eval_ever/tree/main/every_eval_ever/converters/lm_eval), so you can transform your existing evaluation logs into the standard format
+
+## Project Components
+
+Every Eval Ever is maintained across three connected components:
+
+- [GitHub repository](https://github.com/evaleval/every_eval_ever): the `every_eval_ever` Python package with schema definitions, converters/adapters, tests, and core tooling.
+- [EEE Datastore](https://huggingface.co/datasets/evaleval/EEE_datastore): the Hugging Face datastore that stores normalized Every Eval Ever evaluation data.
+- [EEE Validator](https://huggingface.co/spaces/evaleval/eee_validator): validator and EvalEvalBot checks used on datastore pull requests, built from repository logic plus additional checks that are being upstreamed.
+
+Install the package:
+
+```bash
+pip install every-eval-ever
+```
+
+Optional converter dependencies:
+
+```bash
+pip install 'every-eval-ever[inspect]'
+pip install 'every-eval-ever[helm]'
+pip install 'every-eval-ever[all]'
+```
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		source "https://rubygems.org"
		gem "just-the-docs"