
Commit 9f22622

Refactor repo structure and move package management to uv (#38)
# Refactor repo structure and move package management to uv

## ♻️ Current situation & Problem

This PR restructures the repository for better organization and modernizes package management by migrating from `requirements.txt` to `uv` (a fast Python package installer and resolver). It addresses:

- **Organization**: Improved project structure with proper separation of concerns
- **Package Management**: Migration to `uv` for faster, more reliable dependency resolution
- **Maintainability**: Cleaner project layout that aligns with Python best practices
- **Submodule Cleanup**: Removal of unused submodule dependencies

Resolves the ongoing effort to improve project organization and developer experience.

## ⚙️ Release Notes

### Features

- **Package Management Migration**: Migrated from `requirements.txt` to `pyproject.toml` with `uv.lock` for reproducible builds
  - Faster dependency resolution with `uv`
  - Better lock file management for reproducible environments
  - Centralized project metadata in `pyproject.toml`
- **Repository Structure Improvements**:
  - Moved root-level utility scripts to the `scripts/` directory for better organization
  - Converted `.gitignore` files to `.gitkeep` in empty directories (`data/`, `notebooks/`)
  - Removed unused `.gitmodules` and submodule references
  - Updated `.python-version` to specify Python 3.12
- **Updated Documentation**: The README now reflects the new structure and package management approach

### Migration Guide for Users

If you were previously using `requirements.txt`:

**Before:**

```bash
pip install -r requirements.txt
```

**After:**

```bash
uv sync
```

## 📚 Documentation

The project now uses `pyproject.toml` as the single source of truth for project metadata and dependencies. Key sections include:

- **Project Metadata**: Name, version, description, authors, keywords, and classifiers
- **Dependencies**: All runtime and optional development dependencies are clearly specified
- **Python Requirements**: Requires Python >= 3.12

For detailed project information, see `README.md` and the inline documentation in `pyproject.toml`.

The repository structure has been improved:

```
opentslm/
├── src/         # Source code
├── test/        # Test suite
├── scripts/     # Utility scripts
├── demo/        # Demo notebooks and examples
├── evaluation/  # Evaluation tools and scripts
├── data/        # Data storage
└── notebooks/   # Jupyter notebooks
```

## ✅ Testing

This PR maintains full backward compatibility with existing tests. No new test files were added, as this is primarily a structural and tooling refactoring:

- All existing tests remain in the `test/` directory
- Import paths in model files were updated to reflect the organizational changes
- The development environment can be tested by running `uv sync` followed by the test suite

Manual testing steps:

1. Clone or update the repository
2. Run `uv sync` to install all dependencies
3. Verify that the scripts in the `scripts/` directory are executable and functional
4. Run the existing test suite to ensure there are no regressions

### Code of Conduct & Contributing Guidelines

By creating and submitting this pull request, you agree to follow our [Code of Conduct](https://github.com/StanfordBDHG/.github/blob/main/CODE_OF_CONDUCT.md) and [Contributing Guidelines](https://github.com/StanfordBDHG/.github/blob/main/CONTRIBUTING.md):

- [x] I agree to follow the [Code of Conduct](https://github.com/StanfordBDHG/.github/blob/main/CODE_OF_CONDUCT.md) and [Contributing Guidelines](https://github.com/StanfordBDHG/.github/blob/main/CONTRIBUTING.md).

---------

Signed-off-by: masquare <masquare@users.noreply.github.com>
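The `pyproject.toml` sections described above might look roughly like the sketch below. All concrete values (the version number, dependency names) are illustrative placeholders, not the project's actual file:

```toml
[project]
name = "opentslm"
version = "0.1.0"                # illustrative placeholder
description = "Time-Series Language Models for reasoning over medical text and time series"
requires-python = ">=3.12"       # matches the stated Python requirement
dependencies = [
    "torch",                     # illustrative runtime dependency
]

[dependency-groups]
dev = [
    "pytest",                    # illustrative development dependency
]
```

With a file like this in place, `uv sync` resolves the dependencies, writes or updates `uv.lock`, and installs everything into `.venv`.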
1 parent 7a0d412 commit 9f22622

File tree

182 files changed: +4700 −1008 lines


.github/workflows/publish.yml

Lines changed: 53 additions & 0 deletions
New file:

```yaml
# SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project.
#
# SPDX-License-Identifier: MIT

name: "Publish to PyPI"

on:
  workflow_dispatch:
    inputs:
      target:
        description: 'Target'
        required: true
        default: 'PyPI'
        type: choice
        options:
          - PyPI
          - TestPyPi
  # push:
  #   tags:
  #     # Publish on any tag starting with a `v`, e.g., v0.1.0
  #     - v*

run-name: Publish to ${{ inputs.target }}

jobs:
  run:
    runs-on: ubuntu-latest
    environment:
      name: pypi
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v5
      - name: Install uv
        uses: astral-sh/setup-uv@v7
      - name: Build
        run: uv build
      # Check that basic features work and we didn't forget to include crucial files
      - name: Smoke test (wheel)
        run: uv run --isolated --no-project --with dist/*.whl tests/smoke_test.py
      - name: Smoke test (source distribution)
        run: uv run --isolated --no-project --with dist/*.tar.gz tests/smoke_test.py
      - name: Publish
        run: uv publish ${{ inputs.target == 'TestPyPi' && '--index testpypi' || '' }}
      - name: Summary
        run: |
          echo "### Published OpenTSLM to ${{ inputs.target }} :rocket:" >> $GITHUB_STEP_SUMMARY
          echo "Version: $(uv version --short)" >> $GITHUB_STEP_SUMMARY
          echo "URL: https://${{ inputs.target == 'TestPyPi' && 'test.' || '' }}pypi.org/project/opentslm/$(uv version --short)/" >> $GITHUB_STEP_SUMMARY
```
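The workflow's smoke tests run `tests/smoke_test.py` against the freshly built wheel and source distribution. That script's contents are not part of this diff; as a sketch of the pattern, a minimal smoke test simply imports the package and checks that the expected public names exist. The helper below is illustrative only — it exercises the stdlib `json` module so the sketch runs anywhere:

```python
import importlib


def smoke_test(module_name: str, required_attrs: list[str]) -> None:
    """Import a module and fail loudly if any expected attribute is missing."""
    module = importlib.import_module(module_name)
    missing = [attr for attr in required_attrs if not hasattr(module, attr)]
    assert not missing, f"{module_name} is missing attributes: {missing}"


if __name__ == "__main__":
    # For OpenTSLM this would be something like smoke_test("opentslm", ["OpenTSLM"]).
    smoke_test("json", ["dumps", "loads"])
    print("smoke test passed")
```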

.github/workflows/static-analysis.yml

Lines changed: 1 addition & 2 deletions
```diff
@@ -1,6 +1,5 @@
-# This source file is part of the OpenTSLM open-source project
-#
 # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project.
 #
 # SPDX-License-Identifier: MIT
 
```

.gitignore

Lines changed: 9 additions & 5 deletions
```diff
@@ -1,18 +1,22 @@
-# This source file is part of the OpenTSLM open-source project
-#
 # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project.
 #
 # SPDX-License-Identifier: MIT
 
-venv
+.venv
+.vscode
 __pycache__
 .DS_STORE
-**/.DS_STORE
 
 raw_data
 
+**/data/*
+!**/data/.gitkeep
 
 *.ts
 *.zip
-./__pycache__
 upload_to_huggingface.py
+
+dist/
+
+*.license
```

.gitmodules

Lines changed: 0 additions & 12 deletions
This file was deleted.

.linkspector.yml

Lines changed: 1 addition & 2 deletions
```diff
@@ -1,6 +1,5 @@
-# This source file is part of the OpenTSLM open-source project
-#
 # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+# SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project.
 #
 # SPDX-License-Identifier: MIT
 
```

.python-version

Lines changed: 3 additions & 1 deletion

```diff
@@ -1,5 +1,7 @@
-# This source file is part of the OpenTSLM open-source project
 #
 # SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
 #
 # SPDX-License-Identifier: MIT
+#
+
+3.12
```
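`.python-version` is the conventional pin file that uv (and pyenv-compatible tools) read to select the interpreter. With uv installed, `uv python pin 3.12` writes the same pin; the sketch below creates an equivalent file by hand (without the repository's license header):

```shell
# Create a Python version pin like the one added in this commit.
# `uv python pin 3.12` would have the same effect when uv is installed.
echo "3.12" > .python-version
cat .python-version
```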

.reuse/templates/opentslm.jinja2

Lines changed: 0 additions & 9 deletions
This file was deleted.

CONTRIBUTORS.md

Lines changed: 1 addition & 2 deletions
```diff
@@ -1,7 +1,6 @@
 <!--
-This source file is part of the OpenTSLM open-source project
-
 SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project.
 
 SPDX-License-Identifier: MIT
 -->
```

README.md

Lines changed: 82 additions & 54 deletions
```diff
@@ -1,20 +1,20 @@
 <!--
-This source file is part of the OpenTSLM open-source project
-
 SPDX-FileCopyrightText: 2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)
+SPDX-FileCopyrightText: 2025 This source file is part of the OpenTSLM open-source project.
 
 SPDX-License-Identifier: MIT
 -->
 
 # OpenTSLM: Time-Series Language Models for Reasoning over Multivariate Medical Text- and Time-Series Data
+[![PyPI - Version](https://img.shields.io/pypi/v/opentslm)](https://pypi.org/project/opentslm)
 [![DOI](https://img.shields.io/badge/DOI-10.13140/RG.2.2.14827.60963-blue.svg)](https://doi.org/10.13140/RG.2.2.14827.60963)
 [![Static Analysis](https://github.com/StanfordBDHG/OpenTSLM/actions/workflows/static-analysis.yml/badge.svg)](https://github.com/StanfordBDHG/OpenTSLM/actions/workflows/static-analysis.yml)
 
 
 Large Language Models (LLMs) have emerged as powerful tools for interpreting multimodal data (e.g., images, audio, text), often surpassing specialized models. In medicine, they hold particular promise for synthesizing large volumes of clinical information into actionable insights and patient-facing digital health applications. Yet, a major limitation remains their inability to handle time series data. To overcome this gap, we present OpenTSLM, a family of Time Series Language Models (TSLMs) created by integrating time series as a native modality to pretrained Large Language Models, enabling natural-language prompting and reasoning over multiple time series of any length [...] **[🔗 Read the full paper](https://doi.org/10.13140/RG.2.2.14827.60963)**
 
 <p align="center">
-  <img src="assets/schematic_overview_3.png" alt="Schematic Overview" width="100%">
+  <img src="https://raw.githubusercontent.com/StanfordBDHG/OpenTSLM/main/assets/schematic_overview_3.png" alt="Schematic Overview" width="100%">
 </p>
 
 
```
````diff
@@ -23,44 +23,37 @@ Large Language Models (LLMs) have emerged as powerful tools for interpreting mul
 OpenTSLM models can reason over multiple time series of any length at once, generating findings, captions, and rationales in natural language. We tested these models across a wide range of tasks spanning Human Activity Recognition (HAR) from 3-axis acceleration data, sleep staging from EEG readings, 12-lead ECG question answering, and time series captioning. Some examples are shown below, more are available in the paper.
 
 <p align="center">
-  <img src="assets/ecg_rationale.png" alt="ECG Rationale" width="32%">
-  <img src="assets/har_rationale.png" alt="HAR Rationale" width="32%">
-  <img src="assets/m4_caption.png" alt="M4 Caption" width="34%">
+  <img src="https://raw.githubusercontent.com/StanfordBDHG/OpenTSLM/main/assets/ecg_rationale.png" alt="ECG Rationale" width="32%">
+  <img src="https://raw.githubusercontent.com/StanfordBDHG/OpenTSLM/main/assets/har_rationale.png" alt="HAR Rationale" width="32%">
+  <img src="https://raw.githubusercontent.com/StanfordBDHG/OpenTSLM/main/assets/m4_caption.png" alt="M4 Caption" width="34%">
 
 </p>
 
 ## Installation
 
-1. **Clone the Repository**
-
-   ```bash
-   git clone https://github.com/StanfordBDHG/OpenTSLM.git --recurse-submodules
-   ```
-
-2. **Install Dependencies**
-   ```bash
-   pip install -r requirements.txt
-   ```
+```bash
+pip install opentslm
+```
 
 
 ## LLM Setup
 
 OpenTSLM is designed to work with Llama and Gemma models, with Llama 3.2 1B as the default. These models are stored in Hugging Face repositories which may require access permissions. Follow these steps to gain access and download:
 
 1. **Request Access (for Llama models)**
-Visit the Llama model repository (e.g., https://huggingface.co/meta-llama/Llama-3.2-1B) or Gemma models repository (https://huggingface.co/google/gemma-3-270m) and request access from Meta.
+   Visit the Llama model repository (e.g., https://huggingface.co/meta-llama/Llama-3.2-1B) or Gemma models repository (https://huggingface.co/google/gemma-3-270m) and request access from Meta.
 
 2. **Authenticate with Hugging Face**
-Log in to your Hugging Face account and configure the CLI:
+   Log in to your Hugging Face account and configure the CLI:
 
-```bash
-huggingface-cli login
-```
+   ```bash
+   huggingface-cli login
+   ```
 
 3. **Create an API Token**
-- Go to your Hugging Face settings: https://huggingface.co/settings/tokens
-- Generate a new token with `read` scope.
-- Copy the token for CLI login.
+   - Go to your Hugging Face settings: https://huggingface.co/settings/tokens
+   - Generate a new token with `read` scope.
+   - Copy the token for CLI login.
 
 ### Supported Models
 
````
````diff
@@ -87,15 +80,11 @@ A factory class called `OpenTSLM` for easily loading pre-trained models from Hug
 There are [demo scripts](demo/huggingface/) available which use the following minimal code. If you want to create your own applications, create a new file in **this repo folder** and use the following code as start:
 
 ```python
-import sys
-import os
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
-
-from model.llm.OpenTSLM import OpenTSLM
-from time_series_datasets.TSQADataset import TSQADataset
-from time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate
+from opentslm import OpenTSLM
+from opentslm.time_series_datasets.TSQADataset import TSQADataset
+from opentslm.time_series_datasets.util import extend_time_series_to_match_patch_size_and_aggregate
 from torch.utils.data import DataLoader
-from model_config import PATCH_SIZE
+from opentslm.model_config import PATCH_SIZE
 
 REPO_ID = "OpenTSLM/llama-3.2-1b-tsqa-sp"
 
````
````diff
@@ -104,22 +93,41 @@ model = OpenTSLM.load_pretrained(REPO_ID, device="cuda" if torch.cuda.is_availab
 test_dataset = TSQADataset("test", EOS_TOKEN=model.get_eos_token())
 
 test_loader = DataLoader(
-  test_dataset,
-  shuffle=False,
-  batch_size=1,
-  collate_fn=lambda batch: extend_time_series_to_match_patch_size_and_aggregate(
-    batch, patch_size=PATCH_SIZE
-  ),
+    test_dataset,
+    shuffle=False,
+    batch_size=1,
+    collate_fn=lambda batch: extend_time_series_to_match_patch_size_and_aggregate(
+        batch, patch_size=PATCH_SIZE
+    ),
 )
 
 for i, batch in enumerate(test_loader):
-  predictions = model.generate(batch, max_new_tokens=200)
-  for sample, pred in zip(batch, predictions):
-    print("Question:", sample.get("pre_prompt", "N/A"))
-    print("Answer:", sample.get("answer", "N/A"))
-    print("Output:", pred)
-  if i >= 4:
-    break
+    predictions = model.generate(batch, max_new_tokens=200)
+    for sample, pred in zip(batch, predictions):
+        print("Question:", sample.get("pre_prompt", "N/A"))
+        print("Answer:", sample.get("answer", "N/A"))
+        print("Output:", pred)
+    if i >= 4:
+        break
+```
+
+## Building and finetuning your own models
+
+To run the demos and use finetuning scripts **clone the repository** and set up all dependencies. We recommend using [uv](https://docs.astral.sh/uv/) to set up the environment, but you can also use pip:
+
+```bash
+git clone https://github.com/StanfordBDHG/OpenTSLM.git
+
+
+# uv environment management (recommended). Installs uv if it does not exist and creates the virtual environment
+command uv > /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
+uv sync --all-groups
+source .venv/bin/activate
+
+
+# or alternatively install via pip:
+pip install -r requirements.txt
+
 ```
 
 ### HuggingFace Demo Scripts
````
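The `collate_fn` in the README snippet pads each batch so every time series length becomes a whole number of patches before embedding. The package's actual `extend_time_series_to_match_patch_size_and_aggregate` is not shown in this diff; conceptually, the length adjustment works like this illustrative pure-Python sketch:

```python
def pad_to_patch_multiple(series, patch_size, pad_value=0.0):
    """Extend a series with pad_value so len(series) is a multiple of patch_size."""
    remainder = len(series) % patch_size
    if remainder == 0:
        return list(series)
    return list(series) + [pad_value] * (patch_size - remainder)


# Five samples with a patch size of 4 get padded up to two full patches (length 8).
padded = pad_to_patch_multiple([0.1, 0.2, 0.3, 0.4, 0.5], patch_size=4)
print(len(padded))  # 8
```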
```diff
@@ -166,9 +174,9 @@ REPO_ID = "OpenTSLM/llama-3.2-1b-tsqa-flamingo" # Flamingo model
 
 All pretrained models are available under the `OpenTSLM` organization on HuggingFace Hub. Model names follow the pattern:
 - `OpenTSLM/{base_model}-{dataset}-{model_type}`
-- `base_model`: `llama-3.2-1b`, `llama-3.2-3b`, `gemma-3-1b-pt`, `gemma-3-270m`
-- `dataset`: `tsqa`, `m4`, `har`, `sleep`, `ecg`
-- `model_type`: `sp` (Soft Prompt) or `flamingo` (Flamingo)
+  - `base_model`: `llama-3.2-1b`, `llama-3.2-3b`, `gemma-3-1b-pt`, `gemma-3-270m`
+  - `dataset`: `tsqa`, `m4`, `har`, `sleep`, `ecg`
+  - `model_type`: `sp` (Soft Prompt) or `flamingo` (Flamingo)
 
 Example: `OpenTSLM/llama-3.2-1b-ecg-flamingo`
 
```
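Since the naming pattern above is fully mechanical, repo IDs can be composed programmatically; a small illustrative helper (not part of the package):

```python
def build_repo_id(base_model: str, dataset: str, model_type: str) -> str:
    """Compose a HuggingFace repo ID as OpenTSLM/{base_model}-{dataset}-{model_type}."""
    return f"OpenTSLM/{base_model}-{dataset}-{model_type}"


print(build_repo_id("llama-3.2-1b", "ecg", "flamingo"))  # OpenTSLM/llama-3.2-1b-ecg-flamingo
```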
```diff
@@ -229,6 +237,24 @@ python curriculum_learning.py --model OpenTSLMFlamingo --eval_only
 - `--gradient_checkpointing`: Enable gradient checkpointing for memory efficiency
 - `--verbose`: Enable verbose logging
 
+### Helper Scripts
+
+Helper scripts for analysis, testing, and batch processing are available in the `scripts/` directory:
+
+**Shell Scripts:**
+- **`run_all_memory.sh`** - Run comprehensive memory usage analysis across all stages
+- **`run_all_memory_missing.sh`** - Run memory analysis for missing stages only
+
+**Python Scripts:**
+- **`create_doctor_eval_dataset.py`** - Create evaluation dataset for doctor assessments
+- **`get_memory_use.py`** - Analyze and report memory usage across stages
+- **`plot_memory_usage.py`** - Visualize memory usage patterns
+- **`plot_memory_simulation.py`** - Simulate and plot memory requirements
+- **`plot_memory_simulation_per_length.py`** - Analyze memory usage by sequence length
+- **`hf_test.py`** - Test HuggingFace model loading and inference
+
+These scripts can be customized by editing the parameters directly or by passing command-line arguments.
+
 ### Repository Naming Convention
 
 - Repository IDs ending with `-sp` will load and return `OpenTSLMSP` models
```
````diff
@@ -335,22 +361,24 @@ For researchers and project partners interested in collaboration opportunities,
 
 This project is licensed under the MIT License.
 
-We use the [REUSE specification](https://reuse.software/spec/) to ensure consistent and machine-readable licensing across the repository.
+OpenTSLM uses [REUSE specification](https://reuse.software/spec/) to ensure consistent and machine-readable licensing across the repository.
 
 To add or update license headers, run:
 
 ```bash
 reuse annotate --recursive \
-  --template opentslm \
   --copyright "Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)" \
+  --copyright "This source file is part of the OpenTSLM open-source project." \
   --license MIT \
-  --skip-unrecognised \
+  --skip-unrecognized \
   .
 ```
 
+
+
 <div align="left">
-  <img src="assets/stanford_biodesign_logo.png" alt="Stanford Biodesign" height="90">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
-  <img src="assets/CDHI_white.svg" alt="ETH Centre for Digital Health Interventions" height="90">
-  <img src="assets/ASLwhite.svg" alt="ETH Agentic Systems Lab" height="90">
+  <img src="https://raw.githubusercontent.com/StanfordBDHG/OpenTSLM/main/assets/stanford_biodesign_logo.png" alt="Stanford Biodesign" height="90">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+  <img src="https://raw.githubusercontent.com/StanfordBDHG/OpenTSLM/main/assets/CDHI_white.svg" alt="ETH Centre for Digital Health Interventions" height="90">
+  <img src="https://raw.githubusercontent.com/StanfordBDHG/OpenTSLM/main/assets/ASLwhite.svg" alt="ETH Agentic Systems Lab" height="90">
 
 </div>
````

REUSE.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 version = 1
 
 [[annotations]]
-path = ["data/**"]
+path = ["assets/**", "data/**", "**/*.png", "*.svg", "*.png", "**/*.pt", "**/*.jsonl", "**/*.json", ".gitignore", "**/uv.lock", "LICENSE.md", "**/requirements.txt"]
 SPDX-FileCopyrightText = "2025 Stanford University, ETH Zurich, and the project authors (see CONTRIBUTORS.md)"
 SPDX-License-Identifier = "MIT"
```
