Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/accbase_checker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: Check failed cycles and samples for accbase

on:
schedule:
# run at 13:00 UTC on odd days of the month (1, 3, ..., 31), i.e. roughly every 2 days (offset from bedbase)
- cron: '0 13 1/2 * *'
workflow_dispatch:

jobs:
check:
runs-on: ubuntu-latest
env:
POSTGRES_DB: ${{ secrets.POSTGRES_DB }}
POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
POSTGRES_PORT: ${{ secrets.POSTGRES_PORT }}
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}

steps:
- uses: actions/checkout@v6

- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.12'

- name: Install package
run: python -m pip install .

- name: Check cycles 1-5
run: |
for cycle in 1 2 3 4 5; do
echo "Checking cycle $cycle..."
geopephub run-checker --target accbase --period 2 --cycle-count $cycle
done
34 changes: 34 additions & 0 deletions .github/workflows/accbase_uploader.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: Queue and upload Accbase projects

on:
schedule:
# run every 2 days at 12:00 (offset from bedbase)
- cron: '0 12 1/2 * *'
workflow_dispatch:

jobs:
upload:
runs-on: ubuntu-latest
env:
POSTGRES_DB: ${{ secrets.POSTGRES_DB }}
POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
POSTGRES_PORT: ${{ secrets.POSTGRES_PORT }}
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}

steps:
- uses: actions/checkout@v6

- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.12'

- name: Install package
run: python -m pip install .

- name: Add to queue
run: geopephub run-queuer --target accbase --period 2

- name: Upload to PEPhub
run: geopephub run-uploader --target accbase
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
This repository contains the `geopephub` CLI, which enables automatic upload of GEO projects to PEPhub based on date, as well as scheduled automatic uploading using GitHub Actions.
Additionally, the CLI includes a download command that lets users retrieve projects from a specified namespace directly from the PEPhub database. This feature is particularly helpful for downloading all GEO projects at once.

## Supported Targets

The pipeline supports three targets:

- **geo**: All GEO projects (default behavior, no filtering)
- **bedbase**: BED file projects - filters GEO for BED, narrowPeak, and broadPeak files, uploads to the `bedbase` namespace
- **accbase**: Chromatin accessibility projects - filters GEO for ATAC-seq, scATAC-seq, and DNase-seq assays, uploads to the `accbase` namespace

## Installation
To install `geopephub` use this command:
```
Expand Down
4 changes: 2 additions & 2 deletions geopephub/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@


def validate_target(value: str) -> str:
    """
    Validate a target name and return it in lowercase.

    The check is case-insensitive: the value is lowercased before it is
    compared against the supported targets.

    :param value: target name to validate
    :return: the lowercased target name
    :raises typer.BadParameter: if the value is not a supported target
    """
    valid_target = ["geo", "bedbase", "accbase"]
    # Lowercase once so the membership check and the return value agree.
    normalized = value.lower()
    if normalized not in valid_target:
        # Report the value exactly as the user typed it.
        raise typer.BadParameter(
            f"Invalid target '{value}'. Choose from: {', '.join(valid_target)}"
        )
    return normalized

Expand Down
4 changes: 4 additions & 0 deletions geopephub/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@
POSTGRES_DIALECT = "postgresql+psycopg"

BEDBASE_MAX_SIZE = "500MB"

# Accbase specific constants
ACCBASE_FINDER_FILTER = "((ATAC-seq) OR (scATAC-seq) OR (DNase-seq))"
ACCBASE_MAX_SIZE = "1GB"
17 changes: 16 additions & 1 deletion geopephub/metageo_pephub.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import peppy

from geopephub.const import LAST_UPDATE_DATES, BEDBASE_MAX_SIZE
from geopephub.const import LAST_UPDATE_DATES, BEDBASE_MAX_SIZE, ACCBASE_FINDER_FILTER, ACCBASE_MAX_SIZE
from geopephub.utils import get_agent, get_base_db_engine
from geopephub.models import StatusModel, CycleModel
from geopephub.utils import run_geofetch, add_link_to_description
Expand Down Expand Up @@ -59,6 +59,11 @@ def add_to_queue_by_period(
gse_list = geofetch.Finder(
filters="((bed) OR narrowPeak) OR broadPeak"
).get_gse_by_date(start_date_str, today_date_str)
elif target == "accbase":
# get chromatin accessibility projects (ATAC-seq, scATAC-seq, DNase-seq)
gse_list = geofetch.Finder(
filters=ACCBASE_FINDER_FILTER
).get_gse_by_date(start_date_str, today_date_str)
elif target == "geo":
gse_list = geofetch.Finder().get_gse_by_date(start_date_str, today_date_str)
else:
Expand Down Expand Up @@ -195,6 +200,14 @@ def _upload_gse_project(
data_source="all",
processed=True,
)
elif target == "accbase":
# For accbase, we want all files from ATAC-seq/DNase-seq projects
# No file extension filter - we filter by assay type in the Finder
geofetcher_obj = geofetch.Geofetcher(
filter_size=ACCBASE_MAX_SIZE,
data_source="all",
processed=True,
)
else:
geofetcher_obj = geofetch.Geofetcher()
total_nb = len(log_model_dict.keys())
Expand Down Expand Up @@ -254,6 +267,8 @@ def _upload_gse_project(
gse_log.status_info = "pepdbagent"
if target == "bedbase":
tag = pep_tag
elif target == "accbase":
tag = pep_tag
else:
tag = "default"
try:
Expand Down
Loading