Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 9 additions & 26 deletions app/api/routes/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
ModelsResponse,
TranscriptionResponse,
)
from app.core.config import OLLAMA_HOST, OLLAMA_MODEL, WHISPER_HOST, BASE_DIR, RETENTION_PERIOD_DAYS
from app.core.config import OLLAMA_HOST, OLLAMA_MODEL, BASE_DIR, RETENTION_PERIOD_DAYS
from app.services.whisper import call_whisper_asr
from app.core.errors.base import AppError
from app.db.repositories import create_form, get_template, get_form_submission, delete_form_submission
from app.models import FormSubmission
Expand Down Expand Up @@ -94,34 +95,16 @@ def transcribe(audio: UploadFile = File(...)):
audio is streamed straight through to the local STT service and never
persisted — no PII leaves the machine.
"""
whisper_url = f"{WHISPER_HOST}/asr"

files = {
"audio_file": (
audio.filename or "audio.wav",
try:
text = call_whisper_asr(
audio.file.read(),
audio.filename or "audio.wav",
audio.content_type or "audio/wav",
)
}
params = {"task": "transcribe", "output": "json", "encode": "true"}

try:
response = requests.post(whisper_url, params=params, files=files, timeout=120)
response.raise_for_status()
except requests.exceptions.ConnectionError:
raise AppError(
f"Could not connect to the speech-to-text service at {whisper_url}. "
"Please ensure the whisper service is running.",
status_code=503,
error_code="STT_UNAVAILABLE",
)
except requests.exceptions.RequestException as e:
raise AppError(f"Transcription failed: {e}", status_code=502, error_code="TRANSCRIPTION_FAILED")

try:
text = (response.json().get("text") or "").strip()
except ValueError:
text = response.text.strip()
except ConnectionError as exc:
raise AppError(str(exc), status_code=503, error_code="STT_UNAVAILABLE")
except RuntimeError as exc:
raise AppError(str(exc), status_code=502, error_code="TRANSCRIPTION_FAILED")

return TranscriptionResponse(text=text)

Expand Down
87 changes: 84 additions & 3 deletions app/api/routes/input.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,100 @@
from datetime import date
from pathlib import Path
from uuid import UUID

from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, File, Form, UploadFile
from fastapi.exceptions import RequestValidationError
from sqlmodel import Session

from app.api.deps import get_db
from app.api.schemas.enums import InputStatus
from app.api.schemas.input import InputRecordResponse, TextInputRequest, TextInputResponse
from app.core.config import INPUT_POLL_INTERVAL_SECONDS
from app.api.schemas.input import (
InputRecordResponse,
TextInputRequest,
TextInputResponse,
VoiceInputResponse,
)
from app.core.config import (
ALLOWED_AUDIO_EXTENSIONS,
AUDIO_CONTENT_TYPES,
ESTIMATED_TRANSCRIPTION_SECONDS,
INPUT_POLL_INTERVAL_SECONDS,
)
from app.core.errors.base import AppError
from app.db.repositories import create_input, get_input as repo_get_input
from app.services.input import InputService
from app.services.whisper import check_whisper_available

router = APIRouter(prefix="/input", tags=["input"])

_MAX_AUDIO_BYTES = 500 * 1024 * 1024 # 500 MB


@router.post("/voice", response_model=VoiceInputResponse, status_code=201)
def submit_voice_input(
audio_file: UploadFile = File(...),
station_id: str | None = Form(default=None),
responder_badge: str | None = Form(default=None),
incident_date_hint: date | None = Form(default=None),
db: Session = Depends(get_db),
):
# 415 — format check (HTTP concern, before any I/O)
filename = audio_file.filename or ""
ext = Path(filename).suffix.lstrip(".").lower()
if not ext or ext not in ALLOWED_AUDIO_EXTENSIONS:
raise AppError(
"Audio format not supported. Accepted formats: wav, mp3, m4a, ogg, webm",
status_code=415,
error_code="UNSUPPORTED_FORMAT",
detail={"accepted_formats": list(AUDIO_CONTENT_TYPES)},
)

# 413 — size check: fast path via Content-Length (no read), fallback via len(bytes)
if audio_file.size is not None and audio_file.size > _MAX_AUDIO_BYTES:
raise AppError(
"Audio file exceeds maximum size of 500MB",
status_code=413,
error_code="FILE_TOO_LARGE",
detail={"max_size_bytes": _MAX_AUDIO_BYTES, "received_size_bytes": audio_file.size},
)
content = audio_file.file.read()
if len(content) > _MAX_AUDIO_BYTES:
raise AppError(
"Audio file exceeds maximum size of 500MB",
status_code=413,
error_code="FILE_TOO_LARGE",
detail={"max_size_bytes": _MAX_AUDIO_BYTES, "received_size_bytes": len(content)},
)

# 503 — Whisper availability (HTTP concern)
if not check_whisper_available():
raise AppError(
"Whisper transcription service is not available",
status_code=503,
error_code="STT_UNAVAILABLE",
)

svc = InputService()
record, job = svc.process_voice_upload(
session=db,
audio_content=content,
ext=ext,
filename=filename,
station_id=station_id,
responder_badge=responder_badge,
incident_date_hint=incident_date_hint,
)

return VoiceInputResponse(
input_id=record.input_id,
status=record.status,
input_type=record.input_type,
job_id=job.job_id,
poll_url=f"/api/v1/input/{record.input_id}",
estimated_processing_seconds=ESTIMATED_TRANSCRIPTION_SECONDS,
created_at=record.created_at,
)


@router.post("/text", response_model=TextInputResponse, status_code=201)
def submit_text_input(body: TextInputRequest, db: Session = Depends(get_db)):
Expand Down
10 changes: 10 additions & 0 deletions app/api/schemas/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@
from app.api.schemas.enums import InputStatus, InputType


class VoiceInputResponse(BaseModel):
input_id: UUID
status: InputStatus
input_type: InputType
estimated_processing_seconds: int | None = None
created_at: datetime | None = None
job_id: str | None = None
poll_url: str | None = None


class TextInputRequest(BaseModel):
narrative: str = Field(min_length=20)
station_id: str | None = None
Expand Down
2 changes: 1 addition & 1 deletion app/core/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
result_expires=86400,
)

celery_app.conf.include = ["app.tasks.fill", "app.tasks.purge"]
celery_app.conf.include = ["app.tasks.fill", "app.tasks.purge", "app.tasks.transcribe"]

# Optional Celery Beat schedule — runs purge_old_submissions once a day.
# Enable by running: celery -A app.core.celery beat
Expand Down
21 changes: 20 additions & 1 deletion app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,23 @@
FIREFORM_API_KEY = os.getenv("FIREFORM_API_KEY", "")

# --- Data Retention --------------------------------------------------------
RETENTION_PERIOD_DAYS = int(os.getenv("RETENTION_PERIOD_DAYS", "30"))
RETENTION_PERIOD_DAYS = int(os.getenv("RETENTION_PERIOD_DAYS", "30"))

# --- Audio storage --------------------------------------------------------
# Voice input audio files land here: {AUDIO_DIR}/{input_id}.{ext}
AUDIO_DIR = DATA_DIR / "audio"

# Advisory estimate returned in VoiceInputResponse.estimated_processing_seconds.
ESTIMATED_TRANSCRIPTION_SECONDS = int(os.getenv("ESTIMATED_TRANSCRIPTION_SECONDS", "30"))

# Canonical audio format mapping — single source of truth for both the route
# (membership check, 415 detail list) and the task (content-type lookup).
# Dict insertion order gives the stable list shown in error responses.
AUDIO_CONTENT_TYPES: dict[str, str] = {
"wav": "audio/wav",
"mp3": "audio/mpeg",
"m4a": "audio/m4a",
"ogg": "audio/ogg",
"webm": "audio/webm",
}
ALLOWED_AUDIO_EXTENSIONS: frozenset[str] = frozenset(AUDIO_CONTENT_TYPES)
7 changes: 7 additions & 0 deletions app/db/repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,10 @@ def create_input(session: Session, input_obj: Input) -> Input:
def get_input(session: Session, input_id: UUID) -> Input | None:
return session.get(Input, input_id)


def update_input(session: Session, input_obj: Input) -> Input:
session.add(input_obj)
session.commit()
session.refresh(input_obj)
return input_obj

63 changes: 62 additions & 1 deletion app/services/input.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,71 @@
from datetime import date, datetime, timezone

from sqlmodel import Session

from app.api.schemas.enums import InputStatus, InputType
from app.models import Input
from app.core.config import AUDIO_CONTENT_TYPES, AUDIO_DIR
from app.db.repositories import create_input, create_job, update_job
from app.models import Input, Job
from app.tasks.transcribe import transcribe_audio_task


class InputService:
def build_voice_input(
self,
original_filename: str,
station_id: str | None = None,
responder_badge: str | None = None,
incident_date_hint: date | None = None,
) -> Input:
now = datetime.now(timezone.utc)
return Input(
input_type=InputType.voice,
status=InputStatus.queued,
original_filename=original_filename,
station_id=station_id,
responder_badge=responder_badge,
incident_date_hint=incident_date_hint,
created_at=now,
updated_at=now,
)

def process_voice_upload(
self,
session: Session,
audio_content: bytes,
ext: str,
filename: str,
station_id: str | None,
responder_badge: str | None,
incident_date_hint: date | None,
) -> tuple[Input, Job]:
record = self.build_voice_input(
original_filename=filename,
station_id=station_id,
responder_badge=responder_badge,
incident_date_hint=incident_date_hint,
)
record = create_input(session, record)

AUDIO_DIR.mkdir(parents=True, exist_ok=True)
audio_path = AUDIO_DIR / f"{record.input_id}.{ext}"
audio_path.write_bytes(audio_content)

# Create Job, dispatch, and backfill celery_task_id.
# On any failure after the file write, remove the orphaned audio file.
try:
job = Job(celery_task_id="", job_type="transcription", status="queued")
job = create_job(session, job)
result = transcribe_audio_task.delay(str(record.input_id), str(audio_path), job.job_id)
job.celery_task_id = result.id
job = update_job(session, job)
except Exception:
if audio_path.exists():
audio_path.unlink()
raise

return record, job

def build_text_input(
self,
narrative: str,
Expand Down
38 changes: 38 additions & 0 deletions app/services/whisper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import requests

from app.core.config import WHISPER_HOST


def call_whisper_asr(audio_bytes: bytes, filename: str, content_type: str) -> str:
"""Post audio to the local Whisper ASR sidecar and return the transcript.

Raises ConnectionError if the service is unreachable, RuntimeError for any
other HTTP failure. Callers map these to their own error codes.
"""
whisper_url = f"{WHISPER_HOST}/asr"
files = {"audio_file": (filename, audio_bytes, content_type)}
params = {"task": "transcribe", "output": "json", "encode": "true"}

try:
response = requests.post(whisper_url, params=params, files=files, timeout=120)
response.raise_for_status()
except requests.exceptions.ConnectionError as exc:
raise ConnectionError(
f"Could not connect to the speech-to-text service at {whisper_url}. "
"Please ensure the whisper service is running."
) from exc
except requests.exceptions.RequestException as exc:
raise RuntimeError(f"Transcription failed: {exc}") from exc

try:
return (response.json().get("text") or "").strip()
except ValueError:
return response.text.strip()


def check_whisper_available() -> bool:
"""Return True if the Whisper sidecar responds with a successful status."""
try:
return requests.get(WHISPER_HOST, timeout=3).ok
except requests.exceptions.RequestException:
return False
Loading
Loading