fireform-core · marcvergees · Jul 1, 2026 · Jun 28, 2026 · Jun 29, 2026
diff --git a/app/api/routes/forms.py b/app/api/routes/forms.py
@@ -11,7 +11,8 @@
     ModelsResponse,
     TranscriptionResponse,
 )
-from app.core.config import OLLAMA_HOST, OLLAMA_MODEL, WHISPER_HOST, BASE_DIR, RETENTION_PERIOD_DAYS
+from app.core.config import OLLAMA_HOST, OLLAMA_MODEL, BASE_DIR, RETENTION_PERIOD_DAYS
+from app.services.whisper import call_whisper_asr
 from app.core.errors.base import AppError
 from app.db.repositories import create_form, get_template, get_form_submission, delete_form_submission
 from app.models import FormSubmission
@@ -94,34 +95,16 @@ def transcribe(audio: UploadFile = File(...)):
     audio is streamed straight through to the local STT service and never
     persisted — no PII leaves the machine.
     """
-    whisper_url = f"{WHISPER_HOST}/asr"
-
-    files = {
-        "audio_file": (
-            audio.filename or "audio.wav",
+    try:
+        text = call_whisper_asr(
             audio.file.read(),
+            audio.filename or "audio.wav",
             audio.content_type or "audio/wav",
         )
-    }
-    params = {"task": "transcribe", "output": "json", "encode": "true"}
-
-    try:
-        response = requests.post(whisper_url, params=params, files=files, timeout=120)
-        response.raise_for_status()
-    except requests.exceptions.ConnectionError:
-        raise AppError(
-            f"Could not connect to the speech-to-text service at {whisper_url}. "
-            "Please ensure the whisper service is running.",
-            status_code=503,
-            error_code="STT_UNAVAILABLE",
-        )
-    except requests.exceptions.RequestException as e:
-        raise AppError(f"Transcription failed: {e}", status_code=502, error_code="TRANSCRIPTION_FAILED")
-
-    try:
-        text = (response.json().get("text") or "").strip()
-    except ValueError:
-        text = response.text.strip()
+    except ConnectionError as exc:
+        raise AppError(str(exc), status_code=503, error_code="STT_UNAVAILABLE")
+    except RuntimeError as exc:
+        raise AppError(str(exc), status_code=502, error_code="TRANSCRIPTION_FAILED")
 
     return TranscriptionResponse(text=text)
 

diff --git a/app/api/routes/input.py b/app/api/routes/input.py
@@ -1,19 +1,100 @@
+from datetime import date
+from pathlib import Path
 from uuid import UUID
 
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, File, Form, UploadFile
 from fastapi.exceptions import RequestValidationError
 from sqlmodel import Session
 
 from app.api.deps import get_db
 from app.api.schemas.enums import InputStatus
-from app.api.schemas.input import InputRecordResponse, TextInputRequest, TextInputResponse
-from app.core.config import INPUT_POLL_INTERVAL_SECONDS
+from app.api.schemas.input import (
+    InputRecordResponse,
+    TextInputRequest,
+    TextInputResponse,
+    VoiceInputResponse,
+)
+from app.core.config import (
+    ALLOWED_AUDIO_EXTENSIONS,
+    AUDIO_CONTENT_TYPES,
+    ESTIMATED_TRANSCRIPTION_SECONDS,
+    INPUT_POLL_INTERVAL_SECONDS,
+)
 from app.core.errors.base import AppError
 from app.db.repositories import create_input, get_input as repo_get_input
 from app.services.input import InputService
+from app.services.whisper import check_whisper_available
 
 router = APIRouter(prefix="/input", tags=["input"])
 
+_MAX_AUDIO_BYTES = 500 * 1024 * 1024  # 500 MB
+
+
+@router.post("/voice", response_model=VoiceInputResponse, status_code=201)
+def submit_voice_input(
+    audio_file: UploadFile = File(...),
+    station_id: str | None = Form(default=None),
+    responder_badge: str | None = Form(default=None),
+    incident_date_hint: date | None = Form(default=None),
+    db: Session = Depends(get_db),
+):
+    # 415 — format check (HTTP concern, before any I/O)
+    filename = audio_file.filename or ""
+    ext = Path(filename).suffix.lstrip(".").lower()
+    if not ext or ext not in ALLOWED_AUDIO_EXTENSIONS:
+        raise AppError(
+            "Audio format not supported. Accepted formats: wav, mp3, m4a, ogg, webm",
+            status_code=415,
+            error_code="UNSUPPORTED_FORMAT",
+            detail={"accepted_formats": list(AUDIO_CONTENT_TYPES)},
+        )
+
+    # 413 — size check: fast path via Content-Length (no read), fallback via len(bytes)
+    if audio_file.size is not None and audio_file.size > _MAX_AUDIO_BYTES:
+        raise AppError(
+            "Audio file exceeds maximum size of 500MB",
+            status_code=413,
+            error_code="FILE_TOO_LARGE",
+            detail={"max_size_bytes": _MAX_AUDIO_BYTES, "received_size_bytes": audio_file.size},
+        )
+    content = audio_file.file.read()
+    if len(content) > _MAX_AUDIO_BYTES:
+        raise AppError(
+            "Audio file exceeds maximum size of 500MB",
+            status_code=413,
+            error_code="FILE_TOO_LARGE",
+            detail={"max_size_bytes": _MAX_AUDIO_BYTES, "received_size_bytes": len(content)},
+        )
+
+    # 503 — Whisper availability (HTTP concern)
+    if not check_whisper_available():
+        raise AppError(
+            "Whisper transcription service is not available",
+            status_code=503,
+            error_code="STT_UNAVAILABLE",
+        )
+
+    svc = InputService()
+    record, job = svc.process_voice_upload(
+        session=db,
+        audio_content=content,
+        ext=ext,
+        filename=filename,
+        station_id=station_id,
+        responder_badge=responder_badge,
+        incident_date_hint=incident_date_hint,
+    )
+
+    return VoiceInputResponse(
+        input_id=record.input_id,
+        status=record.status,
+        input_type=record.input_type,
+        job_id=job.job_id,
+        poll_url=f"/api/v1/input/{record.input_id}",
+        estimated_processing_seconds=ESTIMATED_TRANSCRIPTION_SECONDS,
+        created_at=record.created_at,
+    )
+
 
 @router.post("/text", response_model=TextInputResponse, status_code=201)
 def submit_text_input(body: TextInputRequest, db: Session = Depends(get_db)):

diff --git a/app/api/schemas/input.py b/app/api/schemas/input.py
@@ -8,6 +8,16 @@
 from app.api.schemas.enums import InputStatus, InputType
 
 
+class VoiceInputResponse(BaseModel):
+    input_id: UUID
+    status: InputStatus
+    input_type: InputType
+    estimated_processing_seconds: int | None = None
+    created_at: datetime | None = None
+    job_id: str | None = None
+    poll_url: str | None = None
+
+
 class TextInputRequest(BaseModel):
     narrative: str = Field(min_length=20)
     station_id: str | None = None

diff --git a/app/core/celery.py b/app/core/celery.py
@@ -16,7 +16,7 @@
     result_expires=86400,
 )
 
-celery_app.conf.include = ["app.tasks.fill", "app.tasks.purge"]
+celery_app.conf.include = ["app.tasks.fill", "app.tasks.purge", "app.tasks.transcribe"]
 
 # Optional Celery Beat schedule — runs purge_old_submissions once a day.
 # Enable by running: celery -A app.core.celery beat

diff --git a/app/core/config.py b/app/core/config.py
@@ -65,4 +65,23 @@
 FIREFORM_API_KEY = os.getenv("FIREFORM_API_KEY", "")
 
 # --- Data Retention --------------------------------------------------------
-RETENTION_PERIOD_DAYS = int(os.getenv("RETENTION_PERIOD_DAYS", "30"))
+RETENTION_PERIOD_DAYS = int(os.getenv("RETENTION_PERIOD_DAYS", "30"))
+
+# --- Audio storage --------------------------------------------------------
+# Voice input audio files land here: {AUDIO_DIR}/{input_id}.{ext}
+AUDIO_DIR = DATA_DIR / "audio"
+
+# Advisory estimate returned in VoiceInputResponse.estimated_processing_seconds.
+ESTIMATED_TRANSCRIPTION_SECONDS = int(os.getenv("ESTIMATED_TRANSCRIPTION_SECONDS", "30"))
+
+# Canonical audio format mapping — single source of truth for both the route
+# (membership check, 415 detail list) and the task (content-type lookup).
+# Dict insertion order gives the stable list shown in error responses.
+AUDIO_CONTENT_TYPES: dict[str, str] = {
+    "wav": "audio/wav",
+    "mp3": "audio/mpeg",
+    "m4a": "audio/m4a",
+    "ogg": "audio/ogg",
+    "webm": "audio/webm",
+}
+ALLOWED_AUDIO_EXTENSIONS: frozenset[str] = frozenset(AUDIO_CONTENT_TYPES)
diff --git a/app/db/repositories.py b/app/db/repositories.py
@@ -81,3 +81,10 @@ def create_input(session: Session, input_obj: Input) -> Input:
 def get_input(session: Session, input_id: UUID) -> Input | None:
     return session.get(Input, input_id)
 
+
+def update_input(session: Session, input_obj: Input) -> Input:
+    session.add(input_obj)
+    session.commit()
+    session.refresh(input_obj)
+    return input_obj
+
diff --git a/app/services/input.py b/app/services/input.py
@@ -1,10 +1,71 @@
 from datetime import date, datetime, timezone
 
+from sqlmodel import Session
+
 from app.api.schemas.enums import InputStatus, InputType
-from app.models import Input
+from app.core.config import AUDIO_CONTENT_TYPES, AUDIO_DIR
+from app.db.repositories import create_input, create_job, update_job
+from app.models import Input, Job
+from app.tasks.transcribe import transcribe_audio_task
 
 
 class InputService:
+    def build_voice_input(
+        self,
+        original_filename: str,
+        station_id: str | None = None,
+        responder_badge: str | None = None,
+        incident_date_hint: date | None = None,
+    ) -> Input:
+        now = datetime.now(timezone.utc)
+        return Input(
+            input_type=InputType.voice,
+            status=InputStatus.queued,
+            original_filename=original_filename,
+            station_id=station_id,
+            responder_badge=responder_badge,
+            incident_date_hint=incident_date_hint,
+            created_at=now,
+            updated_at=now,
+        )
+
+    def process_voice_upload(
+        self,
+        session: Session,
+        audio_content: bytes,
+        ext: str,
+        filename: str,
+        station_id: str | None,
+        responder_badge: str | None,
+        incident_date_hint: date | None,
+    ) -> tuple[Input, Job]:
+        record = self.build_voice_input(
+            original_filename=filename,
+            station_id=station_id,
+            responder_badge=responder_badge,
+            incident_date_hint=incident_date_hint,
+        )
+        record = create_input(session, record)
+
+        AUDIO_DIR.mkdir(parents=True, exist_ok=True)
+        audio_path = AUDIO_DIR / f"{record.input_id}.{ext}"
+        audio_path.write_bytes(audio_content)
+
+        # Create Job, dispatch, and backfill celery_task_id.
+        # On any failure after the file write, remove the orphaned audio file.
+        try:
+            job = Job(celery_task_id="", job_type="transcription", status="queued")
+            job = create_job(session, job)
+            result = transcribe_audio_task.delay(str(record.input_id), str(audio_path), job.job_id)
+            job.celery_task_id = result.id
+            job = update_job(session, job)
+        except Exception:
+            if audio_path.exists():
+                audio_path.unlink()
+            raise
+
+        return record, job
+
     def build_text_input(
         self,
         narrative: str,

diff --git a/app/services/whisper.py b/app/services/whisper.py
@@ -0,0 +1,38 @@
+import requests
+
+from app.core.config import WHISPER_HOST
+
+
+def call_whisper_asr(audio_bytes: bytes, filename: str, content_type: str) -> str:
+    """Post audio to the local Whisper ASR sidecar and return the transcript.
+
+    Raises ConnectionError if the service is unreachable, RuntimeError for any
+    other HTTP failure. Callers map these to their own error codes.
+    """
+    whisper_url = f"{WHISPER_HOST}/asr"
+    files = {"audio_file": (filename, audio_bytes, content_type)}
+    params = {"task": "transcribe", "output": "json", "encode": "true"}
+
+    try:
+        response = requests.post(whisper_url, params=params, files=files, timeout=120)
+        response.raise_for_status()
+    except requests.exceptions.ConnectionError as exc:
+        raise ConnectionError(
+            f"Could not connect to the speech-to-text service at {whisper_url}. "
+            "Please ensure the whisper service is running."
+        ) from exc
+    except requests.exceptions.RequestException as exc:
+        raise RuntimeError(f"Transcription failed: {exc}") from exc
+
+    try:
+        return (response.json().get("text") or "").strip()
+    except ValueError:
+        return response.text.strip()
+
+
+def check_whisper_available() -> bool:
+    """Return True if the Whisper sidecar responds with a successful status."""
+    try:
+        return requests.get(WHISPER_HOST, timeout=3).ok
+    except requests.exceptions.RequestException:
+        return False