{"openapi":"3.0.3","info":{"title":"haven-voice-gateway","version":"1.0.0","description":"Request/response voice pipeline gateway for the Haiven AI platform.\n\nRoutes audio or text through a three-stage pipeline:\n1. **STT** — haiven-transcribe converts audio to transcript\n2. **Orchestrator** — haiven-orchestrator classifies intent and dispatches to an agent\n3. **TTS** — haven-tts-gateway synthesizes the response as spoken audio\n\nThree interaction modes are available:\n- `POST /voice` — Audio in, spoken audio out (full pipeline)\n- `POST /voice/text` — Text in, spoken audio out (skips STT)\n- `POST /voice/note` — Audio in, JSON confirmation out (capture without playback)\n\n**Privacy:** Audio bytes are zeroed in memory and discarded immediately after STT\nprocessing. No audio data is written to disk.\n\n**Authentication:** All endpoints (except `/health` and `/metrics`) require a valid\nAuthentik SSO session. Unauthenticated requests are redirected to the Authentik\nlogin page by the Traefik middleware.\n","contact":{"name":"Haiven Platform"},"license":{"name":"Private — Haiven internal use only"}},"servers":[{"url":"https://voice.haiven.site","description":"Production (via Traefik, Authentik SSO required)"},{"url":"http://localhost:8490","description":"Direct host access (bypasses SSO — for operator use only)"},{"url":"http://haven-voice-gateway:8000","description":"Internal Docker network (backend services only)"}],"tags":[{"name":"voice","description":"Voice pipeline endpoints"},{"name":"system","description":"Health and observability endpoints"}],"paths":{"/voice":{"post":{"operationId":"voiceToVoice","summary":"Voice-to-voice pipeline","description":"Full pipeline: audio → STT → orchestrator → TTS → audio.\n\nAccepts an uploaded audio file and an optional session identifier.\nTranscribes the audio using haiven-transcribe, routes the transcript\nthrough haiven-orchestrator for intent classification and agent dispatch,\nsynthesizes the response with 
haven-tts-gateway, and returns the spoken\nreply as a streaming WAV file.\n\nPer-stage latency is reported in response headers for observability.\nAudio bytes are zeroed and discarded after transcription — nothing is\nwritten to disk.\n","tags":["voice"],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"type":"object","required":["file"],"properties":{"file":{"type":"string","format":"binary","description":"Audio recording to transcribe. Accepted formats: WAV, MP3, FLAC,\nOGG/Opus, M4A/AAC. Recommended: 16kHz mono WAV for lowest\ntranscription latency.\n"},"session_id":{"type":"string","format":"uuid","description":"Optional conversation session identifier. Pass the same UUID across\nmultiple turns to maintain context in the orchestrator. A new UUID\nis generated internally if omitted.\n","example":"550e8400-e29b-41d4-a716-446655440000"}}}}}},"responses":{"200":{"description":"Spoken audio response (streaming WAV)","headers":{"X-Request-Id":{"description":"Unique identifier for this request, for log correlation.","schema":{"type":"string","format":"uuid"}},"X-Total-Latency-Ms":{"description":"Wall-clock time for the entire pipeline in milliseconds.","schema":{"type":"integer","example":2420}},"X-STT-Latency-Ms":{"description":"Time spent in haiven-transcribe in milliseconds.","schema":{"type":"integer","example":800}},"X-Orch-Latency-Ms":{"description":"Time spent in haiven-orchestrator in milliseconds.","schema":{"type":"integer","example":1500}},"X-TTS-Latency-Ms":{"description":"Time spent in haven-tts-gateway in milliseconds.","schema":{"type":"integer","example":120}},"X-Intent":{"description":"Intent label returned by the orchestrator\n(e.g. 
`calendar_query`, `voice_note`, `general_chat`).\n","schema":{"type":"string","example":"calendar_query"}}},"content":{"audio/wav":{"schema":{"type":"string","format":"binary","description":"Streaming WAV audio of the spoken response."}}}},"400":{"description":"Missing or invalid audio file in the request body.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"502":{"description":"One or more upstream services (STT, orchestrator, TTS) are unreachable.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"504":{"description":"An upstream service did not respond within the timeout window.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}}},"/voice/text":{"post":{"operationId":"textToVoice","summary":"Text-to-voice pipeline","description":"Skips the STT stage. Sends text directly to the orchestrator, then\nsynthesizes the response as spoken audio.\n\nUseful when the caller already has text (typed input, a downstream\nservice, or a pre-processed transcript) and wants a spoken reply\nwithout incurring transcription latency.\n","tags":["voice"],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/TextToVoiceRequest"},"examples":{"simple":{"summary":"Simple query","value":{"text":"What's on my calendar today?"}},"with_session":{"summary":"Query with session context","value":{"text":"Move the 3pm meeting to 4pm","session_id":"550e8400-e29b-41d4-a716-446655440000"}}}}}},"responses":{"200":{"description":"Spoken audio response (streaming WAV)","headers":{"X-Request-Id":{"description":"Unique identifier for this request.","schema":{"type":"string","format":"uuid"}},"X-Total-Latency-Ms":{"description":"Wall-clock time for the orchestrator + TTS stages in milliseconds.","schema":{"type":"integer","example":1620}},"X-Orch-Latency-Ms":{"description":"Time spent in haiven-orchestrator in 
milliseconds.","schema":{"type":"integer","example":1500}},"X-TTS-Latency-Ms":{"description":"Time spent in haven-tts-gateway in milliseconds.","schema":{"type":"integer","example":120}},"X-Intent":{"description":"Intent label returned by the orchestrator.","schema":{"type":"string","example":"general_chat"}}},"content":{"audio/wav":{"schema":{"type":"string","format":"binary","description":"Streaming WAV audio of the spoken response."}}}},"400":{"description":"Missing or invalid request body.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"422":{"description":"Request body failed validation (e.g. `text` field missing).","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ValidationError"}}}},"502":{"description":"Orchestrator or TTS service is unreachable.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"504":{"description":"An upstream service timed out.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}}},"/voice/note":{"post":{"operationId":"captureVoiceNote","summary":"Capture a voice note","description":"Voice note capture pipeline: audio → STT → orchestrator (voice_note intent) → JSON.\n\nTranscribes the uploaded audio and routes it to the orchestrator with the\n`voice_note` intent. The orchestrator ingests the note (e.g. stores it in\nthe knowledge base or task tracker) and returns a confirmation.\n\nNo audio is synthesized — the response is JSON, making this suitable for\nquick capture flows where playback is unnecessary.\n\nAudio bytes are zeroed and discarded after transcription, consistent with\nall other pipeline modes.\n","tags":["voice"],"requestBody":{"required":true,"content":{"multipart/form-data":{"schema":{"type":"object","required":["file"],"properties":{"file":{"type":"string","format":"binary","description":"Audio recording of the note. 
Accepted formats: WAV, MP3, FLAC,\nOGG/Opus, M4A/AAC.\n"}}}}}},"responses":{"200":{"description":"Voice note successfully transcribed and ingested.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/VoiceNoteResponse"},"examples":{"success":{"summary":"Note ingested","value":{"transcript":"The vendor agreed to 30 days net terms","ingested":true,"message":"Note recorded."}},"not_ingested":{"summary":"Transcribed but not stored","value":{"transcript":"Reminder to call the client back","ingested":false,"message":"Transcription succeeded but the orchestrator could not store the note."}}}}}},"400":{"description":"Missing or invalid audio file.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"502":{"description":"STT or orchestrator service is unreachable.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"504":{"description":"An upstream service timed out.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}}},"/health":{"get":{"operationId":"getHealth","summary":"Service health check","description":"Checks connectivity to all three upstream services (STT, TTS, orchestrator)\nand returns per-service status.\n\n`status` is `\"healthy\"` when all upstreams respond. It is `\"degraded\"` when\none or more upstreams are unreachable. Individual service fields reflect\nper-service state.\n\nThis endpoint is not protected by SSO and can be called by monitoring\nagents and Traefik health probes without authentication.\n","tags":["system"],"responses":{"200":{"description":"Health status returned. 
Note: HTTP 200 is returned even when `status`\nis `\"degraded\"` — the caller should inspect the response body.\n","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"},"examples":{"healthy":{"summary":"All upstreams reachable","value":{"status":"healthy","stt":"up","tts":"up","orchestrator":"up"}},"degraded":{"summary":"One upstream down","value":{"status":"degraded","stt":"up","tts":"down","orchestrator":"up"}}}}}}}}},"/metrics":{"get":{"operationId":"getMetrics","summary":"Prometheus metrics","description":"Exposes Prometheus-format metrics for scraping. Scraped automatically\nby the Prometheus instance via the `prometheus.scrape=true` Docker label\non the container.\n\nNot protected by SSO (scraped on the internal Docker network).\n","tags":["system"],"responses":{"200":{"description":"Prometheus text format metrics","content":{"text/plain":{"schema":{"type":"string","description":"Prometheus exposition format metrics."}}}}}}}},"components":{"schemas":{"TextToVoiceRequest":{"type":"object","required":["text"],"properties":{"text":{"type":"string","description":"Text to route through the orchestrator and synthesize as audio.","minLength":1,"example":"What's on my calendar today?"},"session_id":{"type":"string","format":"uuid","description":"Optional conversation session identifier. 
Pass the same UUID across\nmultiple turns to maintain context in the orchestrator.\n","example":"550e8400-e29b-41d4-a716-446655440000"}}},"VoiceNoteResponse":{"type":"object","required":["transcript","ingested","message"],"properties":{"transcript":{"type":"string","description":"Raw STT output — the unmodified transcript produced by haiven-transcribe.","example":"The vendor agreed to 30 days net terms"},"ingested":{"type":"boolean","description":"Whether the orchestrator confirmed the note was successfully stored.\nIf `false`, the transcript is still returned but storage failed.\n","example":true},"message":{"type":"string","description":"Human-readable status message.","example":"Note recorded."}}},"HealthResponse":{"type":"object","required":["status","stt","tts","orchestrator"],"properties":{"status":{"type":"string","enum":["healthy","degraded"],"description":"`healthy` — all upstream services are reachable.\n`degraded` — one or more upstream services are unreachable.\n","example":"healthy"},"stt":{"type":"string","enum":["up","down"],"description":"Reachability of haiven-transcribe.","example":"up"},"tts":{"type":"string","enum":["up","down"],"description":"Reachability of haven-tts-gateway.","example":"up"},"orchestrator":{"type":"string","enum":["up","down"],"description":"Reachability of haiven-orchestrator.","example":"up"}}},"Error":{"type":"object","required":["detail"],"properties":{"detail":{"type":"string","description":"Human-readable error description.","example":"STT service unreachable"}}},"ValidationError":{"type":"object","description":"FastAPI/Pydantic validation error response.","required":["detail"],"properties":{"detail":{"type":"array","items":{"type":"object","required":["loc","msg","type"],"properties":{"loc":{"type":"array","items":{"type":"string"},"description":"Path to the field that failed validation.","example":["body","text"]},"msg":{"type":"string","description":"Human-readable validation error message.","example":"field 
required"},"type":{"type":"string","description":"Pydantic error type code.","example":"value_error.missing"}}}}}}}}}