{"openapi":"3.1.0","info":{"title":"Piper TTS API","description":"Fast CPU-based text-to-speech API using the Piper neural TTS engine.\n\nProvides an OpenAI-compatible `/v1/audio/speech` endpoint as a drop-in\nreplacement for OpenAI's TTS API. Runs entirely on CPU, making it\nideal for high-throughput speech synthesis without GPU resource contention.\n\n## Features\n- 30 English voice models at varying quality levels\n- OpenAI API compatibility\n- Sub-second latency for typical requests\n- Prometheus metrics for monitoring\n- Voice caching for performance\n\n## Access Points\n- Direct: `https://piper.haiven.site`\n- Gateway: `https://ai.haiven.site/v1/audio/speech`\n- Internal: `http://piper-api:5000`\n","version":"1.0.0","contact":{"name":"Haiven Infrastructure","url":"https://docs.haiven.site"},"license":{"name":"MIT"}},"servers":[{"url":"https://piper.haiven.site","description":"Production (Direct)"},{"url":"https://ai.haiven.site","description":"Production (Unified AI Gateway)"},{"url":"http://piper-api:5000","description":"Internal Docker Network"},{"url":"http://localhost:5000","description":"Local Development"}],"tags":[{"name":"Speech Synthesis","description":"Text-to-speech generation endpoints"},{"name":"Voice Management","description":"Voice model listing and information"},{"name":"Health","description":"Service health and monitoring"}],"paths":{"/":{"get":{"tags":["Health"],"summary":"Service information","description":"Returns API service information and available endpoints","operationId":"getServiceInfo","responses":{"200":{"description":"Service information","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ServiceInfo"},"example":{"service":"Piper TTS API","version":"1.0.0","description":"OpenAI-compatible TTS API wrapper for Piper","endpoints":{"health":"/health","voices":"/v1/voices","models":"/v1/models","speech":"/v1/audio/speech (POST)"}}}}}}}},"/health":{"get":{"tags":["Health"],"summary":"Health check","description":"Returns service 
health status including voice availability","operationId":"healthCheck","responses":{"200":{"description":"Service is healthy","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"},"example":{"status":"healthy","service":"piper-tts-api","voices_available":30,"voices_loaded":3}}}}}}},"/metrics":{"get":{"tags":["Health"],"summary":"Prometheus metrics","description":"Returns Prometheus-format metrics for monitoring","operationId":"getMetrics","responses":{"200":{"description":"Prometheus metrics","content":{"text/plain":{"schema":{"type":"string"},"example":"# HELP tts_requests_total Total TTS requests\n# TYPE tts_requests_total counter\ntts_requests_total{status=\"success\",voice=\"en_US-lessac-medium\"} 42\n"}}}}}},"/v1/voices":{"get":{"tags":["Voice Management"],"summary":"List available voices","description":"Returns all available Piper voice models in OpenAI-compatible format","operationId":"listVoices","responses":{"200":{"description":"List of voice models","content":{"application/json":{"schema":{"$ref":"#/components/schemas/VoiceList"},"example":{"object":"list","data":[{"id":"en_US-lessac-medium","object":"voice","name":"en_US-lessac-medium"},{"id":"en_US-amy-medium","object":"voice","name":"en_US-amy-medium"},{"id":"en_US-ryan-high","object":"voice","name":"en_US-ryan-high"}]}}}}}}},"/v1/models":{"get":{"tags":["Voice Management"],"summary":"List available models","description":"Returns voice models as 'models' for OpenAI API compatibility","operationId":"listModels","responses":{"200":{"description":"List of models","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelList"},"example":{"object":"list","data":[{"id":"en_US-lessac-medium","object":"model","created":0,"owned_by":"piper-tts"},{"id":"en_US-amy-medium","object":"model","created":0,"owned_by":"piper-tts"}]}}}}}}},"/v1/audio/speech":{"post":{"tags":["Speech Synthesis"],"summary":"Generate speech from text","description":"OpenAI-compatible 
text-to-speech endpoint. Converts input text to\nspoken audio using the specified voice model.\n\n## Request Parameters\n- `input` (required): Text to synthesize (max 10,000 characters)\n- `voice` or `model`: Voice model ID (default: en_US-lessac-medium)\n- `response_format`: Output format (only \"wav\" currently supported)\n- `speed`: Speech rate (not yet implemented)\n\n## Response\nReturns binary WAV audio data (16-bit PCM).\n\n## Performance Notes\n- First request for a voice may take 1-2 seconds to load the model\n- Subsequent requests use cached models (~100-500ms for typical text)\n- Very long text (>5000 chars) may take several seconds\n","operationId":"createSpeech","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SpeechRequest"},"examples":{"basic":{"summary":"Basic request","value":{"input":"Hello, this is a test of the Piper TTS service."}},"with_voice":{"summary":"With voice selection","value":{"input":"Hello from Amy!","voice":"en_US-amy-medium"}},"openai_compatible":{"summary":"OpenAI-compatible format","value":{"model":"en_US-ryan-high","input":"This uses the model parameter for compatibility."}}}}}},"responses":{"200":{"description":"Successfully generated audio","content":{"audio/wav":{"schema":{"type":"string","format":"binary","description":"16-bit PCM WAV audio file"}}},"headers":{"Content-Disposition":{"schema":{"type":"string","example":"attachment; filename=\"speech.wav\""},"description":"Suggested filename for download"}}},"400":{"description":"Invalid request","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"examples":{"missing_input":{"summary":"Missing input text","value":{"error":"Missing required field: input"}},"text_too_long":{"summary":"Text exceeds limit","value":{"error":"Input text too long (max 10000 characters)"}},"invalid_json":{"summary":"Invalid JSON body","value":{"error":"Request body must be JSON"}}}}}},"500":{"description":"Server 
error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"},"examples":{"synthesis_failed":{"summary":"Synthesis failure","value":{"error":"Failed to synthesize speech with voice: invalid-voice"}}}}}}}}}},"components":{"schemas":{"ServiceInfo":{"type":"object","required":["service","version","description","endpoints"],"properties":{"service":{"type":"string","description":"Service name","example":"Piper TTS API"},"version":{"type":"string","description":"API version","example":"1.0.0"},"description":{"type":"string","description":"Service description","example":"OpenAI-compatible TTS API wrapper for Piper"},"endpoints":{"type":"object","description":"Available endpoints","additionalProperties":{"type":"string"}}}},"HealthResponse":{"type":"object","required":["status","service","voices_available","voices_loaded"],"properties":{"status":{"type":"string","enum":["healthy","unhealthy"],"description":"Service health status","example":"healthy"},"service":{"type":"string","description":"Service identifier","example":"piper-tts-api"},"voices_available":{"type":"integer","description":"Number of voice models available on disk","example":30},"voices_loaded":{"type":"integer","description":"Number of voice models currently loaded in memory","example":3}}},"VoiceList":{"type":"object","required":["object","data"],"properties":{"object":{"type":"string","enum":["list"],"description":"Object type","example":"list"},"data":{"type":"array","description":"Array of voice objects","items":{"$ref":"#/components/schemas/Voice"}}}},"Voice":{"type":"object","required":["id","object","name"],"properties":{"id":{"type":"string","description":"Voice model identifier","example":"en_US-lessac-medium"},"object":{"type":"string","enum":["voice"],"description":"Object type","example":"voice"},"name":{"type":"string","description":"Voice display 
name","example":"en_US-lessac-medium"}}},"ModelList":{"type":"object","required":["object","data"],"properties":{"object":{"type":"string","enum":["list"],"description":"Object type","example":"list"},"data":{"type":"array","description":"Array of model objects","items":{"$ref":"#/components/schemas/Model"}}}},"Model":{"type":"object","required":["id","object","created","owned_by"],"properties":{"id":{"type":"string","description":"Model identifier","example":"en_US-lessac-medium"},"object":{"type":"string","enum":["model"],"description":"Object type","example":"model"},"created":{"type":"integer","description":"Creation timestamp (always 0 for Piper)","example":0},"owned_by":{"type":"string","description":"Model owner","example":"piper-tts"}}},"SpeechRequest":{"type":"object","required":["input"],"properties":{"input":{"type":"string","description":"Text to synthesize into speech","minLength":1,"maxLength":10000,"example":"Hello, this is a test of the text-to-speech service."},"voice":{"type":"string","description":"Voice model to use for synthesis","default":"en_US-lessac-medium","example":"en_US-amy-medium"},"model":{"type":"string","description":"Alternative to 'voice' for OpenAI compatibility","default":"en_US-lessac-medium","example":"en_US-lessac-medium"},"response_format":{"type":"string","description":"Output audio format (only 'wav' currently supported)","enum":["wav"],"default":"wav","example":"wav"},"speed":{"type":"number","description":"Speech rate multiplier (not yet implemented)","minimum":0.25,"maximum":4.0,"default":1.0,"example":1.0}}},"ErrorResponse":{"type":"object","required":["error"],"properties":{"error":{"type":"string","description":"Error message","example":"Missing required field: input"}}}}},"externalDocs":{"description":"Piper TTS GitHub Repository","url":"https://github.com/rhasspy/piper"}}