{
  "openapi": "3.1.0",
  "info": {
    "title": "LiteLLM Proxy API",
    "description": "OpenAI-compatible API gateway with unified model access, virtual keys, spend tracking, and Langfuse observability.\n\nLiteLLM Proxy sits in front of llama-swap and provides:\n- Unified OpenAI API interface for all LLM models\n- Request/response logging to Langfuse for observability\n- Virtual API keys for user/team-based spend tracking\n- Rate limiting (100 avg / 200 burst via Traefik)\n- Model aliasing (gpt-4 -> qwen3-30b-a3b, etc.)\n\n## Model Mappings\n\n| Client Model | Actual Model |\n|--------------|--------------|\n| `gpt-4` | `qwen3-30b-a3b` |\n| `gpt-4-turbo` | `qwen3-30b-a3b` |\n| `gpt-3.5-turbo` | `qwen2.5-14b-instruct` |\n| `*` (any other) | Passed through to llama-swap |\n",
    "version": "1.0.0",
    "contact": { "name": "Haiven Infrastructure" },
    "license": {
      "name": "MIT",
      "url": "https://github.com/BerriAI/litellm/blob/main/LICENSE"
    }
  },
  "servers": [
    {
      "url": "https://llm.haiven.site",
      "description": "Production API endpoint (via Traefik)"
    },
    {
      "url": "https://litellm.haiven.site",
      "description": "Admin UI (via Traefik)"
    },
    {
      "url": "http://litellm:4000",
      "description": "Internal Docker network"
    }
  ],
  "tags": [
    {
      "name": "Chat Completions",
      "description": "Generate chat-based completions (OpenAI-compatible)"
    },
    {
      "name": "Completions",
      "description": "Generate text completions (OpenAI-compatible)"
    },
    {
      "name": "Embeddings",
      "description": "Generate vector embeddings for text"
    },
    {
      "name": "Models",
      "description": "List and retrieve available models"
    },
    {
      "name": "Keys",
      "description": "Virtual API key management"
    },
    {
      "name": "Spend",
      "description": "Usage and spend tracking"
    },
    {
      "name": "Health",
      "description": "Health and readiness checks"
    }
  ],
  "security": [{ "BearerAuth": [] }],
  "paths": {
    "/v1/chat/completions": {
      "post": {
        "tags": ["Chat Completions"],
        "summary": "Create chat completion",
        "description": "Creates a chat completion for the given messages. Supports streaming responses.\n\nThis endpoint is fully compatible with the OpenAI Chat Completions API.\n",
        "operationId": "createChatCompletion",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/ChatCompletionRequest"
              },
              "examples": {
                "simple": {
                  "summary": "Simple chat",
                  "value": {
                    "model": "gpt-4",
                    "messages": [
                      { "role": "user", "content": "Hello, how are you?" }
                    ]
                  }
                },
                "with_system": {
                  "summary": "With system prompt",
                  "value": {
                    "model": "qwen3-30b-a3b",
                    "messages": [
                      {
                        "role": "system",
                        "content": "You are a helpful assistant."
                      },
                      {
                        "role": "user",
                        "content": "Explain quantum computing"
                      }
                    ],
                    "temperature": 0.7,
                    "max_tokens": 500
                  }
                },
                "streaming": {
                  "summary": "Streaming response",
                  "value": {
                    "model": "gpt-4",
                    "messages": [
                      {
                        "role": "user",
                        "content": "Write a haiku about coding"
                      }
                    ],
                    "stream": true
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Chat completion response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ChatCompletionResponse"
                }
              },
              "text/event-stream": {
                "schema": {
                  "type": "string",
                  "description": "Server-Sent Events stream when streaming is enabled"
                }
              }
            }
          },
          "400": { "$ref": "#/components/responses/BadRequest" },
          "401": { "$ref": "#/components/responses/Unauthorized" },
          "429": { "$ref": "#/components/responses/RateLimited" },
          "500": { "$ref": "#/components/responses/InternalError" }
        }
      }
    },
    "/v1/completions": {
      "post": {
        "tags": ["Completions"],
        "summary": "Create text completion",
        "description": "Creates a completion for the provided prompt. This is the legacy completions API.\n",
        "operationId": "createCompletion",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/CompletionRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Completion response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CompletionResponse"
                }
              }
            }
          },
          "400": { "$ref": "#/components/responses/BadRequest" },
          "401": { "$ref": "#/components/responses/Unauthorized" },
          "429": { "$ref": "#/components/responses/RateLimited" },
          "500": { "$ref": "#/components/responses/InternalError" }
        }
      }
    },
    "/v1/embeddings": {
      "post": {
        "tags": ["Embeddings"],
        "summary": "Create embeddings",
        "description": "Creates an embedding vector representing the input text.\n",
        "operationId": "createEmbeddings",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/EmbeddingRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Embedding response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/EmbeddingResponse"
                }
              }
            }
          },
          "400": { "$ref": "#/components/responses/BadRequest" },
          "401": { "$ref": "#/components/responses/Unauthorized" },
          "429": { "$ref": "#/components/responses/RateLimited" },
          "500": { "$ref": "#/components/responses/InternalError" }
        }
      }
    },
    "/v1/models": {
      "get": {
        "tags": ["Models"],
        "summary": "List models",
        "description": "Lists all available models. Models are sourced from the llama-swap backend.\n",
        "operationId": "listModels",
        "responses": {
          "200": {
            "description": "List of available models",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ModelList" },
                "example": {
                  "object": "list",
                  "data": [
                    {
                      "id": "qwen3-30b-a3b",
                      "object": "model",
                      "created": 1700000000,
                      "owned_by": "haiven"
                    },
                    {
                      "id": "qwen2.5-14b-instruct",
                      "object": "model",
                      "created": 1700000000,
                      "owned_by": "haiven"
                    }
                  ]
                }
              }
            }
          },
          "401": { "$ref": "#/components/responses/Unauthorized" }
        }
      }
    },
    "/v1/models/{model}": {
      "get": {
        "tags": ["Models"],
        "summary": "Retrieve model",
        "description": "Retrieves a specific model's information.",
        "operationId": "retrieveModel",
        "parameters": [
          {
            "name": "model",
            "in": "path",
            "required": true,
            "schema": { "type": "string" },
            "description": "The model ID"
          }
        ],
        "responses": {
          "200": {
            "description": "Model information",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/Model" }
              }
            }
          },
          "401": { "$ref": "#/components/responses/Unauthorized" },
          "404": { "description": "Model not found" }
        }
      }
    },
    "/key/generate": {
      "post": {
        "tags": ["Keys"],
        "summary": "Generate API key",
        "description": "Creates a new virtual API key with optional budget and model restrictions.\nRequires master key authentication.\n",
        "operationId": "generateKey",
        "security": [{ "BearerAuth": [] }],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/KeyGenerateRequest"
              },
              "example": {
                "models": ["gpt-4", "gpt-3.5-turbo"],
                "user_id": "user@example.com",
                "max_budget": 100.0,
                "duration": "30d"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Generated key",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/KeyGenerateResponse"
                }
              }
            }
          },
          "401": { "$ref": "#/components/responses/Unauthorized" },
          "403": { "description": "Forbidden - master key required" }
        }
      }
    },
    "/key/delete": {
      "post": {
        "tags": ["Keys"],
        "summary": "Delete API key",
        "description": "Deletes one or more virtual API keys.",
        "operationId": "deleteKey",
        "security": [{ "BearerAuth": [] }],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": ["keys"],
                "properties": {
                  "keys": {
                    "type": "array",
                    "items": { "type": "string" },
                    "description": "List of key hashes to delete"
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": { "description": "Keys deleted successfully" },
          "401": { "$ref": "#/components/responses/Unauthorized" }
        }
      }
    },
    "/key/info": {
      "get": {
        "tags": ["Keys"],
        "summary": "Get key information",
        "description": "Retrieves information about the current API key.",
        "operationId": "getKeyInfo",
        "responses": {
          "200": {
            "description": "Key information",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/KeyInfo" }
              }
            }
          },
          "401": { "$ref": "#/components/responses/Unauthorized" }
        }
      }
    },
    "/spend/logs": {
      "get": {
        "tags": ["Spend"],
        "summary": "Get spend logs",
        "description": "Retrieves spend and usage logs for the authenticated key or user.",
        "operationId": "getSpendLogs",
        "parameters": [
          {
            "name": "start_date",
            "in": "query",
            "schema": { "type": "string", "format": "date" },
            "description": "Start date for log range"
          },
          {
            "name": "end_date",
            "in": "query",
            "schema": { "type": "string", "format": "date" },
            "description": "End date for log range"
          },
          {
            "name": "user_id",
            "in": "query",
            "schema": { "type": "string" },
            "description": "Filter by user ID (admin only)"
          }
        ],
        "responses": {
          "200": {
            "description": "Spend logs",
            "content": {
              "application/json": {
                "schema": {
                  "type": "array",
                  "items": { "$ref": "#/components/schemas/SpendLog" }
                }
              }
            }
          },
          "401": { "$ref": "#/components/responses/Unauthorized" }
        }
      }
    },
    "/user/info": {
      "get": {
        "tags": ["Spend"],
        "summary": "Get user information",
        "description": "Retrieves information and spend totals for the current user.",
        "operationId": "getUserInfo",
        "responses": {
          "200": {
            "description": "User information",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/UserInfo" }
              }
            }
          },
          "401": { "$ref": "#/components/responses/Unauthorized" }
        }
      }
    },
    "/health": {
      "get": {
        "tags": ["Health"],
        "summary": "Health check",
        "description": "Basic health check endpoint.",
        "operationId": "healthCheck",
        "security": [],
        "responses": {
          "200": {
            "description": "Service is healthy",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": { "type": "string", "example": "healthy" },
                    "version": { "type": "string" },
                    "timestamp": { "type": "string", "format": "date-time" }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/health/liveliness": {
      "get": {
        "tags": ["Health"],
        "summary": "Liveness probe",
        "description": "Kubernetes-style liveness probe.",
        "operationId": "livenessProbe",
        "security": [],
        "responses": {
          "200": {
            "description": "Service is alive",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": { "type": "string", "example": "healthy" }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/health/readiness": {
      "get": {
        "tags": ["Health"],
        "summary": "Readiness probe",
        "description": "Kubernetes-style readiness probe.",
        "operationId": "readinessProbe",
        "security": [],
        "responses": {
          "200": { "description": "Service is ready" },
          "503": { "description": "Service not ready" }
        }
      }
    },
    "/metrics": {
      "get": {
        "tags": ["Health"],
        "summary": "Prometheus metrics",
        "description": "Exposes Prometheus-compatible metrics.",
        "operationId": "getMetrics",
        "security": [],
        "responses": {
          "200": {
            "description": "Prometheus metrics",
            "content": {
              "text/plain": {
                "schema": { "type": "string" }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "BearerAuth": {
        "type": "http",
        "scheme": "bearer",
        "bearerFormat": "API Key",
        "description": "LiteLLM API key (virtual key or master key)"
      }
    },
    "schemas": {
      "ChatCompletionRequest": {
        "type": "object",
        "required": ["model", "messages"],
        "properties": {
          "model": {
            "type": "string",
            "description": "Model to use for completion",
            "example": "gpt-4"
          },
          "messages": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/ChatMessage" },
            "description": "Conversation messages"
          },
          "temperature": {
            "type": "number",
            "minimum": 0,
            "maximum": 2,
            "default": 1,
            "description": "Sampling temperature"
          },
          "top_p": {
            "type": "number",
            "minimum": 0,
            "maximum": 1,
            "default": 1,
            "description": "Nucleus sampling probability"
          },
          "max_tokens": {
            "type": "integer",
            "minimum": 1,
            "description": "Maximum tokens to generate"
          },
          "stream": {
            "type": "boolean",
            "default": false,
            "description": "Enable streaming response"
          },
          "stop": {
            "oneOf": [
              { "type": "string" },
              { "type": "array", "items": { "type": "string" } }
            ],
            "description": "Stop sequences"
          },
          "presence_penalty": {
            "type": "number",
            "minimum": -2,
            "maximum": 2,
            "default": 0
          },
          "frequency_penalty": {
            "type": "number",
            "minimum": -2,
            "maximum": 2,
            "default": 0
          },
          "user": {
            "type": "string",
            "description": "User identifier for tracking"
          },
          "response_format": {
            "type": "object",
            "properties": {
              "type": { "type": "string", "enum": ["text", "json_object"] }
            }
          }
        }
      },
      "ChatMessage": {
        "type": "object",
        "required": ["role", "content"],
        "properties": {
          "role": {
            "type": "string",
            "enum": ["system", "user", "assistant", "tool"],
            "description": "Message role"
          },
          "content": {
            "oneOf": [
              { "type": "string" },
              { "type": "array", "items": { "type": "object" } }
            ],
            "description": "Message content"
          },
          "name": {
            "type": "string",
            "description": "Optional name for the participant"
          }
        }
      },
      "ChatCompletionResponse": {
        "type": "object",
        "properties": {
          "id": { "type": "string", "example": "chatcmpl-abc123" },
          "object": { "type": "string", "example": "chat.completion" },
          "created": { "type": "integer", "description": "Unix timestamp" },
          "model": { "type": "string" },
          "choices": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "index": { "type": "integer" },
                "message": { "$ref": "#/components/schemas/ChatMessage" },
                "finish_reason": {
                  "type": "string",
                  "enum": ["stop", "length", "content_filter", "tool_calls"]
                }
              }
            }
          },
          "usage": { "$ref": "#/components/schemas/Usage" }
        }
      },
      "CompletionRequest": {
        "type": "object",
        "required": ["model", "prompt"],
        "properties": {
          "model": { "type": "string" },
          "prompt": {
            "oneOf": [
              { "type": "string" },
              { "type": "array", "items": { "type": "string" } }
            ]
          },
          "max_tokens": { "type": "integer", "default": 16 },
          "temperature": { "type": "number", "default": 1 },
          "stream": { "type": "boolean", "default": false }
        }
      },
      "CompletionResponse": {
        "type": "object",
        "properties": {
          "id": { "type": "string" },
          "object": { "type": "string", "example": "text_completion" },
          "created": { "type": "integer" },
          "model": { "type": "string" },
          "choices": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "text": { "type": "string" },
                "index": { "type": "integer" },
                "finish_reason": { "type": "string" }
              }
            }
          },
          "usage": { "$ref": "#/components/schemas/Usage" }
        }
      },
      "EmbeddingRequest": {
        "type": "object",
        "required": ["model", "input"],
        "properties": {
          "model": { "type": "string" },
          "input": {
            "oneOf": [
              { "type": "string" },
              { "type": "array", "items": { "type": "string" } }
            ]
          },
          "encoding_format": {
            "type": "string",
            "enum": ["float", "base64"],
            "default": "float"
          }
        }
      },
      "EmbeddingResponse": {
        "type": "object",
        "properties": {
          "object": { "type": "string", "example": "list" },
          "data": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "object": { "type": "string", "example": "embedding" },
                "embedding": {
                  "type": "array",
                  "items": { "type": "number" }
                },
                "index": { "type": "integer" }
              }
            }
          },
          "model": { "type": "string" },
          "usage": {
            "type": "object",
            "properties": {
              "prompt_tokens": { "type": "integer" },
              "total_tokens": { "type": "integer" }
            }
          }
        }
      },
      "Model": {
        "type": "object",
        "properties": {
          "id": { "type": "string" },
          "object": { "type": "string", "example": "model" },
          "created": { "type": "integer" },
          "owned_by": { "type": "string" }
        }
      },
      "ModelList": {
        "type": "object",
        "properties": {
          "object": { "type": "string", "example": "list" },
          "data": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/Model" }
          }
        }
      },
      "Usage": {
        "type": "object",
        "properties": {
          "prompt_tokens": { "type": "integer" },
          "completion_tokens": { "type": "integer" },
          "total_tokens": { "type": "integer" }
        }
      },
      "KeyGenerateRequest": {
        "type": "object",
        "properties": {
          "models": {
            "type": "array",
            "items": { "type": "string" },
            "description": "Models this key can access"
          },
          "user_id": { "type": "string", "description": "User identifier" },
          "team_id": { "type": "string", "description": "Team identifier" },
          "max_budget": {
            "type": "number",
            "description": "Maximum spend budget"
          },
          "duration": {
            "type": "string",
            "description": "Key validity duration (e.g., '30d', '1h')"
          },
          "metadata": {
            "type": "object",
            "additionalProperties": true,
            "description": "Custom metadata"
          }
        }
      },
      "KeyGenerateResponse": {
        "type": "object",
        "properties": {
          "key": {
            "type": "string",
            "description": "The generated API key (only shown once)"
          },
          "key_name": { "type": "string" },
          "expires": { "type": "string", "format": "date-time" },
          "user_id": { "type": "string" }
        }
      },
      "KeyInfo": {
        "type": "object",
        "properties": {
          "key": { "type": "string", "description": "Key hash (partial)" },
          "models": { "type": "array", "items": { "type": "string" } },
          "spend": { "type": "number" },
          "max_budget": { "type": "number" },
          "expires": { "type": "string", "format": "date-time" },
          "user_id": { "type": "string" }
        }
      },
      "SpendLog": {
        "type": "object",
        "properties": {
          "request_id": { "type": "string" },
          "model": { "type": "string" },
          "tokens": { "type": "integer" },
          "cost": { "type": "number" },
          "timestamp": { "type": "string", "format": "date-time" },
          "user_id": { "type": "string" }
        }
      },
      "UserInfo": {
        "type": "object",
        "properties": {
          "user_id": { "type": "string" },
          "spend": { "type": "number" },
          "max_budget": { "type": "number" },
          "models": { "type": "array", "items": { "type": "string" } }
        }
      },
      "Error": {
        "type": "object",
        "properties": {
          "error": {
            "type": "object",
            "properties": {
              "message": { "type": "string" },
              "type": { "type": "string" },
              "code": { "type": "string" }
            }
          }
        }
      }
    },
    "responses": {
      "BadRequest": {
        "description": "Bad request - invalid parameters",
        "content": {
          "application/json": {
            "schema": { "$ref": "#/components/schemas/Error" }
          }
        }
      },
      "Unauthorized": {
        "description": "Unauthorized - invalid or missing API key",
        "content": {
          "application/json": {
            "schema": { "$ref": "#/components/schemas/Error" }
          }
        }
      },
      "RateLimited": {
        "description": "Rate limit exceeded (100 avg / 200 burst)",
        "content": {
          "application/json": {
            "schema": { "$ref": "#/components/schemas/Error" }
          }
        },
        "headers": {
          "Retry-After": {
            "schema": { "type": "integer" },
            "description": "Seconds to wait before retrying"
          }
        }
      },
      "InternalError": {
        "description": "Internal server error",
        "content": {
          "application/json": {
            "schema": { "$ref": "#/components/schemas/Error" }
          }
        }
      }
    }
  },
  "externalDocs": {
    "description": "LiteLLM Documentation",
    "url": "https://docs.litellm.ai/"
  }
}