{"openapi":"3.0.3","info":{"title":"Research Agent API","description":"Autonomous web research agent with a 9-state pipeline, work-hub task integration,\nknowledge base auto-ingest, and Langfuse tracing.\n\n## Overview\n\nThe Research Agent conducts multi-round web research with automatic question\nrefinement and LLM synthesis. It integrates with:\n- **SearXNG** for privacy-focused web search\n- **Crawl4AI** for intelligent content extraction\n- **LiteLLM** for AI synthesis (any model via proxy)\n- **Langfuse** for full observability and tracing\n- **work-hub** for task artifact write-back\n- **haiven-knowledge** for automatic research output ingestion\n\n## Research Workflow\n\n1. Submit a research query via `POST /api/research` (optionally with `task_id`)\n2. Track progress via WebSocket at `/ws/research/{session_id}`\n3. Approve or modify open questions via `POST /api/research/{session_id}/approve` (if `auto_approve=false`)\n4. Receive synthesized results with sources via `GET /api/research/{session_id}`\n5. On COMPLETED: artifact written to work-hub (if `task_id` provided) and ingested into KB\n\n## Pipeline States\n\n```\nPENDING -> SEARCHING -> CRAWLING -> CLEANING -> SYNTHESIZING\n        -> VALIDATING -> [AWAITING_APPROVAL] -> COMPLETED / FAILED\n```\n\n## Authentication\n\nNo authentication required for research endpoints. 
Credential management\nendpoints store Fernet-encrypted credentials in Redis for authenticated crawling.\n","version":"2.1.0","contact":{"name":"Haiven Infrastructure"},"license":{"name":"MIT"}},"servers":[{"url":"https://research.haiven.site","description":"Internal (haiven.site)"},{"url":"http://localhost:8010","description":"Local development"}],"tags":[{"name":"Research","description":"Start, track, and manage research sessions"},{"name":"History","description":"Browse and manage past research sessions"},{"name":"Credentials","description":"Secure credential storage for authenticated crawling"},{"name":"Health","description":"Health and readiness checks"},{"name":"Observability","description":"Prometheus metrics and tracing"}],"paths":{"/health":{"get":{"tags":["Health"],"summary":"Health check","description":"Comprehensive health check with dependency validation","operationId":"healthCheck","responses":{"200":{"description":"Service is healthy","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"},"example":{"status":"healthy","service":"research-agent","checks":{"database":"ok","data_dir":"ok","cache_dir":"ok"}}}}},"503":{"description":"Service is degraded","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"}}}}}}},"/health/ready":{"get":{"tags":["Health"],"summary":"Readiness probe","description":"Kubernetes-style readiness probe","operationId":"readinessCheck","responses":{"200":{"description":"Service is ready","content":{"application/json":{"schema":{"type":"object","properties":{"ready":{"type":"boolean","example":true}}}}}}}}},"/health/live":{"get":{"tags":["Health"],"summary":"Liveness probe","description":"Kubernetes-style liveness probe","operationId":"livenessCheck","responses":{"200":{"description":"Service is 
alive","content":{"application/json":{"schema":{"type":"object","properties":{"alive":{"type":"boolean","example":true}}}}}}}}},"/metrics":{"get":{"tags":["Observability"],"summary":"Prometheus metrics","description":"Expose service metrics in Prometheus text format","operationId":"metrics","responses":{"200":{"description":"Prometheus metrics","content":{"text/plain":{"schema":{"type":"string"}}}}}}},"/api/research":{"post":{"tags":["Research"],"summary":"Start research session","description":"Start a new research session. Returns session_id and WebSocket URL for\nprogress tracking.\n\nIf similar research is found and `force=false`, returns similar sessions\ninstead of starting new research.\n\nIf `task_id` is provided, the completed research artifact is written back\nto the work-hub task and the final answer is ingested into haiven-knowledge.\n","operationId":"startResearch","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResearchRequest"},"examples":{"basic":{"summary":"Basic research query","value":{"query":"RAG best practices 2025"}},"with_task":{"summary":"Research linked to work-hub task","value":{"query":"RAGAS evaluation framework comparison","task_id":"550e8400-e29b-41d4-a716-446655440000","auto_approve":true}},"advanced":{"summary":"Advanced with domain filtering","value":{"query":"How to implement vector search with PostgreSQL","max_iterations":3,"model":"qwen3-30b-a3b-q8-abl","auto_approve":true,"domains":{"exclude":["pinterest.com","reddit.com"]}}}}}}},"responses":{"202":{"description":"Research session started","content":{"application/json":{"schema":{"$ref":"#/components/schemas/StartResponse"},"examples":{"started":{"summary":"New session started","value":{"session_id":"abc123-def456","status":"pending","ws_url":"/ws/research/abc123-def456","message":"Research session started"}},"similar_found":{"summary":"Similar research 
found","value":{"session_id":null,"status":"similar_found","similar":[{"session_id":"xyz789","similarity":0.92,"query":"RAG best practices"}],"message":"Similar research found. Use force=true to start new."}}}}}}}},"get":{"tags":["Research"],"summary":"List research sessions","description":"List research sessions with pagination and filtering","operationId":"listSessions","parameters":[{"name":"status","in":"query","description":"Filter by pipeline state","schema":{"type":"string","enum":["pending","searching","crawling","cleaning","synthesizing","awaiting_approval","validating","completed","failed"]}},{"name":"limit","in":"query","description":"Number of results to return","schema":{"type":"integer","minimum":1,"maximum":100,"default":20}},{"name":"offset","in":"query","schema":{"type":"integer","minimum":0,"default":0}},{"name":"sort","in":"query","description":"Sort field (prefix with - for descending)","schema":{"type":"string","default":"-created_at"}}],"responses":{"200":{"description":"List of sessions","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SessionListResponse"}}}}}}},"/api/research/{session_id}":{"get":{"tags":["Research"],"summary":"Get session status","description":"Get research session status, current state, and results","operationId":"getSession","parameters":[{"name":"session_id","in":"path","required":true,"schema":{"type":"string","format":"uuid"}}],"responses":{"200":{"description":"Session details","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResearchResponse"}}}},"404":{"description":"Session not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/api/research/{session_id}/approve":{"post":{"tags":["Research"],"summary":"Approve research questions","description":"Approve and optionally modify open questions to continue research 
pipeline","operationId":"approveQuestions","parameters":[{"name":"session_id","in":"path","required":true,"schema":{"type":"string","format":"uuid"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApproveRequest"},"example":{"approved_questions":["What are the key differences between FAISS and Qdrant?","How does reranking improve RAG precision?"]}}}},"responses":{"200":{"description":"Research resumed","content":{"application/json":{"schema":{"type":"object","properties":{"status":{"type":"string","example":"resumed"},"session_id":{"type":"string"}}}}}},"400":{"description":"Session not awaiting approval","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"404":{"description":"Session not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/api/research/similar":{"post":{"tags":["Research"],"summary":"Find similar research","description":"Find similar past research sessions using semantic similarity (deduplication)","operationId":"findSimilar","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/SimilarRequest"}}}},"responses":{"200":{"description":"Similar sessions found","content":{"application/json":{"schema":{"type":"object","properties":{"similar":{"type":"array","items":{"type":"object","properties":{"session_id":{"type":"string"},"similarity":{"type":"number","format":"float"},"query":{"type":"string"}}}}}}}}}}}},"/api/history":{"get":{"tags":["History"],"summary":"List research history","description":"List past completed research sessions with filtering and pagination","operationId":"listHistory","parameters":[{"name":"status","in":"query","schema":{"type":"string"}},{"name":"query_text","in":"query","description":"Search by query 
text","schema":{"type":"string"}},{"name":"limit","in":"query","schema":{"type":"integer","minimum":1,"maximum":100,"default":20}},{"name":"offset","in":"query","schema":{"type":"integer","minimum":0,"default":0}},{"name":"sort","in":"query","schema":{"type":"string","default":"-created_at"}}],"responses":{"200":{"description":"History list","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HistoryListResponse"}}}}}},"delete":{"tags":["History"],"summary":"Delete multiple sessions","description":"Delete multiple research sessions by ID","operationId":"deleteMultipleSessions","parameters":[{"name":"session_ids","in":"query","required":true,"schema":{"type":"array","items":{"type":"string"}}}],"responses":{"200":{"description":"Sessions deleted","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteMultipleResponse"}}}}}}},"/api/history/{session_id}":{"get":{"tags":["History"],"summary":"Get session detail","description":"Get detailed information for a specific past research session including full results","operationId":"getHistoryDetail","parameters":[{"name":"session_id","in":"path","required":true,"schema":{"type":"string"}}],"responses":{"200":{"description":"Session detail with full results","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SessionDetail"}}}},"404":{"description":"Session not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}},"delete":{"tags":["History"],"summary":"Delete session","description":"Delete a specific research session and all its data","operationId":"deleteSession","parameters":[{"name":"session_id","in":"path","required":true,"schema":{"type":"string"}}],"responses":{"200":{"description":"Session deleted","content":{"application/json":{"schema":{"$ref":"#/components/schemas/DeleteResponse"}}}},"404":{"description":"Session not 
found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/api/history/cleanup":{"post":{"tags":["History"],"summary":"Clean up old sessions","description":"Clean up old research sessions. `dry_run` defaults to true, which only previews what\nwould be deleted; pass dry_run=false to actually delete.\n","operationId":"cleanupOldSessions","parameters":[{"name":"days","in":"query","description":"Delete sessions older than this many days","schema":{"type":"integer","minimum":1,"maximum":365,"default":30}},{"name":"status","in":"query","description":"Only delete sessions with this status","schema":{"type":"string"}},{"name":"dry_run","in":"query","description":"Preview deletions without executing (default true; set to false to actually delete)","schema":{"type":"boolean","default":true}}],"responses":{"200":{"description":"Cleanup result","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CleanupResponse"}}}}}}},"/api/credentials":{"post":{"tags":["Credentials"],"summary":"Store site credentials","description":"Store encrypted credentials for a domain. Credentials are encrypted with\nFernet (AES-128-CBC) and stored in Redis with a configurable TTL.\n\nSecurity: Passwords are never logged or traced. 
Credentials are encrypted\nat rest using PBKDF2-SHA256 key derivation with 100k iterations.\n","operationId":"storeCredential","requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CredentialRequest"}}}},"responses":{"200":{"description":"Credential stored","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CredentialResponse"}}}}}},"get":{"tags":["Credentials"],"summary":"List credential domains","description":"List domains with stored credentials (actual credentials are never returned)","operationId":"listCredentials","responses":{"200":{"description":"List of domains with stored credentials","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CredentialListResponse"}}}}}}},"/api/credentials/{domain}":{"delete":{"tags":["Credentials"],"summary":"Delete credential","description":"Remove stored credentials for a specific domain","operationId":"deleteCredential","parameters":[{"name":"domain","in":"path","required":true,"schema":{"type":"string","example":"example.com"}}],"responses":{"200":{"description":"Credential deleted","content":{"application/json":{"schema":{"type":"object","properties":{"status":{"type":"string","example":"deleted"},"domain":{"type":"string"},"message":{"type":"string"}}}}}},"404":{"description":"No credentials found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}},"/api/credentials/{domain}/check":{"get":{"tags":["Credentials"],"summary":"Check credential exists","description":"Check if credentials exist for a domain without revealing them","operationId":"checkCredential","parameters":[{"name":"domain","in":"path","required":true,"schema":{"type":"string","example":"example.com"}}],"responses":{"200":{"description":"Credential 
status","content":{"application/json":{"schema":{"type":"object","properties":{"domain":{"type":"string"},"exists":{"type":"boolean"},"ttl_seconds":{"type":"integer"},"ttl_hours":{"type":"number"}}}}}},"404":{"description":"No credentials found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}},"components":{"schemas":{"HealthResponse":{"type":"object","properties":{"status":{"type":"string","enum":["healthy","degraded"],"example":"healthy"},"service":{"type":"string","example":"research-agent"},"checks":{"type":"object","additionalProperties":{"type":"string"},"example":{"database":"ok","data_dir":"ok","cache_dir":"ok"}}}},"ResearchRequest":{"type":"object","required":["query"],"properties":{"query":{"type":"string","minLength":3,"maxLength":1000,"description":"Research topic or question to investigate","example":"How does RAGAS evaluate RAG pipeline quality?"},"task_id":{"type":"string","format":"uuid","nullable":true,"description":"Optional work-hub task ID. 
On COMPLETED, writes research artifact to task\nand ingests final answer into haiven-knowledge with source_application=research_agent.\n","example":"550e8400-e29b-41d4-a716-446655440000"},"max_iterations":{"type":"integer","minimum":1,"maximum":10,"default":5,"description":"Maximum research pipeline iterations"},"model":{"type":"string","nullable":true,"description":"LiteLLM model name (default from service config)","example":"qwen3-30b-a3b-q8-abl"},"auto_approve":{"type":"boolean","default":false,"description":"Automatically approve open questions and continue without human review"},"domains":{"type":"object","nullable":true,"properties":{"exclude":{"type":"array","description":"Domains to exclude from web crawling","items":{"type":"string"}}},"example":{"exclude":["pinterest.com","reddit.com"]}},"force":{"type":"boolean","default":false,"description":"Force new research even if similar sessions exist"}}},"StartResponse":{"type":"object","properties":{"session_id":{"type":"string","nullable":true,"example":"abc123-def456"},"status":{"type":"string","enum":["pending","similar_found"],"example":"pending"},"ws_url":{"type":"string","nullable":true,"description":"WebSocket URL for real-time pipeline progress","example":"/ws/research/abc123-def456"},"similar":{"type":"array","nullable":true,"description":"Similar past sessions found (when status=similar_found)","items":{"type":"object","properties":{"session_id":{"type":"string"},"similarity":{"type":"number","format":"float"},"query":{"type":"string"}}}},"message":{"type":"string","nullable":true}}},"ResearchResponse":{"type":"object","properties":{"session_id":{"type":"string"},"query":{"type":"string"},"status":{"type":"string","enum":["pending","searching","crawling","cleaning","synthesizing","awaiting_approval","validating","completed","failed"]},"iteration":{"type":"integer"},"max_iterations":{"type":"integer"},"open_questions":{"type":"array","nullable":true,"description":"Questions pending human approval (when 
status=awaiting_approval)","items":{"type":"string"}},"results":{"type":"object","nullable":true,"description":"Final research results (when status=completed)"},"task_id":{"type":"string","nullable":true,"description":"Linked work-hub task ID if provided at start"},"created_at":{"type":"string","format":"date-time"},"updated_at":{"type":"string","format":"date-time"}}},"SessionListResponse":{"type":"object","properties":{"sessions":{"type":"array","items":{"type":"object"}},"total":{"type":"integer"},"limit":{"type":"integer"},"offset":{"type":"integer"}}},"ApproveRequest":{"type":"object","required":["approved_questions"],"properties":{"approved_questions":{"type":"array","minItems":1,"description":"List of approved/refined research questions to continue with","items":{"type":"string"}}}},"SimilarRequest":{"type":"object","required":["query"],"properties":{"query":{"type":"string","minLength":3,"description":"Query to find similar past research for"},"limit":{"type":"integer","minimum":1,"maximum":20,"default":5}}},"HistoryListResponse":{"type":"object","properties":{"sessions":{"type":"array","items":{"$ref":"#/components/schemas/SessionSummary"}},"total":{"type":"integer"},"limit":{"type":"integer"},"offset":{"type":"integer"}}},"SessionSummary":{"type":"object","properties":{"session_id":{"type":"string"},"query":{"type":"string"},"status":{"type":"string"},"iteration":{"type":"integer"},"max_iterations":{"type":"integer"},"task_id":{"type":"string","nullable":true},"created_at":{"type":"string"},"updated_at":{"type":"string"},"quality_score":{"type":"number","nullable":true}}},"SessionDetail":{"type":"object","properties":{"session_id":{"type":"string"},"query":{"type":"string"},"status":{"type":"string"},"iteration":{"type":"integer"},"max_iterations":{"type":"integer"},"results":{"type":"object","nullable":true,"properties":{"final_answer":{"type":"string","description":"Synthesized research 
answer"},"sources":{"type":"array","items":{"type":"object","properties":{"url":{"type":"string"},"title":{"type":"string"},"relevance":{"type":"number"}}}},"quality_score":{"type":"number"}}},"task_id":{"type":"string","nullable":true},"created_at":{"type":"string"},"updated_at":{"type":"string"}}},"DeleteResponse":{"type":"object","properties":{"deleted":{"type":"integer"},"message":{"type":"string"}}},"DeleteMultipleResponse":{"type":"object","properties":{"deleted":{"type":"integer"},"errors":{"type":"array","nullable":true,"items":{"type":"string"}},"message":{"type":"string"}}},"CleanupResponse":{"type":"object","properties":{"deleted":{"type":"integer"},"would_delete":{"type":"integer"},"message":{"type":"string"},"dry_run":{"type":"boolean"}}},"CredentialRequest":{"type":"object","required":["domain","username","password"],"properties":{"domain":{"type":"string","minLength":3,"maxLength":255,"example":"private-site.com"},"username":{"type":"string","minLength":1,"maxLength":255,"example":"researcher@haiven.site"},"password":{"type":"string","format":"password","minLength":1},"credential_type":{"type":"string","default":"basic","enum":["basic","bearer","cookie"]},"metadata":{"type":"object","nullable":true,"description":"Additional metadata for credential management"}}},"CredentialResponse":{"type":"object","properties":{"status":{"type":"string","example":"stored"},"domain":{"type":"string"},"message":{"type":"string","nullable":true}}},"CredentialListResponse":{"type":"object","properties":{"domains":{"type":"array","items":{"type":"object","properties":{"domain":{"type":"string"},"ttl_seconds":{"type":"integer"},"ttl_hours":{"type":"number"}}}},"count":{"type":"integer"}}},"ErrorResponse":{"type":"object","properties":{"detail":{"type":"string","description":"Human-readable error message"}}}}}}