{"openapi":"3.0.3","info":{"title":"Crawl4AI API","description":"AI-optimized web scraping API with JavaScript rendering, LLM-friendly markdown conversion,\nand RAG pipeline integration.\n\n## Features\n- **LLM-optimized output** - Clean markdown perfect for RAG pipelines\n- **JavaScript rendering** - Playwright-based for modern SPAs\n- **Parallel crawling** - Batch processing with smart caching\n- **Structured extraction** - CSS selector and JSON schema support\n\n## Authentication\nAll endpoints except `/health` require Bearer token authentication via the `Authorization` header.\n","version":"0.4.0","contact":{"name":"Haiven AI Infrastructure"},"license":{"name":"Apache 2.0","url":"https://www.apache.org/licenses/LICENSE-2.0"}},"servers":[{"url":"https://crawler.haiven.site","description":"Production server (via Traefik)"},{"url":"http://localhost:11235","description":"Local development"}],"security":[{"BearerAuth":[]}],"paths":{"/health":{"get":{"summary":"Health Check","description":"Check service health and version information","operationId":"getHealth","tags":["Status"],"security":[],"responses":{"200":{"description":"Service is healthy","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HealthResponse"},"example":{"status":"healthy","version":"0.4.0","cache_enabled":true}}}}}}},"/crawl":{"post":{"summary":"Crawl URLs","description":"Crawl one or more URLs and return clean markdown content optimized for LLM consumption.\n\nSupports:\n- Static HTML pages\n- JavaScript-rendered SPAs (via Playwright)\n- CSS selector extraction\n- Parallel batch processing\n- Intelligent caching\n","operationId":"crawlUrls","tags":["Crawling"],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CrawlRequest"},"examples":{"simple":{"summary":"Simple crawl","value":{"urls":["https://example.com"],"word_count_threshold":10}},"javascript":{"summary":"JavaScript-rendered page","value":{"urls":["https://react-app.com"],"js_code":"await new Promise(r => setTimeout(r, 3000));","wait_for":"css:#content"}},"batch":{"summary":"Batch crawl","value":{"urls":["https://docs.example.com/intro","https://docs.example.com/api","https://docs.example.com/faq"],"word_count_threshold":10}}}}}},"responses":{"200":{"description":"Crawl completed successfully","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CrawlResponse"}}}},"400":{"description":"Invalid request parameters","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"401":{"description":"Authentication required","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}},"500":{"description":"Server error during crawl","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ErrorResponse"}}}}}}}},"components":{"securitySchemes":{"BearerAuth":{"type":"http","scheme":"bearer","description":"API token for authentication"}},"schemas":{"HealthResponse":{"type":"object","properties":{"status":{"type":"string","enum":["healthy","unhealthy"],"description":"Service health status"},"version":{"type":"string","description":"Crawl4AI version"},"cache_enabled":{"type":"boolean","description":"Whether caching is enabled"}}},"CrawlRequest":{"type":"object","required":["urls"],"properties":{"urls":{"type":"array","items":{"type":"string","format":"uri"},"minItems":1,"maxItems":100,"description":"List of URLs to crawl"},"word_count_threshold":{"type":"integer","default":10,"minimum":0,"description":"Minimum words per content block to include"},"bypass_cache":{"type":"boolean","default":false,"description":"Force fresh crawl, ignoring cached content"},"css_selector":{"type":"string","nullable":true,"description":"CSS selector to extract specific elements","example":".article-content"},"js_code":{"type":"string","nullable":true,"description":"JavaScript code to execute on page before extraction","example":"await new Promise(r => setTimeout(r, 2000));"},"wait_for":{"type":"string","nullable":true,"description":"Wait condition before extraction.\nPrefix with `css:` for CSS selector wait.\n","example":"css:#main-content"},"screenshot":{"type":"boolean","default":false,"description":"Capture page screenshot (base64 PNG)"}}},"CrawlResponse":{"type":"object","properties":{"results":{"type":"array","items":{"$ref":"#/components/schemas/CrawlResult"}}}},"CrawlResult":{"type":"object","properties":{"url":{"type":"string","format":"uri","description":"Crawled URL"},"success":{"type":"boolean","description":"Whether crawl was successful"},"error":{"type":"string","nullable":true,"description":"Error message if crawl failed"},"markdown":{"$ref":"#/components/schemas/MarkdownContent"},"metadata":{"$ref":"#/components/schemas/PageMetadata"},"links":{"type":"array","items":{"type":"string","format":"uri"},"description":"Links extracted from page"},"screenshot":{"type":"string","format":"byte","nullable":true,"description":"Base64-encoded PNG screenshot (if requested)"}}},"MarkdownContent":{"type":"object","properties":{"raw_markdown":{"type":"string","description":"Full markdown content with formatting preserved"},"fit_markdown":{"type":"string","description":"Condensed markdown optimized for token efficiency"}}},"PageMetadata":{"type":"object","properties":{"title":{"type":"string","nullable":true,"description":"Page title"},"description":{"type":"string","nullable":true,"description":"Meta description"},"author":{"type":"string","nullable":true,"description":"Content author if available"},"language":{"type":"string","nullable":true,"description":"Page language code"}}},"ErrorResponse":{"type":"object","properties":{"error":{"type":"string","description":"Error message"},"detail":{"type":"string","nullable":true,"description":"Detailed error information"}}}}},"tags":[{"name":"Status","description":"Health and status endpoints"},{"name":"Crawling","description":"Web scraping and content extraction"}]}