{
  "openapi": "3.1.0",
  "info": {
    "title": "haiven-ingest-docling",
    "version": "1.13.1",
    "description": "Document format conversion service for the Haiven ingestion pipeline.\nConverts PDF, DOCX, PPTX, XLSX, HTML, images, and other formats to Markdown\n(or JSON) using the Docling engine from IBM Research.\n\nThis service is a deployment of the official `docling-serve` project:\nhttps://github.com/docling-project/docling-serve\n\n**Image:** `quay.io/docling-project/docling-serve:latest`\n**Docling version:** 2.74.0\n**Docling-serve version:** 1.13.1\n\n**Primary consumer:** haiven-knowledge IngestionRouter\n(`http://haiven-ingest-docling:5001` on the backend Docker network)\n"
  },
  "servers": [
    {
      "url": "https://docling.haiven.site",
      "description": "Traefik HTTPS (LAN)"
    },
    {
      "url": "http://localhost:5001",
      "description": "Direct HTTP (local)"
    },
    {
      "url": "http://haiven-ingest-docling:5001",
      "description": "Docker backend network (for container-to-container calls)"
    }
  ],
  "tags": [
    {
      "name": "health",
      "description": "Service health and version information"
    },
    {
      "name": "convert-sync",
      "description": "Synchronous document conversion (wait for result)"
    },
    {
      "name": "convert-async",
      "description": "Asynchronous document conversion (submit and poll)"
    },
    {
      "name": "chunk",
      "description": "Document chunking for RAG pipelines"
    },
    {
      "name": "tasks",
      "description": "Async task management"
    },
    {
      "name": "memory",
      "description": "Converter cache and memory management"
    }
  ],
  "paths": {
    "/health": {
      "get": {
        "tags": ["health"],
        "summary": "Health check",
        "description": "Returns status ok when the service is healthy.",
        "operationId": "health",
        "responses": {
          "200": {
            "description": "Service is healthy",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/HealthCheckResponse" },
                "example": { "status": "ok" }
              }
            }
          }
        }
      }
    },
    "/version": {
      "get": {
        "tags": ["health"],
        "summary": "Version info",
        "description": "Returns version strings for all Docling components.",
        "operationId": "version",
        "responses": {
          "200": {
            "description": "Version information",
            "content": {
              "application/json": {
                "schema": { "type": "object", "additionalProperties": true },
                "example": {
                  "docling-serve": "1.13.1",
                  "docling-jobkit": "1.12.0",
                  "docling": "2.74.0",
                  "docling-core": "2.65.2",
                  "docling-ibm-models": "3.11.0",
                  "docling-parse": "5.3.2",
                  "python": "cpython-312 (3.12.12)",
                  "plaform": "Linux-6.8.0-100-generic-x86_64-with-glibc2.34"
                }
              }
            }
          }
        }
      }
    },
    "/v1/convert/source": {
      "post": {
        "tags": ["convert-sync"],
        "summary": "Convert document from URL",
        "description": "Downloads a document from one or more URLs and converts it to the\nrequested output format (default: Markdown). Synchronous — waits for\nconversion to complete before returning.\n",
        "operationId": "convertSource",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/ConvertDocumentsRequest" },
              "examples": {
                "single_url": {
                  "summary": "Convert a single PDF from URL",
                  "value": {
                    "http_sources": [
                      { "url": "https://example.com/document.pdf" }
                    ],
                    "options": {
                      "to_formats": ["md"],
                      "do_ocr": true
                    }
                  }
                },
                "multiple_urls": {
                  "summary": "Convert multiple documents",
                  "value": {
                    "http_sources": [
                      { "url": "https://example.com/report.pdf" },
                      { "url": "https://example.com/slides.pptx" }
                    ],
                    "options": {
                      "to_formats": ["md"]
                    }
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Conversion result",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ConvertDocumentResponse" },
                "example": {
                  "document": {
                    "filename": "document.pdf",
                    "md_content": "# Document Title\n\nExtracted text content...",
                    "json_content": null,
                    "html_content": null
                  },
                  "status": "success",
                  "errors": [],
                  "processing_time": 4.21
                }
              }
            }
          },
          "422": {
            "description": "Validation error",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/HTTPValidationError" }
              }
            }
          }
        }
      }
    },
    "/v1/convert/file": {
      "post": {
        "tags": ["convert-sync"],
        "summary": "Convert uploaded file",
        "description": "Accepts one or more files as multipart form data and converts them to\nthe requested output format (default: Markdown). Synchronous — waits\nfor conversion to complete before returning.\n\nThis is the primary endpoint used by haiven-knowledge's IngestionRouter.\n",
        "operationId": "convertFile",
        "requestBody": {
          "required": true,
          "content": {
            "multipart/form-data": {
              "schema": { "$ref": "#/components/schemas/ConvertFileRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Conversion result",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ConvertDocumentResponse" },
                "example": {
                  "document": {
                    "filename": "report.pdf",
                    "md_content": "# Report Title\n\n## Section 1\n\nContent here...",
                    "json_content": null,
                    "html_content": null
                  },
                  "status": "success",
                  "errors": [],
                  "processing_time": 8.35
                }
              }
            }
          },
          "422": {
            "description": "Validation error",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/HTTPValidationError" }
              }
            }
          }
        }
      }
    },
    "/v1/convert/source/async": {
      "post": {
        "tags": ["convert-async"],
        "summary": "Convert document from URL (async)",
        "description": "Submits a URL-based conversion job and returns a task ID immediately.\nPoll `/v1/status/poll/{task_id}` for status, then retrieve the result\nfrom `/v1/result/{task_id}`. Recommended for large documents (50+ pages).\n",
        "operationId": "convertSourceAsync",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/ConvertDocumentsRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Task submitted",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/TaskStatusResponse" },
                "example": {
                  "task_id": "abc123-def456",
                  "status": "pending"
                }
              }
            }
          },
          "422": {
            "description": "Validation error",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/HTTPValidationError" }
              }
            }
          }
        }
      }
    },
    "/v1/convert/file/async": {
      "post": {
        "tags": ["convert-async"],
        "summary": "Convert uploaded file (async)",
        "description": "Submits a file upload conversion job and returns a task ID immediately.\nRecommended for large files to avoid HTTP timeout issues.\n",
        "operationId": "convertFileAsync",
        "requestBody": {
          "required": true,
          "content": {
            "multipart/form-data": {
              "schema": { "$ref": "#/components/schemas/ConvertFileRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Task submitted",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/TaskStatusResponse" },
                "example": {
                  "task_id": "abc123-def456",
                  "status": "pending"
                }
              }
            }
          },
          "422": {
            "description": "Validation error",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/HTTPValidationError" }
              }
            }
          }
        }
      }
    },
    "/v1/status/poll/{task_id}": {
      "get": {
        "tags": ["tasks"],
        "summary": "Poll async task status",
        "description": "Returns the current status of an async conversion task.",
        "operationId": "pollTaskStatus",
        "parameters": [
          {
            "name": "task_id",
            "in": "path",
            "required": true,
            "schema": { "type": "string" },
            "description": "Task ID returned from an async endpoint"
          }
        ],
        "responses": {
          "200": {
            "description": "Task status",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/TaskStatusResponse" },
                "examples": {
                  "pending": {
                    "value": {
                      "task_id": "abc123-def456",
                      "status": "pending"
                    }
                  },
                  "started": {
                    "value": {
                      "task_id": "abc123-def456",
                      "status": "started"
                    }
                  },
                  "success": {
                    "value": {
                      "task_id": "abc123-def456",
                      "status": "success"
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/v1/result/{task_id}": {
      "get": {
        "tags": ["tasks"],
        "summary": "Retrieve async task result",
        "description": "Retrieves the result of a completed async task. Only call once\nthe task status is `success` or `partial_success`.\n",
        "operationId": "getTaskResult",
        "parameters": [
          {
            "name": "task_id",
            "in": "path",
            "required": true,
            "schema": { "type": "string" },
            "description": "Task ID returned from an async endpoint"
          }
        ],
        "responses": {
          "200": {
            "description": "Conversion result",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ConvertDocumentResponse" }
              }
            }
          }
        }
      }
    },
    "/v1/chunk/hybrid/file": {
      "post": {
        "tags": ["chunk"],
        "summary": "Hybrid chunking from uploaded file",
        "description": "Converts a file and applies the HybridChunker, which produces\nsemantically meaningful chunks optimized for RAG retrieval.\nNot currently used by haiven-knowledge (which does its own chunking)\nbut available for direct RAG pipeline use.\n",
        "operationId": "chunkHybridFile",
        "requestBody": {
          "required": true,
          "content": {
            "multipart/form-data": {
              "schema": { "$ref": "#/components/schemas/ChunkFileRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Chunked document",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ChunkDocumentResponse" }
              }
            }
          }
        }
      }
    },
    "/v1/chunk/hybrid/source": {
      "post": {
        "tags": ["chunk"],
        "summary": "Hybrid chunking from URL",
        "description": "Converts a document from URL and applies HybridChunker.",
        "operationId": "chunkHybridSource",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/ConvertDocumentsRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Chunked document",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ChunkDocumentResponse" }
              }
            }
          }
        }
      }
    },
    "/v1/chunk/hierarchical/file": {
      "post": {
        "tags": ["chunk"],
        "summary": "Hierarchical chunking from uploaded file",
        "description": "Converts a file and applies the HierarchicalChunker, which preserves\ndocument structure (headings, sections, subsections) in the chunk\nhierarchy.\n",
        "operationId": "chunkHierarchicalFile",
        "requestBody": {
          "required": true,
          "content": {
            "multipart/form-data": {
              "schema": { "$ref": "#/components/schemas/ChunkFileRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Chunked document",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ChunkDocumentResponse" }
              }
            }
          }
        }
      }
    },
    "/v1/chunk/hierarchical/source": {
      "post": {
        "tags": ["chunk"],
        "summary": "Hierarchical chunking from URL",
        "description": "Converts a document from URL and applies HierarchicalChunker.",
        "operationId": "chunkHierarchicalSource",
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/ConvertDocumentsRequest" }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Chunked document",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ChunkDocumentResponse" }
              }
            }
          }
        }
      }
    },
    "/v1/clear/converters": {
      "get": {
        "tags": ["memory"],
        "summary": "Clear converter cache",
        "description": "Frees memory held by cached converter instances. Converters are\nkept warm between requests to reduce startup latency. Call this\nto reclaim memory if the service is under memory pressure.\n",
        "operationId": "clearConverters",
        "responses": {
          "200": {
            "description": "Converters cleared",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ClearResponse" }
              }
            }
          }
        }
      }
    },
    "/v1/clear/results": {
      "get": {
        "tags": ["memory"],
        "summary": "Clear async result cache",
        "description": "Frees memory held by completed async task results.",
        "operationId": "clearResults",
        "responses": {
          "200": {
            "description": "Results cleared",
            "content": {
              "application/json": {
                "schema": { "$ref": "#/components/schemas/ClearResponse" }
              }
            }
          }
        }
      }
    },
    "/v1/memory/stats": {
      "get": {
        "tags": ["memory"],
        "summary": "Memory usage statistics",
        "description": "Returns current memory usage for converters and result cache.",
        "operationId": "memoryStats",
        "responses": {
          "200": {
            "description": "Memory stats",
            "content": {
              "application/json": {
                "schema": { "type": "object", "additionalProperties": true }
              }
            }
          }
        }
      }
    },
    "/v1/memory/counts": {
      "get": {
        "tags": ["memory"],
        "summary": "Converter and result counts",
        "description": "Returns count of active converters and cached results.",
        "operationId": "memoryCounts",
        "responses": {
          "200": {
            "description": "Memory counts",
            "content": {
              "application/json": {
                "schema": { "type": "object", "additionalProperties": true }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "HealthCheckResponse": {
        "type": "object",
        "properties": {
          "status": { "type": "string", "default": "ok" }
        },
        "example": { "status": "ok" }
      },
      "ConvertDocumentsRequest": {
        "type": "object",
        "description": "Request body for URL-based conversion",
        "properties": {
          "http_sources": {
            "type": "array",
            "description": "List of URLs to convert",
            "items": { "$ref": "#/components/schemas/HttpSourceRequest" }
          },
          "options": { "$ref": "#/components/schemas/ConvertDocumentsRequestOptions" }
        },
        "required": []
      },
      "HttpSourceRequest": {
        "type": "object",
        "properties": {
          "url": {
            "type": "string",
            "format": "uri",
            "description": "URL of the document to convert"
          }
        },
        "required": ["url"]
      },
      "ConvertFileRequest": {
        "type": "object",
        "description": "Multipart form data for file upload conversion",
        "properties": {
          "files": {
            "type": "array",
            "description": "One or more files to convert",
            "items": { "type": "string", "format": "binary" }
          },
          "options": {
            "type": "string",
            "description": "JSON string of ConvertDocumentsRequestOptions.\nExample: `{\"do_ocr\": false, \"to_formats\": [\"md\"]}`\n"
          }
        }
      },
      "ChunkFileRequest": {
        "type": "object",
        "description": "Multipart form data for file chunking",
        "properties": {
          "files": {
            "type": "array",
            "items": { "type": "string", "format": "binary" }
          },
          "options": {
            "type": "string",
            "description": "JSON string of chunker options"
          }
        }
      },
      "ConvertDocumentsRequestOptions": {
        "type": "object",
        "description": "Conversion options",
        "properties": {
          "from_formats": {
            "type": "array",
            "description": "Input formats to accept (defaults to all supported formats)",
            "items": { "$ref": "#/components/schemas/InputFormat" }
          },
          "to_formats": {
            "type": "array",
            "description": "Output formats to generate",
            "items": { "$ref": "#/components/schemas/OutputFormat" },
            "default": ["md"]
          },
          "do_ocr": {
            "type": "boolean",
            "description": "Enable OCR for images and scanned pages",
            "default": true
          },
          "force_ocr": {
            "type": "boolean",
            "description": "Run OCR on all pages, even text-native PDFs",
            "default": false
          },
          "ocr_engine": {
            "type": "string",
            "description": "OCR engine to use",
            "enum": ["easyocr", "tesseract"],
            "default": "easyocr"
          },
          "pdf_backend": {
            "type": "string",
            "description": "PDF parsing backend",
            "enum": ["docling_parse", "pypdfium2"],
            "default": "docling_parse"
          },
          "table_mode": {
            "type": "string",
            "description": "Table extraction mode",
            "enum": ["accurate", "fast"],
            "default": "accurate"
          },
          "table_cell_matching": {
            "type": "boolean",
            "description": "Enable table cell matching for better structure",
            "default": true
          },
          "pipeline": {
            "type": "string",
            "description": "Processing pipeline",
            "enum": ["standard", "vlm"],
            "default": "standard"
          },
          "image_export_mode": {
            "type": "string",
            "description": "How to handle images in output",
            "enum": ["embedded", "referenced", "placeholder"],
            "default": "embedded"
          }
        }
      },
      "InputFormat": {
        "type": "string",
        "description": "Document format supported for input",
        "enum": [
          "docx",
          "pptx",
          "html",
          "image",
          "pdf",
          "asciidoc",
          "md",
          "csv",
          "xlsx",
          "xml_uspto",
          "xml_jats",
          "mets_gbs",
          "json_docling",
          "audio",
          "vtt",
          "latex"
        ]
      },
      "OutputFormat": {
        "type": "string",
        "description": "Document format for output",
        "enum": ["md", "json", "html", "text", "doctags"]
      },
      "ConvertDocumentResponse": {
        "type": "object",
        "required": ["document", "status", "processing_time"],
        "properties": {
          "document": { "$ref": "#/components/schemas/ExportDocumentResponse" },
          "status": { "$ref": "#/components/schemas/ConversionStatus" },
          "errors": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/ErrorItem" },
            "default": []
          },
          "processing_time": {
            "type": "number",
            "description": "Total conversion time in seconds"
          },
          "timings": {
            "type": "object",
            "description": "Per-stage timing breakdown",
            "additionalProperties": { "type": "object" }
          }
        }
      },
      "ExportDocumentResponse": {
        "type": "object",
        "properties": {
          "filename": {
            "type": "string",
            "description": "Original filename"
          },
          "md_content": {
            "type": ["string", "null"],
            "description": "Converted Markdown content (populated when to_formats includes \"md\")"
          },
          "json_content": {
            "type": ["object", "null"],
            "description": "Structured DoclingDocument JSON (populated when to_formats includes \"json\")"
          },
          "html_content": {
            "type": ["string", "null"],
            "description": "HTML content (populated when to_formats includes \"html\")"
          }
        }
      },
      "ConversionStatus": {
        "type": "string",
        "enum": [
          "pending",
          "started",
          "failure",
          "success",
          "partial_success",
          "skipped"
        ],
        "description": "Conversion outcome status:\n- `success`: Fully converted\n- `partial_success`: Some elements failed but content was extracted\n- `failure`: Conversion failed — check `errors`\n- `skipped`: File type not supported or file was empty\n- `pending`/`started`: Async task states (not returned in sync responses)\n"
      },
      "ErrorItem": {
        "type": "object",
        "properties": {
          "component": { "type": "string" },
          "message": { "type": "string" },
          "error_code": { "type": "string" }
        }
      },
      "TaskStatusResponse": {
        "type": "object",
        "properties": {
          "task_id": {
            "type": "string",
            "description": "Unique task identifier for async operations"
          },
          "status": { "$ref": "#/components/schemas/ConversionStatus" }
        }
      },
      "ChunkDocumentResponse": {
        "type": "object",
        "properties": {
          "chunks": {
            "type": "array",
            "description": "List of document chunks",
            "items": { "$ref": "#/components/schemas/ChunkedDocumentResultItem" }
          },
          "status": { "$ref": "#/components/schemas/ConversionStatus" }
        }
      },
      "ChunkedDocumentResultItem": {
        "type": "object",
        "properties": {
          "text": {
            "type": "string",
            "description": "Chunk text content"
          },
          "meta": {
            "type": "object",
            "description": "Chunk metadata (headings path, page number, etc.)",
            "additionalProperties": true
          }
        }
      },
      "ClearResponse": {
        "type": "object",
        "properties": {
          "cleared": {
            "type": "integer",
            "description": "Number of items cleared"
          },
          "message": { "type": "string" }
        }
      },
      "HTTPValidationError": {
        "type": "object",
        "properties": {
          "detail": {
            "type": "array",
            "items": { "$ref": "#/components/schemas/ValidationError" }
          }
        }
      },
      "ValidationError": {
        "type": "object",
        "required": ["loc", "msg", "type"],
        "properties": {
          "loc": {
            "type": "array",
            "items": {
              "oneOf": [
                { "type": "string" },
                { "type": "integer" }
              ]
            }
          },
          "msg": { "type": "string" },
          "type": { "type": "string" }
        }
      }
    }
  }
}