Documentation Index
Fetch the complete documentation index at: https://docs.unsiloed.ai/llms.txt
Use this file to discover all available pages before exploring further.
Overview
Unsiloed AI provides a powerful API for processing unstructured documents. You can:
Parse documents into structured Markdown and JSON
Extract data using custom schemas
Classify documents by type
Split multi-document files into separate documents
Prerequisites
Before you begin, you’ll need:
An Unsiloed AI account and API key
A document to process (PDF, DOCX, PPTX, image, etc.)
Python 3.7+ or Node.js 14+ (optional, for SDK usage)
Step 1: Get Your API Key
To get API access, sign up on Unsiloed AI. We'll get you set up with an API key and help you get started.
Keep your API key secure and never commit it to version control. Use environment variables to store it.
Step 2: Parse Your First Document
Choose your preferred language and run the example below:
import requests
import time

headers = {"api-key": "your-api-key"}

# Submit a document for parsing. The file is uploaded as multipart form data.
with open("document.pdf", "rb") as f:
    response = requests.post(
        "https://prod.visionapi.unsiloed.ai/parse",
        headers=headers,
        files={"file": ("document.pdf", f, "application/pdf")},
    )

if response.status_code != 200:
    print(f"Error: {response.status_code} - {response.text}")
    exit(1)

# A successful submission returns a job id; parsing runs asynchronously.
job_id = response.json()["job_id"]
print(f"Job submitted: {job_id}")

# Poll for results every 5 seconds until the job succeeds or fails.
while True:
    result = requests.get(
        f"https://prod.visionapi.unsiloed.ai/parse/{job_id}",
        headers=headers,
    ).json()
    print(f"Status: {result['status']}")
    if result["status"] == "Succeeded":
        break
    if result["status"] == "Failed":
        print(f"Error: {result.get('message', 'Unknown error')}")
        exit(1)
    time.sleep(5)

# Access the parsed content.
print(f"Total chunks: {result['total_chunks']}")

# Print the first 100 characters of each chunk's embed-ready markdown.
for chunk in result["chunks"]:
    print(f"\n--- {chunk['embed'][:100]} ---")
To extract specific fields from your document, define a JSON schema:
import requests
import json
import time

headers = {"api-key": "your-api-key"}

# Define the extraction schema using JSON Schema format. Each property's
# description guides the model toward the field you want extracted.
schema = {
    "type": "object",
    "properties": {
        "title": {
            "type": "string",
            "description": "Document title",
        },
        "date": {
            "type": "string",
            "description": "Document date",
        },
    },
    "required": ["title", "date"],
    "additionalProperties": False,
}

# Submit the extraction request: the PDF goes in `files`, the schema is
# sent as a JSON string in the `schema_data` form field.
with open("document.pdf", "rb") as f:
    response = requests.post(
        "https://prod.visionapi.unsiloed.ai/v2/extract",
        headers=headers,
        files={"pdf_file": ("document.pdf", f, "application/pdf")},
        data={"schema_data": json.dumps(schema)},
    )

if response.status_code != 200:
    print(f"Error: {response.status_code} - {response.text}")
    exit(1)

job_id = response.json()["job_id"]
print(f"Job submitted: {job_id}")

# Poll for results every 5 seconds. Note: the extraction API uses
# lowercase statuses ("completed"/"failed"), unlike the parse API.
while True:
    result = requests.get(
        f"https://prod.visionapi.unsiloed.ai/extract/{job_id}",
        headers=headers,
    ).json()
    print(f"Status: {result['status']}")
    if result["status"] == "completed":
        break
    if result["status"] == "failed":
        print(f"Error: {result.get('error', 'Unknown error')}")
        exit(1)
    time.sleep(5)

# Each extracted field carries a value plus a confidence score in [0, 1].
print(f"Title: {result['result']['title']['value']}")
print(f"Confidence: {result['result']['title']['score']:.2%}")
Understanding the Response
Parsing Response
The parsing API returns structured chunks with markdown, segments, and metadata:
Parsing Response Structure
{
  "job_id": "1699d429-9c2e-464e-b311-d4b68a8444b8",
  "status": "Succeeded",
  "file_name": "document.pdf",
  "total_chunks": 3,
  "page_count": 1,
  "created_at": "2026-01-05T15:06:27.966175Z",
  "started_at": "2026-01-05T15:06:28.130578Z",
  "finished_at": "2026-01-05T15:06:36.009842Z",
  "chunks": [
    {
      "chunk_id": "6b2eca3a-d14f-4164-ba9a-0a3a58fcaf45",
      "chunk_length": 118,
      "embed": "# Document Title\n\nThis is the parsed content...",
      "segments": [
        {
          "segment_id": "c60d89b1-373e-428d-9950-544e7c903b61",
          "segment_type": "Text",
          "markdown": "Document content here...",
          "html": "<p>Document content here...</p>",
          "bbox": {
            "left": 34.47,
            "top": 30.99,
            "width": 118.26,
            "height": 29.03
          },
          "page_number": 1,
          "page_width": 595.0,
          "page_height": 842.0,
          "confidence": 0.98
        }
      ]
    }
  ],
  "pdf_url": "https://s3.us-east-1.amazonaws.com/...",
  "metadata": {
    "segment_filter": "all"
  }
}
The extraction API returns extracted fields with confidence scores and bounding boxes:
Extraction Response Structure
Key Features:
Parsing : Returns chunks with markdown, HTML, segments, and layout information
Extraction : Returns structured fields with confidence scores and precise bounding boxes
Bounding boxes : Pixel-level coordinates for locating data in the original document
Confidence scores : Model confidence (0-1) for each extracted field
Page references : Page numbers where each field was found
Next Steps
Document Processing Learn about advanced parsing options and strategies
Data Extraction Deep dive into schema-based extraction
Classification Classify documents by type
API Reference Explore the complete API documentation
Common Use Cases
Parse documents into clean, hierarchical Markdown chunks optimized for embedding and retrieval. Preserve layout, tables, and images for accurate context. import requests, time
headers = {"api-key": "your-api-key"}

# Parse a document.
with open("document.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/parse",
        headers=headers,
        files={"file": ("document.pdf", f, "application/pdf")},
    )
job_id = resp.json()["job_id"]

# Poll every 5 seconds until parsing succeeds.
while True:
    result = requests.get(f"https://prod.visionapi.unsiloed.ai/parse/{job_id}", headers=headers).json()
    if result["status"] == "Succeeded":
        break
    time.sleep(5)

# Use chunks for RAG: embed each chunk and store it in a vector DB.
# (`embed` and `vector_db` are placeholders for your own embedding
# model and vector store.)
for chunk in result["chunks"]:
    embedding = embed(chunk["embed"])
    vector_db.insert(embedding, chunk["embed"])
Extract structured data from invoices with citations and confidence scores for validation workflows. import requests, json, time
headers = {"api-key": "your-api-key"}

# Schema for invoice fields, including a nested array of line items.
schema = {
    "type": "object",
    "properties": {
        "invoice_number": {"type": "string", "description": "Invoice number"},
        "total": {"type": "number", "description": "Total amount"},
        "line_items": {
            "type": "array",
            "description": "Line items",
            "items": {
                "type": "object",
                "properties": {
                    "description": {"type": "string"},
                    "amount": {"type": "number"},
                },
                "required": ["description", "amount"],
                "additionalProperties": False,
            },
        },
    },
    "required": ["invoice_number", "total"],
    "additionalProperties": False,
}

# Submit the invoice for schema-based extraction.
with open("invoice.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/v2/extract",
        headers=headers,
        files={"pdf_file": ("invoice.pdf", f, "application/pdf")},
        data={"schema_data": json.dumps(schema)},
    )
job_id = resp.json()["job_id"]

# Poll every 5 seconds until extraction completes.
while True:
    result = requests.get(f"https://prod.visionapi.unsiloed.ai/extract/{job_id}", headers=headers).json()
    if result["status"] == "completed":
        break
    time.sleep(5)

print(result["result"])
Parse legal documents while preserving structure, then extract key clauses and dates. import requests, json, time
headers = {"api-key": "your-api-key"}

# Step 1: parse the contract to get structured content.
with open("contract.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/parse",
        headers=headers,
        files={"file": ("contract.pdf", f, "application/pdf")},
    )
parse_job_id = resp.json()["job_id"]

# Poll every 5 seconds until parsing succeeds.
while True:
    parse_result = requests.get(f"https://prod.visionapi.unsiloed.ai/parse/{parse_job_id}", headers=headers).json()
    if parse_result["status"] == "Succeeded":
        break
    time.sleep(5)

# Step 2: extract specific clauses with a JSON schema.
schema = {
    "type": "object",
    "properties": {
        "parties": {"type": "array", "description": "Contract parties", "items": {"type": "string"}},
        "effective_date": {"type": "string", "description": "Effective date"},
        "termination_clause": {"type": "string", "description": "Termination clause"},
    },
    "required": ["parties", "effective_date", "termination_clause"],
    "additionalProperties": False,
}

with open("contract.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/v2/extract",
        headers=headers,
        files={"pdf_file": ("contract.pdf", f, "application/pdf")},
        data={"schema_data": json.dumps(schema)},
    )
extract_job_id = resp.json()["job_id"]

# Poll every 5 seconds until extraction completes.
while True:
    extract_result = requests.get(f"https://prod.visionapi.unsiloed.ai/extract/{extract_job_id}", headers=headers).json()
    if extract_result["status"] == "completed":
        break
    time.sleep(5)

print(extract_result["result"])
Document Classification & Routing
Classify documents to route them to appropriate processing pipelines. import requests, json, time
headers = {"api-key": "your-api-key"}

# Submit the document with the candidate categories as a JSON-encoded
# list in the `categories` form field.
with open("document.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/classify",
        headers=headers,
        files={"pdf_file": ("document.pdf", f, "application/pdf")},
        data={"categories": json.dumps([{"name": "invoice"}, {"name": "receipt"}, {"name": "contract"}, {"name": "form"}])},
    )
job_id = resp.json()["job_id"]

# Poll every 5 seconds until classification completes.
while True:
    result = requests.get(f"https://prod.visionapi.unsiloed.ai/classify/{job_id}", headers=headers).json()
    if result.get("status") == "completed":
        break
    time.sleep(5)

print(f"Document type: {result['result']['classification']}")
print(f"Confidence: {result['result']['confidence']}")
API Base URL
All API requests should be made to:
https://prod.visionapi.unsiloed.ai
Authentication is required using your API key in the api-key header.
Need Help?
Documentation Explore our comprehensive guides
API Reference View complete API docs
FAQ Common questions and answers