Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.unsiloed.ai/llms.txt

Use this file to discover all available pages before exploring further.

Overview

Unsiloed AI provides a powerful API for processing unstructured documents. You can:
  • Parse documents into structured Markdown and JSON
  • Extract data using custom schemas
  • Classify documents by type
  • Split multi-document files into separate documents

Prerequisites

Before you begin, you’ll need:
  1. An Unsiloed AI account and API key
  2. A document to process (PDF, DOCX, PPTX, image, etc.)
  3. Python 3.7+ or Node.js 14+ (optional, for SDK usage)

Step 1: Get Your API Key

To get API access, sign up on Unsiloed AI. We’ll get you set up with an API key and help you get started.
Keep your API key secure and never commit it to version control. Use environment variables to store it.

Step 2: Parse Your First Document

Choose your preferred language and run the example below:
import os
import sys
import time

import requests

# Read the key from the environment so it is never committed to version
# control; the placeholder keeps the snippet usable as a copy-paste template.
headers = {"api-key": os.environ.get("UNSILOED_API_KEY", "your-api-key")}

# Submit a document for parsing
with open("document.pdf", "rb") as f:
    response = requests.post(
        "https://prod.visionapi.unsiloed.ai/parse",
        headers=headers,
        files={"file": ("document.pdf", f, "application/pdf")},
        timeout=60,  # requests waits indefinitely unless a timeout is given
    )

if response.status_code != 200:
    print(f"Error: {response.status_code} - {response.text}")
    sys.exit(1)

job_id = response.json()["job_id"]
print(f"Job submitted: {job_id}")

# Poll for results every 5 seconds until the job succeeds or fails
while True:
    poll = requests.get(
        f"https://prod.visionapi.unsiloed.ai/parse/{job_id}",
        headers=headers,
        timeout=30,
    )
    # Check the HTTP status before decoding so an error response is reported
    # clearly instead of surfacing as a KeyError on "status".
    if poll.status_code != 200:
        print(f"Error: {poll.status_code} - {poll.text}")
        sys.exit(1)
    result = poll.json()
    print(f"Status: {result['status']}")
    if result["status"] == "Succeeded":
        break
    if result["status"] == "Failed":
        print(f"Error: {result.get('message', 'Unknown error')}")
        sys.exit(1)
    time.sleep(5)

# Access the parsed content
print(f"Total chunks: {result['total_chunks']}")

# Print the first 100 characters of each chunk's embed text
for chunk in result["chunks"]:
    print(f"\n--- {chunk['embed'][:100]} ---")

Step 3: Extract Structured Data

To extract specific fields from your document, define a JSON schema:
import json
import os
import sys
import time

import requests

# Read the key from the environment so it is never committed to version
# control; the placeholder keeps the snippet usable as a copy-paste template.
headers = {"api-key": os.environ.get("UNSILOED_API_KEY", "your-api-key")}

# Define extraction schema using JSON Schema format
schema = {
    "type": "object",
    "properties": {
        "title": {
            "type": "string",
            "description": "Document title",
        },
        "date": {
            "type": "string",
            "description": "Document date",
        },
    },
    "required": ["title", "date"],
    "additionalProperties": False,
}

# Submit extraction request; the schema travels as a JSON string form field
with open("document.pdf", "rb") as f:
    response = requests.post(
        "https://prod.visionapi.unsiloed.ai/v2/extract",
        headers=headers,
        files={"pdf_file": ("document.pdf", f, "application/pdf")},
        data={"schema_data": json.dumps(schema)},
        timeout=60,  # requests waits indefinitely unless a timeout is given
    )

if response.status_code != 200:
    print(f"Error: {response.status_code} - {response.text}")
    sys.exit(1)

job_id = response.json()["job_id"]
print(f"Job submitted: {job_id}")

# Poll for results every 5 seconds until the job completes or fails
while True:
    poll = requests.get(
        f"https://prod.visionapi.unsiloed.ai/extract/{job_id}",
        headers=headers,
        timeout=30,
    )
    # Check the HTTP status before decoding so an error response is reported
    # clearly instead of surfacing as a KeyError on "status".
    if poll.status_code != 200:
        print(f"Error: {poll.status_code} - {poll.text}")
        sys.exit(1)
    result = poll.json()
    print(f"Status: {result['status']}")
    if result["status"] == "completed":
        break
    if result["status"] == "failed":
        print(f"Error: {result.get('error', 'Unknown error')}")
        sys.exit(1)
    time.sleep(5)

# Access extracted data with confidence scores
print(f"Title: {result['result']['title']['value']}")
print(f"Confidence: {result['result']['title']['score']:.2%}")

Understanding the Response

Parsing Response

The parsing API returns structured chunks with markdown, segments, and metadata:
{
  "job_id": "1699d429-9c2e-464e-b311-d4b68a8444b8",
  "status": "Succeeded",
  "file_name": "document.pdf",
  "total_chunks": 3,
  "page_count": 1,
  "created_at": "2026-01-05T15:06:27.966175Z",
  "started_at": "2026-01-05T15:06:28.130578Z",
  "finished_at": "2026-01-05T15:06:36.009842Z",
  "chunks": [
    {
      "chunk_id": "6b2eca3a-d14f-4164-ba9a-0a3a58fcaf45",
      "chunk_length": 118,
      "embed": "# Document Title\n\nThis is the parsed content...",
      "segments": [
        {
          "segment_id": "c60d89b1-373e-428d-9950-544e7c903b61",
          "segment_type": "Text",
          "markdown": "Document content here...",
          "html": "<p>Document content here...</p>",
          "bbox": {
            "left": 34.47,
            "top": 30.99,
            "width": 118.26,
            "height": 29.03
          },
          "page_number": 1,
          "page_width": 595.0,
          "page_height": 842.0,
          "confidence": 0.98
        }
      ]
    }
  ],
  "pdf_url": "https://s3.us-east-1.amazonaws.com/...",
  "metadata": {
    "segment_filter": "all"
  }
}

Extraction Response

The extraction API returns extracted fields with confidence scores and bounding boxes:
{
  "job_id": "4943f2a3-7c99-46b9-90e8-c1c4b748a9bb",
  "status": "completed",
  "file_name": "invoice.pdf",
  "created_at": "2026-01-05T15:00:10.836401+00:00",
  "updated_at": "2026-01-05T15:00:53.123541+00:00",
  "result": {
    "invoice_number": {
      "value": "25G1TIZT00000999",
      "score": 0.8686260225487502,
      "page_no": 1,
      "bboxes": [
        {
          "bbox": [65, 251, 1136, 431],
          "type": "segment",
          "confidence": 0.7901723,
          "page_width": 1191.0,
          "page_height": 1684.0
        },
        {
          "bbox": [216, 385, 413, 400],
          "text": "25G1TIZT00000999",
          "type": "ocr",
          "confidence": null
        }
      ]
    },
    "invoice_date": {
      "value": "10/05/2025",
      "score": 0.9999994661137259,
      "page_no": 1,
      "bboxes": [
        {
          "bbox": [232, 407, 344, 422],
          "text": "10/05/2025",
          "type": "ocr",
          "confidence": null
        }
      ]
    },
    "total_amount": {
      "value": 346.5,
      "score": 0.9999993593737946,
      "page_no": 1,
      "bboxes": [
        {
          "bbox": [221, 892, 275, 894],
          "text": "346.5",
          "type": "ocr",
          "confidence": null
        }
      ]
    }
  }
}
Key Features:
  • Parsing: Returns chunks with markdown, HTML, segments, and layout information
  • Extraction: Returns structured fields with confidence scores and precise bounding boxes
  • Bounding boxes: Pixel-level coordinates for locating data in the original document
  • Confidence scores: Model confidence (0-1) for each extracted field
  • Page references: Page numbers where each field was found

Next Steps

Document Processing

Learn about advanced parsing options and strategies

Data Extraction

Deep dive into schema-based extraction

Classification

Classify documents by type

API Reference

Explore the complete API documentation

Common Use Cases

Extract structured data from invoices with citations and confidence scores for validation workflows.
import json
import os
import sys
import time

import requests

# Read the key from the environment so it is never committed to version
# control; the placeholder keeps the snippet usable as a copy-paste template.
headers = {"api-key": os.environ.get("UNSILOED_API_KEY", "your-api-key")}

# JSON Schema describing the invoice fields to extract
schema = {
    "type": "object",
    "properties": {
        "invoice_number": {"type": "string", "description": "Invoice number"},
        "total": {"type": "number", "description": "Total amount"},
        "line_items": {
            "type": "array",
            "description": "Line items",
            "items": {
                "type": "object",
                "properties": {
                    "description": {"type": "string"},
                    "amount": {"type": "number"},
                },
                "required": ["description", "amount"],
                "additionalProperties": False,
            },
        },
    },
    "required": ["invoice_number", "total"],
    "additionalProperties": False,
}

with open("invoice.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/v2/extract",
        headers=headers,
        files={"pdf_file": ("invoice.pdf", f, "application/pdf")},
        data={"schema_data": json.dumps(schema)},
        timeout=60,  # requests waits indefinitely unless a timeout is given
    )
# Surface HTTP errors here rather than as a KeyError on "job_id"
resp.raise_for_status()
job_id = resp.json()["job_id"]

# Poll every 5 seconds until the job completes or fails
while True:
    poll = requests.get(
        f"https://prod.visionapi.unsiloed.ai/extract/{job_id}",
        headers=headers,
        timeout=30,
    )
    poll.raise_for_status()
    result = poll.json()
    if result["status"] == "completed":
        break
    if result["status"] == "failed":
        # Stop instead of polling a failed job forever
        print(f"Error: {result.get('error', 'Unknown error')}")
        sys.exit(1)
    time.sleep(5)

print(result["result"])
Parse legal documents while preserving structure, then extract key clauses and dates.
import json
import os
import sys
import time

import requests

# Read the key from the environment so it is never committed to version
# control; the placeholder keeps the snippet usable as a copy-paste template.
headers = {"api-key": os.environ.get("UNSILOED_API_KEY", "your-api-key")}


def wait_for_job(url, done_status, failed_status, error_key):
    """Poll ``url`` every 5 seconds and return the job JSON once it is done.

    Exits with status 1 when the job reports ``failed_status``, printing the
    value stored under ``error_key`` (or "Unknown error" when absent). Raises
    ``requests.HTTPError`` when a poll returns a 4xx/5xx response.
    """
    while True:
        poll = requests.get(url, headers=headers, timeout=30)
        poll.raise_for_status()
        job = poll.json()
        if job["status"] == done_status:
            return job
        if job["status"] == failed_status:
            # Stop instead of polling a failed job forever
            print(f"Error: {job.get(error_key, 'Unknown error')}")
            sys.exit(1)
        time.sleep(5)


# First parse to get structured content
with open("contract.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/parse",
        headers=headers,
        files={"file": ("contract.pdf", f, "application/pdf")},
        timeout=60,  # requests waits indefinitely unless a timeout is given
    )
# Surface HTTP errors here rather than as a KeyError on "job_id"
resp.raise_for_status()
parse_job_id = resp.json()["job_id"]

# The parse API reports "Succeeded"/"Failed" and puts the error in "message"
parse_result = wait_for_job(
    f"https://prod.visionapi.unsiloed.ai/parse/{parse_job_id}",
    done_status="Succeeded",
    failed_status="Failed",
    error_key="message",
)

# Then extract specific clauses
schema = {
    "type": "object",
    "properties": {
        "parties": {
            "type": "array",
            "description": "Contract parties",
            "items": {"type": "string"},
        },
        "effective_date": {"type": "string", "description": "Effective date"},
        "termination_clause": {"type": "string", "description": "Termination clause"},
    },
    "required": ["parties", "effective_date", "termination_clause"],
    "additionalProperties": False,
}

with open("contract.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/v2/extract",
        headers=headers,
        files={"pdf_file": ("contract.pdf", f, "application/pdf")},
        data={"schema_data": json.dumps(schema)},
        timeout=60,
    )
resp.raise_for_status()
extract_job_id = resp.json()["job_id"]

# The extract API reports "completed"/"failed" and puts the error in "error"
extract_result = wait_for_job(
    f"https://prod.visionapi.unsiloed.ai/extract/{extract_job_id}",
    done_status="completed",
    failed_status="failed",
    error_key="error",
)

print(extract_result["result"])
Classify documents to route them to appropriate processing pipelines.
import json
import os
import sys
import time

import requests

# Read the key from the environment so it is never committed to version
# control; the placeholder keeps the snippet usable as a copy-paste template.
headers = {"api-key": os.environ.get("UNSILOED_API_KEY", "your-api-key")}

# Candidate categories, sent as a JSON-encoded form field
categories = [
    {"name": "invoice"},
    {"name": "receipt"},
    {"name": "contract"},
    {"name": "form"},
]

with open("document.pdf", "rb") as f:
    resp = requests.post(
        "https://prod.visionapi.unsiloed.ai/classify",
        headers=headers,
        files={"pdf_file": ("document.pdf", f, "application/pdf")},
        data={"categories": json.dumps(categories)},
        timeout=60,  # requests waits indefinitely unless a timeout is given
    )
# Surface HTTP errors here rather than as a KeyError on "job_id"
resp.raise_for_status()
job_id = resp.json()["job_id"]

# Poll every 5 seconds until the job completes
while True:
    poll = requests.get(
        f"https://prod.visionapi.unsiloed.ai/classify/{job_id}",
        headers=headers,
        timeout=30,
    )
    poll.raise_for_status()
    result = poll.json()
    if result.get("status") == "completed":
        break
    # NOTE(review): "failed"/"error" mirror the extract API's failure shape;
    # confirm the exact values returned by /classify.
    if result.get("status") == "failed":
        print(f"Error: {result.get('error', 'Unknown error')}")
        sys.exit(1)
    time.sleep(5)

print(f"Document type: {result['result']['classification']}")
print(f"Confidence: {result['result']['confidence']}")

API Base URL

All API requests should be made to:
https://prod.visionapi.unsiloed.ai
Every request must be authenticated by sending your API key in the api-key header.

Need Help?

Documentation

Explore our comprehensive guides

Support

Contact our team

API Reference

View complete API docs

FAQ

Common questions and answers