Skip to main content

Schema Extraction API

Create a new schema extraction job for a document.

Endpoint: POST /schema/extract

Request Headers

| Header | Type | Required | Description |
| --- | --- | --- | --- |
| Authorization | string | Yes | Bearer token authentication |
| project_id | string | Yes | Project identifier |

Request Body

SchemaExtractionRequest

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| file_id | string | Yes | ID of the uploaded file to process |
| document_schema | DocumentSchema | Yes | Schema definition for extraction |

DocumentSchema

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| name | string | Yes | Name of the schema |
| description | string | Yes | Description of the schema |
| fields | SchemaField[] | Yes | List of top-level fields |

SchemaField

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| name | string | Yes | Field name |
| description | string | Yes | Field description |
| type | string | Yes | One of: "string", "number", "email", "phone", "date", "object", "array" |
| required | boolean | Yes | Whether the field is required |
| fields | SchemaField[] | No | Nested fields for object and array types |

Example request body:

{
"file_id": "123e4567-e89b-12d3-a456-426614174000",
"document_schema": {
"name": "Invoice Schema",
"description": "Schema for processing invoice documents",
"fields": [
{
"name": "invoice_number",
"description": "Unique invoice identifier",
"type": "string",
"required": true
},
{
"name": "issue_date",
"description": "Date when invoice was issued",
"type": "date",
"required": true
},
{
"name": "line_items",
"description": "List of items in the invoice",
"type": "array",
"required": true,
"fields": [
{
"name": "description",
"description": "Item description",
"type": "string",
"required": true
},
{
"name": "amount",
"description": "Item amount",
"type": "number",
"required": true
}
]
}
]
}
}

Response

SchemaExtractionResponse

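| Field | Type | Required | Description |
| --- | --- | --- | --- |
| job_id | string | Yes | ID of the created extraction job |
| status | string | Yes | Current status of the job |
| created_at | datetime | Yes | When the job was created |
| result | object \| null | No | Extraction results |

Example response:

{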
"job_id": "98765432-abcd-efgh-ijkl-123456789000",
"status": "PENDING",
"created_at": "2024-01-01T12:00:00Z",
"result": null
}

Examples

cURL

curl -X POST 'https://api.example.com/schema/extract' \
-H 'Authorization: Bearer your-api-key' \
-H 'project_id: your-project-id' \
-H 'Content-Type: application/json' \
-d '{
"file_id": "123e4567-e89b-12d3-a456-426614174000",
"document_schema": {
"name": "Invoice Schema",
"description": "Schema for processing invoice documents",
"fields": [
{
"name": "invoice_number",
"description": "Unique invoice identifier",
"type": "string",
"required": true
},
{
"name": "total_amount",
"description": "Total invoice amount",
"type": "number",
"required": true
}
]
}
}'

Python

import requests

url = 'https://api.example.com/schema/extract'
headers = {
'Authorization': 'Bearer your-api-key',
'project_id': 'your-project-id'
}
payload = {
'file_id': '123e4567-e89b-12d3-a456-426614174000',
'document_schema': {
'name': 'Invoice Schema',
'description': 'Schema for processing invoice documents',
'fields': [
{
'name': 'invoice_number',
'description': 'Unique invoice identifier',
'type': 'string',
'required': True
},
{
'name': 'total_amount',
'description': 'Total invoice amount',
'type': 'number',
'required': True
}
]
}
}

response = requests.post(url, headers=headers, json=payload)
print(response.json())

Node.js

const axios = require("axios");

const url = "https://api.example.com/schema/extract";
const headers = {
Authorization: "Bearer your-api-key",
project_id: "your-project-id",
};
const payload = {
file_id: "123e4567-e89b-12d3-a456-426614174000",
document_schema: {
name: "Invoice Schema",
description: "Schema for processing invoice documents",
fields: [
{
name: "invoice_number",
description: "Unique invoice identifier",
type: "string",
required: true,
},
{
name: "total_amount",
description: "Total invoice amount",
type: "number",
required: true,
},
],
},
};

axios
.post(url, payload, { headers })
.then((response) => console.log(response.data))
.catch((error) => console.error(error));

Response Codes

| Code | Description |
| --- | --- |
| 201 | Job created successfully |
| 400 | Missing project_id header |
| 500 | Internal server error |