---
audience: engineers
summary: "End-to-end ScaiCore example \u2014 an invoice-extraction flow over PDFs,\
  \ with structured outputs, validation, and a HITL approval step."
title: 'Example: invoice processor'
path: reference/language/examples/invoice-processor
status: published
---

An end-to-end invoice processing system that combines OCR text extraction,
AI-powered field extraction, validation, vendor matching, line-item
categorization, and a multi-level approval workflow.

```scaicore
// ============================================================================
// invoice-processor/main.scaicore
// ============================================================================

// Core definition for the invoice pipeline. Declares the plugins, LLM
// profiles, shared memory, read-only reference data, tunable parameters,
// and the triggers that start the process_invoice flow.
@core InvoiceProcessor {
    version = "1.0.0"
    description = "Processes incoming invoices from intake to booking"

    // Stateless: any worker handles any invoice, no per-entity state
    instance = :stateless

    // Plugin aliases; the flows call them as scaidrive.*, scaisend.*, ocr.* and erp.*
    @plugins {
        scaidrive = scailabs/scaidrive@1.0
        scaisend = scailabs/scaisend@1.0
        ocr = scailabs/document-ocr@2.0
        erp = company/exact-online@3.0
    }

    // Named LLM profiles. `fast` is selected explicitly (llm = fast) by the
    // categorization and e-mail drafting steps; the extraction step names no
    // profile — presumably it falls back to `primary`, TODO confirm.
    @llm {
        primary = {
            model = "scailabs/poolnoodle-omni"
            temperature = 0.3
            role = "extraction"
        }
        fast = {
            model = "scailabs/poolnoodle-mini"
            temperature = 0.2
            role = "classification"
        }
    }

    @memory {
        // Per-Core memory (shared across workers via atomic backend ops)
        // NOTE(review): process_invoice reads VendorProfile values out of
        // vendor_aliases, which does not match the declared string value
        // type — confirm which store is meant to hold vendor profiles.
        vendor_aliases: map[string, string]
        // Searched and appended to by the line-item categorization step
        categorization_history: array[CategorizationRecord]
        // Not read or written by the flows shown in this example
        approval_patterns: array[ApprovalPattern]
    }

    // Read-only data injected at deploy time — all workers see the same data
    @reference {
        // Passed to the categorization step as the set of valid GL codes
        gl_code_catalog: map[string, GLCodeEntry]
        vendor_whitelist: array[string]
        vat_rates: map[string, float]
    }

    @config {
        // process_invoice returns an :error result when the OCR confidence is below this value
        @param ocr_confidence_threshold: float = 0.5 @runtime_configurable {
            description = "Minimum OCR confidence to proceed"
            validation = value >= 0.0 && value <= 1.0
        }

        // The three limits below are compared against the invoice total, in
        // ascending order, to pick the approval level; totals above the
        // director limit go to :cfo.
        @param auto_approve_limit: money = 500.00 @hot_reload {
            description = "Maximum amount for auto-approval"
        }

        @param manager_approve_limit: money = 5000.00 @hot_reload {
            description = "Maximum amount for manager-level approval"
        }

        @param director_approve_limit: money = 25000.00 @hot_reload {
            description = "Maximum amount for director-level approval"
        }
    }

    // All three entry points start the same flow, process_invoice
    @triggers {
        @webhook email_invoice {
            flow = process_invoice
            path = "/webhooks/email"
        }
        @api submit_invoice {
            flow = process_invoice
        }
        @webhook scaidrive_upload {
            flow = process_invoice
            path = "/webhooks/scaidrive"
        }
    }
}


// ============================================================================
// Types
// ============================================================================

// Shared record types used by the flows, memory declarations and tests.
@types {
    // Descriptor of an uploaded document; input of process_invoice
    type RawInvoice = {
        file_id: string,
        filename: string,
        uploaded_by: string,
        uploaded_at: datetime,
        source: "email" | "api" | "watch"
    }

    // Structured invoice data produced by the AI extraction step
    type ExtractedInvoice = {
        vendor_name: string,
        vendor_vat: string?,
        // Vendor contact address if present on the document; request_more_info
        // sends to it and falls back to the finance mailbox when it is null
        vendor_email: string?,
        // ERP vendor identifier; assigned by the vendor enrichment step of
        // process_invoice rather than read from the document
        vendor_id: string?,
        invoice_number: string,
        invoice_date: date,
        due_date: date?,
        line_items: array[LineItem],
        subtotal: money,
        vat_amount: money,
        total: money,
        currency: string,
        po_reference: string?,
        confidence: float
    }

    type LineItem = {
        description: string,
        quantity: int,
        unit_price: money,
        total: money,
        category: string?
    }

    // Outcome of validate_invoice
    type ValidationResult = {
        valid: bool,
        errors: array[ValidationError],
        warnings: array[ValidationWarning],
        risk_score: float,
        requires_review: bool
    }

    type ValidationError = {
        code: string,
        field: string,
        message: string,
        severity: "error" | "warning"
    }

    type ValidationWarning = {
        code: string,
        message: string,
        suggestion: string?
    }

    // Vendor statistics; validate_invoice compares the invoice total against
    // average_invoice_amount
    type VendorProfile = {
        vendor_id: string,
        name: string,
        average_invoice_amount: money,
        typical_categories: array[string],
        payment_history: enum[good, fair, poor],
        total_invoices: int,
        flagged_invoices: int
    }

    type ApprovalDecision = {
        approved: bool,
        approver: string,
        level: enum[auto, manager, director, cfo],
        notes: string?,
        conditions: array[string]?,
        decided_at: datetime
    }

    // Value returned by process_invoice; only invoice_id and status are mandatory
    type ProcessingResult = {
        invoice_id: string,
        status: enum[processed, pending_approval, rejected, error],
        extracted_data: ExtractedInvoice?,
        validation: ValidationResult?,
        approval: ApprovalDecision?,
        erp_reference: string?,
        error_message: string?
    }

    // Entry of memory.categorization_history
    type CategorizationRecord = {
        vendor: string,
        description: string,
        assigned_category: string,
        gl_code: string,
        confirmed: bool
    }

    // Entry of memory.approval_patterns
    type ApprovalPattern = {
        vendor: string,
        amount_range: string,
        typical_approver: string,
        avg_approval_time: duration
    }
}


// ============================================================================
// Main Processing Flow
// ============================================================================

// Main pipeline: OCR the uploaded document, extract structured invoice data,
// enrich it with vendor information, categorize the line items, validate,
// then either book the invoice automatically or route it through a human
// approval checkpoint.
//
// raw: descriptor of the uploaded file; raw.file_id is used to download it.
// Returns a ProcessingResult whose status is :processed, :rejected,
// :pending_approval (the approver asked for more information) or :error.
@flow process_invoice(raw: RawInvoice): ProcessingResult {
    // NOTE(review): the approval checkpoint below may wait up to 48h — confirm
    // whether max_duration counts time spent suspended at a checkpoint.
    @budget {
        max_duration = 60s
        max_plugin_calls = 20
        on_exceeded = "warn"
    }

    // One identifier per run, so the emitted event and the returned result
    // always refer to the same invoice.
    invoice_id = generate_id()

    // Step 1: Extract text from document
    @rigid {
        file_content = scaidrive.download(raw.file_id)

        ocr_result = ocr.extract_text(
            content = file_content,
            options = { enhance: true, detect_tables: true }
        )

        // Stop early on unreadable scans; nothing is archived in that case
        if ocr_result.confidence < config.ocr_confidence_threshold {
            return {
                invoice_id = invoice_id,
                status = :error,
                error_message = "Document quality too low for processing (confidence: ${ocr_result.confidence})"
            }
        }

        // Archive original
        scaidrive.store(
            file = file_content,
            path = "/invoices/raw/${now().format('YYYY-MM')}/${raw.filename}"
        )
    }

    @debug {
        log.info("OCR confidence: ${ocr_result.confidence}")
        log.info("OCR text length: ${ocr_result.text.length}")
    }

    // Step 2: Extract structured invoice data using AI
    extracted = @guarded {
        goal = "Extract all invoice fields from the OCR text"
        input = {
            ocr_text = ocr_result.text,
            detected_tables = ocr_result.tables,
            filename = raw.filename
        }
        output = ExtractedInvoice

        guard = {
            ocr_result.text.length > 10
        }

        validate = {
            output.total > 0
            output.invoice_number.length > 0
            output.vendor_name.length > 0
            output.line_items.length > 0
        }

        guidance = """
            Extract invoice details carefully:
            - Look for invoice number patterns (INV-, #, No.)
            - Parse dates in various formats (DD/MM/YYYY, DD-Mon-YYYY, etc.)
            - Handle European formats (comma as decimal separator)
            - Extract all line items with their details
            - Calculate totals to verify they match stated amounts
            - If VAT number is partially visible, attempt reconstruction
        """

        examples = [
            {
                input = { ocr_text: "Invoice #12345\nDate: 01/15/2026\nTotal: €1.500,00" },
                output = { invoice_number: "12345", vendor_name: "detected from header", total: 1500.00, currency: "EUR" }
            }
        ]

        on_failure = {
            parse_error = retry(max = 2, with = "respond as structured data only")
            low_confidence = retry(max = 1, with = "focus on the most clearly visible fields")
        }

        on_validation_failure = retry(max = 2) | fail("Could not extract valid invoice data")
    }

    @debug {
        log.info("Extracted: ${extracted.vendor_name} / ${extracted.invoice_number} / ${extracted.total}")
    }

    // Step 3: Enrich with vendor data
    // NOTE(review): vendor_profile is typed VendorProfile? but is filled from
    // memory.vendor_aliases, which is declared map[string, string] — confirm
    // which store is meant to hold vendor profiles.
    @rigid {
        vendor_lookup = erp.find_vendor(
            name = extracted.vendor_name,
            fuzzy_match = true
        )

        vendor_profile: VendorProfile? = null

        if vendor_lookup.found {
            extracted.vendor_id = vendor_lookup.vendor_id
            vendor_profile = memory.vendor_aliases.get(vendor_lookup.vendor_id)
        } else {
            // Try memory for known aliases
            alias_match = memory.vendor_aliases.get(extracted.vendor_name)
            if alias_match != null {
                extracted.vendor_id = alias_match
                vendor_profile = memory.vendor_aliases.get(alias_match)
            }
        }
    }

    // Step 4: Categorize line items
    categorized_items = @foreach item in extracted.line_items {
        // Check memory first for known patterns
        @rigid {
            known = memory.categorization_history.search(
                query = "${extracted.vendor_name} ${item.description}",
                limit = 3,
                min_similarity = 0.85
            )
        }

        // Reuse a confirmed earlier categorization instead of calling the LLM
        if known.length > 0 && known[0].confirmed {
            yield {
                item = item,
                category = known[0].assigned_category,
                gl_code = known[0].gl_code,
                source = "memory"
            }
        } else {
            cat = @flexible {
                goal = "Categorize this invoice line item for accounting"
                llm = fast
                input = {
                    line_item = item,
                    vendor = extracted.vendor_name,
                    vendor_typical_categories = vendor_profile?.typical_categories,
                    recent_patterns = known,
                    valid_gl_codes = reference.gl_code_catalog
                }
                output = {
                    category: string,
                    gl_code: string,
                    confidence: float
                }

                on_failure = {
                    low_confidence = retry(max = 1, with = "pick the most likely category")
                }
            }

            // Remember for next time
            @rigid {
                memory.categorization_history.add({
                    vendor = extracted.vendor_name,
                    description = item.description,
                    assigned_category = cat.category,
                    gl_code = cat.gl_code,
                    confirmed = cat.confidence > 0.9
                })
            }

            yield {
                item = item,
                category = cat.category,
                gl_code = cat.gl_code,
                source = "ai"
            }
        }
    }

    // Step 5: Validate invoice
    validation = @call validate_invoice(extracted, vendor_profile)

    // Step 6: Determine approval level
    @rigid {
        approval_level = match extracted.total {
            amount if amount <= config.auto_approve_limit => :auto
            amount if amount <= config.manager_approve_limit => :manager
            amount if amount <= config.director_approve_limit => :director
            _ => :cfo
        }

        // Override if validation flags risk
        if validation.risk_score > 0.5 && approval_level == :auto {
            approval_level = :manager
        }
    }

    // Step 7: Route for approval
    if approval_level == :auto && validation.valid && !validation.requires_review {
        @rigid {
            erp_ref = erp.create_invoice(
                data = extracted,
                categories = categorized_items,
                auto_approved = true
            )
        }

        emit invoice_processed {
            invoice_id = invoice_id
            vendor = extracted.vendor_name
            total = extracted.total
            approval_level = :auto
            processed_at = now()
        }

        @rigid {
            return {
                invoice_id = invoice_id,
                status = :processed,
                extracted_data = extracted,
                validation = validation,
                approval = {
                    approved = true,
                    approver = "system",
                    level = :auto,
                    decided_at = now()
                },
                erp_reference = erp_ref
            }
        }
    } else {
        // Human approval needed
        approval = @checkpoint {
            type = "approval"
            assignee = get_approver(approval_level, extracted)
            timeout = 48h
            on_timeout = "escalate"

            present = {
                invoice = extracted,
                validation = validation,
                categories = categorized_items,
                risk_score = validation.risk_score,
                vendor_history = vendor_profile,
                approval_level = approval_level,
                runtime_usage = execution.usage.wall_clock_ms
            }

            options = ["approve", "reject", "request_info"]
        }

        match approval.decision {
            "approve" => {
                @rigid {
                    erp_ref = erp.create_invoice(
                        data = extracted,
                        categories = categorized_items,
                        approved_by = approval.approver
                    )
                }

                return {
                    invoice_id = invoice_id,
                    status = :processed,
                    extracted_data = extracted,
                    validation = validation,
                    approval = {
                        approved = true,
                        approver = approval.approver,
                        level = approval_level,
                        notes = approval.notes,
                        decided_at = now()
                    },
                    erp_reference = erp_ref
                }
            }
            "reject" => {
                return {
                    invoice_id = invoice_id,
                    status = :rejected,
                    extracted_data = extracted,
                    validation = validation,
                    approval = {
                        approved = false,
                        approver = approval.approver,
                        level = approval_level,
                        notes = approval.notes,
                        decided_at = now()
                    }
                }
            }
            "request_info" => {
                @call request_more_info(extracted, approval.notes)

                // No decision has been taken yet, so report the invoice as
                // still pending instead of ending the flow without a result.
                return {
                    invoice_id = invoice_id,
                    status = :pending_approval,
                    extracted_data = extracted,
                    validation = validation
                }
            }
            _ => {
                // Any decision outside the three offered options (for example
                // after a timeout escalation) is surfaced as an error.
                return {
                    invoice_id = invoice_id,
                    status = :error,
                    extracted_data = extracted,
                    validation = validation,
                    error_message = "Unexpected approval decision: ${approval.decision}"
                }
            }
        }
    }
}


// ============================================================================
// Validation Flow
// ============================================================================

// Runs consistency and risk checks on an extracted invoice.
//
// extracted: the invoice data to check.
// vendor:    vendor statistics, or null when the vendor is unknown (the
//            amount-anomaly check is skipped in that case).
// Returns a ValidationResult: `valid` is true when no errors were recorded,
// `risk_score` is the accumulated score capped at 1.0, and `requires_review`
// is set when the uncapped score exceeds 0.3 or more than two warnings exist.
@flow validate_invoice(extracted: ExtractedInvoice, vendor: VendorProfile?): ValidationResult {
    errors: array[ValidationError] = []
    warnings: array[ValidationWarning] = []
    risk_score: float = 0.0

    @rigid {
        // Check line items against the stated total. Line totals are commonly
        // net of VAT while `total` includes it, so the sum is accepted when it
        // matches the total either directly (VAT-inclusive lines) or after
        // adding vat_amount (net lines).
        line_items_sum = extracted.line_items.sum(item => item.total)
        matches_gross = (line_items_sum - extracted.total).abs() <= 0.01
        matches_net = (line_items_sum + extracted.vat_amount - extracted.total).abs() <= 0.01
        if !matches_gross && !matches_net {
            errors = errors.append({
                code = "TOTAL_MISMATCH",
                field = "total",
                message = "Line items sum (${line_items_sum}) doesn't match stated total (${extracted.total}), with or without VAT (${extracted.vat_amount})",
                severity = "error"
            })
            risk_score = risk_score + 0.3
        }

        // Check for duplicate invoice
        existing = erp.find_invoice(
            vendor = extracted.vendor_name,
            number = extracted.invoice_number
        )
        if existing != null {
            errors = errors.append({
                code = "DUPLICATE",
                field = "invoice_number",
                message = "Invoice ${extracted.invoice_number} already exists (ref: ${existing.id})",
                severity = "error"
            })
            risk_score = risk_score + 0.5
        }

        // Check against vendor history: flag totals above three times the vendor's average
        if vendor != null {
            if extracted.total > vendor.average_invoice_amount * 3 {
                warnings = warnings.append({
                    code = "AMOUNT_ANOMALY",
                    message = "Amount significantly higher than vendor average (${vendor.average_invoice_amount})",
                    suggestion = "Verify with vendor or requester"
                })
                risk_score = risk_score + 0.2
            }
        }

        // Check due date (warning only; does not change the risk score)
        if extracted.due_date != null && extracted.due_date < now().to_date() {
            warnings = warnings.append({
                code = "PAST_DUE",
                message = "Invoice due date (${extracted.due_date}) is in the past"
            })
        }
    }

    return {
        valid = errors.length == 0,
        errors = errors,
        warnings = warnings,
        risk_score = risk_score.min(1.0),
        requires_review = risk_score > 0.3 || warnings.length > 2
    }
}


// ============================================================================
// Utility Flows
// ============================================================================

// Resolves the assignee for an approval checkpoint.
//
// level:   approval tier selected by process_invoice.
// invoice: the extracted invoice, used to look up the manager or director.
// Returns the approver as a string; :cfo maps to a fixed address and every
// other level not listed (including :auto) maps to the finance team mailbox.
//
// NOTE(review): get_manager_for_department and get_director_for_vendor are
// not defined in this example — presumably provided elsewhere.
// NOTE(review): the :manager arm reads the first line item's category, which
// is declared optional (string?) and assumes line_items is non-empty —
// confirm the helper tolerates a missing category.
@flow get_approver(level: enum[auto, manager, director, cfo], invoice: ExtractedInvoice): string {
    @rigid {
        return match level {
            :manager => get_manager_for_department(invoice.line_items[0].category)
            :director => get_director_for_vendor(invoice.vendor_name)
            :cfo => "cfo@company.com"
            _ => "finance-team@company.com"
        }
    }
}

// Drafts and sends an e-mail asking for additional information about an invoice.
//
// invoice:       the extracted invoice the question is about.
// request_notes: what the approver wants to know; passed to the LLM as
//                `what_is_needed`.
// Declares no return value; its effect is the message sent through scaisend.
@flow request_more_info(invoice: ExtractedInvoice, request_notes: string) {
    // Let the `fast` LLM profile write the subject and body
    message = @flexible {
        goal = "Draft a polite email requesting additional information about this invoice"
        llm = fast
        input = {
            invoice_summary = "${invoice.vendor_name} / ${invoice.invoice_number} / ${invoice.total}",
            what_is_needed = request_notes
        }
        output = { subject: string, body: string }
    }

    @rigid {
        // Recipient is the invoice's vendor_email when set, otherwise the finance team mailbox
        scaisend.send(
            to = invoice.vendor_email ?? "finance-team@company.com",
            subject = message.subject,
            body = message.body
        )
    }
}


// ============================================================================
// Tests
// ============================================================================

// Happy-path test: a small, clean invoice is extracted and booked without
// human approval. The fixture total (423.50) is kept at or below the default
// auto_approve_limit (500.00) so that the approval-level match selects :auto;
// a larger total would send the flow to the approval checkpoint instead.
@test flow test_extracts_invoice_correctly {
    description = "Verifies invoice data extraction from OCR text"

    @given {
        raw = {
            file_id = "test-file-123",
            filename = "invoice_acme_jan.pdf",
            uploaded_by = "user@company.com",
            uploaded_at = now(),
            source = "api"
        }

        @mock plugin:scaidrive {
            download = { content: "mock pdf bytes" }
            store = { path: "/invoices/raw/test.pdf" }
        }

        // OCR confidence 0.94 is above the default ocr_confidence_threshold (0.5)
        @mock plugin:ocr {
            extract_text = {
                text: """
                    ACME Corporation
                    Invoice #INV-2026-0042
                    Date: January 15, 2026
                    Due: February 14, 2026

                    Description          Qty    Price      Total
                    Widget Type A        4      €50.00     €200.00
                    Widget Type B        2      €75.00     €150.00

                    Subtotal: €350.00
                    VAT 21%:  €73.50
                    Total:    €423.50
                """,
                confidence: 0.94,
                tables: []
            }
        }

        // Vendor is known and no invoice with this number exists yet
        @mock plugin:erp {
            find_vendor = { found: true, vendor_id: "V-001" }
            find_invoice = null
        }

        // Line totals are net: 200.00 + 150.00 = 350.00, plus 73.50 VAT = 423.50
        @mock llm {
            response = {
                vendor_name: "ACME Corporation",
                invoice_number: "INV-2026-0042",
                invoice_date: "2026-01-15",
                due_date: "2026-02-14",
                line_items: [
                    { description: "Widget Type A", quantity: 4, unit_price: 50.00, total: 200.00 },
                    { description: "Widget Type B", quantity: 2, unit_price: 75.00, total: 150.00 }
                ],
                subtotal: 350.00,
                vat_amount: 73.50,
                total: 423.50,
                currency: "EUR",
                confidence: 0.92
            }
        }
    }

    @when {
        result = @call process_invoice(raw)
    }

    @then {
        assert result.status == :processed
        assert result.extracted_data.vendor_name == "ACME Corporation"
        assert result.extracted_data.invoice_number == "INV-2026-0042"
        assert result.extracted_data.total == 423.50
        assert result.extracted_data.line_items.length == 2
        assert result.approval.level == :auto
    }
}

// Approval-path test: an invoice above the auto-approve limit stops at the
// approval checkpoint, which the test resolves with "approve". With the
// default limits, a total of 15000.00 lies above manager_approve_limit
// (5000.00) and within director_approve_limit (25000.00), hence :director.
@test flow test_high_value_invoice_requires_approval {
    description = "Invoices above auto-approve limit go to checkpoint"

    @given {
        raw = {
            file_id = "test-file-456",
            filename = "invoice_big.pdf",
            uploaded_by = "user@company.com",
            uploaded_at = now(),
            source = "api"
        }

        // ... mocks returning invoice with total = 15000.00 ...

        // Vendor is known and no invoice with this number exists yet
        @mock plugin:erp {
            find_vendor = { found: true, vendor_id: "V-002" }
            find_invoice = null
        }
    }

    @when {
        // The flow pauses at the checkpoint; resolve it, then wait for the final result
        execution = @call process_invoice(raw)

        @resolve_checkpoint execution.checkpoint_id {
            action = "approve"
            notes = "Confirmed with department head"
            user = "director@company.com"
        }

        result = @await execution
    }

    @then {
        assert result.status == :processed
        assert result.approval.level == :director
        // The approver recorded in the result is the user who resolved the checkpoint
        assert result.approval.approver == "director@company.com"
    }
}
```


---