Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

Usage-Based SaaS: AI API Billing

A complete end-to-end example for an AI API platform that charges per token. This walkthrough covers everything from onboarding a new customer to issuing their first invoice.

Scenario: You’re running an LLM API service. You charge $0.000002 per input token and $0.000006 per output token. Customers send requests, you track token usage, and bill monthly.


Step 1: Create a Product and Per-Token Plan

import requests, uuid

# Connection settings shared by every request in this walkthrough.
BASE_URL = "https://api.bill.sh"
ADMIN_TOKEN = "bsk_live_your_admin_key"

# All calls authenticate with the same bearer token.
headers = {"Authorization": f"Bearer {ADMIN_TOKEN}"}

# Register the product in the catalog.
# NOTE(review): the product's id is never referenced when the plan is
# created below — confirm whether plans must be linked to a product.
product_payload = {
    "name": "LLM API",
    "description": "Usage-based large language model API",
    "tags": ["llm", "api", "usage-based"],
}
product = requests.post(
    f"{BASE_URL}/admin/v1/catalog/products",
    headers=headers,
    json=product_payload,
).json()

# Register a monthly plan that starts with a 14-day trial.
plan_payload = {
    "name": "Pay-As-You-Go",
    "description": "No monthly fee — pay only for what you use",
    "billing_cadence": "Monthly",
    "trial_days": 14,
}
plan = requests.post(
    f"{BASE_URL}/admin/v1/catalog/plans",
    headers=headers,
    json=plan_payload,
).json()
plan_id = plan["id"]

# Both SKUs are metered from the same "llm.completion" event type; they
# differ only in display name, unit price, and the event property metered.
# Unit prices are sent as strings: "2000000" stands for $0.000002 per token
# and "6000000" for $0.000006 per token.
sku_specs = (
    ("Input Tokens", "2000000", "input_tokens"),    # $0.000002 per token
    ("Output Tokens", "6000000", "output_tokens"),  # $0.000006 per token
)
for sku_name, unit_price, token_property in sku_specs:
    requests.post(
        f"{BASE_URL}/admin/v1/catalog/skus",
        headers=headers,
        json={
            "plan_id": plan_id,
            "name": sku_name,
            "pricing_model": "PerUnit",
            "unit_amount_nanos": unit_price,
            "meter_event_type": "llm.completion",
            "meter_property": token_property,
            "currency": "USD",
        },
    )
print(f"Plan ready: {plan_id}")

Step 2: Onboard a Customer

# Register the organization that will be billed.
customer_payload = {
    "display_name": "NeuralWorks Inc",
    "email": "billing@neuralworks.io",
    "currency": "USD",
    "account_type": "Organization",
}
customer_response = requests.post(
    f"{BASE_URL}/admin/v1/customers",
    headers=headers,
    json=customer_payload,
)
customer = customer_response.json()
customer_id = customer["id"]

# Subscribe the customer to the plan with a 14-day trial. The idempotency
# key is derived from the customer id, so it is the same on every run for
# this customer.
subscribe_headers = dict(headers)
subscribe_headers["Idempotency-Key"] = f"sub-{customer_id}-payg"
subscription_payload = {
    "customer_id": customer_id,
    "plan_id": plan_id,
    "currency": "USD",
    "trial_days": 14,
}
subscription = requests.post(
    f"{BASE_URL}/v1/subscriptions",
    headers=subscribe_headers,
    json=subscription_payload,
).json()
subscription_id = subscription["id"]
print(f"Customer {customer_id} subscribed: {subscription_id} ({subscription['status']})")

Step 3: Send Usage Events in Bulk

Each time a customer makes an API call, emit an event. In production you’d batch these and flush every few seconds.

def emit_completion_event(customer_id: str, subscription_id: str,
                          model: str, input_tokens: int, output_tokens: int,
                          timestamp: "str | None" = None) -> str:
    """Emit a usage event for an LLM completion and return the event ID.

    Args:
        customer_id: ID of the customer that made the API call.
        subscription_id: ID of the subscription the usage is billed against.
        model: Name of the model that served the request.
        input_tokens: Number of input tokens consumed by the call.
        output_tokens: Number of output tokens produced by the call.
        timestamp: Event time as a UTC string such as
            "2026-02-28T14:30:00Z". Defaults to the current UTC time so that
            each event is stamped with when it actually happened.

    Returns:
        The generated event ID, which is also sent as the Idempotency-Key.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within 10 seconds.
    """
    # Local import keeps this snippet self-contained.
    from datetime import datetime, timezone

    if timestamp is None:
        # Previously every event carried the same hard-coded timestamp.
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    event_id = f"llm-{uuid.uuid4()}"
    response = requests.post(
        f"{BASE_URL}/v1/events",
        headers={**headers, "Idempotency-Key": event_id},
        json={
            "id": event_id,
            "event_type": "llm.completion",
            "customer_id": customer_id,
            "subscription_id": subscription_id,
            "timestamp": timestamp,
            "properties": {
                "model": model,
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
                "request_id": str(uuid.uuid4()),
            },
        },
        timeout=10,  # requests waits indefinitely when no timeout is given
    )
    # Surface rejected events instead of returning an ID for usage that was
    # never recorded.
    response.raise_for_status()
    return event_id

# Simulate one day of API usage: 5,000 calls from a single customer.
# Per-call token counts cycle deterministically with the call index:
#   input tokens:  800 + (i % 400)  -> 800..1199
#   output tokens: 150 + (i % 200)  -> 150..349
CALLS_PER_DAY = 5000

# Build the full list of events first.
events = []
for call_index in range(CALLS_PER_DAY):
    events.append({
        "id": f"llm-day1-{call_index:05d}",
        "event_type": "llm.completion",
        "customer_id": customer_id,
        "subscription_id": subscription_id,
        "timestamp": "2026-02-28T14:30:00Z",
        "properties": {
            "model": "gpt-4o",
            "input_tokens": 800 + (call_index % 400),
            "output_tokens": 150 + (call_index % 200),
        },
    })

total_input = sum(event["properties"]["input_tokens"] for event in events)
total_output = sum(event["properties"]["output_tokens"] for event in events)

# Send the events in order, at most 100 per batch request; the final slice
# carries any remainder.
for start in range(0, len(events), 100):
    requests.post(
        f"{BASE_URL}/v1/events/batch",
        headers=headers,
        json={"events": events[start:start + 100]},
    )

# Local cost estimate at the per-token prices used in this guide.
input_cost = total_input * 0.000002
output_cost = total_output * 0.000006
print(f"Day 1: {total_input:,} input tokens (${input_cost:.2f}) + "
      f"{total_output:,} output tokens (${output_cost:.2f}) = "
      f"${input_cost + output_cost:.2f}")

Step 4: Query Usage Summary

Check the running meter values at any point during the billing period:

# Fetch the running meter values for the subscription.
usage_response = requests.get(
    f"{BASE_URL}/v1/subscriptions/{subscription_id}/usage",
    headers=headers,
)
usage = usage_response.json()

print(f"Billing period: {usage['period_start']} → {usage['period_end']}")
for meter in usage["meters"]:
    print(f"  {meter['meter_id']}: {int(meter['value']):,} {meter.get('unit', 'units')}")


def _first_meter_matching(keyword):
    """Return the first meter whose meter_id contains *keyword*."""
    return next(m for m in usage["meters"] if keyword in m["meter_id"])


# Estimate the bill from the meters at this guide's per-token prices.
input_meter = _first_meter_matching("input")
output_meter = _first_meter_matching("output")
est_input_cost = int(input_meter["value"]) * 0.000002
est_output_cost = int(output_meter["value"]) * 0.000006
est_cost = est_input_cost + est_output_cost
print(f"\nEstimated invoice: ${est_cost:.2f}")

Step 5: Generate and Finalize the Invoice

At the end of the billing period (or on-demand):

# Trigger billing for the subscription. The idempotency key is deterministic
# (subscription id + the 2026-02 period), so re-running sends the same key.
requests.post(
    f"{BASE_URL}/admin/v1/subscriptions/{subscription_id}/bill",
    headers={**headers, "Idempotency-Key": f"bill-{subscription_id}-2026-02"},
)

# Find the Draft invoice
invoices = requests.get(
    f"{BASE_URL}/admin/v1/invoices",
    headers=headers,
    params={"customer_id": customer_id, "status": "Draft"},
).json()
if not invoices:
    # Stop with a clear message instead of an opaque IndexError on
    # invoices[0] when billing produced no Draft invoice.
    raise SystemExit(f"No Draft invoice found for customer {customer_id}")
invoice_id = invoices[0]["id"]

# Apply tax if configured
requests.post(
    f"{BASE_URL}/admin/v1/invoices/{invoice_id}/calculate-tax",
    headers=headers,
)

# Finalize — assigns invoice number, begins payment collection
invoice = requests.post(
    f"{BASE_URL}/admin/v1/invoices/{invoice_id}/finalize",
    headers={**headers, "Idempotency-Key": f"finalize-{invoice_id}"},
).json()

# Amounts are converted to dollars by dividing by 1e12; this matches the SKU
# prices in this guide, where "2000000" stands for $0.000002.
total_usd = int(invoice["total_nanos"]) / 1e12
print(f"Invoice {invoice['invoice_number']} — ${total_usd:.2f} — {invoice['status']}")
for line in invoice.get("line_items", []):
    # Tax lines are skipped so only the usage charges are listed.
    if not line.get("is_tax"):
        line_usd = int(line["amount_nanos"]) / 1e12
        print(f"  {line['description']}: ${line_usd:.2f}")

What’s Next?

  • Set spend alerts so customers know when they’re approaching their budget
  • Use credit wallets for prepaid top-ups
  • Configure webhooks to notify customers when their invoice is ready