Usage-Based SaaS: AI API Billing
A complete end-to-end example for an AI API platform that charges per token. This walkthrough covers everything from onboarding a new customer to issuing their first invoice.
Scenario: You’re running an LLM API service. You charge $0.000002 per input token and $0.000006 per output token. Customers send requests, you track token usage, and bill monthly.
Step 1: Create a Product and Per-Token Plan
import requests, uuid
# Connection settings shared by every request in this walkthrough.
BASE_URL = "https://api.bill.sh"
ADMIN_TOKEN = "bsk_live_your_admin_key"
headers = {"Authorization": f"Bearer {ADMIN_TOKEN}"}

# Register the product in the catalog.
# NOTE(review): `product` is not referenced again in this walkthrough —
# confirm whether the plan is supposed to be linked to it.
product_payload = {
    "name": "LLM API",
    "description": "Usage-based large language model API",
    "tags": ["llm", "api", "usage-based"],
}
product = requests.post(
    f"{BASE_URL}/admin/v1/catalog/products",
    headers=headers,
    json=product_payload,
).json()

# Create a monthly plan with a 14-day trial and keep its ID for the SKUs.
plan_payload = {
    "name": "Pay-As-You-Go",
    "description": "No monthly fee — pay only for what you use",
    "billing_cadence": "Monthly",
    "trial_days": 14,
}
plan = requests.post(
    f"{BASE_URL}/admin/v1/catalog/plans",
    headers=headers,
    json=plan_payload,
).json()
plan_id = plan["id"]

# One per-unit SKU per billed token type. Both are metered from the same
# "llm.completion" event and differ only in the property they read and in
# price. Prices are sent as strings in the API's "nanos" unit; in this
# walkthrough $0.000002 corresponds to 2_000_000 nanos.
sku_definitions = (
    # (SKU name, metered event property, unit price in nanos)
    ("Input Tokens", "input_tokens", "2000000"),    # $0.000002 per token
    ("Output Tokens", "output_tokens", "6000000"),  # $0.000006 per token
)
for sku_name, meter_property, unit_amount_nanos in sku_definitions:
    requests.post(
        f"{BASE_URL}/admin/v1/catalog/skus",
        headers=headers,
        json={
            "plan_id": plan_id,
            "name": sku_name,
            "pricing_model": "PerUnit",
            "unit_amount_nanos": unit_amount_nanos,
            "meter_event_type": "llm.completion",
            "meter_property": meter_property,
            "currency": "USD",
        },
    )

print(f"Plan ready: {plan_id}")
Step 2: Onboard a Customer
# Register the customer account and remember its ID.
customer_payload = {
    "display_name": "NeuralWorks Inc",
    "email": "billing@neuralworks.io",
    "currency": "USD",
    "account_type": "Organization",
}
customer = requests.post(
    f"{BASE_URL}/admin/v1/customers",
    headers=headers,
    json=customer_payload,
).json()
customer_id = customer["id"]

# Subscribe the customer to the plan, with a 14-day trial. The
# Idempotency-Key is derived from the customer ID, so repeating this
# request for the same customer sends the same key.
subscription_headers = {**headers, "Idempotency-Key": f"sub-{customer_id}-payg"}
subscription_payload = {
    "customer_id": customer_id,
    "plan_id": plan_id,
    "currency": "USD",
    "trial_days": 14,
}
subscription = requests.post(
    f"{BASE_URL}/v1/subscriptions",
    headers=subscription_headers,
    json=subscription_payload,
).json()
subscription_id = subscription["id"]

print(f"Customer {customer_id} subscribed: {subscription_id} ({subscription['status']})")
Step 3: Send Usage Events in Bulk
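Each time a customer makes an API call, emit a usage event. The helper below sends a single event per request; in production you’d instead batch events and flush them every few seconds, using the batch endpoint shown in the simulation that follows.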
def emit_completion_event(customer_id: str, subscription_id: str,
                          model: str, input_tokens: int, output_tokens: int,
                          timestamp: "str | None" = None) -> str:
    """Emit a usage event for one LLM completion and return its event ID.

    Args:
        customer_id: ID of the customer the usage is attributed to.
        subscription_id: ID of the subscription the usage is attributed to.
        model: Model name, stored in the event's ``properties``.
        input_tokens: Input token count, stored as ``input_tokens``.
        output_tokens: Output token count, stored as ``output_tokens``.
        timestamp: Event time as a ``YYYY-MM-DDTHH:MM:SSZ`` string. Defaults
            to the current UTC time, so each event is stamped with when it
            was emitted instead of one fixed instant.

    Returns:
        The generated event ID (``llm-<uuid4>``).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status, so a
            rejected event is not silently dropped.
    """
    from datetime import datetime, timezone

    if timestamp is None:
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # The same generated ID is sent as the event's "id" and as the
    # Idempotency-Key header.
    event_id = f"llm-{uuid.uuid4()}"
    response = requests.post(
        f"{BASE_URL}/v1/events",
        headers={**headers, "Idempotency-Key": event_id},
        json={
            "id": event_id,
            "event_type": "llm.completion",
            "customer_id": customer_id,
            "subscription_id": subscription_id,
            "timestamp": timestamp,
            "properties": {
                "model": model,
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
                "request_id": str(uuid.uuid4()),
            },
        },
    )
    response.raise_for_status()
    return event_id
# Simulate one day of API usage: 5,000 calls with varying token counts.
# Across the loop a call averages roughly 1,000 input and 250 output tokens.
CALLS_PER_DAY = 5000
BATCH_SIZE = 100  # events per /v1/events/batch request


def _send_batch(events: list) -> None:
    """POST one batch of usage events, raising if the API rejects it."""
    response = requests.post(f"{BASE_URL}/v1/events/batch",
                             headers=headers, json={"events": events})
    # Fail loudly: the totals printed below are only meaningful if every
    # batch was actually accepted.
    response.raise_for_status()


total_input = 0
total_output = 0
batch = []
for i in range(CALLS_PER_DAY):
    input_tokens = 800 + (i % 400)   # 800-1199 input tokens
    output_tokens = 150 + (i % 200)  # 150-349 output tokens
    total_input += input_tokens
    total_output += output_tokens
    batch.append({
        # The ID is derived from the loop index, so re-running the
        # simulation produces the same event IDs.
        "id": f"llm-day1-{i:05d}",
        "event_type": "llm.completion",
        "customer_id": customer_id,
        "subscription_id": subscription_id,
        "timestamp": "2026-02-28T14:30:00Z",
        "properties": {
            "model": "gpt-4o",
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
        },
    })
    if len(batch) == BATCH_SIZE:
        _send_batch(batch)
        batch = []

# Send whatever is left over when the call count is not a multiple of the
# batch size.
if batch:
    _send_batch(batch)

# Local estimate of the day's charges at the per-token list prices.
input_cost = total_input * 0.000002
output_cost = total_output * 0.000006
print(f"Day 1: {total_input:,} input tokens (${input_cost:.2f}) + "
      f"{total_output:,} output tokens (${output_cost:.2f}) = "
      f"${input_cost + output_cost:.2f}")
Step 4: Query Usage Summary
Check the running meter values at any point during the billing period:
# Fetch the running meter values for the subscription's current period.
usage = requests.get(
    f"{BASE_URL}/v1/subscriptions/{subscription_id}/usage",
    headers=headers,
).json()
print(f"Billing period: {usage['period_start']} → {usage['period_end']}")
for meter in usage["meters"]:
    print(f" {meter['meter_id']}: {int(meter['value']):,} {meter.get('unit', 'units')}")

# Calculate the estimated bill. Meters are matched by substring on their ID;
# a meter that is absent from the response is treated as zero usage rather
# than aborting with StopIteration.
input_meter = next((m for m in usage["meters"] if "input" in m["meter_id"]), None)
output_meter = next((m for m in usage["meters"] if "output" in m["meter_id"]), None)
input_total = int(input_meter["value"]) if input_meter is not None else 0
output_total = int(output_meter["value"]) if output_meter is not None else 0
est_cost = input_total * 0.000002 + output_total * 0.000006
print(f"\nEstimated invoice: ${est_cost:.2f}")
Step 5: Generate and Finalize the Invoice
At the end of the billing period (or on-demand):
# Trigger billing for the subscription. The Idempotency-Key embeds the
# subscription ID and the billing month.
requests.post(
    f"{BASE_URL}/admin/v1/subscriptions/{subscription_id}/bill",
    headers={**headers, "Idempotency-Key": f"bill-{subscription_id}-2026-02"},
)

# Find the Draft invoice for this customer.
invoices = requests.get(
    f"{BASE_URL}/admin/v1/invoices",
    headers=headers,
    params={"customer_id": customer_id, "status": "Draft"},
).json()
if not invoices:
    # Without this guard an empty result surfaces as an opaque IndexError.
    raise RuntimeError(
        f"No Draft invoice found for customer {customer_id}; "
        "check that the billing request above succeeded."
    )
invoice_id = invoices[0]["id"]

# Apply tax if configured. The response is deliberately not checked here.
requests.post(
    f"{BASE_URL}/admin/v1/invoices/{invoice_id}/calculate-tax",
    headers=headers,
)

# Finalize — assigns invoice number, begins payment collection
invoice = requests.post(
    f"{BASE_URL}/admin/v1/invoices/{invoice_id}/finalize",
    headers={**headers, "Idempotency-Key": f"finalize-{invoice_id}"},
).json()

# Amounts come back in the API's "nanos" unit; dividing by 1e12 converts
# them to dollars for display.
total_usd = int(invoice["total_nanos"]) / 1e12
print(f"Invoice {invoice['invoice_number']} — ${total_usd:.2f} — {invoice['status']}")

# List the non-tax line items.
for line in invoice.get("line_items", []):
    if not line.get("is_tax"):
        line_usd = int(line["amount_nanos"]) / 1e12
        print(f" {line['description']}: ${line_usd:.2f}")
What’s Next?
- Set spend alerts so customers know when they’re approaching their budget
- Use credit wallets for prepaid top-ups
- Configure webhooks to notify customers when their invoice is ready