"""Backfill api_usage_logs for all generated posts not yet tracked.

Assumes per post:
- 1x gpt-4o call:      ~20,000 tokens (14,000 prompt + 6,000 completion)
- 1x gpt-4o-mini call: ~17,000 tokens (13,000 prompt + 4,000 completion)

Only processes posts older than 20 minutes whose (customer, created_at)
pair is not already covered by an existing ``post_creation_backfill``
entry in api_usage_logs. Rows logged under any other operation are not
consulted.
"""

import asyncio
import sys
import os
from datetime import datetime, timedelta, timezone
from uuid import UUID

# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from src.config import estimate_cost
from src.database.client import db

# ── Estimated token splits per post ──────────────────────────────
GPT4O_PROMPT = 14_000
GPT4O_COMP = 6_000
GPT4O_TOTAL = GPT4O_PROMPT + GPT4O_COMP  # 20 000

MINI_PROMPT = 13_000
MINI_COMP = 4_000
MINI_TOTAL = MINI_PROMPT + MINI_COMP  # 17 000

# Value of the "operation" column that marks rows written by this script.
BACKFILL_OPERATION = "post_creation_backfill"

# Posts younger than this are left alone.
MIN_POST_AGE = timedelta(minutes=20)


def _as_utc(dt: datetime) -> datetime:
    """Return *dt* as an aware UTC datetime.

    Naive values are assumed to already be in UTC. Aware values are
    converted, so a non-UTC offset keeps pointing at the same instant
    instead of being relabelled.
    """
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)


def _usage_row(base: dict, model: str, prompt_tokens: int,
               completion_tokens: int, cost: float) -> dict:
    """Build one api_usage_logs row for *model* on top of the shared *base* fields."""
    return {
        **base,
        "provider": "openai",
        "model": model,
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
        "estimated_cost_usd": round(cost, 6),
    }


def _insert_rows(rows: list) -> None:
    """Insert *rows* into api_usage_logs with one blocking insert call."""
    db.client.table("api_usage_logs").insert(rows).execute()


async def _load_backfilled_keys() -> set:
    """Return the de-dup keys of backfill rows that already exist.

    Each key is ``(customer_id, created_at truncated to the second)``.
    If the table cannot be read, a message is printed and an empty set
    is returned so the backfill still runs.
    """
    # NOTE(review): if db.client is a Supabase/PostgREST client, an
    # un-paginated select may be capped by the server's max-rows setting;
    # confirm, and paginate if this table can grow past that cap.
    try:
        existing_logs = await asyncio.to_thread(
            lambda: db.client.table("api_usage_logs")
            .select("customer_id, created_at")
            .eq("operation", BACKFILL_OPERATION)
            .execute()
        )
    except Exception as e:
        # NOTE(review): continuing with an empty set means a transient read
        # failure re-inserts rows for every eligible post — consider aborting.
        print(f"Could not read existing logs (table may be new): {e}")
        return set()

    # Only the fetch is guarded above: a null created_at must not discard
    # every key that was read successfully.
    already_logged = {
        (log.get("customer_id"), str(log.get("created_at") or "")[:19])
        for log in (existing_logs.data or [])
    }
    print(f"{len(already_logged)} existing backfill entries found")
    return already_logged


async def main():
    """Insert estimated gpt-4o and gpt-4o-mini usage rows for untracked posts.

    Loads every customer's generated posts, keeps those older than
    ``MIN_POST_AGE``, skips the ones whose (customer, second) key already
    has a backfill row, and inserts two rows per remaining post. Insert
    failures are reported per post and do not stop the run.
    """
    cutoff = datetime.now(timezone.utc) - MIN_POST_AGE
    print(f"Cutoff: posts created before {cutoff.isoformat()}")

    # ── 1. Load all generated posts ──────────────────────────────
    customers = await db.list_customers()
    print(f"Found {len(customers)} customers")

    all_posts = []
    for cust in customers:
        all_posts.extend(await db.get_generated_posts(cust.id))
    print(f"Found {len(all_posts)} total generated posts")

    # Filter to posts older than 20 min
    eligible = [
        p for p in all_posts
        if p.created_at and _as_utc(p.created_at) < cutoff
    ]
    print(f"{len(eligible)} posts older than 20 min")

    # ── 2. Load existing logs to avoid duplicates ────────────────
    already_logged = await _load_backfilled_keys()

    # ── 3. Build customer → user_id / company_id map ─────────────
    cust_map = {
        str(cust.id): {
            "user_id": str(cust.user_id) if cust.user_id else None,
            "company_id": str(cust.company_id) if cust.company_id else None,
        }
        for cust in customers
    }

    # ── 4. Insert two log rows per post ──────────────────────────
    # The per-post estimates never change, so compute them once.
    gpt4o_cost = estimate_cost("gpt-4o", GPT4O_PROMPT, GPT4O_COMP)
    mini_cost = estimate_cost("gpt-4o-mini", MINI_PROMPT, MINI_COMP)

    inserted = 0
    skipped = 0

    for post in eligible:
        cid = str(post.customer_id)
        # `eligible` only contains posts with a created_at, so no None check.
        created_utc = _as_utc(post.created_at)

        # Same second-resolution key shape as _load_backfilled_keys().
        # NOTE(review): assumes the table returns created_at in UTC — confirm.
        if (cid, created_utc.isoformat()[:19]) in already_logged:
            skipped += 1
            continue

        ids = cust_map.get(cid, {})
        base = {
            "customer_id": cid,
            "operation": BACKFILL_OPERATION,
            # Explicit UTC offset so the stored instant is unambiguous.
            "created_at": created_utc.isoformat(),
        }
        # Optional ids are left out entirely rather than sent as null.
        if ids.get("user_id"):
            base["user_id"] = ids["user_id"]
        if ids.get("company_id"):
            base["company_id"] = ids["company_id"]

        row_4o = _usage_row(base, "gpt-4o", GPT4O_PROMPT, GPT4O_COMP, gpt4o_cost)
        row_mini = _usage_row(base, "gpt-4o-mini", MINI_PROMPT, MINI_COMP, mini_cost)

        try:
            await asyncio.to_thread(_insert_rows, [row_4o, row_mini])
        except Exception as e:
            print(f" ! Error for post {post.id}: {e}")
        else:
            inserted += 2
            name = post.topic_title[:40] if post.topic_title else "?"
            print(f" + {name} (gpt-4o ${gpt4o_cost:.4f} + mini ${mini_cost:.4f})")

    print(f"\nDone: {inserted} log rows inserted, {skipped} posts skipped (already backfilled)")
    print(f"Estimated totals per post: gpt-4o ${gpt4o_cost:.4f} + mini ${mini_cost:.4f}")


if __name__ == "__main__":
    asyncio.run(main())