Onyva-Postling/scripts/backfill_usage_logs.py
Ruben Fischer f14515e9cf Major updates: LinkedIn auto-posting, timezone fixes, and Docker improvements
Features:
- Add LinkedIn OAuth integration and auto-posting functionality
- Add scheduler service for automated post publishing
- Add metadata field to generated_posts for LinkedIn URLs
- Add privacy policy page for LinkedIn API compliance
- Add company management features and employee accounts
- Add license key system for company registrations

Fixes:
- Fix timezone issues (use UTC consistently across app)
- Fix datetime serialization errors in database operations
- Fix scheduling timezone conversion (local time to UTC)
- Fix import errors (get_database -> db)

Infrastructure:
- Update Docker setup to use port 8001 (avoid conflicts)
- Add SSL support with nginx-proxy and Let's Encrypt
- Add LinkedIn setup documentation
- Add migration scripts for schema updates

Services:
- Add linkedin_service.py for LinkedIn API integration
- Add scheduler_service.py for background job processing
- Add storage_service.py for Supabase Storage
- Improve email_service.py
- Add encryption utilities for token storage

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-11 11:30:20 +01:00

146 lines · 5.2 KiB · Python

"""Backfill api_usage_logs for all generated posts not yet tracked.
Assumes per post:
- 1x gpt-4o call: ~20,000 tokens (14,000 prompt + 6,000 completion)
- 1x gpt-4o-mini call: ~17,000 tokens (13,000 prompt + 4,000 completion)
Only processes posts older than 20 minutes whose created_at is not already
covered by an existing api_usage_log entry for the same customer.
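
Run with `python scripts/backfill_usage_logs.py` from any working directory;
the script inserts the project root into sys.path itself.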
"""

import asyncio
import sys
import os
from datetime import datetime, timedelta, timezone

# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from src.config import estimate_cost
from src.database.client import db

# ── Estimated token splits per post ──────────────────────────────
GPT4O_PROMPT = 14_000
GPT4O_COMP = 6_000
GPT4O_TOTAL = GPT4O_PROMPT + GPT4O_COMP  # 20,000

MINI_PROMPT = 13_000
MINI_COMP = 4_000
MINI_TOTAL = MINI_PROMPT + MINI_COMP  # 17,000
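
# estimate_cost(model, prompt_tokens, completion_tokens) is expected to return
# the cost in USD as a float, based on the per-model pricing in src.config.
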
async def main():
    cutoff = datetime.now(timezone.utc) - timedelta(minutes=20)
    print(f"Cutoff: posts created before {cutoff.isoformat()}")

    # ── 1. Load all generated posts ──────────────────────────────
    customers = await db.list_customers()
    print(f"Found {len(customers)} customers")

    all_posts = []
    for cust in customers:
        posts = await db.get_generated_posts(cust.id)
        all_posts.extend(posts)
    print(f"Found {len(all_posts)} total generated posts")
    # Filter to posts older than 20 min
    eligible = [
        p for p in all_posts
        if p.created_at and p.created_at.replace(tzinfo=timezone.utc) < cutoff
    ]
    print(f"{len(eligible)} posts older than 20 min")

    # ── 2. Load existing logs to avoid duplicates ────────────────
    try:
        existing_logs = await asyncio.to_thread(
            lambda: db.client.table("api_usage_logs")
            .select("customer_id, created_at")
            .eq("operation", "post_creation_backfill")
            .execute()
        )
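        # Dedup key truncates timestamps to second precision
        # ("YYYY-MM-DDTHH:MM:SS"), matching the 19-char slice used for posts below.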
        already_logged = set()
        for log in existing_logs.data:
            # `or ""` guards against a null created_at in the returned row
            key = (log.get("customer_id"), (log.get("created_at") or "")[:19])
            already_logged.add(key)
        print(f"{len(already_logged)} existing backfill entries found")
    except Exception as e:
        print(f"Could not read existing logs (table may be new): {e}")
        already_logged = set()

    # ── 3. Build customer → user_id / company_id map ─────────────
    cust_map = {}
    for cust in customers:
        cust_map[str(cust.id)] = {
            "user_id": str(cust.user_id) if cust.user_id else None,
            "company_id": str(cust.company_id) if cust.company_id else None,
        }

    # ── 4. Insert two log rows per post ──────────────────────────
    inserted = 0
    skipped = 0
    for post in eligible:
        cid = str(post.customer_id)
        ts = post.created_at.isoformat()[:19] if post.created_at else ""
        key = (cid, ts)
        if key in already_logged:
            skipped += 1
            continue

        ids = cust_map.get(cid, {})
        user_id = ids.get("user_id")
        company_id = ids.get("company_id")

        base = {
            "customer_id": cid,
            "operation": "post_creation_backfill",
            "created_at": post.created_at.isoformat() if post.created_at else None,
        }
        if user_id:
            base["user_id"] = user_id
        if company_id:
            base["company_id"] = company_id

        # Row 1: gpt-4o
        gpt4o_cost = estimate_cost("gpt-4o", GPT4O_PROMPT, GPT4O_COMP)
        row_4o = {
            **base,
            "provider": "openai",
            "model": "gpt-4o",
            "prompt_tokens": GPT4O_PROMPT,
            "completion_tokens": GPT4O_COMP,
            "total_tokens": GPT4O_TOTAL,
            "estimated_cost_usd": round(gpt4o_cost, 6),
        }

        # Row 2: gpt-4o-mini
        mini_cost = estimate_cost("gpt-4o-mini", MINI_PROMPT, MINI_COMP)
        row_mini = {
            **base,
            "provider": "openai",
            "model": "gpt-4o-mini",
            "prompt_tokens": MINI_PROMPT,
            "completion_tokens": MINI_COMP,
            "total_tokens": MINI_TOTAL,
            "estimated_cost_usd": round(mini_cost, 6),
        }
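
        # Rows are bound as lambda defaults so the thread sees this iteration's
        # values rather than late-bound loop variables.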
        try:
            await asyncio.to_thread(
                lambda r1=row_4o, r2=row_mini: db.client.table("api_usage_logs")
                .insert([r1, r2]).execute()
            )
            inserted += 2
            name = post.topic_title[:40] if post.topic_title else "?"
            print(f" + {name} (gpt-4o ${gpt4o_cost:.4f} + mini ${mini_cost:.4f})")
        except Exception as e:
            print(f" ! Error for post {post.id}: {e}")

    print(f"\nDone: {inserted} log rows inserted, {skipped} posts skipped (already backfilled)")
    print(
        f"Estimated totals per post: "
        f"gpt-4o ${estimate_cost('gpt-4o', GPT4O_PROMPT, GPT4O_COMP):.4f} + "
        f"mini ${estimate_cost('gpt-4o-mini', MINI_PROMPT, MINI_COMP):.4f}"
    )

if __name__ == "__main__":
    asyncio.run(main())