Features:
- Add LinkedIn OAuth integration and auto-posting functionality
- Add scheduler service for automated post publishing
- Add metadata field to generated_posts for LinkedIn URLs
- Add privacy policy page for LinkedIn API compliance
- Add company management features and employee accounts
- Add license key system for company registrations

Fixes:
- Fix timezone issues (use UTC consistently across app)
- Fix datetime serialization errors in database operations
- Fix scheduling timezone conversion (local time to UTC)
- Fix import errors (get_database -> db)

Infrastructure:
- Update Docker setup to use port 8001 (avoid conflicts)
- Add SSL support with nginx-proxy and Let's Encrypt
- Add LinkedIn setup documentation
- Add migration scripts for schema updates

Services:
- Add linkedin_service.py for LinkedIn API integration
- Add scheduler_service.py for background job processing
- Add storage_service.py for Supabase Storage
- Add email_service.py improvements
- Add encryption utilities for token storage

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
"""Backfill api_usage_logs for all generated posts not yet tracked.
|
|
|
|
Assumes per post:
|
|
- 1x gpt-4o call: ~20,000 tokens (14,000 prompt + 6,000 completion)
|
|
- 1x gpt-4o-mini call: ~17,000 tokens (13,000 prompt + 4,000 completion)
|
|
|
|
Only processes posts older than 20 minutes whose created_at is not already
|
|
covered by an existing api_usage_log entry for the same customer.
|
|
"""
|
|
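# Usage (the filename below is illustrative; the sys.path setup further down
# assumes the script lives one level below the project root, e.g. in scripts/):
#
#   python scripts/backfill_api_usage_logs.py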
import asyncio
import os
import sys
from datetime import datetime, timedelta, timezone

# Add project root to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from src.config import estimate_cost
from src.database.client import db

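# estimate_cost(model, prompt_tokens, completion_tokens) returns an estimated
# cost in USD as a float (imported from src.config above).
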
# ── Estimated token splits per post ──────────────────────────────
GPT4O_PROMPT = 14_000
GPT4O_COMP = 6_000
GPT4O_TOTAL = GPT4O_PROMPT + GPT4O_COMP  # 20,000

MINI_PROMPT = 13_000
MINI_COMP = 4_000
MINI_TOTAL = MINI_PROMPT + MINI_COMP  # 17,000


async def main():
    cutoff = datetime.now(timezone.utc) - timedelta(minutes=20)
    print(f"Cutoff: posts created before {cutoff.isoformat()}")

    # ── 1. Load all generated posts ──────────────────────────────
    customers = await db.list_customers()
    print(f"Found {len(customers)} customers")

    all_posts = []
    for cust in customers:
        posts = await db.get_generated_posts(cust.id)
        all_posts.extend(posts)
    print(f"Found {len(all_posts)} total generated posts")

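    # Naive created_at values are assumed to already be in UTC; the
    # .replace(tzinfo=...) below tags them without shifting the clock time.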
    # Filter to posts older than 20 min
    eligible = [
        p for p in all_posts
        if p.created_at and p.created_at.replace(tzinfo=timezone.utc) < cutoff
    ]
    print(f"{len(eligible)} posts older than 20 min")

    # ── 2. Load existing logs to avoid duplicates ────────────────
    try:
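        # db.client appears to be a synchronous (Supabase-style) query builder,
        # so it runs in a worker thread to keep the event loop responsive.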
existing_logs = await asyncio.to_thread(
|
|
lambda: db.client.table("api_usage_logs")
|
|
.select("customer_id, created_at")
|
|
.eq("operation", "post_creation_backfill")
|
|
.execute()
|
|
)
|
|
already_logged = set()
|
|
for log in existing_logs.data:
|
|
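            # [:19] keeps "YYYY-MM-DDTHH:MM:SS" (second precision), so
            # fractional seconds or "+00:00" suffixes can't break matching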
            key = (log.get("customer_id"), log.get("created_at", "")[:19])
            already_logged.add(key)
        print(f"{len(already_logged)} existing backfill entries found")
    except Exception as e:
        print(f"Could not read existing logs (table may be new): {e}")
        already_logged = set()

    # ── 3. Build customer → user_id / company_id map ─────────────
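    # UUID fields are stringified so the rows serialize cleanly to JSON
    # when inserted (assumption: the DB client sends JSON payloads).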
    cust_map = {}
    for cust in customers:
        cust_map[str(cust.id)] = {
            "user_id": str(cust.user_id) if cust.user_id else None,
            "company_id": str(cust.company_id) if cust.company_id else None,
        }

    # ── 4. Insert two log rows per post ──────────────────────────
    inserted = 0
    skipped = 0

    for post in eligible:
        cid = str(post.customer_id)
        ts = post.created_at.isoformat()[:19] if post.created_at else ""
        key = (cid, ts)
        if key in already_logged:
            skipped += 1
            continue

        ids = cust_map.get(cid, {})
        user_id = ids.get("user_id")
        company_id = ids.get("company_id")

        base = {
            "customer_id": cid,
            "operation": "post_creation_backfill",
            "created_at": post.created_at.isoformat() if post.created_at else None,
        }
        if user_id:
            base["user_id"] = user_id
        if company_id:
            base["company_id"] = company_id

        # Row 1: gpt-4o
        gpt4o_cost = estimate_cost("gpt-4o", GPT4O_PROMPT, GPT4O_COMP)
        row_4o = {
            **base,
            "provider": "openai",
            "model": "gpt-4o",
            "prompt_tokens": GPT4O_PROMPT,
            "completion_tokens": GPT4O_COMP,
            "total_tokens": GPT4O_TOTAL,
            "estimated_cost_usd": round(gpt4o_cost, 6),
        }

        # Row 2: gpt-4o-mini
        mini_cost = estimate_cost("gpt-4o-mini", MINI_PROMPT, MINI_COMP)
        row_mini = {
            **base,
            "provider": "openai",
            "model": "gpt-4o-mini",
            "prompt_tokens": MINI_PROMPT,
            "completion_tokens": MINI_COMP,
            "total_tokens": MINI_TOTAL,
            "estimated_cost_usd": round(mini_cost, 6),
        }

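        # Default args bind the current rows at lambda-definition time; a bare
        # closure would read row_4o/row_mini when the worker thread runs.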
        try:
            await asyncio.to_thread(
                lambda r1=row_4o, r2=row_mini: db.client.table("api_usage_logs")
                .insert([r1, r2]).execute()
            )
            inserted += 2
            name = post.topic_title[:40] if post.topic_title else "?"
            print(f" + {name} (gpt-4o ${gpt4o_cost:.4f} + mini ${mini_cost:.4f})")
        except Exception as e:
            print(f" ! Error for post {post.id}: {e}")

    print(f"\nDone: {inserted} log rows inserted, {skipped} posts skipped (already backfilled)")
    print(f"Estimated totals per post: gpt-4o ${estimate_cost('gpt-4o', GPT4O_PROMPT, GPT4O_COMP):.4f} + mini ${estimate_cost('gpt-4o-mini', MINI_PROMPT, MINI_COMP):.4f}")


if __name__ == "__main__":
    asyncio.run(main())