Major updates: LinkedIn auto-posting, timezone fixes, and Docker improvements
Features:
- Add LinkedIn OAuth integration and auto-posting functionality
- Add scheduler service for automated post publishing
- Add metadata field to generated_posts for LinkedIn URLs
- Add privacy policy page for LinkedIn API compliance
- Add company management features and employee accounts
- Add license key system for company registrations

Fixes:
- Fix timezone issues (use UTC consistently across app)
- Fix datetime serialization errors in database operations
- Fix scheduling timezone conversion (local time to UTC)
- Fix import errors (get_database -> db)

Infrastructure:
- Update Docker setup to use port 8001 (avoid conflicts)
- Add SSL support with nginx-proxy and Let's Encrypt
- Add LinkedIn setup documentation
- Add migration scripts for schema updates

Services:
- Add linkedin_service.py for LinkedIn API integration
- Add scheduler_service.py for background job processing
- Add storage_service.py for Supabase Storage
- Add email_service.py improvements
- Add encryption utilities for token storage

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
145
scripts/backfill_usage_logs.py
Normal file
145
scripts/backfill_usage_logs.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Backfill api_usage_logs for all generated posts not yet tracked.
|
||||
|
||||
Assumes per post:
|
||||
- 1x gpt-4o call: ~20,000 tokens (14,000 prompt + 6,000 completion)
|
||||
- 1x gpt-4o-mini call: ~17,000 tokens (13,000 prompt + 4,000 completion)
|
||||
|
||||
Only processes posts older than 20 minutes whose created_at is not already
|
||||
covered by an existing api_usage_log entry for the same customer.
|
||||
"""
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from uuid import UUID
|
||||
|
||||
# Add project root to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from src.config import estimate_cost
|
||||
from src.database.client import db
|
||||
|
||||
|
||||
# ── Assumed token usage per generated post ───────────────────────
# These are estimates, not measured values; every backfilled row uses them.

# One gpt-4o call per post: (prompt tokens, completion tokens).
GPT4O_PROMPT, GPT4O_COMP = 14_000, 6_000
GPT4O_TOTAL = GPT4O_COMP + GPT4O_PROMPT  # 20 000

# One gpt-4o-mini call per post: (prompt tokens, completion tokens).
MINI_PROMPT, MINI_COMP = 13_000, 4_000
MINI_TOTAL = MINI_COMP + MINI_PROMPT  # 17 000
|
||||
|
||||
|
||||
def _as_utc(moment: datetime) -> datetime:
    """Return *moment* as a timezone-aware UTC datetime.

    Naive values are assumed to already be expressed in UTC and are only
    tagged. Aware values are converted, so a non-UTC offset is never silently
    relabelled as UTC (which would shift the instant).
    """
    if moment.utcoffset() is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)


async def _load_backfilled_keys() -> set:
    """Return ``(customer_id, 'YYYY-MM-DDTHH:MM:SS')`` keys of existing backfill rows.

    Best effort: if the query itself fails (the original code assumes the
    table may simply not exist yet), an empty set is returned and the run
    continues.
    """
    # NOTE(review): a single select may be capped by the API's max-rows
    # setting; confirm whether pagination is needed for large log tables.
    try:
        response = await asyncio.to_thread(
            lambda: db.client.table("api_usage_logs")
            .select("customer_id, created_at")
            .eq("operation", "post_creation_backfill")
            .execute()
        )
    except Exception as e:
        print(f"Could not read existing logs (table may be new): {e}")
        return set()

    # Built outside the try block: a single malformed row (e.g. a null
    # created_at) must not discard every key and trigger duplicate inserts.
    # Assumes created_at comes back as an ISO-8601 string in UTC — TODO confirm.
    keys = {
        (log.get("customer_id"), (log.get("created_at") or "")[:19])
        for log in response.data or []
    }
    print(f"{len(keys)} existing backfill entries found")
    return keys


async def main() -> None:
    """Backfill ``api_usage_logs`` with estimated usage for untracked posts.

    Steps:
      1. Load every customer's generated posts and keep those created more
         than 20 minutes ago.
      2. Load the keys of backfill rows that already exist.
      3. For each remaining post insert two rows (gpt-4o and gpt-4o-mini)
         carrying the fixed token estimates and their estimated cost.

    Progress and a final summary are printed to stdout. A failed insert is
    reported and skipped; it does not abort the run.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(minutes=20)
    print(f"Cutoff: posts created before {cutoff.isoformat()}")

    # ── 1. Load all generated posts ──────────────────────────────
    customers = await db.list_customers()
    print(f"Found {len(customers)} customers")

    all_posts = []
    for cust in customers:
        all_posts.extend(await db.get_generated_posts(cust.id))
    print(f"Found {len(all_posts)} total generated posts")

    # Keep each eligible post together with its UTC-normalised timestamp so
    # the cutoff check, the dedup key and the inserted row all agree.
    eligible = []
    for post in all_posts:
        if not post.created_at:
            continue
        created_utc = _as_utc(post.created_at)
        if created_utc < cutoff:
            eligible.append((post, created_utc))
    print(f"{len(eligible)} posts older than 20 min")

    # ── 2. Load existing logs to avoid duplicates ────────────────
    already_logged = await _load_backfilled_keys()

    # ── 3. Build customer → user_id / company_id map ─────────────
    cust_map = {
        str(cust.id): {
            "user_id": str(cust.user_id) if cust.user_id else None,
            "company_id": str(cust.company_id) if cust.company_id else None,
        }
        for cust in customers
    }

    # ── 4. Insert two log rows per post ──────────────────────────
    # Costs and per-model fields are identical for every post: compute once.
    gpt4o_cost = estimate_cost("gpt-4o", GPT4O_PROMPT, GPT4O_COMP)
    mini_cost = estimate_cost("gpt-4o-mini", MINI_PROMPT, MINI_COMP)
    usage_4o = {
        "provider": "openai",
        "model": "gpt-4o",
        "prompt_tokens": GPT4O_PROMPT,
        "completion_tokens": GPT4O_COMP,
        "total_tokens": GPT4O_TOTAL,
        "estimated_cost_usd": round(gpt4o_cost, 6),
    }
    usage_mini = {
        "provider": "openai",
        "model": "gpt-4o-mini",
        "prompt_tokens": MINI_PROMPT,
        "completion_tokens": MINI_COMP,
        "total_tokens": MINI_TOTAL,
        "estimated_cost_usd": round(mini_cost, 6),
    }

    inserted = 0
    skipped = 0

    for post, created_utc in eligible:
        cid = str(post.customer_id)
        created_iso = created_utc.isoformat()

        # Second-resolution key, matching the keys built from existing rows.
        if (cid, created_iso[:19]) in already_logged:
            skipped += 1
            continue

        base = {
            "customer_id": cid,
            "operation": "post_creation_backfill",
            "created_at": created_iso,
        }
        # Only send the owner references that are actually known.
        ids = cust_map.get(cid, {})
        for field in ("user_id", "company_id"):
            if ids.get(field):
                base[field] = ids[field]

        rows = [{**base, **usage_4o}, {**base, **usage_mini}]

        try:
            # Bind rows as a default argument so the lambda does not see a
            # later iteration's value.
            await asyncio.to_thread(
                lambda batch=rows: db.client.table("api_usage_logs")
                .insert(batch)
                .execute()
            )
        except Exception as e:
            print(f" ! Error for post {post.id}: {e}")
            continue

        inserted += len(rows)
        name = post.topic_title[:40] if post.topic_title else "?"
        print(f" + {name} (gpt-4o ${gpt4o_cost:.4f} + mini ${mini_cost:.4f})")

    print(f"\nDone: {inserted} log rows inserted, {skipped} posts skipped (already backfilled)")
    print(f"Estimated totals per post: gpt-4o ${gpt4o_cost:.4f} + mini ${mini_cost:.4f}")
|
||||
|
||||
|
||||
# Script entry point: run the async backfill to completion only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user