Major updates: LinkedIn auto-posting, timezone fixes, and Docker improvements

Features:
- Add LinkedIn OAuth integration and auto-posting functionality
- Add scheduler service for automated post publishing
- Add metadata field to generated_posts for LinkedIn URLs
- Add privacy policy page for LinkedIn API compliance
- Add company management features and employee accounts
- Add license key system for company registrations

Fixes:
- Fix timezone issues (use UTC consistently across app)
- Fix datetime serialization errors in database operations
- Fix scheduling timezone conversion (local time to UTC)
- Fix import errors (get_database -> db)

Infrastructure:
- Update Docker setup to use port 8001 (avoid conflicts)
- Add SSL support with nginx-proxy and Let's Encrypt
- Add LinkedIn setup documentation
- Add migration scripts for schema updates

Services:
- Add linkedin_service.py for LinkedIn API integration
- Add scheduler_service.py for background job processing
- Add storage_service.py for Supabase Storage
- Add email_service.py improvements
- Add encryption utilities for token storage

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-11 11:30:20 +01:00
parent b50594dbfa
commit f14515e9cf
94 changed files with 21601 additions and 5111 deletions
--- a/scripts/backfill_usage_logs.py
+++ b/scripts/backfill_usage_logs.py
@@ -0,0 +1,145 @@
+"""Backfill api_usage_logs for all generated posts not yet tracked.
+
+Assumes per post:
+  - 1x gpt-4o call:      ~20,000 tokens (14,000 prompt + 6,000 completion)
+  - 1x gpt-4o-mini call:  ~17,000 tokens (13,000 prompt + 4,000 completion)
+
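+Only processes posts older than 20 minutes whose created_at (compared to the
+second) is not already covered by an existing "post_creation_backfill" entry
+in api_usage_logs for the same customer. Log rows written by any other
+operation are not consulted when deciding what to skip.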
+"""
+import asyncio
+import sys
+import os
+from datetime import datetime, timedelta, timezone
+from uuid import UUID
+
+# Add project root to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from src.config import estimate_cost
+from src.database.client import db
+
+
+# ── Estimated token splits per post ──────────────────────────────
+GPT4O_PROMPT    = 14_000
+GPT4O_COMP      =  6_000
+GPT4O_TOTAL     = GPT4O_PROMPT + GPT4O_COMP        # 20 000
+
+MINI_PROMPT     = 13_000
+MINI_COMP       =  4_000
+MINI_TOTAL      = MINI_PROMPT + MINI_COMP           # 17 000
+
+
+async def main():
+    cutoff = datetime.now(timezone.utc) - timedelta(minutes=20)
+    print(f"Cutoff: posts created before {cutoff.isoformat()}")
+
+    # ── 1. Load all generated posts ──────────────────────────────
+    customers = await db.list_customers()
+    print(f"Found {len(customers)} customers")
+
+    all_posts = []
+    for cust in customers:
+        posts = await db.get_generated_posts(cust.id)
+        all_posts.extend(posts)
+    print(f"Found {len(all_posts)} total generated posts")
+
+    # Filter to posts older than 20 min
+    eligible = [
+        p for p in all_posts
+        if p.created_at and p.created_at.replace(tzinfo=timezone.utc) < cutoff
+    ]
+    print(f"{len(eligible)} posts older than 20 min")
+
+    # ── 2. Load existing logs to avoid duplicates ────────────────
+    try:
+        existing_logs = await asyncio.to_thread(
+            lambda: db.client.table("api_usage_logs")
+                .select("customer_id, created_at")
+                .eq("operation", "post_creation_backfill")
+                .execute()
+        )
+        already_logged = set()
+        for log in existing_logs.data:
+            key = (log.get("customer_id"), log.get("created_at", "")[:19])
+            already_logged.add(key)
+        print(f"{len(already_logged)} existing backfill entries found")
+    except Exception as e:
+        print(f"Could not read existing logs (table may be new): {e}")
+        already_logged = set()
+
+    # ── 3. Build customer → user_id / company_id map ─────────────
+    cust_map = {}
+    for cust in customers:
+        cust_map[str(cust.id)] = {
+            "user_id": str(cust.user_id) if cust.user_id else None,
+            "company_id": str(cust.company_id) if cust.company_id else None,
+        }
+
+    # ── 4. Insert two log rows per post ──────────────────────────
+    inserted = 0
+    skipped = 0
+
+    for post in eligible:
+        cid = str(post.customer_id)
+        ts = post.created_at.isoformat()[:19] if post.created_at else ""
+        key = (cid, ts)
+        if key in already_logged:
+            skipped += 1
+            continue
+
+        ids = cust_map.get(cid, {})
+        user_id = ids.get("user_id")
+        company_id = ids.get("company_id")
+
+        base = {
+            "customer_id": cid,
+            "operation": "post_creation_backfill",
+            "created_at": post.created_at.isoformat() if post.created_at else None,
+        }
+        if user_id:
+            base["user_id"] = user_id
+        if company_id:
+            base["company_id"] = company_id
+
+        # Row 1: gpt-4o
+        gpt4o_cost = estimate_cost("gpt-4o", GPT4O_PROMPT, GPT4O_COMP)
+        row_4o = {
+            **base,
+            "provider": "openai",
+            "model": "gpt-4o",
+            "prompt_tokens": GPT4O_PROMPT,
+            "completion_tokens": GPT4O_COMP,
+            "total_tokens": GPT4O_TOTAL,
+            "estimated_cost_usd": round(gpt4o_cost, 6),
+        }
+
+        # Row 2: gpt-4o-mini
+        mini_cost = estimate_cost("gpt-4o-mini", MINI_PROMPT, MINI_COMP)
+        row_mini = {
+            **base,
+            "provider": "openai",
+            "model": "gpt-4o-mini",
+            "prompt_tokens": MINI_PROMPT,
+            "completion_tokens": MINI_COMP,
+            "total_tokens": MINI_TOTAL,
+            "estimated_cost_usd": round(mini_cost, 6),
+        }
+
+        try:
+            await asyncio.to_thread(
+                lambda r1=row_4o, r2=row_mini: db.client.table("api_usage_logs")
+                    .insert([r1, r2]).execute()
+            )
+            inserted += 2
+            name = post.topic_title[:40] if post.topic_title else "?"
+            print(f"  + {name}  (gpt-4o ${gpt4o_cost:.4f} + mini ${mini_cost:.4f})")
+        except Exception as e:
+            print(f"  ! Error for post {post.id}: {e}")
+
+    print(f"\nDone: {inserted} log rows inserted, {skipped} posts skipped (already backfilled)")
+    print(f"Estimated totals per post: gpt-4o ${estimate_cost('gpt-4o', GPT4O_PROMPT, GPT4O_COMP):.4f} + mini ${estimate_cost('gpt-4o-mini', MINI_PROMPT, MINI_COMP):.4f}")
+
+
+# Script entry point: run the async backfill only when this file is executed
+# directly, not when it is imported.
+if __name__ == "__main__":
+    asyncio.run(main())