Added scalability and performance improvements (Redis, HTTP caching, etc.)
This commit is contained in:
@@ -32,10 +32,11 @@ from src.services.email_service import (
|
||||
mark_token_used,
|
||||
)
|
||||
from src.services.background_jobs import (
|
||||
job_manager, JobType, JobStatus,
|
||||
JobType, JobStatus,
|
||||
run_post_scraping, run_profile_analysis, run_post_categorization, run_post_type_analysis,
|
||||
run_full_analysis_pipeline, run_post_recategorization
|
||||
)
|
||||
from src.services.db_job_manager import job_manager
|
||||
from src.services.storage_service import storage
|
||||
|
||||
# Router for user frontend
|
||||
@@ -93,6 +94,7 @@ async def get_user_avatar(session: UserSession, user_id: UUID) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def require_user_session(request: Request) -> Optional[UserSession]:
|
||||
"""Check if user is authenticated, redirect to login if not."""
|
||||
session = get_user_session(request)
|
||||
@@ -676,7 +678,7 @@ async def onboarding_profile_submit(
|
||||
logger.info(f"Skipping scraping - {len(existing_posts)} posts already exist for user {user_id}")
|
||||
|
||||
if should_scrape:
|
||||
job = job_manager.create_job(JobType.POST_SCRAPING, str(user_id))
|
||||
job = await job_manager.create_job(JobType.POST_SCRAPING, str(user_id))
|
||||
background_tasks.add_task(run_post_scraping, user_id, linkedin_url, job.id)
|
||||
logger.info(f"Started background scraping for user {user_id}")
|
||||
|
||||
@@ -829,7 +831,7 @@ async def api_rescrape(request: Request, background_tasks: BackgroundTasks):
|
||||
return JSONResponse({"error": "No LinkedIn URL found"}, status_code=400)
|
||||
|
||||
# Create job and start scraping
|
||||
job = job_manager.create_job(JobType.POST_SCRAPING, session.user_id)
|
||||
job = await job_manager.create_job(JobType.POST_SCRAPING, session.user_id)
|
||||
background_tasks.add_task(run_post_scraping, user_id, profile.linkedin_url, job.id)
|
||||
|
||||
return JSONResponse({"success": True, "job_id": job.id})
|
||||
@@ -1451,53 +1453,45 @@ async def api_categorize_post(request: Request):
|
||||
|
||||
@user_router.get("/api/job-updates")
|
||||
async def job_updates_sse(request: Request):
|
||||
"""Server-Sent Events endpoint for job updates."""
|
||||
"""Server-Sent Events endpoint for job updates (Redis pub/sub — works across workers)."""
|
||||
session = require_user_session(request)
|
||||
tracking_id = getattr(session, 'user_id', None) or getattr(session, 'company_id', None)
|
||||
if not session or not tracking_id:
|
||||
return JSONResponse({"error": "Not authenticated"}, status_code=401)
|
||||
|
||||
async def event_generator():
    """Yield Server-Sent Events frames describing this session's jobs.

    First emits one ``data:`` frame per job returned by
    ``job_manager.get_active_jobs(tracking_id)``, then relays every message
    published on the Redis channel ``job_updates:{tracking_id}``. While the
    channel is idle, an SSE comment frame (``: keepalive``) is sent roughly
    every 30 seconds so intermediaries do not drop the connection.

    Yields:
        str: SSE-formatted frames, each terminated by a blank line.
    """
    # Function-scope import, as in the original block.
    from src.services.redis_client import get_redis

    channel = f"job_updates:{tracking_id}"
    keepalive_seconds = 30

    r = await get_redis()
    pubsub = r.pubsub()
    try:
        # Subscribe inside the try so a failed subscribe still releases the
        # pub/sub connection in the finally block below.
        await pubsub.subscribe(channel)

        # Send any currently active jobs as the initial state
        for job in await job_manager.get_active_jobs(tracking_id):
            data = {
                "id": job.id,
                "job_type": job.job_type.value,
                "status": job.status.value,
                "progress": job.progress,
                "message": job.message,
                "error": job.error,
            }
            yield f"data: {json.dumps(data)}\n\n"

        # Stream pub/sub messages, keepalive on timeout.
        # The wait must be passed to get_message() itself: its default
        # timeout is 0.0, which returns None immediately when nothing is
        # pending and would turn this loop into a busy spin that floods the
        # client with keepalive frames.
        while True:
            msg = await pubsub.get_message(
                ignore_subscribe_messages=True,
                timeout=keepalive_seconds,
            )
            if msg and msg.get("type") == "message":
                payload = msg["data"]
                # Payload is bytes unless the Redis client decodes responses;
                # handle both so the frame never contains a b'...' repr.
                if isinstance(payload, (bytes, bytearray)):
                    payload = payload.decode("utf-8")
                yield f"data: {payload}\n\n"
            else:
                yield ": keepalive\n\n"
    finally:
        # Always close the pub/sub connection, even if unsubscribe fails
        # (e.g. because the Redis connection is already gone).
        try:
            await pubsub.unsubscribe(channel)
        finally:
            await pubsub.aclose()
|
||||
|
||||
return StreamingResponse(
|
||||
event_generator(),
|
||||
@@ -1505,8 +1499,8 @@ async def job_updates_sse(request: Request):
|
||||
headers={
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"X-Accel-Buffering": "no"
|
||||
}
|
||||
"X-Accel-Buffering": "no",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -1521,7 +1515,7 @@ async def api_run_post_type_analysis(request: Request, background_tasks: Backgro
|
||||
user_id = UUID(session.user_id)
|
||||
|
||||
# Create job
|
||||
job = job_manager.create_job(JobType.POST_TYPE_ANALYSIS, session.user_id)
|
||||
job = await job_manager.create_job(JobType.POST_TYPE_ANALYSIS, session.user_id)
|
||||
|
||||
# Run in background
|
||||
background_tasks.add_task(run_post_type_analysis, user_id, job.id)
|
||||
@@ -3278,13 +3272,13 @@ async def save_all_and_reanalyze(request: Request, background_tasks: BackgroundT
|
||||
# Only trigger re-categorization and analysis if there were structural changes
|
||||
if has_structural_changes:
|
||||
# Create background job for post re-categorization (ALL posts)
|
||||
categorization_job = job_manager.create_job(
|
||||
categorization_job = await job_manager.create_job(
|
||||
job_type=JobType.POST_CATEGORIZATION,
|
||||
user_id=user_id_str
|
||||
)
|
||||
|
||||
# Create background job for post type analysis
|
||||
analysis_job = job_manager.create_job(
|
||||
analysis_job = await job_manager.create_job(
|
||||
job_type=JobType.POST_TYPE_ANALYSIS,
|
||||
user_id=user_id_str
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user