feat: start SERP supplement in parallel as soon as Maps scraping stabilizes

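When Maps scraping has stabilized (totalLeads unchanged between two consecutive
polls, greater than 0, and still below targetCount), fire the SERP supplement
job immediately — don't wait for Anymailfinder enrichment to complete. If the
Maps job completes short of targetCount before that is observed, the supplement
is started at completion instead. Both jobs now poll independently; once both
are done, SERP results are deduplicated by domain against the Maps results and
appended to them.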

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
TimoUttenweiler
2026-04-01 11:28:41 +02:00
parent 1719062b47
commit a39a98b6dc

View File

@@ -77,30 +77,53 @@ export function LoadingCard({ jobId, targetCount, query, region, onDone, onError
useEffect(() => { useEffect(() => {
let cancelled = false; let cancelled = false;
let crawlInterval: ReturnType<typeof setInterval> | null = null; let crawlInterval: ReturnType<typeof setInterval> | null = null;
let pollTimeout: ReturnType<typeof setTimeout> | null = null; let mapsPollTimeout: ReturnType<typeof setTimeout> | null = null;
let currentJobId = jobId; let serpPollTimeout: ReturnType<typeof setTimeout> | null = null;
let toppingActive = false;
let mapsLeads: LeadResult[] = []; // Parallel tracking
let supplementStarted = false;
let mapsDone = false;
let serpDone = false;
let serpNeeded = false;
let mapsLeads: LeadResult[] = [];
let serpLeads: LeadResult[] = [];
let serpJobId: string | null = null;
let lastTotalLeads = 0; // detect when Maps scraping has stabilized
// Progress only ever moves forward
function advanceBar(to: number) { function advanceBar(to: number) {
setProgressWidth(prev => Math.max(prev, to)); setProgressWidth(prev => Math.max(prev, to));
} }
crawlInterval = setInterval(() => { crawlInterval = setInterval(() => {
if (cancelled) return; if (cancelled) return;
setProgressWidth(prev => { setProgressWidth(prev => prev >= 88 ? prev : prev + 0.4);
const cap = toppingActive ? 96 : 88;
if (prev >= cap) return prev;
return prev + 0.4;
});
}, 200); }, 200);
// Completes the search once the Maps job and, if a SERP supplement was started,
// the SERP job have both finished. Returns early without side effects while
// either job is still pending, so every code path that marks a job as done can
// simply call it.
function tryFinalize() {
  // The effect may already have been cleaned up (unmount or jobId change) while
  // a request was still in flight; a stale run must not touch component state.
  if (cancelled) return;
  if (!mapsDone || (serpNeeded && !serpDone)) return;
  if (crawlInterval) clearInterval(crawlInterval);
  let finalLeads: LeadResult[];
  if (serpNeeded && serpLeads.length > 0) {
    // Keep every Maps lead and append only SERP leads whose domain Maps did not
    // already return. SERP leads without a domain cannot be matched and are kept.
    const seenDomains = new Set(mapsLeads.map(l => l.domain).filter(Boolean));
    const newSerpLeads = serpLeads.filter(l => !l.domain || !seenDomains.has(l.domain));
    finalLeads = [...mapsLeads, ...newSerpLeads];
  } else {
    finalLeads = mapsLeads;
  }
  setTotalLeads(finalLeads.length);
  setProgressWidth(100);
  setPhase("done");
  // Short delay before handing the leads to the caller; re-check cancellation
  // because cleanup may run during the delay.
  setTimeout(() => { if (!cancelled) onDone(finalLeads); }, 800);
}
// Start SERP supplement in parallel — doesn't block Maps polling
async function startSerpSupplement(foundCount: number) { async function startSerpSupplement(foundCount: number) {
toppingActive = true; supplementStarted = true;
serpNeeded = true;
setIsTopping(true); setIsTopping(true);
setPhase("topping");
advanceBar(88);
try { try {
const res = await fetch("/api/search/supplement", { const res = await fetch("/api/search/supplement", {
@@ -110,110 +133,108 @@ export function LoadingCard({ jobId, targetCount, query, region, onDone, onError
}); });
if (!res.ok) throw new Error("supplement start failed"); if (!res.ok) throw new Error("supplement start failed");
const data = await res.json() as { jobId: string; optimizedQuery: string; usedAI: boolean }; const data = await res.json() as { jobId: string; optimizedQuery: string; usedAI: boolean };
currentJobId = data.jobId; serpJobId = data.jobId;
if (data.optimizedQuery) setOptimizedQuery(data.optimizedQuery); if (data.optimizedQuery) setOptimizedQuery(data.optimizedQuery);
pollTimeout = setTimeout(poll, 2500); serpPollTimeout = setTimeout(pollSerp, 2500);
} catch { } catch {
// Supplement failed — complete with Maps results only // Supplement failed — mark done with no results so tryFinalize can proceed
if (!cancelled) { serpDone = true;
setProgressWidth(100); tryFinalize();
setPhase("done");
setTimeout(() => { if (!cancelled) onDone(mapsLeads); }, 800);
}
} }
} }
async function poll() { // Independent SERP poll loop
// Polls the status of the SERP supplement job on its own timer. Reschedules
// itself every 2.5 s while the job is still running and after 3 s when a
// request fails; once the job reports "complete" or "failed" it stores whatever
// results were returned and lets tryFinalize decide whether everything is done.
async function pollSerp() {
  if (cancelled || !serpJobId) return;
  try {
    const response = await fetch(`/api/jobs/${serpJobId}/status`);
    if (!response.ok) throw new Error("fetch failed");
    const job = await response.json() as JobStatus;
    // Cleanup may have run while the request was in flight.
    if (cancelled) return;
    const finished = job.status === "complete" || job.status === "failed";
    if (!finished) {
      serpPollTimeout = setTimeout(pollSerp, 2500);
      return;
    }
    serpLeads = (job.results ?? []) as LeadResult[];
    serpDone = true;
    tryFinalize();
  } catch {
    if (cancelled) return;
    serpPollTimeout = setTimeout(pollSerp, 3000);
  }
}
// Maps poll loop
async function pollMaps() {
if (cancelled) return; if (cancelled) return;
try { try {
const res = await fetch(`/api/jobs/${currentJobId}/status`); const res = await fetch(`/api/jobs/${jobId}/status`);
if (!res.ok) throw new Error("fetch failed"); if (!res.ok) throw new Error("fetch failed");
const data = await res.json() as JobStatus; const data = await res.json() as JobStatus;
if (!cancelled) { if (!cancelled) {
if (!toppingActive) {
setTotalLeads(data.totalLeads); setTotalLeads(data.totalLeads);
setEmailsFound(data.emailsFound); setEmailsFound(data.emailsFound);
const p = getPhase(data); const p = getPhase(data);
setPhase(p); setPhase(p);
advanceBar(PHASE_MIN[p]); advanceBar(PHASE_MIN[p]);
}
if (data.status === "complete") { // Fire supplement as soon as Maps scraping stabilizes with fewer results than needed.
// "Stabilized" = totalLeads unchanged since last poll (scraping done, enrichment started).
if (
!supplementStarted &&
data.status === "running" &&
data.totalLeads > 0 &&
data.totalLeads < targetCount &&
data.totalLeads === lastTotalLeads
) {
startSerpSupplement(data.totalLeads).catch(console.error);
}
lastTotalLeads = data.totalLeads;
if (data.status === "complete" || data.status === "failed") {
const leads = (data.results ?? []) as LeadResult[]; const leads = (data.results ?? []) as LeadResult[];
if (!toppingActive && data.totalLeads < targetCount) { if (data.status === "failed" && leads.length === 0) {
// Maps returned fewer than requested → supplement with optimized SERP
mapsLeads = leads;
if (crawlInterval) clearInterval(crawlInterval); if (crawlInterval) clearInterval(crawlInterval);
crawlInterval = setInterval(() => { onError(data.error ?? "Unbekannter Fehler");
if (cancelled) return; return;
setProgressWidth(prev => prev >= 96 ? prev : prev + 0.3);
}, 200);
await startSerpSupplement(data.totalLeads);
} else {
// All done
if (crawlInterval) clearInterval(crawlInterval);
let finalLeads: LeadResult[];
if (toppingActive) {
// Deduplicate Maps + SERP by domain
const seenDomains = new Set(mapsLeads.map(l => l.domain).filter(Boolean));
const newLeads = leads.filter(l => !l.domain || !seenDomains.has(l.domain));
finalLeads = [...mapsLeads, ...newLeads];
} else {
finalLeads = leads;
} }
// The poll often catches status=complete before observing the // Flash "E-Mails suchen" step before completing (poll often misses it)
// "emails" phase mid-run (Anymailfinder sets emailsFound then
// immediately sets complete in back-to-back DB writes).
// Always flash through "E-Mails suchen" so the step is visible.
if (!toppingActive) {
setPhase("emails"); setPhase("emails");
advanceBar(PHASE_MIN["emails"]); advanceBar(PHASE_MIN["emails"]);
await new Promise(r => setTimeout(r, 500)); await new Promise(r => setTimeout(r, 500));
if (cancelled) return; if (cancelled) return;
}
setTotalLeads(finalLeads.length); mapsLeads = leads;
setProgressWidth(100); mapsDone = true;
setPhase("done");
setTimeout(() => { if (!cancelled) onDone(finalLeads); }, 800); // If supplement wasn't triggered, no SERP needed
} if (!supplementStarted) serpDone = true;
} else if (data.status === "failed") {
if (crawlInterval) clearInterval(crawlInterval); // Trigger supplement now if Maps finished with fewer results and we haven't yet
const partialLeads = (data.results ?? []) as LeadResult[]; if (!supplementStarted && data.totalLeads < targetCount) {
if (toppingActive) { startSerpSupplement(data.totalLeads).catch(console.error);
// SERP failed — complete with Maps results
setProgressWidth(100);
setPhase("done");
setTimeout(() => { if (!cancelled) onDone(mapsLeads); }, 800);
} else if (partialLeads.length > 0) {
// Job failed mid-way (e.g. Anymailfinder 402) but Maps results exist → show them
setProgressWidth(100);
setPhase("done");
setTimeout(() => { if (!cancelled) onDone(partialLeads, data.error ?? undefined); }, 800);
} else { } else {
onError(data.error ?? "Unbekannter Fehler"); tryFinalize();
} }
} else { } else {
pollTimeout = setTimeout(poll, 2500); mapsPollTimeout = setTimeout(pollMaps, 2500);
} }
} }
} catch { } catch {
if (!cancelled) { if (!cancelled) mapsPollTimeout = setTimeout(pollMaps, 3000);
pollTimeout = setTimeout(poll, 3000);
}
} }
} }
poll(); pollMaps();
return () => { return () => {
cancelled = true; cancelled = true;
if (crawlInterval) clearInterval(crawlInterval); if (crawlInterval) clearInterval(crawlInterval);
if (pollTimeout) clearTimeout(pollTimeout); if (mapsPollTimeout) clearTimeout(mapsPollTimeout);
if (serpPollTimeout) clearTimeout(serpPollTimeout);
}; };
}, [jobId]); // eslint-disable-line react-hooks/exhaustive-deps }, [jobId]); // eslint-disable-line react-hooks/exhaustive-deps