feat: start SERP supplement in parallel as soon as Maps scraping stabilizes

When Maps scraping finishes (totalLeads stable for one poll, < targetCount),
fire the SERP supplement job immediately — don't wait for Anymailfinder
enrichment to complete. Both jobs now poll independently; results are merged
and deduplicated by domain once both are done.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
TimoUttenweiler
2026-04-01 11:28:41 +02:00
parent 1719062b47
commit a39a98b6dc

View File

@@ -77,30 +77,53 @@ export function LoadingCard({ jobId, targetCount, query, region, onDone, onError
useEffect(() => {
  // Guard flag: async callbacks become no-ops after unmount / jobId change.
  let cancelled = false;
  let crawlInterval: ReturnType<typeof setInterval> | null = null;
  // NOTE(review): the next three declarations are pre-change leftovers of a
  // stripped diff (old single-job flow). They are still referenced by residue
  // further down in this file, so they are kept; delete them once that
  // residue is cleaned up.
  let pollTimeout: ReturnType<typeof setTimeout> | null = null;
  let currentJobId = jobId;
  let toppingActive = false;
  // Retry timers for the two independent poll loops (Maps job + SERP job).
  let mapsPollTimeout: ReturnType<typeof setTimeout> | null = null;
  let serpPollTimeout: ReturnType<typeof setTimeout> | null = null;
  // Parallel tracking: both jobs report in via these closure flags, and
  // tryFinalize() fires only when every needed job has finished.
  let supplementStarted = false;
  let mapsDone = false;
  let serpDone = false;
  let serpNeeded = false;
  let mapsLeads: LeadResult[] = [];
  let serpLeads: LeadResult[] = [];
  let serpJobId: string | null = null;
  let lastTotalLeads = 0; // detect when Maps scraping has stabilized
// Monotonic progress: the bar may only move forward, never backwards.
function advanceBar(to: number) {
  setProgressWidth(prev => (prev >= to ? prev : to));
}
// Ambient progress ticker: creeps the bar toward 88% while jobs are running;
// tryFinalize() pushes it to 100% on completion.
// Fix: the stripped diff had left BOTH the old (toppingActive-capped) and the
// new tick bodies inside this callback, so every tick advanced the bar twice;
// only the new body is kept.
crawlInterval = setInterval(() => {
  if (cancelled) return;
  setProgressWidth(prev => prev >= 88 ? prev : prev + 0.4);
}, 200);
// Completes the flow once Maps is done and — when a supplement ran — SERP is
// done too. Merges both result sets, deduplicating SERP leads by domain.
function tryFinalize() {
  const stillWaiting = !mapsDone || (serpNeeded && !serpDone);
  if (stillWaiting) return;
  if (crawlInterval) clearInterval(crawlInterval);
  // Maps results come first; SERP contributes only leads whose domain is new
  // (leads without a domain cannot be deduplicated and are kept).
  let finalLeads = mapsLeads;
  if (serpNeeded && serpLeads.length > 0) {
    const knownDomains = new Set(mapsLeads.map(lead => lead.domain).filter(Boolean));
    const extras = serpLeads.filter(lead => !lead.domain || !knownDomains.has(lead.domain));
    finalLeads = [...mapsLeads, ...extras];
  }
  setTotalLeads(finalLeads.length);
  setProgressWidth(100);
  setPhase("done");
  setTimeout(() => { if (!cancelled) onDone(finalLeads); }, 800);
}
// Start SERP supplement in parallel — doesn't block Maps polling
// NOTE(review): this function still contains pre-change lines from a diff
// whose +/- markers were stripped. Old wiring (`toppingActive = true`,
// `currentJobId`, `pollTimeout`/`poll`, the first catch body) appears
// alongside the new parallel wiring (`supplementStarted`/`serpNeeded`,
// `serpJobId`, `serpPollTimeout`/`pollSerp`, the tryFinalize catch path).
// Only one line of each pair should survive — confirm against the repository.
async function startSerpSupplement(foundCount: number) {
toppingActive = true;
supplementStarted = true;
serpNeeded = true;
setIsTopping(true);
setPhase("topping");
advanceBar(88);
try {
const res = await fetch("/api/search/supplement", {
// NOTE(review): the next line is a diff hunk header, not code — the fetch
// options (method/headers/body) fell between hunks and are not visible here.
@@ -110,110 +133,108 @@ export function LoadingCard({ jobId, targetCount, query, region, onDone, onError
});
if (!res.ok) throw new Error("supplement start failed");
const data = await res.json() as { jobId: string; optimizedQuery: string; usedAI: boolean };
// NOTE(review): old assignment (currentJobId) immediately followed by the
// new one (serpJobId); same for the two setTimeout lines below.
currentJobId = data.jobId;
serpJobId = data.jobId;
if (data.optimizedQuery) setOptimizedQuery(data.optimizedQuery);
pollTimeout = setTimeout(poll, 2500);
serpPollTimeout = setTimeout(pollSerp, 2500);
} catch {
// NOTE(review): old catch body (finish with Maps results directly) …
// Supplement failed — complete with Maps results only
if (!cancelled) {
setProgressWidth(100);
setPhase("done");
setTimeout(() => { if (!cancelled) onDone(mapsLeads); }, 800);
}
// … versus the new catch body, which defers completion to tryFinalize.
// Supplement failed — mark done with no results so tryFinalize can proceed
serpDone = true;
tryFinalize();
}
}
async function poll() {
// Independent SERP poll loop
async function pollSerp() {
if (cancelled || !serpJobId) return;
try {
const res = await fetch(`/api/jobs/${serpJobId}/status`);
if (!res.ok) throw new Error("fetch failed");
const data = await res.json() as JobStatus;
if (!cancelled) {
if (data.status === "complete" || data.status === "failed") {
serpLeads = (data.results ?? []) as LeadResult[];
serpDone = true;
tryFinalize();
} else {
serpPollTimeout = setTimeout(pollSerp, 2500);
}
}
} catch {
if (!cancelled) serpPollTimeout = setTimeout(pollSerp, 3000);
}
}
// Maps poll loop
// NOTE(review): stripped-diff residue — this function interleaves the old
// single-job poll() implementation with the new pollMaps() implementation.
// Duplicated pairs below (old first, new second) include: the fetch URL,
// the progress/phase updates, the completion handling, and the retry timers.
// Reconstruct from the repository before treating this text as runnable.
async function pollMaps() {
if (cancelled) return;
try {
// NOTE(review): old fetch (currentJobId) immediately followed by the new
// fetch (jobId) — only the second should exist; `res` is declared twice.
const res = await fetch(`/api/jobs/${currentJobId}/status`);
const res = await fetch(`/api/jobs/${jobId}/status`);
if (!res.ok) throw new Error("fetch failed");
const data = await res.json() as JobStatus;
if (!cancelled) {
// NOTE(review): old guarded progress update (skipped while topping) …
if (!toppingActive) {
setTotalLeads(data.totalLeads);
setEmailsFound(data.emailsFound);
const p = getPhase(data);
setPhase(p);
advanceBar(PHASE_MIN[p]);
}
// … versus the new unconditional update (`p` is redeclared here).
setTotalLeads(data.totalLeads);
setEmailsFound(data.emailsFound);
const p = getPhase(data);
setPhase(p);
advanceBar(PHASE_MIN[p]);
// NOTE(review): old branch header, left unclosed here; superseded by the
// combined complete/failed check a few lines down.
if (data.status === "complete") {
// Fire supplement as soon as Maps scraping stabilizes with fewer results than needed.
// "Stabilized" = totalLeads unchanged since last poll (scraping done, enrichment started).
if (
!supplementStarted &&
data.status === "running" &&
data.totalLeads > 0 &&
data.totalLeads < targetCount &&
data.totalLeads === lastTotalLeads
) {
startSerpSupplement(data.totalLeads).catch(console.error);
}
lastTotalLeads = data.totalLeads;
if (data.status === "complete" || data.status === "failed") {
const leads = (data.results ?? []) as JobStatus extends never ? never : LeadResult[];
// NOTE(review): the old supplement-on-complete branch (toppingActive era)
// is tangled here with the new failed-with-no-results check; the new
// check's real body is not visible in this residue.
if (!toppingActive && data.totalLeads < targetCount) {
// Maps returned fewer than requested → supplement with optimized SERP
mapsLeads = leads;
if (data.status === "failed" && leads.length === 0) {
if (crawlInterval) clearInterval(crawlInterval);
crawlInterval = setInterval(() => {
if (cancelled) return;
setProgressWidth(prev => prev >= 96 ? prev : prev + 0.3);
}, 200);
await startSerpSupplement(data.totalLeads);
} else {
// All done
if (crawlInterval) clearInterval(crawlInterval);
let finalLeads: LeadResult[];
if (toppingActive) {
// Deduplicate Maps + SERP by domain
const seenDomains = new Set(mapsLeads.map(l => l.domain).filter(Boolean));
const newLeads = leads.filter(l => !l.domain || !seenDomains.has(l.domain));
finalLeads = [...mapsLeads, ...newLeads];
} else {
finalLeads = leads;
}
// The poll often catches status=complete before observing the
// "emails" phase mid-run (Anymailfinder sets emailsFound then
// immediately sets complete in back-to-back DB writes).
// Always flash through "E-Mails suchen" so the step is visible.
if (!toppingActive) {
setPhase("emails");
advanceBar(PHASE_MIN["emails"]);
await new Promise(r => setTimeout(r, 500));
if (cancelled) return;
}
setTotalLeads(finalLeads.length);
setProgressWidth(100);
setPhase("done");
setTimeout(() => { if (!cancelled) onDone(finalLeads); }, 800);
}
// NOTE(review): old failed-branch — superseded by the combined
// complete/failed handling of the new code.
} else if (data.status === "failed") {
if (crawlInterval) clearInterval(crawlInterval);
const partialLeads = (data.results ?? []) as LeadResult[];
if (toppingActive) {
// SERP failed — complete with Maps results
setProgressWidth(100);
setPhase("done");
setTimeout(() => { if (!cancelled) onDone(mapsLeads); }, 800);
} else if (partialLeads.length > 0) {
// Job failed mid-way (e.g. Anymailfinder 402) but Maps results exist → show them
setProgressWidth(100);
setPhase("done");
setTimeout(() => { if (!cancelled) onDone(partialLeads, data.error ?? undefined); }, 800);
} else {
onError(data.error ?? "Unbekannter Fehler");
return;
}
// Flash "E-Mails suchen" step before completing (poll often misses it)
setPhase("emails");
advanceBar(PHASE_MIN["emails"]);
await new Promise(r => setTimeout(r, 500));
if (cancelled) return;
mapsLeads = leads;
mapsDone = true;
// If supplement wasn't triggered, no SERP needed
if (!supplementStarted) serpDone = true;
// Trigger supplement now if Maps finished with fewer results and we haven't yet
if (!supplementStarted && data.totalLeads < targetCount) {
startSerpSupplement(data.totalLeads).catch(console.error);
} else {
tryFinalize();
}
} else {
// NOTE(review): old retry timer (pollTimeout/poll) next to the new one.
pollTimeout = setTimeout(poll, 2500);
mapsPollTimeout = setTimeout(pollMaps, 2500);
}
}
} catch {
// NOTE(review): old error-retry block next to the new one-liner.
if (!cancelled) {
pollTimeout = setTimeout(poll, 3000);
}
if (!cancelled) mapsPollTimeout = setTimeout(pollMaps, 3000);
}
}
// Kick off the Maps poll loop; the SERP loop is started lazily by
// startSerpSupplement() when (and if) a supplement becomes necessary.
// Fix: the stripped diff had left both the old kickoff (`poll();`) and the
// stale `pollTimeout` cleanup beside the new ones; only the new lines are kept.
pollMaps();
// Effect cleanup: flag cancellation, stop the progress ticker, and clear
// both poll-loop retry timers.
return () => {
  cancelled = true;
  if (crawlInterval) clearInterval(crawlInterval);
  if (mapsPollTimeout) clearTimeout(mapsPollTimeout);
  if (serpPollTimeout) clearTimeout(serpPollTimeout);
};
}, [jobId]); // eslint-disable-line react-hooks/exhaustive-deps