Files
lead-scraper/lib/services/apify.ts
Timo Uttenweiler facf8c9f69 Initial commit: LeadFlow lead generation platform
Full-stack Next.js 16 app with three scraping pipelines:
- AirScale CSV → Anymailfinder Bulk Decision Maker search
- LinkedIn Sales Navigator → Vayne → Anymailfinder email enrichment
- Apify Google SERP → domain extraction → Anymailfinder bulk enrichment

Includes Docker multi-stage build + docker-compose for Coolify deployment.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 11:21:11 +01:00

85 lines
2.1 KiB
TypeScript

import axios from "axios";
import { extractDomainFromUrl } from "@/lib/utils/domains";
// Root of the Apify REST API (v2); every request URL in this module is built on top of it.
const BASE_URL = "https://api.apify.com/v2";
// Actor identifier interpolated into the `/acts/{id}/runs` path when a scraper run is started.
const ACTOR_ID = "apify~google-search-scraper";
/**
 * One flattened organic search hit, as produced by `fetchDatasetItems`.
 *
 * String fields fall back to `""` and `position` falls back to `0` when the
 * corresponding value is missing from the dataset item.
 */
export interface SerpResult {
/** Title of the organic result. */
title: string;
/** URL of the organic result as it appears in the dataset item. */
url: string;
/** Value returned by `extractDomainFromUrl` for `url`. */
domain: string;
/** Description text of the organic result. */
description: string;
/** `position` value copied from the organic result; `0` when it was absent. */
position: number;
}
/**
 * Starts a run of the Google search scraper actor and resolves with the run's ID.
 *
 * The function returns as soon as the run-creation POST responds; the run's
 * progress has to be read separately (see `pollRunStatus` in this module).
 *
 * @param query - Search query text, forwarded unchanged as the actor's `queries` input.
 * @param maxPages - Forwarded as `maxPagesPerQuery`. To get roughly N results,
 *   pass `ceil(N / 10)`.
 * @param countryCode - Country code; upper-cased before it is sent.
 * @param languageCode - Language code; lower-cased before it is sent.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns The `id` field of the run object in the API response.
 */
export async function runGoogleSerpScraper(
  query: string,
  maxPages: number,
  countryCode: string,
  languageCode: string,
  apiToken: string
): Promise<string> {
  const endpoint = `${BASE_URL}/acts/${ACTOR_ID}/runs`;

  // NOTE(review): the actor's input schema decides which casing is accepted for
  // these codes — confirm that upper-casing the country and lower-casing the
  // language both match it (region-tagged language codes in particular).
  const actorInput = {
    queries: query,
    maxPagesPerQuery: maxPages,
    countryCode: countryCode.toUpperCase(),
    languageCode: languageCode.toLowerCase(),
  };

  const requestConfig = {
    params: { token: apiToken },
    headers: { "Content-Type": "application/json" },
    timeout: 30000, // milliseconds
  };

  const { data: envelope } = await axios.post(endpoint, actorInput, requestConfig);
  // The API wraps the run object in a top-level `data` property.
  return envelope.data.id;
}
/**
 * Fetches the current state of an Apify actor run (a single request, no looping).
 *
 * @param runId - ID of the run to look up, e.g. the value resolved by
 *   `runGoogleSerpScraper`.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns The `status` and `defaultDatasetId` fields of the run object in the
 *   API response.
 * @throws Rejects with the axios error when the request fails or exceeds the
 *   15-second timeout.
 */
export async function pollRunStatus(
  runId: string,
  apiToken: string
): Promise<{ status: string; defaultDatasetId: string }> {
  // Encode the ID so that characters such as "/", "?" or "#" stay inside this
  // one path segment and cannot redirect the token-bearing request elsewhere.
  const runUrl = `${BASE_URL}/actor-runs/${encodeURIComponent(runId)}`;

  const response = await axios.get(runUrl, {
    params: { token: apiToken },
    timeout: 15000, // milliseconds
  });

  // The API wraps the run object in a top-level `data` property.
  const { status, defaultDatasetId } = response.data.data;
  return { status, defaultDatasetId };
}
/**
 * Downloads the items of an Apify dataset and flattens every item's
 * `organicResults` into a single list of {@link SerpResult} entries.
 *
 * Items that are null or whose `organicResults` is not an array contribute
 * nothing. Missing string fields become `""` and a missing `position` becomes `0`.
 *
 * @param datasetId - ID of the dataset to read, e.g. the `defaultDatasetId`
 *   returned by `pollRunStatus`.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns All organic results, in dataset order and then per-item order.
 * @throws TypeError when the response body is not a JSON array.
 * @throws Rejects with the axios error when the request fails or exceeds the
 *   30-second timeout.
 */
export async function fetchDatasetItems(
  datasetId: string,
  apiToken: string
): Promise<SerpResult[]> {
  type OrganicResult = {
    title?: string | null;
    url?: string | null;
    description?: string | null;
    position?: number | null;
  };
  type DatasetItem = {
    query?: string;
    organicResults?: OrganicResult[] | null;
  } | null;

  const response = await axios.get(
    // Encode the ID so that characters such as "/", "?" or "#" stay inside this
    // one path segment and cannot redirect the token-bearing request elsewhere.
    `${BASE_URL}/datasets/${encodeURIComponent(datasetId)}/items`,
    {
      params: { token: apiToken, format: "json" },
      timeout: 30000, // milliseconds
    }
  );

  // Validate the shape instead of asserting it: an error object or a plain-text
  // body would otherwise fail with an opaque "not iterable" error or be
  // silently treated as an empty result set.
  const payload: unknown = response.data;
  if (!Array.isArray(payload)) {
    throw new TypeError(
      `Apify dataset ${datasetId} returned a non-array payload (${typeof payload})`
    );
  }
  const items: DatasetItem[] = payload;

  return items.flatMap((item) => {
    const organic = item?.organicResults;
    if (!Array.isArray(organic)) {
      return [];
    }
    return organic.map((r): SerpResult => {
      const url = r.url ?? "";
      return {
        title: r.title ?? "",
        url,
        domain: extractDomainFromUrl(url),
        description: r.description ?? "",
        position: r.position ?? 0,
      };
    });
  });
}