Full-stack Next.js 16 app with three scraping pipelines:
- AirScale CSV → Anymailfinder Bulk Decision Maker search
- LinkedIn Sales Navigator → Vayne → Anymailfinder email enrichment
- Apify Google SERP → domain extraction → Anymailfinder bulk enrichment

Includes Docker multi-stage build + docker-compose for Coolify deployment.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
85 lines
2.1 KiB
TypeScript
85 lines
2.1 KiB
TypeScript
import axios from "axios";
|
|
import { extractDomainFromUrl } from "@/lib/utils/domains";
|
|
|
|
/** Root of the Apify REST API (v2); request URLs in this module are built on top of it. */
const BASE_URL = "https://api.apify.com/v2";

/** Identifier of the Apify actor whose runs are started by `runGoogleSerpScraper`. */
const ACTOR_ID = "apify~google-search-scraper";
|
|
|
|
/**
 * One organic search result, flattened out of an Apify dataset item by
 * `fetchDatasetItems`.
 */
export interface SerpResult {
  /** Result title; "" when the dataset entry has none. */
  title: string;
  /** Result URL; "" when the dataset entry has none. */
  url: string;
  /** Value returned by `extractDomainFromUrl` for `url`. */
  domain: string;
  /** Result description text; "" when the dataset entry has none. */
  description: string;
  /** Position value reported for the result; 0 when the dataset entry has none. */
  position: number;
}
|
|
|
|
/**
 * Starts a run of the Google search scraper actor on Apify and returns the
 * id of the created run. Use `pollRunStatus` with that id to check on it.
 *
 * @param query - Search query text, sent as the actor's `queries` input.
 * @param maxPages - Sent as `maxPagesPerQuery`. To get roughly N results,
 *   pass ceil(N / 10).
 * @param countryCode - Country code; upper-cased before it is sent.
 * @param languageCode - Language code; lower-cased before it is sent.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns The run id taken from `data.id` of the response body.
 * @throws Error when the response body carries no run id. Errors raised by
 *   axios (network failure, timeout, non-2xx status) propagate unchanged.
 */
export async function runGoogleSerpScraper(
  query: string,
  maxPages: number,
  countryCode: string,
  languageCode: string,
  apiToken: string
): Promise<string> {
  const response = await axios.post(
    `${BASE_URL}/acts/${ACTOR_ID}/runs`,
    {
      queries: query,
      maxPagesPerQuery: maxPages,
      // NOTE(review): the actor's documented country codes appear to be
      // lower-case; upper-casing is kept as-is here — confirm it is accepted.
      countryCode: countryCode.toUpperCase(),
      languageCode: languageCode.toLowerCase(),
    },
    {
      params: { token: apiToken },
      headers: { "Content-Type": "application/json" },
      timeout: 30000,
    }
  );

  // The body is external input: check the shape instead of trusting it, so a
  // malformed response fails here with a clear message rather than handing an
  // `undefined` run id to the caller.
  const payload: unknown = response.data;
  const runId = (payload as { data?: { id?: unknown } } | null | undefined)
    ?.data?.id;
  if (typeof runId !== "string" || runId === "") {
    throw new Error("Apify did not return a run id for the started actor run");
  }
  return runId;
}
|
|
|
|
/**
 * Fetches the current state of an actor run. This performs a single request;
 * it does not loop or wait, so callers repeat the call as needed.
 *
 * @param runId - Id of the run, as returned by `runGoogleSerpScraper`.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns The run's `status` and `defaultDatasetId`, read from `data` in the
 *   response body.
 * @throws Error when either field is missing or not a string. Errors raised
 *   by axios (network failure, timeout, non-2xx status) propagate unchanged.
 */
export async function pollRunStatus(
  runId: string,
  apiToken: string
): Promise<{ status: string; defaultDatasetId: string }> {
  const response = await axios.get(
    // Encode the id so it cannot alter the request path.
    `${BASE_URL}/actor-runs/${encodeURIComponent(runId)}`,
    {
      params: { token: apiToken },
      timeout: 15000,
    }
  );

  // Validate the external payload rather than destructuring it blindly, which
  // would either throw an opaque TypeError or leak `undefined` to the caller.
  const payload: unknown = response.data;
  const run = (
    payload as
      | { data?: { status?: unknown; defaultDatasetId?: unknown } | null }
      | null
      | undefined
  )?.data;
  const status = run?.status;
  const defaultDatasetId = run?.defaultDatasetId;
  if (typeof status !== "string" || typeof defaultDatasetId !== "string") {
    throw new Error(
      "Apify run response is missing a string status or defaultDatasetId"
    );
  }
  return { status, defaultDatasetId };
}
|
|
|
|
/**
 * Downloads the items of an Apify dataset and flattens every item's
 * `organicResults` into a single list of `SerpResult` rows, in the order
 * they appear in the response.
 *
 * Items without an `organicResults` array are skipped. Missing text fields
 * become "", a missing position becomes 0, and `domain` is whatever
 * `extractDomainFromUrl` returns for the (possibly empty) URL.
 *
 * @param datasetId - Id of the dataset to read.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns The flattened organic results; empty when no item has any.
 * @throws Error when the response body is not a JSON array. Errors raised by
 *   axios (network failure, timeout, non-2xx status) propagate unchanged.
 */
export async function fetchDatasetItems(
  datasetId: string,
  apiToken: string
): Promise<SerpResult[]> {
  // Fields are optional because the payload is external and unvalidated.
  type OrganicResult = {
    title?: string | null;
    url?: string | null;
    description?: string | null;
    position?: number | null;
  };
  type DatasetItem = {
    query?: string;
    organicResults?: Array<OrganicResult | null> | null;
  };

  const response = await axios.get(
    // Encode the id so it cannot alter the request path.
    `${BASE_URL}/datasets/${encodeURIComponent(datasetId)}/items`,
    {
      params: { token: apiToken, format: "json" },
      timeout: 30000,
    }
  );

  const payload: unknown = response.data;
  if (!Array.isArray(payload)) {
    // Fail with a clear message instead of "items is not iterable".
    throw new Error("Apify dataset items response was not a JSON array");
  }

  const results: SerpResult[] = [];
  for (const item of payload as Array<DatasetItem | null>) {
    const organic = item?.organicResults;
    if (!Array.isArray(organic)) {
      continue;
    }
    for (const entry of organic) {
      if (entry == null) {
        continue;
      }
      const url = entry.url ?? "";
      results.push({
        title: entry.title ?? "",
        url,
        domain: extractDomainFromUrl(url),
        description: entry.description ?? "",
        position: entry.position ?? 0,
      });
    }
  }
  return results;
}
|