import axios from "axios";
import { extractDomainFromUrl } from "@/lib/utils/domains";

const BASE_URL = "https://api.apify.com/v2";
const ACTOR_ID = "apify~google-search-scraper";

/** One organic search result, normalized from the scraper's dataset output. */
export interface SerpResult {
  title: string;
  url: string;
  /** Derived from `url` via `extractDomainFromUrl`. */
  domain: string;
  description: string;
  /** Position as reported by the scraper; 0 when the scraper did not provide one. */
  position: number;
}

/** Organic result as it appears in a dataset item; any field may be absent. */
interface RawOrganicResult {
  title?: string;
  url?: string;
  description?: string;
  position?: number;
}

/** The subset of a dataset item that this module reads. */
interface RawDatasetItem {
  query?: string;
  organicResults?: RawOrganicResult[];
}

/**
 * Starts a run of the Google Search scraper actor and returns the run id.
 *
 * The id is read from `data.id` of the response; use {@link pollRunStatus}
 * to check on the run afterwards.
 *
 * @param query - Search query, sent as the actor's `queries` input.
 * @param maxPages - Sent as `maxPagesPerQuery`. To get ~N results, pass
 *   `ceil(N / 10)`.
 * @param countryCode - Country code; lower-cased before sending.
 * @param languageCode - Language code; lower-cased before sending.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns The id of the created run.
 * @throws Rejects with the axios error on a failed request or after the
 *   30 s timeout.
 */
export async function runGoogleSerpScraper(
  query: string,
  maxPages: number,
  countryCode: string,
  languageCode: string,
  apiToken: string
): Promise<string> {
  const response = await axios.post(
    `${BASE_URL}/acts/${ACTOR_ID}/runs`,
    {
      queries: query,
      maxPagesPerQuery: maxPages,
      countryCode: countryCode.toLowerCase(),
      // NOTE(review): confirm the actor accepts lower-cased region-qualified
      // language codes (e.g. "pt-br") before relying on them.
      languageCode: languageCode.toLowerCase(),
    },
    {
      params: { token: apiToken },
      headers: { "Content-Type": "application/json" },
      timeout: 30000,
    }
  );

  return response.data.data.id;
}

/**
 * Fetches the current state of an actor run.
 *
 * @param runId - Id returned by {@link runGoogleSerpScraper}.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns The run's `status` and `defaultDatasetId`, exactly as found in
 *   the response's `data` object.
 * @throws Rejects with the axios error on a failed request or after the
 *   15 s timeout.
 */
export async function pollRunStatus(
  runId: string,
  apiToken: string
): Promise<{ status: string; defaultDatasetId: string }> {
  const response = await axios.get(`${BASE_URL}/actor-runs/${runId}`, {
    params: { token: apiToken },
    timeout: 15000,
  });

  const { status, defaultDatasetId } = response.data.data;
  return { status, defaultDatasetId };
}

/**
 * Downloads a dataset's items and flattens their organic results into a
 * single list.
 *
 * Items without `organicResults` are skipped. Missing (or otherwise falsy)
 * fields on a result are replaced with `""` (strings) or `0` (position).
 *
 * @param datasetId - Dataset to read, e.g. the `defaultDatasetId` from
 *   {@link pollRunStatus}.
 * @param apiToken - Apify API token, sent as the `token` query parameter.
 * @returns All organic results, in the order they appear in the dataset.
 * @throws Rejects with the axios error on a failed request or after the
 *   30 s timeout.
 */
export async function fetchDatasetItems(
  datasetId: string,
  apiToken: string
): Promise<SerpResult[]> {
  const response = await axios.get(
    `${BASE_URL}/datasets/${datasetId}/items`,
    {
      params: { token: apiToken, format: "json" },
      timeout: 30000,
    }
  );

  // The payload is not validated at runtime; this assertion documents the
  // shape the code below relies on.
  const items = response.data as RawDatasetItem[];

  const results: SerpResult[] = [];
  for (const item of items) {
    if (!item.organicResults) {
      continue;
    }
    for (const r of item.organicResults) {
      const url = r.url || "";
      results.push({
        title: r.title || "",
        url,
        domain: extractDomainFromUrl(url),
        description: r.description || "",
        position: r.position || 0,
      });
    }
  }

  return results;
}