Initial commit: LeadFlow lead generation platform
Full-stack Next.js 16 app with three scraping pipelines:
- AirScale CSV → Anymailfinder Bulk Decision Maker search
- LinkedIn Sales Navigator → Vayne → Anymailfinder email enrichment
- Apify Google SERP → domain extraction → Anymailfinder bulk enrichment

Includes Docker multi-stage build + docker-compose for Coolify deployment.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
232
lib/services/anymailfinder.ts
Normal file
232
lib/services/anymailfinder.ts
Normal file
@@ -0,0 +1,232 @@
|
||||
// Anymailfinder API v5.1
|
||||
// Docs: https://anymailfinder.com/api
|
||||
// Auth: Authorization: YOUR_API_KEY (header)
|
||||
// No rate limits on individual searches.
|
||||
// Bulk API processes ~1,000 rows per 5 minutes asynchronously.
|
||||
|
||||
import axios from "axios";
|
||||
|
||||
// Root URL that every Anymailfinder request in this module is built from (API v5.1).
const BASE_URL = "https://api.anymailfinder.com/v5.1";
|
||||
|
||||
/**
 * Decision-maker categories accepted by the search helpers in this module;
 * the value is sent to Anymailfinder as `decision_maker_category`.
 */
export type DecisionMakerCategory =
  | "ceo"
  | "engineering"
  | "finance"
  | "hr"
  | "it"
  | "logistics"
  | "marketing"
  | "operations"
  | "buyer"
  | "sales";
|
||||
|
||||
/**
 * A single decision-maker lookup result.
 *
 * Produced both by the single-domain search (API payload plus `domain`) and
 * by the bulk normalizer, which maps downloaded rows into this shape.
 */
export interface DecisionMakerResult {
  /** Domain the lookup was run for; filled in by this module's search helpers. */
  domain?: string;
  /** Category the contact was matched under, if any. */
  decision_maker_category: string | null;
  /** Contact's full name, when one was found. */
  person_full_name: string | null;
  /** Contact's job title, when one was found. */
  person_job_title: string | null;
  /** Contact's LinkedIn profile URL, when one was found. */
  person_linkedin_url: string | null;
  /** Email address returned for the contact, regardless of its status. */
  email: string | null;
  /** Verification status reported for `email`. */
  email_status: "valid" | "risky" | "not_found" | "blacklisted";
  /** In the bulk path this equals `email` only when `email_status` is "valid"; otherwise null. */
  valid_email: string | null;
}
|
||||
|
||||
/** Flat per-domain record describing one bulk search hit. */
export interface BulkSearchResult {
  /** Domain the row belongs to. */
  domain: string;
  /** Full name of the person found, if any. */
  person_full_name: string | null;
  /** Job title of the person found, if any. */
  person_job_title: string | null;
  /** Email address found for the row, if any. */
  email: string | null;
  /** Status string reported for `email` (kept as a free-form string here). */
  email_status: string;
  /** Verified email address, or null when none is available. */
  valid_email: string | null;
}
|
||||
|
||||
// ─── Individual search (used for small batches / LinkedIn enrichment) ─────────
|
||||
|
||||
/**
 * Run a single (non-bulk) decision-maker search for one domain.
 *
 * @param domain Company domain to search.
 * @param categories Categories sent to the API as `decision_maker_category`.
 * @param apiKey Anymailfinder API key, sent verbatim in the `Authorization` header.
 * @returns The API response body with the searched `domain` attached.
 */
export async function searchDecisionMakerByDomain(
  domain: string,
  categories: DecisionMakerCategory[],
  apiKey: string
): Promise<DecisionMakerResult> {
  const endpoint = `${BASE_URL}/find-email/decision-maker`;
  const payload = { domain, decision_maker_category: categories };
  const requestConfig = {
    headers: { Authorization: apiKey, "Content-Type": "application/json" },
    // Allow up to three minutes for the request before it is aborted.
    timeout: 180000,
  };

  const { data } = await axios.post(endpoint, payload, requestConfig);

  // `domain` is spread last so the caller's value wins over any field in the response.
  return { ...data, domain };
}
|
||||
|
||||
// ─── Bulk JSON search (preferred for large domain lists) ────────────────────
|
||||
|
||||
/** Status document returned when polling a bulk search job. */
export interface BulkJobStatus {
  /** Identifier of the bulk search job. */
  id: string;
  /** Lifecycle state; the poller in this module stops on "completed" or "failed". */
  status: "queued" | "running" | "completed" | "failed" | "paused" | "on_deck";
  /** Per-outcome row counters for the job. */
  counts: {
    /** Total number of rows in the job. */
    total: number;
    /** Rows for which a valid email was found. */
    found_valid: number;
    /** Rows for which an email of unknown validity was found. */
    found_unknown: number;
    /** Rows for which nothing was found. */
    not_found: number;
    /** Rows that failed. */
    failed: number;
  };
}
|
||||
|
||||
/**
 * Queue a bulk decision-maker search through the Bulk JSON endpoint.
 *
 * @param domains Domains to look up; each becomes one single-cell data row.
 * @param category The one category to search for across all rows.
 * @param apiKey Anymailfinder API key, sent verbatim in the `Authorization` header.
 * @param fileName Optional job name; falls back to a timestamped `leadflow-…` name.
 * @returns The id of the created bulk search, to be polled for completion.
 */
export async function submitBulkDecisionMakerSearch(
  domains: string[],
  category: DecisionMakerCategory,
  apiKey: string,
  fileName?: string
): Promise<string> {
  // First row is the header; every following row holds exactly one domain.
  const rows: string[][] = [["domain"]];
  for (const domain of domains) {
    rows.push([domain]);
  }

  const payload = {
    data: rows,
    domain_field_index: 0,
    decision_maker_category: category,
    file_name: fileName || `leadflow-${Date.now()}`,
  };

  const { data: created } = await axios.post(`${BASE_URL}/bulk/json`, payload, {
    headers: { Authorization: apiKey, "Content-Type": "application/json" },
    timeout: 30000,
  });

  return created.id as string;
}
|
||||
|
||||
/**
 * Queue a bulk person-name search through the Bulk JSON endpoint.
 * Intended for LinkedIn enrichment, where a name and a domain are already known.
 *
 * @param leads People to look up; each becomes a `[domain, first name, last name]` row.
 * @param apiKey Anymailfinder API key, sent verbatim in the `Authorization` header.
 * @param fileName Optional job name; falls back to a timestamped `leadflow-…` name.
 * @returns The id of the created bulk search.
 */
export async function submitBulkPersonSearch(
  leads: Array<{ domain: string; firstName: string; lastName: string }>,
  apiKey: string,
  fileName?: string
): Promise<string> {
  // Header row first; the *_field_index values below refer to these column positions.
  const rows: string[][] = [["domain", "first_name", "last_name"]];
  for (const { domain, firstName, lastName } of leads) {
    rows.push([domain, firstName, lastName]);
  }

  const payload = {
    data: rows,
    domain_field_index: 0,
    first_name_field_index: 1,
    last_name_field_index: 2,
    file_name: fileName || `leadflow-${Date.now()}`,
  };

  const { data: created } = await axios.post(`${BASE_URL}/bulk/json`, payload, {
    headers: { Authorization: apiKey, "Content-Type": "application/json" },
    timeout: 30000,
  });

  return created.id as string;
}
|
||||
|
||||
/**
 * Fetch the current status document of a bulk search job.
 *
 * @param searchId Id returned when the bulk search was submitted.
 * @param apiKey Anymailfinder API key, sent verbatim in the `Authorization` header.
 * @returns The response body, typed as {@link BulkJobStatus}.
 */
export async function getBulkSearchStatus(
  searchId: string,
  apiKey: string
): Promise<BulkJobStatus> {
  const statusUrl = `${BASE_URL}/bulk/${searchId}`;
  const { data } = await axios.get(statusUrl, {
    headers: { Authorization: apiKey },
    timeout: 15000,
  });
  return data;
}
|
||||
|
||||
/**
 * Download the rows of a finished bulk search as a JSON array.
 *
 * IMPORTANT: credits are charged on the first download of a job.
 *
 * @param searchId Id of the bulk search to download.
 * @param apiKey Anymailfinder API key, sent verbatim in the `Authorization` header.
 * @returns The response body, treated as an array of string-keyed string records.
 */
export async function downloadBulkResults(
  searchId: string,
  apiKey: string
): Promise<Array<Record<string, string>>> {
  const downloadUrl = `${BASE_URL}/bulk/${searchId}/download`;
  const { data } = await axios.get(downloadUrl, {
    // Ask for the result as a JSON array rather than a file.
    params: { download_as: "json_arr" },
    headers: { Authorization: apiKey },
    timeout: 60000,
  });
  return data as Array<Record<string, string>>;
}
|
||||
|
||||
/**
 * High-level bulk enrichment: submit → poll → download → normalize.
 *
 * Uses the Bulk JSON API for efficiency (~1,000 rows per 5 minutes).
 *
 * @param domains Domains to enrich. An empty list short-circuits to `[]`
 *   without contacting the API.
 * @param categories Requested categories. Only the first entry is used, because
 *   the bulk API takes one category per job; defaults to "ceo" when empty.
 * @param apiKey Anymailfinder API key.
 * @param onProgress Optional callback invoked after every poll with
 *   `(processedRows, totalRows)`. It is awaited, so an async callback delays the
 *   next poll and a rejection from it rejects this function.
 * @returns One normalized {@link DecisionMakerResult} per downloaded row.
 * @throws Error when the bulk job ends in the "failed" state, plus anything
 *   thrown by the underlying requests or by `onProgress`.
 */
export async function bulkSearchDomains(
  domains: string[],
  categories: DecisionMakerCategory[],
  apiKey: string,
  onProgress?: (completed: number, total: number, result?: DecisionMakerResult) => Promise<void> | void
): Promise<DecisionMakerResult[]> {
  if (domains.length === 0) return [];

  // Use the primary category (first in list) for bulk search.
  // Anymailfinder bulk API takes one category at a time.
  const primaryCategory = categories[0] || "ceo";

  // 1. Submit bulk job
  const searchId = await submitBulkDecisionMakerSearch(
    domains,
    primaryCategory,
    apiKey,
    `leadflow-bulk-${Date.now()}`
  );

  // 2. Poll every 5 seconds until the job reaches a terminal state.
  let status: BulkJobStatus;
  do {
    await sleep(5000);
    status = await getBulkSearchStatus(searchId, apiKey);

    const counts = status.counts;
    // Failed rows are finished rows too; without them progress would never
    // reach the total for a job that contains failures.
    const processed =
      (counts?.found_valid || 0) +
      (counts?.not_found || 0) +
      (counts?.found_unknown || 0) +
      (counts?.failed || 0);

    // Await the callback so a rejecting async handler surfaces to the caller
    // instead of becoming an unhandled promise rejection.
    await onProgress?.(processed, counts?.total || domains.length);
  } while (status.status !== "completed" && status.status !== "failed");

  if (status.status === "failed") {
    throw new Error(`Anymailfinder bulk search failed (id: ${searchId})`);
  }

  // 3. Download results
  const rows = await downloadBulkResults(searchId, apiKey);

  // 4. Normalize to DecisionMakerResult[]; each field accepts either the
  //    snake_case key or the human-readable column title.
  return rows.map(row => {
    const email = row["email"] || row["Email"] || null;
    const emailStatus = (row["email_status"] || row["Email Status"] || "not_found").toLowerCase();
    // Only expose the address as "valid" when the API says so.
    const validEmail = emailStatus === "valid" ? email : null;

    return {
      domain: row["domain"] || row["Domain"] || "",
      decision_maker_category: primaryCategory,
      email,
      email_status: emailStatus as DecisionMakerResult["email_status"],
      valid_email: validEmail,
      person_full_name: row["person_full_name"] || row["Full Name"] || null,
      person_job_title: row["person_job_title"] || row["Job Title"] || null,
      person_linkedin_url: row["person_linkedin_url"] || row["LinkedIn URL"] || null,
    };
  });
}
|
||||
|
||||
/**
 * Best-effort lookup of the remaining credit balance.
 *
 * The account endpoint may not be publicly documented, so any failure
 * (network error, non-2xx response, missing field) yields `null` instead of throwing.
 *
 * @param apiKey Anymailfinder API key, sent verbatim in the `Authorization` header.
 * @returns `credits_remaining` from the response body, or `null` when unavailable.
 */
export async function getRemainingCredits(apiKey: string): Promise<number | null> {
  const requestConfig = {
    headers: { Authorization: apiKey },
    timeout: 10000,
  };

  try {
    const { data } = await axios.get(`${BASE_URL}/account`, requestConfig);
    return data?.credits_remaining ?? null;
  } catch {
    // Deliberately swallowed: callers treat "unknown" and "unavailable" alike.
    return null;
  }
}
|
||||
|
||||
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
84
lib/services/apify.ts
Normal file
84
lib/services/apify.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import axios from "axios";
|
||||
import { extractDomainFromUrl } from "@/lib/utils/domains";
|
||||
|
||||
// Root URL that every Apify request in this module is built from (API v2).
const BASE_URL = "https://api.apify.com/v2";
|
||||
// Identifier of the Apify actor that runGoogleSerpScraper starts runs of.
const ACTOR_ID = "apify~google-search-scraper";
|
||||
|
||||
/** One organic search result, flattened out of a scraper dataset item. */
export interface SerpResult {
  /** 1-based rank as reported by the scraper; 0 when it was missing. */
  position: number;
  /** Result title; empty string when missing. */
  title: string;
  /** Result URL; empty string when missing. */
  url: string;
  /** Domain derived from `url` via `extractDomainFromUrl`. */
  domain: string;
  /** Result snippet; empty string when missing. */
  description: string;
}
|
||||
|
||||
/**
 * Start a run of the Google search scraper actor for a single query.
 *
 * @param query Search query, sent as the actor's `queries` input.
 * @param maxPages Pages to fetch per query; to get roughly N results pass ceil(N / 10).
 * @param countryCode Country code; upper-cased before sending.
 * @param languageCode Language code; lower-cased before sending.
 * @param apiToken Apify API token, sent as the `token` query parameter.
 * @returns The id of the started run (`data.id` of the response envelope).
 */
export async function runGoogleSerpScraper(
  query: string,
  maxPages: number,
  countryCode: string,
  languageCode: string,
  apiToken: string
): Promise<string> {
  const runUrl = `${BASE_URL}/acts/${ACTOR_ID}/runs`;
  const actorInput = {
    queries: query,
    maxPagesPerQuery: maxPages,
    // NOTE(review): the code is upper-cased here — confirm the actor's input
    // schema accepts that casing for `countryCode`.
    countryCode: countryCode.toUpperCase(),
    languageCode: languageCode.toLowerCase(),
  };

  const { data: envelope } = await axios.post(runUrl, actorInput, {
    params: { token: apiToken },
    headers: { "Content-Type": "application/json" },
    timeout: 30000,
  });

  return envelope.data.id;
}
|
||||
|
||||
/**
 * Fetch the current state of an actor run (a single request; callers do the looping).
 *
 * @param runId Id returned when the run was started.
 * @param apiToken Apify API token, sent as the `token` query parameter.
 * @returns The run's `status` and `defaultDatasetId`, taken from the response envelope.
 */
export async function pollRunStatus(
  runId: string,
  apiToken: string
): Promise<{ status: string; defaultDatasetId: string }> {
  const { data: envelope } = await axios.get(`${BASE_URL}/actor-runs/${runId}`, {
    params: { token: apiToken },
    timeout: 15000,
  });

  const run = envelope.data;
  return { status: run.status, defaultDatasetId: run.defaultDatasetId };
}
|
||||
|
||||
/**
 * Download a dataset and flatten the organic results of every item into one list.
 *
 * Items without `organicResults` are skipped. Missing string fields become
 * empty strings and a missing position becomes 0.
 *
 * @param datasetId Id of the dataset to read.
 * @param apiToken Apify API token, sent as the `token` query parameter.
 * @returns All organic results in dataset order, each with its domain extracted.
 */
export async function fetchDatasetItems(
  datasetId: string,
  apiToken: string
): Promise<SerpResult[]> {
  const { data } = await axios.get(`${BASE_URL}/datasets/${datasetId}/items`, {
    params: { token: apiToken, format: "json" },
    timeout: 30000,
  });

  type OrganicHit = { title: string; url: string; description: string; position: number };
  const pages = data as Array<{ query?: string; organicResults?: OrganicHit[] }>;

  const flattened: SerpResult[] = [];
  for (const { organicResults } of pages) {
    if (!organicResults) continue;

    for (const hit of organicResults) {
      const url = hit.url || "";
      flattened.push({
        title: hit.title || "",
        url,
        domain: extractDomainFromUrl(url),
        description: hit.description || "",
        position: hit.position || 0,
      });
    }
  }
  return flattened;
}
|
||||
153
lib/services/vayne.ts
Normal file
153
lib/services/vayne.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
// Vayne API integration
|
||||
// Docs: https://www.vayne.io (OpenAPI spec available at /api endpoint)
|
||||
// Auth: Authorization: Bearer <api_token>
|
||||
// Base URL: https://www.vayne.io
|
||||
//
|
||||
// Flow:
|
||||
// 1. POST /api/orders with { url, limit, name, email_enrichment: false, export_format: "simple" }
|
||||
// 2. Poll GET /api/orders/{id} until scraping_status === "finished" | "failed"
|
||||
// 3. POST /api/orders/{id}/export with { export_format: "simple" }
|
||||
// 4. Poll GET /api/orders/{id} until exports[0].status === "completed"
|
||||
// 5. Download CSV from exports[0].file_url (S3 presigned URL)
|
||||
|
||||
import axios from "axios";
|
||||
import Papa from "papaparse";
|
||||
|
||||
// Root URL that every Vayne API path (`/api/...`) in this module is appended to.
const BASE_URL = "https://www.vayne.io";
|
||||
|
||||
/** A Vayne scraping order, as found under `order` in the API responses. */
export interface VayneOrder {
  /** Numeric order id, used in the `/api/orders/{id}` paths. */
  id: number;
  /** Name given to the order at creation time. */
  name: string;
  /** Order type string as reported by the API. */
  order_type: string;
  /** Scraping progress; "finished" and "failed" are the terminal states. */
  scraping_status:
    | "initialization"
    | "pending"
    | "segmenting"
    | "scraping"
    | "finished"
    | "failed";
  /** Result limit of the order. */
  limit: number;
  /** Scraped-so-far counter as reported by the API. */
  scraped: number;
  /** Creation timestamp string. */
  created_at: string;
  /** Export jobs for the order; `file_url` is where the CSV is downloaded from. */
  exports?: Array<{ status: "completed" | "pending" | "not_started"; file_url?: string }>;
}
|
||||
|
||||
/** One person parsed from a Vayne CSV export; missing cells become empty strings. */
export interface LeadProfile {
  /** Name exactly as found in the CSV. */
  fullName: string;
  /** First whitespace-separated token of the full name. */
  firstName: string;
  /** Everything after the first token of the full name. */
  lastName: string;
  /** Job title. */
  title: string;
  /** Company name. */
  company: string;
  /** Host name derived from the company URL / website column, without "www.". */
  companyDomain: string;
  /** LinkedIn profile URL. */
  linkedinUrl: string;
  /** Location text. */
  location: string;
}
|
||||
|
||||
/**
 * Create a scraping order for a Sales Navigator URL (step 1 of the flow).
 *
 * Email enrichment is turned off and the "simple" export format is requested.
 *
 * @param salesNavUrl Sales Navigator URL to scrape.
 * @param maxResults Sent as the order's `limit`.
 * @param apiToken Vayne API token, sent as a Bearer token.
 * @param orderName Optional order name; falls back to a timestamped `LeadFlow-…` name.
 * @returns The `order` object from the response body.
 */
export async function createOrder(
  salesNavUrl: string,
  maxResults: number,
  apiToken: string,
  orderName?: string
): Promise<VayneOrder> {
  const orderRequest = {
    url: salesNavUrl,
    limit: maxResults,
    name: orderName || `LeadFlow-${Date.now()}`,
    email_enrichment: false,
    export_format: "simple",
  };
  const headers = {
    Authorization: `Bearer ${apiToken}`,
    "Content-Type": "application/json",
  };

  const { data } = await axios.post(`${BASE_URL}/api/orders`, orderRequest, {
    headers,
    timeout: 30000,
  });
  return data.order;
}
|
||||
|
||||
/**
 * Fetch the current state of an order (a single request; callers do the polling).
 *
 * @param orderId Id of the order to look up.
 * @param apiToken Vayne API token, sent as a Bearer token.
 * @returns The `order` object from the response body.
 */
export async function getOrderStatus(
  orderId: number,
  apiToken: string
): Promise<VayneOrder> {
  const orderUrl = `${BASE_URL}/api/orders/${orderId}`;
  const { data } = await axios.get(orderUrl, {
    headers: { Authorization: `Bearer ${apiToken}` },
    timeout: 15000,
  });
  return data.order;
}
|
||||
|
||||
/**
 * Request a "simple"-format export for an order (step 3 of the flow).
 *
 * @param orderId Id of the order to export.
 * @param apiToken Vayne API token, sent as a Bearer token.
 * @returns The `order` object from the response body.
 */
export async function triggerExport(
  orderId: number,
  apiToken: string
): Promise<VayneOrder> {
  const exportUrl = `${BASE_URL}/api/orders/${orderId}/export`;
  const headers = {
    Authorization: `Bearer ${apiToken}`,
    "Content-Type": "application/json",
  };

  const { data } = await axios.post(
    exportUrl,
    { export_format: "simple" },
    { headers, timeout: 15000 }
  );
  return data.order;
}
|
||||
|
||||
/**
 * Download an exported CSV and parse it into lead profiles (step 5 of the flow).
 *
 * No auth header is sent; the flow notes describe `fileUrl` as an S3 presigned URL.
 *
 * @param fileUrl Download URL taken from the order's export entry.
 * @returns The profiles parsed from the CSV text.
 */
export async function downloadOrderCSV(
  fileUrl: string
): Promise<LeadProfile[]> {
  // Force a text response so the body reaches the CSV parser as a raw string.
  const { data: csvText } = await axios.get(fileUrl, {
    responseType: "text",
    timeout: 60000,
  });
  return parseVayneCSV(csvText);
}
|
||||
|
||||
/**
 * Reduce a company URL / website cell to a bare host name without a leading "www.".
 *
 * Accepts values with or without an http(s) scheme (matched case-insensitively)
 * and ignores surrounding whitespace. When the value cannot be parsed as a URL,
 * falls back to the text before the first "/" after removing any scheme and "www.".
 */
function companyDomainFromUrl(rawUrl: string): string {
  const cleaned = rawUrl.trim();
  if (!cleaned) return "";

  // Test for a real scheme prefix: a plain startsWith("http") would misread
  // hosts such as "httpbin.org" and miss upper-case "HTTP://".
  const hasScheme = /^https?:\/\//i.test(cleaned);
  try {
    const parsed = new URL(hasScheme ? cleaned : `https://${cleaned}`);
    return parsed.hostname.replace(/^www\./i, "");
  } catch {
    const withoutScheme = cleaned.replace(/^https?:\/\//i, "");
    return withoutScheme.replace(/^www\./i, "").split("/")[0] ?? "";
  }
}

/**
 * Parse a Vayne "simple"-format CSV export into lead profiles.
 *
 * Column names may vary, so each field is read from the first non-empty of
 * several common header variants; anything missing becomes an empty string.
 *
 * @param csvContent Raw CSV text with a header row.
 * @returns One profile per non-empty CSV row.
 */
function parseVayneCSV(csvContent: string): LeadProfile[] {
  const { data } = Papa.parse<Record<string, string>>(csvContent, {
    header: true,
    skipEmptyLines: true,
  });

  return data.map((row) => {
    // Vayne simple format columns (may vary; handle common variants)
    const fullName = row["Name"] || row["Full Name"] || row["full_name"] || "";

    // First token is the first name; the remainder (if any) is the last name.
    const [firstName = "", ...remainingParts] = fullName.trim().split(/\s+/);
    const lastName = remainingParts.join(" ");

    // Extract domain from company URL or website column
    const companyUrl = row["Company URL"] || row["company_url"] || row["Website"] || "";

    return {
      firstName,
      lastName,
      fullName,
      title: row["Job Title"] || row["Title"] || row["title"] || "",
      company: row["Company"] || row["Company Name"] || row["company"] || "",
      companyDomain: companyDomainFromUrl(companyUrl),
      linkedinUrl: row["LinkedIn URL"] || row["linkedin_url"] || row["Profile URL"] || "",
      location: row["Location"] || row["location"] || "",
    };
  });
}
|
||||
|
||||
/**
 * Best-effort check of whether the Vayne account reports a connected LinkedIn session.
 *
 * @param apiToken Vayne API token, sent as a Bearer token.
 * @returns `true` only when the response body has `connected === true`;
 *   `false` for any other body and for any request failure.
 */
export async function checkLinkedInAuth(apiToken: string): Promise<boolean> {
  const requestConfig = {
    headers: { Authorization: `Bearer ${apiToken}` },
    timeout: 10000,
  };

  try {
    const { data } = await axios.get(`${BASE_URL}/api/linkedin/status`, requestConfig);
    return data?.connected === true;
  } catch {
    // Deliberately swallowed: an unreachable status endpoint is reported as "not connected".
    return false;
  }
}
|
||||
Reference in New Issue
Block a user