Initial commit: LeadFlow lead generation platform

Full-stack Next.js 16 app with three scraping pipelines:
- AirScale CSV → Anymailfinder Bulk Decision Maker search
- LinkedIn Sales Navigator → Vayne → Anymailfinder email enrichment
- Apify Google SERP → domain extraction → Anymailfinder bulk enrichment

Includes Docker multi-stage build + docker-compose for Coolify deployment.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Timo Uttenweiler
2026-03-17 11:21:11 +01:00
parent 5b84001c1e
commit facf8c9f69
59 changed files with 5800 additions and 233 deletions

View File

@@ -0,0 +1,232 @@
// Anymailfinder API v5.1
// Docs: https://anymailfinder.com/api
// Auth: Authorization: YOUR_API_KEY (header)
// No rate limits on individual searches.
// Bulk API processes ~1,000 rows per 5 minutes asynchronously.
import axios from "axios";
// Root URL (API version v5.1) that the Anymailfinder request URLs in this module are built from.
const BASE_URL = "https://api.anymailfinder.com/v5.1";
/** Decision-maker category values accepted by the search functions of this client. */
export type DecisionMakerCategory =
  | "ceo"
  | "engineering"
  | "finance"
  | "hr"
  | "it"
  | "logistics"
  | "marketing"
  | "operations"
  | "buyer"
  | "sales";
/** Result of a decision-maker lookup for a single domain. */
export interface DecisionMakerResult {
  /** Category of the matched person, or `null` when none is reported. */
  decision_maker_category: string | null;
  /** Email address found for the person, if any. */
  email: string | null;
  /** Verification state of `email`. */
  email_status: "valid" | "risky" | "not_found" | "blacklisted";
  person_full_name: string | null;
  person_job_title: string | null;
  person_linkedin_url: string | null;
  /** NOTE(review): presumably equals `email` only when the status is "valid" — confirm against the API. */
  valid_email: string | null;
  /** Domain the lookup was run for; optional in this shape. */
  domain?: string;
}
/**
 * Flat per-domain record of a bulk search.
 * NOTE(review): presumably mirrors one downloaded bulk row — confirm against callers.
 */
export interface BulkSearchResult {
  domain: string;
  email: string | null;
  /** Free-form status string (not narrowed to a literal union here). */
  email_status: string;
  person_full_name: string | null;
  person_job_title: string | null;
  valid_email: string | null;
}
// ─── Individual search (used for small batches / LinkedIn enrichment) ─────────
/**
 * Runs a single (non-bulk) decision-maker lookup for one domain.
 *
 * @param domain - Company domain to search.
 * @param categories - Sent to the API as `decision_maker_category`.
 * @param apiKey - API key, sent verbatim in the `Authorization` header.
 * @returns The response body with the requested `domain` merged in.
 */
export async function searchDecisionMakerByDomain(
  domain: string,
  categories: DecisionMakerCategory[],
  apiKey: string
): Promise<DecisionMakerResult> {
  const endpoint = `${BASE_URL}/find-email/decision-maker`;
  const payload = { domain, decision_maker_category: categories };
  const requestConfig = {
    headers: { Authorization: apiKey, "Content-Type": "application/json" },
    // 180 s: a single lookup is given up to three minutes.
    timeout: 180000,
  };

  const { data: body } = await axios.post(endpoint, payload, requestConfig);
  return { ...body, domain };
}
// ─── Bulk JSON search (preferred for large domain lists) ────────────────────
/** Status payload of a bulk search job, as returned by `getBulkSearchStatus`. */
export interface BulkJobStatus {
  /** Identifier of the bulk job. */
  id: string;
  /** Lifecycle state of the job. */
  status:
    | "queued"
    | "running"
    | "completed"
    | "failed"
    | "paused"
    | "on_deck";
  /** Row counters reported for the job. */
  counts: {
    total: number;
    found_valid: number;
    found_unknown: number;
    not_found: number;
    failed: number;
  };
}
/**
 * Submits a bulk decision-maker search through the JSON bulk endpoint.
 *
 * @param domains - Domains to look up; each becomes one data row.
 * @param category - Single category applied to the whole job.
 * @param apiKey - API key, sent verbatim in the `Authorization` header.
 * @param fileName - Optional job name; defaults to `leadflow-<timestamp>`.
 * @returns The `id` of the created job, to be polled for completion.
 */
export async function submitBulkDecisionMakerSearch(
  domains: string[],
  category: DecisionMakerCategory,
  apiKey: string,
  fileName?: string
): Promise<string> {
  // First row is the header; every following row holds exactly one domain.
  const rows: string[][] = [["domain"]];
  for (const domain of domains) {
    rows.push([domain]);
  }

  const payload = {
    data: rows,
    domain_field_index: 0,
    decision_maker_category: category,
    file_name: fileName || `leadflow-${Date.now()}`,
  };
  const requestConfig = {
    headers: { Authorization: apiKey, "Content-Type": "application/json" },
    timeout: 30000,
  };

  const { data: job } = await axios.post(`${BASE_URL}/bulk/json`, payload, requestConfig);
  return job.id as string;
}
/**
 * Submits a bulk person-name search through the JSON bulk endpoint.
 * Intended for LinkedIn enrichment, where names and domains are already known.
 *
 * @param leads - People to look up; each becomes one `[domain, firstName, lastName]` row.
 * @param apiKey - API key, sent verbatim in the `Authorization` header.
 * @param fileName - Optional job name; defaults to `leadflow-<timestamp>`.
 * @returns The `id` of the created job.
 */
export async function submitBulkPersonSearch(
  leads: Array<{ domain: string; firstName: string; lastName: string }>,
  apiKey: string,
  fileName?: string
): Promise<string> {
  // Header row first; the *_field_index values below refer to these column positions.
  const rows: string[][] = [["domain", "first_name", "last_name"]];
  for (const { domain, firstName, lastName } of leads) {
    rows.push([domain, firstName, lastName]);
  }

  const payload = {
    data: rows,
    domain_field_index: 0,
    first_name_field_index: 1,
    last_name_field_index: 2,
    file_name: fileName || `leadflow-${Date.now()}`,
  };
  const requestConfig = {
    headers: { Authorization: apiKey, "Content-Type": "application/json" },
    timeout: 30000,
  };

  const { data: job } = await axios.post(`${BASE_URL}/bulk/json`, payload, requestConfig);
  return job.id as string;
}
/**
 * Fetches the current status of a bulk search job.
 *
 * @param searchId - Job id returned when the bulk search was submitted.
 * @param apiKey - API key, sent verbatim in the `Authorization` header.
 * @returns The response body, unmodified.
 */
export async function getBulkSearchStatus(
  searchId: string,
  apiKey: string
): Promise<BulkJobStatus> {
  const statusUrl = `${BASE_URL}/bulk/${searchId}`;
  const requestConfig = {
    headers: { Authorization: apiKey },
    timeout: 15000,
  };

  const { data: jobStatus } = await axios.get(statusUrl, requestConfig);
  return jobStatus;
}
/**
 * Downloads the rows of a bulk search as a JSON array (`download_as=json_arr`).
 * IMPORTANT: Credits are charged on first download.
 *
 * @param searchId - Id of the bulk job.
 * @param apiKey - API key, sent verbatim in the `Authorization` header.
 * @returns The response body, typed as an array of string-keyed rows.
 */
export async function downloadBulkResults(
  searchId: string,
  apiKey: string
): Promise<Array<Record<string, string>>> {
  const downloadUrl = `${BASE_URL}/bulk/${searchId}/download`;
  const requestConfig = {
    params: { download_as: "json_arr" },
    headers: { Authorization: apiKey },
    timeout: 60000,
  };

  const { data: rows } = await axios.get(downloadUrl, requestConfig);
  return rows as Array<Record<string, string>>;
}
/**
 * High-level bulk enrichment: submit → poll → download → normalize.
 * Uses the Bulk JSON API for efficiency (1,000 rows/5 min).
 *
 * Only the first entry of `categories` is used (falling back to "ceo"),
 * because the bulk job is submitted with a single category.
 *
 * @param domains - Domains to enrich. An empty list returns `[]` without any API call.
 * @param categories - Requested categories; entries after the first are ignored.
 * @param apiKey - Anymailfinder API key.
 * @param onProgress - Called after every status poll with `(processed, total)`.
 *   The callback is awaited, so a promise it rejects with rejects this function
 *   instead of surfacing as an unhandled rejection. The optional `result`
 *   argument is never supplied by this function.
 * @returns One normalized result per downloaded row.
 * @throws Error when the job ends in status "failed"; errors thrown by the
 *   submit/poll/download helpers propagate unchanged.
 */
export async function bulkSearchDomains(
  domains: string[],
  categories: DecisionMakerCategory[],
  apiKey: string,
  onProgress?: (completed: number, total: number, result?: DecisionMakerResult) => Promise<void> | void
): Promise<DecisionMakerResult[]> {
  if (domains.length === 0) return [];

  // Use the primary category (first in list) for bulk search.
  // Anymailfinder bulk API takes one category at a time.
  const primaryCategory = categories[0] || "ceo";

  // 1. Submit bulk job
  const searchId = await submitBulkDecisionMakerSearch(
    domains,
    primaryCategory,
    apiKey,
    `leadflow-bulk-${Date.now()}`
  );

  // 2. Poll until complete (~1,000 rows per 5 min).
  // NOTE(review): there is no upper bound on polling — a job that stays in a
  // non-terminal state such as "paused" keeps this loop running indefinitely.
  let status: BulkJobStatus;
  do {
    await sleep(5000);
    status = await getBulkSearchStatus(searchId, apiKey);

    const counts = status.counts;
    // Rows that failed are finished too; leaving them out would keep the
    // reported progress below `total` even after the job completed.
    const processed =
      (counts?.found_valid || 0) +
      (counts?.found_unknown || 0) +
      (counts?.not_found || 0) +
      (counts?.failed || 0);

    // Await the callback: its type allows a Promise, and a floating promise
    // would turn a callback failure into an unhandled rejection.
    await onProgress?.(processed, counts?.total || domains.length);
  } while (status.status !== "completed" && status.status !== "failed");

  if (status.status === "failed") {
    throw new Error(`Anymailfinder bulk search failed (id: ${searchId})`);
  }

  // 3. Download results
  const rows = await downloadBulkResults(searchId, apiKey);

  // 4. Normalize to DecisionMakerResult[]. Each field is read under its
  // snake_case key first and then under a human-readable header variant.
  return rows.map(row => {
    const email = row["email"] || row["Email"] || null;
    const emailStatus = (row["email_status"] || row["Email Status"] || "not_found").toLowerCase();
    // Only an address whose status is exactly "valid" is exposed as valid_email.
    const validEmail = emailStatus === "valid" ? email : null;
    return {
      domain: row["domain"] || row["Domain"] || "",
      decision_maker_category: primaryCategory,
      email,
      email_status: emailStatus as DecisionMakerResult["email_status"],
      valid_email: validEmail,
      person_full_name: row["person_full_name"] || row["Full Name"] || null,
      person_job_title: row["person_job_title"] || row["Job Title"] || null,
      person_linkedin_url: row["person_linkedin_url"] || row["LinkedIn URL"] || null,
    };
  });
}
/**
 * Best-effort lookup of the remaining credit balance.
 *
 * @param apiKey - API key, sent verbatim in the `Authorization` header.
 * @returns `credits_remaining` from the response body, or `null` when the
 *   field is absent or the request fails for any reason.
 */
export async function getRemainingCredits(apiKey: string): Promise<number | null> {
  const requestConfig = {
    headers: { Authorization: apiKey },
    timeout: 10000,
  };

  try {
    // Try account endpoint (may not be documented publicly, returns null if unavailable)
    const { data: account } = await axios.get(`${BASE_URL}/account`, requestConfig);
    const remaining = account?.credits_remaining;
    return remaining ?? null;
  } catch {
    // Deliberately swallowed: the balance is optional information.
    return null;
  }
}
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

84
lib/services/apify.ts Normal file
View File

@@ -0,0 +1,84 @@
import axios from "axios";
import { extractDomainFromUrl } from "@/lib/utils/domains";
// Root URL (v2) that the Apify request URLs in this module are built from.
const BASE_URL = "https://api.apify.com/v2";
// Actor identifier interpolated into the `/acts/{id}/runs` URL when a run is started.
const ACTOR_ID = "apify~google-search-scraper";
/** One organic search hit, flattened out of a dataset item. */
export interface SerpResult {
  title: string;
  /** URL of the hit. */
  url: string;
  /** Domain associated with `url`. */
  domain: string;
  description: string;
  /** Position of the hit as reported in the dataset. */
  position: number;
}
/**
 * Starts a run of the Google search scraper actor.
 *
 * @param query - Sent as the actor's `queries` input.
 * @param maxPages - Sent as `maxPagesPerQuery`; to get ~N results use ceil(N/10).
 * @param countryCode - Upper-cased before sending.
 * @param languageCode - Lower-cased before sending.
 * @param apiToken - Apify token, passed as the `token` query parameter.
 * @returns The id of the started run (`data.id` of the response body).
 */
export async function runGoogleSerpScraper(
  query: string,
  maxPages: number,
  countryCode: string,
  languageCode: string,
  apiToken: string
): Promise<string> {
  const actorInput = {
    queries: query,
    maxPagesPerQuery: maxPages,
    countryCode: countryCode.toUpperCase(),
    languageCode: languageCode.toLowerCase(),
  };
  const requestConfig = {
    params: { token: apiToken },
    headers: { "Content-Type": "application/json" },
    timeout: 30000,
  };

  const { data: envelope } = await axios.post(
    `${BASE_URL}/acts/${ACTOR_ID}/runs`,
    actorInput,
    requestConfig
  );
  return envelope.data.id;
}
/**
 * Fetches the current state of an actor run (a single request, no looping).
 *
 * @param runId - Id of the run.
 * @param apiToken - Apify token, passed as the `token` query parameter.
 * @returns The run's `status` and `defaultDatasetId`, taken from `data` of the response body.
 */
export async function pollRunStatus(
  runId: string,
  apiToken: string
): Promise<{ status: string; defaultDatasetId: string }> {
  const runUrl = `${BASE_URL}/actor-runs/${runId}`;
  const { data: envelope } = await axios.get(runUrl, {
    params: { token: apiToken },
    timeout: 15000,
  });

  const run = envelope.data;
  return { status: run.status, defaultDatasetId: run.defaultDatasetId };
}
/**
 * Downloads a dataset and flattens every item's `organicResults` into one list.
 *
 * @param datasetId - Id of the dataset to read.
 * @param apiToken - Apify token, passed as the `token` query parameter.
 * @returns One entry per organic result, in dataset order. Missing or falsy
 *   fields fall back to `""` (strings) or `0` (position).
 */
export async function fetchDatasetItems(
  datasetId: string,
  apiToken: string
): Promise<SerpResult[]> {
  const { data } = await axios.get(`${BASE_URL}/datasets/${datasetId}/items`, {
    params: { token: apiToken, format: "json" },
    timeout: 30000,
  });
  const pages = data as Array<{
    query?: string;
    organicResults?: Array<{ title: string; url: string; description: string; position: number }>;
  }>;

  const collected: SerpResult[] = [];
  for (const page of pages) {
    const organic = page.organicResults;
    // Items without organic results contribute nothing.
    if (!organic) continue;

    for (const hit of organic) {
      const url = hit.url || "";
      collected.push({
        title: hit.title || "",
        url,
        domain: extractDomainFromUrl(url),
        description: hit.description || "",
        position: hit.position || 0,
      });
    }
  }
  return collected;
}

153
lib/services/vayne.ts Normal file
View File

@@ -0,0 +1,153 @@
// Vayne API integration
// Docs: https://www.vayne.io (OpenAPI spec available at /api endpoint)
// Auth: Authorization: Bearer <api_token>
// Base URL: https://www.vayne.io
//
// Flow:
// 1. POST /api/orders with { url, limit, name, email_enrichment: false, export_format: "simple" }
// 2. Poll GET /api/orders/{id} until scraping_status === "finished" | "failed"
// 3. POST /api/orders/{id}/export with { export_format: "simple" }
// 4. Poll GET /api/orders/{id} until exports[0].status === "completed"
// 5. Download CSV from exports[0].file_url (S3 presigned URL)
import axios from "axios";
import Papa from "papaparse";
// Root URL that the Vayne `/api/...` request URLs in this module are built from.
const BASE_URL = "https://www.vayne.io";
/** Order object as returned in the `order` field of the orders endpoints. */
export interface VayneOrder {
  id: number;
  name: string;
  order_type: string;
  /** Scraping phase of the order. */
  scraping_status:
    | "initialization"
    | "pending"
    | "segmenting"
    | "scraping"
    | "finished"
    | "failed";
  limit: number;
  /** NOTE(review): presumably the number of profiles scraped so far — confirm. */
  scraped: number;
  created_at: string;
  /** Export entries of the order, when present. */
  exports?: {
    status: "completed" | "pending" | "not_started";
    file_url?: string;
  }[];
}
/** One lead as produced from a row of a downloaded order CSV. */
export interface LeadProfile {
  firstName: string;
  lastName: string;
  fullName: string;
  /** Job title of the lead. */
  title: string;
  /** Company name. */
  company: string;
  /** Company host name. */
  companyDomain: string;
  linkedinUrl: string;
  location: string;
}
/**
 * Creates a scraping order for a Sales Navigator URL.
 *
 * @param salesNavUrl - Sent as the order's `url`.
 * @param maxResults - Sent as the order's `limit`.
 * @param apiToken - Vayne token, sent as a Bearer token.
 * @param orderName - Optional order name; defaults to `LeadFlow-<timestamp>`.
 * @returns The `order` object of the response body.
 */
export async function createOrder(
  salesNavUrl: string,
  maxResults: number,
  apiToken: string,
  orderName?: string
): Promise<VayneOrder> {
  const payload = {
    url: salesNavUrl,
    limit: maxResults,
    name: orderName || `LeadFlow-${Date.now()}`,
    // Email enrichment is switched off in the order itself.
    email_enrichment: false,
    export_format: "simple",
  };
  const requestConfig = {
    headers: {
      Authorization: `Bearer ${apiToken}`,
      "Content-Type": "application/json",
    },
    timeout: 30000,
  };

  const { data: body } = await axios.post(`${BASE_URL}/api/orders`, payload, requestConfig);
  return body.order;
}
/**
 * Fetches the current state of an order (a single request, no looping).
 *
 * @param orderId - Id of the order.
 * @param apiToken - Vayne token, sent as a Bearer token.
 * @returns The `order` object of the response body.
 */
export async function getOrderStatus(
  orderId: number,
  apiToken: string
): Promise<VayneOrder> {
  const orderUrl = `${BASE_URL}/api/orders/${orderId}`;
  const requestConfig = {
    headers: { Authorization: `Bearer ${apiToken}` },
    timeout: 15000,
  };

  const { data: body } = await axios.get(orderUrl, requestConfig);
  return body.order;
}
/**
 * Requests a "simple"-format export for an order.
 *
 * @param orderId - Id of the order to export.
 * @param apiToken - Vayne token, sent as a Bearer token.
 * @returns The `order` object of the response body.
 */
export async function triggerExport(
  orderId: number,
  apiToken: string
): Promise<VayneOrder> {
  const exportUrl = `${BASE_URL}/api/orders/${orderId}/export`;
  const payload = { export_format: "simple" };
  const requestConfig = {
    headers: {
      Authorization: `Bearer ${apiToken}`,
      "Content-Type": "application/json",
    },
    timeout: 15000,
  };

  const { data: body } = await axios.post(exportUrl, payload, requestConfig);
  return body.order;
}
/**
 * Downloads an exported CSV and parses it into lead profiles.
 *
 * @param fileUrl - URL of the CSV file; requested without any auth header.
 * @returns The parsed leads, one per CSV row.
 */
export async function downloadOrderCSV(
  fileUrl: string
): Promise<LeadProfile[]> {
  // Ask for the raw text so the body reaches the CSV parser unmodified.
  const { data: csvText } = await axios.get(fileUrl, {
    timeout: 60000,
    responseType: "text",
  });
  return parseVayneCSV(csvText);
}
/**
 * Parses a Vayne CSV export into lead profiles.
 *
 * Columns are looked up under several header variants; a missing column
 * yields an empty string for that field.
 *
 * @param csvContent - Raw CSV text with a header row.
 * @returns One `LeadProfile` per non-empty CSV row.
 */
function parseVayneCSV(csvContent: string): LeadProfile[] {
  const { data } = Papa.parse<Record<string, string>>(csvContent, {
    header: true,
    skipEmptyLines: true,
  });

  // Case-insensitive on purpose: a value such as "HTTP://Example.com" must be
  // recognised as already having a scheme. Prefixing it with "https://" would
  // make the URL parser report "http" as the hostname.
  const hasHttpScheme = /^https?:\/\//i;

  return data.map((row) => {
    // Vayne simple format columns (may vary; handle common variants)
    const fullName = row["Name"] || row["Full Name"] || row["full_name"] || "";
    // First whitespace-separated token is the first name, the rest the last name.
    const nameParts = fullName.trim().split(/\s+/);
    const firstName = nameParts[0] || "";
    const lastName = nameParts.slice(1).join(" ") || "";

    // Extract domain from company URL or website column.
    // Trimmed so surrounding whitespace in the cell cannot end up in the domain.
    const companyUrl = (row["Company URL"] || row["company_url"] || row["Website"] || "").trim();
    let companyDomain = "";
    if (companyUrl) {
      try {
        const parsed = new URL(hasHttpScheme.test(companyUrl) ? companyUrl : `https://${companyUrl}`);
        companyDomain = parsed.hostname.replace(/^www\./i, "");
      } catch {
        // Unparseable value: fall back to plain string surgery — drop the
        // scheme and a leading "www.", then keep everything before the first "/".
        companyDomain =
          companyUrl.replace(hasHttpScheme, "").replace(/^www\./i, "").split("/")[0] ?? "";
      }
    }

    return {
      firstName,
      lastName,
      fullName,
      title: row["Job Title"] || row["Title"] || row["title"] || "",
      company: row["Company"] || row["Company Name"] || row["company"] || "",
      companyDomain,
      linkedinUrl: row["LinkedIn URL"] || row["linkedin_url"] || row["Profile URL"] || "",
      location: row["Location"] || row["location"] || "",
    };
  });
}
/**
 * Best-effort check of the LinkedIn connection status.
 *
 * @param apiToken - Vayne token, sent as a Bearer token.
 * @returns `true` only when the response body has `connected === true`;
 *   `false` otherwise, including when the request fails for any reason.
 */
export async function checkLinkedInAuth(apiToken: string): Promise<boolean> {
  const requestConfig = {
    headers: { Authorization: `Bearer ${apiToken}` },
    timeout: 10000,
  };

  try {
    const { data: body } = await axios.get(`${BASE_URL}/api/linkedin/status`, requestConfig);
    return body?.connected === true;
  } catch {
    // Any failure is reported as "not connected".
    return false;
  }
}