435 lines
17 KiB
Python
435 lines
17 KiB
Python
"""Profile analyzer agent."""
|
||
import json
|
||
from typing import Dict, Any, List
|
||
from collections import Counter
|
||
from loguru import logger
|
||
|
||
from src.agents.base import BaseAgent
|
||
from src.database.models import LinkedInProfile, LinkedInPost
|
||
|
||
|
||
class ProfileAnalyzerAgent(BaseAgent):
|
||
"""Agent for analyzing LinkedIn profiles and extracting writing patterns."""
|
||
|
||
def __init__(self):
|
||
"""Initialize profile analyzer agent."""
|
||
super().__init__("ProfileAnalyzer")
|
||
|
||
async def process(
|
||
self,
|
||
profile: LinkedInProfile,
|
||
posts: List[LinkedInPost],
|
||
customer_data: Dict[str, Any]
|
||
) -> Dict[str, Any]:
|
||
"""
|
||
Analyze LinkedIn profile and extract writing patterns.
|
||
|
||
Args:
|
||
profile: LinkedIn profile data
|
||
posts: List of LinkedIn posts
|
||
customer_data: Additional customer data from input file
|
||
|
||
Returns:
|
||
Comprehensive profile analysis
|
||
"""
|
||
logger.info(f"Analyzing profile for: {profile.name}")
|
||
|
||
# Prepare analysis data
|
||
profile_summary = {
|
||
"name": profile.name,
|
||
"headline": profile.headline,
|
||
"summary": profile.summary,
|
||
"industry": profile.industry,
|
||
"location": profile.location
|
||
}
|
||
|
||
# Prepare posts with engagement data - use up to 30 posts
|
||
posts_with_engagement = self._prepare_posts_for_analysis(posts[:15])
|
||
|
||
# Also identify top performing posts by engagement
|
||
top_posts = self._get_top_performing_posts(posts, limit=5)
|
||
|
||
system_prompt = self._get_system_prompt()
|
||
user_prompt = self._get_user_prompt(profile_summary, posts_with_engagement, top_posts, customer_data)
|
||
|
||
response = await self.call_openai(
|
||
system_prompt=system_prompt,
|
||
user_prompt=user_prompt,
|
||
model="gpt-4o",
|
||
temperature=0.3,
|
||
response_format={"type": "json_object"}
|
||
)
|
||
|
||
# Parse JSON response
|
||
analysis = json.loads(response)
|
||
|
||
# Add N-gram analysis
|
||
post_texts = [p.post_text for p in posts if p.post_text]
|
||
ngram_patterns = self._extract_ngram_patterns(post_texts)
|
||
analysis["ngram_patterns"] = ngram_patterns
|
||
|
||
logger.info("Profile analysis completed successfully (with N-gram analysis)")
|
||
|
||
return analysis
|
||
|
||
def _prepare_posts_for_analysis(self, posts: List[LinkedInPost]) -> List[Dict[str, Any]]:
|
||
"""Prepare posts with engagement data for analysis."""
|
||
prepared = []
|
||
for i, post in enumerate(posts):
|
||
if not post.post_text:
|
||
continue
|
||
prepared.append({
|
||
"index": i + 1,
|
||
"text": post.post_text,
|
||
"likes": post.likes or 0,
|
||
"comments": post.comments or 0,
|
||
"shares": post.shares or 0,
|
||
"engagement_total": (post.likes or 0) + (post.comments or 0) * 2 + (post.shares or 0) * 3
|
||
})
|
||
return prepared
|
||
|
||
def _get_top_performing_posts(self, posts: List[LinkedInPost], limit: int = 5) -> List[Dict[str, Any]]:
|
||
"""Get top performing posts by engagement."""
|
||
posts_with_engagement = []
|
||
for post in posts:
|
||
if not post.post_text or len(post.post_text) < 50:
|
||
continue
|
||
engagement = (post.likes or 0) + (post.comments or 0) * 2 + (post.shares or 0) * 3
|
||
posts_with_engagement.append({
|
||
"text": post.post_text,
|
||
"likes": post.likes or 0,
|
||
"comments": post.comments or 0,
|
||
"shares": post.shares or 0,
|
||
"engagement_score": engagement
|
||
})
|
||
|
||
# Sort by engagement and return top posts
|
||
sorted_posts = sorted(posts_with_engagement, key=lambda x: x["engagement_score"], reverse=True)
|
||
return sorted_posts[:limit]
|
||
|
||
def _get_system_prompt(self) -> str:
|
||
"""Get system prompt for profile analysis."""
|
||
return """Du bist ein hochspezialisierter AI-Analyst für LinkedIn-Profile und Content-Strategie.
|
||
|
||
Deine Aufgabe ist es, aus LinkedIn-Profildaten und Posts ein umfassendes Content-Analyse-Profil zu erstellen, das als BLAUPAUSE für das Schreiben neuer Posts dient.
|
||
|
||
WICHTIG: Extrahiere ECHTE BEISPIELE aus den Posts! Keine generischen Beschreibungen.
|
||
|
||
Das Profil soll folgende Dimensionen analysieren:
|
||
|
||
1. **Schreibstil & Tonalität**
|
||
- Wie schreibt die Person? (formal, locker, inspirierend, provokativ, etc.)
|
||
- Welche Perspektive wird genutzt? (Ich, Wir, Man)
|
||
- Wie ist die Ansprache? (Du, Sie, neutral)
|
||
- Satzdynamik und Rhythmus
|
||
|
||
2. **Phrasen-Bibliothek (KRITISCH!)**
|
||
- Hook-Phrasen: Wie beginnen Posts? Extrahiere 5-10 ECHTE Beispiele!
|
||
- Übergangs-Phrasen: Wie werden Absätze verbunden?
|
||
- Emotionale Ausdrücke: Ausrufe, Begeisterung, etc.
|
||
- CTA-Phrasen: Wie werden Leser aktiviert?
|
||
- Signature Phrases: Wiederkehrende Markenzeichen
|
||
|
||
3. **Struktur-Templates**
|
||
- Analysiere die STRUKTUR der Top-Posts
|
||
- Erstelle 2-3 konkrete Templates (z.B. "Hook → Flashback → Erkenntnis → CTA")
|
||
- Typische Satzanfänge für jeden Abschnitt
|
||
|
||
4. **Visuelle Muster**
|
||
- Emoji-Nutzung (welche, wo, wie oft)
|
||
- Unicode-Formatierung (fett, kursiv)
|
||
- Strukturierung (Absätze, Listen, etc.)
|
||
|
||
5. **Audience Insights**
|
||
- Wer ist die Zielgruppe?
|
||
- Welche Probleme werden adressiert?
|
||
- Welcher Mehrwert wird geboten?
|
||
|
||
Gib deine Analyse als strukturiertes JSON zurück."""
|
||
|
||
def _get_user_prompt(
|
||
self,
|
||
profile_summary: Dict[str, Any],
|
||
posts_with_engagement: List[Dict[str, Any]],
|
||
top_posts: List[Dict[str, Any]],
|
||
customer_data: Dict[str, Any]
|
||
) -> str:
|
||
"""Get user prompt with data for analysis."""
|
||
# Format all posts with engagement data
|
||
all_posts_text = ""
|
||
for post in posts_with_engagement:
|
||
all_posts_text += f"\n--- Post {post['index']} (Likes: {post['likes']}, Comments: {post['comments']}, Shares: {post['shares']}) ---\n"
|
||
all_posts_text += post['text'][:2000] # Limit each post to 2000 chars
|
||
all_posts_text += "\n"
|
||
|
||
# Format top performing posts
|
||
top_posts_text = ""
|
||
if top_posts:
|
||
for i, post in enumerate(top_posts, 1):
|
||
top_posts_text += f"\n--- TOP POST {i} (Engagement Score: {post['engagement_score']}, Likes: {post['likes']}, Comments: {post['comments']}) ---\n"
|
||
top_posts_text += post['text'][:2000]
|
||
top_posts_text += "\n"
|
||
|
||
return f"""Bitte analysiere folgendes LinkedIn-Profil BASIEREND AUF DEN ECHTEN POSTS:
|
||
|
||
**PROFIL-INFORMATIONEN:**
|
||
- Name: {profile_summary.get('name', 'N/A')}
|
||
- Headline: {profile_summary.get('headline', 'N/A')}
|
||
- Branche: {profile_summary.get('industry', 'N/A')}
|
||
- Location: {profile_summary.get('location', 'N/A')}
|
||
- Summary: {profile_summary.get('summary', 'N/A')}
|
||
|
||
**ZUSÄTZLICHE KUNDENDATEN (Persona, Style Guide, etc.):**
|
||
{json.dumps(customer_data, indent=2, ensure_ascii=False)}
|
||
|
||
**TOP-PERFORMING POSTS (die erfolgreichsten Posts - ANALYSIERE DIESE BESONDERS GENAU!):**
|
||
{top_posts_text if top_posts_text else "Keine Engagement-Daten verfügbar"}
|
||
|
||
**ALLE POSTS ({len(posts_with_engagement)} Posts mit Engagement-Daten):**
|
||
{all_posts_text}
|
||
|
||
---
|
||
|
||
WICHTIG: Analysiere die ECHTEN POSTS sehr genau! Deine Analyse muss auf den tatsächlichen Mustern basieren, nicht auf Annahmen. Extrahiere WÖRTLICHE ZITATE wo möglich!
|
||
|
||
Achte besonders auf:
|
||
1. Die TOP-PERFORMING Posts - was macht sie erfolgreich?
|
||
2. Wiederkehrende Phrasen und Formulierungen - WÖRTLICH extrahieren!
|
||
3. Wie beginnen die Posts (Hooks)? - ECHTE BEISPIELE sammeln!
|
||
4. Wie enden die Posts (CTAs)?
|
||
5. Emoji-Verwendung (welche, wo, wie oft)
|
||
6. Länge und Struktur der Absätze
|
||
7. Typische Satzanfänge und Übergänge
|
||
|
||
Erstelle eine umfassende Analyse im folgenden JSON-Format:
|
||
|
||
{{
|
||
"writing_style": {{
|
||
"tone": "Beschreibung der Tonalität basierend auf den echten Posts",
|
||
"perspective": "Ich/Wir/Man/Gemischt - mit Beispielen aus den Posts",
|
||
"form_of_address": "Du/Sie/Neutral - wie spricht die Person die Leser an?",
|
||
"sentence_dynamics": "Kurze Sätze? Lange Sätze? Mischung? Fragen?",
|
||
"average_post_length": "Kurz/Mittel/Lang",
|
||
"average_word_count": 0
|
||
}},
|
||
"linguistic_fingerprint": {{
|
||
"energy_level": 0,
|
||
"shouting_usage": "Beschreibung mit konkreten Beispielen aus den Posts",
|
||
"punctuation_patterns": "Beschreibung (!!!, ..., ?, etc.)",
|
||
"signature_phrases": ["ECHTE Phrasen aus den Posts", "die wiederholt vorkommen"],
|
||
"narrative_anchors": ["Storytelling-Elemente", "die die Person nutzt"]
|
||
}},
|
||
"phrase_library": {{
|
||
"hook_phrases": [
|
||
"ECHTE Hook-Sätze aus den Posts wörtlich kopiert",
|
||
"Mindestens 5-8 verschiedene Beispiele",
|
||
"z.B. '𝗞𝗜-𝗦𝘂𝗰𝗵𝗲 𝗶𝘀𝘁 𝗱𝗲𝗿 𝗲𝗿𝘀𝘁𝗲 𝗦𝗰𝗵𝗿𝗶𝘁𝘁 𝗶𝗺 𝗦𝗮𝗹𝗲𝘀 𝗙𝘂𝗻𝗻𝗲𝗹.'"
|
||
],
|
||
"transition_phrases": [
|
||
"ECHTE Übergangssätze zwischen Absätzen",
|
||
"z.B. 'Und wisst ihr was?', 'Aber Moment...', 'Was das mit X zu tun hat?'"
|
||
],
|
||
"emotional_expressions": [
|
||
"Ausrufe und emotionale Marker",
|
||
"z.B. 'Halleluja!', 'Sorry to say!!', 'Galopp!!!!'"
|
||
],
|
||
"cta_phrases": [
|
||
"ECHTE Call-to-Action Formulierungen",
|
||
"z.B. 'Was denkt ihr?', 'Seid ihr dabei?', 'Lasst uns darüber sprechen.'"
|
||
],
|
||
"filler_expressions": [
|
||
"Typische Füllwörter und Ausdrücke",
|
||
"z.B. 'Ich meine...', 'Wisst ihr...', 'Ok, ok...'"
|
||
]
|
||
}},
|
||
"structure_templates": {{
|
||
"primary_structure": "Die häufigste Struktur beschreiben, z.B. 'Unicode-Hook → Persönliche Anekdote → Erkenntnis → Bullet Points → CTA'",
|
||
"template_examples": [
|
||
{{
|
||
"name": "Storytelling-Post",
|
||
"structure": ["Fetter Hook mit Zitat", "Flashback/Anekdote", "Erkenntnis/Lesson", "Praktische Tipps", "CTA-Frage"],
|
||
"example_post_index": 1
|
||
}},
|
||
{{
|
||
"name": "Insight-Post",
|
||
"structure": ["Provokante These", "Begründung", "Beispiel", "Handlungsaufforderung"],
|
||
"example_post_index": 2
|
||
}}
|
||
],
|
||
"typical_sentence_starters": [
|
||
"ECHTE Satzanfänge aus den Posts",
|
||
"z.B. 'Ich glaube, dass...', 'Was mir aufgefallen ist...', 'Das Verrückte ist...'"
|
||
],
|
||
"paragraph_transitions": [
|
||
"Wie werden Absätze eingeleitet?",
|
||
"z.B. 'Und...', 'Aber:', 'Das bedeutet:'"
|
||
]
|
||
}},
|
||
"tone_analysis": {{
|
||
"primary_tone": "Haupttonalität basierend auf den Posts",
|
||
"emotional_range": "Welche Emotionen werden angesprochen?",
|
||
"authenticity_markers": ["Was macht den Stil einzigartig?", "Erkennbare Merkmale"]
|
||
}},
|
||
"topic_patterns": {{
|
||
"main_topics": ["Hauptthemen aus den Posts"],
|
||
"content_pillars": ["Content-Säulen"],
|
||
"expertise_areas": ["Expertise-Bereiche"],
|
||
"expertise_level": "Anfänger/Fortgeschritten/Experte"
|
||
}},
|
||
"audience_insights": {{
|
||
"target_audience": "Wer wird angesprochen?",
|
||
"pain_points_addressed": ["Probleme die adressiert werden"],
|
||
"value_proposition": "Welchen Mehrwert bietet die Person?",
|
||
"industry_context": "Branchenkontext"
|
||
}},
|
||
"visual_patterns": {{
|
||
"emoji_usage": {{
|
||
"emojis": ["Liste der tatsächlich verwendeten Emojis"],
|
||
"placement": "Anfang/Ende/Inline/Zwischen Absätzen",
|
||
"frequency": "Selten/Mittel/Häufig - pro Post durchschnittlich X"
|
||
}},
|
||
"unicode_formatting": "Wird ✓, →, •, 𝗙𝗲𝘁𝘁 etc. verwendet? Wo?",
|
||
"structure_preferences": "Absätze/Listen/Einzeiler/Nummeriert"
|
||
}},
|
||
"content_strategy": {{
|
||
"hook_patterns": "Wie werden Posts KONKRET eröffnet? Beschreibung des Musters",
|
||
"cta_style": "Wie sehen die CTAs aus? Frage? Aufforderung? Keine?",
|
||
"storytelling_approach": "Persönliche Geschichten? Metaphern? Case Studies?",
|
||
"post_structure": "Hook → Body → CTA? Oder anders?"
|
||
}},
|
||
"best_performing_patterns": {{
|
||
"what_works": "Was machen die Top-Posts anders/besser?",
|
||
"successful_hooks": ["WÖRTLICHE Beispiel-Hooks aus Top-Posts"],
|
||
"engagement_drivers": ["Was treibt Engagement?"]
|
||
}}
|
||
}}
|
||
|
||
KRITISCH: Bei phrase_library und structure_templates müssen ECHTE, WÖRTLICHE Beispiele aus den Posts stehen! Keine generischen Beschreibungen!"""
|
||
|
||
def _extract_ngram_patterns(self, post_texts: List[str]) -> Dict[str, Any]:
|
||
"""
|
||
Extract N-gram patterns from posts for better style fingerprinting.
|
||
|
||
Args:
|
||
post_texts: List of post texts
|
||
|
||
Returns:
|
||
Dictionary with bigrams, trigrams, and signature patterns
|
||
"""
|
||
if not post_texts or len(post_texts) == 0:
|
||
return {
|
||
"typical_bigrams": [],
|
||
"typical_trigrams": [],
|
||
"signature_combinations": [],
|
||
"available": False
|
||
}
|
||
|
||
try:
|
||
import nltk
|
||
from nltk import ngrams, word_tokenize
|
||
|
||
# Download required NLTK data if not available
|
||
try:
|
||
nltk.data.find('tokenizers/punkt')
|
||
except LookupError:
|
||
logger.info("Downloading NLTK punkt tokenizer...")
|
||
nltk.download('punkt', quiet=True)
|
||
|
||
# Combine all text
|
||
all_text = " ".join(post_texts)
|
||
|
||
# Tokenize (try German tokenizer, fallback to default)
|
||
try:
|
||
words = word_tokenize(all_text.lower(), language='german')
|
||
except:
|
||
words = word_tokenize(all_text.lower())
|
||
|
||
# Remove punctuation and very short words
|
||
words = [w for w in words if w.isalnum() and len(w) > 2]
|
||
|
||
# Extract bigrams (2-word combinations)
|
||
bigrams = list(ngrams(words, 2))
|
||
bigram_freq = Counter(bigrams)
|
||
|
||
# Extract trigrams (3-word combinations)
|
||
trigrams = list(ngrams(words, 3))
|
||
trigram_freq = Counter(trigrams)
|
||
|
||
# Get top bigrams and trigrams
|
||
top_bigrams = [" ".join(bg) for bg, _ in bigram_freq.most_common(50)]
|
||
top_trigrams = [" ".join(tg) for tg, _ in trigram_freq.most_common(30)]
|
||
|
||
# Find signature patterns (unique combinations that appear multiple times)
|
||
signature_patterns = self._find_signature_patterns(bigram_freq, trigram_freq)
|
||
|
||
logger.info(f"Extracted {len(top_bigrams)} bigrams, {len(top_trigrams)} trigrams, "
|
||
f"{len(signature_patterns)} signature patterns")
|
||
|
||
return {
|
||
"typical_bigrams": top_bigrams,
|
||
"typical_trigrams": top_trigrams,
|
||
"signature_combinations": signature_patterns,
|
||
"bigram_count": len(bigrams),
|
||
"trigram_count": len(trigrams),
|
||
"available": True
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"N-gram extraction failed: {e}")
|
||
return {
|
||
"typical_bigrams": [],
|
||
"typical_trigrams": [],
|
||
"signature_combinations": [],
|
||
"available": False,
|
||
"error": str(e)
|
||
}
|
||
|
||
def _find_signature_patterns(
|
||
self,
|
||
bigram_freq: Counter,
|
||
trigram_freq: Counter
|
||
) -> List[str]:
|
||
"""
|
||
Find signature patterns - unique combinations that are not generic.
|
||
|
||
Args:
|
||
bigram_freq: Counter of bigram frequencies
|
||
trigram_freq: Counter of trigram frequencies
|
||
|
||
Returns:
|
||
List of signature pattern strings
|
||
"""
|
||
# Generic German words to filter out
|
||
generic_words = {
|
||
'und', 'die', 'der', 'das', 'ist', 'sind', 'ein', 'eine',
|
||
'zu', 'den', 'für', 'mit', 'auf', 'von', 'in', 'des',
|
||
'dem', 'im', 'zum', 'zur', 'am', 'bei', 'hat', 'haben',
|
||
'wird', 'werden', 'kann', 'können', 'soll', 'müssen',
|
||
'auch', 'nur', 'noch', 'schon', 'sehr', 'mehr', 'aber',
|
||
'oder', 'wenn', 'dann', 'als', 'wie', 'nach', 'über'
|
||
}
|
||
|
||
signature_patterns = []
|
||
|
||
# Find non-generic bigrams that appear at least 3 times
|
||
for (w1, w2), count in bigram_freq.most_common(100):
|
||
if count >= 3: # Must appear multiple times
|
||
# Filter out generic combinations
|
||
if w1 not in generic_words or w2 not in generic_words:
|
||
pattern = f"{w1} {w2}"
|
||
if pattern not in signature_patterns:
|
||
signature_patterns.append(pattern)
|
||
|
||
# Find non-generic trigrams that appear at least 2 times
|
||
for (w1, w2, w3), count in trigram_freq.most_common(50):
|
||
if count >= 2:
|
||
# At least one word should not be generic
|
||
non_generic_count = sum(1 for w in [w1, w2, w3] if w not in generic_words)
|
||
if non_generic_count >= 2:
|
||
pattern = f"{w1} {w2} {w3}"
|
||
if pattern not in signature_patterns:
|
||
signature_patterns.append(pattern)
|
||
|
||
# Limit to top 20 most distinctive patterns
|
||
return signature_patterns[:20]
|