new quality save layer
@@ -172,23 +172,31 @@ CHIRURGISCHE KORREKTUR-REGELN (Prüfe diese Punkte!):

BEWERTUNGSKRITERIEN (100 Punkte total):

-1. Authentizität & Stil-Match (40 Punkte)
+1. Authentizität & Stil-Match (35 Punkte)
- Klingt wie die echte Person (vergleiche mit Beispiel-Posts!)
- Keine KI-Muster erkennbar
- Richtige Energie und Tonalität
- Nutzt ÄHNLICHE Phrasen/Formulierungen wie in der Phrasen-Referenz (nicht identisch kopiert, aber im gleichen Stil!)
- Hat die Person typische emotionale Ausdrücke? Sind welche im Post?

-2. Content-Qualität (35 Punkte)
+2. Content-Qualität (30 Punkte)
- Starker, aufmerksamkeitsstarker Hook (vergleiche mit Hook-Beispielen!)
- Klarer Mehrwert für die Zielgruppe
- Gute Struktur und Lesefluss (folgt der erwarteten Struktur: {primary_structure})
- Passender CTA (vergleiche mit CTA-Beispielen!)

-3. Technische Korrektheit (25 Punkte)
+3. Logik & Sinnigkeit (20 Punkte) - NEU!
+- Ist die Kernaussage klar und nachvollziehbar?
+- Sind Argumente logisch aufgebaut ohne Widersprüche?
+- Passen die Fakten zusammen oder werden nicht-zusammenhängende Dinge verknüpft?
+- Ist der Mehrwert/die Botschaft offensichtlich?
+- Würde die echte Person SO argumentieren?
+
+4. Technische Korrektheit (15 Punkte)
- Richtige Perspektive und Ansprache (konsistent!)
- Angemessene Länge (~{writing_style.get('average_word_count', 300)} Wörter)
- Korrekte Formatierung
- Rechtschreibung und Grammatik (wird separat geprüft, hier nur grobe Fehler)

SCORE-KALIBRIERUNG (WICHTIG - lies das genau!):
@@ -254,9 +262,10 @@ Antworte im JSON-Format:
"approved": true/false,
"overall_score": 0-100,
"scores": {{
-"authenticity_and_style": 0-40,
-"content_quality": 0-35,
-"technical_execution": 0-25
+"authenticity_and_style": 0-35,
+"content_quality": 0-30,
+"logic_and_coherence": 0-20,
+"technical_execution": 0-15
}},
"strengths": ["Stärke 1", "Stärke 2"],
"improvements": ["Verbesserung 1", "Verbesserung 2"],
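For reference, a hypothetical helper (illustrative only, not part of the commit) that sanity-checks a parsed critic response against the new 35/30/20/15 weighting; the function and constant names are assumptions:

# Hypothetical sanity check for a parsed critic response.
MAX_POINTS = {
    "authenticity_and_style": 35,
    "content_quality": 30,
    "logic_and_coherence": 20,
    "technical_execution": 15,
}

def validate_scores(result: dict) -> bool:
    scores = result.get("scores", {})
    if set(scores) != set(MAX_POINTS):
        return False
    if any(not 0 <= scores[k] <= MAX_POINTS[k] for k in MAX_POINTS):
        return False
    # Sub-scores should add up to the reported overall score.
    return sum(scores.values()) == result.get("overall_score")

print(validate_scores({
    "overall_score": 88,
    "scores": {"authenticity_and_style": 31, "content_quality": 27,
               "logic_and_coherence": 17, "technical_execution": 13},
}))  # True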
183
src/agents/grammar_checker.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""Grammar and spelling checker agent using LanguageTool."""
|
||||
from typing import Dict, Any, List
|
||||
from loguru import logger
|
||||
import language_tool_python
|
||||
|
||||
from src.agents.base import BaseAgent
|
||||
|
||||
|
||||
class GrammarCheckAgent(BaseAgent):
|
||||
"""Agent for checking grammar and spelling using LanguageTool."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize grammar checker agent."""
|
||||
super().__init__("GrammarChecker")
|
||||
# Initialize LanguageTool for German
|
||||
try:
|
||||
self.tool = language_tool_python.LanguageTool('de-DE')
|
||||
logger.info("LanguageTool initialized successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize LanguageTool: {e}")
|
||||
self.tool = None
|
||||
|
||||
async def process(
|
||||
self,
|
||||
text: str,
|
||||
auto_correct: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Check text for grammar and spelling errors.
|
||||
|
||||
Args:
|
||||
text: Text to check
|
||||
auto_correct: If True, automatically apply safe corrections
|
||||
|
||||
Returns:
|
||||
Dictionary with error details and optionally corrected text
|
||||
"""
|
||||
if not self.tool:
|
||||
logger.warning("LanguageTool not available, skipping grammar check")
|
||||
return {
|
||||
"has_errors": False,
|
||||
"error_count": 0,
|
||||
"errors": [],
|
||||
"corrected_text": text,
|
||||
"available": False
|
||||
}
|
||||
|
||||
logger.info("Checking text for grammar and spelling errors")
|
||||
|
||||
try:
|
||||
# Check for errors
|
||||
matches = self.tool.check(text)
|
||||
|
||||
# Categorize errors
|
||||
categorized_errors = self._categorize_errors(matches)
|
||||
|
||||
# Prepare error details
|
||||
errors = []
|
||||
for match in matches:
|
||||
errors.append({
|
||||
"message": match.message,
|
||||
"replacements": match.replacements[:3], # Top 3 suggestions
|
||||
"context": match.context,
|
||||
"offset": match.offset,
|
||||
"error_length": match.errorLength,
|
||||
"category": match.category,
|
||||
"rule_id": match.ruleId
|
||||
})
|
||||
|
||||
# Auto-correct if requested
|
||||
corrected_text = text
|
||||
if auto_correct and len(matches) > 0:
|
||||
# Only auto-correct "safe" errors (spelling, obvious grammar)
|
||||
safe_matches = [m for m in matches if self._is_safe_correction(m)]
|
||||
if safe_matches:
|
||||
corrected_text = language_tool_python.utils.correct(text, safe_matches)
|
||||
logger.info(f"Auto-corrected {len(safe_matches)} safe errors")
|
||||
|
||||
result = {
|
||||
"has_errors": len(matches) > 0,
|
||||
"error_count": len(matches),
|
||||
"errors": errors,
|
||||
"corrected_text": corrected_text,
|
||||
"available": True,
|
||||
"categories": categorized_errors,
|
||||
"auto_corrected": auto_correct and len(matches) > 0
|
||||
}
|
||||
|
||||
if result["has_errors"]:
|
||||
logger.warning(f"Found {len(matches)} grammar/spelling errors")
|
||||
else:
|
||||
logger.info("No grammar/spelling errors found")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Grammar check failed: {e}")
|
||||
return {
|
||||
"has_errors": False,
|
||||
"error_count": 0,
|
||||
"errors": [],
|
||||
"corrected_text": text,
|
||||
"available": False,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
def _categorize_errors(self, matches: List) -> Dict[str, int]:
|
||||
"""Categorize errors by type."""
|
||||
categories = {}
|
||||
for match in matches:
|
||||
category = match.category or "OTHER"
|
||||
categories[category] = categories.get(category, 0) + 1
|
||||
return categories
|
||||
|
||||
def _is_safe_correction(self, match) -> bool:
|
||||
"""
|
||||
Check if a correction is 'safe' to apply automatically.
|
||||
|
||||
Safe corrections:
|
||||
- Spelling errors with clear suggestions
|
||||
- Obvious grammar errors (verb agreement, etc.)
|
||||
|
||||
Unsafe corrections:
|
||||
- Style suggestions
|
||||
- Ambiguous corrections
|
||||
- Punctuation changes that might alter meaning
|
||||
"""
|
||||
# Safe categories
|
||||
safe_categories = {
|
||||
"TYPOS", # Spelling errors
|
||||
"CASING", # Capitalization
|
||||
"COMPOUNDING", # Word compounds
|
||||
}
|
||||
|
||||
# Check category
|
||||
if match.category in safe_categories:
|
||||
return True
|
||||
|
||||
# Check if it's a clear spelling mistake with 1-2 clear suggestions
|
||||
if match.category == "MISC" and len(match.replacements) <= 2:
|
||||
return True
|
||||
|
||||
# Avoid style and punctuation changes
|
||||
if match.category in {"STYLE", "PUNCTUATION", "TYPOGRAPHY"}:
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
def get_summary(self, check_result: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Get a human-readable summary of grammar check results.
|
||||
|
||||
Args:
|
||||
check_result: Result from process()
|
||||
|
||||
Returns:
|
||||
Summary string
|
||||
"""
|
||||
if not check_result.get("available"):
|
||||
return "⚠️ Grammatikprüfung nicht verfügbar"
|
||||
|
||||
if not check_result["has_errors"]:
|
||||
return "✅ Keine Rechtschreib- oder Grammatikfehler gefunden"
|
||||
|
||||
error_count = check_result["error_count"]
|
||||
categories = check_result.get("categories", {})
|
||||
|
||||
summary = f"⚠️ {error_count} Fehler gefunden"
|
||||
|
||||
if categories:
|
||||
cat_summary = ", ".join([f"{cat}: {count}" for cat, count in categories.items()])
|
||||
summary += f" ({cat_summary})"
|
||||
|
||||
if check_result.get("auto_corrected"):
|
||||
summary += " - Automatisch korrigiert"
|
||||
|
||||
return summary
|
||||
|
||||
def close(self):
|
||||
"""Clean up LanguageTool resources."""
|
||||
if self.tool:
|
||||
self.tool.close()
|
||||
logger.info("LanguageTool closed")
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Profile analyzer agent."""
|
||||
import json
|
||||
from typing import Dict, Any, List
|
||||
from collections import Counter
|
||||
from loguru import logger
|
||||
|
||||
from src.agents.base import BaseAgent
|
||||
@@ -61,7 +62,13 @@ class ProfileAnalyzerAgent(BaseAgent):
|
||||
|
||||
# Parse JSON response
|
||||
analysis = json.loads(response)
|
||||
logger.info("Profile analysis completed successfully")
|
||||
|
||||
# Add N-gram analysis
|
||||
post_texts = [p.post_text for p in posts if p.post_text]
|
||||
ngram_patterns = self._extract_ngram_patterns(post_texts)
|
||||
analysis["ngram_patterns"] = ngram_patterns
|
||||
|
||||
logger.info("Profile analysis completed successfully (with N-gram analysis)")
|
||||
|
||||
return analysis
|
||||
|
||||
@@ -298,3 +305,130 @@ Erstelle eine umfassende Analyse im folgenden JSON-Format:
|
||||
}}
|
||||
|
||||
KRITISCH: Bei phrase_library und structure_templates müssen ECHTE, WÖRTLICHE Beispiele aus den Posts stehen! Keine generischen Beschreibungen!"""
|
||||
|
||||
def _extract_ngram_patterns(self, post_texts: List[str]) -> Dict[str, Any]:
|
||||
"""
|
||||
Extract N-gram patterns from posts for better style fingerprinting.
|
||||
|
||||
Args:
|
||||
post_texts: List of post texts
|
||||
|
||||
Returns:
|
||||
Dictionary with bigrams, trigrams, and signature patterns
|
||||
"""
|
||||
if not post_texts or len(post_texts) == 0:
|
||||
return {
|
||||
"typical_bigrams": [],
|
||||
"typical_trigrams": [],
|
||||
"signature_combinations": [],
|
||||
"available": False
|
||||
}
|
||||
|
||||
try:
|
||||
import nltk
|
||||
from nltk import ngrams, word_tokenize
|
||||
|
||||
# Download required NLTK data if not available
|
||||
try:
|
||||
nltk.data.find('tokenizers/punkt')
|
||||
except LookupError:
|
||||
logger.info("Downloading NLTK punkt tokenizer...")
|
||||
nltk.download('punkt', quiet=True)
|
||||
|
||||
# Combine all text
|
||||
all_text = " ".join(post_texts)
|
||||
|
||||
# Tokenize (try German tokenizer, fallback to default)
|
||||
try:
|
||||
words = word_tokenize(all_text.lower(), language='german')
|
||||
except Exception:
|
||||
words = word_tokenize(all_text.lower())
|
||||
|
||||
# Remove punctuation and very short words
|
||||
words = [w for w in words if w.isalnum() and len(w) > 2]
|
||||
|
||||
# Extract bigrams (2-word combinations)
|
||||
bigrams = list(ngrams(words, 2))
|
||||
bigram_freq = Counter(bigrams)
|
||||
|
||||
# Extract trigrams (3-word combinations)
|
||||
trigrams = list(ngrams(words, 3))
|
||||
trigram_freq = Counter(trigrams)
|
||||
|
||||
# Get top bigrams and trigrams
|
||||
top_bigrams = [" ".join(bg) for bg, _ in bigram_freq.most_common(50)]
|
||||
top_trigrams = [" ".join(tg) for tg, _ in trigram_freq.most_common(30)]
|
||||
|
||||
# Find signature patterns (unique combinations that appear multiple times)
|
||||
signature_patterns = self._find_signature_patterns(bigram_freq, trigram_freq)
|
||||
|
||||
logger.info(f"Extracted {len(top_bigrams)} bigrams, {len(top_trigrams)} trigrams, "
|
||||
f"{len(signature_patterns)} signature patterns")
|
||||
|
||||
return {
|
||||
"typical_bigrams": top_bigrams,
|
||||
"typical_trigrams": top_trigrams,
|
||||
"signature_combinations": signature_patterns,
|
||||
"bigram_count": len(bigrams),
|
||||
"trigram_count": len(trigrams),
|
||||
"available": True
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"N-gram extraction failed: {e}")
|
||||
return {
|
||||
"typical_bigrams": [],
|
||||
"typical_trigrams": [],
|
||||
"signature_combinations": [],
|
||||
"available": False,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
def _find_signature_patterns(
|
||||
self,
|
||||
bigram_freq: Counter,
|
||||
trigram_freq: Counter
|
||||
) -> List[str]:
|
||||
"""
|
||||
Find signature patterns - unique combinations that are not generic.
|
||||
|
||||
Args:
|
||||
bigram_freq: Counter of bigram frequencies
|
||||
trigram_freq: Counter of trigram frequencies
|
||||
|
||||
Returns:
|
||||
List of signature pattern strings
|
||||
"""
|
||||
# Generic German words to filter out
|
||||
generic_words = {
|
||||
'und', 'die', 'der', 'das', 'ist', 'sind', 'ein', 'eine',
|
||||
'zu', 'den', 'für', 'mit', 'auf', 'von', 'in', 'des',
|
||||
'dem', 'im', 'zum', 'zur', 'am', 'bei', 'hat', 'haben',
|
||||
'wird', 'werden', 'kann', 'können', 'soll', 'müssen',
|
||||
'auch', 'nur', 'noch', 'schon', 'sehr', 'mehr', 'aber',
|
||||
'oder', 'wenn', 'dann', 'als', 'wie', 'nach', 'über'
|
||||
}
|
||||
|
||||
signature_patterns = []
|
||||
|
||||
# Find non-generic bigrams that appear at least 3 times
|
||||
for (w1, w2), count in bigram_freq.most_common(100):
|
||||
if count >= 3: # Must appear multiple times
|
||||
# Filter out generic combinations
|
||||
if w1 not in generic_words or w2 not in generic_words:
|
||||
pattern = f"{w1} {w2}"
|
||||
if pattern not in signature_patterns:
|
||||
signature_patterns.append(pattern)
|
||||
|
||||
# Find non-generic trigrams that appear at least 2 times
|
||||
for (w1, w2, w3), count in trigram_freq.most_common(50):
|
||||
if count >= 2:
|
||||
# At least one word should not be generic
|
||||
non_generic_count = sum(1 for w in [w1, w2, w3] if w not in generic_words)
|
||||
if non_generic_count >= 2:
|
||||
pattern = f"{w1} {w2} {w3}"
|
||||
if pattern not in signature_patterns:
|
||||
signature_patterns.append(pattern)
|
||||
|
||||
# Limit to top 20 most distinctive patterns
|
||||
return signature_patterns[:20]
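A self-contained sketch of the same bigram/trigram counting (illustrative; the agent itself tokenizes with nltk.word_tokenize, the example uses plain str.split() to stay dependency-free):

from collections import Counter

posts = [
    "Heute teile ich drei Learnings aus unserem letzten Projekt.",
    "Drei Learnings aus einem Projekt, das fast gescheitert wäre.",
]

# Simple whitespace tokenization for the sketch; the agent uses nltk.word_tokenize.
words = [w.strip(".,!?").lower() for w in " ".join(posts).split()]
words = [w for w in words if w.isalnum() and len(w) > 2]

bigram_freq = Counter(zip(words, words[1:]))
trigram_freq = Counter(zip(words, words[1:], words[2:]))

print([" ".join(bg) for bg, _ in bigram_freq.most_common(5)])
print([" ".join(tg) for tg, _ in trigram_freq.most_common(5)])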
|
||||
|
||||
457
src/agents/quality_refiner.py
Normal file
@@ -0,0 +1,457 @@
|
||||
"""Quality refiner agent for post-processing improvements."""
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from src.agents.base import BaseAgent
|
||||
|
||||
|
||||
class QualityRefinerAgent(BaseAgent):
|
||||
"""Agent for refining posts based on quality check results."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize quality refiner agent."""
|
||||
super().__init__("QualityRefiner")
|
||||
|
||||
async def process(
|
||||
self,
|
||||
post: str,
|
||||
quality_checks: Dict[str, Any],
|
||||
example_posts: Optional[List[str]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Process method (required by BaseAgent).
|
||||
Alias for refine().
|
||||
"""
|
||||
return await self.refine(post, quality_checks, example_posts)
|
||||
|
||||
async def refine(
|
||||
self,
|
||||
post: str,
|
||||
quality_checks: Dict[str, Any],
|
||||
example_posts: Optional[List[str]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Refine post based on quality check results.
|
||||
|
||||
This is a deterministic, rule-based refiner that makes minimal
|
||||
changes to fix detected issues. No LLM calls = 0 API cost.
|
||||
|
||||
Args:
|
||||
post: Original post text
|
||||
quality_checks: Results from quality checks
|
||||
example_posts: Optional reference posts for style
|
||||
|
||||
Returns:
|
||||
Dictionary with refined post and improvement stats
|
||||
"""
|
||||
logger.info("Starting quality refinement (deterministic, 0 API calls)")
|
||||
|
||||
refined_post = post
|
||||
improvements = []
|
||||
|
||||
# Stage 1: Grammar Auto-Fix (if errors detected)
|
||||
grammar_result = quality_checks.get('grammar_check', {})
|
||||
if grammar_result.get('has_errors') and grammar_result.get('available'):
|
||||
logger.info(f"Auto-fixing {grammar_result['error_count']} grammar errors")
|
||||
refined_post = grammar_result.get('corrected_text', refined_post)
|
||||
improvements.append(f"Fixed {grammar_result['error_count']} grammar errors")
|
||||
|
||||
# Stage 2: Readability Improvements
|
||||
readability_result = quality_checks.get('readability_check', {})
|
||||
if not readability_result.get('passed') and readability_result.get('available'):
|
||||
logger.info("Improving readability")
|
||||
refined_post = self._improve_readability(refined_post, readability_result)
|
||||
improvements.append("Improved readability (split long sentences)")
|
||||
|
||||
# Stage 3: Basic Style Adjustments (only if very poor < 60%)
|
||||
style_result = quality_checks.get('style_check', {})
|
||||
if style_result.get('available') and style_result.get('avg_similarity', 1.0) < 0.60:
|
||||
logger.info("Adjusting style (removing KI patterns)")
|
||||
refined_post = self._adjust_style(refined_post)
|
||||
improvements.append("Removed AI-typical patterns")
|
||||
|
||||
# Calculate improvement
|
||||
original_length = len(post)
|
||||
refined_length = len(refined_post)
|
||||
change_percentage = abs(refined_length - original_length) / original_length * 100
|
||||
|
||||
result = {
|
||||
"refined_post": refined_post,
|
||||
"original_post": post,
|
||||
"improvements": improvements,
|
||||
"change_percentage": change_percentage,
|
||||
"refined": len(improvements) > 0
|
||||
}
|
||||
|
||||
if improvements:
|
||||
logger.info(f"Refinement complete: {len(improvements)} improvements made ({change_percentage:.1f}% change)")
|
||||
else:
|
||||
logger.info("No refinements needed")
|
||||
|
||||
return result
|
||||
|
||||
def _improve_readability(self, text: str, readability_result: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Improve readability by splitting long sentences and simplifying.
|
||||
|
||||
Rule-based, deterministic approach.
|
||||
"""
|
||||
metrics = readability_result.get('metrics', {})
|
||||
avg_sentence_length = metrics.get('avg_sentence_length', 0)
|
||||
|
||||
# If sentences are too long (> 25 words), try to split
|
||||
if avg_sentence_length > 25:
|
||||
text = self._split_long_sentences(text)
|
||||
|
||||
return text
|
||||
|
||||
def _split_long_sentences(self, text: str) -> str:
|
||||
"""
|
||||
Split long sentences at natural breakpoints.
|
||||
|
||||
Looks for:
|
||||
- ", und " → ". "
|
||||
- ", aber " → ". Aber "
|
||||
- ", denn " → ". Denn "
|
||||
- " und " (mid-sentence) → ". "
|
||||
"""
|
||||
# Split sentences
|
||||
sentences = re.split(r'([.!?]\s+)', text)
|
||||
refined_sentences = []
|
||||
|
||||
for i, sentence in enumerate(sentences):
|
||||
# Skip punctuation separators
|
||||
if re.match(r'[.!?]\s+', sentence):
|
||||
refined_sentences.append(sentence)
|
||||
continue
|
||||
|
||||
# Count words in sentence
|
||||
words = sentence.split()
|
||||
if len(words) > 25:
|
||||
# Try to split at natural conjunction
|
||||
# ", und " → ". "
|
||||
sentence = re.sub(r',\s+und\s+', '. ', sentence, count=1)
|
||||
# ", aber " → ". Aber "
|
||||
sentence = re.sub(r',\s+aber\s+', '. Aber ', sentence, count=1)
|
||||
# ", denn " → ". Denn "
|
||||
sentence = re.sub(r',\s+denn\s+', '. Denn ', sentence, count=1)
|
||||
|
||||
# Capitalize after new periods
|
||||
sentence = re.sub(r'\.\s+([a-z])', lambda m: '. ' + m.group(1).upper(), sentence)
|
||||
|
||||
refined_sentences.append(sentence)
|
||||
|
||||
return ''.join(refined_sentences)
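A quick illustration of the splitting rules above on one over-long sentence (standalone sketch, so it runs without the agent class; the sample sentence is invented):

import re

sentence = ("Wir haben das Projekt in drei Monaten umgesetzt, und dabei haben wir "
            "gelernt, dass klare Prioritäten wichtiger sind als perfekte Pläne, "
            "aber das versteht man oft erst hinterher.")
# Same breakpoint rules as in _split_long_sentences, each applied once.
sentence = re.sub(r',\s+und\s+', '. ', sentence, count=1)
sentence = re.sub(r',\s+aber\s+', '. Aber ', sentence, count=1)
sentence = re.sub(r'\.\s+([a-z])', lambda m: '. ' + m.group(1).upper(), sentence)
print(sentence)
# -> "Wir haben das Projekt in drei Monaten umgesetzt. Dabei haben wir gelernt,
#     dass klare Prioritäten wichtiger sind als perfekte Pläne. Aber das versteht
#     man oft erst hinterher."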
|
||||
|
||||
def _adjust_style(self, text: str) -> str:
|
||||
"""
|
||||
Remove common AI-typical patterns and phrases.
|
||||
|
||||
Rule-based replacements to make text more authentic.
|
||||
"""
|
||||
# Common AI patterns to remove/replace
|
||||
replacements = [
|
||||
# Remove overly formal phrases
|
||||
(r'\bIn der heutigen Zeit\b', 'Heute'),
|
||||
(r'\bEs ist kein Geheimnis,? dass\b', ''),
|
||||
(r'\bTauchen Sie ein in\b', 'Entdecken Sie'),
|
||||
(r'\bStellen Sie sich vor,?\b', ''),
|
||||
(r'\bLassen Sie uns\b', 'Lass uns'),
|
||||
|
||||
# Remove redundant phrases
|
||||
(r'\bes ist wichtig zu verstehen,? dass\b', ''),
|
||||
(r'\bIch möchte betonen,? dass\b', ''),
|
||||
|
||||
# Simplify overly complex phrases
|
||||
(r'\bdarüber hinaus\b', 'außerdem'),
|
||||
(r'\bdeshalb ist es wichtig\b', 'deshalb'),
|
||||
(r'\bin diesem Zusammenhang\b', 'dabei'),
|
||||
|
||||
# Remove double spaces (keep line breaks intact)
(r'[ \t]{2,}', ' '),
|
||||
]
|
||||
|
||||
for pattern, replacement in replacements:
|
||||
text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
|
||||
|
||||
# Clean up spacing
|
||||
text = re.sub(r'\s+([.,!?])', r'\1', text) # Remove space before punctuation
|
||||
text = re.sub(r'[ \t]{2,}', ' ', text)  # Normalize spaces (preserve line breaks)
|
||||
text = text.strip()
|
||||
|
||||
return text
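A tiny demonstration of the rule-based replacements (standalone sketch; two of the patterns above applied to an invented sample sentence):

import re

text = "In der heutigen Zeit ist Sichtbarkeit entscheidend, und in diesem Zusammenhang zählt Konsistenz."
for pattern, replacement in [
    (r'\bIn der heutigen Zeit\b', 'Heute'),
    (r'\bin diesem Zusammenhang\b', 'dabei'),
]:
    text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
print(text)  # -> "Heute ist Sichtbarkeit entscheidend, und dabei zählt Konsistenz."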
|
||||
|
||||
async def final_polish(
|
||||
self,
|
||||
post: str,
|
||||
quality_checks: Dict[str, Any],
|
||||
profile_analysis: Dict[str, Any],
|
||||
example_posts: List[str]
|
||||
) -> str:
|
||||
"""
|
||||
Final polish using ONE LLM call to address quality issues.
|
||||
|
||||
This is the simplified approach per user feedback:
|
||||
- Analyze quality issues
|
||||
- One LLM call to fix them
|
||||
- Preserve ALL formatting (line breaks, spaces, structure)
|
||||
- NO score changes
|
||||
|
||||
Args:
|
||||
post: Current post text (after Critic approval)
|
||||
quality_checks: Quality check results
|
||||
profile_analysis: Profile analysis for style
|
||||
example_posts: Reference posts
|
||||
|
||||
Returns:
|
||||
Polished post
|
||||
"""
|
||||
logger.info("Running final polish (1 API call)")
|
||||
|
||||
# Build specific feedback from quality checks
|
||||
feedback_points = []
|
||||
|
||||
grammar_result = quality_checks.get('grammar_check', {})
|
||||
if grammar_result.get('has_errors'):
|
||||
feedback_points.append(f"⚠️ {grammar_result['error_count']} Grammatikfehler gefunden")
|
||||
|
||||
style_result = quality_checks.get('style_check', {})
|
||||
if not style_result.get('passed'):
|
||||
similarity = style_result.get('avg_similarity', 0) * 100
|
||||
feedback_points.append(f"⚠️ Stil-Ähnlichkeit nur {similarity:.0f}% (Ziel: 75%+)")
|
||||
|
||||
readability_result = quality_checks.get('readability_check', {})
|
||||
if not readability_result.get('passed'):
|
||||
issues = readability_result.get('issues', [])
|
||||
feedback_points.append(f"⚠️ Lesbarkeit: {', '.join(issues)}")
|
||||
|
||||
# If no issues, return unchanged
|
||||
if not feedback_points:
|
||||
logger.info("No quality issues - returning unchanged post")
|
||||
return post
|
||||
|
||||
feedback = "\n".join(feedback_points)
|
||||
|
||||
# Create simple polish prompt
|
||||
system_prompt = self._get_final_polish_system_prompt(profile_analysis, example_posts)
|
||||
user_prompt = self._get_final_polish_user_prompt(post, feedback)
|
||||
|
||||
polished_post = await self.call_openai(
|
||||
system_prompt=system_prompt,
|
||||
user_prompt=user_prompt,
|
||||
model="gpt-4o",
|
||||
temperature=0.3 # Low temp for precise, minimal changes
|
||||
)
|
||||
|
||||
logger.info("Final polish complete")
|
||||
return polished_post.strip()
|
||||
|
||||
async def smart_revision(
|
||||
self,
|
||||
post: str,
|
||||
quality_checks: Dict[str, Any],
|
||||
profile_analysis: Dict[str, Any],
|
||||
example_posts: List[str]
|
||||
) -> str:
|
||||
"""
|
||||
Intelligent revision using LLM to fix remaining quality issues.
|
||||
|
||||
Only called if auto-refiner wasn't enough (score < 85).
|
||||
MAX 1 call per post!
|
||||
|
||||
Args:
|
||||
post: Current post text
|
||||
quality_checks: Quality check results
|
||||
profile_analysis: Profile analysis for style
|
||||
example_posts: Reference posts
|
||||
|
||||
Returns:
|
||||
Revised post
|
||||
"""
|
||||
logger.info("Running smart revision (1 API call)")
|
||||
|
||||
# Build specific feedback from quality checks
|
||||
feedback_points = []
|
||||
|
||||
grammar_result = quality_checks.get('grammar_check', {})
|
||||
if grammar_result.get('has_errors'):
|
||||
feedback_points.append(f"⚠️ Noch {grammar_result['error_count']} Grammatikfehler")
|
||||
|
||||
style_result = quality_checks.get('style_check', {})
|
||||
if not style_result.get('passed'):
|
||||
similarity = style_result.get('avg_similarity', 0) * 100
|
||||
feedback_points.append(f"⚠️ Stil-Ähnlichkeit nur {similarity:.0f}% (Ziel: 75%+)")
|
||||
|
||||
readability_result = quality_checks.get('readability_check', {})
|
||||
if not readability_result.get('passed'):
|
||||
issues = readability_result.get('issues', [])
|
||||
feedback_points.append(f"⚠️ Lesbarkeit: {', '.join(issues)}")
|
||||
|
||||
feedback = "\n".join(feedback_points)
|
||||
|
||||
# Use Writer's revision capability with quality feedback
|
||||
system_prompt = self._get_smart_revision_system_prompt(profile_analysis, example_posts)
|
||||
user_prompt = self._get_smart_revision_user_prompt(post, feedback, quality_checks)
|
||||
|
||||
revised_post = await self.call_openai(
|
||||
system_prompt=system_prompt,
|
||||
user_prompt=user_prompt,
|
||||
model="gpt-4o",
|
||||
temperature=0.4 # Lower temp for more controlled revision
|
||||
)
|
||||
|
||||
logger.info("Smart revision complete")
|
||||
return revised_post.strip()
|
||||
|
||||
def _get_smart_revision_system_prompt(
|
||||
self,
|
||||
profile_analysis: Dict[str, Any],
|
||||
example_posts: List[str]
|
||||
) -> str:
|
||||
"""Get system prompt for smart revision."""
|
||||
# Extract key style info
|
||||
writing_style = profile_analysis.get("writing_style", {})
|
||||
linguistic = profile_analysis.get("linguistic_fingerprint", {})
|
||||
phrase_library = profile_analysis.get("phrase_library", {})
|
||||
|
||||
# Build short example section
|
||||
examples_text = ""
|
||||
if example_posts:
|
||||
examples_text = "\n\nSTIL-REFERENZEN (nutze ähnliche Formulierungen):\n"
|
||||
for i, post in enumerate(example_posts[:2], 1): # Only 2 examples
|
||||
examples_text += f"\n--- Beispiel {i} ---\n{post[:800]}\n"
|
||||
|
||||
return f"""Du bist ein LinkedIn-Post-Optimierer. Deine Aufgabe ist es, einen Post MINIMAL zu überarbeiten um Quality-Probleme zu beheben.
|
||||
|
||||
WICHTIG: Behalte den Kern, die Story und die Hauptaussagen bei! Ändere NUR was nötig ist!
|
||||
|
||||
STIL DER PERSON:
|
||||
- Ansprache: {writing_style.get('form_of_address', 'Du')}
|
||||
- Perspektive: {writing_style.get('perspective', 'Ich')}
|
||||
- Energie: {linguistic.get('energy_level', 7)}/10
|
||||
- Typische Phrasen: {', '.join(phrase_library.get('signature_phrases', [])[:3])}
|
||||
{examples_text}
|
||||
|
||||
AUFGABE:
|
||||
- Fixe NUR die genannten Quality-Probleme
|
||||
- Behalte die Story und den Inhalt bei
|
||||
- Ändere maximal 10-15% des Textes
|
||||
- Kein komplettes Rewrite!
|
||||
|
||||
Antworte NUR mit dem überarbeiteten Post."""
|
||||
|
||||
def _get_smart_revision_user_prompt(
|
||||
self,
|
||||
post: str,
|
||||
feedback: str,
|
||||
quality_checks: Dict[str, Any]
|
||||
) -> str:
|
||||
"""Get user prompt for smart revision."""
|
||||
# Get specific improvement suggestions (if available)
|
||||
readability_result = quality_checks.get('readability_check', {})
|
||||
suggestions = []
|
||||
if readability_result and not readability_result.get('passed'):
|
||||
# Suggestions are not in the result, we'll generate them from issues
|
||||
issues = readability_result.get('issues', [])
|
||||
if issues:
|
||||
suggestions = [f"Verbesserung nötig: {issue}" for issue in issues]
|
||||
|
||||
suggestions_text = ""
|
||||
if suggestions:
|
||||
suggestions_text = "\n\nKONKRETE VERBESSERUNGEN:\n" + "\n".join([f"- {s}" for s in suggestions])
|
||||
|
||||
return f"""Überarbeite diesen Post um die Quality-Probleme zu beheben:
|
||||
|
||||
**ORIGINAL POST:**
|
||||
{post}
|
||||
|
||||
**QUALITY-PROBLEME:**
|
||||
{feedback}
|
||||
{suggestions_text}
|
||||
|
||||
**DEINE AUFGABE:**
|
||||
Fixe die genannten Probleme mit MINIMALEN Änderungen.
|
||||
Behalte die Story, den Kern und die Hauptaussagen bei!
|
||||
|
||||
Gib NUR den überarbeiteten Post zurück."""
|
||||
|
||||
def _get_final_polish_system_prompt(
|
||||
self,
|
||||
profile_analysis: Dict[str, Any],
|
||||
example_posts: List[str]
|
||||
) -> str:
|
||||
"""Get system prompt for final polish."""
|
||||
# Extract key style info
|
||||
writing_style = profile_analysis.get("writing_style", {})
|
||||
linguistic = profile_analysis.get("linguistic_fingerprint", {})
|
||||
|
||||
# Build short example section (max 2 examples)
|
||||
examples_text = ""
|
||||
if example_posts:
|
||||
examples_text = "\n\nSTIL-REFERENZEN:\n"
|
||||
for i, post in enumerate(example_posts[:2], 1):
|
||||
examples_text += f"\n--- Beispiel {i} ---\n{post[:800]}\n"
|
||||
|
||||
return f"""Du bist ein LinkedIn-Post-Polierer. Deine Aufgabe ist es, einen bereits guten Post MINIMAL zu verbessern.
|
||||
|
||||
WICHTIG:
|
||||
- Behalte die EXAKTE Formatierung bei (Zeilenumbrüche, Absätze, Leerzeichen)!
|
||||
- Behalte die Story, den Inhalt und die Struktur bei!
|
||||
- Ändere NUR was für Quality-Verbesserungen nötig ist!
|
||||
- Kein Rewrite - nur Politur!
|
||||
|
||||
STIL DER PERSON:
|
||||
- Ansprache: {writing_style.get('form_of_address', 'Du')}
|
||||
- Perspektive: {writing_style.get('perspective', 'Ich')}
|
||||
- Energie: {linguistic.get('energy_level', 7)}/10
|
||||
{examples_text}
|
||||
|
||||
AUFGABE:
|
||||
Fixe NUR die genannten Quality-Probleme (Grammatik, Stil, Lesbarkeit).
|
||||
Behalte ALLES andere exakt gleich - vor allem die Formatierung!
|
||||
|
||||
Antworte NUR mit dem polierten Post."""
|
||||
|
||||
def _get_final_polish_user_prompt(
|
||||
self,
|
||||
post: str,
|
||||
feedback: str
|
||||
) -> str:
|
||||
"""Get user prompt for final polish."""
|
||||
return f"""Poliere diesen Post um die Quality-Probleme zu beheben:
|
||||
|
||||
**POST:**
|
||||
{post}
|
||||
|
||||
**QUALITY-FEEDBACK:**
|
||||
{feedback}
|
||||
|
||||
**WICHTIG:**
|
||||
- Behalte die EXAKTE Formatierung (Zeilenumbrüche, Absätze, Leerzeichen)!
|
||||
- Fixe NUR die genannten Quality-Probleme
|
||||
- Ändere NICHT den Inhalt, die Story oder die Struktur
|
||||
- Minimale Änderungen!
|
||||
|
||||
Gib NUR den polierten Post zurück (ohne Erklärungen)."""
|
||||
|
||||
def get_refinement_summary(self, refinement_result: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Get human-readable summary of refinement.
|
||||
|
||||
Args:
|
||||
refinement_result: Result from refine()
|
||||
|
||||
Returns:
|
||||
Summary string
|
||||
"""
|
||||
if not refinement_result.get("refined"):
|
||||
return "✅ Keine Verbesserungen nötig"
|
||||
|
||||
improvements = refinement_result.get("improvements", [])
|
||||
change = refinement_result.get("change_percentage", 0)
|
||||
|
||||
summary = f"🔧 {len(improvements)} Verbesserungen ({change:.1f}% Änderung):\n"
|
||||
summary += "\n".join([f" - {imp}" for imp in improvements])
|
||||
|
||||
return summary
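A rough usage sketch for the deterministic refine() path with a mocked quality_checks payload (illustrative; assumes QualityRefinerAgent can be constructed without further configuration, and the sample texts are invented):

import asyncio

from src.agents.quality_refiner import QualityRefinerAgent


async def demo() -> None:
    refiner = QualityRefinerAgent()
    quality_checks = {
        "grammar_check": {"available": True, "has_errors": True, "error_count": 2,
                          "corrected_text": "Korrigierter Post-Text."},
        "readability_check": {"available": True, "passed": True},
        "style_check": {"available": True, "avg_similarity": 0.78},
    }
    # Grammar stage applies the corrected text; readability and style stages are skipped here.
    result = await refiner.refine("Original Post-Text.", quality_checks)
    print(refiner.get_refinement_summary(result))


if __name__ == "__main__":
    asyncio.run(demo())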
|
||||
271
src/agents/readability_checker.py
Normal file
@@ -0,0 +1,271 @@
|
||||
"""Readability checker for German text."""
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
|
||||
from src.agents.base import BaseAgent
|
||||
|
||||
|
||||
class ReadabilityChecker(BaseAgent):
|
||||
"""Agent for checking text readability."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize readability checker."""
|
||||
super().__init__("ReadabilityChecker")
|
||||
self.textstat_available = False
|
||||
self._initialize_textstat()
|
||||
|
||||
def _initialize_textstat(self):
|
||||
"""Initialize textstat library."""
|
||||
try:
|
||||
import textstat
|
||||
self.textstat = textstat
|
||||
# Set language to German
|
||||
textstat.set_lang("de")
|
||||
self.textstat_available = True
|
||||
logger.info("Textstat initialized successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize textstat: {e}")
|
||||
self.textstat = None
|
||||
|
||||
async def process(
|
||||
self,
|
||||
text: str,
|
||||
target_grade: float = 10.0,
|
||||
target_flesch: float = 60.0,
|
||||
max_sentence_length: int = 20
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze text readability.
|
||||
|
||||
Args:
|
||||
text: Text to analyze
|
||||
target_grade: Target grade level (Wiener Sachtextformel)
|
||||
target_flesch: Target Flesch reading ease score
|
||||
max_sentence_length: Maximum average sentence length
|
||||
|
||||
Returns:
|
||||
Readability analysis results
|
||||
"""
|
||||
if not self.textstat_available:
|
||||
logger.warning("Textstat not available, skipping readability check")
|
||||
return {
|
||||
"available": False,
|
||||
"passed": True,
|
||||
"score": 0.0
|
||||
}
|
||||
|
||||
logger.info("Analyzing text readability")
|
||||
|
||||
try:
|
||||
# Calculate various readability metrics
|
||||
|
||||
# Wiener Sachtextformel (German readability)
|
||||
# Scale: 4 (very easy) to 15 (very difficult)
|
||||
# Optimal for LinkedIn: 8-12 (general audience)
|
||||
try:
|
||||
wstf = self.textstat.wiener_sachtextformel(text, variant=1)
|
||||
except Exception:
|
||||
wstf = None
|
||||
|
||||
# Flesch Reading Ease (adapted for German)
|
||||
# Scale: 0-100 (higher = easier)
|
||||
# Optimal for LinkedIn: 60-70
|
||||
try:
|
||||
flesch = self.textstat.flesch_reading_ease(text)
|
||||
except Exception:
|
||||
flesch = None
|
||||
|
||||
# Average sentence length
|
||||
avg_sentence_length = self.textstat.avg_sentence_length(text)
|
||||
|
||||
# Sentence count
|
||||
sentence_count = self.textstat.sentence_count(text)
|
||||
|
||||
# Word count
|
||||
word_count = self.textstat.lexicon_count(text, removepunct=True)
|
||||
|
||||
# Average syllables per word
|
||||
avg_syllables = self.textstat.avg_syllables_per_word(text)
|
||||
|
||||
# Difficult words (more than 3 syllables)
|
||||
difficult_words = self.textstat.difficult_words(text)
|
||||
|
||||
# Determine if text passes readability requirements
|
||||
passed = True
|
||||
issues = []
|
||||
|
||||
if wstf is not None and wstf > target_grade:
|
||||
passed = False
|
||||
issues.append(f"Zu komplex (WSTF: {wstf:.1f}, Ziel: ≤{target_grade})")
|
||||
|
||||
if flesch is not None and flesch < target_flesch:
|
||||
passed = False
|
||||
issues.append(f"Schwer lesbar (Flesch: {flesch:.1f}, Ziel: ≥{target_flesch})")
|
||||
|
||||
if avg_sentence_length > max_sentence_length:
|
||||
passed = False
|
||||
issues.append(f"Sätze zu lang (Ø {avg_sentence_length:.1f} Wörter, Ziel: ≤{max_sentence_length})")
|
||||
|
||||
# Get verdict
|
||||
verdict = self._get_verdict(wstf, flesch, avg_sentence_length, target_grade, target_flesch, max_sentence_length)
|
||||
|
||||
result = {
|
||||
"available": True,
|
||||
"passed": passed,
|
||||
"issues": issues,
|
||||
"verdict": verdict,
|
||||
"metrics": {
|
||||
"wiener_sachtextformel": wstf,
|
||||
"flesch_reading_ease": flesch,
|
||||
"avg_sentence_length": avg_sentence_length,
|
||||
"sentence_count": sentence_count,
|
||||
"word_count": word_count,
|
||||
"avg_syllables_per_word": avg_syllables,
|
||||
"difficult_words": difficult_words,
|
||||
"difficult_words_percentage": (difficult_words / word_count * 100) if word_count > 0 else 0
|
||||
},
|
||||
"targets": {
|
||||
"target_grade": target_grade,
|
||||
"target_flesch": target_flesch,
|
||||
"max_sentence_length": max_sentence_length
|
||||
}
|
||||
}
|
||||
|
||||
if passed:
|
||||
logger.info(f"✅ Readability check passed")
|
||||
else:
|
||||
logger.warning(f"⚠️ Readability issues: {', '.join(issues)}")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Readability check failed: {e}")
|
||||
return {
|
||||
"available": False,
|
||||
"passed": True,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
def _get_verdict(
|
||||
self,
|
||||
wstf: Optional[float],
flesch: Optional[float],
|
||||
avg_sent_len: float,
|
||||
target_grade: float,
|
||||
target_flesch: float,
|
||||
max_sent_len: int
|
||||
) -> str:
|
||||
"""Get human-readable verdict."""
|
||||
if wstf is None and flesch is None:
|
||||
return "⚠️ Keine Metrik verfügbar"
|
||||
|
||||
issues = []
|
||||
|
||||
# WSTF check
|
||||
if wstf is not None:
|
||||
if wstf <= 8:
|
||||
issues.append("✅ Sehr leicht lesbar")
|
||||
elif wstf <= target_grade:
|
||||
issues.append("✅ Gut lesbar")
|
||||
elif wstf <= target_grade + 2:
|
||||
issues.append("⚠️ Etwas komplex")
|
||||
else:
|
||||
issues.append("❌ Zu komplex - vereinfachen!")
|
||||
|
||||
# Flesch check
|
||||
if flesch is not None:
|
||||
if flesch >= 70:
|
||||
issues.append("✅ Leicht verständlich")
|
||||
elif flesch >= target_flesch:
|
||||
issues.append("✅ Gut verständlich")
|
||||
elif flesch >= 50:
|
||||
issues.append("⚠️ Mittelschwer")
|
||||
else:
|
||||
issues.append("❌ Schwer verständlich")
|
||||
|
||||
# Sentence length check
|
||||
if avg_sent_len > max_sent_len + 5:
|
||||
issues.append("❌ Sätze zu lang")
|
||||
elif avg_sent_len > max_sent_len:
|
||||
issues.append("⚠️ Sätze etwas lang")
|
||||
else:
|
||||
issues.append("✅ Gute Satzlänge")
|
||||
|
||||
return " | ".join(issues)
|
||||
|
||||
def get_summary(self, check_result: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Get human-readable summary of readability check.
|
||||
|
||||
Args:
|
||||
check_result: Result from process()
|
||||
|
||||
Returns:
|
||||
Summary string
|
||||
"""
|
||||
if not check_result.get("available"):
|
||||
return "⚠️ Lesbarkeits-Check nicht verfügbar"
|
||||
|
||||
metrics = check_result.get("metrics", {})
|
||||
wstf = metrics.get("wiener_sachtextformel")
|
||||
flesch = metrics.get("flesch_reading_ease")
|
||||
avg_len = metrics.get("avg_sentence_length")
|
||||
|
||||
summary = "📊 Lesbarkeit: "
|
||||
|
||||
parts = []
|
||||
if wstf is not None:
|
||||
parts.append(f"WSTF={wstf:.1f}")
|
||||
if flesch is not None:
|
||||
parts.append(f"Flesch={flesch:.1f}")
|
||||
if avg_len is not None:
|
||||
parts.append(f"Ø Satzlänge={avg_len:.1f}")
|
||||
|
||||
summary += ", ".join(parts)
|
||||
|
||||
if check_result.get("passed"):
|
||||
summary += " ✅"
|
||||
else:
|
||||
summary += f" ⚠️ ({', '.join(check_result.get('issues', []))})"
|
||||
|
||||
return summary
|
||||
|
||||
def get_improvement_suggestions(self, check_result: Dict[str, Any]) -> List[str]:
|
||||
"""
|
||||
Get specific improvement suggestions based on readability analysis.
|
||||
|
||||
Args:
|
||||
check_result: Result from process()
|
||||
|
||||
Returns:
|
||||
List of improvement suggestions
|
||||
"""
|
||||
if not check_result.get("available") or check_result.get("passed"):
|
||||
return []
|
||||
|
||||
suggestions = []
|
||||
metrics = check_result.get("metrics", {})
|
||||
|
||||
# Sentence length suggestions
|
||||
avg_len = metrics.get("avg_sentence_length", 0)
|
||||
if avg_len > 25:
|
||||
suggestions.append("Teile lange Sätze auf - maximal 20-25 Wörter pro Satz")
|
||||
elif avg_len > 20:
|
||||
suggestions.append("Kürze einige Sätze für besseren Lesefluss")
|
||||
|
||||
# Complexity suggestions
|
||||
wstf = metrics.get("wiener_sachtextformel")
|
||||
if wstf and wstf > 12:
|
||||
suggestions.append("Vereinfache Sprache - weniger Fachbegriffe und Schachtelsätze")
|
||||
|
||||
# Word choice suggestions
|
||||
difficult_pct = metrics.get("difficult_words_percentage", 0)
|
||||
if difficult_pct > 15:
|
||||
suggestions.append("Ersetze komplexe Wörter durch einfachere Alternativen")
|
||||
|
||||
# Syllables suggestions
|
||||
avg_syllables = metrics.get("avg_syllables_per_word", 0)
|
||||
if avg_syllables > 2.0:
|
||||
suggestions.append("Nutze kürzere, prägnantere Wörter")
|
||||
|
||||
return suggestions
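A standalone sketch of the metric thresholds used above (assumes the textstat package is installed; exact values vary with the input text and textstat version):

import textstat

textstat.set_lang("de")
text = "Kurze Sätze lesen sich leichter. Das gilt besonders auf LinkedIn."

wstf = textstat.wiener_sachtextformel(text, variant=1)
flesch = textstat.flesch_reading_ease(text)
avg_len = textstat.avg_sentence_length(text)

# Same pass criteria as the default targets: WSTF <= 10, Flesch >= 60, <= 20 words per sentence.
passed = wstf <= 10.0 and flesch >= 60.0 and avg_len <= 20
print(f"WSTF={wstf:.1f}, Flesch={flesch:.1f}, Ø Satzlänge={avg_len:.1f}, passed={passed}")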
|
||||
218
src/agents/style_validator.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""Style validator using semantic similarity."""
|
||||
from typing import Dict, Any, List
|
||||
from loguru import logger
|
||||
import numpy as np
|
||||
|
||||
from src.agents.base import BaseAgent
|
||||
|
||||
|
||||
class StyleValidator(BaseAgent):
|
||||
"""Agent for validating writing style using semantic similarity."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize style validator."""
|
||||
super().__init__("StyleValidator")
|
||||
self.model = None
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self):
|
||||
"""Initialize sentence transformer model."""
|
||||
try:
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
# Store for later use
|
||||
self.cosine_similarity = cosine_similarity
|
||||
|
||||
# Load best multilingual model
|
||||
self.model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
|
||||
logger.info("SentenceTransformer model loaded successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load SentenceTransformer: {e}")
|
||||
self.model = None
|
||||
|
||||
async def process(
|
||||
self,
|
||||
generated_text: str,
|
||||
reference_texts: List[str],
|
||||
threshold: float = 0.75
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Validate if generated text matches the style of reference texts.
|
||||
|
||||
Args:
|
||||
generated_text: The text to validate
|
||||
reference_texts: List of reference texts to compare against
|
||||
threshold: Minimum similarity score to pass (0-1)
|
||||
|
||||
Returns:
|
||||
Validation result with similarity scores
|
||||
"""
|
||||
if not self.model:
|
||||
logger.warning("SentenceTransformer not available, skipping style validation")
|
||||
return {
|
||||
"passed": True,
|
||||
"available": False,
|
||||
"avg_similarity": 0.0,
|
||||
"max_similarity": 0.0,
|
||||
"threshold": threshold
|
||||
}
|
||||
|
||||
if not reference_texts or len(reference_texts) == 0:
|
||||
logger.warning("No reference texts provided for style validation")
|
||||
return {
|
||||
"passed": True,
|
||||
"available": True,
|
||||
"avg_similarity": 0.0,
|
||||
"max_similarity": 0.0,
|
||||
"threshold": threshold,
|
||||
"warning": "No reference texts available"
|
||||
}
|
||||
|
||||
logger.info(f"Validating style against {len(reference_texts)} reference texts")
|
||||
|
||||
try:
|
||||
# Generate embeddings
|
||||
gen_embedding = self.model.encode([generated_text])
|
||||
ref_embeddings = self.model.encode(reference_texts)
|
||||
|
||||
# Calculate similarities
|
||||
similarities = self.cosine_similarity(gen_embedding, ref_embeddings)[0]
|
||||
avg_similarity = float(np.mean(similarities))
|
||||
max_similarity = float(np.max(similarities))
|
||||
min_similarity = float(np.min(similarities))
|
||||
std_similarity = float(np.std(similarities))
|
||||
|
||||
# Find most similar reference
|
||||
most_similar_idx = int(np.argmax(similarities))
|
||||
|
||||
# Determine if passed
|
||||
passed = avg_similarity >= threshold
|
||||
|
||||
# Get verdict
|
||||
verdict = self._get_verdict(avg_similarity, threshold)
|
||||
|
||||
result = {
|
||||
"passed": passed,
|
||||
"available": True,
|
||||
"avg_similarity": avg_similarity,
|
||||
"max_similarity": max_similarity,
|
||||
"min_similarity": min_similarity,
|
||||
"std_similarity": std_similarity,
|
||||
"threshold": threshold,
|
||||
"most_similar_ref_idx": most_similar_idx,
|
||||
"verdict": verdict,
|
||||
"similarity_scores": [float(s) for s in similarities]
|
||||
}
|
||||
|
||||
if passed:
|
||||
logger.info(f"✅ Style validation passed (avg: {avg_similarity:.3f}, threshold: {threshold})")
|
||||
else:
|
||||
logger.warning(f"⚠️ Style validation failed (avg: {avg_similarity:.3f}, threshold: {threshold})")
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Style validation failed: {e}")
|
||||
return {
|
||||
"passed": False,
|
||||
"available": False,
|
||||
"error": str(e),
|
||||
"avg_similarity": 0.0,
|
||||
"max_similarity": 0.0,
|
||||
"threshold": threshold
|
||||
}
|
||||
|
||||
def _get_verdict(self, similarity: float, threshold: float) -> str:
|
||||
"""Get human-readable verdict based on similarity score."""
|
||||
if similarity >= 0.85:
|
||||
return "✅ Exzellenter Stil-Match - klingt sehr authentisch"
|
||||
elif similarity >= threshold:
|
||||
return "✅ Guter Stil-Match - passt zur Person"
|
||||
elif similarity >= threshold - 0.05:
|
||||
return "⚠️ Knapp unter Schwellwert - leichte Stil-Abweichung"
|
||||
elif similarity >= 0.60:
|
||||
return "⚠️ Deutliche Stil-Abweichung - zu KI-typisch?"
|
||||
else:
|
||||
return "❌ Starke Stil-Abweichung - klingt nicht authentisch"
|
||||
|
||||
async def compare_texts(
|
||||
self,
|
||||
text1: str,
|
||||
text2: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Compare two texts for semantic similarity.
|
||||
|
||||
Args:
|
||||
text1: First text
|
||||
text2: Second text
|
||||
|
||||
Returns:
|
||||
Similarity score and analysis
|
||||
"""
|
||||
if not self.model:
|
||||
return {"available": False, "similarity": 0.0}
|
||||
|
||||
try:
|
||||
embeddings = self.model.encode([text1, text2])
|
||||
similarity = float(self.cosine_similarity([embeddings[0]], [embeddings[1]])[0][0])
|
||||
|
||||
return {
|
||||
"available": True,
|
||||
"similarity": similarity,
|
||||
"verdict": "Sehr ähnlich" if similarity > 0.8 else
|
||||
"Ähnlich" if similarity > 0.6 else
|
||||
"Unterschiedlich"
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Text comparison failed: {e}")
|
||||
return {"available": False, "similarity": 0.0, "error": str(e)}
|
||||
|
||||
def get_summary(self, validation_result: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Get human-readable summary of validation results.
|
||||
|
||||
Args:
|
||||
validation_result: Result from process()
|
||||
|
||||
Returns:
|
||||
Summary string
|
||||
"""
|
||||
if not validation_result.get("available"):
|
||||
return "⚠️ Stil-Validierung nicht verfügbar"
|
||||
|
||||
if validation_result.get("warning"):
|
||||
return f"⚠️ {validation_result['warning']}"
|
||||
|
||||
avg_sim = validation_result["avg_similarity"]
|
||||
max_sim = validation_result["max_similarity"]
|
||||
threshold = validation_result["threshold"]
|
||||
|
||||
summary = f"Stil-Ähnlichkeit: {avg_sim:.1%} (max: {max_sim:.1%}, threshold: {threshold:.1%})"
|
||||
summary += f"\n{validation_result['verdict']}"
|
||||
|
||||
return summary
|
||||
|
||||
async def batch_validate(
|
||||
self,
|
||||
generated_texts: List[str],
|
||||
reference_texts: List[str],
|
||||
threshold: float = 0.75
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Validate multiple generated texts at once.
|
||||
|
||||
Args:
|
||||
generated_texts: List of texts to validate
|
||||
reference_texts: Reference texts for comparison
|
||||
threshold: Minimum similarity threshold
|
||||
|
||||
Returns:
|
||||
List of validation results
|
||||
"""
|
||||
results = []
|
||||
for text in generated_texts:
|
||||
result = await self.process(text, reference_texts, threshold)
|
||||
results.append(result)
|
||||
return results
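A standalone sketch of the similarity check (assumes sentence-transformers and scikit-learn are installed; the multilingual model is downloaded on first use, and the sample posts are invented):

import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

generated = "Heute teile ich drei Learnings aus unserem letzten Launch."
references = [
    "Drei Dinge, die ich aus unserem letzten Launch mitgenommen habe.",
    "Warum unser letzter Launch fast gescheitert wäre - und was ich daraus lerne.",
]

# Average cosine similarity against the reference posts, compared to the 0.75 threshold.
sims = cosine_similarity(model.encode([generated]), model.encode(references))[0]
print(f"avg={np.mean(sims):.3f}, max={np.max(sims):.3f}, passed={np.mean(sims) >= 0.75}")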
|
||||
@@ -518,12 +518,12 @@ Analysiere jeden Entwurf kurz und wähle den besten. Antworte im JSON-Format:
|
||||
system_prompt = self._get_system_prompt(profile_analysis, selected_examples, learned_lessons, post_type, post_type_analysis, company_strategy)
|
||||
user_prompt = self._get_user_prompt(topic, feedback, previous_version, critic_result, user_thoughts, selected_hook)
|
||||
|
||||
-# Lower temperature for more consistent style matching
+# OPTIMIERT: Niedrigere Temperature (0.5 statt 0.6) für konsistenteren Stil
|
||||
post = await self.call_openai(
|
||||
system_prompt=system_prompt,
|
||||
user_prompt=user_prompt,
|
||||
model="gpt-4o",
|
||||
-temperature=0.6
+temperature=0.5
|
||||
)
|
||||
|
||||
logger.info("Post written successfully")
|
||||
@@ -549,12 +549,13 @@ Analysiere jeden Entwurf kurz und wähle den besten. Antworte im JSON-Format:
|
||||
phrase_library = profile_analysis.get("phrase_library", {})
|
||||
structure_templates = profile_analysis.get("structure_templates", {})
|
||||
|
||||
-# Build example posts section
+# Build example posts section (OPTIMIERT: mehr Kontext, weniger kürzen)
|
||||
examples_section = ""
|
||||
if example_posts and len(example_posts) > 0:
|
||||
examples_section = "\n\nREFERENZ-POSTS DER PERSON (Orientiere dich am Stil!):\n"
|
||||
for i, post in enumerate(example_posts, 1):
|
||||
-post_text = post[:1800] + "..." if len(post) > 1800 else post
+# Erhöht von 1800 auf 3000 für mehr Kontext
+post_text = post[:3000] + "..." if len(post) > 3000 else post
|
||||
examples_section += f"\n--- Beispiel {i} ---\n{post_text}\n"
|
||||
examples_section += "--- Ende Beispiele ---\n"
|
||||
|
||||
@@ -575,7 +576,8 @@ Analysiere jeden Entwurf kurz und wähle den besten. Antworte im JSON-Format:
|
||||
filler_expressions = phrase_library.get('filler_expressions', [])
|
||||
|
||||
# Randomly select a subset of phrases for this post (variation!)
|
||||
-def select_phrases(phrases: list, max_count: int = 3) -> str:
+# OPTIMIERT: Von 3-4 auf 5-6 erhöht für mehr Kontext
+def select_phrases(phrases: list, max_count: int = 6) -> str:
|
||||
if not phrases:
|
||||
return "Keine verfügbar"
|
||||
selected = random.sample(phrases, min(max_count, len(phrases)))
|
||||
@@ -586,30 +588,58 @@ Analysiere jeden Entwurf kurz und wähle den besten. Antworte im JSON-Format:
|
||||
sentence_starters = structure_templates.get('typical_sentence_starters', [])
|
||||
paragraph_transitions = structure_templates.get('paragraph_transitions', [])
|
||||
|
||||
# Extract N-gram patterns if available (NEU!)
|
||||
ngram_patterns = profile_analysis.get("ngram_patterns", {})
|
||||
typical_bigrams = ngram_patterns.get("typical_bigrams", [])
|
||||
typical_trigrams = ngram_patterns.get("typical_trigrams", [])
|
||||
signature_combinations = ngram_patterns.get("signature_combinations", [])
|
||||
|
||||
# Build N-gram section (NEU!)
|
||||
ngram_section = ""
|
||||
if typical_bigrams or typical_trigrams or signature_combinations:
|
||||
ngram_section = f"""
|
||||
|
||||
2a. TYPISCHE WORTKOMBINATIONEN (Stilistisches Fingerprinting - WICHTIG!):
|
||||
|
||||
Diese Wortkombinationen sind charakteristisch für den Schreibstil der Person.
|
||||
Baue 3-5 dieser Kombinationen NATÜRLICH in den Post ein!
|
||||
|
||||
TYPISCHE 2-WORT-KOMBINATIONEN (Bigrams):
|
||||
{', '.join(typical_bigrams[:20])}
|
||||
|
||||
TYPISCHE 3-WORT-KOMBINATIONEN (Trigrams):
|
||||
{', '.join(typical_trigrams[:15])}
|
||||
|
||||
SIGNATURE-KOMBINATIONEN (besonders charakteristisch):
|
||||
{', '.join(signature_combinations[:10])}
|
||||
|
||||
WICHTIG: Diese Kombinationen machen den Stil authentisch! Nutze sie organisch im Text.
|
||||
"""
|
||||
|
||||
# Build phrase library section
|
||||
phrase_section = ""
|
||||
if hook_phrases or emotional_expressions or cta_phrases:
|
||||
phrase_section = f"""
|
||||
|
||||
-2. PHRASEN-BIBLIOTHEK (Wähle passende aus - NICHT alle verwenden!):
+2b. PHRASEN-BIBLIOTHEK (Wähle passende aus - NICHT alle verwenden!):

HOOK-VORLAGEN (lass dich inspirieren, kopiere nicht 1:1):
-- {select_phrases(hook_phrases, 4)}
+- {select_phrases(hook_phrases, 5)}

ÜBERGANGS-PHRASEN (nutze 1-2 davon):
-- {select_phrases(transition_phrases, 3)}
+- {select_phrases(transition_phrases, 4)}

EMOTIONALE AUSDRÜCKE (nutze 1-2 passende):
-- {select_phrases(emotional_expressions, 4)}
+- {select_phrases(emotional_expressions, 5)}

CTA-FORMULIERUNGEN (wähle eine passende):
-- {select_phrases(cta_phrases, 3)}
+- {select_phrases(cta_phrases, 4)}

FÜLL-AUSDRÜCKE (für natürlichen Flow):
-- {select_phrases(filler_expressions, 3)}
+- {select_phrases(filler_expressions, 4)}

SIGNATURE PHRASES (nutze maximal 1-2 ORGANISCH):
-- {select_phrases(sig_phrases, 4)}
+- {select_phrases(sig_phrases, 5)}
|
||||
|
||||
WICHTIG: Variiere! Nutze NICHT immer die gleichen Phrasen. Wähle die, die zum Thema passen.
|
||||
"""
|
||||
@@ -671,6 +701,38 @@ WICHTIG: Dieser Post MUSS den Mustern und Richtlinien dieses Post-Typs folgen!
|
||||
"""
|
||||
|
||||
return f"""ROLLE: Du bist ein erstklassiger Ghostwriter für LinkedIn. Deine Aufgabe ist es, einen Post zu schreiben, der exakt so klingt wie der digitale Zwilling der beschriebenen Person. Du passt dich zu 100% an das bereitgestellte Profil an.
|
||||
|
||||
=== SCHREIB-PROZESS (Chain-of-Thought - Denke Schritt für Schritt!) ===
|
||||
|
||||
VOR DEM SCHREIBEN - ANALYSIERE & PLANE:
|
||||
|
||||
SCHRITT 1 - STIL-ANALYSE:
|
||||
Analysiere die Referenz-Posts und Muster:
|
||||
- Welche Wortkombinationen sind typisch? (siehe N-gram Patterns)
|
||||
- Welche Satzstrukturen verwendet die Person?
|
||||
- Wie ist der emotionale Ton und das Energie-Level?
|
||||
- Welche Signature-Kombinationen fallen auf?
|
||||
|
||||
SCHRITT 2 - CONTENT-PLANUNG:
|
||||
Plane den Post-Inhalt:
|
||||
- Hook-Strategie: Welcher Hook-Typ passt zum Thema UND zur Person?
|
||||
- Kernbotschaft: Was ist die ONE key message?
|
||||
- Struktur: Welches Template aus den Referenz-Posts passt am besten?
|
||||
- Logischer Fluss: Macht der Argumentationsaufbau Sinn?
|
||||
|
||||
SCHRITT 3 - SINNIGKEITS-CHECK (KRITISCH!):
|
||||
Prüfe BEVOR du schreibst:
|
||||
- Ist die Kernaussage klar, logisch und nachvollziehbar?
|
||||
- Passen alle Argumente zusammen? Gibt es Widersprüche?
|
||||
- Sind die Fakten korrekt verknüpft? (Keine nicht-zusammenhängenden Dinge verbinden!)
|
||||
- Ist der Mehrwert für den Leser offensichtlich?
|
||||
- Würde die echte Person SO argumentieren?
|
||||
|
||||
SCHRITT 4 - SCHREIBEN:
|
||||
Jetzt schreibe den Post unter Berücksichtigung von Schritt 1-3.
|
||||
Baue 3-5 der typischen Wortkombinationen organisch ein!
|
||||
|
||||
WICHTIG: Wenn etwas in Schritt 3 nicht "Sinn macht" - STOPP und überdenke den Angle!
|
||||
{examples_section}
|
||||
|
||||
1. STIL & ENERGIE:
|
||||
@@ -693,6 +755,7 @@ Interpunktion: {linguistic.get('punctuation_patterns', 'Standard')}
|
||||
Branche: {audience.get('industry_context', 'Business')}
|
||||
|
||||
Zielgruppe: {audience.get('target_audience', 'Professionals')}
|
||||
{ngram_section}
|
||||
{phrase_section}
|
||||
{structure_section}
|
||||
|
||||
|
||||