new quality save layer
This commit is contained in:
183
src/agents/grammar_checker.py
Normal file
183
src/agents/grammar_checker.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""Grammar and spelling checker agent using LanguageTool."""
|
||||
from typing import Dict, Any, List
|
||||
from loguru import logger
|
||||
import language_tool_python
|
||||
|
||||
from src.agents.base import BaseAgent
|
||||
|
||||
|
||||
class GrammarCheckAgent(BaseAgent):
    """Agent for checking grammar and spelling using LanguageTool.

    The agent wraps a ``language_tool_python.LanguageTool`` instance configured
    for German (``de-DE``). If the tool cannot be created, the agent stays
    usable but reports every check as unavailable instead of raising.
    """

    # Categories whose first suggestion is applied automatically:
    # spelling errors, capitalization and word compounds.
    _SAFE_CATEGORIES = frozenset({"TYPOS", "CASING", "COMPOUNDING"})

    # A "MISC" match only counts as unambiguous when it offers at most this
    # many suggestions.
    _MAX_MISC_REPLACEMENTS = 2

    def __init__(self):
        """Initialize grammar checker agent."""
        super().__init__("GrammarChecker")
        # Initialize LanguageTool for German. Failure is deliberately
        # non-fatal: ``self.tool`` is set to None and ``process`` then reports
        # the check as unavailable.
        try:
            self.tool = language_tool_python.LanguageTool('de-DE')
            logger.info("LanguageTool initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize LanguageTool: {e}")
            self.tool = None

    async def process(
        self,
        text: str,
        auto_correct: bool = False
    ) -> Dict[str, Any]:
        """
        Check text for grammar and spelling errors.

        NOTE: the LanguageTool check is a synchronous call; this coroutine
        does not await anything while the check runs.

        Args:
            text: Text to check
            auto_correct: If True, automatically apply safe corrections

        Returns:
            Dictionary with the keys ``has_errors``, ``error_count``,
            ``errors``, ``corrected_text``, ``available``, ``categories`` and
            ``auto_corrected``. ``auto_corrected`` is True only when at least
            one safe correction was actually handed to the corrector. When the
            check fails, ``available`` is False, ``corrected_text`` is the
            unchanged input and an ``error`` key carries the failure message.
        """
        if not self.tool:
            logger.warning("LanguageTool not available, skipping grammar check")
            return self._unavailable_result(text)

        logger.info("Checking text for grammar and spelling errors")

        try:
            matches = self.tool.check(text)
            categorized_errors = self._categorize_errors(matches)

            errors = [
                {
                    "message": match.message,
                    "replacements": match.replacements[:3],  # Top 3 suggestions
                    "context": match.context,
                    "offset": match.offset,
                    "error_length": match.errorLength,
                    "category": match.category,
                    "rule_id": match.ruleId,
                }
                for match in matches
            ]

            # Only auto-correct "safe" errors (spelling, obvious grammar);
            # style and punctuation suggestions are reported but never applied.
            corrected_text = text
            safe_matches = []
            if auto_correct:
                safe_matches = [m for m in matches if self._is_safe_correction(m)]
                if safe_matches:
                    corrected_text = language_tool_python.utils.correct(text, safe_matches)
                    logger.info(f"Auto-corrected {len(safe_matches)} safe errors")

            result = {
                "has_errors": len(matches) > 0,
                "error_count": len(matches),
                "errors": errors,
                "corrected_text": corrected_text,
                "available": True,
                "categories": categorized_errors,
                # True only if corrections were applied, not merely requested.
                "auto_corrected": bool(safe_matches),
            }

            if result["has_errors"]:
                logger.warning(f"Found {len(matches)} grammar/spelling errors")
            else:
                logger.info("No grammar/spelling errors found")

            return result

        except Exception as e:
            # Best effort: a failing checker must not break the caller, so the
            # original text is returned and the failure is reported in-band.
            logger.error(f"Grammar check failed: {e}")
            result = self._unavailable_result(text)
            result["error"] = str(e)
            return result

    def _unavailable_result(self, text: str) -> Dict[str, Any]:
        """Build the result returned when no grammar check could be performed."""
        return {
            "has_errors": False,
            "error_count": 0,
            "errors": [],
            "corrected_text": text,
            "available": False,
            "categories": {},
            "auto_corrected": False,
        }

    def _categorize_errors(self, matches: List) -> Dict[str, int]:
        """Count matches per category; matches without a category count as "OTHER"."""
        categories: Dict[str, int] = {}
        for match in matches:
            category = match.category or "OTHER"
            categories[category] = categories.get(category, 0) + 1
        return categories

    def _is_safe_correction(self, match) -> bool:
        """
        Check if a correction is 'safe' to apply automatically.

        Safe corrections:
        - Matches in the TYPOS, CASING or COMPOUNDING categories
        - MISC matches with at most two suggestions

        Unsafe corrections:
        - Matches that offer no replacement at all (nothing could be applied)
        - Everything else, including style, punctuation and typography

        Args:
            match: A LanguageTool match exposing ``category`` and
                ``replacements``.

        Returns:
            True if the match may be corrected automatically.
        """
        # Without a suggestion there is nothing to apply, so the match must
        # not be counted as an auto-correction.
        if not match.replacements:
            return False

        if match.category in self._SAFE_CATEGORIES:
            return True

        # A MISC match with only 1-2 suggestions is treated as unambiguous.
        return (
            match.category == "MISC"
            and len(match.replacements) <= self._MAX_MISC_REPLACEMENTS
        )

    def get_summary(self, check_result: Dict[str, Any]) -> str:
        """
        Get a human-readable summary of grammar check results.

        Args:
            check_result: Result from process()

        Returns:
            Summary string (German, user-facing)
        """
        if not check_result.get("available"):
            return "⚠️ Grammatikprüfung nicht verfügbar"

        if not check_result.get("has_errors"):
            return "✅ Keine Rechtschreib- oder Grammatikfehler gefunden"

        error_count = check_result.get("error_count", 0)
        categories = check_result.get("categories", {})

        summary = f"⚠️ {error_count} Fehler gefunden"

        if categories:
            cat_summary = ", ".join(f"{cat}: {count}" for cat, count in categories.items())
            summary += f" ({cat_summary})"

        if check_result.get("auto_corrected"):
            summary += " - Automatisch korrigiert"

        return summary

    def close(self):
        """Clean up LanguageTool resources.

        Safe to call more than once: the tool reference is dropped after
        closing, so later ``process`` calls report the check as unavailable
        instead of using a closed tool.
        """
        if self.tool:
            try:
                self.tool.close()
            finally:
                self.tool = None
            logger.info("LanguageTool closed")
|
||||
Reference in New Issue
Block a user