# Files
# Onyva-Postling/src/agents/grammar_checker.py
# 2026-02-12 14:17:36 +01:00
#
# 184 lines
# 5.9 KiB
# Python

"""Grammar and spelling checker agent using LanguageTool."""
from typing import Dict, Any, List
from loguru import logger
import language_tool_python
from src.agents.base import BaseAgent
class GrammarCheckAgent(BaseAgent):
    """Agent for checking grammar and spelling using LanguageTool.

    The agent creates a ``language_tool_python.LanguageTool`` instance for
    German (``de-DE``). If that fails, ``self.tool`` is ``None`` and
    :meth:`process` returns a result flagged ``"available": False`` instead
    of raising.
    """

    # Match categories whose first suggestion may be applied automatically.
    _SAFE_CATEGORIES = frozenset({
        "TYPOS",        # Spelling errors
        "CASING",       # Capitalization
        "COMPOUNDING",  # Word compounds
    })

    # Maximum number of replacement suggestions reported per error.
    _MAX_SUGGESTIONS = 3

    def __init__(self):
        """Initialize grammar checker agent."""
        super().__init__("GrammarChecker")
        # Initialize LanguageTool for German. Deliberately best-effort: a
        # failed start-up disables checking rather than breaking the agent.
        try:
            self.tool = language_tool_python.LanguageTool('de-DE')
            logger.info("LanguageTool initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize LanguageTool: {e}")
            self.tool = None

    async def process(
        self,
        text: str,
        auto_correct: bool = False
    ) -> Dict[str, Any]:
        """
        Check text for grammar and spelling errors.

        Args:
            text: Text to check
            auto_correct: If True, automatically apply safe corrections

        Returns:
            Dictionary with the keys ``has_errors``, ``error_count``,
            ``errors``, ``corrected_text``, ``available``, ``categories`` and
            ``auto_corrected``. When the check could not be run,
            ``available`` is False, ``corrected_text`` is the unchanged input
            and, if an exception occurred, ``error`` holds its message.
        """
        if not self.tool:
            logger.warning("LanguageTool not available, skipping grammar check")
            return self._unavailable_result(text)

        logger.info("Checking text for grammar and spelling errors")
        try:
            # NOTE: check() is invoked without await, i.e. synchronously on
            # the calling thread.
            matches = self.tool.check(text)
            categorized_errors = self._categorize_errors(matches)

            errors = [
                {
                    "message": match.message,
                    "replacements": match.replacements[:self._MAX_SUGGESTIONS],
                    "context": match.context,
                    "offset": match.offset,
                    "error_length": match.errorLength,
                    "category": match.category,
                    "rule_id": match.ruleId,
                }
                for match in matches
            ]

            corrected_text = text
            auto_corrected = False
            if auto_correct and matches:
                # Only auto-correct "safe" errors (spelling, obvious grammar)
                safe_matches = [m for m in matches if self._is_safe_correction(m)]
                if safe_matches:
                    corrected_text = language_tool_python.utils.correct(text, safe_matches)
                    # Report auto-correction only when the text really changed,
                    # not merely because errors were found.
                    auto_corrected = corrected_text != text
                    logger.info(f"Auto-corrected {len(safe_matches)} safe errors")

            result = {
                "has_errors": len(matches) > 0,
                "error_count": len(matches),
                "errors": errors,
                "corrected_text": corrected_text,
                "available": True,
                "categories": categorized_errors,
                "auto_corrected": auto_corrected,
            }

            if result["has_errors"]:
                logger.warning(f"Found {len(matches)} grammar/spelling errors")
            else:
                logger.info("No grammar/spelling errors found")
            return result
        except Exception as e:
            # Best-effort: a failing check must not break the caller.
            logger.error(f"Grammar check failed: {e}")
            return self._unavailable_result(text, error=str(e))

    @staticmethod
    def _unavailable_result(text: str, error: Any = None) -> Dict[str, Any]:
        """Build the result returned when no grammar check could be run.

        Args:
            text: The unchecked input, passed through as ``corrected_text``
            error: Optional error message stored under the ``error`` key

        Returns:
            Result dictionary with ``available`` set to False
        """
        result: Dict[str, Any] = {
            "has_errors": False,
            "error_count": 0,
            "errors": [],
            "corrected_text": text,
            "available": False,
            "categories": {},
            "auto_corrected": False,
        }
        if error is not None:
            result["error"] = error
        return result

    def _categorize_errors(self, matches: List) -> Dict[str, int]:
        """Count matches per category; a falsy category is counted as "OTHER"."""
        categories: Dict[str, int] = {}
        for match in matches:
            category = match.category or "OTHER"
            categories[category] = categories.get(category, 0) + 1
        return categories

    def _is_safe_correction(self, match) -> bool:
        """
        Check if a correction is 'safe' to apply automatically.

        Safe corrections (a replacement suggestion must exist):
        - Matches in the TYPOS, CASING or COMPOUNDING categories
        - MISC matches with at most two suggestions

        Everything else (style, punctuation, typography, ambiguous
        corrections) is treated as unsafe.

        Args:
            match: A LanguageTool match exposing ``category`` and
                ``replacements``

        Returns:
            True if the match may be corrected automatically
        """
        replacements = match.replacements
        # Without a suggestion there is nothing to apply.
        if not replacements:
            return False
        if match.category in self._SAFE_CATEGORIES:
            return True
        # Few suggestions on a MISC match are taken as a clear correction.
        return match.category == "MISC" and len(replacements) <= 2

    def get_summary(self, check_result: Dict[str, Any]) -> str:
        """
        Get a human-readable summary of grammar check results.

        Args:
            check_result: Result from process()

        Returns:
            Summary string (German, as shown to the user)
        """
        if not check_result.get("available"):
            return "⚠️ Grammatikprüfung nicht verfügbar"
        if not check_result["has_errors"]:
            return "✅ Keine Rechtschreib- oder Grammatikfehler gefunden"

        error_count = check_result["error_count"]
        categories = check_result.get("categories", {})
        summary = f"⚠️ {error_count} Fehler gefunden"
        if categories:
            cat_summary = ", ".join(f"{cat}: {count}" for cat, count in categories.items())
            summary += f" ({cat_summary})"
        if check_result.get("auto_corrected"):
            summary += " - Automatisch korrigiert"
        return summary

    def close(self):
        """Clean up LanguageTool resources.

        After closing, ``self.tool`` is reset to ``None`` so that a later
        process() call reports the checker as unavailable and a repeated
        close() is a no-op.
        """
        if self.tool:
            try:
                self.tool.close()
            finally:
                self.tool = None
            logger.info("LanguageTool closed")