184 lines
5.9 KiB
Python
184 lines
5.9 KiB
Python
"""Grammar and spelling checker agent using LanguageTool."""
|
|
from typing import Dict, Any, List
|
|
from loguru import logger
|
|
import language_tool_python
|
|
|
|
from src.agents.base import BaseAgent
|
|
|
|
|
|
class GrammarCheckAgent(BaseAgent):
    """Agent that checks grammar and spelling with LanguageTool.

    The agent wraps a ``language_tool_python.LanguageTool`` instance created
    for the ``de-DE`` locale. If the tool cannot be created, the agent stays
    usable but every check reports ``"available": False``.
    """

    # Categories whose first suggestion is applied automatically:
    # spelling (TYPOS), capitalization (CASING) and word compounds
    # (COMPOUNDING).
    _SAFE_CATEGORIES = frozenset({"TYPOS", "CASING", "COMPOUNDING"})

    def __init__(self):
        """Initialize the agent and try to start LanguageTool for German.

        A failure to create the tool is logged and leaves ``self.tool`` set
        to ``None`` instead of propagating the exception.
        """
        super().__init__("GrammarChecker")
        try:
            self.tool = language_tool_python.LanguageTool('de-DE')
            logger.info("LanguageTool initialized successfully")
        except Exception as e:
            # Best-effort: the agent degrades to "not available" rather
            # than failing construction.
            logger.error(f"Failed to initialize LanguageTool: {e}")
            self.tool = None

    @staticmethod
    def _unavailable_result(text: str) -> Dict[str, Any]:
        """Build the result returned when no check could be performed."""
        return {
            "has_errors": False,
            "error_count": 0,
            "errors": [],
            "corrected_text": text,
            "available": False
        }

    async def process(
        self,
        text: str,
        auto_correct: bool = False
    ) -> Dict[str, Any]:
        """Check text for grammar and spelling errors.

        Args:
            text: Text to check.
            auto_correct: If True, apply the corrections that
                ``_is_safe_correction`` accepts.

        Returns:
            Dictionary with the keys ``has_errors``, ``error_count``,
            ``errors``, ``corrected_text`` and ``available``. A successful
            check additionally carries ``categories`` (count per category)
            and ``auto_corrected`` (True only when at least one safe
            correction was actually handed to the corrector). A failed
            check carries ``error`` with the exception text and
            ``available`` set to False.
        """
        if not self.tool:
            logger.warning("LanguageTool not available, skipping grammar check")
            return self._unavailable_result(text)

        logger.info("Checking text for grammar and spelling errors")

        try:
            # NOTE(review): this is a synchronous call inside a coroutine;
            # if it is slow it will hold up the event loop. Consider
            # offloading it to a worker thread.
            matches = self.tool.check(text)

            categorized_errors = self._categorize_errors(matches)

            errors = [
                {
                    "message": match.message,
                    "replacements": match.replacements[:3],  # Top 3 suggestions
                    "context": match.context,
                    "offset": match.offset,
                    "error_length": match.errorLength,
                    "category": match.category,
                    "rule_id": match.ruleId
                }
                for match in matches
            ]

            corrected_text = text
            corrections_applied = False
            if auto_correct and matches:
                # Only auto-correct "safe" errors (spelling, obvious grammar)
                safe_matches = [m for m in matches if self._is_safe_correction(m)]
                if safe_matches:
                    corrected_text = language_tool_python.utils.correct(text, safe_matches)
                    corrections_applied = True
                    logger.info(f"Auto-corrected {len(safe_matches)} safe errors")

            result = {
                "has_errors": bool(matches),
                "error_count": len(matches),
                "errors": errors,
                "corrected_text": corrected_text,
                "available": True,
                "categories": categorized_errors,
                # Report a correction only when one was applied, so the
                # summary does not claim a correction that never happened.
                "auto_corrected": corrections_applied
            }

            if result["has_errors"]:
                logger.warning(f"Found {len(matches)} grammar/spelling errors")
            else:
                logger.info("No grammar/spelling errors found")

            return result

        except Exception as e:
            # Best-effort boundary: a failing check is reported to the
            # caller through the result instead of being raised.
            logger.error(f"Grammar check failed: {e}")
            failure = self._unavailable_result(text)
            failure["error"] = str(e)
            return failure

    def _categorize_errors(self, matches: List) -> Dict[str, int]:
        """Count matches per category; a falsy category counts as "OTHER"."""
        categories = {}
        for match in matches:
            category = match.category or "OTHER"
            categories[category] = categories.get(category, 0) + 1
        return categories

    def _is_safe_correction(self, match) -> bool:
        """Decide whether a match may be corrected automatically.

        Safe corrections:
        - Matches in the TYPOS, CASING or COMPOUNDING categories
        - MISC matches with at most two suggestions

        Everything else (style, punctuation, typography, unknown
        categories) is left for manual review. A match without any
        suggestion is never safe because there is nothing to apply.

        Args:
            match: A LanguageTool match exposing ``category`` and
                ``replacements``.

        Returns:
            True if the match's suggestion should be applied automatically.
        """
        if not match.replacements:
            return False

        if match.category in self._SAFE_CATEGORIES:
            return True

        # Few suggestions are treated as an unambiguous fix.
        return match.category == "MISC" and len(match.replacements) <= 2

    def get_summary(self, check_result: Dict[str, Any]) -> str:
        """Get a human-readable (German) summary of grammar check results.

        Args:
            check_result: Result from process().

        Returns:
            Summary string: a "not available" notice, a "no errors" notice,
            or the error count followed by the per-category counts and an
            auto-correction marker when those are present.
        """
        if not check_result.get("available"):
            return "⚠️ Grammatikprüfung nicht verfügbar"

        if not check_result["has_errors"]:
            return "✅ Keine Rechtschreib- oder Grammatikfehler gefunden"

        error_count = check_result["error_count"]
        categories = check_result.get("categories", {})

        summary = f"⚠️ {error_count} Fehler gefunden"

        if categories:
            cat_summary = ", ".join(f"{cat}: {count}" for cat, count in categories.items())
            summary += f" ({cat_summary})"

        if check_result.get("auto_corrected"):
            summary += " - Automatisch korrigiert"

        return summary

    def close(self):
        """Clean up LanguageTool resources.

        Safe to call more than once: the tool reference is dropped after
        closing, so later calls are no-ops and later ``process`` calls
        report the checker as unavailable.
        """
        if self.tool:
            try:
                self.tool.close()
            finally:
                # Drop the reference even if closing raised, so a closed
                # tool is never reused.
                self.tool = None
            logger.info("LanguageTool closed")
|