"""Grammar and spelling checker agent using LanguageTool.""" from typing import Dict, Any, List from loguru import logger import language_tool_python from src.agents.base import BaseAgent class GrammarCheckAgent(BaseAgent): """Agent for checking grammar and spelling using LanguageTool.""" def __init__(self): """Initialize grammar checker agent.""" super().__init__("GrammarChecker") # Initialize LanguageTool for German try: self.tool = language_tool_python.LanguageTool('de-DE') logger.info("LanguageTool initialized successfully") except Exception as e: logger.error(f"Failed to initialize LanguageTool: {e}") self.tool = None async def process( self, text: str, auto_correct: bool = False ) -> Dict[str, Any]: """ Check text for grammar and spelling errors. Args: text: Text to check auto_correct: If True, automatically apply safe corrections Returns: Dictionary with error details and optionally corrected text """ if not self.tool: logger.warning("LanguageTool not available, skipping grammar check") return { "has_errors": False, "error_count": 0, "errors": [], "corrected_text": text, "available": False } logger.info("Checking text for grammar and spelling errors") try: # Check for errors matches = self.tool.check(text) # Categorize errors categorized_errors = self._categorize_errors(matches) # Prepare error details errors = [] for match in matches: errors.append({ "message": match.message, "replacements": match.replacements[:3], # Top 3 suggestions "context": match.context, "offset": match.offset, "error_length": match.errorLength, "category": match.category, "rule_id": match.ruleId }) # Auto-correct if requested corrected_text = text if auto_correct and len(matches) > 0: # Only auto-correct "safe" errors (spelling, obvious grammar) safe_matches = [m for m in matches if self._is_safe_correction(m)] if safe_matches: corrected_text = language_tool_python.utils.correct(text, safe_matches) logger.info(f"Auto-corrected {len(safe_matches)} safe errors") result = { "has_errors": len(matches) > 0, "error_count": len(matches), "errors": errors, "corrected_text": corrected_text, "available": True, "categories": categorized_errors, "auto_corrected": auto_correct and len(matches) > 0 } if result["has_errors"]: logger.warning(f"Found {len(matches)} grammar/spelling errors") else: logger.info("No grammar/spelling errors found") return result except Exception as e: logger.error(f"Grammar check failed: {e}") return { "has_errors": False, "error_count": 0, "errors": [], "corrected_text": text, "available": False, "error": str(e) } def _categorize_errors(self, matches: List) -> Dict[str, int]: """Categorize errors by type.""" categories = {} for match in matches: category = match.category or "OTHER" categories[category] = categories.get(category, 0) + 1 return categories def _is_safe_correction(self, match) -> bool: """ Check if a correction is 'safe' to apply automatically. Safe corrections: - Spelling errors with clear suggestions - Obvious grammar errors (verb agreement, etc.) Unsafe corrections: - Style suggestions - Ambiguous corrections - Punctuation changes that might alter meaning """ # Safe categories safe_categories = { "TYPOS", # Spelling errors "CASING", # Capitalization "COMPOUNDING", # Word compounds } # Check category if match.category in safe_categories: return True # Check if it's a clear spelling mistake with 1-2 clear suggestions if match.category == "MISC" and len(match.replacements) <= 2: return True # Avoid style and punctuation changes if match.category in {"STYLE", "PUNCTUATION", "TYPOGRAPHY"}: return False return False def get_summary(self, check_result: Dict[str, Any]) -> str: """ Get a human-readable summary of grammar check results. Args: check_result: Result from process() Returns: Summary string """ if not check_result.get("available"): return "⚠️ Grammatikprüfung nicht verfügbar" if not check_result["has_errors"]: return "✅ Keine Rechtschreib- oder Grammatikfehler gefunden" error_count = check_result["error_count"] categories = check_result.get("categories", {}) summary = f"⚠️ {error_count} Fehler gefunden" if categories: cat_summary = ", ".join([f"{cat}: {count}" for cat, count in categories.items()]) summary += f" ({cat_summary})" if check_result.get("auto_corrected"): summary += " - Automatisch korrigiert" return summary def close(self): """Clean up LanguageTool resources.""" if self.tool: self.tool.close() logger.info("LanguageTool closed")