Files
Onyva-Postling/scripts/setup_nltk.py
2026-02-12 14:17:36 +01:00

29 lines
808 B
Python

#!/usr/bin/env python3
"""Setup script for NLTK data."""
import nltk
from loguru import logger
def setup_nltk():
    """Ensure the NLTK data packages listed below are installed.

    Each package is probed with ``nltk.data.find`` under the data category
    it is installed into (``tokenizers``, ``taggers`` or ``corpora``), and
    only packages that are missing are downloaded.

    A failed download is logged as a warning instead of being raised, so
    the remaining packages are still attempted.
    """
    logger.info("Setting up NLTK data...")

    # Package id -> resource path used to check for an existing install.
    # The category prefix differs per package; probing everything under
    # 'tokenizers/' would make the tagger and stopwords lookups always miss.
    required_packages = {
        'punkt': 'tokenizers/punkt',  # Tokenizer
        'averaged_perceptron_tagger':
            'taggers/averaged_perceptron_tagger',  # POS tagger (optional)
        'stopwords': 'corpora/stopwords',  # Stopwords (optional)
    }

    for package, resource in required_packages.items():
        try:
            nltk.data.find(resource)
        except LookupError:
            logger.info(f"📥 Downloading {package}...")
            # nltk.download() signals failure through its boolean return
            # value rather than an exception, so check it before claiming
            # the package is installed.
            if nltk.download(package, quiet=False):
                logger.info(f"{package} installed")
            else:
                logger.warning(f"{package} could not be downloaded")
        else:
            logger.info(f"{package} already installed")

    logger.info("🎉 NLTK setup complete!")
# Run the setup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    setup_nltk()