29 lines
808 B
Python
29 lines
808 B
Python
#!/usr/bin/env python3
|
|
"""Setup script for NLTK data."""
|
|
import nltk
|
|
from loguru import logger
|
|
|
|
def setup_nltk():
    """Download the NLTK data packages used by this project.

    Each required package is looked up in the local NLTK data path first and
    is downloaded only when that lookup fails. Progress and outcome are
    reported through the module-level ``logger``.

    Returns:
        None. A failed download is logged as a warning rather than raised,
        so the remaining packages are still attempted.
    """
    logger.info("Setting up NLTK data...")

    # Maps each package id to the nltk_data category directory it is
    # installed under. ``nltk.data.find`` needs the full
    # "<category>/<package>" resource name; probing everything under
    # "tokenizers/" would never find the tagger or the stopword corpus and
    # would re-download them on every run.
    required_packages = {
        'punkt': 'tokenizers',  # Tokenizer
        'averaged_perceptron_tagger': 'taggers',  # POS tagger (optional)
        'stopwords': 'corpora',  # Stopwords (optional)
    }

    for package, category in required_packages.items():
        try:
            nltk.data.find(f'{category}/{package}')
        except LookupError:
            logger.info(f"📥 Downloading {package}...")
            # nltk.download() signals failure through a falsy return value
            # instead of an exception, so the result has to be checked
            # before claiming success.
            if nltk.download(package, quiet=False):
                logger.info(f"✅ {package} installed")
            else:
                logger.warning(f"⚠️ {package} could not be downloaded")
        else:
            logger.info(f"✅ {package} already installed")

    logger.info("🎉 NLTK setup complete!")
|
|
|
|
# Script entry point: perform the NLTK data setup only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    setup_nltk()
|