Files
Onyva-Postling/maintenance_cleanup_reposts.py
2026-02-03 12:48:43 +01:00

147 lines
4.4 KiB
Python

#!/usr/bin/env python3
"""
Maintenance script to remove repost/non-regular posts from the database.
This removes LinkedIn posts that are reposts, shares, or any other non-original content.
Only posts with post_type "regular" in their raw_data should remain.
Usage:
python maintenance_cleanup_reposts.py # Dry run (preview what will be deleted)
python maintenance_cleanup_reposts.py --apply # Actually delete the posts
"""
import asyncio
import sys
from uuid import UUID
from loguru import logger
from src.database import db
async def cleanup_reposts(apply: bool = False):
"""
Find and remove all non-regular posts from the database.
Args:
apply: If True, delete posts. If False, just preview.
"""
logger.info("Loading all customers...")
customers = await db.list_customers()
total_posts = 0
regular_posts = 0
posts_to_delete = []
for customer in customers:
posts = await db.get_linkedin_posts(customer.id)
for post in posts:
total_posts += 1
# Check post_type in raw_data
post_type = None
if post.raw_data and isinstance(post.raw_data, dict):
post_type = post.raw_data.get("post_type", "").lower()
if post_type == "regular":
regular_posts += 1
else:
posts_to_delete.append({
'id': post.id,
'customer': customer.name,
'post_type': post_type or 'unknown',
'text_preview': (post.post_text[:80] + '...') if post.post_text and len(post.post_text) > 80 else post.post_text,
'url': post.post_url
})
# Print summary
print(f"\n{'='*70}")
print(f"SCAN RESULTS")
print(f"{'='*70}")
print(f"Total posts scanned: {total_posts}")
print(f"Regular posts (keep): {regular_posts}")
print(f"Non-regular (delete): {len(posts_to_delete)}")
if not posts_to_delete:
print("\nNo posts to delete! Database is clean.")
return
# Show posts to delete
print(f"\n{'='*70}")
print(f"POSTS TO DELETE")
print(f"{'='*70}")
# Group by post_type for cleaner output
by_type = {}
for post in posts_to_delete:
pt = post['post_type']
if pt not in by_type:
by_type[pt] = []
by_type[pt].append(post)
for post_type, posts in by_type.items():
print(f"\n[{post_type.upper()}] - {len(posts)} posts")
print("-" * 50)
for post in posts[:5]: # Show max 5 per type
print(f" Customer: {post['customer']}")
print(f" Preview: {post['text_preview']}")
print(f" ID: {post['id']}")
print()
if len(posts) > 5:
print(f" ... and {len(posts) - 5} more {post_type} posts\n")
if apply:
print(f"\n{'='*70}")
print(f"DELETING {len(posts_to_delete)} POSTS...")
print(f"{'='*70}")
deleted = 0
errors = 0
for post_data in posts_to_delete:
try:
await asyncio.to_thread(
lambda pid=post_data['id']:
db.client.table("linkedin_posts").delete().eq("id", str(pid)).execute()
)
deleted += 1
if deleted % 10 == 0:
print(f" Deleted {deleted}/{len(posts_to_delete)}...")
except Exception as e:
logger.error(f"Failed to delete post {post_data['id']}: {e}")
errors += 1
print(f"\nDone! Deleted {deleted} posts. Errors: {errors}")
else:
print(f"\n{'='*70}")
print(f"DRY RUN - No changes made.")
print(f"Run with --apply to delete these {len(posts_to_delete)} posts.")
print(f"{'='*70}")
async def main():
apply = '--apply' in sys.argv
if apply:
print("="*70)
print("MODE: DELETE POSTS")
print("="*70)
print(f"\nThis will permanently delete non-regular posts from the database.")
print("This action cannot be undone!\n")
response = input("Are you sure? Type 'DELETE' to confirm: ")
if response != 'DELETE':
print("Aborted.")
return
else:
print("="*70)
print("MODE: DRY RUN (preview only)")
print("="*70)
print("Add --apply flag to actually delete posts.\n")
await cleanup_reposts(apply=apply)
if __name__ == "__main__":
asyncio.run(main())