Added file and link post creation
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
# Core Dependencies
|
# Core Dependencies
|
||||||
python-dotenv==1.0.0
|
python-dotenv==1.0.0
|
||||||
pydantic==2.5.0
|
pydantic==2.7.4
|
||||||
pydantic-settings==2.1.0
|
pydantic-settings==2.3.0
|
||||||
|
|
||||||
# AI & APIs
|
# AI & APIs
|
||||||
openai==1.54.0
|
openai==1.54.0
|
||||||
@@ -41,5 +41,11 @@ uvicorn==0.32.0
|
|||||||
jinja2==3.1.4
|
jinja2==3.1.4
|
||||||
python-multipart==0.0.9
|
python-multipart==0.0.9
|
||||||
|
|
||||||
|
# Link Extraction
|
||||||
|
trafilatura==1.7.0
|
||||||
|
youtube-transcript-api==0.6.2
|
||||||
|
pypdf==4.2.0
|
||||||
|
docling==2.74.0
|
||||||
|
|
||||||
# Teams Bot JWT validation
|
# Teams Bot JWT validation
|
||||||
PyJWT>=2.8.0
|
PyJWT>=2.8.0
|
||||||
|
|||||||
95
src/agents/link_topic_builder.py
Normal file
95
src/agents/link_topic_builder.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
"""Agent to build a structured topic from extracted link content."""
|
||||||
|
import json
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from src.agents.base import BaseAgent
|
||||||
|
|
||||||
|
|
||||||
|
class LinkTopicBuilderAgent(BaseAgent):
    """Build a structured topic dictionary from extracted link content."""

    def __init__(self) -> None:
        super().__init__("link_topic_builder")

    async def process(self, source: Dict[str, Any]) -> Dict[str, Any]:
        """Turn extracted link content into a structured topic dict.

        Args:
            source: Mapping produced by the link/file extractors with the
                keys ``text``, ``source_url``, ``source_type`` and ``title``.

        Returns:
            Topic dict with all required keys guaranteed to exist
            (``title``, ``summary``, ``key_points``, ...).

        Raises:
            ValueError: If the content is too short or the model response
                cannot be parsed into a JSON object.
        """
        content = source.get("text", "")
        source_url = source.get("source_url", "")
        source_type = source.get("source_type", "web")
        source_title = source.get("title", "")

        # Very short content would only produce a hallucinated topic.
        if not content or len(content.strip()) < 200:
            raise ValueError("Zu wenig Inhalt, um ein sinnvolles Topic zu erstellen.")

        # Keep content length reasonable for the model
        max_chars = 12000
        if len(content) > max_chars:
            content = content[:max_chars]

        system_prompt = """Du bist ein Assistent, der aus einem Link-Inhalt ein strukturiertes Topic für einen LinkedIn-Post erstellt.

Gib NUR valides JSON zurück (keine Erklärungen)."""

        user_prompt = f"""Quelle:
- Typ: {source_type}
- Titel: {source_title}
- URL: {source_url}

Inhalt:
\"\"\"{content}\"\"\"

Erstelle ein Topic im folgenden JSON-Format:
{{
"title": "Kurzer, präziser Titel (max 80 Zeichen)",
"summary": "Detaillierte, vollumfängliche Zusammenfassung (8-12 Sätze, alle Hauptpunkte abdecken)",
"extended_summary": "Längere Zusammenfassung (150-250 Wörter) mit Kontext, Kernaussagen, Belegen und Implikationen",
"outline": ["Abschnitt 1: ...", "Abschnitt 2: ...", "..."],
"fact": "1-2 Sätze als Kernaussage/Fakt",
"key_points": ["Punkt 1", "Punkt 2", "..."],
"key_facts": ["Konkreter Fakt 1", "Konkreter Fakt 2", "..."],
"quotes": ["Kurzes Zitat falls sinnvoll", "..."],
"relevance": "Warum ist das für die Zielgruppe relevant? (1 Satz)",
"category": "Link",
"source": "{source_url}",
"source_title": "{source_title}",
"source_type": "{source_type}"
}}

Regeln:
- Halte dich an das JSON-Format.
- Keine erfundenen Fakten: nutze NUR den gegebenen Inhalt.
- Wenn keine Zitate vorhanden sind, gib ein leeres Array zurück.
- Summary und extended_summary müssen unterschiedliche Detailtiefe haben.
"""

        result = await self.call_openai(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            model="gpt-4o",
            # Low temperature: we want faithful extraction, not creativity.
            temperature=0.2,
            response_format={"type": "json_object"}
        )

        # Fix: catch only parse-related errors and preserve the exception
        # chain; previously a broad `except Exception` swallowed the cause.
        try:
            topic = json.loads(result)
        except (json.JSONDecodeError, TypeError) as exc:
            logger.warning(f"Failed to parse topic JSON: {exc}")
            raise ValueError("Antwort konnte nicht verarbeitet werden.") from exc

        # Fix: valid JSON that is not an object (e.g. a list) previously
        # crashed below with AttributeError on .setdefault.
        if not isinstance(topic, dict):
            logger.warning(f"Topic JSON is not an object: {type(topic).__name__}")
            raise ValueError("Antwort konnte nicht verarbeitet werden.")

        # Ensure required fields exist
        topic.setdefault("category", "Link")
        topic.setdefault("source", source_url)
        topic.setdefault("source_title", source_title)
        topic.setdefault("source_type", source_type)
        topic.setdefault("title", source_title or "Link-Thema")
        topic.setdefault("summary", "")
        topic.setdefault("extended_summary", "")
        topic.setdefault("outline", [])
        # Fall back to a truncated summary if the model omitted "fact".
        topic.setdefault("fact", topic.get("summary", "")[:300])
        topic.setdefault("key_points", [])
        topic.setdefault("key_facts", [])
        topic.setdefault("quotes", [])
        topic.setdefault("relevance", "Relevantes Thema für die Zielgruppe")

        return topic
|
||||||
@@ -351,6 +351,35 @@ class WriterAgent(BaseAgent):
|
|||||||
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
|
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
|
||||||
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
|
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
|
||||||
|
|
||||||
|
summary_section = ""
|
||||||
|
if topic.get('summary'):
|
||||||
|
summary_section = f"\n**ZUSAMMENFASSUNG (vom Link):**\n{topic.get('summary')}\n"
|
||||||
|
|
||||||
|
extended_summary_section = ""
|
||||||
|
if topic.get('extended_summary'):
|
||||||
|
extended_summary_section = f"\n**DETAILLIERTE ZUSAMMENFASSUNG:**\n{topic.get('extended_summary')}\n"
|
||||||
|
|
||||||
|
outline_section = ""
|
||||||
|
outline = topic.get('outline', [])
|
||||||
|
if outline and isinstance(outline, list) and len(outline) > 0:
|
||||||
|
outline_section = "\n**INHALTS-GLIEDERUNG:**\n" + "\n".join([f"- {o}" for o in outline]) + "\n"
|
||||||
|
|
||||||
|
key_points_section = ""
|
||||||
|
key_points = topic.get('key_points', [])
|
||||||
|
if key_points and isinstance(key_points, list) and len(key_points) > 0:
|
||||||
|
key_points_section = "\n**KERNPUNKTE (vom Link):**\n" + "\n".join([f"- {p}" for p in key_points]) + "\n"
|
||||||
|
|
||||||
|
quotes_section = ""
|
||||||
|
quotes = topic.get('quotes', [])
|
||||||
|
if quotes and isinstance(quotes, list) and len(quotes) > 0:
|
||||||
|
quotes_section = "\n**ZITATE (optional verwenden):**\n" + "\n".join([f"- \"{q}\"" for q in quotes]) + "\n"
|
||||||
|
|
||||||
|
source_section = ""
|
||||||
|
if topic.get('source_title') or topic.get('source'):
|
||||||
|
source_title = topic.get('source_title') or ""
|
||||||
|
source_url = topic.get('source') or ""
|
||||||
|
source_section = f"\n**QUELLE:** {source_title} {source_url}\n"
|
||||||
|
|
||||||
why_section = ""
|
why_section = ""
|
||||||
if topic.get('why_this_person'):
|
if topic.get('why_this_person'):
|
||||||
why_section = f"\n**WARUM DU DARÜBER SCHREIBEN SOLLTEST:**\n{topic.get('why_this_person')}\n"
|
why_section = f"\n**WARUM DU DARÜBER SCHREIBEN SOLLTEST:**\n{topic.get('why_this_person')}\n"
|
||||||
@@ -391,7 +420,7 @@ Schreibe einen authentischen LinkedIn-Post, der:
|
|||||||
{angle_section}{hook_section}
|
{angle_section}{hook_section}
|
||||||
**KERN-FAKT / INHALT:**
|
**KERN-FAKT / INHALT:**
|
||||||
{topic.get('fact', topic.get('description', ''))}
|
{topic.get('fact', topic.get('description', ''))}
|
||||||
{facts_section}{thoughts_section}
|
{summary_section}{extended_summary_section}{outline_section}{key_points_section}{facts_section}{quotes_section}{source_section}{thoughts_section}
|
||||||
**WARUM RELEVANT:**
|
**WARUM RELEVANT:**
|
||||||
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
|
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
|
||||||
{why_section}
|
{why_section}
|
||||||
@@ -941,6 +970,35 @@ Gib NUR den überarbeiteten Post zurück - keine Kommentare."""
|
|||||||
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
|
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
|
||||||
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
|
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
|
||||||
|
|
||||||
|
summary_section = ""
|
||||||
|
if topic.get('summary'):
|
||||||
|
summary_section = f"\n**ZUSAMMENFASSUNG (vom Link):**\n{topic.get('summary')}\n"
|
||||||
|
|
||||||
|
extended_summary_section = ""
|
||||||
|
if topic.get('extended_summary'):
|
||||||
|
extended_summary_section = f"\n**DETAILLIERTE ZUSAMMENFASSUNG:**\n{topic.get('extended_summary')}\n"
|
||||||
|
|
||||||
|
outline_section = ""
|
||||||
|
outline = topic.get('outline', [])
|
||||||
|
if outline and isinstance(outline, list) and len(outline) > 0:
|
||||||
|
outline_section = "\n**INHALTS-GLIEDERUNG:**\n" + "\n".join([f"- {o}" for o in outline]) + "\n"
|
||||||
|
|
||||||
|
key_points_section = ""
|
||||||
|
key_points = topic.get('key_points', [])
|
||||||
|
if key_points and isinstance(key_points, list) and len(key_points) > 0:
|
||||||
|
key_points_section = "\n**KERNPUNKTE (vom Link):**\n" + "\n".join([f"- {p}" for p in key_points]) + "\n"
|
||||||
|
|
||||||
|
quotes_section = ""
|
||||||
|
quotes = topic.get('quotes', [])
|
||||||
|
if quotes and isinstance(quotes, list) and len(quotes) > 0:
|
||||||
|
quotes_section = "\n**ZITATE (optional verwenden):**\n" + "\n".join([f"- \"{q}\"" for q in quotes]) + "\n"
|
||||||
|
|
||||||
|
source_section = ""
|
||||||
|
if topic.get('source_title') or topic.get('source'):
|
||||||
|
source_title = topic.get('source_title') or ""
|
||||||
|
source_url = topic.get('source') or ""
|
||||||
|
source_section = f"\n**QUELLE:** {source_title} {source_url}\n"
|
||||||
|
|
||||||
# User thoughts section
|
# User thoughts section
|
||||||
thoughts_section = ""
|
thoughts_section = ""
|
||||||
if user_thoughts:
|
if user_thoughts:
|
||||||
@@ -977,7 +1035,7 @@ Schreibe einen authentischen LinkedIn-Post, der:
|
|||||||
{angle_section}{hook_section}
|
{angle_section}{hook_section}
|
||||||
**KERN-FAKT / INHALT:**
|
**KERN-FAKT / INHALT:**
|
||||||
{topic.get('fact', topic.get('description', ''))}
|
{topic.get('fact', topic.get('description', ''))}
|
||||||
{facts_section}{thoughts_section}
|
{summary_section}{extended_summary_section}{outline_section}{key_points_section}{facts_section}{quotes_section}{source_section}{thoughts_section}
|
||||||
**WARUM RELEVANT:**
|
**WARUM RELEVANT:**
|
||||||
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
|
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
|
||||||
|
|
||||||
@@ -1063,6 +1121,16 @@ Antworte im JSON-Format:
|
|||||||
if post_type.description:
|
if post_type.description:
|
||||||
post_type_section += f"\n{post_type.description}"
|
post_type_section += f"\n{post_type.description}"
|
||||||
|
|
||||||
|
content_block = topic.get('fact', topic.get('description', 'Keine Details verfügbar'))
|
||||||
|
if topic.get('summary'):
|
||||||
|
content_block += f"\n\nZUSAMMENFASSUNG:\n{topic.get('summary')}"
|
||||||
|
if topic.get('extended_summary'):
|
||||||
|
content_block += f"\n\nDETAILLIERTE ZUSAMMENFASSUNG:\n{topic.get('extended_summary')}"
|
||||||
|
if topic.get('outline') and isinstance(topic.get('outline'), list):
|
||||||
|
content_block += "\n\nGLIEDERUNG:\n" + "\n".join([f"- {o}" for o in topic.get('outline', [])])
|
||||||
|
if topic.get('key_points') and isinstance(topic.get('key_points'), list):
|
||||||
|
content_block += "\n\nKERNPUNKTE:\n" + "\n".join([f"- {p}" for p in topic.get('key_points', [])])
|
||||||
|
|
||||||
user_prompt = f"""Generiere 4 Hooks für dieses Thema:
|
user_prompt = f"""Generiere 4 Hooks für dieses Thema:
|
||||||
|
|
||||||
THEMA: {topic.get('title', 'Unbekanntes Thema')}
|
THEMA: {topic.get('title', 'Unbekanntes Thema')}
|
||||||
@@ -1070,7 +1138,7 @@ THEMA: {topic.get('title', 'Unbekanntes Thema')}
|
|||||||
KATEGORIE: {topic.get('category', 'Allgemein')}
|
KATEGORIE: {topic.get('category', 'Allgemein')}
|
||||||
|
|
||||||
KERN-FAKT/INHALT:
|
KERN-FAKT/INHALT:
|
||||||
{topic.get('fact', topic.get('description', 'Keine Details verfügbar'))}
|
{content_block}
|
||||||
{thoughts_section}{post_type_section}
|
{thoughts_section}{post_type_section}
|
||||||
|
|
||||||
Generiere jetzt die 4 verschiedenen Hooks im JSON-Format."""
|
Generiere jetzt die 4 verschiedenen Hooks im JSON-Format."""
|
||||||
|
|||||||
@@ -69,6 +69,10 @@ class Settings(BaseSettings):
|
|||||||
redis_url: str = "redis://redis:6379/0"
|
redis_url: str = "redis://redis:6379/0"
|
||||||
scheduler_enabled: bool = False # True only on dedicated scheduler container
|
scheduler_enabled: bool = False # True only on dedicated scheduler container
|
||||||
|
|
||||||
|
# YouTube (optional)
|
||||||
|
youtube_cookies: str = "" # Raw Cookie header value for transcript fetching
|
||||||
|
transcriptapi_key: str = "" # TranscriptAPI.com key
|
||||||
|
|
||||||
# Telegram Bot (experimental)
|
# Telegram Bot (experimental)
|
||||||
telegram_enabled: bool = False
|
telegram_enabled: bool = False
|
||||||
telegram_bot_token: str = ""
|
telegram_bot_token: str = ""
|
||||||
|
|||||||
54
src/services/file_extractor.py
Normal file
54
src/services/file_extractor.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
"""Extract text content from uploaded files using Docling."""
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
|
|
||||||
|
|
||||||
|
class FileExtractionError(RuntimeError):
    """Raised when file extraction fails.

    Wraps any underlying Docling/IO error so callers can catch one
    domain-specific exception type instead of arbitrary runtime errors.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class FileExtractor:
    """Extract text from uploaded files via Docling.

    A single ``DocumentConverter`` is created per instance and reused
    across calls; PDF OCR is disabled so only the embedded text layer
    is extracted (fast path).
    """

    def __init__(self) -> None:
        # OCR off: we only want the text layer, not image recognition.
        pdf_options = PdfPipelineOptions(do_ocr=False)
        self._converter = DocumentConverter(
            format_options={
                InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_options)
            }
        )

    def extract_text(self, file_bytes: bytes, filename: str) -> str:
        """Extract markdown text from raw uploaded file bytes.

        Args:
            file_bytes: Raw file content.
            filename: Original filename; its extension lets Docling
                detect the document format.

        Returns:
            The document content exported as markdown, stripped.

        Raises:
            FileExtractionError: If the file is empty, yields no text,
                or cannot be processed.
        """
        if not file_bytes:
            raise FileExtractionError("Leere Datei.")

        # splitext already returns "" when there is no extension, so the
        # previous explicit '"." in filename' check was redundant.
        suffix = os.path.splitext(filename)[1] if filename else ""

        # Fix: explicit sentinel instead of the fragile `"tmp_path" in
        # locals()` probing used before.
        tmp_path: Optional[str] = None
        try:
            # Docling works on file paths, so spill the bytes to a temp
            # file; delete=False because Docling reopens it by name.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp.write(file_bytes)
                tmp_path = tmp.name

            result = self._converter.convert(tmp_path)
            document = result.document
            text = document.export_to_markdown().strip()
            if not text:
                raise FileExtractionError("Keine Inhalte im Dokument gefunden.")
            return text
        except FileExtractionError:
            raise
        except Exception as exc:
            raise FileExtractionError(f"Datei konnte nicht verarbeitet werden: {exc}") from exc
        finally:
            # Best-effort cleanup of the temp file.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass
|
||||||
324
src/services/link_extractor.py
Normal file
324
src/services/link_extractor.py
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
"""Extract content from links (web pages, PDFs, YouTube transcripts)."""
|
||||||
|
import asyncio
|
||||||
|
import ipaddress
|
||||||
|
import re
|
||||||
|
from io import BytesIO
|
||||||
|
from typing import Dict, Optional
|
||||||
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from src.config import settings
|
||||||
|
import trafilatura
|
||||||
|
try:
|
||||||
|
from trafilatura.metadata import extract_metadata as trafilatura_extract_metadata
|
||||||
|
except Exception: # pragma: no cover - optional path
|
||||||
|
trafilatura_extract_metadata = None
|
||||||
|
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
||||||
|
from pypdf import PdfReader
|
||||||
|
|
||||||
|
|
||||||
|
class LinkExtractionError(RuntimeError):
    """Raised when link extraction fails.

    Single domain-specific error type covering validation failures,
    fetch errors, and unreadable content.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class LinkExtractor:
    """Extract text content from supported link types.

    Supported sources: plain web pages (via trafilatura), PDFs (via
    pypdf) and YouTube videos (transcript via TranscriptAPI or the
    youtube-transcript-api fallback).
    """

    # Hard cap on downloaded payloads (~2MB) to avoid memory blowups.
    MAX_BYTES = 2_000_000  # ~2MB
    TIMEOUT = 10.0
    MAX_REDIRECTS = 3

    async def extract(self, url: str) -> Dict[str, Optional[str]]:
        """Extract content and metadata from a link.

        Returns:
            Dict with keys ``source_url``, ``source_type`` (one of
            "youtube"/"pdf"/"web"), ``title`` and ``text``.

        Raises:
            LinkExtractionError: On invalid/blocked URLs or when no
                readable content could be extracted.
        """
        normalized = self._normalize_url(url)
        self._validate_url(normalized)

        if self._is_youtube_url(normalized):
            video_id = self._extract_youtube_id(normalized)
            if not video_id:
                raise LinkExtractionError("YouTube-Link konnte nicht erkannt werden.")
            transcript = ""
            title = ""

            # Prefer the paid TranscriptAPI when configured; it is more
            # reliable than scraping YouTube directly.
            if (settings.transcriptapi_key or "").strip():
                try:
                    transcript, title = await self._fetch_transcriptapi(normalized)
                except Exception as exc:
                    logger.warning(f"TranscriptAPI failed, falling back to direct fetch: {exc}")

            if not transcript:
                transcript = await self._fetch_youtube_transcript(video_id)
            if not title:
                title = await self._fetch_youtube_title(normalized)
            return {
                "source_url": normalized,
                "source_type": "youtube",
                "title": title or "YouTube Video",
                "text": transcript,
            }

        content, content_type = await self._fetch_url(normalized)
        if self._is_pdf(normalized, content_type):
            text, title = self._extract_pdf(content)
            return {
                "source_url": normalized,
                "source_type": "pdf",
                "title": title or "PDF Dokument",
                "text": text,
            }

        text, title = self._extract_html(content)
        if not text:
            raise LinkExtractionError("Konnte keinen lesbaren Text aus der Seite extrahieren.")

        return {
            "source_url": normalized,
            "source_type": "web",
            "title": title or "Webseite",
            "text": text,
        }

    def _normalize_url(self, url: str) -> str:
        """Trim the URL and default to https:// when no scheme is given."""
        url = url.strip()
        if not url:
            raise LinkExtractionError("Bitte einen Link eingeben.")
        if not re.match(r"^https?://", url, re.IGNORECASE):
            url = f"https://{url}"
        return url

    def _validate_url(self, url: str) -> None:
        """Reject non-http(s) schemes and obvious local/private targets.

        NOTE(review): this blocks literal private IPs and localhost names
        but does not resolve DNS, so a hostname pointing at a private IP
        still passes — acceptable here, but not a full SSRF guard.
        """
        parsed = urlparse(url)
        if parsed.scheme not in ("http", "https"):
            raise LinkExtractionError("Nur http/https Links sind erlaubt.")
        if not parsed.hostname:
            raise LinkExtractionError("Ungültiger Link.")

        hostname = parsed.hostname.lower()
        if hostname in {"localhost", "127.0.0.1", "0.0.0.0", "::1"}:
            raise LinkExtractionError("Lokale Links sind nicht erlaubt.")
        if hostname.endswith(".local"):
            raise LinkExtractionError("Lokale Links sind nicht erlaubt.")

        # Block private IPs if hostname is an IP literal
        try:
            ip = ipaddress.ip_address(hostname)
            if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
                raise LinkExtractionError("Lokale Links sind nicht erlaubt.")
        except ValueError:
            # Not an IP literal — a regular hostname is fine.
            pass

    async def _fetch_url(self, url: str) -> tuple[bytes, str]:
        """Download a URL; return (body bytes, lowercased content-type)."""
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; LinkedInWorkflowBot/1.0)"
        }
        async with httpx.AsyncClient(
            follow_redirects=True,
            max_redirects=self.MAX_REDIRECTS,
            timeout=self.TIMEOUT,
            headers=headers,
            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
        ) as client:
            response = await client.get(url)
            response.raise_for_status()
            content = response.content
            # Size check happens after download; MAX_BYTES only bounds
            # what we are willing to process, not the transfer itself.
            if len(content) > self.MAX_BYTES:
                raise LinkExtractionError("Die Seite ist zu groß, um verarbeitet zu werden.")
            content_type = response.headers.get("content-type", "").lower()
            return content, content_type

    def _is_pdf(self, url: str, content_type: str) -> bool:
        """Detect PDFs by content-type header or .pdf URL suffix."""
        if "application/pdf" in content_type:
            return True
        return url.lower().endswith(".pdf")

    def _extract_pdf(self, content: bytes) -> tuple[str, Optional[str]]:
        """Extract plain text (and optional title) from PDF bytes."""
        try:
            reader = PdfReader(BytesIO(content))
            text_parts = []
            for page in reader.pages:
                page_text = page.extract_text() or ""
                if page_text:
                    text_parts.append(page_text)
            text = "\n".join(text_parts).strip()
            title = None
            try:
                title = reader.metadata.title if reader.metadata else None
            except Exception:
                # Metadata access can fail on malformed PDFs; title is optional.
                title = None
            if not text:
                raise LinkExtractionError("Konnte keinen Text aus dem PDF extrahieren.")
            return text, title
        except LinkExtractionError:
            raise
        except Exception as exc:
            raise LinkExtractionError(f"PDF-Extraktion fehlgeschlagen: {exc}") from exc

    def _extract_html(self, content: bytes) -> tuple[str, Optional[str]]:
        """Extract readable article text (and optional title) from HTML."""
        html = content.decode("utf-8", errors="ignore")
        text = trafilatura.extract(
            html,
            include_comments=False,
            include_tables=False,
            include_formatting=False,
            output_format="txt"
        )
        title = None
        try:
            # Metadata extraction is an optional trafilatura feature.
            if trafilatura_extract_metadata:
                metadata = trafilatura_extract_metadata(html)
                if metadata and metadata.title:
                    title = metadata.title
        except Exception:
            title = None
        return (text or "").strip(), title

    def _is_youtube_url(self, url: str) -> bool:
        """Return True for youtube.com / youtu.be hosts."""
        host = urlparse(url).hostname or ""
        host = host.lower()
        return "youtube.com" in host or "youtu.be" in host

    def _extract_youtube_id(self, url: str) -> Optional[str]:
        """Extract the video id from watch, short-link, or /shorts/ URLs."""
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        if "youtu.be" in host:
            return parsed.path.strip("/").split("/")[0] or None
        if "youtube.com" in host:
            qs = parse_qs(parsed.query)
            if "v" in qs and qs["v"]:
                return qs["v"][0]
            # /shorts/{id}
            if parsed.path.startswith("/shorts/"):
                return parsed.path.split("/")[2] if len(parsed.path.split("/")) > 2 else None
        return None

    async def _fetch_youtube_transcript(self, video_id: str) -> str:
        """Fetch a transcript via youtube-transcript-api.

        Preference order: manually created captions, then auto-generated,
        then any transcript matching the language list.
        """
        # Fix: read cookies BEFORE the try block — the except handler
        # below references `cookies`, and an early failure inside the try
        # previously raised UnboundLocalError, masking the real error.
        cookies = (settings.youtube_cookies or "").strip()
        try:
            languages = ["de", "de-DE", "de-AT", "de-CH", "en", "en-US", "en-GB"]

            # Prefer full transcript list to handle generated vs. manually created captions
            transcript_list = await asyncio.to_thread(
                YouTubeTranscriptApi.list_transcripts,
                video_id,
                cookies=cookies or None
            )

            transcript = None
            try:
                transcript = transcript_list.find_manually_created_transcript(languages)
            except Exception:
                pass

            if transcript is None:
                try:
                    transcript = transcript_list.find_generated_transcript(languages)
                except Exception:
                    pass

            if transcript is None:
                try:
                    transcript = transcript_list.find_transcript(languages)
                except Exception:
                    transcript = None

            if transcript is None:
                raise LinkExtractionError("Kein passendes Transkript gefunden.")

            # fetch() is blocking; run it off the event loop.
            data = await asyncio.to_thread(transcript.fetch)
            text = " ".join([item.get("text", "") for item in data]).strip()
            if not text:
                raise LinkExtractionError("Kein Transkript verfügbar.")
            return text
        except (TranscriptsDisabled, NoTranscriptFound):
            raise LinkExtractionError("Für dieses Video ist kein Transkript verfügbar.")
        except Exception as exc:
            logger.exception(f"YouTube transcript fetch failed: {exc}")
            # Dump raw endpoint responses to help diagnose YouTube blocks.
            debug_dump = await self._debug_youtube_dump(video_id, cookies or None)
            logger.warning(f"YouTube debug dump for {video_id}:\n{debug_dump}")
            raise LinkExtractionError(
                "YouTube-Transkript konnte nicht geladen werden. "
                "YouTube blockiert den Abruf manchmal. "
                "Wenn du Zugriff hast, setze die Umgebungsvariable "
                "`YOUTUBE_COOKIES` mit gültigen Cookies."
            ) from exc

    async def _debug_youtube_dump(self, video_id: str, cookies: Optional[str]) -> str:
        """Probe YouTube caption endpoints and collect response snippets for logging."""
        urls = [
            f"https://www.youtube.com/api/timedtext?type=list&v={video_id}",
            f"https://www.youtube.com/api/timedtext?lang=de&v={video_id}",
            f"https://www.youtube.com/api/timedtext?lang=de&v={video_id}&fmt=json3",
            f"https://www.youtube.com/api/timedtext?lang=en&v={video_id}",
            f"https://www.youtube.com/watch?v={video_id}",
        ]
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; LinkedInWorkflowBot/1.0)"
        }
        if cookies:
            headers["Cookie"] = cookies

        lines = [f"cookies_used={bool(cookies)}"]
        async with httpx.AsyncClient(timeout=self.TIMEOUT, headers=headers) as client:
            for url in urls:
                try:
                    resp = await client.get(url)
                    body = resp.text
                    snippet = body[:2000].replace("\n", "\\n").replace("\r", "")
                    lines.append(f"URL: {url}")
                    lines.append(f"STATUS: {resp.status_code}")
                    lines.append(f"BODY_SNIPPET: {snippet}")
                except Exception as exc:
                    lines.append(f"URL: {url}")
                    lines.append(f"ERROR: {repr(exc)}")

        return "\n".join(lines)

    async def _fetch_transcriptapi(self, video_url: str) -> tuple[str, str]:
        """Fetch transcript and title via TranscriptAPI.com.

        Returns:
            (transcript text, video title — may be empty).

        Raises:
            LinkExtractionError: If the key is missing, the API errors,
                or no transcript text is returned.
        """
        api_key = (settings.transcriptapi_key or "").strip()
        if not api_key:
            raise LinkExtractionError("TranscriptAPI Key fehlt.")

        endpoint = "https://transcriptapi.com/api/v2/youtube/transcript"
        params = {
            "video_url": video_url,
            "format": "json",
            "include_timestamp": "false",
            "send_metadata": "true",
        }
        headers = {"Authorization": f"Bearer {api_key}"}

        async with httpx.AsyncClient(timeout=self.TIMEOUT) as client:
            resp = await client.get(endpoint, params=params, headers=headers)
            if resp.status_code != 200:
                raise LinkExtractionError(f"TranscriptAPI Fehler: {resp.status_code}")
            data = resp.json()

        # The API may return the transcript as segment list or plain string.
        transcript = data.get("transcript")
        text = ""
        if isinstance(transcript, list):
            text = " ".join([item.get("text", "") for item in transcript]).strip()
        elif isinstance(transcript, str):
            text = transcript.strip()

        title = ""
        meta = data.get("metadata") or {}
        if isinstance(meta, dict):
            title = meta.get("title", "") or ""

        if not text:
            raise LinkExtractionError("TranscriptAPI lieferte kein Transkript.")

        return text, title

    async def _fetch_youtube_title(self, url: str) -> Optional[str]:
        """Best-effort title lookup via the public oEmbed endpoint."""
        oembed = f"https://www.youtube.com/oembed?format=json&url={url}"
        try:
            async with httpx.AsyncClient(timeout=self.TIMEOUT) as client:
                response = await client.get(oembed)
                if response.status_code != 200:
                    return None
                data = response.json()
                return data.get("title")
        except Exception:
            # Title is cosmetic; never fail extraction because of it.
            return None
|
||||||
1221
src/web/templates/user/create_post_file.html
Normal file
1221
src/web/templates/user/create_post_file.html
Normal file
File diff suppressed because it is too large
Load Diff
1234
src/web/templates/user/create_post_link.html
Normal file
1234
src/web/templates/user/create_post_link.html
Normal file
File diff suppressed because it is too large
Load Diff
@@ -14,7 +14,7 @@
|
|||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<div class="grid md:grid-cols-2 gap-6">
|
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
||||||
<a href="/create/wizard"
|
<a href="/create/wizard"
|
||||||
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
|
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
|
||||||
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
|
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
|
||||||
@@ -38,6 +38,30 @@
|
|||||||
<h2 class="text-lg font-semibold text-white mb-2">Per Chat erstellen</h2>
|
<h2 class="text-lg font-semibold text-white mb-2">Per Chat erstellen</h2>
|
||||||
<p class="text-gray-400 text-sm">Schnell und flexibel mit dem Chat-Assistenten.</p>
|
<p class="text-gray-400 text-sm">Schnell und flexibel mit dem Chat-Assistenten.</p>
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
|
<a href="/create/link-wizard"
|
||||||
|
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
|
||||||
|
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
|
||||||
|
<svg class="w-6 h-6 text-brand-highlight" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||||
|
d="M13.828 10.172a4 4 0 010 5.656l-3 3a4 4 0 11-5.656-5.656l1.5-1.5m4.328-4.328a4 4 0 015.656 0l3 3a4 4 0 11-5.656 5.656l-1.5-1.5"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<h2 class="text-lg font-semibold text-white mb-2">Aus Link erstellen</h2>
|
||||||
|
<p class="text-gray-400 text-sm">Webseite, PDF oder YouTube-Link analysieren und daraus posten.</p>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
<a href="/create/file-wizard"
|
||||||
|
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
|
||||||
|
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
|
||||||
|
<svg class="w-6 h-6 text-brand-highlight" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||||
|
d="M7 2h7l5 5v13a2 2 0 01-2 2H7a2 2 0 01-2-2V4a2 2 0 012-2z"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<h2 class="text-lg font-semibold text-white mb-2">Aus Datei erstellen</h2>
|
||||||
|
<p class="text-gray-400 text-sm">Datei hochladen und daraus einen Post generieren.</p>
|
||||||
|
</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ from src.services.background_jobs import (
|
|||||||
)
|
)
|
||||||
from src.services.db_job_manager import job_manager
|
from src.services.db_job_manager import job_manager
|
||||||
from src.services.storage_service import storage
|
from src.services.storage_service import storage
|
||||||
|
from src.services.link_extractor import LinkExtractor, LinkExtractionError
|
||||||
|
from src.services.file_extractor import FileExtractor, FileExtractionError
|
||||||
|
from src.agents.link_topic_builder import LinkTopicBuilderAgent
|
||||||
|
|
||||||
# Router for user frontend
|
# Router for user frontend
|
||||||
user_router = APIRouter(tags=["user"])
|
user_router = APIRouter(tags=["user"])
|
||||||
@@ -1813,6 +1816,64 @@ async def create_post_page(request: Request):
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@user_router.get("/create/link-wizard", response_class=HTMLResponse)
|
||||||
|
async def create_post_link_page(request: Request):
|
||||||
|
"""Create post from link wizard page."""
|
||||||
|
session = require_user_session(request)
|
||||||
|
if not session:
|
||||||
|
return RedirectResponse(url="/login", status_code=302)
|
||||||
|
|
||||||
|
# Check token limit for companies/employees
|
||||||
|
limit_reached = False
|
||||||
|
limit_message = ""
|
||||||
|
if session.account_type in ("company", "employee") and session.company_id:
|
||||||
|
can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
|
||||||
|
limit_reached = not can_create
|
||||||
|
limit_message = error_msg
|
||||||
|
|
||||||
|
user_id = UUID(session.user_id)
|
||||||
|
profile_picture = await get_user_avatar(session, user_id)
|
||||||
|
|
||||||
|
return templates.TemplateResponse("create_post_link.html", {
|
||||||
|
"request": request,
|
||||||
|
"page": "create",
|
||||||
|
"session": session,
|
||||||
|
"user_id": session.user_id,
|
||||||
|
"limit_reached": limit_reached,
|
||||||
|
"limit_message": limit_message,
|
||||||
|
"profile_picture": profile_picture
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@user_router.get("/create/file-wizard", response_class=HTMLResponse)
|
||||||
|
async def create_post_file_page(request: Request):
|
||||||
|
"""Create post from file wizard page."""
|
||||||
|
session = require_user_session(request)
|
||||||
|
if not session:
|
||||||
|
return RedirectResponse(url="/login", status_code=302)
|
||||||
|
|
||||||
|
# Check token limit for companies/employees
|
||||||
|
limit_reached = False
|
||||||
|
limit_message = ""
|
||||||
|
if session.account_type in ("company", "employee") and session.company_id:
|
||||||
|
can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
|
||||||
|
limit_reached = not can_create
|
||||||
|
limit_message = error_msg
|
||||||
|
|
||||||
|
user_id = UUID(session.user_id)
|
||||||
|
profile_picture = await get_user_avatar(session, user_id)
|
||||||
|
|
||||||
|
return templates.TemplateResponse("create_post_file.html", {
|
||||||
|
"request": request,
|
||||||
|
"page": "create",
|
||||||
|
"session": session,
|
||||||
|
"user_id": session.user_id,
|
||||||
|
"limit_reached": limit_reached,
|
||||||
|
"limit_message": limit_message,
|
||||||
|
"profile_picture": profile_picture
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
@user_router.get("/chat-create", response_class=HTMLResponse)
|
@user_router.get("/chat-create", response_class=HTMLResponse)
|
||||||
async def chat_create_page(request: Request):
|
async def chat_create_page(request: Request):
|
||||||
"""Chat-based post creation page."""
|
"""Chat-based post creation page."""
|
||||||
@@ -2063,6 +2124,117 @@ async def transcribe_audio(request: Request):
|
|||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@user_router.post("/api/link-extract")
|
||||||
|
async def extract_link(request: Request):
|
||||||
|
"""Extract context from a link and build a structured topic."""
|
||||||
|
session = require_user_session(request)
|
||||||
|
if not session:
|
||||||
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||||
|
|
||||||
|
# Check token limit for companies/employees
|
||||||
|
if session.account_type in ("company", "employee") and session.company_id:
|
||||||
|
can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
|
||||||
|
if not can_create:
|
||||||
|
raise HTTPException(status_code=429, detail=error_msg)
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = await request.json()
|
||||||
|
url = (data.get("url") or "").strip()
|
||||||
|
transcript = (data.get("transcript") or "").strip()
|
||||||
|
manual_title = (data.get("title") or "").strip()
|
||||||
|
source_type = (data.get("source_type") or "").strip()
|
||||||
|
source_url = (data.get("source_url") or "").strip()
|
||||||
|
|
||||||
|
if transcript:
|
||||||
|
source = {
|
||||||
|
"source_url": source_url or url,
|
||||||
|
"source_type": source_type or "manual",
|
||||||
|
"title": manual_title or "Manuelles Transkript",
|
||||||
|
"text": transcript
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
extractor = LinkExtractor()
|
||||||
|
try:
|
||||||
|
source = await extractor.extract(url)
|
||||||
|
except LinkExtractionError as exc:
|
||||||
|
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||||
|
|
||||||
|
builder = LinkTopicBuilderAgent()
|
||||||
|
builder.set_tracking_context(
|
||||||
|
operation="link_extract",
|
||||||
|
user_id=session.user_id,
|
||||||
|
company_id=session.company_id
|
||||||
|
)
|
||||||
|
topic = await builder.process(source)
|
||||||
|
|
||||||
|
return {"topic": topic, "source": source}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Link extraction failed: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@user_router.post("/api/file-extract")
|
||||||
|
async def extract_file(request: Request):
|
||||||
|
"""Extract context from an uploaded file and build a structured topic."""
|
||||||
|
session = require_user_session(request)
|
||||||
|
if not session:
|
||||||
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||||
|
|
||||||
|
# Check token limit for companies/employees
|
||||||
|
if session.account_type in ("company", "employee") and session.company_id:
|
||||||
|
can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
|
||||||
|
if not can_create:
|
||||||
|
raise HTTPException(status_code=429, detail=error_msg)
|
||||||
|
|
||||||
|
try:
|
||||||
|
form = await request.form()
|
||||||
|
upload: UploadFile = form.get("file") # type: ignore[assignment]
|
||||||
|
if not upload:
|
||||||
|
raise HTTPException(status_code=400, detail="Keine Datei hochgeladen.")
|
||||||
|
|
||||||
|
# Basic validation
|
||||||
|
allowed_ext = {".pdf", ".docx", ".pptx", ".xlsx", ".txt", ".md", ".rtf"}
|
||||||
|
filename = upload.filename or ""
|
||||||
|
ext = Path(filename).suffix.lower()
|
||||||
|
if not ext or ext not in allowed_ext:
|
||||||
|
raise HTTPException(status_code=400, detail="Dateityp nicht unterstützt.")
|
||||||
|
|
||||||
|
file_bytes = await upload.read()
|
||||||
|
max_bytes = 10 * 1024 * 1024 # 10 MB
|
||||||
|
if len(file_bytes) > max_bytes:
|
||||||
|
raise HTTPException(status_code=400, detail="Datei ist zu groß (max 10 MB).")
|
||||||
|
|
||||||
|
extractor = FileExtractor()
|
||||||
|
try:
|
||||||
|
text = extractor.extract_text(file_bytes, filename)
|
||||||
|
except FileExtractionError as exc:
|
||||||
|
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||||
|
|
||||||
|
source = {
|
||||||
|
"source_url": "",
|
||||||
|
"source_type": "file",
|
||||||
|
"title": filename or "Datei",
|
||||||
|
"text": text
|
||||||
|
}
|
||||||
|
|
||||||
|
builder = LinkTopicBuilderAgent()
|
||||||
|
builder.set_tracking_context(
|
||||||
|
operation="file_extract",
|
||||||
|
user_id=session.user_id,
|
||||||
|
company_id=session.company_id
|
||||||
|
)
|
||||||
|
topic = await builder.process(source)
|
||||||
|
|
||||||
|
return {"topic": topic, "source": source}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"File extraction failed: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
@user_router.post("/api/hooks")
|
@user_router.post("/api/hooks")
|
||||||
async def generate_hooks(
|
async def generate_hooks(
|
||||||
request: Request,
|
request: Request,
|
||||||
|
|||||||
Reference in New Issue
Block a user