Added file and link post creation
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
# Core Dependencies
|
# Core Dependencies
|
||||||
python-dotenv==1.0.0
|
python-dotenv==1.0.0
|
||||||
pydantic==2.5.0
|
pydantic==2.7.4
|
||||||
pydantic-settings==2.1.0
|
pydantic-settings==2.3.0
|
||||||
|
|
||||||
# AI & APIs
|
# AI & APIs
|
||||||
openai==1.54.0
|
openai==1.54.0
|
||||||
@@ -41,5 +41,11 @@ uvicorn==0.32.0
|
|||||||
jinja2==3.1.4
|
jinja2==3.1.4
|
||||||
python-multipart==0.0.9
|
python-multipart==0.0.9
|
||||||
|
|
||||||
|
# Link Extraction
|
||||||
|
trafilatura==1.7.0
|
||||||
|
youtube-transcript-api==0.6.2
|
||||||
|
pypdf==4.2.0
|
||||||
|
docling==2.74.0
|
||||||
|
|
||||||
# Teams Bot JWT validation
|
# Teams Bot JWT validation
|
||||||
PyJWT>=2.8.0
|
PyJWT>=2.8.0
|
||||||
|
|||||||
95
src/agents/link_topic_builder.py
Normal file
95
src/agents/link_topic_builder.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
"""Agent to build a structured topic from extracted link content."""
|
||||||
|
import json
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from src.agents.base import BaseAgent
|
||||||
|
|
||||||
|
|
||||||
|
class LinkTopicBuilderAgent(BaseAgent):
    """Build a structured topic dictionary from extracted link content."""

    def __init__(self) -> None:
        super().__init__("link_topic_builder")

    async def process(self, source: Dict[str, Any]) -> Dict[str, Any]:
        """Turn extracted link content into a structured topic dict.

        Args:
            source: Mapping produced by the link/file extractors with the
                keys ``text``, ``source_url``, ``source_type`` and ``title``.

        Returns:
            Topic dict with all required keys guaranteed to exist
            (``title``, ``summary``, ``key_points``, ...).

        Raises:
            ValueError: If the content is too short or the model response
                cannot be parsed into a JSON object.
        """
        content = source.get("text", "")
        source_url = source.get("source_url", "")
        source_type = source.get("source_type", "web")
        source_title = source.get("title", "")

        # Very short content would only produce a hallucinated topic.
        if not content or len(content.strip()) < 200:
            raise ValueError("Zu wenig Inhalt, um ein sinnvolles Topic zu erstellen.")

        # Keep content length reasonable for the model
        max_chars = 12000
        if len(content) > max_chars:
            content = content[:max_chars]

        system_prompt = """Du bist ein Assistent, der aus einem Link-Inhalt ein strukturiertes Topic für einen LinkedIn-Post erstellt.

Gib NUR valides JSON zurück (keine Erklärungen)."""

        user_prompt = f"""Quelle:
- Typ: {source_type}
- Titel: {source_title}
- URL: {source_url}

Inhalt:
\"\"\"{content}\"\"\"

Erstelle ein Topic im folgenden JSON-Format:
{{
"title": "Kurzer, präziser Titel (max 80 Zeichen)",
"summary": "Detaillierte, vollumfängliche Zusammenfassung (8-12 Sätze, alle Hauptpunkte abdecken)",
"extended_summary": "Längere Zusammenfassung (150-250 Wörter) mit Kontext, Kernaussagen, Belegen und Implikationen",
"outline": ["Abschnitt 1: ...", "Abschnitt 2: ...", "..."],
"fact": "1-2 Sätze als Kernaussage/Fakt",
"key_points": ["Punkt 1", "Punkt 2", "..."],
"key_facts": ["Konkreter Fakt 1", "Konkreter Fakt 2", "..."],
"quotes": ["Kurzes Zitat falls sinnvoll", "..."],
"relevance": "Warum ist das für die Zielgruppe relevant? (1 Satz)",
"category": "Link",
"source": "{source_url}",
"source_title": "{source_title}",
"source_type": "{source_type}"
}}

Regeln:
- Halte dich an das JSON-Format.
- Keine erfundenen Fakten: nutze NUR den gegebenen Inhalt.
- Wenn keine Zitate vorhanden sind, gib ein leeres Array zurück.
- Summary und extended_summary müssen unterschiedliche Detailtiefe haben.
"""

        result = await self.call_openai(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            model="gpt-4o",
            # Low temperature: we want faithful extraction, not creativity.
            temperature=0.2,
            response_format={"type": "json_object"}
        )

        # Fix: catch only parse-related errors and preserve the exception
        # chain; previously a broad `except Exception` swallowed the cause.
        try:
            topic = json.loads(result)
        except (json.JSONDecodeError, TypeError) as exc:
            logger.warning(f"Failed to parse topic JSON: {exc}")
            raise ValueError("Antwort konnte nicht verarbeitet werden.") from exc

        # Fix: valid JSON that is not an object (e.g. a list) previously
        # crashed below with AttributeError on .setdefault.
        if not isinstance(topic, dict):
            logger.warning(f"Topic JSON is not an object: {type(topic).__name__}")
            raise ValueError("Antwort konnte nicht verarbeitet werden.")

        # Ensure required fields exist
        topic.setdefault("category", "Link")
        topic.setdefault("source", source_url)
        topic.setdefault("source_title", source_title)
        topic.setdefault("source_type", source_type)
        topic.setdefault("title", source_title or "Link-Thema")
        topic.setdefault("summary", "")
        topic.setdefault("extended_summary", "")
        topic.setdefault("outline", [])
        # Fall back to a truncated summary if the model omitted "fact".
        topic.setdefault("fact", topic.get("summary", "")[:300])
        topic.setdefault("key_points", [])
        topic.setdefault("key_facts", [])
        topic.setdefault("quotes", [])
        topic.setdefault("relevance", "Relevantes Thema für die Zielgruppe")

        return topic
|
||||||
@@ -351,6 +351,35 @@ class WriterAgent(BaseAgent):
|
|||||||
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
|
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
|
||||||
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
|
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
|
||||||
|
|
||||||
|
summary_section = ""
|
||||||
|
if topic.get('summary'):
|
||||||
|
summary_section = f"\n**ZUSAMMENFASSUNG (vom Link):**\n{topic.get('summary')}\n"
|
||||||
|
|
||||||
|
extended_summary_section = ""
|
||||||
|
if topic.get('extended_summary'):
|
||||||
|
extended_summary_section = f"\n**DETAILLIERTE ZUSAMMENFASSUNG:**\n{topic.get('extended_summary')}\n"
|
||||||
|
|
||||||
|
outline_section = ""
|
||||||
|
outline = topic.get('outline', [])
|
||||||
|
if outline and isinstance(outline, list) and len(outline) > 0:
|
||||||
|
outline_section = "\n**INHALTS-GLIEDERUNG:**\n" + "\n".join([f"- {o}" for o in outline]) + "\n"
|
||||||
|
|
||||||
|
key_points_section = ""
|
||||||
|
key_points = topic.get('key_points', [])
|
||||||
|
if key_points and isinstance(key_points, list) and len(key_points) > 0:
|
||||||
|
key_points_section = "\n**KERNPUNKTE (vom Link):**\n" + "\n".join([f"- {p}" for p in key_points]) + "\n"
|
||||||
|
|
||||||
|
quotes_section = ""
|
||||||
|
quotes = topic.get('quotes', [])
|
||||||
|
if quotes and isinstance(quotes, list) and len(quotes) > 0:
|
||||||
|
quotes_section = "\n**ZITATE (optional verwenden):**\n" + "\n".join([f"- \"{q}\"" for q in quotes]) + "\n"
|
||||||
|
|
||||||
|
source_section = ""
|
||||||
|
if topic.get('source_title') or topic.get('source'):
|
||||||
|
source_title = topic.get('source_title') or ""
|
||||||
|
source_url = topic.get('source') or ""
|
||||||
|
source_section = f"\n**QUELLE:** {source_title} {source_url}\n"
|
||||||
|
|
||||||
why_section = ""
|
why_section = ""
|
||||||
if topic.get('why_this_person'):
|
if topic.get('why_this_person'):
|
||||||
why_section = f"\n**WARUM DU DARÜBER SCHREIBEN SOLLTEST:**\n{topic.get('why_this_person')}\n"
|
why_section = f"\n**WARUM DU DARÜBER SCHREIBEN SOLLTEST:**\n{topic.get('why_this_person')}\n"
|
||||||
@@ -391,7 +420,7 @@ Schreibe einen authentischen LinkedIn-Post, der:
|
|||||||
{angle_section}{hook_section}
|
{angle_section}{hook_section}
|
||||||
**KERN-FAKT / INHALT:**
|
**KERN-FAKT / INHALT:**
|
||||||
{topic.get('fact', topic.get('description', ''))}
|
{topic.get('fact', topic.get('description', ''))}
|
||||||
{facts_section}{thoughts_section}
|
{summary_section}{extended_summary_section}{outline_section}{key_points_section}{facts_section}{quotes_section}{source_section}{thoughts_section}
|
||||||
**WARUM RELEVANT:**
|
**WARUM RELEVANT:**
|
||||||
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
|
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
|
||||||
{why_section}
|
{why_section}
|
||||||
@@ -941,6 +970,35 @@ Gib NUR den überarbeiteten Post zurück - keine Kommentare."""
|
|||||||
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
|
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
|
||||||
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
|
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
|
||||||
|
|
||||||
|
summary_section = ""
|
||||||
|
if topic.get('summary'):
|
||||||
|
summary_section = f"\n**ZUSAMMENFASSUNG (vom Link):**\n{topic.get('summary')}\n"
|
||||||
|
|
||||||
|
extended_summary_section = ""
|
||||||
|
if topic.get('extended_summary'):
|
||||||
|
extended_summary_section = f"\n**DETAILLIERTE ZUSAMMENFASSUNG:**\n{topic.get('extended_summary')}\n"
|
||||||
|
|
||||||
|
outline_section = ""
|
||||||
|
outline = topic.get('outline', [])
|
||||||
|
if outline and isinstance(outline, list) and len(outline) > 0:
|
||||||
|
outline_section = "\n**INHALTS-GLIEDERUNG:**\n" + "\n".join([f"- {o}" for o in outline]) + "\n"
|
||||||
|
|
||||||
|
key_points_section = ""
|
||||||
|
key_points = topic.get('key_points', [])
|
||||||
|
if key_points and isinstance(key_points, list) and len(key_points) > 0:
|
||||||
|
key_points_section = "\n**KERNPUNKTE (vom Link):**\n" + "\n".join([f"- {p}" for p in key_points]) + "\n"
|
||||||
|
|
||||||
|
quotes_section = ""
|
||||||
|
quotes = topic.get('quotes', [])
|
||||||
|
if quotes and isinstance(quotes, list) and len(quotes) > 0:
|
||||||
|
quotes_section = "\n**ZITATE (optional verwenden):**\n" + "\n".join([f"- \"{q}\"" for q in quotes]) + "\n"
|
||||||
|
|
||||||
|
source_section = ""
|
||||||
|
if topic.get('source_title') or topic.get('source'):
|
||||||
|
source_title = topic.get('source_title') or ""
|
||||||
|
source_url = topic.get('source') or ""
|
||||||
|
source_section = f"\n**QUELLE:** {source_title} {source_url}\n"
|
||||||
|
|
||||||
# User thoughts section
|
# User thoughts section
|
||||||
thoughts_section = ""
|
thoughts_section = ""
|
||||||
if user_thoughts:
|
if user_thoughts:
|
||||||
@@ -977,7 +1035,7 @@ Schreibe einen authentischen LinkedIn-Post, der:
|
|||||||
{angle_section}{hook_section}
|
{angle_section}{hook_section}
|
||||||
**KERN-FAKT / INHALT:**
|
**KERN-FAKT / INHALT:**
|
||||||
{topic.get('fact', topic.get('description', ''))}
|
{topic.get('fact', topic.get('description', ''))}
|
||||||
{facts_section}{thoughts_section}
|
{summary_section}{extended_summary_section}{outline_section}{key_points_section}{facts_section}{quotes_section}{source_section}{thoughts_section}
|
||||||
**WARUM RELEVANT:**
|
**WARUM RELEVANT:**
|
||||||
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
|
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
|
||||||
|
|
||||||
@@ -1063,6 +1121,16 @@ Antworte im JSON-Format:
|
|||||||
if post_type.description:
|
if post_type.description:
|
||||||
post_type_section += f"\n{post_type.description}"
|
post_type_section += f"\n{post_type.description}"
|
||||||
|
|
||||||
|
content_block = topic.get('fact', topic.get('description', 'Keine Details verfügbar'))
|
||||||
|
if topic.get('summary'):
|
||||||
|
content_block += f"\n\nZUSAMMENFASSUNG:\n{topic.get('summary')}"
|
||||||
|
if topic.get('extended_summary'):
|
||||||
|
content_block += f"\n\nDETAILLIERTE ZUSAMMENFASSUNG:\n{topic.get('extended_summary')}"
|
||||||
|
if topic.get('outline') and isinstance(topic.get('outline'), list):
|
||||||
|
content_block += "\n\nGLIEDERUNG:\n" + "\n".join([f"- {o}" for o in topic.get('outline', [])])
|
||||||
|
if topic.get('key_points') and isinstance(topic.get('key_points'), list):
|
||||||
|
content_block += "\n\nKERNPUNKTE:\n" + "\n".join([f"- {p}" for p in topic.get('key_points', [])])
|
||||||
|
|
||||||
user_prompt = f"""Generiere 4 Hooks für dieses Thema:
|
user_prompt = f"""Generiere 4 Hooks für dieses Thema:
|
||||||
|
|
||||||
THEMA: {topic.get('title', 'Unbekanntes Thema')}
|
THEMA: {topic.get('title', 'Unbekanntes Thema')}
|
||||||
@@ -1070,7 +1138,7 @@ THEMA: {topic.get('title', 'Unbekanntes Thema')}
|
|||||||
KATEGORIE: {topic.get('category', 'Allgemein')}
|
KATEGORIE: {topic.get('category', 'Allgemein')}
|
||||||
|
|
||||||
KERN-FAKT/INHALT:
|
KERN-FAKT/INHALT:
|
||||||
{topic.get('fact', topic.get('description', 'Keine Details verfügbar'))}
|
{content_block}
|
||||||
{thoughts_section}{post_type_section}
|
{thoughts_section}{post_type_section}
|
||||||
|
|
||||||
Generiere jetzt die 4 verschiedenen Hooks im JSON-Format."""
|
Generiere jetzt die 4 verschiedenen Hooks im JSON-Format."""
|
||||||
|
|||||||
@@ -69,6 +69,10 @@ class Settings(BaseSettings):
|
|||||||
redis_url: str = "redis://redis:6379/0"
|
redis_url: str = "redis://redis:6379/0"
|
||||||
scheduler_enabled: bool = False # True only on dedicated scheduler container
|
scheduler_enabled: bool = False # True only on dedicated scheduler container
|
||||||
|
|
||||||
|
# YouTube (optional)
|
||||||
|
youtube_cookies: str = "" # Raw Cookie header value for transcript fetching
|
||||||
|
transcriptapi_key: str = "" # TranscriptAPI.com key
|
||||||
|
|
||||||
# Telegram Bot (experimental)
|
# Telegram Bot (experimental)
|
||||||
telegram_enabled: bool = False
|
telegram_enabled: bool = False
|
||||||
telegram_bot_token: str = ""
|
telegram_bot_token: str = ""
|
||||||
|
|||||||
54
src/services/file_extractor.py
Normal file
54
src/services/file_extractor.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
"""Extract text content from uploaded files using Docling."""
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
|
|
||||||
|
|
||||||
|
class FileExtractionError(RuntimeError):
    """Raised when file extraction fails.

    Wraps any underlying Docling/IO error so callers can catch one
    domain-specific exception type instead of arbitrary runtime errors.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class FileExtractor:
    """Extract text from uploaded files via Docling.

    A single ``DocumentConverter`` is created per instance and reused
    across calls; PDF OCR is disabled so only the embedded text layer
    is extracted (fast path).
    """

    def __init__(self) -> None:
        # OCR off: we only want the text layer, not image recognition.
        pdf_options = PdfPipelineOptions(do_ocr=False)
        self._converter = DocumentConverter(
            format_options={
                InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_options)
            }
        )

    def extract_text(self, file_bytes: bytes, filename: str) -> str:
        """Extract markdown text from raw uploaded file bytes.

        Args:
            file_bytes: Raw file content.
            filename: Original filename; its extension lets Docling
                detect the document format.

        Returns:
            The document content exported as markdown, stripped.

        Raises:
            FileExtractionError: If the file is empty, yields no text,
                or cannot be processed.
        """
        if not file_bytes:
            raise FileExtractionError("Leere Datei.")

        # splitext already returns "" when there is no extension, so the
        # previous explicit '"." in filename' check was redundant.
        suffix = os.path.splitext(filename)[1] if filename else ""

        # Fix: explicit sentinel instead of the fragile `"tmp_path" in
        # locals()` probing used before.
        tmp_path: Optional[str] = None
        try:
            # Docling works on file paths, so spill the bytes to a temp
            # file; delete=False because Docling reopens it by name.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp.write(file_bytes)
                tmp_path = tmp.name

            result = self._converter.convert(tmp_path)
            document = result.document
            text = document.export_to_markdown().strip()
            if not text:
                raise FileExtractionError("Keine Inhalte im Dokument gefunden.")
            return text
        except FileExtractionError:
            raise
        except Exception as exc:
            raise FileExtractionError(f"Datei konnte nicht verarbeitet werden: {exc}") from exc
        finally:
            # Best-effort cleanup of the temp file.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass
|
||||||
324
src/services/link_extractor.py
Normal file
324
src/services/link_extractor.py
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
"""Extract content from links (web pages, PDFs, YouTube transcripts)."""
|
||||||
|
import asyncio
|
||||||
|
import ipaddress
|
||||||
|
import re
|
||||||
|
from io import BytesIO
|
||||||
|
from typing import Dict, Optional
|
||||||
|
from urllib.parse import urlparse, parse_qs
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from src.config import settings
|
||||||
|
import trafilatura
|
||||||
|
try:
|
||||||
|
from trafilatura.metadata import extract_metadata as trafilatura_extract_metadata
|
||||||
|
except Exception: # pragma: no cover - optional path
|
||||||
|
trafilatura_extract_metadata = None
|
||||||
|
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
||||||
|
from pypdf import PdfReader
|
||||||
|
|
||||||
|
|
||||||
|
class LinkExtractionError(RuntimeError):
    """Raised when link extraction fails.

    Single domain-specific error type covering validation failures,
    fetch errors, and unreadable content.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class LinkExtractor:
    """Extract text content from supported link types.

    Supported sources: plain web pages (via trafilatura), PDFs (via
    pypdf) and YouTube videos (transcript via TranscriptAPI or the
    youtube-transcript-api fallback).
    """

    # Hard cap on downloaded payloads (~2MB) to avoid memory blowups.
    MAX_BYTES = 2_000_000  # ~2MB
    TIMEOUT = 10.0
    MAX_REDIRECTS = 3

    async def extract(self, url: str) -> Dict[str, Optional[str]]:
        """Extract content and metadata from a link.

        Returns:
            Dict with keys ``source_url``, ``source_type`` (one of
            "youtube"/"pdf"/"web"), ``title`` and ``text``.

        Raises:
            LinkExtractionError: On invalid/blocked URLs or when no
                readable content could be extracted.
        """
        normalized = self._normalize_url(url)
        self._validate_url(normalized)

        if self._is_youtube_url(normalized):
            video_id = self._extract_youtube_id(normalized)
            if not video_id:
                raise LinkExtractionError("YouTube-Link konnte nicht erkannt werden.")
            transcript = ""
            title = ""

            # Prefer the paid TranscriptAPI when configured; it is more
            # reliable than scraping YouTube directly.
            if (settings.transcriptapi_key or "").strip():
                try:
                    transcript, title = await self._fetch_transcriptapi(normalized)
                except Exception as exc:
                    logger.warning(f"TranscriptAPI failed, falling back to direct fetch: {exc}")

            if not transcript:
                transcript = await self._fetch_youtube_transcript(video_id)
            if not title:
                title = await self._fetch_youtube_title(normalized)
            return {
                "source_url": normalized,
                "source_type": "youtube",
                "title": title or "YouTube Video",
                "text": transcript,
            }

        content, content_type = await self._fetch_url(normalized)
        if self._is_pdf(normalized, content_type):
            text, title = self._extract_pdf(content)
            return {
                "source_url": normalized,
                "source_type": "pdf",
                "title": title or "PDF Dokument",
                "text": text,
            }

        text, title = self._extract_html(content)
        if not text:
            raise LinkExtractionError("Konnte keinen lesbaren Text aus der Seite extrahieren.")

        return {
            "source_url": normalized,
            "source_type": "web",
            "title": title or "Webseite",
            "text": text,
        }

    def _normalize_url(self, url: str) -> str:
        """Trim the URL and default to https:// when no scheme is given."""
        url = url.strip()
        if not url:
            raise LinkExtractionError("Bitte einen Link eingeben.")
        if not re.match(r"^https?://", url, re.IGNORECASE):
            url = f"https://{url}"
        return url

    def _validate_url(self, url: str) -> None:
        """Reject non-http(s) schemes and obvious local/private targets.

        NOTE(review): this blocks literal private IPs and localhost names
        but does not resolve DNS, so a hostname pointing at a private IP
        still passes — acceptable here, but not a full SSRF guard.
        """
        parsed = urlparse(url)
        if parsed.scheme not in ("http", "https"):
            raise LinkExtractionError("Nur http/https Links sind erlaubt.")
        if not parsed.hostname:
            raise LinkExtractionError("Ungültiger Link.")

        hostname = parsed.hostname.lower()
        if hostname in {"localhost", "127.0.0.1", "0.0.0.0", "::1"}:
            raise LinkExtractionError("Lokale Links sind nicht erlaubt.")
        if hostname.endswith(".local"):
            raise LinkExtractionError("Lokale Links sind nicht erlaubt.")

        # Block private IPs if hostname is an IP literal
        try:
            ip = ipaddress.ip_address(hostname)
            if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
                raise LinkExtractionError("Lokale Links sind nicht erlaubt.")
        except ValueError:
            # Not an IP literal — a regular hostname is fine.
            pass

    async def _fetch_url(self, url: str) -> tuple[bytes, str]:
        """Download a URL; return (body bytes, lowercased content-type)."""
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; LinkedInWorkflowBot/1.0)"
        }
        async with httpx.AsyncClient(
            follow_redirects=True,
            max_redirects=self.MAX_REDIRECTS,
            timeout=self.TIMEOUT,
            headers=headers,
            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
        ) as client:
            response = await client.get(url)
            response.raise_for_status()
            content = response.content
            # Size check happens after download; MAX_BYTES only bounds
            # what we are willing to process, not the transfer itself.
            if len(content) > self.MAX_BYTES:
                raise LinkExtractionError("Die Seite ist zu groß, um verarbeitet zu werden.")
            content_type = response.headers.get("content-type", "").lower()
            return content, content_type

    def _is_pdf(self, url: str, content_type: str) -> bool:
        """Detect PDFs by content-type header or .pdf URL suffix."""
        if "application/pdf" in content_type:
            return True
        return url.lower().endswith(".pdf")

    def _extract_pdf(self, content: bytes) -> tuple[str, Optional[str]]:
        """Extract plain text (and optional title) from PDF bytes."""
        try:
            reader = PdfReader(BytesIO(content))
            text_parts = []
            for page in reader.pages:
                page_text = page.extract_text() or ""
                if page_text:
                    text_parts.append(page_text)
            text = "\n".join(text_parts).strip()
            title = None
            try:
                title = reader.metadata.title if reader.metadata else None
            except Exception:
                # Metadata access can fail on malformed PDFs; title is optional.
                title = None
            if not text:
                raise LinkExtractionError("Konnte keinen Text aus dem PDF extrahieren.")
            return text, title
        except LinkExtractionError:
            raise
        except Exception as exc:
            raise LinkExtractionError(f"PDF-Extraktion fehlgeschlagen: {exc}") from exc

    def _extract_html(self, content: bytes) -> tuple[str, Optional[str]]:
        """Extract readable article text (and optional title) from HTML."""
        html = content.decode("utf-8", errors="ignore")
        text = trafilatura.extract(
            html,
            include_comments=False,
            include_tables=False,
            include_formatting=False,
            output_format="txt"
        )
        title = None
        try:
            # Metadata extraction is an optional trafilatura feature.
            if trafilatura_extract_metadata:
                metadata = trafilatura_extract_metadata(html)
                if metadata and metadata.title:
                    title = metadata.title
        except Exception:
            title = None
        return (text or "").strip(), title

    def _is_youtube_url(self, url: str) -> bool:
        """Return True for youtube.com / youtu.be hosts."""
        host = urlparse(url).hostname or ""
        host = host.lower()
        return "youtube.com" in host or "youtu.be" in host

    def _extract_youtube_id(self, url: str) -> Optional[str]:
        """Extract the video id from watch, short-link, or /shorts/ URLs."""
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        if "youtu.be" in host:
            return parsed.path.strip("/").split("/")[0] or None
        if "youtube.com" in host:
            qs = parse_qs(parsed.query)
            if "v" in qs and qs["v"]:
                return qs["v"][0]
            # /shorts/{id}
            if parsed.path.startswith("/shorts/"):
                return parsed.path.split("/")[2] if len(parsed.path.split("/")) > 2 else None
        return None

    async def _fetch_youtube_transcript(self, video_id: str) -> str:
        """Fetch a transcript via youtube-transcript-api.

        Preference order: manually created captions, then auto-generated,
        then any transcript matching the language list.
        """
        # Fix: read cookies BEFORE the try block — the except handler
        # below references `cookies`, and an early failure inside the try
        # previously raised UnboundLocalError, masking the real error.
        cookies = (settings.youtube_cookies or "").strip()
        try:
            languages = ["de", "de-DE", "de-AT", "de-CH", "en", "en-US", "en-GB"]

            # Prefer full transcript list to handle generated vs. manually created captions
            transcript_list = await asyncio.to_thread(
                YouTubeTranscriptApi.list_transcripts,
                video_id,
                cookies=cookies or None
            )

            transcript = None
            try:
                transcript = transcript_list.find_manually_created_transcript(languages)
            except Exception:
                pass

            if transcript is None:
                try:
                    transcript = transcript_list.find_generated_transcript(languages)
                except Exception:
                    pass

            if transcript is None:
                try:
                    transcript = transcript_list.find_transcript(languages)
                except Exception:
                    transcript = None

            if transcript is None:
                raise LinkExtractionError("Kein passendes Transkript gefunden.")

            # fetch() is blocking; run it off the event loop.
            data = await asyncio.to_thread(transcript.fetch)
            text = " ".join([item.get("text", "") for item in data]).strip()
            if not text:
                raise LinkExtractionError("Kein Transkript verfügbar.")
            return text
        except (TranscriptsDisabled, NoTranscriptFound):
            raise LinkExtractionError("Für dieses Video ist kein Transkript verfügbar.")
        except Exception as exc:
            logger.exception(f"YouTube transcript fetch failed: {exc}")
            # Dump raw endpoint responses to help diagnose YouTube blocks.
            debug_dump = await self._debug_youtube_dump(video_id, cookies or None)
            logger.warning(f"YouTube debug dump for {video_id}:\n{debug_dump}")
            raise LinkExtractionError(
                "YouTube-Transkript konnte nicht geladen werden. "
                "YouTube blockiert den Abruf manchmal. "
                "Wenn du Zugriff hast, setze die Umgebungsvariable "
                "`YOUTUBE_COOKIES` mit gültigen Cookies."
            ) from exc

    async def _debug_youtube_dump(self, video_id: str, cookies: Optional[str]) -> str:
        """Probe YouTube caption endpoints and collect response snippets for logging."""
        urls = [
            f"https://www.youtube.com/api/timedtext?type=list&v={video_id}",
            f"https://www.youtube.com/api/timedtext?lang=de&v={video_id}",
            f"https://www.youtube.com/api/timedtext?lang=de&v={video_id}&fmt=json3",
            f"https://www.youtube.com/api/timedtext?lang=en&v={video_id}",
            f"https://www.youtube.com/watch?v={video_id}",
        ]
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; LinkedInWorkflowBot/1.0)"
        }
        if cookies:
            headers["Cookie"] = cookies

        lines = [f"cookies_used={bool(cookies)}"]
        async with httpx.AsyncClient(timeout=self.TIMEOUT, headers=headers) as client:
            for url in urls:
                try:
                    resp = await client.get(url)
                    body = resp.text
                    snippet = body[:2000].replace("\n", "\\n").replace("\r", "")
                    lines.append(f"URL: {url}")
                    lines.append(f"STATUS: {resp.status_code}")
                    lines.append(f"BODY_SNIPPET: {snippet}")
                except Exception as exc:
                    lines.append(f"URL: {url}")
                    lines.append(f"ERROR: {repr(exc)}")

        return "\n".join(lines)

    async def _fetch_transcriptapi(self, video_url: str) -> tuple[str, str]:
        """Fetch transcript and title via TranscriptAPI.com.

        Returns:
            (transcript text, video title — may be empty).

        Raises:
            LinkExtractionError: If the key is missing, the API errors,
                or no transcript text is returned.
        """
        api_key = (settings.transcriptapi_key or "").strip()
        if not api_key:
            raise LinkExtractionError("TranscriptAPI Key fehlt.")

        endpoint = "https://transcriptapi.com/api/v2/youtube/transcript"
        params = {
            "video_url": video_url,
            "format": "json",
            "include_timestamp": "false",
            "send_metadata": "true",
        }
        headers = {"Authorization": f"Bearer {api_key}"}

        async with httpx.AsyncClient(timeout=self.TIMEOUT) as client:
            resp = await client.get(endpoint, params=params, headers=headers)
            if resp.status_code != 200:
                raise LinkExtractionError(f"TranscriptAPI Fehler: {resp.status_code}")
            data = resp.json()

        # The API may return the transcript as segment list or plain string.
        transcript = data.get("transcript")
        text = ""
        if isinstance(transcript, list):
            text = " ".join([item.get("text", "") for item in transcript]).strip()
        elif isinstance(transcript, str):
            text = transcript.strip()

        title = ""
        meta = data.get("metadata") or {}
        if isinstance(meta, dict):
            title = meta.get("title", "") or ""

        if not text:
            raise LinkExtractionError("TranscriptAPI lieferte kein Transkript.")

        return text, title

    async def _fetch_youtube_title(self, url: str) -> Optional[str]:
        """Best-effort title lookup via the public oEmbed endpoint."""
        oembed = f"https://www.youtube.com/oembed?format=json&url={url}"
        try:
            async with httpx.AsyncClient(timeout=self.TIMEOUT) as client:
                response = await client.get(oembed)
                if response.status_code != 200:
                    return None
                data = response.json()
                return data.get("title")
        except Exception:
            # Title is cosmetic; never fail extraction because of it.
            return None
|
||||||
1221
src/web/templates/user/create_post_file.html
Normal file
1221
src/web/templates/user/create_post_file.html
Normal file
File diff suppressed because it is too large
Load Diff
1234
src/web/templates/user/create_post_link.html
Normal file
1234
src/web/templates/user/create_post_link.html
Normal file
File diff suppressed because it is too large
Load Diff
@@ -14,7 +14,7 @@
|
|||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<div class="grid md:grid-cols-2 gap-6">
|
<div class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
||||||
<a href="/create/wizard"
|
<a href="/create/wizard"
|
||||||
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
|
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
|
||||||
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
|
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
|
||||||
@@ -38,6 +38,30 @@
|
|||||||
<h2 class="text-lg font-semibold text-white mb-2">Per Chat erstellen</h2>
|
<h2 class="text-lg font-semibold text-white mb-2">Per Chat erstellen</h2>
|
||||||
<p class="text-gray-400 text-sm">Schnell und flexibel mit dem Chat-Assistenten.</p>
|
<p class="text-gray-400 text-sm">Schnell und flexibel mit dem Chat-Assistenten.</p>
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
|
<a href="/create/link-wizard"
|
||||||
|
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
|
||||||
|
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
|
||||||
|
<svg class="w-6 h-6 text-brand-highlight" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||||
|
d="M13.828 10.172a4 4 0 010 5.656l-3 3a4 4 0 11-5.656-5.656l1.5-1.5m4.328-4.328a4 4 0 015.656 0l3 3a4 4 0 11-5.656 5.656l-1.5-1.5"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<h2 class="text-lg font-semibold text-white mb-2">Aus Link erstellen</h2>
|
||||||
|
<p class="text-gray-400 text-sm">Webseite, PDF oder YouTube-Link analysieren und daraus posten.</p>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
<a href="/create/file-wizard"
|
||||||
|
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
|
||||||
|
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
|
||||||
|
<svg class="w-6 h-6 text-brand-highlight" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
|
||||||
|
d="M7 2h7l5 5v13a2 2 0 01-2 2H7a2 2 0 01-2-2V4a2 2 0 012-2z"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
<h2 class="text-lg font-semibold text-white mb-2">Aus Datei erstellen</h2>
|
||||||
|
<p class="text-gray-400 text-sm">Datei hochladen und daraus einen Post generieren.</p>
|
||||||
|
</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@@ -41,6 +41,9 @@ from src.services.background_jobs import (
|
|||||||
)
|
)
|
||||||
from src.services.db_job_manager import job_manager
|
from src.services.db_job_manager import job_manager
|
||||||
from src.services.storage_service import storage
|
from src.services.storage_service import storage
|
||||||
|
from src.services.link_extractor import LinkExtractor, LinkExtractionError
|
||||||
|
from src.services.file_extractor import FileExtractor, FileExtractionError
|
||||||
|
from src.agents.link_topic_builder import LinkTopicBuilderAgent
|
||||||
|
|
||||||
# Router for user frontend
|
# Router for user frontend
|
||||||
user_router = APIRouter(tags=["user"])
|
user_router = APIRouter(tags=["user"])
|
||||||
@@ -1813,6 +1816,64 @@ async def create_post_page(request: Request):
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@user_router.get("/create/link-wizard", response_class=HTMLResponse)
|
||||||
|
async def create_post_link_page(request: Request):
|
||||||
|
"""Create post from link wizard page."""
|
||||||
|
session = require_user_session(request)
|
||||||
|
if not session:
|
||||||
|
return RedirectResponse(url="/login", status_code=302)
|
||||||
|
|
||||||
|
# Check token limit for companies/employees
|
||||||
|
limit_reached = False
|
||||||
|
limit_message = ""
|
||||||
|
if session.account_type in ("company", "employee") and session.company_id:
|
||||||
|
can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
|
||||||
|
limit_reached = not can_create
|
||||||
|
limit_message = error_msg
|
||||||
|
|
||||||
|
user_id = UUID(session.user_id)
|
||||||
|
profile_picture = await get_user_avatar(session, user_id)
|
||||||
|
|
||||||
|
return templates.TemplateResponse("create_post_link.html", {
|
||||||
|
"request": request,
|
||||||
|
"page": "create",
|
||||||
|
"session": session,
|
||||||
|
"user_id": session.user_id,
|
||||||
|
"limit_reached": limit_reached,
|
||||||
|
"limit_message": limit_message,
|
||||||
|
"profile_picture": profile_picture
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@user_router.get("/create/file-wizard", response_class=HTMLResponse)
|
||||||
|
async def create_post_file_page(request: Request):
|
||||||
|
"""Create post from file wizard page."""
|
||||||
|
session = require_user_session(request)
|
||||||
|
if not session:
|
||||||
|
return RedirectResponse(url="/login", status_code=302)
|
||||||
|
|
||||||
|
# Check token limit for companies/employees
|
||||||
|
limit_reached = False
|
||||||
|
limit_message = ""
|
||||||
|
if session.account_type in ("company", "employee") and session.company_id:
|
||||||
|
can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
|
||||||
|
limit_reached = not can_create
|
||||||
|
limit_message = error_msg
|
||||||
|
|
||||||
|
user_id = UUID(session.user_id)
|
||||||
|
profile_picture = await get_user_avatar(session, user_id)
|
||||||
|
|
||||||
|
return templates.TemplateResponse("create_post_file.html", {
|
||||||
|
"request": request,
|
||||||
|
"page": "create",
|
||||||
|
"session": session,
|
||||||
|
"user_id": session.user_id,
|
||||||
|
"limit_reached": limit_reached,
|
||||||
|
"limit_message": limit_message,
|
||||||
|
"profile_picture": profile_picture
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
@user_router.get("/chat-create", response_class=HTMLResponse)
|
@user_router.get("/chat-create", response_class=HTMLResponse)
|
||||||
async def chat_create_page(request: Request):
|
async def chat_create_page(request: Request):
|
||||||
"""Chat-based post creation page."""
|
"""Chat-based post creation page."""
|
||||||
@@ -2063,6 +2124,117 @@ async def transcribe_audio(request: Request):
|
|||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@user_router.post("/api/link-extract")
|
||||||
|
async def extract_link(request: Request):
|
||||||
|
"""Extract context from a link and build a structured topic."""
|
||||||
|
session = require_user_session(request)
|
||||||
|
if not session:
|
||||||
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||||
|
|
||||||
|
# Check token limit for companies/employees
|
||||||
|
if session.account_type in ("company", "employee") and session.company_id:
|
||||||
|
can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
|
||||||
|
if not can_create:
|
||||||
|
raise HTTPException(status_code=429, detail=error_msg)
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = await request.json()
|
||||||
|
url = (data.get("url") or "").strip()
|
||||||
|
transcript = (data.get("transcript") or "").strip()
|
||||||
|
manual_title = (data.get("title") or "").strip()
|
||||||
|
source_type = (data.get("source_type") or "").strip()
|
||||||
|
source_url = (data.get("source_url") or "").strip()
|
||||||
|
|
||||||
|
if transcript:
|
||||||
|
source = {
|
||||||
|
"source_url": source_url or url,
|
||||||
|
"source_type": source_type or "manual",
|
||||||
|
"title": manual_title or "Manuelles Transkript",
|
||||||
|
"text": transcript
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
extractor = LinkExtractor()
|
||||||
|
try:
|
||||||
|
source = await extractor.extract(url)
|
||||||
|
except LinkExtractionError as exc:
|
||||||
|
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||||
|
|
||||||
|
builder = LinkTopicBuilderAgent()
|
||||||
|
builder.set_tracking_context(
|
||||||
|
operation="link_extract",
|
||||||
|
user_id=session.user_id,
|
||||||
|
company_id=session.company_id
|
||||||
|
)
|
||||||
|
topic = await builder.process(source)
|
||||||
|
|
||||||
|
return {"topic": topic, "source": source}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Link extraction failed: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
|
@user_router.post("/api/file-extract")
|
||||||
|
async def extract_file(request: Request):
|
||||||
|
"""Extract context from an uploaded file and build a structured topic."""
|
||||||
|
session = require_user_session(request)
|
||||||
|
if not session:
|
||||||
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
||||||
|
|
||||||
|
# Check token limit for companies/employees
|
||||||
|
if session.account_type in ("company", "employee") and session.company_id:
|
||||||
|
can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
|
||||||
|
if not can_create:
|
||||||
|
raise HTTPException(status_code=429, detail=error_msg)
|
||||||
|
|
||||||
|
try:
|
||||||
|
form = await request.form()
|
||||||
|
upload: UploadFile = form.get("file") # type: ignore[assignment]
|
||||||
|
if not upload:
|
||||||
|
raise HTTPException(status_code=400, detail="Keine Datei hochgeladen.")
|
||||||
|
|
||||||
|
# Basic validation
|
||||||
|
allowed_ext = {".pdf", ".docx", ".pptx", ".xlsx", ".txt", ".md", ".rtf"}
|
||||||
|
filename = upload.filename or ""
|
||||||
|
ext = Path(filename).suffix.lower()
|
||||||
|
if not ext or ext not in allowed_ext:
|
||||||
|
raise HTTPException(status_code=400, detail="Dateityp nicht unterstützt.")
|
||||||
|
|
||||||
|
file_bytes = await upload.read()
|
||||||
|
max_bytes = 10 * 1024 * 1024 # 10 MB
|
||||||
|
if len(file_bytes) > max_bytes:
|
||||||
|
raise HTTPException(status_code=400, detail="Datei ist zu groß (max 10 MB).")
|
||||||
|
|
||||||
|
extractor = FileExtractor()
|
||||||
|
try:
|
||||||
|
text = extractor.extract_text(file_bytes, filename)
|
||||||
|
except FileExtractionError as exc:
|
||||||
|
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||||
|
|
||||||
|
source = {
|
||||||
|
"source_url": "",
|
||||||
|
"source_type": "file",
|
||||||
|
"title": filename or "Datei",
|
||||||
|
"text": text
|
||||||
|
}
|
||||||
|
|
||||||
|
builder = LinkTopicBuilderAgent()
|
||||||
|
builder.set_tracking_context(
|
||||||
|
operation="file_extract",
|
||||||
|
user_id=session.user_id,
|
||||||
|
company_id=session.company_id
|
||||||
|
)
|
||||||
|
topic = await builder.process(source)
|
||||||
|
|
||||||
|
return {"topic": topic, "source": source}
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"File extraction failed: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
|
||||||
@user_router.post("/api/hooks")
|
@user_router.post("/api/hooks")
|
||||||
async def generate_hooks(
|
async def generate_hooks(
|
||||||
request: Request,
|
request: Request,
|
||||||
|
|||||||
Reference in New Issue
Block a user