Added file- and link-based post creation

This commit is contained in:
2026-02-24 13:41:39 +01:00
parent 005059be84
commit ebafaef335
10 changed files with 3208 additions and 6 deletions

View File

@@ -1,7 +1,7 @@
# Core Dependencies # Core Dependencies
python-dotenv==1.0.0 python-dotenv==1.0.0
pydantic==2.5.0 pydantic==2.7.4
pydantic-settings==2.1.0 pydantic-settings==2.3.0
# AI & APIs # AI & APIs
openai==1.54.0 openai==1.54.0
@@ -41,5 +41,11 @@ uvicorn==0.32.0
jinja2==3.1.4 jinja2==3.1.4
python-multipart==0.0.9 python-multipart==0.0.9
# Link Extraction
trafilatura==1.7.0
youtube-transcript-api==0.6.2
pypdf==4.2.0
docling==2.74.0
# Teams Bot JWT validation # Teams Bot JWT validation
PyJWT>=2.8.0 PyJWT>=2.8.0

View File

@@ -0,0 +1,95 @@
"""Agent to build a structured topic from extracted link content."""
import json
from typing import Any, Dict
from loguru import logger
from src.agents.base import BaseAgent
class LinkTopicBuilderAgent(BaseAgent):
    """Build a structured topic dictionary from link content.

    Takes a ``source`` dict (as produced by LinkExtractor / FileExtractor)
    and asks the model to condense it into a fixed JSON topic schema that
    the downstream writer pipeline understands.
    """

    def __init__(self) -> None:
        super().__init__("link_topic_builder")

    async def process(self, source: Dict[str, Any]) -> Dict[str, Any]:
        """Condense extracted link/file content into a topic dict.

        Args:
            source: Mapping with ``text`` (required) plus optional
                ``source_url``, ``source_type`` and ``title``.

        Returns:
            A topic dict; every schema field is guaranteed to exist —
            missing ones are backfilled with defaults below.

        Raises:
            ValueError: If the content is too short or the model did not
                return a usable JSON object.
        """
        content = source.get("text", "")
        source_url = source.get("source_url", "")
        source_type = source.get("source_type", "web")
        source_title = source.get("title", "")

        # A couple of sentences is not enough to build a meaningful topic.
        if not content or len(content.strip()) < 200:
            raise ValueError("Zu wenig Inhalt, um ein sinnvolles Topic zu erstellen.")

        # Keep content length reasonable for the model
        max_chars = 12000
        if len(content) > max_chars:
            content = content[:max_chars]

        system_prompt = """Du bist ein Assistent, der aus einem Link-Inhalt ein strukturiertes Topic für einen LinkedIn-Post erstellt.
Gib NUR valides JSON zurück (keine Erklärungen)."""

        user_prompt = f"""Quelle:
- Typ: {source_type}
- Titel: {source_title}
- URL: {source_url}
Inhalt:
\"\"\"{content}\"\"\"
Erstelle ein Topic im folgenden JSON-Format:
{{
"title": "Kurzer, präziser Titel (max 80 Zeichen)",
"summary": "Detaillierte, vollumfängliche Zusammenfassung (8-12 Sätze, alle Hauptpunkte abdecken)",
"extended_summary": "Längere Zusammenfassung (150-250 Wörter) mit Kontext, Kernaussagen, Belegen und Implikationen",
"outline": ["Abschnitt 1: ...", "Abschnitt 2: ...", "..."],
"fact": "1-2 Sätze als Kernaussage/Fakt",
"key_points": ["Punkt 1", "Punkt 2", "..."],
"key_facts": ["Konkreter Fakt 1", "Konkreter Fakt 2", "..."],
"quotes": ["Kurzes Zitat falls sinnvoll", "..."],
"relevance": "Warum ist das für die Zielgruppe relevant? (1 Satz)",
"category": "Link",
"source": "{source_url}",
"source_title": "{source_title}",
"source_type": "{source_type}"
}}
Regeln:
- Halte dich an das JSON-Format.
- Keine erfundenen Fakten: nutze NUR den gegebenen Inhalt.
- Wenn keine Zitate vorhanden sind, gib ein leeres Array zurück.
- Summary und extended_summary müssen unterschiedliche Detailtiefe haben.
"""

        result = await self.call_openai(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            model="gpt-4o",
            temperature=0.2,
            response_format={"type": "json_object"}
        )

        try:
            topic = json.loads(result)
        except Exception as exc:
            logger.warning(f"Failed to parse topic JSON: {exc}")
            # Bug fix: chain the original parse error for diagnosability.
            raise ValueError("Antwort konnte nicht verarbeitet werden.") from exc

        # Bug fix: json_object mode guarantees valid JSON, but the top-level
        # value could still be a list/str/number; the setdefault calls below
        # require a dict and would otherwise crash with AttributeError.
        if not isinstance(topic, dict):
            logger.warning(f"Topic JSON is not an object: {type(topic).__name__}")
            raise ValueError("Antwort konnte nicht verarbeitet werden.")

        # Ensure required fields exist
        topic.setdefault("category", "Link")
        topic.setdefault("source", source_url)
        topic.setdefault("source_title", source_title)
        topic.setdefault("source_type", source_type)
        topic.setdefault("title", source_title or "Link-Thema")
        topic.setdefault("summary", "")
        topic.setdefault("extended_summary", "")
        topic.setdefault("outline", [])
        # Fall back to a truncated summary when the model omitted "fact".
        topic.setdefault("fact", topic.get("summary", "")[:300])
        topic.setdefault("key_points", [])
        topic.setdefault("key_facts", [])
        topic.setdefault("quotes", [])
        topic.setdefault("relevance", "Relevantes Thema für die Zielgruppe")
        return topic

View File

@@ -351,6 +351,35 @@ class WriterAgent(BaseAgent):
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0: if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n" facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
summary_section = ""
if topic.get('summary'):
summary_section = f"\n**ZUSAMMENFASSUNG (vom Link):**\n{topic.get('summary')}\n"
extended_summary_section = ""
if topic.get('extended_summary'):
extended_summary_section = f"\n**DETAILLIERTE ZUSAMMENFASSUNG:**\n{topic.get('extended_summary')}\n"
outline_section = ""
outline = topic.get('outline', [])
if outline and isinstance(outline, list) and len(outline) > 0:
outline_section = "\n**INHALTS-GLIEDERUNG:**\n" + "\n".join([f"- {o}" for o in outline]) + "\n"
key_points_section = ""
key_points = topic.get('key_points', [])
if key_points and isinstance(key_points, list) and len(key_points) > 0:
key_points_section = "\n**KERNPUNKTE (vom Link):**\n" + "\n".join([f"- {p}" for p in key_points]) + "\n"
quotes_section = ""
quotes = topic.get('quotes', [])
if quotes and isinstance(quotes, list) and len(quotes) > 0:
quotes_section = "\n**ZITATE (optional verwenden):**\n" + "\n".join([f"- \"{q}\"" for q in quotes]) + "\n"
source_section = ""
if topic.get('source_title') or topic.get('source'):
source_title = topic.get('source_title') or ""
source_url = topic.get('source') or ""
source_section = f"\n**QUELLE:** {source_title} {source_url}\n"
why_section = "" why_section = ""
if topic.get('why_this_person'): if topic.get('why_this_person'):
why_section = f"\n**WARUM DU DARÜBER SCHREIBEN SOLLTEST:**\n{topic.get('why_this_person')}\n" why_section = f"\n**WARUM DU DARÜBER SCHREIBEN SOLLTEST:**\n{topic.get('why_this_person')}\n"
@@ -391,7 +420,7 @@ Schreibe einen authentischen LinkedIn-Post, der:
{angle_section}{hook_section} {angle_section}{hook_section}
**KERN-FAKT / INHALT:** **KERN-FAKT / INHALT:**
{topic.get('fact', topic.get('description', ''))} {topic.get('fact', topic.get('description', ''))}
{facts_section}{thoughts_section} {summary_section}{extended_summary_section}{outline_section}{key_points_section}{facts_section}{quotes_section}{source_section}{thoughts_section}
**WARUM RELEVANT:** **WARUM RELEVANT:**
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')} {topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
{why_section} {why_section}
@@ -941,6 +970,35 @@ Gib NUR den überarbeiteten Post zurück - keine Kommentare."""
if key_facts and isinstance(key_facts, list) and len(key_facts) > 0: if key_facts and isinstance(key_facts, list) and len(key_facts) > 0:
facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n" facts_section = "\n**KEY FACTS (nutze diese!):**\n" + "\n".join([f"- {f}" for f in key_facts]) + "\n"
summary_section = ""
if topic.get('summary'):
summary_section = f"\n**ZUSAMMENFASSUNG (vom Link):**\n{topic.get('summary')}\n"
extended_summary_section = ""
if topic.get('extended_summary'):
extended_summary_section = f"\n**DETAILLIERTE ZUSAMMENFASSUNG:**\n{topic.get('extended_summary')}\n"
outline_section = ""
outline = topic.get('outline', [])
if outline and isinstance(outline, list) and len(outline) > 0:
outline_section = "\n**INHALTS-GLIEDERUNG:**\n" + "\n".join([f"- {o}" for o in outline]) + "\n"
key_points_section = ""
key_points = topic.get('key_points', [])
if key_points and isinstance(key_points, list) and len(key_points) > 0:
key_points_section = "\n**KERNPUNKTE (vom Link):**\n" + "\n".join([f"- {p}" for p in key_points]) + "\n"
quotes_section = ""
quotes = topic.get('quotes', [])
if quotes and isinstance(quotes, list) and len(quotes) > 0:
quotes_section = "\n**ZITATE (optional verwenden):**\n" + "\n".join([f"- \"{q}\"" for q in quotes]) + "\n"
source_section = ""
if topic.get('source_title') or topic.get('source'):
source_title = topic.get('source_title') or ""
source_url = topic.get('source') or ""
source_section = f"\n**QUELLE:** {source_title} {source_url}\n"
# User thoughts section # User thoughts section
thoughts_section = "" thoughts_section = ""
if user_thoughts: if user_thoughts:
@@ -977,7 +1035,7 @@ Schreibe einen authentischen LinkedIn-Post, der:
{angle_section}{hook_section} {angle_section}{hook_section}
**KERN-FAKT / INHALT:** **KERN-FAKT / INHALT:**
{topic.get('fact', topic.get('description', ''))} {topic.get('fact', topic.get('description', ''))}
{facts_section}{thoughts_section} {summary_section}{extended_summary_section}{outline_section}{key_points_section}{facts_section}{quotes_section}{source_section}{thoughts_section}
**WARUM RELEVANT:** **WARUM RELEVANT:**
{topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')} {topic.get('relevance', 'Aktuelles Thema für die Zielgruppe')}
@@ -1063,6 +1121,16 @@ Antworte im JSON-Format:
if post_type.description: if post_type.description:
post_type_section += f"\n{post_type.description}" post_type_section += f"\n{post_type.description}"
content_block = topic.get('fact', topic.get('description', 'Keine Details verfügbar'))
if topic.get('summary'):
content_block += f"\n\nZUSAMMENFASSUNG:\n{topic.get('summary')}"
if topic.get('extended_summary'):
content_block += f"\n\nDETAILLIERTE ZUSAMMENFASSUNG:\n{topic.get('extended_summary')}"
if topic.get('outline') and isinstance(topic.get('outline'), list):
content_block += "\n\nGLIEDERUNG:\n" + "\n".join([f"- {o}" for o in topic.get('outline', [])])
if topic.get('key_points') and isinstance(topic.get('key_points'), list):
content_block += "\n\nKERNPUNKTE:\n" + "\n".join([f"- {p}" for p in topic.get('key_points', [])])
user_prompt = f"""Generiere 4 Hooks für dieses Thema: user_prompt = f"""Generiere 4 Hooks für dieses Thema:
THEMA: {topic.get('title', 'Unbekanntes Thema')} THEMA: {topic.get('title', 'Unbekanntes Thema')}
@@ -1070,7 +1138,7 @@ THEMA: {topic.get('title', 'Unbekanntes Thema')}
KATEGORIE: {topic.get('category', 'Allgemein')} KATEGORIE: {topic.get('category', 'Allgemein')}
KERN-FAKT/INHALT: KERN-FAKT/INHALT:
{topic.get('fact', topic.get('description', 'Keine Details verfügbar'))} {content_block}
{thoughts_section}{post_type_section} {thoughts_section}{post_type_section}
Generiere jetzt die 4 verschiedenen Hooks im JSON-Format.""" Generiere jetzt die 4 verschiedenen Hooks im JSON-Format."""

View File

@@ -69,6 +69,10 @@ class Settings(BaseSettings):
redis_url: str = "redis://redis:6379/0" redis_url: str = "redis://redis:6379/0"
scheduler_enabled: bool = False # True only on dedicated scheduler container scheduler_enabled: bool = False # True only on dedicated scheduler container
# YouTube (optional)
youtube_cookies: str = "" # Raw Cookie header value for transcript fetching
transcriptapi_key: str = "" # TranscriptAPI.com key
# Telegram Bot (experimental) # Telegram Bot (experimental)
telegram_enabled: bool = False telegram_enabled: bool = False
telegram_bot_token: str = "" telegram_bot_token: str = ""

View File

@@ -0,0 +1,54 @@
"""Extract text content from uploaded files using Docling."""
import os
import tempfile
from typing import Optional
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
# Dedicated error type so callers can map extraction failures (content or
# user problems) to HTTP 400s instead of generic server errors.
class FileExtractionError(RuntimeError):
    """Raised when file extraction fails."""
class FileExtractor:
    """Extract text from uploaded files via Docling.

    Bytes are spilled to a temporary file because Docling converts from
    a filesystem path; the temp file is always removed afterwards.
    """

    def __init__(self) -> None:
        # OCR is disabled for PDFs: uploads are expected to carry a text
        # layer, and OCR would make conversion considerably slower.
        options = PdfPipelineOptions(do_ocr=False)
        self._converter = DocumentConverter(
            format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=options)}
        )

    def extract_text(self, file_bytes: bytes, filename: str) -> str:
        """Return the document's Markdown text.

        Args:
            file_bytes: Raw uploaded file content.
            filename: Original name; its extension hints the format to Docling.

        Raises:
            FileExtractionError: On empty input, empty extraction result,
                or any conversion failure.
        """
        if not file_bytes:
            raise FileExtractionError("Leere Datei.")

        suffix = os.path.splitext(filename)[1] if filename and "." in filename else ""
        tmp_path: Optional[str] = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
                handle.write(file_bytes)
                tmp_path = handle.name
            document = self._converter.convert(tmp_path).document
            markdown = document.export_to_markdown().strip()
            if not markdown:
                raise FileExtractionError("Keine Inhalte im Dokument gefunden.")
            return markdown
        except FileExtractionError:
            raise
        except Exception as exc:
            raise FileExtractionError(f"Datei konnte nicht verarbeitet werden: {exc}") from exc
        finally:
            # Best-effort cleanup of the spill file.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except Exception:
                    pass

View File

@@ -0,0 +1,324 @@
"""Extract content from links (web pages, PDFs, YouTube transcripts)."""
import asyncio
import ipaddress
import re
from io import BytesIO
from typing import Dict, Optional
from urllib.parse import urlparse, parse_qs
import httpx
from loguru import logger
from src.config import settings
import trafilatura
try:
from trafilatura.metadata import extract_metadata as trafilatura_extract_metadata
except Exception: # pragma: no cover - optional path
trafilatura_extract_metadata = None
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from pypdf import PdfReader
# Dedicated error type so route handlers can distinguish expected extraction
# problems (mapped to HTTP 400) from unexpected server errors.
class LinkExtractionError(RuntimeError):
    """Raised when link extraction fails."""
class LinkExtractor:
    """Extract text content from supported link types.

    Supported sources: regular web pages (trafilatura), PDFs (pypdf) and
    YouTube videos (transcript via TranscriptAPI.com when configured, else
    youtube-transcript-api).
    """

    MAX_BYTES = 2_000_000  # ~2MB download cap for fetched documents
    TIMEOUT = 10.0  # per-request HTTP timeout in seconds
    MAX_REDIRECTS = 3

    async def extract(self, url: str) -> Dict[str, Optional[str]]:
        """Extract content and metadata from a link.

        Returns:
            Dict with ``source_url``, ``source_type`` ("youtube" | "pdf" |
            "web"), ``title`` and ``text``.

        Raises:
            LinkExtractionError: For invalid/forbidden URLs or when no
                readable content could be extracted.
        """
        normalized = self._normalize_url(url)
        self._validate_url(normalized)

        if self._is_youtube_url(normalized):
            video_id = self._extract_youtube_id(normalized)
            if not video_id:
                raise LinkExtractionError("YouTube-Link konnte nicht erkannt werden.")
            transcript = ""
            title = ""
            # Prefer the hosted TranscriptAPI when a key is configured;
            # direct scraping is frequently blocked by YouTube.
            if (settings.transcriptapi_key or "").strip():
                try:
                    transcript, title = await self._fetch_transcriptapi(normalized)
                except Exception as exc:
                    logger.warning(f"TranscriptAPI failed, falling back to direct fetch: {exc}")
            if not transcript:
                transcript = await self._fetch_youtube_transcript(video_id)
            if not title:
                title = await self._fetch_youtube_title(normalized)
            return {
                "source_url": normalized,
                "source_type": "youtube",
                "title": title or "YouTube Video",
                "text": transcript,
            }

        content, content_type = await self._fetch_url(normalized)
        if self._is_pdf(normalized, content_type):
            text, title = self._extract_pdf(content)
            return {
                "source_url": normalized,
                "source_type": "pdf",
                "title": title or "PDF Dokument",
                "text": text,
            }

        text, title = self._extract_html(content)
        if not text:
            raise LinkExtractionError("Konnte keinen lesbaren Text aus der Seite extrahieren.")
        return {
            "source_url": normalized,
            "source_type": "web",
            "title": title or "Webseite",
            "text": text,
        }

    def _normalize_url(self, url: str) -> str:
        """Trim the URL and default to https:// when no scheme is given."""
        url = url.strip()
        if not url:
            raise LinkExtractionError("Bitte einen Link eingeben.")
        if not re.match(r"^https?://", url, re.IGNORECASE):
            url = f"https://{url}"
        return url

    def _validate_url(self, url: str) -> None:
        """Reject non-http(s) schemes and obviously local/private targets.

        NOTE(review): hostnames are not DNS-resolved here, so a public name
        that resolves to a private IP (or a redirect landing on one) can
        still slip through — confirm whether resolved-IP checking is needed.
        """
        parsed = urlparse(url)
        if parsed.scheme not in ("http", "https"):
            raise LinkExtractionError("Nur http/https Links sind erlaubt.")
        if not parsed.hostname:
            raise LinkExtractionError("Ungültiger Link.")
        hostname = parsed.hostname.lower()
        if hostname in {"localhost", "127.0.0.1", "0.0.0.0", "::1"}:
            raise LinkExtractionError("Lokale Links sind nicht erlaubt.")
        if hostname.endswith(".local"):
            raise LinkExtractionError("Lokale Links sind nicht erlaubt.")
        # Block private IPs if hostname is an IP literal
        try:
            ip = ipaddress.ip_address(hostname)
        except ValueError:
            return  # not an IP literal -> nothing more to check
        if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
            raise LinkExtractionError("Lokale Links sind nicht erlaubt.")

    async def _fetch_url(self, url: str) -> tuple[bytes, str]:
        """Download *url* and return ``(body, content_type)``, size-capped."""
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; LinkedInWorkflowBot/1.0)"
        }
        async with httpx.AsyncClient(
            follow_redirects=True,
            max_redirects=self.MAX_REDIRECTS,
            timeout=self.TIMEOUT,
            headers=headers,
            limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
        ) as client:
            response = await client.get(url)
            response.raise_for_status()
            content = response.content
            # Checked after the download completes: the cap protects the
            # downstream pipeline, not the network transfer itself.
            if len(content) > self.MAX_BYTES:
                raise LinkExtractionError("Die Seite ist zu groß, um verarbeitet zu werden.")
            content_type = response.headers.get("content-type", "").lower()
            return content, content_type

    def _is_pdf(self, url: str, content_type: str) -> bool:
        """Detect PDFs by content-type header or ``.pdf`` suffix."""
        if "application/pdf" in content_type:
            return True
        return url.lower().endswith(".pdf")

    def _extract_pdf(self, content: bytes) -> tuple[str, Optional[str]]:
        """Extract the page text (and title metadata, if any) from PDF bytes."""
        try:
            reader = PdfReader(BytesIO(content))
            text_parts = []
            for page in reader.pages:
                page_text = page.extract_text() or ""
                if page_text:
                    text_parts.append(page_text)
            text = "\n".join(text_parts).strip()
            title = None
            try:
                # Metadata access can raise on malformed documents.
                title = reader.metadata.title if reader.metadata else None
            except Exception:
                title = None
            if not text:
                raise LinkExtractionError("Konnte keinen Text aus dem PDF extrahieren.")
            return text, title
        except LinkExtractionError:
            raise
        except Exception as exc:
            raise LinkExtractionError(f"PDF-Extraktion fehlgeschlagen: {exc}") from exc

    def _extract_html(self, content: bytes) -> tuple[str, Optional[str]]:
        """Extract the main text and the page title from an HTML document."""
        html = content.decode("utf-8", errors="ignore")
        text = trafilatura.extract(
            html,
            include_comments=False,
            include_tables=False,
            include_formatting=False,
            output_format="txt"
        )
        title = None
        try:
            # Optional import at module level; may be unavailable.
            if trafilatura_extract_metadata:
                metadata = trafilatura_extract_metadata(html)
                if metadata and metadata.title:
                    title = metadata.title
        except Exception:
            title = None
        return (text or "").strip(), title

    def _is_youtube_url(self, url: str) -> bool:
        """Return True for youtube.com / youtu.be hosts (incl. subdomains)."""
        host = (urlparse(url).hostname or "").lower()
        return "youtube.com" in host or "youtu.be" in host

    def _extract_youtube_id(self, url: str) -> Optional[str]:
        """Parse the video id from watch, short-link (youtu.be) and /shorts/ URLs."""
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        if "youtu.be" in host:
            return parsed.path.strip("/").split("/")[0] or None
        if "youtube.com" in host:
            qs = parse_qs(parsed.query)
            if "v" in qs and qs["v"]:
                return qs["v"][0]
            # /shorts/{id}
            if parsed.path.startswith("/shorts/"):
                segments = parsed.path.split("/")
                return segments[2] if len(segments) > 2 else None
        return None

    async def _fetch_youtube_transcript(self, video_id: str) -> str:
        """Fetch a transcript directly via youtube-transcript-api.

        Preference order: manually created > auto-generated > any transcript
        matching the language list.

        Raises:
            LinkExtractionError: When no transcript is available or the
                fetch is blocked by YouTube.
        """
        # Bug fix: bind cookies BEFORE the try-block — the generic except
        # path below references it, which was a NameError hazard.
        # NOTE(review): youtube-transcript-api expects `cookies` to be a path
        # to a cookies.txt file, while settings documents YOUTUBE_COOKIES as a
        # raw Cookie header value — confirm which format is actually set.
        cookies = (settings.youtube_cookies or "").strip()
        try:
            languages = ["de", "de-DE", "de-AT", "de-CH", "en", "en-US", "en-GB"]
            # Prefer full transcript list to handle generated vs. manually created captions
            transcript_list = await asyncio.to_thread(
                YouTubeTranscriptApi.list_transcripts,
                video_id,
                cookies=cookies or None
            )
            transcript = None
            try:
                transcript = transcript_list.find_manually_created_transcript(languages)
            except Exception:
                pass
            if transcript is None:
                try:
                    transcript = transcript_list.find_generated_transcript(languages)
                except Exception:
                    pass
            if transcript is None:
                try:
                    transcript = transcript_list.find_transcript(languages)
                except Exception:
                    transcript = None
            if transcript is None:
                raise LinkExtractionError("Kein passendes Transkript gefunden.")
            data = await asyncio.to_thread(transcript.fetch)
            text = " ".join([item.get("text", "") for item in data]).strip()
            if not text:
                raise LinkExtractionError("Kein Transkript verfügbar.")
            return text
        except (TranscriptsDisabled, NoTranscriptFound):
            raise LinkExtractionError("Für dieses Video ist kein Transkript verfügbar.")
        except LinkExtractionError:
            # Bug fix: the specific errors raised above ("Kein passendes
            # Transkript gefunden." / "Kein Transkript verfügbar.") were
            # previously swallowed by the generic handler below and replaced
            # with the misleading cookie hint plus a pointless debug dump.
            raise
        except Exception as exc:
            logger.exception(f"YouTube transcript fetch failed: {exc}")
            debug_dump = await self._debug_youtube_dump(video_id, cookies or None)
            logger.warning(f"YouTube debug dump for {video_id}:\n{debug_dump}")
            raise LinkExtractionError(
                "YouTube-Transkript konnte nicht geladen werden. "
                "YouTube blockiert den Abruf manchmal. "
                "Wenn du Zugriff hast, setze die Umgebungsvariable "
                "`YOUTUBE_COOKIES` mit gültigen Cookies."
            ) from exc

    async def _debug_youtube_dump(self, video_id: str, cookies: Optional[str]) -> str:
        """Probe YouTube caption endpoints and return a log-friendly dump.

        Used only for diagnostics when the transcript fetch fails.
        """
        urls = [
            f"https://www.youtube.com/api/timedtext?type=list&v={video_id}",
            f"https://www.youtube.com/api/timedtext?lang=de&v={video_id}",
            f"https://www.youtube.com/api/timedtext?lang=de&v={video_id}&fmt=json3",
            f"https://www.youtube.com/api/timedtext?lang=en&v={video_id}",
            f"https://www.youtube.com/watch?v={video_id}",
        ]
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; LinkedInWorkflowBot/1.0)"
        }
        if cookies:
            headers["Cookie"] = cookies
        lines = [f"cookies_used={bool(cookies)}"]
        async with httpx.AsyncClient(timeout=self.TIMEOUT, headers=headers) as client:
            for url in urls:
                try:
                    resp = await client.get(url)
                    body = resp.text
                    snippet = body[:2000].replace("\n", "\\n").replace("\r", "")
                    lines.append(f"URL: {url}")
                    lines.append(f"STATUS: {resp.status_code}")
                    lines.append(f"BODY_SNIPPET: {snippet}")
                except Exception as exc:
                    lines.append(f"URL: {url}")
                    lines.append(f"ERROR: {repr(exc)}")
        return "\n".join(lines)

    async def _fetch_transcriptapi(self, video_url: str) -> tuple[str, str]:
        """Fetch transcript and title from TranscriptAPI.com.

        Raises:
            LinkExtractionError: On missing key, non-200 response, or an
                empty transcript payload.
        """
        api_key = (settings.transcriptapi_key or "").strip()
        if not api_key:
            raise LinkExtractionError("TranscriptAPI Key fehlt.")
        endpoint = "https://transcriptapi.com/api/v2/youtube/transcript"
        params = {
            "video_url": video_url,
            "format": "json",
            "include_timestamp": "false",
            "send_metadata": "true",
        }
        headers = {"Authorization": f"Bearer {api_key}"}
        async with httpx.AsyncClient(timeout=self.TIMEOUT) as client:
            resp = await client.get(endpoint, params=params, headers=headers)
            if resp.status_code != 200:
                raise LinkExtractionError(f"TranscriptAPI Fehler: {resp.status_code}")
            data = resp.json()
        # The API may return the transcript as a segment list or plain string.
        transcript = data.get("transcript")
        text = ""
        if isinstance(transcript, list):
            text = " ".join([item.get("text", "") for item in transcript]).strip()
        elif isinstance(transcript, str):
            text = transcript.strip()
        title = ""
        meta = data.get("metadata") or {}
        if isinstance(meta, dict):
            title = meta.get("title", "") or ""
        if not text:
            raise LinkExtractionError("TranscriptAPI lieferte kein Transkript.")
        return text, title

    async def _fetch_youtube_title(self, url: str) -> Optional[str]:
        """Best-effort title lookup via the public oEmbed endpoint."""
        oembed = f"https://www.youtube.com/oembed?format=json&url={url}"
        try:
            async with httpx.AsyncClient(timeout=self.TIMEOUT) as client:
                response = await client.get(oembed)
                if response.status_code != 200:
                    return None
                data = response.json()
                return data.get("title")
        except Exception:
            return None

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -14,7 +14,7 @@
</div> </div>
{% endif %} {% endif %}
<div class="grid md:grid-cols-2 gap-6"> <div class="grid grid-cols-1 md:grid-cols-2 gap-6">
<a href="/create/wizard" <a href="/create/wizard"
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}"> class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4"> <div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
@@ -38,6 +38,30 @@
<h2 class="text-lg font-semibold text-white mb-2">Per Chat erstellen</h2> <h2 class="text-lg font-semibold text-white mb-2">Per Chat erstellen</h2>
<p class="text-gray-400 text-sm">Schnell und flexibel mit dem Chat-Assistenten.</p> <p class="text-gray-400 text-sm">Schnell und flexibel mit dem Chat-Assistenten.</p>
</a> </a>
<a href="/create/link-wizard"
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
<svg class="w-6 h-6 text-brand-highlight" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
d="M13.828 10.172a4 4 0 010 5.656l-3 3a4 4 0 11-5.656-5.656l1.5-1.5m4.328-4.328a4 4 0 015.656 0l3 3a4 4 0 11-5.656 5.656l-1.5-1.5"/>
</svg>
</div>
<h2 class="text-lg font-semibold text-white mb-2">Aus Link erstellen</h2>
<p class="text-gray-400 text-sm">Webseite, PDF oder YouTube-Link analysieren und daraus posten.</p>
</a>
<a href="/create/file-wizard"
class="card-bg border rounded-xl p-6 hover:bg-brand-bg-light transition-colors group {% if limit_reached %}opacity-50 pointer-events-none{% endif %}">
<div class="w-12 h-12 bg-brand-highlight/20 rounded-lg flex items-center justify-center mb-4">
<svg class="w-6 h-6 text-brand-highlight" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
d="M7 2h7l5 5v13a2 2 0 01-2 2H7a2 2 0 01-2-2V4a2 2 0 012-2z"/>
</svg>
</div>
<h2 class="text-lg font-semibold text-white mb-2">Aus Datei erstellen</h2>
<p class="text-gray-400 text-sm">Datei hochladen und daraus einen Post generieren.</p>
</a>
</div> </div>
</div> </div>
{% endblock %} {% endblock %}

View File

@@ -41,6 +41,9 @@ from src.services.background_jobs import (
) )
from src.services.db_job_manager import job_manager from src.services.db_job_manager import job_manager
from src.services.storage_service import storage from src.services.storage_service import storage
from src.services.link_extractor import LinkExtractor, LinkExtractionError
from src.services.file_extractor import FileExtractor, FileExtractionError
from src.agents.link_topic_builder import LinkTopicBuilderAgent
# Router for user frontend # Router for user frontend
user_router = APIRouter(tags=["user"]) user_router = APIRouter(tags=["user"])
@@ -1813,6 +1816,64 @@ async def create_post_page(request: Request):
}) })
@user_router.get("/create/link-wizard", response_class=HTMLResponse)
async def create_post_link_page(request: Request):
    """Render the "create post from link" wizard page.

    Redirects to /login when no user session exists. For company/employee
    accounts the company token limit is checked so the template can show a
    notice and disable the wizard when the quota is exhausted.
    """
    session = require_user_session(request)
    if not session:
        return RedirectResponse(url="/login", status_code=302)
    # Check token limit for companies/employees
    limit_reached = False
    limit_message = ""
    if session.account_type in ("company", "employee") and session.company_id:
        can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
        limit_reached = not can_create
        limit_message = error_msg
    user_id = UUID(session.user_id)
    profile_picture = await get_user_avatar(session, user_id)
    return templates.TemplateResponse("create_post_link.html", {
        "request": request,
        "page": "create",
        "session": session,
        "user_id": session.user_id,
        "limit_reached": limit_reached,
        "limit_message": limit_message,
        "profile_picture": profile_picture
    })
@user_router.get("/create/file-wizard", response_class=HTMLResponse)
async def create_post_file_page(request: Request):
    """Render the "create post from file" wizard page.

    Redirects to /login when no user session exists. For company/employee
    accounts the company token limit is checked so the template can show a
    notice and disable the wizard when the quota is exhausted.
    """
    session = require_user_session(request)
    if not session:
        return RedirectResponse(url="/login", status_code=302)
    # Check token limit for companies/employees
    limit_reached = False
    limit_message = ""
    if session.account_type in ("company", "employee") and session.company_id:
        can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
        limit_reached = not can_create
        limit_message = error_msg
    user_id = UUID(session.user_id)
    profile_picture = await get_user_avatar(session, user_id)
    return templates.TemplateResponse("create_post_file.html", {
        "request": request,
        "page": "create",
        "session": session,
        "user_id": session.user_id,
        "limit_reached": limit_reached,
        "limit_message": limit_message,
        "profile_picture": profile_picture
    })
@user_router.get("/chat-create", response_class=HTMLResponse) @user_router.get("/chat-create", response_class=HTMLResponse)
async def chat_create_page(request: Request): async def chat_create_page(request: Request):
"""Chat-based post creation page.""" """Chat-based post creation page."""
@@ -2063,6 +2124,117 @@ async def transcribe_audio(request: Request):
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@user_router.post("/api/link-extract")
async def extract_link(request: Request):
    """Extract context from a link and build a structured topic.

    JSON body: ``url`` (link to extract) or ``transcript`` (pre-supplied
    text that bypasses network extraction), plus optional ``title``,
    ``source_type`` and ``source_url``.

    Returns:
        ``{"topic": <structured topic>, "source": <raw extraction>}``.
    """
    session = require_user_session(request)
    if not session:
        raise HTTPException(status_code=401, detail="Not authenticated")
    # Check token limit for companies/employees
    if session.account_type in ("company", "employee") and session.company_id:
        can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
        if not can_create:
            raise HTTPException(status_code=429, detail=error_msg)
    try:
        data = await request.json()
        url = (data.get("url") or "").strip()
        transcript = (data.get("transcript") or "").strip()
        manual_title = (data.get("title") or "").strip()
        source_type = (data.get("source_type") or "").strip()
        source_url = (data.get("source_url") or "").strip()
        if transcript:
            # Caller already supplied the text (e.g. pasted transcript) —
            # skip network extraction entirely.
            source = {
                "source_url": source_url or url,
                "source_type": source_type or "manual",
                "title": manual_title or "Manuelles Transkript",
                "text": transcript
            }
        else:
            extractor = LinkExtractor()
            try:
                source = await extractor.extract(url)
            except LinkExtractionError as exc:
                # Expected extraction problems map to 400, not 500.
                raise HTTPException(status_code=400, detail=str(exc)) from exc
        builder = LinkTopicBuilderAgent()
        builder.set_tracking_context(
            operation="link_extract",
            user_id=session.user_id,
            company_id=session.company_id
        )
        try:
            topic = await builder.process(source)
        except ValueError as exc:
            # Bug fix: the builder raises ValueError for client problems
            # (e.g. too little content); previously this surfaced as a 500.
            raise HTTPException(status_code=400, detail=str(exc)) from exc
        return {"topic": topic, "source": source}
    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Link extraction failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@user_router.post("/api/file-extract")
async def extract_file(request: Request):
    """Extract context from an uploaded file and build a structured topic.

    Expects multipart form data with a ``file`` field
    (pdf/docx/pptx/xlsx/txt/md/rtf, max 10 MB).

    Returns:
        ``{"topic": <structured topic>, "source": <raw extraction>}``.
    """
    session = require_user_session(request)
    if not session:
        raise HTTPException(status_code=401, detail="Not authenticated")
    # Check token limit for companies/employees
    if session.account_type in ("company", "employee") and session.company_id:
        can_create, error_msg, _, _ = await db.check_company_token_limit(UUID(session.company_id))
        if not can_create:
            raise HTTPException(status_code=429, detail=error_msg)
    try:
        form = await request.form()
        upload: UploadFile = form.get("file")  # type: ignore[assignment]
        # Bug fix: form.get() returns a plain str for non-file form fields;
        # treating that as an UploadFile crashed with a 500 before.
        if not upload or isinstance(upload, str):
            raise HTTPException(status_code=400, detail="Keine Datei hochgeladen.")
        # Basic validation
        allowed_ext = {".pdf", ".docx", ".pptx", ".xlsx", ".txt", ".md", ".rtf"}
        filename = upload.filename or ""
        ext = Path(filename).suffix.lower()
        if not ext or ext not in allowed_ext:
            raise HTTPException(status_code=400, detail="Dateityp nicht unterstützt.")
        file_bytes = await upload.read()
        max_bytes = 10 * 1024 * 1024  # 10 MB
        if len(file_bytes) > max_bytes:
            raise HTTPException(status_code=400, detail="Datei ist zu groß (max 10 MB).")
        extractor = FileExtractor()
        try:
            text = extractor.extract_text(file_bytes, filename)
        except FileExtractionError as exc:
            # Expected extraction problems map to 400, not 500.
            raise HTTPException(status_code=400, detail=str(exc)) from exc
        source = {
            "source_url": "",
            "source_type": "file",
            "title": filename or "Datei",
            "text": text
        }
        builder = LinkTopicBuilderAgent()
        builder.set_tracking_context(
            operation="file_extract",
            user_id=session.user_id,
            company_id=session.company_id
        )
        try:
            topic = await builder.process(source)
        except ValueError as exc:
            # Bug fix: the builder raises ValueError for client problems
            # (e.g. too little content); previously this surfaced as a 500.
            raise HTTPException(status_code=400, detail=str(exc)) from exc
        return {"topic": topic, "source": source}
    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"File extraction failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@user_router.post("/api/hooks") @user_router.post("/api/hooks")
async def generate_hooks( async def generate_hooks(
request: Request, request: Request,