Files
vontor-cz/backend/thirdparty/downloader/views.py
2025-10-29 00:58:37 +01:00

554 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from django.shortcuts import render
from django.db.models import Count
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.permissions import AllowAny
from rest_framework import status
from django.conf import settings
from django.http import StreamingHttpResponse, JsonResponse
from django.utils.text import slugify
from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator
from django.db.utils import OperationalError, ProgrammingError
# docs + schema helpers
from rest_framework import serializers
from drf_spectacular.utils import (
extend_schema,
OpenApiExample,
OpenApiParameter,
OpenApiTypes,
OpenApiResponse,
inline_serializer,
)
import os
import math
import json
import tempfile
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote as urlquote
from .models import DownloaderModel
from .serializers import DownloaderLogSerializer
# ---------------------- Inline serializers for documentation only ----------------------
# Using inline_serializer to avoid creating new files.
# Within this file these objects are only passed to @extend_schema (request= /
# responses=) or nested inside one another; the views below build their
# response payloads as plain dicts and do not validate with these serializers.

# One entry of the "options" list. The field names match, key for key, the dict
# returned by _format_option().
FormatOptionSchema = inline_serializer(
name="FormatOption",
fields={
"format_id": serializers.CharField(allow_null=True),
"ext": serializers.CharField(allow_null=True),
"vcodec": serializers.CharField(allow_null=True),
"acodec": serializers.CharField(allow_null=True),
"fps": serializers.FloatField(allow_null=True),
"tbr": serializers.FloatField(allow_null=True),
"abr": serializers.FloatField(allow_null=True),
"vbr": serializers.FloatField(allow_null=True),
"asr": serializers.IntegerField(allow_null=True),
"filesize": serializers.IntegerField(allow_null=True),
"filesize_approx": serializers.IntegerField(allow_null=True),
"estimated_size_bytes": serializers.IntegerField(allow_null=True),
"size_ok": serializers.BooleanField(),
"format_note": serializers.CharField(allow_null=True),
"resolution": serializers.CharField(allow_null=True),
"audio_only": serializers.BooleanField(),
},
)
# Request body documented for DownloaderFormatsView.post: just the media URL.
FormatsRequestSchema = inline_serializer(
name="FormatsRequest",
fields={"url": serializers.URLField()},
)
# Success body documented for DownloaderFormatsView.post; the keys match the
# dict that view returns (title, duration, extractor, video_id, max_size_bytes,
# options).
FormatsResponseSchema = inline_serializer(
name="FormatsResponse",
fields={
"title": serializers.CharField(allow_null=True),
"duration": serializers.FloatField(allow_null=True),
"extractor": serializers.CharField(allow_null=True),
"video_id": serializers.CharField(allow_null=True),
"max_size_bytes": serializers.IntegerField(),
"options": serializers.ListField(child=FormatOptionSchema),
},
)
# Request body documented for DownloaderFileView.post: the media URL plus the
# format_id chosen from a previous formats probe.
DownloadRequestSchema = inline_serializer(
name="DownloadRequest",
fields={
"url": serializers.URLField(),
"format_id": serializers.CharField(),
},
)
# Shape shared by the error responses in this file. "detail" is always sent;
# "error" carries the exception text when a yt-dlp call failed;
# "estimated_size_bytes" and "max_bytes" are only sent with the 413
# "File too large for this server" response.
ErrorResponseSchema = inline_serializer(
name="ErrorResponse",
fields={
"detail": serializers.CharField(),
"error": serializers.CharField(required=False),
"estimated_size_bytes": serializers.IntegerField(required=False),
"max_bytes": serializers.IntegerField(required=False),
},
)
# ---------------------------------------------------------------------------------------
def _estimate_size_bytes(fmt: Dict[str, Any], duration: Optional[float]) -> Optional[int]:
"""Estimate (or return exact) size in bytes for a yt-dlp format."""
# Prefer exact sizes from yt-dlp
if fmt.get("filesize"):
return int(fmt["filesize"])
if fmt.get("filesize_approx"):
return int(fmt["filesize_approx"])
# Estimate via total bitrate (tbr is in Kbps)
if duration and fmt.get("tbr"):
try:
kbps = float(fmt["tbr"])
return int((kbps * 1000 / 8) * float(duration))
except Exception:
return None
return None
def _format_option(fmt: Dict[str, Any], duration: Optional[float], max_bytes: int) -> Dict[str, Any]:
    """Reduce a yt-dlp format dict to the compact option object sent to the UI.

    Args:
        fmt: A single format dict as produced by yt-dlp.
        duration: Media duration in seconds, forwarded to the size estimator.
        max_bytes: Size limit used to compute the ``size_ok`` flag.

    Returns:
        A dict with the pass-through format fields plus the derived keys
        ``estimated_size_bytes``, ``size_ok``, ``resolution`` and
        ``audio_only``.
    """
    size_estimate = _estimate_size_bytes(fmt, duration)

    width, height = fmt.get("width"), fmt.get("height")
    if width and height:
        resolution = f"{width}x{height}"
    else:
        resolution = None

    # Fields copied straight from yt-dlp (missing keys become None).
    copied_keys = (
        "format_id",
        "ext",
        "vcodec",
        "acodec",
        "fps",
        "tbr",
        "abr",
        "vbr",
        "asr",
        "filesize",
        "filesize_approx",
    )
    option: Dict[str, Any] = {key: fmt.get(key) for key in copied_keys}

    # Derived fields; insertion order is kept so the serialized key order
    # stays stable.
    option["estimated_size_bytes"] = size_estimate
    # An unknown size is never reported as within the limit.
    option["size_ok"] = False if size_estimate is None else size_estimate <= max_bytes
    option["format_note"] = fmt.get("format_note")
    option["resolution"] = resolution
    # A missing vcodec or the literal "none" marks an audio-only format.
    option["audio_only"] = fmt.get("vcodec") in (None, "none")
    return option
def _client_meta(request) -> Tuple[Optional[Any], Optional[str], Optional[str]]:
"""Extract current user, client IP and User-Agent."""
xff = request.META.get("HTTP_X_FORWARDED_FOR")
ip = (xff.split(",")[0].strip() if xff else request.META.get("REMOTE_ADDR"))
ua = request.META.get("HTTP_USER_AGENT")
user = getattr(request, "user", None)
return user, ip, ua
# Safe logger: swallow DB errors if table is missing/not migrated yet
def _log_safely(*, info, requested_format, status: str, url: str, user, ip_address: str, user_agent: str, error_message: str | None = None):
    """Record a downloader event without ever failing the calling request.

    All arguments are forwarded unchanged, by keyword, to
    ``DownloaderModel.from_ydl_info``. Any exception raised there is caught
    here; it is reported through the ``logging`` module instead of being
    silently discarded, so persistent logging problems stay visible to
    operators.

    Returns:
        None. Never raises for failures inside ``from_ydl_info``.
    """
    # Imported locally so this helper does not depend on a module-level import.
    import logging

    logger = logging.getLogger(__name__)
    try:
        DownloaderModel.from_ydl_info(
            info=info,
            requested_format=requested_format,
            status=status,
            url=url,
            user=user,
            ip_address=ip_address,
            user_agent=user_agent,
            error_message=error_message,
        )
    except (OperationalError, ProgrammingError) as exc:
        # Migrations not applied or table missing: expected in some
        # environments, so keep it quiet.
        logger.debug("Downloader log entry skipped (database not ready): %s", exc)
    except Exception:
        # Never break the request on logging failures, but leave a trace.
        logger.warning("Downloader log entry could not be saved", exc_info=True)
class DownloaderFormatsView(APIView):
    """Probe media URL and return available formats with estimated sizes and limit flags."""

    # Public endpoint: no authentication, no permission checks.
    permission_classes = [AllowAny]
    authentication_classes = []

    @extend_schema(
        tags=["downloader"],
        operation_id="downloader_formats",
        summary="List available formats for a media URL",
        description="Uses yt-dlp to extract formats and estimates size. Applies max size policy.",
        request=FormatsRequestSchema,
        responses={
            200: FormatsResponseSchema,
            400: OpenApiResponse(response=ErrorResponseSchema),
            500: OpenApiResponse(response=ErrorResponseSchema),
        },
        examples=[
            OpenApiExample(
                "Formats request",
                value={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"},
                request_only=True,
            ),
            OpenApiExample(
                "Formats response (excerpt)",
                value={
                    "title": "Example Title",
                    "duration": 213.0,
                    "extractor": "youtube",
                    "video_id": "dQw4w9WgXcQ",
                    "max_size_bytes": 209715200,
                    "options": [
                        {
                            "format_id": "140",
                            "ext": "m4a",
                            "vcodec": "none",
                            "acodec": "mp4a.40.2",
                            "fps": None,
                            "tbr": 128.0,
                            "abr": 128.0,
                            "vbr": None,
                            "asr": 44100,
                            "filesize": None,
                            "filesize_approx": 3342334,
                            "estimated_size_bytes": 3400000,
                            "size_ok": True,
                            "format_note": "tiny",
                            "resolution": None,
                            "audio_only": True,
                        }
                    ],
                },
                response_only=True,
            ),
        ],
    )
    def post(self, request):
        """POST to probe a media URL and list available formats."""
        # Responses:
        #   200 - title/duration/extractor/video_id, the size limit, and up to
        #         50 format options sorted by estimated size.
        #   400 - "url" missing, or yt-dlp could not extract the media.
        #   500 - yt-dlp is not importable.
        try:
            import yt_dlp
        except Exception:
            return Response({"detail": "yt-dlp not installed. pip install yt-dlp"}, status=500)

        url = request.data.get("url")
        if not url:
            return Response({"detail": "Missing 'url'."}, status=400)

        max_bytes = getattr(settings, "DOWNLOADER_MAX_SIZE_BYTES", 200 * 1024 * 1024)
        ydl_opts = {
            "skip_download": True,
            "quiet": True,
            "no_warnings": True,
            "noprogress": True,
            "ignoreerrors": True,
            "socket_timeout": getattr(settings, "DOWNLOADER_TIMEOUT", 120),
            "extract_flat": False,
            # yt-dlp's option for "single video, not the playlist" is
            # `noplaylist`; the previous `allow_playlist` key is not an option
            # yt-dlp knows about.
            "noplaylist": True,
        }
        user, ip, ua = _client_meta(request)

        info = None
        probe_error = None
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
        except Exception as e:
            probe_error = str(e)
        else:
            # With "ignoreerrors" enabled, yt-dlp can report the failure itself
            # and hand back None instead of raising; without this check the
            # info.get(...) calls below would crash with AttributeError.
            if not info:
                probe_error = "No media information could be extracted"

        if probe_error is not None:
            # log probe error
            _log_safely(
                info={"webpage_url": url},
                requested_format=None,
                status="probe_error",
                url=url,
                user=user,
                ip_address=ip,
                user_agent=ua,
                error_message=probe_error,
            )
            return Response({"detail": "Failed to extract formats", "error": probe_error}, status=400)

        duration = info.get("duration")
        options: List[Dict[str, Any]] = [
            _format_option(f, duration, max_bytes) for f in (info.get("formats") or [])
        ]

        def sort_key(option):
            # Smallest estimated size first (unknown sizes last); ties are
            # broken by larger pixel area first.
            size = option["estimated_size_bytes"]
            area = 0
            if option["resolution"]:
                try:
                    w, h = option["resolution"].split("x")
                    area = int(w) * int(h)
                except ValueError:
                    area = 0
            return (math.inf if size is None else size, -area)

        # Cap the payload at 50 options.
        options_sorted = sorted(options, key=sort_key)[:50]

        # Log probe
        _log_safely(
            info=info,
            requested_format=None,
            status="probe_ok",
            url=url,
            user=user,
            ip_address=ip,
            user_agent=ua,
        )
        return Response({
            "title": info.get("title"),
            "duration": duration,
            "extractor": info.get("extractor"),
            "video_id": info.get("id"),
            "max_size_bytes": max_bytes,
            "options": options_sorted,
        })
class DownloaderFileView(APIView):
    """Download selected format if under max size, then stream the file back."""

    # Public endpoint: no authentication, no permission checks.
    permission_classes = [AllowAny]
    authentication_classes = []

    @extend_schema(
        tags=["downloader"],
        operation_id="downloader_download",
        summary="Download a selected format and stream file",
        description="Downloads with a strict max filesize guard and streams as application/octet-stream.",
        request=DownloadRequestSchema,
        responses={
            200: OpenApiTypes.BINARY,  # was OpenApiResponse(..., media_type="application/octet-stream")
            400: OpenApiResponse(response=ErrorResponseSchema),
            413: OpenApiResponse(response=ErrorResponseSchema),
            500: OpenApiResponse(response=ErrorResponseSchema),
        },
        examples=[
            OpenApiExample(
                "Download request",
                value={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "format_id": "140"},
                request_only=True,
            ),
        ],
    )
    def post(self, request):
        """POST to download a media URL in the selected format."""
        # Responses:
        #   200 - the file, streamed as application/octet-stream.
        #   400 - missing input, probe failure, unknown format_id, or download
        #         failure.
        #   413 - the estimated size exceeds DOWNLOADER_MAX_SIZE_BYTES.
        #   500 - yt-dlp not importable, or no file was produced.
        try:
            import yt_dlp
        except Exception:
            return Response({"detail": "yt-dlp not installed. pip install yt-dlp"}, status=500)
        # Local import: only needed here, for removing the per-request directory.
        import shutil

        url = request.data.get("url")
        fmt_id = request.data.get("format_id")
        if not url or not fmt_id:
            return Response({"detail": "Missing 'url' or 'format_id'."}, status=400)

        max_bytes = getattr(settings, "DOWNLOADER_MAX_SIZE_BYTES", 200 * 1024 * 1024)
        timeout = getattr(settings, "DOWNLOADER_TIMEOUT", 120)
        tmp_dir = getattr(settings, "DOWNLOADER_TMP_DIR", os.path.join(settings.BASE_DIR, "tmp", "downloader"))
        os.makedirs(tmp_dir, exist_ok=True)
        user, ip, ua = _client_meta(request)

        # First, extract info to check/estimate size
        probe_opts = {
            "skip_download": True,
            "quiet": True,
            "no_warnings": True,
            "noprogress": True,
            "ignoreerrors": True,
            "socket_timeout": timeout,
            "extract_flat": False,
            # yt-dlp's option for "single video, not the playlist" is
            # `noplaylist`; the previous `allow_playlist` key is not an option
            # yt-dlp knows about.
            "noplaylist": True,
        }
        info = None
        probe_error = None
        try:
            with yt_dlp.YoutubeDL(probe_opts) as ydl:
                info = ydl.extract_info(url, download=False)
        except Exception as e:
            probe_error = str(e)
        else:
            # With "ignoreerrors" enabled, yt-dlp can hand back None instead of
            # raising; without this check info.get(...) below would crash.
            if not info:
                probe_error = "No media information could be extracted"

        if probe_error is not None:
            _log_safely(
                info={"webpage_url": url},
                requested_format=fmt_id,
                status="precheck_error",
                url=url,
                user=user,
                ip_address=ip,
                user_agent=ua,
                error_message=probe_error,
            )
            return Response({"detail": "Failed to analyze media", "error": probe_error}, status=400)

        duration = info.get("duration")
        # format_id may arrive as a JSON number, so compare as strings.
        selected = next(
            (f for f in (info.get("formats") or []) if str(f.get("format_id")) == str(fmt_id)),
            None,
        )
        if not selected:
            return Response({"detail": f"format_id '{fmt_id}' not found"}, status=400)

        # Enforce size policy
        est_size = _estimate_size_bytes(selected, duration)
        if est_size is not None and est_size > max_bytes:
            _log_safely(
                info=selected,
                requested_format=fmt_id,
                status="blocked_by_size",
                url=url,
                user=user,
                ip_address=ip,
                user_agent=ua,
                error_message=f"Estimated size {est_size} > max {max_bytes}",
            )
            return Response(
                {"detail": "File too large for this server", "estimated_size_bytes": est_size, "max_bytes": max_bytes},
                status=413,
            )

        # Each request downloads into its own directory. The output template
        # depends only on id and ext, so without this two concurrent requests
        # for the same media would share one path and the first to finish
        # streaming would delete the file the other is still reading.
        work_dir = tempfile.mkdtemp(prefix="job-", dir=tmp_dir)

        # Now download with strict max_filesize guard
        ydl_opts = {
            "format": str(fmt_id),
            "quiet": True,
            "no_warnings": True,
            "noprogress": True,
            "socket_timeout": timeout,
            "retries": 3,
            "outtmpl": os.path.join(work_dir, "%(id)s.%(ext)s"),
            "max_filesize": max_bytes,  # hard cap during download
            "concurrent_fragment_downloads": 1,
            "http_chunk_size": 1024 * 1024,  # 1MB chunks to reduce memory
            "noplaylist": True,
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                result = ydl.extract_info(url, download=True)
            # Prefer the path reported for the first requested download. An
            # absent or empty list falls back to "_filename" instead of
            # indexing [0] on an empty list.
            downloads = result.get("requested_downloads") or []
            if downloads:
                filepath = downloads[0].get("filepath") or downloads[0].get("__final_filename")
            else:
                filepath = result.get("_filename")
        except Exception as e:
            shutil.rmtree(work_dir, ignore_errors=True)
            _log_safely(
                info=selected,
                requested_format=fmt_id,
                status="download_error",
                url=url,
                user=user,
                ip_address=ip,
                user_agent=ua,
                error_message=str(e),
            )
            return Response({"detail": "Download failed", "error": str(e)}, status=400)

        if not filepath or not os.path.exists(filepath):
            shutil.rmtree(work_dir, ignore_errors=True)
            return Response({"detail": "Downloaded file not found"}, status=500)

        # Build a safe filename. slugify() yields "" for titles without any
        # ASCII letters or digits, so fall back to "video" rather than sending
        # a name that is only an extension.
        base_title = info.get("title") or "video"
        ext = os.path.splitext(filepath)[1].lstrip(".") or (selected.get("ext") or "bin")
        stem = slugify(base_title)[:80] or "video"
        safe_name = f"{stem}.{ext}"

        try:
            file_size = os.path.getsize(filepath)
        except OSError:
            file_size = None

        # Log success (with the real on-disk size when it is known)
        selected_info = dict(selected)
        if file_size is not None:
            selected_info["filesize"] = file_size
        _log_safely(
            info=selected_info,
            requested_format=fmt_id,
            status="success",
            url=url,
            user=user,
            ip_address=ip,
            user_agent=ua,
        )

        # Stream file and remove after sending
        def file_generator(path: str):
            try:
                with open(path, "rb") as f:
                    while chunk := f.read(8192):
                        yield chunk
            finally:
                # Also runs when the generator is closed early (e.g. the client
                # disconnects mid-stream), so the download is not left on disk.
                shutil.rmtree(work_dir, ignore_errors=True)

        resp = StreamingHttpResponse(file_generator(filepath), content_type="application/octet-stream")
        # Include both plain and RFC 5987 encoded filename
        resp["Content-Disposition"] = (
            f'attachment; filename="{safe_name}"; filename*=UTF-8\'\'{urlquote(safe_name)}'
        )
        # Expose headers so the browser can read them via XHR/fetch
        resp["X-Filename"] = safe_name
        resp["Access-Control-Expose-Headers"] = "Content-Disposition, X-Filename, Content-Length, Content-Type"
        if file_size is not None:
            resp["Content-Length"] = str(file_size)
        resp["X-Content-Type-Options"] = "nosniff"
        return resp
# Simple stats view (aggregations for UI charts)
# Read-only endpoint: every list in the payload is a GROUP BY over one
# DownloaderModel column with a row count, most frequent value first.
class DownloaderStatsView(APIView):
    permission_classes = [AllowAny]

    @extend_schema(
        tags=["downloader"],
        operation_id="downloader_stats",
        summary="Aggregated downloader statistics",
        description="Returns top extensions, requested formats, codecs and audio/video split.",
        responses={
            200: inline_serializer(
                name="DownloaderStats",
                fields={
                    "top_ext": serializers.ListField(
                        child=inline_serializer(
                            name="ExtCount",
                            fields={
                                "ext": serializers.CharField(allow_null=True),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                    "top_requested_format": serializers.ListField(
                        child=inline_serializer(
                            name="RequestedFormatCount",
                            fields={
                                "requested_format": serializers.CharField(allow_null=True),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                    "top_vcodec": serializers.ListField(
                        child=inline_serializer(
                            name="VCodecCount",
                            fields={
                                "vcodec": serializers.CharField(allow_null=True),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                    "top_acodec": serializers.ListField(
                        child=inline_serializer(
                            name="ACodecCount",
                            fields={
                                "acodec": serializers.CharField(allow_null=True),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                    "audio_vs_video": serializers.ListField(
                        child=inline_serializer(
                            name="AudioVsVideo",
                            fields={
                                "is_audio_only": serializers.BooleanField(),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                },
            )
        },
    )
    def get(self, request):
        """GET to retrieve aggregated downloader statistics."""

        def counts_by(column):
            # Group rows by `column`, count them, most frequent first.
            return (
                DownloaderModel.objects
                .values(column)
                .annotate(count=Count("id"))
                .order_by("-count")
            )

        # Payload key -> model column, limited to the ten most frequent values.
        top_ten = {
            "top_ext": "ext",
            "top_requested_format": "requested_format",
            "top_vcodec": "vcodec",
            "top_acodec": "acodec",
        }
        payload = {key: list(counts_by(column)[:10]) for key, column in top_ten.items()}
        # The audio/video split is not truncated.
        payload["audio_vs_video"] = list(counts_by("is_audio_only"))
        return Response(payload)