This commit is contained in:
2025-10-28 03:21:01 +01:00
parent 10796dcb31
commit 73da41b514
44 changed files with 1868 additions and 452 deletions

539
backend/thirdparty/downloader/views.py vendored Normal file
View File

@@ -0,0 +1,539 @@
from django.shortcuts import render
from django.db.models import Count
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.permissions import AllowAny
from rest_framework import status
from django.conf import settings
from django.http import StreamingHttpResponse, JsonResponse
from django.utils.text import slugify
from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator
# docs + schema helpers
from rest_framework import serializers
from drf_spectacular.utils import (
extend_schema,
OpenApiExample,
OpenApiParameter,
OpenApiTypes,
OpenApiResponse,
inline_serializer,
)
import os
import math
import json
import tempfile
from typing import Any, Dict, List, Optional, Tuple
from .models import DownloaderModel
from .serializers import DownloaderLogSerializer
# ---------------------- Inline serializers for documentation only ----------------------
# Using inline_serializer to avoid creating new files.
# Schema of one entry in the "options" list of the formats response.
# Fields are grouped by serializer type; dict insertion order keeps the
# documented key order stable.
FormatOptionSchema = inline_serializer(
    name="FormatOption",
    fields={
        **{
            key: serializers.CharField(allow_null=True)
            for key in ("format_id", "ext", "vcodec", "acodec")
        },
        **{
            key: serializers.FloatField(allow_null=True)
            for key in ("fps", "tbr", "abr", "vbr")
        },
        **{
            key: serializers.IntegerField(allow_null=True)
            for key in ("asr", "filesize", "filesize_approx", "estimated_size_bytes")
        },
        "size_ok": serializers.BooleanField(),
        "format_note": serializers.CharField(allow_null=True),
        "resolution": serializers.CharField(allow_null=True),
        "audio_only": serializers.BooleanField(),
    },
)

# Request body of the formats (probe) endpoint.
FormatsRequestSchema = inline_serializer(
    name="FormatsRequest",
    fields=dict(url=serializers.URLField()),
)

# Response body of the formats (probe) endpoint.
FormatsResponseSchema = inline_serializer(
    name="FormatsResponse",
    fields=dict(
        title=serializers.CharField(allow_null=True),
        duration=serializers.FloatField(allow_null=True),
        extractor=serializers.CharField(allow_null=True),
        video_id=serializers.CharField(allow_null=True),
        max_size_bytes=serializers.IntegerField(),
        options=serializers.ListField(child=FormatOptionSchema),
    ),
)

# Request body of the download endpoint.
DownloadRequestSchema = inline_serializer(
    name="DownloadRequest",
    fields=dict(
        url=serializers.URLField(),
        format_id=serializers.CharField(),
    ),
)

# Error payload shared by the endpoints; everything except "detail" is optional.
ErrorResponseSchema = inline_serializer(
    name="ErrorResponse",
    fields=dict(
        detail=serializers.CharField(),
        error=serializers.CharField(required=False),
        estimated_size_bytes=serializers.IntegerField(required=False),
        max_bytes=serializers.IntegerField(required=False),
    ),
)
# ---------------------------------------------------------------------------------------
def _estimate_size_bytes(fmt: Dict[str, Any], duration: Optional[float]) -> Optional[int]:
"""Estimate (or return exact) size in bytes for a yt-dlp format."""
# Prefer exact sizes from yt-dlp
if fmt.get("filesize"):
return int(fmt["filesize"])
if fmt.get("filesize_approx"):
return int(fmt["filesize_approx"])
# Estimate via total bitrate (tbr is in Kbps)
if duration and fmt.get("tbr"):
try:
kbps = float(fmt["tbr"])
return int((kbps * 1000 / 8) * float(duration))
except Exception:
return None
return None
def _format_option(fmt: Dict[str, Any], duration: Optional[float], max_bytes: int) -> Dict[str, Any]:
    """Reduce a yt-dlp format dict to the compact option object shown in the UI.

    Args:
        fmt: A yt-dlp format dictionary.
        duration: Media duration in seconds, used for the size estimate.
        max_bytes: Size limit that ``size_ok`` is checked against.

    Returns:
        A dict with the copied format fields plus ``estimated_size_bytes``,
        ``size_ok``, ``resolution`` and ``audio_only``. ``size_ok`` is False
        when the size cannot be estimated.
    """
    estimated = _estimate_size_bytes(fmt, duration)
    width, height = fmt.get("width"), fmt.get("height")

    # Fields copied verbatim from the yt-dlp format (missing ones become None).
    copied_keys = (
        "format_id", "ext", "vcodec", "acodec", "fps", "tbr",
        "abr", "vbr", "asr", "filesize", "filesize_approx",
    )
    option: Dict[str, Any] = {key: fmt.get(key) for key in copied_keys}

    option["estimated_size_bytes"] = estimated
    option["size_ok"] = estimated is not None and estimated <= max_bytes
    option["format_note"] = fmt.get("format_note")
    option["resolution"] = f"{width}x{height}" if width and height else None
    # A missing vcodec or the literal "none" marks an audio-only format.
    option["audio_only"] = fmt.get("vcodec") in (None, "none")
    return option
def _client_meta(request) -> Tuple[Optional[Any], Optional[str], Optional[str]]:
"""Extract current user, client IP and User-Agent."""
xff = request.META.get("HTTP_X_FORWARDED_FOR")
ip = (xff.split(",")[0].strip() if xff else request.META.get("REMOTE_ADDR"))
ua = request.META.get("HTTP_USER_AGENT")
user = getattr(request, "user", None)
return user, ip, ua
class DownloaderFormatsView(APIView):
    """Probe a media URL and return its formats with size estimates and limit flags.

    NOTE(review): the endpoint is open (``AllowAny``) and passes a
    caller-supplied URL to yt-dlp; consider restricting who may call it or
    which hosts may be probed.
    """

    permission_classes = [AllowAny]

    @staticmethod
    def _sort_key(option: Dict[str, Any]) -> Tuple[float, int]:
        """Sort by estimated size (unknown sizes last), then larger resolution first."""
        size = option["estimated_size_bytes"]
        if size is None:
            size = math.inf
        pixels = 0
        if option["resolution"]:
            try:
                width, height = option["resolution"].split("x")
                pixels = int(width) * int(height)
            except ValueError:
                # Not a plain "<int>x<int>" string: treat as unknown resolution.
                pixels = 0
        return (size, -pixels)

    @staticmethod
    def _log_probe(request, url, info, log_status, error_message=None):
        """Record a probe attempt through ``DownloaderModel.from_ydl_info``."""
        user, ip, ua = _client_meta(request)
        # Only pass error_message when there is one, so the success call keeps
        # whatever default from_ydl_info defines.
        extra = {} if error_message is None else {"error_message": error_message}
        DownloaderModel.from_ydl_info(
            info=info,
            requested_format=None,
            status=log_status,
            url=url,
            user=user,
            ip_address=ip,
            user_agent=ua,
            **extra,
        )

    @extend_schema(
        tags=["downloader"],
        operation_id="downloader_formats",
        summary="List available formats for a media URL",
        description="Uses yt-dlp to extract formats and estimates size. Applies max size policy.",
        request=FormatsRequestSchema,
        responses={
            200: FormatsResponseSchema,
            400: OpenApiResponse(response=ErrorResponseSchema),
            500: OpenApiResponse(response=ErrorResponseSchema),
        },
        examples=[
            OpenApiExample(
                "Formats request",
                value={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"},
                request_only=True,
            ),
            OpenApiExample(
                "Formats response (excerpt)",
                value={
                    "title": "Example Title",
                    "duration": 213.0,
                    "extractor": "youtube",
                    "video_id": "dQw4w9WgXcQ",
                    "max_size_bytes": 209715200,
                    "options": [
                        {
                            "format_id": "140",
                            "ext": "m4a",
                            "vcodec": "none",
                            "acodec": "mp4a.40.2",
                            "fps": None,
                            "tbr": 128.0,
                            "abr": 128.0,
                            "vbr": None,
                            "asr": 44100,
                            "filesize": None,
                            "filesize_approx": 3342334,
                            "estimated_size_bytes": 3400000,
                            "size_ok": True,
                            "format_note": "tiny",
                            "resolution": None,
                            "audio_only": True,
                        }
                    ],
                },
                response_only=True,
            ),
        ],
    )
    def post(self, request):
        """POST to probe a media URL and list available formats.

        Returns 400 when ``url`` is missing or yt-dlp cannot extract any
        information, 500 when yt-dlp is not installed, and otherwise the media
        metadata with at most 50 format options sorted by ``_sort_key``.
        """
        try:
            import yt_dlp
        except ImportError:
            return Response({"detail": "yt-dlp not installed. pip install yt-dlp"}, status=500)

        url = request.data.get("url")
        if not url:
            return Response({"detail": "Missing 'url'."}, status=400)

        max_bytes = getattr(settings, "DOWNLOADER_MAX_SIZE_BYTES", 200 * 1024 * 1024)
        ydl_opts = {
            "skip_download": True,
            "quiet": True,
            "no_warnings": True,
            "noprogress": True,
            "ignoreerrors": True,
            "socket_timeout": getattr(settings, "DOWNLOADER_TIMEOUT", 120),
            "extract_flat": False,
            # "noplaylist" is the YoutubeDL option for single-video extraction;
            # the previous "allow_playlist" key is not a known option.
            "noplaylist": True,
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
        except Exception as e:
            self._log_probe(request, url, {"webpage_url": url}, "probe_error", str(e))
            return Response({"detail": "Failed to extract formats", "error": str(e)}, status=400)

        if not info:
            # With ignoreerrors=True yt-dlp reports failure by returning None
            # instead of raising; treat it like any other probe error.
            message = "No media information could be extracted"
            self._log_probe(request, url, {"webpage_url": url}, "probe_error", message)
            return Response({"detail": "Failed to extract formats", "error": message}, status=400)

        duration = info.get("duration")
        options: List[Dict[str, Any]] = [
            _format_option(fmt, duration, max_bytes)
            for fmt in (info.get("formats") or [])
        ]
        options_sorted = sorted(options, key=self._sort_key)[:50]

        self._log_probe(request, url, info, "probe_ok")
        return Response({
            "title": info.get("title"),
            "duration": duration,
            "extractor": info.get("extractor"),
            "video_id": info.get("id"),
            "max_size_bytes": max_bytes,
            "options": options_sorted,
        })
class DownloaderFileView(APIView):
    """Download the selected format if it fits the size limit, then stream it back.

    Each request downloads into its own temporary directory under the
    configured base directory, so concurrent requests for the same media
    cannot overwrite or delete each other's files. The directory is removed
    once streaming ends (including client disconnects) or on any error path.

    NOTE(review): the endpoint is open (``AllowAny``) and passes a
    caller-supplied URL to yt-dlp; consider restricting who may call it or
    which hosts may be fetched.
    """

    permission_classes = [AllowAny]

    @staticmethod
    def _log(request, url, fmt_id, info, log_status, error_message=None):
        """Record a download attempt through ``DownloaderModel.from_ydl_info``."""
        user, ip, ua = _client_meta(request)
        # Only pass error_message when there is one, so the success call keeps
        # whatever default from_ydl_info defines.
        extra = {} if error_message is None else {"error_message": error_message}
        DownloaderModel.from_ydl_info(
            info=info,
            requested_format=fmt_id,
            status=log_status,
            url=url,
            user=user,
            ip_address=ip,
            user_agent=ua,
            **extra,
        )

    @staticmethod
    def _downloaded_path(result):
        """Return the file path reported by yt-dlp, or None when unavailable."""
        if not result:
            return None
        downloads = result.get("requested_downloads") or []
        if downloads:
            first = downloads[0]
            path = first.get("filepath") or first.get("__final_filename")
            if path:
                return path
        return result.get("_filename")

    @staticmethod
    def _stream_then_discard(path, workdir, chunk_size=8192):
        """Yield the file in chunks and always remove ``workdir`` afterwards."""
        try:
            with open(path, "rb") as handle:
                for chunk in iter(lambda: handle.read(chunk_size), b""):
                    yield chunk
        finally:
            # Runs on normal completion and when the response closes the
            # generator early (e.g. client disconnect).
            try:
                workdir.cleanup()
            except OSError:
                pass

    @extend_schema(
        tags=["downloader"],
        operation_id="downloader_download",
        summary="Download a selected format and stream file",
        description="Downloads with a strict max filesize guard and streams as application/octet-stream.",
        request=DownloadRequestSchema,
        responses={
            200: OpenApiResponse(response=OpenApiTypes.BINARY, media_type="application/octet-stream"),
            400: OpenApiResponse(response=ErrorResponseSchema),
            413: OpenApiResponse(response=ErrorResponseSchema),
            500: OpenApiResponse(response=ErrorResponseSchema),
        },
        examples=[
            OpenApiExample(
                "Download request",
                value={"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", "format_id": "140"},
                request_only=True,
            ),
        ],
    )
    def post(self, request):
        """POST to download a media URL in the selected format.

        Returns 400 for missing parameters, an unknown ``format_id`` or a
        failed probe/download; 413 when the estimated or actual size exceeds
        the limit; 500 when yt-dlp is missing or no file was produced;
        otherwise a streaming attachment response.
        """
        try:
            import yt_dlp
        except ImportError:
            return Response({"detail": "yt-dlp not installed. pip install yt-dlp"}, status=500)

        url = request.data.get("url")
        fmt_id = request.data.get("format_id")
        if not url or not fmt_id:
            return Response({"detail": "Missing 'url' or 'format_id'."}, status=400)

        max_bytes = getattr(settings, "DOWNLOADER_MAX_SIZE_BYTES", 200 * 1024 * 1024)
        timeout = getattr(settings, "DOWNLOADER_TIMEOUT", 120)
        tmp_dir = getattr(settings, "DOWNLOADER_TMP_DIR", os.path.join(settings.BASE_DIR, "tmp", "downloader"))
        os.makedirs(tmp_dir, exist_ok=True)

        # First, extract info to check/estimate size.
        probe_opts = {
            "skip_download": True,
            "quiet": True,
            "no_warnings": True,
            "noprogress": True,
            "ignoreerrors": True,
            "socket_timeout": timeout,
            "extract_flat": False,
            # "noplaylist" is the YoutubeDL option for single-video extraction;
            # the previous "allow_playlist" key is not a known option.
            "noplaylist": True,
        }
        try:
            with yt_dlp.YoutubeDL(probe_opts) as ydl:
                info = ydl.extract_info(url, download=False)
        except Exception as e:
            self._log(request, url, fmt_id, {"webpage_url": url}, "precheck_error", str(e))
            return Response({"detail": "Failed to analyze media", "error": str(e)}, status=400)

        if not info:
            # With ignoreerrors=True yt-dlp reports failure by returning None.
            message = "No media information could be extracted"
            self._log(request, url, fmt_id, {"webpage_url": url}, "precheck_error", message)
            return Response({"detail": "Failed to analyze media", "error": message}, status=400)

        duration = info.get("duration")
        wanted = str(fmt_id)
        selected = next(
            (f for f in (info.get("formats") or []) if str(f.get("format_id")) == wanted),
            None,
        )
        if not selected:
            return Response({"detail": f"format_id '{fmt_id}' not found"}, status=400)

        # Enforce size policy on the estimate (unknown sizes are re-checked
        # against the real file after download).
        est_size = _estimate_size_bytes(selected, duration)
        if est_size is not None and est_size > max_bytes:
            self._log(
                request, url, fmt_id, selected, "blocked_by_size",
                f"Estimated size {est_size} > max {max_bytes}",
            )
            return Response(
                {"detail": "File too large for this server", "estimated_size_bytes": est_size, "max_bytes": max_bytes},
                status=413,
            )

        # Per-request work directory: isolates concurrent downloads of the same
        # media and gives a single thing to remove on every exit path.
        workdir = tempfile.TemporaryDirectory(prefix="dl-", dir=tmp_dir)
        handed_to_stream = False
        try:
            ydl_opts = {
                "format": wanted,
                "quiet": True,
                "no_warnings": True,
                "noprogress": True,
                "socket_timeout": timeout,
                "retries": 3,
                "outtmpl": os.path.join(workdir.name, "%(id)s.%(ext)s"),
                "max_filesize": max_bytes,  # cap applied by yt-dlp during download
                "concurrent_fragment_downloads": 1,
                "http_chunk_size": 1024 * 1024,  # 1MB chunks to reduce memory
                "noplaylist": True,
            }
            try:
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    result = ydl.extract_info(url, download=True)
            except Exception as e:
                self._log(request, url, fmt_id, selected, "download_error", str(e))
                return Response({"detail": "Download failed", "error": str(e)}, status=400)

            filepath = self._downloaded_path(result)
            # Also covers a download that yt-dlp skipped without raising.
            if not filepath or not os.path.exists(filepath):
                return Response({"detail": "Downloaded file not found"}, status=500)

            try:
                actual_size = os.path.getsize(filepath)
            except OSError:
                actual_size = None

            # Re-check the limit on the real file: the pre-check is skipped
            # when no estimate was available.
            if actual_size is not None and actual_size > max_bytes:
                self._log(
                    request, url, fmt_id, selected, "blocked_by_size",
                    f"Downloaded size {actual_size} > max {max_bytes}",
                )
                return Response(
                    {"detail": "File too large for this server", "estimated_size_bytes": actual_size, "max_bytes": max_bytes},
                    status=413,
                )

            # Build a safe filename; slugify can return "" (e.g. for titles
            # without ASCII letters), so fall back to a generic stem.
            base_title = info.get("title") or "video"
            ext = os.path.splitext(filepath)[1].lstrip(".") or (selected.get("ext") or "bin")
            stem = slugify(base_title)[:80] or "video"
            safe_name = f"{stem}.{ext}"

            # Log success. Deliberately best-effort: a logging failure must not
            # prevent delivering a file that was downloaded successfully.
            try:
                selected_info = dict(selected)
                if actual_size is not None:
                    selected_info["filesize"] = actual_size
                self._log(request, url, fmt_id, selected_info, "success")
            except Exception:
                pass

            resp = StreamingHttpResponse(
                self._stream_then_discard(filepath, workdir),
                content_type="application/octet-stream",
            )
            resp["Content-Disposition"] = f'attachment; filename="{safe_name}"'
            if actual_size is not None:
                resp["Content-Length"] = str(actual_size)
            resp["X-Content-Type-Options"] = "nosniff"
            # From here on the generator owns the cleanup of workdir.
            handed_to_stream = True
            return resp
        finally:
            if not handed_to_stream:
                try:
                    workdir.cleanup()
                except OSError:
                    pass
# Simple stats view (aggregations for UI charts)
class DownloaderStatsView(APIView):
    """Expose grouped counts over the downloader log for UI charts."""

    permission_classes = [AllowAny]

    @extend_schema(
        tags=["downloader"],
        operation_id="downloader_stats",
        summary="Aggregated downloader statistics",
        description="Returns top extensions, requested formats, codecs and audio/video split.",
        responses={
            200: inline_serializer(
                name="DownloaderStats",
                fields={
                    "top_ext": serializers.ListField(
                        child=inline_serializer(
                            name="ExtCount",
                            fields={
                                "ext": serializers.CharField(allow_null=True),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                    "top_requested_format": serializers.ListField(
                        child=inline_serializer(
                            name="RequestedFormatCount",
                            fields={
                                "requested_format": serializers.CharField(allow_null=True),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                    "top_vcodec": serializers.ListField(
                        child=inline_serializer(
                            name="VCodecCount",
                            fields={
                                "vcodec": serializers.CharField(allow_null=True),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                    "top_acodec": serializers.ListField(
                        child=inline_serializer(
                            name="ACodecCount",
                            fields={
                                "acodec": serializers.CharField(allow_null=True),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                    "audio_vs_video": serializers.ListField(
                        child=inline_serializer(
                            name="AudioVsVideo",
                            fields={
                                "is_audio_only": serializers.BooleanField(),
                                "count": serializers.IntegerField(),
                            },
                        )
                    ),
                },
            )
        },
    )
    def get(self, request):
        """GET to retrieve aggregated downloader statistics.

        Each entry is a list of ``{<field>: value, "count": n}`` rows ordered
        by descending count; the "top_*" lists are capped at 10 rows.
        """

        def counts_by(field, limit=None):
            # Group log rows by `field`, count them, most frequent first.
            rows = (
                DownloaderModel.objects.values(field)
                .annotate(count=Count("id"))
                .order_by("-count")
            )
            return list(rows if limit is None else rows[:limit])

        payload = {
            "top_ext": counts_by("ext", 10),
            "top_requested_format": counts_by("requested_format", 10),
            "top_vcodec": counts_by("vcodec", 10),
            "top_acodec": counts_by("acodec", 10),
            "audio_vs_video": counts_by("is_audio_only"),
        }
        return Response(payload)
# Minimal placeholder so existing URL doesn't break; prefer using automatic logs above.
class DownloaderLogView(APIView):
    """Placeholder endpoint that only points callers at the formats/download endpoints."""

    permission_classes = [AllowAny]

    @extend_schema(
        tags=["downloader"],
        operation_id="downloader_log_helper",
        summary="Deprecated helper",
        description="Use /api/downloader/formats/ then /api/downloader/download/.",
        responses={
            200: inline_serializer(
                name="LogHelper",
                fields={"detail": serializers.CharField()},
            )
        },
    )
    def post(self, request):
        """POST to the deprecated log helper endpoint.

        Performs no logging itself; always answers 200 with a hint message.
        """
        hint = {"detail": "Use /api/downloader/formats/ then /api/downloader/download/."}
        return Response(hint, status=status.HTTP_200_OK)