Files
vontor-cz/backend/thirdparty/downloader/views.py
Brunobrno 264f0116ae Add playlist support to downloader API and frontend
Enhanced the downloader backend and frontend to support playlist URLs for video info and downloads. The API now returns structured playlist information, allows selecting specific videos for download, and returns a ZIP file for playlist downloads. Updated OpenAPI types, removed deprecated parameters (start_time, end_time, playlist_items), and improved Content Security Policy handling in nginx. Refactored frontend to handle playlist selection and updated generated API models accordingly.
2025-12-25 04:54:27 +01:00

618 lines
26 KiB
Python

# ---------------------- Inline serializers for documentation only ----------------------
# Using inline_serializer to avoid creating new files.
import yt_dlp
import tempfile
import os
import shutil
import mimetypes
import base64
import urllib.request
import zipfile
import requests
from urllib.parse import urlparse
from rest_framework import serializers
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.permissions import IsAuthenticated, AllowAny
from drf_spectacular.utils import extend_schema, inline_serializer
from drf_spectacular.types import OpenApiTypes
from django.conf import settings
from django.http import StreamingHttpResponse
from django.utils.text import slugify
# NEW: aggregations and timeseries helpers
from django.db import models
from django.utils import timezone
from django.db.models.functions import TruncDay, TruncHour
from .models import DownloaderRecord
# Help text for the output-container option. The listed formats are examples
# only: callers may pass any extension that ffmpeg supports.
FORMAT_HELP = "".join(
    (
        "Container format for the output file. Common formats: ",
        "mp4 (H.264 + AAC, most compatible), ",
        "mkv (flexible, lossless container), ",
        "webm (VP9/AV1 + Opus), ",
        "flv (legacy), mov (Apple-friendly), ",
        "avi (older), ogg, m4a (audio only), mp3 (audio only). ",
        "The extension will be validated by ffmpeg during conversion.",
    )
)
# Names of the helper files this view writes into the per-request temp directory.
_COOKIE_FILE_NAME = "cookies.txt"
_PLAYLIST_ARCHIVE_NAME = "playlist.zip"
# Codecs passed through to FFmpegExtractAudio; anything else falls back to mp3.
_AUDIO_CODECS = ("mp3", "m4a", "opus", "vorbis", "wav")
# Containers for which subtitle embedding is allowed by this view.
_SUBTITLE_CONTAINERS = ("mkv", "mp4")
# Suffixes of yt-dlp's in-progress/bookkeeping files; never served to clients.
_PARTIAL_SUFFIXES = (".part", ".ytdl")
_TRUE_STRINGS = frozenset({"1", "true", "yes", "on"})
_MAX_EXT_LENGTH = 16
_STREAM_CHUNK_SIZE = 8192


class _DownloadError(Exception):
    """Abort ``Downloader.post`` with a 400 response carrying this message."""


def _is_playlist(info):
    """Return True when the yt-dlp info dict carries a non-null ``entries`` key."""
    return "entries" in info and info.get("entries") is not None


def _as_bool(value):
    """Interpret a request flag; form-encoded strings like "false" must be falsy."""
    if isinstance(value, str):
        return value.strip().lower() in _TRUE_STRINGS
    return bool(value)


def _parse_selected_videos(raw):
    """Normalise ``selected_videos`` to a list of positive ints, or None.

    Accepts a list/tuple of ints or digit strings, or one comma-separated
    string. Raises ValueError for anything else so that arbitrary text is
    never forwarded into yt-dlp's ``playlist_items`` option.
    """
    if raw is None or raw == "":
        return None
    if isinstance(raw, str):
        raw = raw.split(",")
    if not isinstance(raw, (list, tuple)):
        raise ValueError("selected_videos must be a list")
    numbers = []
    for item in raw:
        if isinstance(item, int) and not isinstance(item, bool):
            number = item
        elif isinstance(item, str) and item.strip().isdigit():
            number = int(item.strip())
        else:
            raise ValueError(f"invalid playlist index: {item!r}")
        if number < 1:
            raise ValueError(f"invalid playlist index: {item!r}")
        numbers.append(number)
    return numbers or None


def _build_format_selector(video_quality, audio_quality):
    """Build the yt-dlp format selector from the optional quality caps."""
    if video_quality is None and audio_quality is None:
        return "b/bv+ba"
    video = "bv" if video_quality is None else f"bv[height<={video_quality}]"
    audio = "ba" if audio_quality is None else f"ba[abr<={audio_quality}]"
    return f"{video}+{audio}/b"


def _fetch_thumbnail_data_url(thumbnail_url):
    """Download a thumbnail and return it as a base64 ``data:`` URL, or None."""
    if not thumbnail_url:
        return None
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
    }
    try:
        response = requests.get(thumbnail_url, headers=headers, timeout=10)
        response.raise_for_status()
        content_type = response.headers.get('content-type', '')
        if not content_type.startswith('image/'):
            return None
        image_data = base64.b64encode(response.content).decode('utf-8')
        return f"data:{content_type};base64,{image_data}"
    except Exception:
        # Deliberate best effort: a missing thumbnail must not fail the request.
        return None


def _summarize_video(video_data):
    """Reduce a yt-dlp video info dict to the fields exposed by the API."""
    formats = video_data.get("formats", []) or []

    # Video: unique heights, highest quality first.
    heights = {
        int(f.get("height"))
        for f in formats
        if f.get("vcodec") != "none" and isinstance(f.get("height"), int)
    }

    # Audio-only formats: unique bitrates (abr, falling back to tbr).
    bitrates = set()
    for f in formats:
        if f.get("acodec") == "none" or f.get("vcodec") != "none":
            continue
        abr, tbr = f.get("abr"), f.get("tbr")
        value = None
        if isinstance(abr, (int, float)):
            value = int(abr)
        elif isinstance(tbr, (int, float)):
            value = int(tbr)
        if value and value > 0:
            bitrates.add(value)

    return {
        "id": video_data.get("id", ""),
        "title": video_data.get("title", ""),
        "duration": video_data.get("duration"),
        "thumbnail": _fetch_thumbnail_data_url(video_data.get("thumbnail")),
        "video_resolutions": [f"{h}p" for h in sorted(heights, reverse=True)],
        "audio_resolutions": [f"{b}kbps" for b in sorted(bitrates, reverse=True)],
    }


def _list_output_files(tmpdir):
    """Return ``(name, path)`` for every finished media file in ``tmpdir``."""
    found = []
    for name in os.listdir(tmpdir):
        if name in (_PLAYLIST_ARCHIVE_NAME, _COOKIE_FILE_NAME):
            continue
        if name.startswith(".") or name.endswith(_PARTIAL_SUFFIXES):
            continue
        path = os.path.join(tmpdir, name)
        if os.path.isfile(path):
            found.append((name, path))
    return found


def _locate_single_output(tmpdir, expected_path):
    """Return the downloaded file, preferring ``expected_path``.

    Post-processing can yield a different extension than requested (audio
    extraction with a non-audio ``ext`` falls back to mp3), so when the
    expected file is absent the largest finished file in ``tmpdir`` is used.
    Returns None when nothing was produced.
    """
    if os.path.isfile(expected_path):
        return expected_path
    candidates = [path for _, path in _list_output_files(tmpdir)]
    if not candidates:
        return None
    return max(candidates, key=os.path.getsize)


def _stream_then_cleanup(path, temp_dir, chunk_size=_STREAM_CHUNK_SIZE):
    """Yield ``path`` in chunks, then remove ``temp_dir`` (which contains it)."""
    try:
        with open(path, "rb") as handle:
            yield from iter(lambda: handle.read(chunk_size), b"")
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)


class Downloader(APIView):
    """Public endpoint that inspects (GET) and downloads (POST) media via yt-dlp.

    GET returns title, duration, thumbnail and available qualities for a
    single video or for each entry of a playlist. POST downloads the media
    into a per-request temp directory and streams it back (one file for a
    single video, a ZIP for a playlist); the directory is removed once the
    stream finishes or on any error.
    """

    permission_classes = [AllowAny]
    authentication_classes = []

    @extend_schema(
        tags=["downloader", "public"],
        summary="Get video info from URL",
        description="""
Fetch detailed information about a video or playlist from supported platforms.
**Supported platforms:** YouTube, TikTok, Vimeo, Twitter, Instagram, Facebook, Reddit, and many more.
**Returns:**
For single videos:
- Video title, duration, and thumbnail
- Available video qualities/resolutions
- Available audio formats
For playlists:
- Array of videos with the same info structure as single videos
- Each video includes title, duration, thumbnail, and available qualities
**Usage:**
```
GET /api/downloader/download/?url=https://youtube.com/watch?v=VIDEO_ID
GET /api/downloader/download/?url=https://youtube.com/playlist?list=PLAYLIST_ID
```
""",
        parameters=[
            inline_serializer(
                name="VideoInfoParams",
                fields={
                    "url": serializers.URLField(
                        help_text="Video/Playlist URL from YouTube, TikTok, Vimeo, etc. Must be a valid URL from a supported platform."
                    ),
                },
            )
        ],
        responses={
            200: inline_serializer(
                name="VideoInfoResponse",
                fields={
                    "is_playlist": serializers.BooleanField(help_text="Whether the URL is a playlist"),
                    "playlist_title": serializers.CharField(allow_null=True, help_text="Playlist title (if applicable)"),
                    "playlist_count": serializers.IntegerField(allow_null=True, help_text="Number of videos in playlist (if applicable)"),
                    "videos": serializers.ListField(
                        child=inline_serializer(
                            name="VideoInfo",
                            fields={
                                "id": serializers.CharField(help_text="Video ID"),
                                "title": serializers.CharField(help_text="Video title"),
                                "duration": serializers.IntegerField(allow_null=True, help_text="Video duration in seconds (null if unavailable)"),
                                "thumbnail": serializers.CharField(allow_null=True, help_text="Base64 encoded thumbnail image as data URL (e.g., data:image/jpeg;base64,...)"),
                                "video_resolutions": serializers.ListField(
                                    child=serializers.CharField(),
                                    help_text="List of available video quality options (e.g., '1080p', '720p', '480p')"
                                ),
                                "audio_resolutions": serializers.ListField(
                                    child=serializers.CharField(),
                                    help_text="List of available audio format options"
                                ),
                            }
                        ),
                        help_text="Array of video information (single video for individual URLs, multiple for playlists)"
                    ),
                },
            ),
            400: inline_serializer(
                name="ErrorResponse",
                fields={"error": serializers.CharField(help_text="Error message describing what went wrong")},
            ),
        },
    )
    def get(self, request):
        """Return metadata for the video or playlist at ``url``.

        ``url`` is read from the request body or, failing that, the query
        string. Responds 400 when it is missing or yt-dlp cannot extract it.
        Playlist entries that fail to extract are skipped, not fatal.
        """
        url = request.data.get("url") or request.query_params.get("url")
        if not url:
            return Response({"error": "URL is required"}, status=400)

        ydl_options = {
            "quiet": True,
            # NOTE(review): yt-dlp's Python API option appears to be spelled
            # "nocheckcertificate"; this key may be silently ignored. Left
            # unchanged because enabling it would disable TLS verification.
            "no_check_certificates": True,
            "extract_flat": False,  # Extract full info for playlists too
            "ignoreerrors": False,  # Don't ignore errors to get accurate info
        }
        try:
            with yt_dlp.YoutubeDL(ydl_options) as ydl:
                info = ydl.extract_info(url, download=False)
        except Exception as e:
            return Response({"error": f"Failed to retrieve video info: {str(e)}"}, status=400)
        if not info:
            return Response({"error": "Failed to retrieve video info: no metadata returned"}, status=400)

        if not _is_playlist(info):
            return Response({
                "is_playlist": False,
                "playlist_title": None,
                "playlist_count": None,
                "videos": [_summarize_video(info)],
            }, status=200)

        videos = []
        for entry in info.get("entries") or []:
            if not entry:  # Skip None entries
                continue
            try:
                if not entry.get("formats"):
                    # Entry lacks format data: re-extract this video on its own.
                    with yt_dlp.YoutubeDL(ydl_options) as ydl:
                        entry = ydl.extract_info(entry.get("url") or entry.get("webpage_url"), download=False)
                videos.append(_summarize_video(entry))
            except Exception:
                # Skip videos that fail to extract, but don't fail the entire request
                continue
        return Response({
            "is_playlist": True,
            "playlist_title": info.get("title"),
            "playlist_count": len(videos),
            "videos": videos,
        }, status=200)

    @extend_schema(
        tags=["downloader", "public"],
        summary="Download video or playlist from URL",
        description="""
Download video/playlist with optional quality constraints and container format conversion.
**For Playlists:**
- Returns a ZIP file containing all selected videos
- Use `selected_videos` to specify which videos to download (e.g., [1,3,5] or [1,2,3,4,5])
- If `selected_videos` is not provided, all videos in the playlist will be downloaded
**Quality Parameters (optional):**
- If not specified, yt-dlp will automatically select the best available quality.
- `video_quality`: Maximum video height in pixels (e.g., 1080, 720, 480).
- `audio_quality`: Maximum audio bitrate in kbps (e.g., 320, 192, 128).
**Format/Extension:**
- Any format supported by ffmpeg (mp4, mkv, webm, avi, mov, flv, m4a, mp3, etc.).
- Defaults to 'mp4' if not specified.
- The conversion is handled automatically by ffmpeg in the background.
**Advanced Options:**
- `subtitles`: Download subtitles (language codes like 'en,cs' or 'all')
- `embed_subtitles`: Embed subtitles into video file
- `embed_thumbnail`: Embed thumbnail as cover art
- `extract_audio`: Extract audio only (ignores video quality)
- `cookies`: Browser cookies for age-restricted content (Netscape format)
""",
        request=inline_serializer(
            name="DownloadRequest",
            fields={
                "url": serializers.URLField(help_text="Video/Playlist URL to download from supported platforms"),
                "ext": serializers.CharField(
                    required=False,
                    default="mp4",
                    help_text=FORMAT_HELP,
                ),
                "video_quality": serializers.IntegerField(
                    required=False,
                    allow_null=True,
                    help_text="Optional: Target max video height in pixels (e.g. 1080, 720). If omitted, best quality is selected."
                ),
                "audio_quality": serializers.IntegerField(
                    required=False,
                    allow_null=True,
                    help_text="Optional: Target max audio bitrate in kbps (e.g. 320, 192, 128). If omitted, best quality is selected."
                ),
                "selected_videos": serializers.ListField(
                    child=serializers.IntegerField(),
                    required=False,
                    allow_null=True,
                    allow_empty=True,
                    help_text="For playlists: specify which videos to download as array of numbers (e.g., [1,3,5]). If omitted, all videos are downloaded."
                ),
                "subtitles": serializers.CharField(
                    required=False,
                    allow_null=True,
                    allow_blank=True,
                    help_text="Language codes (e.g., 'en', 'cs', 'en,cs') or 'all' for all available subtitles"
                ),
                "embed_subtitles": serializers.BooleanField(
                    required=False,
                    default=False,
                    help_text="Embed subtitles into the video file (requires mkv or mp4 container)"
                ),
                "embed_thumbnail": serializers.BooleanField(
                    required=False,
                    default=False,
                    help_text="Embed thumbnail as cover art in the file"
                ),
                "extract_audio": serializers.BooleanField(
                    required=False,
                    default=False,
                    help_text="Extract audio only, ignoring video quality settings"
                ),
                "cookies": serializers.CharField(
                    required=False,
                    allow_null=True,
                    allow_blank=True,
                    help_text="Browser cookies in Netscape format for age-restricted content. Export from browser extensions like 'Get cookies.txt'"
                ),
            },
        ),
        responses={
            200: OpenApiTypes.BINARY,
            400: inline_serializer(
                name="DownloadErrorResponse",
                fields={
                    "error": serializers.CharField(),
                },
            ),
        },
    )
    def post(self, request):
        """Download the requested media and stream it back as an attachment.

        All request fields are validated before any temp directory is
        created. Every failure after that point removes the directory and
        returns a 400 with an ``error`` message. On success the directory is
        removed by the streaming generator once the body has been sent.
        """
        data = request.data
        url = data.get("url")
        ext = data.get("ext", "mp4")

        # Optional quality caps - only parsed when provided.
        video_quality = None
        audio_quality = None
        if data.get("video_quality"):
            try:
                video_quality = int(data.get("video_quality"))
            except (ValueError, TypeError):
                return Response({"error": "Invalid video_quality parameter, must be an integer!"}, status=400)
        if data.get("audio_quality"):
            try:
                audio_quality = int(data.get("audio_quality"))
            except (ValueError, TypeError):
                return Response({"error": "Invalid audio_quality parameter, must be an integer!"}, status=400)

        if not url:
            return Response({"error": "URL is required"}, status=400)
        if not ext or not isinstance(ext, str):
            return Response({"error": "Extension must be a valid string"}, status=400)
        # ``ext`` ends up in a filesystem path and in the Content-Disposition
        # header, so restrict it to a short ASCII alphanumeric token.
        ext = ext.strip().lstrip(".").lower()
        if not (ext.isascii() and ext.isalnum()) or len(ext) > _MAX_EXT_LENGTH:
            return Response({"error": "Extension must be a valid string"}, status=400)

        try:
            selected_videos = _parse_selected_videos(data.get("selected_videos"))
        except ValueError:
            return Response({"error": "selected_videos must be a list of positive integers"}, status=400)

        subtitles = data.get("subtitles")
        if subtitles is not None and not isinstance(subtitles, str):
            return Response({"error": "subtitles must be a string of language codes"}, status=400)
        cookies = data.get("cookies")
        if cookies is not None and not isinstance(cookies, str):
            return Response({"error": "Invalid cookies format: expected a string"}, status=400)

        embed_subtitles = _as_bool(data.get("embed_subtitles", False))
        embed_thumbnail = _as_bool(data.get("embed_thumbnail", False))
        extract_audio = _as_bool(data.get("extract_audio", False))

        if embed_subtitles and subtitles and ext not in _SUBTITLE_CONTAINERS:
            return Response({"error": "Subtitle embedding requires mkv or mp4 format"}, status=400)

        # Ensure base tmp dir exists
        os.makedirs(settings.DOWNLOADER_TMP_DIR, exist_ok=True)
        tmpdir = tempfile.mkdtemp(prefix="downloader_", dir=settings.DOWNLOADER_TMP_DIR)
        try:
            ydl_info_options = {
                "quiet": True,
                # NOTE(review): see get(); this key may be ignored by yt-dlp.
                "no_check_certificates": True,
                "extract_flat": False,
            }
            ydl_options = {
                "format": _build_format_selector(video_quality, audio_quality),
                "merge_output_format": ext,
                "quiet": True,
                "no_check_certificates": True,
                "max_filesize": settings.DOWNLOADER_MAX_SIZE_BYTES,
                "postprocessors": [],
            }

            # Cookies are applied to the metadata probe as well as the
            # download; otherwise restricted URLs fail before being used.
            if cookies:
                cookie_file = os.path.join(tmpdir, _COOKIE_FILE_NAME)
                try:
                    with open(cookie_file, "w", encoding="utf-8") as handle:
                        handle.write(cookies)
                except Exception as e:
                    raise _DownloadError(f"Invalid cookies format: {str(e)}") from e
                ydl_info_options["cookiefile"] = cookie_file
                ydl_options["cookiefile"] = cookie_file

            # First, check if this is a playlist
            try:
                with yt_dlp.YoutubeDL(ydl_info_options) as ydl:
                    info = ydl.extract_info(url, download=False)
            except Exception as e:
                raise _DownloadError(f"Failed to retrieve URL info: {str(e)}") from e
            if not info:
                raise _DownloadError("Failed to retrieve URL info: no metadata returned")
            is_playlist = _is_playlist(info)

            # Subtitles
            if subtitles:
                ydl_options["writesubtitles"] = True
                if subtitles.lower() == "all":
                    ydl_options["writeautomaticsub"] = True
                    ydl_options["subtitleslangs"] = ["all"]
                else:
                    ydl_options["subtitleslangs"] = [lang.strip() for lang in subtitles.split(",")]
            # Embed subtitles (container already validated above)
            if embed_subtitles and subtitles:
                ydl_options["postprocessors"].append({"key": "FFmpegEmbedSubtitle"})
            # Embed thumbnail
            if embed_thumbnail:
                ydl_options["writethumbnail"] = True
                ydl_options["postprocessors"].append({"key": "EmbedThumbnail"})
            # Extract audio only
            if extract_audio:
                ydl_options["postprocessors"].append({
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": ext if ext in _AUDIO_CODECS else "mp3",
                })
            # Playlist items (validated integers only)
            if is_playlist and selected_videos:
                ydl_options["playlist_items"] = ",".join(str(num) for num in selected_videos)
            # Add remux postprocessor if not extracting audio
            if not extract_audio:
                ydl_options["postprocessors"].append(
                    {"key": "FFmpegVideoRemuxer", "preferedformat": ext}
                )

            if is_playlist:
                return self._playlist_response(url, info, tmpdir, ydl_options, selected_videos)
            return self._single_video_response(url, ext, tmpdir, ydl_options)
        except Exception as e:
            shutil.rmtree(tmpdir, ignore_errors=True)
            return Response({"error": str(e)}, status=400)

    def _playlist_response(self, url, info, tmpdir, ydl_options, selected_videos):
        """Download a playlist into ``tmpdir`` and return it as a streamed ZIP."""
        ydl_options["outtmpl"] = os.path.join(tmpdir, "%(playlist_index)02d - %(title)s.%(ext)s")
        with yt_dlp.YoutubeDL(ydl_options) as ydl:
            ydl.download([url])

        downloaded_files = _list_output_files(tmpdir)
        if not downloaded_files:
            raise _DownloadError("No files were downloaded from the playlist")

        zip_path = os.path.join(tmpdir, _PLAYLIST_ARCHIVE_NAME)
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for filename, file_path in downloaded_files:
                zipf.write(file_path, filename)

        # Stats for database: count only the entries that were requested.
        # Assumes entry order matches the 1-based playlist position.
        chosen = set(selected_videos or ())
        total_duration = 0
        for position, entry in enumerate(info.get("entries") or (), start=1):
            if not entry or (chosen and position not in chosen):
                continue
            total_duration += int(entry.get("duration") or 0)
        total_size = os.path.getsize(zip_path)
        DownloaderRecord.objects.create(
            url=url,
            format="zip",
            length_of_media=total_duration,
            file_size=total_size,
        )

        # slugify() may return "" (e.g. non-ASCII titles); keep a usable name.
        playlist_title = slugify(info.get("title") or "") or "playlist"
        zip_filename = f"{playlist_title}.zip"
        response = StreamingHttpResponse(
            streaming_content=_stream_then_cleanup(zip_path, tmpdir),
            content_type="application/zip",
        )
        response["Content-Length"] = str(total_size)
        response["Content-Disposition"] = f'attachment; filename="{zip_filename}"'
        return response

    def _single_video_response(self, url, ext, tmpdir, ydl_options):
        """Download one video into ``tmpdir`` and return it as a streamed file."""
        ydl_options["outtmpl"] = os.path.join(tmpdir, "download.%(ext)s")
        with yt_dlp.YoutubeDL(ydl_options) as ydl:
            info = ydl.extract_info(url, download=True)
            if not info:
                raise _DownloadError("Download produced no media information")
            base = ydl.prepare_filename(info)

        expected = base if base.endswith(f".{ext}") else os.path.splitext(base)[0] + f".{ext}"
        file_path = _locate_single_output(tmpdir, expected)
        if file_path is None:
            raise _DownloadError("No file was downloaded")
        # Report the extension of the file actually produced.
        actual_ext = os.path.splitext(file_path)[1].lstrip(".") or ext

        # Stats before streaming
        duration = int(info.get("duration") or 0)
        size = os.path.getsize(file_path)
        DownloaderRecord.objects.create(
            url=url,
            format=actual_ext,
            length_of_media=duration,
            file_size=size,
        )

        # slugify() may return "" (e.g. non-ASCII titles); keep a usable name.
        safe_title = slugify(info.get("title") or "") or "video"
        filename = f"{safe_title}.{actual_ext}"
        content_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
        response = StreamingHttpResponse(
            streaming_content=_stream_then_cleanup(file_path, tmpdir),
            content_type=content_type,
        )
        if size:
            response["Content-Length"] = str(size)
        response["Content-Disposition"] = f'attachment; filename="{filename}"'
        return response
# ---------------- STATS FOR GRAPHS ----------------
from .serializers import DownloaderStatsSerializer
from django.db.models import Count, Avg, Sum
class DownloaderStats(APIView):
    """
    Return aggregated statistics computed from the DownloaderRecord table.
    """

    permission_classes = [AllowAny]
    authentication_classes = []

    @extend_schema(
        tags=["downloader", "public"],
        summary="Get aggregated downloader statistics",
        responses={200: DownloaderStatsSerializer},
    )
    def get(self, request):
        """Respond with totals, averages and the most frequent output format."""
        records = DownloaderRecord.objects

        # Aggregate the numeric columns in a single query.
        stats = records.aggregate(
            total_downloads=Count("id"),
            avg_length_of_media=Avg("length_of_media"),
            avg_file_size=Avg("file_size"),
            total_length_of_media=Sum("length_of_media"),
            total_file_size=Sum("file_size"),
        )

        # Most frequent format: group by format, order by count descending.
        per_format = records.values("format").annotate(count=Count("id"))
        top_row = per_format.order_by("-count").first()
        if top_row:
            stats["most_common_format"] = top_row["format"]
        else:
            # No records exist yet.
            stats["most_common_format"] = None

        return Response(DownloaderStatsSerializer(stats).data)