# vontor-cz/backend/thirdparty/downloader/views.py

# ---------------------- Inline serializers for documentation only ----------------------
# Using inline_serializer to avoid creating new files.

import yt_dlp
import tempfile
import os
import shutil
import mimetypes
import base64
import urllib.request
import zipfile
import requests
from urllib.parse import urlparse

from rest_framework import serializers
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework.permissions import IsAuthenticated, AllowAny
from drf_spectacular.utils import extend_schema, inline_serializer
from drf_spectacular.types import OpenApiTypes
from django.conf import settings
from django.http import StreamingHttpResponse
from django.utils.text import slugify
# NEW: aggregations and timeseries helpers
from django.db import models
from django.utils import timezone
from django.db.models.functions import TruncDay, TruncHour
from .models import DownloaderRecord

# Help text for the `ext` request field. The list below is only a set of
# common examples; the text itself states that ffmpeg validates the extension.
FORMAT_HELP = (
    "Container format for the output file. Common formats: "
    + ", ".join(
        (
            "mp4 (H.264 + AAC, most compatible)",
            "mkv (flexible, lossless container)",
            "webm (VP9/AV1 + Opus)",
            "flv (legacy)",
            "mov (Apple-friendly)",
            "avi (older)",
            "ogg",
            "m4a (audio only)",
            "mp3 (audio only)",
        )
    )
    + ". The extension will be validated by ffmpeg during conversion."
)

class Downloader(APIView):
    permission_classes = [AllowAny]
    authentication_classes = []

    @extend_schema(
        tags=["downloader", "public"],
        summary="Get video info from URL",
        description="""
        Fetch detailed information about a video or playlist from supported platforms.

        **Supported platforms:** YouTube, TikTok, Vimeo, Twitter, Instagram, Facebook, Reddit, and many more.

        **Returns:**
        For single videos:
        - Video title, duration, and thumbnail
        - Available video qualities/resolutions
        - Available audio formats

        For playlists:
        - Array of videos with the same info structure as single videos
        - Each video includes title, duration, thumbnail, and available qualities

        **Usage:**
        ```
        GET /api/downloader/download/?url=https://youtube.com/watch?v=VIDEO_ID
        GET /api/downloader/download/?url=https://youtube.com/playlist?list=PLAYLIST_ID
        ```
        """,
        parameters=[
            inline_serializer(
                name="VideoInfoParams",
                fields={
                    "url": serializers.URLField(
                        help_text="Video/Playlist URL from YouTube, TikTok, Vimeo, etc. Must be a valid URL from a supported platform."
                    ),
                },
            )
        ],
        responses={
            200: inline_serializer(
                name="VideoInfoResponse",
                fields={
                    "is_playlist": serializers.BooleanField(help_text="Whether the URL is a playlist"),
                    "playlist_title": serializers.CharField(allow_null=True, help_text="Playlist title (if applicable)"),
                    "playlist_count": serializers.IntegerField(allow_null=True, help_text="Number of videos in playlist (if applicable)"),
                    "videos": serializers.ListField(
                        child=inline_serializer(
                            name="VideoInfo",
                            fields={
                                "id": serializers.CharField(help_text="Video ID"),
                                "title": serializers.CharField(help_text="Video title"),
                                "duration": serializers.IntegerField(allow_null=True, help_text="Video duration in seconds (null if unavailable)"),
                                "thumbnail": serializers.CharField(allow_null=True, help_text="Base64 encoded thumbnail image as data URL (e.g., data:image/jpeg;base64,...)"),
                                "video_resolutions": serializers.ListField(
                                    child=serializers.CharField(),
                                    help_text="List of available video quality options (e.g., '1080p', '720p', '480p')"
                                ),
                                "audio_resolutions": serializers.ListField(
                                    child=serializers.CharField(),
                                    help_text="List of available audio format options"
                                ),
                            }
                        ),
                        help_text="Array of video information (single video for individual URLs, multiple for playlists)"
                    ),
                },
            ),
            400: inline_serializer(
                name="ErrorResponse",
                fields={"error": serializers.CharField(help_text="Error message describing what went wrong")},
            ),
        },
    )
    def get(self, request):
        """Describe the video, or every video of the playlist, behind ``url``.

        The URL is taken from the request body first and from the query string
        second. Metadata is read through yt-dlp with ``download=False``, so no
        media is downloaded; only thumbnails are fetched and inlined as
        base64 data URLs.

        Returns:
            Response: 200 with ``is_playlist``, ``playlist_title``,
            ``playlist_count`` and ``videos``; 400 with ``error`` when the URL
            is missing or yt-dlp fails to extract it.
        """
        target_url = request.data.get("url") or request.query_params.get("url")
        if not target_url:
            return Response({"error": "URL is required"}, status=400)

        # NOTE(review): yt-dlp's Python API appears to name the SSL switch
        # "nocheckcertificate"; confirm "no_check_certificates" has any effect.
        extractor_opts = {
            "quiet": True,
            "no_check_certificates": True,  # Bypass SSL verification in Docker
            "extract_flat": False,  # Extract full info for playlists too
            "ignoreerrors": False,  # Don't ignore errors to get accurate info
        }

        try:
            with yt_dlp.YoutubeDL(extractor_opts) as extractor:
                info = extractor.extract_info(target_url, download=False)
        except Exception as exc:
            return Response({"error": f"Failed to retrieve video info: {str(exc)}"}, status=400)

        def video_heights(formats):
            """Return '<height>p' labels, tallest first, for formats carrying video."""
            unique_heights = set()
            for fmt in formats:
                height = fmt.get("height")
                if fmt.get("vcodec") != "none" and isinstance(height, int):
                    unique_heights.add(int(height))
            return [f"{height}p" for height in sorted(unique_heights, reverse=True)]

        def audio_bitrates(formats):
            """Return '<rate>kbps' labels, highest first, for audio-only formats."""
            unique_rates = set()
            for fmt in formats:
                if fmt.get("acodec") == "none" or fmt.get("vcodec") != "none":
                    continue  # not an audio-only format
                rate = None
                # Prefer the audio bitrate; use the total bitrate only when
                # the audio bitrate is not numeric.
                for candidate in (fmt.get("abr"), fmt.get("tbr")):
                    if isinstance(candidate, (int, float)):
                        rate = int(candidate)
                        break
                if rate and rate > 0:
                    unique_rates.add(rate)
            return [f"{rate}kbps" for rate in sorted(unique_rates, reverse=True)]

        def thumbnail_data_url(source_url):
            """Fetch the thumbnail and return it as a data URL, or None on any failure."""
            if not source_url:
                return None
            request_headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
            }
            try:
                reply = requests.get(source_url, headers=request_headers, timeout=10)
                reply.raise_for_status()
                mime = reply.headers.get('content-type', '')
                if not mime.startswith('image/'):
                    return None
                encoded = base64.b64encode(reply.content).decode('utf-8')
                return f"data:{mime};base64,{encoded}"
            except Exception:
                # Thumbnails are best-effort: a failed fetch must not fail the request.
                return None

        def summarize(video_data):
            """Build the public info dict for one yt-dlp video entry."""
            formats = video_data.get("formats", []) or []
            resolutions = video_heights(formats)
            bitrates = audio_bitrates(formats)
            thumbnail = thumbnail_data_url(video_data.get("thumbnail"))
            return {
                "id": video_data.get("id", ""),
                "title": video_data.get("title", ""),
                "duration": video_data.get("duration"),
                "thumbnail": thumbnail,  # base64 data URL, not the remote URL
                "video_resolutions": resolutions,
                "audio_resolutions": bitrates,
            }

        # A non-None "entries" key marks the result as a playlist.
        if not ("entries" in info and info.get("entries") is not None):
            single_payload = {
                "is_playlist": False,
                "playlist_title": None,
                "playlist_count": None,
                "videos": [summarize(info)],
            }
            return Response(single_payload, status=200)

        videos = []
        for entry in info.get("entries", []):
            if not entry:
                continue  # yt-dlp may yield None entries
            try:
                if entry.get("formats"):
                    videos.append(summarize(entry))
                    continue
                # The entry lacks format data: extract it again on its own.
                with yt_dlp.YoutubeDL(extractor_opts) as extractor:
                    detailed = extractor.extract_info(
                        entry.get("url") or entry.get("webpage_url"), download=False
                    )
                    videos.append(summarize(detailed))
            except Exception:
                # One broken video must not fail the whole playlist request.
                continue

        playlist_payload = {
            "is_playlist": True,
            "playlist_title": info.get("title"),
            "playlist_count": len(videos),
            "videos": videos,
        }
        return Response(playlist_payload, status=200)


    @extend_schema(
        tags=["downloader", "public"],
        summary="Download video or playlist from URL",
        description="""
        Download video/playlist with optional quality constraints and container format conversion.

        **For Playlists:**
        - Returns a ZIP file containing all selected videos
        - Use `selected_videos` to specify which videos to download (e.g., [1,3,5] or [1,2,3,4,5])
        - If `selected_videos` is not provided, all videos in the playlist will be downloaded

        **Quality Parameters (optional):**
        - If not specified, yt-dlp will automatically select the best available quality.
        - `video_quality`: Maximum video height in pixels (e.g., 1080, 720, 480).
        - `audio_quality`: Maximum audio bitrate in kbps (e.g., 320, 192, 128).

        **Format/Extension:**
        - Any format supported by ffmpeg (mp4, mkv, webm, avi, mov, flv, m4a, mp3, etc.).
        - Defaults to 'mp4' if not specified.
        - The conversion is handled automatically by ffmpeg in the background.

        **Advanced Options:**
        - `subtitles`: Download subtitles (language codes like 'en,cs' or 'all')
        - `embed_subtitles`: Embed subtitles into video file
        - `embed_thumbnail`: Embed thumbnail as cover art
        - `extract_audio`: Extract audio only (ignores video quality)
        - `cookies`: Browser cookies for age-restricted content (Netscape format)
        """,
        request=inline_serializer(
            name="DownloadRequest",
            fields={
                "url": serializers.URLField(help_text="Video/Playlist URL to download from supported platforms"),
                "ext": serializers.CharField(
                    required=False,
                    default="mp4",
                    help_text=FORMAT_HELP,
                ),
                "video_quality": serializers.IntegerField(
                    required=False,
                    allow_null=True,
                    help_text="Optional: Target max video height in pixels (e.g. 1080, 720). If omitted, best quality is selected."
                ),
                "audio_quality": serializers.IntegerField(
                    required=False,
                    allow_null=True,
                    help_text="Optional: Target max audio bitrate in kbps (e.g. 320, 192, 128). If omitted, best quality is selected."
                ),
                "selected_videos": serializers.ListField(
                    child=serializers.IntegerField(),
                    required=False,
                    allow_null=True,
                    allow_empty=True,
                    help_text="For playlists: specify which videos to download as array of numbers (e.g., [1,3,5]). If omitted, all videos are downloaded."
                ),
                "subtitles": serializers.CharField(
                    required=False,
                    allow_null=True,
                    allow_blank=True,
                    help_text="Language codes (e.g., 'en', 'cs', 'en,cs') or 'all' for all available subtitles"
                ),
                "embed_subtitles": serializers.BooleanField(
                    required=False,
                    default=False,
                    help_text="Embed subtitles into the video file (requires mkv or mp4 container)"
                ),
                "embed_thumbnail": serializers.BooleanField(
                    required=False,
                    default=False,
                    help_text="Embed thumbnail as cover art in the file"
                ),
                "extract_audio": serializers.BooleanField(
                    required=False,
                    default=False,
                    help_text="Extract audio only, ignoring video quality settings"
                ),
                "cookies": serializers.CharField(
                    required=False,
                    allow_null=True,
                    allow_blank=True,
                    help_text="Browser cookies in Netscape format for age-restricted content. Export from browser extensions like 'Get cookies.txt'"
                ),
            },
        ),
        responses={
            200: OpenApiTypes.BINARY,
            400: inline_serializer(
                name="DownloadErrorResponse",
                fields={
                    "error": serializers.CharField(),
                },
            ),
        },
    )
    def post(self, request):
        """Download a video or playlist and stream the resulting file back.

        Request fields (read from ``request.data``): ``url`` (required),
        ``ext`` (default ``"mp4"``), ``video_quality``, ``audio_quality``,
        ``selected_videos``, ``subtitles``, ``embed_subtitles``,
        ``embed_thumbnail``, ``extract_audio`` and ``cookies``.

        A single video is streamed as one media file; a playlist is streamed
        as a ZIP archive of every file that was downloaded. A
        ``DownloaderRecord`` row is created before streaming starts. The
        per-request temporary directory is removed when the stream ends, or
        immediately when any step fails.

        Returns:
            StreamingHttpResponse on success, otherwise a 400 ``Response``
            with an ``error`` message (invalid input or download failure).
        """
        data = request.data
        url = data.get("url")
        ext = data.get("ext", "mp4")

        # Optional quality caps; falsy values (missing, "", 0) mean "no cap".
        # int() also guarantees nothing but digits reaches the format selector.
        caps = {}
        for field in ("video_quality", "audio_quality"):
            raw = data.get(field)
            try:
                caps[field] = int(raw) if raw else None
            except (ValueError, TypeError):
                return Response({"error": f"Invalid {field} parameter, must be an integer!"}, status=400)
        video_quality = caps["video_quality"]
        audio_quality = caps["audio_quality"]

        if not url:
            return Response({"error": "URL is required"}, status=400)
        if not ext or not isinstance(ext, str):
            return Response({"error": "Extension must be a valid string"}, status=400)

        # ext is user input that ends up in file paths, postprocessor options
        # and the Content-Disposition header, so restrict it to a plain token.
        ext = ext.strip().lstrip(".").lower()
        if not (ext.isascii() and ext.isalnum() and len(ext) <= 16):
            return Response({"error": "Extension must contain only letters and digits"}, status=400)

        subtitles = data.get("subtitles")
        if subtitles is not None and not isinstance(subtitles, str):
            return Response({"error": "subtitles must be a string of language codes or 'all'"}, status=400)
        subtitle_langs = [code.strip() for code in (subtitles or "").split(",") if code.strip()]

        try:
            selected_videos = self._parse_selected_videos(data.get("selected_videos"))
        except (ValueError, TypeError):
            return Response({"error": "selected_videos must be a list of integers"}, status=400)

        # Form posts deliver booleans as strings, where "false" would be truthy.
        embed_subtitles = self._as_bool(data.get("embed_subtitles", False))
        embed_thumbnail = self._as_bool(data.get("embed_thumbnail", False))
        extract_audio = self._as_bool(data.get("extract_audio", False))
        cookies = data.get("cookies")

        embed_subtitles = embed_subtitles and bool(subtitle_langs)
        if embed_subtitles and ext not in ("mkv", "mp4"):
            return Response({"error": "Subtitle embedding requires mkv or mp4 format"}, status=400)

        # All validation is done; only now allocate the per-request directory.
        os.makedirs(settings.DOWNLOADER_TMP_DIR, exist_ok=True)
        tmpdir = tempfile.mkdtemp(prefix="downloader_", dir=settings.DOWNLOADER_TMP_DIR)

        try:
            # NOTE(review): yt-dlp's Python API appears to name the SSL switch
            # "nocheckcertificate"; confirm "no_check_certificates" has any effect.
            base_options = {"quiet": True, "no_check_certificates": True}

            # Cookies are written first so the metadata lookup below can also
            # reach age-restricted content.
            if cookies:
                cookie_file = os.path.join(tmpdir, "cookies.txt")
                try:
                    with open(cookie_file, "w", encoding="utf-8") as f:
                        f.write(cookies)
                except Exception as e:
                    return self._fail(tmpdir, f"Invalid cookies format: {str(e)}")
                base_options["cookiefile"] = cookie_file

            # Look the URL up without downloading to learn whether it is a playlist.
            try:
                with yt_dlp.YoutubeDL({**base_options, "extract_flat": False}) as ydl:
                    info = ydl.extract_info(url, download=False)
            except Exception as e:
                return self._fail(tmpdir, f"Failed to retrieve URL info: {str(e)}")
            if not info:
                return self._fail(tmpdir, "Failed to retrieve URL info: no metadata returned")

            is_playlist = info.get("entries") is not None

            ydl_options = {
                **base_options,
                "format": self._build_format_selector(video_quality, audio_quality, extract_audio),
                "max_filesize": settings.DOWNLOADER_MAX_SIZE_BYTES,
                "postprocessors": self._build_postprocessors(
                    ext, extract_audio, embed_subtitles, embed_thumbnail
                ),
            }
            if not extract_audio:
                # Only relevant when separate video and audio streams get merged.
                ydl_options["merge_output_format"] = ext

            if subtitle_langs:
                ydl_options["writesubtitles"] = True
                if any(code.lower() == "all" for code in subtitle_langs) and len(subtitle_langs) == 1:
                    ydl_options["writeautomaticsub"] = True
                    ydl_options["subtitleslangs"] = ["all"]
                else:
                    ydl_options["subtitleslangs"] = subtitle_langs

            if embed_thumbnail:
                ydl_options["writethumbnail"] = True

            if is_playlist and selected_videos:
                ydl_options["playlist_items"] = ",".join(str(index) for index in selected_videos)

            if is_playlist:
                return self._download_playlist(url, info, ydl_options, tmpdir)
            return self._download_single(url, ext, ydl_options, tmpdir)

        except Exception as e:
            return self._fail(tmpdir, str(e))

    def _download_single(self, url, ext, ydl_options, tmpdir):
        """Download one video into ``tmpdir`` and build its streaming response."""
        ydl_options["outtmpl"] = os.path.join(tmpdir, "download.%(ext)s")

        with yt_dlp.YoutubeDL(ydl_options) as ydl:
            info = ydl.extract_info(url, download=True)
            base = ydl.prepare_filename(info)

        # prepare_filename may still carry the pre-conversion extension.
        expected = base if base.endswith(f".{ext}") else os.path.splitext(base)[0] + f".{ext}"
        file_path = self._locate_output_file(tmpdir, expected)
        if file_path is None:
            # E.g. the download was skipped; fail before any headers are sent.
            return self._fail(tmpdir, "No file was downloaded for the requested URL")

        info = info or {}
        # The real extension can differ from ext (e.g. audio extraction to mp3).
        actual_ext = os.path.splitext(file_path)[1].lstrip(".") or ext
        size = os.path.getsize(file_path)

        DownloaderRecord.objects.create(
            url=url,
            format=actual_ext,
            length_of_media=int(info.get("duration") or 0),
            file_size=size,
        )

        # slugify can return "" (e.g. a title without ASCII letters).
        safe_title = slugify(info.get("title") or "") or "video"
        filename = f"{safe_title}.{actual_ext}"
        content_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
        return self._file_response(file_path, tmpdir, filename, content_type, size)

    def _download_playlist(self, url, info, ydl_options, tmpdir):
        """Download a playlist into ``tmpdir``, zip the files and build the response."""
        ydl_options["outtmpl"] = os.path.join(tmpdir, "%(playlist_index)02d - %(title)s.%(ext)s")

        with yt_dlp.YoutubeDL(ydl_options) as ydl:
            ydl.download([url])

        skipped = {"playlist.zip", "cookies.txt"}
        # Sorted so the archive order does not depend on os.listdir ordering.
        members = sorted(
            name
            for name in os.listdir(tmpdir)
            if name not in skipped
            and not name.startswith(".")
            and os.path.isfile(os.path.join(tmpdir, name))
        )
        if not members:
            return self._fail(tmpdir, "No files were downloaded from the playlist")

        zip_path = os.path.join(tmpdir, "playlist.zip")
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
            for name in members:
                archive.write(os.path.join(tmpdir, name), name)

        # Duration is summed over every playlist entry reported by the lookup.
        total_duration = sum(
            int(entry["duration"])
            for entry in (info.get("entries") or [])
            if entry and entry.get("duration")
        )
        total_size = os.path.getsize(zip_path)

        DownloaderRecord.objects.create(
            url=url,
            format="zip",
            length_of_media=total_duration,
            file_size=total_size,
        )

        playlist_title = slugify(info.get("title") or "") or "playlist"
        zip_filename = f"{playlist_title}.zip"
        return self._file_response(zip_path, tmpdir, zip_filename, "application/zip", total_size)

    def _file_response(self, path, tmpdir, filename, content_type, size):
        """Wrap ``path`` in a streaming attachment response that cleans up ``tmpdir``."""
        response = StreamingHttpResponse(
            streaming_content=self._stream_and_cleanup(path, tmpdir),
            content_type=content_type,
        )
        if size:
            response["Content-Length"] = str(size)
        response["Content-Disposition"] = f'attachment; filename="{filename}"'
        return response

    @staticmethod
    def _stream_and_cleanup(path, temp_dir, chunk_size=8192):
        """Yield ``path`` in chunks, then delete ``temp_dir`` (which contains it)."""
        try:
            with open(path, "rb") as f:
                for chunk in iter(lambda: f.read(chunk_size), b""):
                    yield chunk
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    @staticmethod
    def _fail(tmpdir, message):
        """Remove the temporary directory and return a 400 error response."""
        shutil.rmtree(tmpdir, ignore_errors=True)
        return Response({"error": message}, status=400)

    @staticmethod
    def _as_bool(value):
        """Interpret JSON booleans and common form-encoded spellings of true."""
        if isinstance(value, str):
            return value.strip().lower() in {"1", "true", "t", "yes", "y", "on"}
        return bool(value)

    @staticmethod
    def _parse_selected_videos(raw):
        """Normalise ``selected_videos`` to a list of ints.

        Accepts a list/tuple, a single number, or a string such as ``"1,3,5"``
        or ``"[1, 3, 5]"``. Raises ValueError/TypeError for anything that is
        not an integer.
        """
        if raw is None or raw == "":
            return []
        if isinstance(raw, str):
            raw = raw.strip("[] ").split(",")
        elif not isinstance(raw, (list, tuple)):
            raw = [raw]
        return [int(str(item).strip()) for item in raw if str(item).strip()]

    @staticmethod
    def _build_format_selector(video_quality, audio_quality, extract_audio):
        """Build the yt-dlp format selector from the optional quality caps."""
        audio = "ba" if audio_quality is None else f"ba[abr<={audio_quality}]"
        if extract_audio:
            # Audio only: video quality is ignored, as the endpoint documents.
            return "ba/b" if audio_quality is None else f"{audio}/ba/b"
        video = "bv" if video_quality is None else f"bv[height<={video_quality}]"
        # Best separate streams first; the pre-merged file ("b") is only a
        # fallback because it is often a lower resolution.
        return f"{video}+{audio}/b"

    @staticmethod
    def _build_postprocessors(ext, extract_audio, embed_subtitles, embed_thumbnail):
        """Return the postprocessor chain: convert first, then embed extras."""
        audio_codecs = ("mp3", "m4a", "opus", "vorbis", "wav")
        if extract_audio:
            chain = [{
                "key": "FFmpegExtractAudio",
                "preferredcodec": ext if ext in audio_codecs else "mp3",
            }]
        else:
            chain = [{"key": "FFmpegVideoRemuxer", "preferedformat": ext}]
        # Embedding runs on the converted file so the extras are not dropped
        # by a later conversion step.
        if embed_subtitles:
            chain.append({"key": "FFmpegEmbedSubtitle"})
        if embed_thumbnail:
            chain.append({"key": "EmbedThumbnail"})
        return chain

    @staticmethod
    def _locate_output_file(tmpdir, expected_path):
        """Return the downloaded media file, or None when nothing was produced.

        Falls back to the largest ``download.*`` file when the expected path
        is missing, which happens when the final extension differs from the
        requested one.
        """
        if os.path.isfile(expected_path):
            return expected_path
        leftovers = (".part", ".ytdl")  # presumably partial-download artefacts
        candidates = [
            os.path.join(tmpdir, name)
            for name in os.listdir(tmpdir)
            if name.startswith("download.")
            and not name.endswith(leftovers)
            and os.path.isfile(os.path.join(tmpdir, name))
        ]
        return max(candidates, key=os.path.getsize, default=None)


# ----------------  STATS FOR GRAPHS  ----------------

from .serializers import DownloaderStatsSerializer
from django.db.models import Count, Avg, Sum

class DownloaderStats(APIView):
    """
    Return aggregated statistics computed over the DownloaderRecord table.
    """
    authentication_classes = []
    permission_classes = [AllowAny]

    @extend_schema(
        tags=["downloader", "public"],
        summary="Get aggregated downloader statistics",
        responses={200: DownloaderStatsSerializer},
    )
    def get(self, request):
        """Respond with counts, averages, totals and the most common format."""
        records = DownloaderRecord.objects

        # Numeric aggregates over all records.
        stats = records.aggregate(
            total_downloads=Count("id"),
            avg_length_of_media=Avg("length_of_media"),
            avg_file_size=Avg("file_size"),
            total_length_of_media=Sum("length_of_media"),
            total_file_size=Sum("file_size"),
        )

        # Most frequent format: group by format, order by count descending,
        # take the first row (None when the table is empty).
        top_format_row = (
            records.values("format")
            .annotate(count=Count("id"))
            .order_by("-count")
            .first()
        )
        if top_format_row:
            stats["most_common_format"] = top_format_row["format"]
        else:
            stats["most_common_format"] = None

        return Response(DownloaderStatsSerializer(stats).data)