diff --git a/__pycache__/scraper.cpython-311.pyc b/__pycache__/scraper.cpython-311.pyc index fa7ecf6..624814f 100644 Binary files a/__pycache__/scraper.cpython-311.pyc and b/__pycache__/scraper.cpython-311.pyc differ diff --git a/__pycache__/scraper.cpython-313.pyc b/__pycache__/scraper.cpython-313.pyc index afd9586..869788b 100644 Binary files a/__pycache__/scraper.cpython-313.pyc and b/__pycache__/scraper.cpython-313.pyc differ diff --git a/app.py b/app.py index 194ccfe..323c375 100644 --- a/app.py +++ b/app.py @@ -1,4 +1,5 @@ -from flask import Flask, render_template, request, send_file, jsonify, make_response +from flask import Flask, render_template, request, send_file, jsonify +from datetime import datetime from scraper import get_google_first_page import io, json, csv, yaml, os from dotenv import load_dotenv @@ -34,7 +35,9 @@ def export(): ext = data.get("format", "json") results = data.get("results", []) - filename = f"results.{ext}" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + filename = f"results_{timestamp}.{ext}" if ext == "json": buf = io.BytesIO(json.dumps(results, ensure_ascii=False, indent=2).encode("utf-8")) diff --git a/docker-compose.yml b/docker-compose.yml index 48bd716..ca5f8ae 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: "3.8" services: web: build: . @@ -10,7 +9,7 @@ services: volumes: - .:/app healthcheck: - test: ["CMD-SHELL", "pytest -q tests/test_scraper.py::test_get_google_first_page_maps_output || exit 1"] + test: ["CMD-SHELL", "python -m pytest || exit 1"] interval: 1m timeout: 20s retries: 3 diff --git a/scraper.py b/scraper.py index e7eb3ef..282ceb3 100644 --- a/scraper.py +++ b/scraper.py @@ -16,9 +16,6 @@ last_api_call = 0.0 # remember when we last called the API def _throttle_api(): """ Pause if the last API call was too recent. - - This is a super basic rate limiter: we allow one request every 5 seconds. - It helps to not run through your daily quota too fast. """ global last_api_call now = time.time() @@ -51,8 +48,8 @@ def _cse_request(q, num=DEFAULT_NUM, hl=DEFAULT_LOCALE): - GOOGLE_DEVELOPER_KEY (your API key) - GOOGLE_CSE_ID (your search engine ID) """ - api_key = os.environ.get("GOOGLE_DEVELOPER_KEY") - cse_id = os.environ.get("GOOGLE_CSE_ID") + api_key = (os.environ.get("GOOGLE_DEVELOPER_KEY") or "").strip() + cse_id = (os.environ.get("GOOGLE_CSE_ID") or "").strip() if not api_key: raise RuntimeError("GOOGLE_DEVELOPER_KEY není nastaven v .env") if not cse_id: diff --git a/templates/results.html b/templates/results.html index a20c342..7d1b6a1 100644 --- a/templates/results.html +++ b/templates/results.html @@ -65,11 +65,11 @@
{{ item.title }} - {% if r.snippet %} - {{ r.snippet }} + {% if item.snippet %} + {{ item.snippet }} {% endif %} - {{ r.link }} + {{ item.link }}
diff --git a/tests/__pycache__/test_scraper.cpython-311-pytest-8.4.2.pyc b/tests/__pycache__/test_scraper.cpython-311-pytest-8.4.2.pyc index a000e21..a5c5365 100644 Binary files a/tests/__pycache__/test_scraper.cpython-311-pytest-8.4.2.pyc and b/tests/__pycache__/test_scraper.cpython-311-pytest-8.4.2.pyc differ diff --git a/tests/__pycache__/test_scraper.cpython-313-pytest-8.4.2.pyc b/tests/__pycache__/test_scraper.cpython-313-pytest-8.4.2.pyc new file mode 100644 index 0000000..8eec603 Binary files /dev/null and b/tests/__pycache__/test_scraper.cpython-313-pytest-8.4.2.pyc differ diff --git a/tests/test_scraper.py b/tests/test_scraper.py index 60388f4..3f5d036 100644 --- a/tests/test_scraper.py +++ b/tests/test_scraper.py @@ -1,78 +1,43 @@ import os -import json -import types import pytest +from dotenv import load_dotenv import scraper -def env_setup_test(monkeypatch): - """Set required env vars for the API client (helper used by tests).""" - monkeypatch.setenv("GOOGLE_DEVELOPER_KEY", "test-key") - monkeypatch.setenv("GOOGLE_CSE_ID", "test-cse") +def test_env_variables_present(): + """ + Test jestli env proměnné existují a mají obsah + """ + load_dotenv() + + key = (os.getenv("GOOGLE_DEVELOPER_KEY") or "").strip() + cx = (os.getenv("GOOGLE_CSE_ID") or "").strip() + assert key, "GOOGLE_DEVELOPER_KEY is missing or empty" + assert cx, "GOOGLE_CSE_ID is missing or empty" -def mock_api_response_test(monkeypatch): - """Mock out requests.get to return a fixed API payload (helper).""" - class FakeResp: - status_code = 200 - def __init__(self, data): - self._data = data - self.text = json.dumps(data) - def json(self): - return self._data +def test_integration_search_youtube(monkeypatch): + """ + Test výsledku vyhledávání "youtube". + """ + load_dotenv() - data = { - "items": [ - { - "title": "Example Domain", - "link": "https://example.com/", - "snippet": "This domain is for use in illustrative examples.", - }, - { - "title": "OpenAI", - "link": "https://openai.com/research", - "snippet": "Research from OpenAI.", - }, - ] - } + key = (os.getenv("GOOGLE_DEVELOPER_KEY") or "").strip() + cx = (os.getenv("GOOGLE_CSE_ID") or "").strip() + + if not key or not cx: + pytest.skip("Integration test skipped: GOOGLE_DEVELOPER_KEY/GOOGLE_CSE_ID not set") - def fake_get(url, params=None, timeout=15): - return FakeResp(data) - - monkeypatch.setattr(scraper, "requests", types.SimpleNamespace(get=fake_get)) - - -def test_get_google_first_page_maps_output(monkeypatch): - env_setup_test(monkeypatch) - mock_api_response_test(monkeypatch) - # Avoid waiting for the throttle in tests + # Speed up: don't wait during the test monkeypatch.setattr(scraper, "RATE_SECONDS", 0) monkeypatch.setattr(scraper, "last_api_call", 0) - results = scraper.get_google_first_page("example query") + results = [] + try: + results = scraper.get_google_first_page("youtube") + except RuntimeError as e: + return pytest.skip(f"Integration test skipped due to API error: {e}") - assert isinstance(results, list) - assert len(results) == 2 - - first = results[0] - assert first["position"] == 1 - assert first["title"] == "Example Domain" - assert first["link"] == "https://example.com/" - assert first["snippet"].startswith("This domain is for use") - assert first["icon"] == "https://example.com/favicon.ico" - - second = results[1] - assert second["position"] == 2 - assert second["title"] == "OpenAI" - assert second["link"] == "https://openai.com/research" - assert second["icon"] == "https://openai.com/favicon.ico" - - -def test_missing_env_raises(monkeypatch): - # Unset env to simulate missing configuration - monkeypatch.delenv("GOOGLE_DEVELOPER_KEY", raising=False) - monkeypatch.delenv("GOOGLE_CSE_ID", raising=False) - - with pytest.raises(RuntimeError): - scraper.get_google_first_page("anything") + assert isinstance(results, list) and len(results) > 0 + assert any("youtube.com" in (item.get("link") or "") for item in results)