poslední změny

This commit is contained in:
2025-10-20 13:49:38 +02:00
parent 561a91d023
commit 89e32b1d28
9 changed files with 40 additions and 76 deletions

Binary file not shown.

Binary file not shown.

7
app.py
View File

@@ -1,4 +1,5 @@
from flask import Flask, render_template, request, send_file, jsonify, make_response from flask import Flask, render_template, request, send_file, jsonify
from datetime import datetime
from scraper import get_google_first_page from scraper import get_google_first_page
import io, json, csv, yaml, os import io, json, csv, yaml, os
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -34,7 +35,9 @@ def export():
ext = data.get("format", "json") ext = data.get("format", "json")
results = data.get("results", []) results = data.get("results", [])
filename = f"results.{ext}" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"results_{timestamp}.{ext}"
if ext == "json": if ext == "json":
buf = io.BytesIO(json.dumps(results, ensure_ascii=False, indent=2).encode("utf-8")) buf = io.BytesIO(json.dumps(results, ensure_ascii=False, indent=2).encode("utf-8"))

View File

@@ -1,4 +1,3 @@
version: "3.8"
services: services:
web: web:
build: . build: .
@@ -10,7 +9,7 @@ services:
volumes: volumes:
- .:/app - .:/app
healthcheck: healthcheck:
test: ["CMD-SHELL", "pytest -q tests/test_scraper.py::test_get_google_first_page_maps_output || exit 1"] test: ["CMD-SHELL", "python -m pytest || exit 1"]
interval: 1m interval: 1m
timeout: 20s timeout: 20s
retries: 3 retries: 3

View File

@@ -16,9 +16,6 @@ last_api_call = 0.0 # remember when we last called the API
def _throttle_api(): def _throttle_api():
""" """
Pause if the last API call was too recent. Pause if the last API call was too recent.
This is a super basic rate limiter: we allow one request every 5 seconds.
It helps to not run through your daily quota too fast.
""" """
global last_api_call global last_api_call
now = time.time() now = time.time()
@@ -51,8 +48,8 @@ def _cse_request(q, num=DEFAULT_NUM, hl=DEFAULT_LOCALE):
- GOOGLE_DEVELOPER_KEY (your API key) - GOOGLE_DEVELOPER_KEY (your API key)
- GOOGLE_CSE_ID (your search engine ID) - GOOGLE_CSE_ID (your search engine ID)
""" """
api_key = os.environ.get("GOOGLE_DEVELOPER_KEY") api_key = (os.environ.get("GOOGLE_DEVELOPER_KEY") or "").strip()
cse_id = os.environ.get("GOOGLE_CSE_ID") cse_id = (os.environ.get("GOOGLE_CSE_ID") or "").strip()
if not api_key: if not api_key:
raise RuntimeError("GOOGLE_DEVELOPER_KEY není nastaven v .env") raise RuntimeError("GOOGLE_DEVELOPER_KEY není nastaven v .env")
if not cse_id: if not cse_id:

View File

@@ -65,11 +65,11 @@
<div class="flex-grow-1"> <div class="flex-grow-1">
<a class="fw-semibold" href="{{ item.link }}" target="_blank" rel="noopener noreferrer">{{ item.title }}</a> <a class="fw-semibold" href="{{ item.link }}" target="_blank" rel="noopener noreferrer">{{ item.title }}</a>
{% if r.snippet %} {% if item.snippet %}
<small class="text-muted d-block">{{ r.snippet }}</small> <small class="text-muted d-block">{{ item.snippet }}</small>
{% endif %} {% endif %}
<small class="text-break text-secondary">{{ r.link }}</small> <small class="text-break text-secondary">{{ item.link }}</small>
</div> </div>
</div> </div>
</li> </li>

View File

@@ -1,78 +1,43 @@
import os import os
import json
import types
import pytest import pytest
from dotenv import load_dotenv
import scraper import scraper
def env_setup_test(monkeypatch): def test_env_variables_present():
"""Set required env vars for the API client (helper used by tests).""" """
monkeypatch.setenv("GOOGLE_DEVELOPER_KEY", "test-key") Test jestli env proměnné existují a mají obsah
monkeypatch.setenv("GOOGLE_CSE_ID", "test-cse") """
load_dotenv()
key = (os.getenv("GOOGLE_DEVELOPER_KEY") or "").strip()
cx = (os.getenv("GOOGLE_CSE_ID") or "").strip()
assert key, "GOOGLE_DEVELOPER_KEY is missing or empty"
assert cx, "GOOGLE_CSE_ID is missing or empty"
def mock_api_response_test(monkeypatch): def test_integration_search_youtube(monkeypatch):
"""Mock out requests.get to return a fixed API payload (helper).""" """
class FakeResp: Test výsledku vyhledávání "youtube".
status_code = 200 """
def __init__(self, data): load_dotenv()
self._data = data
self.text = json.dumps(data)
def json(self):
return self._data
data = { key = (os.getenv("GOOGLE_DEVELOPER_KEY") or "").strip()
"items": [ cx = (os.getenv("GOOGLE_CSE_ID") or "").strip()
{
"title": "Example Domain",
"link": "https://example.com/",
"snippet": "This domain is for use in illustrative examples.",
},
{
"title": "OpenAI",
"link": "https://openai.com/research",
"snippet": "Research from OpenAI.",
},
]
}
def fake_get(url, params=None, timeout=15): if not key or not cx:
return FakeResp(data) pytest.skip("Integration test skipped: GOOGLE_DEVELOPER_KEY/GOOGLE_CSE_ID not set")
monkeypatch.setattr(scraper, "requests", types.SimpleNamespace(get=fake_get)) # Speed up: don't wait during the test
def test_get_google_first_page_maps_output(monkeypatch):
env_setup_test(monkeypatch)
mock_api_response_test(monkeypatch)
# Avoid waiting for the throttle in tests
monkeypatch.setattr(scraper, "RATE_SECONDS", 0) monkeypatch.setattr(scraper, "RATE_SECONDS", 0)
monkeypatch.setattr(scraper, "last_api_call", 0) monkeypatch.setattr(scraper, "last_api_call", 0)
results = scraper.get_google_first_page("example query") results = []
try:
results = scraper.get_google_first_page("youtube")
except RuntimeError as e:
return pytest.skip(f"Integration test skipped due to API error: {e}")
assert isinstance(results, list) assert isinstance(results, list) and len(results) > 0
assert len(results) == 2 assert any("youtube.com" in (item.get("link") or "") for item in results)
first = results[0]
assert first["position"] == 1
assert first["title"] == "Example Domain"
assert first["link"] == "https://example.com/"
assert first["snippet"].startswith("This domain is for use")
assert first["icon"] == "https://example.com/favicon.ico"
second = results[1]
assert second["position"] == 2
assert second["title"] == "OpenAI"
assert second["link"] == "https://openai.com/research"
assert second["icon"] == "https://openai.com/favicon.ico"
def test_missing_env_raises(monkeypatch):
# Unset env to simulate missing configuration
monkeypatch.delenv("GOOGLE_DEVELOPER_KEY", raising=False)
monkeypatch.delenv("GOOGLE_CSE_ID", raising=False)
with pytest.raises(RuntimeError):
scraper.get_google_first_page("anything")