poslední změny

This commit is contained in:
2025-10-20 13:49:38 +02:00
parent 561a91d023
commit 89e32b1d28
9 changed files with 40 additions and 76 deletions

Binary file not shown.

Binary file not shown.

7
app.py
View File

@@ -1,4 +1,5 @@
from flask import Flask, render_template, request, send_file, jsonify, make_response
from flask import Flask, render_template, request, send_file, jsonify
from datetime import datetime
from scraper import get_google_first_page
import io, json, csv, yaml, os
from dotenv import load_dotenv
@@ -34,7 +35,9 @@ def export():
ext = data.get("format", "json")
results = data.get("results", [])
filename = f"results.{ext}"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"results_{timestamp}.{ext}"
if ext == "json":
buf = io.BytesIO(json.dumps(results, ensure_ascii=False, indent=2).encode("utf-8"))

View File

@@ -1,4 +1,3 @@
version: "3.8"
services:
web:
build: .
@@ -10,7 +9,7 @@ services:
volumes:
- .:/app
healthcheck:
test: ["CMD-SHELL", "pytest -q tests/test_scraper.py::test_get_google_first_page_maps_output || exit 1"]
test: ["CMD-SHELL", "python -m pytest || exit 1"]
interval: 1m
timeout: 20s
retries: 3

View File

@@ -16,9 +16,6 @@ last_api_call = 0.0 # remember when we last called the API
def _throttle_api():
"""
Pause if the last API call was too recent.
This is a super basic rate limiter: we allow one request every 5 seconds.
It helps to not run through your daily quota too fast.
"""
global last_api_call
now = time.time()
@@ -51,8 +48,8 @@ def _cse_request(q, num=DEFAULT_NUM, hl=DEFAULT_LOCALE):
- GOOGLE_DEVELOPER_KEY (your API key)
- GOOGLE_CSE_ID (your search engine ID)
"""
api_key = os.environ.get("GOOGLE_DEVELOPER_KEY")
cse_id = os.environ.get("GOOGLE_CSE_ID")
api_key = (os.environ.get("GOOGLE_DEVELOPER_KEY") or "").strip()
cse_id = (os.environ.get("GOOGLE_CSE_ID") or "").strip()
if not api_key:
raise RuntimeError("GOOGLE_DEVELOPER_KEY není nastaven v .env")
if not cse_id:

View File

@@ -65,11 +65,11 @@
<div class="flex-grow-1">
<a class="fw-semibold" href="{{ item.link }}" target="_blank" rel="noopener noreferrer">{{ item.title }}</a>
{% if r.snippet %}
<small class="text-muted d-block">{{ r.snippet }}</small>
{% if item.snippet %}
<small class="text-muted d-block">{{ item.snippet }}</small>
{% endif %}
<small class="text-break text-secondary">{{ r.link }}</small>
<small class="text-break text-secondary">{{ item.link }}</small>
</div>
</div>
</li>

View File

@@ -1,78 +1,43 @@
import os
import json
import types
import pytest
from dotenv import load_dotenv
import scraper
def env_setup_test(monkeypatch):
"""Set required env vars for the API client (helper used by tests).

Uses the supplied ``monkeypatch`` object to set GOOGLE_DEVELOPER_KEY to
"test-key" and GOOGLE_CSE_ID to "test-cse". Returns nothing.
"""
monkeypatch.setenv("GOOGLE_DEVELOPER_KEY", "test-key")
monkeypatch.setenv("GOOGLE_CSE_ID", "test-cse")
def test_env_variables_present():
"""
Check that the required environment variables exist and are non-empty.

Calls load_dotenv() first (presumably so values from a local .env file
are visible -- confirm), then reads GOOGLE_DEVELOPER_KEY and
GOOGLE_CSE_ID. A missing, empty, or whitespace-only value fails the test.
"""
load_dotenv()
# Fall back to "" for an unset variable so .strip() is always callable.
key = (os.getenv("GOOGLE_DEVELOPER_KEY") or "").strip()
cx = (os.getenv("GOOGLE_CSE_ID") or "").strip()
assert key, "GOOGLE_DEVELOPER_KEY is missing or empty"
assert cx, "GOOGLE_CSE_ID is missing or empty"
def mock_api_response_test(monkeypatch):
"""Mock out requests.get to return a fixed API payload (helper)."""
class FakeResp:
status_code = 200
def __init__(self, data):
self._data = data
self.text = json.dumps(data)
def json(self):
return self._data
def test_integration_search_youtube(monkeypatch):
"""
Test výsledku vyhledávání "youtube".
"""
load_dotenv()
data = {
"items": [
{
"title": "Example Domain",
"link": "https://example.com/",
"snippet": "This domain is for use in illustrative examples.",
},
{
"title": "OpenAI",
"link": "https://openai.com/research",
"snippet": "Research from OpenAI.",
},
]
}
key = (os.getenv("GOOGLE_DEVELOPER_KEY") or "").strip()
cx = (os.getenv("GOOGLE_CSE_ID") or "").strip()
def fake_get(url, params=None, timeout=15):
return FakeResp(data)
if not key or not cx:
pytest.skip("Integration test skipped: GOOGLE_DEVELOPER_KEY/GOOGLE_CSE_ID not set")
monkeypatch.setattr(scraper, "requests", types.SimpleNamespace(get=fake_get))
def test_get_google_first_page_maps_output(monkeypatch):
env_setup_test(monkeypatch)
mock_api_response_test(monkeypatch)
# Avoid waiting for the throttle in tests
# Speed up: don't wait during the test
monkeypatch.setattr(scraper, "RATE_SECONDS", 0)
monkeypatch.setattr(scraper, "last_api_call", 0)
results = scraper.get_google_first_page("example query")
results = []
try:
results = scraper.get_google_first_page("youtube")
except RuntimeError as e:
return pytest.skip(f"Integration test skipped due to API error: {e}")
assert isinstance(results, list)
assert len(results) == 2
first = results[0]
assert first["position"] == 1
assert first["title"] == "Example Domain"
assert first["link"] == "https://example.com/"
assert first["snippet"].startswith("This domain is for use")
assert first["icon"] == "https://example.com/favicon.ico"
second = results[1]
assert second["position"] == 2
assert second["title"] == "OpenAI"
assert second["link"] == "https://openai.com/research"
assert second["icon"] == "https://openai.com/favicon.ico"
def test_missing_env_raises(monkeypatch):
# Unset env to simulate missing configuration
monkeypatch.delenv("GOOGLE_DEVELOPER_KEY", raising=False)
monkeypatch.delenv("GOOGLE_CSE_ID", raising=False)
with pytest.raises(RuntimeError):
scraper.get_google_first_page("anything")
assert isinstance(results, list) and len(results) > 0
assert any("youtube.com" in (item.get("link") or "") for item in results)