poslední změny
This commit is contained in:
Binary file not shown.
Binary file not shown.
7
app.py
7
app.py
@@ -1,4 +1,5 @@
|
|||||||
from flask import Flask, render_template, request, send_file, jsonify, make_response
|
from flask import Flask, render_template, request, send_file, jsonify
|
||||||
|
from datetime import datetime
|
||||||
from scraper import get_google_first_page
|
from scraper import get_google_first_page
|
||||||
import io, json, csv, yaml, os
|
import io, json, csv, yaml, os
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -34,7 +35,9 @@ def export():
|
|||||||
ext = data.get("format", "json")
|
ext = data.get("format", "json")
|
||||||
|
|
||||||
results = data.get("results", [])
|
results = data.get("results", [])
|
||||||
filename = f"results.{ext}"
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
|
||||||
|
filename = f"results_{timestamp}.{ext}"
|
||||||
if ext == "json":
|
if ext == "json":
|
||||||
|
|
||||||
buf = io.BytesIO(json.dumps(results, ensure_ascii=False, indent=2).encode("utf-8"))
|
buf = io.BytesIO(json.dumps(results, ensure_ascii=False, indent=2).encode("utf-8"))
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
version: "3.8"
|
|
||||||
services:
|
services:
|
||||||
web:
|
web:
|
||||||
build: .
|
build: .
|
||||||
@@ -10,7 +9,7 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- .:/app
|
- .:/app
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "pytest -q tests/test_scraper.py::test_get_google_first_page_maps_output || exit 1"]
|
test: ["CMD-SHELL", "python -m pytest || exit 1"]
|
||||||
interval: 1m
|
interval: 1m
|
||||||
timeout: 20s
|
timeout: 20s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
|||||||
@@ -16,9 +16,6 @@ last_api_call = 0.0 # remember when we last called the API
|
|||||||
def _throttle_api():
|
def _throttle_api():
|
||||||
"""
|
"""
|
||||||
Pause if the last API call was too recent.
|
Pause if the last API call was too recent.
|
||||||
|
|
||||||
This is a super basic rate limiter: we allow one request every 5 seconds.
|
|
||||||
It helps to not run through your daily quota too fast.
|
|
||||||
"""
|
"""
|
||||||
global last_api_call
|
global last_api_call
|
||||||
now = time.time()
|
now = time.time()
|
||||||
@@ -51,8 +48,8 @@ def _cse_request(q, num=DEFAULT_NUM, hl=DEFAULT_LOCALE):
|
|||||||
- GOOGLE_DEVELOPER_KEY (your API key)
|
- GOOGLE_DEVELOPER_KEY (your API key)
|
||||||
- GOOGLE_CSE_ID (your search engine ID)
|
- GOOGLE_CSE_ID (your search engine ID)
|
||||||
"""
|
"""
|
||||||
api_key = os.environ.get("GOOGLE_DEVELOPER_KEY")
|
api_key = (os.environ.get("GOOGLE_DEVELOPER_KEY") or "").strip()
|
||||||
cse_id = os.environ.get("GOOGLE_CSE_ID")
|
cse_id = (os.environ.get("GOOGLE_CSE_ID") or "").strip()
|
||||||
if not api_key:
|
if not api_key:
|
||||||
raise RuntimeError("GOOGLE_DEVELOPER_KEY není nastaven v .env")
|
raise RuntimeError("GOOGLE_DEVELOPER_KEY není nastaven v .env")
|
||||||
if not cse_id:
|
if not cse_id:
|
||||||
|
|||||||
@@ -65,11 +65,11 @@
|
|||||||
<div class="flex-grow-1">
|
<div class="flex-grow-1">
|
||||||
<a class="fw-semibold" href="{{ item.link }}" target="_blank" rel="noopener noreferrer">{{ item.title }}</a>
|
<a class="fw-semibold" href="{{ item.link }}" target="_blank" rel="noopener noreferrer">{{ item.title }}</a>
|
||||||
|
|
||||||
{% if r.snippet %}
|
{% if item.snippet %}
|
||||||
<small class="text-muted d-block">{{ r.snippet }}</small>
|
<small class="text-muted d-block">{{ item.snippet }}</small>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<small class="text-break text-secondary">{{ r.link }}</small>
|
<small class="text-break text-secondary">{{ item.link }}</small>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</li>
|
</li>
|
||||||
|
|||||||
Binary file not shown.
BIN
tests/__pycache__/test_scraper.cpython-313-pytest-8.4.2.pyc
Normal file
BIN
tests/__pycache__/test_scraper.cpython-313-pytest-8.4.2.pyc
Normal file
Binary file not shown.
@@ -1,78 +1,43 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
|
||||||
import types
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
import scraper
|
import scraper
|
||||||
|
|
||||||
|
|
||||||
def env_setup_test(monkeypatch):
|
def test_env_variables_present():
|
||||||
"""Set required env vars for the API client (helper used by tests)."""
|
"""
|
||||||
monkeypatch.setenv("GOOGLE_DEVELOPER_KEY", "test-key")
|
Test jestli env proměnné existují a mají obsah
|
||||||
monkeypatch.setenv("GOOGLE_CSE_ID", "test-cse")
|
"""
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
key = (os.getenv("GOOGLE_DEVELOPER_KEY") or "").strip()
|
||||||
|
cx = (os.getenv("GOOGLE_CSE_ID") or "").strip()
|
||||||
|
assert key, "GOOGLE_DEVELOPER_KEY is missing or empty"
|
||||||
|
assert cx, "GOOGLE_CSE_ID is missing or empty"
|
||||||
|
|
||||||
|
|
||||||
def mock_api_response_test(monkeypatch):
|
def test_integration_search_youtube(monkeypatch):
|
||||||
"""Mock out requests.get to return a fixed API payload (helper)."""
|
"""
|
||||||
class FakeResp:
|
Test výsledku vyhledávání "youtube".
|
||||||
status_code = 200
|
"""
|
||||||
def __init__(self, data):
|
load_dotenv()
|
||||||
self._data = data
|
|
||||||
self.text = json.dumps(data)
|
|
||||||
def json(self):
|
|
||||||
return self._data
|
|
||||||
|
|
||||||
data = {
|
key = (os.getenv("GOOGLE_DEVELOPER_KEY") or "").strip()
|
||||||
"items": [
|
cx = (os.getenv("GOOGLE_CSE_ID") or "").strip()
|
||||||
{
|
|
||||||
"title": "Example Domain",
|
if not key or not cx:
|
||||||
"link": "https://example.com/",
|
pytest.skip("Integration test skipped: GOOGLE_DEVELOPER_KEY/GOOGLE_CSE_ID not set")
|
||||||
"snippet": "This domain is for use in illustrative examples.",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "OpenAI",
|
|
||||||
"link": "https://openai.com/research",
|
|
||||||
"snippet": "Research from OpenAI.",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
def fake_get(url, params=None, timeout=15):
|
# Speed up: don't wait during the test
|
||||||
return FakeResp(data)
|
|
||||||
|
|
||||||
monkeypatch.setattr(scraper, "requests", types.SimpleNamespace(get=fake_get))
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_google_first_page_maps_output(monkeypatch):
|
|
||||||
env_setup_test(monkeypatch)
|
|
||||||
mock_api_response_test(monkeypatch)
|
|
||||||
# Avoid waiting for the throttle in tests
|
|
||||||
monkeypatch.setattr(scraper, "RATE_SECONDS", 0)
|
monkeypatch.setattr(scraper, "RATE_SECONDS", 0)
|
||||||
monkeypatch.setattr(scraper, "last_api_call", 0)
|
monkeypatch.setattr(scraper, "last_api_call", 0)
|
||||||
|
|
||||||
results = scraper.get_google_first_page("example query")
|
results = []
|
||||||
|
try:
|
||||||
|
results = scraper.get_google_first_page("youtube")
|
||||||
|
except RuntimeError as e:
|
||||||
|
return pytest.skip(f"Integration test skipped due to API error: {e}")
|
||||||
|
|
||||||
assert isinstance(results, list)
|
assert isinstance(results, list) and len(results) > 0
|
||||||
assert len(results) == 2
|
assert any("youtube.com" in (item.get("link") or "") for item in results)
|
||||||
|
|
||||||
first = results[0]
|
|
||||||
assert first["position"] == 1
|
|
||||||
assert first["title"] == "Example Domain"
|
|
||||||
assert first["link"] == "https://example.com/"
|
|
||||||
assert first["snippet"].startswith("This domain is for use")
|
|
||||||
assert first["icon"] == "https://example.com/favicon.ico"
|
|
||||||
|
|
||||||
second = results[1]
|
|
||||||
assert second["position"] == 2
|
|
||||||
assert second["title"] == "OpenAI"
|
|
||||||
assert second["link"] == "https://openai.com/research"
|
|
||||||
assert second["icon"] == "https://openai.com/favicon.ico"
|
|
||||||
|
|
||||||
|
|
||||||
def test_missing_env_raises(monkeypatch):
|
|
||||||
# Unset env to simulate missing configuration
|
|
||||||
monkeypatch.delenv("GOOGLE_DEVELOPER_KEY", raising=False)
|
|
||||||
monkeypatch.delenv("GOOGLE_CSE_ID", raising=False)
|
|
||||||
|
|
||||||
with pytest.raises(RuntimeError):
|
|
||||||
scraper.get_google_first_page("anything")
|
|
||||||
|
|||||||
Reference in New Issue
Block a user