"""Tests for ``scraper.get_google_first_page``.

Network access is stubbed out so the tests never contact the real search API.
"""

import os
import json
import types

import pytest

import scraper


def env_setup_test(monkeypatch):
    """Set required env vars for the API client (helper used by tests)."""
    monkeypatch.setenv("GOOGLE_DEVELOPER_KEY", "test-key")
    monkeypatch.setenv("GOOGLE_CSE_ID", "test-cse")


def mock_api_response_test(monkeypatch):
    """Mock out requests.get to return a fixed API payload (helper).

    Replaces ``scraper.requests`` with a namespace whose only attribute is a
    ``get`` function returning a canned two-item response.
    """

    class FakeResp:
        """Minimal response stand-in exposing ``status_code``, ``text``, ``json()``."""

        status_code = 200

        def __init__(self, data):
            self._data = data
            self.text = json.dumps(data)

        def json(self):
            return self._data

    data = {
        "items": [
            {
                "title": "Example Domain",
                "link": "https://example.com/",
                "snippet": "This domain is for use in illustrative examples.",
            },
            {
                "title": "OpenAI",
                "link": "https://openai.com/research",
                "snippet": "Research from OpenAI.",
            },
        ]
    }

    def fake_get(url, params=None, timeout=15):
        return FakeResp(data)

    monkeypatch.setattr(scraper, "requests", types.SimpleNamespace(get=fake_get))


def test_get_google_first_page_maps_output(monkeypatch):
    """Each API item is expected to map to position/title/link/snippet/icon."""
    env_setup_test(monkeypatch)
    mock_api_response_test(monkeypatch)

    # Avoid waiting for the throttle in tests
    monkeypatch.setattr(scraper, "RATE_SECONDS", 0)
    monkeypatch.setattr(scraper, "last_api_call", 0)

    results = scraper.get_google_first_page("example query")

    assert isinstance(results, list)
    assert len(results) == 2

    first = results[0]
    assert first["position"] == 1
    assert first["title"] == "Example Domain"
    assert first["link"] == "https://example.com/"
    assert first["snippet"].startswith("This domain is for use")
    assert first["icon"] == "https://example.com/favicon.ico"

    second = results[1]
    assert second["position"] == 2
    assert second["title"] == "OpenAI"
    assert second["link"] == "https://openai.com/research"
    assert second["icon"] == "https://openai.com/favicon.ico"


def test_missing_env_raises(monkeypatch):
    """Calling the scraper without credentials must raise ``RuntimeError``."""
    # Unset env to simulate missing configuration
    monkeypatch.delenv("GOOGLE_DEVELOPER_KEY", raising=False)
    monkeypatch.delenv("GOOGLE_CSE_ID", raising=False)

    def unexpected_get(*args, **kwargs):
        pytest.fail(
            "requests.get was called even though the API credentials are unset"
        )

    # Keep the test hermetic: if the credential check ever regresses, fail
    # loudly here instead of issuing a real HTTP request. Only ``get`` is
    # patched so anything else the scraper reads from ``requests`` stays intact.
    monkeypatch.setattr(scraper.requests, "get", unexpected_get)

    with pytest.raises(RuntimeError):
        scraper.get_google_first_page("anything")