103 lines
3.1 KiB
Python
103 lines
3.1 KiB
Python
import os
|
|
import time
|
|
from urllib.parse import urlparse
|
|
|
|
import requests
|
|
|
|
|
|
# --- Configuration ---

# Endpoint that every search request is sent to.
API_ENDPOINT = "https://www.googleapis.com/customsearch/v1"

# Language code sent as the API's "hl" request parameter ("cs" = Czech).
# NOTE(review): per the Custom Search docs "hl" selects the interface language;
# restricting the language of the results themselves would need "lr" - confirm
# which one is intended.
DEFAULT_LOCALE = "cs"

# How many results to request; the request code clamps this to 1..10 per call.
DEFAULT_NUM = 10

# Minimum gap, in seconds, enforced between two API requests.
RATE_SECONDS = 5.0

# time.time() timestamp of the most recent API call; starting at 0.0 means the
# very first call is never delayed.
last_api_call = 0.0
|
|
|
|
|
|
def _throttle_api():
    """Block until at least RATE_SECONDS have passed since the previous call.

    This is a deliberately simple, process-local rate limiter: it spaces API
    requests out so the daily quota is not used up too quickly. The time of
    the latest call is kept in the module-level ``last_api_call`` variable,
    which is updated on every invocation.
    """
    global last_api_call
    elapsed = time.time() - last_api_call
    # time.time() follows the wall clock, which can be stepped backwards (NTP
    # sync, manual change). That makes ``elapsed`` negative, so cap the pause
    # at one full interval instead of sleeping for the size of the clock jump.
    pause = min(RATE_SECONDS - elapsed, RATE_SECONDS)
    if pause > 0:
        time.sleep(pause)
    last_api_call = time.time()
|
|
|
|
|
|
def favicon_from_link(link):
    """Build the conventional ``/favicon.ico`` URL for the site a link points to.

    Args:
        link: An absolute URL ("https://example.com/a"), a protocol-relative
            URL ("//example.com/a"), or a scheme-less one ("example.com/a",
            "localhost:3000/a").

    Returns:
        "<scheme>://<host[:port]>/favicon.ico", using "https" when the link
        carries no scheme, or "" when no host can be determined (empty or
        non-string input, relative paths, unparsable URLs, and host-less
        schemes such as "mailto:").
    """
    if not isinstance(link, str) or not link:
        return ""
    try:
        parts = urlparse(link)
    except ValueError:
        # urlparse rejects malformed authorities, e.g. unbalanced IPv6 brackets.
        return ""

    scheme, netloc = parts.scheme, parts.netloc
    if not netloc:
        first_segment = parts.path.split("/", 1)[0]
        if not scheme:
            # Scheme-less URL such as "example.com/path": the host is the
            # first path segment.
            netloc = first_segment
        elif first_segment.isdigit():
            # "host:port/path" is parsed as scheme="host", path="port/path";
            # a purely numeric first segment tells us it was really a port.
            netloc = f"{scheme}:{first_segment}"
            scheme = ""
        else:
            # A real scheme without an authority (mailto:, javascript:, ...)
            # has no site to take a favicon from.
            return ""
    if not netloc:
        return ""
    return f"{scheme or 'https'}://{netloc}/favicon.ico"
|
|
|
|
|
|
def _cse_request(q, num=DEFAULT_NUM, hl=DEFAULT_LOCALE):
    """Query the Custom Search API and return the decoded JSON response.

    Credentials are read from the environment (normally filled from .env):
      - GOOGLE_DEVELOPER_KEY: the API key
      - GOOGLE_CSE_ID: the search engine ID

    Args:
        q: The search query text.
        num: Desired number of results; clamped to the 1..10 range.
        hl: Value passed through as the "hl" request parameter.

    Raises:
        RuntimeError: If a credential is missing or the API responds with a
            status other than 200.
    """
    developer_key = os.environ.get("GOOGLE_DEVELOPER_KEY")
    engine_id = os.environ.get("GOOGLE_CSE_ID")
    if not developer_key:
        raise RuntimeError("GOOGLE_DEVELOPER_KEY není nastaven v .env")
    if not engine_id:
        raise RuntimeError("GOOGLE_CSE_ID (Programmable Search Engine ID) není nastaven v .env")

    # Respect the module-wide rate limit before touching the network.
    _throttle_api()

    # The API accepts at most 10 results per call.
    result_count = min(max(num, 1), 10)
    query_params = dict(
        key=developer_key,
        cx=engine_id,
        q=q,
        num=result_count,
        hl=hl,
        safe="off",
    )
    response = requests.get(API_ENDPOINT, params=query_params, timeout=15)
    if response.status_code == 200:
        return response.json()

    # Prefer the message from the API's JSON error payload; fall back to the
    # raw body when the payload is missing or is not the expected shape.
    try:
        detail = response.json().get("error", {}).get("message") or response.text
    except Exception:
        detail = response.text
    raise RuntimeError(f"Google CSE API chyba ({response.status_code}): {detail}")
|
|
|
|
|
|
def get_google_first_page(query):
    """Fetch one page of Custom Search results for ``query``.

    Returns a list of dicts, one per result, with the keys ``position``
    (1-based rank), ``title``, ``link``, ``snippet`` and ``icon`` (favicon URL
    derived from the link). Missing text fields become empty strings.
    """

    def first_present(entry, primary, fallback):
        # First truthy value of the two keys, or "" when neither is set.
        return entry.get(primary) or entry.get(fallback) or ""

    payload = _cse_request(query, num=DEFAULT_NUM, hl=DEFAULT_LOCALE)
    # "items" may be absent or null when there are no hits.
    entries = payload.get("items", []) or []

    page = []
    for rank, entry in enumerate(entries, start=1):
        url = first_present(entry, "link", "formattedUrl")
        heading = first_present(entry, "title", "htmlTitle")
        excerpt = first_present(entry, "snippet", "htmlSnippet")
        page.append(
            {
                "position": rank,
                "title": heading,
                "link": url,
                "snippet": excerpt,
                "icon": favicon_from_link(url),
            }
        )
    return page
|