"""curl_cffi session manager with cookie warmup and TLS impersonation."""
import time
from curl_cffi import requests
from src.config import (
BASE_URL,
DEFAULT_HEADERS,
HOMEPAGE_URL,
IMPERSONATE,
)
class DomRiaSession:
    """Lightweight wrapper around curl_cffi.requests.Session.

    Establishes cookies by hitting the homepage first, then reuses the
    session (cookie jar + connection pool) for all subsequent requests.

    The wrapper can be used as a context manager so the underlying
    session is closed when the ``with`` block exits::

        with DomRiaSession() as s:
            s.warmup()
            resp = s.get_catalog(url)
    """

    def __init__(self, extra_headers: "dict | None" = None, timeout: int = 30):
        """Create the underlying session and build the default header set.

        Args:
            extra_headers: Optional headers merged over ``DEFAULT_HEADERS``;
                on key collisions the values given here win.
            timeout: Timeout value forwarded to every request made through
                this wrapper.
        """
        self.timeout = timeout
        self.session = requests.Session()
        self.headers = {**DEFAULT_HEADERS, **(extra_headers or {})}

    def __enter__(self) -> "DomRiaSession":
        """Return this wrapper so it can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the underlying session; exceptions are not suppressed."""
        self.close()

    def close(self) -> None:
        """Close the underlying curl_cffi session."""
        self.session.close()

    def _get(self, url: str, referer: "str | None" = None):
        """Issue a GET through the shared session with the common options.

        Args:
            url: Absolute URL to fetch.
            referer: When given, sent as the ``Referer`` header, overriding
                any ``Referer`` already present in ``self.headers``.

        Returns:
            Whatever ``self.session.get`` returns for the request.
        """
        # Copy so that per-request additions never leak into self.headers.
        headers = dict(self.headers)
        if referer is not None:
            headers["Referer"] = referer
        return self.session.get(
            url,
            headers=headers,
            impersonate=IMPERSONATE,
            timeout=self.timeout,
        )

    def warmup(self, delay: float = 2.0) -> None:
        """Hit the homepage so DOM.RIA sets session cookies.

        Without this step catalog requests often return 404/403.

        Args:
            delay: Seconds to sleep after the homepage request. Values
                less than or equal to zero skip the pause.
        """
        print("[session] Warming up cookies via homepage …")
        # We only care about side-effects (Set-Cookie), not the body.
        resp = self._get(HOMEPAGE_URL)
        print(f"[session] Homepage status={resp.status_code} cookies={len(self.session.cookies)} items")
        if delay > 0:
            time.sleep(delay)

    def get_catalog(self, url: str) -> requests.Response:
        """GET a catalog listing page, sending the homepage as Referer.

        Args:
            url: Absolute URL of the catalog listing page.

        Returns:
            The response object produced by the underlying session.
        """
        return self._get(url, referer=HOMEPAGE_URL)

    def get_detail(self, beautiful_url: str) -> requests.Response:
        """GET a detail page, sending the homepage as Referer.

        Args:
            beautiful_url: Path relative to ``BASE_URL`` (e.g. 'realtor/…').
                Leading slashes are ignored.

        Returns:
            The response object produced by the underlying session.
        """
        # Strip slashes on both sides of the join so neither a trailing
        # slash on BASE_URL nor a leading one on the path yields "//".
        url = f"{BASE_URL.rstrip('/')}/{beautiful_url.lstrip('/')}"
        return self._get(url, referer=HOMEPAGE_URL)