import hashlib
import io
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlsplit

import httpx
import structlog
from PIL import Image

# Module-level structlog logger; events in this module are emitted as an
# event name plus key/value context.
logger = structlog.get_logger()
@dataclass
class PropertyImageDownloadResult:
    """Outcome of a single successful image download.

    Attributes:
        local_path: Filesystem path (as a string) of the stored image file.
        image_hash: SHA-256 hex digest of the downloaded bytes.
        width: Image width as reported by Pillow.
        height: Image height as reported by Pillow.
        file_size: Number of bytes in the downloaded payload.
    """

    # Field order is part of the positional ``__init__`` signature; keep it.
    local_path: str
    image_hash: str
    width: int
    height: int
    file_size: int
class ImageDownloader:
    """Download property images and store them on disk under their content hash.

    Each image is written to ``<storage_path>/<property_id>/<sha256>.<ext>``,
    so downloading identical bytes twice for one property yields the same path.
    """

    # Total request timeout, in seconds, passed to the httpx client.
    _TIMEOUT_SECONDS = 30

    # (token, extension) pairs checked in order. The token is searched for in
    # the Content-Type header, and ``"." + token`` is matched against the end
    # of the URL path. Both JPEG spellings map to the single extension "jpg".
    _KNOWN_FORMATS = (
        ("jpeg", "jpg"),
        ("jpg", "jpg"),
        ("png", "png"),
        ("webp", "webp"),
        ("gif", "gif"),
    )

    # Extension used when neither the header nor the URL identifies a format.
    _DEFAULT_EXTENSION = "jpg"

    def __init__(self, storage_path: Path) -> None:
        """Create a downloader that stores files beneath ``storage_path``."""
        self._storage_path = storage_path

    async def download(
        self,
        property_id: int,
        image_url: str,
        order_index: int,
    ) -> "PropertyImageDownloadResult":
        """Fetch ``image_url`` and persist it for ``property_id``.

        Args:
            property_id: Identifier of the property; used as the name of the
                sub-directory the image is stored in.
            image_url: URL to fetch with an HTTP GET.
            order_index: Position of the image; only included in the log event.

        Returns:
            A ``PropertyImageDownloadResult`` with the stored path, the SHA-256
            hex digest of the payload, the image dimensions and the byte size.

        Raises:
            httpx.HTTPStatusError: The server answered with a 4xx/5xx status.
            httpx.RequestError: The request itself failed (timeout, DNS, ...).
            PIL.UnidentifiedImageError: The payload is not a readable image.
            OSError: The directory or file could not be written.
        """
        logger.info(
            "image_download_start",
            property_id=property_id,
            url=image_url,
            order=order_index,
        )

        async with httpx.AsyncClient(timeout=self._TIMEOUT_SECONDS) as client:
            response = await client.get(image_url)
            response.raise_for_status()
            content = response.content

        # Validate the payload from memory *before* touching the disk, so a
        # non-image response (e.g. an HTML error page served with status 200)
        # does not leave a junk file behind when Pillow rejects it.
        with Image.open(io.BytesIO(content)) as img:
            width, height = img.size

        image_hash = hashlib.sha256(content).hexdigest()
        ext = self._detect_extension(
            response.headers.get("content-type", ""), image_url
        )

        property_dir = self._storage_path / str(property_id)
        property_dir.mkdir(parents=True, exist_ok=True)
        local_path = property_dir / f"{image_hash}.{ext}"
        local_path.write_bytes(content)

        file_size = len(content)
        logger.info(
            "image_download_complete",
            property_id=property_id,
            hash=image_hash,
            width=width,
            height=height,
            size=file_size,
        )
        return PropertyImageDownloadResult(
            local_path=str(local_path),
            image_hash=image_hash,
            width=width,
            height=height,
            file_size=file_size,
        )

    @staticmethod
    def _detect_extension(content_type: str, url: str) -> str:
        """Pick a file extension (without the dot) for a downloaded image.

        The Content-Type header takes precedence; if it names no known format,
        the suffix of the URL *path* is used (query string and fragment are
        ignored). Falls back to ``"jpg"`` when nothing matches.

        Args:
            content_type: Raw Content-Type header value; may be empty.
            url: URL the image was fetched from.

        Returns:
            One of ``"jpg"``, ``"png"``, ``"webp"`` or ``"gif"``.
        """
        ct = content_type.lower()
        for token, ext in ImageDownloader._KNOWN_FORMATS:
            if token in ct:
                return ext

        # Match on the path only: "photo.png?w=200" must still count as PNG.
        try:
            path = urlsplit(url).path
        except ValueError:
            # Unparseable URL; fall back to matching the raw string.
            path = url
        path = path.lower()
        for token, ext in ImageDownloader._KNOWN_FORMATS:
            if path.endswith("." + token):
                return ext

        return ImageDownloader._DEFAULT_EXTENSION