# vmk-360-data_collector / src / vmk_data_collector / services / image_downloader.py
import hashlib
import io
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlsplit

import httpx
import structlog
from PIL import Image

# Module-level structlog logger; ImageDownloader.download emits its
# "image_download_start" / "image_download_complete" events through it.
logger = structlog.get_logger()


@dataclass
class PropertyImageDownloadResult:
    """Metadata describing one image saved by ``ImageDownloader.download``."""

    # Path of the stored file, converted to a plain string.
    local_path: str
    # Hex-encoded SHA-256 digest of the downloaded bytes.
    image_hash: str
    # Pixel dimensions as reported by PIL's ``Image.size``.
    width: int
    height: int
    # Length of the downloaded content in bytes.
    file_size: int


class ImageDownloader:
    """Download property images over HTTP and store them on local disk.

    Each image is written to ``<storage_path>/<property_id>/<sha256>.<ext>``,
    so identical bytes for the same property always map to the same file.
    """

    # Substring of the Content-Type header -> extension, checked in order.
    _CONTENT_TYPE_EXTENSIONS = (
        ("jpeg", "jpg"),
        ("jpg", "jpg"),
        ("png", "png"),
        ("webp", "webp"),
        ("gif", "gif"),
    )
    # URL path suffixes recognised when the Content-Type is not conclusive.
    _URL_EXTENSIONS = (".jpg", ".jpeg", ".png", ".webp", ".gif")

    def __init__(self, storage_path: Path) -> None:
        """Remember the root directory under which images are stored."""
        self._storage_path = storage_path

    async def download(
        self,
        property_id: int,
        image_url: str,
        order_index: int,
    ) -> "PropertyImageDownloadResult":
        """Fetch ``image_url`` and store it under the property's directory.

        Args:
            property_id: Used as the name of the sub-directory of the
                storage path that receives the file.
            image_url: URL the image is fetched from with a GET request.
            order_index: Only included in the start log event.

        Returns:
            A ``PropertyImageDownloadResult`` with the stored path, the
            SHA-256 hex digest of the bytes, the pixel dimensions and the
            byte size.

        Raises:
            Exceptions from httpx (request failures, or the error raised by
            ``raise_for_status``) and from ``PIL.Image.open`` (payload that
            Pillow cannot open) propagate unchanged. Nothing is written to
            disk in either case.
        """
        logger.info(
            "image_download_start",
            property_id=property_id,
            url=image_url,
            order=order_index,
        )

        async with httpx.AsyncClient(timeout=30) as client:
            response = await client.get(image_url)
            response.raise_for_status()
            content = response.content

        # Validate the payload in memory *before* touching the filesystem,
        # so a non-image response (HTML error page, truncated body, ...)
        # does not leave an orphan file behind when Image.open raises.
        with Image.open(io.BytesIO(content)) as img:
            width, height = img.size

        image_hash = hashlib.sha256(content).hexdigest()
        ext = self._detect_extension(
            response.headers.get("content-type", ""), image_url
        )

        property_dir = self._storage_path / str(property_id)
        property_dir.mkdir(parents=True, exist_ok=True)

        local_path = property_dir / f"{image_hash}.{ext}"
        # NOTE(review): write_bytes is synchronous and blocks the event loop
        # for the duration of the write; consider offloading it if large
        # images or slow disks become a concern.
        local_path.write_bytes(content)

        file_size = len(content)

        logger.info(
            "image_download_complete",
            property_id=property_id,
            hash=image_hash,
            width=width,
            height=height,
            size=file_size,
        )

        return PropertyImageDownloadResult(
            local_path=str(local_path),
            image_hash=image_hash,
            width=width,
            height=height,
            file_size=file_size,
        )

    @staticmethod
    def _detect_extension(content_type: str, url: str) -> str:
        """Pick a file extension (without the dot) for a downloaded image.

        The Content-Type header wins when it mentions a known format;
        otherwise the suffix of the URL *path* is used (query string and
        fragment are ignored). Falls back to ``"jpg"`` when neither helps.
        """
        ct = content_type.lower()
        for token, ext in ImageDownloader._CONTENT_TYPE_EXTENSIONS:
            if token in ct:
                return ext

        # Look only at the path component so URLs such as
        # ".../photo.png?w=800" or ".../photo.webp#x" are still recognised.
        try:
            path = urlsplit(url).path
        except ValueError:
            # Malformed URL: fall back to matching on the raw string.
            path = url

        path_lower = path.lower()
        for suffix in ImageDownloader._URL_EXTENSIONS:
            if path_lower.endswith(suffix):
                return suffix.lstrip(".")
        return "jpg"