Patch summary (commentary placed before the first "diff --git" header):
  * src/config.py: CATEGORY_MAP slug 4 becomes "kom-nedvizhimosti", slug 5
    becomes "garazhei", and category 6 ("komnat", rooms) is added;
    OPERATION_SLUGS["rent_daily"] becomes "posutochnaia-arenda".
  * src/normalizer.py: adds the display name for category 6.
NOTE(review): indentation, comment alignment and blank context lines were
reconstructed from the hunk line counts; verify against the target files
before applying.

diff --git a/src/config.py b/src/config.py
index 0be9509..57c09aa 100644
--- a/src/config.py
+++ b/src/config.py
@@ -35,20 +35,22 @@ PAUSE_DURATION_SECONDS = 120.0
 
 
 # --- Categories ---------------------------------------------------------------
-# type_id: slug fragment used in URL (e.g. "kvartir" for /prodazha-kvartir/)
+# Discovered from DOM.RIA homepage navigation links.
 CATEGORY_MAP: Dict[int, str] = {
-    1: "kvartir",        # apartments
-    2: "domov",          # houses
-    3: "uchastkov",      # land
-    4: "kommercheskih",  # commercial
-    5: "garazhey",       # garages
+    1: "kvartir",            # apartments
+    2: "domov",              # houses
+    3: "uchastkov",          # land
+    4: "kom-nedvizhimosti",  # commercial (offices + premises)
+    5: "garazhei",           # garages / parking
+    6: "komnat",             # rooms
 }
 
 # operation slug → URL prefix
+# Discovered from DOM.RIA homepage: posutochnaia (not posutochnaya)
 OPERATION_SLUGS: Dict[str, str] = {
     "sale": "prodazha",
     "rent": "arenda",
-    "rent_daily": "posutochnaya-arenda",
+    "rent_daily": "posutochnaia-arenda",
 }
 
 # Fallback city names (enriched dynamically from sitemap + homepage)
diff --git a/src/normalizer.py b/src/normalizer.py
index aa6d1dd..d62c282 100644
--- a/src/normalizer.py
+++ b/src/normalizer.py
@@ -183,6 +183,7 @@
         3: "Земельна ділянка",
         4: "Комерційна нерухомість",
         5: "Гараж",
+        6: "Кімната",
     }.get(category_id, "Нерухомість")
     parts.append(cat_name)
     if city: