"""Реализация инструментов MCP-сервера для поиска по базе недвижимости."""
import json
from datetime import datetime
from vmk_data_mcp.db import USER_COLUMNS, fetch, fetchrow
from vmk_data_mcp.embedder import get_embedding
from vmk_data_mcp.models import (
GetListingInput,
ListingResult,
MetadataFilters,
SearchMetadataInput,
SearchResult,
SearchSimilarInput,
)
# Список колонок для SELECT (пользовательские)
_SELECT_COLUMNS = ", ".join(
f"{col}"
for col in sorted(USER_COLUMNS)
if col not in {"search_vector", "embedding"}
)
def _build_where_clause(filters: MetadataFilters) -> tuple[str, list]:
"""Строит WHERE-условия из фильтров. Возвращает (sql_fragment, params).
Все параметры передаются через placeholders ($1, $2...) — SQL-инъекции невозможны.
"""
conditions: list[str] = []
params: list = []
if filters.deal_type is not None:
conditions.append(f"deal_type = ${len(params) + 1}")
params.append(filters.deal_type)
if filters.city is not None:
conditions.append(f"city ILIKE ${len(params) + 1}")
params.append(f"%{filters.city}%")
if filters.district is not None:
conditions.append(f"district ILIKE ${len(params) + 1}")
params.append(f"%{filters.district}%")
if filters.rooms_count is not None:
conditions.append(f"rooms_count = ${len(params) + 1}")
params.append(filters.rooms_count)
if filters.min_price is not None:
conditions.append(f"price >= ${len(params) + 1}")
params.append(filters.min_price)
if filters.max_price is not None:
conditions.append(f"price <= ${len(params) + 1}")
params.append(filters.max_price)
if filters.currency is not None:
conditions.append(f"currency = ${len(params) + 1}")
params.append(filters.currency)
if filters.min_total_area is not None:
conditions.append(f"total_area >= ${len(params) + 1}")
params.append(filters.min_total_area)
if filters.max_total_area is not None:
conditions.append(f"total_area <= ${len(params) + 1}")
params.append(filters.max_total_area)
if filters.building_type is not None:
conditions.append(f"building_type = ${len(params) + 1}")
params.append(filters.building_type)
if filters.floor is not None:
conditions.append(f"floor = ${len(params) + 1}")
params.append(filters.floor)
if filters.listing_status is not None:
conditions.append(f"listing_status = ${len(params) + 1}")
params.append(filters.listing_status)
if filters.metro_station is not None:
conditions.append(f"metro_station ILIKE ${len(params) + 1}")
params.append(f"%{filters.metro_station}%")
where_sql = " AND ".join(conditions) if conditions else "TRUE"
return where_sql, params
def _record_to_listing(
row: dict, similarity: float | None = None, rank: float | None = None
) -> ListingResult:
"""Конвертирует asyncpg.Record в ListingResult."""
data = dict(row)
data.pop("search_vector", None)
data.pop("embedding", None)
# Приводим datetime -> date где нужно
for key in ("publish_date", "archived_at", "created_at", "updated_at"):
if data.get(key) and isinstance(data[key], datetime):
data[key] = data[key].date()
data["similarity_score"] = similarity
data["rank_score"] = rank
return ListingResult(**data)
async def search_similar_listings(args: SearchSimilarInput) -> str:
"""🔍 Векторный поиск объявлений по смыслу запроса.
Использует pgvector + HNSW-индекс для косинусной близости.
Запрос должен быть на украинском языке.
"""
embedding = await get_embedding(args.query)
embedding_str = "[" + ",".join(str(v) for v in embedding) + "]"
where_sql, params = _build_where_clause(args.filters)
# pgvector <=> возвращает cosine distance с диапазоном [0, 2].
# similarity = 1 - distance/2. Для min_similarity максимальное distance:
# distance_max = 2 * (1 - min_similarity)
max_distance = 2.0 * (1.0 - args.min_similarity)
count_sql = f"""
SELECT COUNT(*) FROM property_listings
WHERE {where_sql}
AND embedding <=> ${len(params) + 1}::vector <= ${len(params) + 2}
"""
count_params = [*params, embedding_str, max_distance]
total_row = await fetchrow(count_sql, *count_params)
total = total_row[0] if total_row else 0
select_sql = f"""
SELECT {_SELECT_COLUMNS},
1 - (embedding <=> ${len(params) + 1}::vector) / 2.0 AS similarity_score
FROM property_listings
WHERE {where_sql}
AND embedding <=> ${len(params) + 1}::vector <= ${len(params) + 2}
ORDER BY embedding <=> ${len(params) + 1}::vector ASC
LIMIT ${len(params) + 3} OFFSET ${len(params) + 4}
"""
select_params = [
*params,
embedding_str,
max_distance,
args.pagination.limit,
args.pagination.offset,
]
rows = await fetch(select_sql, *select_params)
listings = [_record_to_listing(r, similarity=r["similarity_score"]) for r in rows]
result = SearchResult(
total=total,
limit=args.pagination.limit,
offset=args.pagination.offset,
listings=listings,
)
return result.model_dump_json(indent=2, ensure_ascii=False)
async def search_by_metadata(args: SearchMetadataInput) -> str:
"""📋 Полнотекстовый поиск + фильтры по метаданным.
Использует готовую FTS-колонку `search_vector` (ukrainian) + GIN-индекс.
Запрос должен быть на украинском языке.
"""
where_sql, params = _build_where_clause(args.filters)
# Добавляем FTS-условие
tsquery = f"plainto_tsquery('ukrainian', ${len(params) + 1})"
fts_condition = f"search_vector @@ {tsquery}"
params.append(args.query)
full_where = f"{where_sql} AND {fts_condition}" if where_sql != "TRUE" else fts_condition
# COUNT
count_sql = f"""
SELECT COUNT(*) FROM property_listings
WHERE {full_where}
"""
total_row = await fetchrow(count_sql, *params)
total = total_row[0] if total_row else 0
# SELECT с ранжированием
select_sql = f"""
SELECT {_SELECT_COLUMNS},
ts_rank_cd(search_vector, {tsquery}, 32) AS rank_score
FROM property_listings
WHERE {full_where}
ORDER BY ts_rank_cd(search_vector, {tsquery}, 32) DESC
LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}
"""
select_params = [*params, args.pagination.limit, args.pagination.offset]
rows = await fetch(select_sql, *select_params)
listings = [_record_to_listing(r, rank=r["rank_score"]) for r in rows]
result = SearchResult(
total=total,
limit=args.pagination.limit,
offset=args.pagination.offset,
listings=listings,
)
return result.model_dump_json(indent=2, ensure_ascii=False)
async def get_listing_by_id(args: GetListingInput) -> str:
"""📄 Получить объявление по ID.
Возвращает полную карточку объявления со всеми пользовательскими полями.
"""
sql = f"""
SELECT {_SELECT_COLUMNS}
FROM property_listings
WHERE id = $1
LIMIT 1
"""
row = await fetchrow(sql, args.listing_id)
if row is None:
return json.dumps({"error": "Listing not found"}, ensure_ascii=False)
listing = _record_to_listing(row)
return listing.model_dump_json(indent=2, ensure_ascii=False)
async def describe_schema() -> str:
"""ℹ️ Описание схемы базы данных.
Возвращает статическое описание таблицы `property_listings` —
поля, типы, enum-значения и примеры запросов.
"""
schema_info = {
"table": "property_listings",
"description": "Объявления о недвижимости (квартиры, дома, аренда, продажа)",
"language": "ukrainian",
"note": "Все текстовые запросы должны быть на украинском языке.",
"columns": {
"id": {"type": "integer", "description": "Уникальный ID объявления"},
"title": {"type": "text", "description": "Заголовок объявления"},
"description": {"type": "text", "description": "Описание от продавца"},
"generated_description": {"type": "text", "description": "AI-сгенерированное описание"},
"price": {"type": "numeric", "description": "Цена"},
"currency": {"type": "enum(USD,EUR,UAH)", "description": "Валюта"},
"deal_type": {
"type": "enum",
"values": ["sale", "rent_long", "rent_short"],
"description": "Тип сделки: продажа | долгосрочная аренда | посуточная аренда",
},
"city": {"type": "text", "description": "Город (украинский)"},
"district": {"type": "text", "description": "Район (украинский)"},
"rooms_count": {"type": "integer", "description": "Количество комнат"},
"total_area": {"type": "numeric", "description": "Общая площадь, м²"},
"living_area": {"type": "numeric", "description": "Жилая площадь, м²"},
"kitchen_area": {"type": "numeric", "description": "Площадь кухни, м²"},
"floor": {"type": "integer", "description": "Этаж"},
"floors_count": {"type": "integer", "description": "Всего этажей в доме"},
"building_type": {
"type": "enum",
"values": ["brick", "panel", "monolith", "gas_block", "wood"],
"description": "Тип постройки",
},
"building_year": {"type": "integer", "description": "Год постройки"},
"renovation_status": {
"type": "enum",
"values": [
"no_renovation",
"cosmetic",
"european",
"designer",
"full",
],
"description": "Состояние ремонта",
},
"balcony_count": {"type": "integer", "description": "Количество балконов"},
"bathroom_type": {
"type": "enum",
"values": ["combined", "separate", "two_or_more"],
"description": "Тип санузла",
},
"parking_type": {
"type": "enum",
"values": ["no_parking", "ground", "underground", "garage"],
"description": "Тип парковки",
},
"heating_type": {
"type": "enum",
"values": ["central", "autonomous", "individual", "none"],
"description": "Тип отопления",
},
"layout_type": {
"type": "enum",
"values": [
"studio",
"one_room",
"two_room",
"three_room",
"four_room_plus",
"duplex",
],
"description": "Тип планировки",
},
"window_view": {
"type": "enum",
"values": ["courtyard", "street", "park", "water", "mixed"],
"description": "Вид из окон",
},
"metro_station": {"type": "text", "description": "Станция метро (украинский)"},
"metro_distance_type": {
"type": "enum",
"values": ["walking", "transport", "far"],
"description": "Удалённость от метро",
},
"metro_distance_meters": {
"type": "integer",
"description": "Расстояние до метро в метрах",
},
"url_source": {"type": "text", "description": "Исходный URL объявления"},
"publish_date": {"type": "date", "description": "Дата публикации"},
"images_count": {"type": "integer", "description": "Количество фото"},
"contact_phone": {"type": "text", "description": "Телефон контакта"},
"listing_status": {
"type": "enum",
"values": ["active", "sold", "rented", "removed", "archived"],
"description": "Статус объявления",
},
"archived_at": {"type": "date", "description": "Дата архивации"},
"created_at": {"type": "timestamp", "description": "Дата создания записи"},
"updated_at": {"type": "timestamp", "description": "Дата обновления записи"},
},
"search_features": {
"vector_search": {
"column": "embedding vector(768)",
"index": "HNSW (vector_cosine_ops)",
"model": "nomic-embed-text (Ollama)",
"description": "Семантический поиск по смыслу. Запросы на украинском.",
},
"full_text_search": {
"column": "search_vector tsvector (generated)",
"index": "GIN",
"config": "ukrainian",
"description": (
"Полнотекстовый поиск по title, description, city, district, metro_station"
),
},
},
}
return json.dumps(schema_info, indent=2, ensure_ascii=False)