from __future__ import annotations

from collections.abc import Iterator
from dataclasses import asdict
from pathlib import Path

from .config import Settings
from .markdown import parse_frontmatter
# Path component names (VCS metadata, caches, virtualenv, packaging and
# dependency directories). DocsRepository.list_docs skips any Markdown file
# whose path contains one of these components.
EXCLUDED_PARTS = {
    ".git",
    ".venv",
    "node_modules",
    "__pycache__",
    ".pytest_cache",
    ".ruff_cache",
    "gnexus_book_server.egg-info",
}
class RepositoryError(ValueError):
    """Raised by DocsRepository when a requested path cannot be served.

    Used when a path resolves outside the repository root or does not point
    at an existing Markdown file.
    """
class DocsRepository:
    """Read-only access to the Markdown documents under a repository root.

    Document paths accepted and returned by the public methods are POSIX-style
    paths relative to ``settings.repo_root``.
    """

    # Maximum number of matching lines reported per document by ``search``.
    MAX_MATCHES_PER_DOC = 5

    def __init__(self, settings: Settings) -> None:
        """Keep *settings* and cache the resolved repository root."""
        self.settings = settings
        # Resolved once so that containment checks compare canonical paths.
        self.repo_root = settings.repo_root.resolve()

    def _is_within_root(self, candidate: Path) -> bool:
        """Return True if the already-resolved *candidate* is the root or below it."""
        return candidate == self.repo_root or self.repo_root in candidate.parents

    def _resolve_repo_path(self, path: str) -> Path:
        """Resolve *path* against the repository root.

        Args:
            path: Path relative to the repository root.

        Returns:
            The resolved absolute path (it is not required to exist).

        Raises:
            RepositoryError: If the path is malformed or resolves outside the
                repository root (``..`` segments, absolute paths, symlinks).
        """
        try:
            candidate = (self.repo_root / path).resolve()
        except ValueError as exc:
            # Malformed input (e.g. an embedded NUL byte) can surface as a
            # plain ValueError; report it through the repository's own error
            # type, which is itself a ValueError subclass.
            raise RepositoryError("Invalid path") from exc
        if not self._is_within_root(candidate):
            raise RepositoryError("Path escapes repository root")
        return candidate

    def _iter_docs(self) -> Iterator[tuple[dict[str, object], str]]:
        """Yield ``(summary, raw_text)`` for each listable document, sorted by path.

        ``summary`` is the dict returned per document by ``list_docs``. The
        raw text is yielded alongside it so callers never read a file twice.
        """
        for path in sorted(self.repo_root.rglob("*.md")):
            rel_path = path.relative_to(self.repo_root)
            # Only components *below* the root count: the repository itself
            # may legitimately live under a directory with an excluded name.
            if any(part in EXCLUDED_PARTS for part in rel_path.parts):
                continue
            # The glob also matches directories named "*.md" and broken
            # symlinks; neither can be read as a document.
            if not path.is_file():
                continue
            # Skip symlinks pointing outside the repository: read_doc would
            # refuse them, so they must not be listed or searched either.
            if not self._is_within_root(path.resolve()):
                continue
            rel = rel_path.as_posix()
            raw = path.read_text(encoding="utf-8")
            parsed = parse_frontmatter(rel, raw)
            # Fall back to the file name (without suffix) when there is no
            # usable level-1 heading.
            title = self._extract_title(parsed.body) or path.stem
            summary: dict[str, object] = {
                "path": rel,
                "title": title,
                "frontmatter": parsed.frontmatter,
            }
            yield summary, raw

    def list_docs(self) -> list[dict[str, object]]:
        """Return a summary of every Markdown document in the repository.

        Returns:
            One dict per document, sorted by path, with the keys ``"path"``
            (repo-relative POSIX path), ``"title"`` (first level-1 heading of
            the body, or the file stem) and ``"frontmatter"`` (as produced by
            ``parse_frontmatter``).
        """
        return [summary for summary, _raw in self._iter_docs()]

    def read_doc(self, path: str) -> dict[str, object]:
        """Return the parsed document stored at *path*.

        Args:
            path: Repo-relative path of a ``.md`` file.

        Returns:
            ``dataclasses.asdict`` of the object returned by
            ``parse_frontmatter`` for that file.

        Raises:
            RepositoryError: If the path escapes the repository root, does not
                end in ``.md`` or is not an existing file.
        """
        # NOTE(review): unlike list_docs, directories in EXCLUDED_PARTS are not
        # filtered here -- confirm whether that is intended.
        file_path = self._resolve_repo_path(path)
        if file_path.suffix != ".md" or not file_path.is_file():
            raise RepositoryError("Document not found")
        rel = file_path.relative_to(self.repo_root).as_posix()
        parsed = parse_frontmatter(rel, file_path.read_text(encoding="utf-8"))
        return asdict(parsed)

    def search(self, query: str) -> list[dict[str, object]]:
        """Case-insensitively search every listed document for *query*.

        The query is stripped and lower-cased, then matched as a substring
        against each line of the raw file text (front matter included).

        Args:
            query: Text to look for; a blank query yields no results.

        Returns:
            One dict per document with at least one hit, in ``list_docs``
            order, with the keys ``"path"``, ``"title"`` and ``"matches"``.
            ``"matches"`` holds up to ``MAX_MATCHES_PER_DOC`` dicts of the form
            ``{"line": <1-based line number>, "text": <stripped line>}``.
        """
        needle = query.strip().lower()
        if not needle:
            return []
        results: list[dict[str, object]] = []
        for summary, raw in self._iter_docs():
            matches: list[dict[str, object]] = []
            for number, line in enumerate(raw.splitlines(), start=1):
                if needle in line.lower():
                    matches.append({"line": number, "text": line.strip()})
                    if len(matches) >= self.MAX_MATCHES_PER_DOC:
                        break
            if matches:
                results.append(
                    {
                        "path": summary["path"],
                        "title": summary["title"],
                        "matches": matches,
                    }
                )
        return results

    @staticmethod
    def _extract_title(markdown_body: str) -> str | None:
        """Return the text of the first level-1 ATX heading in *markdown_body*.

        A heading is a line starting with ``"# "`` at column 0. Lines inside
        fenced code blocks (``` or ~~~) are ignored so that, for example, a
        shell comment in a code sample is not mistaken for the title.

        Returns:
            The stripped heading text (possibly empty), or ``None`` if the
            body contains no such heading.
        """
        fence: str | None = None  # opening fence run while inside a code block
        for line in markdown_body.splitlines():
            stripped = line.lstrip()
            if fence is not None:
                # A closing fence uses the same character, is at least as long
                # as the opener and carries nothing but trailing whitespace.
                closer = stripped.rstrip()
                if len(closer) >= len(fence) and closer.strip(fence[0]) == "":
                    fence = None
                continue
            if stripped.startswith(("```", "~~~")):
                fence_char = stripped[0]
                run_length = len(stripped) - len(stripped.lstrip(fence_char))
                fence = fence_char * run_length
                continue
            if line.startswith("# "):
                return line[2:].strip()
        return None