gnexus-book/server/app/docs_repository.py at 68fb890843486dd70ffb8170cdb82e3b9556b78c

Fork: 0
root / gnexus-book
Find file
Newer
Older
gnexus-book / server / app / docs_repository.py
Eugene Sukhodolskiy 2 days ago 2 KB Add documentation foundation and backend maintenance API
Raw Blame History
from __future__ import annotations

from dataclasses import asdict
from pathlib import Path

from .config import Settings
from .markdown import parse_frontmatter


# Directory/file names that mark tooling, VCS, cache or dependency trees.
# The document listing skips any Markdown file whose path contains one of
# these names as a path component.
EXCLUDED_PARTS = {
    ".git",
    ".pytest_cache",
    ".ruff_cache",
    ".venv",
    "__pycache__",
    "gnexus_book_server.egg-info",
    "node_modules",
}


class RepositoryError(ValueError):
    """Signal an invalid repository request.

    Raised when a requested path points outside the repository root or when
    the requested document does not exist.
    """


class DocsRepository:
    """Read-only access to the Markdown documents under a repository root.

    All paths handed out by this class are POSIX-style and relative to the
    repository root; all paths accepted are validated to stay inside it.
    """

    def __init__(self, settings: Settings) -> None:
        """Store *settings* and the fully resolved repository root."""
        self.settings = settings
        self.repo_root = settings.repo_root.resolve()

    def _resolve_repo_path(self, path: str) -> Path:
        """Resolve *path* against the repository root.

        Args:
            path: Path relative to the repository root.

        Returns:
            The resolved absolute path (symlinks and ``..`` collapsed).

        Raises:
            RepositoryError: If the path is malformed or resolves to a
                location outside the repository root.
        """
        try:
            candidate = (self.repo_root / path).resolve()
        except ValueError:
            # resolve() can reject malformed input (e.g. an embedded NUL
            # byte); report it with the same error type as any other bad path.
            raise RepositoryError("Invalid path") from None
        try:
            # Succeeds only for the root itself or something beneath it.
            candidate.relative_to(self.repo_root)
        except ValueError:
            raise RepositoryError("Path escapes repository root") from None
        return candidate

    def _doc_paths(self) -> list[Path]:
        """Return the sorted Markdown files that belong to the repository."""
        found: list[Path] = []
        for path in sorted(self.repo_root.rglob("*.md")):
            # Only components *below* the root matter: the checkout itself may
            # legitimately live under a directory named like an excluded part.
            rel_parts = path.relative_to(self.repo_root).parts
            if any(part in EXCLUDED_PARTS for part in rel_parts):
                continue
            # rglob also yields directories (or broken links) named "*.md".
            if not path.is_file():
                continue
            # Skip symlinks leading outside the root so that listing and
            # searching never expose content that read_doc would refuse.
            try:
                path.resolve().relative_to(self.repo_root)
            except ValueError:
                continue
            found.append(path)
        return found

    def _summarize(self, path: Path, raw: str) -> dict[str, object]:
        """Build the listing entry for the document at *path* with text *raw*."""
        rel = path.relative_to(self.repo_root).as_posix()
        parsed = parse_frontmatter(rel, raw)
        return {
            "path": rel,
            # Fall back to the file name when the body has no level-1 heading.
            "title": self._extract_title(parsed.body) or path.stem,
            "frontmatter": parsed.frontmatter,
        }

    def list_docs(self) -> list[dict[str, object]]:
        """List every Markdown document in the repository.

        Returns:
            One dict per document, sorted by path, with the keys ``path``
            (relative POSIX path), ``title`` and ``frontmatter``.
        """
        return [
            self._summarize(path, path.read_text(encoding="utf-8"))
            for path in self._doc_paths()
        ]

    def read_doc(self, path: str) -> dict[str, object]:
        """Return the parsed document stored at *path*.

        Args:
            path: Path of a ``.md`` file relative to the repository root.

        Returns:
            The parsed document converted to a plain dict.

        Raises:
            RepositoryError: If the path escapes the repository root, is not
                a ``.md`` path, or does not name an existing file.
        """
        file_path = self._resolve_repo_path(path)
        if file_path.suffix != ".md" or not file_path.is_file():
            raise RepositoryError("Document not found")
        rel = file_path.relative_to(self.repo_root).as_posix()
        parsed = parse_frontmatter(rel, file_path.read_text(encoding="utf-8"))
        return asdict(parsed)

    def search(self, query: str) -> list[dict[str, object]]:
        """Find documents containing *query* (case-insensitive substring).

        Args:
            query: Text to look for; surrounding whitespace is ignored.

        Returns:
            One dict per matching document with the keys ``path``, ``title``
            and ``matches``. ``matches`` holds at most five entries, each with
            the 1-based ``line`` number in the raw file and the stripped line
            ``text``. A blank query yields an empty list.
        """
        normalized = query.strip().lower()
        if not normalized:
            return []

        results: list[dict[str, object]] = []
        for doc_path in self._doc_paths():
            # Read each file once and reuse the text for both the title and
            # the line scan.
            raw = doc_path.read_text(encoding="utf-8")
            doc = self._summarize(doc_path, raw)
            matches: list[dict[str, object]] = []
            for index, line in enumerate(raw.splitlines(), start=1):
                if normalized in line.lower():
                    matches.append({"line": index, "text": line.strip()})
                    if len(matches) >= 5:
                        break
            if matches:
                results.append(
                    {"path": doc["path"], "title": doc["title"], "matches": matches}
                )
        return results

    @staticmethod
    def _extract_title(markdown_body: str) -> str | None:
        """Return the text of the first level-1 heading in *markdown_body*.

        Lines inside fenced code blocks (``` or ~~~) are ignored so that a
        ``# comment`` in a code sample is not mistaken for the title.

        Returns:
            The heading text without the leading ``# ``, or ``None`` when the
            body contains no level-1 heading.
        """
        fence = None  # Opening fence marker while inside a fenced code block.
        for line in markdown_body.splitlines():
            stripped = line.lstrip()
            if fence is not None:
                if stripped.startswith(fence):
                    fence = None
                continue
            if stripped.startswith(("```", "~~~")):
                fence = stripped[:3]
                continue
            if line.startswith("# "):
                return line[2:].strip()
        return None