diff --git a/90-maintenance/documentation-rules.md b/90-maintenance/documentation-rules.md index dcbaf4d..f375943 100644 --- a/90-maintenance/documentation-rules.md +++ b/90-maintenance/documentation-rules.md @@ -30,6 +30,9 @@ Store references to secret locations instead, such as password manager item names or future vault paths. +The validation API scans Markdown, YAML, JSON, and dotenv-style files for common raw secret assignments. +Use reference values such as `unknown`, `redacted`, `password manager`, `vault`, or `ref:...` when a field must describe where a secret lives. + ## Review Update `last_reviewed` only when the information has actually been checked. @@ -45,3 +48,13 @@ Every important inventory record should link to a documentation page. Every public traffic route should list its exposure and target services when known. + +Inventory item `id` values must be unique within each inventory file. + +Run `GET /validate` before applying or committing documentation changes. A clean report means: + +- inventory files pass JSON Schema validation; +- required Markdown frontmatter is present; +- inventory `docs` links point to existing files; +- inventory IDs are not duplicated; +- no obvious raw secrets were detected. diff --git a/server/README.md b/server/README.md index 95895e2..7833723 100644 --- a/server/README.md +++ b/server/README.md @@ -43,7 +43,19 @@ - `POST /changes` can create pending change records under `90-maintenance/pending-changes/`. - `POST /changes/{id}/apply` can apply `kind=doc` and `kind=inventory-item` changes after validation. - No authentication yet. -- No commit or review workflow yet. - `POST /commit` creates a local Git commit only. It does not push. - Commit requests must provide an explicit file list. - Validation uses JSON Schema 2020-12 for inventory files. 
+ +## Validation + +`GET /validate` checks: + +- schema validity for `schemas/*.json`; +- required Markdown frontmatter; +- inventory YAML against JSON Schema; +- inventory `docs` links; +- duplicate inventory item IDs; +- common raw secret assignment patterns in Markdown, YAML, JSON, and dotenv-style files. + +`POST /changes/{id}/apply` and `POST /commit` both rely on validation to block unsafe repository states. diff --git a/server/app/validation.py b/server/app/validation.py index e6e7f01..d939978 100644 --- a/server/app/validation.py +++ b/server/app/validation.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import re from datetime import date from dataclasses import dataclass from pathlib import Path @@ -11,7 +12,7 @@ from jsonschema.exceptions import SchemaError from .config import Settings -from .docs_repository import DocsRepository +from .docs_repository import DocsRepository, EXCLUDED_PARTS from .freshness import REQUIRED_FRONTMATTER @@ -27,6 +28,31 @@ "virtual-machines": "virtual-machine.schema.json", } +SECRET_ASSIGNMENT_RE = re.compile( + r"""(?ix) + \b( + password|passwd|token| + api[_-]?(key|token)| + access[_-]?token| + auth[_-]?token| + secret| + private[_ -]?key| + session[_ -]?cookie + )\b + \s*[:=]\s* + (?P<value>.+) + """ +) +PRIVATE_KEY_BLOCK_RE = re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----") +SAFE_SECRET_REFERENCE_RE = re.compile( + r"""(?ix) + ^\s* + (unknown|none|null|redacted|not\s+documented|not\s+stored|password\s+manager|vault|secret\s+reference|reference|ref:) + \b + """ +) +SECRET_SCAN_SUFFIXES = {".md", ".yml", ".yaml", ".json", ".env"} + @dataclass(frozen=True) class ValidationIssue: @@ -42,6 +68,8 @@ issues.extend(_validate_markdown_frontmatter(settings)) issues.extend(_validate_inventory(settings)) issues.extend(_validate_inventory_doc_links(settings.repo_root)) + issues.extend(_validate_unique_inventory_ids(settings)) + issues.extend(_validate_secret_patterns(settings.repo_root)) serialized = [issue.__dict__ for 
issue in issues] return { @@ -227,3 +255,78 @@ ) ) return issues + + +def _validate_unique_inventory_ids(settings: Settings) -> list[ValidationIssue]: + issues: list[ValidationIssue] = [] + for inventory_file in sorted(settings.inventory_dir.glob("*.yml")): + rel_path = inventory_file.relative_to(settings.repo_root).as_posix() + try: + data = yaml.safe_load(inventory_file.read_text(encoding="utf-8")) + except yaml.YAMLError: + continue + if data is None: + continue + if not isinstance(data, list): + continue + + seen: dict[str, int] = {} + for index, item in enumerate(data): + if not isinstance(item, dict): + continue + item_id = item.get("id") + if not isinstance(item_id, str) or not item_id: + continue + if item_id in seen: + issues.append( + ValidationIssue( + path=rel_path, + severity="error", + code="duplicate-inventory-id", + message=f"Duplicate id {item_id!r} at item indexes {seen[item_id]} and {index}", + ) + ) + else: + seen[item_id] = index + return issues + + +def _validate_secret_patterns(repo_root: Path) -> list[ValidationIssue]: + issues: list[ValidationIssue] = [] + for path in sorted(repo_root.rglob("*")): + if not path.is_file() or path.suffix not in SECRET_SCAN_SUFFIXES: + continue + if any(part in EXCLUDED_PARTS for part in path.parts): + continue + rel_path = path.relative_to(repo_root).as_posix() + try: + text = path.read_text(encoding="utf-8") + except UnicodeDecodeError: + continue + + if PRIVATE_KEY_BLOCK_RE.search(text): + issues.append( + ValidationIssue( + path=rel_path, + severity="error", + code="possible-secret", + message="File contains a private key block marker", + ) + ) + + for line_number, line in enumerate(text.splitlines(), start=1): + match = SECRET_ASSIGNMENT_RE.search(line) + if not match: + continue + value = match.group("value").strip().strip("\"'") + if not value or SAFE_SECRET_REFERENCE_RE.search(value): + continue + issues.append( + ValidationIssue( + path=rel_path, + severity="error", + code="possible-secret", + 
message=f"Possible raw secret assignment at line {line_number}", + ) + ) + return issues diff --git a/server/tests/test_docs_repository.py b/server/tests/test_docs_repository.py index 029c8f2..a79c57e 100644 --- a/server/tests/test_docs_repository.py +++ b/server/tests/test_docs_repository.py @@ -6,6 +6,31 @@ from app.validation import validate_repository +def _copy_schema_files(tmp_path: Path) -> None: + schema_dir = tmp_path / "schemas" + schema_dir.mkdir() + source_schema = Settings().repo_root / "schemas" + for schema in source_schema.glob("*.json"): + (schema_dir / schema.name).write_text(schema.read_text(encoding="utf-8"), encoding="utf-8") + + +def _create_empty_inventory(tmp_path: Path) -> None: + inventory_dir = tmp_path / "40-inventory" + inventory_dir.mkdir() + for name in [ + "backups", + "databases", + "domains", + "hardware", + "hosts", + "networks", + "services", + "traffic-routes", + "virtual-machines", + ]: + (inventory_dir / f"{name}.yml").write_text("---\n[]\n", encoding="utf-8") + + def test_lists_docs_from_repo_root() -> None: repo_root = Path(__file__).resolve().parents[2] docs = DocsRepository(Settings(repo_root)).list_docs() @@ -47,3 +72,74 @@ assert report["status"] == "ok" assert report["issues"] == [] + + +def test_validation_rejects_duplicate_inventory_ids(tmp_path: Path) -> None: + _copy_schema_files(tmp_path) + _create_empty_inventory(tmp_path) + (tmp_path / "40-inventory" / "services.yml").write_text( + "---\n" + "- id: duplicate-service\n" + " name: Duplicate Service\n" + " type: application\n" + " status: active\n" + " host: unknown\n" + " domains: []\n" + " ports: []\n" + " criticality: low\n" + " docs: ../10-systems/example.md\n" + " last_reviewed: 2026-05-09\n" + "- id: duplicate-service\n" + " name: Duplicate Service Copy\n" + " type: application\n" + " status: active\n" + " host: unknown\n" + " domains: []\n" + " ports: []\n" + " criticality: low\n" + " docs: ../10-systems/example.md\n" + " last_reviewed: 2026-05-09\n", + 
encoding="utf-8", + ) + docs_dir = tmp_path / "10-systems" + docs_dir.mkdir() + (docs_dir / "example.md").write_text( + "---\n" + "owner: gmikcon\n" + "status: active\n" + "last_reviewed: 2026-05-09\n" + "review_interval: 90d\n" + "confidence: medium\n" + "source_of_truth: test\n" + "---\n\n" + "# Example\n", + encoding="utf-8", + ) + + report = validate_repository(Settings(tmp_path)) + + assert any(issue["code"] == "duplicate-inventory-id" for issue in report["issues"]) + + +def test_validation_rejects_raw_secret_assignment(tmp_path: Path) -> None: + _copy_schema_files(tmp_path) + _create_empty_inventory(tmp_path) + notes_dir = tmp_path / "90-maintenance" + notes_dir.mkdir(exist_ok=True) + (notes_dir / "unsafe.md").write_text( + "---\n" + "owner: gmikcon\n" + "status: draft\n" + "last_reviewed: 2026-05-09\n" + "review_interval: 90d\n" + "confidence: low\n" + "source_of_truth: test\n" + "---\n\n" + "# Unsafe\n\n" + "api_token: raw-token-value\n", + encoding="utf-8", + ) + + report = validate_repository(Settings(tmp_path)) + + assert any(issue["code"] == "possible-secret" for issue in report["issues"])