diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8827c7b --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +.venv/ +__pycache__/ +*.py[cod] +*.so +*.egg-info/ +dist/ +build/ +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +*.db +*.log +data/uploads/*.jpg +data/uploads/*.png +data/runs/*/ +!data/uploads/.gitkeep +!data/runs/.gitkeep +!data/datasets/.gitkeep +!data/models/.gitkeep +.DS_Store +.env diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..71411cd --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,48 @@ +# Agent Notes + +> Concise operating instructions for autonomous work in this repo. Keep this file small so it fits into small-LLM context windows and remains useful for agents with limited token budgets. + +## Project phase + +Bootstrap. Intelligent machine-vision system for real-time defect detection on polyurethane shoe soles. Full context is in `docs/project_context.md`. + +Before adding substantial code, pick and document the tech stack in `README.md` and create the matching manifest (e.g. `pyproject.toml`, `package.json`, `Cargo.toml`, `go.mod`). + +## Autonomous work rules + +- Do not mutate git history (`git reset`, `rebase`, `push --force`) unless explicitly asked. +- Do not create pull requests, push commits, or change git remotes unless explicitly asked. +- Keep changes minimal and focused on the task at hand. +- Prefer editing existing files over creating new ones when it satisfies the requirement. +- Ask the user before deleting files, directories, or significant blocks of code. +- If a task is unclear or ambiguous, pause and ask for clarification instead of guessing. + +## Context management for small LLMs + +- Read `docs/project_context.md` at the start of each autonomous task. +- Before editing, read only the files directly related to the change. Avoid dumping large unrelated files into context. +- When touching multiple files, process them one logical step at a time and run verification after each step. 
+- Summarize long outputs before storing or returning them. Do not paste large logs verbatim unless specifically requested. +- If a command output is large, prefer to save it to a file and return a summary with the file path. + +## Adding code + +- Follow the style and conventions of the existing codebase once code exists. +- Add or update tests for new logic and run them before finishing. +- Do not commit secrets, credentials, large binaries, or generated build artifacts. +- Use dependency manifests and lockfiles; avoid global installs without user confirmation. +- If the project is Python, keep a virtual environment inside the workspace and do not install into the system Python. +- Prefer small, focused functions and modules. Avoid huge files that do not fit into small context windows. + +## Documentation + +- Update `README.md` when the stack, setup steps, or major architecture changes. +- Keep project documentation in `docs/`. Add design docs, API specs, runbooks, and ADRs there, not in `README.md`. +- Update `AGENTS.md` when repo-specific commands, conventions, testing quirks, or environment setup change. +- Write docstrings / comments only for non-obvious behavior; do not add noise. + +## Verification habit + +- Before declaring a task done, run the relevant check: tests, lint, typecheck, or build. +- If a check does not exist yet, state that it should be added rather than skipping verification. +- Record verification commands and results in the response so the user can reproduce them quickly. 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..f1d0ea4 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,29 @@ +# Contributing + +## Python environment + +Use `uv` to manage the virtual environment and dependencies: + +```bash +uv venv .venv +source .venv/bin/activate +uv pip install -e ".[dev]" +``` + +## Code style + +- `ruff check src tests` +- `ruff format src tests` +- `mypy src` + +## Tests + +```bash +pytest +``` + +## Adding a camera channel + +1. Update `config/local.json` with the new channel. +2. Place the channel-specific YOLO weights under `data/models/`. +3. Add a unit test under `tests/unit/`. diff --git a/README.md b/README.md index a48e66a..052e7e2 100644 --- a/README.md +++ b/README.md @@ -1 +1,53 @@ -# sups_yolo \ No newline at end of file +# sups_yolo + +Intelligent information-measuring system for real-time control of geometric and physico-mechanical parameters of polyurethane shoe soles. + +## Goal + +Detect and classify defects on polyurethane soles in several categories, despite moderate disturbances such as dust, glare, and varying lighting conditions. + +## Tech stack + +| Layer | Choice | Notes | +|-------|--------|-------| +| Language | Python 3.10+ | Main inference and backend language. | +| Object detection | Ultralytics YOLOv8 | One detector instance per camera channel. | +| Web backend | FastAPI | Serves REST/WebSocket APIs for the UI and camera channels. | +| Web UI | HTML + htmx / vanilla JS | Lightweight, channel tabs, history, validation, retraining. | +| Database | SQLite (production ready via `aiosqlite`) | Stores events, image paths, labels, config snapshots. | +| Camera capture | OpenCV + `picamera2` / RTSP URLs | IP cameras or Raspberry Pi cameras. | +| Image augmentation | Albumentations | Synthetic dust, lighting, rotation for training/testing. | +| Training loop | Ultralytics Python API | Fine-tune YOLO on collected verified data. 
| +| Configuration | JSON files in `config/` | Per-channel preprocessing and model settings. | +| Testing | pytest | Unit and integration tests. | +| Environment | Linux-like OS, RTX 2060 workstation | 2–3 Raspberry Pi / IP cameras, Full HD. | + +## System overview + +- **Vision hardware**: 2–3 Raspberry Pi or IP cameras with web access + a workstation with an RTX 2060; Full HD cameras. +- **Software**: Linux-like OS, logging of processed data, YOLO-based detection instances per camera. +- **User web interface**: history view, validation status, expert feedback (correct/incorrect), multi-channel tabs (camera 1/2/3 with independent YOLO instances), live camera preview for setup, settings section, retraining with date-restricted data. +- **Event record per sole**: sole ID, defect photo, defect probability, annotated photo with defect zone. +- **Performance target**: 15 seconds per image analysis and result description. + +## Documentation + +Project documentation lives in [`docs/`](docs/). + +## Setup + +1. Install [uv](https://docs.astral.sh/uv/) (recommended) or `pip`. +2. Create a virtual environment inside the project: + ```bash + uv venv .venv + source .venv/bin/activate + ``` +3. Install dependencies: + ```bash + uv pip install -e ".[dev]" + ``` +4. Copy `config/example.json` to `config/local.json` and adjust camera / model settings. +5. 
Run tests: + ```bash + pytest + ``` diff --git a/config/example.json b/config/example.json new file mode 100644 index 0000000..e614b4f --- /dev/null +++ b/config/example.json @@ -0,0 +1,37 @@ +{ + "server": { + "host": "0.0.0.0", + "port": 8000, + "database_path": "data/sups_yolo.db", + "upload_dir": "data/uploads" + }, + "channels": [ + { + "id": "ch1", + "type": "fake", + "source": "data/datasets/fake/ch1", + "model": "data/models/yolov8n.pt", + "confidence": 0.25, + "preprocessing": { + "resize": [640, 640], + "rotation": 0 + } + }, + { + "id": "ch2", + "type": "fake", + "source": "data/datasets/fake/ch2", + "model": "data/models/yolov8n.pt", + "confidence": 0.25, + "preprocessing": { + "resize": [640, 640], + "rotation": 0 + } + } + ], + "training": { + "runs_dir": "data/runs", + "base_model": "data/models/yolov8n.pt", + "epochs": 50 + } +} diff --git a/config/local.json b/config/local.json new file mode 100644 index 0000000..0eba6a9 --- /dev/null +++ b/config/local.json @@ -0,0 +1,14 @@ +{ + "server": { + "host": "0.0.0.0", + "port": 8000, + "database_path": "data/sups_yolo.db", + "upload_dir": "data/uploads" + }, + "channels": [], + "training": { + "runs_dir": "data/runs", + "base_model": "data/models/yolov8n.pt", + "epochs": 50 + } +} diff --git a/data/datasets/.gitkeep b/data/datasets/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/data/datasets/.gitkeep diff --git a/data/models/.gitkeep b/data/models/.gitkeep new file mode 100644 index 0000000..85ab0e3 --- /dev/null +++ b/data/models/.gitkeep @@ -0,0 +1,4 @@ +# Placeholder + +This directory holds YOLO model weights. +Place the base model file here, e.g. `yolov8n.pt`, and trained model directories. 
diff --git a/data/runs/.gitkeep b/data/runs/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/data/runs/.gitkeep diff --git a/data/uploads/.gitkeep b/data/uploads/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/data/uploads/.gitkeep diff --git a/docs/AI_AGENT_GUIDE.md b/docs/AI_AGENT_GUIDE.md new file mode 100644 index 0000000..b867d05 --- /dev/null +++ b/docs/AI_AGENT_GUIDE.md @@ -0,0 +1,39 @@ +# AI agent guide + +> How to work with this repository's architecture documentation. + +## Start here + +Before any autonomous task, read in this order: + +1. [`project_context.md`](project_context.md) — what the system does and why. +2. [`architecture_overview.md`](architecture_overview.md) — high-level components and data flow. +3. [`architecture_components.md`](architecture_components.md) — responsibilities of each module. +4. [`architecture_data_flow.md`](architecture_data_flow.md) — step-by-step flows for inspection, retraining, and debug. +5. [`architecture_testing.md`](architecture_testing.md) — testing constraints and datasets. + +## When implementing a component + +1. Find the component name in [`architecture_components.md`](architecture_components.md). +2. Check [`architecture_data_flow.md`](architecture_data_flow.md) for inputs and outputs. +3. Check [`architecture_testing.md`](architecture_testing.md) for related test data / constraints. +4. Add or update tests before finishing. +5. Run verification if available; if not, state what should be added. + +## When changing data schema + +- Update the data-flow descriptions in [`architecture_data_flow.md`](architecture_data_flow.md). +- Update DB and event definitions if they exist. +- Ensure the WEB UI and Main Server agree on the new fields. + +## When adding a new camera channel + +- Each channel maps to one YOLO instance with its own config (model, threshold, preprocessing). +- Update JSON config, Main Server routing, and WEB UI tabs together. 
+- See [`architecture_components.md`](architecture_components.md) for per-channel pipeline details. + +## When modifying models or training + +- Keep model versions and metrics. +- Document camera / dataset changes in [`architecture_testing.md`](architecture_testing.md). +- Verify retraining flow end-to-end before declaring done. diff --git a/docs/ARC.drawio.xml b/docs/ARC.drawio.xml new file mode 100644 index 0000000..1f2c71f --- /dev/null +++ b/docs/ARC.drawio.xml @@ -0,0 +1,255 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/architecture_components.md b/docs/architecture_components.md new file mode 100644 index 0000000..8baacf9 --- /dev/null +++ b/docs/architecture_components.md @@ -0,0 +1,138 @@ +# Architecture components + +This file lists each component from the architecture diagram in detail. Use it as a reference when implementing or modifying a module. + +## Per-camera pipeline + +### IP Camera / Raspberry Pi + +- Role: image acquisition. +- Expected count: 2–3 channels. +- Output: continuous video stream. +- Notes: + - Cameras may be replaced; recognition quality impact must be measurable. + - Positioning tolerance: rotation ±5°, displacement up to 10% of frame. + +### Position determination + +- Role: decide when to capture a frame. +- Options: + - Physical sensor on conveyor (simpler, deterministic); + - AI-based detector (more flexible, needs training). +- Output: trigger signal to frame extractor. 
+ +### Video stream → frame + +- Role: extract still frames from the camera stream on demand. +- Input: trigger signal. +- Output: raw image frame. + +### Image preparing + +- Role: make the image suitable for the YOLO model. +- Operations: + - normalize pixel values / brightness / contrast; + - filter noise; + - crop to region of interest; + - resize to model input size; + - rotate to compensate for small alignment errors. +- Must be configurable per camera channel via JSON config. + +## Inference layer + +### YOLO instance + +- Role: run defect detection on a prepared image. +- One instance per camera / tab. +- Each instance has: + - own model weights; + - own confidence threshold; + - own class mapping; + - own preprocessing parameters. + +### AI Model + +- Role: manage model artifacts and retraining lifecycle. +- Responsibilities: + - load weights for YOLO instances; + - export updated weights after retraining; + - keep versioned model history; + - serve as an abstraction between training pipeline and inference. + +## Central services + +### Main Server + +- Role: coordination and API. +- Responsibilities: + - read JSON config at startup; + - manage camera channels; + - dispatch prepared images to correct YOLO instance; + - collect detection results; + - persist events to DB; + - expose HTTP/WebSocket API for WEB UI; + - handle retraining requests. + +### Database (DB) + +- Role: persistent storage. +- Stores: + - inspection events (sole ID, timestamp, channel, result); + - original images; + - annotated images with bounding boxes; + - expert verification labels; + - model versions; + - configuration snapshots. + +## User interface + +### WEB UI + +- Role: human operator and expert interface. 
+- Views: + - history list with filtering; + - detail view: original image, annotated image, probability, status; + - expert feedback buttons: correct / incorrect; + - multi-channel tabs (1, 2, 3) with per-channel YOLO settings; + - live camera preview for mechanical setup; + - settings form; + - retraining panel with date-range restriction. + +## Development / testing components + +### Fake input data (factory environment emulation) + +- Role: offline input generator. +- Used when cameras are not connected. +- Produces synthetic frames or replays recorded frames. + +### Artificial image generator + +- Role: augment datasets to simulate factory disturbances. +- Applied to: + - initial dataset; + - learning dataset. +- Transformations: + - lighting effects; + - PNG pattern overlays (dust, dirt, lens contamination); + - rotation (±5°); + - other noise. + +### Learning Dataset + +- Role: curated data used to train / retrain the model. +- Sources: + - initial dataset; + - artificial generator output; + - verified production data from expert feedback. + +### Learning / Training module + +- Role: run model training and retraining. +- Inputs: + - Learning Dataset; + - configuration (hyperparameters, date range). +- Outputs: + - new model weights; + - training metrics; + - updated AI Model entry. diff --git a/docs/architecture_data_flow.md b/docs/architecture_data_flow.md new file mode 100644 index 0000000..f367c3b --- /dev/null +++ b/docs/architecture_data_flow.md @@ -0,0 +1,53 @@ +# Architecture data flow + +This file describes the step-by-step data flow for a single inspection cycle and for retraining. + +## Normal inspection cycle + +1. **Sole enters camera view.** +2. **Position determination** fires a trigger (sensor or AI). +3. **Video stream → frame** captures one still frame from the active camera. +4. **Image preparing** normalizes, filters, crops, resizes, and rotates the frame according to the channel config. +5. **Main Server** receives the prepared image. +6. 
**Main Server** routes the image to the matching **YOLO instance** for that channel. +7. **YOLO instance** loads weights from **AI Model** and runs inference. +8. **YOLO instance** returns: + - detected defect classes; + - bounding boxes; + - confidence scores. +9. **Main Server** builds an event record: + - sole ID; + - channel ID; + - timestamp; + - original image reference; + - annotated image reference; + - defect probability / class. +10. **Main Server** writes the event to **DB**. +11. **WEB UI** fetches and displays the event. +12. **Expert / operator** reviews the result and marks it **correct** or **incorrect**. +13. **WEB UI** sends the label back to **Main Server**, which updates the event in **DB**. + +## Retraining flow + +1. **Operator** selects a date range in the WEB UI retraining panel. +2. **WEB UI** requests retraining from **Main Server**. +3. **Main Server** queries **DB** for verified events in the selected range. +4. **Main Server** assembles / augments images into the **Learning Dataset**. +5. Optionally, **Artificial image generator** adds synthetic disturbances. +6. **Learning module** trains / fine-tunes a model on the dataset. +7. **Learning module** stores new weights in **AI Model**. +8. **YOLO instances** can be reloaded with the new weights (hot-reload or restart). +9. **WEB UI** shows retraining status and metrics. + +## Development / debug flow + +1. **Fake input data** produces frames offline. +2. **OR gate** selects between real camera pipeline and fake input. +3. Selected frames go through **Image preparing** and then the same inference path. +4. Results are stored in DB and shown in WEB UI, exactly like production. + +## File / data references + +- Event record keeps references, not embedded binary images (unless required). +- Images should be stored on disk with stable paths; DB stores the paths. +- Annotated images are generated after inference and saved alongside originals. 
diff --git a/docs/architecture_overview.md b/docs/architecture_overview.md new file mode 100644 index 0000000..33e05c8 --- /dev/null +++ b/docs/architecture_overview.md @@ -0,0 +1,119 @@ +# Architecture overview + +## Purpose + +This document describes the high-level architecture of the intelligent machine-vision system for real-time defect detection on polyurethane shoe soles. + +Full project context is in [`project_context.md`](project_context.md). + +## System diagram summary + +``` +User + │ + ▼ +WEB UI ◄──────► Main Server ◄──────► DB + │ + ┌───────────┼───────────┐ + ▼ ▼ ▼ + YOLO Inst 1 YOLO Inst 2 ... + ▲ ▲ + │ │ + IP Camera 1 IP Camera 2 ... +``` + +## Main components + +### 1. IP Cameras / Raspberry Pi + +- One camera per inspection channel / tab. +- Captures frames in Full HD. +- May be real IP cameras or Raspberry Pi with camera modules. + +### 2. Video stream → frame converter + +- Receives continuous video stream from each camera. +- Extracts single frames for analysis. +- Triggered either by: + - a position sensor / conveyor signal, or + - an AI-based position determination module. + +### 3. Position determination + +- Detects when a sole is in the correct position for capture. +- Can use a physical sensor or an AI model. +- Sends trigger to the frame extractor. + +### 4. Image preparation + +Pre-processing steps applied before inference: + +- normalizing; +- filtering; +- cropping; +- resizing; +- rotating (to simulate and correct small deviations). + +### 5. YOLO instances + +- One YOLO detector instance per camera / channel. +- Receives prepared frame from Image Preparing module. +- Outputs detected defect candidates. + +### 6. AI Model + +- Central model management component. +- YOLO instances load their model weights through this layer. +- Supports retraining from collected expert-verified data. + +### 7. 
Main Server + +- Orchestrates the pipeline: + - receives frame or prepared image; + - routes to the correct YOLO instance; + - collects inference results; + - stores events in DB; + - serves the WEB UI. +- Reads JSON configuration for channels, model paths, thresholds, etc. + +### 8. WEB UI + +Provides: + +- inspection history view; +- validation status display; +- expert feedback: correct / incorrect result; +- multi-channel tabs (camera 1, 2, 3) with independent YOLO instances and settings; +- live camera preview for setup; +- settings panel; +- retraining trigger with date-range filter. + +### 9. Database + +Stores: + +- inspection events; +- raw and annotated images; +- expert labels; +- configuration. + +### 10. Fake input data / factory environment emulation + +- Generates synthetic / augmented inputs for development and testing. +- Allows offline debugging when real cameras are unavailable. +- Sources: + - initial real dataset; + - artificially generated images (noise, lighting, dirt, rotation). + +## Data flow + +1. Position determination triggers capture. +2. Video stream → frame converter extracts a frame. +3. Image preparing module normalizes / filters / crops / resizes / rotates the frame. +4. Main server routes the prepared image to the right YOLO instance. +5. YOLO instance runs inference using the AI Model. +6. Results return to Main Server. +7. Main Server writes an event to DB. +8. WEB UI reads events and images from DB / server. +9. Expert reviews results in WEB UI and marks correct / incorrect. +10. Verified data flows back to the Learning Dataset for retraining. diff --git a/docs/architecture_testing.md b/docs/architecture_testing.md new file mode 100644 index 0000000..09dc7d1 --- /dev/null +++ b/docs/architecture_testing.md @@ -0,0 +1,67 @@ +# Architecture testing notes + +This file collects testing-related facts implied by the architecture diagram and project context. 
+ +## Timing requirement + +- Analysis + result description must fit within **15 seconds per image**. +- Measure end-to-end time: trigger → inference → DB write → UI update. + +## Test data sets + +### Initial dataset + +- Real photos of polyurethane soles. +- Used for the first training run. + +### Learning dataset + +- Combined from: + - initial dataset; + - artificially generated / augmented images; + - expert-verified production data. + +### Separate test set + +- Must not overlap with training data. +- Size: + - 20 soles without defects; + - 3 soles with defects. + +## Artificial disturbances + +Use the artificial image generator to simulate production conditions: + +- lighting effects; +- PNG pattern overlays (dust, lens dirt, other obstacles); +- rotation (±5° to match allowed positioning tolerance); +- noise generator for dust emulation. + +## Position tolerance + +- Rotation: ±5° relative to camera. +- Displacement: up to 10% of frame. +- Test inference quality across the full tolerance range. + +## Camera replacement study + +- Compare recognition quality when swapping camera models. +- Document changes in preprocessing parameters needed after replacement. + +## Testing environment + +- Run under conditions close to real production. +- Initial development can happen at home / lab using fake input data and artificial disturbances. +- Physical dimensions of soles must stabilize before testing (fixed time after casting). + +## Expert feedback loop + +- Every production event should be reviewable by an operator / expert. +- Expert verdicts (correct / incorrect) feed the learning dataset. +- Retraining can be filtered by date to avoid including low-quality old data. + +## Model versioning + +- Each trained / retrained model must be versioned. +- Keep metrics for each version so performance can be compared. +- Allow rollback to a previous model if retraining degrades quality. 
diff --git a/docs/project_context.md b/docs/project_context.md new file mode 100644 index 0000000..857b0cc --- /dev/null +++ b/docs/project_context.md @@ -0,0 +1,62 @@ +# Project context + +## Theme + +Development of an intelligent information-measuring system for real-time control of geometric and physico-mechanical parameters of polyurethane shoe soles. + +## Goal + +Determine and classify (into several categories) the presence of defects on a shoe sole despite moderate disturbances such as dust, glare, and varying lighting. + +## Functional requirements + +### User web interface + +- Display inspection history on screen. +- Show status: validated. +- Expert can mark the recognition result as correct or incorrect. +- Omnichannel support: tabs 1, 2, 3 for different cameras / Raspberry Pi instances, each with its own YOLO instance and settings. +- Display camera image for initial setup. +- Settings section. +- Retraining function with date restriction. + +### Machine vision system + +- Hardware: + - 2–3 Raspberry Pi devices or IP cameras with web access. + - Workstation with RTX 2060 GPU. + - Full HD cameras. + - Investigate recognition-quality change when replacing the camera with another model. + +- Software: + - Linux-like operating system. + - Log of processed data. + - Event record per inspected sole: + - sole number; + - defect photo; + - probability score for defect presence; + - separate annotated photo marking the defect zone. + - 15-second budget for image analysis and description output. + +## Operating conditions + +- Describe working conditions: lighting level, type and intensity of disturbances, and estimated probability of influence on the result. +- Determine optimal positioning of the sole relative to the camera. +- Allowable part position: rotation ±5° relative to the camera, displacement up to 10% of the frame. + +## Testing and validation + +- Test and debug the system under conditions close to production (at home/lab environment). 
+- Noise generator for dust emulation. +- Separate photo set not used during model training: + - 20 pieces without defects; + - 3 pieces with defects. +- Photo effects to simulate different lighting conditions. +- Overlay PNG patterns to simulate lens contamination and other obstacles. +- Photo rotation to simulate positioning deviations. +- Testing is performed after a fixed time from the casting process, when the geometric dimensions have stabilized and no longer change. + +## Retraining + +- Retrain the model using data collected during operation. +- The operator acts as an expert and verifies the model result. diff --git a/opencode.json b/opencode.json new file mode 100644 index 0000000..a8849c0 --- /dev/null +++ b/opencode.json @@ -0,0 +1,56 @@ +{ + "$schema": "https://opencode.ai/config.json", + "model": "ollama/gemma4:12b-it-qat", + "small_model": "ollama/gemma4:12b-it-qat", + "provider": { + "ollama": { + "options": { + "baseURL": "http://localhost:11434" + } + } + }, + "enabled_providers": ["ollama", "ollama-cloud"], + "disabled_providers": [ + "anthropic", + "openai", + "google", + "deepseek", + "kimi", + "abacus", + "302ai", + "aihubmix", + "alibaba", + "alibaba-cn", + "amazon-bedrock", + "azure", + "azure-cognitive-services", + "baseten", + "berget", + "chutes", + "clarifai", + "cloudflare", + "cloudflare-ai-gateway", + "cloudflare-workers-ai", + "cohere", + "cortecs", + "crof", + "databricks", + "deepinfra", + "digitalocean", + "fireworks", + "gemini", + "groq", + "huggingface", + "hyperbolic", + "mistral", + "moonshot", + "openrouter", + "perplexity", + "replicate", + "together", + "vertex", + "xai" + ], + "instructions": ["AGENTS.md"], + "logLevel": "INFO" +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b2027b5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,103 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "sups-yolo" +version = "0.1.0" +description = 
"Intelligent machine-vision system for real-time defect detection on polyurethane shoe soles." +readme = "README.md" +license = { text = "MIT" } +requires-python = ">=3.10" +authors = [ + { name = "SUPS Team" }, +] +keywords = ["yolo", "machine-vision", "defect-detection", "polyurethane", "soles"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Manufacturing", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + +dependencies = [ + "fastapi>=0.115.0", + "uvicorn[standard]>=0.32.0", + "jinja2>=3.1.0", + "python-multipart>=0.0.17", + "websockets>=14.0", + "sqlalchemy>=2.0.36", + "aiosqlite>=0.20.0", + "opencv-python>=4.10.0", + "pillow>=11.0.0", + "numpy>=1.26.0", + "ultralytics>=8.3.0", + "albumentations>=1.4.0", + "pydantic>=2.9.0", + "pydantic-settings>=2.6.0", + "pyyaml>=6.0.2", + "click>=8.1.0", + "rich>=13.9.0", +] + +[project.optional-dependencies] +rpi = [ + "picamera2>=0.3.17", +] +dev = [ + "pytest>=8.3.0", + "pytest-asyncio>=0.24.0", + "pytest-cov>=6.0.0", + "ruff>=0.8.0", + "mypy>=1.13.0", + "pre-commit>=4.0.0", + "httpx>=0.27.0", +] + +[project.scripts] +sups-yolo = "sups_yolo.cli:main" + +[project.urls] +Homepage = "https://github.com/gmikcon/sups_yolo" +Documentation = "https://github.com/gmikcon/sups_yolo/tree/main/docs" + +[tool.hatch.build.targets.wheel] +packages = ["src/sups_yolo"] + +[tool.ruff] +target-version = "py310" +line-length = 100 + +[tool.ruff.lint] +select = [ + "E", + "F", + "I", + "N", + "W", + "UP", + "B", + "C4", + "SIM", + "ARG", + "PL", +] +ignore = ["PLR2004", "PLC0415"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.mypy] +python_version = "3.10" +strict = true +warn_return_any = true +warn_unused_ignores = true +ignore_missing_imports = true + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = 
def create_app(config: dict[str, Any] | None = None) -> FastAPI:
    """Build the FastAPI application serving the web UI and the event API.

    Args:
        config: Optional settings mapping. ``database_path`` is taken from the
            top level when present; otherwise from the nested ``server``
            section, which is the layout used by ``config/example.json``.
            If neither is present, ``data/sups_yolo.db`` is used.

    Returns:
        The application with ``/static`` mounted and the ``/`` and
        ``/api/events`` routes registered.
    """
    app = FastAPI(title="sups_yolo", version="0.1.0")

    # Templates and static assets sit in the package's ``web`` directory,
    # which is a sibling of this ``api`` package.
    web_dir = Path(__file__).resolve().parent.parent / "web"
    templates = Jinja2Templates(directory=web_dir / "templates")
    app.mount("/static", StaticFiles(directory=web_dir / "static"), name="static")

    settings = config or {}
    # The JSON config files keep ``database_path`` under "server". Accept that
    # layout as a fallback so a loaded config file can be passed in unchanged;
    # a top-level key still takes precedence, as before.
    server_section = settings.get("server")
    if not isinstance(server_section, dict):
        server_section = {}
    database_path = settings.get(
        "database_path",
        server_section.get("database_path", "data/sups_yolo.db"),
    )
    store = EventStore.from_path(database_path)

    @app.get("/", response_class=HTMLResponse)
    async def index(request: Request) -> HTMLResponse:
        """Render the single-page UI shell."""
        return templates.TemplateResponse(request, "index.html")

    @app.get("/api/events")
    async def list_events(channel_id: str | None = None) -> list[dict[str, Any]]:
        """Return up to 100 stored events, optionally filtered by channel."""
        events = store.list_events(channel_id=channel_id, limit=100)
        # NOTE(review): assumes event objects expose only JSON-serializable
        # instance attributes -- confirm against the EventStore event type.
        return [e.__dict__ for e in events]

    return app
class CameraSource(ABC):
    """Contract shared by every camera backend.

    An instance keeps the channel configuration it was built from. It can be
    used as a context manager: entering calls :meth:`connect`, leaving calls
    :meth:`disconnect`.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        # Stored as given so subclasses can read their own settings from it.
        self.config = config

    # -- lifecycle ---------------------------------------------------------

    @abstractmethod
    def connect(self) -> None:
        """Acquire the underlying stream or device."""

    @abstractmethod
    def is_connected(self) -> bool:
        """Tell whether a frame can be captured right now."""

    @abstractmethod
    def get_frame(self) -> np.ndarray:
        """Fetch the latest frame as a BGR numpy array."""

    @abstractmethod
    def disconnect(self) -> None:
        """Give the underlying stream or device back."""

    # -- context-manager support -------------------------------------------

    def __enter__(self) -> "CameraSource":
        self.connect()
        return self

    def __exit__(self, *_exc: object) -> None:
        # Returns None, so an exception raised inside the ``with`` body
        # still propagates after the camera has been released.
        self.disconnect()
class FakeCameraSource(CameraSource):
    """Replay image files from a directory, or fall back to a placeholder frame."""

    def __init__(self, config: dict[str, Any]) -> None:
        """Read the ``source`` directory from ``config`` (default ``data/datasets/fake``)."""
        super().__init__(config)
        self.source = Path(config.get("source", "data/datasets/fake"))
        self._frames: list[np.ndarray] = []
        self._index = 0

    def connect(self) -> None:
        """Load every readable ``*.jpg`` / ``*.png`` in ``source`` into memory.

        If the directory is missing, or none of its files can be decoded, a
        single uniform gray frame is used instead.
        """
        if not self.source.exists():
            logger.warning("Fake source %s does not exist; generating noise frames", self.source)
            self._frames = [self._blank_frame()]
            return
        paths = sorted(self.source.glob("*.jpg")) + sorted(self.source.glob("*.png"))
        # Decode each file exactly once; imread returns None for unreadable files.
        frames: list[np.ndarray] = []
        for path in paths:
            image = cv2.imread(str(path))
            if image is not None:
                frames.append(image)
        self._frames = frames or [self._blank_frame()]
        logger.info("Loaded %d fake frames from %s", len(self._frames), self.source)

    def disconnect(self) -> None:
        """Drop all loaded frames and rewind the replay position."""
        self._frames = []
        self._index = 0

    def is_connected(self) -> bool:
        """Return True once at least one frame is loaded."""
        return len(self._frames) > 0

    def get_frame(self) -> np.ndarray:
        """Return a copy of the next frame, cycling through the loaded frames.

        Raises:
            RuntimeError: If ``connect`` has not loaded any frame.
        """
        if not self._frames:
            raise RuntimeError("Fake camera is not connected")
        frame = self._frames[self._index % len(self._frames)]
        self._index += 1
        # Copy so callers can draw on the frame without altering the replay buffer.
        return frame.copy()

    @staticmethod
    def _blank_frame() -> np.ndarray:
        """Return a 1080x1920 three-channel frame filled with the value 128."""
        return np.full((1080, 1920, 3), 128, dtype=np.uint8)
class IPCameraSource(CameraSource):
    """Capture frames from an IP camera over HTTP or RTSP."""

    def __init__(self, config: dict[str, Any]) -> None:
        """Read the stream ``url`` from ``config``; no connection is opened yet."""
        super().__init__(config)
        self.url = config.get("url", "")
        self._cap: cv2.VideoCapture | None = None

    def connect(self) -> None:
        """Open the stream at ``self.url``.

        Raises:
            ValueError: If no URL was configured.
            RuntimeError: If OpenCV cannot open the stream.
        """
        if not self.url:
            raise ValueError("IP camera URL is required")
        # Release any earlier handle so reconnecting does not leak it.
        self.disconnect()
        cap = cv2.VideoCapture(self.url)
        if not cap.isOpened():
            # Do not keep a dead handle around after a failed attempt.
            cap.release()
            raise RuntimeError(f"Cannot open camera stream: {self.url}")
        self._cap = cap
        logger.info("Connected to IP camera %s", self.url)

    def disconnect(self) -> None:
        """Release the capture handle, if one is held."""
        if self._cap is not None:
            self._cap.release()
            self._cap = None

    def is_connected(self) -> bool:
        """Return True while a capture handle exists and reports itself open."""
        return self._cap is not None and self._cap.isOpened()

    def get_frame(self) -> np.ndarray:
        """Read and return the next frame from the stream.

        Raises:
            RuntimeError: If the camera is not connected or the read fails.
        """
        if not self.is_connected():
            raise RuntimeError("Camera is not connected")
        ret, frame = self._cap.read()
        if not ret or frame is None:
            raise RuntimeError("Failed to read frame from camera")
        return frame
class InspectionChannel:
    """Owns a camera, preprocessor and detector for one production channel."""

    def __init__(
        self,
        channel_id: str,
        camera: CameraSource,
        detector: SoleDefectDetector,
        preprocessor: PreprocessingPipeline,
        config: dict[str, Any],
    ) -> None:
        """Store the collaborators and make sure the upload directory exists.

        ``config["upload_dir"]`` selects where images are written
        (default ``data/uploads``).
        """
        self.channel_id = channel_id
        self.camera = camera
        self.detector = detector
        self.preprocessor = preprocessor
        self.config = config
        self.upload_dir = Path(config.get("upload_dir", "data/uploads"))
        self.upload_dir.mkdir(parents=True, exist_ok=True)

    def inspect(self, sole_id: str) -> InspectionEvent:
        """Capture a frame, run detection, write both images and build the event.

        The event is returned to the caller; it is not persisted here.

        Raises:
            OSError: If either image cannot be written to the upload directory.
        """
        raw = self.camera.get_frame()
        prepared = self.preprocessor.run(raw)
        detections = self.detector.predict(prepared)
        annotated = self.detector.annotate(prepared, detections)

        # One timestamp for both files and the event so they can be matched up.
        timestamp = datetime.utcnow()
        image_path = self._save(raw, sole_id, timestamp, "raw")
        annotated_path = self._save(annotated, sole_id, timestamp, "annotated")

        return InspectionEvent(
            sole_id=sole_id,
            channel_id=self.channel_id,
            timestamp=timestamp,
            image_path=str(image_path),
            annotated_path=str(annotated_path),
            detections=detections,
        )

    def _save(self, image: np.ndarray, sole_id: str, timestamp: datetime, suffix: str) -> Path:
        """Write ``image`` as a JPEG in the upload directory and return its path.

        Raises:
            OSError: If OpenCV reports that the file was not written.
        """
        # isoformat() contains ':' which is not allowed in Windows filenames,
        # so a compact separator-free stamp is used instead.
        stamp = timestamp.strftime("%Y%m%dT%H%M%S%f")
        # NOTE(review): channel_id and sole_id go into the filename verbatim —
        # confirm callers never pass path separators.
        filename = f"{self.channel_id}_{sole_id}_{stamp}_{suffix}.jpg"
        path = self.upload_dir / filename
        # imwrite signals failure through its return value rather than raising.
        if not cv2.imwrite(str(path), image):
            raise OSError(f"Failed to write image: {path}")
        return path
class EventRecord(Base):  # type: ignore[misc]
    """ORM row of the ``inspection_events`` table, one per inspected sole."""

    __tablename__ = "inspection_events"

    id = Column(Integer, primary_key=True, autoincrement=True)
    sole_id = Column(String, nullable=False)
    channel_id = Column(String, nullable=False)
    timestamp = Column(DateTime, nullable=False)
    image_path = Column(String)
    annotated_path = Column(String)
    # Detections are kept as a JSON-encoded list in a text column.
    detections = Column(Text, default="[]")
    validated = Column(Boolean)
    expert_note = Column(Text, default="")

    def __init__(self, event: InspectionEvent) -> None:
        """Populate the row from an ``InspectionEvent``."""
        copied_fields = (
            "sole_id",
            "channel_id",
            "timestamp",
            "image_path",
            "annotated_path",
            "validated",
            "expert_note",
        )
        for field_name in copied_fields:
            setattr(self, field_name, getattr(event, field_name))
        # The only field that needs conversion: list of dicts -> JSON text.
        self.detections = json.dumps(event.detections)
class SoleDefectDetector:
    """Small adapter that runs a YOLO model and reports detections as dicts."""

    def __init__(self, model_path: str, config: dict[str, Any]) -> None:
        # The model itself is created on first use, not here.
        self._model: Any = None
        self.config = config
        self.model_path = Path(model_path)

    def load(self) -> None:
        """Instantiate the YOLO model from ``model_path``.

        A missing weights file is only logged as a warning; the model is
        still constructed from that path.
        """
        weights = self.model_path
        if not weights.exists():
            logger.warning("Model file not found: %s", weights)
        # Imported here so the module can be used without ultralytics installed.
        from ultralytics import YOLO

        self._model = YOLO(str(weights))
        logger.info("Loaded YOLO model from %s", weights)

    def _describe(self, box: Any) -> dict[str, Any]:
        """Convert one result box into the detection dict format."""
        class_index = int(box.cls)
        return {
            "class_id": class_index,
            # Fall back to the numeric id when the model has no name for it.
            "label": self._model.names.get(class_index, str(class_index)),
            "confidence": float(box.conf),
            "bbox": [float(coord) for coord in box.xyxy[0].tolist()],
        }

    def predict(self, image: np.ndarray) -> list[dict[str, Any]]:
        """Run the model on ``image`` and return one dict per detected box.

        The threshold comes from ``config["confidence"]`` (default 0.25).
        Each dict has the keys ``class_id``, ``label``, ``confidence`` and
        ``bbox``.
        """
        if self._model is None:
            self.load()
        threshold = self.config.get("confidence", 0.25)
        outputs = self._model(image, conf=threshold, verbose=False)
        return [self._describe(box) for output in outputs for box in output.boxes]

    def annotate(self, image: np.ndarray, detections: list[dict[str, Any]]) -> np.ndarray:
        """Return a copy of ``image`` with a red box and caption per detection."""
        canvas = image.copy()
        if not detections:
            return canvas
        # Imported here so stubbed tests need no OpenCV.
        import cv2

        red = (0, 0, 255)
        for found in detections:
            left, top, right, bottom = (int(value) for value in found["bbox"])
            cv2.rectangle(canvas, (left, top), (right, bottom), red, 2)
            caption = f"{found['label']} {found['confidence']:.2f}"
            cv2.putText(canvas, caption, (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, red, 2)
        return canvas
"""Configurable per-channel frame preprocessing.""" + + def __init__(self, config: dict[str, Any]) -> None: + self.config = config + + def run(self, image: np.ndarray) -> np.ndarray: + """Apply normalization, filtering, cropping, resizing and rotation.""" + if self.config.get("grayscale"): + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) + + if "crop" in self.config: + x, y, w, h = self.config["crop"] + image = image[y : y + h, x : x + w] + + if "rotation" in self.config: + image = self._rotate(image, self.config["rotation"]) + + if "resize" in self.config: + width, height = self.config["resize"] + image = cv2.resize(image, (width, height)) + + return image + + @staticmethod + def _rotate(image: np.ndarray, angle: float) -> np.ndarray: + (h, w) = image.shape[:2] + center = (w // 2, h // 2) + matrix = cv2.getRotationMatrix2D(center, angle, 1.0) + return cv2.warpAffine(image, matrix, (w, h)) diff --git a/src/sups_yolo/training/__init__.py b/src/sups_yolo/training/__init__.py new file mode 100644 index 0000000..a87ebcf --- /dev/null +++ b/src/sups_yolo/training/__init__.py @@ -0,0 +1,5 @@ +"""Training / retraining pipeline.""" + +from sups_yolo.training.pipeline import TrainingPipeline + +__all__ = ["TrainingPipeline"] diff --git a/src/sups_yolo/training/pipeline.py b/src/sups_yolo/training/pipeline.py new file mode 100644 index 0000000..c43404b --- /dev/null +++ b/src/sups_yolo/training/pipeline.py @@ -0,0 +1,43 @@ +"""YOLO training and retraining pipeline.""" + +import logging +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + + +class TrainingPipeline: + """Build a dataset and fine-tune a YOLO model.""" + + def __init__(self, config: dict[str, Any]) -> None: + self.config = config + self.runs_dir = Path(config.get("runs_dir", "data/runs")) + self.runs_dir.mkdir(parents=True, exist_ok=True) + + def prepare_dataset( + self, + source_dir: Path, + output_dir: Path, + 
verified_only: bool = False, + start_date: str | None = None, + end_date: str | None = None, + ) -> Path: + """Collect images and labels into a YOLO dataset.""" + output_dir.mkdir(parents=True, exist_ok=True) + logger.info( + "Preparing dataset from %s to %s (verified_only=%s, %s..%s)", + source_dir, + output_dir, + verified_only, + start_date, + end_date, + ) + # TODO: implement dataset assembly. + return output_dir + + def train(self, dataset_yaml: Path, base_model: Path, epochs: int = 50) -> Path: + """Run YOLO training and return the best weights path.""" + # TODO: wire ultralytics training. + logger.info("Training %s on %s for %d epochs", base_model, dataset_yaml, epochs) + return base_model diff --git a/src/sups_yolo/web/__init__.py b/src/sups_yolo/web/__init__.py new file mode 100644 index 0000000..46aafbd --- /dev/null +++ b/src/sups_yolo/web/__init__.py @@ -0,0 +1 @@ +"""Web UI templates and static assets.""" diff --git a/src/sups_yolo/web/static/htmx.min.js b/src/sups_yolo/web/static/htmx.min.js new file mode 100644 index 0000000..8aaaf11 --- /dev/null +++ b/src/sups_yolo/web/static/htmx.min.js @@ -0,0 +1 @@ +/* htmx 2.x placeholder — download the real file from https://unpkg.com/htmx.org */ diff --git a/src/sups_yolo/web/static/style.css b/src/sups_yolo/web/static/style.css new file mode 100644 index 0000000..d38b13f --- /dev/null +++ b/src/sups_yolo/web/static/style.css @@ -0,0 +1,59 @@ +/* Minimal styles for the sups_yolo web UI. 
*/ + +:root { + --bg: #f8f9fa; + --text: #212529; + --accent: #0d6efd; + --border: #dee2e6; +} + +* { + box-sizing: border-box; +} + +body { + font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + margin: 0; + padding: 0; + background: var(--bg); + color: var(--text); +} + +header { + background: #fff; + border-bottom: 1px solid var(--border); + padding: 1rem 2rem; +} + +header h1 { + margin: 0 0 0.5rem; + font-size: 1.5rem; +} + +nav { + display: flex; + gap: 1rem; +} + +nav a { + color: var(--accent); + text-decoration: none; + font-weight: 500; +} + +nav a:hover { + text-decoration: underline; +} + +main { + padding: 2rem; + max-width: 1200px; + margin: 0 auto; +} + +footer { + padding: 1rem 2rem; + border-top: 1px solid var(--border); + text-align: center; + color: #6c757d; +} diff --git a/src/sups_yolo/web/templates/index.html b/src/sups_yolo/web/templates/index.html new file mode 100644 index 0000000..09d813c --- /dev/null +++ b/src/sups_yolo/web/templates/index.html @@ -0,0 +1,31 @@ + + + + + + sups_yolo — Sole defect inspection + + + + +
+

sups_yolo

+ +
+ +
+
+

Inspection history will appear here.

+
+
def test_fake_camera_generates_frames() -> None:
    """A missing source directory still yields a three-dimensional frame."""
    source = FakeCameraSource({"source": "data/datasets/fake/nonexistent"})
    source.connect()
    produced = source.get_frame()
    assert isinstance(produced, np.ndarray)
    assert produced.ndim == 3
    # Disconnecting must leave the camera reporting itself as unavailable.
    source.disconnect()
    assert not source.is_connected()
def test_rotation_preserves_shape() -> None:
    """Rotating by a small angle keeps the frame dimensions unchanged."""
    frame_shape = (1080, 1920, 3)
    blank = np.zeros(frame_shape, dtype=np.uint8)
    rotated = PreprocessingPipeline({"rotation": 5}).run(blank)
    assert rotated.shape == (1080, 1920, 3)