haz3141
diff --git a/‎.coverage‎
-52 KB b/‎.coverage‎
-52 KB
diff --git a/‎.cursor/environment.json‎
Lines changed: 7 additions & 18 deletions b/‎.cursor/environment.json‎
Lines changed: 7 additions & 18 deletions
diff --git a/‎.cursor/mcp.json‎
Lines changed: 8 additions & 19 deletions b/‎.cursor/mcp.json‎
Lines changed: 8 additions & 19 deletions
diff --git a/‎.cursor/rules/code-organization.mdc‎
Lines changed: 150 additions & 0 deletions b/‎.cursor/rules/code-organization.mdc‎
Lines changed: 150 additions & 0 deletions
diff --git a/‎.cursor/rules/docs.mdc‎
Lines changed: 0 additions & 17 deletions b/‎.cursor/rules/docs.mdc‎
Lines changed: 0 additions & 17 deletions
@@ -1,22 +1,11 @@
 {
   "commands": {
-    "dev:mcp": "uvicorn mcp_server.server:app --reload --host 127.0.0.1 --port 8765",
-    "health": "curl -fsS http://127.0.0.1:8765/healthz || curl -fsS http://127.0.0.1:8765/ | head -n 1",
-    "test": "pytest -q",
-    "test:security": "pytest lab/security/tests/ -v",
-    "test:integration": "pytest lab/tests/ -v",
-    "eval": "python lab/eval/run_eval.py --dataset lab/eval/dataset.jsonl --k 5",
-    "eval:full": "python lab/eval/run_eval.py --dataset lab/eval/dataset.jsonl --k 5 --output eval_results.json",
-    "obs:ingest": "python lab/obs/ingest.py --path logs/audit/*.jsonl",
-    "obs:audit": "python lab/obs/audit.py --recent 100",
-    "lint": "ruff check .",
-    "format": "ruff format . && black .",
-    "format:check": "ruff format --check . && black --check .",
-    "docs:check": "find docs/ -name '*.md' -exec grep -L '<!-- Version:' {} \\;"
-  },
-  "environment": {
-    "PYTHONPATH": ".",
-    "LOG_LEVEL": "INFO",
-    "GUARDIAN_ALLOW_TOOLS": "health,tools/search_docs,tools/summarize"
+    "pytest": "python -m pytest",
+    "ruff": "python -m ruff",
+    "mcp-list": "cursor-agent mcp list",
+    "mcp-tools": "cursor-agent mcp list-tools lab-server",
+    "eval": "python eval/run.py --dataset eval/data/lab/lab_dev.jsonl --output eval/runs/$(date +%Y%m%d-%H%M%S)",
+    "test": "python -m pytest tests/",
+    "mcp": ".venv/bin/python -m mcp_server.simple_server"
   }
 }
@@ -1,20 +1,9 @@
-[
-  {
-    "name": "lab-server",
-    "url": "http://127.0.0.1:8765/sse",
-    "method": "sse",
-    "allowTools": [
-      "search_docs",
-      "summarize",
-      "rag_query",
-      "run_tests",
-      "eval_metrics",
-      "audit_recent",
-      "audit_by_request",
-      "audit_by_tool"
-    ],
-    "timeout": 30,
-    "retries": 3,
-    "gracePeriodSec": 2
+{
+  "mcpServers": {
+    "lab-server": {
+      "command": ".venv/bin/python",
+      "args": ["-m", "mcp_server.simple_server"],
+      "env": {}
+    }
   }
-]
+}
@@ -0,0 +1,150 @@
+---
+description: Code structure and development patterns
+globs: ["**/*.{py,js,ts,md}"]
+alwaysApply: false
+---
+
+# Code Organization - AI-Dev-Lab v0.6.4
+
+## Directory Structure
+
+### Lab vs App Separation
+```
+lab/                    # Experimental development
+├── dsp/               # Data science pipelines
+├── eval/              # Evaluation frameworks
+├── rag/               # RAG system components
+├── security/          # Security tools and policies
+└── tests/             # Lab-specific tests
+
+app/                    # Production-ready components
+├── mcp-servers/       # MCP server implementations
+└── ...               # Production services
+```
+
+### Evaluation Pipeline Structure
+```
+eval/
+├── configs/          # Evaluation configurations
+├── data/             # Test datasets
+├── pipeline/         # Evaluation execution
+├── prompts/          # Evaluation prompts
+└── runs/             # Evaluation results
+```
+
+## MCP Server Architecture
+
+### Server Organization
+- **Single Responsibility**: Each server handles one domain
+- **Tool Registration**: Tools registered with clear descriptions
+- **Health Endpoints**: `/health` endpoint for monitoring
+- **Graceful Shutdown**: Proper cleanup on termination
+
+### Tool Design Patterns
+```python
+# Tool registration pattern
+@server.tool()
+async def tool_name(args: ToolArgs) -> ToolResult:
+    """Clear tool description."""
+    # Input validation
+    # Business logic
+    # Output formatting
+    return result
+```
+
+## Code Standards
+
+### Import Organization (isort)
+```python
+# Standard library imports
+import os
+import sys
+
+# Third-party imports
+import yaml
+from fastapi import FastAPI
+
+# Local imports
+from .utils import helper_function
+```
+
+### Type Hints and Documentation
+- **Function Signatures**: Full type hints required
+- **Docstrings**: Google/NumPy style docstrings
+- **Return Types**: Explicit return type annotations
+- **Parameter Types**: Input parameter type hints
+
+### Error Handling Patterns
+```python
+try:
+    result = risky_operation()
+except SpecificException as e:
+    logger.error(f"Operation failed: {e}")
+    raise CustomError("User-friendly message") from e
+```
+
+## Testing Requirements
+
+### Test Coverage Thresholds
+- **Minimum Coverage**: 68% overall
+- **Critical Paths**: 85% for security-related code
+- **New Features**: 80% for new functionality
+- **Regression Tests**: Required for bug fixes
+
+### Test Organization
+```python
+# test_file.py
+import pytest
+from src.module import function_to_test
+
+class TestFunctionToTest:
+    def test_success_case(self):
+        # Arrange
+        input_data = "test_input"
+
+        # Act
+        result = function_to_test(input_data)
+
+        # Assert
+        assert result == expected_output
+
+    def test_error_case(self):
+        # Arrange & Act & Assert
+        with pytest.raises(ExpectedException):
+            function_to_test(invalid_input)
+```
+
+### Integration Testing
+- **MCP Server Tests**: End-to-end tool testing
+- **Evaluation Pipeline Tests**: Full pipeline validation
+- **Security Tests**: Penetration testing scenarios
+
+## Performance Guidelines
+
+### Code Efficiency
+- **Algorithm Complexity**: Document Big O for critical paths
+- **Memory Usage**: Monitor and optimize memory consumption
+- **Async Patterns**: Use async/await for I/O operations
+- **Caching**: Implement appropriate caching strategies
+
+### Monitoring and Metrics
+- **Performance Metrics**: Response times, throughput
+- **Error Rates**: Track and alert on error patterns
+- **Resource Usage**: CPU, memory, disk monitoring
+- **Health Checks**: Automated health validation
+
+## Development Workflow
+
+### Code Review Checklist
+- ✅ Type hints present and correct
+- ✅ Tests added/updated with sufficient coverage
+- ✅ Documentation updated
+- ✅ Security review completed
+- ✅ Performance impact assessed
+
+### Promotion Criteria (Lab → App)
+- ✅ All tests passing
+- ✅ Security audit cleared
+- ✅ Documentation complete
+- ✅ Performance benchmarks met
+- ✅ Code review approved