feat: Background workers
- analyzer.py: Issue analysis pipeline (JIRA -> LLM -> PR)
- indexer.py: Code indexing pipeline (Bitbucket -> Embeddings -> Qdrant)
- Redis queue-based processing
- Progress tracking and status updates
This commit is contained in:
parent 011a93c5b9
commit 27b72e3ccd
@@ -0,0 +1 @@
"""Workers package."""
@@ -0,0 +1,208 @@
"""
|
||||||
|
Issue Analyzer Worker - Background processing of JIRA issues.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import redis.asyncio as redis
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class IssueAnalyzer:
    """
    Background worker that processes JIRA issues from the queue.

    Flow:
    1. Poll Redis queue for new issues
    2. Fetch issue details from JIRA
    3. Search for relevant code in vector DB
    4. Send to LLM for analysis
    5. Generate fix proposal
    6. Create PR if confidence is high enough
    7. Post analysis back to JIRA
    """

    def __init__(
        self,
        redis_url: str = "redis://localhost:6379",
        queue_name: str = "issues:pending",
    ):
        self.redis_url = redis_url
        self.queue_name = queue_name
        self.redis: Optional[redis.Redis] = None
        self.running = False
    async def connect(self):
        """Connect to Redis."""
        self.redis = redis.from_url(self.redis_url)
        logger.info(f"🔌 Connected to Redis: {self.redis_url}")

    async def disconnect(self):
        """Disconnect from Redis."""
        if self.redis:
            await self.redis.close()

    async def run(self):
        """Main worker loop."""
        self.running = True
        await self.connect()

        logger.info("🚀 Issue Analyzer worker started")

        while self.running:
            try:
                # Block waiting for new items (timeout 5s)
                result = await self.redis.blpop(self.queue_name, timeout=5)

                if result:
                    _, data = result
                    issue_data = json.loads(data)
                    await self.process_issue(issue_data)
            except Exception as e:
                logger.error(f"❌ Error in worker loop: {e}")
                await asyncio.sleep(5)  # Back off on error

        await self.disconnect()
        logger.info("👋 Issue Analyzer worker stopped")
    async def process_issue(self, issue_data: dict):
        """
        Process a single issue.

        Args:
            issue_data: Dict with issue key and metadata
        """
        issue_key = issue_data.get("key")
        logger.info(f"📋 Processing issue: {issue_key}")

        try:
            # Update status
            await self.update_status(issue_key, "analyzing")

            # 1. Fetch full issue details from JIRA
            issue_details = await self.fetch_issue_details(issue_key)

            # 2. Extract relevant context
            description = issue_details.get("fields", {}).get("description", "")
            summary = issue_details.get("fields", {}).get("summary", "")

            # 3. Search for relevant code
            code_context = await self.search_relevant_code(summary, description)

            # 4. Get business rules for the affected module
            module = await self.identify_module(summary, description)
            business_rules = await self.get_module_rules(module)

            # 5. Send to LLM for analysis
            analysis = await self.analyze_with_llm(
                issue_description=f"{summary}\n\n{description}",
                code_context=code_context,
                business_rules=business_rules,
            )

            # 6. Store analysis result
            await self.store_analysis(issue_key, analysis)

            # 7. If confidence is high, create PR
            if analysis.get("confidence", 0) >= 0.75:
                pr_url = await self.create_pull_request(issue_key, analysis)
                analysis["pr_url"] = pr_url

            # 8. Post comment to JIRA
            await self.post_jira_comment(issue_key, analysis)

            # Update status
            status = "pr_created" if analysis.get("pr_url") else "analyzed"
            await self.update_status(issue_key, status)

            logger.info(f"✅ Completed analysis: {issue_key} (confidence: {analysis.get('confidence', 0):.0%})")

        except Exception as e:
            logger.error(f"❌ Failed to process {issue_key}: {e}")
            await self.update_status(issue_key, "failed", str(e))
    async def fetch_issue_details(self, issue_key: str) -> dict:
        """Fetch issue details from JIRA."""
        # TODO: Implement JIRA client call
        logger.info(f"🔍 Fetching issue details: {issue_key}")
        return {}

    async def search_relevant_code(self, summary: str, description: str) -> str:
        """Search vector DB for relevant code."""
        # TODO: Implement vector search
        logger.info("🔍 Searching for relevant code...")
        return ""

    async def identify_module(self, summary: str, description: str) -> Optional[str]:
        """Identify which business module the issue relates to."""
        # TODO: Implement module identification
        return None

    async def get_module_rules(self, module: Optional[str]) -> str:
        """Get business rules for a module."""
        # TODO: Load from database
        return ""

    async def analyze_with_llm(
        self,
        issue_description: str,
        code_context: str,
        business_rules: str,
    ) -> dict:
        """Send to LLM for analysis."""
        # TODO: Implement LLM service call
        logger.info("🤖 Analyzing with LLM...")
        return {
            "root_cause": "Analysis pending",
            "affected_files": [],
            "proposed_fix": "",
            "confidence": 0.0,
            "explanation": "",
        }

    async def store_analysis(self, issue_key: str, analysis: dict):
        """Store analysis result in database."""
        # TODO: Implement database storage
        logger.info(f"💾 Storing analysis for {issue_key}")

    async def create_pull_request(self, issue_key: str, analysis: dict) -> Optional[str]:
        """Create a pull request with the proposed fix."""
        # TODO: Implement Bitbucket PR creation
        logger.info(f"📝 Creating PR for {issue_key}")
        return None

    async def post_jira_comment(self, issue_key: str, analysis: dict):
        """Post analysis as a comment on the JIRA issue."""
        # TODO: Implement JIRA comment
        logger.info(f"💬 Posting comment to {issue_key}")

    async def update_status(self, issue_key: str, status: str, error: Optional[str] = None):
        """Update issue status in database and Redis."""
        # TODO: Implement status update
        logger.info(f"📊 Status update: {issue_key} -> {status}")

    def stop(self):
        """Signal worker to stop."""
        self.running = False
async def main():
    """Entry point for the worker."""
    worker = IssueAnalyzer(
        redis_url=os.getenv("REDIS_URL", "redis://localhost:6379"),
    )

    try:
        await worker.run()
    except KeyboardInterrupt:
        worker.stop()


if __name__ == "__main__":
    asyncio.run(main())
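For reference, a minimal producer sketch (not part of this commit) showing how an upstream service could hand an issue to this worker; it assumes the same REDIS_URL convention and the {"key": ...} payload shape that IssueAnalyzer.run() expects, and the issue key is a placeholder:

import asyncio
import json
import os

import redis.asyncio as redis


async def enqueue_issue(issue_key: str) -> None:
    # Push onto the queue the analyzer BLPOPs from ("issues:pending").
    client = redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379"))
    await client.rpush("issues:pending", json.dumps({"key": issue_key}))
    await client.close()


if __name__ == "__main__":
    asyncio.run(enqueue_issue("PROJ-123"))  # placeholder issue key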
@@ -0,0 +1,261 @@
"""
|
||||||
|
Code Indexer Worker - Background indexing of Bitbucket repositories.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, List, Dict, Any
|
||||||
|
|
||||||
|
import redis.asyncio as redis
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class CodeIndexer:
    """
    Background worker that indexes code from Bitbucket repositories.

    Flow:
    1. Poll Redis queue for indexing jobs
    2. Clone/pull repository
    3. Parse COBOL/SQL/JCL files
    4. Generate embeddings
    5. Store in Qdrant vector database
    """

    # File extensions to index
    INDEXABLE_EXTENSIONS = {
        ".cbl": "cobol",
        ".cob": "cobol",
        ".cpy": "copybook",
        ".sql": "sql",
        ".jcl": "jcl",
        ".proc": "jcl_proc",
    }
    def __init__(
        self,
        redis_url: str = "redis://localhost:6379",
        qdrant_url: str = "http://localhost:6333",
        work_dir: str = "/tmp/aci-indexer",
        queue_name: str = "indexer:jobs",
    ):
        self.redis_url = redis_url
        self.qdrant_url = qdrant_url
        self.work_dir = Path(work_dir)
        self.queue_name = queue_name
        self.redis: Optional[redis.Redis] = None
        self.running = False

        # Create work directory
        self.work_dir.mkdir(parents=True, exist_ok=True)
    async def connect(self):
        """Connect to Redis."""
        self.redis = redis.from_url(self.redis_url)
        logger.info(f"🔌 Connected to Redis: {self.redis_url}")

    async def disconnect(self):
        """Disconnect from Redis."""
        if self.redis:
            await self.redis.close()
    async def run(self):
        """Main worker loop."""
        self.running = True
        await self.connect()

        logger.info("🚀 Code Indexer worker started")

        while self.running:
            try:
                # Block waiting for new jobs (timeout 10s)
                result = await self.redis.blpop(self.queue_name, timeout=10)

                if result:
                    _, data = result
                    # Deserialize the queued job; eval() on queue input would execute arbitrary code
                    job = json.loads(data)
                    await self.process_job(job)
            except Exception as e:
                logger.error(f"❌ Error in worker loop: {e}")
                await asyncio.sleep(5)

        await self.disconnect()
        logger.info("👋 Code Indexer worker stopped")
    async def process_job(self, job: dict):
        """
        Process an indexing job.

        Args:
            job: Dict with repository info and options
        """
        repo_name = job.get("name")
        repo_url = job.get("url")

        logger.info(f"📦 Indexing repository: {repo_name}")

        try:
            # 1. Clone or update repository
            repo_path = await self.clone_or_update(repo_name, repo_url)

            # 2. Find indexable files
            files = self.find_indexable_files(repo_path)
            logger.info(f"📁 Found {len(files)} indexable files")

            # 3. Process files in batches
            total_chunks = 0
            batch_size = 10

            for i in range(0, len(files), batch_size):
                batch = files[i:i + batch_size]
                chunks = await self.process_file_batch(batch, repo_name)
                total_chunks += chunks

                # Report progress
                progress = min(100, ((i + batch_size) / len(files)) * 100)
                await self.report_progress(repo_name, progress)

            # 4. Update repository status
            await self.update_repo_status(repo_name, total_chunks)

            logger.info(f"✅ Indexed {repo_name}: {total_chunks} chunks from {len(files)} files")

        except Exception as e:
            logger.error(f"❌ Failed to index {repo_name}: {e}")
            await self.update_repo_status(repo_name, 0, error=str(e))
    async def clone_or_update(self, name: str, url: str) -> Path:
        """Clone repository or pull latest changes."""
        repo_path = self.work_dir / name

        if repo_path.exists():
            logger.info(f"📥 Pulling latest changes: {name}")
            # TODO: Implement git pull
        else:
            logger.info(f"📥 Cloning repository: {name}")
            # TODO: Implement git clone
            repo_path.mkdir(parents=True, exist_ok=True)

        return repo_path

    def find_indexable_files(self, repo_path: Path) -> List[Path]:
        """Find all files that should be indexed."""
        files = []

        for ext in self.INDEXABLE_EXTENSIONS:
            files.extend(repo_path.rglob(f"*{ext}"))

        return files
    async def process_file_batch(self, files: List[Path], repo_name: str) -> int:
        """Process a batch of files and return chunk count."""
        total_chunks = 0

        for file_path in files:
            try:
                content = file_path.read_text(encoding="latin-1")
                file_type = self.INDEXABLE_EXTENSIONS.get(file_path.suffix.lower())

                # Parse into chunks
                chunks = self.parse_file(content, file_path, file_type)

                # Generate embeddings and store
                for chunk in chunks:
                    await self.index_chunk(chunk, repo_name)
                    total_chunks += 1

            except Exception as e:
                logger.warning(f"⚠️ Failed to process {file_path}: {e}")

        return total_chunks
    def parse_file(self, content: str, path: Path, file_type: str) -> List[Dict[str, Any]]:
        """Parse a file into indexable chunks."""
        chunks = []

        if file_type in ("cobol", "copybook"):
            chunks = self.parse_cobol(content, path)
        elif file_type == "sql":
            chunks = self.parse_sql(content, path)
        elif file_type in ("jcl", "jcl_proc"):
            chunks = self.parse_jcl(content, path)

        return chunks

    def parse_cobol(self, content: str, path: Path) -> List[Dict[str, Any]]:
        """Parse COBOL program into chunks."""
        # TODO: Implement full COBOL parsing
        # For now, create one chunk per file
        return [{
            "file_path": str(path),
            "content": content[:4000],  # Limit size
            "type": "cobol",
            "start_line": 1,
            "end_line": content.count("\n"),
        }]

    def parse_sql(self, content: str, path: Path) -> List[Dict[str, Any]]:
        """Parse SQL file into chunks."""
        return [{
            "file_path": str(path),
            "content": content[:4000],
            "type": "sql",
            "start_line": 1,
            "end_line": content.count("\n"),
        }]

    def parse_jcl(self, content: str, path: Path) -> List[Dict[str, Any]]:
        """Parse JCL file into chunks."""
        return [{
            "file_path": str(path),
            "content": content[:4000],
            "type": "jcl",
            "start_line": 1,
            "end_line": content.count("\n"),
        }]
    async def index_chunk(self, chunk: Dict[str, Any], repo_name: str):
        """Generate embedding and store chunk in Qdrant."""
        # TODO: Implement embedding generation and Qdrant storage
        pass

    async def report_progress(self, repo_name: str, progress: float):
        """Report indexing progress."""
        if self.redis:
            await self.redis.set(f"indexer:progress:{repo_name}", str(progress))
    async def update_repo_status(
        self,
        repo_name: str,
        chunk_count: int,
        error: Optional[str] = None,
    ):
        """Update repository status in database."""
        # TODO: Implement database update
        status = "indexed" if not error else "failed"
        logger.info(f"📊 Repo status: {repo_name} -> {status} ({chunk_count} chunks)")

    def stop(self):
        """Signal worker to stop."""
        self.running = False
async def main():
    """Entry point for the worker."""
    worker = CodeIndexer(
        redis_url=os.getenv("REDIS_URL", "redis://localhost:6379"),
        qdrant_url=os.getenv("QDRANT_URL", "http://localhost:6333"),
    )

    try:
        await worker.run()
    except KeyboardInterrupt:
        worker.stop()


if __name__ == "__main__":
    asyncio.run(main())
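As with the analyzer, a minimal sketch (not part of this commit) of how a caller could submit a job to indexer:jobs and read the indexer:progress:<name> key that report_progress() writes; the repository name and URL below are placeholders:

import asyncio
import json
import os

import redis.asyncio as redis


async def submit_index_job(name: str, url: str) -> None:
    client = redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379"))
    # The worker json.loads() each queued job, so the payload is a JSON object
    # carrying the "name" and "url" fields that process_job() reads.
    await client.rpush("indexer:jobs", json.dumps({"name": name, "url": url}))

    # Poll the progress key written by report_progress().
    for _ in range(3):
        await asyncio.sleep(2)
        progress = await client.get(f"indexer:progress:{name}")
        print(f"{name}: {progress.decode() if progress else '0'}%")

    await client.close()


if __name__ == "__main__":
    # Placeholder repository; replace with a real Bitbucket clone URL.
    asyncio.run(submit_index_job("core-batch", "https://bitbucket.example/scm/core-batch.git"))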
@@ -0,0 +1,5 @@
redis>=5.0.0
httpx>=0.26.0
sentence-transformers>=2.2.0
qdrant-client>=1.7.0
python-dotenv>=1.0.0
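The sentence-transformers and qdrant-client pins above back the index_chunk() TODO; a rough sketch of that step, assuming a hypothetical code_chunks collection and the 384-dimension all-MiniLM-L6-v2 model (collection name, model, and the sample chunk are placeholders, not decisions made in this commit):

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model (384-dim vectors)
qdrant = QdrantClient(url="http://localhost:6333")

# Hypothetical collection; name and layout are not specified by this commit.
qdrant.recreate_collection(
    collection_name="code_chunks",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

# Placeholder chunk shaped like the dicts parse_cobol/parse_sql/parse_jcl return.
chunk = {"file_path": "CUSTUPDT.cbl", "content": "IDENTIFICATION DIVISION. ...", "type": "cobol"}
vector = model.encode(chunk["content"]).tolist()
qdrant.upsert(
    collection_name="code_chunks",
    points=[PointStruct(id=1, vector=vector, payload=chunk)],
)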