"""VS Code extension conversation ingester"""
import json
import logging
from pathlib import Path
from typing import Dict, Any, List, Set

from ucts.core.models import Session, Message
from ucts.ingestion.base import SessionIngester, ParseError, InvalidFormatError

# Module-level logger (handlers/levels are left to the application's logging config)
logger = logging.getLogger(__name__)


class VSCodeIngester(SessionIngester):
    """Ingest VS Code Claude/Copilot extension logs.

    The export is expected to be JSON: either a bare array of message
    objects, or an object holding that array under one of the keys listed
    in ``_MESSAGE_KEYS``.
    """

    # Top-level keys probed (in priority order) for the message list when
    # the export is a JSON object rather than a bare array.
    _MESSAGE_KEYS = ('messages', 'conversation', 'turns', 'history')

    # Per-message keys probed (in priority order) for the message text.
    _CONTENT_KEYS = ('content', 'text', 'message')

    # Role aliases collapsed onto the canonical role names.
    _USER_ROLES = frozenset({'human', 'user'})
    _ASSISTANT_ROLES = frozenset({'bot', 'ai', 'copilot', 'claude', 'assistant'})

    # File extension (lower-case, with leading dot) -> language name.
    _LANGUAGE_BY_EXTENSION: Dict[str, str] = {
        '.py': 'python',
        '.pyw': 'python',
        '.js': 'javascript',
        '.jsx': 'javascript',
        '.mjs': 'javascript',
        '.ts': 'typescript',
        '.tsx': 'typescript',
        '.rs': 'rust',
        '.go': 'go',
        '.java': 'java',
        '.kt': 'kotlin',
        '.cs': 'csharp',
        '.rb': 'ruby',
        '.php': 'php',
        '.swift': 'swift',
        '.c': 'c',
        '.cpp': 'cpp',
        '.h': 'c',
        '.hpp': 'cpp',
    }

    def ingest(self, source_path: str) -> Session:
        """
        Parse VS Code extension conversation logs.

        Handles multiple formats:
        - Claude extension export
        - GitHub Copilot chat logs
        - Custom VS Code extension formats

        Args:
            source_path: Path to the JSON export file.

        Returns:
            A Session with source "vscode" containing the parsed messages,
            the code blocks / TODOs / decisions extracted from the combined
            message text, and workspace metadata.

        Raises:
            FileNotFoundIngestionError: If source file doesn't exist
                (presumably raised by ``validate_source_path`` in the base
                class; confirm there).
            ParseError: If JSON cannot be parsed
            InvalidFormatError: If format is not recognized
        """
        path = self.validate_source_path(source_path)
        logger.info("Ingesting VS Code extension log: %s", path)

        raw_text = self.read_file_safe(path)
        try:
            data = json.loads(raw_text)
        except json.JSONDecodeError as e:
            # Chain the cause so the original decode position is not lost.
            raise ParseError(f"Invalid JSON in VS Code export: {e}") from e

        # Validate data structure
        if not isinstance(data, (list, dict)):
            raise InvalidFormatError(
                f"Expected JSON array or object, got {type(data).__name__}"
            )

        raw_messages = self._select_raw_messages(data)
        if not raw_messages:
            logger.warning("No messages found in VS Code export")

        messages: List[Message] = []
        for idx, msg in enumerate(raw_messages):
            if not isinstance(msg, dict):
                logger.warning("Skipping non-dict message at index %s", idx)
                continue
            messages.append(self._build_message(msg))

        # One newline-terminated entry per message; built with join to avoid
        # quadratic string concatenation on long conversations.
        all_content = "".join(f"{message.content}\n" for message in messages)

        # Extract code blocks and other elements
        code_blocks = self.extract_code_blocks(all_content)
        todos = self.extract_todos(all_content)
        decisions = self.extract_decisions(all_content)

        # Extract workspace context
        workspace_info = self._extract_workspace_info(data, messages)

        logger.info(
            "Ingested %d messages, %d code blocks, %d TODOs",
            len(messages), len(code_blocks), len(todos),
        )

        return Session(
            source="vscode",
            messages=messages,
            code_blocks=code_blocks,
            files_created=workspace_info.get('files_modified', []),
            decisions=decisions,
            todos=todos,
            metadata={
                "source_file": str(path),
                "format": "vscode",
                "workspace": workspace_info.get('workspace_path', ''),
                "languages": workspace_info.get('languages', []),
            }
        )

    def _select_raw_messages(self, data: Any) -> List[Any]:
        """Return the list of raw message entries found in the parsed JSON.

        A top-level list is returned as-is. For an object, the first key in
        ``_MESSAGE_KEYS`` holding a non-empty list wins; values of any other
        type are ignored (with a warning) instead of being iterated.
        """
        if isinstance(data, list):
            logger.debug("Detected direct array format")
            return data

        for key in self._MESSAGE_KEYS:
            candidate = data.get(key)
            if not candidate:
                continue
            if isinstance(candidate, list):
                logger.debug(
                    "Detected object format with %d messages", len(candidate)
                )
                return candidate
            logger.warning(
                "Ignoring key %r: expected a list, got %s",
                key, type(candidate).__name__,
            )

        logger.debug("Detected object format with 0 messages")
        return []

    def _build_message(self, msg: Dict[str, Any]) -> Message:
        """Convert one raw message dict into a Message."""
        # Fall back to 'author' when 'role' is missing or empty.
        role = self._normalize_role(
            msg.get('role') or msg.get('author') or 'unknown'
        )

        # Take the first content-like key that is present and not null.
        content: Any = ''
        for key in self._CONTENT_KEYS:
            value = msg.get(key)
            if value is not None:
                content = value
                break
        if not isinstance(content, str):
            content = str(content) if content else ''

        # Build metadata safely
        metadata: Dict[str, Any] = {}
        if msg.get('workspace'):
            metadata['workspace'] = msg['workspace']
        # Use 'activeFile' whenever 'file' is absent *or* empty, so an empty
        # 'file' value never shadows a usable 'activeFile'.
        active_file = msg.get('file') or msg.get('activeFile')
        if active_file:
            metadata['file'] = active_file

        # Merge any existing metadata (it takes precedence over the above)
        if isinstance(msg.get('metadata'), dict):
            metadata.update(msg['metadata'])

        return Message(
            role=role,
            content=content,
            timestamp=msg.get('timestamp', ''),
            metadata=metadata
        )

    def _normalize_role(self, role: str) -> str:
        """Normalize role names to standard format.

        Returns 'user' or 'assistant' for the known aliases, 'unknown' for
        empty, blank or non-string input, and otherwise the stripped,
        lower-cased role unchanged (this covers 'system').
        """
        # JSON may carry a non-string here (number, object, null).
        if not isinstance(role, str):
            return 'unknown'

        role_lower = role.strip().lower()
        if not role_lower:
            return 'unknown'
        if role_lower in self._USER_ROLES:
            return 'user'
        if role_lower in self._ASSISTANT_ROLES:
            return 'assistant'
        return role_lower

    def _extract_workspace_info(self, data: Any, messages: List[Message]) -> Dict[str, Any]:
        """Extract VS Code workspace information.

        Returns a dict with:
        - 'workspace_path': top-level 'workspace' (or 'workspacePath') value
          when ``data`` is a dict, else ''.
        - 'files_modified': one {'path', 'action': 'referenced'} entry per
          distinct file path found in message metadata, in first-seen order.
        - 'languages': sorted list of languages detected from those paths.
        """
        info: Dict[str, Any] = {
            'workspace_path': '',
            'files_modified': [],
            'languages': [],
        }
        languages: Set[str] = set()
        seen_files: Set[str] = set()

        # Get workspace from data
        if isinstance(data, dict):
            info['workspace_path'] = (
                data.get('workspace') or data.get('workspacePath') or ''
            )

        # Analyze messages for file mentions
        for msg in messages:
            file_path = msg.metadata.get('file', '')
            # Merged metadata is untrusted: skip anything that is not a
            # non-empty string (could be unhashable or lack str methods).
            if not isinstance(file_path, str) or not file_path:
                continue
            if file_path in seen_files:
                continue

            seen_files.add(file_path)
            info['files_modified'].append({
                'path': file_path,
                'action': 'referenced'
            })

            # Detect language from extension
            lang = self._detect_language(file_path)
            if lang:
                languages.add(lang)

        # Sorted so the result is deterministic across runs.
        info['languages'] = sorted(languages)
        return info

    def _detect_language(self, file_path: str) -> str:
        """Detect programming language from file extension.

        The match is case-insensitive and uses the text after the last '.'
        in ``file_path``. Returns '' when there is no dot, the extension is
        not in ``_LANGUAGE_BY_EXTENSION``, or the input is not a string.
        """
        if not isinstance(file_path, str):
            return ''

        _, dot, extension = file_path.rpartition('.')
        if not dot:
            return ''
        return self._LANGUAGE_BY_EXTENSION.get(f".{extension.lower()}", '')
