"""Claude Desktop export ingester"""
import json
import logging
import re
from pathlib import Path
from typing import Dict, Any, List

from ucts.core.models import Session, Message
from ucts.ingestion.base import SessionIngester, ParseError, InvalidFormatError

# Module-level logger
logger = logging.getLogger(__name__)


class ClaudeDesktopIngester(SessionIngester):
    """Ingest Claude Desktop conversation exports.

    The export is a JSON document that is either a bare array of message
    objects or an object holding that array under ``conversation`` or
    ``messages``. Each message is converted to a ``Message``; code blocks,
    TODOs, decisions and file-creation mentions are then derived from the
    message text.
    """

    # Role spellings seen in exports, mapped to the canonical role name.
    # Roles not listed here are passed through lower-cased.
    _ROLE_ALIASES = {
        'user': 'user',
        'human': 'user',
        'assistant': 'assistant',
        'bot': 'assistant',
        'ai': 'assistant',
        'claude': 'assistant',
        'system': 'system',
    }

    # (phrase searched for in lower-cased text, action label recorded).
    # Order matters: the first phrase found in a message decides the action.
    _FILE_ACTION_TRIGGERS = (
        ("created file", "created"),
        ("wrote to", "wrote"),
        ("saved to", "saved"),
        ("generated", "generated"),
        ("writing", "writing"),
    )

    # Quoted tokens that end in ".<ext>". Newlines are excluded so a match
    # cannot span several lines of prose or code between two stray quotes.
    _FILENAME_PATTERNS = (
        re.compile(r'`([^`\n]+\.[a-zA-Z0-9]+)`'),  # Backtick wrapped
        re.compile(r'"([^"\n]+\.[a-zA-Z0-9]+)"'),  # Quote wrapped
        re.compile(r"'([^'\n]+\.[a-zA-Z0-9]+)'"),  # Single quote wrapped
    )

    # File names at or above this length are treated as false positives.
    _MAX_FILENAME_LENGTH = 256

    def ingest(self, source_path: str) -> Session:
        """
        Parse Claude Desktop JSON export.

        Expected format:
        {
            "conversation": [
                {"role": "user", "content": "..."},
                {"role": "assistant", "content": "..."}
            ],
            "metadata": {...}
        }

        Or the direct array format from exports. In the object format the
        ``messages`` key is used when ``conversation`` is missing, empty or
        not a list.

        Args:
            source_path: Path to the exported JSON file.

        Returns:
            A ``Session`` with source ``"claude_desktop"`` holding the parsed
            messages and everything extracted from their text.

        Raises:
            FileNotFoundIngestionError: If source file doesn't exist
                (presumably raised by ``validate_source_path``; it is not
                raised directly in this method)
            ParseError: If JSON cannot be parsed
            InvalidFormatError: If the top-level JSON value is neither an
                array nor an object
        """
        path = self.validate_source_path(source_path)
        logger.info(f"Ingesting Claude Desktop export: {path}")

        raw_text = self.read_file_safe(path)
        try:
            data = json.loads(raw_text)
        except json.JSONDecodeError as e:
            # Chain the decoder error so the original position info survives.
            raise ParseError(f"Invalid JSON in Claude Desktop export: {e}") from e

        if not isinstance(data, (list, dict)):
            raise InvalidFormatError(
                f"Expected JSON array or object, got {type(data).__name__}"
            )

        raw_messages = self._select_raw_messages(data)
        if not raw_messages:
            logger.warning("No messages found in export")

        messages: List[Message] = []
        texts: List[str] = []

        for idx, msg in enumerate(raw_messages):
            if not isinstance(msg, dict):
                logger.warning(f"Skipping non-dict message at index {idx}")
                continue

            text = self._flatten_content(msg.get('content', ''))
            metadata = msg.get('metadata')

            messages.append(Message(
                role=self._normalize_role(msg.get('role', 'unknown')),
                content=text,
                timestamp=msg.get('timestamp', msg.get('created_at', '')),
                metadata=metadata if isinstance(metadata, dict) else {},
            ))
            texts.append(text)

        # One newline-terminated entry per message, built in a single pass.
        all_content = "".join(f"{text}\n" for text in texts)

        code_blocks = self.extract_code_blocks(all_content)
        files_created = self._extract_files_created(messages)
        todos = self.extract_todos(all_content)
        decisions = self.extract_decisions(all_content)

        logger.info(
            f"Ingested {len(messages)} messages, "
            f"{len(code_blocks)} code blocks, "
            f"{len(todos)} TODOs"
        )

        return Session(
            source="claude_desktop",
            messages=messages,
            code_blocks=code_blocks,
            files_created=files_created,
            decisions=decisions,
            todos=todos,
            metadata={
                "source_file": str(path),
                "format": "claude_desktop",
            }
        )

    @staticmethod
    def _select_raw_messages(data: Any) -> List[Any]:
        """Return the list of raw message entries from a parsed export.

        ``data`` is the decoded JSON value, already known to be a list or a
        dict. A list is returned unchanged. For a dict, the first of
        ``conversation`` / ``messages`` that holds a non-empty list wins;
        values of any other type are ignored with a warning instead of
        being iterated element by element.
        """
        if isinstance(data, list):
            logger.debug("Detected direct array format")
            return data

        for key in ('conversation', 'messages'):
            candidate = data.get(key)
            if isinstance(candidate, list):
                if candidate:
                    logger.debug(
                        f"Detected object format with {len(candidate)} messages"
                    )
                    return candidate
            elif candidate is not None:
                logger.warning(
                    f"Ignoring '{key}': expected a list, "
                    f"got {type(candidate).__name__}"
                )

        logger.warning("No conversation or messages key found in export")
        return []

    @staticmethod
    def _flatten_content(content: Any) -> str:
        """Reduce a message's ``content`` field to plain text.

        Strings are returned as-is. A list (Claude API block format) is
        joined with newlines: string items are used directly, dict items
        contribute their ``text`` value when it is a string and their
        ``str()`` representation otherwise; items of other types are
        dropped. Any other truthy value is stringified and falsy values
        become the empty string.
        """
        if isinstance(content, str):
            return content

        if isinstance(content, list):
            parts: List[str] = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif isinstance(block, dict):
                    text = block.get('text')
                    # A missing or non-string ``text`` would break the join
                    # below, so fall back to the block's repr.
                    parts.append(text if isinstance(text, str) else str(block))
            return '\n'.join(parts)

        return str(content) if content else ''

    def _normalize_role(self, role: Any) -> str:
        """Normalize role names to standard format.

        Known aliases map to ``user``, ``assistant`` or ``system``. Other
        non-empty strings are returned lower-cased. Empty or non-string
        values (JSON allows numbers, objects, null) yield ``'unknown'``
        rather than raising.
        """
        if not isinstance(role, str) or not role:
            return 'unknown'

        role_lower = role.lower()
        return self._ROLE_ALIASES.get(role_lower, role_lower)

    def _extract_files_created(self, messages: List[Message]) -> List[Dict[str, str]]:
        """Extract mentions of file creation from messages.

        Only assistant messages are inspected. A message qualifies when its
        lower-cased text contains one of the trigger phrases; every quoted
        token in it that looks like ``name.ext`` is then reported once
        across the whole conversation, in order of first appearance.

        Args:
            messages: Parsed messages exposing ``role`` and ``content``.

        Returns:
            A list of ``{"path": ..., "action": ...}`` dicts, where
            ``action`` is the label of the first trigger phrase found in
            the message that first mentioned the path.
        """
        files: List[Dict[str, str]] = []
        seen_files = set()

        for msg in messages:
            if msg.role != "assistant" or not msg.content:
                continue

            lowered = msg.content.lower()

            # Only the first matching trigger can contribute: it records
            # every candidate in the message, so later triggers would only
            # rediscover names that are already in ``seen_files``.
            action = next(
                (
                    label
                    for phrase, label in self._FILE_ACTION_TRIGGERS
                    if phrase in lowered
                ),
                None,
            )
            if action is None:
                continue

            for pattern in self._FILENAME_PATTERNS:
                for candidate in pattern.findall(msg.content):
                    if (
                        len(candidate) < self._MAX_FILENAME_LENGTH
                        and candidate not in seen_files
                    ):
                        seen_files.add(candidate)
                        files.append({
                            "path": candidate,
                            "action": action
                        })

        logger.debug(f"Extracted {len(files)} file creation mentions")
        return files
