"""Plain text/markdown transcript ingester"""
import re
from pathlib import Path
from typing import List, Optional, Tuple

from ucts.core.models import Session, Message
from ucts.ingestion.base import SessionIngester


class TranscriptIngester(SessionIngester):
    """Ingest plain text or markdown conversation transcripts.

    A transcript is split into speaker turns by looking for a role marker
    (see ``ROLE_PATTERNS``) at the start of a line. Lines that follow a
    marker belong to that speaker until the next marker is found.
    """

    # Common role indicators, tried in order; the first match wins.
    # Matching is case-insensitive.
    # NOTE(review): ``[:\s]`` also accepts whitespace after the marker, so a
    # prose line such as "A simple example" or "Me too" is read as a speaker
    # turn. Left unchanged here because tightening it would change which
    # transcript formats are recognised.
    ROLE_PATTERNS = [
        (r'^User[:\s]', 'user'),
        (r'^Human[:\s]', 'user'),
        (r'^Me[:\s]', 'user'),
        (r'^Q[:\s]', 'user'),
        (r'^Assistant[:\s]', 'assistant'),
        (r'^Claude[:\s]', 'assistant'),
        (r'^AI[:\s]', 'assistant'),
        (r'^A[:\s]', 'assistant'),
        (r'^Bot[:\s]', 'assistant'),
        (r'^\*\*User\*\*[:\s]?', 'user'),
        (r'^\*\*Assistant\*\*[:\s]?', 'assistant'),
        (r'^>\s*User[:\s]', 'user'),
        (r'^>\s*Assistant[:\s]', 'assistant'),
    ]

    # A Markdown code-fence line: optional indent, a run of three or more
    # backticks or tildes, then whatever follows on the line.
    _FENCE_PATTERN = re.compile(r'^[ \t]*(`{3,}|~{3,})(.*)$')

    def ingest(self, source_path: str) -> Session:
        """
        Parse a plain text or markdown transcript file.

        The file is read as UTF-8 and split into speaker turns. Code blocks,
        todos and decisions are extracted from the full file text rather
        than from the individual messages.

        Args:
            source_path: Path of the transcript file to read.

        Returns:
            A ``Session`` with ``source="transcript"`` holding the parsed
            messages and the extracted elements.
        """
        path = Path(source_path)
        content = path.read_text(encoding='utf-8')

        messages = self._parse_transcript(content)

        # Extract elements from the whole transcript text
        code_blocks = self.extract_code_blocks(content)
        todos = self.extract_todos(content)
        decisions = self.extract_decisions(content)

        return Session(
            source="transcript",
            messages=messages,
            code_blocks=code_blocks,
            files_created=[],
            decisions=decisions,
            todos=todos,
            metadata={
                "source_file": str(path),
                "format": "transcript",
            }
        )

    def _parse_transcript(self, content: str) -> List[Message]:
        """Parse transcript text into a list of messages.

        Text before the first role marker is ignored. Turns whose text is
        empty after stripping are skipped. If no turn is found at all and
        the content is not blank, the whole content is returned as a single
        assistant message tagged ``{'parsed': 'unstructured'}``.
        """
        lines = content.split('\n')

        messages = self._collect_turns(lines, honor_fences=True)
        if messages is None:
            # A code fence never closed, so fence tracking cannot be trusted
            # for this input; fall back to plain line-by-line detection.
            messages = self._collect_turns(lines, honor_fences=False)

        # If no structured messages found, treat as single assistant message
        if not messages and content.strip():
            messages.append(Message(
                role='assistant',
                content=content.strip(),
                timestamp='',
                metadata={'parsed': 'unstructured'}
            ))

        return messages

    def _collect_turns(self, lines: List[str],
                       honor_fences: bool) -> Optional[List[Message]]:
        """Group lines into speaker turns.

        When ``honor_fences`` is true, lines inside a fenced code block that
        was opened within a message are never treated as role markers, so
        code such as ``a = 1`` or ``user: admin`` stays in its message.

        Returns:
            The list of messages, or ``None`` if a fence was still open at
            the end of the input.
        """
        messages: List[Message] = []
        current_role = None
        current_lines: List[str] = []
        fence = None  # (marker char, marker length) of the open fence

        for line in lines:
            if fence is not None and current_role is not None:
                # Inside code that belongs to the current message.
                current_lines.append(line)
                if self._closes_fence(line, fence):
                    fence = None
                continue

            role, remainder = self._match_role(line)
            if role:
                if fence is not None:
                    # The first speaker sits inside a fence opened before any
                    # turn: the transcript itself is wrapped in a code block,
                    # so fence tracking would misread its closing line.
                    honor_fences = False
                    fence = None
                self._append_turn(messages, current_role, current_lines)
                current_role = role
                current_lines = [remainder] if remainder else []
                if honor_fences and remainder:
                    fence = self._opens_fence(remainder)
                continue

            if current_role is None:
                # Text before the first speaker is dropped; only remember
                # whether it leaves a fence open.
                if honor_fences:
                    if fence is None:
                        fence = self._opens_fence(line)
                    elif self._closes_fence(line, fence):
                        fence = None
                continue

            current_lines.append(line)
            if honor_fences:
                fence = self._opens_fence(line)

        if fence is not None:
            return None

        # Don't forget the last message
        self._append_turn(messages, current_role, current_lines)
        return messages

    @staticmethod
    def _append_turn(messages: List[Message], role: Optional[str],
                     lines: List[str]) -> None:
        """Append a message for ``role`` unless its text is empty."""
        if not role:
            return
        text = '\n'.join(lines).strip()
        # Check the joined text, not the list: a turn made only of blank
        # lines has a non-empty list but no content.
        if text:
            messages.append(Message(
                role=role,
                content=text,
                timestamp='',
                metadata={}
            ))

    def _opens_fence(self, line: str) -> Optional[Tuple[str, int]]:
        """Return ``(char, length)`` if ``line`` opens a code fence."""
        found = self._FENCE_PATTERN.match(line)
        if found is None:
            return None
        marker, info = found.group(1), found.group(2)
        # A backtick run followed by more backticks on the same line is
        # inline code, not a fence opener.
        if marker[0] == '`' and '`' in info:
            return None
        return marker[0], len(marker)

    def _closes_fence(self, line: str, fence: Tuple[str, int]) -> bool:
        """Return True if ``line`` closes the open ``fence``."""
        found = self._FENCE_PATTERN.match(line)
        if found is None:
            return False
        marker, rest = found.group(1), found.group(2)
        char, length = fence
        # A closing fence uses the same character, is at least as long as
        # the opener, and carries no trailing text.
        return marker[0] == char and len(marker) >= length and not rest.strip()

    def _match_role(self, line: str) -> Tuple[Optional[str], str]:
        """Match ``line`` against ``ROLE_PATTERNS`` once.

        Returns:
            ``(role, text)`` where ``text`` is the line with the role prefix
            removed and stripped, or ``(None, line)`` if no pattern matches.
        """
        for pattern, role in self.ROLE_PATTERNS:
            found = re.match(pattern, line, re.IGNORECASE)
            if found:
                return role, line[found.end():].strip()
        return None, line

    def _detect_role(self, line: str) -> Optional[str]:
        """Detect role from line prefix; ``None`` if there is no marker."""
        role, _ = self._match_role(line)
        return role

    def _remove_role_prefix(self, line: str) -> str:
        """Remove role prefix from line; unchanged if there is no marker."""
        _, remainder = self._match_role(line)
        return remainder
