"""
UCTS Performance Module

Provides:
- Lazy loading for large conversations
- Streaming parser for huge files
- Caching for repeated analysis
"""

import json
import hashlib
import functools
import time
import os
from pathlib import Path
from typing import Dict, Any, Optional, List, Generator, Iterator, Callable, TypeVar
from datetime import datetime, timedelta
from dataclasses import dataclass, field
import logging
import threading
from collections import OrderedDict
import ijson  # Streaming JSON parser

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Generic type variable used to annotate the return type of functions wrapped
# by the `cached_analysis` decorator.
T = TypeVar('T')


# =============================================================================
# Lazy Loading for Large Conversations
# =============================================================================

@dataclass
class LazyChunk:
    """A byte range of a file whose text is read only on first access.

    ``offset`` and ``length`` are expressed in bytes. The decoded text is
    cached on the instance until ``unload()`` is called.
    """
    index: int          # position of this chunk within its file
    file_path: Path     # file the chunk is read from
    offset: int         # byte offset of the first byte of the chunk
    length: int         # number of bytes covered by the chunk
    _content: Optional[str] = field(default=None, repr=False)  # cached text

    @property
    def content(self) -> str:
        """Return the chunk's text, reading it from disk on first access.

        The file is read in binary mode so that ``offset``/``length`` are
        honoured as byte counts; a text-mode ``read(n)`` counts characters
        and would overlap neighbouring chunks for non-ASCII data. Bytes are
        decoded as UTF-8; a multi-byte character cut by a chunk boundary is
        rendered as U+FFFD instead of raising ``UnicodeDecodeError``.
        Newlines are returned exactly as stored (no translation).
        """
        if self._content is None:
            with open(self.file_path, 'rb') as f:
                f.seek(self.offset)
                raw = f.read(self.length)
            self._content = raw.decode('utf-8', errors='replace')
        return self._content

    def unload(self) -> None:
        """Drop the cached text so it can be garbage-collected."""
        self._content = None


class LazyConversation:
    """
    Lazy-loading conversation handler for large files.

    Only loads chunks of the conversation as needed,
    reducing memory usage for large files.
    """

    DEFAULT_CHUNK_SIZE = 64 * 1024  # 64KB chunks

    def __init__(self, file_path: str, chunk_size: int = None):
        self.file_path = Path(file_path)
        self.chunk_size = chunk_size or self.DEFAULT_CHUNK_SIZE
        self._chunks: List[LazyChunk] = []
        self._total_size = 0
        self._indexed = False
        self._messages: Optional[List[Dict]] = None

    def _index_file(self):
        """Create index of file chunks"""
        if self._indexed:
            return

        self._total_size = self.file_path.stat().st_size
        offset = 0
        index = 0

        while offset < self._total_size:
            length = min(self.chunk_size, self._total_size - offset)
            self._chunks.append(LazyChunk(
                index=index,
                file_path=self.file_path,
                offset=offset,
                length=length
            ))
            offset += length
            index += 1

        self._indexed = True
        logger.debug(f"Indexed {len(self._chunks)} chunks for {self.file_path}")

    def __len__(self) -> int:
        """Get number of chunks"""
        self._index_file()
        return len(self._chunks)

    def __iter__(self) -> Iterator[LazyChunk]:
        """Iterate over chunks lazily"""
        self._index_file()
        for chunk in self._chunks:
            yield chunk
            chunk.unload()  # Free memory after use

    def get_chunk(self, index: int) -> Optional[LazyChunk]:
        """Get specific chunk by index"""
        self._index_file()
        if 0 <= index < len(self._chunks):
            return self._chunks[index]
        return None

    @property
    def total_size(self) -> int:
        """Get total file size in bytes"""
        self._index_file()
        return self._total_size

    def iter_messages(self, batch_size: int = 100) -> Generator[List[Dict], None, None]:
        """
        Iterate over messages in batches.
        Uses streaming JSON parser for memory efficiency.
        """
        try:
            with open(self.file_path, 'rb') as f:
                batch = []
                # Try to find messages array in JSON
                parser = ijson.items(f, 'messages.item')

                for message in parser:
                    batch.append(message)
                    if len(batch) >= batch_size:
                        yield batch
                        batch = []

                if batch:
                    yield batch

        except Exception as e:
            logger.warning(f"Streaming parse failed, falling back to standard: {e}")
            # Fallback: load entire file
            self._load_messages()
            if self._messages:
                for i in range(0, len(self._messages), batch_size):
                    yield self._messages[i:i + batch_size]

    def _load_messages(self):
        """Load all messages (fallback for non-JSON files)"""
        if self._messages is not None:
            return

        try:
            with open(self.file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if isinstance(data, list):
                self._messages = data
            elif isinstance(data, dict):
                self._messages = data.get('messages', data.get('conversation', []))
            else:
                self._messages = []
        except Exception as e:
            logger.error(f"Failed to load messages: {e}")
            self._messages = []


# =============================================================================
# Streaming Parser for Huge Files
# =============================================================================

class StreamingJSONParser:
    """
    Streaming JSON parser for huge files.

    Thin wrapper around ijson that opens the file in binary mode for each
    call and parses it incrementally instead of loading it whole.
    """

    def __init__(self, file_path: str):
        self.file_path = Path(file_path)

    def parse_objects(self, path_prefix: str = '') -> Generator[Dict, None, None]:
        """
        Parse JSON objects at given path prefix.

        Args:
            path_prefix: ijson path prefix (e.g., 'messages.item',
                'data.conversations.item'). An empty prefix is treated as
                'item', i.e. the elements of a top-level array.

        Yields:
            Parsed JSON objects
        """
        with open(self.file_path, 'rb') as f:
            yield from ijson.items(f, path_prefix or 'item')

    def parse_key_values(self) -> Generator[tuple, None, None]:
        """
        Parse JSON as key-value pairs at top level.

        Yields:
            (key, value) tuples
        """
        with open(self.file_path, 'rb') as f:
            for key, value in ijson.kvitems(f, ''):
                yield key, value

    def count_objects(self, path_prefix: str = 'item') -> int:
        """Count objects under ``path_prefix`` without keeping them in memory."""
        with open(self.file_path, 'rb') as f:
            return sum(1 for _ in ijson.items(f, path_prefix))

    def extract_structure(self, max_depth: int = 3) -> Dict[str, Any]:
        """
        Extract the key structure of the document without keeping values.

        The result is a nested dict of object keys. Array elements appear
        under the literal key ``'item'`` (ijson's name for array members),
        and keys seen in different elements of the same array are merged.

        Args:
            max_depth: Maximum depth to explore; a key is recorded when the
                number of dotted components in its ijson prefix is at most
                this value.

        Returns:
            Structure description

        Note:
            ijson joins path components with '.', so an object key that
            itself contains a '.' is indistinguishable from nesting here.
        """
        structure: Dict[str, Any] = {}

        with open(self.file_path, 'rb') as f:
            for prefix, event, value in ijson.parse(f):
                if event != 'map_key':
                    continue

                depth = prefix.count('.') + 1 if prefix else 0
                if depth > max_depth:
                    continue

                # Walk (creating as needed) down to the object owning this key.
                # Every component is a real key or 'item'; ijson never emits
                # numeric array indices, so nothing is skipped.
                node = structure
                if prefix:
                    for part in prefix.split('.'):
                        node = node.setdefault(part, {})

                # setdefault keeps nested keys already discovered for the same
                # key in an earlier array element instead of resetting them.
                node.setdefault(value, {})

        return structure


class StreamingTextParser:
    """
    Streaming parser for large text/markdown files.

    All iterators read the file line by line, so memory use is bounded by
    the largest single code block or section rather than by the file size.
    """

    def __init__(self, file_path: str, chunk_size: int = 8192):
        self.file_path = Path(file_path)
        # Stored for callers; the line-based iterators below do not use it.
        self.chunk_size = chunk_size

    def iter_lines(self) -> Generator[str, None, None]:
        """Iterate over lines (without trailing newline) without loading entire file"""
        with open(self.file_path, 'r', encoding='utf-8') as f:
            for line in f:
                yield line.rstrip('\n\r')

    def iter_code_blocks(self) -> Generator[Dict[str, str], None, None]:
        """
        Extract fenced (```) code blocks from markdown/text file.

        A line starting with ``` toggles between inside/outside a block; the
        text after the opening fence is taken as the language. A block whose
        closing fence is missing runs to the end of the file and is still
        yielded rather than silently dropped.

        Yields:
            Dict with 'language' and 'content' keys
        """
        in_code_block = False
        current_language = ''
        current_content: List[str] = []

        for line in self.iter_lines():
            if line.startswith('```'):
                if in_code_block:
                    # End of code block
                    yield {
                        'language': current_language,
                        'content': '\n'.join(current_content)
                    }
                    current_content = []
                    in_code_block = False
                else:
                    # Start of code block
                    current_language = line[3:].strip()
                    in_code_block = True
            elif in_code_block:
                current_content.append(line)

        # Unterminated fence: emit what was collected up to end of file.
        if in_code_block:
            yield {
                'language': current_language,
                'content': '\n'.join(current_content)
            }

    def iter_sections(self, delimiter: str = '---') -> Generator[str, None, None]:
        """
        Iterate over sections delimited by a separator.

        A line equal to ``delimiter`` (ignoring surrounding whitespace) ends
        the current section. Empty sections are skipped.

        Args:
            delimiter: Section delimiter

        Yields:
            Section content
        """
        current_section: List[str] = []

        for line in self.iter_lines():
            if line.strip() == delimiter:
                if current_section:
                    yield '\n'.join(current_section)
                    current_section = []
            else:
                current_section.append(line)

        if current_section:
            yield '\n'.join(current_section)


# =============================================================================
# Caching for Repeated Analysis
# =============================================================================

@dataclass
class CacheEntry:
    """Cache entry with metadata"""
    key: str                        # cache key the entry is stored under
    value: Any                      # cached payload
    created_at: datetime            # when the entry was written
    expires_at: Optional[datetime]  # expiry instant, or None for no expiry
    access_count: int = 0           # number of successful reads
    last_accessed: datetime = field(default_factory=datetime.now)  # time of last read


class AnalysisCache:
    """
    LRU cache for analysis results with TTL support.

    Features:
    - Time-based expiration
    - LRU eviction
    - Disk persistence option
    - Thread-safe operations (all access to the entry map is under one RLock)
    """

    DEFAULT_TTL = 3600  # 1 hour
    DEFAULT_MAX_SIZE = 1000

    def __init__(
        self,
        max_size: int = None,
        default_ttl: int = None,
        persist_path: Optional[str] = None
    ):
        """
        Args:
            max_size: Maximum number of entries; ``None`` or ``0`` selects
                ``DEFAULT_MAX_SIZE``.
            default_ttl: Default time-to-live in seconds; ``None`` or ``0``
                selects ``DEFAULT_TTL``.
            persist_path: Optional JSON file to load from now and to write
                to when ``persist()`` is called.

        Raises:
            ValueError: If ``max_size`` is negative (eviction could never
                satisfy such a bound).
        """
        if max_size is not None and max_size < 0:
            raise ValueError(f"max_size must be positive, got {max_size}")

        self.max_size = max_size or self.DEFAULT_MAX_SIZE
        self.default_ttl = default_ttl or self.DEFAULT_TTL
        self.persist_path = Path(persist_path) if persist_path else None

        # Ordered oldest -> most recently used.
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
        self._lock = threading.RLock()
        self._stats = {
            'hits': 0,
            'misses': 0,
            'evictions': 0
        }

        # Load from disk if available
        if self.persist_path:
            self._load_from_disk()

    def _generate_key(self, *args, **kwargs) -> str:
        """Generate a 32-hex-character key from the ``str()`` of the arguments."""
        key_data = json.dumps({
            'args': [str(a) for a in args],
            'kwargs': {k: str(v) for k, v in sorted(kwargs.items())}
        }, sort_keys=True)
        return hashlib.sha256(key_data.encode()).hexdigest()[:32]

    def get(self, key: str) -> Optional[Any]:
        """Get value from cache.

        Returns ``None`` on a miss or when the entry has expired (expired
        entries are removed). A hit marks the entry as most recently used.
        """
        with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                self._stats['misses'] += 1
                return None

            # Check expiration
            if entry.expires_at and datetime.now() > entry.expires_at:
                del self._cache[key]
                self._stats['misses'] += 1
                return None

            # Update access stats
            entry.access_count += 1
            entry.last_accessed = datetime.now()

            # Move to end (most recently used)
            self._cache.move_to_end(key)

            self._stats['hits'] += 1
            return entry.value

    def set(self, key: str, value: Any, ttl: int = None):
        """Set value in cache.

        Args:
            key: Cache key.
            value: Value to store.
            ttl: Time-to-live in seconds; ``None`` uses ``default_ttl``.
        """
        with self._lock:
            # Replacing an existing key must not count against capacity
            # (which would evict an unrelated entry), and re-inserting it
            # below marks it as most recently used.
            self._cache.pop(key, None)

            # Evict least recently used entries if at capacity
            while len(self._cache) >= self.max_size:
                self._cache.popitem(last=False)
                self._stats['evictions'] += 1

            expires_at = None
            if ttl is not None or self.default_ttl:
                ttl_seconds = ttl if ttl is not None else self.default_ttl
                expires_at = datetime.now() + timedelta(seconds=ttl_seconds)

            self._cache[key] = CacheEntry(
                key=key,
                value=value,
                created_at=datetime.now(),
                expires_at=expires_at
            )

    def delete(self, key: str) -> bool:
        """Delete entry from cache; return True if it existed."""
        with self._lock:
            return self._cache.pop(key, None) is not None

    def clear(self):
        """Clear all entries (statistics are kept)."""
        with self._lock:
            self._cache.clear()

    def cleanup_expired(self) -> int:
        """Remove expired entries, return count removed"""
        with self._lock:
            now = datetime.now()
            expired_keys = [
                k for k, v in self._cache.items()
                if v.expires_at and now > v.expires_at
            ]
            for key in expired_keys:
                del self._cache[key]
            return len(expired_keys)

    @property
    def stats(self) -> Dict[str, Any]:
        """Get cache statistics (size, capacity, hits, misses, hit rate, evictions)."""
        with self._lock:
            total = self._stats['hits'] + self._stats['misses']
            hit_rate = self._stats['hits'] / total if total > 0 else 0

            return {
                'size': len(self._cache),
                'max_size': self.max_size,
                'hits': self._stats['hits'],
                'misses': self._stats['misses'],
                'hit_rate': round(hit_rate, 3),
                'evictions': self._stats['evictions']
            }

    def persist(self):
        """Save cache to ``persist_path`` as JSON; no-op when no path is configured.

        Entries whose value is not JSON-serializable are skipped.
        """
        if not self.persist_path:
            return

        with self._lock:
            self.persist_path.parent.mkdir(parents=True, exist_ok=True)

            # Serialize cache (excluding non-serializable values)
            data = {}
            for key, entry in self._cache.items():
                try:
                    json.dumps(entry.value)  # Test serializability
                except (TypeError, ValueError):
                    continue  # Skip non-serializable entries
                data[key] = {
                    'value': entry.value,
                    'created_at': entry.created_at.isoformat(),
                    'expires_at': entry.expires_at.isoformat() if entry.expires_at else None,
                    'access_count': entry.access_count
                }

            with open(self.persist_path, 'w', encoding='utf-8') as f:
                json.dump(data, f)

    def _load_from_disk(self):
        """Load non-expired entries from ``persist_path``.

        Failures are logged and leave the cache with whatever was loaded so
        far. The loaded set is trimmed to ``max_size``, dropping the entries
        that appear earliest in the file.
        """
        if not self.persist_path or not self.persist_path.exists():
            return

        try:
            with open(self.persist_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            now = datetime.now()
            for key, entry_data in data.items():
                expires_at = None
                if entry_data.get('expires_at'):
                    expires_at = datetime.fromisoformat(entry_data['expires_at'])
                    if expires_at < now:
                        continue  # Skip expired

                self._cache[key] = CacheEntry(
                    key=key,
                    value=entry_data['value'],
                    created_at=datetime.fromisoformat(entry_data['created_at']),
                    expires_at=expires_at,
                    access_count=entry_data.get('access_count', 0)
                )
        except Exception as e:
            logger.warning(f"Failed to load cache from disk: {e}")

        # A file written with a larger capacity must not overfill this cache.
        while len(self._cache) > self.max_size:
            self._cache.popitem(last=False)


def cached_analysis(ttl: int = 3600):
    """
    Decorator for caching analysis results.

    Each decorated function gets its own ``AnalysisCache``. The cache key is
    derived from the function name and the ``str()`` of every argument.
    Results are stored boxed in a 1-tuple so that a legitimately returned
    ``None`` can be told apart from a cache miss (``AnalysisCache.get``
    returns ``None`` for misses); without the box such functions would be
    recomputed on every call.

    The wrapper exposes ``cache`` (the underlying cache, holding the boxed
    results), ``cache_stats()`` and ``clear_cache()``.

    Args:
        ttl: Time-to-live in seconds
    """
    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        cache = AnalysisCache(default_ttl=ttl)

        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> T:
            # Generate cache key from function args
            key = cache._generate_key(func.__name__, *args, **kwargs)

            # Check cache; a hit is always a 1-tuple, never None
            boxed = cache.get(key)
            if boxed is not None:
                logger.debug(f"Cache hit for {func.__name__}")
                return boxed[0]

            # Execute and cache
            result = func(*args, **kwargs)
            cache.set(key, (result,))
            logger.debug(f"Cached result for {func.__name__}")

            return result

        wrapper.cache = cache
        wrapper.cache_stats = lambda: cache.stats
        wrapper.clear_cache = cache.clear

        return wrapper
    return decorator


# =============================================================================
# Global Cache Instance
# =============================================================================

# Process-wide cache instance, created on first use by get_analysis_cache().
_global_cache: Optional[AnalysisCache] = None


def get_analysis_cache() -> AnalysisCache:
    """Return the shared analysis cache, building it on the first call.

    The cache persists to ``cache/analysis_cache.json`` under the default
    storage path reported by ``ucts.core.context_manager``.
    """
    global _global_cache
    if _global_cache is not None:
        return _global_cache

    # Imported here rather than at module import time.
    from ucts.core.context_manager import get_default_storage_path

    storage_root = get_default_storage_path()
    cache_file = storage_root / "cache" / "analysis_cache.json"
    _global_cache = AnalysisCache(
        persist_path=str(cache_file),
        default_ttl=3600,
        max_size=1000,
    )
    return _global_cache


def cache_analysis_result(key: str, value: Any, ttl: int = None):
    """Store ``value`` under ``key`` in the shared analysis cache.

    Args:
        key: Cache key.
        value: Result to store.
        ttl: Time-to-live in seconds, passed through to the cache's ``set``.
    """
    shared_cache = get_analysis_cache()
    shared_cache.set(key, value, ttl)


def get_cached_analysis(key: str) -> Optional[Any]:
    """Look up ``key`` in the shared analysis cache and return what its ``get`` yields."""
    shared_cache = get_analysis_cache()
    return shared_cache.get(key)
