"""Tests for Conversation Analytics module"""
import json
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock

import pytest

from ucts.analytics.conversation import (
    AnalyticsConfig,
    UsageMetrics,
    CostEstimate,
    QualityMetrics,
    PatternMetrics,
    AnalyticsReport,
    ConversationAnalytics,
    TOKEN_PRICING,
    get_analytics,
)
from ucts.core.models import Session, Message, CodeBlock


class TestAnalyticsConfig:
    """Checks the defaults and overrides exposed by AnalyticsConfig."""

    def test_default_config(self):
        """A config built without arguments carries the expected defaults."""
        defaults = AnalyticsConfig()

        # Boolean switches first, then the scalar settings.
        assert defaults.track_costs is True
        assert defaults.detect_patterns is True
        assert defaults.model == "default"
        assert defaults.quality_threshold == 0.7

    def test_default_storage_dir(self):
        """An unspecified storage_dir resolves to a path naming .ucts and analytics."""
        storage_dir = AnalyticsConfig().storage_dir

        for fragment in (".ucts", "analytics"):
            assert fragment in storage_dir

    def test_custom_storage_dir(self, tmp_path):
        """An explicitly supplied storage_dir is kept verbatim."""
        requested = str(tmp_path)

        assert AnalyticsConfig(storage_dir=requested).storage_dir == requested


class TestUsageMetrics:
    """Checks UsageMetrics defaults and its dictionary serialization."""

    def test_default_values(self):
        """A freshly built UsageMetrics has every counter and average at zero."""
        empty = UsageMetrics()

        integer_counters = (
            "total_input_tokens",
            "total_output_tokens",
            "total_tokens",
            "message_count",
        )
        for counter in integer_counters:
            assert getattr(empty, counter) == 0

        for average in ("avg_input_per_message", "avg_output_per_message"):
            assert getattr(empty, average) == 0.0

    def test_to_dict(self):
        """to_dict() echoes the token and message counters it was built with."""
        fields = {
            "total_input_tokens": 1000,
            "total_output_tokens": 2000,
            "total_tokens": 3000,
            "message_count": 10,
            "avg_input_per_message": 100.0,
            "avg_output_per_message": 200.0,
        }

        serialized = UsageMetrics(**fields).to_dict()

        # Only the integer counters are checked; the averages are inputs only.
        for key in (
            "total_input_tokens",
            "total_output_tokens",
            "total_tokens",
            "message_count",
        ):
            assert serialized[key] == fields[key]


class TestCostEstimate:
    """Checks CostEstimate defaults and its dictionary serialization."""

    def test_default_values(self):
        """Only the model is required; costs start at zero and currency is USD."""
        blank = CostEstimate(model="default")

        observed_costs = (blank.input_cost, blank.output_cost, blank.total_cost)
        assert observed_costs == (0.0, 0.0, 0.0)
        assert blank.currency == "USD"

    def test_to_dict(self):
        """to_dict() keeps the model name and reports costs rounded to 4 places."""
        estimate = CostEstimate(
            model="gpt-4",
            input_cost=0.12345,
            output_cost=0.54321,
            total_cost=0.66666,
        )

        serialized = estimate.to_dict()

        assert serialized["model"] == "gpt-4"
        # 0.12345 -> 0.1235 and 0.54321 -> 0.5432 once rounded to 4 places.
        assert serialized["input_cost"] == 0.1235
        assert serialized["output_cost"] == 0.5432


class TestQualityMetrics:
    """Tests for QualityMetrics dataclass"""

    def test_default_values(self):
        """Test that every score defaults to 0.0"""
        quality = QualityMetrics()

        assert quality.clarity_score == 0.0
        assert quality.efficiency_score == 0.0
        assert quality.success_rate == 0.0
        assert quality.code_quality == 0.0

    def test_overall_score(self):
        """Test that overall_score() is the mean of the four scores"""
        quality = QualityMetrics(
            clarity_score=0.8,
            efficiency_score=0.6,
            success_rate=0.9,
            code_quality=0.7
        )

        overall = quality.overall_score()

        # (0.8 + 0.6 + 0.9 + 0.7) / 4. The inputs are not exactly representable
        # in binary floating point, so compare with a tolerance instead of
        # relying on one particular summation order producing exactly 0.75.
        assert overall == pytest.approx(0.75)

    def test_to_dict(self):
        """Test QualityMetrics serialization"""
        quality = QualityMetrics(
            clarity_score=0.8,
            efficiency_score=0.6,
            has_clear_goals=True,
            has_examples=True
        )

        result = quality.to_dict()

        # Tolerant comparison for the float score; flags must be real booleans.
        assert result["clarity_score"] == pytest.approx(0.8)
        assert result["has_clear_goals"] is True
        assert result["has_examples"] is True
        # The derived score is exported alongside the stored fields.
        assert "overall_score" in result


class TestPatternMetrics:
    """Checks PatternMetrics defaults and its dictionary serialization."""

    def test_default_values(self):
        """A freshly built PatternMetrics starts with empty collections."""
        fresh = PatternMetrics()

        for list_field in ("common_issues", "successful_patterns", "error_patterns"):
            assert getattr(fresh, list_field) == []
        assert fresh.language_distribution == {}

    def test_to_dict(self):
        """to_dict() carries the issues list and language counts through."""
        issues = ["unclear requirements"]
        languages = {"python": 5, "javascript": 3}

        serialized = PatternMetrics(
            common_issues=issues,
            successful_patterns=["clear specification"],
            language_distribution=languages,
            topic_distribution={"api": 2},
        ).to_dict()

        assert serialized["common_issues"] == ["unclear requirements"]
        assert serialized["language_distribution"]["python"] == 5


class TestAnalyticsReport:
    """Checks AnalyticsReport defaults plus its dict and markdown renderings."""

    def test_default_values(self):
        """An empty report covers zero sessions and has no period bounds."""
        empty = AnalyticsReport()

        assert empty.session_count == 0
        assert empty.period_start is None
        assert empty.period_end is None

    def test_to_dict(self):
        """to_dict() exposes the session count, period start and all sections."""
        january_first = datetime(2025, 1, 1)
        january_last = datetime(2025, 1, 31)

        serialized = AnalyticsReport(
            session_count=5,
            period_start=january_first,
            period_end=january_last,
        ).to_dict()

        assert serialized["session_count"] == 5
        assert "2025-01-01" in serialized["period_start"]
        # Every nested metrics section must be present in the export.
        for section in ("usage", "costs", "quality", "patterns"):
            assert section in serialized

    def test_to_markdown(self):
        """to_markdown() renders the title, session count, tokens and cost."""
        token_usage = UsageMetrics(
            total_input_tokens=5000,
            total_output_tokens=10000,
            total_tokens=15000,
            message_count=20,
        )
        cost_estimate = CostEstimate(
            model="default",
            input_cost=0.015,
            output_cost=0.15,
            total_cost=0.165,
        )

        rendered = AnalyticsReport(
            session_count=10,
            usage=token_usage,
            costs=cost_estimate,
        ).to_markdown()

        expected_fragments = (
            "# UCTS Conversation Analytics Report",
            "Sessions analyzed: 10",
            "Total tokens: 15,000",
            "Estimated cost: $0.17",
        )
        for fragment in expected_fragments:
            assert fragment in rendered


class TestConversationAnalytics:
    """Tests for ConversationAnalytics class

    Every test stores its data under pytest's per-test ``tmp_path`` so that
    history written by one test cannot be picked up by another.
    """

    @staticmethod
    def _make_analytics(storage_dir, **config_overrides):
        """Return a ConversationAnalytics whose config stores data under *storage_dir*.

        Extra keyword arguments are forwarded to AnalyticsConfig (e.g. ``model``).
        """
        config = AnalyticsConfig(storage_dir=str(storage_dir), **config_overrides)
        return ConversationAnalytics(config)

    def test_init_creates_storage(self, tmp_path):
        """Test that init creates storage directory"""
        storage_dir = tmp_path / "analytics"
        # Guard against a false positive: the directory must not pre-exist.
        assert not storage_dir.exists()

        self._make_analytics(storage_dir)

        assert storage_dir.exists()

    def test_estimate_tokens(self, tmp_path):
        """Test token estimation"""
        analytics = self._make_analytics(tmp_path)

        # ~4 chars per token
        tokens = analytics.estimate_tokens("Hello world!")  # 12 chars
        assert tokens == 3

        long_text = "a" * 1000
        tokens = analytics.estimate_tokens(long_text)
        assert tokens == 250

    def test_analyze_session_basic(self, tmp_path):
        """Test analyzing a basic session"""
        analytics = self._make_analytics(tmp_path)

        session = Session(
            source="test",
            messages=[
                Message(role="user", content="Hello, please help me with coding"),
                Message(role="assistant", content="Of course! I'm here to help.")
            ]
        )

        report = analytics.analyze_session(session)

        assert report.session_count == 1
        assert report.usage.message_count == 2
        assert report.usage.total_tokens > 0
        assert report.costs.total_cost >= 0

    def test_analyze_session_with_code(self, tmp_path):
        """Test analyzing a session with code blocks"""
        analytics = self._make_analytics(tmp_path)

        session = Session(
            source="test",
            messages=[
                Message(role="user", content="Create a Python function"),
                Message(role="assistant", content="Here's the function:")
            ],
            code_blocks=[
                CodeBlock(language="python", content="def hello():\n    return 'Hello'")
            ]
        )

        report = analytics.analyze_session(session)

        assert report.patterns.language_distribution.get("python", 0) == 1
        assert report.usage.total_output_tokens > 0

    def test_calculate_costs_default_model(self, tmp_path):
        """Test cost calculation with default model"""
        analytics = self._make_analytics(tmp_path, model="default")

        usage = UsageMetrics(
            total_input_tokens=1_000_000,
            total_output_tokens=1_000_000
        )

        costs = analytics._calculate_costs(usage)

        # Default pricing: input=3.0, output=15.0 per 1M tokens.
        # Costs are floats, so compare with a tolerance rather than exactly.
        assert costs.input_cost == pytest.approx(3.0)
        assert costs.output_cost == pytest.approx(15.0)
        assert costs.total_cost == pytest.approx(18.0)

    def test_calculate_costs_gpt4(self, tmp_path):
        """Test cost calculation with GPT-4 model"""
        analytics = self._make_analytics(tmp_path, model="gpt-4")

        usage = UsageMetrics(
            total_input_tokens=1_000_000,
            total_output_tokens=1_000_000
        )

        costs = analytics._calculate_costs(usage)

        # GPT-4 pricing: input=30.0, output=60.0 per 1M tokens.
        # Costs are floats, so compare with a tolerance rather than exactly.
        assert costs.input_cost == pytest.approx(30.0)
        assert costs.output_cost == pytest.approx(60.0)
        assert costs.total_cost == pytest.approx(90.0)

    def test_calculate_quality_clear_prompts(self, tmp_path):
        """Test quality calculation with clear prompts"""
        analytics = self._make_analytics(tmp_path)

        # The user prompt states a goal, gives an example and names a constraint.
        session = Session(
            source="test",
            messages=[
                Message(
                    role="user",
                    content=(
                        "Please create a function that does X. "
                        "For example, it should handle Y. "
                        "It must support Z constraint."
                    ),
                ),
                Message(
                    role="assistant",
                    content=(
                        "I've created the function as requested. "
                        "Here's the completed implementation."
                    ),
                ),
            ]
        )

        report = analytics.analyze_session(session)

        assert report.quality.has_clear_goals is True
        assert report.quality.has_examples is True
        assert report.quality.has_constraints is True

    def test_calculate_quality_empty_session(self, tmp_path):
        """Test quality calculation with empty session"""
        analytics = self._make_analytics(tmp_path)

        session = Session(source="test", messages=[])

        report = analytics.analyze_session(session)

        assert report.quality.clarity_score == 0.0
        assert report.quality.success_rate == 0.0

    def test_detect_patterns_languages(self, tmp_path):
        """Test pattern detection for languages"""
        analytics = self._make_analytics(tmp_path)

        session = Session(
            source="test",
            messages=[Message(role="user", content="test")],
            code_blocks=[
                CodeBlock(language="python", content="print('hello')"),
                CodeBlock(language="python", content="def foo(): pass"),
                CodeBlock(language="javascript", content="console.log('hi')"),
            ]
        )

        report = analytics.analyze_session(session)

        assert report.patterns.language_distribution["python"] == 2
        assert report.patterns.language_distribution["javascript"] == 1

    def test_detect_patterns_issues(self, tmp_path):
        """Test pattern detection for common issues"""
        analytics = self._make_analytics(tmp_path)

        session = Session(
            source="test",
            messages=[
                Message(role="user", content="What do you mean by that? Can you clarify?"),
                Message(role="assistant", content="Let me explain...")
            ]
        )

        report = analytics.analyze_session(session)

        assert "unclear requirements" in report.patterns.common_issues

    def test_detect_patterns_success(self, tmp_path):
        """Test pattern detection for successful patterns"""
        analytics = self._make_analytics(tmp_path)

        session = Session(
            source="test",
            messages=[
                Message(role="user", content="First, do step 1. Then, do step 2. For example, like this."),
                Message(role="assistant", content="Perfect, that works!")
            ]
        )

        report = analytics.analyze_session(session)

        assert "incremental approach" in report.patterns.successful_patterns
        assert "examples provided" in report.patterns.successful_patterns

    def test_detect_patterns_topics(self, tmp_path):
        """Test topic detection"""
        analytics = self._make_analytics(tmp_path)

        session = Session(
            source="test",
            messages=[
                Message(role="user", content="I need help with my API endpoint and database query"),
                Message(role="assistant", content="Here's the REST API and SQL query")
            ]
        )

        report = analytics.analyze_session(session)

        assert "api" in report.patterns.topic_distribution
        assert "database" in report.patterns.topic_distribution

    def test_analyze_multiple_sessions(self, tmp_path):
        """Test analyzing multiple sessions"""
        analytics = self._make_analytics(tmp_path)

        sessions = [
            Session(
                source="test1",
                messages=[
                    Message(role="user", content="Hello"),
                    Message(role="assistant", content="Hi there!")
                ]
            ),
            Session(
                source="test2",
                messages=[
                    Message(role="user", content="Help me"),
                    Message(role="assistant", content="Sure thing")
                ]
            )
        ]

        report = analytics.analyze_sessions(sessions)

        assert report.session_count == 2
        assert report.usage.message_count == 4

    def test_analyze_empty_sessions(self, tmp_path):
        """Test analyzing empty session list"""
        analytics = self._make_analytics(tmp_path)

        report = analytics.analyze_sessions([])

        assert report.session_count == 0

    def test_history_persistence(self, tmp_path):
        """Test that analytics history persists"""
        # First instance records one analysed session.
        analytics1 = self._make_analytics(tmp_path)
        session = Session(
            source="test",
            messages=[Message(role="user", content="test message")]
        )
        analytics1.analyze_session(session)

        # Second instance points at the same directory and should load history
        analytics2 = self._make_analytics(tmp_path)

        assert len(analytics2._history) >= 1

    def test_get_historical_report(self, tmp_path):
        """Test getting historical report"""
        analytics = self._make_analytics(tmp_path)

        # Add some sessions
        for i in range(3):
            session = Session(
                source=f"test{i}",
                messages=[Message(role="user", content=f"message {i}")]
            )
            analytics.analyze_session(session)

        report = analytics.get_historical_report(days=30)

        assert report["summary"]["total_sessions"] >= 3

    def test_get_historical_report_empty(self, tmp_path):
        """Test getting historical report with no data"""
        analytics = self._make_analytics(tmp_path)

        # Query far in the past
        report = analytics.get_historical_report(
            start_date=datetime(2020, 1, 1),
            end_date=datetime(2020, 1, 2)
        )

        assert report["records"] == []

    def test_get_cost_projection(self, tmp_path):
        """Test cost projection"""
        analytics = self._make_analytics(tmp_path)

        usage = UsageMetrics(
            total_input_tokens=10000,
            total_output_tokens=20000,
            total_tokens=30000
        )

        projection = analytics.get_cost_projection(usage, projection_days=30)

        # The supplied usage is treated as one day's worth of tokens.
        assert projection["current_daily"]["tokens"] == 30000
        assert projection["projection"]["days"] == 30
        assert projection["projection"]["total_tokens"] == 30000 * 30
        assert projection["monthly_estimate"]["tokens"] == 30000 * 30
        assert projection["yearly_estimate"]["tokens"] == 30000 * 365


class TestTokenPricing:
    """Checks the TOKEN_PRICING table."""

    def test_pricing_exists_for_models(self):
        """Each expected model has an entry with both input and output rates."""
        required_models = (
            "claude-3-opus",
            "claude-3-sonnet",
            "claude-3-haiku",
            "gpt-4-turbo",
            "gpt-4",
            "gpt-3.5-turbo",
            "default",
        )

        for name in required_models:
            assert name in TOKEN_PRICING
            rates = TOKEN_PRICING[name]
            for direction in ("input", "output"):
                assert direction in rates

    def test_output_more_expensive_than_input(self):
        """For every model, output tokens cost at least as much as input tokens."""
        for model, pricing in TOKEN_PRICING.items():
            output_rate = pricing["output"]
            input_rate = pricing["input"]
            assert output_rate >= input_rate, f"Output should be >= input for {model}"


class TestGetAnalytics:
    """Checks the get_analytics accessor function."""

    def test_returns_instance(self, tmp_path):
        """get_analytics(config) hands back a ConversationAnalytics object."""
        obtained = get_analytics(AnalyticsConfig(storage_dir=str(tmp_path)))

        assert obtained is not None
        assert isinstance(obtained, ConversationAnalytics)

    def test_singleton_behavior(self, tmp_path):
        """Two calls that each supply a config yield two distinct instances."""
        # Each call below passes an explicit config, pointing at a different
        # storage directory, so the earlier instance must not be handed back.
        first = get_analytics(AnalyticsConfig(storage_dir=str(tmp_path)))
        second = get_analytics(AnalyticsConfig(storage_dir=str(tmp_path / "other")))

        assert first is not second


class TestAggregation:
    """Tests for report aggregation

    Each test feeds two sessions to analyze_sessions() and checks one section
    of the combined report.
    """

    @staticmethod
    def _make_analytics(storage_dir):
        """Return a ConversationAnalytics that stores its data under *storage_dir*."""
        return ConversationAnalytics(AnalyticsConfig(storage_dir=str(storage_dir)))

    def test_aggregate_usage(self, tmp_path):
        """Test usage aggregation"""
        analytics = self._make_analytics(tmp_path)

        sessions = [
            Session(
                source="test1",
                messages=[
                    Message(role="user", content="a" * 100),
                    Message(role="assistant", content="b" * 200)
                ]
            ),
            Session(
                source="test2",
                messages=[
                    Message(role="user", content="c" * 100),
                    Message(role="assistant", content="d" * 200)
                ]
            )
        ]

        report = analytics.analyze_sessions(sessions)

        # Both directions carry text, and 2 sessions x 2 messages were supplied.
        assert report.usage.total_input_tokens > 0
        assert report.usage.total_output_tokens > 0
        assert report.usage.message_count == 4

    def test_aggregate_costs(self, tmp_path):
        """Test cost aggregation"""
        analytics = self._make_analytics(tmp_path)

        sessions = [
            Session(
                source="test1",
                messages=[Message(role="user", content="a" * 1000)]
            ),
            Session(
                source="test2",
                messages=[Message(role="user", content="b" * 1000)]
            )
        ]

        report = analytics.analyze_sessions(sessions)

        # Each session carries 1000 characters of user input, so the combined
        # estimate must be strictly positive; `>= 0` would also accept a report
        # whose costs were never computed.
        assert report.costs.total_cost > 0

    def test_aggregate_quality(self, tmp_path):
        """Test quality score aggregation"""
        analytics = self._make_analytics(tmp_path)

        sessions = [
            Session(
                source="test1",
                messages=[
                    Message(role="user", content="Please help me"),
                    Message(role="assistant", content="Done!")
                ]
            ),
            Session(
                source="test2",
                messages=[
                    Message(role="user", content="Create something"),
                    Message(role="assistant", content="Finished!")
                ]
            )
        ]

        report = analytics.analyze_sessions(sessions)

        # Aggregated quality scores must stay within the [0, 1] range.
        assert 0 <= report.quality.clarity_score <= 1
        assert 0 <= report.quality.success_rate <= 1

    def test_aggregate_patterns(self, tmp_path):
        """Test pattern aggregation"""
        analytics = self._make_analytics(tmp_path)

        sessions = [
            Session(
                source="test1",
                messages=[Message(role="user", content="test")],
                code_blocks=[
                    CodeBlock(language="python", content="pass"),
                    CodeBlock(language="python", content="pass")
                ]
            ),
            Session(
                source="test2",
                messages=[Message(role="user", content="test")],
                code_blocks=[
                    CodeBlock(language="javascript", content="null")
                ]
            )
        ]

        report = analytics.analyze_sessions(sessions)

        # Language counts are summed over the code blocks of both sessions.
        assert report.patterns.language_distribution["python"] == 2
        assert report.patterns.language_distribution["javascript"] == 1


class TestCodeQuality:
    """Checks that analysed Python snippets receive a positive code-quality score."""

    @staticmethod
    def _python_snippet_quality(storage_dir, snippet):
        """Analyse a one-message session holding *snippet* and return its code_quality."""
        settings = AnalyticsConfig(storage_dir=str(storage_dir))
        engine = ConversationAnalytics(settings)

        conversation = Session(
            source="test",
            messages=[Message(role="user", content="test")],
            code_blocks=[CodeBlock(language="python", content=snippet)],
        )

        return engine.analyze_session(conversation).quality.code_quality

    def test_code_quality_with_documentation(self, tmp_path):
        """A function carrying a docstring scores above zero."""
        documented = 'def hello():\n    """Say hello"""\n    return "Hello"'

        assert self._python_snippet_quality(tmp_path, documented) > 0

    def test_code_quality_with_error_handling(self, tmp_path):
        """A snippet wrapped in try/except scores above zero."""
        guarded = "try:\n    risky()\nexcept Exception:\n    pass"

        assert self._python_snippet_quality(tmp_path, guarded) > 0

    def test_code_quality_with_type_hints(self, tmp_path):
        """A function with annotated parameters and return type scores above zero."""
        annotated = "def greet(name: str) -> str:\n    return f'Hello {name}'"

        assert self._python_snippet_quality(tmp_path, annotated) > 0
