"""Tests for web chat export ingester"""
import json
from pathlib import Path
from unittest.mock import patch

import pytest

from ucts.ingestion.web_chat import WebChatIngester
from ucts.ingestion.base import FileNotFoundIngestionError, ParseError, InvalidFormatError


class TestWebChatIngester:
    """Tests for web chat export ingester.

    Covers the three input shapes exercised below -- ChatGPT ``mapping``
    exports, Claude.ai ``chat_messages`` exports and generic message
    lists -- plus role normalization, content extraction and error paths.
    """

    @staticmethod
    def _ingest_json(tmp_path, data, filename="chat.json"):
        """Write ``data`` as JSON to ``tmp_path / filename`` and ingest it.

        Args:
            tmp_path: pytest temporary directory for the current test.
            data: JSON-serializable payload to store in the file.
            filename: Name of the file created inside ``tmp_path``.

        Returns:
            Whatever ``WebChatIngester().ingest`` returns for that file.
        """
        file_path = tmp_path / filename
        # json.dumps emits pure ASCII by default; pinning the encoding just
        # keeps the write independent of the platform's locale default.
        file_path.write_text(json.dumps(data), encoding="utf-8")
        return WebChatIngester().ingest(str(file_path))

    def test_ingest_chatgpt_mapping_format(self, tmp_path):
        """ChatGPT mapping export yields ordered messages and metadata"""
        data = {
            "title": "Test Chat",
            "mapping": {
                "node1": {
                    "message": {
                        "author": {"role": "user"},
                        "content": {"parts": ["Hello ChatGPT"]},
                        "create_time": 1700000001,
                    }
                },
                "node2": {
                    "message": {
                        "author": {"role": "assistant"},
                        "content": {"parts": ["Hello! How can I help?"]},
                        "create_time": 1700000002,
                    }
                },
            }
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert len(session.messages) == 2
        assert session.messages[0].role == "user"
        assert session.messages[0].content == "Hello ChatGPT"
        assert session.messages[1].role == "assistant"
        assert session.metadata["format"] == "chatgpt"
        assert session.metadata["title"] == "Test Chat"

    def test_ingest_chatgpt_string_content(self, tmp_path):
        """ChatGPT content given as a plain string (no parts) is kept as-is"""
        data = {
            "mapping": {
                "node1": {
                    "message": {
                        "author": {"role": "user"},
                        "content": "Direct string content",
                        "create_time": 1700000001,
                    }
                },
            }
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert session.messages[0].content == "Direct string content"

    def test_ingest_chatgpt_with_model_slug(self, tmp_path):
        """ChatGPT ``model_slug`` is exposed as the message's ``model``"""
        data = {
            "mapping": {
                "node1": {
                    "message": {
                        "author": {"role": "assistant"},
                        "content": {"parts": ["Response"]},
                        "create_time": 1700000001,
                        "metadata": {"model_slug": "gpt-4"}
                    }
                },
            }
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert session.messages[0].metadata.get("model") == "gpt-4"

    def test_ingest_chatgpt_skips_system_messages(self, tmp_path):
        """System-role nodes are dropped from ChatGPT exports"""
        data = {
            "mapping": {
                "node1": {
                    "message": {
                        "author": {"role": "system"},
                        "content": {"parts": ["System prompt"]},
                        "create_time": 1700000001,
                    }
                },
                "node2": {
                    "message": {
                        "author": {"role": "user"},
                        "content": {"parts": ["User message"]},
                        "create_time": 1700000002,
                    }
                },
            }
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert len(session.messages) == 1
        assert session.messages[0].role == "user"

    def test_ingest_chatgpt_sorts_by_create_time(self, tmp_path):
        """Messages come back ordered by create_time, not by mapping order"""
        # The later node is listed first on purpose so only a sort can
        # produce the expected order.
        data = {
            "mapping": {
                "later": {
                    "message": {
                        "author": {"role": "assistant"},
                        "content": {"parts": ["Second"]},
                        "create_time": 2,
                    }
                },
                "earlier": {
                    "message": {
                        "author": {"role": "user"},
                        "content": {"parts": ["First"]},
                        "create_time": 1,
                    }
                },
            }
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert session.messages[0].content == "First"
        assert session.messages[1].content == "Second"

    def test_ingest_claude_web_format(self, tmp_path):
        """Claude.ai export maps senders to roles and keeps uuid/name"""
        data = {
            "uuid": "abc-123",
            "name": "My Chat",
            "chat_messages": [
                {"sender": "human", "text": "Hello Claude", "created_at": "2024-01-01T10:00:00Z"},
                {"sender": "assistant", "text": "Hello!", "created_at": "2024-01-01T10:00:01Z"},
            ]
        }

        session = self._ingest_json(tmp_path, data, "claude_web.json")

        assert len(session.messages) == 2
        assert session.messages[0].role == "user"
        assert session.messages[1].role == "assistant"
        assert session.metadata["format"] == "claude_web"
        assert session.metadata["uuid"] == "abc-123"
        assert session.metadata["name"] == "My Chat"

    def test_ingest_claude_web_handles_non_string_text(self, tmp_path):
        """Non-string Claude text is stringified; None becomes empty"""
        data = {
            "uuid": "abc-123",
            "chat_messages": [
                {"sender": "human", "text": 12345},
                {"sender": "assistant", "text": None},
            ]
        }

        session = self._ingest_json(tmp_path, data, "claude_web.json")

        assert session.messages[0].content == "12345"
        assert session.messages[1].content == ""

    def test_ingest_generic_messages_format(self, tmp_path):
        """Generic payload under a ``messages`` key is ingested"""
        data = {
            "messages": [
                {"role": "user", "content": "Generic message"},
                {"role": "assistant", "content": "Generic response"},
            ]
        }

        session = self._ingest_json(tmp_path, data, "generic.json")

        assert len(session.messages) == 2
        assert session.metadata["format"] == "generic_json"

    def test_ingest_generic_conversation_format(self, tmp_path):
        """Generic payload under a ``conversation`` key is ingested"""
        data = {
            "conversation": [
                {"role": "user", "content": "Hello"},
            ]
        }

        session = self._ingest_json(tmp_path, data, "generic.json")

        assert len(session.messages) == 1

    def test_ingest_generic_array_format(self, tmp_path):
        """A bare top-level JSON array is treated as generic messages"""
        data = [
            {"role": "user", "content": "Direct array"},
            {"role": "assistant", "content": "Response"},
        ]

        session = self._ingest_json(tmp_path, data, "generic.json")

        assert len(session.messages) == 2
        assert session.metadata["format"] == "generic_json"

    def test_ingest_generic_uses_sender_field(self, tmp_path):
        """Generic format reads the role from ``sender``"""
        data = [
            {"sender": "user", "content": "Using sender"},
        ]

        session = self._ingest_json(tmp_path, data, "generic.json")

        assert session.messages[0].role == "user"

    def test_ingest_generic_uses_text_field(self, tmp_path):
        """Generic format reads the content from ``text``"""
        data = [
            {"role": "user", "text": "Using text field"},
        ]

        session = self._ingest_json(tmp_path, data, "generic.json")

        assert session.messages[0].content == "Using text field"

    def test_ingest_generic_uses_message_field(self, tmp_path):
        """Generic format reads the content from ``message``"""
        data = [
            {"role": "user", "message": "Using message field"},
        ]

        session = self._ingest_json(tmp_path, data, "generic.json")

        assert session.messages[0].content == "Using message field"

    def test_role_normalization_human(self, tmp_path):
        """Role ``human`` is normalized to ``user``"""
        session = self._ingest_json(tmp_path, [{"role": "human", "content": "Test"}])

        assert session.messages[0].role == "user"

    def test_role_normalization_bot(self, tmp_path):
        """Role ``bot`` is normalized to ``assistant``"""
        session = self._ingest_json(tmp_path, [{"role": "bot", "content": "Test"}])

        assert session.messages[0].role == "assistant"

    def test_role_normalization_ai(self, tmp_path):
        """Role ``ai`` is normalized to ``assistant``"""
        session = self._ingest_json(tmp_path, [{"role": "ai", "content": "Test"}])

        assert session.messages[0].role == "assistant"

    def test_role_normalization_claude(self, tmp_path):
        """Role ``claude`` is normalized to ``assistant``"""
        session = self._ingest_json(tmp_path, [{"role": "claude", "content": "Test"}])

        assert session.messages[0].role == "assistant"

    def test_role_normalization_chatgpt(self, tmp_path):
        """Role ``chatgpt`` is normalized to ``assistant``"""
        session = self._ingest_json(tmp_path, [{"role": "chatgpt", "content": "Test"}])

        assert session.messages[0].role == "assistant"

    def test_ingest_extracts_code_blocks(self, tmp_path):
        """A fenced code block is extracted together with its language"""
        data = [
            {
                "role": "assistant",
                "content": "Here's code:\n```javascript\nconst x = 1;\n```"
            },
        ]

        session = self._ingest_json(tmp_path, data)

        assert len(session.code_blocks) == 1
        assert session.code_blocks[0].language == "javascript"

    def test_ingest_extracts_todos(self, tmp_path):
        """TODO and FIXME markers are both collected"""
        data = [
            {"role": "assistant", "content": "TODO: Implement feature\nFIXME: Bug here"},
        ]

        session = self._ingest_json(tmp_path, data)

        assert len(session.todos) >= 2

    def test_ingest_extracts_decisions(self, tmp_path):
        """A "We decided ..." sentence is recorded as a decision"""
        data = [
            {"role": "assistant", "content": "We decided to use React for the frontend."},
        ]

        session = self._ingest_json(tmp_path, data)

        assert len(session.decisions) >= 1

    def test_ingest_file_not_found(self):
        """A missing file raises FileNotFoundIngestionError"""
        ingester = WebChatIngester()

        with pytest.raises(FileNotFoundIngestionError):
            ingester.ingest("/nonexistent/path.json")

    def test_ingest_empty_path(self):
        """An empty path raises ValueError mentioning "cannot be empty\""""
        ingester = WebChatIngester()

        with pytest.raises(ValueError, match="cannot be empty"):
            ingester.ingest("")

    def test_ingest_invalid_json(self, tmp_path):
        """Unparseable file content raises ParseError("Invalid JSON ...")"""
        # Written by hand rather than through _ingest_json because the
        # content must deliberately not be valid JSON.
        file_path = tmp_path / "invalid.json"
        file_path.write_text("not json at all", encoding="utf-8")

        ingester = WebChatIngester()

        with pytest.raises(ParseError, match="Invalid JSON"):
            ingester.ingest(str(file_path))

    def test_ingest_skips_non_dict_messages(self, tmp_path):
        """Non-dict entries in a generic message list are ignored"""
        data = [
            {"role": "user", "content": "Valid"},
            "invalid",
            None,
            {"role": "assistant", "content": "Also valid"},
        ]

        session = self._ingest_json(tmp_path, data, "mixed.json")

        assert len(session.messages) == 2

    def test_ingest_chatgpt_skips_empty_content(self, tmp_path):
        """ChatGPT nodes whose only part is empty are dropped"""
        data = {
            "mapping": {
                "node1": {
                    "message": {
                        "author": {"role": "user"},
                        "content": {"parts": [""]},
                        "create_time": 1,
                    }
                },
                "node2": {
                    "message": {
                        "author": {"role": "user"},
                        "content": {"parts": ["Has content"]},
                        "create_time": 2,
                    }
                },
            }
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert len(session.messages) == 1
        assert session.messages[0].content == "Has content"

    def test_ingest_chatgpt_handles_missing_message(self, tmp_path):
        """ChatGPT nodes without a ``message`` entry are ignored"""
        data = {
            "mapping": {
                "node1": {},
                "node2": {
                    "message": {
                        "author": {"role": "user"},
                        "content": {"parts": ["Valid"]},
                        "create_time": 1,
                    }
                },
            }
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert len(session.messages) == 1

    def test_ingest_chatgpt_handles_non_dict_mapping(self, tmp_path):
        """A non-dict ``mapping`` value produces an empty session"""
        data = {
            "mapping": "not a dict"
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert len(session.messages) == 0

    def test_ingest_claude_web_handles_non_list_messages(self, tmp_path):
        """A non-list ``chat_messages`` value produces an empty session"""
        data = {
            "uuid": "abc",
            "chat_messages": "not a list"
        }

        session = self._ingest_json(tmp_path, data, "claude_web.json")

        assert len(session.messages) == 0

    def test_ingest_claude_web_skips_non_dict_messages(self, tmp_path):
        """Non-dict entries in Claude ``chat_messages`` are ignored"""
        data = {
            "uuid": "abc",
            "chat_messages": [
                {"sender": "human", "text": "Valid"},
                "invalid",
                {"sender": "assistant", "text": "Also valid"},
            ]
        }

        session = self._ingest_json(tmp_path, data, "claude_web.json")

        assert len(session.messages) == 2

    def test_format_detection_chatgpt_with_conversations(self, tmp_path):
        """A ``conversations`` key alone is detected as ChatGPT format"""
        # Detection is called on the in-memory payload, so nothing needs to
        # be written to disk; tmp_path stays in the signature unchanged.
        data = {"conversations": []}

        ingester = WebChatIngester()
        assert ingester._is_chatgpt_format(data) is True

    def test_format_detection_claude_web_with_uuid(self, tmp_path):
        """``uuid`` plus ``name`` is detected as Claude web format"""
        # Detection is called on the in-memory payload, so nothing needs to
        # be written to disk; tmp_path stays in the signature unchanged.
        data = {"uuid": "abc", "name": "Chat"}

        ingester = WebChatIngester()
        assert ingester._is_claude_web_format(data) is True

    def test_source_is_web(self, tmp_path):
        """Ingested sessions report ``web`` as their source"""
        data = [{"role": "user", "content": "Test"}]

        session = self._ingest_json(tmp_path, data)

        assert session.source == "web"

    def test_timestamp_preserved(self, tmp_path):
        """A message's ``timestamp`` string is carried over unchanged"""
        data = [
            {"role": "user", "content": "Test", "timestamp": "2024-01-15T12:00:00Z"},
        ]

        session = self._ingest_json(tmp_path, data)

        assert session.messages[0].timestamp == "2024-01-15T12:00:00Z"

    def test_chatgpt_multipart_content(self, tmp_path):
        """Every part of a multi-part ChatGPT message appears in the content"""
        data = {
            "mapping": {
                "node1": {
                    "message": {
                        "author": {"role": "assistant"},
                        "content": {"parts": ["Part 1", "Part 2", "Part 3"]},
                        "create_time": 1,
                    }
                },
            }
        }

        session = self._ingest_json(tmp_path, data, "chatgpt.json")

        assert "Part 1" in session.messages[0].content
        assert "Part 2" in session.messages[0].content
        assert "Part 3" in session.messages[0].content

