"""Tests for VS Code extension ingester"""
import json
from pathlib import Path
from unittest.mock import patch, mock_open

import pytest

from ucts.ingestion.vscode import VSCodeIngester
from ucts.ingestion.base import FileNotFoundIngestionError, ParseError, InvalidFormatError


class TestVSCodeIngester:
    """Tests for VS Code extension ingester.

    Most tests follow the same shape: serialise a payload to a JSON file
    under pytest's ``tmp_path``, run ``VSCodeIngester.ingest`` on it and
    assert on the returned session. The private helpers below hold that
    shared setup so each test only states its payload and expectations.
    """

    # ------------------------------------------------------------------
    # Shared setup helpers (not collected by pytest: no ``test`` prefix)
    # ------------------------------------------------------------------

    @staticmethod
    def _write_fixture(tmp_path, text, filename="vscode_chat.json"):
        """Write ``text`` to ``tmp_path / filename`` and return the path.

        The encoding is pinned to UTF-8 so the fixture bytes do not depend
        on the platform's default locale encoding.
        """
        file_path = tmp_path / filename
        file_path.write_text(text, encoding="utf-8")
        return file_path

    def _ingest(self, tmp_path, data, filename="vscode_chat.json"):
        """Dump ``data`` as JSON into ``tmp_path`` and return the ingested session."""
        file_path = self._write_fixture(tmp_path, json.dumps(data), filename)
        ingester = VSCodeIngester()
        return ingester.ingest(str(file_path))

    # ------------------------------------------------------------------
    # Top-level container formats
    # ------------------------------------------------------------------

    def test_ingest_array_format(self, tmp_path):
        """Test ingesting direct array format"""
        data = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
        ]

        session = self._ingest(tmp_path, data)

        assert len(session.messages) == 2
        first, second = session.messages
        assert first.role == "user"
        assert first.content == "Hello"
        assert second.role == "assistant"
        assert session.source == "vscode"

    def test_ingest_object_with_messages_key(self, tmp_path):
        """Test ingesting object format with messages key"""
        data = {
            "messages": [
                {"role": "user", "content": "What is Python?"},
                {"role": "assistant", "content": "Python is a programming language."},
            ],
            "workspace": "/path/to/project"
        }

        session = self._ingest(tmp_path, data)

        assert len(session.messages) == 2
        assert session.metadata["workspace"] == "/path/to/project"

    def test_ingest_object_with_conversation_key(self, tmp_path):
        """Test ingesting object format with conversation key"""
        data = {
            "conversation": [
                {"role": "user", "content": "Help me code"},
            ],
        }

        session = self._ingest(tmp_path, data)

        assert len(session.messages) == 1

    def test_ingest_object_with_turns_key(self, tmp_path):
        """Test ingesting object format with turns key"""
        data = {
            "turns": [
                {"role": "user", "content": "First turn"},
                {"role": "assistant", "content": "Response"},
            ],
        }

        session = self._ingest(tmp_path, data)

        assert len(session.messages) == 2

    def test_ingest_object_with_history_key(self, tmp_path):
        """Test ingesting object format with history key"""
        data = {
            "history": [
                {"role": "user", "content": "Previous chat"},
            ],
        }

        session = self._ingest(tmp_path, data)

        assert len(session.messages) == 1

    # ------------------------------------------------------------------
    # Per-message metadata
    # ------------------------------------------------------------------

    def test_ingest_with_file_metadata(self, tmp_path):
        """Test that file and workspace keys end up in message metadata"""
        data = [
            {
                "role": "user",
                "content": "Help with this file",
                "file": "src/main.py",
                "workspace": "/project"
            },
        ]

        session = self._ingest(tmp_path, data)

        metadata = session.messages[0].metadata
        assert metadata.get("file") == "src/main.py"
        assert metadata.get("workspace") == "/project"

    def test_ingest_with_active_file_metadata(self, tmp_path):
        """Test that activeFile is exposed under the ``file`` metadata key"""
        data = [
            {
                "role": "user",
                "content": "Review this",
                "activeFile": "test.js",
            },
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].metadata.get("file") == "test.js"

    # ------------------------------------------------------------------
    # Content-derived extraction
    # ------------------------------------------------------------------

    def test_ingest_extracts_code_blocks(self, tmp_path):
        """Test code block extraction"""
        data = [
            {
                "role": "assistant",
                "content": "Here's the code:\n```python\ndef hello():\n    print('Hello')\n```"
            },
        ]

        session = self._ingest(tmp_path, data)

        assert len(session.code_blocks) == 1
        block = session.code_blocks[0]
        assert block.language == "python"
        assert "def hello" in block.content

    def test_ingest_extracts_todos(self, tmp_path):
        """Test TODO extraction"""
        data = [
            {
                "role": "assistant",
                "content": "TODO: Add error handling\nFIXME: Fix this bug"
            },
        ]

        session = self._ingest(tmp_path, data)

        assert len(session.todos) >= 2
        assert any("Add error handling" in todo for todo in session.todos)

    def test_ingest_extracts_decisions(self, tmp_path):
        """Test decision extraction"""
        data = [
            {
                "role": "assistant",
                "content": "We decided to use Flask for the web framework."
            },
        ]

        session = self._ingest(tmp_path, data)

        assert len(session.decisions) >= 1

    # ------------------------------------------------------------------
    # Role normalization
    # ------------------------------------------------------------------

    def test_role_normalization_copilot(self, tmp_path):
        """Test that copilot role is normalized to assistant"""
        data = [
            {"role": "copilot", "content": "I can help with that"},
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].role == "assistant"

    def test_role_normalization_bot(self, tmp_path):
        """Test that bot role is normalized to assistant"""
        data = [
            {"role": "bot", "content": "Response"},
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].role == "assistant"

    def test_role_normalization_human(self, tmp_path):
        """Test that human role is normalized to user"""
        data = [
            {"role": "human", "content": "Question"},
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].role == "user"

    def test_role_normalization_author_field(self, tmp_path):
        """Test that author field is used for role"""
        data = [
            {"author": "user", "content": "Using author field"},
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].role == "user"

    # ------------------------------------------------------------------
    # Error handling and malformed input
    # ------------------------------------------------------------------

    def test_ingest_file_not_found(self):
        """Test handling of missing file"""
        ingester = VSCodeIngester()

        with pytest.raises(FileNotFoundIngestionError):
            ingester.ingest("/nonexistent/path.json")

    def test_ingest_empty_path(self):
        """Test handling of empty path"""
        ingester = VSCodeIngester()

        with pytest.raises(ValueError, match="cannot be empty"):
            ingester.ingest("")

    def test_ingest_invalid_json(self, tmp_path):
        """Test handling of invalid JSON"""
        file_path = self._write_fixture(tmp_path, "{ invalid json }", "invalid.json")

        ingester = VSCodeIngester()

        with pytest.raises(ParseError, match="Invalid JSON"):
            ingester.ingest(str(file_path))

    def test_ingest_invalid_format(self, tmp_path):
        """Test handling of non-object/array JSON"""
        # Valid JSON, but a bare string rather than an array or object.
        file_path = self._write_fixture(tmp_path, '"just a string"', "invalid.json")

        ingester = VSCodeIngester()

        with pytest.raises(InvalidFormatError, match="Expected JSON array or object"):
            ingester.ingest(str(file_path))

    def test_ingest_empty_messages(self, tmp_path):
        """Test handling of empty messages array"""
        data = {"messages": []}

        session = self._ingest(tmp_path, data, "empty.json")

        assert len(session.messages) == 0

    def test_ingest_skips_non_dict_messages(self, tmp_path):
        """Test that non-dict messages are skipped"""
        data = [
            {"role": "user", "content": "Valid"},
            "invalid message",
            123,
            {"role": "assistant", "content": "Also valid"},
        ]

        session = self._ingest(tmp_path, data, "mixed.json")

        # Only the two dict entries should survive.
        assert len(session.messages) == 2

    # ------------------------------------------------------------------
    # Language detection from file names
    # ------------------------------------------------------------------

    def test_detect_language_python(self):
        """Test language detection for Python files"""
        ingester = VSCodeIngester()

        assert ingester._detect_language("main.py") == "python"
        assert ingester._detect_language("script.pyw") == "python"

    def test_detect_language_javascript(self):
        """Test language detection for JavaScript files"""
        ingester = VSCodeIngester()

        assert ingester._detect_language("app.js") == "javascript"
        assert ingester._detect_language("component.jsx") == "javascript"
        assert ingester._detect_language("module.mjs") == "javascript"

    def test_detect_language_typescript(self):
        """Test language detection for TypeScript files"""
        ingester = VSCodeIngester()

        assert ingester._detect_language("app.ts") == "typescript"
        assert ingester._detect_language("component.tsx") == "typescript"

    def test_detect_language_other(self):
        """Test language detection for other file types"""
        ingester = VSCodeIngester()

        assert ingester._detect_language("main.go") == "go"
        assert ingester._detect_language("app.rs") == "rust"
        assert ingester._detect_language("Main.java") == "java"
        assert ingester._detect_language("Program.cs") == "csharp"
        assert ingester._detect_language("script.rb") == "ruby"

    def test_detect_language_unknown(self):
        """Test that unknown or missing extensions yield an empty string"""
        ingester = VSCodeIngester()

        assert ingester._detect_language("file.unknown") == ""
        assert ingester._detect_language("no_extension") == ""

    # ------------------------------------------------------------------
    # Session-level metadata
    # ------------------------------------------------------------------

    def test_workspace_info_extraction(self, tmp_path):
        """Test workspace information extraction"""
        data = {
            "workspace": "/my/project",
            "workspacePath": "/should/be/ignored",  # workspace takes precedence
            "messages": [
                {"role": "user", "content": "Help", "file": "src/main.py"},
                {"role": "assistant", "content": "Sure", "file": "src/utils.js"},
            ]
        }

        session = self._ingest(tmp_path, data)

        assert session.metadata["workspace"] == "/my/project"
        languages = session.metadata["languages"]
        assert "python" in languages
        assert "javascript" in languages

    # ------------------------------------------------------------------
    # Alternative content fields and value coercion
    # ------------------------------------------------------------------

    def test_content_from_text_field(self, tmp_path):
        """Test content extraction from text field"""
        data = [
            {"role": "user", "text": "Using text field"},
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].content == "Using text field"

    def test_content_from_message_field(self, tmp_path):
        """Test content extraction from message field"""
        data = [
            {"role": "user", "message": "Using message field"},
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].content == "Using message field"

    def test_non_string_content_converted(self, tmp_path):
        """Test that non-string content is converted to string"""
        data = [
            {"role": "user", "content": 12345},
            {"role": "assistant", "content": None},
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].content == "12345"
        # A JSON null is expected to become an empty string, not "None".
        assert session.messages[1].content == ""

    def test_timestamp_preserved(self, tmp_path):
        """Test that timestamp is preserved"""
        data = [
            {"role": "user", "content": "Hello", "timestamp": "2024-01-15T10:30:00Z"},
        ]

        session = self._ingest(tmp_path, data)

        assert session.messages[0].timestamp == "2024-01-15T10:30:00Z"

    def test_nested_metadata_merged(self, tmp_path):
        """Test that nested metadata is merged"""
        data = [
            {
                "role": "user",
                "content": "Hello",
                "file": "main.py",
                "metadata": {"model": "gpt-4", "extra": "data"}
            },
        ]

        session = self._ingest(tmp_path, data)

        # Both the top-level ``file`` key and the nested ``metadata`` entries
        # must be visible on the message's metadata mapping.
        metadata = session.messages[0].metadata
        assert metadata["file"] == "main.py"
        assert metadata["model"] == "gpt-4"
        assert metadata["extra"] == "data"

