import pytest
import iscc_sct as sct
from iscc_sct import utils
from blake3 import blake3


def test_check_integrity(tmp_path):
    """check_integrity returns the path for a matching checksum and raises otherwise."""
    text = "This is a test file."
    target = tmp_path / "testfile.txt"
    target.write_text(text)

    # Hash the same content with blake3 to obtain the checksum the file should match
    digest = blake3()
    digest.update(text.encode())
    good_checksum = digest.hexdigest()

    # A matching checksum hands the very same path back to the caller
    returned = utils.check_integrity(target, good_checksum)
    assert returned == target

    # Appending extra characters guarantees the checksum can no longer match
    bad_checksum = good_checksum + "wrong"
    with pytest.raises(RuntimeError) as raised:
        utils.check_integrity(target, bad_checksum)

    # The error message must identify the failure as an integrity problem
    message = str(raised.value)
    assert "Failed integrity check" in message


def test_hamming_distance_identical():
    """Two equal byte strings are expected to have a Hamming distance of zero."""
    payload = b"abc"
    distance = utils.hamming_distance(payload, b"abc")
    assert distance == 0


def test_hamming_distance_different():
    """Byte strings differing only in the last byte yield a distance of three."""
    left, right = b"abc", b"abd"
    # b"c" is 0x63 and b"d" is 0x64; their XOR is 0x07, i.e. three set bits
    distance = utils.hamming_distance(left, right)
    assert distance == 3


def test_hamming_distance_completely_different():
    """A zero byte and an all-ones byte differ in all eight bit positions."""
    all_clear, all_set = b"\x00", b"\xff"
    distance = utils.hamming_distance(all_clear, all_set)
    assert distance == 8


def test_hamming_distance_raises_value_error():
    """Inputs of unequal length (3 vs. 4 bytes) must be rejected with ValueError."""
    shorter, longer = b"abc", b"abcd"
    with pytest.raises(ValueError):
        utils.hamming_distance(shorter, longer)


def test_encode_decode_base32():
    """Base32 encoding yields the expected text and decoding restores the bytes."""
    payload = b"Hello, World!"

    text = utils.encode_base32(payload)
    assert isinstance(text, str)
    # The expected value carries no trailing "=" padding
    assert text == "JBSWY3DPFQQFO33SNRSCC"

    restored = utils.decode_base32(text)
    assert isinstance(restored, bytes)
    assert restored == payload


def test_encode_decode_base64():
    """Base64 encoding yields the expected text and decoding restores the bytes."""
    payload = b"Hello, World!"

    text = utils.encode_base64(payload)
    assert isinstance(text, str)
    # The expected value carries no trailing "=" padding
    assert text == "SGVsbG8sIFdvcmxkIQ"

    restored = utils.decode_base64(text)
    assert isinstance(restored, bytes)
    assert restored == payload


def test_encode_decode_edge_cases():
    """Cover empty input and a single-byte round trip for both codecs."""
    nothing_bytes, nothing_text = b"", ""

    # Empty input maps to empty output in both directions
    assert utils.encode_base32(nothing_bytes) == nothing_text
    assert utils.decode_base32(nothing_text) == nothing_bytes
    assert utils.encode_base64(nothing_bytes) == nothing_text
    assert utils.decode_base64(nothing_text) == nothing_bytes

    # A single byte does not fill a complete encoding group, so the round trip
    # presumably exercises the padding handling of the helpers
    single = b"a"
    as_base32 = utils.encode_base32(single)
    assert utils.decode_base32(as_base32) == single
    as_base64 = utils.encode_base64(single)
    assert utils.decode_base64(as_base64) == single


def test_iscc_distance_different_lengths():
    """ISCCs created with different bit lengths cannot be compared."""
    code_64 = sct.create("Hello", bits=64).iscc
    code_96 = sct.create("Hello", bits=96).iscc

    expected_message = "The input ISCCs must have the same length"
    with pytest.raises(ValueError, match=expected_message):
        utils.iscc_distance(code_64, code_96)


def test_cosine_similarity_identical():
    """Identical inputs are expected to score the maximum value of 100."""
    first = b"\x00\x00\x00\x00"
    second = b"\x00\x00\x00\x00"
    score = utils.cosine_similarity(first, second)
    assert score == 100


def test_cosine_similarity_opposite():
    """Inputs that differ in every bit are expected to score -100."""
    zeros = b"\x00\x00\x00\x00"
    ones = b"\xff\xff\xff\xff"
    score = utils.cosine_similarity(zeros, ones)
    assert score == -100


def test_cosine_similarity_half_similar():
    """Inputs that differ in two of their four bytes are expected to score 0."""
    half_set = b"\x00\x00\xff\xff"
    zeros = b"\x00\x00\x00\x00"
    score = utils.cosine_similarity(half_set, zeros)
    assert score == 0


def test_cosine_similarity_quarter_similar():
    """Inputs that agree in only one of four bytes are expected to score -50."""
    mostly_set = b"\x00\xff\xff\xff"
    zeros = b"\x00\x00\x00\x00"
    score = utils.cosine_similarity(mostly_set, zeros)
    assert score == -50


def test_cosine_similarity_three_quarter_similar():
    """Inputs that agree in three of four bytes are expected to score 50."""
    mostly_clear = b"\x00\x00\x00\xff"
    zeros = b"\x00\x00\x00\x00"
    score = utils.cosine_similarity(mostly_clear, zeros)
    assert score == 50


def test_cosine_similarity_different_lengths():
    """A 3-byte and a 4-byte input must be rejected with a descriptive ValueError."""
    three_bytes = b"\x00\x00\x00"
    four_bytes = b"\x00\x00\x00\x00"

    expected_message = "The lengths of the two bytes objects must be the same"
    with pytest.raises(ValueError, match=expected_message):
        utils.cosine_similarity(three_bytes, four_bytes)


def test_granular_similarity():
    """Check granular_similarity results at the default, a low and a high threshold."""
    from iscc_sct.models import Metadata, FeatureSet, Feature

    # Both sides share the first simprint; their second simprints differ
    simprints_a = [
        Feature(simprint="AAECAwQFBgc"),
        Feature(simprint="CAkKCwwNDg8"),
    ]
    simprints_b = [
        Feature(simprint="AAECAwQFBgc"),
        Feature(simprint="EBESExQVFhc"),
    ]
    metadata_a = Metadata(
        iscc="ISCC:KACYPXW563EDNM",
        features=[FeatureSet(simprints=simprints_a)],
    )
    metadata_b = Metadata(
        iscc="ISCC:KACYPXW563EDNM",
        features=[FeatureSet(simprints=simprints_b)],
    )

    # Default threshold: only the shared simprint is reported, with a score of 100
    default_matches = utils.granular_similarity(metadata_a, metadata_b)
    assert len(default_matches) == 1
    only_match = default_matches[0]
    assert only_match[0].simprint == "AAECAwQFBgc"
    assert only_match[1] == 100
    assert only_match[2].simprint == "AAECAwQFBgc"

    # Threshold 0: two results are expected.
    # NOTE(review): presumably one best match per simprint of metadata_a rather
    # than every pairing - confirm against utils.granular_similarity
    lenient_matches = utils.granular_similarity(metadata_a, metadata_b, threshold=0)
    assert len(lenient_matches) == 2

    # Threshold 101 lies above the score of an exact match, so nothing is reported
    strict_matches = utils.granular_similarity(metadata_a, metadata_b, threshold=101)
    assert len(strict_matches) == 0


def test_granular_similarity_no_matches():
    """Metadata with differing simprints yields no matches at the default threshold."""
    from iscc_sct.models import Metadata, FeatureSet, Feature

    features_a = [FeatureSet(simprints=[Feature(simprint="AAECAwQFBgc")])]
    features_b = [FeatureSet(simprints=[Feature(simprint="CAkKCwwNDg8")])]

    metadata_a = Metadata(iscc="ISCC:KACYPXW563EDNM", features=features_a)
    metadata_b = Metadata(iscc="ISCC:KACYPXW563EDNM", features=features_b)

    found = utils.granular_similarity(metadata_a, metadata_b)
    assert len(found) == 0


def test_granular_similarity_multiple_matches():
    """Matches are found across several feature sets of the two Metadata objects."""
    from iscc_sct.models import Metadata, FeatureSet, Feature

    # First feature set: one shared simprint plus one that differs between the sides
    first_set_a = FeatureSet(
        simprints=[Feature(simprint="AAECAwQFBgc"), Feature(simprint="CAkKCwwNDg8")]
    )
    first_set_b = FeatureSet(
        simprints=[Feature(simprint="AAECAwQFBgc"), Feature(simprint="GBkaGxwdHh8")]
    )
    # Second feature set: the same single simprint on both sides
    second_set_a = FeatureSet(simprints=[Feature(simprint="EBESExQVFhc")])
    second_set_b = FeatureSet(simprints=[Feature(simprint="EBESExQVFhc")])

    metadata_a = Metadata(iscc="ISCC:KACYPXW563EDNM", features=[first_set_a, second_set_a])
    metadata_b = Metadata(iscc="ISCC:KACYPXW563EDNM", features=[first_set_b, second_set_b])

    found = utils.granular_similarity(metadata_a, metadata_b)
    assert len(found) == 2

    # Compare as a set so the order in which matches are reported does not matter
    observed_pairs = {(entry[0].simprint, entry[2].simprint) for entry in found}
    expected_pairs = {
        ("AAECAwQFBgc", "AAECAwQFBgc"),
        ("EBESExQVFhc", "EBESExQVFhc"),
    }
    assert observed_pairs == expected_pairs