Files
stonks-oracle/tests/test_content.py
T

85 lines
2.8 KiB
Python

"""Tests for shared canonical URL normalization and content hashing.
Validates normalize_url, content_hash, and content_hash_str from
services.shared.content.
Requirements: 3.2, 3.3
"""
import hashlib
from services.shared.content import content_hash, content_hash_str, normalize_url
class TestNormalizeUrl:
    """Checks on the canonical form produced by ``normalize_url``."""

    def test_lowercases_scheme_and_host(self):
        """An upper/mixed-case scheme and host come back lowercased."""
        normalized = normalize_url("HTTPS://Example.COM/path")
        assert normalized == "https://example.com/path"

    def test_strips_trailing_slash(self):
        """A trailing slash on a non-root path is removed."""
        normalized = normalize_url("https://example.com/path/")
        assert normalized == "https://example.com/path"

    def test_strips_fragment(self):
        """The ``#fragment`` part is dropped from the result."""
        normalized = normalize_url("https://example.com/path#section")
        assert "#" not in normalized
        assert normalized == "https://example.com/path"

    def test_preserves_query(self):
        """A single query parameter is kept as-is."""
        url = "https://example.com/path?q=test"
        assert normalize_url(url) == url

    def test_sorts_query_params(self):
        """Query parameters are emitted in sorted key order."""
        normalized = normalize_url("https://example.com/path?z=1&a=2")
        assert normalized == "https://example.com/path?a=2&z=1"

    def test_preserves_non_standard_port(self):
        """An explicit non-default port is still present in the result."""
        normalized = normalize_url("https://example.com:8443/path")
        assert ":8443" in normalized

    def test_strips_default_port_443(self):
        """The explicit ``:443`` port on an https URL is not in the result."""
        normalized = normalize_url("https://example.com:443/path")
        assert ":443" not in normalized

    def test_strips_default_port_80(self):
        """The explicit ``:80`` port on an http URL is not in the result."""
        normalized = normalize_url("http://example.com:80/path")
        assert ":80" not in normalized

    def test_root_path(self):
        """A URL without a path is normalized to the root path ``/``."""
        normalized = normalize_url("https://example.com")
        assert normalized == "https://example.com/"

    def test_defaults_scheme_to_https(self):
        """A scheme-relative URL gets an ``https://`` prefix."""
        normalized = normalize_url("//example.com/path")
        assert normalized.startswith("https://")

    def test_deterministic_for_same_input(self):
        """Two calls with the same URL give equal results."""
        url = "https://example.com/article?b=2&a=1#frag"
        first = normalize_url(url)
        second = normalize_url(url)
        assert first == second
class TestContentHash:
    """Checks on ``content_hash`` applied to raw bytes."""

    def test_returns_sha256_hex(self):
        """The result equals the SHA-256 hex digest from ``hashlib``."""
        payload = b"hello world"
        reference = hashlib.sha256(payload).hexdigest()
        digest = content_hash(payload)
        assert digest == reference

    def test_deterministic(self):
        """Hashing the same bytes twice gives equal digests."""
        payload = b"test content"
        first = content_hash(payload)
        second = content_hash(payload)
        assert first == second

    def test_different_content_different_hash(self):
        """Two different payloads hash to different digests."""
        digest_a = content_hash(b"aaa")
        digest_b = content_hash(b"bbb")
        assert digest_a != digest_b

    def test_empty_bytes(self):
        """Empty input still yields a full-length digest."""
        digest = content_hash(b"")
        # A SHA-256 digest is 32 bytes, i.e. 64 hexadecimal characters.
        assert len(digest) == 64
class TestContentHashStr:
    """Checks on ``content_hash_str`` applied to text."""

    def test_matches_manual_sha256(self):
        """The result equals SHA-256 of the UTF-8 encoded text."""
        text = "hello world"
        encoded = text.encode("utf-8")
        reference = hashlib.sha256(encoded).hexdigest()
        digest = content_hash_str(text)
        assert digest == reference

    def test_deterministic(self):
        """Hashing the same text twice gives equal digests."""
        first = content_hash_str("test")
        second = content_hash_str("test")
        assert first == second

    def test_different_text_different_hash(self):
        """Two different strings hash to different digests."""
        digest_a = content_hash_str("aaa")
        digest_b = content_hash_str("bbb")
        assert digest_a != digest_b