85 lines
2.8 KiB
Python
85 lines
2.8 KiB
Python
"""Tests for shared canonical URL normalization and content hashing.

Validates normalize_url, content_hash, and content_hash_str from
services.shared.content.

Requirements: 3.2, 3.3
"""
|
|
import hashlib
|
|
|
|
from services.shared.content import content_hash, content_hash_str, normalize_url
|
|
|
|
|
|
class TestNormalizeUrl:
    """Checks on the canonical form returned by ``normalize_url``."""

    def test_lowercases_scheme_and_host(self):
        """Upper-case scheme and host come back lower-cased."""
        normalized = normalize_url("HTTPS://Example.COM/path")
        assert normalized == "https://example.com/path"

    def test_strips_trailing_slash(self):
        """A trailing slash on a non-root path is removed."""
        normalized = normalize_url("https://example.com/path/")
        assert normalized == "https://example.com/path"

    def test_strips_fragment(self):
        """The ``#fragment`` part is dropped from the result."""
        normalized = normalize_url("https://example.com/path#section")
        assert "#" not in normalized
        assert normalized == "https://example.com/path"

    def test_preserves_query(self):
        """A single query parameter passes through unchanged."""
        normalized = normalize_url("https://example.com/path?q=test")
        assert normalized == "https://example.com/path?q=test"

    def test_sorts_query_params(self):
        """Query parameters are emitted in sorted key order."""
        normalized = normalize_url("https://example.com/path?z=1&a=2")
        assert normalized == "https://example.com/path?a=2&z=1"

    def test_preserves_non_standard_port(self):
        """An explicit non-default port stays in the result."""
        normalized = normalize_url("https://example.com:8443/path")
        assert ":8443" in normalized

    def test_strips_default_port_443(self):
        """The default HTTPS port does not appear in the result."""
        normalized = normalize_url("https://example.com:443/path")
        assert ":443" not in normalized

    def test_strips_default_port_80(self):
        """The default HTTP port does not appear in the result."""
        normalized = normalize_url("http://example.com:80/path")
        assert ":80" not in normalized

    def test_root_path(self):
        """A URL with no path normalizes to the root path ``/``."""
        normalized = normalize_url("https://example.com")
        assert normalized == "https://example.com/"

    def test_defaults_scheme_to_https(self):
        """A scheme-less ``//host/path`` URL gets an ``https://`` prefix."""
        normalized = normalize_url("//example.com/path")
        assert normalized.startswith("https://")

    def test_deterministic_for_same_input(self):
        """Two calls with the same URL give equal results."""
        sample = "https://example.com/article?b=2&a=1#frag"
        first_pass = normalize_url(sample)
        second_pass = normalize_url(sample)
        assert first_pass == second_pass
|
|
|
|
|
|
class TestContentHash:
    """Checks on ``content_hash`` for bytes input."""

    def test_returns_sha256_hex(self):
        """The result equals the SHA-256 hex digest from ``hashlib``."""
        payload = b"hello world"
        reference_digest = hashlib.sha256(payload).hexdigest()
        assert content_hash(payload) == reference_digest

    def test_deterministic(self):
        """Hashing the same bytes twice gives equal results."""
        payload = b"test content"
        first_digest = content_hash(payload)
        second_digest = content_hash(payload)
        assert first_digest == second_digest

    def test_different_content_different_hash(self):
        """Distinct byte strings produce distinct hashes."""
        digest_a = content_hash(b"aaa")
        digest_b = content_hash(b"bbb")
        assert digest_a != digest_b

    def test_empty_bytes(self):
        """Empty input still yields a full-length hex digest."""
        sha256_hex_length = 64  # 256 bits rendered as hex characters
        digest = content_hash(b"")
        assert len(digest) == sha256_hex_length
|
|
|
|
|
|
class TestContentHashStr:
    """Checks on ``content_hash_str`` for text input."""

    def test_matches_manual_sha256(self):
        """The result equals SHA-256 over the UTF-8 encoded text."""
        sample = "hello world"
        encoded = sample.encode("utf-8")
        reference_digest = hashlib.sha256(encoded).hexdigest()
        assert content_hash_str(sample) == reference_digest

    def test_deterministic(self):
        """Hashing the same text twice gives equal results."""
        first_digest = content_hash_str("test")
        second_digest = content_hash_str("test")
        assert first_digest == second_digest

    def test_different_text_different_hash(self):
        """Distinct strings produce distinct hashes."""
        digest_a = content_hash_str("aaa")
        digest_b = content_hash_str("bbb")
        assert digest_a != digest_b
|