Skip to content

Comparators

stickler.comparators

Common comparators for key information evaluation.

This package contains comparators that are shared between the traditional and ANLS Star evaluation systems. These comparators implement a unified interface that works with both systems.

stickler.comparators.BaseComparator

Bases: ABC

Base class for all comparators.

This class defines the interface that all comparators must implement. Comparators are used to compare two values and return a similarity score between 0.0 and 1.0, where 1.0 means the values are identical.

Source code in stickler/comparators/base.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
class BaseComparator(ABC):
    """Abstract foundation shared by every comparator.

    A comparator takes two values and scores how alike they are on a scale
    from 0.0 to 1.0, where 1.0 means the values are identical. Subclasses
    provide the scoring logic in :meth:`compare`; call syntax, thresholding
    and string representations are implemented here.
    """

    def __init__(self, threshold: float = 0.7):
        """Store the similarity cut-off.

        Args:
            threshold: Similarity threshold (0.0-1.0), read by
                :meth:`binary_compare`
        """
        self.threshold = threshold

    @abstractmethod
    def compare(self, str1: Any, str2: Any) -> float:
        """Score the similarity of two values.

        Args:
            str1: First value
            str2: Second value

        Returns:
            Similarity score between 0.0 and 1.0
        """
        pass

    def __call__(self, str1: Any, str2: Any) -> float:
        """Allow ``comparator(a, b)`` as shorthand for ``compare(a, b)``.

        Args:
            str1: First value
            str2: Second value

        Returns:
            Similarity score between 0.0 and 1.0
        """
        return self.compare(str1, str2)

    def binary_compare(self, str1: Any, str2: Any) -> Tuple[int, int]:
        """Reduce the similarity score to a pass/fail ``(tp, fp)`` pair.

        The score from :meth:`compare` is tested against ``self.threshold``.
        A score at or above the threshold counts as a true positive and
        yields ``(1, 0)``; anything below counts as a false positive and
        yields ``(0, 1)``.

        Args:
            str1: First value
            str2: Second value

        Returns:
            Tuple of (tp, fp) where tp is 1 if similar, 0 otherwise,
            and fp is the opposite
        """
        meets_threshold = self.compare(str1, str2) >= self.threshold
        return (1, 0) if meets_threshold else (0, 1)

    def __str__(self) -> str:
        """Return the bare class name (used for serialization)."""
        return self.__class__.__name__

    def __repr__(self) -> str:
        """Return the class name together with the configured threshold."""
        return f"{self.__class__.__name__}(threshold={self.threshold})"

__call__(str1, str2)

Make the comparator callable.

Parameters:

Name Type Description Default
str1 Any

First value

required
str2 Any

Second value

required

Returns:

Type Description
float

Similarity score between 0.0 and 1.0

Source code in stickler/comparators/base.py
36
37
38
39
40
41
42
43
44
45
46
def __call__(self, str1: Any, str2: Any) -> float:
    """Make the comparator callable by delegating to ``self.compare``.

    This lets ``comparator(a, b)`` be used as shorthand for
    ``comparator.compare(a, b)``.

    Args:
        str1: First value
        str2: Second value

    Returns:
        The value returned by ``self.compare(str1, str2)``, i.e. a
        similarity score between 0.0 and 1.0
    """
    return self.compare(str1, str2)

__init__(threshold=0.7)

Initialize the comparator.

Parameters:

Name Type Description Default
threshold float

Similarity threshold (0.0-1.0)

0.7
Source code in stickler/comparators/base.py
15
16
17
18
19
20
21
def __init__(self, threshold: float = 0.7):
    """Initialize the comparator.

    Args:
        threshold: Similarity threshold (0.0-1.0). Stored as
            ``self.threshold`` exactly as given; no range check is
            performed here.
    """
    self.threshold = threshold

__repr__()

Detailed string representation.

Source code in stickler/comparators/base.py
74
75
76
def __repr__(self) -> str:
    """Detailed string representation.

    Returns:
        The concrete class name followed by the configured threshold,
        e.g. ``ClassName(threshold=0.7)``
    """
    return f"{self.__class__.__name__}(threshold={self.threshold})"

__str__()

String representation for serialization.

Source code in stickler/comparators/base.py
70
71
72
def __str__(self) -> str:
    """String representation for serialization.

    Returns:
        The name of the concrete class, without any configuration
    """
    return self.__class__.__name__

binary_compare(str1, str2)

Compare two values and return a binary result as (tp, fp) tuple.

This method converts the continuous similarity score to a binary decision based on the threshold. If the similarity is greater than or equal to the threshold, it returns (1, 0) indicating true positive. Otherwise, it returns (0, 1) indicating false positive.

Parameters:

Name Type Description Default
str1 Any

First value

required
str2 Any

Second value

required

Returns:

Type Description
int

Tuple of (tp, fp) where tp is 1 if similar, 0 otherwise,

int

and fp is the opposite

Source code in stickler/comparators/base.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def binary_compare(self, str1: Any, str2: Any) -> Tuple[int, int]:
    """Reduce the similarity score to a pass/fail ``(tp, fp)`` pair.

    The score from ``self.compare`` is tested against ``self.threshold``.
    A score at or above the threshold counts as a true positive and yields
    ``(1, 0)``; anything below counts as a false positive and yields
    ``(0, 1)``.

    Args:
        str1: First value
        str2: Second value

    Returns:
        Tuple of (tp, fp) where tp is 1 if similar, 0 otherwise,
        and fp is the opposite
    """
    meets_threshold = self.compare(str1, str2) >= self.threshold
    return (1, 0) if meets_threshold else (0, 1)

compare(str1, str2) abstractmethod

Compare two values and return a similarity score.

Parameters:

Name Type Description Default
str1 Any

First value

required
str2 Any

Second value

required

Returns:

Type Description
float

Similarity score between 0.0 and 1.0

Source code in stickler/comparators/base.py
23
24
25
26
27
28
29
30
31
32
33
34
@abstractmethod
def compare(self, str1: Any, str2: Any) -> float:
    """Compare two values and return a similarity score.

    This is the abstract hook of the comparator interface; it has no
    implementation of its own and must be overridden by subclasses.

    Args:
        str1: First value
        str2: Second value

    Returns:
        Similarity score between 0.0 and 1.0
    """
    pass

stickler.comparators.ExactComparator

Bases: BaseComparator

Comparator that checks for exact string matching.

This comparator removes whitespace and punctuation before comparison and, unless `case_sensitive=True` is passed, ignores letter case. It returns 1.0 for exact matches and 0.0 otherwise.

Example
comparator = ExactComparator()

# Returns 1.0 (exact match after normalization)
comparator.compare("hello, world!", "hello world")

# Returns 0.0 (different strings)
comparator.compare("hello", "goodbye")
Source code in stickler/comparators/exact.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class ExactComparator(BaseComparator):
    """All-or-nothing comparator based on normalized string equality.

    Both inputs are converted to strings, optionally lowercased, and have
    whitespace and punctuation removed. The result is 1.0 when the
    normalized strings are equal and 0.0 otherwise.

    Example:
        ```python
        comparator = ExactComparator()

        # Returns 1.0 (exact match after normalization)
        comparator.compare("hello, world!", "hello world")

        # Returns 0.0 (different strings)
        comparator.compare("hello", "goodbye")
        ```
    """

    def __init__(self, threshold: float = 1.0, case_sensitive: bool = False):
        """Set up the comparator.

        Args:
            threshold: Similarity threshold (default 1.0)
            case_sensitive: Whether comparison is case sensitive (default False)
        """
        super().__init__(threshold=threshold)
        self.case_sensitive = case_sensitive

    def compare(self, str1: Any, str2: Any) -> float:
        """Report whether two values are equal after normalization.

        Args:
            str1: First value
            str2: Second value

        Returns:
            1.0 if the strings match exactly after normalization, 0.0 otherwise
        """
        # None only matches None; a single missing side is a mismatch.
        if str1 is None or str2 is None:
            return 1.0 if (str1 is None and str2 is None) else 0.0

        # Work on string forms so non-string inputs can be compared too.
        left, right = str(str1), str(str2)

        # Fold case unless the caller asked for case-sensitive matching.
        if not self.case_sensitive:
            left, right = lowercase(left), lowercase(right)

        # Drop whitespace and punctuation, then test for equality.
        is_match = strip_punctuation_space(left) == strip_punctuation_space(right)
        return 1.0 if is_match else 0.0

__init__(threshold=1.0, case_sensitive=False)

Initialize the comparator.

Parameters:

Name Type Description Default
threshold float

Similarity threshold (default 1.0)

1.0
case_sensitive bool

Whether comparison is case sensitive (default False)

False
Source code in stickler/comparators/exact.py
27
28
29
30
31
32
33
34
35
def __init__(self, threshold: float = 1.0, case_sensitive: bool = False):
    """Initialize the comparator.

    Args:
        threshold: Similarity threshold (default 1.0); forwarded to the
            base class initializer
        case_sensitive: Whether comparison is case sensitive (default False);
            stored as ``self.case_sensitive``
    """
    super().__init__(threshold=threshold)
    self.case_sensitive = case_sensitive

compare(str1, str2)

Compare two values with exact string matching.

Parameters:

Name Type Description Default
str1 Any

First value

required
str2 Any

Second value

required

Returns:

Type Description
float

1.0 if the strings match exactly after normalization, 0.0 otherwise

Source code in stickler/comparators/exact.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def compare(self, str1: Any, str2: Any) -> float:
    """Report whether two values are equal after normalization.

    Args:
        str1: First value
        str2: Second value

    Returns:
        1.0 if the strings match exactly after normalization, 0.0 otherwise
    """
    # None only matches None; a single missing side is a mismatch.
    if str1 is None or str2 is None:
        return 1.0 if (str1 is None and str2 is None) else 0.0

    # Work on string forms so non-string inputs can be compared too.
    left, right = str(str1), str(str2)

    # Fold case unless the caller asked for case-sensitive matching.
    if not self.case_sensitive:
        left, right = lowercase(left), lowercase(right)

    # Drop whitespace and punctuation, then test for equality.
    is_match = strip_punctuation_space(left) == strip_punctuation_space(right)
    return 1.0 if is_match else 0.0

stickler.comparators.LevenshteinComparator

Bases: BaseComparator

Comparator using Levenshtein distance for string similarity.

This class implements the Levenshtein distance algorithm for measuring the difference between two strings. It calculates a normalized similarity score between 0 and 1.

Source code in stickler/comparators/levenshtein.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
class LevenshteinComparator(BaseComparator):
    """Comparator using Levenshtein distance for string similarity.

    The edit distance between the two (optionally normalized) strings is
    divided by the length of the longer string and subtracted from 1.0,
    giving a similarity score between 0.0 and 1.0.
    """

    def __init__(self, normalize: bool = True, threshold: float = 0.7):
        """Initialize the comparator.

        Args:
            normalize: Whether to normalize input strings before comparison
                      (strip leading/trailing whitespace, collapse internal
                      whitespace runs to a single space, lowercase)
            threshold: Similarity threshold (default 0.7)
        """
        super().__init__(threshold=threshold)
        self._normalize = normalize

    @property
    def name(self) -> str:
        """Return the name of the comparator."""
        return "levenshtein"

    @property
    def config(self) -> Optional[Dict[str, Any]]:
        """Return configuration parameters."""
        return {"normalize": self._normalize}

    def compare(self, s1: Any, s2: Any) -> float:
        """
        Compare two strings using Levenshtein distance.

        ``None`` is treated as the empty string and any other non-string
        value is converted with ``str()``.

        Args:
            s1: First string or value
            s2: Second string or value

        Returns:
            Similarity score between 0.0 and 1.0, with 1.0 indicating identical

        Raises:
            TypeError: If either input is a dictionary, as dictionaries are not suitable
                      for Levenshtein distance comparison and should be handled through
                      structured models instead.
        """
        # Reject dictionaries - they should be broken down into proper StructuredModel subclasses
        if isinstance(s1, dict) or isinstance(s2, dict):
            raise TypeError(
                "Dictionary objects cannot be compared using LevenshteinComparator. "
                "Use a StructuredModel subclass with properly defined fields instead."
            )

        # Convert to strings and handle None values
        s1 = "" if s1 is None else str(s1)
        s2 = "" if s2 is None else str(s2)

        # Normalize strings if enabled
        if self._normalize:
            s1 = " ".join(s1.strip().lower().split())
            s2 = " ".join(s2.strip().lower().split())

        # Identical strings (including two empty strings) are a perfect
        # match; returning here skips the quadratic distance computation and
        # guarantees the longer length below is non-zero.
        if s1 == s2:
            return 1.0

        # Calculate Levenshtein distance
        dist = self._levenshtein_distance(s1, s2)
        str_length = max(len(s1), len(s2))

        # Convert distance to similarity (1.0 - normalized_distance)
        return 1.0 - (float(dist) / float(str_length))

    @staticmethod
    def _levenshtein_distance(s1: str, s2: str) -> int:
        """
        Calculate the Levenshtein distance between two strings.

        Uses the row-by-row dynamic-programming formulation, keeping only
        the previous row in memory.

        Args:
            s1: First string
            s2: Second string

        Returns:
            The Levenshtein distance as an integer
        """
        # Keep the shorter string as s1 so each row is as short as possible.
        if len(s1) > len(s2):
            s1, s2 = s2, s1

        # Row 0: distance from the empty prefix of s2 to each prefix of s1.
        distances = list(range(len(s1) + 1))
        for i2, c2 in enumerate(s2):
            new_distances = [i2 + 1]
            for i1, c1 in enumerate(s1):
                if c1 == c2:
                    # Matching characters carry the diagonal cost forward.
                    new_distances.append(distances[i1])
                else:
                    # Cheapest of substitution, deletion and insertion.
                    new_distances.append(
                        1 + min(distances[i1], distances[i1 + 1], new_distances[-1])
                    )
            distances = new_distances
        return distances[-1]

config property

Return configuration parameters.

name property

Return the name of the comparator.

__init__(normalize=True, threshold=0.7)

Initialize the comparator.

Parameters:

Name Type Description Default
normalize bool

Whether to normalize input strings (strip leading/trailing whitespace, collapse internal whitespace runs to a single space, lowercase) before comparison

True
threshold float

Similarity threshold (default 0.7)

0.7
Source code in stickler/comparators/levenshtein.py
16
17
18
19
20
21
22
23
24
25
def __init__(self, normalize: bool = True, threshold: float = 0.7):
    """Initialize the comparator.

    Args:
        normalize: Whether to normalize input strings before comparison
                  (strip leading/trailing whitespace, collapse internal
                  whitespace runs to a single space, lowercase).
                  Stored as ``self._normalize``.
        threshold: Similarity threshold (default 0.7); forwarded to the
                  base class initializer
    """
    super().__init__(threshold=threshold)
    self._normalize = normalize

compare(s1, s2)

Compare two strings using Levenshtein distance.

Parameters:

Name Type Description Default
s1 Any

First string or value

required
s2 Any

Second string or value

required

Returns:

Type Description
float

Similarity score between 0.0 and 1.0, with 1.0 indicating identical

Raises:

Type Description
TypeError

If either input is a dictionary, as dictionaries are not suitable for Levenshtein distance comparison and should be handled through structured models instead.

Source code in stickler/comparators/levenshtein.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def compare(self, s1: Any, s2: Any) -> float:
    """
    Compare two strings using Levenshtein distance.

    ``None`` is treated as the empty string and any other non-string value
    is converted with ``str()``. When normalization is enabled, both strings
    are stripped, lowercased and have internal whitespace runs collapsed to
    a single space before the distance is computed.

    Args:
        s1: First string or value
        s2: Second string or value

    Returns:
        Similarity score between 0.0 and 1.0, with 1.0 indicating identical

    Raises:
        TypeError: If either input is a dictionary, as dictionaries are not suitable
                  for Levenshtein distance comparison and should be handled through
                  structured models instead.
    """
    # Reject dictionaries - they should be broken down into proper StructuredModel subclasses
    if isinstance(s1, dict) or isinstance(s2, dict):
        raise TypeError(
            "Dictionary objects cannot be compared using LevenshteinComparator. "
            "Use a StructuredModel subclass with properly defined fields instead."
        )

    # Convert to strings and handle None values
    s1 = "" if s1 is None else str(s1)
    s2 = "" if s2 is None else str(s2)

    # Normalize strings if enabled
    if self._normalize:
        s1 = " ".join(s1.strip().lower().split())
        s2 = " ".join(s2.strip().lower().split())

    # Identical strings (including two empty strings) are a perfect match;
    # returning here skips the quadratic distance computation and guarantees
    # the longer length below is non-zero.
    if s1 == s2:
        return 1.0

    # Calculate Levenshtein distance
    dist = self._levenshtein_distance(s1, s2)
    str_length = max(len(s1), len(s2))

    # Convert distance to similarity (1.0 - normalized_distance)
    return 1.0 - (float(dist) / float(str_length))

stickler.comparators.NumericComparator

Bases: BaseComparator

Comparator for numeric values with configurable tolerance.

This comparator extracts and compares numeric values from strings or numbers. It supports relative and absolute tolerance for comparison.

Example
# Default exact matching
exact = NumericComparator()
exact.compare("123", "123.0")  # Returns 1.0
exact.compare("123", "124")    # Returns 0.0

# With tolerance
approx = NumericComparator(relative_tolerance=0.1)  # 10% tolerance
approx.compare("100", "109")   # Returns 1.0 (within 10%)
approx.compare("100", "111")   # Returns 0.0 (beyond 10%)
Source code in stickler/comparators/numeric.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
class NumericComparator(BaseComparator):
    """Comparator for numeric values with configurable tolerance.

    This comparator extracts and compares numeric values from strings or numbers.
    It supports relative and absolute tolerance for comparison. The relative
    tolerance is measured against the first argument of :meth:`compare`.

    Example:
        ```python
        # Default exact matching
        exact = NumericComparator()
        exact.compare("123", "123.0")  # Returns 1.0
        exact.compare("123", "124")    # Returns 0.0

        # With tolerance
        approx = NumericComparator(relative_tolerance=0.1)  # 10% tolerance
        approx.compare("100", "109")   # Returns 1.0 (within 10%)
        approx.compare("100", "111")   # Returns 0.0 (beyond 10%)
        ```
    """

    def __init__(
        self,
        threshold: float = 1.0,
        relative_tolerance: float = 0.0,
        absolute_tolerance: float = 0.0,
        tolerance: Optional[float] = None,
    ):
        """Initialize the comparator.

        Args:
            threshold: Similarity threshold (default 1.0)
            relative_tolerance: Relative tolerance for comparison (default 0.0)
            absolute_tolerance: Absolute tolerance for comparison (default 0.0)
            tolerance: Alias for absolute_tolerance (for backward compatibility)

        Raises:
            ValueError: If ``tolerance`` is given together with a non-zero
                ``absolute_tolerance``.
        """
        super().__init__(threshold=threshold)
        self.relative_tolerance = relative_tolerance

        # Handle tolerance alias for backward compatibility
        if tolerance is not None:
            if absolute_tolerance != 0.0:
                raise ValueError(
                    "Cannot specify both 'tolerance' and 'absolute_tolerance'. Use 'absolute_tolerance'."
                )
            self.absolute_tolerance = tolerance
        else:
            self.absolute_tolerance = absolute_tolerance

    @property
    def config(self) -> Optional[Dict[str, Any]]:
        """Return configuration parameters for serialization.

        Only non-zero tolerances are included; ``None`` is returned when
        both tolerances are zero.
        """
        config = {}
        if self.relative_tolerance != 0.0:
            config["relative_tolerance"] = self.relative_tolerance
        if self.absolute_tolerance != 0.0:
            config["absolute_tolerance"] = self.absolute_tolerance
        return config or None

    def compare(self, str1: Any, str2: Any) -> float:
        """Compare two values numerically.

        Two ``None`` values match; a single ``None`` or a value from which
        no number can be extracted is a mismatch.

        Args:
            str1: First value
            str2: Second value

        Returns:
            1.0 if the numbers match within tolerance, 0.0 otherwise
        """
        if str1 is None and str2 is None:
            return 1.0
        if str1 is None or str2 is None:
            return 0.0

        # Extract numeric values
        num1 = self._extract_number(str1)
        num2 = self._extract_number(str2)

        if num1 is None or num2 is None:
            return 0.0

        # Check equality with tolerance
        if self._numbers_equal(num1, num2):
            return 1.0

        return 0.0

    def _extract_number(self, value: Any) -> Union[Decimal, None]:
        """Extract a numeric value from a string or number.

        Args:
            value: Value to extract a number from

        Returns:
            Decimal value or None if no valid number could be extracted
        """
        # A Decimal is already in the target type. Stringifying it could
        # produce exponent notation ("1E+3") that the character filter
        # below would mangle into a different number.
        if isinstance(value, Decimal):
            return value

        # bool is a subclass of int, but str(True) is not a valid Decimal
        # literal; convert to its integer value (True -> 1, False -> 0).
        if isinstance(value, bool):
            value = int(value)

        if isinstance(value, (int, float)):
            return Decimal(str(value))

        if not isinstance(value, str):
            value = str(value)

        # Surrounding whitespace must not hide accounting parentheses.
        value = value.strip()

        # Check for accounting notation: (123) means -123
        is_negative = False
        if value.startswith("(") and value.endswith(")"):
            value = value[1:-1]  # Remove the parentheses
            is_negative = True

        # Remove common currency symbols and other non-numeric characters
        value = re.sub(r"[^0-9.-]", "", value)

        # Handle empty string
        if not value:
            return None

        # Try to convert to Decimal
        try:
            decimal_value = Decimal(value)
        except InvalidOperation:
            return None

        # Apply negative sign if accounting notation was used
        if is_negative:
            decimal_value = -decimal_value
        return decimal_value

    def _numbers_equal(self, num1: Decimal, num2: Decimal) -> bool:
        """Check if two numbers are equal within tolerance.

        The numbers match when they are exactly equal, or when either the
        relative tolerance (measured against ``num1``) or the absolute
        tolerance is satisfied.

        Args:
            num1: First number
            num2: Second number

        Returns:
            True if numbers are equal within tolerance, False otherwise
        """
        if num1 == num2:
            return True

        # NaN/Infinity cannot take part in tolerance arithmetic: Decimal
        # raises InvalidOperation for inf/inf and for ordering against NaN.
        if not (num1.is_finite() and num2.is_finite()):
            return False

        difference = abs(num1 - num2)

        # Check with relative tolerance
        if self.relative_tolerance > 0:
            if num1 == 0:
                # A relative difference is undefined for a zero base, so the
                # magnitude of num2 is compared directly. On failure, fall
                # through so the absolute tolerance is still consulted.
                if abs(num2) <= self.relative_tolerance:
                    return True
            elif difference / abs(num1) <= self.relative_tolerance:
                return True

        # Check with absolute tolerance
        if self.absolute_tolerance > 0 and difference <= self.absolute_tolerance:
            return True

        return False

config property

Return configuration parameters for serialization.

__init__(threshold=1.0, relative_tolerance=0.0, absolute_tolerance=0.0, tolerance=None)

Initialize the comparator.

Parameters:

Name Type Description Default
threshold float

Similarity threshold (default 1.0)

1.0
relative_tolerance float

Relative tolerance for comparison (default 0.0)

0.0
absolute_tolerance float

Absolute tolerance for comparison (default 0.0)

0.0
tolerance Optional[float]

Alias for absolute_tolerance (for backward compatibility)

None
Source code in stickler/comparators/numeric.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def __init__(
    self,
    threshold: float = 1.0,
    relative_tolerance: float = 0.0,
    absolute_tolerance: float = 0.0,
    tolerance: Optional[float] = None,
):
    """Set up the comparator and resolve the tolerance alias.

    Args:
        threshold: Similarity threshold (default 1.0)
        relative_tolerance: Relative tolerance for comparison (default 0.0)
        absolute_tolerance: Absolute tolerance for comparison (default 0.0)
        tolerance: Alias for absolute_tolerance (for backward compatibility)

    Raises:
        ValueError: If ``tolerance`` is given together with a non-zero
            ``absolute_tolerance``.
    """
    super().__init__(threshold=threshold)
    self.relative_tolerance = relative_tolerance

    # The legacy alias and the canonical argument are mutually exclusive.
    if tolerance is not None and absolute_tolerance != 0.0:
        raise ValueError(
            "Cannot specify both 'tolerance' and 'absolute_tolerance'. Use 'absolute_tolerance'."
        )

    # Prefer the alias when it was supplied, otherwise the canonical value.
    self.absolute_tolerance = (
        absolute_tolerance if tolerance is None else tolerance
    )

compare(str1, str2)

Compare two values numerically.

Parameters:

Name Type Description Default
str1 Any

First value

required
str2 Any

Second value

required

Returns:

Type Description
float

1.0 if the numbers match within tolerance, 0.0 otherwise

Source code in stickler/comparators/numeric.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def compare(self, str1: Any, str2: Any) -> float:
    """Score two values as numerically equal (1.0) or not (0.0).

    Args:
        str1: First value
        str2: Second value

    Returns:
        1.0 if the numbers match within tolerance, 0.0 otherwise
    """
    # None only matches None; a single missing side is a mismatch.
    if str1 is None or str2 is None:
        return 1.0 if (str1 is None and str2 is None) else 0.0

    # Parse both sides before judging either one.
    first = self._extract_number(str1)
    second = self._extract_number(str2)

    # A side without a usable number can never match.
    if first is None or second is None:
        return 0.0

    # Delegate the tolerance-aware equality test.
    return 1.0 if self._numbers_equal(first, second) else 0.0

stickler.comparators.NumericExactC = NumericComparator module-attribute

stickler.comparators.DateComparator

Bases: BaseComparator

Deterministic date comparator with year/range awareness.

See docs/docs/Guides/Comparators/date-comparator.md for the full behavior reference, configuration matrix, and corner cases.

Parameters:

Name Type Description Default
threshold float

Forwarded to :class:BaseComparator.

1.0
tolerance Optional[Union[timedelta, int, float]]

Optional window for Tier 1 single-vs-single comparisons only (range and partial-year branches ignore it). Accepts a timedelta or a numeric value in days. A whole-day tolerance floors both sides to the calendar day (time ignored); a sub-day tolerance (e.g. 1.5 = 36h) compares actual timestamps. Defaults to None, which is normalized to timedelta(0) (same calendar day).

None
dayfirst Optional[bool]

How to interpret ambiguous numeric dates like "01/02/2025". None (default) tries both interpretations and takes the better-matching score; True forces day-first; False forces month-first.

None
allow_partial_year bool

If True, year-less ↔ year-bearing pairs with matching month/day score 0.7. Default False.

False
range_mode RangeMode

How range comparisons are scored. One of "strict", "reject", "contains", "graded" (default).

'graded'
precision_mode PrecisionMode

How month/day resolution mismatches are scored ("Jan 2024" vs "Jan 1, 2024"). The first argument to :meth:compare is treated as ground truth.

  • "exact" (default): both sides must share the same resolution; a fabricated or dropped month/day is a miss.
  • "gt_loose": the prediction may be finer than the ground truth (extra precision ignored if consistent at the ground truth's grain) but not coarser.
  • "overlap": symmetric — either side may be coarser, as long as they agree on every field both sides specify.
'exact'
Source code in stickler/comparators/date.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
class DateComparator(BaseComparator):
    """Deterministic date comparator with year/range awareness.

    See ``docs/docs/Guides/Comparators/date-comparator.md`` for the full
    behavior reference, configuration matrix, and corner cases.

    Args:
        threshold: Forwarded to :class:`BaseComparator`.
        tolerance: Optional window for Tier 1 single-vs-single
            comparisons only (range and partial-year branches ignore it).
            Accepts a ``timedelta`` or a numeric value in days. A
            whole-day tolerance floors both sides to the calendar day
            (time ignored); a sub-day tolerance (e.g. ``1.5`` = 36h)
            compares actual timestamps. Defaults to ``None``, which is
            normalized to ``timedelta(0)`` (same calendar day).
        dayfirst: How to interpret ambiguous numeric dates like
            ``"01/02/2025"``. ``None`` (default) tries both
            interpretations and takes the better-matching score; ``True``
            forces day-first; ``False`` forces month-first.
        allow_partial_year: If ``True``, year-less ↔ year-bearing pairs
            with matching month/day score ``0.7``. Default ``False``.
        range_mode: How range comparisons are scored. One of
            ``"strict"``, ``"reject"``, ``"contains"``, ``"graded"``
            (default).
        precision_mode: How month/day *resolution* mismatches are scored
            (``"Jan 2024"`` vs ``"Jan 1, 2024"``). The first argument to
            :meth:`compare` is treated as ground truth.

            - ``"exact"`` (default): both sides must share the same
              resolution; a fabricated or dropped month/day is a miss.
            - ``"gt_loose"``: the prediction may be *finer* than the
              ground truth (extra precision ignored if consistent at the
              ground truth's grain) but not coarser.
            - ``"overlap"``: symmetric — either side may be coarser, as
              long as they agree on every field both sides specify.
    """

    def __init__(
        self,
        threshold: float = 1.0,
        tolerance: Optional[Union[timedelta, int, float]] = None,
        dayfirst: Optional[bool] = None,
        allow_partial_year: bool = False,
        range_mode: RangeMode = "graded",
        precision_mode: PrecisionMode = "exact",
    ):
        """Validate the options and store them on the instance.

        Raises:
            ImportError: If python-dateutil is not available.
            ValueError: If ``dayfirst``, ``range_mode``, ``precision_mode``
                or ``tolerance`` has an unsupported value or type, or if
                ``tolerance`` is negative.
        """
        super().__init__(threshold=threshold)

        if not _DATEUTIL_AVAILABLE:
            raise ImportError(
                "The python-dateutil library is required for DateComparator. "
                "Install it with: pip install python-dateutil"
            )

        if dayfirst not in (None, True, False):
            raise ValueError(
                f"dayfirst must be None, True, or False; got {dayfirst!r}"
            )

        # The two enumerated options share one validation shape; they are
        # checked in the same order as they appear in the signature.
        enumerated = (
            ("range_mode", range_mode, _VALID_RANGE_MODES),
            ("precision_mode", precision_mode, _VALID_PRECISION_MODES),
        )
        for option, chosen, allowed in enumerated:
            if chosen not in allowed:
                raise ValueError(
                    f"{option} must be one of {allowed}; got {chosen!r}"
                )

        # Normalize ``tolerance`` to a timedelta. Plain numbers mean days,
        # which keeps the option expressible in JSON-schema configs where a
        # literal ``timedelta(days=N)`` cannot be written.
        if tolerance is None:
            window = timedelta(0)
        elif isinstance(tolerance, timedelta):
            window = tolerance
        elif isinstance(tolerance, (int, float)) and not isinstance(
            tolerance, bool
        ):
            window = timedelta(days=tolerance)
        else:
            # bool subclasses int; it is rejected explicitly so True/False
            # do not silently turn into 1-day / 0-day windows.
            if isinstance(tolerance, bool):
                offender = f"bool {tolerance!r}"
            else:
                offender = type(tolerance).__name__
            raise ValueError(
                "tolerance must be a timedelta or a numeric value in days; "
                f"got {offender}"
            )

        self.tolerance = window
        if window < timedelta(0):
            raise ValueError("tolerance must be non-negative")

        self.dayfirst = dayfirst
        self.allow_partial_year = allow_partial_year
        self.range_mode = range_mode
        self.precision_mode = precision_mode

    @property
    def config(self) -> Optional[dict]:
        """Round-trippable config for JSON-schema export.

        Only non-default values are emitted, and an all-default instance
        returns ``None`` — matching ``NumericComparator.config`` and
        keeping a redundant ``x-aws-stickler-comparator-config`` block out
        of every exported schema (the exporter keys off truthiness).

        Tolerance is exported as days (an int when the timedelta is a
        whole number of days, otherwise a float) so it can survive a
        JSON round-trip.
        """
        cfg: dict = {}
        if self.dayfirst is not None:
            cfg["dayfirst"] = self.dayfirst
        if self.allow_partial_year:
            cfg["allow_partial_year"] = self.allow_partial_year
        if self.range_mode != "graded":
            cfg["range_mode"] = self.range_mode
        if self.precision_mode != "exact":
            cfg["precision_mode"] = self.precision_mode
        if self.tolerance != timedelta(0):
            seconds = self.tolerance.total_seconds()
            days = seconds / 86400
            cfg["tolerance"] = int(days) if days.is_integer() else days
        return cfg or None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def compare(self, str1: Any, str2: Any) -> float:
        """Score two date values per the tier system documented above."""
        if str1 is None and str2 is None:
            return 1.0
        if str1 is None or str2 is None:
            return 0.0

        # Resolve dayfirst pairwise. ``None`` means "try both
        # interpretations and take the best score" — that way a string
        # whose layout is genuinely ambiguous in isolation can still
        # match if one consistent interpretation lines up.
        #
        # A malformed value must never crash an evaluation run, so any
        # datetime-comparison edge (e.g. mixed tz-awareness the alignment
        # helpers didn't anticipate) degrades to 0.0 like every other
        # parse failure. The range/single paths align timezones inline;
        # this is a backstop, not the primary defense.
        try:
            if self.dayfirst is not None:
                return self._compare_with_dayfirst(str1, str2, self.dayfirst)

            return max(
                self._compare_with_dayfirst(str1, str2, False),
                self._compare_with_dayfirst(str1, str2, True),
            )
        except TypeError:
            return 0.0

    def _compare_with_dayfirst(
        self, str1: Any, str2: Any, dayfirst: bool
    ) -> float:
        """Run the tier dispatch with ``dayfirst`` pinned to one value."""
        a = self._parse(str1, dayfirst=dayfirst)
        b = self._parse(str2, dayfirst=dayfirst)
        if a is None or b is None:
            return 0.0

        a_is_range = isinstance(a, _ParsedRange)
        b_is_range = isinstance(b, _ParsedRange)

        # ``reject`` mode: any range input zeros out the comparison.
        if self.range_mode == "reject" and (a_is_range or b_is_range):
            return 0.0

        # Tier 4b: range vs range
        if a_is_range and b_is_range:
            return self._compare_range_range(a, b)

        # Tier 4: range vs single. ``a`` is always the ground truth (first
        # compare() argument); track whether it's the single side so the
        # directional precision gate (gt_loose) orients correctly.
        if a_is_range or b_is_range:
            single = b if a_is_range else a  # type: ignore[assignment]
            rng = a if a_is_range else b  # type: ignore[assignment]
            return self._compare_range_single(
                rng, single, single_is_gt=not a_is_range
            )

        # Both singles
        return self._compare_singles(a, b)

    # ------------------------------------------------------------------
    # Tier dispatch
    # ------------------------------------------------------------------

    def _compare_range_range(
        self, a: _ParsedRange, b: _ParsedRange
    ) -> float:
        """Tier 4b: range vs range under the configured range_mode."""
        # Year-presence consistency on both endpoints of both sides.
        # If endpoints disagree on year-presence within a side it's
        # malformed; we treat that as a 0.0 rather than try to repair.
        if a.start.has_year != a.end.has_year:
            return 0.0
        if b.start.has_year != b.end.has_year:
            return 0.0

        # Month/day resolution gate, per endpoint (precision_mode).
        if not self._resolution_ok(a.start, b.start):
            return 0.0
        if not self._resolution_ok(a.end, b.end):
            return 0.0

        year_match = a.start.has_year == b.start.has_year
        partial_year_multiplier = self._partial_year_multiplier(year_match)
        if partial_year_multiplier == 0.0:
            return 0.0

        # When year-presence differs (only reachable under
        # allow_partial_year=True), the year-less side's year is a
        # fictional 1900 placeholder, so endpoint equality and overlap are
        # judged on (month, day) only — mirroring the range-vs-single
        # m/d fallback. Otherwise compare full dates.
        if self.range_mode in ("strict", "contains"):
            if year_match:
                endpoints_match = self._dates_equal_day(
                    a.start.dt, b.start.dt
                ) and self._dates_equal_day(a.end.dt, b.end.dt)
            else:
                endpoints_match = self._md_equal(
                    a.start.dt, b.start.dt
                ) and self._md_equal(a.end.dt, b.end.dt)
            if endpoints_match:
                return 1.0 * partial_year_multiplier
            return 0.0

        # graded → Jaccard (reject mode is handled before we get here)
        jaccard = self._jaccard(a, b) if year_match else self._md_jaccard(a, b)
        return jaccard * partial_year_multiplier

    def _compare_range_single(
        self, rng: _ParsedRange, single: _ParsedSingle, single_is_gt: bool
    ) -> float:
        """Tier 4: range-vs-single under the configured range_mode.

        ``single_is_gt`` records whether the single side was the ground
        truth (the first :meth:`compare` argument), so the directional
        precision gate (``gt_loose``) is oriented the same way it is in the
        single-vs-single and range-vs-range paths.
        """
        if self.range_mode == "strict":
            return 0.0

        # Month/day resolution gate (precision_mode), applied per endpoint
        # with ground truth in the correct position — mirroring
        # _compare_range_range. Without this a reduced-precision single
        # (e.g. 'Jan 2024', whose day is fabricated to the 1st) would land
        # inside a day-grain range and score credit even under the default
        # 'exact' mode, the same score-inflating fabrication the gate
        # exists to refuse on the single-vs-single path.
        if single_is_gt:
            resolution_ok = self._resolution_ok(
                single, rng.start
            ) and self._resolution_ok(single, rng.end)
        else:
            resolution_ok = self._resolution_ok(
                rng.start, single
            ) and self._resolution_ok(rng.end, single)
        if not resolution_ok:
            return 0.0

        # Year-presence consistency: the range's endpoints must agree
        # internally, and we compare against the single's claim.
        if rng.start.has_year != rng.end.has_year:
            return 0.0
        year_match = rng.start.has_year == single.has_year
        partial_year_multiplier = self._partial_year_multiplier(year_match)
        if partial_year_multiplier == 0.0:
            return 0.0

        # Containment: when both sides agree on year-presence we compare
        # the full datetimes; when they disagree (only possible under
        # allow_partial_year=True) the year on the year-less side is a
        # fictional 1900 placeholder, so we compare on (month, day) only.
        if year_match:
            # Normalize all three to comparable naive days: endpoints and
            # the single may differ in tz-awareness.
            s = self._normalize_day(single.dt)
            lo = self._normalize_day(rng.start.dt)
            hi = self._normalize_day(rng.end.dt)
            inside = lo <= s <= hi
        else:
            inside = self._md_in_md_range(
                (single.dt.month, single.dt.day),
                (rng.start.dt.month, rng.start.dt.day),
                (rng.end.dt.month, rng.end.dt.day),
            )

        if inside:
            base = (
                1.0 if self.range_mode == "contains" else _RANGE_CONTAINS_GRADED_SCORE
            )
            return base * partial_year_multiplier
        return 0.0

    @staticmethod
    def _md_in_md_range(
        target: Tuple[int, int],
        lo: Tuple[int, int],
        hi: Tuple[int, int],
    ) -> bool:
        """Check (month, day) containment in a (month, day) range.

        When ``lo <= hi`` the span is the ordinary closed interval. When
        ``lo > hi`` the range wraps the year boundary in m/d space (e.g.
        Dec 20 → Jan 5, common for fiscal/holiday ranges in IDP data), so
        membership is the union of the two ends:
        ``target >= lo`` (late-year tail) or ``target <= hi`` (early-year
        head). Endpoints are inclusive either way.
        """
        if lo <= hi:
            return lo <= target <= hi
        return target >= lo or target <= hi

    def _compare_singles(
        self, a: _ParsedSingle, b: _ParsedSingle
    ) -> float:
        """Score two single dates across the year-presence and resolution axes.

        ``a`` is the ground truth (first :meth:`compare` argument). Two
        independent gates run before any value comparison:

        * month/day resolution (``precision_mode``), and
        * year presence (``allow_partial_year``, via
          :meth:`_partial_year_multiplier`).

        If both gates pass, the fields that *both* sides specify must
        agree; year-bearing day-grain pairs additionally honor
        ``tolerance``.
        """
        # Axis 1 — month/day resolution.
        if not self._resolution_ok(a, b):
            return 0.0

        # Axis 2 — year presence (carries the 0.7 partial-year credit).
        year_multiplier = self._partial_year_multiplier(a.has_year == b.has_year)
        if year_multiplier == 0.0:
            return 0.0

        if not self._single_values_agree(a, b):
            return 0.0

        return year_multiplier

    def _resolution_ok(self, a: _ParsedSingle, b: _ParsedSingle) -> bool:
        """Whether a month/day resolution mismatch is permitted.

        ``a`` is ground truth. Equal resolution is always fine; otherwise
        ``precision_mode`` decides:

        * ``"exact"`` — never (resolutions must match);
        * ``"gt_loose"`` — only if the prediction ``b`` is *finer* than
          the ground truth ``a`` (``b`` may add precision, not drop it);
        * ``"overlap"`` — either side may be coarser.
        """
        if a.md_resolution == b.md_resolution:
            return True
        if self.precision_mode == "exact":
            return False
        if self.precision_mode == "overlap":
            return True
        # gt_loose
        return b.md_resolution >= a.md_resolution

    def _single_values_agree(
        self, a: _ParsedSingle, b: _ParsedSingle
    ) -> bool:
        """Whether two singles agree on every field both sides specify.

        Year-bearing day-grain pairs go through the ``tolerance`` window
        (which spans month/year boundaries); every other pairing is exact
        on the fields present at the common (coarser) grain.
        """
        if a.has_year and b.has_year:
            if a.md_resolution == 2 and b.md_resolution == 2:
                # Both full dates: tolerance-aware comparison.
                a_dt, b_dt = self._align_timezones(a.dt, b.dt)
                if not self._has_subday_tolerance():
                    # Whole-day (or zero) tolerance keeps same-calendar-day
                    # semantics: floor to midnight so intra-day times are
                    # ignored and the window counts whole days.
                    a_dt = self._truncate_day(a_dt)
                    b_dt = self._truncate_day(b_dt)
                # A sub-day tolerance (e.g. 1.5 days = 36h) means the caller
                # cares about real elapsed time, so compare actual
                # timestamps without flooring.
                return abs(a_dt - b_dt) <= self.tolerance
            if a.dt.year != b.dt.year:
                return False
        if a.has_month and b.has_month and a.dt.month != b.dt.month:
            return False
        if a.has_day and b.has_day and a.dt.day != b.dt.day:
            return False
        return True

    def _has_subday_tolerance(self) -> bool:
        """Whether ``tolerance`` carries a sub-day (hours/minutes) component."""
        return self.tolerance.total_seconds() % 86400 != 0

    def _partial_year_multiplier(self, year_match: bool) -> float:
        """Multiplier applied to range scores when year-presence (mis)matches.

        Returns 1.0 when both sides agree on year-presence. Returns
        ``_PARTIAL_YEAR_MULTIPLIER`` (0.7) when ``allow_partial_year=True``
        and they disagree. Returns 0.0 otherwise.
        """
        if year_match:
            return 1.0
        return _PARTIAL_YEAR_MULTIPLIER if self.allow_partial_year else 0.0

    def _jaccard(self, a: _ParsedRange, b: _ParsedRange) -> float:
        """Jaccard overlap between two date ranges, day-level."""
        # Normalize to comparable naive days so endpoints from different
        # ranges (possibly mixed tz-awareness) can be min/max'd together.
        a_lo = self._normalize_day(a.start.dt)
        a_hi = self._normalize_day(a.end.dt)
        b_lo = self._normalize_day(b.start.dt)
        b_hi = self._normalize_day(b.end.dt)

        # Inclusive day count.
        intersect_lo = max(a_lo, b_lo)
        intersect_hi = min(a_hi, b_hi)
        if intersect_hi < intersect_lo:
            return 0.0

        intersect_days = (intersect_hi - intersect_lo).days + 1
        union_lo = min(a_lo, b_lo)
        union_hi = max(a_hi, b_hi)
        union_days = (union_hi - union_lo).days + 1
        return intersect_days / union_days

    @staticmethod
    def _md_equal(dt1: datetime, dt2: datetime) -> bool:
        """Whether two datetimes share a (month, day) — ignores year/time."""
        return (dt1.month, dt1.day) == (dt2.month, dt2.day)

    @classmethod
    def _md_jaccard(cls, a: _ParsedRange, b: _ParsedRange) -> float:
        """Jaccard overlap of two ranges in (month, day) space.

        Used when year-presence differs: the year-less side's year is a
        1900 placeholder, so overlap is measured over the set of
        ``(month, day)`` pairs each range spans rather than absolute days.
        """
        a_days = cls._md_set(a)
        b_days = cls._md_set(b)
        union = a_days | b_days
        if not union:
            return 0.0
        return len(a_days & b_days) / len(union)

    @staticmethod
    def _md_set(rng: _ParsedRange) -> set:
        """The set of (month, day) pairs a range covers, inclusive.

        Walks day by day from start to end. Bounded by a one-year cap so
        a malformed multi-year span can't run away.
        """
        start = rng.start.dt.replace(hour=0, minute=0, second=0, microsecond=0)
        end = rng.end.dt.replace(hour=0, minute=0, second=0, microsecond=0)
        days = (end - start).days
        # Year-less ranges can't wrap (parser enforces start <= end and
        # both default to 1900); cap the walk at a full year defensively.
        days = min(days, 366)
        out = set()
        cur = start
        for _ in range(days + 1):
            out.add((cur.month, cur.day))
            cur += timedelta(days=1)
        return out

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------

    def _parse(self, value: Any, dayfirst: bool) -> _ParseResult:
        """Parse input into a single date or range, or ``None`` on failure.

        Single-day ranges (``X to X``) are normally collapsed to a
        ``_ParsedSingle`` so they compare consistently with the bare
        single-date form. Under ``range_mode="reject"`` we skip the
        collapse so that the original range shape is preserved and the
        comparison surfaces it as a structural mismatch.
        """
        if isinstance(value, datetime):
            return _ParsedSingle(dt=value, has_year=True)
        if isinstance(value, date):
            return _ParsedSingle(
                dt=datetime(value.year, value.month, value.day), has_year=True
            )

        if not isinstance(value, str):
            value = str(value)

        s = value.strip()
        if not s:
            return None

        # Reject pathologically long input before any parsing. A real date
        # string is well under this; a huge value is malformed and would
        # otherwise cost dateutil a full scan (and could split into two
        # huge range halves). Over-length degrades to 0.0 like any other
        # parse failure.
        if len(s) > _MAX_INPUT_LEN:
            return None

        rng = self._try_parse_range(s, dayfirst=dayfirst)
        if rng is not None:
            # Collapse degenerate single-day ranges to singles, EXCEPT
            # under reject mode where we want the range shape preserved.
            if (
                self.range_mode != "reject"
                and self._dates_equal_day(rng.start.dt, rng.end.dt)
                and rng.start.has_year == rng.end.has_year
            ):
                return rng.start
            return rng

        # A string that carries a range-delimiter signal but didn't parse
        # as a valid range is a malformed/truncated range, not a single
        # date. Falling through to a single parse here would let dateutil
        # silently swallow a dangling dash (``'- 10/24/16'``) and score it
        # as a clean date.
        if self._has_range_delim_signal(s):
            return None

        return self._try_parse_single(s, dayfirst=dayfirst)

    @staticmethod
    def _has_range_delim_signal(s: str) -> bool:
        """Return whether ``s`` looks like it was meant to be a range.

        True when any configured range delimiter occurs in ``s``, or when
        ``s`` begins or ends with a bare dash. Single dates put their
        dashes between digits (``2025-01-01``, ``10-24-2016``), so a dash
        at either edge indicates truncated range input.
        """
        for delim in _RANGE_DELIMS:
            if delim in s:
                return True
        return "-" in (s[:1], s[-1:])

    def _try_parse_range(
        self, s: str, dayfirst: bool
    ) -> Optional[_ParsedRange]:
        """Try to read ``s`` as ``<date><delimiter><date>``.

        Delimiters are tried in their configured order, splitting at the
        first occurrence. The first delimiter whose two halves both parse
        decides the outcome: a range if start <= end, ``None`` if the
        endpoints are reversed. Returns ``None`` when no delimiter yields
        two parseable halves.
        """
        present = [delim for delim in _RANGE_DELIMS if delim in s]
        for delim in present:
            head, tail = s.split(delim, 1)
            lower = self._try_parse_single(head.strip(), dayfirst=dayfirst)
            upper = self._try_parse_single(tail.strip(), dayfirst=dayfirst)
            if lower is not None and upper is not None:
                # Align before ordering: the endpoints may differ in
                # tz-awareness (one with an offset, one naive), and
                # comparing them as-is would raise TypeError.
                lower_dt, upper_dt = self._align_timezones(lower.dt, upper.dt)
                if lower_dt > upper_dt:
                    return None
                return _ParsedRange(start=lower, end=upper)
        return None

    def _try_parse_single(
        self, s: str, dayfirst: bool
    ) -> Optional[_ParsedSingle]:
        """Parse one side as a single date (or ``None`` on failure).

        Year/month/day presence is detected by parsing twice with default
        dates that differ in *all three* components: any field the parser
        had to borrow from the default reveals itself by disagreeing
        between the two parses. This is what lets reduced-resolution
        inputs (``'Jan 2024'``, ``'2024'``) be told apart from full dates
        rather than silently fabricating the missing fields.
        """
        if not s:
            return None

        # Year-first layouts (ISO and ``YYYY/MM/DD``) fix month-then-day
        # order, so the day-first interpretation would corrupt them. Pin
        # those to month-first regardless of the requested ``dayfirst``.
        if _YEAR_FIRST_RE.match(s):
            dayfirst = False

        try:
            dt_lo = _dateutil_parser.parse(
                s, default=datetime(1900, 1, 1), dayfirst=dayfirst
            )
            # Default differs in year, month, AND day so each component's
            # presence can be probed independently.
            dt_hi = _dateutil_parser.parse(
                s, default=datetime(2099, 6, 15), dayfirst=dayfirst
            )
        except (ValueError, OverflowError, TypeError):
            return None

        has_year = dt_lo.year == dt_hi.year
        has_month = dt_lo.month == dt_hi.month
        has_day = dt_lo.day == dt_hi.day

        # Reject time-only inputs ('12:30 PM', '10/45AM' etc.): no date
        # component at all was specified, so everything came from the
        # default and only the time survives.
        if not (has_year or has_month or has_day):
            return None

        return _ParsedSingle(
            dt=dt_lo, has_year=has_year, has_month=has_month, has_day=has_day
        )

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _align_timezones(
        dt1: datetime, dt2: datetime
    ) -> tuple[datetime, datetime]:
        """Make two datetimes tz-comparable for subtraction."""
        aware1 = dt1.tzinfo is not None
        aware2 = dt2.tzinfo is not None
        if aware1 and aware2:
            return dt1.astimezone(timezone.utc), dt2.astimezone(timezone.utc)
        if aware1 and not aware2:
            return dt1, dt2.replace(tzinfo=dt1.tzinfo)
        if aware2 and not aware1:
            return dt1.replace(tzinfo=dt2.tzinfo), dt2
        return dt1, dt2

    @staticmethod
    def _truncate_day(dt: datetime) -> datetime:
        return dt.replace(hour=0, minute=0, second=0, microsecond=0)

    @staticmethod
    def _normalize_day(dt: datetime) -> datetime:
        """Collapse to a tz-naive midnight so any two dates are comparable.

        Aware datetimes are converted to UTC first; the result is always
        naive, so values that started with differing tz-awareness can be
        ordered against each other without raising ``TypeError``.
        """
        if dt.tzinfo is not None:
            dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
        return dt.replace(hour=0, minute=0, second=0, microsecond=0)

    @classmethod
    def _dates_equal_day(cls, dt1: datetime, dt2: datetime) -> bool:
        a, b = cls._align_timezones(dt1, dt2)
        return cls._truncate_day(a) == cls._truncate_day(b)

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"threshold={self.threshold}, "
            f"tolerance={self.tolerance!r}, "
            f"dayfirst={self.dayfirst!r}, "
            f"allow_partial_year={self.allow_partial_year}, "
            f"range_mode={self.range_mode!r}, "
            f"precision_mode={self.precision_mode!r})"
        )

config property

Round-trippable config for JSON-schema export.

Only non-default values are emitted, and an all-default instance returns None — matching NumericComparator.config and keeping a redundant x-aws-stickler-comparator-config block out of every exported schema (the exporter keys off truthiness).

Tolerance is exported as days (an int when the timedelta is a whole number of days, otherwise a float) so it can survive a JSON round-trip.

compare(str1, str2)

Score two date values according to the comparator's tiered rules (see docs/docs/Guides/Comparators/date-comparator.md for the full behavior reference).

Source code in stickler/comparators/date.py
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
def compare(self, str1: Any, str2: Any) -> float:
    """Score two date values; malformed input degrades to ``0.0``.

    Args:
        str1: Ground-truth value (first argument).
        str2: Predicted value.

    Returns:
        ``1.0`` when both values are ``None``, ``0.0`` when exactly one
        is ``None``; otherwise the score from ``_compare_with_dayfirst``.
        With ``dayfirst=None`` both interpretations are scored and the
        higher score is returned.
    """
    if str1 is None and str2 is None:
        return 1.0
    if str1 is None or str2 is None:
        return 0.0

    # Resolve dayfirst pairwise. ``None`` means "try both
    # interpretations and take the best score" — that way a string
    # whose layout is genuinely ambiguous in isolation can still
    # match if one consistent interpretation lines up.
    #
    # A malformed value must never crash an evaluation run. Besides
    # ``TypeError`` (mixed tz-awareness), datetime arithmetic can raise
    # ``OverflowError`` (e.g. converting a date near year 1 or 9999 to
    # UTC, or stepping past 9999-12-31) and ``ValueError``, so all three
    # degrade to 0.0 like every other parse failure. The range/single
    # paths align timezones inline; this is a backstop, not the primary
    # defense.
    try:
        if self.dayfirst is not None:
            return self._compare_with_dayfirst(str1, str2, self.dayfirst)

        return max(
            self._compare_with_dayfirst(str1, str2, False),
            self._compare_with_dayfirst(str1, str2, True),
        )
    except (TypeError, ValueError, OverflowError):
        return 0.0

stickler.comparators.FuzzyComparator

Bases: BaseComparator

Comparator for fuzzy string matching.

This comparator uses the rapidfuzz library to calculate similarity between strings using advanced Levenshtein distance calculations. It provides better fuzzy matching than basic Levenshtein for many use cases.

If rapidfuzz is not available, this will raise an ImportError when instantiated.

Source code in stickler/comparators/fuzzy.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
class FuzzyComparator(BaseComparator):
    """Fuzzy string comparator backed by the rapidfuzz library.

    Values are stringified, optionally normalized, and scored with one of the
    ``rapidfuzz.fuzz`` ratio functions. The 0-100 score is rescaled to the
    0.0-1.0 range used by all comparators.

    Instantiation raises ImportError when rapidfuzz is not installed.
    """

    def __init__(
        self, method: str = "ratio", normalize: bool = True, threshold: float = 0.7
    ):
        """Set up the comparator and pick the scoring function.

        Args:
            method: Name of the fuzzy matching method. Recognized names:
                - "ratio": Standard Levenshtein distance ratio
                - "partial_ratio": Partial string matching
                - "token_sort_ratio": Token-based matching with sorting
                - "token_set_ratio": Token-based matching with set operations
                Any other name falls back to "ratio" scoring.
            normalize: When true, inputs are stripped of surrounding
                whitespace and lowercased before scoring.
            threshold: Similarity threshold (default 0.7)

        Raises:
            ImportError: If rapidfuzz library is not available
        """
        super().__init__(threshold=threshold)

        if not RAPIDFUZZ_AVAILABLE:
            raise ImportError(
                "The rapidfuzz library is required for FuzzyComparator. "
                "Install it with: pip install rapidfuzz"
            )

        self._method = method
        self._normalize = normalize

        scorers = {
            "ratio": fuzz.ratio,
            "partial_ratio": fuzz.partial_ratio,
            "token_sort_ratio": fuzz.token_sort_ratio,
            "token_set_ratio": fuzz.token_set_ratio,
        }
        # Unrecognized method names silently use fuzz.ratio; ``name`` and
        # ``config`` still report the name that was requested.
        self._fuzzy_func = scorers.get(method, fuzz.ratio)

    @property
    def name(self) -> str:
        """Comparator name: ``fuzzy_`` followed by the requested method."""
        return f"fuzzy_{self._method}"

    @property
    def config(self) -> Optional[Dict[str, Any]]:
        """Configuration dict with the ``method`` and ``normalize`` settings."""
        settings = {"method": self._method}
        settings["normalize"] = self._normalize
        return settings

    def compare(self, value1: Any, value2: Any) -> float:
        """Score the fuzzy similarity of two values.

        Args:
            value1: First string or value (stringified with ``str``)
            value2: Second string or value (stringified with ``str``)

        Returns:
            1.0 when both values are None or both strings are empty, 0.0 when
            exactly one value is None, otherwise the selected rapidfuzz score
            divided by 100.
        """
        if value1 is None or value2 is None:
            return 1.0 if (value1 is None and value2 is None) else 0.0

        left, right = str(value1), str(value2)
        if self._normalize:
            left, right = left.strip().lower(), right.strip().lower()

        # Two empty strings are treated as identical without calling the scorer.
        if not left and not right:
            return 1.0

        try:
            # rapidfuzz reports 0-100; rescale to 0.0-1.0.
            return self._fuzzy_func(left, right) / 100.0
        except Exception:
            # Scorer failed: degrade to exact equality instead of crashing.
            return float(left == right)

config property

Return configuration parameters.

name property

Return the name of the comparator.

__init__(method='ratio', normalize=True, threshold=0.7)

Initialize the fuzzy comparator.

Parameters:

Name Type Description Default
method str

The fuzzy matching method to use. Options: - "ratio": Standard Levenshtein distance ratio - "partial_ratio": Partial string matching - "token_sort_ratio": Token-based matching with sorting - "token_set_ratio": Token-based matching with set operations

'ratio'
normalize bool

Whether to normalize input strings before comparison (strip whitespace, lowercase)

True
threshold float

Similarity threshold (default 0.7)

0.7

Raises:

Type Description
ImportError

If rapidfuzz library is not available

Source code in stickler/comparators/fuzzy.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def __init__(
    self, method: str = "ratio", normalize: bool = True, threshold: float = 0.7
):
    """Set up the comparator and pick the scoring function.

    Args:
        method: Name of the fuzzy matching method. Recognized names:
            - "ratio": Standard Levenshtein distance ratio
            - "partial_ratio": Partial string matching
            - "token_sort_ratio": Token-based matching with sorting
            - "token_set_ratio": Token-based matching with set operations
            Any other name falls back to "ratio" scoring.
        normalize: When true, inputs are stripped of surrounding whitespace
            and lowercased before scoring.
        threshold: Similarity threshold (default 0.7)

    Raises:
        ImportError: If rapidfuzz library is not available
    """
    super().__init__(threshold=threshold)

    if not RAPIDFUZZ_AVAILABLE:
        raise ImportError(
            "The rapidfuzz library is required for FuzzyComparator. "
            "Install it with: pip install rapidfuzz"
        )

    self._method = method
    self._normalize = normalize

    scorers = {
        "ratio": fuzz.ratio,
        "partial_ratio": fuzz.partial_ratio,
        "token_sort_ratio": fuzz.token_sort_ratio,
        "token_set_ratio": fuzz.token_set_ratio,
    }
    # Unrecognized method names silently use fuzz.ratio.
    self._fuzzy_func = scorers.get(method, fuzz.ratio)

compare(value1, value2)

Compare two strings using fuzzy matching.

Parameters:

Name Type Description Default
value1 Any

First string or value

required
value2 Any

Second string or value

required

Returns:

Type Description
float

Similarity score between 0.0 and 1.0

Source code in stickler/comparators/fuzzy.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def compare(self, value1: Any, value2: Any) -> float:
    """Score the fuzzy similarity of two values.

    Args:
        value1: First string or value (stringified with ``str``)
        value2: Second string or value (stringified with ``str``)

    Returns:
        1.0 when both values are None or both strings are empty, 0.0 when
        exactly one value is None, otherwise the selected fuzzy score
        divided by 100.
    """
    if value1 is None or value2 is None:
        return 1.0 if (value1 is None and value2 is None) else 0.0

    left, right = str(value1), str(value2)
    if self._normalize:
        left, right = left.strip().lower(), right.strip().lower()

    # Two empty strings are treated as identical without calling the scorer.
    if not left and not right:
        return 1.0

    try:
        # The scorer reports 0-100; rescale to 0.0-1.0.
        return self._fuzzy_func(left, right) / 100.0
    except Exception:
        # Scorer failed: degrade to exact equality instead of crashing.
        return float(left == right)

stickler.comparators.BERTComparator

Bases: BaseComparator

Comparator that uses BERT embeddings for semantic similarity.

This comparator uses the BERTScore metric to calculate semantic similarity between strings, returning the f1 score as the similarity measure.

Example
comparator = BERTComparator(threshold=0.8)

# Returns similarity score based on semantic similarity
score = comparator.compare("The cat sat on the mat", "A feline was sitting on a rug")
Source code in stickler/comparators/bert.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
class BERTComparator(BaseComparator):
    """Semantic comparator scored with BERTScore.

    Both inputs are stringified and cleaned with ``strip_punctuation_space``,
    then passed to the module-level BERTScore ``model``; the F1 component of
    the result is returned as the similarity.

    Example:
        ```python
        comparator = BERTComparator(threshold=0.8)

        # Returns similarity score based on semantic similarity
        score = comparator.compare("The cat sat on the mat", "A feline was sitting on a rug")
        ```
    """

    def __init__(self, threshold: float = 0.7):
        """Create the comparator, requiring a loaded BERTScore model.

        Args:
            threshold: Similarity threshold (0.0-1.0)

        Raises:
            ImportError: If the module-level ``model`` is None.
        """
        super().__init__(threshold=threshold)
        if model is not None:
            return
        raise ImportError(
            "BERTScore model could not be loaded. Please install 'evaluate' package."
        )

    def compare(self, str1: Any, str2: Any) -> float:
        """Score two values by BERTScore F1.

        Args:
            str1: First value (used as the prediction)
            str2: Second value (used as the reference)

        Returns:
            0.0 if either value is None. If either cleaned string is empty,
            1.0 when both are empty and 0.0 otherwise. Otherwise the BERTScore
            F1 value, or an exact-equality score (1.0/0.0) if scoring raises.
        """
        if str1 is None or str2 is None:
            return 0.0

        text1, text2 = str(str1), str(str2)
        cleaned1 = strip_punctuation_space(text1)
        cleaned2 = strip_punctuation_space(text2)

        # Shared by the empty-string shortcut and the error fallback.
        exact_match = 1.0 if cleaned1 == cleaned2 else 0.0

        if not (cleaned1 and cleaned2):
            return exact_match

        try:
            scores = model.compute(
                predictions=[cleaned1], references=[cleaned2], lang="en"
            )
            # One prediction/reference pair was submitted, so take entry 0.
            return scores["f1"][0]
        except Exception as e:
            # Report the failure and degrade to exact equality.
            print(f"BERT comparison error: {str(e)}")
            return exact_match

__init__(threshold=0.7)

Initialize the BERTComparator.

Parameters:

Name Type Description Default
threshold float

Similarity threshold (0.0-1.0)

0.7
Source code in stickler/comparators/bert.py
33
34
35
36
37
38
39
40
41
42
43
def __init__(self, threshold: float = 0.7):
    """Create the comparator, requiring a loaded BERTScore model.

    Args:
        threshold: Similarity threshold (0.0-1.0)

    Raises:
        ImportError: If the module-level ``model`` is None.
    """
    super().__init__(threshold=threshold)
    if model is not None:
        return
    raise ImportError(
        "BERTScore model could not be loaded. Please install 'evaluate' package."
    )

compare(str1, str2)

Compare two strings using BERT semantic similarity.

Parameters:

Name Type Description Default
str1 Any

First string

required
str2 Any

Second string

required

Returns:

Type Description
float

Similarity score between 0.0 and 1.0 based on BERTScore f1

Source code in stickler/comparators/bert.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def compare(self, str1: Any, str2: Any) -> float:
    """Score two values by BERTScore F1.

    Args:
        str1: First value (used as the prediction)
        str2: Second value (used as the reference)

    Returns:
        0.0 if either value is None. If either cleaned string is empty, 1.0
        when both are empty and 0.0 otherwise. Otherwise the BERTScore F1
        value, or an exact-equality score (1.0/0.0) if scoring raises.
    """
    if str1 is None or str2 is None:
        return 0.0

    text1, text2 = str(str1), str(str2)
    cleaned1 = strip_punctuation_space(text1)
    cleaned2 = strip_punctuation_space(text2)

    # Shared by the empty-string shortcut and the error fallback.
    exact_match = 1.0 if cleaned1 == cleaned2 else 0.0

    if not (cleaned1 and cleaned2):
        return exact_match

    try:
        scores = model.compute(
            predictions=[cleaned1], references=[cleaned2], lang="en"
        )
        # One prediction/reference pair was submitted, so take entry 0.
        return scores["f1"][0]
    except Exception as e:
        # Report the failure and degrade to exact equality.
        print(f"BERT comparison error: {str(e)}")
        return exact_match

stickler.comparators.SemanticComparator

Bases: BaseComparator

Comparator that uses embeddings for semantic similarity.

This comparator uses embeddings from a model (default: Titan) to calculate semantic similarity between strings.

Attributes:

Name Type Description
SIMILARITY_FUNCTIONS

Dictionary of similarity functions

bc

BedrockClient instance

model_id

Model ID to use for embeddings

embedding_function

Function to generate embeddings

sim_function

Name of the similarity function to use

similarity_function

The actual similarity function

Source code in stickler/comparators/semantic.py
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
class SemanticComparator(BaseComparator):
    """Embedding-based semantic comparator.

    Each input is turned into an embedding (by default via
    ``generate_bedrock_embedding`` with a Titan model ID) and the two
    embeddings are scored with the configured similarity function.

    Attributes:
        SIMILARITY_FUNCTIONS: Mapping of similarity-function name to callable
        model_id: Model ID to use for embeddings
        embedding_function: Callable that turns one input into an embedding
        sim_function: Name of the similarity function to use
        similarity_function: The callable looked up from SIMILARITY_FUNCTIONS
    """

    SIMILARITY_FUNCTIONS = {
        "cosine_similarity": lambda x, y: 1 - spatial.distance.cosine(x, y)
    }

    def __init__(
        self,
        model_id: str = "amazon.titan-embed-text-v2:0",
        sim_function: str = "cosine_similarity",
        embedding_function: Optional[Callable] = None,
        threshold: float = 0.7,
    ):
        """Configure the embedding source and the similarity function.

        Args:
            model_id: Model ID to use for embeddings
            sim_function: Key into ``SIMILARITY_FUNCTIONS``
            embedding_function: Optional custom embedding callable; when
                omitted, ``generate_bedrock_embedding`` bound to ``model_id``
                is used.
            threshold: Similarity threshold (0.0-1.0)

        Raises:
            KeyError: If ``sim_function`` is not a key of
                ``SIMILARITY_FUNCTIONS``.
        """
        super().__init__(threshold=threshold)

        self.model_id = model_id
        self.embedding_function = (
            embedding_function
            if embedding_function is not None
            else partial(generate_bedrock_embedding, model_id=model_id)
        )

        self.sim_function = sim_function
        self.similarity_function = self.SIMILARITY_FUNCTIONS[sim_function]

    def compare(self, str1: str, str2: str) -> float:
        """Score two values by the similarity of their embeddings.

        On any failure while embedding or scoring, the exception is logged
        together with the model ID, embedding function, input lengths,
        similarity function and exception type, and the result falls back to
        raw equality of the inputs.

        Args:
            str1: First value
            str2: Second value

        Returns:
            0.0 if either value is None; otherwise the similarity function's
            result, or 1.0/0.0 by equality when the fallback is taken.
        """
        if str1 is None or str2 is None:
            return 0.0

        try:
            first_embedding = self.embedding_function(str1)
            second_embedding = self.embedding_function(str2)
            return self.similarity_function(first_embedding, second_embedding)
        except Exception as exc:
            log_context = {
                "embedding_function": _embedding_function_name(
                    self.embedding_function
                ),
                "model_id": getattr(self, "model_id", None),
                "input_1_length": _input_length(str1),
                "input_2_length": _input_length(str2),
                "similarity_function": self.sim_function,
                "exception_type": type(exc).__name__,
            }
            logger.exception(
                "Semantic embedding comparison failed; falling back to string equality",
                extra=log_context,
            )
            # Embedding unavailable: only exact equality can count as a match.
            return 1.0 if str1 == str2 else 0.0

__init__(model_id='amazon.titan-embed-text-v2:0', sim_function='cosine_similarity', embedding_function=None, threshold=0.7)

Initialize the SemanticComparator.

Parameters:

Name Type Description Default
model_id str

Model ID to use for embeddings

'amazon.titan-embed-text-v2:0'
sim_function str

Name of the similarity function to use

'cosine_similarity'
embedding_function Optional[Callable]

Optional custom embedding function

None
threshold float

Similarity threshold (0.0-1.0)

0.7

Raises:

Type Description
ImportError

If BedrockClient is not available and no embedding_function is provided

Source code in stickler/comparators/semantic.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def __init__(
    self,
    model_id: str = "amazon.titan-embed-text-v2:0",
    sim_function: str = "cosine_similarity",
    embedding_function: Optional[Callable] = None,
    threshold: float = 0.7,
):
    """Configure the embedding source and the similarity function.

    Args:
        model_id: Model ID to use for embeddings
        sim_function: Key into ``SIMILARITY_FUNCTIONS``
        embedding_function: Optional custom embedding callable; when omitted,
            ``generate_bedrock_embedding`` bound to ``model_id`` is used.
        threshold: Similarity threshold (0.0-1.0)

    Raises:
        KeyError: If ``sim_function`` is not a key of ``SIMILARITY_FUNCTIONS``.
    """
    super().__init__(threshold=threshold)

    self.model_id = model_id
    self.embedding_function = (
        embedding_function
        if embedding_function is not None
        else partial(generate_bedrock_embedding, model_id=model_id)
    )

    self.sim_function = sim_function
    self.similarity_function = self.SIMILARITY_FUNCTIONS[sim_function]

compare(str1, str2)

Compare two values using semantic similarity.

If embedding generation fails, this logs the model ID, embedding function, input lengths, similarity function, and exception type before falling back to raw equality.

Parameters:

Name Type Description Default
str1 str

First value

required
str2 str

Second value

required

Returns:

Type Description
float

Similarity score between 0.0 and 1.0

Source code in stickler/comparators/semantic.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def compare(self, str1: str, str2: str) -> float:
    """Score two values by the similarity of their embeddings.

    On any failure while embedding or scoring, the exception is logged
    together with the model ID, embedding function, input lengths, similarity
    function and exception type, and the result falls back to raw equality of
    the inputs.

    Args:
        str1: First value
        str2: Second value

    Returns:
        0.0 if either value is None; otherwise the similarity function's
        result, or 1.0/0.0 by equality when the fallback is taken.
    """
    if str1 is None or str2 is None:
        return 0.0

    try:
        first_embedding = self.embedding_function(str1)
        second_embedding = self.embedding_function(str2)
        return self.similarity_function(first_embedding, second_embedding)
    except Exception as exc:
        log_context = {
            "embedding_function": _embedding_function_name(
                self.embedding_function
            ),
            "model_id": getattr(self, "model_id", None),
            "input_1_length": _input_length(str1),
            "input_2_length": _input_length(str2),
            "similarity_function": self.sim_function,
            "exception_type": type(exc).__name__,
        }
        logger.exception(
            "Semantic embedding comparison failed; falling back to string equality",
            extra=log_context,
        )
        # Embedding unavailable: only exact equality can count as a match.
        return 1.0 if str1 == str2 else 0.0

stickler.comparators.LLMComparator

Bases: BaseComparator

Large Language Model-based semantic comparator.

This comparator uses LLMs to perform intelligent semantic comparisons that go beyond simple string matching. It can understand context, handle abbreviations, recognize synonyms, and apply domain-specific comparison logic through custom evaluation guidelines.

The comparator returns binary similarity scores (0.0 or 1.0) based on whether the LLM determines the values are semantically equivalent. It handles edge cases like None values and provides detailed comparison information for debugging.

Attributes:

Name Type Description
model Union[Model, str]

The LLM model identifier or Model instance.

eval_guidelines str

Custom guidelines for comparison logic.

system_prompt str

The system prompt used to instruct the LLM.

prompt_template Template

Jinja2 template for formatting comparison prompts.

agent Agent

The strands Agent instance for LLM interactions.

threshold float

Inherited from BaseComparator, used for binary decisions.

Note

This comparator requires AWS Bedrock access and proper authentication. API calls incur costs and latency, so consider caching for repeated comparisons.

Source code in stickler/comparators/llm.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
class LLMComparator(BaseComparator):
    """Large Language Model-based semantic comparator.

    This comparator uses LLMs to perform intelligent semantic comparisons that go
    beyond simple string matching. It can understand context, handle abbreviations,
    recognize synonyms, and apply domain-specific comparison logic through custom
    evaluation guidelines.

    The comparator returns binary similarity scores (0.0 or 1.0) based on whether
    the LLM's reply contains the word "true". None values are resolved without
    calling the LLM, and ``get_comparison_details`` exposes the prompt and raw
    reply for debugging.

    Attributes:
        model (Union[Model, str]): The LLM model identifier or Model instance.
        eval_guidelines (str, optional): HTML-escaped custom guidelines for
            comparison logic, or None.
        system_prompt (str): The system prompt used to instruct the LLM.
        prompt_template (Template): Jinja2 template for formatting comparison prompts.
        agent (Agent): The strands Agent instance for LLM interactions.
        threshold (float): Inherited from BaseComparator, used for binary decisions.

    Note:
        This comparator requires AWS Bedrock access and proper authentication.
        API calls incur costs and latency, so consider caching for repeated comparisons.
    """

    def __init__(
        self,
        model: Union[Model, str] = None,
        eval_guidelines: str = None,
    ):
        """Initialize the LLM comparator.

        Args:
            model: The LLM model to use for comparisons. Can be a model identifier
                string (e.g., "us.anthropic.claude-3-haiku-20240307-v1:0") or a
                strands Model instance. Required: although the parameter has a
                ``None`` default, leaving it as ``None`` raises ValueError.
            eval_guidelines: Optional custom guidelines to include in the comparison
                prompt. These guidelines help the LLM understand domain-specific
                comparison rules (e.g., "Consider abbreviations equivalent").
                The text is HTML-escaped before being stored.

        Raises:
            ImportError: If strands-agents is not installed.
            ValueError: If the model parameter is not provided.

        Example:
            >>> # Basic initialization (a model is always required)
            >>> comparator = LLMComparator(
            ...     model="us.anthropic.claude-3-haiku-20240307-v1:0"
            ... )

            >>> # With custom model and guidelines
            >>> comparator = LLMComparator(
            ...     model="us.amazon.nova-lite-v1:0",
            ...     eval_guidelines="Consider street abbreviations equivalent"
            ... )
        """
        super().__init__()

        # Check if strands is available
        if not STRANDS_AVAILABLE:
            raise ImportError(
                "LLMComparator requires the 'strands-agents' package. "
                "Install it with: pip install stickler-eval[llm]"
            )

        if model is None:
            raise ValueError("Model must be provided for LLMComparator.")
        self.model = model
        self.system_prompt = self._default_system_prompt()
        self.prompt_template = self._default_prompt_template()
        if eval_guidelines is not None:
            # Escape so guideline text cannot break out of the <guidelines>
            # block of the prompt.
            self.eval_guidelines = html.escape(eval_guidelines)
        else:
            self.eval_guidelines = eval_guidelines

        # Initialize Agent
        self.agent = Agent(
            model=self.model, system_prompt=self.system_prompt, callback_handler=None
        )

    def _default_system_prompt(self) -> str:
        """Generate the default system prompt for the LLM.

        Returns:
            str: System prompt instructing the LLM to perform binary comparisons.
        """
        return "You are a helpful assistant that compares two values and determines if they are equivalent. Only return one word: 'true' or 'false'."

    def _default_prompt_template(self) -> Template:
        """Generate the default Jinja2 template for comparison prompts.

        Returns:
            Template: Jinja2 template that formats comparison prompts with values
                and optional evaluation guidelines.
        """
        prompt_template = """
            Compare these two values and determine if they are equivalent:

            Value 1: {{ value1 }}
            Value 2: {{ value2 }}

            {% if eval_guidelines is not none %}
            <guidelines>
            Here are some guidelines to follow for the comparison:
            {{ eval_guidelines }}
            </guidelines>
            {% endif %}

            If the values are equivalent, return 'true'. If not, return 'false'. Only return one word: 'true' or 'false'.
            """

        template = Template(prompt_template)
        return template

    def _invoke_agent(self, prompt: str) -> str:
        """Invoke the LLM agent with a formatted prompt.

        Args:
            prompt: The formatted prompt string to send to the LLM.

        Returns:
            str: The text response from the LLM.

        Raises:
            Exception: If the agent call fails or response format is unexpected.
        """
        result = self.agent(prompt)
        return result.message["content"][0]["text"]

    def _render_prompt(self, value1: Any, value2: Any) -> str:
        """Render the comparison prompt for two values (stringified and HTML-escaped)."""
        return self.prompt_template.render(
            value1=html.escape(str(value1)),
            value2=html.escape(str(value2)),
            eval_guidelines=self.eval_guidelines,
        )

    @staticmethod
    def _parse_response(response: str) -> float:
        """Map a raw LLM reply to 1.0 if it contains "true" (case-insensitive), else 0.0."""
        return 1.0 if "true" in response.strip().lower() else 0.0

    def compare(self, value1: Any, value2: Any) -> float:
        """Compare two values using LLM-based semantic analysis.

        This method converts both values to strings and uses the configured LLM
        to determine if they are semantically equivalent. The comparison considers
        any provided evaluation guidelines.

        Args:
            value1: First value to compare. Can be any type that converts to string.
            value2: Second value to compare. Can be any type that converts to string.

        Returns:
            float: Binary similarity score:
                - 1.0 if the LLM reply contains "true"
                - 0.0 otherwise

        Raises:
            NoCredentialsError: If AWS credentials are missing (re-raised after
                printing a message).
            Exception: Any other error raised by the LLM call (re-raised after
                printing a message).

        Note:
            - None values: Returns 1.0 if both are None, 0.0 if only one is None,
              without calling the LLM
            - Error handling: Errors are printed and re-raised, not converted to 0.0
            - Cost consideration: Each call incurs API costs and latency

        Example:
            >>> comparator = LLMComparator(model="us.amazon.nova-lite-v1:0")
            >>> comparator.compare("St. John's Street", "Saint John's St")
            1.0
            >>> comparator.compare("apple", "orange")
            0.0
            >>> comparator.compare(None, None)
            1.0
        """
        # Handle None values
        if value1 is None and value2 is None:
            return 1.0
        elif value1 is None or value2 is None:
            return 0.0

        formatted_prompt = self._render_prompt(value1, value2)

        try:
            return self._parse_response(self._invoke_agent(formatted_prompt))

        except NoCredentialsError:
            print("Error: AWS credentials not found.")
            raise

        except Exception as e:
            print(f"Error during LLM call: {e}")
            raise

    def get_comparison_details(self, value1: Any, value2: Any) -> Dict[str, Any]:
        """Get detailed information about a comparison operation.

        The LLM is invoked exactly once; the reported ``comparison_result`` is
        derived from that same reply, so it always agrees with
        ``llm_response`` and no second API call is made.

        Args:
            value1: First value to compare. Can be any type that converts to string.
            value2: Second value to compare. Can be any type that converts to string.

        Returns:
            Dict[str, Any]: Dictionary containing comparison details:
                - 'prompt' (str): The formatted prompt sent to the LLM
                - 'llm_response' (str): Raw response from the LLM
                - 'model_id' (Union[Model, str]): The model used (string ID or Model instance)
                - 'comparison_result' (float): Final similarity score (0.0 or 1.0)

                On error:
                - 'error' (str): Error message describing what went wrong
                - 'comparison_result' (bool): False to indicate failure

        Example:
            >>> comparator = LLMComparator(
            ...     model="us.amazon.nova-lite-v1:0",
            ...     eval_guidelines="Consider abbreviations",
            ... )
            >>> details = comparator.get_comparison_details("St. John", "Saint John")
            >>> print(details['llm_response'])
            'true'
            >>> print(details['comparison_result'])
            1.0
            >>> print('guidelines' in details['prompt'])
            True
        """
        formatted_prompt = self._render_prompt(value1, value2)

        try:
            response = self._invoke_agent(formatted_prompt)
            if value1 is None or value2 is None:
                # Same None rule as compare(): the score ignores the LLM reply.
                result = 1.0 if (value1 is None and value2 is None) else 0.0
            else:
                result = self._parse_response(response)
            return {
                "prompt": formatted_prompt,
                "llm_response": response,
                "model_id": self.model,
                "comparison_result": result,
            }
        except Exception as e:
            return {"error": str(e), "comparison_result": False}

__init__(model=None, eval_guidelines=None)

Initialize the LLM comparator.

Parameters:

Name Type Description Default
model Union[Model, str]

The LLM model to use for comparisons. Can be a model identifier string (e.g., "us.anthropic.claude-3-haiku-20240307-v1:0") or a strands Model instance. Required: although the parameter's default is None, leaving it as None raises ValueError.

None
eval_guidelines str

Optional custom guidelines to include in the comparison prompt. These guidelines help the LLM understand domain-specific comparison rules (e.g., "Consider abbreviations equivalent").

None

Raises:

Type Description
ImportError

If strands-agents is not installed.

ValueError

If the model parameter is not provided.

Example

Basic initialization (a model is required; omitting it raises ValueError)

comparator = LLMComparator(model="us.anthropic.claude-3-haiku-20240307-v1:0")

With custom model and guidelines

comparator = LLMComparator(model="us.amazon.nova-lite-v1:0", eval_guidelines="Consider street abbreviations equivalent")

Source code in stickler/comparators/llm.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def __init__(
    self,
    model: Union[Model, str] = None,
    eval_guidelines: str = None,
):
    """Initialize the LLM comparator.

    Args:
        model: The LLM model to use for comparisons. Can be a model identifier
            string (e.g., "us.anthropic.claude-3-haiku-20240307-v1:0") or a
            strands Model instance. Required in practice: the signature
            default is ``None`` only so the argument can be passed by keyword;
            no default model is substituted and ``None`` raises ``ValueError``.
        eval_guidelines: Optional custom guidelines to include in the comparison
            prompt. These guidelines help the LLM understand domain-specific
            comparison rules (e.g., "Consider abbreviations equivalent").
            The text is HTML-escaped before it is stored on the instance.

    Raises:
        ImportError: If strands-agents is not installed.
        ValueError: If the model parameter is not provided.

    Example:
        >>> # Basic initialization (a model is always required)
        >>> comparator = LLMComparator(
        ...     model="us.anthropic.claude-3-haiku-20240307-v1:0"
        ... )

        >>> # With custom model and guidelines
        >>> comparator = LLMComparator(
        ...     model="us.amazon.nova-lite-v1:0",
        ...     eval_guidelines="Consider street abbreviations equivalent"
        ... )
    """
    super().__init__()

    # The optional dependency is checked first so a missing install is
    # reported even when the model argument is also missing.
    if not STRANDS_AVAILABLE:
        raise ImportError(
            "LLMComparator requires the 'strands-agents' package. "
            "Install it with: pip install stickler-eval[llm]"
        )

    if model is None:
        raise ValueError("Model must be provided for LLMComparator.")

    self.model = model
    self.system_prompt = self._default_system_prompt()
    self.prompt_template = self._default_prompt_template()

    # Guidelines are escaped once here; None is kept as None so the template
    # receives the same "no guidelines" value it always has.
    self.eval_guidelines = (
        html.escape(eval_guidelines) if eval_guidelines is not None else None
    )

    # Initialize Agent
    self.agent = Agent(
        model=self.model, system_prompt=self.system_prompt, callback_handler=None
    )

compare(value1, value2)

Compare two values using LLM-based semantic analysis.

This method converts both values to strings and uses the configured LLM to determine if they are semantically equivalent. The comparison considers context, abbreviations, synonyms, and any provided evaluation guidelines.

Parameters:

Name Type Description Default
value1 Any

First value to compare. Can be any type that converts to string.

required
value2 Any

Second value to compare. Can be any type that converts to string.

required

Returns:

Name Type Description
float float

Binary similarity score: - 1.0 if the LLM determines the values are equivalent - 0.0 if the LLM determines the values are not equivalent. Errors during the comparison are not converted to a score; the exception is re-raised to the caller.

Note
  • None values: Returns 1.0 if both are None, 0.0 if only one is None
  • Error handling: Exceptions raised during LLM calls are printed and re-raised; they are not converted to 0.0
  • Cost consideration: Each call incurs API costs and latency
Example

comparator = LLMComparator(model="us.anthropic.claude-3-haiku-20240307-v1:0") comparator.compare("St. John's Street", "Saint John's St") 1.0 comparator.compare("apple", "orange") 0.0 comparator.compare(None, None) 1.0

Source code in stickler/comparators/llm.py
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
def compare(self, value1: Any, value2: Any) -> float:
    """Compare two values using LLM-based semantic analysis.

    Both values are converted to strings, HTML-escaped, rendered into the
    prompt template together with the configured evaluation guidelines, and
    sent to the LLM. The reply is reduced to a binary score.

    Args:
        value1: First value to compare. Can be any type that converts to string.
        value2: Second value to compare. Can be any type that converts to string.

    Returns:
        float: Binary similarity score:
            - 1.0 if the LLM reply contains "true" (case-insensitive)
            - 0.0 otherwise

    Raises:
        NoCredentialsError: If AWS credentials are not found. A message is
            printed and the exception is re-raised.
        Exception: Any other error raised while calling the LLM or parsing
            its reply is printed and re-raised; errors are NOT turned into
            a 0.0 score.

    Note:
        - None values: Returns 1.0 if both are None, 0.0 if only one is None.
          The LLM is not called in either case.
        - Parsing is a substring test, so any reply containing "true" counts
          as a match.
        - Cost consideration: Each call incurs API costs and latency

    Example:
        >>> comparator = LLMComparator(
        ...     model="us.anthropic.claude-3-haiku-20240307-v1:0"
        ... )
        >>> comparator.compare("St. John's Street", "Saint John's St")
        1.0
        >>> comparator.compare("apple", "orange")
        0.0
        >>> comparator.compare(None, None)
        1.0
    """
    # None is settled locally so a missing value never costs an LLM call.
    if value1 is None or value2 is None:
        return 1.0 if value1 is None and value2 is None else 0.0

    # Escape the values before they are interpolated into the prompt.
    formatted_prompt = self.prompt_template.render(
        value1=html.escape(str(value1)),
        value2=html.escape(str(value2)),
        eval_guidelines=self.eval_guidelines,
    )

    try:
        response = self._invoke_agent(formatted_prompt)
        # Reduce the free-text reply to a binary score.
        return 1.0 if "true" in response.strip().lower() else 0.0

    except NoCredentialsError:
        print("Error: AWS credentials not found.")
        raise

    except Exception as e:
        print(f"Error during LLM call: {e}")
        raise

get_comparison_details(value1, value2)

Get detailed information about a comparison operation.

This method provides comprehensive details about the comparison process, including the formatted prompt, LLM response, model information, and final comparison result. Useful for debugging, auditing, and understanding how the LLM made its decision.

Parameters:

Name Type Description Default
value1 Any

First value to compare. Can be any type that converts to string.

required
value2 Any

Second value to compare. Can be any type that converts to string.

required

Returns:

Type Description
Dict[str, Any]

Dict[str, Any]: Dictionary containing comparison details: - 'prompt' (str): The formatted prompt sent to the LLM - 'llm_response' (str): Raw response from the LLM - 'model_id' (Union[Model, str]): The model used (string ID or Model instance) - 'comparison_result' (float): Final similarity score (0.0 or 1.0)

On error: - 'error' (str): Error message describing what went wrong - 'comparison_result' (bool): False to indicate failure

Example

comparator = LLMComparator(model="us.anthropic.claude-3-haiku-20240307-v1:0", eval_guidelines="Consider abbreviations") details = comparator.get_comparison_details("St. John", "Saint John") print(details['llm_response']) 'true' print(details['comparison_result']) 1.0 print('guidelines' in details['prompt']) True

Source code in stickler/comparators/llm.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
def get_comparison_details(self, value1: Any, value2: Any) -> Dict[str, Any]:
    """Get detailed information about a comparison operation.

    Renders the prompt, invokes the LLM once, and reports the prompt, the
    raw reply, the configured model and the resulting score. Useful for
    debugging, auditing, and understanding how the LLM made its decision.

    The score is derived from the reply returned by this single call, so the
    reported ``llm_response`` and ``comparison_result`` always describe the
    same LLM invocation and no second request is made.

    Args:
        value1: First value to compare. Can be any type that converts to string.
        value2: Second value to compare. Can be any type that converts to string.

    Returns:
        Dict[str, Any]: Dictionary containing comparison details:
            - 'prompt' (str): The formatted prompt sent to the LLM
            - 'llm_response' (str): Raw response from the LLM
            - 'model_id' (Union[Model, str]): The model used (string ID or Model instance)
            - 'comparison_result' (float): Final similarity score (0.0 or 1.0)

            On error:
            - 'error' (str): Error message describing what went wrong
            - 'comparison_result' (bool): False to indicate failure

    Example:
        >>> comparator = LLMComparator(
        ...     model="us.anthropic.claude-3-haiku-20240307-v1:0",
        ...     eval_guidelines="Consider abbreviations",
        ... )
        >>> details = comparator.get_comparison_details("St. John", "Saint John")
        >>> print(details['llm_response'])
        'true'
        >>> print(details['comparison_result'])
        1.0
        >>> print('guidelines' in details['prompt'])
        True
    """
    formatted_prompt = self.prompt_template.render(
        value1=html.escape(str(value1)),
        value2=html.escape(str(value2)),
        eval_guidelines=self.eval_guidelines,
    )

    try:
        response = self._invoke_agent(formatted_prompt)

        # Score the reply we already have instead of invoking the LLM a
        # second time. The rules mirror compare(): None values are settled
        # without consulting the reply, otherwise a case-insensitive
        # substring test for "true" decides the score.
        if value1 is None or value2 is None:
            result = 1.0 if value1 is None and value2 is None else 0.0
        else:
            result = 1.0 if "true" in response.strip().lower() else 0.0

        return {
            "prompt": formatted_prompt,
            "llm_response": response,
            "model_id": self.model,
            "comparison_result": result,
        }
    except Exception as e:
        # Best-effort diagnostics: report the failure instead of raising.
        return {"error": str(e), "comparison_result": False}

stickler.comparators.StructuredModelComparator

Bases: BaseComparator

Comparator for structured model objects.

This comparator is designed to work with StructuredModel instances, leveraging their built-in comparison capabilities.

Source code in stickler/comparators/structured.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
class StructuredModelComparator(BaseComparator):
    """Comparator that delegates to an object's own ``compare`` method.

    Intended for StructuredModel instances: when the first value exposes a
    callable ``compare`` attribute, scoring is handed to it. Any other pair
    of values is scored by plain equality.
    """

    def __init__(self, threshold: float = 0.7, strict_types: bool = False):
        """Set up the comparator.

        Args:
            threshold: Similarity threshold (0.0-1.0), passed to the base class.
            strict_types: When True, ``compare`` raises TypeError if both
                values are strings.
        """
        super().__init__(threshold)
        self.strict_types = strict_types

    def compare(self, model1: Any, model2: Any) -> float:
        """Score two values, preferring ``model1.compare`` when it exists.

        Args:
            model1: First model (ideally a StructuredModel instance)
            model2: Second model (ideally a StructuredModel instance)

        Returns:
            Whatever ``model1.compare(model2)`` returns when ``model1`` has a
            callable ``compare``; otherwise 1.0 if the values are equal and
            0.0 if they are not. If either value is None, equality is used
            without delegating.

        Raises:
            TypeError: When strict_types=True and both values are ``str``.
        """
        # Strict mode only rejects the string/string pairing; the check runs
        # before the None handling below.
        if self.strict_types:
            both_strings = isinstance(model1, str) and isinstance(model2, str)
            if both_strings:
                raise TypeError(
                    "StructuredModelComparator can only compare StructuredModel instances"
                )

        if model1 is None or model2 is None:
            # A missing value is never delegated; only None == None matches.
            matched = model1 == model2
        elif hasattr(model1, "compare") and callable(model1.compare):
            # Duck typing: only the first value is probed for ``compare``.
            return model1.compare(model2)
        else:
            # Neither None nor comparable via ``compare``: fall back to equality.
            matched = model1 == model2

        return 1.0 if matched else 0.0

__init__(threshold=0.7, strict_types=False)

Initialize the comparator.

Parameters:

Name Type Description Default
threshold float

Similarity threshold (0.0-1.0)

0.7
strict_types bool

If True, compare() raises TypeError when both values are strings; other non-StructuredModel values are still compared by equality

False
Source code in stickler/comparators/structured.py
15
16
17
18
19
20
21
22
23
def __init__(self, threshold: float = 0.7, strict_types: bool = False):
    """Set up the comparator.

    Args:
        threshold: Similarity threshold (0.0-1.0), passed to the base class.
        strict_types: Stored on the instance as ``strict_types``; it controls
            whether ``compare`` raises TypeError for unsupported inputs.
    """
    # The flag is independent of the base-class state, so it can be set first.
    self.strict_types = strict_types
    super().__init__(threshold)

compare(model1, model2)

Compare two structured model instances.

This method uses the built-in compare method of StructuredModel objects if available, otherwise falls back to basic equality comparison.

Parameters:

Name Type Description Default
model1 Any

First model (ideally a StructuredModel instance)

required
model2 Any

Second model (ideally a StructuredModel instance)

required

Returns:

Type Description
float

Similarity score between 0.0 and 1.0

Raises:

Type Description
TypeError

When strict_types=True and both compared values are strings

Source code in stickler/comparators/structured.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def compare(self, model1: Any, model2: Any) -> float:
    """Score two values, preferring ``model1.compare`` when it exists.

    Args:
        model1: First model (ideally a StructuredModel instance)
        model2: Second model (ideally a StructuredModel instance)

    Returns:
        Whatever ``model1.compare(model2)`` returns when ``model1`` has a
        callable ``compare``; otherwise 1.0 if the values are equal and 0.0
        if they are not. If either value is None, equality is used without
        delegating.

    Raises:
        TypeError: When strict_types=True and both values are ``str``.
    """
    # Strict mode only rejects the string/string pairing; the check runs
    # before the None handling below.
    if self.strict_types:
        both_strings = isinstance(model1, str) and isinstance(model2, str)
        if both_strings:
            raise TypeError(
                "StructuredModelComparator can only compare StructuredModel instances"
            )

    if model1 is None or model2 is None:
        # A missing value is never delegated; only None == None matches.
        matched = model1 == model2
    elif hasattr(model1, "compare") and callable(model1.compare):
        # Duck typing: only the first value is probed for ``compare``.
        return model1.compare(model2)
    else:
        # Neither None nor comparable via ``compare``: fall back to equality.
        matched = model1 == model2

    return 1.0 if matched else 0.0

stickler.comparators.BBoxIoUComparator

Bases: BaseComparator

Comparator for bounding boxes using Intersection over Union.

Compares two bounding boxes and returns their IoU as a similarity score between 0.0 and 1.0.

Bounding box formats accepted
  • Two-point: [[x1, y1], [x2, y2]]
  • Flat: [x1, y1, x2, y2]

Coordinates must be finite numbers; non-finite values (NaN, inf) are treated as malformed input and score 0.0. Booleans are accepted as coordinates (bool is a subclass of int: True == 1, False == 0), so guard upstream if that is not intended. Note that a zero-area box (a point, e.g. [[5, 5], [5, 5]]) has no area to intersect, so it scores IoU 0.0 even against an identical point — relevant when annotating point locations rather than regions.

Parameters:

Name Type Description Default
threshold float

IoU threshold for binary match classification (default: 0.5).

0.5
Example

from stickler.comparators.bbox import BBoxIoUComparator cmp = BBoxIoUComparator(threshold=0.5) cmp.compare([[0, 0], [10, 10]], [[0, 0], [10, 10]]) 1.0 cmp.compare([[0, 0], [5, 5]], [[5, 5], [10, 10]]) 0.0

Source code in stickler/comparators/bbox.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
class BBoxIoUComparator(BaseComparator):
    """Comparator for bounding boxes using Intersection over Union.

    Compares two bounding boxes and returns their IoU as a similarity
    score between 0.0 and 1.0.

    Bounding box formats accepted:
        - Two-point: [[x1, y1], [x2, y2]]
        - Flat: [x1, y1, x2, y2]

    Coordinates must be finite numbers; non-finite values (NaN, inf) are
    treated as malformed input and score 0.0. The same applies when finite
    coordinates are so large that a box width or area overflows to a
    non-finite float. Booleans are accepted as coordinates (``bool`` is a
    subclass of ``int``: ``True`` == 1, ``False`` == 0), so guard upstream if
    that is not intended. Note that a zero-area box (a point, e.g.
    ``[[5, 5], [5, 5]]``) has no area to intersect, so it scores IoU 0.0 even
    against an identical point — relevant when annotating point locations
    rather than regions.

    The two formats validate differently: flat coordinates must already be
    ``int``/``float`` instances, whereas two-point coordinates are passed
    through ``float()`` and so accept anything ``float()`` can convert.

    Args:
        threshold: IoU threshold for binary match classification (default: 0.5).

    Example:
        >>> from stickler.comparators.bbox import BBoxIoUComparator
        >>> cmp = BBoxIoUComparator(threshold=0.5)
        >>> cmp.compare([[0, 0], [10, 10]], [[0, 0], [10, 10]])
        1.0
        >>> cmp.compare([[0, 0], [5, 5]], [[5, 5], [10, 10]])
        0.0
    """

    def __init__(
        self,
        threshold: float = 0.5,
    ):
        """Initialize the comparator.

        Args:
            threshold: IoU threshold forwarded to the base comparator.
        """
        super().__init__(threshold=threshold)

    def compare(self, bbox1: Any, bbox2: Any) -> float:
        """Compare two bounding boxes and return their IoU.

        Args:
            bbox1: First bounding box (prediction).
            bbox2: Second bounding box (ground truth).

        Returns:
            IoU score between 0.0 and 1.0. Two None inputs score 1.0; a single
            None or any malformed box scores 0.0.
        """
        if bbox1 is None and bbox2 is None:
            return 1.0
        if bbox1 is None or bbox2 is None:
            return 0.0

        coords1 = self._normalize_bbox(bbox1)
        coords2 = self._normalize_bbox(bbox2)

        # A box that failed normalization is a miss, not an error.
        if coords1 is None or coords2 is None:
            return 0.0

        return self._compute_iou(coords1, coords2)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_bbox(
        bbox: Any,
    ) -> Optional[Tuple[float, float, float, float]]:
        """Normalize a bounding box to (x1, y1, x2, y2) with x1<=x2, y1<=y2.

        Accepts:
            - [[x1, y1], [x2, y2]]
            - [x1, y1, x2, y2]

        Returns:
            (x_min, y_min, x_max, y_max) or None if the input is invalid.
        """
        try:
            if not isinstance(bbox, (list, tuple)):
                return None

            if len(bbox) == 2 and all(
                isinstance(p, (list, tuple)) and len(p) == 2 for p in bbox
            ):
                # Two-point format: [[x1, y1], [x2, y2]]
                x1, y1 = float(bbox[0][0]), float(bbox[0][1])
                x2, y2 = float(bbox[1][0]), float(bbox[1][1])
            elif len(bbox) == 4 and all(isinstance(v, (int, float)) for v in bbox):
                # Flat format: [x1, y1, x2, y2]
                x1, y1, x2, y2 = (float(v) for v in bbox)
            else:
                return None

            # Reject non-finite coordinates (NaN, inf) as malformed input so
            # they score as a miss rather than poisoning IoU output.
            if not all(math.isfinite(v) for v in (x1, y1, x2, y2)):
                return None

            # Corners may arrive in any order; emit them as min/max pairs.
            return (min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
        except (TypeError, ValueError, IndexError):
            return None

    @staticmethod
    def _compute_iou(
        box1: Tuple[float, float, float, float],
        box2: Tuple[float, float, float, float],
    ) -> float:
        """Compute IoU between two normalized boxes (x1, y1, x2, y2).

        Args:
            box1: (x_min, y_min, x_max, y_max)
            box2: (x_min, y_min, x_max, y_max)

        Returns:
            IoU value between 0.0 and 1.0. Returns 0.0 when the union has no
            positive area or when the arithmetic overflows to a non-finite
            value.
        """
        x_left = max(box1[0], box2[0])
        y_top = max(box1[1], box2[1])
        x_right = min(box1[2], box2[2])
        y_bottom = min(box1[3], box2[3])

        # Clamp each overlap extent at zero so disjoint boxes give no area.
        inter_area = max(0.0, x_right - x_left) * max(0.0, y_bottom - y_top)

        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union_area = area1 + area2 - inter_area

        # Finite coordinates can still overflow when subtracted or multiplied
        # (e.g. extents around 1e200), which makes the union inf or NaN. A NaN
        # slips past the ``<= 0`` test, so check finiteness explicitly and
        # score such boxes as a miss like any other malformed input.
        if not math.isfinite(union_area) or union_area <= 0:
            return 0.0

        return inter_area / union_area

compare(bbox1, bbox2)

Compare two bounding boxes and return their IoU.

Parameters:

Name Type Description Default
bbox1 Any

First bounding box (prediction).

required
bbox2 Any

Second bounding box (ground truth).

required

Returns:

Type Description
float

IoU score between 0.0 and 1.0.

Source code in stickler/comparators/bbox.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def compare(self, bbox1: Any, bbox2: Any) -> float:
    """Return the Intersection over Union of two bounding boxes.

    Args:
        bbox1: First bounding box (prediction).
        bbox2: Second bounding box (ground truth).

    Returns:
        IoU score between 0.0 and 1.0. Two None inputs score 1.0; a single
        None, or a box that cannot be normalized, scores 0.0.
    """
    # Missing boxes are decided without touching the geometry helpers:
    # the identity test is only true here when both inputs are None.
    if bbox1 is None or bbox2 is None:
        return 1.0 if bbox1 is bbox2 else 0.0

    first = self._normalize_bbox(bbox1)
    second = self._normalize_bbox(bbox2)

    # Normalization yields None for input it cannot interpret.
    if first is not None and second is not None:
        return self._compute_iou(first, second)
    return 0.0