Comparators

`stickler.comparators`

Common comparators for key information evaluation.

This package contains comparators that are shared between the traditional and ANLS Star evaluation systems. These comparators implement a unified interface that works with both systems.

`stickler.comparators.BaseComparator`

Bases: ABC

Base class for all comparators.

This class defines the interface that all comparators must implement. Comparators are used to compare two values and return a similarity score between 0.0 and 1.0, where 1.0 means the values are identical.

Source code in stickler/comparators/base.py

class BaseComparator(ABC):
    """Abstract foundation shared by every comparator.

    A comparator takes two values and scores how alike they are on a scale
    from 0.0 to 1.0, where 1.0 means the values are identical. Subclasses
    provide the scoring in ``compare``; this base class layers call syntax,
    a threshold-based binary decision and string representations on top.
    """

    def __init__(self, threshold: float = 0.7):
        """Store the similarity threshold on the instance.

        Args:
            threshold: Similarity threshold (0.0-1.0) used by
                ``binary_compare`` to decide whether two values match
        """
        self.threshold = threshold

    @abstractmethod
    def compare(self, str1: Any, str2: Any) -> float:
        """Score the similarity of two values.

        Concrete comparators must override this method.

        Args:
            str1: First value
            str2: Second value

        Returns:
            Similarity score between 0.0 and 1.0
        """

    def __call__(self, str1: Any, str2: Any) -> float:
        """Let the instance be invoked like a function.

        ``comparator(a, b)`` simply forwards to ``comparator.compare(a, b)``.

        Args:
            str1: First value
            str2: Second value

        Returns:
            Similarity score between 0.0 and 1.0
        """
        similarity = self.compare(str1, str2)
        return similarity

    def binary_compare(self, str1: Any, str2: Any) -> Tuple[int, int]:
        """Reduce the similarity score to a match / no-match pair.

        The score from ``compare`` is tested against ``self.threshold``.
        A score at or above the threshold yields ``(1, 0)`` (true positive);
        anything lower yields ``(0, 1)`` (false positive).

        Args:
            str1: First value
            str2: Second value

        Returns:
            Tuple of (tp, fp): tp is 1 when the values are similar enough
            and 0 otherwise; fp is the opposite
        """
        is_match = self.compare(str1, str2) >= self.threshold
        return (1, 0) if is_match else (0, 1)

    def __str__(self) -> str:
        """Return the bare class name (used for serialization)."""
        cls = self.__class__
        return cls.__name__

    def __repr__(self) -> str:
        """Return the class name together with the configured threshold."""
        cls_name = self.__class__.__name__
        return f"{cls_name}(threshold={self.threshold})"

`__call__(str1, str2)`

Make the comparator callable.

Parameters:

Name	Type	Description	Default
`str1`	`Any`	First value	required
`str2`	`Any`	Second value	required

Returns:

Type	Description
`float`	Similarity score between 0.0 and 1.0

Source code in stickler/comparators/base.py

def __call__(self, str1: Any, str2: Any) -> float:
    """Let the instance be invoked like a function.

    ``comparator(a, b)`` simply forwards to ``comparator.compare(a, b)``.

    Args:
        str1: First value
        str2: Second value

    Returns:
        Similarity score between 0.0 and 1.0
    """
    similarity = self.compare(str1, str2)
    return similarity

`__init__(threshold=0.7)`

Initialize the comparator.

Parameters:

Name	Type	Description	Default
`threshold`	`float`	Similarity threshold (0.0-1.0)	`0.7`

Source code in stickler/comparators/base.py

def __init__(self, threshold: float = 0.7):
    """Store the similarity threshold on the instance.

    Args:
        threshold: Similarity threshold (0.0-1.0); kept as
            ``self.threshold``
    """
    self.threshold = threshold

`__repr__()`

Detailed string representation.

Source code in stickler/comparators/base.py

def __repr__(self) -> str:
    """Return the class name together with the configured threshold."""
    cls_name = self.__class__.__name__
    return f"{cls_name}(threshold={self.threshold})"

`__str__()`

String representation for serialization.

Source code in stickler/comparators/base.py

def __str__(self) -> str:
    """Return the bare class name (used for serialization)."""
    cls = self.__class__
    return cls.__name__

`binary_compare(str1, str2)`

Compare two values and return a binary result as (tp, fp) tuple.

This method converts the continuous similarity score to a binary decision based on the threshold. If the similarity is greater than or equal to the threshold, it returns (1, 0) indicating true positive. Otherwise, it returns (0, 1) indicating false positive.

Parameters:

Name	Type	Description	Default
`str1`	`Any`	First value	required
`str2`	`Any`	Second value	required

Returns:

Type	Description
`Tuple[int, int]`	Tuple of (tp, fp) where tp is 1 if similar, 0 otherwise, and fp is the opposite

Source code in stickler/comparators/base.py

def binary_compare(self, str1: Any, str2: Any) -> Tuple[int, int]:
    """Reduce the similarity score to a match / no-match pair.

    The score from ``compare`` is tested against ``self.threshold``.
    A score at or above the threshold yields ``(1, 0)`` (true positive);
    anything lower yields ``(0, 1)`` (false positive).

    Args:
        str1: First value
        str2: Second value

    Returns:
        Tuple of (tp, fp): tp is 1 when the values are similar enough
        and 0 otherwise; fp is the opposite
    """
    is_match = self.compare(str1, str2) >= self.threshold
    return (1, 0) if is_match else (0, 1)

`compare(str1, str2)` `abstractmethod`

Compare two values and return a similarity score.

Parameters:

Name	Type	Description	Default
`str1`	`Any`	First value	required
`str2`	`Any`	Second value	required

Returns:

Type	Description
`float`	Similarity score between 0.0 and 1.0

Source code in stickler/comparators/base.py

@abstractmethod
def compare(self, str1: Any, str2: Any) -> float:
    """Score the similarity of two values.

    Abstract: concrete comparators must override this method.

    Args:
        str1: First value
        str2: Second value

    Returns:
        Similarity score between 0.0 and 1.0
    """

`stickler.comparators.ExactComparator`

Bases: BaseComparator

Comparator that checks for exact string matching.

This comparator removes whitespace and punctuation before comparison and, unless `case_sensitive=True` is passed, also ignores case. It returns 1.0 for exact matches and 0.0 otherwise.

Example

comparator = ExactComparator()

# Returns 1.0: whitespace and punctuation are removed before comparing,
# so both inputs normalize to the same text
comparator.compare("hello, world!", "hello world")

# Returns 0.0: the normalized strings are different
comparator.compare("hello", "goodbye")

Source code in stickler/comparators/exact.py

class ExactComparator(BaseComparator):
    """Comparator that performs all-or-nothing string matching.

    Inputs are converted to strings, lowercased unless ``case_sensitive``
    is set, and stripped of whitespace and punctuation before being tested
    for equality. The score is 1.0 for a match and 0.0 otherwise.

    Example:
        ```python
        comparator = ExactComparator()

        # Returns 1.0 (exact match after normalization)
        comparator.compare("hello, world!", "hello world")

        # Returns 0.0 (different strings)
        comparator.compare("hello", "goodbye")
        ```
    """

    def __init__(self, threshold: float = 1.0, case_sensitive: bool = False):
        """Configure the comparator.

        Args:
            threshold: Similarity threshold (default 1.0)
            case_sensitive: When False (the default) both inputs are
                lowercased before comparison
        """
        super().__init__(threshold=threshold)
        self.case_sensitive = case_sensitive

    def compare(self, str1: Any, str2: Any) -> float:
        """Check whether two values are equal after normalization.

        Args:
            str1: First value
            str2: Second value

        Returns:
            1.0 if the strings match exactly after normalization, 0.0 otherwise
        """
        # None only ever matches another None.
        if str1 is None or str2 is None:
            return 1.0 if str1 is str2 else 0.0

        # Non-string inputs are compared through their str() form.
        left, right = str(str1), str(str2)

        if not self.case_sensitive:
            left, right = lowercase(left), lowercase(right)

        # Drop whitespace and punctuation so only the "content" is compared.
        left_key = strip_punctuation_space(left)
        right_key = strip_punctuation_space(right)

        if left_key == right_key:
            return 1.0
        return 0.0

`__init__(threshold=1.0, case_sensitive=False)`

Initialize the comparator.

Parameters:

Name	Type	Description	Default
`threshold`	`float`	Similarity threshold (default 1.0)	`1.0`
`case_sensitive`	`bool`	Whether comparison is case sensitive (default False)	`False`

Source code in stickler/comparators/exact.py

def __init__(self, threshold: float = 1.0, case_sensitive: bool = False):
    """Configure the comparator.

    Args:
        threshold: Similarity threshold (default 1.0), forwarded to the
            base class
        case_sensitive: When False (the default) both inputs are
            lowercased before comparison
    """
    super().__init__(threshold=threshold)
    self.case_sensitive = case_sensitive

`compare(str1, str2)`

Compare two values with exact string matching.

Parameters:

Name	Type	Description	Default
`str1`	`Any`	First value	required
`str2`	`Any`	Second value	required

Returns:

Type	Description
`float`	1.0 if the strings match exactly after normalization, 0.0 otherwise

Source code in stickler/comparators/exact.py

def compare(self, str1: Any, str2: Any) -> float:
    """Check whether two values are equal after normalization.

    Args:
        str1: First value
        str2: Second value

    Returns:
        1.0 if the strings match exactly after normalization, 0.0 otherwise
    """
    # None only ever matches another None.
    if str1 is None or str2 is None:
        return 1.0 if str1 is str2 else 0.0

    # Non-string inputs are compared through their str() form.
    left, right = str(str1), str(str2)

    if not self.case_sensitive:
        left, right = lowercase(left), lowercase(right)

    # Drop whitespace and punctuation so only the "content" is compared.
    left_key = strip_punctuation_space(left)
    right_key = strip_punctuation_space(right)

    if left_key == right_key:
        return 1.0
    return 0.0

`stickler.comparators.LevenshteinComparator`

Bases: BaseComparator

Comparator using Levenshtein distance for string similarity.

This class implements the Levenshtein distance algorithm for measuring the difference between two strings. It calculates a normalized similarity score between 0 and 1.

Source code in stickler/comparators/levenshtein.py

class LevenshteinComparator(BaseComparator):
    """Comparator using Levenshtein distance for string similarity.

    The edit distance between the two (optionally normalized) strings is
    divided by the length of the longer string and subtracted from 1.0,
    giving a similarity score between 0 and 1.
    """

    def __init__(self, normalize: bool = True, threshold: float = 0.7):
        """Initialize the comparator.

        Args:
            normalize: Whether to normalize input strings (lowercase, trim,
                and collapse runs of whitespace to a single space) before
                comparison
            threshold: Similarity threshold (default 0.7)
        """
        super().__init__(threshold=threshold)
        self._normalize = normalize

    @property
    def name(self) -> str:
        """Return the name of the comparator."""
        return "levenshtein"

    @property
    def config(self) -> Optional[Dict[str, Any]]:
        """Return configuration parameters."""
        return {"normalize": self._normalize}

    def compare(self, s1: Any, s2: Any) -> float:
        """Compare two strings using Levenshtein distance.

        ``None`` is treated as the empty string and any other non-string
        value is converted with ``str()``.

        Args:
            s1: First string or value
            s2: Second string or value

        Returns:
            Similarity score between 0.0 and 1.0, with 1.0 indicating identical

        Raises:
            TypeError: If either input is a dictionary, as dictionaries are not
                suitable for Levenshtein distance comparison and should be
                handled through structured models instead.
        """
        # Reject dictionaries - they should be broken down into proper StructuredModel subclasses
        if isinstance(s1, dict) or isinstance(s2, dict):
            raise TypeError(
                "Dictionary objects cannot be compared using LevenshteinComparator. "
                "Use a StructuredModel subclass with properly defined fields instead."
            )

        # Convert to strings and handle None values
        s1 = "" if s1 is None else str(s1)
        s2 = "" if s2 is None else str(s2)

        if self._normalize:
            # split() with no argument already discards leading/trailing
            # whitespace, so no separate strip() is needed.
            s1 = " ".join(s1.lower().split())
            s2 = " ".join(s2.lower().split())

        # Two empty strings are identical; this also guarantees the divisor
        # below is at least 1.
        if not s1 and not s2:
            return 1.0

        dist = self._levenshtein_distance(s1, s2)
        longest = max(len(s1), len(s2))

        # Convert distance to similarity (1.0 - normalized_distance)
        return 1.0 - (float(dist) / float(longest))

    @staticmethod
    def _levenshtein_distance(s1: str, s2: str) -> int:
        """Calculate the Levenshtein distance between two strings.

        Uses the row-by-row dynamic programming formulation, keeping only the
        previous row in memory.

        Args:
            s1: First string
            s2: Second string

        Returns:
            The Levenshtein distance as an integer
        """
        # Keep s1 as the shorter string so each row is as small as possible.
        if len(s1) > len(s2):
            s1, s2 = s2, s1

        previous = list(range(len(s1) + 1))
        for i2, c2 in enumerate(s2):
            current = [i2 + 1]
            for i1, c1 in enumerate(s1):
                if c1 == c2:
                    # Matching characters: carry the diagonal cost forward.
                    current.append(previous[i1])
                else:
                    # Cheapest of substitution, deletion and insertion.
                    current.append(
                        1 + min(previous[i1], previous[i1 + 1], current[-1])
                    )
            previous = current
        return previous[-1]

`config` `property`

Return configuration parameters.

`name` `property`

Return the name of the comparator.

`__init__(normalize=True, threshold=0.7)`

Initialize the comparator.

Parameters:

Name	Type	Description	Default
`normalize`	`bool`	Whether to normalize input strings (strip whitespace, lowercase) before comparison	`True`
`threshold`	`float`	Similarity threshold (default 0.7)	`0.7`

Source code in stickler/comparators/levenshtein.py

def __init__(self, normalize: bool = True, threshold: float = 0.7):
    """Configure the Levenshtein comparator.

    Args:
        normalize: Whether to normalize input strings
            (strip whitespace, lowercase) before comparison;
            stored as ``self._normalize``
        threshold: Similarity threshold (default 0.7), forwarded to the
            base class
    """
    super().__init__(threshold=threshold)
    self._normalize = normalize

`compare(s1, s2)`

Compare two strings using Levenshtein distance.

Parameters:

Name	Type	Description	Default
`s1`	`Any`	First string or value	required
`s2`	`Any`	Second string or value	required

Returns:

Type	Description
`float`	Similarity score between 0.0 and 1.0, with 1.0 indicating identical

Raises:

Type	Description
`TypeError`	If either input is a dictionary, as dictionaries are not suitable for Levenshtein distance comparison and should be handled through structured models instead.

Source code in stickler/comparators/levenshtein.py

def compare(self, s1: Any, s2: Any) -> float:
    """Compare two strings using Levenshtein distance.

    ``None`` is treated as the empty string and any other non-string
    value is converted with ``str()``.

    Args:
        s1: First string or value
        s2: Second string or value

    Returns:
        Similarity score between 0.0 and 1.0, with 1.0 indicating identical

    Raises:
        TypeError: If either input is a dictionary, as dictionaries are not
            suitable for Levenshtein distance comparison and should be
            handled through structured models instead.
    """
    # Reject dictionaries - they should be broken down into proper StructuredModel subclasses
    if isinstance(s1, dict) or isinstance(s2, dict):
        raise TypeError(
            "Dictionary objects cannot be compared using LevenshteinComparator. "
            "Use a StructuredModel subclass with properly defined fields instead."
        )

    # Convert to strings and handle None values
    s1 = "" if s1 is None else str(s1)
    s2 = "" if s2 is None else str(s2)

    if self._normalize:
        # split() with no argument already discards leading/trailing
        # whitespace, so no separate strip() is needed.
        s1 = " ".join(s1.lower().split())
        s2 = " ".join(s2.lower().split())

    # Two empty strings are identical; this also guarantees the divisor
    # below is at least 1.
    if not s1 and not s2:
        return 1.0

    dist = self._levenshtein_distance(s1, s2)
    longest = max(len(s1), len(s2))

    # Convert distance to similarity (1.0 - normalized_distance)
    return 1.0 - (float(dist) / float(longest))

`stickler.comparators.NumericComparator`

Bases: BaseComparator

Comparator for numeric values with configurable tolerance.

This comparator extracts and compares numeric values from strings or numbers. It supports relative and absolute tolerance for comparison.

Example

# Default: both tolerances are 0.0, so the numeric values must be equal
exact = NumericComparator()
exact.compare("123", "123.0")  # Returns 1.0 (same numeric value)
exact.compare("123", "124")    # Returns 0.0

# With relative tolerance (measured against the first argument)
approx = NumericComparator(relative_tolerance=0.1)  # 10% tolerance
approx.compare("100", "109")   # Returns 1.0 (within 10%)
approx.compare("100", "111")   # Returns 0.0 (beyond 10%)

Source code in stickler/comparators/numeric.py

class NumericComparator(BaseComparator):
    """Comparator for numeric values with configurable tolerance.

    This comparator extracts and compares numeric values from strings or numbers.
    It supports relative and absolute tolerance for comparison. The relative
    tolerance is measured against the first value passed to ``compare``.

    Example:
        ```python
        # Default exact matching
        exact = NumericComparator()
        exact.compare("123", "123.0")  # Returns 1.0
        exact.compare("123", "124")    # Returns 0.0

        # With tolerance
        approx = NumericComparator(relative_tolerance=0.1)  # 10% tolerance
        approx.compare("100", "109")   # Returns 1.0 (within 10%)
        approx.compare("100", "111")   # Returns 0.0 (beyond 10%)
        ```
    """

    def __init__(
        self,
        threshold: float = 1.0,
        relative_tolerance: float = 0.0,
        absolute_tolerance: float = 0.0,
        tolerance: Optional[float] = None,
    ):
        """Initialize the comparator.

        Args:
            threshold: Similarity threshold (default 1.0)
            relative_tolerance: Relative tolerance for comparison (default 0.0)
            absolute_tolerance: Absolute tolerance for comparison (default 0.0)
            tolerance: Alias for absolute_tolerance (for backward compatibility)

        Raises:
            ValueError: If both ``tolerance`` and a non-zero
                ``absolute_tolerance`` are supplied
        """
        super().__init__(threshold=threshold)
        self.relative_tolerance = relative_tolerance

        # Handle tolerance alias for backward compatibility
        if tolerance is not None:
            if absolute_tolerance != 0.0:
                raise ValueError(
                    "Cannot specify both 'tolerance' and 'absolute_tolerance'. Use 'absolute_tolerance'."
                )
            self.absolute_tolerance = tolerance
        else:
            self.absolute_tolerance = absolute_tolerance

    def compare(self, str1: Any, str2: Any) -> float:
        """Compare two values numerically.

        Args:
            str1: First value
            str2: Second value

        Returns:
            1.0 if the numbers match within tolerance, 0.0 otherwise.
            Two ``None`` values match; a value from which no number can be
            extracted never matches.
        """
        if str1 is None and str2 is None:
            return 1.0
        if str1 is None or str2 is None:
            return 0.0

        # Extract numeric values
        num1 = self._extract_number(str1)
        num2 = self._extract_number(str2)

        if num1 is None or num2 is None:
            return 0.0

        return 1.0 if self._numbers_equal(num1, num2) else 0.0

    def _extract_number(self, value: Any) -> Union[Decimal, None]:
        """Extract a numeric value from a string or number.

        Args:
            value: Value to extract a number from

        Returns:
            Decimal value or None if no valid number could be extracted
        """
        if isinstance(value, (int, float)):
            try:
                return Decimal(str(value))
            except InvalidOperation:
                # bool is an int subclass, but str(True) == "True" is not a
                # decimal literal; treat it as "no number" instead of raising.
                return None

        if not isinstance(value, str):
            value = str(value)

        # Check for accounting notation: (123) means -123
        is_negative = False
        if value.startswith("(") and value.endswith(")"):
            value = value[1:-1]  # Remove the parentheses
            is_negative = True

        # Remove common currency symbols and other non-numeric characters
        value = re.sub(r"[^0-9.-]", "", value)

        # Nothing numeric left
        if not value:
            return None

        try:
            decimal_value = Decimal(value)
        except InvalidOperation:
            return None

        # Apply negative sign if accounting notation was used
        return -decimal_value if is_negative else decimal_value

    def _numbers_equal(self, num1: Decimal, num2: Decimal) -> bool:
        """Check if two numbers are equal within tolerance.

        The numbers are equal when they are identical, when their difference
        relative to ``num1`` is within ``relative_tolerance``, or when their
        absolute difference is within ``absolute_tolerance``.

        Args:
            num1: First number
            num2: Second number

        Returns:
            True if numbers are equal within tolerance, False otherwise
        """
        if num1 == num2:
            return True

        try:
            difference = abs(num1 - num2)

            if self.relative_tolerance > 0:
                if num1 == 0:
                    # No meaningful relative difference against a zero base;
                    # compare the magnitude directly, but still fall through
                    # to the absolute check when this fails.
                    if abs(num2) <= self.relative_tolerance:
                        return True
                elif difference / abs(num1) <= self.relative_tolerance:
                    return True

            if self.absolute_tolerance > 0 and difference <= self.absolute_tolerance:
                return True
        except InvalidOperation:
            # Non-finite operands (infinity / NaN from float inputs) cannot be
            # meaningfully compared with a tolerance.
            return False

        return False

`__init__(threshold=1.0, relative_tolerance=0.0, absolute_tolerance=0.0, tolerance=None)`

Initialize the comparator.

Parameters:

Name	Type	Description	Default
`threshold`	`float`	Similarity threshold (default 1.0)	`1.0`
`relative_tolerance`	`float`	Relative tolerance for comparison (default 0.0)	`0.0`
`absolute_tolerance`	`float`	Absolute tolerance for comparison (default 0.0)	`0.0`
`tolerance`	`Optional[float]`	Alias for absolute_tolerance (for backward compatibility)	`None`

Source code in stickler/comparators/numeric.py

def __init__(
    self,
    threshold: float = 1.0,
    relative_tolerance: float = 0.0,
    absolute_tolerance: float = 0.0,
    tolerance: Optional[float] = None,
):
    """Configure the numeric comparator.

    Args:
        threshold: Similarity threshold (default 1.0)
        relative_tolerance: Relative tolerance for comparison (default 0.0)
        absolute_tolerance: Absolute tolerance for comparison (default 0.0)
        tolerance: Alias for absolute_tolerance (for backward compatibility)

    Raises:
        ValueError: If both ``tolerance`` and a non-zero
            ``absolute_tolerance`` are supplied
    """
    super().__init__(threshold=threshold)
    self.relative_tolerance = relative_tolerance

    # Common case: the legacy alias is not used.
    if tolerance is None:
        self.absolute_tolerance = absolute_tolerance
        return

    # The alias and the real parameter are mutually exclusive.
    if absolute_tolerance != 0.0:
        raise ValueError(
            "Cannot specify both 'tolerance' and 'absolute_tolerance'. Use 'absolute_tolerance'."
        )
    self.absolute_tolerance = tolerance

`compare(str1, str2)`

Compare two values numerically.

Parameters:

Name	Type	Description	Default
`str1`	`Any`	First value	required
`str2`	`Any`	Second value	required

Returns:

Type	Description
`float`	1.0 if the numbers match within tolerance, 0.0 otherwise

Source code in stickler/comparators/numeric.py

def compare(self, str1: Any, str2: Any) -> float:
    """Score two values by numeric equality.

    Args:
        str1: First value
        str2: Second value

    Returns:
        1.0 if the numbers match within tolerance, 0.0 otherwise
    """
    # None only ever matches another None.
    if str1 is None or str2 is None:
        return 1.0 if str1 is str2 else 0.0

    first = self._extract_number(str1)
    second = self._extract_number(str2)

    # A value with no extractable number can never match.
    if first is None or second is None:
        return 0.0

    return 1.0 if self._numbers_equal(first, second) else 0.0

`stickler.comparators.NumericExactC = NumericComparator` `module-attribute`

`stickler.comparators.FuzzyComparator`

Bases: BaseComparator

Comparator for fuzzy string matching.

This comparator uses the rapidfuzz library to calculate similarity between strings using advanced Levenshtein distance calculations. It provides better fuzzy matching than basic Levenshtein for many use cases.

If rapidfuzz is not available, this will raise an ImportError when instantiated.

Source code in stickler/comparators/fuzzy.py

class FuzzyComparator(BaseComparator):
    """Comparator for fuzzy string matching.

    Similarity is computed with one of the rapidfuzz ``fuzz`` scorers
    (selected by ``method``) and rescaled from 0-100 to 0.0-1.0.

    If rapidfuzz is not available, this will raise an ImportError when instantiated.
    """

    def __init__(
        self, method: str = "ratio", normalize: bool = True, threshold: float = 0.7
    ):
        """Initialize the fuzzy comparator.

        Args:
            method: The fuzzy matching method to use. Options:
                - "ratio": Standard Levenshtein distance ratio
                - "partial_ratio": Partial string matching
                - "token_sort_ratio": Token-based matching with sorting
                - "token_set_ratio": Token-based matching with set operations
                Any other value falls back to "ratio".
            normalize: Whether to normalize input strings before comparison
                      (strip whitespace, lowercase)
            threshold: Similarity threshold (default 0.7)

        Raises:
            ImportError: If rapidfuzz library is not available
        """
        super().__init__(threshold=threshold)

        if not RAPIDFUZZ_AVAILABLE:
            raise ImportError(
                "The rapidfuzz library is required for FuzzyComparator. "
                "Install it with: pip install rapidfuzz"
            )

        self._method = method
        self._normalize = normalize

        # Map each supported method name to its rapidfuzz scorer.
        scorers = {
            "ratio": fuzz.ratio,
            "partial_ratio": fuzz.partial_ratio,
            "token_sort_ratio": fuzz.token_sort_ratio,
            "token_set_ratio": fuzz.token_set_ratio,
        }
        self._fuzzy_func = scorers.get(method, fuzz.ratio)

    @property
    def name(self) -> str:
        """Return the comparator name, ``fuzzy_<method>``."""
        return f"fuzzy_{self._method}"

    @property
    def config(self) -> Optional[Dict[str, Any]]:
        """Return the configured method and normalize flag as a dict."""
        return {"method": self._method, "normalize": self._normalize}

    def compare(self, value1: Any, value2: Any) -> float:
        """Compare two strings using fuzzy matching.

        Args:
            value1: First string or value
            value2: Second string or value

        Returns:
            Similarity score between 0.0 and 1.0
        """
        # None only ever matches another None.
        if value1 is None or value2 is None:
            return 1.0 if value1 is value2 else 0.0

        s1, s2 = str(value1), str(value2)

        if self._normalize:
            s1, s2 = s1.strip().lower(), s2.strip().lower()

        # Two empty strings count as identical.
        if not s1 and not s2:
            return 1.0

        try:
            # The scorer reports 0-100; rescale to 0.0-1.0.
            score = self._fuzzy_func(s1, s2) / 100.0
        except Exception:
            # Fall back to basic comparison if fuzzy match fails
            return 1.0 if s1 == s2 else 0.0
        return score

`config` `property`

Return configuration parameters.

`name` `property`

Return the name of the comparator.

`__init__(method='ratio', normalize=True, threshold=0.7)`

Initialize the fuzzy comparator.

Parameters:

Name	Type	Description	Default
`method`	`str`	The fuzzy matching method to use. Options: - "ratio": Standard Levenshtein distance ratio - "partial_ratio": Partial string matching - "token_sort_ratio": Token-based matching with sorting - "token_set_ratio": Token-based matching with set operations	`'ratio'`
`normalize`	`bool`	Whether to normalize input strings before comparison (strip whitespace, lowercase)	`True`
`threshold`	`float`	Similarity threshold (default 0.7)	`0.7`

Raises:

Type	Description
`ImportError`	If rapidfuzz library is not available

Source code in stickler/comparators/fuzzy.py

def __init__(
    self, method: str = "ratio", normalize: bool = True, threshold: float = 0.7
):
    """Configure the fuzzy comparator.

    Args:
        method: The fuzzy matching method to use. Options:
            - "ratio": Standard Levenshtein distance ratio
            - "partial_ratio": Partial string matching
            - "token_sort_ratio": Token-based matching with sorting
            - "token_set_ratio": Token-based matching with set operations
            Any other value falls back to "ratio".
        normalize: Whether to normalize input strings before comparison
                  (strip whitespace, lowercase)
        threshold: Similarity threshold (default 0.7)

    Raises:
        ImportError: If rapidfuzz library is not available
    """
    super().__init__(threshold=threshold)

    if not RAPIDFUZZ_AVAILABLE:
        raise ImportError(
            "The rapidfuzz library is required for FuzzyComparator. "
            "Install it with: pip install rapidfuzz"
        )

    self._method = method
    self._normalize = normalize

    # Map each supported method name to its rapidfuzz scorer.
    scorers = {
        "ratio": fuzz.ratio,
        "partial_ratio": fuzz.partial_ratio,
        "token_sort_ratio": fuzz.token_sort_ratio,
        "token_set_ratio": fuzz.token_set_ratio,
    }
    self._fuzzy_func = scorers.get(method, fuzz.ratio)

`compare(value1, value2)`

Compare two strings using fuzzy matching.

Parameters:

Name	Type	Description	Default
`value1`	`Any`	First string or value	required
`value2`	`Any`	Second string or value	required

Returns:

Type	Description
`float`	Similarity score between 0.0 and 1.0

Source code in stickler/comparators/fuzzy.py

def compare(self, value1: Any, value2: Any) -> float:
    """Score two values with the configured fuzzy scorer.

    Args:
        value1: First string or value
        value2: Second string or value

    Returns:
        Similarity score between 0.0 and 1.0
    """
    # None only ever matches another None.
    if value1 is None or value2 is None:
        return 1.0 if value1 is value2 else 0.0

    s1, s2 = str(value1), str(value2)

    if self._normalize:
        s1, s2 = s1.strip().lower(), s2.strip().lower()

    # Two empty strings count as identical.
    if not s1 and not s2:
        return 1.0

    try:
        # The scorer reports 0-100; rescale to 0.0-1.0.
        score = self._fuzzy_func(s1, s2) / 100.0
    except Exception:
        # Fall back to basic comparison if fuzzy match fails
        return 1.0 if s1 == s2 else 0.0
    return score

`stickler.comparators.BERTComparator`

Bases: BaseComparator

Comparator that uses BERT embeddings for semantic similarity.

This comparator uses the BERTScore metric to calculate semantic similarity between strings, returning the f1 score as the similarity measure.

Example

comparator = BERTComparator(threshold=0.8)

# The score is the BERTScore f1 for the pair, so differently worded
# sentences can still be rated as similar
score = comparator.compare("The cat sat on the mat", "A feline was sitting on a rug")

Source code in stickler/comparators/bert.py

class BERTComparator(BaseComparator):
    """Semantic-similarity comparator built on the BERTScore metric.

    Two strings are scored with the module-level BERTScore ``model`` and the
    resulting f1 value is returned as the similarity.

    Example:
        ```python
        comparator = BERTComparator(threshold=0.8)

        # Returns similarity score based on semantic similarity
        score = comparator.compare("The cat sat on the mat", "A feline was sitting on a rug")
        ```
    """

    def __init__(self, threshold: float = 0.7):
        """Set up the comparator and verify the BERTScore model is present.

        Args:
            threshold: Similarity threshold (0.0-1.0)

        Raises:
            ImportError: If the module-level ``model`` is None
        """
        super().__init__(threshold=threshold)
        if model is not None:
            return
        raise ImportError(
            "BERTScore model could not be loaded. Please install 'evaluate' package."
        )

    def compare(self, str1: Any, str2: Any) -> float:
        """Return the BERTScore f1 similarity of two values.

        Args:
            str1: First string
            str2: Second string

        Returns:
            Similarity score between 0.0 and 1.0 based on BERTScore f1
        """
        # Any missing operand counts as a mismatch (including None vs None).
        if str1 is None or str2 is None:
            return 0.0

        # Stringify, then strip punctuation and whitespace.
        candidate = strip_punctuation_space(str(str1))
        reference = strip_punctuation_space(str(str2))

        # Exact-equality score, used both for empty inputs and as the
        # fallback when the model call fails.
        exact_match = 1.0 if candidate == reference else 0.0

        # If either cleaned string is empty, skip the model entirely.
        if not (candidate and reference):
            return exact_match

        try:
            scores = model.compute(
                predictions=[candidate], references=[reference], lang="en"
            )
            # Single pair in, so the first f1 entry is the score.
            return scores["f1"][0]
        except Exception as e:
            # Model call failed: report it and use the equality score.
            print(f"BERT comparison error: {str(e)}")
            return exact_match

`__init__(threshold=0.7)`

Initialize the BERTComparator.

Parameters:

Name	Type	Description	Default
`threshold`	`float`	Similarity threshold (0.0-1.0)	`0.7`

Source code in stickler/comparators/bert.py

def __init__(self, threshold: float = 0.7):
    """Set up the comparator and verify the BERTScore model is present.

    Args:
        threshold: Similarity threshold (0.0-1.0)

    Raises:
        ImportError: If the module-level ``model`` is None
    """
    super().__init__(threshold=threshold)
    if model is not None:
        return
    # No model object available, so the comparator cannot be used.
    raise ImportError(
        "BERTScore model could not be loaded. Please install 'evaluate' package."
    )

`compare(str1, str2)`

Compare two strings using BERT semantic similarity.

Parameters:

Name	Type	Description	Default
`str1`	`Any`	First string	required
`str2`	`Any`	Second string	required

Returns:

Type	Description
`float`	Similarity score between 0.0 and 1.0 based on BERTScore f1

Source code in stickler/comparators/bert.py

def compare(self, str1: Any, str2: Any) -> float:
    """Return the BERTScore f1 similarity of two values.

    Args:
        str1: First string
        str2: Second string

    Returns:
        Similarity score between 0.0 and 1.0 based on BERTScore f1
    """
    # Any missing operand counts as a mismatch (including None vs None).
    if str1 is None or str2 is None:
        return 0.0

    # Stringify, then strip punctuation and whitespace.
    candidate = strip_punctuation_space(str(str1))
    reference = strip_punctuation_space(str(str2))

    # Exact-equality score, used both for empty inputs and as the
    # fallback when the model call fails.
    exact_match = 1.0 if candidate == reference else 0.0

    # If either cleaned string is empty, skip the model entirely.
    if not (candidate and reference):
        return exact_match

    try:
        scores = model.compute(
            predictions=[candidate], references=[reference], lang="en"
        )
        # Single pair in, so the first f1 entry is the score.
        return scores["f1"][0]
    except Exception as e:
        # Model call failed: report it and use the equality score.
        print(f"BERT comparison error: {str(e)}")
        return exact_match

`stickler.comparators.SemanticComparator`

Bases: BaseComparator

Comparator that uses embeddings for semantic similarity.

This comparator uses embeddings from a model (default: Titan) to calculate semantic similarity between strings.

Attributes:

Name	Type	Description
`SIMILARITY_FUNCTIONS`		Dictionary of similarity functions
`bc`		BedrockClient instance
`model_id`		Model ID to use for embeddings
`embedding_function`		Function to generate embeddings
`sim_function`		Name of the similarity function to use
`similarity_function`		The actual similarity function

Source code in stickler/comparators/semantic.py

class SemanticComparator(BaseComparator):
    """Comparator that uses embeddings for semantic similarity.

    Each value is turned into an embedding (by default through
    ``generate_bedrock_embedding`` with a Titan model ID) and the two
    embeddings are scored with the selected similarity function.

    Attributes:
        SIMILARITY_FUNCTIONS: Dictionary mapping a name to a similarity function
        model_id: Model ID passed to the constructor (a plain string)
        embedding_function: Function to generate embeddings
        sim_function: Name of the similarity function to use
        similarity_function: The actual similarity function
    """

    SIMILARITY_FUNCTIONS = {
        # scipy returns a cosine *distance*; subtract from 1 for a similarity.
        "cosine_similarity": lambda x, y: 1 - spatial.distance.cosine(x, y)
    }

    def __init__(
        self,
        model_id: str = "amazon.titan-embed-text-v2:0",
        sim_function: str = "cosine_similarity",
        embedding_function: Optional[Callable] = None,
        threshold: float = 0.7,
    ):
        """Initialize the SemanticComparator.

        Args:
            model_id: Model ID to use for embeddings
            sim_function: Name of the similarity function to use
            embedding_function: Optional custom embedding function; when
                given it is used instead of the Bedrock embedding helper
            threshold: Similarity threshold (0.0-1.0)

        Raises:
            KeyError: If sim_function is not a key of SIMILARITY_FUNCTIONS
        """
        super().__init__(threshold=threshold)

        # Always record the model ID as a string. Previously a stray trailing
        # comma stored a 1-tuple, and the attribute was missing entirely when
        # a custom embedding function was supplied.
        self.model_id = model_id

        if embedding_function is not None:
            self.embedding_function = embedding_function
        else:
            self.embedding_function = partial(
                generate_bedrock_embedding, model_id=model_id
            )

        self.sim_function = sim_function
        self.similarity_function = self.SIMILARITY_FUNCTIONS[self.sim_function]

    def compare(self, str1: str, str2: str) -> float:
        """Compare two strings using semantic similarity.

        Args:
            str1: First string
            str2: Second string

        Returns:
            Similarity score, documented as between 0.0 and 1.0.
            NOTE(review): the result of the similarity function is returned
            unclamped - confirm the embeddings in use cannot yield a
            negative cosine similarity.
        """
        # Any missing operand counts as a mismatch (including None vs None).
        if str1 is None or str2 is None:
            return 0.0

        try:
            x, y = self.embedding_function(str1), self.embedding_function(str2)
            return self.similarity_function(x, y)
        except Exception:
            # Fallback to string equality if embedding or scoring fails
            return 1.0 if str1 == str2 else 0.0

`__init__(model_id='amazon.titan-embed-text-v2:0', sim_function='cosine_similarity', embedding_function=None, threshold=0.7)`

Initialize the SemanticComparator.

Parameters:

Name	Type	Description	Default
`model_id`	`str`	Model ID to use for embeddings	`'amazon.titan-embed-text-v2:0'`
`sim_function`	`str`	Name of the similarity function to use	`'cosine_similarity'`
`embedding_function`	`Optional[Callable]`	Optional custom embedding function	`None`
`threshold`	`float`	Similarity threshold (0.0-1.0)	`0.7`

Raises:

Type	Description
`ImportError`	If BedrockClient is not available and no embedding_function is provided

Source code in stickler/comparators/semantic.py

def __init__(
    self,
    model_id: str = "amazon.titan-embed-text-v2:0",
    sim_function: str = "cosine_similarity",
    embedding_function: Optional[Callable] = None,
    threshold: float = 0.7,
):
    """Initialize the SemanticComparator.

    Args:
        model_id: Model ID to use for embeddings
        sim_function: Name of the similarity function to use
        embedding_function: Optional custom embedding function; when given
            it is used instead of the Bedrock embedding helper
        threshold: Similarity threshold (0.0-1.0)

    Raises:
        KeyError: If sim_function is not a key of SIMILARITY_FUNCTIONS
    """
    super().__init__(threshold=threshold)

    # Always record the model ID as a string. Previously a stray trailing
    # comma stored a 1-tuple, and the attribute was missing entirely when a
    # custom embedding function was supplied.
    self.model_id = model_id

    if embedding_function is not None:
        self.embedding_function = embedding_function
    else:
        self.embedding_function = partial(
            generate_bedrock_embedding, model_id=model_id
        )

    self.sim_function = sim_function
    self.similarity_function = self.SIMILARITY_FUNCTIONS[self.sim_function]

`compare(str1, str2)`

Compare two strings using semantic similarity.

Parameters:

Name	Type	Description	Default
`str1`	`str`	First string	required
`str2`	`str`	Second string	required

Returns:

Type	Description
`float`	Similarity score between 0.0 and 1.0

Source code in stickler/comparators/semantic.py

def compare(self, str1: str, str2: str) -> float:
    """Score two strings by the similarity of their embeddings.

    Args:
        str1: First string
        str2: Second string

    Returns:
        Similarity score between 0.0 and 1.0
    """
    # Any missing operand counts as a mismatch (including None vs None).
    if str1 is None or str2 is None:
        return 0.0

    try:
        first_embedding = self.embedding_function(str1)
        second_embedding = self.embedding_function(str2)
        score = self.similarity_function(first_embedding, second_embedding)
    except Exception:
        # Embedding or scoring failed: degrade to plain string equality.
        score = 1.0 if str1 == str2 else 0.0
    return score

`stickler.comparators.LLMComparator`

Bases: BaseComparator

Comparator that uses LLM to determine semantic equivalence.

This comparator uses an LLM to determine if two values are semantically equivalent, returning 1.0 if True and 0.0 if False.

Attributes:

Name	Type	Description
`prompt`		Prompt template to use for comparison
`model_id`		Model ID to use for LLM
`temp`		Temperature for LLM inference
`system_prompt`		System prompt for the LLM

Source code in stickler/comparators/llm.py

class LLMComparator(BaseComparator):
    """Comparator intended to use an LLM to determine semantic equivalence.

    The design is for an LLM to decide whether two values are semantically
    equivalent, yielding 1.0 if True and 0.0 if False. The LLM call is not
    implemented yet: `compare` raises NotImplementedError for any pair of
    non-None values.

    Attributes:
        prompt: Prompt template to use for comparison
        model_id: Model ID to use for LLM
        temp: Temperature for LLM inference
        system_prompt: System prompt for the LLM
    """

    def __init__(
        self, prompt: str, model_id: str, temp: float = 0.5, threshold: float = 0.5
    ):
        """Initialize the LLMComparator.

        Args:
            prompt: Prompt template to use for comparison
            model_id: Model ID to use for LLM
            temp: Temperature for LLM inference
            threshold: Similarity threshold (0.0-1.0)
        """
        super().__init__(threshold=threshold)
        self.prompt = prompt
        self.temp = temp
        self.model_id = model_id
        self.system_prompt = "You are an evaluation assistant. Carefully decide if the two values are same or not. Respond only with 'TRUE' or 'FALSE', nothing else."

    def compare(self, str1: Any, str2: Any) -> float:
        """Compare two values using LLM (not implemented yet).

        Args:
            str1: First value
            str2: Second value

        Returns:
            0.0 when either value is None.

        Raises:
            NotImplementedError: For any pair of non-None values, because the
                LLM-backed comparison is not working yet.
        """
        if str1 is None or str2 is None:
            return 0.0

        # TODO: invoke the model with self.prompt / self.system_prompt at
        # self.temp, passing the two values, and map a "TRUE" reply to 1.0 and
        # anything else to 0.0. The earlier draft of that flow sat after the
        # raise and could never run, so it was removed.
        # NotImplementedError subclasses Exception, so existing
        # `except Exception` handlers keep working.
        raise NotImplementedError("This implementation is not working yet!")

`__init__(prompt, model_id, temp=0.5, threshold=0.5)`

Initialize the LLMComparator.

Parameters:

Name	Type	Description	Default
`prompt`	`str`	Prompt template to use for comparison	required
`model_id`	`str`	Model ID to use for LLM	required
`temp`	`float`	Temperature for LLM inference	`0.5`
`threshold`	`float`	Similarity threshold (0.0-1.0)	`0.5`

Source code in stickler/comparators/llm.py

def __init__(
    self, prompt: str, model_id: str, temp: float = 0.5, threshold: float = 0.5
):
    """Store the prompt, model and sampling settings for LLM comparison.

    Args:
        prompt: Prompt template to use for comparison
        model_id: Model ID to use for LLM
        temp: Temperature for LLM inference
        threshold: Similarity threshold (0.0-1.0)
    """
    super().__init__(threshold=threshold)
    self.prompt = prompt
    self.model_id = model_id
    self.temp = temp
    # Fixed system prompt asking for a bare TRUE/FALSE verdict.
    self.system_prompt = (
        "You are an evaluation assistant. "
        "Carefully decide if the two values are same or not. "
        "Respond only with 'TRUE' or 'FALSE', nothing else."
    )

`compare(str1, str2)`

Compare two values using LLM.

Parameters:

Name	Type	Description	Default
`str1`	`Any`	First value	required
`str2`	`Any`	Second value	required

Returns:

Type	Description
`float`	1.0 if LLM determines values are equivalent, 0.0 otherwise

Source code in stickler/comparators/llm.py

def compare(self, str1: Any, str2: Any) -> float:
    """Compare two values using LLM (not implemented yet).

    Args:
        str1: First value
        str2: Second value

    Returns:
        0.0 when either value is None.

    Raises:
        NotImplementedError: For any pair of non-None values, because the
            LLM-backed comparison is not working yet.
    """
    if str1 is None or str2 is None:
        return 0.0

    # TODO: invoke the model with self.prompt / self.system_prompt at
    # self.temp, passing the two values, and map a "TRUE" reply to 1.0 and
    # anything else to 0.0. The earlier draft of that flow sat after the
    # raise and could never run, so it was removed.
    # NotImplementedError subclasses Exception, so existing
    # `except Exception` handlers keep working.
    raise NotImplementedError("This implementation is not working yet!")

`stickler.comparators.StructuredModelComparator`

Bases: BaseComparator

Comparator for structured model objects.

This comparator is designed to work with StructuredModel instances, leveraging their built-in comparison capabilities.

Source code in stickler/comparators/structured.py

class StructuredModelComparator(BaseComparator):
    """Comparator that delegates to a model object's own ``compare`` method.

    Intended for StructuredModel instances; any other objects are scored by
    plain equality.
    """

    def __init__(self, threshold: float = 0.7, strict_types: bool = False):
        """Initialize the comparator.

        Args:
            threshold: Similarity threshold (0.0-1.0)
            strict_types: If True, comparing two plain strings raises TypeError
        """
        super().__init__(threshold)
        self.strict_types = strict_types

    def compare(self, model1: Any, model2: Any) -> float:
        """Compare two structured model instances.

        When ``model1`` exposes a callable ``compare`` attribute, the result
        of ``model1.compare(model2)`` is returned; otherwise the score is 1.0
        for equal objects and 0.0 for unequal ones.

        Args:
            model1: First model (ideally a StructuredModel instance)
            model2: Second model (ideally a StructuredModel instance)

        Returns:
            Similarity score between 0.0 and 1.0

        Raises:
            TypeError: When strict_types=True and both values are strings
        """
        # Strict mode (used in tests) rejects a str-vs-str comparison outright.
        both_strings = isinstance(model1, str) and isinstance(model2, str)
        if self.strict_types and both_strings:
            raise TypeError(
                "StructuredModelComparator can only compare StructuredModel instances"
            )

        if model1 is not None and model2 is not None:
            # Duck typing: only model1 needs to provide a callable compare().
            delegate = getattr(model1, "compare", None)
            if callable(delegate):
                return delegate(model2)

        # None operands and objects without compare() are scored by equality.
        return 1.0 if model1 == model2 else 0.0

`__init__(threshold=0.7, strict_types=False)`

Initialize the comparator.

Parameters:

Name	Type	Description	Default
`threshold`	`float`	Similarity threshold (0.0-1.0)	`0.7`
`strict_types`	`bool`	If True, will raise TypeError when non-StructuredModel objects are compared	`False`

Source code in stickler/comparators/structured.py

def __init__(self, threshold: float = 0.7, strict_types: bool = False):
    """Set the similarity threshold and the strict-typing flag.

    Args:
        threshold: Similarity threshold (0.0-1.0)
        strict_types: If True, will raise TypeError when non-StructuredModel objects are compared
    """
    super().__init__(threshold)
    # Read by compare() to decide whether plain strings are rejected.
    self.strict_types = strict_types

`compare(model1, model2)`

Compare two structured model instances.

This method uses the built-in compare method of StructuredModel objects if available, otherwise falls back to basic equality comparison.

Parameters:

Name	Type	Description	Default
`model1`	`Any`	First model (ideally a StructuredModel instance)	required
`model2`	`Any`	Second model (ideally a StructuredModel instance)	required

Returns:

Type	Description
`float`	Similarity score between 0.0 and 1.0

Raises:

Type	Description
`TypeError`	When strict_types=True and comparing non-StructuredModel objects

Source code in stickler/comparators/structured.py

def compare(self, model1: Any, model2: Any) -> float:
    """Compare two structured model instances.

    When ``model1`` exposes a callable ``compare`` attribute, the result of
    ``model1.compare(model2)`` is returned; otherwise the score is 1.0 for
    equal objects and 0.0 for unequal ones.

    Args:
        model1: First model (ideally a StructuredModel instance)
        model2: Second model (ideally a StructuredModel instance)

    Returns:
        Similarity score between 0.0 and 1.0

    Raises:
        TypeError: When strict_types=True and both values are strings
    """
    # Strict mode (used in tests) rejects a str-vs-str comparison outright.
    both_strings = isinstance(model1, str) and isinstance(model2, str)
    if self.strict_types and both_strings:
        raise TypeError(
            "StructuredModelComparator can only compare StructuredModel instances"
        )

    if model1 is not None and model2 is not None:
        # Duck typing: only model1 needs to provide a callable compare().
        delegate = getattr(model1, "compare", None)
        if callable(delegate):
            return delegate(model2)

    # None operands and objects without compare() are scored by equality.
    return 1.0 if model1 == model2 else 0.0

Comparators

stickler.comparators

stickler.comparators.BaseComparator

__call__(str1, str2)

__init__(threshold=0.7)

__repr__()

__str__()

binary_compare(str1, str2)

compare(str1, str2) abstractmethod

stickler.comparators.ExactComparator

__init__(threshold=1.0, case_sensitive=False)

compare(str1, str2)

stickler.comparators.LevenshteinComparator

config property

name property

__init__(normalize=True, threshold=0.7)

compare(s1, s2)

stickler.comparators.NumericComparator

__init__(threshold=1.0, relative_tolerance=0.0, absolute_tolerance=0.0, tolerance=None)

compare(str1, str2)

stickler.comparators.NumericExactC = NumericComparator module-attribute

stickler.comparators.FuzzyComparator

config property

name property

__init__(method='ratio', normalize=True, threshold=0.7)

compare(value1, value2)

stickler.comparators.BERTComparator

__init__(threshold=0.7)

compare(str1, str2)

stickler.comparators.SemanticComparator

__init__(model_id='amazon.titan-embed-text-v2:0', sim_function='cosine_similarity', embedding_function=None, threshold=0.7)

compare(str1, str2)

stickler.comparators.LLMComparator

__init__(prompt, model_id, temp=0.5, threshold=0.5)

compare(str1, str2)

stickler.comparators.StructuredModelComparator

__init__(threshold=0.7, strict_types=False)

compare(model1, model2)

`stickler.comparators`

`stickler.comparators.BaseComparator`

`__call__(str1, str2)`

`__init__(threshold=0.7)`

`__repr__()`

`__str__()`

`binary_compare(str1, str2)`

`compare(str1, str2)` `abstractmethod`

`stickler.comparators.ExactComparator`

`__init__(threshold=1.0, case_sensitive=False)`

`compare(str1, str2)`

`stickler.comparators.LevenshteinComparator`

`config` `property`

`name` `property`

`__init__(normalize=True, threshold=0.7)`

`compare(s1, s2)`

`stickler.comparators.NumericComparator`

`__init__(threshold=1.0, relative_tolerance=0.0, absolute_tolerance=0.0, tolerance=None)`

`compare(str1, str2)`

`stickler.comparators.NumericExactC = NumericComparator` `module-attribute`

`stickler.comparators.FuzzyComparator`

`config` `property`

`name` `property`

`__init__(method='ratio', normalize=True, threshold=0.7)`

`compare(value1, value2)`

`stickler.comparators.BERTComparator`

`__init__(threshold=0.7)`

`compare(str1, str2)`

`stickler.comparators.SemanticComparator`

`__init__(model_id='amazon.titan-embed-text-v2:0', sim_function='cosine_similarity', embedding_function=None, threshold=0.7)`

`compare(str1, str2)`

`stickler.comparators.LLMComparator`

`__init__(prompt, model_id, temp=0.5, threshold=0.5)`

`compare(str1, str2)`

`stickler.comparators.StructuredModelComparator`

`__init__(threshold=0.7, strict_types=False)`

`compare(model1, model2)`