Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/textdistance/algorithms/simple.py: 49%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

71 statements  

1from __future__ import annotations 

2 

3# built-in 

4from itertools import takewhile 

5from typing import Sequence 

6 

7# app 

8from .base import Base as _Base, BaseSimilarity as _BaseSimilarity 

9from .types import SimFunc 

10 

11 

# Public API: the algorithm classes first, then their ready-made
# default instances, in the same order as they are defined below.
__all__ = [
    # classes
    'Prefix',
    'Postfix',
    'Length',
    'Identity',
    'Matrix',
    # default instances
    'prefix',
    'postfix',
    'length',
    'identity',
    'matrix',
]

16 

17 

class Prefix(_BaseSimilarity):
    """prefix similarity

    Calling an instance returns the leading run of elements for which
    ``sim_test`` accepts the aligned elements of all sequences;
    ``similarity`` returns the length of that run.
    """

    def __init__(self, qval: int = 1, sim_test: SimFunc = None) -> None:
        """Store the configuration.

        :param qval: kept on the instance; consumed by the inherited
            ``_get_sequences`` helper (presumably the q-gram size -- confirm
            against the base class).
        :param sim_test: callable applied to the aligned elements; when
            falsy, the inherited ``_ident`` helper is used instead.
        """
        self.qval = qval
        self.sim_test = sim_test or self._ident

    def __call__(self, *sequences: Sequence) -> Sequence:
        """Return the shared prefix of ``sequences``.

        The result is a ``str`` when the first prepared sequence is a
        ``str``, ``bytes`` when it is ``bytes``, and a ``list`` otherwise.
        With no arguments an empty string is returned.
        """
        if not sequences:
            return ''
        sequences = self._get_sequences(*sequences)

        def test(seq):
            # ``seq`` is one tuple of aligned elements, one per sequence.
            return self.sim_test(*seq)

        # zip() stops at the shortest sequence; takewhile() stops at the
        # first position rejected by ``sim_test``.  The element of the
        # first sequence represents each accepted position.
        result = [c[0] for c in takewhile(test, zip(*sequences))]

        s = sequences[0]
        if isinstance(s, str):
            return ''.join(result)
        if isinstance(s, bytes):
            # Iterating ``bytes`` yields ints, so ``b''.join(result)`` would
            # raise TypeError for any non-empty prefix; build the bytes
            # object from the ints directly.
            return bytes(result)
        return result

    def similarity(self, *sequences: Sequence) -> int:
        """Return the length of the shared prefix."""
        return len(self(*sequences))

45 

46 

class Postfix(Prefix):
    """postfix similarity

    Computed by reversing every sequence, taking the prefix via
    ``Prefix.__call__`` and reversing the result back.
    """

    def __call__(self, *sequences: Sequence) -> Sequence:
        """Return the shared suffix of ``sequences``.

        The result is a ``str`` when the first argument is a ``str``,
        ``bytes`` when it is ``bytes``, and a ``list`` otherwise.  With no
        arguments an empty string is returned, matching ``Prefix``.
        """
        if not sequences:
            # Without this guard ``sequences[0]`` raises IndexError, while
            # the parent class returns '' for the same call.
            return ''
        s = sequences[0]
        sequences = [list(reversed(seq)) for seq in sequences]
        # The parent receives lists, so the common part comes back as a
        # sequence that still has to be flipped into original order.
        result = reversed(super().__call__(*sequences))
        if isinstance(s, str):
            return ''.join(result)
        if isinstance(s, bytes):
            # Elements taken from ``bytes`` are ints; ``b''.join`` rejects
            # them, so construct the bytes object from the ints instead.
            return bytes(result)
        return list(result)

60 

61 

class Length(_Base):
    """Length distance

    The difference between the longest and the shortest of the given
    sequences.
    """

    def __call__(self, *sequences: Sequence) -> int:
        """Return ``max(len) - min(len)`` over ``sequences``.

        With no arguments the result is 0 (previously ``max()`` raised
        ValueError on the empty list of lengths).
        """
        if not sequences:
            return 0
        lengths = [len(seq) for seq in sequences]
        return max(lengths) - min(lengths)

69 

70 

class Identity(_BaseSimilarity):
    """Identity similarity

    The verdict of the inherited ``_ident`` helper, converted to ``int``.
    """

    def __call__(self, *sequences: Sequence) -> int:
        """Return ``int(self._ident(*sequences))``."""
        verdict = self._ident(*sequences)
        return int(verdict)

    def maximum(self, *sequences: Sequence) -> int:
        """Return the largest possible similarity, which is always 1."""
        return 1

80 

81 

class Matrix(_BaseSimilarity):
    """Matrix similarity

    Looks the tuple of arguments up in ``mat``; when no entry is found the
    result is ``match_cost`` if the inherited ``_ident`` helper accepts the
    arguments and ``mismatch_cost`` otherwise.
    """

    def __init__(
        self,
        mat=None,
        mismatch_cost: int = 0,
        match_cost: int = 1,
        symmetric: bool = True,
        external: bool = True,
    ) -> None:
        """Store the lookup table and the fallback costs.

        :param mat: container keyed by tuples of elements; a falsy value
            disables the lookup.
        :param mismatch_cost: value returned when nothing matches.
        :param match_cost: value returned for identical arguments; also
            reported by ``maximum``.
        :param symmetric: when true, the reversed tuple is tried as a key
            as well.
        :param external: accepted but neither stored nor read by this
            class.
        """
        self.mat = mat
        self.mismatch_cost = mismatch_cost
        self.match_cost = match_cost
        self.symmetric = symmetric

    def maximum(self, *sequences: Sequence) -> int:
        """Return ``match_cost``."""
        return self.match_cost

    def __call__(self, *sequences: Sequence) -> int:
        """Return the similarity stored for ``sequences`` or a fallback cost."""
        key = sequences
        table = self.mat
        if table:
            # direct hit
            if key in table:
                return table[key]
            # mirrored hit; the reversed key is kept for the identity check
            if self.symmetric:
                key = tuple(reversed(key))
                if key in table:
                    return table[key]
        # no table or no entry: identical arguments get match_cost
        if self._ident(*key):
            return self.match_cost
        return self.mismatch_cost

121 

122 

# Ready-to-use instances, each built with its class's default arguments.
prefix = Prefix()
postfix = Postfix()
length = Length()
identity = Identity()
matrix = Matrix()