1from __future__ import annotations
2
3# built-in
4from itertools import takewhile
5from typing import Sequence
6
7# app
8from .base import Base as _Base, BaseSimilarity as _BaseSimilarity
9from .types import SimFunc
10
11
# Public API of this module: the algorithm classes followed by the
# lower-case default instances created at the bottom of the file.
__all__ = [
    'Prefix', 'Postfix', 'Length', 'Identity', 'Matrix',
    'prefix', 'postfix', 'length', 'identity', 'matrix',
]
16
17
class Prefix(_BaseSimilarity):
    """prefix similarity

    Calling an instance returns the longest common leading run of the given
    sequences; ``similarity`` returns the length of that run.
    """

    def __init__(self, qval: int = 1, sim_test: SimFunc = None) -> None:
        """Configure the comparison.

        Args:
            qval: Stored on the instance. It is not read directly in this
                class (presumably consumed by ``_get_sequences`` -- confirm
                against the base class).
            sim_test: Callable invoked with the aligned elements of all
                sequences. The prefix ends at the first position for which
                it returns a falsy value. When omitted (or falsy),
                ``self._ident`` is used instead.
        """
        self.qval = qval
        self.sim_test = sim_test or self._ident

    def __call__(self, *sequences: Sequence) -> Sequence:
        """Return the common prefix of ``sequences``.

        Returns:
            ``''`` when called without arguments. Otherwise a ``str`` or
            ``bytes`` object when the first prepared sequence is of that
            type, and a ``list`` of the matching elements in every other
            case.
        """
        if not sequences:
            return ''
        sequences = self._get_sequences(*sequences)

        def elements_match(column):
            # `column` holds one element from each sequence at the same index.
            return self.sim_test(*column)

        # zip() stops at the shortest sequence and takewhile() stops at the
        # first mismatching column; the element of the first sequence is kept.
        matched = [
            column[0]
            for column in takewhile(elements_match, zip(*sequences))
        ]

        first = sequences[0]
        if isinstance(first, str):
            return ''.join(matched)
        if isinstance(first, bytes):
            # Iterating over bytes yields ints, which b''.join() rejects with
            # a TypeError; bytes() rebuilds the object from those ints.
            return bytes(matched)
        return matched

    def similarity(self, *sequences: Sequence) -> int:
        """Return the number of elements in the common prefix."""
        return len(self(*sequences))
45
46
class Postfix(Prefix):
    """postfix similarity

    Calling an instance returns the longest common trailing run of the given
    sequences. It is computed by reversing every input, delegating to the
    parent class's ``__call__`` and reversing the result back.
    """

    def __call__(self, *sequences: Sequence) -> Sequence:
        """Return the common postfix of ``sequences``.

        Returns:
            ``''`` when called without arguments. Otherwise a ``str`` or
            ``bytes`` object when the first argument is of that type, and a
            ``list`` in every other case.
        """
        if not sequences:
            # Same result the parent class gives for a call without
            # arguments, instead of failing on sequences[0] below.
            return ''

        # Remember the original first argument: it decides the return type.
        first = sequences[0]
        flipped = [list(reversed(seq)) for seq in sequences]
        common = reversed(super().__call__(*flipped))

        if isinstance(first, str):
            return ''.join(common)
        if isinstance(first, bytes):
            # The reversed inputs are lists of ints, so the common part is
            # made of ints too; b''.join() would raise TypeError on them.
            return bytes(common)
        return list(common)
60
61
class Length(_Base):
    """Length distance

    The result is the gap between the longest and the shortest of the
    given sequences.
    """

    def __call__(self, *sequences: Sequence) -> int:
        """Return ``len`` of the longest sequence minus ``len`` of the shortest."""
        sizes = [len(seq) for seq in sequences]
        longest = max(sizes)
        shortest = min(sizes)
        return longest - shortest
69
70
class Identity(_BaseSimilarity):
    """Identity similarity

    The score is the result of ``self._ident`` converted to ``int``.
    """

    def __call__(self, *sequences: Sequence) -> int:
        """Return ``int(self._ident(*sequences))``."""
        outcome = self._ident(*sequences)
        return int(outcome)

    def maximum(self, *sequences: Sequence) -> int:
        """Return the upper bound of the score, which is always 1."""
        return 1
80
81
class Matrix(_BaseSimilarity):
    """Matrix similarity

    Looks the tuple of arguments up in a user-supplied table of costs and
    falls back to ``match_cost`` / ``mismatch_cost`` when no entry exists.
    """

    def __init__(
        self,
        mat=None,
        mismatch_cost: int = 0,
        match_cost: int = 1,
        symmetric: bool = True,
        external: bool = True,
    ) -> None:
        """Store the lookup table and the fallback costs.

        Args:
            mat: Container queried with ``key in mat`` and ``mat[key]``,
                where ``key`` is the tuple of call arguments. A falsy value
                disables the lookup.
            mismatch_cost: Returned when nothing is found and ``_ident``
                reports no match.
            match_cost: Returned when nothing is found and ``_ident``
                reports a match; also the value of ``maximum``.
            symmetric: When true, the reversed argument tuple is tried as a
                second key.
            external: Accepted for call compatibility.
        """
        # NOTE(review): `external` is accepted but not stored on the
        # instance here -- confirm whether the base class expects it.
        self.symmetric = symmetric
        self.match_cost = match_cost
        self.mismatch_cost = mismatch_cost
        self.mat = mat

    def maximum(self, *sequences: Sequence) -> int:
        """Return ``match_cost`` regardless of the arguments."""
        return self.match_cost

    def __call__(self, *sequences: Sequence) -> int:
        """Return the cost for ``sequences``.

        The table (if any) is consulted first, then -- for symmetric
        matrices -- the reversed key. Without a hit the result is
        ``match_cost`` when ``_ident`` holds and ``mismatch_cost`` otherwise.
        """
        if self.mat:
            # direct hit
            if sequences in self.mat:
                return self.mat[sequences]
            # mirrored hit; the reversed tuple is also what the identity
            # check below receives in this case
            if self.symmetric:
                sequences = tuple(reversed(sequences))
                if sequences in self.mat:
                    return self.mat[sequences]

        # no table or no entry: decide by identity
        return self.match_cost if self._ident(*sequences) else self.mismatch_cost
121
122
# Ready-to-use instances, each built with its class's default arguments.
prefix = Prefix()
postfix = Postfix()
length = Length()
identity = Identity()
matrix = Matrix()