Coverage for blind_charging/officer.py: 78%

1import itertools

2import re

3from collections import defaultdict

4from typing import DefaultDict, List, Optional

6from .individual import Individual, MergeDifferentPersonsError

7from .locale import Locale

8from .source_text import nlp

11def _get_name_pattern(

12 parts: List[Optional[str]], star: Optional[str] = None

13) -> Optional[str]:

14 """Get a regular expression that joins a set of parts.

16 :param parts: Parts to join with space pattern

17 :param star: Optional star part to append to the end

18 :returns: Regex, or None if there were no parts to join

19 """

20 non_null = filter(None, parts)

21 if not non_null:

22 return None

23 pfx = r"\s+".join(non_null)

24 if star:

25 return pfx + star

26 return pfx

29class OfficerName(Individual):

30 # TODO(itsmrlin): double check name regex to prevent catastrophic backtracking

31 # TODO(itsmrlin): automate capitalization variation

32 # officer title re

33 t_re = (

36 r"commissioner|comm\.?|FTO|PSA)\s+"

37 )

# "5 digit" code regex: one digit, one capital letter, then 2-3 capital
# alphanumerics (4-5 characters in total), optionally wrapped in literal
# dollar signs. The dollar signs must be escaped: a bare ``$?`` quantifies
# the end-of-string anchor, which ``re`` rejects with "nothing to repeat".
dgt5_re = r"(\s|^)?\$?[0-9][A-Z][0-9A-Z]{2,3}\$?(\s|\.|$)"
# name regex: a capitalized word that may contain hyphens and apostrophes
n_re = r"[A-Z][A-Za-z\-\']*\s*"
# star regex: "#" followed by a 3-5 digit star number (captured in group 1)
star_re = r"(?:#\s*)([0-9]{3,5})\b"

def __init__(self, name):
    """Parse a raw officer reference into title, name parts, star and code.

    :param name: Officer reference string as it appeared in the text
    """
    self._dict = {"ofc_str": name}
    self.dgt5_code = None
    self.star = None
    self.title = None
    self.name = []
    self.code_name = None
    self.cls = ""
    # Every recognized spelling (upper-cased, without trailing period)
    # mapped to its canonical title.
    self.officer_titles = {
        "OFFICER": "Officer",
        "OFC": "Officer",
        "OFF": "Officer",
        "SERGEANT": "Sergeant",
        "SGT": "Sergeant",
        "INSPECTOR": "Inspector",
        "INSP": "Inspector",
        "SHERIFF": "Sheriff",
        "COMMISSIONER": "Commissioner",
        "COMM": "Commissioner",
        "FTO": "FTO",
        "PSA": "PSA",
    }
    # Reverse lookup: canonical title -> all of its spellings.
    self.t2abbr = {}
    for spelling, canonical in self.officer_titles.items():
        self.t2abbr.setdefault(canonical, []).append(spelling)

    star_match = re.search(OfficerName.star_re, name)
    if star_match:
        self.star = str(star_match.group(1))

    for raw_token in name.split():
        token = raw_token.upper().strip()
        bare = token.strip(".")
        if bare in self.officer_titles:
            self.title = self.officer_titles[bare]
        elif (
            re.match(r"\b[A-Za-z\-\']+\b", token)
            and not nlp.vocab[token].is_stop
        ):
            # Name part: drop any trailing non-letter characters.
            self.name.append(re.sub(r"[^A-Z]+$", "", token))
        elif re.match(OfficerName.dgt5_re, token) is not None:
            self.dgt5_code = token.strip("(").strip(")")

    # Name parts are stored as an unordered, de-duplicated set.
    self.name = set(self.name)

99 def __eq__(self, other):

100 # dgt5_code is likely a shift number (including 2 officers) and

101 # we do not match officer based on that

102 # but we DO know if they are different then officers are different

103 if self.dgt5_code != other.dgt5_code:

104 return False

105

106 if self.star is not None and self.star == other.star:

107 return True

108

109 for n1 in self.name:

110 for n2 in other.name:

111 if n1 == n2:

112 return True

113

114 return False

115

116 def __hash__(self):

117 return hash(str(self))

118

119 def __str__(self):

120 return str(

121 {

122 "code_name": self.code_name,

123 "dgt5_code": self.dgt5_code,

124 "star": self.star,

125 "title": self.title,

126 "name": self.name,

127 }

128 )

129

130 def __repr__(self):

131 return self.__str__()

132

def get_indicator(self):
    """Return the assigned code name, or a placeholder if none is set.

    :returns: ``code_name``, or a fixed warning string when it is None
    """
    return "CODE NAME NOT ASSIGNED!" if self.code_name is None else self.code_name

138

def to_dict(self):
    """Return a shallow copy of the stored source dictionary.

    :returns: New dict with the same items as ``self._dict``
    """
    return dict(self._dict)

141

def merge(self, other):
    """Fold another reference to the same officer into this one.

    Fields that are still None on this object are filled in from
    ``other``; the name parts of both references are combined.

    :param other: Reference that compares equal to this one
    :raises MergeDifferentPersonsError: If the two references differ
    """
    if self != other:
        raise MergeDifferentPersonsError

    # Only fill gaps -- values already known on this object win.
    for field in ("dgt5_code", "star", "title"):
        if getattr(self, field) is None:
            setattr(self, field, getattr(other, field))

    self.name = self.name.union(other.name)

156

157 def _name_rep_impl(self):
# Build the regex patterns that may refer to this officer in text.
# Patterns combine the code, title spellings, name parts and star number.
# Returns a list of pattern strings, each wrapped in word boundaries and
# sorted longest first.

158 reps = set()

# Every single name part and every ordered pair of name parts, with the
# parts of a pair separated by a whitespace pattern.
159 combined_names = [

160 r"\s+".join(x)

161 for i in range(1, 3)

162 for x in itertools.permutations(self.name, i)

163 ]

164

# NOTE(review): "$" is not escaped here, so "$?" is not an optional literal
# dollar sign -- confirm whether r"\$?" was intended.
165 dgt5_code = None if self.dgt5_code is None else r"$?%s$?" % self.dgt5_code

166 # 1A23B

# May add None when there is no code; None is removed again further down.
167 reps.add(dgt5_code)

168

169 if self.star:

170 star_no = self.star

171 star_regex = r"\s*#\s*" + star_no

172 reps.add(star_regex)

173 else:

174 star_regex = None

175

176 for n in combined_names:

177 # John Doe

178 reps.add(_get_name_pattern([n]))

179 # John Doe #1234

180 reps.add(_get_name_pattern([n], star_regex))

181 if dgt5_code:

182 # 1A23B John Doe #1234

183 reps.add(_get_name_pattern([dgt5_code, n], star_regex))

184

185 if self.title is not None:

# Each known spelling of the title is allowed an optional trailing period.
186 for t in self.t2abbr[self.title]:

187 if self.star is not None:

188 # Officer #1234

189 reps.add(_get_name_pattern([t + r"\.?"], star_regex))

190 if dgt5_code:

191 # 1A23B Officer #1234

192 reps.add(_get_name_pattern([dgt5_code, t + r"\.?"], star_regex))

193

194 for n in combined_names:

195

196 for t in self.t2abbr[self.title]:

197 # officer john doe

198 reps.add(_get_name_pattern([t + r"\.?", n]))

199 # officer john doe #1234

200 reps.add(_get_name_pattern([t + r"\.?", n], star_regex))

201 if dgt5_code:

202 # 1a23b officer john doe #1234

203 reps.add(

204 _get_name_pattern([dgt5_code, t + r"\.?", n], star_regex)

205 )

206

# Drop the None placeholder that missing parts may have added.
207 if None in reps:

208 reps.remove(None)

209

# Anchor every pattern on word boundaries.
210 reps = {r"\b%s\b" % x for x in reps}

211

212 # TODO(jnu): the longest pattern is not necessarily going to yield the

213 # longest match. It's an ok heuristic for now, but really we should

214 # match all the patterns and resolve overlaps by choosing the longest

215 # match.

216 reps = sorted(reps, key=lambda x: len(x), reverse=True)

217 return reps

218

219 @classmethod

220 def dedupe(

221 cls, officers: List["OfficerName"], locale: Locale

222 ) -> List["OfficerName"]:

223 """Merge duplicated officer references.

224

225 :param officers: List of officers

226 :param locale: Location information

227 :returns: De-duplicated list of officers

228 """

229 persons = super(OfficerName, cls).dedupe(officers, locale)

230

231 type_counts: DefaultDict[str, int] = defaultdict(int)

232 for p in persons:

233 if p.star is not None and p.title is None:

234 title = "Officer"

235 else:

236 title = p.title

237

238 if not title:

239 # if no title and no star, but has the 5 digit code

240 # it's probably a team/partnership

241 if p.dgt5_code is not None:

242 p.code_name = "[officer pair]"

243 else:

244 p.code_name = "an officer"

245 else:

246 type_counts[title] += 1

247 p.code_name = "%s #%d" % (title, type_counts[title])

248 # TODO(jnu): clean up how the class is applied

249 p.cls = "masked-officer"

250

251 return persons