Coverage for blind_charging/__init__.py: 90%

31 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-10-20 15:43 +0000

"""This file defines the public blind_charging interface."""
from typing import Iterable, List, Union

from .annotation import Redaction
from .config import get_locale, set_locale
from .individual import Individual
from .masker import annotate as _annotate
from .source_text import nlp

9 

# Names exported by `from blind_charging import *`.
__all__ = [
    "preload",
    "set_locale",
    "get_locale",
    "annotate",
    "apply_annotations",
    "redact",
]

18 

19 

def preload() -> None:
    """Load language model into memory.

    This is useful for long-running services to load the NLP model during the
    start-up process. Otherwise the model (which can be quite large and take
    some time to load into memory) will be loaded lazily during the first
    request, which will cause that request to hang and possibly time out.
    """
    # Look `_factory` up with the base `object.__getattribute__` so that any
    # attribute interception defined on `nlp`'s own type is bypassed, then
    # call it. NOTE(review): presumably `nlp` is a lazy proxy whose
    # `_factory` builds the real model -- confirm in `source_text`.
    object.__getattribute__(nlp, "_factory")()

29 

30 

AnyKindOfIndividual = Union[str, dict, Individual]
"""Any type that might describe an `Individual`."""

33 

34 

def _norm_individual(x: AnyKindOfIndividual) -> dict:
    """Normalize an `Individual` to a dict.

    :param x: A name string, an `Individual` instance, or a dict
    :returns: ``{"name": x}`` for a string, ``x.to_dict()`` for an
        `Individual`, or the very same dict object (not a copy) for a dict
    :raises TypeError: If `x` is not a `str`, `Individual`, or `dict`
    """
    # The checks run in this order on purpose: str, then Individual, then
    # dict. Do not reorder them without checking how `Individual` is defined.
    if isinstance(x, str):
        return {"name": x}
    if isinstance(x, Individual):
        return x.to_dict()
    if isinstance(x, dict):
        return x
    raise TypeError(f"unexpected type {type(x)}")

45 

46 

def annotate(
    narrative: str,
    persons: Iterable[AnyKindOfIndividual],
    officers: Iterable[AnyKindOfIndividual],
    redact_officers_from_text: bool = True,
    literals: dict[str, list[str]] | None = None,
) -> List[Redaction]:
    """Generate redaction annotations for the input narrative.

    The current locale (from `get_locale()`) is passed to the underlying
    annotator together with the normalized person and officer lists.

    :param narrative: Incident report text
    :param persons: List of people appearing in the text
    :param officers: List of officers appearing in the text
    :param redact_officers_from_text: Whether to mask officer names (default is `True`)
    :param literals: Custom literals to use for redaction (default is `None`)
    :returns: list of annotations
    :raises TypeError: If an entry of `persons` or `officers` is not a
        `str`, `dict`, or `Individual`
    """
    # Normalize persons lists.
    # The list can either be a list of strings (the name of the person), or it
    # can be an enriched dictionary with more information. See `person.py` for
    # more information about what that dict can look like. Accept that someone
    # might want to pass `Individual` instances directly, though this is not
    # the norm.
    # NOTE(review): this module imports `Individual` from `.individual`, so
    # the `person.py` reference above may be stale -- confirm.
    norm_persons = [_norm_individual(p) for p in persons]
    norm_officers = [_norm_individual(o) for o in officers]

    return _annotate(
        get_locale(),
        narrative,
        norm_persons,
        norm_officers,
        redact_officers_from_text=redact_officers_from_text,
        literals=literals,
    )

80 

81 

def apply_annotations(
    narrative: str,
    annotations: Iterable[Redaction],
) -> str:
    """Apply annotations to a narrative.

    Every annotation replaces the slice ``narrative[start:end]`` with its
    `text` wrapped in angle brackets (``<text>``).

    :param narrative: Incident report text
    :param annotations: List of annotations; each must expose `start`, `end`
        and `text` attributes
    :returns: redacted narrative
    """
    # Replace the original text with redaction.
    # Work from the highest `start` down to the lowest so that a replacement
    # never shifts the offsets of the annotations still to be applied.
    # NOTE(review): this assumes the annotation spans do not overlap.
    redacted = narrative
    for r in sorted(annotations, key=lambda x: x.start, reverse=True):
        redacted = redacted[: r.start] + "<" + r.text + ">" + redacted[r.end :]

    return redacted

98 

99 

def redact(
    narrative: str,
    persons: Iterable[AnyKindOfIndividual],
    officers: Iterable[AnyKindOfIndividual],
    redact_officers_from_text: bool = True,
    literals: dict[str, list[str]] | None = None,
) -> str:
    """Run redaction algorithm.

    Calls `annotate` to compute the annotations for the narrative, then
    `apply_annotations` to substitute them into the text.

    :param narrative: Incident report text
    :param persons: List of people appearing in the text
    :param officers: List of officers appearing in the text
    :param redact_officers_from_text: Whether to mask officer names (default is True)
    :param literals: Custom literals to use for redaction
    :returns: redacted narrative
    """
    annotations = annotate(
        narrative,
        persons,
        officers,
        redact_officers_from_text=redact_officers_from_text,
        literals=literals,
    )

    return apply_annotations(narrative, annotations)

123 

124 return apply_annotations(narrative, annotations)