Coverage for blind_charging/__init__.py: 90%

29 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-17 20:36 +0000

1"""This file defines the public blind_charging interface.""" 

2from typing import Iterable, List, Union 

3 

4from .annotation import Redaction 

5from .config import get_locale, set_locale 

6from .individual import Individual 

7from .masker import annotate as _annotate 

8from .source_text import nlp 

9 

# Names exported by `from blind_charging import *`; this is the public API.
__all__ = ["preload", "set_locale", "get_locale", "annotate", "redact"]

17 

18 

def preload():
    """Eagerly load the NLP language model.

    Long-running services should call this once during start-up. Without it,
    the (large, slow-to-load) model is loaded lazily on the first request,
    which can make that request hang and possibly time out.
    """
    # `object.__getattribute__` skips any attribute interception defined on
    # `nlp`'s own class, so `_factory` is read from the `nlp` object itself.
    # NOTE(review): presumably `nlp` is a lazy proxy and `_factory` is the
    # callable that builds the real model -- confirm in `source_text`.
    load_model = object.__getattribute__(nlp, "_factory")
    load_model()

28 

29 

# Accepted ways to describe a person to the public API: a plain string, a
# dict, or an `Individual` instance.
AnyKindOfIndividual = Union[str, dict, Individual]
"""Any type that might describe an `Individual`."""

32 

33 

def _norm_individual(x: AnyKindOfIndividual) -> dict:
    """Coerce any supported description of an individual into a dict.

    :param x: A name string, an `Individual` instance, or a dict
    :returns: ``{"name": x}`` for a string, ``x.to_dict()`` for an
        `Individual`, or the dict itself (not copied) when given a dict
    :raises TypeError: If ``x`` is none of the supported types
    """
    # A bare string is treated as the person's name.
    if isinstance(x, str):
        return {"name": x}

    if isinstance(x, Individual):
        return x.to_dict()

    # Dicts are passed through untouched; the caller's object is returned.
    if isinstance(x, dict):
        return x

    raise TypeError(f"unexpected type {type(x)}")

44 

45 

def annotate(
    narrative: str,
    persons: Iterable[AnyKindOfIndividual],
    officers: Iterable[AnyKindOfIndividual],
    redact_officers_from_text: bool = True,
) -> List[Redaction]:
    """Compute the redaction annotations for a narrative.

    :param narrative: Incident report text
    :param persons: List of people appearing in the text
    :param officers: List of officers appearing in the text
    :param redact_officers_from_text: Whether to mask officer names (default is True)
    :returns: list of annotations
    :raises TypeError: If an entry of ``persons`` or ``officers`` is not a
        string, dict, or `Individual`
    """
    # Each entry may be a plain name string, an enriched dict (see
    # `person.py` for what that dict can look like), or -- less commonly --
    # an `Individual` instance. Bring them all to the dict form before
    # handing them to the masker.
    person_dicts = list(map(_norm_individual, persons))
    officer_dicts = list(map(_norm_individual, officers))

    # NOTE(review): `redact_officers_from_text` is accepted here but is not
    # forwarded to `_annotate`, so it currently has no effect in this
    # function. Confirm whether `_annotate` takes such a flag before wiring
    # it through.
    locale = get_locale()
    return _annotate(locale, narrative, person_dicts, officer_dicts)

70 

71 

def redact(
    narrative: str,
    persons: Iterable[AnyKindOfIndividual],
    officers: Iterable[AnyKindOfIndividual],
    redact_officers_from_text: bool = True,
) -> str:
    """Run redaction algorithm.

    Annotations are produced by `annotate` and then spliced into the
    narrative. Each annotated span ``[start, end)`` is replaced by the
    annotation's ``text`` wrapped in angle brackets.

    :param narrative: Incident report text
    :param persons: List of people appearing in the text
    :param officers: List of officers appearing in the text
    :param redact_officers_from_text: Whether to mask officer names (default is True)
    :returns: redacted narrative
    """
    annotations = annotate(
        narrative,
        persons,
        officers,
        redact_officers_from_text=redact_officers_from_text,
    )

    # A replacement usually differs in length from the span it replaces, so
    # every splice shifts the text to its right. Applying the splices from
    # the highest start offset down keeps the offsets of the remaining
    # (earlier) annotations valid. `sorted` is stable, so a list that already
    # arrives in descending order is processed exactly as before.
    # Assumes `start`/`end` index into the original `narrative`.
    right_to_left = sorted(annotations, key=lambda r: r.start, reverse=True)

    redacted = narrative
    for r in right_to_left:
        redacted = "".join(
            (redacted[: r.start], "<", r.text, ">", redacted[r.end :])
        )

    return redacted