Coverage for blind_charging/__init__.py: 90%
29 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-17 20:36 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-02-17 20:36 +0000
1"""This file defines the public blind_charging interface."""
2from typing import Iterable, List, Union
4from .annotation import Redaction
5from .config import get_locale, set_locale
6from .individual import Individual
7from .masker import annotate as _annotate
8from .source_text import nlp
# Names exported by `from blind_charging import *`.
__all__ = ["preload", "set_locale", "get_locale", "annotate", "redact"]
def preload():
    """Eagerly load the NLP language model into memory.

    Long-running services should call this during start-up. The model can be
    quite large and slow to load; if it is not preloaded it gets loaded
    lazily on the first request, which can make that request hang and
    possibly time out.
    """
    # Fetch `_factory` via `object.__getattribute__` instead of ordinary
    # attribute access on `nlp`, then call it.
    # NOTE(review): presumably `nlp` intercepts normal attribute access
    # (lazy wrapper) -- confirm against `source_text`.
    load_model = object.__getattribute__(nlp, "_factory")
    load_model()
# Accepted input forms: a plain string, a dict, or an `Individual` instance.
AnyKindOfIndividual = Union[str, dict, Individual]
"""Any type that might describe an `Individual`."""
34def _norm_individual(x: AnyKindOfIndividual) -> dict:
35 """Normalize an `Individual` to a dict."""
36 if isinstance(x, str):
37 return {"name": x}
38 elif isinstance(x, Individual):
39 return x.to_dict()
40 elif isinstance(x, dict):
41 return x
42 else:
43 raise TypeError(f"unexpected type {type(x)}")
def annotate(
    narrative: str,
    persons: Iterable[AnyKindOfIndividual],
    officers: Iterable[AnyKindOfIndividual],
    redact_officers_from_text: bool = True,
) -> List[Redaction]:
    """Produce redaction annotations for an incident narrative.

    :param narrative: Incident report text
    :param persons: People appearing in the text
    :param officers: Officers appearing in the text
    :param redact_officers_from_text: Whether to mask officer names (default
        is True). NOTE(review): this function accepts the flag but does not
        pass it on to the masker -- confirm whether that is intentional.
    :returns: list of annotations
    :raises TypeError: if an entry of `persons` or `officers` is not a
        string, dict, or `Individual`
    """
    # Entries may be plain name strings, enriched dicts, or (less commonly)
    # `Individual` instances; bring them all to dict form before masking.
    people = list(map(_norm_individual, persons))
    police = list(map(_norm_individual, officers))

    # The locale is read at call time, after both lists are normalized.
    return _annotate(get_locale(), narrative, people, police)
def redact(
    narrative: str,
    persons: Iterable[AnyKindOfIndividual],
    officers: Iterable[AnyKindOfIndividual],
    redact_officers_from_text: bool = True,
) -> str:
    """Run redaction algorithm.

    Each annotation's ``[start, end)`` span in the narrative is replaced by
    the annotation text wrapped in angle brackets (``<text>``).

    :param narrative: Incident report text
    :param persons: List of people appearing in the text
    :param officers: List of officers appearing in the text
    :param redact_officers_from_text: Whether to mask officer names (default is True)
    :returns: redacted narrative
    """
    annotations = annotate(
        narrative,
        persons,
        officers,
        redact_officers_from_text=redact_officers_from_text,
    )

    # Replacements generally differ in length from the span they replace, so
    # splicing one in shifts every offset to its right. Apply them from the
    # end of the text towards the beginning so the offsets of the remaining
    # annotations stay valid whatever order `annotate` returned them in.
    # The sort is stable, so input already in that order is unaffected.
    # NOTE(review): assumes offsets index into the original narrative and
    # that spans do not overlap -- confirm against the masker.
    ordered = sorted(annotations, key=lambda a: a.start, reverse=True)

    redacted = narrative
    for r in ordered:
        redacted = redacted[: r.start] + "<" + r.text + ">" + redacted[r.end :]

    return redacted