Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/pipeline/statistical.py: 33%

1"""Stage 3: Statistical bigram scoring.

3Note: ``from __future__ import annotations`` is intentionally omitted because

4this module is compiled with mypyc, which does not support PEP 563 string

5annotations.

6"""

8from chardet.models import BigramProfile, score_best_language

9from chardet.pipeline import DetectionResult

10from chardet.registry import EncodingInfo

13def score_candidates(

14 data: bytes, candidates: tuple[EncodingInfo, ...]

15) -> list[DetectionResult]:

16 """Score all candidates and return results sorted by confidence descending.

18 :param data: The raw byte data to score.

19 :param candidates: Encoding candidates to evaluate.

20 :returns: A list of :class:`DetectionResult` sorted by confidence.

21 """

22 if not data or not candidates:

23 return []

25 profile = BigramProfile(data)

26 scores: list[tuple[str, float, str | None]] = []

28 for enc in candidates:

29 s, lang = score_best_language(data, enc.name, profile=profile)

30 if s > 0.0:

31 scores.append((enc.name, s, lang))

33 scores.sort(key=lambda x: x[1], reverse=True)

34 return [

35 DetectionResult(encoding=name, confidence=s, language=lang)

36 for name, s, lang in scores