Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/chardet/pipeline/statistical.py: 33%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

15 statements  

1"""Stage 3: Statistical bigram scoring. 

2 

3Note: ``from __future__ import annotations`` is intentionally omitted because 

4this module is compiled with mypyc, which does not support PEP 563 string 

5annotations. 

6""" 

7 

8from chardet.models import BigramProfile, score_best_language 

9from chardet.pipeline import DetectionResult 

10from chardet.registry import EncodingInfo 

11 

12 

def score_candidates(
    data: bytes, candidates: tuple[EncodingInfo, ...]
) -> list[DetectionResult]:
    """Rank encoding candidates by their statistical score, best first.

    One :class:`BigramProfile` is built from *data* and handed to every
    :func:`score_best_language` call via ``profile=``. Only candidates whose
    score is strictly greater than ``0.0`` appear in the result.

    :param data: The raw byte data to score.
    :param candidates: Encoding candidates to evaluate.
    :returns: A list of :class:`DetectionResult` ordered by descending
        confidence; an empty list when *data* or *candidates* is empty.
    """
    if not (data and candidates):
        return []

    shared_profile = BigramProfile(data)

    # Collect plain tuples first; results are only materialised after sorting.
    positive: list[tuple[str, float, str | None]] = []
    for candidate in candidates:
        confidence, language = score_best_language(
            data, candidate.name, profile=shared_profile
        )
        if confidence > 0.0:
            positive.append((candidate.name, confidence, language))

    # The sort is stable, so equally scored candidates keep their input order.
    ranked = sorted(positive, key=lambda entry: entry[1], reverse=True)
    return [
        DetectionResult(encoding=encoding, confidence=confidence, language=language)
        for encoding, confidence, language in ranked
    ]