Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/charset_normalizer/legacy.py

1from __future__ import annotations

3from typing import TYPE_CHECKING, Any, Optional

4from warnings import warn

6from .api import from_bytes

7from .constant import CHARDET_CORRESPONDENCE

# TODO: remove this check when dropping Python 3.7 support
if TYPE_CHECKING:
    # Only evaluated by static type checkers; never imported at runtime.
    from typing_extensions import TypedDict

    class ResultDict(TypedDict):
        """Shape of the dictionary returned by the legacy ``detect()`` function."""

        # Detected encoding name, or None when no match was found.
        encoding: Optional[str]
        # Detected language; empty string when no match or the language is "Unknown".
        language: str
        # Computed as ``1.0 - chaos`` of the best match, or None when no match was found.
        confidence: Optional[float]

def detect(
    byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
) -> ResultDict:
    """
    chardet legacy method
    Detect the encoding of the given byte string. It should be mostly backward-compatible.
    Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it)
    This function is deprecated and should be used to migrate your project easily, consult the documentation for
    further information. Not planned for removal.

    :param byte_str: The byte sequence to examine. A ``bytearray`` is also
        accepted and is converted to ``bytes`` before detection.
    :param should_rename_legacy: Should we rename legacy encodings
        to their more modern equivalents? Only when this is exactly ``False``
        is the detected name looked up in ``CHARDET_CORRESPONDENCE`` and
        replaced by the mapped value.
    :param kwargs: Ignored; a warning listing the given names is emitted
        when any are passed.
    :return: A dict with the keys ``"encoding"``, ``"language"`` and
        ``"confidence"``. ``encoding`` and ``confidence`` are ``None`` and
        ``language`` is ``""`` when no match is found.
    :raises TypeError: If ``byte_str`` is neither ``bytes`` nor ``bytearray``.
    """
    if kwargs:
        # Iterating a dict yields its keys, so join() sees the argument names.
        warn(
            f"charset-normalizer disregard arguments '{','.join(kwargs)}' in legacy function detect()"
        )

    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
        )

    if isinstance(byte_str, bytearray):
        byte_str = bytes(byte_str)

    r = from_bytes(byte_str).best()

    # best() may yield None; every field below falls back to its "no match" value.
    encoding = r.encoding if r is not None else None
    language = r.language if r is not None and r.language != "Unknown" else ""
    confidence = 1.0 - r.chaos if r is not None else None

    # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
    # but chardet does return 'utf-8-sig' and it is a valid codec name.
    if r is not None and encoding == "utf_8" and r.bom:
        encoding += "_sig"

    # Identity test on purpose: only the literal False triggers the lookup.
    if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
        encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": encoding,
        "language": language,
        "confidence": confidence,
    }

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/charset_normalizer/legacy.py: 27%

26 statements