1# Extracted from https://github.com/pfmoore/pkg_metadata
2from __future__ import annotations
3
4from email.header import Header, decode_header, make_header
5from email.message import Message
6from typing import Any, cast
7
# Header fields that msg_to_json() looks for, in the order they are copied
# into the resulting dictionary.  Each entry is a pair:
#   (header name, multiple-use flag)
# When the flag is True every occurrence of the header is collected into a
# list; when it is False only a single value is read.
METADATA_FIELDS = [
    # Name, Multiple-Use
    ("Metadata-Version", False),
    ("Name", False),
    ("Version", False),
    ("Dynamic", True),
    ("Platform", True),
    ("Supported-Platform", True),
    ("Summary", False),
    # msg_to_json() replaces this value with the message payload, if any.
    ("Description", False),
    ("Description-Content-Type", False),
    # Single-use header, but msg_to_json() splits its value into a list.
    ("Keywords", False),
    ("Home-page", False),
    ("Download-URL", False),
    ("Author", False),
    ("Author-email", False),
    ("Maintainer", False),
    ("Maintainer-email", False),
    ("License", False),
    ("License-Expression", False),
    ("License-File", True),
    ("Classifier", True),
    ("Requires-Dist", True),
    ("Requires-Python", False),
    ("Requires-External", True),
    ("Project-URL", True),
    ("Provides-Extra", True),
    ("Provides-Dist", True),
    ("Obsoletes-Dist", True),
]
38
39
def json_name(field: str) -> str:
    """Return the JSON key used for a metadata field name.

    The name is lower-cased and every hyphen is turned into an
    underscore, e.g. ``"Home-page"`` becomes ``"home_page"``.
    """
    lowered = field.lower()
    return "_".join(lowered.split("-"))
42
43
def msg_to_json(msg: Message) -> dict[str, Any]:
    """Convert a Message object into a JSON-compatible dictionary.

    Every header listed in ``METADATA_FIELDS`` that is present in *msg* is
    stored under the key produced by ``json_name``.  Multiple-use fields
    become a list with one string per occurrence, single-use fields become
    a string, and ``Keywords`` is split into a list of keywords.  A
    non-empty message payload is stored as ``"description"``, replacing any
    value taken from a ``Description`` header.

    Args:
        msg: The parsed message.  The payload is treated as a string, i.e.
            the message is assumed not to be multipart.

    Returns:
        A dictionary mapping JSON field names to strings or lists of
        strings.  Fields that do not occur in *msg* are omitted.
    """

    def sanitise_header(h: Header | str) -> str:
        """Return header value *h* as text, guessing unknown encodings."""
        if not isinstance(h, Header):
            return str(h)

        chunks = []
        # The chunk is named ``raw`` so the ``bytes`` builtin is not shadowed.
        for raw, encoding in decode_header(h):
            if encoding == "unknown-8bit":
                try:
                    # See if UTF-8 works
                    raw.decode("utf-8")
                except UnicodeDecodeError:
                    # If not, latin1 at least won't fail
                    encoding = "latin1"
                else:
                    encoding = "utf-8"
            chunks.append((raw, encoding))
        return str(make_header(chunks))

    result: dict[str, Any] = {}
    for field, multi in METADATA_FIELDS:
        if field not in msg:
            continue
        key = json_name(field)
        value: str | list[str]
        if multi:
            # The field is known to be present, so the ``[]`` fallback is
            # never returned; it only keeps the result non-optional.
            value = [sanitise_header(v) for v in msg.get_all(field, [])]
        else:
            value = sanitise_header(msg.get(field))  # type: ignore
            if key == "keywords":
                # Accept both comma-separated and space-separated
                # forms, for better compatibility with old data.
                if "," in value:
                    value = [v.strip() for v in value.split(",")]
                else:
                    value = value.split()
        result[key] = value

    # The message body, when present, takes precedence over any
    # ``Description`` header collected above.
    payload = cast(str, msg.get_payload())
    if payload:
        result["description"] = payload

    return result