1"""
2 pygments.lexers.cddl
3 ~~~~~~~~~~~~~~~~~~~~
4
5 Lexer for the Concise data definition language (CDDL), a notational
6 convention to express CBOR and JSON data structures.
7
8 More information:
9 https://datatracker.ietf.org/doc/rfc8610/
10
11 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
12 :license: BSD, see LICENSE for details.
13"""
14
15from pygments.lexer import RegexLexer, bygroups, include, words
16from pygments.token import Comment, Error, Keyword, Name, Number, Operator, \
17 Punctuation, String, Whitespace
18
19__all__ = ['CddlLexer']
20
21
class CddlLexer(RegexLexer):
    """
    Lexer for CDDL definitions.
    """
    name = "CDDL"
    url = 'https://datatracker.ietf.org/doc/rfc8610/'
    aliases = ["cddl"]
    filenames = ["*.cddl"]
    mimetypes = ["text/x-cddl"]
    version_added = '2.8'

    # Type names that are highlighted as Name.Builtin instead of Name.Class.
    _prelude_types = [
        "any",
        "b64legacy",
        "b64url",
        "bigfloat",
        "bigint",
        "bignint",
        "biguint",
        "bool",
        "bstr",
        "bytes",
        "cbor-any",
        "decfrac",
        "eb16",
        "eb64legacy",
        "eb64url",
        "encoded-cbor",
        "false",
        "float",
        "float16",
        "float16-32",
        "float32",
        "float32-64",
        "float64",
        "int",
        "integer",
        "mime-message",
        "nil",
        "nint",
        "null",
        "number",
        "regexp",
        "tdate",
        "text",
        "time",
        "true",
        "tstr",
        "uint",
        "undefined",
        "unsigned",
        "uri",
    ]

    # Control operators; highlighted as Operator.Word.
    _controls = [
        ".and",
        ".bits",
        ".cbor",
        ".cborseq",
        ".default",
        ".eq",
        ".ge",
        ".gt",
        ".le",
        ".lt",
        ".ne",
        ".regexp",
        ".size",
        ".within",
    ]

    # Identifier: starts with a letter, '$', '@' or '_'. Runs of '-' and '.'
    # are only accepted inside the identifier, i.e. when another identifier
    # character follows them (enforced by the lookahead).
    _re_id = (
        r"[$@A-Z_a-z]"
        r"(?:[\-\.]+(?=[$@0-9A-Z_a-z])|[$@0-9A-Z_a-z])*"
    )

    # While the spec reads more like "an int must not start with 0" we use a
    # lookahead here that says "after a 0 there must be no digit". This makes
    # the '0' the invalid character in '01', which looks nicer when
    # highlighted.
    _re_uint = r"(?:0b[01]+|0x[0-9a-fA-F]+|[1-9]\d*|0(?!\d))"
    _re_int = r"-?" + _re_uint

    tokens = {
        "commentsandwhitespace": [
            (r"\s+", Whitespace),
            # '.*' rather than '.+' so that an empty comment (a bare ';' at
            # the end of a line) is a comment and not an Error token.
            (r";.*$", Comment.Single),
        ],
        "root": [
            include("commentsandwhitespace"),
            # Tag / major types: '#', '#<digit>' or '#<digit>.<uint>'.
            # The '.<uint>' part is optional, so '#6(...)' keeps '#6' together.
            (rf"#(\d(\.{_re_uint})?)?", Keyword.Type),  # type or any
            # occurrence
            (
                rf"({_re_uint})?(\*)({_re_uint})?",
                bygroups(Number, Operator, Number),
            ),
            (r"\?|\+", Operator),  # occurrence
            (r"\^", Operator),  # cuts
            (r"(\.\.\.|\.\.)", Operator),  # rangeop
            (words(_controls, suffix=r"\b"), Operator.Word),  # ctlops
            # into choice op
            (rf"&(?=\s*({_re_id}|\())", Operator),
            (rf"~(?=\s*{_re_id})", Operator),  # unwrap op
            # Map arrow and assignment operators. This rule has to come before
            # the slash rule below, otherwise the leading '/' of '/=' and
            # '//=' is consumed there and these alternatives never match.
            (r"=>|//=|/=|=", Operator),
            (r"//|/(?!/)", Operator),  # double and single slash
            (r"[\[\]{}\(\),<>:]", Punctuation),
            # Bytestrings
            (r"(b64)(')", bygroups(String.Affix, String.Single), "bstrb64url"),
            (r"(h)(')", bygroups(String.Affix, String.Single), "bstrh"),
            (r"'", String.Single, "bstr"),
            # Barewords as member keys (must be matched before values, types,
            # typenames, groupnames).
            # Token type is String as barewords are always interpreted as such.
            (rf"({_re_id})(\s*)(:)",
             bygroups(String, Whitespace, Punctuation)),
            # predefined types
            (words(_prelude_types, prefix=r"(?![\-_$@])\b", suffix=r"\b(?![\-_$@])"),
             Name.Builtin),
            # user-defined groupnames, typenames
            (_re_id, Name.Class),
            # values
            (r"0b[01]+", Number.Bin),
            (r"0o[0-7]+", Number.Oct),
            (r"0x[0-9a-fA-F]+(\.[0-9a-fA-F]+)?p[+-]?\d+", Number.Hex),  # hexfloat
            (r"0x[0-9a-fA-F]+", Number.Hex),  # hex
            # Float: an int that is followed by a fraction and/or an exponent
            # (the lookahead requires at least one of the two).
            (rf"{_re_int}(?=(\.\d|e[+-]?\d))(?:\.\d+)?(?:e[+-]?\d+)?",
             Number.Float),
            # Int
            (_re_int, Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
        ],
        # Body of a b64'...' byte string; left again at the closing quote.
        "bstrb64url": [
            (r"'", String.Single, "#pop"),
            include("commentsandwhitespace"),
            (r"\\.", String.Escape),
            (r"[0-9a-zA-Z\-_=]+", String.Single),
            # Anything else is flagged as an error.
            (r".", Error),
        ],
        # Body of an h'...' byte string; left again at the closing quote.
        "bstrh": [
            (r"'", String.Single, "#pop"),
            include("commentsandwhitespace"),
            (r"\\.", String.Escape),
            (r"[0-9a-fA-F]+", String.Single),
            # Anything that is not a hex digit is flagged as an error.
            (r".", Error),
        ],
        # Body of a plain '...' byte string; left again at the closing quote.
        "bstr": [
            (r"'", String.Single, "#pop"),
            (r"\\.", String.Escape),
            (r"[^'\\]+", String.Single),
        ],
    }