1"""
2 pygments.lexers.ecl
3 ~~~~~~~~~~~~~~~~~~~
4
5 Lexers for the ECL language.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import RegexLexer, include, bygroups, words
14from pygments.token import Comment, Operator, Keyword, Name, String, \
15 Number, Punctuation, Whitespace
16
17__all__ = ['ECLLexer']
18
19
20class ECLLexer(RegexLexer):
21 """
22 Lexer for the declarative big-data ECL language.
23 """
24
25 name = 'ECL'
26 url = 'https://hpccsystems.com/training/documentation/ecl-language-reference/html'
27 aliases = ['ecl']
28 filenames = ['*.ecl']
29 mimetypes = ['application/x-ecl']
30 version_added = '1.5'
31
32 flags = re.IGNORECASE | re.MULTILINE
33
34 tokens = {
        'root': [
            include('whitespace'),
            include('statements'),
        ],
        'whitespace': [
            (r'\s+', Whitespace),
            (r'\/\/.*', Comment.Single),
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
        ],
        'statements': [
            include('types'),
            include('keywords'),
            include('functions'),
            include('hash'),
            (r'"', String, 'string'),
            (r'\'', String, 'string'),
            (r'(\d+\.\d*|\.\d+|\d+)e[+-]?\d+[lu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+f)f?', Number.Float),
            (r'0x[0-9a-f]+[lu]*', Number.Hex),
            (r'0[0-7]+[lu]*', Number.Oct),
            (r'\d+[lu]*', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]+', Operator),
            (r'[{}()\[\],.;]', Punctuation),
            (r'[a-z_]\w*', Name),
        ],
        'hash': [
            (r'^#.*$', Comment.Preproc),
        ],
        'types': [
            (r'(RECORD|END)\D', Keyword.Declaration),
            (r'((?:ASCII|BIG_ENDIAN|BOOLEAN|DATA|DECIMAL|EBCDIC|INTEGER|PATTERN|'
             r'QSTRING|REAL|RECORD|RULE|SET OF|STRING|TOKEN|UDECIMAL|UNICODE|'
             r'UNSIGNED|VARSTRING|VARUNICODE)\d*)(\s+)',
             bygroups(Keyword.Type, Whitespace)),
        ],
        'keywords': [
            (words((
                'APPLY', 'ASSERT', 'BUILD', 'BUILDINDEX', 'EVALUATE', 'FAIL',
                'KEYDIFF', 'KEYPATCH', 'LOADXML', 'NOTHOR', 'NOTIFY', 'OUTPUT',
                'PARALLEL', 'SEQUENTIAL', 'SOAPCALL', 'CHECKPOINT', 'DEPRECATED',
                'FAILCODE', 'FAILMESSAGE', 'FAILURE', 'GLOBAL', 'INDEPENDENT',
                'ONWARNING', 'PERSIST', 'PRIORITY', 'RECOVERY', 'STORED', 'SUCCESS',
                'WAIT', 'WHEN'), suffix=r'\b'),
             Keyword.Reserved),
            # These are classed differently, check later
            (words((
                'ALL', 'AND', 'ANY', 'AS', 'ATMOST', 'BEFORE', 'BEGINC++', 'BEST',
                'BETWEEN', 'CASE', 'CONST', 'COUNTER', 'CSV', 'DESCEND', 'ENCRYPT',
                'ENDC++', 'ENDMACRO', 'EXCEPT', 'EXCLUSIVE', 'EXPIRE', 'EXPORT',
                'EXTEND', 'FALSE', 'FEW', 'FIRST', 'FLAT', 'FULL', 'FUNCTION',
                'GROUP', 'HEADER', 'HEADING', 'HOLE', 'IFBLOCK', 'IMPORT', 'IN',
                'JOINED', 'KEEP', 'KEYED', 'LAST', 'LEFT', 'LIMIT', 'LOAD', 'LOCAL',
                'LOCALE', 'LOOKUP', 'MACRO', 'MANY', 'MAXCOUNT', 'MAXLENGTH',
                'MIN SKEW', 'MODULE', 'INTERFACE', 'NAMED', 'NOCASE', 'NOROOT',
                'NOSCAN', 'NOSORT', 'NOT', 'OF', 'ONLY', 'OPT', 'OR', 'OUTER',
                'OVERWRITE', 'PACKED', 'PARTITION', 'PENALTY', 'PHYSICALLENGTH',
                'PIPE', 'QUOTE', 'RELATIONSHIP', 'REPEAT', 'RETURN', 'RIGHT',
                'SCAN', 'SELF', 'SEPARATOR', 'SERVICE', 'SHARED', 'SKEW', 'SKIP',
                'SQL', 'STORE', 'TERMINATOR', 'THOR', 'THRESHOLD', 'TOKEN',
                'TRANSFORM', 'TRIM', 'TRUE', 'TYPE', 'UNICODEORDER', 'UNSORTED',
                'VALIDATE', 'VIRTUAL', 'WHOLE', 'WILD', 'WITHIN', 'XML', 'XPATH',
                '__COMPRESSED__'), suffix=r'\b'),
             Keyword.Reserved),
        ],
        'functions': [
            (words((
                'ABS', 'ACOS', 'ALLNODES', 'ASCII', 'ASIN', 'ASSTRING', 'ATAN',
                'ATAN2', 'AVE', 'CASE', 'CHOOSE', 'CHOOSEN', 'CHOOSESETS',
                'CLUSTERSIZE', 'COMBINE', 'CORRELATION', 'COS', 'COSH', 'COUNT',
                'COVARIANCE', 'CRON', 'DATASET', 'DEDUP', 'DEFINE', 'DENORMALIZE',
                'DISTRIBUTE', 'DISTRIBUTED', 'DISTRIBUTION', 'EBCDIC', 'ENTH',
                'ERROR', 'EVALUATE', 'EVENT', 'EVENTEXTRA', 'EVENTNAME', 'EXISTS',
                'EXP', 'FAILCODE', 'FAILMESSAGE', 'FETCH', 'FROMUNICODE',
                'GETISVALID', 'GLOBAL', 'GRAPH', 'GROUP', 'HASH', 'HASH32',
                'HASH64', 'HASHCRC', 'HASHMD5', 'HAVING', 'IF', 'INDEX',
                'INTFORMAT', 'ISVALID', 'ITERATE', 'JOIN', 'KEYUNICODE', 'LENGTH',
                'LIBRARY', 'LIMIT', 'LN', 'LOCAL', 'LOG', 'LOOP', 'MAP', 'MATCHED',
                'MATCHLENGTH', 'MATCHPOSITION', 'MATCHTEXT', 'MATCHUNICODE', 'MAX',
                'MERGE', 'MERGEJOIN', 'MIN', 'NOLOCAL', 'NONEMPTY', 'NORMALIZE',
                'PARSE', 'PIPE', 'POWER', 'PRELOAD', 'PROCESS', 'PROJECT', 'PULL',
                'RANDOM', 'RANGE', 'RANK', 'RANKED', 'REALFORMAT', 'RECORDOF',
                'REGEXFIND', 'REGEXREPLACE', 'REGROUP', 'REJECTED', 'ROLLUP',
                'ROUND', 'ROUNDUP', 'ROW', 'ROWDIFF', 'SAMPLE', 'SET', 'SIN',
                'SINH', 'SIZEOF', 'SOAPCALL', 'SORT', 'SORTED', 'SQRT', 'STEPPED',
                'STORED', 'SUM', 'TABLE', 'TAN', 'TANH', 'THISNODE', 'TOPN',
                'TOUNICODE', 'TRANSFER', 'TRIM', 'TRUNCATE', 'TYPEOF', 'UNGROUP',
                'UNICODEORDER', 'VARIANCE', 'WHICH', 'WORKUNIT', 'XMLDECODE',
                'XMLENCODE', 'XMLTEXT', 'XMLUNICODE'), suffix=r'\b'),
             Name.Function),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\'', String, '#pop'),
            (r'[^"\']+', String),
        ],
    }

    def analyse_text(text):
133 """This is very difficult to guess relative to other business languages.
134 -> in conjunction with BEGIN/END seems relatively rare though."""
135 result = 0
136
137 if '->' in text:
138 result += 0.01
139 if 'BEGIN' in text:
140 result += 0.01
141 if 'END' in text:
142 result += 0.01
143
144 return result
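

# A minimal usage sketch, not part of the Pygments lexer module itself: it
# assumes Pygments is installed, and the short ECL snippet below is a
# hypothetical example used only to show what the lexer tokenizes (declared
# types, keywords, built-in functions, strings).
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    sample_ecl = (
        "Person := RECORD\n"
        "  STRING20 name;\n"
        "  UNSIGNED4 age;\n"
        "END;\n"
        "people := DATASET([{'Jo', 30}], Person);\n"
        "OUTPUT(SORT(people, name));\n"
    )
    # highlight() runs the lexer over the snippet and renders ANSI-colored output.
    print(highlight(sample_ecl, ECLLexer(), TerminalFormatter()))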