Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/lilypond.py: 90%

31 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 06:10 +0000

1""" 

2 pygments.lexers.lilypond 

3 ~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexer for LilyPond. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import bygroups, default, inherit, words 

14from pygments.lexers.lisp import SchemeLexer 

15from pygments.lexers._lilypond_builtins import ( 

16 keywords, pitch_language_names, clefs, scales, repeat_types, units, 

17 chord_modifiers, pitches, music_functions, dynamics, articulations, 

18 music_commands, markup_commands, grobs, translators, contexts, 

19 context_properties, grob_properties, scheme_functions, paper_variables, 

20 header_variables 

21) 

22from pygments.token import Token 

23 

__all__ = ["LilyPondLexer"]

# In LilyPond, (unquoted) name tokens only contain letters, hyphens,
# and underscores, where hyphens and underscores must not start or end
# a name token.
#
# Note that many of the entities listed as LilyPond built-in keywords
# (in file `_lilypond_builtins.py`) are only valid if surrounded by
# double quotes, for example, 'hufnagel-fa1'.  This means that
# `NAME_END_RE` doesn't apply to such entities in valid LilyPond code.
#
# The pattern is a zero-width lookahead, so it never consumes input.  It
# succeeds when the next character is
#   - a digit, or
#   - anything that is neither a word character nor a hyphen, or
#   - a hyphen or underscore that is itself followed by a non-word
#     character or a digit (i.e. a trailing hyphen/underscore that cannot
#     continue the name).
# Every alternative inspects at least one character, so the lookahead
# cannot succeed at the very end of the input.
NAME_END_RE = r"(?=\d|[^\w\-]|[\-_][\W\d])"

35 

def builtin_words(names, backslash, suffix=NAME_END_RE):
    """Build a ``words()`` matcher for a list of LilyPond builtin names.

    Every generated pattern starts with an optional direction specifier
    (one of ``-``, ``_`` or ``^``).  What follows depends on *backslash*:

    * ``"mandatory"``  -- a backslash must precede the name;
    * ``"optional"``   -- a backslash may precede the name;
    * ``"disallowed"`` -- nothing is added in front of the name.

    *suffix* is appended after the alternation of names and defaults to
    ``NAME_END_RE``.  The result is whatever ``words(names, prefix, suffix)``
    returns.
    """
    direction = r"[\-_^]?"
    if backslash == "mandatory":
        return words(names, direction + r"\\", suffix)
    if backslash == "optional":
        return words(names, direction + r"\\?", suffix)
    # Only three modes exist; anything else is a programming error.
    assert backslash == "disallowed"
    return words(names, direction, suffix)

45 

46 

class LilyPondLexer(SchemeLexer):
    """
    Lexer for input to LilyPond, a text-based music typesetter.

    .. important::

       This lexer is meant to be used in conjunction with the ``lilypond`` style.

    .. versionadded:: 2.11
    """
    name = 'LilyPond'
    url = 'https://lilypond.org'
    aliases = ['lilypond']
    filenames = ['*.ly']
    mimetypes = []

    flags = re.DOTALL | re.MULTILINE

    # Design notes
    # ------------
    # Properly parsing LilyPond input is very tricky -- and outright
    # impossible without running LilyPond once a file contains Scheme
    # code.  This lexer therefore makes no attempt at tracking lexical
    # modes.  It recognizes the most common syntax and, above all, a
    # large catalogue of builtins.
    #
    # Embedded Scheme is handled by deriving from SchemeLexer.  The
    # 'root' state is replaced wholesale, and the inherited 'value'
    # state (used for the Scheme expression following '#') gains one
    # extra rule for '#{ ... #}'.

    def get_tokens_unprocessed(self, text):
        """Highlight Scheme variables as LilyPond builtins when applicable."""
        for pos, ttype, lexeme in super().get_tokens_unprocessed(text):
            if ttype is Token.Name.Builtin:
                # Builtins reported by the Scheme rules get their own
                # token type.
                ttype = Token.Name.Builtin.SchemeBuiltin
            elif ((ttype is Token.Name.Function
                   or ttype is Token.Name.Variable)
                  and lexeme in scheme_functions):
                # A Scheme function/variable name that is listed in
                # `scheme_functions`.
                ttype = Token.Name.Builtin.SchemeFunction
            yield pos, ttype, lexeme

    # NOTE: rule order within each state is significant; the first
    # matching rule wins.
    tokens = {
        "root": [
            # Runs of whitespace.
            (r"\s+", Token.Text.Whitespace),

            # Block comments: %{ ... %}.  They do not nest.
            (r"%\{.*?%\}", Token.Comment.Multiline),

            # Line comments: from % to the end of the line.
            (r"%.*?$", Token.Comment.Single),

            # '#}' closes LilyPond code embedded in Scheme.
            (r"#\}", Token.Punctuation, "#pop"),

            # Start of embedded Scheme: '#' ("delayed") or '$'
            # (immediate).  The lesser known "list splicing operators"
            # '#@' and '$@' are accepted as well.
            (r"[#$]@?", Token.Punctuation, "value"),

            # Punctuation of all sorts:
            # - { }           sequential music
            # - << >>         parallel music
            # - << \\ >>      voice separator
            # - < >           chord
            # - |             bar check
            # - .             nested properties (\revert NoteHead.color)
            # - =             assignments and lists for various commands
            #                 (\override Stem.color = red)
            # - ,             alternative list syntax (\time 3,3,2 4/4)
            # - :             tremolos (c:32)
            # - -- and __     in lyrics (li -- ly -- pond __); these must
            #                 be preceded by ASCII whitespace
            (r"""(?x)
               \\\\
               | (?<= \s ) (?: -- | __ )
               | [{}<>=.,:|]
              """, Token.Punctuation),

            # Pitch names, optionally followed by an octave check,
            # octavation marks, and a forced or cautionary accidental.
            (words(pitches, suffix=r"=?[',]*!?\??" + NAME_END_RE), Token.Pitch),

            # Opening quote of a string, with optional direction specifier.
            (r'[\-_^]?"', Token.String, "string"),

            # Numeric literals.
            (r"-?\d+\.\d+", Token.Number.Float),  # 5. and .5 are not allowed
            (r"-?\d+/\d+", Token.Number.Fraction),
            # Integers and durations (with optional augmentation dots)
            # are indistinguishable here, so both are tagged as plain
            # numbers.
            #
            # A negative integer is only taken as a number when it
            # follows whitespace (the usual case for a music function
            # argument).  Without the space it falls through to the
            # fingering rule further down.
            (r"""(?x)
               (?<= \s ) -\d+
               | (?: (?: \d+ | \\breve | \\longa | \\maxima )
                     \.* )
              """, Token.Number),
            # The '*' between a duration and its multiplier (the
            # multiplier itself is highlighted as a fraction).
            (r"\*", Token.Number),

            # Tie, slur and manual beam characters.
            (r"[~()[\]]", Token.Name.Builtin.Articulation),

            # Articulation shorthands; the direction specifier is
            # mandatory for these.
            (r"[\-_^][>^_!.\-+]", Token.Name.Builtin.Articulation),

            # Fingering numbers, string numbers.
            (r"[\-_^]?\\?\d+", Token.Name.Builtin.Articulation),

            # Known builtins, one rule per category.
            (builtin_words(keywords, "mandatory"), Token.Keyword),
            (builtin_words(pitch_language_names, "disallowed"), Token.Name.PitchLanguage),
            (builtin_words(clefs, "disallowed"), Token.Name.Builtin.Clef),
            (builtin_words(scales, "mandatory"), Token.Name.Builtin.Scale),
            (builtin_words(repeat_types, "disallowed"), Token.Name.Builtin.RepeatType),
            (builtin_words(units, "mandatory"), Token.Number),
            (builtin_words(chord_modifiers, "disallowed"), Token.ChordModifier),
            (builtin_words(music_functions, "mandatory"), Token.Name.Builtin.MusicFunction),
            (builtin_words(dynamics, "mandatory"), Token.Name.Builtin.Dynamic),
            # Articulations written without a backslash (slurs and the
            # like) were already matched above.
            (builtin_words(articulations, "mandatory"), Token.Name.Builtin.Articulation),
            (builtin_words(music_commands, "mandatory"), Token.Name.Builtin.MusicCommand),
            (builtin_words(markup_commands, "mandatory"), Token.Name.Builtin.MarkupCommand),
            (builtin_words(grobs, "disallowed"), Token.Name.Builtin.Grob),
            (builtin_words(translators, "disallowed"), Token.Name.Builtin.Translator),
            # The backslash is optional to cover
            # \layout { \context { \Score ... } }.
            (builtin_words(contexts, "optional"), Token.Name.Builtin.Context),
            (builtin_words(context_properties, "disallowed"), Token.Name.Builtin.ContextProperty),
            (builtin_words(grob_properties, "disallowed"),
             Token.Name.Builtin.GrobProperty,
             "maybe-subproperties"),
            # The backslash is optional for the next two because output
            # definitions are wrappers around modules, which permits
            # things like
            #   \paper { oddHeaderMarkup = \evenHeaderMarkup }
            (builtin_words(paper_variables, "optional"), Token.Name.Builtin.PaperVariable),
            (builtin_words(header_variables, "optional"), Token.Name.Builtin.HeaderVariable),

            # Any other backslash-escaped name (e.g. a reference to a
            # music variable), possibly with a direction specifier.
            (r"[\-_^]?\\.+?" + NAME_END_RE, Token.Name.BackslashReference),

            # Left-hand side of a variable definition.  Assignments to
            # alist keys are supported too
            # (myAlist.my-key.my-nested-key = \markup \spam \eggs).
            (r"""(?x)
               (?: [^\W\d] | - )+
               (?= (?: [^\W\d] | [\-.] )* \s* = )
              """, Token.Name.Lvalue),

            # Markup mode can contain virtually anything, so the rest
            # is plain text.  A whole word is consumed at once;
            # otherwise a suffix that happens to be a builtin could be
            # lexed as one (e.g. the "Staff" in "myStaff").
            (r"([^\W\d]|-)+?" + NAME_END_RE, Token.Text),
            (r".", Token.Text),
        ],
        "string": [
            # Closing quote ends the string.
            (r'"', Token.String, "#pop"),
            # A backslash followed by any single character.
            (r'\\.', Token.String.Escape),
            # Everything else up to the next backslash or quote.
            (r'[^\\"]+', Token.String),
        ],
        "value": [
            # '#{' switches to LilyPond code inside Scheme.  The 'value'
            # state is popped first, because once the matching '#}' pops
            # 'root' the expression is complete.
            (r"#\{", Token.Punctuation, ("#pop", "root")),
            inherit,
        ],
        # Grob subproperties are not declared anywhere, and keeping a
        # hand-written list would be tedious.  This state instead treats
        # anything shaped like a-known-property.foo.bar-baz as a single
        # property name.
        "maybe-subproperties": [
            (r"\s+", Token.Text.Whitespace),
            (r"(\.)((?:[^\W\d]|-)+?)" + NAME_END_RE,
             bygroups(Token.Punctuation, Token.Name.Builtin.GrobProperty)),
            default("#pop"),
        ]
    }