1# Copyright 2017 The Abseil Authors.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Internal helper functions for Abseil Python flags library."""
16
17import os
18import re
19import struct
20import sys
21import textwrap
22import types
23from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Sequence, Set
24from xml.dom import minidom
25# pylint: disable=g-import-not-at-top
26fcntl: Optional[types.ModuleType]
27try:
28 import fcntl
29except ImportError:
30 fcntl = None
31termios: Optional[types.ModuleType]
32try:
33 # Importing termios will fail on non-unix platforms.
34 import termios
35except ImportError:
36 termios = None
37# pylint: enable=g-import-not-at-top
38
39
40_DEFAULT_HELP_WIDTH = 80 # Default width of help output.
41# Minimal "sane" width of help output. We assume that any value below 40 is
42# unreasonable.
43_MIN_HELP_WIDTH = 40
44
45# Define the allowed error rate in an input string to get suggestions.
46#
47# We lean towards a high threshold because we tend to be matching a phrase,
48# and the simple algorithm used here is geared towards correcting word
49# spellings.
50#
51# For manual testing, consider "<command> --list" which produced a large number
# of spurious suggestions when we used "least_errors > 0.5" instead of
# "least_errors >= 0.5".
54_SUGGESTION_ERROR_RATE_THRESHOLD = 0.50
55
56# Characters that cannot appear or are highly discouraged in an XML 1.0
57# document. (See http://www.w3.org/TR/REC-xml/#charsets or
58# https://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0)
59_ILLEGAL_XML_CHARS_REGEX = re.compile(
60 '[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]'
61)
62
63# This is a set of module ids for the modules that disclaim key flags.
# This module is explicitly added to this set so that we never consider it to
# define key flags.
66disclaim_module_ids: Set[int] = {id(sys.modules[__name__])}
67
68
69# Define special flags here so that help may be generated for them.
70# NOTE: Please do NOT use SPECIAL_FLAGS from outside flags module.
71# Initialized inside flagvalues.py.
72# NOTE: This cannot be annotated as its actual FlagValues type since this would
73# create a circular dependency.
74SPECIAL_FLAGS: Any = None
75
76
77# This points to the flags module, initialized in flags/__init__.py.
78# This should only be used in adopt_module_key_flags to take SPECIAL_FLAGS into
79# account.
80FLAGS_MODULE: Optional[types.ModuleType] = None
81
82
class _ModuleObjectAndName(NamedTuple):
  """A (module, module_name) pair describing one module.

  Attributes:
    module: The module object; may be None.
    module_name: str, the name associated with the module.
  """

  module: Optional[types.ModuleType]
  module_name: str
92
93
def get_module_object_and_name(
    globals_dict: Dict[str, Any]
) -> _ModuleObjectAndName:
  """Looks up the module that owns a globals dictionary, plus its name.

  Args:
    globals_dict: A dictionary that should correspond to an environment
      providing the values of the globals. Its '__name__' entry, if any, is
      used to find the module in sys.modules.

  Returns:
    _ModuleObjectAndName - pair of module object & module name. The module is
    None when the name is not present in sys.modules, and the name is None
    when globals_dict has no '__name__' entry. For the '__main__' module the
    reported name is sys.argv[0].
  """
  module_name = globals_dict.get('__name__')
  module_object = sys.modules.get(module_name)
  if module_name == '__main__':
    # The script path is more informative than the literal '__main__'.
    module_name = sys.argv[0]
  return _ModuleObjectAndName(module_object, module_name)
112
113
def get_calling_module_object_and_name() -> _ModuleObjectAndName:
  """Returns the module that's calling into this module.

  We generally use this function to get the name of the module calling a
  DEFINE_foo... function.

  The call stack is walked outwards, starting at the caller of this function.
  Frames whose module id is listed in disclaim_module_ids, or whose globals
  yield no module name, are skipped.

  Returns:
    _ModuleObjectAndName for the first frame that is not skipped.

  Raises:
    AssertionError: Raised when no calling module could be identified, i.e.
      the whole stack was walked without finding a suitable frame.
  """
  try:
    # sys._getframe is the right thing to use here, as it's the best
    # way to get hold of the call stack.
    frame = sys._getframe(1)  # pylint: disable=protected-access
  except ValueError:
    # There is no caller frame at all.
    frame = None

  # Follow f_back links instead of calling sys._getframe(depth) for growing
  # depths: the latter raises ValueError once the stack is exhausted, which
  # would pre-empt the AssertionError documented above.
  while frame is not None:
    module, module_name = get_module_object_and_name(frame.f_globals)
    if id(module) not in disclaim_module_ids and module_name is not None:
      return _ModuleObjectAndName(module, module_name)
    frame = frame.f_back
  raise AssertionError('No module was found')
134
135
def get_calling_module() -> str:
  """Returns just the name part of get_calling_module_object_and_name()."""
  _, calling_module_name = get_calling_module_object_and_name()
  return calling_module_name
139
140
def create_xml_dom_element(
    doc: minidom.Document, name: str, value: Any
) -> minidom.Element:
  """Builds an XML DOM element holding the text form of a value.

  Args:
    doc: minidom.Document, the DOM document used to create the nodes.
    name: str, the tag of the XML element.
    value: object, converted with str() to produce the element's text.
      Booleans are written in lower case. Characters matched by
      _ILLEGAL_XML_CHARS_REGEX (illegal or highly discouraged in XML 1.0) are
      removed from the text.

  Returns:
    An instance of minidom.Element with a single text child.
  """
  text = str(value)
  if isinstance(value, bool):
    # Match the C++ flag library, which prints booleans without capitals.
    text = text.lower()
  sanitized = _ILLEGAL_XML_CHARS_REGEX.sub('', text)

  element = doc.createElement(name)
  text_node = doc.createTextNode(sanitized)
  element.appendChild(text_node)
  return element
166
167
def get_help_width() -> int:
  """Returns the integer width of help lines that is used in TextWrap.

  _DEFAULT_HELP_WIDTH is returned when stdout is not a tty, when the termios
  or fcntl modules are unavailable, or when querying the terminal fails.
  Otherwise the terminal is asked for its column count; if that count is
  below _MIN_HELP_WIDTH, the COLUMNS environment variable is used instead
  (falling back to _DEFAULT_HELP_WIDTH when it is unset or not an integer).
  """
  if not sys.stdout.isatty() or termios is None or fcntl is None:
    return _DEFAULT_HELP_WIDTH
  try:
    data = fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, b'1234')
    # The second of the two unpacked shorts is used as the column count.
    columns = struct.unpack('hh', data)[1]
    # Emacs mode returns 0.
    # Here we assume that any value below _MIN_HELP_WIDTH is unreasonable.
    if columns >= _MIN_HELP_WIDTH:
      return columns
    # Returning an int as default is fine, int(int) just returns the int.
    return int(os.getenv('COLUMNS', _DEFAULT_HELP_WIDTH))

  except (TypeError, ValueError, OSError, struct.error):
    # ValueError covers a COLUMNS value that is not an integer; a malformed
    # environment variable should not break help output.
    return _DEFAULT_HELP_WIDTH
184
185
def get_flag_suggestions(
    attempt: str, longopt_list: Sequence[str]
) -> List[str]:
  """Returns the known flag names that most closely resemble an invalid flag.

  Args:
    attempt: str, the flag name that was not recognized.
    longopt_list: sequence of str, known long options; anything from the
      first '=' onwards in an entry is ignored.

  Returns:
    The option names whose prefix (cut to the length of attempt) has the
    smallest edit distance to attempt, in sorted order. The list is empty if
    attempt has two characters or fewer, if longopt_list is empty, or if even
    the best match reaches _SUGGESTION_ERROR_RATE_THRESHOLD errors per
    character of attempt.
  """
  # Very short attempts, or having nothing to compare against, give no hints.
  if len(attempt) <= 2 or not longopt_list:
    return []

  attempt_len = len(attempt)
  # Score each option by how far its prefix is from the attempt. Comparing
  # prefixes also handles a flag that is spelled right but ambiguous.
  # Sorting (distance, name) pairs puts the best matches first, and ordering
  # ties by name keeps the output stable.
  scored = sorted(
      (_damerau_levenshtein(attempt, candidate[:attempt_len]), candidate)
      for candidate in (entry.split('=', 1)[0] for entry in longopt_list)
  )

  fewest_errors = scored[0][0]
  # Excessively bad matches are not worth suggesting.
  if fewest_errors >= _SUGGESTION_ERROR_RATE_THRESHOLD * attempt_len:
    return []

  # The list is sorted by distance, so every best match sits at the front.
  return [candidate for errors, candidate in scored if errors == fewest_errors]
215
216
def _damerau_levenshtein(a, b):
  """Returns Damerau-Levenshtein edit distance from a to b.

  Counted edits are: inserting a character, deleting a character, replacing a
  character, and swapping two adjacent characters. A swapped pair is consumed
  as a unit and not edited again (the "optimal string alignment" variant).

  The distance is computed iteratively over suffixes of the inputs, so input
  length is not limited by the interpreter's recursion limit and no
  substrings are copied.

  Args:
    a: str, the first string.
    b: str, the second string.

  Returns:
    int, the number of edits needed to turn a into b.
  """
  len_a, len_b = len(a), len(b)

  # Row i holds distance(a[i:], b[j:]) for every j. Row len_a corresponds to
  # an empty suffix of a, whose distance is what is left of b.
  row_below = list(range(len_b, -1, -1))
  # Row i + 2; only read once at least two rows have been produced.
  row_two_below = None

  for i in range(len_a - 1, -1, -1):
    row = [0] * (len_b + 1)
    # Against an empty suffix of b, all remaining characters of a are edits.
    row[len_b] = len_a - i
    for j in range(len_b - 1, -1, -1):
      best = min(
          row_below[j] + 1,  # correct an insertion error
          row[j + 1] + 1,  # correct a deletion error
          row_below[j + 1] + (a[i] != b[j]))  # correct a wrong character
      if (i + 1 < len_a and j + 1 < len_b
          and a[i] == b[j + 1] and a[i + 1] == b[j]):
        # Correct a transposition.
        best = min(best, row_two_below[j + 2] + 1)
      row[j] = best
    row_two_below, row_below = row_below, row

  return row_below[0]
243
244
def text_wrap(
    text: str,
    length: Optional[int] = None,
    indent: str = '',
    firstline_indent: Optional[str] = None,
) -> str:
  """Wraps text to a maximum line length, one input line at a time.

  Every line of the input is handled as its own paragraph: it is stripped of
  surrounding whitespace and wrapped independently, so existing line breaks
  are kept. Lines holding only whitespace come out as empty lines, and tabs
  are expanded to 4-column tab stops before wrapping.

  Args:
    text: str, text to wrap.
    length: int, maximum length of a line, indentation included. When None,
      get_help_width() supplies the value.
    indent: str, indent for all but the first line. None is treated as the
      empty string.
    firstline_indent: str, indent for the first line; when None, indent is
      used.

  Returns:
    str, the wrapped text.

  Raises:
    ValueError: Raised if indent or firstline_indent is not shorter than
      length.
  """
  # Fill in defaults where the caller passed None.
  width = get_help_width() if length is None else length
  later_indent = '' if indent is None else indent
  first_indent = later_indent if firstline_indent is None else firstline_indent

  if len(later_indent) >= width:
    raise ValueError('Length of indent exceeds length')
  if len(first_indent) >= width:
    raise ValueError('Length of first line indent exceeds length')

  # Only the very first input line gets the first-line indent; every other
  # input line starts with the regular indent.
  first_wrapper = textwrap.TextWrapper(
      width=width, initial_indent=first_indent, subsequent_indent=later_indent)
  later_wrapper = textwrap.TextWrapper(
      width=width, initial_indent=later_indent, subsequent_indent=later_indent)

  # textwrap gives newlines no special treatment, and its documentation
  # recommends splitting text into paragraphs that are wrapped separately.
  wrapped_lines = []
  for position, raw_line in enumerate(text.expandtabs(4).splitlines()):
    paragraph = raw_line.strip()
    if not paragraph:
      wrapped_lines.append('')  # Keep empty lines.
      continue
    active_wrapper = first_wrapper if position == 0 else later_wrapper
    wrapped_lines.extend(active_wrapper.wrap(paragraph))

  return '\n'.join(wrapped_lines)
305
306
def flag_dict_to_args(
    flag_map: Dict[str, Any], multi_flags: Optional[Set[str]] = None
) -> Iterable[str]:
  """Turns a mapping of flag values into command-line arguments.

  The resulting arguments are meant for a binary that parses its arguments
  using this module.

  Args:
    flag_map: dict, a mapping where the keys are flag names (strings).
      Values are treated according to their type:

      * ``None`` emits just ``--name``.
      * ``True`` emits just ``--name``.
      * ``False`` emits ``--noname``.
      * A string (or bytes) value emits ``--name=value``.
      * A collection emits ``--name=value1,value2,value3``, unless the flag
        name is in ``multi_flags``, in which case it emits
        ``--name=value1 --name=value2 --name=value3``.
      * Everything else is converted to a string and passed as such.

    multi_flags: set, names (strings) of flags that should be treated as
      multi-flags.

  Yields:
    Strings suitable for a subprocess execution, in the iteration order of
    flag_map.
  """
  for name, setting in flag_map.items():
    if setting is None:
      yield '--%s' % name
      continue

    if isinstance(setting, bool):
      yield ('--%s' if setting else '--no%s') % name
      continue

    if isinstance(setting, (bytes, str)):
      # Strings are iterable, but must not be handled like collections.
      yield '--%s=%s' % (name, setting)  # type: ignore[str-bytes-safe]
      continue

    # Anything else is first tried as a collection; a TypeError (e.g. the
    # value is not iterable) means it is a plain scalar.
    try:
      if multi_flags and name in multi_flags:
        yield from ('--%s=%s' % (name, str(element)) for element in setting)
      else:
        joined = ','.join(str(element) for element in setting)
        yield '--%s=%s' % (name, joined)
    except TypeError:
      # Default case.
      yield '--%s=%s' % (name, setting)
357
358
def trim_docstring(docstring: str) -> str:
  """Strips the common indentation from a triple-quoted string.

  This follows the docstring handling algorithm given in PEP 257:
  https://www.python.org/dev/peps/pep-0257/.

  Args:
    docstring: str, a python docstring.

  Returns:
    str, the docstring with tabs expanded, the first line stripped, the
    smallest indentation of the following non-blank lines removed from each
    of them, trailing whitespace removed, and leading and trailing blank
    lines dropped. An empty docstring gives ''.
  """
  if not docstring:
    return ''

  # Sentinel meaning "no indented line seen"; no real line is this long.
  max_indent = 1 << 29

  # Tabs become spaces (normal Python rules) before splitting into lines.
  first_line, *other_lines = docstring.expandtabs().splitlines()

  # Smallest indentation over the non-blank lines after the first one.
  indent = min(
      (len(line) - len(line.lstrip()) for line in other_lines if line.lstrip()),
      default=max_indent,
  )
  indent = min(indent, max_indent)

  # The first line is special: it is stripped rather than de-indented.
  cleaned = [first_line.strip()]
  if indent < max_indent:
    cleaned.extend(line[indent:].rstrip() for line in other_lines)

  # Narrow [start, stop) past blank lines at the end and at the beginning.
  start, stop = 0, len(cleaned)
  while stop > start and not cleaned[stop - 1]:
    stop -= 1
  while start < stop and not cleaned[start]:
    start += 1

  return '\n'.join(cleaned[start:stop])
399
400
def doc_to_help(doc: str) -> str:
  """Reformats a __doc__ string so that it can be shown as help text.

  Args:
    doc: str, the docstring to reformat.

  Returns:
    str, the text with surrounding whitespace removed, whitespace-only lines
    emptied, common indentation cut by trim_docstring(), and each newline
    that sits directly between two non-whitespace characters replaced by a
    space.
  """
  # Remove whitespace at the very start and the very end of the text.
  stripped = doc.strip()

  # Turn lines made up of only spaces and tabs into truly empty lines.
  emptied = re.sub('^[ \t]+$', '', stripped, flags=re.M)

  # Cut out the common space at line beginnings.
  dedented = trim_docstring(emptied)

  # Docstring lines tend to be aligned, i.e. start with the same amount of
  # white space. The rules are:
  #  1) keep double new lines;
  #  2) keep white space after new lines if the line is not empty;
  #  3) change all other new lines to a space.
  # A new line is therefore replaced only when non-white-space characters
  # touch it on both sides.
  line_joiner = re.compile(r'(?<=\S)\n(?=\S)', re.M)
  return line_joiner.sub(' ', dedented)