Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyparsing/util.py: 40%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

238 statements  

1# util.py 

2import contextlib 

3import re 

4from functools import lru_cache, wraps 

5import inspect 

6import itertools 

7import types 

8from typing import Callable, Union, Iterable, TypeVar, cast, Any 

9import warnings 

10 

11from .warnings import PyparsingDeprecationWarning, PyparsingDiagnosticWarning 

12 

13_bslash = chr(92) 

14C = TypeVar("C", bound=Callable) 

15 

16 

class __config_flags:
    """Internal base class for grouping compatibility and debugging flags.

    Subclasses list their flag attribute names in ``_all_names``; names also
    present in ``_fixed_names`` are locked and cannot be changed at runtime.
    """

    _all_names: list[str] = []
    _fixed_names: list[str] = []
    _type_desc = "configuration"

    @classmethod
    def _set(cls, dname, value):
        # Locked flags are never modified - emit a diagnostic and bail out.
        if dname in cls._fixed_names:
            warnings.warn(
                f"{cls.__name__}.{dname} {cls._type_desc} is {str(getattr(cls, dname)).upper()}"
                f" and cannot be overridden",
                PyparsingDiagnosticWarning,
                stacklevel=3,
            )
            return
        if dname not in cls._all_names:
            raise ValueError(f"no such {cls._type_desc} {dname!r}")
        setattr(cls, dname, value)

    @classmethod
    def enable(cls, name):
        """Set the named flag to True."""
        return cls._set(name, True)

    @classmethod
    def disable(cls, name):
        """Set the named flag to False."""
        return cls._set(name, False)

41 

42 

@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Return the current column of ``loc`` within ``strg``, with newlines
    treated as line separators. The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :meth:`ParserElement.parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a location immediately after a newline is column 1 of the next line
    if 0 < loc < len(strg) and strg[loc - 1] == "\n":
        return 1
    # otherwise, distance from the most recent newline before loc
    return loc - strg.rfind("\n", 0, loc)

58 

59 

@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """Return the current line number of ``loc`` within ``strg``, counting
    newlines as line separators. The first line is number 1.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :meth:`ParserElement.parse_string`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    # number of newlines before loc, plus one for 1-based numbering
    preceding_newlines = strg.count("\n", 0, loc)
    return preceding_newlines + 1

72 

73 

@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Return the full line of text containing position ``loc`` within ``strg``,
    counting newlines as line separators.
    """
    # line starts just after the previous newline (rfind yields -1 -> start 0)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # loc is on the final line; no terminating newline
        return strg[start:]
    return strg[start:end]

82 

83 

class _UnboundedCache:
    """Never-evicting cache; get/set/clear are closures bound as instance methods.

    The closure-over-a-local-dict pattern keeps lookups fast (locals instead of
    attribute access) and avoids exposing the backing dict as an attribute.
    """

    def __init__(self):
        store = {}
        store_get = store.get
        # sentinel returned by get() for missing keys
        self.not_in_cache = miss = object()

        def get(_, key):
            return store_get(key, miss)

        def set_(_, key, value):
            store[key] = value

        def clear(_):
            store.clear()

        # size of None signals "unbounded" to callers
        self.size = None
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)

103 

104 

class _FifoCache:
    """Bounded cache that evicts entries in insertion (FIFO) order.

    Like _UnboundedCache, the methods are closures over a local dict, bound as
    instance methods for fast lookup.
    """

    def __init__(self, size):
        store = {}
        self.size = size
        # sentinel returned by get() for missing keys
        self.not_in_cache = miss = object()
        store_get = store.get
        store_pop = store.pop

        def get(_, key):
            return store_get(key, miss)

        def set_(_, key, value):
            store[key] = value
            while len(store) > size:
                # dicts preserve insertion order, so the first key is the oldest
                store_pop(next(iter(store)))

        def clear(_):
            store.clear()

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)

128 

129 

class LRUMemo:
    """
    A memoizing mapping that retains `capacity` deleted items

    Entries live in ``_active`` until deleted, then move to ``_memory``, which
    is kept roughly bounded by ``capacity``; the least recently used retained
    item is discarded first.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}
        self._memory = {}

    def __getitem__(self, key):
        # live entries win; retained entries are refreshed by re-inserting at
        # the end of the dict (dicts keep insertion order = recency order here)
        if key in self._active:
            return self._active[key]
        value = self._memory.pop(key)  # KeyError propagates for unknown keys
        self._memory[key] = value
        return value

    def __setitem__(self, key, value):
        # a fresh assignment supersedes any retained copy
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        # deleting an unknown key is a silent no-op
        if key not in self._active:
            return
        value = self._active.pop(key)
        # trim the oldest retained entries before retaining this one
        stale_keys = list(self._memory)[: -(self._capacity + 1)]
        for stale in stale_keys:
            self._memory.pop(stale)
        self._memory[key] = value

    def clear(self):
        self._active.clear()
        self._memory.clear()

168 

169 

class UnboundedMemo(dict):
    """
    A memoizing mapping that retains all deleted items
    """

    def __delitem__(self, key):
        # deliberately a no-op: every memoized entry is kept forever
        return

177 

178 

def _escape_regex_range_chars(s: str) -> str:
    """Backslash-escape characters that are special inside a regex ``[]`` range."""
    out = s
    # the backslash must be escaped first, so later escapes are not doubled
    for special in ("\\", "^", "-", "[", "]"):
        out = out.replace(special, "\\" + special)
    # spell whitespace as escape sequences rather than literal characters
    out = out.replace("\n", "\\n").replace("\t", "\\t")
    return str(out)

186 

187 

class _GroupConsecutive:
    """
    Stateful callable usable as the ``key`` for :func:`itertools.groupby` to
    cluster characters whose code points are consecutive.

    Example::

        from itertools import groupby
        for index, group in groupby("abcdejkmpqrs", key=_GroupConsecutive()):
            print((index, list(group)))

    prints::

        (0, ['a', 'b', 'c', 'd', 'e'])
        (1, ['j', 'k'])
        (2, ['m'])
        (3, ['p', 'q', 'r', 's'])
    """

    def __init__(self) -> None:
        self.prev = 0
        self.counter = itertools.count()
        self.value = -1

    def __call__(self, char: str) -> int:
        c_int = ord(char)
        prev = self.prev
        self.prev = c_int
        # a gap in code points starts a new group id
        if c_int - prev > 1:
            self.value = next(self.counter)
        return self.value

223 

224 

def _is_iterable(obj, _str_type=(str, bytes), _iter_exception=Exception):
    """Return True if obj is iterable, treating str/bytes as non-iterable."""
    # pyparsing treats strings as atomic values, not as character sequences
    if isinstance(obj, _str_type):
        return False
    try:
        iter(obj)
    except _iter_exception:  # noqa
        return False
    return True

236 

237 

def _escape_re_range_char(c: str) -> str:
    """Escape ``c`` with a backslash if it is special inside a regex character class."""
    if c in r"\^-][":
        return "\\" + c
    return c

240 

241 

def _collapse_string_to_ranges(
    s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
    r"""
    Take a string or list of single-character strings, and return
    a string of the consecutive characters in that string collapsed
    into groups, as might be used in a regular expression '[a-z]'
    character set::

        'a' -> 'a' -> '[a]'
        'bc' -> 'bc' -> '[bc]'
        'defgh' -> 'd-h' -> '[d-h]'
        'fdgeh' -> 'd-h' -> '[d-h]'
        'jklnpqrtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Duplicates get collapsed out::

        'aaa' -> 'a' -> '[a]'
        'bcbccb' -> 'bc' -> '[bc]'
        'defghhgf' -> 'd-h' -> '[d-h]'
        'jklnpqrjjjtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Spaces are preserved::

        'ab c' -> ' a-c' -> '[ a-c]'

    Characters that are significant when defining regex ranges
    get escaped::

        'acde[]-' -> r'\-\[\]ac-e' -> r'[\-\[\]ac-e]'
    """

    # Developer notes:
    # - Do not optimize this code assuming that the given input string
    #   or internal lists will be short (such as in loading generators into
    #   lists to make it easier to find the last element); this method is also
    #   used to generate regex ranges for character sets in the pyparsing.unicode
    #   classes, and these can be _very_ long lists of strings

    escape_re_range_char: Callable[[str], str]
    if re_escape:
        escape_re_range_char = _escape_re_range_char
    else:
        # identity - caller wants raw, unescaped characters
        escape_re_range_char = lambda ss: ss

    ret: list[str] = []

    # reduce input string to remove duplicates, and put in sorted order
    s_chars: list[str] = sorted(set(s))

    if len(s_chars) > 2:
        # find groups of characters that are consecutive (can be collapsed
        # down to "<first>-<last>")
        # NOTE: _GroupConsecutive is a stateful key callable, so a fresh
        # instance is constructed for this groupby call
        for _, chars in itertools.groupby(s_chars, key=_GroupConsecutive()):
            # _ is unimportant, is just used to identify groups
            # chars is an iterator of one or more consecutive characters
            # that comprise the current group
            first = last = next(chars)
            # drain the group iterator to find its last element; a ValueError
            # from the starred unpack means the group had only one character,
            # leaving last == first
            with contextlib.suppress(ValueError):
                *_, last = chars

            if first == last:
                # there was only a single char in this group
                ret.append(escape_re_range_char(first))

            elif last == chr(ord(first) + 1):
                # there were only 2 characters in this group
                # 'a','b' -> 'ab'
                ret.append(f"{escape_re_range_char(first)}{escape_re_range_char(last)}")

            else:
                # there were > 2 characters in this group, make into a range
                # 'c','d','e' -> 'c-e'
                ret.append(
                    f"{escape_re_range_char(first)}-{escape_re_range_char(last)}"
                )
    else:
        # only 1 or 2 chars were given to form into groups
        #   'a' -> ['a']
        #   'bc' -> ['b', 'c']
        #   'dg' -> ['d', 'g']
        # no need to list them with "-", just return as a list
        # (after escaping)
        ret = [escape_re_range_char(c) for c in s_chars]

    return "".join(ret)

328 

329 

def _flatten(ll: Iterable) -> list:
    """Recursively flatten nested iterables into one list, keeping strings whole."""
    flat: list = []
    for item in ll:
        # Developer notes:
        # - do not collapse this section of code, isinstance checks are done
        #   in optimal order (str first, since strings are also Iterable)
        if isinstance(item, str):
            flat.append(item)
        elif isinstance(item, Iterable):
            flat.extend(_flatten(item))
        else:
            flat.append(item)
    return flat

343 

344 

def _convert_escaped_numerics_to_char(s: str) -> str:
    """Convert a numeric escape body ('0', octal 'nnn', 'xhh', 'uhhhh') to its character.

    Returns ``s`` unchanged when it does not match any recognized form.
    """
    if s == "0":
        return "\0"
    if len(s) == 3 and s.isdigit():
        # exactly three digits -> octal escape
        return chr(int(s, 8))
    if s[:1] in ("u", "x"):
        # hex escape; remainder after the prefix is the code point
        return chr(int(s[1:], 16))
    return s

353 

354 

def make_compressed_re(
    word_list: Iterable[str],
    max_level: int = 2,
    *,
    non_capturing_groups: bool = True,
    _level: int = 1,
) -> str:
    """
    Create a regular expression string from a list of words, collapsing by common
    prefixes and optional suffixes.

    Calls itself recursively to build nested sublists for each group of suffixes
    that have a shared prefix.

    :param word_list: words to combine into a single alternation regex
    :param max_level: maximum recursion depth for prefix grouping
    :param non_capturing_groups: emit ``(?:...)`` groups instead of ``(...)``
    :param _level: internal recursion depth counter - do not pass explicitly
    :raises ValueError: if no words are given, or a word is the empty string
    """

    def get_suffixes_from_common_prefixes(namelist: list[str]):
        # Group sorted names by first character, yielding
        # (prefix_char, suffixes sorted longest-first) pairs.
        if len(namelist) > 1:
            for prefix, suffixes in itertools.groupby(namelist, key=lambda s: s[:1]):
                yield prefix, sorted([s[1:] for s in suffixes], key=len, reverse=True)
        else:
            yield namelist[0][0], [namelist[0][1:]]

    # Materialize and dedupe the words up front (dict keys preserve first-seen
    # order). Doing this BEFORE any membership tests keeps generator/iterator
    # inputs - allowed by the Iterable[str] annotation - from being exhausted
    # by the validation checks below.
    word_list = list({}.fromkeys(word_list))

    if _level == 1:
        if not word_list:
            raise ValueError("no words given to make_compressed_re()")

        if "" in word_list:
            raise ValueError("word list cannot contain empty string")
    else:
        # internal recursive call, just return empty string if no words
        if not word_list:
            return ""

    if max_level == 0:
        # recursion budget exhausted: emit a plain alternation, or a simple
        # character class when every word is a single character
        if any(len(wd) > 1 for wd in word_list):
            return "|".join(
                sorted([re.escape(wd) for wd in word_list], key=len, reverse=True)
            )
        return f"[{''.join(_escape_regex_range_chars(wd) for wd in word_list)}]"

    ret = []
    sep = ""
    ncgroup = "?:" if non_capturing_groups else ""

    for initial, suffixes in get_suffixes_from_common_prefixes(sorted(word_list)):
        ret.append(sep)
        sep = "|"

        initial = re.escape(initial)

        # an empty suffix means the bare prefix is itself a word, so whatever
        # follows the prefix is optional
        trailing = ""
        if "" in suffixes:
            trailing = "?"
            suffixes.remove("")

        if len(suffixes) > 1:
            if all(len(s) == 1 for s in suffixes):
                # all remaining suffixes are single chars -> character class
                ret.append(
                    f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}"
                )
            elif _level < max_level:
                # recurse to compress the suffix group by ITS common prefixes
                suffix_re = make_compressed_re(
                    sorted(suffixes),
                    max_level,
                    non_capturing_groups=non_capturing_groups,
                    _level=_level + 1,
                )
                ret.append(f"{initial}({ncgroup}{suffix_re}){trailing}")
            else:
                # depth limit reached: plain alternation, longest first so
                # regex alternation prefers the longest match
                # (the original also re-tested all-len-1 here, but that branch
                # was unreachable - the identical test above already failed)
                suffixes.sort(key=len, reverse=True)
                ret.append(
                    f"{initial}({ncgroup}{'|'.join(re.escape(s) for s in suffixes)}){trailing}"
                )
        elif suffixes:
            suffix = re.escape(suffixes[0])
            if len(suffix) > 1 and trailing:
                # group a multi-char suffix so '?' applies to all of it
                ret.append(f"{initial}({ncgroup}{suffix}){trailing}")
            else:
                ret.append(f"{initial}{suffix}{trailing}")
        else:
            # only the bare prefix remains
            ret.append(initial)
    return "".join(ret)

448 

449 

def replaced_by_pep8(compat_name: str, fn: C) -> C:
    """
    Build a deprecated alias for ``fn`` under its legacy (pre-PEP8) name.

    Returns a wrapper that emits a PyparsingDeprecationWarning mentioning
    ``compat_name`` and then delegates to ``fn``, while reporting
    ``compat_name`` as its own name.
    """
    # Unwrap staticmethod/classmethod
    fn = getattr(fn, "__func__", fn)

    # (Presence of 'self' arg in signature is used by explain_exception() methods, so we take
    # some extra steps to add it if present in decorated function.)
    if ["self"] == list(inspect.signature(fn).parameters)[:1]:

        @wraps(fn)
        def _inner(self, *args, **kwargs):
            warnings.warn(
                f"{compat_name!r} deprecated - use {fn.__name__!r}",
                PyparsingDeprecationWarning,
                stacklevel=2,
            )
            return fn(self, *args, **kwargs)

    else:

        @wraps(fn)
        def _inner(*args, **kwargs):
            warnings.warn(
                f"{compat_name!r} deprecated - use {fn.__name__!r}",
                PyparsingDeprecationWarning,
                stacklevel=2,
            )
            return fn(*args, **kwargs)

    # Overwrite selected metadata copied in by @wraps so the alias reports its
    # legacy identity (name, deprecation docstring) while keeping fn's
    # annotations and keyword defaults.
    _inner.__doc__ = f"""
    .. deprecated:: 3.0.0
        Use :class:`{fn.__name__}` instead
    """
    _inner.__name__ = compat_name
    _inner.__annotations__ = fn.__annotations__
    if isinstance(fn, types.FunctionType):
        _inner.__kwdefaults__ = fn.__kwdefaults__  # type: ignore [attr-defined]
    elif isinstance(fn, type) and hasattr(fn, "__init__"):
        # fn is a class: take keyword defaults from its constructor
        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__  # type: ignore [misc,attr-defined]
    else:
        _inner.__kwdefaults__ = None  # type: ignore [attr-defined]
    _inner.__qualname__ = fn.__qualname__
    return cast(C, _inner)

493 

494 

def _to_pep8_name(s: str, _re_sub_pattern=re.compile(r"([a-z])([A-Z])")) -> str:
    """Convert a camelCase name to snake_case."""
    # insert '_' at each lower->upper boundary, then lowercase the whole name
    return _re_sub_pattern.sub(r"\1_\2", s).lower()

498 

499 

def deprecate_argument(
    kwargs: dict[str, Any], arg_name: str, default_value=None, *, new_name: str = ""
) -> Any:
    """Handle a deprecated keyword argument in ``kwargs``.

    If ``arg_name`` was passed, warn that it is deprecated (suggesting
    ``new_name``, or its snake_case form if not given); otherwise install
    ``default_value`` under ``arg_name``. Returns the effective value.
    """
    if arg_name not in kwargs:
        kwargs[arg_name] = default_value
        return default_value

    suggested = new_name or _to_pep8_name(arg_name)
    warnings.warn(
        f"{arg_name!r} argument is deprecated, use {suggested!r}",
        category=PyparsingDeprecationWarning,
        stacklevel=3,
    )
    return kwargs[arg_name]