Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/docutils/utils/punctuation_chars.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

12 statements  

1# :Id: $Id$ 

2# :Copyright: © 2011, 2017 Günter Milde. 

3# :License: Released under the terms of the `2-Clause BSD license`_, in short: 

4# 

5# Copying and distribution of this file, with or without modification, 

6# are permitted in any medium without royalty provided the copyright 

7# notice and this notice are preserved. 

8# This file is offered as-is, without any warranty. 

9# 

10# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause 

11# 

12# This file is generated by 

13# ``docutils/tools/dev/generate_punctuation_chars.py``. 

14# :: 

15 

16"""Docutils character category patterns. 

17 

18 Patterns for the implementation of the `inline markup recognition rules`_ 

19 in the reStructuredText parser `docutils.parsers.rst.states.py` based 

20 on Unicode character categories. 

21 The patterns are used inside ``[ ]`` in regular expressions. 

22 

23 Rule (5) requires determination of matching open/close pairs. However, the 

24 pairing of open/close quotes is ambiguous due to different typographic 

25 conventions in different languages. The ``match_chars`` function tests 

26 whether two characters form an open/close pair. 

27 

28 The patterns are generated by 

29 ``docutils/tools/dev/generate_punctuation_chars.py`` to prevent dependence 

30 on the Python version and avoid the time-consuming generation with every 

31 Docutils run. See there for motives and implementation details. 

32 

33 The category of some characters changed with the development of the 

34 Unicode standard. The current lists are generated with the help of the 

35 "unicodedata" module of Python 2.7.13 (based on Unicode version 5.2.0). 

36 

37 .. _inline markup recognition rules: 

38 https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html 

39 #inline-markup-recognition-rules 

40""" 

41 

# Characters that can open a pair, meant to be placed inside ``[ ]`` in a
# regular expression.  The character at index ``i`` is paired with
# ``closers[i]``, so length and order must stay in step with `closers`.
openers = ''.join((
    # ASCII quotes and brackets (the backslash escapes "[" for regex use)
    # followed by non-ASCII opening characters.  U+301D occurs twice so
    # that it lines up with both U+301E and U+301F in `closers`.
    '"\'(<\\[{\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329\u2768'
    '\u276a\u276c\u276e\u2770\u2772\u2774\u27c5\u27e6\u27e8\u27ea'
    '\u27ec\u27ee\u2983\u2985\u2987\u2989\u298b\u298d\u298f\u2991'
    '\u2993\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26\u2e28'
    '\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a\u301d'
    '\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39\ufe3b\ufe3d\ufe3f\ufe41'
    '\ufe43\ufe47\ufe59\ufe5b\ufe5d\uff08\uff3b\uff5b\uff5f\uff62',
    # Quotation marks and similar paired punctuation.  The same set of
    # characters also appears (in a different order) in `closers`.
    '\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c\u2e1c\u2e20'
    '\u201a\u201e\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d'
    '\u2e1d\u2e21\u201b\u201f',
))

# Characters that can close a pair, meant to be placed inside ``[ ]`` in a
# regular expression.  The character at index ``i`` is the partner of
# ``openers[i]``, so length and order must stay in step with `openers`.
closers = ''.join((
    # ASCII quotes and brackets (the backslash escapes "]" for regex use)
    # followed by non-ASCII closing characters.
    '"\')>\\]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a\u2769'
    '\u276b\u276d\u276f\u2771\u2773\u2775\u27c6\u27e7\u27e9\u27eb'
    '\u27ed\u27ef\u2984\u2986\u2988\u298a\u298c\u298e\u2990\u2992'
    '\u2994\u2996\u2998\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29'
    '\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e'
    '\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c\ufe3e\ufe40\ufe42'
    '\ufe44\ufe48\ufe5a\ufe5c\ufe5e\uff09\uff3d\uff5d\uff60\uff63',
    # Quotation marks and similar paired punctuation.  The same set of
    # characters also appears (in a different order) in `openers`.
    '\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d\u2e1d\u2e21'
    '\u201b\u201f\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c'
    '\u2e1c\u2e20\u201a\u201e',
))

# Delimiter characters, written as the body of a regular-expression
# character class (to be used inside ``[ ]``).  A "-" between two
# characters denotes a range; the leading ``\-`` is a literal hyphen.
# Several ranges continue across adjacent string literals, so the pieces
# must stay in exactly this order.
delimiters = ''.join((
    # Characters written with four-digit ``\u`` escapes (plus ASCII).
    '\\-/:\u058a\xa1\xb7\xbf\u037e\u0387\u055a-\u055f\u0589'
    '\u05be\u05c0\u05c3\u05c6\u05f3\u05f4\u0609\u060a\u060c'
    '\u060d\u061b\u061e\u061f\u066a-\u066d\u06d4\u0700-\u070d'
    '\u07f7-\u07f9\u0830-\u083e\u0964\u0965\u0970\u0df4\u0e4f'
    '\u0e5a\u0e5b\u0f04-\u0f12\u0f85\u0fd0-\u0fd4\u104a-\u104f'
    '\u10fb\u1361-\u1368\u1400\u166d\u166e\u16eb-\u16ed\u1735'
    '\u1736\u17d4-\u17d6\u17d8-\u17da\u1800-\u180a\u1944\u1945'
    '\u19de\u19df\u1a1e\u1a1f\u1aa0-\u1aa6\u1aa8-\u1aad\u1b5a-'
    '\u1b60\u1c3b-\u1c3f\u1c7e\u1c7f\u1cd3\u2010-\u2017\u2020-'
    '\u2027\u2030-\u2038\u203b-\u203e\u2041-\u2043\u2047-'
    '\u2051\u2053\u2055-\u205e\u2cf9-\u2cfc\u2cfe\u2cff\u2e00'
    '\u2e01\u2e06-\u2e08\u2e0b\u2e0e-\u2e1b\u2e1e\u2e1f\u2e2a-'
    '\u2e2e\u2e30\u2e31\u3001-\u3003\u301c\u3030\u303d\u30a0'
    '\u30fb\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7'
    '\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f'
    '\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf\uabeb'
    '\ufe10-\ufe16\ufe19\ufe30-\ufe32\ufe45\ufe46\ufe49-\ufe4c'
    '\ufe50-\ufe52\ufe54-\ufe58\ufe5f-\ufe61\ufe63\ufe68\ufe6a'
    '\ufe6b\uff01-\uff03\uff05-\uff07\uff0a\uff0c-\uff0f\uff1a'
    '\uff1b\uff1f\uff20\uff3c\uff61\uff64\uff65',
    # Characters written with eight-digit ``\U`` escapes.
    '\U00010100\U00010101\U0001039f\U000103d0\U00010857'
    '\U0001091f\U0001093f\U00010a50-\U00010a58\U00010a7f'
    '\U00010b39-\U00010b3f\U000110bb\U000110bc\U000110be-'
    '\U000110c1\U00012470-\U00012473',
))

# Closing delimiters as the body of a regular-expression character class:
# two backslash characters (which a character class reads as one escaped
# backslash) followed by ``. , ; ! ?``.
closing_delimiters = '\\\\.,;!?'

93 

94 

95# Matching open/close quotes 

96# -------------------------- 

97 

98# Matching open/close pairs are at the same position in 

99# `punctuation_chars.openers` and `punctuation_chars.closers`. 

100# Additional matches (due to different typographic conventions 

101# in different languages) are stored in `quote_pairs`. 

102 

# Maps an opening quote character to a string holding every *additional*
# closing character accepted for it, on top of the positional pairing
# given by `openers` and `closers`.
quote_pairs = {
    # » ... »  (Swedish)
    '\xbb': '\xbb',
    # ‘ ... ‚  (Albanian/Greek/Turkish)
    '\u2018': '\u201a',
    # ’ ... ’  (Swedish)
    '\u2019': '\u2019',
    # ‚ ... ‘  (German)  and  ‚ ... ’  (Polish)
    '\u201a': '\u2018\u2019',
    # “ ... „  (Albanian/Greek/Turkish)
    '\u201c': '\u201e',
    # „ ... “  (German)  and  „ ... ”  (Polish)
    '\u201e': '\u201c\u201d',
    # ” ... ”  (Swedish)
    '\u201d': '\u201d',
    # › ... ›  (Swedish)
    '\u203a': '\u203a',
}
"""Additional open/close quote pairs."""

115 

116 

def match_chars(c1, c2):
    """Test whether `c1` and `c2` are a matching open/close character pair.

    `c1` and `c2` are expected to be single characters.  They match if

    * `c1` occurs in `openers` and `c2` sits at the same position in
      `closers`, or
    * `c2` is one of the additional closing characters listed for `c1`
      in `quote_pairs`.

    Return False if `c1` does not occur in `openers` at all.
    """
    if c1 not in openers:
        return False
    # Check every position of `c1`, not just the first one: an opener may
    # be listed more than once (U+301D lines up with both U+301E and
    # U+301F in `closers`), and a first-occurrence lookup would miss the
    # later pairings.
    if any(opener == c1 and closer == c2
           for opener, closer in zip(openers, closers)):
        return True
    # Language-specific extra pairings.
    return c2 in quote_pairs.get(c1, '')