1# :Id: $Id$
2# :Copyright: © 2011, 2017 Günter Milde.
3# :License: Released under the terms of the `2-Clause BSD license`_, in short:
4#
5# Copying and distribution of this file, with or without modification,
6# are permitted in any medium without royalty provided the copyright
7# notice and this notice are preserved.
8# This file is offered as-is, without any warranty.
9#
10# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
11#
12# This file is generated by
13# ``docutils/tools/dev/generate_punctuation_chars.py``.
14# ::
15
16"""Docutils character category patterns.
17
18 Patterns for the implementation of the `inline markup recognition rules`_
19 in the reStructuredText parser `docutils.parsers.rst.states.py` based
20 on Unicode character categories.
21 The patterns are used inside ``[ ]`` in regular expressions.
22
23 Rule (5) requires determination of matching open/close pairs. However, the
24 pairing of open/close quotes is ambiguous due to different typographic
 conventions in different languages. The ``match_chars`` function tests
 whether two characters form an open/close pair; ``quote_pairs`` holds the
 additional, language-specific pairings it consults.
27
28 The patterns are generated by
29 ``docutils/tools/dev/generate_punctuation_chars.py`` to prevent dependence
30 on the Python version and avoid the time-consuming generation with every
31 Docutils run. See there for motives and implementation details.
32
33 The category of some characters changed with the development of the
34 Unicode standard. The current lists are generated with the help of the
35 "unicodedata" module of Python 2.7.13 (based on Unicode version 5.2.0).
36
37 .. _inline markup recognition rules:
38 https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html
39 #inline-markup-recognition-rules
40"""
41
# Opening characters, for use inside a regular expression ``[ ]`` set.
# The character at index ``i`` pairs with ``closers[i]``, so the order
# and the total length of this string are significant.
openers = (
    # ASCII quotes and brackets.  The backslash in front of "[" is a
    # real member of the string.
    '"\'(<\\[{'
    # U+0F3A .. U+2329
    '\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329'
    # U+2768 .. U+2774
    '\u2768\u276a\u276c\u276e\u2770\u2772\u2774'
    # U+27C5 .. U+27EE
    '\u27c5\u27e6\u27e8\u27ea\u27ec\u27ee'
    # U+2983 .. U+29FC
    '\u2983\u2985\u2987\u2989\u298b\u298d\u298f'
    '\u2991\u2993\u2995\u2997\u29d8\u29da\u29fc'
    # U+2E22 .. U+2E28
    '\u2e22\u2e24\u2e26\u2e28'
    # U+3008 .. U+301D.  U+301D is listed twice: the two entries line up
    # with U+301E and U+301F in `closers`.
    '\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a'
    '\u301d\u301d'
    # U+FD3E .. U+FE5D
    '\ufd3e\ufe17\ufe35\ufe37\ufe39\ufe3b\ufe3d'
    '\ufe3f\ufe41\ufe43\ufe47\ufe59\ufe5b\ufe5d'
    # U+FF08 .. U+FF62
    '\uff08\uff3b\uff5b\uff5f\uff62'
    # Quotation marks.  Each run below appears in `closers` as well,
    # at the positions of its counterpart run.
    '\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c\u2e1c\u2e20'
    '\u201a\u201e'
    '\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d\u2e1d\u2e21'
    '\u201b\u201f'
)
# Closing characters, for use inside a regular expression ``[ ]`` set.
# The character at index ``i`` pairs with ``openers[i]``, so the order
# and the total length of this string are significant.
closers = (
    # ASCII quotes and brackets.  The backslash in front of "]" is a
    # real member of the string.
    '"\')>\\]}'
    # U+0F3B .. U+232A
    '\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a'
    # U+2769 .. U+2775
    '\u2769\u276b\u276d\u276f\u2771\u2773\u2775'
    # U+27C6 .. U+27EF
    '\u27c6\u27e7\u27e9\u27eb\u27ed\u27ef'
    # U+2984 .. U+29FD
    '\u2984\u2986\u2988\u298a\u298c\u298e\u2990'
    '\u2992\u2994\u2996\u2998\u29d9\u29db\u29fd'
    # U+2E23 .. U+2E29
    '\u2e23\u2e25\u2e27\u2e29'
    # U+3009 .. U+301F.  U+301E and U+301F both line up with an
    # U+301D entry in `openers`.
    '\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b'
    '\u301e\u301f'
    # U+FD3F .. U+FE5E
    '\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c\ufe3e'
    '\ufe40\ufe42\ufe44\ufe48\ufe5a\ufe5c\ufe5e'
    # U+FF09 .. U+FF63
    '\uff09\uff3d\uff5d\uff60\uff63'
    # Quotation marks.  Each run below appears in `openers` as well,
    # at the positions of its counterpart run.
    '\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d\u2e1d\u2e21'
    '\u201b\u201f'
    '\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c\u2e1c\u2e20'
    '\u201a\u201e'
)
# Delimiter characters, written as the body of a regular expression
# ``[ ]`` set: ``x-y`` denotes a range and the leading hyphen is
# backslash-escaped.  Every range is kept within a single literal.
delimiters = (
    # ASCII
    '\\-/:'
    # U+00A1 .. U+05F4 (kept in the generated, not strictly sorted, order)
    '\u058a\xa1\xb7\xbf\u037e\u0387\u055a-\u055f\u0589'
    '\u05be\u05c0\u05c3\u05c6\u05f3\u05f4'
    # U+0609 .. U+083E
    '\u0609\u060a\u060c\u060d\u061b\u061e\u061f\u066a-\u066d\u06d4'
    '\u0700-\u070d\u07f7-\u07f9\u0830-\u083e'
    # U+0964 .. U+10FB
    '\u0964\u0965\u0970\u0df4\u0e4f\u0e5a\u0e5b'
    '\u0f04-\u0f12\u0f85\u0fd0-\u0fd4\u104a-\u104f\u10fb'
    # U+1361 .. U+1CD3
    '\u1361-\u1368\u1400\u166d\u166e\u16eb-\u16ed\u1735\u1736'
    '\u17d4-\u17d6\u17d8-\u17da\u1800-\u180a\u1944\u1945'
    '\u19de\u19df\u1a1e\u1a1f\u1aa0-\u1aa6\u1aa8-\u1aad'
    '\u1b5a-\u1b60\u1c3b-\u1c3f\u1c7e\u1c7f\u1cd3'
    # U+2010 .. U+205E
    '\u2010-\u2017\u2020-\u2027\u2030-\u2038\u203b-\u203e'
    '\u2041-\u2043\u2047-\u2051\u2053\u2055-\u205e'
    # U+2CF9 .. U+2E31
    '\u2cf9-\u2cfc\u2cfe\u2cff\u2e00\u2e01\u2e06-\u2e08\u2e0b'
    '\u2e0e-\u2e1b\u2e1e\u2e1f\u2e2a-\u2e2e\u2e30\u2e31'
    # U+3001 .. U+30FB
    '\u3001-\u3003\u301c\u3030\u303d\u30a0\u30fb'
    # U+A4FE .. U+ABEB
    '\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7'
    '\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f'
    '\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf\uabeb'
    # U+FE10 .. U+FE6B
    '\ufe10-\ufe16\ufe19\ufe30-\ufe32\ufe45\ufe46\ufe49-\ufe4c'
    '\ufe50-\ufe52\ufe54-\ufe58\ufe5f-\ufe61\ufe63\ufe68\ufe6a\ufe6b'
    # U+FF01 .. U+FF65
    '\uff01-\uff03\uff05-\uff07\uff0a\uff0c-\uff0f\uff1a'
    '\uff1b\uff1f\uff20\uff3c\uff61\uff64\uff65'
    # U+10100 .. U+12473 (outside the Basic Multilingual Plane)
    '\U00010100\U00010101\U0001039f\U000103d0\U00010857'
    '\U0001091f\U0001093f\U00010a50-\U00010a58\U00010a7f'
    '\U00010b39-\U00010b3f\U000110bb\U000110bc'
    '\U000110be-\U000110c1\U00012470-\U00012473'
)
# Body of a regular expression ``[ ]`` set: an escaped backslash
# (two backslash characters) followed by the characters ``.,;!?``.
closing_delimiters = '\\\\' '.,;!?'
93
94
95# Matching open/close quotes
96# --------------------------
97
98# Matching open/close pairs are at the same position in
99# `punctuation_chars.openers` and `punctuation_chars.closers`.
100# Additional matches (due to different typographic conventions
101# in different languages) are stored in `quote_pairs`.
102
# Maps an opening quote to a string holding every additional closing
# quote accepted for it, on top of the positional pairing of `openers`
# and `closers`.  The comment above each entry gives the use case.
quote_pairs = {
    # Swedish: » »
    '\xbb': '\xbb',
    # Albanian/Greek/Turkish: ‘ ‚
    '\u2018': '\u201a',
    # Swedish: ’ ’
    '\u2019': '\u2019',
    # German: ‚ ‘    Polish: ‚ ’
    '\u201a': '\u2018' '\u2019',
    # Albanian/Greek/Turkish: “ „
    '\u201c': '\u201e',
    # German: „ “    Polish: „ ”
    '\u201e': '\u201c' '\u201d',
    # Swedish: ” ”
    '\u201d': '\u201d',
    # Swedish: › ›
    '\u203a': '\u203a',
}
"""Additional open/close quote pairs."""
115
116
def match_chars(c1, c2):
    """Test whether `c1` and `c2` are a matching open/close character pair.

    `c1` and `c2` must be single characters.  They match if

    * `c1` occurs in `openers` and `c2` is found at the same position
      in `closers`, or
    * `c1` occurs in `openers` and `c2` is listed for `c1` in `quote_pairs`.

    A character may occur more than once in `openers` (U+301D is listed
    twice, lined up with U+301E and U+301F in `closers`), so every
    occurrence is checked, not just the first one.

    Return False if `c1` is not in `openers` or if either argument is
    not exactly one character long.
    """
    # `str.find` and `in` do substring matching: without this guard an
    # empty string would be "found" at index 0 of `openers` and in every
    # `quote_pairs` value, and a multi-character string could match a
    # run of adjacent characters.
    if len(c1) != 1 or len(c2) != 1:
        return False
    pos = openers.find(c1)
    if pos == -1:  # c1 not in openers
        return False
    while pos != -1:
        if closers[pos] == c2:
            return True
        # Look for a further occurrence of the same opener.
        pos = openers.find(c1, pos + 1)
    return c2 in quote_pairs.get(c1, '')