Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nameparser/config/regexes.py: 75%
8 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:08 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:08 +0000
1# -*- coding: utf-8 -*-
2from __future__ import unicode_literals
3import re
# emoji regex from https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python
#
# ``re_emoji`` matches a run of one or more characters drawn from the ranges
# U+1F300-U+1F64F, U+1F680-U+1F6FF, U+2600-U+26FF and U+2700-U+27BF.
try:
    # Wide UCS-4 build: the astral code points are written directly as
    # character-class range endpoints.
    re_emoji = re.compile(
        '['
        '\U0001F300-\U0001F64F'
        '\U0001F680-\U0001F6FF'
        '\u2600-\u26FF\u2700-\u27BF]+',
        re.UNICODE)
except re.error:
    # Narrow UCS-2 build: used only when compiling the pattern above raises
    # re.error. The astral ranges are spelled as surrogate pairs instead:
    # a \ud83c or \ud83d high surrogate followed by a class of low surrogates.
    re_emoji = re.compile(
        '('
        '\ud83c[\udf00-\udfff]|'
        '\ud83d[\udc00-\ude4f\ude80-\udeff]|'
        '[\u2600-\u26FF\u2700-\u27BF])+',
        re.UNICODE)
# Set of (name, compiled pattern) pairs. Written as a set literal so no
# intermediate list is built; every pattern is compiled once, when the
# module is imported.
REGEXES = {
    ("spaces", re.compile(r"\s+", re.U)),
    ("word", re.compile(r"(\w|\.)+", re.U)),
    ("mac", re.compile(r'^(ma?c)(\w{2,})', re.I | re.U)),
    ("initial", re.compile(r'^(\w\.|[A-Z])?$', re.U)),
    ("quoted_word", re.compile(r'(?<!\w)\'([^\s]*?)\'(?!\w)', re.U)),
    ("double_quotes", re.compile(r'\"(.*?)\"', re.U)),
    ("parenthesis", re.compile(r'\((.*?)\)', re.U)),
    ("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
    ("no_vowels", re.compile(r'^[^aeyiuo]+$', re.I | re.U)),
    ("period_not_at_end", re.compile(r'.*\..+$', re.I | re.U)),
    ("emoji", re_emoji),
    ("phd", re.compile(r'\s(ph\.?\s+d\.?)', re.I | re.U)),
}
"""
All regular expressions used by the parser are precompiled and stored in the config.
"""