Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nameparser/config/regexes.py: 75%

8 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:08 +0000

1# -*- coding: utf-8 -*- 

2from __future__ import unicode_literals 

3import re 

4 

# Emoji pattern adapted from
# https://stackoverflow.com/questions/26568722/remove-unicode-emoji-using-re-in-python
#
# ``re_emoji`` matches one or more consecutive characters drawn from the
# ranges U+1F300-U+1F64F, U+1F680-U+1F6FF, U+2600-U+26FF and U+2700-U+27BF.
try:
    # Wide (UCS-4) build: the astral code points can be written directly
    # inside a single character class.
    re_emoji = re.compile(
        '['
        '\U0001F300-\U0001F64F'
        '\U0001F680-\U0001F6FF'
        '\u2600-\u26FF\u2700-\u27BF'
        ']+',
        re.UNICODE,
    )
except re.error:
    # Narrow (UCS-2) build: compiling the class above raised re.error, so the
    # astral ranges are spelled out as surrogate pairs instead.
    re_emoji = re.compile(
        '('
        '\ud83c[\udf00-\udfff]|'
        '\ud83d[\udc00-\ude4f\ude80-\udeff]|'
        '[\u2600-\u26FF\u2700-\u27BF]'
        ')+',
        re.UNICODE,
    )

20 

# Every member is a ``(name, compiled_pattern)`` pair. All patterns are
# compiled with re.U; those that also pass re.I match case-insensitively.
REGEXES = {
    # One or more whitespace characters.
    ("spaces", re.compile(r"\s+", re.U)),
    # A run of word characters and/or periods.
    ("word", re.compile(r"(\w|\.)+", re.U)),
    # Leading "mc"/"mac" (group 1) followed by two or more word characters
    # (group 2).
    ("mac", re.compile(r'^(ma?c)(\w{2,})', re.I | re.U)),
    # A word character plus a period, or one uppercase A-Z letter; the group
    # is optional, so the empty string matches as well.
    ("initial", re.compile(r'^(\w\.|[A-Z])?$', re.U)),
    # A whitespace-free run wrapped in single quotes, where neither quote
    # touches a word character on its outer side.
    ("quoted_word", re.compile(r'(?<!\w)\'([^\s]*?)\'(?!\w)', re.U)),
    # Shortest text enclosed in double quotes.
    ("double_quotes", re.compile(r'\"(.*?)\"', re.U)),
    # Shortest text enclosed in parentheses.
    ("parenthesis", re.compile(r'\((.*?)\)', re.U)),
    # X, IX, IV, or an optional V followed by up to three I's.
    ("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
    # A non-empty string containing none of the letters a, e, y, i, u, o.
    ("no_vowels", re.compile(r'^[^aeyiuo]+$', re.I | re.U)),
    # A period that is followed by at least one more character.
    ("period_not_at_end", re.compile(r'.*\..+$', re.I | re.U)),
    # The module-level emoji pattern, compiled earlier in this file.
    ("emoji", re_emoji),
    # Whitespace, then "ph" and "d" separated by whitespace, each with an
    # optional trailing period.
    ("phd", re.compile(r'\s(ph\.?\s+d\.?)', re.I | re.U)),
}
"""
All regular expressions used by the parser are precompiled and stored in the config.
"""