Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/html2text/config.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

46 statements  

1import re 

2 

3# Use Unicode characters instead of their ascii pseudo-replacements 

4UNICODE_SNOB = False 

5 

6# Marker to use for marking tables for padding post processing 

7TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding" 

8# Escape all special characters. Output is less readable, but avoids 

9# corner case formatting issues. 

10ESCAPE_SNOB = False 

11 

12# Put the links after each paragraph instead of at the end. 

13LINKS_EACH_PARAGRAPH = False 

14 

15# Wrap long lines at position. 0 for no wrapping. 

16BODY_WIDTH = 78 

17 

18# Don't show internal links (href="#local-anchor") -- corresponding link 

19# targets won't be visible in the plain text file anyway. 

20SKIP_INTERNAL_LINKS = True 

21 

22# Use inline, rather than reference, formatting for images and links 

23INLINE_LINKS = True 

24 

25# Protect links from line breaks by surrounding them with angle brackets (in 

26# addition to their square brackets) 

27PROTECT_LINKS = False 

28# Allow links to be wrapped when wrapping long lines 

29WRAP_LINKS = True 

30 

31# Wrap list items. 

32WRAP_LIST_ITEMS = False 

33 

34# Wrap tables 

35WRAP_TABLES = False 

36 

37# Number of pixels Google indents nested lists 

38GOOGLE_LIST_INDENT = 36 

39 

40# Values Google and others may use to indicate bold text 

41BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900") 

42 

43IGNORE_ANCHORS = False 

44IGNORE_MAILTO_LINKS = False 

45IGNORE_IMAGES = False 

46IMAGES_AS_HTML = False 

47IMAGES_TO_ALT = False 

48IMAGES_WITH_SIZE = False 

49IGNORE_EMPHASIS = False 

50MARK_CODE = False 

51BACKQUOTE_CODE_STYLE = False 

52DECODE_ERRORS = "strict" 

53DEFAULT_IMAGE_ALT = "" 

54PAD_TABLES = False 

55 

56# Convert links with same href and text to <href> format 

57# if they are absolute links 

58USE_AUTOMATIC_LINKS = True 

59 

60# For checking space-only lines (the original "line 771" reference points outside this 169-line file and may be stale) 

61RE_SPACE = re.compile(r"\s\+") 

62 

63RE_ORDERED_LIST_MATCHER = re.compile(r"\d+\.\s") 

64RE_UNORDERED_LIST_MATCHER = re.compile(r"[-\*\+]\s") 

65RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])") 

66RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])") 

67 

68# to find links in the text 

69RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)") 

70 

71# to find table separators 

72RE_TABLE = re.compile(r" \| ") 

73 

74RE_MD_DOT_MATCHER = re.compile( 

75 r""" 

76 ^ # start of line 

77 (\s*\d+) # optional whitespace and a number 

78 (\.) # dot 

79 (?=\s) # lookahead assert whitespace 

80 """, 

81 re.MULTILINE | re.VERBOSE, 

82) 

83RE_MD_PLUS_MATCHER = re.compile( 

84 r""" 

85 ^ 

86 (\s*) 

87 (\+) 

88 (?=\s) 

89 """, 

90 flags=re.MULTILINE | re.VERBOSE, 

91) 

92RE_MD_DASH_MATCHER = re.compile( 

93 r""" 

94 ^ 

95 (\s*) 

96 (-) 

97 (?=\s|\-) # followed by whitespace (bullet list, or spaced out hr) 

98 # or another dash (header or hr) 

99 """, 

100 flags=re.MULTILINE | re.VERBOSE, 

101) 

102RE_SLASH_CHARS = r"\`*_{}[]()#+-.!" 

103RE_MD_BACKSLASH_MATCHER = re.compile( 

104 r""" 

105 (\\) # match one backslash 

106 (?=[%s]) # followed by a char that requires escaping 

107 """ 

108 % re.escape(RE_SLASH_CHARS), 

109 flags=re.VERBOSE, 

110) 

111 

112UNIFIABLE = { 

113 "rsquo": "'", 

114 "lsquo": "'", 

115 "rdquo": '"', 

116 "ldquo": '"', 

117 "copy": "(C)", 

118 "mdash": "--", 

119 "nbsp": " ", 

120 "rarr": "->", 

121 "larr": "<-", 

122 "middot": "*", 

123 "ndash": "-", 

124 "oelig": "oe", 

125 "aelig": "ae", 

126 "agrave": "a", 

127 "aacute": "a", 

128 "acirc": "a", 

129 "atilde": "a", 

130 "auml": "a", 

131 "aring": "a", 

132 "egrave": "e", 

133 "eacute": "e", 

134 "ecirc": "e", 

135 "euml": "e", 

136 "igrave": "i", 

137 "iacute": "i", 

138 "icirc": "i", 

139 "iuml": "i", 

140 "ograve": "o", 

141 "oacute": "o", 

142 "ocirc": "o", 

143 "otilde": "o", 

144 "ouml": "o", 

145 "ugrave": "u", 

146 "uacute": "u", 

147 "ucirc": "u", 

148 "uuml": "u", 

149 "lrm": "", 

150 "rlm": "", 

151} 

152 

153# Format tables in HTML rather than Markdown syntax 

154BYPASS_TABLES = False 

155# Ignore table-related tags (table, th, td, tr) while keeping rows 

156IGNORE_TABLES = False 

157 

158 

159# Use a single line break after a block element rather than two line breaks. 

160# NOTE: Requires body width setting to be 0. 

161SINGLE_LINE_BREAK = False 

162 

163 

164# Use double quotation marks when converting the <q> tag. 

165OPEN_QUOTE = '"' 

166CLOSE_QUOTE = '"' 

167 

168# Include the <sup> and <sub> tags 

169INCLUDE_SUP_SUB = False