1"""Regexps to match html elements"""
2
3import re
4
# Regex source fragments (plain strings, not compiled) that are glued together
# further down into the two compiled patterns exported by this module.

# Attribute name: an ASCII letter, "_" or ":" followed by any number of ASCII
# letters, digits, ":", ".", "_" or "-".
attr_name = r"[a-zA-Z_:][a-zA-Z0-9:._-]*"

# The three accepted spellings of an attribute value.
# Unquoted: one or more characters, none of which is a quote, "=", "<", ">",
# a backtick, or a code point in the range U+0000..U+0020.
unquoted = r"""[^"'=<>`\x00-\x20]+"""
single_quoted = r"'[^']*'"
double_quoted = r'"[^"]*"'

attr_value = "(?:{})".format("|".join((unquoted, single_quoted, double_quoted)))

# One attribute: mandatory leading whitespace, the name, then an optional
# "= value" part where whitespace around "=" is allowed.
attribute = "".join((r"(?:\s+", attr_name, r"(?:\s*=\s*", attr_value, r")?)"))

# Opening tag: "<", a tag name (ASCII letter, then letters/digits/"-"), any
# number of attributes, optional whitespace, an optional "/", and ">".
open_tag = "".join((r"<[A-Za-z][A-Za-z0-9\-]*", attribute, r"*\s*\/?>"))

# Closing tag: "</", a tag name, optional whitespace, ">".
close_tag = r"<\/[A-Za-z][A-Za-z0-9\-]*\s*>"

# Comment: either the empty comment "<!---->", or "<!--" + text + "-->" where
# the text does not start with ">" or "->" and every "-" in it is followed by
# a non-"-" character.
comment = r"<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"

# Processing instruction: "<?" ... "?>", shortest match, newlines allowed.
processing = r"<[?][\s\S]*?[?]>"

# Declaration: "<!", one or more uppercase ASCII letters, whitespace, then
# anything up to the next ">".
declaration = r"<![A-Z]+\s+[^>]*>"

# CDATA section: "<![CDATA[" ... "]]>", shortest match, newlines allowed.
cdata = r"<!\[CDATA\[[\s\S]*?\]\]>"

# Source of a pattern accepting only an opening or a closing tag, anchored
# with "^"; kept as a string alongside its compiled form.
HTML_OPEN_CLOSE_TAG_STR = "^(?:{})".format("|".join((open_tag, close_tag)))
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)

# Pattern accepting any of the six constructs above, anchored with "^".
HTML_TAG_RE = re.compile(
    "^(?:{})".format(
        "|".join((open_tag, close_tag, comment, processing, declaration, cdata))
    )
)