1"""Regexps to match html elements""" 
    2 
    3import re 
    4 
    5attr_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*" 
    6 
    7unquoted = "[^\"'=<>`\\x00-\\x20]+" 
    8single_quoted = "'[^']*'" 
    9double_quoted = '"[^"]*"' 
    10 
    11attr_value = "(?:" + unquoted + "|" + single_quoted + "|" + double_quoted + ")" 
    12 
    13attribute = "(?:\\s+" + attr_name + "(?:\\s*=\\s*" + attr_value + ")?)" 
    14 
    15open_tag = "<[A-Za-z][A-Za-z0-9\\-]*" + attribute + "*\\s*\\/?>" 
    16 
    17close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>" 
    18comment = "<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->" 
    19processing = "<[?][\\s\\S]*?[?]>" 
    20declaration = "<![A-Za-z][^>]*>" 
    21cdata = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>" 
    22 
    23HTML_TAG_RE = re.compile( 
    24    "^(?:" 
    25    + open_tag 
    26    + "|" 
    27    + close_tag 
    28    + "|" 
    29    + comment 
    30    + "|" 
    31    + processing 
    32    + "|" 
    33    + declaration 
    34    + "|" 
    35    + cdata 
    36    + ")" 
    37) 
    38HTML_OPEN_CLOSE_TAG_STR = "^(?:" + open_tag + "|" + close_tag + ")" 
    39HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)