Your IP : 18.218.162.180
3
� fAE�@s�dZddlZddlZddlZddlmZdgZejd�Zejd�Z ejd�Z
ejd�Zejd �Zejd
�Z
ejd�Zejd�Zejd
�Zejdej�Zejd
�Zejd�ZGdd�dej�ZdS)zA parser for HTML and XHTML.�N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
(?:\s*,)* # possibly followed by a comma
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@s�eZdZdZd:Zdd�dd�Zdd �Zd
d�Zdd
�ZdZ dd�Z
dd�Zdd�Zdd�Z
dd�Zd;dd�Zdd�Zdd�Zd d!�Zd"d#�Zd$d%�Zd&d'�Zd(d)�Zd*d+�Zd,d-�Zd.d/�Zd0d1�Zd2d3�Zd4d5�Zd6d7�Zd8d9�ZdS)<raEFind tags and other markup and call handler functions.
Usage:
p = HTMLParser()
p.feed(data)
...
p.close()
Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag(). The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks). If convert_charrefs is
True the character references are converted automatically to the
corresponding Unicode character (and self.handle_data() is no
longer split in chunks), otherwise they are passed by calling
self.handle_entityref() or self.handle_charref() with the string
containing respectively the named or numeric reference as the
argument.
�script�styleT)�convert_charrefscCs||_|j�dS)z�Initialize and reset this instance.
If convert_charrefs is True (the default), all character references
are automatically converted to the corresponding Unicode characters.
N)r�reset)�selfr�r
�0/opt/alt/python36/lib64/python3.6/html/parser.py�__init__WszHTMLParser.__init__cCs(d|_d|_t|_d|_tjj|�dS)z1Reset this instance. Loses all unprocessed data.�z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser)r r
r
rr`s
zHTMLParser.resetcCs|j||_|jd�dS)z�Feed data to the parser.
Call this as often as you want, with as little or as much text
as you want (may include '\n').
rN)r�goahead)r �datar
r
r�feedhszHTMLParser.feedcCs|jd�dS)zHandle any buffered data.�N)r)r r
r
r�closeqszHTMLParser.closeNcCs|jS)z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_text)r r
r
r�get_starttag_textwszHTMLParser.get_starttag_textcCs$|j�|_tjd|jtj�|_dS)Nz</\s*%s\s*>)�lowerr�re�compile�Ir)r �elemr
r
r�set_cdata_mode{s
zHTMLParser.set_cdata_modecCst|_d|_dS)N)rrr)r r
r
r�clear_cdata_modeszHTMLParser.clear_cdata_modecCsZ|j}d}t|�}�x�||k�r�|jr||jr||jd|�}|dkr�|jdt||d��}|dkrvtjd�j ||�rvP|}n(|j
j ||�}|r�|j�}n|jr�P|}||kr�|jr�|jr�|jt
|||���n|j|||��|j||�}||kr�P|j}|d|��rLtj||��r&|j|�} n�|d|��r>|j|�} nl|d|��rV|j|�} nT|d|��rn|j|�} n<|d |��r�|j|�} n$|d
|k�r�|jd�|d
} nP| dk�r>|�s�P|jd|d
�} | dk�r�|jd|d
�} | dk�r|d
} n| d
7} |j�r,|j�r,|jt
||| ���n|j||| ��|j|| �}q|d|��r�tj||�}|�r�|j�d
d�}
|j|
�|j�} |d| d
��s�| d
} |j|| �}qn:d||d�k�r�|j|||d
��|j||d
�}Pq|d|��r�tj||�}|�rP|jd
�}
|j|
�|j�} |d| d
��sB| d
} |j|| �}qtj||�}|�r�|�r�|j�||d�k�r�|j�} | |k�r�|} |j||d
�}Pn,|d
|k�r�|jd�|j||d
�}nPqdstd��qW|�rH||k�rH|j�rH|j�r*|j�r*|jt
|||���n|j|||��|j||�}||d�|_dS)Nr�<�&�"z[\s;]z</z<!--z<?z<!rrz&#��;zinteresting.search() lied���)r�lenrr�find�rfind�maxrr�searchr�start�handle_datarZ updatepos�
startswith�starttagopen�match�parse_starttag�parse_endtag�
parse_comment�parse_pi�parse_html_declaration�charref�group�handle_charref�end� entityref�handle_entityref�
incomplete�AssertionError)r r;r�i�n�jZampposr2r0�k�namer
r
rr�s�
zHTMLParser.goaheadcCs�|j}|||d�dks"td��|||d�dkr@|j|�S|||d�dkr^|j|�S|||d�j�d kr�|jd
|d�}|dkr�d
S|j||d|��|dS|j|�SdS)Nr&z<!z+unexpected call to parse_html_declaration()�z<!--�z<![� z <!doctyperrr(r()rr?r5Zparse_marked_sectionrr*�handle_decl�parse_bogus_comment)r r@r�gtposr
r
rr7s
z!HTMLParser.parse_html_declarationrcCs`|j}|||d�dks"td��|jd|d�}|dkr>d S|rX|j||d|��|dS)
Nr&�<!�</z"unexpected call to parse_comment()rr)rKrLr(r()rr?r*�handle_comment)r r@Zreportr�posr
r
rrIszHTMLParser.parse_bogus_commentcCsd|j}|||d�dks"td��tj||d�}|s:dS|j�}|j||d|��|j�}|S)Nr&z<?zunexpected call to parse_pi()rr()rr?�picloser-r.� handle_pir;)r r@rr2rBr
r
rr6!szHTMLParser.parse_picCs�d|_|j|�}|dkr|S|j}|||�|_g}tj||d�}|sPtd��|j�}|jd�j�|_ }x�||k�r0t
j||�}|s�P|jddd�\} }
}|
s�d}n^|dd�dko�|dd�knp�|dd�dko�|dd�kn�r|dd
�}|�rt|�}|j| j�|f�|j�}qnW|||�j
�}|dk�r�|j�\}
}d
|jk�r�|
|jjd
�}
t|j�|jjd
�}n|t|j�}|j|||��|S|jd ��r�|j||�n"|j||�||jk�r�|j|�|S)Nrrz#unexpected call to parse_starttag()r&rF�'�"r�/>�
r(r(r()rrS)r�check_for_whole_start_tagr�tagfind_tolerantr2r?r;r9rr�attrfind_tolerantr�append�stripZgetpos�countr)r+r/�endswith�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr!)r r@�endposr�attrsr2rC�tag�mZattrname�restZ attrvaluer;�lineno�offsetr
r
rr3-sR
(*
zHTMLParser.parse_starttagcCs�|j}tj||�}|r�|j�}|||d�}|dkr>|dS|dkr~|jd|�rZ|dS|jd|�rjd S||krv|S|dS|dkr�d
S|dkr�dS||kr�|S|dStd��dS)Nrr�/z/>r&r
z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!r(r(r()r�locatestarttagend_tolerantr2r;r0r?)r r@rrbrB�nextr
r
rrU`s.z$HTMLParser.check_for_whole_start_tagcCs2|j}|||d�dks"td��tj||d�}|s:dS|j�}tj||�}|s�|jdk rr|j|||��|St j||d�}|s�|||d�dkr�|dS|j
|�S|jd�j�}|j
d|j��}|j|�|dS|jd�j�}|jdk �r||jk�r|j|||��|S|j|j��|j�|S) Nr&z</zunexpected call to parse_endtagrrFz</>rr()rr?� endendtagr-r;�
endtagfindr2rr/rVrIr9rr*�
handle_endtagr")r r@rr2rJZ namematchZtagnamer r
r
rr4�s8
zHTMLParser.parse_endtagcCs|j||�|j|�dS)N)r]rk)r rar`r
r
rr\�szHTMLParser.handle_startendtagcCsdS)Nr
)r rar`r
r
rr]�szHTMLParser.handle_starttagcCsdS)Nr
)r rar
r
rrk�szHTMLParser.handle_endtagcCsdS)Nr
)r rDr
r
rr:�szHTMLParser.handle_charrefcCsdS)Nr
)r rDr
r
rr=�szHTMLParser.handle_entityrefcCsdS)Nr
)r rr
r
rr/�szHTMLParser.handle_datacCsdS)Nr
)r rr
r
rrM�szHTMLParser.handle_commentcCsdS)Nr
)r Zdeclr
r
rrH�szHTMLParser.handle_declcCsdS)Nr
)r rr
r
rrP�szHTMLParser.handle_picCsdS)Nr
)r rr
r
r�unknown_decl�szHTMLParser.unknown_declcCstjdtdd�t|�S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.r&)�
stacklevel)�warnings�warn�DeprecationWarningr)r �sr
r
rr�s
zHTMLParser.unescape)rr)r)�__name__�
__module__�__qualname__�__doc__r^rrrrrrr!r"rr7rIr6r3rUr4r\r]rkr:r=r/rMrHrPrlrr
r
r
rr?s8 z
3"()rurrnrZhtmlr�__all__rrr>r<r8r1rOZcommentcloserVrW�VERBOSErgrirjrrr
r
r
r�<module>s(
?>