Your IP : 3.139.234.68


Current Path : /proc/self/root/opt/alt/python37/lib64/python3.7/html/__pycache__/
Upload File :
Current File : //proc/self/root/opt/alt/python37/lib64/python3.7/html/__pycache__/parser.cpython-37.opt-1.pyc

B

� f9E�@s�dZddlZddlZddlZddlmZdgZe�d�Ze�d�Z	e�d�Z
e�d�Ze�d	�Ze�d
�Z
e�d�Ze�d�Ze�d
�Ze�dej�Ze�d
�Ze�d�ZGdd�dej�ZdS)zA parser for HTML and XHTML.�N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@s�eZdZdZdZdd�dd�Zdd�Zd	d
�Zdd�Zd
Z	dd�Z
dd�Zdd�Zdd�Z
dd�Zd9dd�Zdd�Zdd�Zdd �Zd!d"�Zd#d$�Zd%d&�Zd'd(�Zd)d*�Zd+d,�Zd-d.�Zd/d0�Zd1d2�Zd3d4�Zd5d6�Zd7d8�Zd
S):raEFind tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )ZscriptZstyleT)�convert_charrefscCs||_|��dS)z�Initialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r�reset)�selfr�r�0/opt/alt/python37/lib64/python3.7/html/parser.py�__init__WszHTMLParser.__init__cCs(d|_d|_t|_d|_tj�|�dS)z1Reset this instance.  Loses all unprocessed data.�z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser)rrrr	r`s
zHTMLParser.resetcCs|j||_|�d�dS)z�Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        rN)r�goahead)r�datarrr	�feedhszHTMLParser.feedcCs|�d�dS)zHandle any buffered data.�N)r)rrrr	�closeqszHTMLParser.closeNcCs|jS)z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_text)rrrr	�get_starttag_textwszHTMLParser.get_starttag_textcCs$|��|_t�d|jtj�|_dS)Nz</\s*%s\s*>)�lowerr�re�compile�Ir)r�elemrrr	�set_cdata_mode{s
zHTMLParser.set_cdata_modecCst|_d|_dS)N)rrr)rrrr	�clear_cdata_modeszHTMLParser.clear_cdata_modecCs@|j}d}t|�}�x�||k�r�|jrx|jsx|�d|�}|dkr�|�dt||d��}|dkrrt�d��	||�srP|}n(|j
�	||�}|r�|��}n|jr�P|}||kr�|jr�|js�|�t
|||���n|�|||��|�||�}||kr�P|j}|d|��rDt�||��r |�|�}	n�|d|��r8|�|�}	nl|d|��rP|�|�}	nT|d|��rh|�|�}	n<|d	|��r�|�|�}	n$|d
|k�r�|�d�|d
}	nP|	dk�r6|�s�P|�d|d
�}	|	dk�r�|�d|d
�}	|	dk�r�|d
}	n|	d
7}	|j�r$|j�s$|�t
|||	���n|�|||	��|�||	�}q|d|��r�t�||�}|�r�|��d
d�}
|�|
�|��}	|d|	d
��s�|	d
}	|�||	�}qn:d||d�k�r�|�|||d
��|�||d
�}Pq|d|�rt�||�}|�rF|�d
�}
|�|
�|��}	|d|	d
��s8|	d
}	|�||	�}qt�||�}|�r�|�r�|��||d�k�r�|��}	|	|k�r�|}	|�||d
�}Pn,|d
|k�r�|�d�|�||d
�}nPqqW|�r.||k�r.|j�s.|j�r|j�s|�t
|||���n|�|||��|�||�}||d�|_dS)Nr�<�&�"z[\s;]z</z<!--z<?z<!rrz&#�����;)r�lenrr�find�rfind�maxrr�searchr�start�handle_datarZ	updatepos�
startswith�starttagopen�match�parse_starttag�parse_endtag�
parse_comment�parse_pi�parse_html_declaration�charref�group�handle_charref�end�	entityref�handle_entityref�
incomplete)rr9r�i�n�jZampposr0r.�k�namerrr	r�s�












zHTMLParser.goaheadcCs�|j}|||d�dkr$|�|�S|||d�dkrB|�|�S|||d���dkr�|�d|d�}|dkrvdS|�||d	|��|d
S|�|�SdS)N�z<!--�z<![�	z	<!doctyperr%r$r)rr3Zparse_marked_sectionrr(�handle_decl�parse_bogus_comment)rr=r�gtposrrr	r5s

z!HTMLParser.parse_html_declarationrcCsD|j}|�d|d�}|dkr"dS|r<|�||d|��|dS)Nrr$r%r)rr(�handle_comment)rr=Zreportr�posrrr	rFszHTMLParser.parse_bogus_commentcCsH|j}t�||d�}|sdS|��}|�||d|��|��}|S)Nr$r%)r�picloser+r,�	handle_pir9)rr=rr0r?rrr	r4!szHTMLParser.parse_picCs�d|_|�|�}|dkr|S|j}|||�|_g}t�||d�}|��}|�d���|_}x�||k�r t	�||�}|s~P|�ddd�\}	}
}|
s�d}nZ|dd�dkr�|dd�ks�n|dd�dkr�|dd�kr�nn|dd�}|�rt
|�}|�|	��|f�|��}qbW|||���}|dk�r�|�
�\}
}d	|jk�r||
|j�d	�}
t|j�|j�d	�}n|t|j�}|�|||��|S|�d
��r�|�||�n"|�||�||jk�r�|�|�|S)Nrrr$rC�'r%�")rz/>�
z/>)r�check_for_whole_start_tagr�tagfind_tolerantr0r9r7rr
�attrfind_tolerantr�append�stripZgetpos�countr'r)r-�endswith�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr)rr=�endposr�attrsr0r@�tag�mZattrname�restZ	attrvaluer9�lineno�offsetrrr	r1-sP
&(

zHTMLParser.parse_starttagcCs�|j}t�||�}|r�|��}|||d�}|dkr>|dS|dkr~|�d|�rZ|dS|�d|�rjdS||krv|S|dS|dkr�dS|dkr�dS||kr�|S|dStd	��dS)
Nrr�/z/>r$r%rz6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r�locatestarttagend_tolerantr0r9r.�AssertionError)rr=rr\r?�nextrrr	rO`s.z$HTMLParser.check_for_whole_start_tagcCs|j}t�||d�}|sdS|��}t�||�}|s�|jdk	rV|�|||��|St�||d�}|s�|||d�dkr�|dS|�	|�S|�
d���}|�d|���}|�
|�|dS|�
d���}|jdk	r�||jkr�|�|||��|S|�
|�|��|S)Nrr%r$rCz</>r)r�	endendtagr+r9�
endtagfindr0rr-rPrFr7rr(�
handle_endtagr )rr=rr0rGZ	namematchZtagnamerrrr	r2�s6





zHTMLParser.parse_endtagcCs|�||�|�|�dS)N)rWrf)rr[rZrrr	rV�szHTMLParser.handle_startendtagcCsdS)Nr)rr[rZrrr	rW�szHTMLParser.handle_starttagcCsdS)Nr)rr[rrr	rf�szHTMLParser.handle_endtagcCsdS)Nr)rrArrr	r8�szHTMLParser.handle_charrefcCsdS)Nr)rrArrr	r;�szHTMLParser.handle_entityrefcCsdS)Nr)rrrrr	r-�szHTMLParser.handle_datacCsdS)Nr)rrrrr	rH�szHTMLParser.handle_commentcCsdS)Nr)rZdeclrrr	rE�szHTMLParser.handle_declcCsdS)Nr)rrrrr	rK�szHTMLParser.handle_picCsdS)Nr)rrrrr	�unknown_decl�szHTMLParser.unknown_declcCstjdtdd�t|�S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.r$)�
stacklevel)�warnings�warn�DeprecationWarningr)r�srrr	r�s
zHTMLParser.unescape)r)�__name__�
__module__�__qualname__�__doc__rXr
rrrrrrr rr5rFr4r1rOr2rVrWrfr8r;r-rHrErKrgrrrrr	r?s8		z
3"()rprrirZhtmlr�__all__rrr<r:r6r/rJZcommentcloserPrQ�VERBOSErardrerrrrrr	�<module>s(













?>