Your IP : 18.119.137.162


Current Path : /proc/self/root/opt/alt/python310/lib64/python3.10/html/__pycache__/
Upload File :
Current File : //proc/self/root/opt/alt/python310/lib64/python3.10/html/__pycache__/parser.cpython-310.opt-1.pyc

o

�?Og�C�@s�dZddlZddlZddlmZdgZe�d�Ze�d�Ze�d�Z	e�d�Z
e�d	�Ze�d
�Ze�d�Z
e�d�Ze�d
�Ze�dej�Ze�d
�Ze�d�ZGdd�dej�ZdS)zA parser for HTML and XHTML.�N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@s�eZdZdZdZdd�dd�Zdd�Zd	d
�Zdd�Zd
Z	dd�Z
dd�Zdd�Zdd�Z
dd�Zd7dd�Zdd�Zdd�Zdd �Zd!d"�Zd#d$�Zd%d&�Zd'd(�Zd)d*�Zd+d,�Zd-d.�Zd/d0�Zd1d2�Zd3d4�Zd5d6�Zd
S)8raEFind tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )ZscriptZstyleT)�convert_charrefscCs||_|��dS)z�Initialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r�reset)�selfr�r�2/opt/alt/python310/lib64/python3.10/html/parser.py�__init__VszHTMLParser.__init__cCs(d|_d|_t|_d|_tj�|�dS)z1Reset this instance.  Loses all unprocessed data.�z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser�rrrr	r_s
zHTMLParser.resetcCs|j||_|�d�dS)z�Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        rN)r�goahead�r�datarrr	�feedgszHTMLParser.feedcCs|�d�dS)zHandle any buffered data.�N)rrrrr	�closepszHTMLParser.closeNcCs|jS)z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_textrrrr	�get_starttag_textvszHTMLParser.get_starttag_textcCs$|��|_t�d|jtj�|_dS)Nz</\s*%s\s*>)�lowerr�re�compile�Ir)r�elemrrr	�set_cdata_modezs
zHTMLParser.set_cdata_modecCst|_d|_dS�N)rrrrrrr	�clear_cdata_mode~s
zHTMLParser.clear_cdata_modecCs2|j}d}t|�}||k�r�|jr;|js;|�d|�}|dkr:|�dt||d��}|dkr8t�d��	||�s8�n�|}n|j
�	||�}|rI|��}n|jrN�n�|}||kro|jrf|jsf|�t
|||���n	|�|||��|�||�}||kr{�nf|j}|d|��rt�||�r�|�|�}	n>|d|�r�|�|�}	n3|d|�r�|�|�}	n(|d|�r�|�|�}	n|d	|�r�|�|�}	n|d
|kr�|�d�|d
}	n�n|	dk�r|sאn
|�d|d
�}	|	dkr�|�d|d
�}	|	dkr�|d
}	n|	d
7}	|j�r|j�s|�t
|||	���n	|�|||	��|�||	�}n�|d|��rlt�||�}|�rO|��d
d�}
|�|
�|��}	|d|	d
��sH|	d
}	|�||	�}q	d||d�v�rk|�|||d
��|�||d
�}nu|d|��r�t�||�}|�r�|�d
�}
|�|
�|��}	|d|	d
��s�|	d
}	|�||	�}q	t�||�}|�r�|�r�|��||d�k�r�|��}	|	|k�r�|}	|�||d
�}n|d
|k�r�|�d�|�||d
�}nn||ks|�r||k�r|j�s|j�r|j�s|�t
|||���n	|�|||��|�||�}||d�|_dS)Nr�<�&�"z[\s;]z</�<!--z<?z<!rrz&#�����;)r�lenrr�find�rfind�maxrr�searchr�start�handle_datarZ	updatepos�
startswith�starttagopen�match�parse_starttag�parse_endtag�
parse_comment�parse_pi�parse_html_declaration�charref�group�handle_charref�end�	entityref�handle_entityref�
incomplete)rr=r�i�n�jZampposr4r2�k�namerrr	r�s�
��







�




�kzHTMLParser.goaheadcCs�|j}|||d�dkr|�|�S|||d�dkr!|�|�S|||d���dkrJ|�d|d�}|dkr;dS|�||d	|��|d
S|�|�S)N�r'�z<![�	z	<!doctyperr)r(r)rr7Zparse_marked_sectionrr,�handle_decl�parse_bogus_comment)rrAr�gtposrrr	r9�s


z!HTMLParser.parse_html_declarationrcCsD|j}|�d|d�}|dkrdS|r|�||d|��|dS)Nrr(r)r)rr,�handle_comment)rrAZreportr�posrrr	rJszHTMLParser.parse_bogus_commentcCsH|j}t�||d�}|sdS|��}|�||d|��|��}|S)Nr(r))r�picloser/r0�	handle_pir=)rrArr4rCrrr	r8 szHTMLParser.parse_picCs�d|_|�|�}|dkr|S|j}|||�|_g}t�||d�}|��}|�d���|_}||kr�t	�||�}|s=nS|�ddd�\}	}
}|
sLd}n-|dd�dkr^|dd�kssn|dd�dkrq|dd�krynn|dd�}|rt
|�}|�|	��|f�|��}||ks4|||���}|dvr�|�
�\}
}d	|jvr�|
|j�d	�}
t|j�|j�d	�}n|t|j�}|�|||��|S|�d
�r�|�||�|S|�||�||jvr�|�|�|S)Nrrr(rG�'r)�")r�/>�
rR)r�check_for_whole_start_tagr�tagfind_tolerantr4r=r;rr
�attrfind_tolerantr�append�stripZgetpos�countr+r-r1�endswith�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr!)rrA�endposr�attrsr4rD�tag�m�attrname�restZ	attrvaluer=�lineno�offsetrrr	r5,sV
&(�

�
�

zHTMLParser.parse_starttagcCs�|j}t�||�}|rU|��}|||d�}|dkr|dS|dkr?|�d|�r-|dS|�d|�r5dS||kr;|S|dS|dkrEdS|dvrKdS||krQ|S|dStd	��)
Nrr�/rRr(r)rz6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r�locatestarttagend_tolerantr4r=r2�AssertionError)rrArrarC�nextrrr	rT_s.z$HTMLParser.check_for_whole_start_tagcCs|j}t�||d�}|sdS|��}t�||�}|s`|jdur+|�|||��|St�||d�}|sH|||d�dkrC|dS|�	|�S|�
d���}|�d|���}|�
|�|dS|�
d���}|jdur|||jkr||�|||��|S|�
|�|��|S)Nrr)r(rGz</>r)r�	endendtagr/r=�
endtagfindr4rr1rUrJr;rr,�
handle_endtagr#)rrArr4rKZ	namematchZtagnamer rrr	r6�s6





zHTMLParser.parse_endtagcCs|�||�|�|�dSr")r\rl�rr`r_rrr	r[�szHTMLParser.handle_startendtagcC�dSr"rrmrrr	r\��zHTMLParser.handle_starttagcCrnr"r)rr`rrr	rl�rozHTMLParser.handle_endtagcCrnr"r�rrErrr	r<�rozHTMLParser.handle_charrefcCrnr"rrprrr	r?�rozHTMLParser.handle_entityrefcCrnr"rrrrr	r1�rozHTMLParser.handle_datacCrnr"rrrrr	rL�rozHTMLParser.handle_commentcCrnr"r)rZdeclrrr	rI�rozHTMLParser.handle_declcCrnr"rrrrr	rO�rozHTMLParser.handle_picCrnr"rrrrr	�unknown_decl�rozHTMLParser.unknown_decl)r)�__name__�
__module__�__qualname__�__doc__r]r
rrrrrr!r#rr9rJr8r5rTr6r[r\rlr<r?r1rLrIrOrqrrrr	r>s8		z
3"()rurrZhtmlr�__all__rrr@r>r:r3rNZcommentcloserUrV�VERBOSErgrjrkrrrrrr	�<module>s,








��



?>