Your IP : 18.226.186.153


Current Path : /proc/self/root/opt/alt/python35/lib64/python3.5/urllib/__pycache__/
Upload File :
Current File : //proc/self/root/opt/alt/python35/lib64/python3.5/urllib/__pycache__/robotparser.cpython-35.pyc



��Yf4�@sddZddlZddlZdgZGdd�d�ZGdd�d�ZGdd�d�ZdS)	a% robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
�N�RobotFileParserc@s�eZdZdZddd�Zdd�Zdd�Zd	d
�Zdd�Zd
d�Z	dd�Z
dd�Zdd�ZdS)rzs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    �cCs>g|_d|_d|_d|_|j|�d|_dS)NFr)�entries�
default_entry�disallow_all�	allow_all�set_url�last_checked)�self�url�r�7/opt/alt/python35/lib64/python3.5/urllib/robotparser.py�__init__s				
zRobotFileParser.__init__cCs|jS)z�Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r	)r
rrr
�mtimeszRobotFileParser.mtimecCsddl}|j�|_dS)zYSets the time the robots.txt file was last fetched to the
        current time.

        rN)�timer	)r
rrrr
�modified(szRobotFileParser.modifiedcCs5||_tjj|�dd�\|_|_dS)z,Sets the URL referring to a robots.txt file.��N)r�urllib�parse�urlparse�host�path)r
rrrr
r0s	zRobotFileParser.set_urlcCs�ytjj|j�}Wnmtjjk
r�}zG|jdkrOd|_n'|jdkrv|jdkrvd|_WYdd}~Xn)X|j	�}|j
|jd�j��dS)	z4Reads the robots.txt URL and feeds it to the parser.��Ti�i�Nzutf-8)rr)
rZrequestZurlopenr�errorZ	HTTPError�coderr�readr�decode�
splitlines)r
�f�err�rawrrr
r5szRobotFileParser.readcCs>d|jkr*|jdkr:||_n|jj|�dS)N�*)�
useragentsrr�append)r
�entryrrr
�
_add_entryBszRobotFileParser._add_entrycCsd}t�}|j�x�|D]�}|sr|dkrJt�}d}n(|dkrr|j|�t�}d}|jd�}|dkr�|d|�}|j�}|s�q |jdd�}t|�dkr |dj�j�|d<tj	j
|dj��|d<|ddkr_|dkrB|j|�t�}|jj|d�d}q |ddkr�|dkr�|j
jt|dd	��d}q |dd
kr |dkr |j
jt|dd��d}q W|dkr�|j|�dS)z�Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        rr��#N�:z
user-agentZdisallowFZallowT)�Entryrr'�find�strip�split�len�lowerrr�unquoter$r%�	rulelines�RuleLine)r
�lines�stater&�line�irrr
rKsL
	

		
	 
			
zRobotFileParser.parsecCs�|jr
dS|jrdS|js'dStjjtjj|��}tjjdd|j|j	|j
|jf�}tjj|�}|s�d}x-|j
D]"}|j|�r�|j|�Sq�W|jr�|jj|�SdS)z=using the parsed robots.txt decide if useragent can fetch urlFTr�/)rrr	rrrr1�
urlunparserZparamsZqueryZfragment�quoter�
applies_to�	allowancer)r
�	useragentrZ
parsed_urlr&rrr
�	can_fetchs$				zRobotFileParser.can_fetchcCsdjdd�|jD��S)NrcSs g|]}t|�d�qS)�
)�str)�.0r&rrr
�
<listcomp>�s	z+RobotFileParser.__str__.<locals>.<listcomp>)�joinr)r
rrr
�__str__�szRobotFileParser.__str__N)
�__name__�
__module__�__qualname__�__doc__rrrrrr'rr>rDrrrr
rs	
	4c@s:eZdZdZdd�Zdd�Zdd�ZdS)	r3zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.cCsY|dkr|rd}tjjtjj|��}tjj|�|_||_dS)NrT)rrr9rr:rr<)r
rr<rrr
r�s
zRuleLine.__init__cCs|jdkp|j|j�S)Nr#)r�
startswith)r
�filenamerrr
r;�szRuleLine.applies_tocCs|jrdndd|jS)NZAllowZDisallowz: )r<r)r
rrr
rD�szRuleLine.__str__N)rErFrGrHrr;rDrrrr
r3�sr3c@sFeZdZdZdd�Zdd�Zdd�Zdd	�Zd
S)r+z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_dS)N)r$r2)r
rrr
r�s	zEntry.__init__cCsjg}x'|jD]}|jd|dg�qWx*|jD]}|jt|�dg�q:Wdj|�S)NzUser-agent: r?r)r$�extendr2r@rC)r
Zret�agentr6rrr
rD�sz
Entry.__str__cCs]|jd�dj�}x=|jD]2}|dkr9dS|j�}||kr#dSq#WdS)z2check if this entry applies to the specified agentr8rr#TF)r.r0r$)r
r=rLrrr
r;�szEntry.applies_tocCs.x'|jD]}|j|�r
|jSq
WdS)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r2r;r<)r
rJr6rrr
r<�szEntry.allowanceN)rErFrGrHrrDr;r<rrrr
r+�s

r+)rHZurllib.parserZurllib.request�__all__rr3r+rrrr
�<module>s
	�

?>