""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""

import urllib.parse, urllib.request

__all__ = ["RobotFileParser"]


class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        for line in lines:
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches
        # the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
                                       parsed_url.params, parsed_url.query,
                                       parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        return ''.join([str(entry) + "\n" for entry in self.entries])


class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path


class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True
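
# A minimal offline sketch of how the parser above consumes robots.txt
# lines, assuming the classes above are in scope. The agent name
# "ExampleBot" and the rules are hypothetical examples, not anything
# shipped with the module.
rp = RobotFileParser()
rp.parse([
    "User-agent: ExampleBot",
    "Disallow: /private/",   # rule line attached to the ExampleBot entry
    "",
    "User-agent: *",
    "Disallow: /tmp/",       # "*" entry becomes default_entry, consulted last
])
print(rp.can_fetch("ExampleBot", "/private/page.html"))  # False
print(rp.can_fetch("ExampleBot", "/public/page.html"))   # True (no rule matches)
print(rp.can_fetch("OtherBot", "/tmp/x"))                # False, via the * entry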
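
# The usual fetch-based workflow, assuming this module is importable as
# urllib.robotparser; https://example.com is a hypothetical placeholder URL.
import urllib.robotparser

rp = urllib.robotparser.RobotFileParser()
rp.set_url("https://example.com/robots.txt")
rp.read()      # 401/403 -> disallow everything; other HTTP errors >= 400 -> allow everything
rp.modified()  # stamp the fetch time so mtime() can be polled later
if rp.can_fetch("ExampleBot", "https://example.com/some/page"):
    print("fetch allowed")
print(rp.mtime())  # time of the last recorded check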