File "robotparser.cpython-36.opt-1.pyc"

Full Path: /home/attunedd/public_html/byp/izo/con7ext_sym404/rintoar.txt/opt/alt/python36/lib64/python3.6/urllib/__pycache__/robotparser.cpython-36.opt-1.pyc
File size: 6.9 KB
MIME-type: text/x-bytecode.python
Charset: 8 bit

3

 f"@s\dZddlZddlZddlZdgZejddZGdddZGdddZ	Gd	d
d
Z
dS)a% robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
NRobotFileParserRequestRatezrequests secondsc@sjeZdZdZdddZddZddZd	d
ZddZd
dZ	ddZ
ddZddZddZ
ddZdS)rzs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    cCs,g|_d|_d|_d|_|j|d|_dS)NFr)entries
default_entrydisallow_all	allow_allset_urllast_checked)selfurlr
7/opt/alt/python36/lib64/python3.6/urllib/robotparser.py__init__s
zRobotFileParser.__init__cCs|jS)zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r
)rr
r
rmtime$szRobotFileParser.mtimecCsddl}|j|_dS)zYSets the time the robots.txt file was last fetched to the
        current time.

        rN)timer
)rrr
r
rmodified-szRobotFileParser.modifiedcCs&||_tjj|dd\|_|_dS)z,Sets the URL referring to a robots.txt file.N)rurllibparseurlparsehostpath)rrr
r
rr	5szRobotFileParser.set_urlcCsytjj|j}WnRtjjk
rd}z2|jdkr:d|_n|jdkrT|jdkrTd|_WYdd}~XnX|j	}|j
|jdjdS)	z4Reads the robots.txt URL and feeds it to the parser.TiiNzutf-8)rr)
rZrequestZurlopenrerrorZ	HTTPErrorcoderrreadrdecode
splitlines)rferrrawr
r
rr:s
zRobotFileParser.readcCs,d|jkr|jdkr(||_n|jj|dS)N*)
useragentsrrappend)rentryr
r
r
_add_entryGs

zRobotFileParser._add_entrycCs6d}t}|jx|D]}|sT|dkr8t}d}n|dkrT|j|t}d}|jd}|dkrr|d|}|j}|sq|jdd}t|dkr|djj|d<tj	j
|dj|d<|ddkr|dkr|j|t}|jj|dd}q|ddkr4|dkr|j
jt|dd	d}q|dd
krh|dkr|j
jt|ddd}q|ddkr|dkr|djjrt|d|_d}q|dd
kr|dkr|djd}t|dkr|djjr|djjrtt|dt|d|_d}qW|dkr2|j|dS)zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        rr#N:z
user-agentZdisallowFZallowTzcrawl-delayzrequest-rate/)Entryrr(findstripsplitlenlowerrrunquoter%r&	rulelinesRuleLineisdigitintdelayrreq_rate)rlinesstater'lineiZnumbersr
r
rrPsd






 
zRobotFileParser.parsecCs|jr
dS|jrdS|jsdStjjtjj|}tjjdd|j|j	|j
|jf}tjj|}|sfd}x"|j
D]}|j|rn|j|SqnW|jr|jj|SdS)z=using the parsed robots.txt decide if useragent can fetch urlFTrr,)rrr
rrrr3
urlunparserZparamsZqueryZfragmentquoter
applies_to	allowancer)r	useragentrZ
parsed_urlr'r
r
r	can_fetchs$
zRobotFileParser.can_fetchcCs4|jsdSx|jD]}|j|r|jSqW|jjS)N)rrr@r8r)rrBr'r
r
rcrawl_delays

zRobotFileParser.crawl_delaycCs4|jsdSx|jD]}|j|r|jSqW|jjS)N)rrr@r9r)rrBr'r
r
rrequest_rates

zRobotFileParser.request_ratecCs0|j}|jdk	r||jg}djtt|dS)N
)rrjoinmapstr)rrr
r
r__str__s
zRobotFileParser.__str__N)r)__name__
__module____qualname____doc__rrrr	rr(rrCrDrErJr
r
r
rrs
	
	Cc@s(eZdZdZddZddZddZdS)	r5zoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.cCs>|dkr|rd}tjjtjj|}tjj||_||_dS)NrT)rrr>rr?rrA)rrrAr
r
rrs
zRuleLine.__init__cCs|jdkp|j|jS)Nr$)r
startswith)rfilenamer
r
rr@szRuleLine.applies_tocCs|jr
dndd|jS)NZAllowZDisallowz: )rAr)rr
r
rrJszRuleLine.__str__N)rKrLrMrNrr@rJr
r
r
rr5sr5c@s0eZdZdZddZddZddZdd	Zd
S)r-z?An entry has one or more user-agents and zero or more rulelinescCsg|_g|_d|_d|_dS)N)r%r4r8r9)rr
r
rrszEntry.__init__cCsg}x|jD]}|jd|qW|jdk	r@|jd|j|jdk	rj|j}|jd|jd|j|jtt|j	|jddj
|S)NzUser-agent: z
Crawl-delay: zRequest-rate: r,rrF)r%r&r8r9ZrequestsZsecondsextendrHrIr4rG)rZretagentZrater
r
rrJs


z
Entry.__str__cCsF|jddj}x.|jD]$}|dkr*dS|j}||krdSqWdS)z2check if this entry applies to the specified agentr,rr$TF)r0r2r%)rrBrRr
r
rr@szEntry.applies_tocCs$x|jD]}|j|r|jSqWdS)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)r4r@rA)rrPr<r
r
rrAs

zEntry.allowanceN)rKrLrMrNrrJr@rAr
r
r
rr-s


r-)rNcollectionsZurllib.parserZurllib.request__all__
namedtuplerrr5r-r
r
r
r<module>s2