� ��fMc@sJdZddlZddlZddlZejd�Zejd�Zejd�Zejd�Zejd�Z ejd�Z ejd �Z ejd �Z ejd �Z ejd �Zejd �Zejdej�Zejdej�Zejd�Zejd�ZGdd�de�ZGdd�dej�ZdS(uA parser for HTML and XHTML.iNu[&<]u &[a-zA-Z#]u%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]u)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]u <[a-zA-Z]u>u--\s*>u(([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*u$([a-zA-Z][^ />]*)(?:\s|/(?!>))*uJ\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?u]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*u� <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name (?:\s+ # whitespace before attribute name (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name (?:\s*=\s* # value indicator (?:'[^']*' # LITA-enclosed value |\"[^\"]*\" # LIT-enclosed value |[^'\">\s]+ # bare value ) )? ) )* \s* # trailing whitespace uF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) (?:\s*,)* # possibly followed by a comma )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace u#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>cBs5|EeZdZdZddd�Zdd�ZdS(uHTMLParseErroru&Exception raised for all parse errors.cCs3|s t�||_|d|_|d|_dS(Nii(uAssertionErrorumsgulinenouoffset(uselfumsguposition((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu__init__Ps   uHTMLParseError.__init__cCsW|j}|jdk r,|d|j}n|jdk rS|d|jd}n|S(Nu , at line %du , column %di(umsgulinenouNoneuoffset(uselfuresult((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu__str__Vs  uHTMLParseError.__str__N(NN(u__name__u __module__u __qualname__u__doc__uNoneu__init__u__str__(u __locals__((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuHTMLParseErrorMsuHTMLParseErrorcBsd|EeZdZdZd:Zd;dd�Zdd�Zdd �Zd d �Z d d �Z d9Z dd�Z dd�Zdd�Zdd�Zdd�Zddd�Zdd�Zdd�Zdd �Zd!d"�Zd#d$�Zd%d&�Zd'd(�Zd)d*�Zd+d,�Zd-d.�Zd/d0�Zd1d2�Zd3d4�Zd5d6�Z d7d8�Z!d9S(<u HTMLParseru�Find tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). Entity references are passed by calling self.handle_entityref() with the entity reference as the argument. Numeric character references are passed to self.handle_charref() with the string containing the reference as the argument. uscriptustylecCs6|rtjdtdd�n||_|j�dS(u�Initialize and reset this instance. If strict is set to False (the default) the parser will parse invalid markup, otherwise it will raise an error. Note that the strict mode is deprecated. uThe strict mode is deprecated.u stackleveliN(uwarningsuwarnuDeprecationWarningustrictureset(uselfustrict((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu__init__us   uHTMLParser.__init__cCs8d|_d|_t|_d|_tjj|�dS(u1Reset this instance. Loses all unprocessed data.uu???N( urawdataulasttaguinteresting_normalu interestinguNoneu cdata_elemu _markupbaseu ParserBaseureset(uself((u0/opt/alt/python33/lib64/python3.3/html/parser.pyureset�s     uHTMLParser.resetcCs!|j||_|jd�dS(u�Feed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). iN(urawdataugoahead(uselfudata((u0/opt/alt/python33/lib64/python3.3/html/parser.pyufeed�suHTMLParser.feedcCs|jd�dS(uHandle any buffered data.iN(ugoahead(uself((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuclose�suHTMLParser.closecCst||j���dS(N(uHTMLParseErrorugetpos(uselfumessage((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuerror�suHTMLParser.errorcCs|jS(u)Return full source of start tag: '<...>'.(u_HTMLParser__starttag_text(uself((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuget_starttag_text�suHTMLParser.get_starttag_textcCs2|j�|_tjd|jtj�|_dS(Nu </\s*%s\s*>(uloweru cdata_elemureucompileuIu interesting(uselfuelem((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuset_cdata_mode�suHTMLParser.set_cdata_modecCst|_d|_dS(N(uinteresting_normalu interestinguNoneu cdata_elem(uself((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuclear_cdata_mode�s uHTMLParser.clear_cdata_modec Cs|j}d}t|�}x�||kr�|jj||�}|rT|j�}n|jraPn|}||kr�|j|||��n|j||�}||kr�Pn|j}|d|�rqt j ||�r�|j |�}n�|d|�r |j |�}n�|d|�r-|j |�}n�|d|�rN|j|�}ng|d|�r�|jrx|j|�}q�|j|�}n+|d|kr�|jd�|d}nP|dkr\|s�Pn|jr�|jd�n|jd |d�}|dkr8|jd|d�}|dkrB|d}qBn |d7}|j|||��n|j||�}q|d |�rMtj ||�}|r�|j�d d�} |j| �|j�}|d |d�s�|d}n|j||�}qq�d ||d�krI|j|||d ��|j||d �}nPq|d |�r�tj ||�}|r�|jd�} |j| �|j�}|d |d�s�|d}n|j||�}qntj ||�}|re|ra|j�||d�kra|jr'|jd�qa|j�}||krH|}n|j||d�}nPq�|d|kr�|jd �|j||d�}q�Pqdstd��qW|r�||kr�|j r�|j|||��|j||�}n||d�|_dS(Niu<u</u<!--u<?u<!iuEOF in middle of constructu>u&#iu;u&u#EOF in middle of entity or char refuinteresting.search() liedi����(urawdataulenu interestingusearchustartu cdata_elemu handle_datau updateposu startswithu starttagopenumatchuparse_starttagu parse_endtagu parse_commentuparse_piustrictuparse_declarationuparse_html_declarationuerrorufinducharrefugroupuhandle_charrefuendu entityrefuhandle_entityrefu incompleteuAssertionError( uselfuendurawdatauiunumatchuju startswithukuname((u0/opt/alt/python33/lib64/python3.3/html/parser.pyugoahead�s�                    "     uHTMLParser.goaheadcCs�|j}|||d�dks/td��|||d�dkrV|j|�S|||d�dkr}|j|�S|||d�j�d kr�|jd |d�}|d kr�d S|j||d|��|d S|j|�SdS(Niu<!u+unexpected call to parse_html_declaration()iu<!--iu<![i u <!doctypeu>ii����i����(urawdatauAssertionErroru parse_commentuparse_marked_sectionulowerufindu handle_decluparse_bogus_comment(uselfuiurawdataugtpos((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuparse_html_declarations &    u!HTMLParser.parse_html_declarationicCs�|j}|||d�dks/td��|jd|d�}|dkrUd S|ry|j||d|��n|dS( Niu<!u</u"unexpected call to parse_comment()u>i(u<!u</i����i����(urawdatauAssertionErrorufinduhandle_comment(uselfuiureporturawdataupos((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuparse_bogus_comment)s & uHTMLParser.parse_bogus_commentcCs�|j}|||d�dks/td��tj||d�}|sOdS|j�}|j||d|��|j�}|S(Niu<?uunexpected call to parse_pi()ii����(urawdatauAssertionErrorupicloseusearchustartu handle_piuend(uselfuiurawdataumatchuj((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuparse_pi5s &  uHTMLParser.parse_picCsd|_|j|�}|dkr(|S|j}|||�|_g}|jrltj||d�}ntj||d�}|s�td��|j �}|j d�j �|_ }x'||kr�|jr�t j||�}ntj||�}|sPn|j ddd�\} } } | s2d} ns| dd�dko]| d d�kns�| dd�dko�| dd�knr�| dd�} n| r�|j| �} n|j| j �| f�|j �}q�W|||�j�} | dkr�|j�\} }d |jkra| |jjd �} t|j�|jjd �}n|t|j�}|jr�|jd |||�dd �f�n|j|||��|S| jd �r�|j||�n/|j||�||jkr|j|�n|S(Niiu#unexpected call to parse_starttag()iiu'u"u>u/>u u junk characters in start tag: %rii����i����i����(u>u/>(uNoneu_HTMLParser__starttag_textucheck_for_whole_start_tagurawdataustrictutagfindumatchutagfind_tolerantuAssertionErroruendugroupulowerulasttaguattrfinduattrfind_tolerantuunescapeuappendustripugetposucountulenurfinduerroru handle_datauendswithuhandle_startendtaguhandle_starttaguCDATA_CONTENT_ELEMENTSuset_cdata_mode(uselfuiuendposurawdatauattrsumatchukutagumuattrnameurestu attrvalueuendulinenouoffset((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuparse_starttagAs`       00    "uHTMLParser.parse_starttagcCsk|j}|jr'tj||�}ntj||�}|r[|j�}|||d�}|dkrs|dS|dkr�|jd|�r�|dS|jd|�r�d S|jr�|j||d�|jd�n||kr�|S|dSn|dkrd S|dkrd S|jr@|j||�|jd �n||krP|S|dSnt d ��dS(Niu>u/u/>iumalformed empty start taguu6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZumalformed start taguwe should not get here!i����i����i����( urawdataustrictulocatestarttagendumatchulocatestarttagend_tolerantuendu startswithu updateposuerroruAssertionError(uselfuiurawdataumujunext((u0/opt/alt/python33/lib64/python3.3/html/parser.pyucheck_for_whole_start_tag}s>             u$HTMLParser.check_for_whole_start_tagcCs�|j}|||d�dks/td��tj||d�}|sOd S|j�}tj||�}|sW|jdk r�|j |||��|S|j r�|j d|||�f�nt j||d�}|s|||d�dkr|dS|j |�Sn|jd�j�}|jd|j��}|j|�|dS|jd�j�}|jdk r�||jkr�|j |||��|Sn|j|j��|j�|S( Niu</uunexpected call to parse_endtagiubad end tag: %riu</>u>i����(urawdatauAssertionErroru endendtagusearchuendu endtagfindumatchu cdata_elemuNoneu handle_dataustrictuerrorutagfind_tolerantuparse_bogus_commentugroupulowerufindu handle_endtaguclear_cdata_mode(uselfuiurawdataumatchugtposu namematchutagnameuelem((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu parse_endtag�s< &  !  uHTMLParser.parse_endtagcCs!|j||�|j|�dS(N(uhandle_starttagu handle_endtag(uselfutaguattrs((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuhandle_startendtag�suHTMLParser.handle_startendtagcCsdS(N((uselfutaguattrs((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuhandle_starttag�suHTMLParser.handle_starttagcCsdS(N((uselfutag((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu handle_endtag�suHTMLParser.handle_endtagcCsdS(N((uselfuname((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuhandle_charref�suHTMLParser.handle_charrefcCsdS(N((uselfuname((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuhandle_entityref�suHTMLParser.handle_entityrefcCsdS(N((uselfudata((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu handle_data�suHTMLParser.handle_datacCsdS(N((uselfudata((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuhandle_comment�suHTMLParser.handle_commentcCsdS(N((uselfudecl((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu handle_decl�suHTMLParser.handle_declcCsdS(N((uselfudata((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu handle_pi�suHTMLParser.handle_picCs$|jr |jd|f�ndS(Nuunknown declaration: %r(ustrictuerror(uselfudata((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu unknown_decl�s uHTMLParser.unknown_declcCs8d|kr|Sdd�}tjd||dtj�S(Nu&c SsB|j�d}yx|ddkr�|dd�}|dd krht|dd�jd�d�}nt|jd��}t|�SWntk r�d|SYn�Xdd lm}||kr�||S|jd�r�d |Sx[td t |��D]<}|d|�|kr�||d|�||d�Sq�Wd |SdS( Niu#iuxuXu;iu&#(uhtml5u&i(uxuX( ugroupsuinturstripuchru ValueErroru html.entitiesuhtml5uendswithurangeulen(usucuhtml5ux((u0/opt/alt/python33/lib64/python3.3/html/parser.pyureplaceEntities�s&%   $u,HTMLParser.unescape.<locals>.replaceEntitiesu&&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))uflags(ureusubuASCII(uselfusureplaceEntities((u0/opt/alt/python33/lib64/python3.3/html/parser.pyuunescape�s    uHTMLParser.unescapeN(uscriptustyleF("u__name__u __module__u __qualname__u__doc__uCDATA_CONTENT_ELEMENTSuFalseu__init__uresetufeeducloseuerroruNoneu_HTMLParser__starttag_textuget_starttag_textuset_cdata_modeuclear_cdata_modeugoaheaduparse_html_declarationuparse_bogus_commentuparse_piuparse_starttagucheck_for_whole_start_tagu parse_endtaguhandle_startendtaguhandle_starttagu handle_endtaguhandle_charrefuhandle_entityrefu handle_datauhandle_commentu handle_declu handle_piu unknown_decluunescape(u __locals__((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu HTMLParser_s:       i  < + *          u HTMLParser(u__doc__u _markupbaseureuwarningsucompileuinteresting_normalu incompleteu entityrefucharrefu starttagopenupicloseu commentcloseutagfindutagfind_tolerantuattrfinduattrfind_tolerantuVERBOSEulocatestarttagendulocatestarttagend_tolerantu endendtagu endtagfindu ExceptionuHTMLParseErroru ParserBaseu HTMLParser(((u0/opt/alt/python33/lib64/python3.3/html/parser.pyu<module>s0