File "_tokenizer.cpython-38.pyc"

Full Path: /home/attunedd/public_html/byp/izo/con7ext_sym404/rintoar.txt/usr/lib/python3.8/site-packages/pip/_vendor/html5lib/__pycache__/_tokenizer.cpython-38.pyc
File size: 38.69 KB
MIME-type: text/x-bytecode.python
Charset: 8 bit

U

.e$+@sddlmZmZmZddlmZddlmZddl	m
Z
ddl	mZddl	mZm
Z
ddl	mZmZmZdd	l	mZmZdd
l	mZddlmZddlmZeeZGd
ddeZdS))absolute_importdivisionunicode_literals)unichr)deque)spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypes
tagTokenTypes)replacementCharacters)HTMLInputStream)TriecsdeZdZdZdfdd	ZddZddZdd
dZdd
ZddZ	ddZ
ddZddZddZ
ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9Zd:d;Zd<d=Z d>d?Z!d@dAZ"dBdCZ#dDdEZ$dFdGZ%dHdIZ&dJdKZ'dLdMZ(dNdOZ)dPdQZ*dRdSZ+dTdUZ,dVdWZ-dXdYZ.dZd[Z/d\d]Z0d^d_Z1d`daZ2dbdcZ3dddeZ4dfdgZ5dhdiZ6djdkZ7dldmZ8dndoZ9dpdqZ:drdsZ;dtduZ<dvdwZ=dxdyZ>dzd{Z?d|d}Z@d~dZAddZBddZCddZDddZEddZFddZGddZHddZIddZJddZKddZLZMS)
HTMLTokenizera	 This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    NcsFt|f||_||_d|_g|_|j|_d|_d|_t	t
|dS)NF)rstreamparserZ
escapeFlagZ
lastFourChars	dataStatestateescapecurrentTokensuperr__init__)selfrrkwargs	__class__C/usr/lib/python3.8/site-packages/pip/_vendor/html5lib/_tokenizer.pyr"szHTMLTokenizer.__init__ccsPtg|_|rL|jjr6td|jjddVq|jr
|jVq6q
dS)z This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        
ParseErrorrtypedataN)r
tokenQueuerrerrorsrpoppopleftrr!r!r"__iter__1s
zHTMLTokenizer.__iter__c	%Cst}d}|rt}d}g}|j}||krH|tk	rH|||j}q"td||}|tkrt|}|j	t
ddd|idnbd|krd	ksn|d
krd}|j	t
ddd|idn d|krd
ksnd|krdksnd|krdksnd|kr,dksn|tddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d
g#kr|j	t
ddd|idzt|}Wn>t
k
r|d6}td|d?Btd7|d8@B}YnX|d9kr|j	t
dd:d;|j||S)<zThis function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        
r#z$illegal-codepoint-for-numeric-entity	charAsIntr%r&Zdatavarsiii�riiiiiiiiiiiiiiiiiiiii	i	i
i
iiiii
i
iiiiiiii;z numeric-entity-without-semicolonr$)rr
rcharrappendintjoinrr'r	frozensetchr
ValueErrorunget)	rZisHexZallowedradix	charStackcr0r:vr!r!r"consumeNumberEntityAs







&
z!HTMLTokenizer.consumeNumberEntityFc	Csd}|jg}|dtksB|dtddfksB|dk	rV||dkrV|j|dn|ddkrd}||j|ddkrd}||j|r|dtks|s|dtkr|j|d||}n4|j	t
d	d
d|j|dd|}nh|dtk	rDt
d|s0qD||jqz$t
d|dd}t|}Wntk
rd}YnX|dk	r@|dd
kr|j	t
d	dd|dd
kr|r||tks||tks||dkr|j|dd|}n.t|}|j||d||d7}n4|j	t
d	dd|j|dd|}|r|jddd|7<n*|tkrd}nd}|j	t
||ddS)N&r<#F)xXTr#zexpected-numeric-entityr$r/r9znamed-entity-without-semicolon=zexpected-named-entityr&rSpaceCharacters
Characters)rr:rrrAr;r
rrFr'rr)r=entitiesTrieZhas_keys_with_prefixZlongest_prefixlenKeyErrorr
r	r)	rallowedChar
fromAttributeoutputrChexZ
entityNameZentityLengthZ	tokenTyper!r!r"
consumeEntitys~







zHTMLTokenizer.consumeEntitycCs|j|dddS)zIThis method replaces the need for "entityInAttributeValueState".
        T)rSrTN)rW)rrSr!r!r"processEntityInAttributesz&HTMLTokenizer.processEntityInAttributecCs|j}|dtkrp|dt|d<|dtdkrp|drR|jtddd|drp|jtdd	d|j||j|_d
S)zThis method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r%nameEndTagr&r#zattributes-in-end-tagr$selfClosingzself-closing-flag-on-end-tagN)	rr	translaterrr'r;rr)rtokenr!r!r"emitCurrentTokenszHTMLTokenizer.emitCurrentTokencCs|j}|dkr|j|_n|dkr.|j|_n|dkrd|jtddd|jtdddn`|tkrpdS|t	kr|jtd	||j
t	d
dn&|j
d}|jtd||dd
S)NrGrHr#invalid-codepointr$rOFrNTrGrHr_)rr:entityDataStatertagOpenStater'r;rrr
charsUntilrr&charsr!r!r"rs.




zHTMLTokenizer.dataStatecCs||j|_dSNT)rWrrr+r!r!r"rbszHTMLTokenizer.entityDataStatecCs|j}|dkr|j|_n|dkr.|j|_n|tkr:dS|dkrp|jtddd|jtdd	dnT|t	kr|jtd
||j
t	ddn&|j
d}|jtd||ddS)
NrGrHFr_r#r`r$rOr2rNTra)rr:characterReferenceInRcdatarrcdataLessThanSignStaterr'r;rrrdrer!r!r"rcdataStates.




zHTMLTokenizer.rcdataStatecCs||j|_dSrg)rWrjrr+r!r!r"rh1sz(HTMLTokenizer.characterReferenceInRcdatacCs|j}|dkr|j|_nh|dkrR|jtddd|jtdddn2|tkr^dS|jd	}|jtd||dd
SNrHr_r#r`r$rOr2F)rHr_T)	rr:rawtextLessThanSignStaterr'r;rrrdrer!r!r"rawtextState6s"


zHTMLTokenizer.rawtextStatecCs|j}|dkr|j|_nh|dkrR|jtddd|jtdddn2|tkr^dS|jd	}|jtd||dd
Srk)	rr:scriptDataLessThanSignStaterr'r;rrrdrer!r!r"scriptDataStateHs"


zHTMLTokenizer.scriptDataStatecCsr|j}|tkrdS|dkrL|jtddd|jtdddn"|jtd||jdddS)	NFr_r#r`r$rOr2T)rr:rr'r;rrdrr&r!r!r"plaintextStateZs

zHTMLTokenizer.plaintextStatecCs|j}|dkr|j|_n|dkr.|j|_n|tkrVtd|gddd|_|j|_n|dkr|j	
tddd	|j	
td
dd	|j|_nt|dkr|j	
tdd
d	|j||j
|_n@|j	
tddd	|j	
td
dd	|j||j|_dS)N!/ZStartTagF)r%rYr&r[ZselfClosingAcknowledged>r#z'expected-tag-name-but-got-right-bracketr$rOz<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerHT)rr:markupDeclarationOpenStatercloseTagOpenStater
rrtagNameStater'r;rrAbogusCommentStaterpr!r!r"rcis@





zHTMLTokenizer.tagOpenStatecCs|j}|tkr0td|gdd|_|j|_n|dkrX|jtddd|j	|_nn|t
kr|jtddd|jtd	d
d|j	|_n0|jtddd|id
|j||j|_dS)NrZFr%rYr&r[rtr#z*expected-closing-tag-but-got-right-bracketr$z expected-closing-tag-but-got-eofrO</z!expected-closing-tag-but-got-charr&r1T)
rr:r
rrrxrr'r;rrrAryrpr!r!r"rws2



zHTMLTokenizer.closeTagOpenStatecCs|j}|tkr|j|_n|dkr.|n~|tkrV|jt	ddd|j
|_nV|dkrh|j|_nD|dkr|jt	ddd|jdd	7<n|jd|7<d
S)Nrtr#zeof-in-tag-namer$rsr_r`rYr2T)
rr:rbeforeAttributeNameStaterr^rr'r;rrselfClosingStartTagStaterrpr!r!r"rxs&




zHTMLTokenizer.tagNameStatecCsP|j}|dkr"d|_|j|_n*|jtddd|j||j	|_dSNrsr/rOrHr$T)
rr:temporaryBufferrcdataEndTagOpenStaterr'r;rrArjrpr!r!r"ris

z%HTMLTokenizer.rcdataLessThanSignStatecCsX|j}|tkr*|j|7_|j|_n*|jtddd|j	||j
|_dSNrOr{r$T)rr:r
rrcdataEndTagNameStaterr'r;rrArjrpr!r!r"rs

z#HTMLTokenizer.rcdataEndTagOpenStatecCs|jo|jd|jk}|j}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j	|_n||dkr|rtd|jgdd|_|
|j|_nH|tkr|j|7_n0|j
tdd|jd	|j||j|_d
SNrYrZFrzrsrtrOr{r$T)rlowerrrr:rrr|rr}r^rr
r'r;rArjrZappropriater&r!r!r"rs@



z#HTMLTokenizer.rcdataEndTagNameStatecCsP|j}|dkr"d|_|j|_n*|jtddd|j||j	|_dSr~)
rr:rrawtextEndTagOpenStaterr'r;rrArmrpr!r!r"rls

z&HTMLTokenizer.rawtextLessThanSignStatecCsX|j}|tkr*|j|7_|j|_n*|jtddd|j	||j
|_dSr)rr:r
rrawtextEndTagNameStaterr'r;rrArmrpr!r!r"rs

z$HTMLTokenizer.rawtextEndTagOpenStatecCs|jo|jd|jk}|j}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j	|_n||dkr|rtd|jgdd|_|
|j|_nH|tkr|j|7_n0|j
tdd|jd	|j||j|_d
Sr)rrrrr:rrr|rr}r^rr
r'r;rArmrr!r!r"rs@



z$HTMLTokenizer.rawtextEndTagNameStatecCsx|j}|dkr"d|_|j|_nR|dkrJ|jtddd|j|_n*|jtddd|j	||j
|_dS)	Nrsr/rrrOz<!r$rHT)rr:rscriptDataEndTagOpenStaterr'r;rscriptDataEscapeStartStaterArorpr!r!r"rns


z)HTMLTokenizer.scriptDataLessThanSignStatecCsX|j}|tkr*|j|7_|j|_n*|jtddd|j	||j
|_dSr)rr:r
rscriptDataEndTagNameStaterr'r;rrArorpr!r!r"r,s

z'HTMLTokenizer.scriptDataEndTagOpenStatecCs|jo|jd|jk}|j}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j	|_n||dkr|rtd|jgdd|_|
|j|_nH|tkr|j|7_n0|j
tdd|jd	|j||j|_d
Sr)rrrrr:rrr|rr}r^rr
r'r;rArorr!r!r"r7s@



z'HTMLTokenizer.scriptDataEndTagNameStatecCsJ|j}|dkr2|jtddd|j|_n|j||j|_dSN-rOr$T)	rr:r'r;rscriptDataEscapeStartDashStaterrArorpr!r!r"rSs

z(HTMLTokenizer.scriptDataEscapeStartStatecCsJ|j}|dkr2|jtddd|j|_n|j||j|_dSr)	rr:r'r;rscriptDataEscapedDashDashStaterrArorpr!r!r"r]s

z,HTMLTokenizer.scriptDataEscapeStartDashStatecCs|j}|dkr2|jtddd|j|_n|dkrD|j|_nn|dkrz|jtddd|jtdddn8|tkr|j	|_n&|j
d	}|jtd||dd
S)NrrOr$rHr_r#r`r2)rHrr_T)rr:r'r;rscriptDataEscapedDashStater"scriptDataEscapedLessThanSignStaterrrdrer!r!r"scriptDataEscapedStategs(




z$HTMLTokenizer.scriptDataEscapedStatecCs|j}|dkr2|jtddd|j|_n|dkrD|j|_nn|dkr|jtddd|jtddd|j|_n0|t	kr|j
|_n|jtd|d|j|_d	S)
NrrOr$rHr_r#r`r2T)rr:r'r;rrrrrrrrpr!r!r"r{s&




z(HTMLTokenizer.scriptDataEscapedDashStatecCs|j}|dkr*|jtdddn|dkr<|j|_n|dkrd|jtddd|j|_nn|dkr|jtddd|jtdd	d|j|_n0|t	kr|j
|_n|jtd|d|j|_d
S)NrrOr$rHrtr_r#r`r2T)rr:r'r;rrrrorrrrpr!r!r"rs*




z,HTMLTokenizer.scriptDataEscapedDashDashStatecCs|j}|dkr"d|_|j|_n\|tkrT|jtdd|d||_|j	|_n*|jtddd|j
||j|_dSr~)rr:r scriptDataEscapedEndTagOpenStaterr
r'r;r scriptDataDoubleEscapeStartStaterArrpr!r!r"rs


z0HTMLTokenizer.scriptDataEscapedLessThanSignStatecCsP|j}|tkr"||_|j|_n*|jtddd|j	||j
|_dSr)rr:r
r scriptDataEscapedEndTagNameStaterr'r;rrArrpr!r!r"rs

z.HTMLTokenizer.scriptDataEscapedEndTagOpenStatecCs|jo|jd|jk}|j}|tkrT|rTtd|jgdd|_|j|_n|dkr|rtd|jgdd|_|j	|_n||dkr|rtd|jgdd|_|
|j|_nH|tkr|j|7_n0|j
tdd|jd	|j||j|_d
Sr)rrrrr:rrr|rr}r^rr
r'r;rArrr!r!r"rs@



z.HTMLTokenizer.scriptDataEscapedEndTagNameStatecCs|j}|ttdBkrR|jtd|d|jdkrH|j	|_
q|j|_
nB|tkr|jtd|d|j|7_n|j
||j|_
dSN)rsrtrOr$ZscriptT)rr:rr>r'r;rrrscriptDataDoubleEscapedStaterrr
rArpr!r!r"rs


z.HTMLTokenizer.scriptDataDoubleEscapeStartStatecCs|j}|dkr2|jtddd|j|_n|dkrZ|jtddd|j|_nt|dkr|jtddd|jtdddn>|tkr|jtdd	d|j	|_n|jtd|dd
SNrrOr$rHr_r#r`r2eof-in-script-in-scriptT)
rr:r'r;r scriptDataDoubleEscapedDashStater(scriptDataDoubleEscapedLessThanSignStaterrrpr!r!r"rs*




z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs|j}|dkr2|jtddd|j|_n|dkrZ|jtddd|j|_n|dkr|jtddd|jtddd|j|_nF|t	kr|jtdd	d|j
|_n|jtd|d|j|_d
Sr)rr:r'r;r$scriptDataDoubleEscapedDashDashStaterrrrrrpr!r!r"rs.




z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|j}|dkr*|jtdddn|dkrR|jtddd|j|_n|dkrz|jtddd|j|_n|dkr|jtddd|jtdd	d|j|_nF|t	kr|jtdd
d|j
|_n|jtd|d|j|_dS)NrrOr$rHrtr_r#r`r2rT)rr:r'r;rrrrorrrrpr!r!r"rs2




z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|j}|dkr8|jtdddd|_|j|_n|j||j	|_dS)NrsrOr$r/T)
rr:r'r;rrscriptDataDoubleEscapeEndStaterrArrpr!r!r"r0s

z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs|j}|ttdBkrR|jtd|d|jdkrH|j	|_
q|j|_
nB|tkr|jtd|d|j|7_n|j
||j|_
dSr)rr:rr>r'r;rrrrrrr
rArpr!r!r"r;s


z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|j}|tkr$|jtdn|tkrJ|jd|dg|j|_n|dkr\|	n|dkrn|j
|_n|dkr|jtddd	|jd|dg|j|_n|d
kr|jtddd	|jdddg|j|_nF|t
kr|jtdd
d	|j|_n|jd|dg|j|_dS)NTr&r/rtrs)'"rMrHr##invalid-character-in-attribute-namer$r_r`r2z#expected-attribute-name-but-got-eof)rr:rrdr
rr;attributeNameStaterr^r}r'rrrrpr!r!r"r|Ks<







z&HTMLTokenizer.beforeAttributeNameStatecCs|j}d}d}|dkr&|j|_n.|tkr\|jddd||jtd7<d}n|dkrjd}n|tkr||j|_n|dkr|j	|_n|d	kr|j
td
dd|jdddd
7<d}n|dkr|j
td
dd|jddd|7<d}nH|t
kr6|j
td
dd|j|_n|jddd|7<d}|r|jdddt|jddd<|jdddD]>\}}|jddd|kr|j
td
ddqҐq|r|dS)NTFrMr&rJrrtrsr_r#r`r$r2rrrHrzeof-in-attribute-namezduplicate-attribute)rr:beforeAttributeValueStaterr
rrdrafterAttributeNameStater}r'r;rrrr\rr^)rr&ZleavingThisStateZ	emitTokenrY_r!r!r"ris^






z HTMLTokenizer.attributeNameStatecCsD|j}|tkr$|jtdn|dkr8|j|_n|dkrJ|n|tkrp|jd	|dg|j
|_n|dkr|j|_n|dkr|j	t
dd	d
|jd	ddg|j
|_n|dkr|j	t
dd
d
|jd	|dg|j
|_nF|tkr$|j	t
ddd
|j|_n|jd	|dg|j
|_dS)NTrMrtr&r/rsr_r#r`r$r2rz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)rr:rrdrrr^r
rr;rr}r'rrrrpr!r!r"rs@







z%HTMLTokenizer.afterAttributeNameStatecCsh|j}|tkr$|jtdn@|dkr8|j|_n,|dkrX|j|_|j|n|dkrj|j|_n|dkr|j	
tddd|n|d	kr|j	
tdd
d|j
ddd
d7<|j|_n|dkr|j	
tddd|j
ddd
|7<|j|_nL|tkrB|j	
tddd|j|_n"|j
ddd
|7<|j|_dS)NTrrGrrtr#z.expected-attribute-value-but-got-right-bracketr$r_r`r&rJrr2)rMrH`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)rr:rrdattributeValueDoubleQuotedStaterattributeValueUnQuotedStaterAattributeValueSingleQuotedStater'r;rr^rrrrpr!r!r"rsF







z'HTMLTokenizer.beforeAttributeValueStatecCs|j}|dkr|j|_n|dkr0|dn|dkrj|jtddd|jddd	d
7<nN|t	kr|jtddd|j
|_n&|jddd	||jd7<d
S)NrrGr_r#r`r$r&rJrr2z#eof-in-attribute-value-double-quote)rrGr_Trr:afterAttributeValueStaterrXr'r;rrrrrdrpr!r!r"rs&



z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs|j}|dkr|j|_n|dkr0|dn|dkrj|jtddd|jddd	d
7<nN|t	kr|jtddd|j
|_n&|jddd	||jd7<d
S)NrrGr_r#r`r$r&rJrr2z#eof-in-attribute-value-single-quote)rrGr_Trrpr!r!r"rs&



z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|j}|tkr|j|_n|dkr0|dn|dkrB|n|dkr||jt	ddd|j
ddd	|7<n|d
kr|jt	ddd|j
ddd	d7<nV|tkr|jt	dd
d|j|_n.|j
ddd	||j
tdtB7<dS)NrGrt)rrrMrHrr#z0unexpected-character-in-unquoted-attribute-valuer$r&rJrr_r`r2z eof-in-attribute-value-no-quotes)rGrtrrrMrHrr_T)rr:rr|rrXr^r'r;rrrrrdr>rpr!r!r"rs4





z)HTMLTokenizer.attributeValueUnQuotedStatecCs|j}|tkr|j|_n|dkr.|np|dkr@|j|_n^|tkrt|j	t
ddd|j||j|_n*|j	t
ddd|j||j|_dS)Nrtrsr#z$unexpected-EOF-after-attribute-valuer$z*unexpected-character-after-attribute-valueT)
rr:rr|rr^r}rr'r;rrArrpr!r!r"r s&




z&HTMLTokenizer.afterAttributeValueStatecCs|j}|dkr&d|jd<|n^|tkrZ|jtddd|j||j	|_
n*|jtddd|j||j|_
dS)NrtTr[r#z#unexpected-EOF-after-solidus-in-tagr$z)unexpected-character-after-solidus-in-tag)rr:rr^rr'r;rrArrr|rpr!r!r"r}4s 



z&HTMLTokenizer.selfClosingStartTagStatecCsD|jd}|dd}|jtd|d|j|j|_dS)Nrtr_r2Commentr$T)	rrdreplacer'r;rr:rrrpr!r!r"ryFs
zHTMLTokenizer.bogusCommentStatecCs|jg}|ddkrR||j|ddkrPtddd|_|j|_dSn|ddkrd}dD](}||j|d|krfd	}qqf|rtd
ddddd|_|j|_dSn|ddkrD|jdk	rD|jj	j
rD|jj	j
dj|jj	jkrDd}d
D].}||j|d|krd	}q2q|rD|j
|_dS|jtddd|rt|j|qZ|j|_dS)NrJrrr/r$T)dD))oOrDCtTyYpPeEFZDoctype)r%rYpublicIdsystemIdcorrect[)rrArrrr#zexpected-dashes-or-doctype)rr:r;rrcommentStartStaterdoctypeStaterZtreeZopenElements	namespaceZdefaultNamespacecdataSectionStater'rAr)ry)rrCmatchedexpectedr!r!r"rvUs\
z(HTMLTokenizer.markupDeclarationOpenStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd	d|j|j|j|_nP|t	kr|jtdd
d|j|j|j|_n|jd|7<|j
|_dS)Nrr_r#r`r$r&r2rtincorrect-commenteof-in-commentT)rr:commentStartDashStaterr'r;rrrrcommentStaterpr!r!r"rs.



zHTMLTokenizer.commentStartStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd	d|j|j|j|_nT|t	kr|jtdd
d|j|j|j|_n|jdd|7<|j
|_dS)Nrr_r#r`r$r&-�rtrrT)rr:commentEndStaterr'r;rrrrrrpr!r!r"rs.



z#HTMLTokenizer.commentStartDashStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<nT|tkr|jtddd|j|j|j	|_n|jd||j
d	7<d
S)Nrr_r#r`r$r&r2r)rr_T)rr:commentEndDashStaterr'r;rrrrrdrpr!r!r"rs$



zHTMLTokenizer.commentStatecCs|j}|dkr|j|_n|dkrV|jtddd|jdd7<|j|_nT|t	kr|jtddd|j|j|j
|_n|jdd|7<|j|_d	S)
Nrr_r#r`r$r&rzeof-in-comment-end-dashT)rr:rrr'r;rrrrrrpr!r!r"rs$



z!HTMLTokenizer.commentEndDashStatecCs,|j}|dkr*|j|j|j|_n|dkrd|jtddd|jdd7<|j|_n|dkr|jtdd	d|j	|_n|d
kr|jtddd|jd|7<nj|t
kr|jtddd|j|j|j|_n4|jtdd
d|jdd|7<|j|_dS)Nrtr_r#r`r$r&u--�rrz,unexpected-bang-after-double-dash-in-commentrz,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)rr:r'r;rrrrrcommentEndBangStaterrpr!r!r"rs@




zHTMLTokenizer.commentEndStatecCs|j}|dkr*|j|j|j|_n|dkrN|jdd7<|j|_n|dkr|jtddd|jdd	7<|j	|_nT|t
kr|jtdd
d|j|j|j|_n|jdd|7<|j	|_dS)Nrtrr&z--!r_r#r`r$u--!�zeof-in-comment-end-bang-stateT)rr:r'r;rrrrrrrrpr!r!r"rs,




z!HTMLTokenizer.commentEndBangStatecCs|j}|tkr|j|_nj|tkr\|jtdddd|j	d<|j|j	|j
|_n*|jtddd|j||j|_dS)Nr#!expected-doctype-name-but-got-eofr$Frzneed-space-after-doctypeT)rr:rbeforeDoctypeNameStaterrr'r;rrrrArpr!r!r"rs 



zHTMLTokenizer.doctypeStatecCs|j}|tkrn|dkrT|jtdddd|jd<|j|j|j|_n|dkr|jtdddd	|jd
<|j	|_nR|t
kr|jtdddd|jd<|j|j|j|_n||jd
<|j	|_dS)
Nrtr#z+expected-doctype-name-but-got-right-bracketr$Frr_r`r2rYrT)rr:rr'r;rrrrdoctypeNameStaterrpr!r!r"rs4







z$HTMLTokenizer.beforeDoctypeNameStatecCs|j}|tkr2|jdt|jd<|j|_n|dkrh|jdt|jd<|j	|j|j
|_n|dkr|j	tddd|jdd7<|j|_nh|t
kr|j	tdddd	|jd
<|jdt|jd<|j	|j|j
|_n|jd|7<dS)NrYrtr_r#r`r$r2zeof-in-doctype-nameFrT)rr:rrr\rafterDoctypeNameStaterr'r;rrrrrpr!r!r"r6s0





zHTMLTokenizer.doctypeNameStatecCsH|j}|tkrn.|dkr8|j|j|j|_n|tkrd|jd<|j	||jt
ddd|j|j|j|_n|dkrd}d	D]}|j}||krd}qq|r|j|_dSnD|d
kr
d}dD]}|j}||krd}qq|r
|j|_dS|j	||jt
ddd
|idd|jd<|j
|_dS)NrtFrr#eof-in-doctyper$rT))uU)bB)lL)iIrsS)rrrr)mMz*expected-space-or-right-bracket-in-doctyper&r1)rr:rr'r;rrrrrArafterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)rr&rrr!r!r"rOsT






z#HTMLTokenizer.afterDoctypeNameStatecCs|j}|tkr|j|_n|dkrP|jtddd|j||j|_nT|t	kr|jtdddd|j
d<|j|j
|j|_n|j||j|_dS	N)rrr#unexpected-char-in-doctyper$rFrT)rr:r"beforeDoctypePublicIdentifierStaterr'r;rrArrrrpr!r!r"rs&




z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs|j}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jt	dddd	|jd
<|j|j|j
|_nh|tkr|jt	dddd	|jd
<|j|j|j
|_n(|jt	dddd	|jd
<|j|_d
S)Nrr/rrrtr#unexpected-end-of-doctyper$FrrrT)
rr:rr(doctypePublicIdentifierDoubleQuotedStater(doctypePublicIdentifierSingleQuotedStater'r;rrrrrpr!r!r"rs:









z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd	dd
|jd<|j|j|j|_nR|t	kr|jtdddd
|jd<|j|j|j|_n|jd|7<d
S)Nrr_r#r`r$rr2rtrFrrT
rr:!afterDoctypePublicIdentifierStaterr'r;rrrrrpr!r!r"rs0





z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd	dd
|jd<|j|j|j|_nR|t	kr|jtdddd
|jd<|j|j|j|_n|jd|7<d
S)Nrr_r#r`r$rr2rtrFrrTrrpr!r!r"rs0





z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs|j}|tkr|j|_n|dkr<|j|j|j|_n|dkrn|jt	dddd|jd<|j
|_n|dkr|jt	dddd|jd<|j|_nh|tkr|jt	dd	dd
|jd<|j|j|j|_n(|jt	dddd
|jd<|j
|_dS)
Nrtrr#rr$r/rrrFrT)rr:r-betweenDoctypePublicAndSystemIdentifiersStaterr'r;rrr(doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStaterrrpr!r!r"rs>









z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs|j}|tkrn|dkr4|j|j|j|_n|dkrPd|jd<|j|_n|dkrld|jd<|j	|_nh|t
kr|jtdddd	|jd
<|j|j|j|_n(|jtdddd	|jd
<|j|_dS)
Nrtrr/rrr#rr$FrrT)
rr:rr'r;rrrrrrrrrpr!r!r"rs2








z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs|j}|tkr|j|_n|dkrP|jtddd|j||j|_nT|t	kr|jtdddd|j
d<|j|j
|j|_n|j||j|_dSr)rr:r"beforeDoctypeSystemIdentifierStaterr'r;rrArrrrpr!r!r"rs&




z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs|j}|tkrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jt	dddd	|jd
<|j|j|j
|_nh|tkr|jt	dddd	|jd
<|j|j|j
|_n(|jt	dddd	|jd
<|j|_dS)
Nrr/rrrtr#rr$FrrT)
rr:rrrrrr'r;rrrrrpr!r!r"r/s:









z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd	dd
|jd<|j|j|j|_nR|t	kr|jtdddd
|jd<|j|j|j|_n|jd|7<d
S)Nrr_r#r`r$rr2rtrFrrT
rr:!afterDoctypeSystemIdentifierStaterr'r;rrrrrpr!r!r"rLs0





z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd	dd
|jd<|j|j|j|_nR|t	kr|jtdddd
|jd<|j|j|j|_n|jd|7<d
S)Nrr_r#r`r$rr2rtrFrrTrrpr!r!r"rds0





z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs|j}|tkrn~|dkr4|j|j|j|_n^|tkrt|jt	dddd|jd<|j|j|j|_n|jt	ddd|j
|_dS)	Nrtr#rr$FrrT)rr:rr'r;rrrrrrrpr!r!r"r|s$



z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|j}|dkr*|j|j|j|_n,|tkrV|j||j|j|j|_ndS)NrtT)	rr:r'r;rrrrrArpr!r!r"rs


zHTMLTokenizer.bogusDoctypeStatecCsg}||jd||jd|j}|tkr>qq|dksJt|ddddkrv|ddd|d<qq||qd|}|d}|dkrt|D]}|j	t
d	d
dq|dd}|r|j	t
d
|d|j|_
dS)N]rtrJz]]r/r_rr#r`r$r2rOT)r;rrdr:rAssertionErrorr=countranger'rrrr)rr&r:Z	nullCountrr!r!r"rs2



zHTMLTokenizer.cdataSectionState)N)NF)N__name__
__module____qualname____doc__rr,rFrWrXr^rrbrjrhrmrorqrcrwrxrirrrlrrrnrrrrrrrrrrrrrrrrr|rrrrrrrr}ryrvrrrrrrrrrrrrrrrrrrrrrrr
__classcell__r!r!rr"rsH
P#

6 "-3rN)Z
__future__rrrZpip._vendor.sixrr?collectionsrZ	constantsrr	r
rrr
rrrrZ_inputstreamrZ_trierrPobjectrr!r!r!r"<module>s