Line data Source code
1 : /*
2 : * Copyright (c) 2005-2007 Henri Sivonen
3 : * Copyright (c) 2007-2015 Mozilla Foundation
4 : * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
5 : * Foundation, and Opera Software ASA.
6 : *
7 : * Permission is hereby granted, free of charge, to any person obtaining a
8 : * copy of this software and associated documentation files (the "Software"),
9 : * to deal in the Software without restriction, including without limitation
10 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 : * and/or sell copies of the Software, and to permit persons to whom the
12 : * Software is furnished to do so, subject to the following conditions:
13 : *
14 : * The above copyright notice and this permission notice shall be included in
15 : * all copies or substantial portions of the Software.
16 : *
17 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 : * DEALINGS IN THE SOFTWARE.
24 : */
25 :
26 : /*
27 : * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
28 : * Please edit Tokenizer.java instead and regenerate.
29 : */
30 :
31 : #ifndef nsHtml5Tokenizer_h
32 : #define nsHtml5Tokenizer_h
33 :
34 : #include "nsIAtom.h"
35 : #include "nsHtml5AtomTable.h"
36 : #include "nsHtml5String.h"
37 : #include "nsIContent.h"
38 : #include "nsTraceRefcnt.h"
39 : #include "jArray.h"
40 : #include "nsHtml5DocumentMode.h"
41 : #include "nsHtml5ArrayCopy.h"
42 : #include "nsHtml5NamedCharacters.h"
43 : #include "nsHtml5NamedCharactersAccel.h"
44 : #include "nsGkAtoms.h"
45 : #include "nsAHtml5TreeBuilderState.h"
46 : #include "nsHtml5Macros.h"
47 : #include "nsHtml5Highlighter.h"
48 : #include "nsHtml5TokenizerLoopPolicies.h"
49 :
50 : class nsHtml5StreamParser;
51 :
52 : class nsHtml5AttributeName;
53 : class nsHtml5ElementName;
54 : class nsHtml5TreeBuilder;
55 : class nsHtml5MetaScanner;
56 : class nsHtml5UTF16Buffer;
57 : class nsHtml5StateSnapshot;
58 : class nsHtml5Portability;
59 :
60 :
// Declaration of the nsHtml5Tokenizer class.
// Per the banner at the top of this file, the header is generated from
// Tokenizer.java; lasting changes belong there, not here.
// The "<n> :" / "<n> <hits> :" prefixes on each line are coverage-listing
// columns (source line number and execution count), not part of the C++ text.
61 : class nsHtml5Tokenizer
62 : {
63 : private:
// ~1 has every bit set except bit 0, so `x & DATA_AND_RCDATA_MASK` is zero
// only when x is DATA (0) or RCDATA (1).
// NOTE(review): the use site is not visible in this header; confirm in the .cpp.
64 : static const int32_t DATA_AND_RCDATA_MASK = ~1;
65 :
66 : public:
// State identifiers, numbered consecutively from 0 (DATA) to 74
// (PROCESSING_INSTRUCTION_QUESTION_MARK).
// NOTE(review): presumably the values carried by the `state` / `returnState`
// arguments of stateLoop() and by setStateAndEndTagExpectation() - confirm.
67 : static const int32_t DATA = 0;
68 :
69 : static const int32_t RCDATA = 1;
70 :
71 : static const int32_t SCRIPT_DATA = 2;
72 :
73 : static const int32_t RAWTEXT = 3;
74 :
75 : static const int32_t SCRIPT_DATA_ESCAPED = 4;
76 :
77 : static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
78 :
79 : static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
80 :
81 : static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 7;
82 :
83 : static const int32_t PLAINTEXT = 8;
84 :
85 : static const int32_t TAG_OPEN = 9;
86 :
87 : static const int32_t CLOSE_TAG_OPEN = 10;
88 :
89 : static const int32_t TAG_NAME = 11;
90 :
91 : static const int32_t BEFORE_ATTRIBUTE_NAME = 12;
92 :
93 : static const int32_t ATTRIBUTE_NAME = 13;
94 :
95 : static const int32_t AFTER_ATTRIBUTE_NAME = 14;
96 :
97 : static const int32_t BEFORE_ATTRIBUTE_VALUE = 15;
98 :
99 : static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
100 :
101 : static const int32_t BOGUS_COMMENT = 17;
102 :
103 : static const int32_t MARKUP_DECLARATION_OPEN = 18;
104 :
105 : static const int32_t DOCTYPE = 19;
106 :
107 : static const int32_t BEFORE_DOCTYPE_NAME = 20;
108 :
109 : static const int32_t DOCTYPE_NAME = 21;
110 :
111 : static const int32_t AFTER_DOCTYPE_NAME = 22;
112 :
113 : static const int32_t BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
114 :
115 : static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
116 :
117 : static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
118 :
119 : static const int32_t AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
120 :
121 : static const int32_t BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
122 :
123 : static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
124 :
125 : static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
126 :
127 : static const int32_t AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
128 :
129 : static const int32_t BOGUS_DOCTYPE = 31;
130 :
131 : static const int32_t COMMENT_START = 32;
132 :
133 : static const int32_t COMMENT_START_DASH = 33;
134 :
135 : static const int32_t COMMENT = 34;
136 :
137 : static const int32_t COMMENT_END_DASH = 35;
138 :
139 : static const int32_t COMMENT_END = 36;
140 :
141 : static const int32_t COMMENT_END_BANG = 37;
142 :
143 : static const int32_t NON_DATA_END_TAG_NAME = 38;
144 :
145 : static const int32_t MARKUP_DECLARATION_HYPHEN = 39;
146 :
147 : static const int32_t MARKUP_DECLARATION_OCTYPE = 40;
148 :
149 : static const int32_t DOCTYPE_UBLIC = 41;
150 :
151 : static const int32_t DOCTYPE_YSTEM = 42;
152 :
153 : static const int32_t AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
154 :
155 : static const int32_t BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
156 :
157 : static const int32_t AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
158 :
159 : static const int32_t CONSUME_CHARACTER_REFERENCE = 46;
160 :
161 : static const int32_t CONSUME_NCR = 47;
162 :
163 : static const int32_t CHARACTER_REFERENCE_TAIL = 48;
164 :
165 : static const int32_t HEX_NCR_LOOP = 49;
166 :
// Spelled "NRC" here while the neighbouring constants use "NCR"; this is a
// public identifier, so the spelling is left as generated.
167 : static const int32_t DECIMAL_NRC_LOOP = 50;
168 :
169 : static const int32_t HANDLE_NCR_VALUE = 51;
170 :
171 : static const int32_t HANDLE_NCR_VALUE_RECONSUME = 52;
172 :
173 : static const int32_t CHARACTER_REFERENCE_HILO_LOOKUP = 53;
174 :
175 : static const int32_t SELF_CLOSING_START_TAG = 54;
176 :
177 : static const int32_t CDATA_START = 55;
178 :
179 : static const int32_t CDATA_SECTION = 56;
180 :
181 : static const int32_t CDATA_RSQB = 57;
182 :
183 : static const int32_t CDATA_RSQB_RSQB = 58;
184 :
185 : static const int32_t SCRIPT_DATA_LESS_THAN_SIGN = 59;
186 :
187 : static const int32_t SCRIPT_DATA_ESCAPE_START = 60;
188 :
189 : static const int32_t SCRIPT_DATA_ESCAPE_START_DASH = 61;
190 :
191 : static const int32_t SCRIPT_DATA_ESCAPED_DASH = 62;
192 :
193 : static const int32_t SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
194 :
195 : static const int32_t BOGUS_COMMENT_HYPHEN = 64;
196 :
197 : static const int32_t RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
198 :
199 : static const int32_t SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
200 :
201 : static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
202 :
203 : static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED = 68;
204 :
205 : static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
206 :
207 : static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
208 :
209 : static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
210 :
211 : static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
212 :
213 : static const int32_t PROCESSING_INSTRUCTION = 73;
214 :
215 : static const int32_t PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
216 :
217 : private:
// Evaluates to 0xD7C0 (0xD800 - 0x40).
// NOTE(review): this matches the usual constant for deriving a UTF-16 lead
// surrogate as LEAD_OFFSET + (codePoint >> 10); the use site is not visible
// in this header - confirm in the .cpp.
218 : static const int32_t LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
219 :
// Static character tables. Only declared here (no initializers); their
// contents are defined outside this header.
220 : static char16_t LT_GT[];
221 : static char16_t LT_SOLIDUS[];
222 : static char16_t RSQB_RSQB[];
223 : static char16_t REPLACEMENT_CHARACTER[];
224 : static char16_t LF[];
225 : static char16_t CDATA_LSQB[];
226 : static char16_t OCTYPE[];
227 : static char16_t UBLIC[];
228 : static char16_t YSTEM[];
229 : static staticJArray<char16_t,int32_t> TITLE_ARR;
230 : static staticJArray<char16_t,int32_t> SCRIPT_ARR;
231 : static staticJArray<char16_t,int32_t> STYLE_ARR;
232 : static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
233 : static staticJArray<char16_t,int32_t> XMP_ARR;
234 : static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
235 : static staticJArray<char16_t,int32_t> IFRAME_ARR;
236 : static staticJArray<char16_t,int32_t> NOEMBED_ARR;
237 : static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
238 : static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
// Instance state. The access specifier switches between protected and private
// field by field.
// NOTE(review): presumably this mirrors the per-field visibility in
// Tokenizer.java - confirm against the generator input.
239 : protected:
240 : nsHtml5TreeBuilder* tokenHandler;
241 : nsHtml5StreamParser* encodingDeclarationHandler;
// Set to true by silentCarriageReturn() below.
242 : bool lastCR;
243 : int32_t stateSave;
244 : private:
245 : int32_t returnStateSave;
246 : protected:
247 : int32_t index;
248 : private:
249 : bool forceQuirks;
250 : char16_t additional;
251 : int32_t entCol;
252 : int32_t firstCharKey;
253 : int32_t lo;
254 : int32_t hi;
255 : int32_t candidate;
256 : int32_t charRefBufMark;
257 : protected:
258 : int32_t value;
259 : private:
260 : bool seenDigits;
261 : protected:
262 : int32_t cstart;
263 : private:
// Note: publicId/systemId are separate fields from publicIdentifier/
// systemIdentifier declared further down.
264 : nsHtml5String publicId;
265 : nsHtml5String systemId;
// strBuf and charRefBuf each have a companion *Len field holding the number
// of code units currently in use (see appendStrBuf / appendCharRefBuf).
266 : autoJArray<char16_t,int32_t> strBuf;
267 : int32_t strBufLen;
268 : autoJArray<char16_t,int32_t> charRefBuf;
269 : int32_t charRefBufLen;
270 : autoJArray<char16_t,int32_t> bmpChar;
271 : autoJArray<char16_t,int32_t> astralChar;
272 : protected:
273 : nsHtml5ElementName* endTagExpectation;
274 : private:
275 : jArray<char16_t,int32_t> endTagExpectationAsArray;
276 : protected:
277 : bool endTag;
278 : private:
279 : bool containsHyphen;
280 : nsHtml5ElementName* tagName;
281 : nsHtml5ElementName* nonInternedTagName;
282 : protected:
283 : nsHtml5AttributeName* attributeName;
284 : private:
285 : nsHtml5AttributeName* nonInternedAttributeName;
286 : nsIAtom* doctypeName;
287 : nsHtml5String publicIdentifier;
288 : nsHtml5String systemIdentifier;
// NOTE(review): nsHtml5HtmlAttributes is neither forward-declared nor
// obviously included in the visible part of this header; presumably one of
// the included headers provides it - confirm.
289 : nsHtml5HtmlAttributes* attributes;
290 : bool newAttributesEachTime;
291 : bool shouldSuspend;
292 : protected:
293 : bool confident;
294 : private:
// Incremented by silentCarriageReturn() and silentLineFeed(); read by
// getLineNumber().
295 : int32_t line;
296 : int32_t attributeLine;
297 : nsHtml5AtomTable* interner;
298 : bool viewingXmlSource;
299 : public:
300 : nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
301 : void setInterner(nsHtml5AtomTable* interner);
302 : void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
303 : bool isViewingXmlSource();
// Two overloads: the end-tag expectation can be given as an atom or as an
// element name.
304 : void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
305 : void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
306 : private:
307 : void endTagExpectationToArray();
308 : public:
309 : void setLineNumber(int32_t line);
// Returns the current value of the `line` member.
310 10 : inline int32_t getLineNumber()
311 : {
312 10 : return line;
313 : }
314 :
315 : nsHtml5HtmlAttributes* emptyAttributes();
316 : private:
// Appends one code unit to charRefBuf and advances charRefBufLen.
// Capacity is checked with MOZ_RELEASE_ASSERT rather than MOZ_ASSERT;
// per MFBT's Assertions.h that variant is also enforced in release builds.
// This function never grows the buffer.
317 0 : inline void appendCharRefBuf(char16_t c)
318 : {
319 0 : MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, "Attempted to overrun charRefBuf!");
320 0 : charRefBuf[charRefBufLen++] = c;
321 0 : }
322 :
323 : void emitOrAppendCharRefBuf(int32_t returnState);
// Marks strBuf as empty by resetting strBufLen; the storage is not touched.
324 82 : inline void clearStrBufAfterUse()
325 : {
326 82 : strBufLen = 0;
327 82 : }
328 :
// Asserts that strBufLen is already zero, then sets it to zero anyway, so the
// reset happens regardless of whether the assertion is active in this build.
329 64 : inline void clearStrBufBeforeUse()
330 : {
331 64 : MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!");
332 64 : strBufLen = 0;
333 64 : }
334 :
// Empties strBuf after asserting that it holds exactly one '-' code unit.
335 5 : inline void clearStrBufAfterOneHyphen()
336 : {
337 5 : MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!");
338 5 : MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!");
339 5 : strBufLen = 0;
340 5 : }
341 :
// Appends one code unit to strBuf and advances strBufLen.
// The MOZ_ASSERT states the expectation that space was reserved beforehand.
// The branch after it still handles a full buffer (strBufLen == strBuf.length):
// it calls EnsureBufferSpace(1) and invokes MOZ_CRASH if that returns false.
// NOTE(review): EnsureBufferSpace is not declared in the visible part of this
// header; presumably it comes from nsHtml5TokenizerHSupplement.h - confirm.
342 1144 : inline void appendStrBuf(char16_t c)
343 : {
344 1144 : MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient.");
345 1144 : if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
346 0 : if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
347 0 : MOZ_CRASH("Unable to recover from buffer reallocation failure");
348 : }
349 : }
350 1144 : strBuf[strBufLen++] = c;
351 1144 : }
352 :
353 : protected:
354 : nsHtml5String strBufToString();
355 : private:
356 : void strBufToDoctypeName();
357 : void emitStrBuf();
// Appends a single '-' to strBuf.
358 0 : inline void appendSecondHyphenToBogusComment()
359 : {
360 0 : appendStrBuf('-');
361 0 : }
362 :
// Calls errConsecutiveHyphens() and then appends c to strBuf.
// NOTE(review): errConsecutiveHyphens is not declared in the visible part of
// this header - confirm where it is provided.
363 0 : inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)
364 : {
365 0 : errConsecutiveHyphens();
366 0 : appendStrBuf(c);
367 0 : }
368 :
// Range overload of appendStrBuf; defined outside this header.
369 : void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);
// Appends the first charRefBufLen code units of charRefBuf to strBuf via the
// range overload above, then marks charRefBuf as empty.
370 0 : inline void appendCharRefBufToStrBuf()
371 : {
372 0 : appendStrBuf(charRefBuf, 0, charRefBufLen);
373 0 : charRefBufLen = 0;
374 0 : }
375 :
376 : void emitComment(int32_t provisionalHyphens, int32_t pos);
377 : protected:
378 : void flushChars(char16_t* buf, int32_t pos);
379 : private:
380 : void strBufToElementNameString();
381 : int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
382 : void attributeNameComplete();
383 : void addAttributeWithoutValue();
384 : void addAttributeWithValue();
385 : public:
386 : void start();
387 : bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
388 : private:
// Templated on a policy type P; defined outside this header.
// NOTE(review): P is presumably one of the policies from
// nsHtml5TokenizerLoopPolicies.h (included above) - confirm.
389 : template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
390 : void initDoctypeFields();
// Records a carriage return via silentCarriageReturn(), then appends '\n'
// (not '\r') through adjustDoubleHyphenAndAppendToStrBufAndErr().
391 0 : inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
392 : {
393 0 : silentCarriageReturn();
394 0 : adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
395 0 : }
396 :
// Records a line feed via silentLineFeed(), then appends '\n' through
// adjustDoubleHyphenAndAppendToStrBufAndErr().
397 0 : inline void adjustDoubleHyphenAndAppendToStrBufLineFeed()
398 : {
399 0 : silentLineFeed();
400 0 : adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
401 0 : }
402 :
// Records a line feed via silentLineFeed(), then appends '\n' to strBuf.
403 4 : inline void appendStrBufLineFeed()
404 : {
405 4 : silentLineFeed();
406 4 : appendStrBuf('\n');
407 4 : }
408 :
// Records a carriage return via silentCarriageReturn(), then appends '\n'
// (not '\r') to strBuf.
409 0 : inline void appendStrBufCarriageReturn()
410 : {
411 0 : silentCarriageReturn();
412 0 : appendStrBuf('\n');
413 0 : }
414 :
415 : protected:
// Increments `line` and sets lastCR to true; emits nothing itself.
416 0 : inline void silentCarriageReturn()
417 : {
418 0 : ++line;
419 0 : lastCR = true;
420 0 : }
421 :
// Increments `line`; does not touch lastCR.
422 81 : inline void silentLineFeed()
423 : {
424 81 : ++line;
425 81 : }
426 :
427 : private:
428 : void emitCarriageReturn(char16_t* buf, int32_t pos);
429 : void emitReplacementCharacter(char16_t* buf, int32_t pos);
430 : void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
431 : void setAdditionalAndRememberAmpersandLocation(char16_t add);
432 : void bogusDoctype();
433 : void bogusDoctypeWithoutQuirks();
434 : void handleNcrValue(int32_t returnState);
435 : public:
436 : void eof();
437 : private:
438 : void emitDoctypeToken(int32_t pos);
439 : protected:
// Returns buf[pos]. No bounds check is performed here; the caller is
// responsible for passing a valid position.
440 3106 : inline char16_t checkChar(char16_t* buf, int32_t pos)
441 : {
442 3106 : return buf[pos];
443 : }
444 :
445 : public:
446 : bool internalEncodingDeclaration(nsHtml5String internalCharset);
447 : private:
448 : void emitOrAppendTwo(const char16_t* val, int32_t returnState);
449 : void emitOrAppendOne(const char16_t* val, int32_t returnState);
450 : public:
451 : void end();
452 : void requestSuspension();
453 : bool isInDataState();
454 : void resetToDataState();
455 : void loadState(nsHtml5Tokenizer* other);
456 : void initializeWithoutStarting();
457 : void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
458 : ~nsHtml5Tokenizer();
459 : static void initializeStatics();
460 : static void releaseStatics();
461 :
// Included inside the class body, so whatever this supplement declares
// becomes part of nsHtml5Tokenizer.
462 : #include "nsHtml5TokenizerHSupplement.h"
463 : };
464 :
465 : #endif
466 :
|