Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef INCREMENTAL_TOKENIZER_H__
8 : #define INCREMENTAL_TOKENIZER_H__
9 :
10 : #include "mozilla/Tokenizer.h"
11 :
12 : #include "nsError.h"
13 : #include <functional>
14 :
15 : class nsIInputStream;
16 :
17 : namespace mozilla {
18 :
19 0 : class IncrementalTokenizer : public TokenizerBase
20 : {
21 : public:
22 : /**
23 : * The consumer callback. The function is called for every single token
24 : * found in the input. A failure result returned by this callback stops
25 : * the tokenization immediately and bubbles up as the result of Feed/FinishInput.
26 : *
27 : * Fragment()s of consumed tokens are guaranteed to remain valid until the next call to
28 : * Feed/FinishInput and point into a single linear buffer. Hence, they can
29 : * be safely used to accumulate the data for processing after Feed/FinishInput
30 : * has returned.
31 : */
32 : typedef std::function<nsresult(Token const&, IncrementalTokenizer& i)> Consumer;
33 :
34 : /**
35 : * For the aWhitespaces and aAdditionalWordChars arguments, see TokenizerBase.
36 : *
37 : * @param aConsumer
38 : * A mandatory non-null argument, a function that consumes the tokens as they
39 : * come when the tokenizer is fed.
40 : * @param aRawMinBuffered
41 : * When we have buffered at least aRawMinBuffered data, but no custom
42 : * token has been found so far because the incremental feed chunks are too small, deliver
43 : * the raw data to preserve streaming and to save memory. This only has an effect
44 : * in OnlyCustomTokenizing mode.
45 : */
46 : explicit IncrementalTokenizer(Consumer&& aConsumer,
47 : const char* aWhitespaces = nullptr,
48 : const char* aAdditionalWordChars = nullptr,
49 : uint32_t aRawMinBuffered = 1024);
50 :
51 : /**
52 : * Pushes the input to be tokenized. These methods directly call the Consumer callback
53 : * on every token found. The result of the Consumer callback is returned here.
54 : *
55 : * The tokenizer must be initialized with a valid consumer prior to calling these
56 : * methods. It's not allowed to call Feed/FinishInput from inside the Consumer
57 : * callback.
58 : */
59 : nsresult FeedInput(const nsACString& aInput);
60 : nsresult FeedInput(nsIInputStream* aInput, uint32_t aCount);
61 : nsresult FinishInput();
62 :
63 : /**
64 : * Can only be called from inside the consumer callback.
65 : *
66 : * When there is still anything to read from the input, tokenizes it, stores
67 : * the token type and value in the aToken result and shifts the cursor past the
68 : * just-parsed token. Each call to Next() reads another token from
69 : * the input and shifts the cursor.
70 : *
71 : * Returns false if there is not enough data to deterministically recognize
72 : * tokens or when the last returned token was EOF.
73 : */
74 : MOZ_MUST_USE
75 : bool Next(Token& aToken);
76 :
77 : /**
78 : * Can only be called from inside the consumer callback.
79 : *
80 : * Tells the tokenizer to revert the cursor and stop the async parsing until
81 : * the next feed of input. This is useful when more than one token is needed
82 : * to decide on the syntax but there is not enough input to get the next token
83 : * (Next() returned false.)
84 : */
85 : void NeedMoreInput();
86 :
87 : /**
88 : * Can only be called from inside the consumer callback.
89 : *
90 : * This makes the consumer callback be called again, re-parsing
91 : * the input at the previous cursor position. This is useful when
92 : * the tokenizer state (custom tokens, tokenization mode) has changed and
93 : * we want to re-parse the input.
94 : */
95 : void Rollback();
96 :
97 : private:
98 : // Loops over the input with TokenizerBase::Parse and calls the Consumer callback.
99 : nsresult Process();
100 :
101 : #ifdef DEBUG
102 : // True when inside the consumer callback, used only for assertions.
103 : bool mConsuming;
104 : #endif // DEBUG
105 : // Modifiable only from the Consumer callback, tells the parser to break, roll back
106 : // and wait for more input.
107 : bool mNeedMoreInput;
108 : // Modifiable only from the Consumer callback, tells the parser to roll back and
109 : // parse the input again, with (if modified) new settings of the tokenizer.
110 : bool mRollback;
111 : // The input buffer. Updated with each call to Feed/FinishInput.
112 : nsCString mInput;
113 : // Numerical index pointing at the current cursor position. We don't keep a direct
114 : // reference to the string buffer since the buffer often gets reallocated.
115 : nsCString::index_type mInputCursor;
116 : // The consumer function, stored by value.
117 : Consumer mConsumer;
118 : };
119 :
120 : } // namespace mozilla
121 :
122 : #endif
|