Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 :
7 : /**
8 : * MODULE NOTES:
9 : * @update gess 4/1/98
10 : *
11 : * The scanner is a low-level service class that knows
12 : * how to consume characters out of an (internal) stream.
13 : * This class also offers a series of utility methods
14 : * that most tokenizers want, such as readUntil()
15 : * and SkipWhitespace().
16 : */
17 :
18 :
19 : #ifndef SCANNER
20 : #define SCANNER
21 :
22 : #include "nsCOMPtr.h"
23 : #include "nsString.h"
24 : #include "nsIParser.h"
25 : #include "mozilla/Encoding.h"
26 : #include "nsScannerString.h"
27 : #include "mozilla/CheckedInt.h"
28 :
/**
 * Describes a set of terminating characters for a read operation.
 *
 * Only the declaration is visible in this header; how mChars and mFilter
 * are populated is decided by the out-of-line constructor.
 */
class nsReadEndCondition {
public:
  // Terminating characters (presumably the pointer handed to the
  // constructor -- confirm against the implementation).
  const char16_t* mChars;

  // Filter value derived from the terminator set; its exact encoding is
  // defined by the constructor, which is not part of this header.
  char16_t mFilter;

  /**
   * @param aTerminateChars the characters that terminate a read
   */
  explicit nsReadEndCondition(const char16_t* aTerminateChars);

  // Instances are neither copyable nor assignable. Deleting the members
  // turns misuse into a compile-time error instead of a link-time one.
  nsReadEndCondition(const nsReadEndCondition& aOther) = delete;
  nsReadEndCondition& operator=(const nsReadEndCondition& aOther) = delete;
};
38 :
39 : class nsScanner final {
40 : using Encoding = mozilla::Encoding;
41 : template <typename T> using NotNull = mozilla::NotNull<T>;
42 : public:
43 :
44 : /**
45 : * Use this constructor for the XML fragment parsing case
46 : */
47 : explicit nsScanner(const nsAString& anHTMLString);
48 :
49 : /**
50 : * Use this constructor if you want i/o to be based on
51 : * a file (therefore a stream) or just data you provide via Append().
52 : */
53 : nsScanner(nsString& aFilename, bool aCreateStream);
54 :
55 : ~nsScanner();
56 :
57 : /**
58 : * retrieve next char from internal input stream
59 : *
60 : * @update gess 3/25/98
61 : * @param ch is the char to accept new value
62 : * @return error code reflecting read status
63 : */
64 : nsresult GetChar(char16_t& ch);
65 :
66 : /**
67 : * Records current offset position in input stream. This allows us
68 : * to back up to this point if the need should arise, such as when
69 : * tokenization gets interrupted.
70 : *
71 : * @update gess 5/12/98
72 : * @param
73 : * @return
74 : */
75 : int32_t Mark(void);
76 :
77 : /**
78 : * Resets current offset position of input stream to marked position.
79 : * This allows us to back up to this point if the need should arise,
80 : * such as when tokenization gets interrupted.
81 : * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
82 : *
83 : * @update gess 5/12/98
84 : * @param
85 : * @return
86 : */
87 : void RewindToMark(void);
88 :
89 :
90 : /**
91 : *
92 : *
93 : * @update harishd 01/12/99
94 : * @param
95 : * @return
96 : */
97 : bool UngetReadable(const nsAString& aBuffer);
98 :
99 : /**
100 : *
101 : *
102 : * @update gess 5/13/98
103 : * @param
104 : * @return
105 : */
106 : nsresult Append(const nsAString& aBuffer);
107 :
108 : /**
109 : *
110 : *
111 : * @update gess 5/21/98
112 : * @param
113 : * @return
114 : */
115 : nsresult Append(const char* aBuffer, uint32_t aLen);
116 :
117 : /**
118 : * Call this to copy bytes out of the scanner that have not yet been consumed
119 : * by the tokenization process.
120 : *
121 : * @update gess 5/12/98
122 : * @param aCopyBuffer is where the scanner buffer will be copied to
123 : * @return true if OK or false on OOM
124 : */
125 : bool CopyUnusedData(nsString& aCopyBuffer);
126 :
127 : /**
128 : * Retrieve the name of the file that the scanner is reading from.
129 : * In some cases, it's just a given name, because the scanner isn't
130 : * really reading from a file.
131 : *
132 : * @update gess 5/12/98
133 : * @return
134 : */
135 : nsString& GetFilename(void);
136 :
137 : static void SelfTest();
138 :
139 : /**
140 : * Use this setter to change the scanner's unicode decoder
141 : *
142 : * @update ftang 3/02/99
143 : * @param aCharset a normalized (alias resolved) charset name
144 : * @param aCharsetSource- where the charset info came from
145 : * @return
146 : */
147 : nsresult SetDocumentCharset(NotNull<const Encoding*> aEncoding,
148 : int32_t aSource);
149 :
150 : void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
151 : void CurrentPosition(nsScannerIterator& aPosition);
152 : void EndReading(nsScannerIterator& aPosition);
153 : void SetPosition(nsScannerIterator& aPosition,
154 : bool aTruncate = false);
155 :
156 : /**
157 : * Internal method used to cause the internal buffer to
158 : * be filled with data.
159 : *
160 : * @update gess4/3/98
161 : */
162 : bool IsIncremental(void) {return mIncremental;}
163 23 : void SetIncremental(bool anIncrValue) {mIncremental=anIncrValue;}
164 :
165 : protected:
166 :
167 : bool AppendToBuffer(nsScannerString::Buffer* aBuffer);
168 0 : bool AppendToBuffer(const nsAString& aStr)
169 : {
170 0 : nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr);
171 0 : if (!buf)
172 0 : return false;
173 0 : AppendToBuffer(buf);
174 0 : return true;
175 : }
176 :
177 : nsScannerString* mSlidingBuffer;
178 : nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer
179 : nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here)
180 : nsScannerIterator mEndPosition; // The current end of the scanner buffer
181 : nsString mFilename;
182 : bool mIncremental;
183 : int32_t mCharsetSource;
184 : nsCString mCharset;
185 : mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder;
186 :
187 : private:
188 : nsScanner &operator =(const nsScanner &); // Not implemented.
189 : };
190 :
191 : #endif
192 :
193 :
|