Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef __nsCharSeparatedTokenizer_h
8 : #define __nsCharSeparatedTokenizer_h
9 :
10 : #include "mozilla/RangedPtr.h"
11 :
12 : #include "nsDependentSubstring.h"
13 : #include "nsCRT.h"
14 :
15 : /**
16 : * This parses a SeparatorChar-separated string into tokens.
17 : * Whitespace surrounding tokens is not treated as part of tokens, however
18 : * whitespace inside a token is. If the final token is the empty string, it is
19 : * not returned.
20 : *
21 : * Some examples, with SeparatorChar = ',':
22 : *
23 : * "foo, bar, baz" -> "foo" "bar" "baz"
24 : * "foo,bar,baz" -> "foo" "bar" "baz"
25 : * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
26 : * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
27 : * "foo,,bar,baz" -> "foo" "" "bar" "baz"
28 : * "foo,bar,baz," -> "foo" "bar" "baz"
29 : *
30 : * The function used for whitespace detection is a template argument. The
31 : * wrapper templates defined below default it to NS_IsAsciiWhitespace.
32 : */
33 : template<typename DependentSubstringType, bool IsWhitespace(char16_t)>
34 : class nsTCharSeparatedTokenizer
35 : {
36 : typedef typename DependentSubstringType::char_type CharType;
37 : typedef typename DependentSubstringType::substring_type SubstringType;
38 :
39 : public:
40 : // Flags -- only one for now. If we need more, they should be defined to
41 : // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
42 : enum
43 : {
44 : SEPARATOR_OPTIONAL = 1
45 : };
46 :
47 413 : nsTCharSeparatedTokenizer(const SubstringType& aSource,
48 : CharType aSeparatorChar,
49 : uint32_t aFlags = 0)
50 : : mIter(aSource.Data(), aSource.Length())
51 413 : , mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
52 : aSource.Length())
53 : , mSeparatorChar(aSeparatorChar)
54 : , mWhitespaceBeforeFirstToken(false)
55 : , mWhitespaceAfterCurrentToken(false)
56 : , mSeparatorAfterCurrentToken(false)
57 826 : , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
58 : {
59 : // Skip initial whitespace
60 413 : while (mIter < mEnd && IsWhitespace(*mIter)) {
61 0 : mWhitespaceBeforeFirstToken = true;
62 0 : ++mIter;
63 : }
64 413 : }
65 :
66 : /**
67 : * Checks if any more tokens are available.
68 : */
69 1263 : bool hasMoreTokens() const
70 : {
71 1263 : MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
72 : "Should be at beginning of token if there is one");
73 :
74 1263 : return mIter < mEnd;
75 : }
76 :
77 : /*
78 : * Returns true if there is whitespace prior to the first token.
79 : */
80 0 : bool whitespaceBeforeFirstToken() const
81 : {
82 0 : return mWhitespaceBeforeFirstToken;
83 : }
84 :
85 : /*
86 : * Returns true if there is a separator after the current token.
87 : * Useful if you want to check whether the last token has a separator
88 : * after it which may not be valid.
89 : */
90 30 : bool separatorAfterCurrentToken() const
91 : {
92 30 : return mSeparatorAfterCurrentToken;
93 : }
94 :
95 : /*
96 : * Returns true if there is any whitespace after the current token.
97 : */
98 0 : bool whitespaceAfterCurrentToken() const
99 : {
100 0 : return mWhitespaceAfterCurrentToken;
101 : }
102 :
103 : /**
104 : * Returns the next token.
105 : */
106 1171 : const DependentSubstringType nextToken()
107 : {
108 1171 : mozilla::RangedPtr<const CharType> tokenStart = mIter;
109 1171 : mozilla::RangedPtr<const CharType> tokenEnd = mIter;
110 :
111 1171 : MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
112 : "Should be at beginning of token if there is one");
113 :
114 : // Search until we hit separator or end (or whitespace, if a separator
115 : // isn't required -- see clause with 'break' below).
116 3969 : while (mIter < mEnd && *mIter != mSeparatorChar) {
117 : // Skip to end of the current word.
118 45545 : while (mIter < mEnd &&
119 22317 : !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
120 9926 : ++mIter;
121 : }
122 2075 : tokenEnd = mIter;
123 :
124 : // Skip whitespace after the current word.
125 2075 : mWhitespaceAfterCurrentToken = false;
126 4805 : while (mIter < mEnd && IsWhitespace(*mIter)) {
127 1365 : mWhitespaceAfterCurrentToken = true;
128 1365 : ++mIter;
129 : }
130 2075 : if (mSeparatorOptional) {
131 : // We've hit (and skipped) whitespace, and that's sufficient to end
132 : // our token, regardless of whether we've reached a SeparatorChar.
133 676 : break;
134 : } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
135 : }
136 :
137 1958 : mSeparatorAfterCurrentToken = (mIter != mEnd &&
138 787 : *mIter == mSeparatorChar);
139 1171 : MOZ_ASSERT(mSeparatorOptional ||
140 : (mSeparatorAfterCurrentToken == (mIter < mEnd)),
141 : "If we require a separator and haven't hit the end of "
142 : "our string, then we shouldn't have left the loop "
143 : "unless we hit a separator");
144 :
145 : // Skip separator (and any whitespace after it), if we're at one.
146 1171 : if (mSeparatorAfterCurrentToken) {
147 590 : ++mIter;
148 :
149 980 : while (mIter < mEnd && IsWhitespace(*mIter)) {
150 195 : mWhitespaceAfterCurrentToken = true;
151 195 : ++mIter;
152 : }
153 : }
154 :
155 1171 : return Substring(tokenStart.get(), tokenEnd.get());
156 : }
157 :
158 : private:
159 : mozilla::RangedPtr<const CharType> mIter;
160 : const mozilla::RangedPtr<const CharType> mEnd;
161 : CharType mSeparatorChar;
162 : bool mWhitespaceBeforeFirstToken;
163 : bool mWhitespaceAfterCurrentToken;
164 : bool mSeparatorAfterCurrentToken;
165 : bool mSeparatorOptional;
166 : };
167 :
168 : template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
169 : class nsCharSeparatedTokenizerTemplate
170 : : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace>
171 : {
172 : public:
173 150 : nsCharSeparatedTokenizerTemplate(const nsAString& aSource,
174 : char16_t aSeparatorChar,
175 : uint32_t aFlags = 0)
176 : : nsTCharSeparatedTokenizer<nsDependentSubstring,
177 150 : IsWhitespace>(aSource, aSeparatorChar, aFlags)
178 : {
179 150 : }
180 : };
181 :
182 : typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer;
183 :
184 : template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
185 : class nsCCharSeparatedTokenizerTemplate
186 : : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace>
187 : {
188 : public:
189 263 : nsCCharSeparatedTokenizerTemplate(const nsACString& aSource,
190 : char aSeparatorChar,
191 : uint32_t aFlags = 0)
192 : : nsTCharSeparatedTokenizer<nsDependentCSubstring,
193 263 : IsWhitespace>(aSource, aSeparatorChar, aFlags)
194 : {
195 263 : }
196 : };
197 :
198 : typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer;
199 :
200 : #endif /* __nsCharSeparatedTokenizer_h */
|