Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef nsCSPParser_h___
8 : #define nsCSPParser_h___
9 :
10 : #include "nsCSPUtils.h"
11 : #include "nsIURI.h"
12 : #include "nsString.h"
13 :
14 : /**
15 : * How does the parsing work?
16 : *
17 : * We generate tokens by splitting the policy-string by whitespace and semicolon.
18 : * Internally the tokens are represented as an array of string-arrays:
19 : *
20 : * [
21 : * [ name, src, src, src, ... ],
22 : * [ name, src, src, src, ... ],
23 : * [ name, src, src, src, ... ]
24 : * ]
25 : *
26 : * for example:
27 : * [
28 : * [ img-src, http://www.example.com, http://www.test.com ],
29 : * [ default-src, 'self'],
30 : * [ script-src, 'unsafe-eval', 'unsafe-inline' ],
31 : * ]
32 : *
33 : * The first element of each array has to be a valid directive-name, otherwise we can
34 : * ignore the remaining elements of the array. Also, if the
35 : * directive already exists in the current policy, we can ignore
36 : * the remaining elements of that array. (http://www.w3.org/TR/CSP/#parsing)
37 : */
38 :
39 : typedef nsTArray< nsTArray<nsString> > cspTokens;
40 :
41 : class nsCSPTokenizer {
42 :
43 : public:
44 : static void tokenizeCSPPolicy(const nsAString &aPolicyString, cspTokens& outTokens);
45 :
46 : private:
47 : nsCSPTokenizer(const char16_t* aStart, const char16_t* aEnd);
48 : ~nsCSPTokenizer();
49 :
50 0 : inline bool atEnd()
51 : {
52 0 : return mCurChar >= mEndChar;
53 : }
54 :
55 0 : inline void skipWhiteSpace()
56 : {
57 0 : while (mCurChar < mEndChar &&
58 0 : nsContentUtils::IsHTMLWhitespace(*mCurChar)) {
59 0 : mCurToken.Append(*mCurChar++);
60 : }
61 0 : mCurToken.Truncate();
62 0 : }
63 :
64 0 : inline void skipWhiteSpaceAndSemicolon()
65 : {
66 0 : while (mCurChar < mEndChar && (*mCurChar == ';' ||
67 0 : nsContentUtils::IsHTMLWhitespace(*mCurChar))){
68 0 : mCurToken.Append(*mCurChar++);
69 : }
70 0 : mCurToken.Truncate();
71 0 : }
72 :
73 0 : inline bool accept(char16_t aChar)
74 : {
75 0 : NS_ASSERTION(mCurChar < mEndChar, "Trying to dereference mEndChar");
76 0 : if (*mCurChar == aChar) {
77 0 : mCurToken.Append(*mCurChar++);
78 0 : return true;
79 : }
80 0 : return false;
81 : }
82 :
83 : void generateNextToken();
84 : void generateTokens(cspTokens& outTokens);
85 :
86 : const char16_t* mCurChar;
87 : const char16_t* mEndChar;
88 : nsString mCurToken;
89 : };
90 :
91 :
92 : class nsCSPParser {
93 :
94 : public:
95 : /**
96 : * The CSP parser only has one publicly accessible function, which is parseContentSecurityPolicy.
97 : * Internally the input string is separated into string tokens and policy() is called, which starts
98 : * parsing the policy. The parser calls one function after the other according the the source-list
99 : * from http://www.w3.org/TR/CSP11/#source-list. E.g., the parser can only call port() after the parser
100 : * has already processed any possible host in host(), similar to a finite state machine.
101 : */
102 : static nsCSPPolicy* parseContentSecurityPolicy(const nsAString &aPolicyString,
103 : nsIURI *aSelfURI,
104 : bool aReportOnly,
105 : nsCSPContext* aCSPContext,
106 : bool aDeliveredViaMetaTag);
107 :
108 : private:
109 : nsCSPParser(cspTokens& aTokens,
110 : nsIURI* aSelfURI,
111 : nsCSPContext* aCSPContext,
112 : bool aDeliveredViaMetaTag);
113 :
114 : static bool sCSPExperimentalEnabled;
115 : static bool sStrictDynamicEnabled;
116 :
117 : ~nsCSPParser();
118 :
119 :
120 : // Parsing the CSP using the source-list from http://www.w3.org/TR/CSP11/#source-list
121 : nsCSPPolicy* policy();
122 : void directive();
123 : nsCSPDirective* directiveName();
124 : void directiveValue(nsTArray<nsCSPBaseSrc*>& outSrcs);
125 : void requireSRIForDirectiveValue(nsRequireSRIForDirective* aDir);
126 : void referrerDirectiveValue(nsCSPDirective* aDir);
127 : void reportURIList(nsCSPDirective* aDir);
128 : void sandboxFlagList(nsCSPDirective* aDir);
129 : void sourceList(nsTArray<nsCSPBaseSrc*>& outSrcs);
130 : nsCSPBaseSrc* sourceExpression();
131 : nsCSPSchemeSrc* schemeSource();
132 : nsCSPHostSrc* hostSource();
133 : nsCSPBaseSrc* keywordSource();
134 : nsCSPNonceSrc* nonceSource();
135 : nsCSPHashSrc* hashSource();
136 : nsCSPHostSrc* host();
137 : bool hostChar();
138 : bool schemeChar();
139 : bool port();
140 : bool path(nsCSPHostSrc* aCspHost);
141 :
142 : bool subHost(); // helper function to parse subDomains
143 : bool atValidUnreservedChar(); // helper function to parse unreserved
144 : bool atValidSubDelimChar(); // helper function to parse sub-delims
145 : bool atValidPctEncodedChar(); // helper function to parse pct-encoded
146 : bool subPath(nsCSPHostSrc* aCspHost); // helper function to parse paths
147 :
148 0 : inline bool atEnd()
149 : {
150 0 : return mCurChar >= mEndChar;
151 : }
152 :
153 0 : inline bool accept(char16_t aSymbol)
154 : {
155 0 : if (atEnd()) { return false; }
156 0 : return (*mCurChar == aSymbol) && advance();
157 : }
158 :
159 0 : inline bool accept(bool (*aClassifier) (char16_t))
160 : {
161 0 : if (atEnd()) { return false; }
162 0 : return (aClassifier(*mCurChar)) && advance();
163 : }
164 :
165 0 : inline bool peek(char16_t aSymbol)
166 : {
167 0 : if (atEnd()) { return false; }
168 0 : return *mCurChar == aSymbol;
169 : }
170 :
171 0 : inline bool peek(bool (*aClassifier) (char16_t))
172 : {
173 0 : if (atEnd()) { return false; }
174 0 : return aClassifier(*mCurChar);
175 : }
176 :
177 0 : inline bool advance()
178 : {
179 0 : if (atEnd()) { return false; }
180 0 : mCurValue.Append(*mCurChar++);
181 0 : return true;
182 : }
183 :
184 0 : inline void resetCurValue()
185 : {
186 0 : mCurValue.Truncate();
187 0 : }
188 :
189 : bool atEndOfPath();
190 : bool atValidPathChar();
191 :
192 : void resetCurChar(const nsAString& aToken);
193 :
194 : void logWarningErrorToConsole(uint32_t aSeverityFlag,
195 : const char* aProperty,
196 : const char16_t* aParams[],
197 : uint32_t aParamsLength);
198 :
199 : /**
200 : * When parsing the policy, the parser internally uses the following helper
201 : * variables/members which are used/reset during parsing. The following
202 : * example explains how they are used.
203 : * The tokenizer separats all input into arrays of arrays of strings, which
204 : * are stored in mTokens, for example:
205 : * mTokens = [ [ script-src, http://www.example.com, 'self' ], ... ]
206 : *
207 : * When parsing starts, mCurdir always holds the currently processed array of strings.
208 : * In our example:
209 : * mCurDir = [ script-src, http://www.example.com, 'self' ]
210 : *
211 : * During parsing, we process/consume one string at a time of that array.
212 : * We set mCurToken to the string we are currently processing; in the first case
213 : * that would be:
214 : * mCurToken = script-src
215 : * which allows to do simple string comparisons to see if mCurToken is a valid directive.
216 : *
217 : * Continuing parsing, the parser consumes the next string of that array, resetting:
218 : * mCurToken = "http://www.example.com"
219 : * ^ ^
220 : * mCurChar mEndChar (points *after* the 'm')
221 : * mCurValue = ""
222 : *
223 : * After calling advance() the first time, helpers would hold the following values:
224 : * mCurToken = "http://www.example.com"
225 : * ^ ^
226 : * mCurChar mEndChar (points *after* the 'm')
227 : * mCurValue = "h"
228 : *
229 : * We continue parsing till all strings of one directive are consumed, then we reset
230 : * mCurDir to hold the next array of strings and start the process all over.
231 : */
232 :
233 : const char16_t* mCurChar;
234 : const char16_t* mEndChar;
235 : nsString mCurValue;
236 : nsString mCurToken;
237 : nsTArray<nsString> mCurDir;
238 :
239 : // helpers to allow invalidation of srcs within script-src and style-src
240 : // if either 'strict-dynamic' or at least a hash or nonce is present.
241 : bool mHasHashOrNonce; // false, if no hash or nonce is defined
242 : bool mStrictDynamic; // false, if 'strict-dynamic' is not defined
243 : nsCSPKeywordSrc* mUnsafeInlineKeywordSrc; // null, otherwise invlidate()
244 :
245 : // cache variables for child-src and frame-src directive handling.
246 : // frame-src is deprecated in favor of child-src, however if we
247 : // see a frame-src directive, it takes precedence for frames and iframes.
248 : // At the end of parsing, if we have a child-src directive, we need to
249 : // decide whether it will handle frames, or if there is a frame-src we
250 : // should honor instead.
251 : nsCSPChildSrcDirective* mChildSrc;
252 : nsCSPDirective* mFrameSrc;
253 :
254 : // cache variable to let nsCSPHostSrc know that it's within
255 : // the frame-ancestors directive.
256 : bool mParsingFrameAncestorsDir;
257 :
258 : cspTokens mTokens;
259 : nsIURI* mSelfURI;
260 : nsCSPPolicy* mPolicy;
261 : nsCSPContext* mCSPContext; // used for console logging
262 : bool mDeliveredViaMetaTag;
263 : };
264 :
265 : #endif /* nsCSPParser_h___ */
|