Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 : * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "mozilla/ArrayUtils.h"
8 :
9 : #include "mozStorageSQLFunctions.h"
10 : #include "nsUnicharUtils.h"
11 : #include <algorithm>
12 :
13 : namespace mozilla {
14 : namespace storage {
15 :
16 : ////////////////////////////////////////////////////////////////////////////////
17 : //// Local Helper Functions
18 :
19 : namespace {
20 :
21 : /**
22 : * Performs the LIKE comparison of a string against a pattern. For more detail
23 : * see http://www.sqlite.org/lang_expr.html#like.
24 : *
25 : * @param aPatternItr
26 : * An iterator at the start of the pattern to check for.
27 : * @param aPatternEnd
28 : * An iterator at the end of the pattern to check for.
29 : * @param aStringItr
30 : * An iterator at the start of the string to check for the pattern.
31 : * @param aStringEnd
32 : * An iterator at the end of the string to check for the pattern.
33 : * @param aEscapeChar
34 : * The character to use for escaping symbols in the pattern.
35 : * @return 1 if the pattern is found, 0 otherwise.
36 : */
37 : int
38 0 : likeCompare(nsAString::const_iterator aPatternItr,
39 : nsAString::const_iterator aPatternEnd,
40 : nsAString::const_iterator aStringItr,
41 : nsAString::const_iterator aStringEnd,
42 : char16_t aEscapeChar)
43 : {
44 0 : const char16_t MATCH_ALL('%');
45 0 : const char16_t MATCH_ONE('_');
46 :
47 0 : bool lastWasEscape = false;
48 0 : while (aPatternItr != aPatternEnd) {
49 : /**
50 : * What we do in here is take a look at each character from the input
51 : * pattern, and do something with it. There are 4 possibilities:
52 : * 1) character is an un-escaped match-all character
53 : * 2) character is an un-escaped match-one character
54 : * 3) character is an un-escaped escape character
55 : * 4) character is not any of the above
56 : */
57 0 : if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
58 : // CASE 1
59 : /**
60 : * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
61 : * MATCH_ALL character. For each MATCH_ONE character, skip one character
62 : * in the pattern string.
63 : */
64 0 : while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
65 0 : if (*aPatternItr == MATCH_ONE) {
66 : // If we've hit the end of the string we are testing, no match
67 0 : if (aStringItr == aStringEnd)
68 0 : return 0;
69 0 : aStringItr++;
70 : }
71 0 : aPatternItr++;
72 : }
73 :
74 : // If we've hit the end of the pattern string, match
75 0 : if (aPatternItr == aPatternEnd)
76 0 : return 1;
77 :
78 0 : while (aStringItr != aStringEnd) {
79 0 : if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
80 : aEscapeChar)) {
81 : // we've hit a match, so indicate this
82 0 : return 1;
83 : }
84 0 : aStringItr++;
85 : }
86 :
87 : // No match
88 0 : return 0;
89 : }
90 0 : else if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
91 : // CASE 2
92 0 : if (aStringItr == aStringEnd) {
93 : // If we've hit the end of the string we are testing, no match
94 0 : return 0;
95 : }
96 0 : aStringItr++;
97 0 : lastWasEscape = false;
98 : }
99 0 : else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
100 : // CASE 3
101 0 : lastWasEscape = true;
102 : }
103 : else {
104 : // CASE 4
105 0 : if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
106 : // If we've hit a point where the strings don't match, there is no match
107 0 : return 0;
108 : }
109 0 : aStringItr++;
110 0 : lastWasEscape = false;
111 : }
112 :
113 0 : aPatternItr++;
114 : }
115 :
116 0 : return aStringItr == aStringEnd;
117 : }
118 :
119 : /**
120 : * Compute the Levenshtein Edit Distance between two strings.
121 : *
122 : * @param aStringS
123 : * a string
124 : * @param aStringT
125 : * another string
126 : * @param _result
127 : * an outparam that will receive the edit distance between the arguments
128 : * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
129 : */
130 : int
131 0 : levenshteinDistance(const nsAString &aStringS,
132 : const nsAString &aStringT,
133 : int *_result)
134 : {
135 : // Set the result to a non-sensical value in case we encounter an error.
136 0 : *_result = -1;
137 :
138 0 : const uint32_t sLen = aStringS.Length();
139 0 : const uint32_t tLen = aStringT.Length();
140 :
141 0 : if (sLen == 0) {
142 0 : *_result = tLen;
143 0 : return SQLITE_OK;
144 : }
145 0 : if (tLen == 0) {
146 0 : *_result = sLen;
147 0 : return SQLITE_OK;
148 : }
149 :
150 : // Notionally, Levenshtein Distance is computed in a matrix. If we
151 : // assume s = "span" and t = "spam", the matrix would look like this:
152 : // s -->
153 : // t s p a n
154 : // | 0 1 2 3 4
155 : // V s 1 * * * *
156 : // p 2 * * * *
157 : // a 3 * * * *
158 : // m 4 * * * *
159 : //
160 : // Note that the row width is sLen + 1 and the column height is tLen + 1,
161 : // where sLen is the length of the string "s" and tLen is the length of "t".
162 : // The first row and the first column are initialized as shown, and
163 : // the algorithm computes the remaining cells row-by-row, and
164 : // left-to-right within each row. The computation only requires that
165 : // we be able to see the current row and the previous one.
166 :
167 : // Allocate memory for two rows.
168 0 : AutoTArray<int, nsAutoString::kDefaultStorageSize> row1;
169 0 : AutoTArray<int, nsAutoString::kDefaultStorageSize> row2;
170 :
171 : // Declare the raw pointers that will actually be used to access the memory.
172 0 : int *prevRow = row1.AppendElements(sLen + 1);
173 0 : int *currRow = row2.AppendElements(sLen + 1);
174 :
175 : // Initialize the first row.
176 0 : for (uint32_t i = 0; i <= sLen; i++)
177 0 : prevRow[i] = i;
178 :
179 0 : const char16_t *s = aStringS.BeginReading();
180 0 : const char16_t *t = aStringT.BeginReading();
181 :
182 : // Compute the empty cells in the "matrix" row-by-row, starting with
183 : // the second row.
184 0 : for (uint32_t ti = 1; ti <= tLen; ti++) {
185 :
186 : // Initialize the first cell in this row.
187 0 : currRow[0] = ti;
188 :
189 : // Get the character from "t" that corresponds to this row.
190 0 : const char16_t tch = t[ti - 1];
191 :
192 : // Compute the remaining cells in this row, left-to-right,
193 : // starting at the second column (and first character of "s").
194 0 : for (uint32_t si = 1; si <= sLen; si++) {
195 :
196 : // Get the character from "s" that corresponds to this column,
197 : // compare it to the t-character, and compute the "cost".
198 0 : const char16_t sch = s[si - 1];
199 0 : int cost = (sch == tch) ? 0 : 1;
200 :
201 : // ............ We want to calculate the value of cell "d" from
202 : // ...ab....... the previously calculated (or initialized) cells
203 : // ...cd....... "a", "b", and "c", where d = min(a', b', c').
204 : // ............
205 0 : int aPrime = prevRow[si - 1] + cost;
206 0 : int bPrime = prevRow[si] + 1;
207 0 : int cPrime = currRow[si - 1] + 1;
208 0 : currRow[si] = std::min(aPrime, std::min(bPrime, cPrime));
209 : }
210 :
211 : // Advance to the next row. The current row becomes the previous
212 : // row and we recycle the old previous row as the new current row.
213 : // We don't need to re-initialize the new current row since we will
214 : // rewrite all of its cells anyway.
215 0 : int *oldPrevRow = prevRow;
216 0 : prevRow = currRow;
217 0 : currRow = oldPrevRow;
218 : }
219 :
220 : // The final result is the value of the last cell in the last row.
221 : // Note that that's now in the "previous" row, since we just swapped them.
222 0 : *_result = prevRow[sLen];
223 0 : return SQLITE_OK;
224 : }
225 :
226 : // This struct is used only by registerFunctions below, but ISO C++98 forbids
227 : // instantiating a template dependent on a locally-defined type. Boo-urns!
228 : struct Functions {
229 : const char *zName;
230 : int nArg;
231 : int enc;
232 : void *pContext;
233 : void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
234 : };
235 :
236 : } // namespace
237 :
238 : ////////////////////////////////////////////////////////////////////////////////
239 : //// Exposed Functions
240 :
241 : int
242 8 : registerFunctions(sqlite3 *aDB)
243 : {
244 : Functions functions[] = {
245 : {"lower",
246 : 1,
247 : SQLITE_UTF16,
248 : 0,
249 : caseFunction},
250 : {"lower",
251 : 1,
252 : SQLITE_UTF8,
253 : 0,
254 : caseFunction},
255 : {"upper",
256 : 1,
257 : SQLITE_UTF16,
258 : (void*)1,
259 : caseFunction},
260 : {"upper",
261 : 1,
262 : SQLITE_UTF8,
263 : (void*)1,
264 : caseFunction},
265 :
266 : {"like",
267 : 2,
268 : SQLITE_UTF16,
269 : 0,
270 : likeFunction},
271 : {"like",
272 : 2,
273 : SQLITE_UTF8,
274 : 0,
275 : likeFunction},
276 : {"like",
277 : 3,
278 : SQLITE_UTF16,
279 : 0,
280 : likeFunction},
281 : {"like",
282 : 3,
283 : SQLITE_UTF8,
284 : 0,
285 : likeFunction},
286 :
287 : {"levenshteinDistance",
288 : 2,
289 : SQLITE_UTF16,
290 : 0,
291 : levenshteinDistanceFunction},
292 : {"levenshteinDistance",
293 : 2,
294 : SQLITE_UTF8,
295 : 0,
296 : levenshteinDistanceFunction},
297 8 : };
298 :
299 8 : int rv = SQLITE_OK;
300 88 : for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) {
301 80 : struct Functions *p = &functions[i];
302 80 : rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
303 80 : p->xFunc, nullptr, nullptr);
304 : }
305 :
306 8 : return rv;
307 : }
308 :
309 : ////////////////////////////////////////////////////////////////////////////////
310 : //// SQL Functions
311 :
312 : void
313 0 : caseFunction(sqlite3_context *aCtx,
314 : int aArgc,
315 : sqlite3_value **aArgv)
316 : {
317 0 : NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
318 :
319 0 : nsAutoString data(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
320 0 : bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
321 :
322 0 : if (toUpper)
323 0 : ::ToUpperCase(data);
324 : else
325 0 : ::ToLowerCase(data);
326 :
327 : // Set the result.
328 0 : ::sqlite3_result_text16(aCtx, data.get(), -1, SQLITE_TRANSIENT);
329 0 : }
330 :
331 : /**
332 : * This implements the like() SQL function. This is used by the LIKE operator.
333 : * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
334 : * an escape character, say E, it is implemented as 'like(B, A, E)'.
335 : */
336 : void
337 0 : likeFunction(sqlite3_context *aCtx,
338 : int aArgc,
339 : sqlite3_value **aArgv)
340 : {
341 0 : NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
342 :
343 0 : if (::sqlite3_value_bytes(aArgv[0]) > SQLITE_MAX_LIKE_PATTERN_LENGTH) {
344 : ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
345 0 : SQLITE_TOOBIG);
346 0 : return;
347 : }
348 :
349 0 : if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
350 0 : return;
351 :
352 0 : nsDependentString A(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1])));
353 0 : nsDependentString B(static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0])));
354 0 : NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
355 :
356 0 : char16_t E = 0;
357 0 : if (3 == aArgc)
358 0 : E = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[2]))[0];
359 :
360 0 : nsAString::const_iterator itrString, endString;
361 0 : A.BeginReading(itrString);
362 0 : A.EndReading(endString);
363 0 : nsAString::const_iterator itrPattern, endPattern;
364 0 : B.BeginReading(itrPattern);
365 0 : B.EndReading(endPattern);
366 0 : ::sqlite3_result_int(aCtx, likeCompare(itrPattern, endPattern, itrString,
367 0 : endString, E));
368 : }
369 :
370 0 : void levenshteinDistanceFunction(sqlite3_context *aCtx,
371 : int aArgc,
372 : sqlite3_value **aArgv)
373 : {
374 0 : NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
375 :
376 : // If either argument is a SQL NULL, then return SQL NULL.
377 0 : if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
378 0 : ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
379 0 : ::sqlite3_result_null(aCtx);
380 0 : return;
381 : }
382 :
383 0 : int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
384 0 : const char16_t *a = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[0]));
385 :
386 0 : int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
387 0 : const char16_t *b = static_cast<const char16_t *>(::sqlite3_value_text16(aArgv[1]));
388 :
389 : // Compute the Levenshtein Distance, and return the result (or error).
390 0 : int distance = -1;
391 0 : const nsDependentString A(a, aLen);
392 0 : const nsDependentString B(b, bLen);
393 0 : int status = levenshteinDistance(A, B, &distance);
394 0 : if (status == SQLITE_OK) {
395 0 : ::sqlite3_result_int(aCtx, distance);
396 : }
397 0 : else if (status == SQLITE_NOMEM) {
398 0 : ::sqlite3_result_error_nomem(aCtx);
399 : }
400 : else {
401 0 : ::sqlite3_result_error(aCtx, "User function returned error code", -1);
402 : }
403 : }
404 :
405 : } // namespace storage
406 : } // namespace mozilla
|