Line data Source code
1 : /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : /*
7 : * This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted
8 : * for use within Mozilla Gecko, separate from a standard ICU build.
9 : *
10 : * The original ICU license of the code follows:
11 : *
12 : * ICU License - ICU 1.8.1 and later
13 : *
14 : * COPYRIGHT AND PERMISSION NOTICE
15 : *
16 : * Copyright (c) 1995-2009 International Business Machines Corporation and
17 : * others
18 : *
19 : * All rights reserved.
20 : *
21 : * Permission is hereby granted, free of charge, to any person obtaining a
22 : * copy of this software and associated documentation files (the "Software"),
23 : * to deal in the Software without restriction, including without limitation
24 : * the rights to use, copy, modify, merge, publish, distribute, and/or sell
25 : * copies of the Software, and to permit persons to whom the Software is
26 : * furnished to do so, provided that the above copyright notice(s) and this
27 : * permission notice appear in all copies of the Software and that both the
28 : * above copyright notice(s) and this permission notice appear in supporting
29 : * documentation.
30 : *
31 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
34 : * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
35 : * BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
36 : * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
37 : * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
38 : * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
39 : * SOFTWARE.
40 : *
41 : * Except as contained in this notice, the name of a copyright holder shall
42 : * not be used in advertising or otherwise to promote the sale, use or other
43 : * dealings in this Software without prior written authorization of the
44 : * copyright holder.
45 : *
46 : * All trademarks and registered trademarks mentioned herein are the property
47 : * of their respective owners.
48 : */
49 :
50 : #include "gfxScriptItemizer.h"
51 : #include "nsUnicodeProperties.h"
52 : #include "nsCharTraits.h"
53 : #include "harfbuzz/hb.h"
54 :
55 : using namespace mozilla::unicode;
56 :
/*
 * Helpers for the paren stack, which is used as a ring of
 * PAREN_STACK_DEPTH slots.  They expand inside gfxScriptItemizer member
 * functions and refer directly to the members pushCount, fixupCount,
 * parenSP and parenStack.
 */

/* Reduce an index modulo the ring size. */
#define MOD(idx) ((idx) % PAREN_STACK_DEPTH)
/* Counter + 1, saturating at PAREN_STACK_DEPTH. */
#define LIMIT_INC(n) (((n) >= PAREN_STACK_DEPTH) ? PAREN_STACK_DEPTH : (n) + 1)
/* Step an index forwards by |step| slots (or by one slot), wrapping. */
#define INC(idx,step) (MOD((idx) + (step)))
#define INC1(idx) (MOD((idx) + 1))
/*
 * Step an index backwards by |step| slots (or by one slot); the ring size
 * is added first so the left operand of % does not go negative for steps
 * up to PAREN_STACK_DEPTH, even when the index is -1.
 */
#define DEC(idx,step) (MOD((idx) + PAREN_STACK_DEPTH - (step)))
#define DEC1(idx) (MOD((idx) + PAREN_STACK_DEPTH - 1))
/* Occupancy tests, based on the number of live entries. */
#define STACK_IS_EMPTY() (pushCount <= 0)
#define STACK_IS_NOT_EMPTY() (pushCount > 0)
/* The most recently pushed entry; only meaningful when the stack is not empty. */
#define TOP() (parenStack[parenSP])
/* Forget any entries that were waiting for a script fixup. */
#define SYNC_FIXUP() (fixupCount = 0)
67 :
68 : void
69 0 : gfxScriptItemizer::push(uint32_t endPairChar, Script newScriptCode)
70 : {
71 0 : pushCount = LIMIT_INC(pushCount);
72 0 : fixupCount = LIMIT_INC(fixupCount);
73 :
74 0 : parenSP = INC1(parenSP);
75 0 : parenStack[parenSP].endPairChar = endPairChar;
76 0 : parenStack[parenSP].scriptCode = newScriptCode;
77 0 : }
78 :
79 : void
80 0 : gfxScriptItemizer::pop()
81 : {
82 0 : if (STACK_IS_EMPTY()) {
83 0 : return;
84 : }
85 :
86 0 : if (fixupCount > 0) {
87 0 : fixupCount -= 1;
88 : }
89 :
90 0 : pushCount -= 1;
91 0 : parenSP = DEC1(parenSP);
92 :
93 : /* If the stack is now empty, reset the stack
94 : pointers to their initial values.
95 : */
96 0 : if (STACK_IS_EMPTY()) {
97 0 : parenSP = -1;
98 : }
99 : }
100 :
101 : void
102 61 : gfxScriptItemizer::fixup(Script newScriptCode)
103 : {
104 61 : int32_t fixupSP = DEC(parenSP, fixupCount);
105 :
106 61 : while (fixupCount-- > 0) {
107 0 : fixupSP = INC1(fixupSP);
108 0 : parenStack[fixupSP].scriptCode = newScriptCode;
109 : }
110 61 : }
111 :
112 : // We regard the current char as having the same script as the in-progress run
113 : // if either script code is Common or Inherited, or if the run script appears
114 : // in the character's ScriptExtensions, or if the char is a cluster extender.
115 : static inline bool
116 658 : SameScript(Script runScript, Script currCharScript, uint32_t aCurrCh)
117 : {
118 596 : return runScript <= Script::INHERITED ||
119 542 : currCharScript <= Script::INHERITED ||
120 0 : currCharScript == runScript ||
121 658 : IsClusterExtender(aCurrCh) ||
122 658 : HasScript(aCurrCh, runScript);
123 : }
124 :
125 62 : gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length)
126 62 : : textPtr(src), textLength(length)
127 : {
128 62 : reset();
129 62 : }
130 :
131 : void
132 0 : gfxScriptItemizer::SetText(const char16_t *src, uint32_t length)
133 : {
134 0 : textPtr = src;
135 0 : textLength = length;
136 :
137 0 : reset();
138 0 : }
139 :
140 : bool
141 124 : gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
142 : Script& aRunScript)
143 : {
144 : /* if we've fallen off the end of the text, we're done */
145 124 : if (scriptLimit >= textLength) {
146 62 : return false;
147 : }
148 :
149 62 : SYNC_FIXUP();
150 62 : scriptCode = Script::COMMON;
151 :
152 720 : for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
153 : uint32_t ch;
154 : Script sc;
155 658 : uint32_t startOfChar = scriptLimit;
156 :
157 658 : ch = textPtr[scriptLimit];
158 :
159 : /* decode UTF-16 (may be surrogate pair) */
160 658 : if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
161 0 : uint32_t low = textPtr[scriptLimit + 1];
162 0 : if (NS_IS_LOW_SURROGATE(low)) {
163 0 : ch = SURROGATE_TO_UCS4(ch, low);
164 0 : scriptLimit += 1;
165 : }
166 : }
167 :
168 : // Initialize gc to UNASSIGNED; we'll only set it to the true GC
169 : // if the character has script=COMMON, otherwise we don't care.
170 658 : uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
171 :
172 658 : sc = GetScriptCode(ch);
173 658 : if (sc == Script::COMMON) {
174 : /*
175 : * Paired character handling:
176 : *
177 : * if it's an open character, push it onto the stack.
178 : * if it's a close character, find the matching open on the
179 : * stack, and use that script code. Any non-matching open
180 : * characters above it on the stack will be popped.
181 : *
182 : * We only do this if the script is COMMON; for chars with
183 : * specific script assignments, we just use them as-is.
184 : */
185 55 : gc = GetGeneralCategory(ch);
186 55 : if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
187 0 : uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
188 0 : if (endPairChar != ch) {
189 0 : push(endPairChar, scriptCode);
190 : }
191 55 : } else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
192 0 : HasMirroredChar(ch))
193 : {
194 0 : while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
195 0 : pop();
196 : }
197 :
198 0 : if (STACK_IS_NOT_EMPTY()) {
199 0 : sc = TOP().scriptCode;
200 : }
201 : }
202 : }
203 :
204 658 : if (SameScript(scriptCode, sc, ch)) {
205 658 : if (scriptCode <= Script::INHERITED &&
206 : sc > Script::INHERITED)
207 : {
208 61 : scriptCode = sc;
209 61 : fixup(scriptCode);
210 : }
211 :
212 : /*
213 : * if this character is a close paired character,
214 : * pop the matching open character from the stack
215 : */
216 658 : if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
217 0 : HasMirroredChar(ch)) {
218 0 : pop();
219 : }
220 : } else {
221 : /*
222 : * reset scriptLimit in case it was advanced during reading a
223 : * multiple-code-unit character
224 : */
225 0 : scriptLimit = startOfChar;
226 :
227 0 : break;
228 : }
229 : }
230 :
231 62 : aRunStart = scriptStart;
232 62 : aRunLimit = scriptLimit;
233 62 : aRunScript = scriptCode;
234 :
235 62 : return true;
236 : }
|