Line data Source code
1 :
2 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "nsString.h"
8 : #include "nsIScriptableUConv.h"
9 : #include "nsScriptableUConv.h"
10 : #include "nsIStringStream.h"
11 : #include "nsComponentManagerUtils.h"
12 :
13 : using namespace mozilla;
14 :
15 : /* Implementation file */
16 0 : NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
17 :
18 0 : nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
19 0 : : mIsInternal(false)
20 : {
21 0 : }
22 :
23 0 : nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter()
24 : {
25 0 : }
26 :
27 : NS_IMETHODIMP
28 0 : nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
29 : nsACString& _retval)
30 : {
31 0 : if (!mEncoder)
32 0 : return NS_ERROR_FAILURE;
33 :
34 : // We can compute the length without replacement, because the
35 : // the replacement is only one byte long and a mappable character
36 : // would always output something, i.e. at least one byte.
37 : // When encoding to ISO-2022-JP, unmappables shouldn't be able
38 : // to cause more escape sequences to be emitted than the mappable
39 : // worst case where every input character causes an escape into
40 : // a different state.
41 : CheckedInt<size_t> needed =
42 0 : mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
43 0 : if (!needed.isValid() || needed.value() > UINT32_MAX) {
44 0 : return NS_ERROR_OUT_OF_MEMORY;
45 : }
46 :
47 0 : if (!_retval.SetLength(needed.value(), fallible)) {
48 0 : return NS_ERROR_OUT_OF_MEMORY;
49 : }
50 :
51 0 : auto src = MakeSpan(aSrc);
52 0 : auto dst = AsWritableBytes(MakeSpan(_retval));
53 0 : size_t totalWritten = 0;
54 : for (;;) {
55 : uint32_t result;
56 : size_t read;
57 : size_t written;
58 0 : Tie(result, read, written) =
59 0 : mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
60 0 : if (result != kInputEmpty && result != kOutputFull) {
61 0 : MOZ_RELEASE_ASSERT(written < dst.Length(),
62 : "Unmappables with one-byte replacement should not exceed mappable worst case.");
63 0 : dst[written++] = '?';
64 : }
65 0 : totalWritten += written;
66 0 : if (result == kInputEmpty) {
67 0 : MOZ_ASSERT(totalWritten <= UINT32_MAX);
68 0 : if (!_retval.SetLength(totalWritten, fallible)) {
69 0 : return NS_ERROR_OUT_OF_MEMORY;
70 : }
71 0 : return NS_OK;
72 : }
73 0 : src = src.From(read);
74 0 : dst = dst.From(written);
75 0 : }
76 : }
77 :
78 : NS_IMETHODIMP
79 0 : nsScriptableUnicodeConverter::Finish(nsACString& _retval)
80 : {
81 : // The documentation for this method says it should be called after
82 : // ConvertFromUnicode(). However, our own tests called it after
83 : // convertFromByteArray(), i.e. when *decoding*.
84 : // Assuming that there exists extensions that similarly call
85 : // this at the wrong time, let's deal. In general, it is a design
86 : // error for this class to handle conversions in both directions.
87 0 : if (!mEncoder) {
88 0 : _retval.Truncate();
89 0 : mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
90 0 : return NS_OK;
91 : }
92 : // If we are encoding to ISO-2022-JP, potentially
93 : // transition back to the ASCII state. The buffer
94 : // needs to be large enough for an additional NCR,
95 : // though.
96 0 : _retval.SetLength(13);
97 0 : Span<char16_t> src(nullptr);
98 : uint32_t result;
99 : size_t read;
100 : size_t written;
101 : bool hadErrors;
102 0 : Tie(result, read, written, hadErrors) =
103 0 : mEncoder->EncodeFromUTF16(src, _retval, true);
104 : Unused << hadErrors;
105 0 : MOZ_ASSERT(!read);
106 0 : MOZ_ASSERT(result == kInputEmpty);
107 0 : _retval.SetLength(written);
108 :
109 0 : mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
110 0 : mEncoder->Encoding()->NewEncoderInto(*mEncoder);
111 0 : return NS_OK;
112 : }
113 :
114 : NS_IMETHODIMP
115 0 : nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, nsAString& _retval)
116 : {
117 0 : return ConvertFromByteArray(
118 0 : reinterpret_cast<const uint8_t*>(aSrc.BeginReading()),
119 : aSrc.Length(),
120 0 : _retval);
121 : }
122 :
123 : NS_IMETHODIMP
124 0 : nsScriptableUnicodeConverter::ConvertFromByteArray(const uint8_t* aData,
125 : uint32_t aCount,
126 : nsAString& _retval)
127 : {
128 0 : if (!mDecoder)
129 0 : return NS_ERROR_FAILURE;
130 :
131 0 : CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(aCount);
132 0 : if (!needed.isValid() || needed.value() > UINT32_MAX) {
133 0 : return NS_ERROR_OUT_OF_MEMORY;
134 : }
135 :
136 0 : if (!_retval.SetLength(needed.value(), fallible)) {
137 0 : return NS_ERROR_OUT_OF_MEMORY;
138 : }
139 :
140 0 : auto src = MakeSpan(aData, aCount);
141 : uint32_t result;
142 : size_t read;
143 : size_t written;
144 : bool hadErrors;
145 : // The UTF-8 decoder used to throw regardless of the error behavior.
146 : // Simulating the old behavior for compatibility with legacy callers.
147 : // If callers want control over the behavior, they should switch to
148 : // TextDecoder.
149 0 : if (mDecoder->Encoding() == UTF_8_ENCODING) {
150 0 : Tie(result, read, written) =
151 0 : mDecoder->DecodeToUTF16WithoutReplacement(src, _retval, false);
152 0 : if (result != kInputEmpty) {
153 0 : return NS_ERROR_UDEC_ILLEGALINPUT;
154 : }
155 : } else {
156 0 : Tie(result, read, written, hadErrors) =
157 0 : mDecoder->DecodeToUTF16(src, _retval, false);
158 : }
159 0 : MOZ_ASSERT(result == kInputEmpty);
160 0 : MOZ_ASSERT(read == aCount);
161 0 : MOZ_ASSERT(written <= needed.value());
162 : Unused << hadErrors;
163 0 : if (!_retval.SetLength(written, fallible)) {
164 0 : return NS_ERROR_OUT_OF_MEMORY;
165 : }
166 0 : return NS_OK;
167 : }
168 :
169 : NS_IMETHODIMP
170 0 : nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
171 : uint32_t* aLen,
172 : uint8_t** _aData)
173 : {
174 0 : if (!mEncoder)
175 0 : return NS_ERROR_FAILURE;
176 :
177 : CheckedInt<size_t> needed =
178 0 : mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length());
179 0 : if (!needed.isValid() || needed.value() > UINT32_MAX) {
180 0 : return NS_ERROR_OUT_OF_MEMORY;
181 : }
182 :
183 0 : uint8_t* data = (uint8_t*)malloc(needed.value());
184 0 : if (!data) {
185 0 : return NS_ERROR_OUT_OF_MEMORY;
186 : }
187 0 : auto src = MakeSpan(aString);
188 0 : auto dst = MakeSpan(data, needed.value());
189 0 : size_t totalWritten = 0;
190 : for (;;) {
191 : uint32_t result;
192 : size_t read;
193 : size_t written;
194 0 : Tie(result, read, written) =
195 0 : mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true);
196 0 : if (result != kInputEmpty && result != kOutputFull) {
197 : // There's always room for one byte in the case of
198 : // an unmappable character, because otherwise
199 : // we'd have gotten `kOutputFull`.
200 0 : dst[written++] = '?';
201 : }
202 0 : totalWritten += written;
203 0 : if (result == kInputEmpty) {
204 0 : *_aData = data;
205 0 : MOZ_ASSERT(totalWritten <= UINT32_MAX);
206 0 : *aLen = totalWritten;
207 0 : return NS_OK;
208 : }
209 0 : src = src.From(read);
210 0 : dst = dst.From(written);
211 0 : }
212 : }
213 :
214 : NS_IMETHODIMP
215 0 : nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
216 : nsIInputStream** _retval)
217 : {
218 : nsresult rv;
219 : nsCOMPtr<nsIStringInputStream> inputStream =
220 0 : do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
221 0 : if (NS_FAILED(rv))
222 0 : return rv;
223 :
224 : uint8_t* data;
225 : uint32_t dataLen;
226 0 : rv = ConvertToByteArray(aString, &dataLen, &data);
227 0 : if (NS_FAILED(rv))
228 0 : return rv;
229 :
230 0 : rv = inputStream->AdoptData(reinterpret_cast<char*>(data), dataLen);
231 0 : if (NS_FAILED(rv)) {
232 0 : free(data);
233 0 : return rv;
234 : }
235 :
236 0 : NS_ADDREF(*_retval = inputStream);
237 0 : return rv;
238 : }
239 :
240 : NS_IMETHODIMP
241 0 : nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset)
242 : {
243 0 : if (!mDecoder) {
244 0 : aCharset.Truncate();
245 : } else {
246 0 : mDecoder->Encoding()->Name(aCharset);
247 : }
248 0 : return NS_OK;
249 : }
250 :
251 : NS_IMETHODIMP
252 0 : nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset)
253 : {
254 0 : return InitConverter(aCharset);
255 : }
256 :
257 : NS_IMETHODIMP
258 0 : nsScriptableUnicodeConverter::GetIsInternal(bool *aIsInternal)
259 : {
260 0 : *aIsInternal = mIsInternal;
261 0 : return NS_OK;
262 : }
263 :
264 : NS_IMETHODIMP
265 0 : nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal)
266 : {
267 0 : mIsInternal = aIsInternal;
268 0 : return NS_OK;
269 : }
270 :
271 : nsresult
272 0 : nsScriptableUnicodeConverter::InitConverter(const nsACString& aCharset)
273 : {
274 0 : mEncoder = nullptr;
275 0 : mDecoder = nullptr;
276 :
277 0 : auto encoding = Encoding::ForLabelNoReplacement(aCharset);
278 0 : if (!encoding) {
279 0 : return NS_ERROR_UCONV_NOCONV;
280 : }
281 0 : if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) {
282 0 : mEncoder = encoding->NewEncoder();
283 : }
284 0 : mDecoder = encoding->NewDecoderWithBOMRemoval();
285 0 : return NS_OK;
286 : }
|