Line data Source code
1 : /*
2 : * Copyright 2011 Google Inc.
3 : *
4 : * Use of this source code is governed by a BSD-style license that can be
5 : * found in the LICENSE file.
6 : */
7 :
8 : #include "SkPDFMakeToUnicodeCmap.h"
9 : #include "SkPDFUtils.h"
10 : #include "SkUtils.h"
11 :
12 0 : static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
13 : bool multibyte) {
14 : // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
15 : // It's there to prevent old version Adobe Readers from malfunctioning.
16 : const char* kHeader =
17 : "/CIDInit /ProcSet findresource begin\n"
18 : "12 dict begin\n"
19 0 : "begincmap\n";
20 0 : cmap->writeText(kHeader);
21 :
22 : // The /CIDSystemInfo must be consistent to the one in
23 : // SkPDFFont::populateCIDFont().
24 : // We can not pass over the system info object here because the format is
25 : // different. This is not a reference object.
26 : const char* kSysInfo =
27 : "/CIDSystemInfo\n"
28 : "<< /Registry (Adobe)\n"
29 : "/Ordering (UCS)\n"
30 : "/Supplement 0\n"
31 0 : ">> def\n";
32 0 : cmap->writeText(kSysInfo);
33 :
34 : // The CMapName must be consistent to /CIDSystemInfo above.
35 : // /CMapType 2 means ToUnicode.
36 : // Codespace range just tells the PDF processor the valid range.
37 : const char* kTypeInfoHeader =
38 : "/CMapName /Adobe-Identity-UCS def\n"
39 : "/CMapType 2 def\n"
40 0 : "1 begincodespacerange\n";
41 0 : cmap->writeText(kTypeInfoHeader);
42 0 : if (multibyte) {
43 0 : cmap->writeText("<0000> <FFFF>\n");
44 : } else {
45 0 : cmap->writeText("<00> <FF>\n");
46 : }
47 0 : cmap->writeText("endcodespacerange\n");
48 0 : }
49 :
50 0 : static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
51 : const char kFooter[] =
52 : "endcmap\n"
53 : "CMapName currentdict /CMap defineresource pop\n"
54 : "end\n"
55 0 : "end";
56 0 : cmap->writeText(kFooter);
57 0 : }
58 :
59 : namespace {
60 : struct BFChar {
61 : SkGlyphID fGlyphId;
62 : SkUnichar fUnicode;
63 : };
64 :
65 : struct BFRange {
66 : SkGlyphID fStart;
67 : SkGlyphID fEnd;
68 : SkUnichar fUnicode;
69 : };
70 : } // namespace
71 :
72 0 : static void write_glyph(SkDynamicMemoryWStream* cmap,
73 : bool multiByte,
74 : SkGlyphID gid) {
75 0 : if (multiByte) {
76 0 : SkPDFUtils::WriteUInt16BE(cmap, gid);
77 : } else {
78 0 : SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
79 : }
80 0 : }
81 :
82 0 : static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
83 : bool multiByte,
84 : SkDynamicMemoryWStream* cmap) {
85 : // PDF spec defines that every bf* list can have at most 100 entries.
86 0 : for (int i = 0; i < bfchar.count(); i += 100) {
87 0 : int count = bfchar.count() - i;
88 0 : count = SkMin32(count, 100);
89 0 : cmap->writeDecAsText(count);
90 0 : cmap->writeText(" beginbfchar\n");
91 0 : for (int j = 0; j < count; ++j) {
92 0 : cmap->writeText("<");
93 0 : write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
94 0 : cmap->writeText("> <");
95 0 : SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
96 0 : cmap->writeText(">\n");
97 : }
98 0 : cmap->writeText("endbfchar\n");
99 : }
100 0 : }
101 :
102 0 : static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
103 : bool multiByte,
104 : SkDynamicMemoryWStream* cmap) {
105 : // PDF spec defines that every bf* list can have at most 100 entries.
106 0 : for (int i = 0; i < bfrange.count(); i += 100) {
107 0 : int count = bfrange.count() - i;
108 0 : count = SkMin32(count, 100);
109 0 : cmap->writeDecAsText(count);
110 0 : cmap->writeText(" beginbfrange\n");
111 0 : for (int j = 0; j < count; ++j) {
112 0 : cmap->writeText("<");
113 0 : write_glyph(cmap, multiByte, bfrange[i + j].fStart);
114 0 : cmap->writeText("> <");
115 0 : write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
116 0 : cmap->writeText("> <");
117 0 : SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
118 0 : cmap->writeText(">\n");
119 : }
120 0 : cmap->writeText("endbfrange\n");
121 : }
122 0 : }
123 :
124 : // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
125 : // Technote 5014.
126 : // The function is not static so we can test it in unit tests.
127 : //
128 : // Current implementation guarantees bfchar and bfrange entries do not overlap.
129 : //
130 : // Current implementation does not attempt aggresive optimizations against
131 : // following case because the specification is not clear.
132 : //
133 : // 4 beginbfchar 1 beginbfchar
134 : // <0003> <0013> <0020> <0014>
135 : // <0005> <0015> to endbfchar
136 : // <0007> <0017> 1 beginbfrange
137 : // <0020> <0014> <0003> <0007> <0013>
138 : // endbfchar endbfrange
139 : //
140 : // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
141 : // overlap, but succeeding maps supersede preceding maps."
142 : //
143 : // In case of searching text in PDF, bfrange will have higher precedence so
144 : // typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
145 : // the spec does not mention how will this kind of conflict being resolved.
146 : //
147 : // For the worst case (having 65536 continuous unicode and we use every other
148 : // one of them), the possible savings by aggressive optimization is 416KB
149 : // pre-compressed and does not provide enough motivation for implementation.
150 0 : void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
151 : const SkBitSet* subset,
152 : SkDynamicMemoryWStream* cmap,
153 : bool multiByteGlyphs,
154 : SkGlyphID firstGlyphID,
155 : SkGlyphID lastGlyphID) {
156 0 : if (glyphToUnicode.isEmpty()) {
157 0 : return;
158 : }
159 0 : int glyphOffset = 0;
160 0 : if (!multiByteGlyphs) {
161 0 : glyphOffset = firstGlyphID - 1;
162 : }
163 :
164 0 : SkTDArray<BFChar> bfcharEntries;
165 0 : SkTDArray<BFRange> bfrangeEntries;
166 :
167 0 : BFRange currentRangeEntry = {0, 0, 0};
168 0 : bool rangeEmpty = true;
169 : const int limit =
170 0 : SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
171 :
172 0 : for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
173 0 : bool inSubset = i < limit &&
174 0 : (subset == nullptr || subset->has(i + glyphOffset));
175 0 : if (!rangeEmpty) {
176 : // PDF spec requires bfrange not changing the higher byte,
177 : // e.g. <1035> <10FF> <2222> is ok, but
178 : // <1035> <1100> <2222> is no good
179 : bool inRange =
180 0 : i == currentRangeEntry.fEnd + 1 &&
181 0 : i >> 8 == currentRangeEntry.fStart >> 8 &&
182 0 : i < limit &&
183 0 : glyphToUnicode[i + glyphOffset] ==
184 0 : currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
185 0 : if (!inSubset || !inRange) {
186 0 : if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
187 0 : bfrangeEntries.push(currentRangeEntry);
188 : } else {
189 0 : BFChar* entry = bfcharEntries.append();
190 0 : entry->fGlyphId = currentRangeEntry.fStart;
191 0 : entry->fUnicode = currentRangeEntry.fUnicode;
192 : }
193 0 : rangeEmpty = true;
194 : }
195 : }
196 0 : if (inSubset) {
197 0 : currentRangeEntry.fEnd = i;
198 0 : if (rangeEmpty) {
199 0 : currentRangeEntry.fStart = i;
200 0 : currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
201 0 : rangeEmpty = false;
202 : }
203 : }
204 : }
205 :
206 : // The spec requires all bfchar entries for a font must come before bfrange
207 : // entries.
208 0 : append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
209 0 : append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
210 : }
211 :
212 0 : sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
213 : const SkTDArray<SkUnichar>& glyphToUnicode,
214 : const SkBitSet* subset,
215 : bool multiByteGlyphs,
216 : SkGlyphID firstGlyphID,
217 : SkGlyphID lastGlyphID) {
218 0 : SkDynamicMemoryWStream cmap;
219 0 : append_tounicode_header(&cmap, multiByteGlyphs);
220 0 : SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
221 0 : firstGlyphID, lastGlyphID);
222 0 : append_cmap_footer(&cmap);
223 : return sk_make_sp<SkPDFStream>(
224 0 : std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
225 : }
|