summaryrefslogtreecommitdiff
path: root/sfntly/table/core/cmap_table.h
blob: bbdc7b54b5f76db5a27f7f21df45181b7612169e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
/*
 * Copyright 2011 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SFNTLY_CPP_SRC_SFNTLY_TABLE_CORE_CMAP_TABLE_H_
#define SFNTLY_CPP_SRC_SFNTLY_TABLE_CORE_CMAP_TABLE_H_

// type.h needs to be included first because of building issues on Windows
// Type aliases we delcare are defined in other headers and make the build
// fail otherwise.
#include "sfntly/port/type.h"
#include <vector>
#include <map>

#include "sfntly/port/refcount.h"
#include "sfntly/table/subtable.h"
#include "sfntly/table/subtable_container_table.h"

namespace sfntly {

// CMap subtable formats
struct CMapFormat {
  enum {
    kFormat0 = 0,
    kFormat2 = 2,
    kFormat4 = 4,
    kFormat6 = 6,
    kFormat8 = 8,
    kFormat10 = 10,
    kFormat12 = 12,
    kFormat13 = 13,
    kFormat14 = 14
  };
};

// A CMap table
class CMapTable : public SubTableContainerTable, public RefCounted<CMapTable> {
public:
  // CMapTable::CMapId
  struct CMapId {
    int32_t platform_id;
    int32_t encoding_id;
    bool operator==(const CMapId& obj) const {
      return platform_id == obj.platform_id && encoding_id == obj.encoding_id;
    }
  };
  static CMapId WINDOWS_BMP;
  static CMapId WINDOWS_UCS4;
  static CMapId MAC_ROMAN;

  // CMapTable::CMapIdComparator
  class CMapIdComparator {
   public:
    bool operator()(const CMapId& lhs, const CMapId& rhs) const;
  };

  // A filter on cmap
  // CMapTable::CMapFilter
  class CMapFilter {
   public:
    // Test on whether the cmap is acceptable or not
    // @param cmap_id the id of the cmap
    // @return true if the cmap is acceptable; false otherwise
    virtual bool accept(const CMapId& cmap_id) const = 0;
    // Make gcc -Wnon-virtual-dtor happy.
    virtual ~CMapFilter() {}
  };

  // Filters CMaps by CMapId to implement CMapTable::get()
  // wanted_id is the CMap we'd like to find.
  // We compare the current CMap to it either by equality (==) or using a
  // comparator.
  // CMapTable::CMapIdFilter
  class CMapIdFilter : public CMapFilter {
   public:
    explicit CMapIdFilter(const CMapId wanted_id);
    CMapIdFilter(const CMapId wanted_id,
                 const CMapIdComparator* comparator);
    ~CMapIdFilter() {}
    virtual bool accept(const CMapId& cmap_id) const;
   private:
    const CMapId wanted_id_;
    const CMapIdComparator *comparator_;
  };

  // The abstract base class for all cmaps.
  //
  // CMap equality is based on the equality of the (@link {@link CMapId} that
  // defines the CMap. In the cmap table for a font there can only be one cmap
  // with a given cmap id (pair of platform and encoding ids) no matter what the
  // type of the cmap is.
  //
  // The cmap offers CharacterIterator to allow iteration over
  // characters that are mapped by the cmap. This iteration mostly returns the
  // characters mapped by the cmap. It will return all characters mapped by the
  // cmap to anything but .notdef <b>but</b> it may return some that are not
  // mapped or are mapped to .notdef. Various cmap tables provide ranges and
  // such to describe characters for lookup but without going the full way to
  // mapping to the glyph id it isn't always possible to tell if a character
  // will end up with a valid glyph id. So, some of the characters returned from
  // the Iterator may still end up pointing to the .notdef glyph. However, the
  // number of such characters should be small in most cases with well designed
  // cmaps.
  class Builder;
  class CMap : public SubTable {
   public:
    // CMapTable::CMap::Builder
    class Builder : public SubTable::Builder {
     public:
      virtual ~Builder();

      CALLER_ATTACH static Builder*
          GetBuilder(ReadableFontData* data,
                     int32_t offset,
                     const CMapId& cmap_id);
      CALLER_ATTACH static Builder*
          GetBuilder(int32_t format,
                     const CMapId& cmap_id);

      // Note: yes, an object is returned on stack since it's small enough.
      virtual CMapId cmap_id() { return cmap_id_; }
      virtual int32_t platform_id() { return cmap_id_.platform_id; }
      virtual int32_t encoding_id() { return cmap_id_.encoding_id; }
      virtual int32_t format() { return format_; }
      virtual int32_t language() { return language_; }
      virtual void set_language(int32_t language) { language_ = language; }

     protected:
      Builder(ReadableFontData* data,
              int32_t format,
              const CMapId& cmap_id);
      Builder(WritableFontData* data,
              int32_t format,
              const CMapId& cmap_id);

      virtual int32_t SubSerialize(WritableFontData* new_data);
      virtual bool SubReadyToSerialize();
      virtual int32_t SubDataSizeToSerialize();
      virtual void SubDataSet();

     private:
      int32_t format_;
      CMapId cmap_id_;
      int32_t language_;

      friend class CMapTable::Builder;
    };
    // Abstract CMap character iterator
    // The fully qualified name is CMapTable::CMap::CharacterIterator
    class CharacterIterator {
     public:
      virtual ~CharacterIterator() {}
      virtual bool HasNext() = 0;
      // Returns -1 if there are no more characters to iterate through
      // and exceptions are turned off
      virtual int32_t Next() = 0;

     protected:
      // Use the CMap::Iterator method below instead of directly requesting
      // a CharacterIterator.
      CharacterIterator() {}
    };

    CMap(ReadableFontData* data, int32_t format, const CMapId& cmap_id);
    virtual ~CMap();

    virtual CMap::CharacterIterator* Iterator() = 0;

    virtual int32_t format() { return format_; }
    virtual CMapId cmap_id() { return cmap_id_; }
    virtual int32_t platform_id() { return cmap_id_.platform_id; }
    virtual int32_t encoding_id() { return cmap_id_.encoding_id; }

    // Get the language of the cmap.
    //
    // Note on the language field in 'cmap' subtables: The language field must
    // be set to zero for all cmap subtables whose platform IDs are other than
    // Macintosh (platform ID 1). For cmap subtables whose platform IDs are
    // Macintosh, set this field to the Macintosh language ID of the cmap
    // subtable plus one, or to zero if the cmap subtable is not
    // language-specific. For example, a Mac OS Turkish cmap subtable must set
    // this field to 18, since the Macintosh language ID for Turkish is 17. A
    // Mac OS Roman cmap subtable must set this field to 0, since Mac OS Roman
    // is not a language-specific encoding.
    //
    // @return the language id
    virtual int32_t Language() = 0;

    // Gets the glyph id for the character code provided.
    // The character code provided must be in the encoding used by the cmap
    // table.
    virtual int32_t GlyphId(int32_t character) = 0;

   private:
    int32_t format_;
    CMapId cmap_id_;
  };
  typedef Ptr<CMap> CMapPtr;
  typedef Ptr<CMap::Builder> CMapBuilderPtr;
  typedef std::map<CMapId, CMapBuilderPtr, CMapIdComparator> CMapBuilderMap;

  // A cmap format 0 sub table
  class CMapFormat0 : public CMap, public RefCounted<CMapFormat0> {
   public:
    // The fully qualified name is CMapTable::CMapFormat0::Builder
    class Builder : public CMap::Builder,
                    public RefCounted<Builder> {
     public:
      CALLER_ATTACH static Builder* NewInstance(ReadableFontData* data,
                                                int32_t offset,
                                                const CMapId& cmap_id);
      CALLER_ATTACH static Builder* NewInstance(WritableFontData* data,
                                                int32_t offset,
                                                const CMapId& cmap_id);
      CALLER_ATTACH static Builder* NewInstance(const CMapId& cmap_id);
      virtual ~Builder();

     protected:
      virtual CALLER_ATTACH FontDataTable*
          SubBuildTable(ReadableFontData* data);

     private:
      // When creating a new CMapFormat0 Builder, use NewInstance instead of
      // the constructors! This avoids a memory leak when slicing the FontData.
      Builder(ReadableFontData* data, int32_t offset, const CMapId& cmap_id);
      Builder(WritableFontData* data, int32_t offset, const CMapId& cmap_id);
      Builder(const CMapId& cmap_id);
    };

    // The fully qualified name is CMapTable::CMapFormat0::CharacterIterator
    class CharacterIterator : public CMap::CharacterIterator {
     public:
      virtual ~CharacterIterator();
      virtual bool HasNext();
      virtual int32_t Next();

     private:
      CharacterIterator(int32_t start, int32_t end);
      friend class CMapFormat0;
      int32_t character_, max_character_;
    };

    virtual ~CMapFormat0();
    virtual int32_t Language();
    virtual int32_t GlyphId(int32_t character);
    CMap::CharacterIterator* Iterator();

   private:
    CMapFormat0(ReadableFontData* data, const CMapId& cmap_id);
  };

  // A cmap format 2 sub table
  // The format 2 cmap is used for multi-byte encodings such as SJIS,
  // EUC-JP/KR/CN, Big5, etc.
  class CMapFormat2 : public CMap, public RefCounted<CMapFormat2> {
   public:
    // CMapTable::CMapFormat2::Builder
    class Builder : public CMap::Builder,
                    public RefCounted<Builder> {
     public:
      Builder(ReadableFontData* data,
              int32_t offset,
              const CMapId& cmap_id);
      Builder(WritableFontData* data,
              int32_t offset,
              const CMapId& cmap_id);
      virtual ~Builder();

     protected:
      virtual CALLER_ATTACH FontDataTable*
          SubBuildTable(ReadableFontData* data);
    };
    // CMapTable::CMapFormat2::CharacterIterator
    class CharacterIterator : public CMap::CharacterIterator {
     public:
      virtual ~CharacterIterator();
      virtual bool hasNext();
      virtual int32_t next();

     private:
      CharacterIterator();
    };

    virtual int32_t Language();
    virtual int32_t GlyphId(int32_t character);

    // Returns how many bytes would be consumed by a lookup of this character
    // with this cmap. This comes about because the cmap format 2 table is
    // designed around multi-byte encodings such as SJIS, EUC-JP, Big5, etc.
    // return the number of bytes consumed from this "character" - either 1 or 2
    virtual int32_t BytesConsumed(int32_t character);

    virtual ~CMapFormat2();

   private:
    CMapFormat2(ReadableFontData* data, const CMapId& cmap_id);

    int32_t SubHeaderOffset(int32_t sub_header_index);
    int32_t FirstCode(int32_t sub_header_index);
    int32_t EntryCount(int32_t sub_header_index);
    int32_t IdRangeOffset(int32_t sub_header_index);
    int32_t IdDelta(int32_t sub_header_index);
    CMap::CharacterIterator* Iterator();
  };

  // CMapTable::Builder
  class Builder : public SubTableContainerTable::Builder,
                  public RefCounted<Builder> {
   public:
    // Constructor scope is public because C++ does not allow base class to
    // instantiate derived class with protected constructors.
    Builder(Header* header, WritableFontData* data);
    Builder(Header* header, ReadableFontData* data);
    virtual ~Builder();

    virtual int32_t SubSerialize(WritableFontData* new_data);
    virtual bool SubReadyToSerialize();
    virtual int32_t SubDataSizeToSerialize();
    virtual void SubDataSet();
    virtual CALLER_ATTACH FontDataTable* SubBuildTable(ReadableFontData* data);

    static CALLER_ATTACH Builder* CreateBuilder(Header* header,
                                                WritableFontData* data);

    CMap::Builder* NewCMapBuilder(const CMapId& cmap_id,
                                  ReadableFontData* data);
    // Create a new empty CMapBuilder of the type specified in the id.
    CMap::Builder* NewCMapBuilder(int32_t format, const CMapId& cmap_id);
    CMap::Builder* CMapBuilder(const CMapId& cmap_id);

    int32_t NumCMaps();
    void SetVersion(int32_t version);

    CMapBuilderMap* GetCMapBuilders();

   protected:
    static CALLER_ATTACH CMap::Builder* CMapBuilder(ReadableFontData* data,
                                                    int32_t index);

   private:
    void Initialize(ReadableFontData* data);
    static int32_t NumCMaps(ReadableFontData* data);

    int32_t version_;
    CMapBuilderMap cmap_builders_;
  };
  typedef Ptr<Builder> CMapTableBuilderPtr;

  class CMapIterator {
   public:
    // If filter is NULL, filter through all tables.
    CMapIterator(CMapTable* table, const CMapFilter* filter);
    bool HasNext();
    CMap* Next();

   private:
    int32_t table_index_;
    const CMapFilter* filter_;
    CMapTable* table_;
  };

  // Make a CMapId from a platform_id, encoding_id pair
  static CMapId NewCMapId(int32_t platform_id, int32_t encoding_id);
  // Make a CMapId from another CMapId
  static CMapId NewCMapId(const CMapId& obj);

  // Get the CMap with the specified parameters if it exists.
  // Returns NULL otherwise.
  CALLER_ATTACH CMap* GetCMap(const int32_t index);
  CALLER_ATTACH CMap* GetCMap(const int32_t platform_id,
                              const int32_t encoding_id);
  CALLER_ATTACH CMap* GetCMap(const CMapId GetCMap_id);

  // Get the table version.
  virtual int32_t Version();

  // Get the number of cmaps within the CMap table.
  virtual int32_t NumCMaps();

  // Get the cmap id for the cmap with the given index.
  // Note: yes, an object is returned on stack since it's small enough.
  //       This function is renamed from cmapId to GetCMapId().
  virtual CMapId GetCMapId(int32_t index);

  virtual int32_t PlatformId(int32_t index);
  virtual int32_t EncodingId(int32_t index);

  // Get the offset in the table data for the cmap table with the given index.
  // The offset is from the beginning of the table.
  virtual int32_t Offset(int32_t index);

  virtual ~CMapTable();

  static const int32_t NOTDEF;

 private:
  // Offsets to specific elements in the underlying data. These offsets are
  // relative to the start of the table or the start of sub-blocks within
  // the table.
  struct Offset {
    enum {
      kVersion = 0,
      kNumTables = 2,
      kEncodingRecordStart = 4,

      // offsets relative to the encoding record
      kEncodingRecordPlatformId = 0,
      kEncodingRecordEncodingId = 2,
      kEncodingRecordOffset = 4,
      kEncodingRecordSize = 8,

      kFormat = 0,

      // Format 0: Byte encoding table
      kFormat0Format = 0,
      kFormat0Length = 2,
      kFormat0Language = 4,
      kFormat0GlyphIdArray = 6,

      // Format 2: High-byte mapping through table
      kFormat2Format = 0,
      kFormat2Length = 2,
      kFormat2Language = 4,
      kFormat2SubHeaderKeys = 6,
      kFormat2SubHeaders = 518,
      // offset relative to the subHeader structure
      kFormat2SubHeader_firstCode = 0,
      kFormat2SubHeader_entryCount = 2,
      kFormat2SubHeader_idDelta = 4,
      kFormat2SubHeader_idRangeOffset = 6,
      kFormat2SubHeader_structLength = 8,

      // Format 4: Segment mapping to delta values
      kFormat4Format = 0,
      kFormat4Length = 2,
      kFormat4Language = 4,
      kFormat4SegCountX2 = 6,
      kFormat4SearchRange = 8,
      kFormat4EntrySelector = 10,
      kFormat4RangeShift = 12,
      kFormat4EndCount = 14,
      kFormat4FixedSize = 16,

      // format 6: Trimmed table mapping
      kFormat6Format = 0,
      kFormat6Length = 2,
      kFormat6Language = 4,
      kFormat6FirstCode = 6,
      kFormat6EntryCount = 8,
      kFormat6GlyphIdArray = 10,

      // Format 8: mixed 16-bit and 32-bit coverage
      kFormat8Format = 0,
      kFormat8Length = 4,
      kFormat8Language = 8,
      kFormat8Is32 = 12,
      kFormat8nGroups204 = 8204,
      kFormat8Groups208 = 8208,
      // offset relative to the group structure
      kFormat8Group_startCharCode = 0,
      kFormat8Group_endCharCode = 4,
      kFormat8Group_startGlyphId = 8,
      kFormat8Group_structLength = 12,

      // Format 10: Trimmed array
      kFormat10Format = 0,
      kFormat10Length = 4,
      kFormat10Language = 8,
      kFormat10StartCharCode = 12,
      kFormat10NumChars = 16,
      kFormat10Glyphs0 = 20,

      // Format 12: Segmented coverage
      kFormat12Format = 0,
      kFormat12Length = 4,
      kFormat12Language = 8,
      kFormat12nGroups = 12,
      kFormat12Groups = 16,
      kFormat12Groups_structLength = 12,
      // offsets within the group structure
      kFormat12_startCharCode = 0,
      kFormat12_endCharCode = 4,
      kFormat12_startGlyphId = 8,

      // Format 13: Last Resort Font
      kFormat13Format = 0,
      kFormat13Length = 4,
      kFormat13Language = 8,
      kFormat13nGroups = 12,
      kFormat13Groups = 16,
      kFormat13Groups_structLength = 12,
      // offsets within the group structure
      kFormat13_startCharCode = 0,
      kFormat13_endCharCode = 4,
      kFormat13_glyphId = 8,

      // Format 14: Unicode Variation Sequences
      kFormat14Format = 0,
      kFormat14Length = 2,

      // TODO(stuartg): finish tables
      // Default UVS Table

      // Non-default UVS Table
      kLast = -1
    };
  };

  CMapTable(Header* header, ReadableFontData* data);

  // Get the offset in the table data for the encoding record for the cmap with
  // the given index. The offset is from the beginning of the table.
  static int32_t OffsetForEncodingRecord(int32_t index);
};
typedef std::vector<CMapTable::CMapId> CMapIdList;
typedef Ptr<CMapTable> CMapTablePtr;
}  // namespace sfntly

#endif  // SFNTLY_CPP_SRC_SFNTLY_TABLE_CORE_CMAP_TABLE_H_