1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PINYINIME_INCLUDE_DICTLIST_H__
#define PINYINIME_INCLUDE_DICTLIST_H__
#include <stdlib.h>
#include <stdio.h>
#include "./dictdef.h"
#include "./searchutility.h"
#include "./spellingtrie.h"
#include "./utf16char.h"
namespace ime_pinyin {
// DictList keeps a table of single-character items together with one large
// buffer holding the word list, where words are grouped by their length.
// It declares operations to save/load that data through a FILE stream, to map
// between lemma ids and Hanzi strings, and to produce predictions from recent
// Hanzi history. Only declarations are visible in this header; the member
// functions are defined elsewhere.
class DictList {
private:
// NOTE(review): presumably set once the list has been built or loaded
// successfully -- confirm against the implementation.
bool initialized_;
// Spelling trie used by this list. It is a pointer to const.
// NOTE(review): ownership/lifetime is not visible from this header.
const SpellingTrie *spl_trie_;
// Number of SingleCharItem. The first is blank, because id 0 is invalid.
size_t scis_num_;
// NOTE(review): scis_hz_ and scis_splid_ presumably hold, for each
// single-character item, its Hanzi character and its spelling id
// (scis_num_ entries each) -- confirm against alloc_resource().
char16 *scis_hz_;
SpellingId *scis_splid_;
// The large memory block to store the word list.
char16 *buf_;
// Starting position of those words whose lengths are i+1, counted in
// char16
size_t start_pos_[kMaxLemmaSize + 1];
// NOTE(review): presumably the first lemma id of those words whose lengths
// are i+1, parallel to start_pos_ -- confirm against the implementation.
size_t start_id_[kMaxLemmaSize + 1];
// Table of kMaxLemmaSize comparison functions. Each has the same signature as
// the cmp_func parameter of find_pos_startedbyhzs().
int (*cmp_func_[kMaxLemmaSize])(const void *, const void *);
// Allocate the internal buffers; returns a bool status.
// NOTE(review): buf_size is presumably the size of buf_ in char16 units and
// scim_num the number of single-character items (cf. scis_num_; the parameter
// name looks like a typo of scis_num) -- confirm against the implementation.
bool alloc_resource(size_t buf_size, size_t scim_num);
// Counterpart of alloc_resource().
// NOTE(review): presumably releases scis_hz_, scis_splid_ and buf_ -- confirm.
void free_resource();
// The following helpers are only declared when ___BUILD_MODEL___ is defined.
#ifdef ___BUILD_MODEL___
// Calculate the requested memory, including the start_pos[] buffer.
size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num);
// NOTE(review): presumably copies the scis_num items from scis into the
// internal single-character tables -- confirm against the implementation.
void fill_scis(const SingleCharItem *scis, size_t scis_num);
// Copy the related content to the inner buffer
// It should be called after calculate_size()
void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num);
// Find the starting position for the buffer of those 2-character Chinese word
// whose first character is the given Chinese character.
char16* find_pos2_startedbyhz(char16 hz_char);
#endif
// Find the starting position for the buffer of those words whose lengths are
// word_Len. The given parameter cmp_func decides how many characters from
// beginning will be used to compare.
// NOTE(review): the value returned when no such word exists is not visible
// from this header.
char16* find_pos_startedbyhzs(const char16 last_hzs[],
size_t word_Len,
int (*cmp_func)(const void *, const void *));
public:
DictList();
~DictList();
// Save the list to / load the list from the given FILE stream; both return a
// bool status.
// NOTE(review): the on-disk format and the exact meaning of the return value
// are defined by the implementation, which is not visible here.
bool save_list(FILE *fp);
bool load_list(FILE *fp);
// Only declared when ___BUILD_MODEL___ is defined.
#ifdef ___BUILD_MODEL___
// Init the list from the LemmaEntry array.
// lemma_arr should have been sorted by the hanzi_str, and have been given
// ids from 1
bool init_list(const SingleCharItem *scis, size_t scis_num,
const LemmaEntry *lemma_arr, size_t lemma_num);
#endif
// Get the hanzi string for the given id
// str_buf is the caller-provided output buffer and str_max its capacity.
// NOTE(review): the return value is presumably the length of the string
// written (0 on failure) -- confirm against the implementation.
uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max);
// NOTE(review): judging by the names, these two presumably convert the
// str_len entries of str in place between single-character-item ids and Hanzi
// characters (one in each direction) -- confirm against the implementation.
void convert_to_hanzis(char16 *str, uint16 str_len);
void convert_to_scis_ids(char16 *str, uint16 str_len);
// last_hzs stores the last n Chinese characters history, its length should be
// less than or equal to kMaxPredictSize.
// hzs_len specifies the length(<= kMaxPredictSize).
// npre_items is used to store the result.
// npre_max specifies the buffer length.
// b4_used specifies how many items before npre_items have been used.
// Returned value is the number of newly added items.
size_t predict(const char16 last_hzs[], uint16 hzs_len,
NPredictItem *npre_items, size_t npre_max,
size_t b4_used);
// If half_splid is a valid half spelling id, return those full spelling
// ids which share this half id.
// NOTE(review): results are presumably written to splids (capacity
// max_splids) and the return value is the number written; the behavior when
// half_splid is not a valid half id is not visible here -- confirm.
uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
uint16 *splids, uint16 max_splids);
// Return the lemma id for the Hanzi string str of length str_len.
// NOTE(review): the value returned when the string is not found is not
// visible from this header (id 0 is described above as invalid).
LemmaIdType get_lemma_id(const char16 *str, uint16 str_len);
};
}
#endif // PINYINIME_INCLUDE_DICTLIST_H__
|