/* * Copyright (C) 2009 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PINYINIME_INCLUDE_DICTLIST_H__ #define PINYINIME_INCLUDE_DICTLIST_H__ #include #include #include "./dictdef.h" #include "./searchutility.h" #include "./spellingtrie.h" #include "./utf16char.h" namespace ime_pinyin { class DictList { private: bool initialized_; const SpellingTrie *spl_trie_; // Number of SingCharItem. The first is blank, because id 0 is invalid. size_t scis_num_; char16 *scis_hz_; SpellingId *scis_splid_; // The large memory block to store the word list. char16 *buf_; // Starting position of those words whose lengths are i+1, counted in // char16 size_t start_pos_[kMaxLemmaSize + 1]; size_t start_id_[kMaxLemmaSize + 1]; int (*cmp_func_[kMaxLemmaSize])(const void *, const void *); bool alloc_resource(size_t buf_size, size_t scim_num); void free_resource(); #ifdef ___BUILD_MODEL___ // Calculate the requsted memory, including the start_pos[] buffer. size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num); void fill_scis(const SingleCharItem *scis, size_t scis_num); // Copy the related content to the inner buffer // It should be called after calculate_size() void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num); // Find the starting position for the buffer of those 2-character Chinese word // whose first character is the given Chinese character. char16* find_pos2_startedbyhz(char16 hz_char); #endif // Find the starting position for the buffer of those words whose lengths are // word_len. The given parameter cmp_func decides how many characters from // beginning will be used to compare. char16* find_pos_startedbyhzs(const char16 last_hzs[], size_t word_Len, int (*cmp_func)(const void *, const void *)); public: DictList(); ~DictList(); bool save_list(FILE *fp); bool load_list(FILE *fp); #ifdef ___BUILD_MODEL___ // Init the list from the LemmaEntry array. // lemma_arr should have been sorted by the hanzi_str, and have been given // ids from 1 bool init_list(const SingleCharItem *scis, size_t scis_num, const LemmaEntry *lemma_arr, size_t lemma_num); #endif // Get the hanzi string for the given id uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max); void convert_to_hanzis(char16 *str, uint16 str_len); void convert_to_scis_ids(char16 *str, uint16 str_len); // last_hzs stores the last n Chinese characters history, its length should be // less or equal than kMaxPredictSize. // hzs_len specifies the length(<= kMaxPredictSize). // predict_buf is used to store the result. // buf_len specifies the buffer length. // b4_used specifies how many items before predict_buf have been used. // Returned value is the number of newly added items. size_t predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem *npre_items, size_t npre_max, size_t b4_used); // If half_splid is a valid half spelling id, return those full spelling // ids which share this half id. uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid, uint16 *splids, uint16 max_splids); LemmaIdType get_lemma_id(const char16 *str, uint16 str_len); }; } #endif // PINYINIME_INCLUDE_DICTLIST_H__