// PinyinIME/jni/include/searchutility.h

/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
#define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__

#include <stdlib.h>
#include "./spellingtrie.h"

namespace ime_pinyin {

// Type used to express the size of a pool (for example an id pool). It is an
// alias of uint16, so it is limited to whatever range uint16 can represent.
typedef uint16 PoolPosType;

// Handle type used to identify a parsing milestone in an atom dictionary.
// It is an alias of uint16; what a given handle value means is decided by the
// dictionary that issued it and is not defined in this header.
typedef uint16 MileStoneHandle;

// A lemma together with its probability score.
struct LmaPsbItem {
  // Lemma id, packed into a bit-field that is kLemmaIdSize bytes wide.
  size_t id:(kLemmaIdSize * 8);

  // 4-bit field, so it can hold values 0..15.
  // NOTE(review): presumably the lemma length -- confirm against the users.
  size_t lma_len:4;

  // The score: the lower the psb, the higher the possibility.
  uint16 psb;

  // For single-character items the Hanzi itself may also be needed.
  // For multiple-character items this field is ignored.
  char16 hanzi;
};

// Pointer alias for LmaPsbItem.
typedef LmaPsbItem *PLmaPsbItem;

// LmaPsbItem extended with string.
typedef struct {
  LmaPsbItem lpi;
  char16 str[kMaxLemmaSize + 1];
} LmaPsbStrItem, *PLmaPsbStrItem;


// Item type taken (as an array) by remove_duplicate_npre() declared in this
// header.
// NOTE(review): judging by the names this is a prediction candidate; whether
// a lower psb is better here, as it is for LmaPsbItem::psb, is not visible in
// this header -- confirm against the implementation.
struct NPredictItem {
  // Score of the item.
  float psb;

  // Buffer of kMaxPredictSize char16 units.
  // NOTE(review): presumably the predicted Hanzi string -- confirm.
  char16 pre_hzs[kMaxPredictSize];

  // The length of the history used to do the prediction.
  uint16 his_len;
};

// Pointer alias for NPredictItem.
typedef NPredictItem *PNPredictItem;

// Parameter structure used to extend in a dictionary. All dictionaries
// receive the same DictExtPara and a dictionary-specific MileStoneHandle for
// extending.
//
// When the user inputs a new character, AtomDictBase::extend_dict() will be
// called at least once for each dictionary.
//
// For example, when the user inputs "wm", extend_dict() will be called twice,
// and the DictExtPara parameters are respectively as follows (id_start and
// id_num describe the newly added id, which is 'm' in both calls):
// 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1;
// splid_end_split = false; id_start = ma (the first id starting with 'm');
// id_num = number of ids starting with 'm'.
// 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1;
// splid_end_split = false; id_start = ma; id_num = number of ids starting
// with 'm'.
//
// For string "women", one of the cases of the DictExtPara parameter is:
// splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"),
// step_no = 4; splid_end_split = false; id_start = men, id_num = 1.
//
struct DictExtPara {
  // Spelling ids for extending; splids_extended + 1 entries of the buffer are
  // in use. A normal lemma can have at most kMaxLemmaSize spelling ids, but a
  // composing phrase can have up to kMaxSearchSteps of them.
  uint16 splids[kMaxSearchSteps];

  // Number of ids that were already used before this extension.
  // splids[splids_extended] is the id newly added by the current extension.
  uint16 splids_extended;

  // The step span of the extension, which is also the length of the string
  // for the newly added spelling id.
  uint16 ext_len;

  // The step number of the current extension, i.e. the ending position in the
  // input Pinyin string of the substring covered by the ids in splids[].
  // For example, when the user inputs "women", step_no = 4.
  // This value may be useful for managing the MileStoneHandle list of each
  // step: when the user deletes a character from the string, the
  // MileStoneHandle objects for the steps after that character should be
  // reset; when the user begins a new string, all MileStoneHandle objects
  // should be reset.
  uint16 step_no;

  // Whether the newly added spelling ends with a splitting character.
  bool splid_end_split;

  // For a newly added half id, the first of the corresponding full ids;
  // for a newly added full id, that id itself.
  uint16 id_start;

  // For a newly added half id, the number of corresponding full ids;
  // for a newly added full id, id_num == 1.
  uint16 id_num;
};

// Pointer alias for DictExtPara.
typedef DictExtPara *PDictExtPara;

// Lemma id classification predicates. Each takes a lemma id and returns a
// bool; the definitions, and therefore the id ranges being tested, are not
// part of this header.
// NOTE(review): presumably these tell whether the id belongs to the system
// dictionary, the user dictionary or a composing phrase respectively --
// confirm against the implementation file.
bool is_system_lemma(LemmaIdType lma_id);
bool is_user_lemma(LemmaIdType lma_id);
bool is_composing_lemma(LemmaIdType lma_id);

// Comparison callbacks with the int(const void *, const void *) signature
// that qsort()/bsearch() from <stdlib.h> expect.
// NOTE(review): judging by the names, p1 and p2 presumably point to
// LmaPsbItem objects, ordered by psb, by a "unified" psb, by id and by hanzi
// respectively; the element type and the sort direction are only established
// by the definitions -- confirm there.
int cmp_lpi_with_psb(const void *p1, const void *p2);
int cmp_lpi_with_unified_psb(const void *p1, const void *p2);
int cmp_lpi_with_id(const void *p1, const void *p2);
int cmp_lpi_with_hanzi(const void *p1, const void *p2);

// Comparison callback with the qsort()/bsearch() signature.
// NOTE(review): presumably p1 and p2 point to LmaPsbStrItem objects that are
// compared by their str member -- confirm against the definition.
int cmp_lpsi_with_str(const void *p1, const void *p2);

// A family of eight comparison callbacks with the qsort()/bsearch()
// signature, one per numeric suffix 1..8.
// NOTE(review): presumably cmp_hanzis_N compares two Hanzi strings of exactly
// N char16 characters -- confirm against the definitions.
int cmp_hanzis_1(const void *p1, const void *p2);
int cmp_hanzis_2(const void *p1, const void *p2);
int cmp_hanzis_3(const void *p1, const void *p2);
int cmp_hanzis_4(const void *p1, const void *p2);
int cmp_hanzis_5(const void *p1, const void *p2);
int cmp_hanzis_6(const void *p1, const void *p2);
int cmp_hanzis_7(const void *p1, const void *p2);
int cmp_hanzis_8(const void *p1, const void *p2);

// Comparison callbacks with the qsort()/bsearch() signature.
// NOTE(review): presumably p1 and p2 point to NPredictItem objects, ordered
// by score, by history length then score, and by Hanzi then score
// respectively; the sort direction is only established by the definitions --
// confirm there.
int cmp_npre_by_score(const void *p1, const void *p2);
int cmp_npre_by_hislen_score(const void *p1, const void *p2);
int cmp_npre_by_hanzi_score(const void *p1, const void *p2);


// Takes an array of NPredictItem (npre_items) and its element count
// (npre_num), and returns a size_t.
// NOTE(review): presumably duplicates are removed in place and the number of
// remaining items is returned; whether the input must already be sorted is
// not visible here -- confirm against the definition.
size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num);

// Takes a size and returns a size.
// NOTE(review): presumably rounds size up to a multiple of sizeof(size_t) --
// confirm against the definition.
size_t align_to_size_t(size_t size);

}  // namespace ime_pinyin

#endif  // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__