/* Retrieved from a repository web view: path root/gunicode.h,
 * blob b40cb539328532d4080434037ec90c78c2a2e1c6.
 */
/* gunicode.h - Unicode manipulation functions
 *
 *  Copyright (C) 1999, 2000 Tom Tromey
 *  Copyright 2000 Red Hat, Inc.
 *
 * The Gnome Library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * The Gnome Library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the Gnome Library; see the file COPYING.LIB.  If not,
 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 *   Boston, MA 02111-1307, USA.
 */

#ifndef __GUNICODE_H__
#define __GUNICODE_H__

#include <stddef.h>      /* For size_t */

#ifdef __cplusplus
extern "C"
{
#endif

/* gunichar holds one Unicode character, stored as a guint32.
 * gunichar2 is a guint16 unit.
 * NOTE(review): gunichar2 presumably represents a UTF-16 code unit,
 * judging by the g_utf16_* prototypes in this header -- confirm.
 */
typedef guint32 gunichar;
typedef guint16 gunichar2;

/* The possible classifications of a Unicode character; this is the
 * type returned by g_unichar_type().
 *
 * The numeric values are spelled out explicitly.  They are exactly the
 * values the enumerators would receive implicitly when listed in this
 * order (0 through 29), so nothing changes for existing users.
 */
typedef enum {
  /* Control, format and otherwise special characters.  */
  G_UNICODE_CONTROL             = 0,
  G_UNICODE_FORMAT              = 1,
  G_UNICODE_UNASSIGNED          = 2,
  G_UNICODE_PRIVATE_USE         = 3,
  G_UNICODE_SURROGATE           = 4,

  /* Letters.  */
  G_UNICODE_LOWERCASE_LETTER    = 5,
  G_UNICODE_MODIFIER_LETTER     = 6,
  G_UNICODE_OTHER_LETTER        = 7,
  G_UNICODE_TITLECASE_LETTER    = 8,
  G_UNICODE_UPPERCASE_LETTER    = 9,

  /* Marks.  */
  G_UNICODE_COMBINING_MARK      = 10,
  G_UNICODE_ENCLOSING_MARK      = 11,
  G_UNICODE_NON_SPACING_MARK    = 12,

  /* Numbers.  */
  G_UNICODE_DECIMAL_NUMBER      = 13,
  G_UNICODE_LETTER_NUMBER       = 14,
  G_UNICODE_OTHER_NUMBER        = 15,

  /* Punctuation.  */
  G_UNICODE_CONNECT_PUNCTUATION = 16,
  G_UNICODE_DASH_PUNCTUATION    = 17,
  G_UNICODE_CLOSE_PUNCTUATION   = 18,
  G_UNICODE_FINAL_PUNCTUATION   = 19,
  G_UNICODE_INITIAL_PUNCTUATION = 20,
  G_UNICODE_OTHER_PUNCTUATION   = 21,
  G_UNICODE_OPEN_PUNCTUATION    = 22,

  /* Symbols.  */
  G_UNICODE_CURRENCY_SYMBOL     = 23,
  G_UNICODE_MODIFIER_SYMBOL     = 24,
  G_UNICODE_MATH_SYMBOL         = 25,
  G_UNICODE_OTHER_SYMBOL        = 26,

  /* Separators.  */
  G_UNICODE_LINE_SEPARATOR      = 27,
  G_UNICODE_PARAGRAPH_SEPARATOR = 28,
  G_UNICODE_SPACE_SEPARATOR     = 29
} GUnicodeType;

/* Reports whether the current locale uses the UTF-8 charset.
 *
 * CHARSET: if not null, *CHARSET is set to the name of the current
 *          locale's charset.  This value is statically allocated
 *          (so presumably the caller must not free or modify it --
 *          confirm against the definition).
 *
 * Returns TRUE if the current locale uses the UTF-8 charset.
 */
gboolean g_get_charset (char **charset);

/* Character classification predicates.  These are all analogs of the
 * <ctype.h> functions, but they take a Unicode character (gunichar)
 * and return a gboolean.
 *
 * g_unichar_istitle, g_unichar_isdefined and g_unichar_iswide have no
 * direct <ctype.h> counterpart.
 * NOTE(review): presumably they test for a title-case character, an
 * assigned character and a double-width character respectively --
 * confirm against the definitions.
 */
gboolean g_unichar_isalnum   (gunichar c);
gboolean g_unichar_isalpha   (gunichar c);
gboolean g_unichar_iscntrl   (gunichar c);
gboolean g_unichar_isdigit   (gunichar c);
gboolean g_unichar_isgraph   (gunichar c);
gboolean g_unichar_islower   (gunichar c);
gboolean g_unichar_isprint   (gunichar c);
gboolean g_unichar_ispunct   (gunichar c);
gboolean g_unichar_isspace   (gunichar c);
gboolean g_unichar_isupper   (gunichar c);
gboolean g_unichar_isxdigit  (gunichar c);
gboolean g_unichar_istitle   (gunichar c);
gboolean g_unichar_isdefined (gunichar c);
gboolean g_unichar_iswide    (gunichar c);

/* More <ctype.h> functions.  These convert between the three cases:
 * each takes a Unicode character and returns a Unicode character,
 * targeting upper case, lower case and title case respectively.
 * See the Unicode book to understand title case.
 * NOTE(review): the result for a character that has no form in the
 * requested case is not visible here -- presumably C is returned
 * unchanged, as with <ctype.h>; confirm against the definitions.  */
gunichar g_unichar_toupper (gunichar c);
gunichar g_unichar_tolower (gunichar c);
gunichar g_unichar_totitle (gunichar c);

/* If C is a digit (according to `g_unichar_isdigit'), then return its
   numeric value.  Otherwise return -1.  */
gint g_unichar_digit_value (gunichar c);

/* NOTE(review): not documented in this header.  Presumably the
   hexadecimal counterpart of g_unichar_digit_value: the numeric value
   of C if `g_unichar_isxdigit' accepts it, otherwise -1 -- confirm
   against the definition.  */
gint g_unichar_xdigit_value (gunichar c);

/* Return the Unicode character type of a given character, expressed
   as one of the GUnicodeType classifications.  */
GUnicodeType g_unichar_type (gunichar c);



/* Compute canonical ordering of a string in-place.  This rearranges
   decomposed characters in the string according to their combining
   classes.  See the Unicode manual for more information.

   STRING is the buffer of Unicode characters to reorder; it is
   modified directly and nothing is returned.  LEN is its length
   (presumably counted in gunichar elements rather than bytes --
   confirm against the definition).  */
void g_unicode_canonical_ordering (gunichar *string,
				   size_t   len);

/* Compute canonical decomposition of a character.  Returns g_malloc()d
   string of Unicode characters.  RESULT_LEN is set to the resulting
   length of the string.

   CH is the character to decompose.  Because the result is
   g_malloc()d, the caller presumably owns it and should release it
   with g_free() -- confirm against the definition.  */
gunichar *g_unicode_canonical_decomposition (gunichar  ch,
					     size_t   *result_len);

/* Array of skip-bytes-per-initial character: g_utf8_skip is indexed
 * by the first byte of a UTF-8 character (see g_utf8_next_char) and
 * gives the number of bytes to advance to reach the next character.
 *
 * We prefix variable declarations with GLIB_VAR so they can
 * properly get exported in windows dlls:
 *   - G_OS_WIN32 with GLIB_COMPILATION:    __declspec(dllexport)
 *   - G_OS_WIN32 without GLIB_COMPILATION: extern __declspec(dllimport)
 *   - any other platform:                  extern
 * A GLIB_VAR that is already defined when this header is read is
 * left untouched.
 *
 * NOTE(review): the dllexport variant carries no `extern', so on that
 * configuration the line below is not a pure declaration -- confirm
 * this is intended.
 */
#ifndef GLIB_VAR
#  ifdef G_OS_WIN32
#    ifdef GLIB_COMPILATION
#      define GLIB_VAR __declspec(dllexport)
#    else /* !GLIB_COMPILATION */
#      define GLIB_VAR extern __declspec(dllimport)
#    endif /* !GLIB_COMPILATION */
#  else /* !G_OS_WIN32 */
#    define GLIB_VAR extern
#  endif /* !G_OS_WIN32 */
#endif /* !GLIB_VAR */

GLIB_VAR char g_utf8_skip[256];

/* Yield a pointer to the character that follows the UTF-8 character
 * starting at P: P is advanced by the g_utf8_skip entry for its first
 * byte.  The result has type `char *' regardless of P's qualifiers.
 *
 * The whole expansion is parenthesized so that postfix operators
 * applied to the macro (e.g. g_utf8_next_char (p)[0]) act on the
 * resulting pointer rather than binding before the cast.  The byte is
 * read through a const-qualified pointer so no qualifier is dropped
 * merely to index the table.
 *
 * P is evaluated twice; do not pass an expression with side effects.
 */
#define g_utf8_next_char(p) ((char *)((p) + g_utf8_skip[*(const guchar *)(p)]))

/* Navigation within UTF-8 encoded strings.
 *
 * NOTE(review): the summaries below are inferred from the names and
 * signatures only -- confirm each against its definition.
 *   g_utf8_get_char          - presumably decodes the character that
 *                              starts at P.
 *   g_utf8_offset_to_pointer - presumably converts a character offset
 *                              within STR into a pointer.
 *   g_utf8_pointer_to_offset - presumably the inverse: the character
 *                              offset of POS within STR.
 *   g_utf8_prev_char         - presumably the start of the character
 *                              preceding P.
 *   g_utf8_find_next_char    - presumably searches forward from P for
 *                              the next character start, limited by
 *                              BOUND.
 *   g_utf8_find_prev_char    - presumably searches backward from P for
 *                              the previous character start, without
 *                              going before STR.
 */
gunichar g_utf8_get_char          (const gchar *p);
gchar *  g_utf8_offset_to_pointer  (const gchar *str,
				    gint         offset);
gint     g_utf8_pointer_to_offset (const gchar *str,
				   const gchar *pos);
gchar *  g_utf8_prev_char         (const gchar *p);
gchar *  g_utf8_find_next_char    (const gchar *p,
				   const gchar *bound);
gchar *  g_utf8_find_prev_char    (const gchar *str,
				   const gchar *p);

/* NOTE(review): not documented in this header.  Presumably returns
   the length of the UTF-8 string P counted in characters rather than
   bytes, with MAX limiting how much of P is examined.  The unit of
   MAX and the meaning of a negative MAX are not visible here --
   confirm against the definition.  */
gint g_utf8_strlen (const gchar *p,
		    gint         max);

/* Copies n characters from src to dest.  Note that n counts
   characters, not bytes, so the number of bytes written can be larger
   than n; dest must be sized with that in mind.
   NOTE(review): the returned pointer is presumably dest, by analogy
   with strncpy, and the termination behaviour is not visible here --
   confirm against the definition.  */
gchar *g_utf8_strncpy (gchar       *dest,
		       const gchar *src,
		       size_t       n);

/* Find the UTF-8 character corresponding to ch, in string p. These
   functions are equivalents to strchr and strrchr: g_utf8_strchr
   corresponds to strchr and g_utf8_strrchr to strrchr.
   NOTE(review): presumably they return NULL when ch does not occur,
   as their C library counterparts do -- confirm against the
   definitions.  */

gchar *g_utf8_strchr  (const gchar *p,
		       gunichar     ch);
gchar *g_utf8_strrchr (const gchar *p,
		       gunichar     ch);

/* Conversions between UTF-8 (gchar), UTF-16 (gunichar2) and UCS-4
 * (gunichar) strings.  Each function takes the source string STR and
 * a length LEN, and returns a pointer to the converted string.
 *
 * NOTE(review): who owns the returned string, the unit of LEN and the
 * meaning of a negative LEN are not visible here -- confirm against
 * the definitions.
 * NOTE(review): g_ucs4_to_utf16 is declared to return `gunichar *',
 * whereas the other UTF-16 producer, g_utf8_to_utf16, returns
 * `gunichar2 *'.  This looks like it should be `gunichar2 *' --
 * confirm against the definition before changing it, since that would
 * alter the public interface.
 */
gunichar2 *g_utf8_to_utf16 (const gchar     *str,
			    gint             len);
gunichar * g_utf8_to_ucs4  (const gchar     *str,
			    gint             len);
gunichar * g_utf16_to_ucs4 (const gunichar2 *str,
			    gint             len);
gchar *    g_utf16_to_utf8 (const gunichar2 *str,
			    gint             len);
gunichar * g_ucs4_to_utf16 (const gunichar  *str,
			    gint             len);
gchar *    g_ucs4_to_utf8  (const gunichar  *str,
			    gint             len);

/* Convert a single character into UTF-8.
 *
 * c:      the character to convert.
 * outbuf: destination buffer; must have at least 6 bytes of space.
 *
 * Returns the number of bytes in the result.
 * NOTE(review): whether the output is NUL-terminated is not visible
 * here -- confirm against the definition before treating outbuf as a
 * C string.
 */
gint      g_unichar_to_utf8 (gunichar    c,
			     char       *outbuf);

#ifdef __cplusplus
}
#endif

#endif /* __GUNICODE_H__ */