diff options
author | Robert Brady <robert@suse.co.uk> | 2000-11-12 21:23:55 +0000 |
---|---|---|
committer | Robert Brady <rbrady@src.gnome.org> | 2000-11-12 21:23:55 +0000 |
commit | 8bda01029faf3078a83c45fcdfbfcbd3368ad475 (patch) | |
tree | 1d93b819bae5427af96f0b9a8951578ae9583be3 /glib | |
parent | 91d5e23f5fa8b9eda5cf07c3aedec46256183550 (diff) | |
download | glib-8bda01029faf3078a83c45fcdfbfcbd3368ad475.tar.gz |
Remove g_filename_{to,from}_utf8
2000-11-12 Robert Brady <robert@suse.co.uk>
* gstrfuncs.c, gstrfuncs.h: Remove g_filename_{to,from}_utf8
* gconvert.c, gconvert.h: Add g_filename_{to,from}_utf8 and
g_locale_{to,from}_utf8. The locale_ variant honours
nl_langinfo(CODESET), the filename_ variant uses UTF-8 unless
asked otherwise.
(g_convert): Add G_CONVERT_ERROR_PARTIAL_INPUT error, set when the number
of bytes read != len and no bytes_read pointer was passed.
Diffstat (limited to 'glib')
-rw-r--r-- | glib/gconvert.c | 276 | ||||
-rw-r--r-- | glib/gconvert.h | 15 | ||||
-rw-r--r-- | glib/gstrfuncs.c | 218 | ||||
-rw-r--r-- | glib/gstrfuncs.h | 6 |
4 files changed, 290 insertions, 225 deletions
diff --git a/glib/gconvert.c b/glib/gconvert.c index 7e596b424..97a0fe1b7 100644 --- a/glib/gconvert.c +++ b/glib/gconvert.c @@ -23,6 +23,11 @@ #include <iconv.h> #include <errno.h> #include <string.h> +#include <stdlib.h> + +#ifdef G_OS_WIN32 +#include <windows.h> +#endif #include "glib.h" @@ -173,6 +178,15 @@ g_convert (const gchar *str, if (bytes_read) *bytes_read = p - str; + else + { + if ((p - str) != len) + { + g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, + _("Partial character sequence at end of input")); + have_error = TRUE; + } + } if (bytes_written) *bytes_written = outp - dest; /* Doesn't include '\0' */ @@ -402,3 +416,265 @@ g_convert_with_fallback (const gchar *str, else return dest; } + +/* + * g_locale_to_utf8 + * + * Converts a string which is in the encoding used for strings by + * the C runtime (usually the same as that used by the operating + * system) in the current locale into a UTF-8 string. + */ + +gchar * +g_locale_to_utf8 (const gchar *opsysstring, GError **error) +{ +#ifdef G_OS_WIN32 + + gint i, clen, wclen, first; + const gint len = strlen (opsysstring); + wchar_t *wcs, wc; + gchar *result, *bp; + const wchar_t *wcp; + + wcs = g_new (wchar_t, len); + wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); + + wcp = wcs; + clen = 0; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + clen += 1; + else if (wc < 0x800) + clen += 2; + else if (wc < 0x10000) + clen += 3; + else if (wc < 0x200000) + clen += 4; + else if (wc < 0x4000000) + clen += 5; + else + clen += 6; + } + + result = g_malloc (clen + 1); + + wcp = wcs; + bp = result; + for (i = 0; i < wclen; i++) + { + wc = *wcp++; + + if (wc < 0x80) + { + first = 0; + clen = 1; + } + else if (wc < 0x800) + { + first = 0xc0; + clen = 2; + } + else if (wc < 0x10000) + { + first = 0xe0; + clen = 3; + } + else if (wc < 0x200000) + { + first = 0xf0; + clen = 4; + } + else if (wc < 0x4000000) + { + first = 0xf8; + clen = 5; + } + else + { + 
first = 0xfc; + clen = 6; + } + + /* Woo-hoo! */ + switch (clen) + { + case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ + case 1: bp[0] = wc | first; + } + + bp += clen; + } + *bp = 0; + + g_free (wcs); + + return result; + +#else + + char *charset, *str; + + if (g_get_charset (&charset)) + return g_strdup (opsysstring); + + str = g_convert (opsysstring, strlen (opsysstring), + "UTF-8", charset, NULL, NULL, error); + + return str; +#endif +} + +/* + * g_locale_from_utf8 + * + * The reverse of g_locale_to_utf8. + */ + +gchar * +g_locale_from_utf8 (const gchar *utf8string, GError **error) +{ +#ifdef G_OS_WIN32 + + gint i, mask, clen, mblen; + const gint len = strlen (utf8string); + wchar_t *wcs, *wcp; + gchar *result; + guchar *cp, *end, c; + gint n; + + /* First convert to wide chars */ + cp = (guchar *) utf8string; + end = cp + len; + n = 0; + wcs = g_new (wchar_t, len + 1); + wcp = wcs; + while (cp != end) + { + mask = 0; + c = *cp; + + if (c < 0x80) + { + clen = 1; + mask = 0x7f; + } + else if ((c & 0xe0) == 0xc0) + { + clen = 2; + mask = 0x1f; + } + else if ((c & 0xf0) == 0xe0) + { + clen = 3; + mask = 0x0f; + } + else if ((c & 0xf8) == 0xf0) + { + clen = 4; + mask = 0x07; + } + else if ((c & 0xfc) == 0xf8) + { + clen = 5; + mask = 0x03; + } + else if ((c & 0xfc) == 0xfc) + { + clen = 6; + mask = 0x01; + } + else + { + g_free (wcs); + return NULL; + } + + if (cp + clen > end) + { + g_free (wcs); + return NULL; + } + + *wcp = (cp[0] & mask); + for (i = 1; i < clen; i++) + { + if ((cp[i] & 0xc0) != 0x80) + { + g_free (wcs); + return NULL; + } + *wcp <<= 6; + *wcp |= (cp[i] & 0x3f); + } + + cp += clen; + wcp++; + n++; + } + if (cp != end) + { + g_free (wcs); + return NULL; + } + + /* n 
is the number of wide chars constructed */ + + /* Convert to a string in the current ANSI codepage */ + + result = g_new (gchar, 3 * n + 1); + mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); + result[mblen] = 0; + g_free (wcs); + + return result; + +#else + + gchar *charset, *str; + + if (g_get_charset (&charset)) + return g_strdup (utf8string); + + str = g_convert (utf8string, strlen (utf8string), + charset, "UTF-8", NULL, NULL, error); + + return str; + +#endif +} + +/* Filenames are in UTF-8 unless specificially requested otherwise */ + +gchar* +g_filename_to_utf8 (const gchar *string, GError **error) +{ +#ifdef G_OS_WIN32 + return g_locale_to_utf8 (string, error); +#else + if (getenv ("G_BROKEN_FILENAMES")) + return g_locale_to_utf8 (string, error); + + return g_strdup (string); +#endif +} + +gchar* +g_filename_from_utf8 (const gchar *string, GError **error) +{ +#ifdef G_OS_WIN32 + return g_locale_from_utf8 (string, error); +#else + if (getenv ("G_BROKEN_FILENAMES")) + return g_locale_from_utf8 (string, error); + + return g_strdup (string); +#endif +} + diff --git a/glib/gconvert.h b/glib/gconvert.h index 0586a19c1..1eaed3ece 100644 --- a/glib/gconvert.h +++ b/glib/gconvert.h @@ -35,7 +35,8 @@ typedef enum { G_CONVERT_ERROR_NO_CONVERSION, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, - G_CONVERT_ERROR_FAILED + G_CONVERT_ERROR_FAILED, + G_CONVERT_ERROR_PARTIAL_INPUT, } GConvertError; #define G_CONVERT_ERROR g_convert_error_quark() @@ -57,6 +58,18 @@ gchar* g_convert_with_fallback (const gchar *str, gint *bytes_written, GError **error); + +/* Convert between libc's idea of strings and UTF-8. + */ +gchar* g_locale_to_utf8 (const gchar *opsysstring, GError **error); +gchar* g_locale_from_utf8 (const gchar *utf8string, GError **error); + +/* Convert between the operating system (or C runtime) + * representation of file names and UTF-8. 
+ */ +gchar* g_filename_to_utf8 (const gchar *opsysstring, GError **error); +gchar* g_filename_from_utf8 (const gchar *utf8string, GError **error); + G_END_DECLS #endif /* __G_CONVERT_H__ */ diff --git a/glib/gstrfuncs.c b/glib/gstrfuncs.c index 2d1ae71f4..69e19b10e 100644 --- a/glib/gstrfuncs.c +++ b/glib/gstrfuncs.c @@ -1230,224 +1230,6 @@ g_strescape (const gchar *source, return dest; } -/* - * g_filename_to_utf8 - * - * Converts a string which is in the encoding used for file names by - * the C runtime (usually the same as that used by the operating - * system) in the current locale into a UTF-8 string. - */ - -gchar * -g_filename_to_utf8 (const gchar *opsysstring) -{ -#ifdef G_OS_WIN32 - - gint i, clen, wclen, first; - const gint len = strlen (opsysstring); - wchar_t *wcs, wc; - gchar *result, *bp; - const wchar_t *wcp; - - wcs = g_new (wchar_t, len); - wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len); - - wcp = wcs; - clen = 0; - for (i = 0; i < wclen; i++) - { - wc = *wcp++; - - if (wc < 0x80) - clen += 1; - else if (wc < 0x800) - clen += 2; - else if (wc < 0x10000) - clen += 3; - else if (wc < 0x200000) - clen += 4; - else if (wc < 0x4000000) - clen += 5; - else - clen += 6; - } - - result = g_malloc (clen + 1); - - wcp = wcs; - bp = result; - for (i = 0; i < wclen; i++) - { - wc = *wcp++; - - if (wc < 0x80) - { - first = 0; - clen = 1; - } - else if (wc < 0x800) - { - first = 0xc0; - clen = 2; - } - else if (wc < 0x10000) - { - first = 0xe0; - clen = 3; - } - else if (wc < 0x200000) - { - first = 0xf0; - clen = 4; - } - else if (wc < 0x4000000) - { - first = 0xf8; - clen = 5; - } - else - { - first = 0xfc; - clen = 6; - } - - /* Woo-hoo! 
*/ - switch (clen) - { - case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */ - case 1: bp[0] = wc | first; - } - - bp += clen; - } - *bp = 0; - - g_free (wcs); - - return result; - -#else - - return g_strdup (opsysstring); - -#endif -} - -/* - * g_filename_from_utf8 - * - * The reverse of g_filename_to_utf8. - */ - -gchar * -g_filename_from_utf8 (const gchar *utf8string) -{ -#ifdef G_OS_WIN32 - - gint i, mask, clen, mblen; - const gint len = strlen (utf8string); - wchar_t *wcs, *wcp; - gchar *result; - guchar *cp, *end, c; - gint n; - - /* First convert to wide chars */ - cp = (guchar *) utf8string; - end = cp + len; - n = 0; - wcs = g_new (wchar_t, len + 1); - wcp = wcs; - while (cp != end) - { - mask = 0; - c = *cp; - - if (c < 0x80) - { - clen = 1; - mask = 0x7f; - } - else if ((c & 0xe0) == 0xc0) - { - clen = 2; - mask = 0x1f; - } - else if ((c & 0xf0) == 0xe0) - { - clen = 3; - mask = 0x0f; - } - else if ((c & 0xf8) == 0xf0) - { - clen = 4; - mask = 0x07; - } - else if ((c & 0xfc) == 0xf8) - { - clen = 5; - mask = 0x03; - } - else if ((c & 0xfc) == 0xfc) - { - clen = 6; - mask = 0x01; - } - else - { - g_free (wcs); - return NULL; - } - - if (cp + clen > end) - { - g_free (wcs); - return NULL; - } - - *wcp = (cp[0] & mask); - for (i = 1; i < clen; i++) - { - if ((cp[i] & 0xc0) != 0x80) - { - g_free (wcs); - return NULL; - } - *wcp <<= 6; - *wcp |= (cp[i] & 0x3f); - } - - cp += clen; - wcp++; - n++; - } - if (cp != end) - { - g_free (wcs); - return NULL; - } - - /* n is the number of wide chars constructed */ - - /* Convert to a string in the current ANSI codepage */ - - result = g_new (gchar, 3 * n + 1); - mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL); - 
result[mblen] = 0; - g_free (wcs); - - return result; - -#else - - return g_strdup (utf8string); - -#endif -} - gchar* g_strchug (gchar *string) { diff --git a/glib/gstrfuncs.h b/glib/gstrfuncs.h index ffec17e8a..516c31046 100644 --- a/glib/gstrfuncs.h +++ b/glib/gstrfuncs.h @@ -103,12 +103,6 @@ gchar* g_strescape (const gchar *source, gpointer g_memdup (gconstpointer mem, guint byte_size); -/* Convert between the operating system (or C runtime) - * representation of file names and UTF-8. - */ -gchar* g_filename_to_utf8 (const gchar *opsysstring); -gchar* g_filename_from_utf8 (const gchar *utf8string); - /* NULL terminated string arrays. * g_strsplit() splits up string into max_tokens tokens at delim and * returns a newly allocated string array. |