summaryrefslogtreecommitdiff
path: root/gutf8.c
diff options
context:
space:
mode:
author: Havoc Pennington <hp@redhat.com> 2000-09-11 00:09:31 +0000
committer: Havoc Pennington <hp@src.gnome.org> 2000-09-11 00:09:31 +0000
commit: 49c937fcbb0add57ab215c0c65ba3a02e6fb13c9 (patch)
tree: 13554e2b962d514755fda38a6f3626d129a64707 /gutf8.c
parent: 333f4c43256867b9cba26950727ce7a03af4e90f (diff)
download: glib-49c937fcbb0add57ab215c0c65ba3a02e6fb13c9.tar.gz
Add this function.
2000-09-10 Havoc Pennington <hp@redhat.com> * gutf8.c (g_utf8_validate): Add this function.
Diffstat (limited to 'gutf8.c')
-rw-r--r--gutf8.c75
1 files changed, 75 insertions, 0 deletions
diff --git a/gutf8.c b/gutf8.c
index 8bf95ebc3..f98f1372a 100644
--- a/gutf8.c
+++ b/gutf8.c
@@ -487,3 +487,78 @@ g_utf8_to_ucs4 (const char *str, int len)
return result;
}
+/**
+ * g_utf8_validate:
+ * @str: a pointer to character data
+ * @max_len: max bytes to validate, or -1 to go until nul
+ * @end: return location for end of valid data
+ *
+ * Validates UTF-8 encoded text. @str is the text to validate;
+ * if @str is nul-terminated, then @max_len can be -1, otherwise
+ * @max_len should be the number of bytes to validate. Validation
+ * always stops at the first nul byte, even if fewer than @max_len
+ * bytes have been examined.
+ * If @end is non-NULL, then the end of the valid range
+ * will be stored there (i.e. the address of the first byte of the
+ * first invalid sequence if some bytes were invalid, or the end of
+ * the text being validated otherwise).
+ *
+ * Besides malformed sequences and sequences cut short by @max_len,
+ * the following are treated as invalid: overlong encodings (a code
+ * point stored in more bytes than it needs), the UTF-16 surrogate
+ * range U+D800..U+DFFF, and anything above U+10FFFF (which includes
+ * every 5- and 6-byte sequence).
+ *
+ * Returns TRUE if all of @str was valid. Many GLib and GTK+
+ * routines <emphasis>require</emphasis> valid UTF8 as input;
+ * so data read from a file or the network should be checked
+ * with g_utf8_validate() before doing anything else with it.
+ *
+ * Return value: TRUE if the text was valid UTF-8.
+ **/
+gboolean
+g_utf8_validate (const gchar  *str,
+                 gint          max_len,
+                 const gchar **end)
+{
+  /* min_code_point[n] is the smallest code point that genuinely needs an
+   * n-byte sequence; a smaller decoded value means the encoding is overlong.
+   * Index 0 is unused padding so the table can be indexed by length.
+   */
+  static const gunichar min_code_point[] = { 0, 0, 0x80, 0x800, 0x10000 };
+
+  const gchar *p = str;
+  gboolean retval = TRUE;
+
+  while ((max_len < 0 || (p - str) < max_len) && *p)
+    {
+      int i, mask = 0, len;
+      gunichar result;
+      unsigned char c = (unsigned char) *p;
+
+      UTF8_COMPUTE (c, mask, len);
+
+      /* UTF8_COMPUTE (defined elsewhere in this file) is expected to leave
+       * len at -1 for a byte that cannot start a sequence.  Lengths above 4
+       * can only encode values beyond U+10FFFF, so reject them up front;
+       * this also keeps the min_code_point lookup below in bounds.
+       */
+      if (len < 1 || len > 4)
+        {
+          retval = FALSE;
+          break;
+        }
+
+      /* check that the expected number of bytes exists in str */
+      if (max_len >= 0 &&
+          ((max_len - (p - str)) < len))
+        {
+          retval = FALSE;
+          break;
+        }
+
+      /* i is only the scratch counter required by UTF8_GET */
+      UTF8_GET (result, p, i, mask, len);
+
+      /* (gunichar)-1 is the failure marker checked for after UTF8_GET */
+      if (result == (gunichar)-1)
+        {
+          retval = FALSE;
+          break;
+        }
+
+      /* Structurally well-formed is not enough: refuse overlong forms,
+       * surrogates and out-of-range values so that each code point has
+       * exactly one accepted byte representation.
+       */
+      if (result < min_code_point[len] ||
+          (result >= 0xd800 && result <= 0xdfff) ||
+          result > 0x10ffff)
+        {
+          retval = FALSE;
+          break;
+        }
+
+      p += len;
+    }
+
+  if (end)
+    *end = p;
+
+  return retval;
+}
+
+