aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Gutkin <agutkin@google.com>2014-03-04 17:22:31 +0000
committerAlexander Gutkin <agutkin@google.com>2014-03-04 17:22:31 +0000
commit96039b7897bb8b5853e91404d97abca72d933c69 (patch)
tree67f53abddaf6561b9d8389f94e31774caecb77e7
parent439f3d1f87279a8be383ee01ef98cb9a5ca68573 (diff)
downloadlibutf-96039b7897bb8b5853e91404d97abca72d933c69.tar.gz
Updating libutf to a newer version.
This brings us in sync with the google3 //third_party/utf implementation. Change-Id: I0f3e013304bab6d609ddf0b2619f3b5b8ec6f047
-rw-r--r--plan9.h29
-rw-r--r--rune.c251
-rw-r--r--runestrcat.c10
-rw-r--r--runestrchr.c11
-rw-r--r--runestrcmp.c8
-rw-r--r--runestrcpy.c10
-rw-r--r--runestrdup.c10
-rw-r--r--runestrecpy.c10
-rw-r--r--runestrlen.c8
-rw-r--r--runestrncat.c12
-rw-r--r--runestrncmp.c8
-rw-r--r--runestrncpy.c8
-rw-r--r--runestrrchr.c13
-rw-r--r--runestrstr.c11
-rw-r--r--runetype.c1139
-rw-r--r--runetypebody-5.0.0.h1361
-rw-r--r--utf.h269
-rw-r--r--utfdef.h35
-rw-r--r--utfecpy.c11
-rw-r--r--utflen.c9
-rw-r--r--utfnlen.c10
-rw-r--r--utfrrune.c12
-rw-r--r--utfrune.c10
-rw-r--r--utfutf.c13
24 files changed, 1910 insertions, 1358 deletions
diff --git a/plan9.h b/plan9.h
deleted file mode 100644
index e40e33e..0000000
--- a/plan9.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * compiler directive on Plan 9
- */
-#ifndef USED
-#define USED(x) if(x);else
-#endif
-
-/*
- * easiest way to make sure these are defined
- */
-#define uchar _utfuchar
-#define ushort _utfushort
-#define uint _utfuint
-#define ulong _utfulong
-typedef unsigned char uchar;
-typedef unsigned short ushort;
-typedef unsigned int uint;
-typedef unsigned long ulong;
-
-/*
- * nil cannot be ((void*)0) on ANSI C,
- * because it is used for function pointers
- */
-#undef nil
-#define nil 0
-
-#undef nelem
-#define nelem(x) (sizeof (x)/sizeof (x)[0])
-
diff --git a/rune.c b/rune.c
index f594480..65df3d3 100644
--- a/rune.c
+++ b/rune.c
@@ -7,14 +7,14 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
enum
{
@@ -23,7 +23,7 @@ enum
Bit2 = 5,
Bit3 = 4,
Bit4 = 3,
- Bit5 = 2,
+ Bit5 = 2,
T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
@@ -32,19 +32,139 @@ enum
T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
- Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1,
+ /* 0001 1111 1111 1111 1111 1111 */
Maskx = (1<<Bitx)-1, /* 0011 1111 */
Testx = Maskx ^ 0xFF, /* 1100 0000 */
- Bad = Runeerror
+ Bad = Runeerror,
};
+/*
+ * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
+ * This is a slower but "safe" version of the old chartorune
+ * that works on strings that are not necessarily null-terminated.
+ *
+ * If you know for sure that your string is null-terminated,
+ * chartorune will be a bit faster.
+ *
+ * It is guaranteed not to read more than "length" bytes
+ * starting at the incoming pointer. This is to avoid
+ * possible access violations. If the string appears to be
+ * well-formed but incomplete (i.e., to get the whole Rune
+ * we'd need to read past str+length) then we'll set the Rune
+ * to Bad and return 0.
+ *
+ * Note that if we have decoding problems for other
+ * reasons, we set the Rune to Bad and return 1 instead of 0.
+ */
int
-chartorune(Rune *rune, char *str)
+charntorune(Rune *rune, const char *str, int length)
+{
+ int c, c1, c2, c3;
+ long l;
+
+ /* When we're not allowed to read anything */
+ if(length <= 0) {
+ goto badlen;
+ }
+
+ /*
+ * one character sequence (7-bit value)
+ * 00000-0007F => T1
+ */
+ c = *(uchar*)str;
+ if(c < Tx) {
+ *rune = c;
+ return 1;
+ }
+
+ // If we can't read more than one character we must stop
+ if(length <= 1) {
+ goto badlen;
+ }
+
+ /*
+ * two character sequence (11-bit value)
+ * 0080-07FF => T2 Tx
+ */
+ c1 = *(uchar*)(str+1) ^ Tx;
+ if(c1 & Testx)
+ goto bad;
+ if(c < T3) {
+ if(c < T2)
+ goto bad;
+ l = ((c << Bitx) | c1) & Rune2;
+ if(l <= Rune1)
+ goto bad;
+ *rune = l;
+ return 2;
+ }
+
+ // If we can't read more than two characters we must stop
+ if(length <= 2) {
+ goto badlen;
+ }
+
+ /*
+ * three character sequence (16-bit value)
+ * 0800-FFFF => T3 Tx Tx
+ */
+ c2 = *(uchar*)(str+2) ^ Tx;
+ if(c2 & Testx)
+ goto bad;
+ if(c < T4) {
+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+ if(l <= Rune2)
+ goto bad;
+ *rune = l;
+ return 3;
+ }
+
+ if (length <= 3)
+ goto badlen;
+
+ /*
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
+ */
+ c3 = *(uchar*)(str+3) ^ Tx;
+ if (c3 & Testx)
+ goto bad;
+ if (c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if (l <= Rune3)
+ goto bad;
+ *rune = l;
+ return 4;
+ }
+
+ // Support for 5-byte or longer UTF-8 would go here, but
+ // since we don't have that, we'll just fall through to bad.
+
+ /*
+ * bad decoding
+ */
+bad:
+ *rune = Bad;
+ return 1;
+badlen:
+ *rune = Bad;
+ return 0;
+
+}
+
+
+/*
+ * This is the older "unsafe" version, which works fine on
+ * null-terminated strings.
+ */
+int
+chartorune(Rune *rune, const char *str)
{
int c, c1, c2, c3;
long l;
@@ -92,25 +212,26 @@ chartorune(Rune *rune, char *str)
}
/*
- * four character sequence
- * 10000-10FFFF => T4 Tx Tx Tx
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
*/
- if(UTFmax >= 4) {
- c3 = *(uchar*)(str+3) ^ Tx;
- if(c3 & Testx)
+ c3 = *(uchar*)(str+3) ^ Tx;
+ if (c3 & Testx)
+ goto bad;
+ if (c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if (l <= Rune3)
goto bad;
- if(c < T5) {
- l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
- if(l <= Rune3)
- goto bad;
- if(l > Runemax)
- goto bad;
- *rune = l;
- return 4;
- }
+ *rune = l;
+ return 4;
}
/*
+ * Support for 5-byte or longer UTF-8 would go here, but
+ * since we don't have that, we'll just fall through to bad.
+ */
+
+ /*
* bad decoding
*/
bad:
@@ -119,9 +240,16 @@ bad:
}
int
-runetochar(char *str, Rune *rune)
+isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
+ *consumed = charntorune(rune, str, length);
+ return *rune != Runeerror || *consumed == 3;
+}
+
+int
+runetochar(char *str, const Rune *rune)
{
- long c;
+ /* Runes are signed, so convert to unsigned for range check. */
+ unsigned long c;
/*
* one character sequence
@@ -135,7 +263,7 @@ runetochar(char *str, Rune *rune)
/*
* two character sequence
- * 00080-007FF => T2 Tx
+ * 0080-07FF => T2 Tx
*/
if(c <= Rune2) {
str[0] = T2 | (c >> 1*Bitx);
@@ -144,74 +272,79 @@ runetochar(char *str, Rune *rune)
}
/*
- * three character sequence
- * 00800-0FFFF => T3 Tx Tx
+ * If the Rune is out of range, convert it to the error rune.
+ * Do this test here because the error rune encodes to three bytes.
+ * Doing it earlier would duplicate work, since an out of range
+ * Rune wouldn't have fit in one or two bytes.
*/
- if(c > Runemax)
+ if (c > Runemax)
c = Runeerror;
- if(c <= Rune3) {
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ if (c <= Rune3) {
str[0] = T3 | (c >> 2*Bitx);
str[1] = Tx | ((c >> 1*Bitx) & Maskx);
str[2] = Tx | (c & Maskx);
return 3;
}
-
+
/*
- * four character sequence
- * 010000-1FFFFF => T4 Tx Tx Tx
+ * four character sequence (21-bit value)
+ * 10000-1FFFFF => T4 Tx Tx Tx
*/
- str[0] = T4 | (c >> 3*Bitx);
+ str[0] = T4 | (c >> 3*Bitx);
str[1] = Tx | ((c >> 2*Bitx) & Maskx);
str[2] = Tx | ((c >> 1*Bitx) & Maskx);
- str[3] = Tx | (c & Maskx);
+ str[3] = Tx | (c & Maskx);
return 4;
}
int
-runelen(long c)
+runelen(Rune rune)
{
- Rune rune;
char str[10];
- rune = c;
return runetochar(str, &rune);
}
int
-runenlen(Rune *r, int nrune)
+runenlen(const Rune *r, int nrune)
{
int nb, c;
nb = 0;
while(nrune--) {
c = *r++;
- if(c <= Rune1)
+ if (c <= Rune1)
nb++;
- else
- if(c <= Rune2)
+ else if (c <= Rune2)
nb += 2;
- else
- if(c <= Rune3 || c > Runemax)
+ else if (c <= Rune3)
nb += 3;
- else
+ else /* assert(c <= Rune4) */
nb += 4;
}
return nb;
}
int
-fullrune(char *str, int n)
+fullrune(const char *str, int n)
{
- int c;
-
- if(n <= 0)
- return 0;
- c = *(uchar*)str;
- if(c < Tx)
- return 1;
- if(c < T3)
- return n >= 2;
- if(UTFmax == 3 || c < T4)
- return n >= 3;
- return n >= 4;
+ if (n > 0) {
+ int c = *(uchar*)str;
+ if (c < Tx)
+ return 1;
+ if (n > 1) {
+ if (c < T3)
+ return 1;
+ if (n > 2) {
+ if (c < T4 || n > 3)
+ return 1;
+ }
+ }
+ }
+ return 0;
}
diff --git a/runestrcat.c b/runestrcat.c
index 65d4c0f..ccb7cde 100644
--- a/runestrcat.c
+++ b/runestrcat.c
@@ -7,19 +7,19 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
Rune*
-runestrcat(Rune *s1, Rune *s2)
+runestrcat(Rune *s1, const Rune *s2)
{
- runestrcpy(runestrchr(s1, 0), s2);
+ runestrcpy((Rune*)runestrchr(s1, 0), s2);
return s1;
}
diff --git a/runestrchr.c b/runestrchr.c
index 21fbeeb..7acca84 100644
--- a/runestrchr.c
+++ b/runestrchr.c
@@ -7,17 +7,18 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
+const
Rune*
-runestrchr(Rune *s, Rune c)
+runestrchr(const Rune *s, Rune c)
{
Rune c0 = c;
Rune c1;
@@ -28,7 +29,7 @@ runestrchr(Rune *s, Rune c)
return s-1;
}
- while(c1 = *s++)
+ while((c1 = *s++) != 0)
if(c1 == c0)
return s-1;
return 0;
diff --git a/runestrcmp.c b/runestrcmp.c
index a368613..c274625 100644
--- a/runestrcmp.c
+++ b/runestrcmp.c
@@ -7,17 +7,17 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
int
-runestrcmp(Rune *s1, Rune *s2)
+runestrcmp(const Rune *s1, const Rune *s2)
{
Rune c1, c2;
diff --git a/runestrcpy.c b/runestrcpy.c
index 0659fc3..99c463d 100644
--- a/runestrcpy.c
+++ b/runestrcpy.c
@@ -7,22 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
Rune*
-runestrcpy(Rune *s1, Rune *s2)
+runestrcpy(Rune *s1, const Rune *s2)
{
Rune *os1;
os1 = s1;
- while(*s1++ = *s2++)
+ while((*s1++ = *s2++) != 0)
;
return os1;
}
diff --git a/runestrdup.c b/runestrdup.c
index 8170e7b..345f2b0 100644
--- a/runestrdup.c
+++ b/runestrdup.c
@@ -7,22 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
Rune*
-runestrdup(Rune *s)
+runestrdup(const Rune *s)
{
Rune *ns;
- ns = malloc(sizeof(Rune)*(runestrlen(s) + 1));
+ ns = (Rune*)malloc(sizeof(Rune)*(runestrlen(s) + 1));
if(ns == 0)
return 0;
diff --git a/runestrecpy.c b/runestrecpy.c
index c543e22..d095e3a 100644
--- a/runestrecpy.c
+++ b/runestrecpy.c
@@ -7,22 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
Rune*
-runestrecpy(Rune *s1, Rune *es1, Rune *s2)
+runestrecpy(Rune *s1, Rune *es1, const Rune *s2)
{
if(s1 >= es1)
return s1;
- while(*s1++ = *s2++){
+ while((*s1++ = *s2++) != 0){
if(s1 == es1){
*--s1 = '\0';
break;
diff --git a/runestrlen.c b/runestrlen.c
index 0a13ecd..ebf76da 100644
--- a/runestrlen.c
+++ b/runestrlen.c
@@ -7,17 +7,17 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
long
-runestrlen(Rune *s)
+runestrlen(const Rune *s)
{
return runestrchr(s, 0) - s;
diff --git a/runestrncat.c b/runestrncat.c
index 9653637..3ad827e 100644
--- a/runestrncat.c
+++ b/runestrncat.c
@@ -7,23 +7,23 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
Rune*
-runestrncat(Rune *s1, Rune *s2, long n)
+runestrncat(Rune *s1, const Rune *s2, long n)
{
Rune *os1;
os1 = s1;
- s1 = runestrchr(s1, 0);
- while(*s1++ = *s2++)
+ s1 = (Rune*)runestrchr(s1, 0);
+ while((*s1++ = *s2++) != 0)
if(--n < 0) {
s1[-1] = 0;
break;
diff --git a/runestrncmp.c b/runestrncmp.c
index 5e9a3b6..060a425 100644
--- a/runestrncmp.c
+++ b/runestrncmp.c
@@ -7,17 +7,17 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
int
-runestrncmp(Rune *s1, Rune *s2, long n)
+runestrncmp(const Rune *s1, const Rune *s2, long n)
{
Rune c1, c2;
diff --git a/runestrncpy.c b/runestrncpy.c
index ffcb3e1..4deeaaf 100644
--- a/runestrncpy.c
+++ b/runestrncpy.c
@@ -7,17 +7,17 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
Rune*
-runestrncpy(Rune *s1, Rune *s2, long n)
+runestrncpy(Rune *s1, const Rune *s2, long n)
{
int i;
Rune *os1;
diff --git a/runestrrchr.c b/runestrrchr.c
index 1b0edbb..c7fb3e1 100644
--- a/runestrrchr.c
+++ b/runestrrchr.c
@@ -7,24 +7,25 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
+const
Rune*
-runestrrchr(Rune *s, Rune c)
+runestrrchr(const Rune *s, Rune c)
{
- Rune *r;
+ const Rune *r;
if(c == 0)
return runestrchr(s, 0);
r = 0;
- while(s = runestrchr(s, c))
+ while((s = runestrchr(s, c)) != 0)
r = s++;
return r;
}
diff --git a/runestrstr.c b/runestrstr.c
index f5fa997..fc5fd96 100644
--- a/runestrstr.c
+++ b/runestrstr.c
@@ -7,23 +7,24 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
/*
* Return pointer to first occurrence of s2 in s1,
* 0 if none
*/
+const
Rune*
-runestrstr(Rune *s1, Rune *s2)
+runestrstr(const Rune *s1, const Rune *s2)
{
- Rune *p, *pa, *pb;
+ const Rune *p, *pa, *pb;
int c0, c;
c0 = *s2;
diff --git a/runetype.c b/runetype.c
index ac6d7b5..64ba1dc 100644
--- a/runetype.c
+++ b/runetype.c
@@ -7,1037 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
-#include <stdarg.h>
-#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
-/*
- * alpha ranges -
- * only covers ranges not in lower||upper
- */
-static
-Rune __alpha2[] =
-{
- 0x00d8, 0x00f6, /* Ø - ö */
- 0x00f8, 0x01f5, /* ø - ǵ */
- 0x0250, 0x02a8, /* ɐ - ʨ */
- 0x038e, 0x03a1, /* Ύ - Ρ */
- 0x03a3, 0x03ce, /* Σ - ώ */
- 0x03d0, 0x03d6, /* ϐ - ϖ */
- 0x03e2, 0x03f3, /* Ϣ - ϳ */
- 0x0490, 0x04c4, /* Ґ - ӄ */
- 0x0561, 0x0587, /* ա - և */
- 0x05d0, 0x05ea, /* א - ת */
- 0x05f0, 0x05f2, /* װ - ײ */
- 0x0621, 0x063a, /* ء - غ */
- 0x0640, 0x064a, /* ـ - ي */
- 0x0671, 0x06b7, /* ٱ - ڷ */
- 0x06ba, 0x06be, /* ں - ھ */
- 0x06c0, 0x06ce, /* ۀ - ێ */
- 0x06d0, 0x06d3, /* ې - ۓ */
- 0x0905, 0x0939, /* अ - ह */
- 0x0958, 0x0961, /* क़ - ॡ */
- 0x0985, 0x098c, /* অ - ঌ */
- 0x098f, 0x0990, /* এ - ঐ */
- 0x0993, 0x09a8, /* ও - ন */
- 0x09aa, 0x09b0, /* প - র */
- 0x09b6, 0x09b9, /* শ - হ */
- 0x09dc, 0x09dd, /* ড় - ঢ় */
- 0x09df, 0x09e1, /* য় - ৡ */
- 0x09f0, 0x09f1, /* ৰ - ৱ */
- 0x0a05, 0x0a0a, /* ਅ - ਊ */
- 0x0a0f, 0x0a10, /* ਏ - ਐ */
- 0x0a13, 0x0a28, /* ਓ - ਨ */
- 0x0a2a, 0x0a30, /* ਪ - ਰ */
- 0x0a32, 0x0a33, /* ਲ - ਲ਼ */
- 0x0a35, 0x0a36, /* ਵ - ਸ਼ */
- 0x0a38, 0x0a39, /* ਸ - ਹ */
- 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */
- 0x0a85, 0x0a8b, /* અ - ઋ */
- 0x0a8f, 0x0a91, /* એ - ઑ */
- 0x0a93, 0x0aa8, /* ઓ - ન */
- 0x0aaa, 0x0ab0, /* પ - ર */
- 0x0ab2, 0x0ab3, /* લ - ળ */
- 0x0ab5, 0x0ab9, /* વ - હ */
- 0x0b05, 0x0b0c, /* ଅ - ଌ */
- 0x0b0f, 0x0b10, /* ଏ - ଐ */
- 0x0b13, 0x0b28, /* ଓ - ନ */
- 0x0b2a, 0x0b30, /* ପ - ର */
- 0x0b32, 0x0b33, /* ଲ - ଳ */
- 0x0b36, 0x0b39, /* ଶ - ହ */
- 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */
- 0x0b5f, 0x0b61, /* ୟ - ୡ */
- 0x0b85, 0x0b8a, /* அ - ஊ */
- 0x0b8e, 0x0b90, /* எ - ஐ */
- 0x0b92, 0x0b95, /* ஒ - க */
- 0x0b99, 0x0b9a, /* ங - ச */
- 0x0b9e, 0x0b9f, /* ஞ - ட */
- 0x0ba3, 0x0ba4, /* ண - த */
- 0x0ba8, 0x0baa, /* ந - ப */
- 0x0bae, 0x0bb5, /* ம - வ */
- 0x0bb7, 0x0bb9, /* ஷ - ஹ */
- 0x0c05, 0x0c0c, /* అ - ఌ */
- 0x0c0e, 0x0c10, /* ఎ - ఐ */
- 0x0c12, 0x0c28, /* ఒ - న */
- 0x0c2a, 0x0c33, /* ప - ళ */
- 0x0c35, 0x0c39, /* వ - హ */
- 0x0c60, 0x0c61, /* ౠ - ౡ */
- 0x0c85, 0x0c8c, /* ಅ - ಌ */
- 0x0c8e, 0x0c90, /* ಎ - ಐ */
- 0x0c92, 0x0ca8, /* ಒ - ನ */
- 0x0caa, 0x0cb3, /* ಪ - ಳ */
- 0x0cb5, 0x0cb9, /* ವ - ಹ */
- 0x0ce0, 0x0ce1, /* ೠ - ೡ */
- 0x0d05, 0x0d0c, /* അ - ഌ */
- 0x0d0e, 0x0d10, /* എ - ഐ */
- 0x0d12, 0x0d28, /* ഒ - ന */
- 0x0d2a, 0x0d39, /* പ - ഹ */
- 0x0d60, 0x0d61, /* ൠ - ൡ */
- 0x0e01, 0x0e30, /* ก - ะ */
- 0x0e32, 0x0e33, /* า - ำ */
- 0x0e40, 0x0e46, /* เ - ๆ */
- 0x0e5a, 0x0e5b, /* ๚ - ๛ */
- 0x0e81, 0x0e82, /* ກ - ຂ */
- 0x0e87, 0x0e88, /* ງ - ຈ */
- 0x0e94, 0x0e97, /* ດ - ທ */
- 0x0e99, 0x0e9f, /* ນ - ຟ */
- 0x0ea1, 0x0ea3, /* ມ - ຣ */
- 0x0eaa, 0x0eab, /* ສ - ຫ */
- 0x0ead, 0x0eae, /* ອ - ຮ */
- 0x0eb2, 0x0eb3, /* າ - ຳ */
- 0x0ec0, 0x0ec4, /* ເ - ໄ */
- 0x0edc, 0x0edd, /* ໜ - ໝ */
- 0x0f18, 0x0f19, /* ༘ - ༙ */
- 0x0f40, 0x0f47, /* ཀ - ཇ */
- 0x0f49, 0x0f69, /* ཉ - ཀྵ */
- 0x10d0, 0x10f6, /* ა - ჶ */
- 0x1100, 0x1159, /* ᄀ - ᅙ */
- 0x115f, 0x11a2, /* ᅟ - ᆢ */
- 0x11a8, 0x11f9, /* ᆨ - ᇹ */
- 0x1e00, 0x1e9b, /* Ḁ - ẛ */
- 0x1f50, 0x1f57, /* ὐ - ὗ */
- 0x1f80, 0x1fb4, /* ᾀ - ᾴ */
- 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */
- 0x1fc2, 0x1fc4, /* ῂ - ῄ */
- 0x1fc6, 0x1fcc, /* ῆ - ῌ */
- 0x1fd0, 0x1fd3, /* ῐ - ΐ */
- 0x1fd6, 0x1fdb, /* ῖ - Ί */
- 0x1fe0, 0x1fec, /* ῠ - Ῥ */
- 0x1ff2, 0x1ff4, /* ῲ - ῴ */
- 0x1ff6, 0x1ffc, /* ῶ - ῼ */
- 0x210a, 0x2113, /* ℊ - ℓ */
- 0x2115, 0x211d, /* ℕ - ℝ */
- 0x2120, 0x2122, /* ℠ - ™ */
- 0x212a, 0x2131, /* K - ℱ */
- 0x2133, 0x2138, /* ℳ - ℸ */
- 0x3041, 0x3094, /* ぁ - ゔ */
- 0x30a1, 0x30fa, /* ァ - ヺ */
- 0x3105, 0x312c, /* ㄅ - ㄬ */
- 0x3131, 0x318e, /* ㄱ - ㆎ */
- 0x3192, 0x319f, /* ㆒ - ㆟ */
- 0x3260, 0x327b, /* ㉠ - ㉻ */
- 0x328a, 0x32b0, /* ㊊ - ㊰ */
- 0x32d0, 0x32fe, /* ㋐ - ㋾ */
- 0x3300, 0x3357, /* ㌀ - ㍗ */
- 0x3371, 0x3376, /* ㍱ - ㍶ */
- 0x337b, 0x3394, /* ㍻ - ㎔ */
- 0x3399, 0x339e, /* ㎙ - ㎞ */
- 0x33a9, 0x33ad, /* ㎩ - ㎭ */
- 0x33b0, 0x33c1, /* ㎰ - ㏁ */
- 0x33c3, 0x33c5, /* ㏃ - ㏅ */
- 0x33c7, 0x33d7, /* ㏇ - ㏗ */
- 0x33d9, 0x33dd, /* ㏙ - ㏝ */
- 0x4e00, 0x9fff, /* 一 - 鿿 */
- 0xac00, 0xd7a3, /* 가 - 힣 */
- 0xf900, 0xfb06, /* 豈 - st */
- 0xfb13, 0xfb17, /* ﬓ - ﬗ */
- 0xfb1f, 0xfb28, /* ײַ - ﬨ */
- 0xfb2a, 0xfb36, /* שׁ - זּ */
- 0xfb38, 0xfb3c, /* טּ - לּ */
- 0xfb40, 0xfb41, /* נּ - סּ */
- 0xfb43, 0xfb44, /* ףּ - פּ */
- 0xfb46, 0xfbb1, /* צּ - ﮱ */
- 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */
- 0xfd50, 0xfd8f, /* ﵐ - ﶏ */
- 0xfd92, 0xfdc7, /* ﶒ - ﷇ */
- 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */
- 0xfe70, 0xfe72, /* ﹰ - ﹲ */
- 0xfe76, 0xfefc, /* ﹶ - ﻼ */
- 0xff66, 0xff6f, /* ヲ - ッ */
- 0xff71, 0xff9d, /* ア - ン */
- 0xffa0, 0xffbe, /* ᅠ - ᄒ */
- 0xffc2, 0xffc7, /* ᅡ - ᅦ */
- 0xffca, 0xffcf, /* ᅧ - ᅬ */
- 0xffd2, 0xffd7, /* ᅭ - ᅲ */
- 0xffda, 0xffdc, /* ᅳ - ᅵ */
-};
-
-/*
- * alpha singlets -
- * only covers ranges not in lower||upper
- */
-static
-Rune __alpha1[] =
-{
- 0x00aa, /* ª */
- 0x00b5, /* µ */
- 0x00ba, /* º */
- 0x03da, /* Ϛ */
- 0x03dc, /* Ϝ */
- 0x03de, /* Ϟ */
- 0x03e0, /* Ϡ */
- 0x06d5, /* ە */
- 0x09b2, /* ল */
- 0x0a5e, /* ਫ਼ */
- 0x0a8d, /* ઍ */
- 0x0ae0, /* ૠ */
- 0x0b9c, /* ஜ */
- 0x0cde, /* ೞ */
- 0x0e4f, /* ๏ */
- 0x0e84, /* ຄ */
- 0x0e8a, /* ຊ */
- 0x0e8d, /* ຍ */
- 0x0ea5, /* ລ */
- 0x0ea7, /* ວ */
- 0x0eb0, /* ະ */
- 0x0ebd, /* ຽ */
- 0x1fbe, /* ι */
- 0x207f, /* ⁿ */
- 0x20a8, /* ₨ */
- 0x2102, /* ℂ */
- 0x2107, /* ℇ */
- 0x2124, /* ℤ */
- 0x2126, /* Ω */
- 0x2128, /* ℨ */
- 0xfb3e, /* מּ */
- 0xfe74, /* ﹴ */
-};
-
-/*
- * space ranges
- */
-static
-Rune __space2[] =
-{
- 0x0009, 0x000a, /* tab and newline */
- 0x0020, 0x0020, /* space */
- 0x00a0, 0x00a0, /*   */
- 0x2000, 0x200b, /*   - ​ */
- 0x2028, 0x2029, /* 
 - 
 */
- 0x3000, 0x3000, /*   */
- 0xfeff, 0xfeff, /*  */
-};
-
-/*
- * lower case ranges
- * 3rd col is conversion excess 500
- */
-static
-Rune __toupper2[] =
-{
- 0x0061, 0x007a, 468, /* a-z A-Z */
- 0x00e0, 0x00f6, 468, /* à-ö À-Ö */
- 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */
- 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */
- 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */
- 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */
- 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */
- 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */
- 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */
- 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */
- 0x0430, 0x044f, 468, /* а-я А-Я */
- 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */
- 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */
- 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */
- 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */
- 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */
- 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */
- 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */
- 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */
- 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */
- 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */
- 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */
- 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */
- 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */
- 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */
- 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */
- 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */
- 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */
- 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */
- 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */
- 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */
- 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */
- 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */
- 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */
- 0xff41, 0xff5a, 468, /* a-z A-Z */
-};
-
-/*
- * lower case singlets
- * 2nd col is conversion excess 500
- */
-static
-Rune __toupper1[] =
-{
- 0x00ff, 621, /* ÿ Ÿ */
- 0x0101, 499, /* ā Ā */
- 0x0103, 499, /* ă Ă */
- 0x0105, 499, /* ą Ą */
- 0x0107, 499, /* ć Ć */
- 0x0109, 499, /* ĉ Ĉ */
- 0x010b, 499, /* ċ Ċ */
- 0x010d, 499, /* č Č */
- 0x010f, 499, /* ď Ď */
- 0x0111, 499, /* đ Đ */
- 0x0113, 499, /* ē Ē */
- 0x0115, 499, /* ĕ Ĕ */
- 0x0117, 499, /* ė Ė */
- 0x0119, 499, /* ę Ę */
- 0x011b, 499, /* ě Ě */
- 0x011d, 499, /* ĝ Ĝ */
- 0x011f, 499, /* ğ Ğ */
- 0x0121, 499, /* ġ Ġ */
- 0x0123, 499, /* ģ Ģ */
- 0x0125, 499, /* ĥ Ĥ */
- 0x0127, 499, /* ħ Ħ */
- 0x0129, 499, /* ĩ Ĩ */
- 0x012b, 499, /* ī Ī */
- 0x012d, 499, /* ĭ Ĭ */
- 0x012f, 499, /* į Į */
- 0x0131, 268, /* ı I */
- 0x0133, 499, /* ij IJ */
- 0x0135, 499, /* ĵ Ĵ */
- 0x0137, 499, /* ķ Ķ */
- 0x013a, 499, /* ĺ Ĺ */
- 0x013c, 499, /* ļ Ļ */
- 0x013e, 499, /* ľ Ľ */
- 0x0140, 499, /* ŀ Ŀ */
- 0x0142, 499, /* ł Ł */
- 0x0144, 499, /* ń Ń */
- 0x0146, 499, /* ņ Ņ */
- 0x0148, 499, /* ň Ň */
- 0x014b, 499, /* ŋ Ŋ */
- 0x014d, 499, /* ō Ō */
- 0x014f, 499, /* ŏ Ŏ */
- 0x0151, 499, /* ő Ő */
- 0x0153, 499, /* œ Œ */
- 0x0155, 499, /* ŕ Ŕ */
- 0x0157, 499, /* ŗ Ŗ */
- 0x0159, 499, /* ř Ř */
- 0x015b, 499, /* ś Ś */
- 0x015d, 499, /* ŝ Ŝ */
- 0x015f, 499, /* ş Ş */
- 0x0161, 499, /* š Š */
- 0x0163, 499, /* ţ Ţ */
- 0x0165, 499, /* ť Ť */
- 0x0167, 499, /* ŧ Ŧ */
- 0x0169, 499, /* ũ Ũ */
- 0x016b, 499, /* ū Ū */
- 0x016d, 499, /* ŭ Ŭ */
- 0x016f, 499, /* ů Ů */
- 0x0171, 499, /* ű Ű */
- 0x0173, 499, /* ų Ų */
- 0x0175, 499, /* ŵ Ŵ */
- 0x0177, 499, /* ŷ Ŷ */
- 0x017a, 499, /* ź Ź */
- 0x017c, 499, /* ż Ż */
- 0x017e, 499, /* ž Ž */
- 0x017f, 200, /* ſ S */
- 0x0183, 499, /* ƃ Ƃ */
- 0x0185, 499, /* ƅ Ƅ */
- 0x0188, 499, /* ƈ Ƈ */
- 0x018c, 499, /* ƌ Ƌ */
- 0x0192, 499, /* ƒ Ƒ */
- 0x0199, 499, /* ƙ Ƙ */
- 0x01a1, 499, /* ơ Ơ */
- 0x01a3, 499, /* ƣ Ƣ */
- 0x01a5, 499, /* ƥ Ƥ */
- 0x01a8, 499, /* ƨ Ƨ */
- 0x01ad, 499, /* ƭ Ƭ */
- 0x01b0, 499, /* ư Ư */
- 0x01b4, 499, /* ƴ Ƴ */
- 0x01b6, 499, /* ƶ Ƶ */
- 0x01b9, 499, /* ƹ Ƹ */
- 0x01bd, 499, /* ƽ Ƽ */
- 0x01c5, 499, /* Dž DŽ */
- 0x01c6, 498, /* dž DŽ */
- 0x01c8, 499, /* Lj LJ */
- 0x01c9, 498, /* lj LJ */
- 0x01cb, 499, /* Nj NJ */
- 0x01cc, 498, /* nj NJ */
- 0x01ce, 499, /* ǎ Ǎ */
- 0x01d0, 499, /* ǐ Ǐ */
- 0x01d2, 499, /* ǒ Ǒ */
- 0x01d4, 499, /* ǔ Ǔ */
- 0x01d6, 499, /* ǖ Ǖ */
- 0x01d8, 499, /* ǘ Ǘ */
- 0x01da, 499, /* ǚ Ǚ */
- 0x01dc, 499, /* ǜ Ǜ */
- 0x01df, 499, /* ǟ Ǟ */
- 0x01e1, 499, /* ǡ Ǡ */
- 0x01e3, 499, /* ǣ Ǣ */
- 0x01e5, 499, /* ǥ Ǥ */
- 0x01e7, 499, /* ǧ Ǧ */
- 0x01e9, 499, /* ǩ Ǩ */
- 0x01eb, 499, /* ǫ Ǫ */
- 0x01ed, 499, /* ǭ Ǭ */
- 0x01ef, 499, /* ǯ Ǯ */
- 0x01f2, 499, /* Dz DZ */
- 0x01f3, 498, /* dz DZ */
- 0x01f5, 499, /* ǵ Ǵ */
- 0x01fb, 499, /* ǻ Ǻ */
- 0x01fd, 499, /* ǽ Ǽ */
- 0x01ff, 499, /* ǿ Ǿ */
- 0x0201, 499, /* ȁ Ȁ */
- 0x0203, 499, /* ȃ Ȃ */
- 0x0205, 499, /* ȅ Ȅ */
- 0x0207, 499, /* ȇ Ȇ */
- 0x0209, 499, /* ȉ Ȉ */
- 0x020b, 499, /* ȋ Ȋ */
- 0x020d, 499, /* ȍ Ȍ */
- 0x020f, 499, /* ȏ Ȏ */
- 0x0211, 499, /* ȑ Ȑ */
- 0x0213, 499, /* ȓ Ȓ */
- 0x0215, 499, /* ȕ Ȕ */
- 0x0217, 499, /* ȗ Ȗ */
- 0x0253, 290, /* ɓ Ɓ */
- 0x0254, 294, /* ɔ Ɔ */
- 0x025b, 297, /* ɛ Ɛ */
- 0x0260, 295, /* ɠ Ɠ */
- 0x0263, 293, /* ɣ Ɣ */
- 0x0268, 291, /* ɨ Ɨ */
- 0x0269, 289, /* ɩ Ɩ */
- 0x026f, 289, /* ɯ Ɯ */
- 0x0272, 287, /* ɲ Ɲ */
- 0x0283, 282, /* ʃ Ʃ */
- 0x0288, 282, /* ʈ Ʈ */
- 0x0292, 281, /* ʒ Ʒ */
- 0x03ac, 462, /* ά Ά */
- 0x03cc, 436, /* ό Ό */
- 0x03d0, 438, /* ϐ Β */
- 0x03d1, 443, /* ϑ Θ */
- 0x03d5, 453, /* ϕ Φ */
- 0x03d6, 446, /* ϖ Π */
- 0x03e3, 499, /* ϣ Ϣ */
- 0x03e5, 499, /* ϥ Ϥ */
- 0x03e7, 499, /* ϧ Ϧ */
- 0x03e9, 499, /* ϩ Ϩ */
- 0x03eb, 499, /* ϫ Ϫ */
- 0x03ed, 499, /* ϭ Ϭ */
- 0x03ef, 499, /* ϯ Ϯ */
- 0x03f0, 414, /* ϰ Κ */
- 0x03f1, 420, /* ϱ Ρ */
- 0x0461, 499, /* ѡ Ѡ */
- 0x0463, 499, /* ѣ Ѣ */
- 0x0465, 499, /* ѥ Ѥ */
- 0x0467, 499, /* ѧ Ѧ */
- 0x0469, 499, /* ѩ Ѩ */
- 0x046b, 499, /* ѫ Ѫ */
- 0x046d, 499, /* ѭ Ѭ */
- 0x046f, 499, /* ѯ Ѯ */
- 0x0471, 499, /* ѱ Ѱ */
- 0x0473, 499, /* ѳ Ѳ */
- 0x0475, 499, /* ѵ Ѵ */
- 0x0477, 499, /* ѷ Ѷ */
- 0x0479, 499, /* ѹ Ѹ */
- 0x047b, 499, /* ѻ Ѻ */
- 0x047d, 499, /* ѽ Ѽ */
- 0x047f, 499, /* ѿ Ѿ */
- 0x0481, 499, /* ҁ Ҁ */
- 0x0491, 499, /* ґ Ґ */
- 0x0493, 499, /* ғ Ғ */
- 0x0495, 499, /* ҕ Ҕ */
- 0x0497, 499, /* җ Җ */
- 0x0499, 499, /* ҙ Ҙ */
- 0x049b, 499, /* қ Қ */
- 0x049d, 499, /* ҝ Ҝ */
- 0x049f, 499, /* ҟ Ҟ */
- 0x04a1, 499, /* ҡ Ҡ */
- 0x04a3, 499, /* ң Ң */
- 0x04a5, 499, /* ҥ Ҥ */
- 0x04a7, 499, /* ҧ Ҧ */
- 0x04a9, 499, /* ҩ Ҩ */
- 0x04ab, 499, /* ҫ Ҫ */
- 0x04ad, 499, /* ҭ Ҭ */
- 0x04af, 499, /* ү Ү */
- 0x04b1, 499, /* ұ Ұ */
- 0x04b3, 499, /* ҳ Ҳ */
- 0x04b5, 499, /* ҵ Ҵ */
- 0x04b7, 499, /* ҷ Ҷ */
- 0x04b9, 499, /* ҹ Ҹ */
- 0x04bb, 499, /* һ Һ */
- 0x04bd, 499, /* ҽ Ҽ */
- 0x04bf, 499, /* ҿ Ҿ */
- 0x04c2, 499, /* ӂ Ӂ */
- 0x04c4, 499, /* ӄ Ӄ */
- 0x04c8, 499, /* ӈ Ӈ */
- 0x04cc, 499, /* ӌ Ӌ */
- 0x04d1, 499, /* ӑ Ӑ */
- 0x04d3, 499, /* ӓ Ӓ */
- 0x04d5, 499, /* ӕ Ӕ */
- 0x04d7, 499, /* ӗ Ӗ */
- 0x04d9, 499, /* ә Ә */
- 0x04db, 499, /* ӛ Ӛ */
- 0x04dd, 499, /* ӝ Ӝ */
- 0x04df, 499, /* ӟ Ӟ */
- 0x04e1, 499, /* ӡ Ӡ */
- 0x04e3, 499, /* ӣ Ӣ */
- 0x04e5, 499, /* ӥ Ӥ */
- 0x04e7, 499, /* ӧ Ӧ */
- 0x04e9, 499, /* ө Ө */
- 0x04eb, 499, /* ӫ Ӫ */
- 0x04ef, 499, /* ӯ Ӯ */
- 0x04f1, 499, /* ӱ Ӱ */
- 0x04f3, 499, /* ӳ Ӳ */
- 0x04f5, 499, /* ӵ Ӵ */
- 0x04f9, 499, /* ӹ Ӹ */
- 0x1e01, 499, /* ḁ Ḁ */
- 0x1e03, 499, /* ḃ Ḃ */
- 0x1e05, 499, /* ḅ Ḅ */
- 0x1e07, 499, /* ḇ Ḇ */
- 0x1e09, 499, /* ḉ Ḉ */
- 0x1e0b, 499, /* ḋ Ḋ */
- 0x1e0d, 499, /* ḍ Ḍ */
- 0x1e0f, 499, /* ḏ Ḏ */
- 0x1e11, 499, /* ḑ Ḑ */
- 0x1e13, 499, /* ḓ Ḓ */
- 0x1e15, 499, /* ḕ Ḕ */
- 0x1e17, 499, /* ḗ Ḗ */
- 0x1e19, 499, /* ḙ Ḙ */
- 0x1e1b, 499, /* ḛ Ḛ */
- 0x1e1d, 499, /* ḝ Ḝ */
- 0x1e1f, 499, /* ḟ Ḟ */
- 0x1e21, 499, /* ḡ Ḡ */
- 0x1e23, 499, /* ḣ Ḣ */
- 0x1e25, 499, /* ḥ Ḥ */
- 0x1e27, 499, /* ḧ Ḧ */
- 0x1e29, 499, /* ḩ Ḩ */
- 0x1e2b, 499, /* ḫ Ḫ */
- 0x1e2d, 499, /* ḭ Ḭ */
- 0x1e2f, 499, /* ḯ Ḯ */
- 0x1e31, 499, /* ḱ Ḱ */
- 0x1e33, 499, /* ḳ Ḳ */
- 0x1e35, 499, /* ḵ Ḵ */
- 0x1e37, 499, /* ḷ Ḷ */
- 0x1e39, 499, /* ḹ Ḹ */
- 0x1e3b, 499, /* ḻ Ḻ */
- 0x1e3d, 499, /* ḽ Ḽ */
- 0x1e3f, 499, /* ḿ Ḿ */
- 0x1e41, 499, /* ṁ Ṁ */
- 0x1e43, 499, /* ṃ Ṃ */
- 0x1e45, 499, /* ṅ Ṅ */
- 0x1e47, 499, /* ṇ Ṇ */
- 0x1e49, 499, /* ṉ Ṉ */
- 0x1e4b, 499, /* ṋ Ṋ */
- 0x1e4d, 499, /* ṍ Ṍ */
- 0x1e4f, 499, /* ṏ Ṏ */
- 0x1e51, 499, /* ṑ Ṑ */
- 0x1e53, 499, /* ṓ Ṓ */
- 0x1e55, 499, /* ṕ Ṕ */
- 0x1e57, 499, /* ṗ Ṗ */
- 0x1e59, 499, /* ṙ Ṙ */
- 0x1e5b, 499, /* ṛ Ṛ */
- 0x1e5d, 499, /* ṝ Ṝ */
- 0x1e5f, 499, /* ṟ Ṟ */
- 0x1e61, 499, /* ṡ Ṡ */
- 0x1e63, 499, /* ṣ Ṣ */
- 0x1e65, 499, /* ṥ Ṥ */
- 0x1e67, 499, /* ṧ Ṧ */
- 0x1e69, 499, /* ṩ Ṩ */
- 0x1e6b, 499, /* ṫ Ṫ */
- 0x1e6d, 499, /* ṭ Ṭ */
- 0x1e6f, 499, /* ṯ Ṯ */
- 0x1e71, 499, /* ṱ Ṱ */
- 0x1e73, 499, /* ṳ Ṳ */
- 0x1e75, 499, /* ṵ Ṵ */
- 0x1e77, 499, /* ṷ Ṷ */
- 0x1e79, 499, /* ṹ Ṹ */
- 0x1e7b, 499, /* ṻ Ṻ */
- 0x1e7d, 499, /* ṽ Ṽ */
- 0x1e7f, 499, /* ṿ Ṿ */
- 0x1e81, 499, /* ẁ Ẁ */
- 0x1e83, 499, /* ẃ Ẃ */
- 0x1e85, 499, /* ẅ Ẅ */
- 0x1e87, 499, /* ẇ Ẇ */
- 0x1e89, 499, /* ẉ Ẉ */
- 0x1e8b, 499, /* ẋ Ẋ */
- 0x1e8d, 499, /* ẍ Ẍ */
- 0x1e8f, 499, /* ẏ Ẏ */
- 0x1e91, 499, /* ẑ Ẑ */
- 0x1e93, 499, /* ẓ Ẓ */
- 0x1e95, 499, /* ẕ Ẕ */
- 0x1ea1, 499, /* ạ Ạ */
- 0x1ea3, 499, /* ả Ả */
- 0x1ea5, 499, /* ấ Ấ */
- 0x1ea7, 499, /* ầ Ầ */
- 0x1ea9, 499, /* ẩ Ẩ */
- 0x1eab, 499, /* ẫ Ẫ */
- 0x1ead, 499, /* ậ Ậ */
- 0x1eaf, 499, /* ắ Ắ */
- 0x1eb1, 499, /* ằ Ằ */
- 0x1eb3, 499, /* ẳ Ẳ */
- 0x1eb5, 499, /* ẵ Ẵ */
- 0x1eb7, 499, /* ặ Ặ */
- 0x1eb9, 499, /* ẹ Ẹ */
- 0x1ebb, 499, /* ẻ Ẻ */
- 0x1ebd, 499, /* ẽ Ẽ */
- 0x1ebf, 499, /* ế Ế */
- 0x1ec1, 499, /* ề Ề */
- 0x1ec3, 499, /* ể Ể */
- 0x1ec5, 499, /* ễ Ễ */
- 0x1ec7, 499, /* ệ Ệ */
- 0x1ec9, 499, /* ỉ Ỉ */
- 0x1ecb, 499, /* ị Ị */
- 0x1ecd, 499, /* ọ Ọ */
- 0x1ecf, 499, /* ỏ Ỏ */
- 0x1ed1, 499, /* ố Ố */
- 0x1ed3, 499, /* ồ Ồ */
- 0x1ed5, 499, /* ổ Ổ */
- 0x1ed7, 499, /* ỗ Ỗ */
- 0x1ed9, 499, /* ộ Ộ */
- 0x1edb, 499, /* ớ Ớ */
- 0x1edd, 499, /* ờ Ờ */
- 0x1edf, 499, /* ở Ở */
- 0x1ee1, 499, /* ỡ Ỡ */
- 0x1ee3, 499, /* ợ Ợ */
- 0x1ee5, 499, /* ụ Ụ */
- 0x1ee7, 499, /* ủ Ủ */
- 0x1ee9, 499, /* ứ Ứ */
- 0x1eeb, 499, /* ừ Ừ */
- 0x1eed, 499, /* ử Ử */
- 0x1eef, 499, /* ữ Ữ */
- 0x1ef1, 499, /* ự Ự */
- 0x1ef3, 499, /* ỳ Ỳ */
- 0x1ef5, 499, /* ỵ Ỵ */
- 0x1ef7, 499, /* ỷ Ỷ */
- 0x1ef9, 499, /* ỹ Ỹ */
- 0x1f51, 508, /* ὑ Ὑ */
- 0x1f53, 508, /* ὓ Ὓ */
- 0x1f55, 508, /* ὕ Ὕ */
- 0x1f57, 508, /* ὗ Ὗ */
- 0x1fb3, 509, /* ᾳ ᾼ */
- 0x1fc3, 509, /* ῃ ῌ */
- 0x1fe5, 507, /* ῥ Ῥ */
- 0x1ff3, 509, /* ῳ ῼ */
-};
-
-/*
- * upper case ranges
- * 3rd col is conversion excess 500
- */
-static
-Rune __tolower2[] =
-{
- 0x0041, 0x005a, 532, /* A-Z a-z */
- 0x00c0, 0x00d6, 532, /* À-Ö à-ö */
- 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */
- 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */
- 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */
- 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */
- 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */
- 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */
- 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */
- 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */
- 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */
- 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */
- 0x0410, 0x042f, 532, /* А-Я а-я */
- 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */
- 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */
- 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */
- 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */
- 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */
- 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */
- 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */
- 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */
- 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */
- 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */
- 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */
- 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */
- 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */
- 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */
- 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */
- 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */
- 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */
- 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */
- 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */
- 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */
- 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */
- 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */
- 0xff21, 0xff3a, 532, /* A-Z a-z */
-};
-
-/*
- * upper case singlets
- * 2nd col is conversion excess 500
- */
static
-Rune __tolower1[] =
-{
- 0x0100, 501, /* Ā ā */
- 0x0102, 501, /* Ă ă */
- 0x0104, 501, /* Ą ą */
- 0x0106, 501, /* Ć ć */
- 0x0108, 501, /* Ĉ ĉ */
- 0x010a, 501, /* Ċ ċ */
- 0x010c, 501, /* Č č */
- 0x010e, 501, /* Ď ď */
- 0x0110, 501, /* Đ đ */
- 0x0112, 501, /* Ē ē */
- 0x0114, 501, /* Ĕ ĕ */
- 0x0116, 501, /* Ė ė */
- 0x0118, 501, /* Ę ę */
- 0x011a, 501, /* Ě ě */
- 0x011c, 501, /* Ĝ ĝ */
- 0x011e, 501, /* Ğ ğ */
- 0x0120, 501, /* Ġ ġ */
- 0x0122, 501, /* Ģ ģ */
- 0x0124, 501, /* Ĥ ĥ */
- 0x0126, 501, /* Ħ ħ */
- 0x0128, 501, /* Ĩ ĩ */
- 0x012a, 501, /* Ī ī */
- 0x012c, 501, /* Ĭ ĭ */
- 0x012e, 501, /* Į į */
- 0x0130, 301, /* İ i */
- 0x0132, 501, /* IJ ij */
- 0x0134, 501, /* Ĵ ĵ */
- 0x0136, 501, /* Ķ ķ */
- 0x0139, 501, /* Ĺ ĺ */
- 0x013b, 501, /* Ļ ļ */
- 0x013d, 501, /* Ľ ľ */
- 0x013f, 501, /* Ŀ ŀ */
- 0x0141, 501, /* Ł ł */
- 0x0143, 501, /* Ń ń */
- 0x0145, 501, /* Ņ ņ */
- 0x0147, 501, /* Ň ň */
- 0x014a, 501, /* Ŋ ŋ */
- 0x014c, 501, /* Ō ō */
- 0x014e, 501, /* Ŏ ŏ */
- 0x0150, 501, /* Ő ő */
- 0x0152, 501, /* Œ œ */
- 0x0154, 501, /* Ŕ ŕ */
- 0x0156, 501, /* Ŗ ŗ */
- 0x0158, 501, /* Ř ř */
- 0x015a, 501, /* Ś ś */
- 0x015c, 501, /* Ŝ ŝ */
- 0x015e, 501, /* Ş ş */
- 0x0160, 501, /* Š š */
- 0x0162, 501, /* Ţ ţ */
- 0x0164, 501, /* Ť ť */
- 0x0166, 501, /* Ŧ ŧ */
- 0x0168, 501, /* Ũ ũ */
- 0x016a, 501, /* Ū ū */
- 0x016c, 501, /* Ŭ ŭ */
- 0x016e, 501, /* Ů ů */
- 0x0170, 501, /* Ű ű */
- 0x0172, 501, /* Ų ų */
- 0x0174, 501, /* Ŵ ŵ */
- 0x0176, 501, /* Ŷ ŷ */
- 0x0178, 379, /* Ÿ ÿ */
- 0x0179, 501, /* Ź ź */
- 0x017b, 501, /* Ż ż */
- 0x017d, 501, /* Ž ž */
- 0x0181, 710, /* Ɓ ɓ */
- 0x0182, 501, /* Ƃ ƃ */
- 0x0184, 501, /* Ƅ ƅ */
- 0x0186, 706, /* Ɔ ɔ */
- 0x0187, 501, /* Ƈ ƈ */
- 0x018b, 501, /* Ƌ ƌ */
- 0x0190, 703, /* Ɛ ɛ */
- 0x0191, 501, /* Ƒ ƒ */
- 0x0193, 705, /* Ɠ ɠ */
- 0x0194, 707, /* Ɣ ɣ */
- 0x0196, 711, /* Ɩ ɩ */
- 0x0197, 709, /* Ɨ ɨ */
- 0x0198, 501, /* Ƙ ƙ */
- 0x019c, 711, /* Ɯ ɯ */
- 0x019d, 713, /* Ɲ ɲ */
- 0x01a0, 501, /* Ơ ơ */
- 0x01a2, 501, /* Ƣ ƣ */
- 0x01a4, 501, /* Ƥ ƥ */
- 0x01a7, 501, /* Ƨ ƨ */
- 0x01a9, 718, /* Ʃ ʃ */
- 0x01ac, 501, /* Ƭ ƭ */
- 0x01ae, 718, /* Ʈ ʈ */
- 0x01af, 501, /* Ư ư */
- 0x01b3, 501, /* Ƴ ƴ */
- 0x01b5, 501, /* Ƶ ƶ */
- 0x01b7, 719, /* Ʒ ʒ */
- 0x01b8, 501, /* Ƹ ƹ */
- 0x01bc, 501, /* Ƽ ƽ */
- 0x01c4, 502, /* DŽ dž */
- 0x01c5, 501, /* Dž dž */
- 0x01c7, 502, /* LJ lj */
- 0x01c8, 501, /* Lj lj */
- 0x01ca, 502, /* NJ nj */
- 0x01cb, 501, /* Nj nj */
- 0x01cd, 501, /* Ǎ ǎ */
- 0x01cf, 501, /* Ǐ ǐ */
- 0x01d1, 501, /* Ǒ ǒ */
- 0x01d3, 501, /* Ǔ ǔ */
- 0x01d5, 501, /* Ǖ ǖ */
- 0x01d7, 501, /* Ǘ ǘ */
- 0x01d9, 501, /* Ǚ ǚ */
- 0x01db, 501, /* Ǜ ǜ */
- 0x01de, 501, /* Ǟ ǟ */
- 0x01e0, 501, /* Ǡ ǡ */
- 0x01e2, 501, /* Ǣ ǣ */
- 0x01e4, 501, /* Ǥ ǥ */
- 0x01e6, 501, /* Ǧ ǧ */
- 0x01e8, 501, /* Ǩ ǩ */
- 0x01ea, 501, /* Ǫ ǫ */
- 0x01ec, 501, /* Ǭ ǭ */
- 0x01ee, 501, /* Ǯ ǯ */
- 0x01f1, 502, /* DZ dz */
- 0x01f2, 501, /* Dz dz */
- 0x01f4, 501, /* Ǵ ǵ */
- 0x01fa, 501, /* Ǻ ǻ */
- 0x01fc, 501, /* Ǽ ǽ */
- 0x01fe, 501, /* Ǿ ǿ */
- 0x0200, 501, /* Ȁ ȁ */
- 0x0202, 501, /* Ȃ ȃ */
- 0x0204, 501, /* Ȅ ȅ */
- 0x0206, 501, /* Ȇ ȇ */
- 0x0208, 501, /* Ȉ ȉ */
- 0x020a, 501, /* Ȋ ȋ */
- 0x020c, 501, /* Ȍ ȍ */
- 0x020e, 501, /* Ȏ ȏ */
- 0x0210, 501, /* Ȑ ȑ */
- 0x0212, 501, /* Ȓ ȓ */
- 0x0214, 501, /* Ȕ ȕ */
- 0x0216, 501, /* Ȗ ȗ */
- 0x0386, 538, /* Ά ά */
- 0x038c, 564, /* Ό ό */
- 0x03e2, 501, /* Ϣ ϣ */
- 0x03e4, 501, /* Ϥ ϥ */
- 0x03e6, 501, /* Ϧ ϧ */
- 0x03e8, 501, /* Ϩ ϩ */
- 0x03ea, 501, /* Ϫ ϫ */
- 0x03ec, 501, /* Ϭ ϭ */
- 0x03ee, 501, /* Ϯ ϯ */
- 0x0460, 501, /* Ѡ ѡ */
- 0x0462, 501, /* Ѣ ѣ */
- 0x0464, 501, /* Ѥ ѥ */
- 0x0466, 501, /* Ѧ ѧ */
- 0x0468, 501, /* Ѩ ѩ */
- 0x046a, 501, /* Ѫ ѫ */
- 0x046c, 501, /* Ѭ ѭ */
- 0x046e, 501, /* Ѯ ѯ */
- 0x0470, 501, /* Ѱ ѱ */
- 0x0472, 501, /* Ѳ ѳ */
- 0x0474, 501, /* Ѵ ѵ */
- 0x0476, 501, /* Ѷ ѷ */
- 0x0478, 501, /* Ѹ ѹ */
- 0x047a, 501, /* Ѻ ѻ */
- 0x047c, 501, /* Ѽ ѽ */
- 0x047e, 501, /* Ѿ ѿ */
- 0x0480, 501, /* Ҁ ҁ */
- 0x0490, 501, /* Ґ ґ */
- 0x0492, 501, /* Ғ ғ */
- 0x0494, 501, /* Ҕ ҕ */
- 0x0496, 501, /* Җ җ */
- 0x0498, 501, /* Ҙ ҙ */
- 0x049a, 501, /* Қ қ */
- 0x049c, 501, /* Ҝ ҝ */
- 0x049e, 501, /* Ҟ ҟ */
- 0x04a0, 501, /* Ҡ ҡ */
- 0x04a2, 501, /* Ң ң */
- 0x04a4, 501, /* Ҥ ҥ */
- 0x04a6, 501, /* Ҧ ҧ */
- 0x04a8, 501, /* Ҩ ҩ */
- 0x04aa, 501, /* Ҫ ҫ */
- 0x04ac, 501, /* Ҭ ҭ */
- 0x04ae, 501, /* Ү ү */
- 0x04b0, 501, /* Ұ ұ */
- 0x04b2, 501, /* Ҳ ҳ */
- 0x04b4, 501, /* Ҵ ҵ */
- 0x04b6, 501, /* Ҷ ҷ */
- 0x04b8, 501, /* Ҹ ҹ */
- 0x04ba, 501, /* Һ һ */
- 0x04bc, 501, /* Ҽ ҽ */
- 0x04be, 501, /* Ҿ ҿ */
- 0x04c1, 501, /* Ӂ ӂ */
- 0x04c3, 501, /* Ӄ ӄ */
- 0x04c7, 501, /* Ӈ ӈ */
- 0x04cb, 501, /* Ӌ ӌ */
- 0x04d0, 501, /* Ӑ ӑ */
- 0x04d2, 501, /* Ӓ ӓ */
- 0x04d4, 501, /* Ӕ ӕ */
- 0x04d6, 501, /* Ӗ ӗ */
- 0x04d8, 501, /* Ә ә */
- 0x04da, 501, /* Ӛ ӛ */
- 0x04dc, 501, /* Ӝ ӝ */
- 0x04de, 501, /* Ӟ ӟ */
- 0x04e0, 501, /* Ӡ ӡ */
- 0x04e2, 501, /* Ӣ ӣ */
- 0x04e4, 501, /* Ӥ ӥ */
- 0x04e6, 501, /* Ӧ ӧ */
- 0x04e8, 501, /* Ө ө */
- 0x04ea, 501, /* Ӫ ӫ */
- 0x04ee, 501, /* Ӯ ӯ */
- 0x04f0, 501, /* Ӱ ӱ */
- 0x04f2, 501, /* Ӳ ӳ */
- 0x04f4, 501, /* Ӵ ӵ */
- 0x04f8, 501, /* Ӹ ӹ */
- 0x1e00, 501, /* Ḁ ḁ */
- 0x1e02, 501, /* Ḃ ḃ */
- 0x1e04, 501, /* Ḅ ḅ */
- 0x1e06, 501, /* Ḇ ḇ */
- 0x1e08, 501, /* Ḉ ḉ */
- 0x1e0a, 501, /* Ḋ ḋ */
- 0x1e0c, 501, /* Ḍ ḍ */
- 0x1e0e, 501, /* Ḏ ḏ */
- 0x1e10, 501, /* Ḑ ḑ */
- 0x1e12, 501, /* Ḓ ḓ */
- 0x1e14, 501, /* Ḕ ḕ */
- 0x1e16, 501, /* Ḗ ḗ */
- 0x1e18, 501, /* Ḙ ḙ */
- 0x1e1a, 501, /* Ḛ ḛ */
- 0x1e1c, 501, /* Ḝ ḝ */
- 0x1e1e, 501, /* Ḟ ḟ */
- 0x1e20, 501, /* Ḡ ḡ */
- 0x1e22, 501, /* Ḣ ḣ */
- 0x1e24, 501, /* Ḥ ḥ */
- 0x1e26, 501, /* Ḧ ḧ */
- 0x1e28, 501, /* Ḩ ḩ */
- 0x1e2a, 501, /* Ḫ ḫ */
- 0x1e2c, 501, /* Ḭ ḭ */
- 0x1e2e, 501, /* Ḯ ḯ */
- 0x1e30, 501, /* Ḱ ḱ */
- 0x1e32, 501, /* Ḳ ḳ */
- 0x1e34, 501, /* Ḵ ḵ */
- 0x1e36, 501, /* Ḷ ḷ */
- 0x1e38, 501, /* Ḹ ḹ */
- 0x1e3a, 501, /* Ḻ ḻ */
- 0x1e3c, 501, /* Ḽ ḽ */
- 0x1e3e, 501, /* Ḿ ḿ */
- 0x1e40, 501, /* Ṁ ṁ */
- 0x1e42, 501, /* Ṃ ṃ */
- 0x1e44, 501, /* Ṅ ṅ */
- 0x1e46, 501, /* Ṇ ṇ */
- 0x1e48, 501, /* Ṉ ṉ */
- 0x1e4a, 501, /* Ṋ ṋ */
- 0x1e4c, 501, /* Ṍ ṍ */
- 0x1e4e, 501, /* Ṏ ṏ */
- 0x1e50, 501, /* Ṑ ṑ */
- 0x1e52, 501, /* Ṓ ṓ */
- 0x1e54, 501, /* Ṕ ṕ */
- 0x1e56, 501, /* Ṗ ṗ */
- 0x1e58, 501, /* Ṙ ṙ */
- 0x1e5a, 501, /* Ṛ ṛ */
- 0x1e5c, 501, /* Ṝ ṝ */
- 0x1e5e, 501, /* Ṟ ṟ */
- 0x1e60, 501, /* Ṡ ṡ */
- 0x1e62, 501, /* Ṣ ṣ */
- 0x1e64, 501, /* Ṥ ṥ */
- 0x1e66, 501, /* Ṧ ṧ */
- 0x1e68, 501, /* Ṩ ṩ */
- 0x1e6a, 501, /* Ṫ ṫ */
- 0x1e6c, 501, /* Ṭ ṭ */
- 0x1e6e, 501, /* Ṯ ṯ */
- 0x1e70, 501, /* Ṱ ṱ */
- 0x1e72, 501, /* Ṳ ṳ */
- 0x1e74, 501, /* Ṵ ṵ */
- 0x1e76, 501, /* Ṷ ṷ */
- 0x1e78, 501, /* Ṹ ṹ */
- 0x1e7a, 501, /* Ṻ ṻ */
- 0x1e7c, 501, /* Ṽ ṽ */
- 0x1e7e, 501, /* Ṿ ṿ */
- 0x1e80, 501, /* Ẁ ẁ */
- 0x1e82, 501, /* Ẃ ẃ */
- 0x1e84, 501, /* Ẅ ẅ */
- 0x1e86, 501, /* Ẇ ẇ */
- 0x1e88, 501, /* Ẉ ẉ */
- 0x1e8a, 501, /* Ẋ ẋ */
- 0x1e8c, 501, /* Ẍ ẍ */
- 0x1e8e, 501, /* Ẏ ẏ */
- 0x1e90, 501, /* Ẑ ẑ */
- 0x1e92, 501, /* Ẓ ẓ */
- 0x1e94, 501, /* Ẕ ẕ */
- 0x1ea0, 501, /* Ạ ạ */
- 0x1ea2, 501, /* Ả ả */
- 0x1ea4, 501, /* Ấ ấ */
- 0x1ea6, 501, /* Ầ ầ */
- 0x1ea8, 501, /* Ẩ ẩ */
- 0x1eaa, 501, /* Ẫ ẫ */
- 0x1eac, 501, /* Ậ ậ */
- 0x1eae, 501, /* Ắ ắ */
- 0x1eb0, 501, /* Ằ ằ */
- 0x1eb2, 501, /* Ẳ ẳ */
- 0x1eb4, 501, /* Ẵ ẵ */
- 0x1eb6, 501, /* Ặ ặ */
- 0x1eb8, 501, /* Ẹ ẹ */
- 0x1eba, 501, /* Ẻ ẻ */
- 0x1ebc, 501, /* Ẽ ẽ */
- 0x1ebe, 501, /* Ế ế */
- 0x1ec0, 501, /* Ề ề */
- 0x1ec2, 501, /* Ể ể */
- 0x1ec4, 501, /* Ễ ễ */
- 0x1ec6, 501, /* Ệ ệ */
- 0x1ec8, 501, /* Ỉ ỉ */
- 0x1eca, 501, /* Ị ị */
- 0x1ecc, 501, /* Ọ ọ */
- 0x1ece, 501, /* Ỏ ỏ */
- 0x1ed0, 501, /* Ố ố */
- 0x1ed2, 501, /* Ồ ồ */
- 0x1ed4, 501, /* Ổ ổ */
- 0x1ed6, 501, /* Ỗ ỗ */
- 0x1ed8, 501, /* Ộ ộ */
- 0x1eda, 501, /* Ớ ớ */
- 0x1edc, 501, /* Ờ ờ */
- 0x1ede, 501, /* Ở ở */
- 0x1ee0, 501, /* Ỡ ỡ */
- 0x1ee2, 501, /* Ợ ợ */
- 0x1ee4, 501, /* Ụ ụ */
- 0x1ee6, 501, /* Ủ ủ */
- 0x1ee8, 501, /* Ứ ứ */
- 0x1eea, 501, /* Ừ ừ */
- 0x1eec, 501, /* Ử ử */
- 0x1eee, 501, /* Ữ ữ */
- 0x1ef0, 501, /* Ự ự */
- 0x1ef2, 501, /* Ỳ ỳ */
- 0x1ef4, 501, /* Ỵ ỵ */
- 0x1ef6, 501, /* Ỷ ỷ */
- 0x1ef8, 501, /* Ỹ ỹ */
- 0x1f59, 492, /* Ὑ ὑ */
- 0x1f5b, 492, /* Ὓ ὓ */
- 0x1f5d, 492, /* Ὕ ὕ */
- 0x1f5f, 492, /* Ὗ ὗ */
- 0x1fbc, 491, /* ᾼ ᾳ */
- 0x1fcc, 491, /* ῌ ῃ */
- 0x1fec, 493, /* Ῥ ῥ */
- 0x1ffc, 491, /* ῼ ῳ */
-};
-
-/*
- * title characters are those between
- * upper and lower case. ie DZ Dz dz
- */
-static
-Rune __totitle1[] =
-{
- 0x01c4, 501, /* DŽ Dž */
- 0x01c6, 499, /* dž Dž */
- 0x01c7, 501, /* LJ Lj */
- 0x01c9, 499, /* lj Lj */
- 0x01ca, 501, /* NJ Nj */
- 0x01cc, 499, /* nj Nj */
- 0x01f1, 501, /* DZ Dz */
- 0x01f3, 499, /* dz Dz */
-};
-
-static Rune*
-bsearch(Rune c, Rune *t, int n, int ne)
+Rune*
+rbsearch(Rune c, Rune *t, int n, int ne)
{
Rune *p;
int m;
while(n > 1) {
- m = n/2;
+ m = n >> 1;
p = t + m*ne;
if(c >= p[0]) {
t = p;
@@ -1050,102 +35,36 @@ bsearch(Rune c, Rune *t, int n, int ne)
return 0;
}
-Rune
-tolowerrune(Rune c)
-{
- Rune *p;
-
- p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3);
- if(p && c >= p[0] && c <= p[1])
- return c + p[2] - 500;
- p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2);
- if(p && c == p[0])
- return c + p[1] - 500;
- return c;
-}
-
-Rune
-toupperrune(Rune c)
-{
- Rune *p;
-
- p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3);
- if(p && c >= p[0] && c <= p[1])
- return c + p[2] - 500;
- p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2);
- if(p && c == p[0])
- return c + p[1] - 500;
- return c;
-}
-
-Rune
-totitlerune(Rune c)
-{
- Rune *p;
-
- p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2);
- if(p && c == p[0])
- return c + p[1] - 500;
- return c;
-}
-
-int
-islowerrune(Rune c)
-{
- Rune *p;
-
- p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3);
- if(p && c >= p[0] && c <= p[1])
- return 1;
- p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2);
- if(p && c == p[0])
- return 1;
- return 0;
-}
-
-int
-isupperrune(Rune c)
-{
- Rune *p;
-
- p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3);
- if(p && c >= p[0] && c <= p[1])
- return 1;
- p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2);
- if(p && c == p[0])
- return 1;
- return 0;
-}
+/*
+ * The "ideographic" property is hard to extract from UnicodeData.txt,
+ * so it is hard coded here.
+ *
+ * It is defined in the Unicode PropList.txt file, for example
+ * PropList-3.0.0.txt. Unlike the UnicodeData.txt file, the format of
+ * PropList changes between versions. This property appears relatively static;
+ * it is the same in version 4.0.1, except that 4.0.1 also defines some
+ * code points above 16 bits as ideographic: 20000..2A6D6 and 2F800..2FA1D.
+ */
+static Rune __isideographicr[] = { /* inclusive [first, last] code point pairs in ascending order; isideographicrune searches it with 2 Runes per entry */
+ 0x3006, 0x3007, /* 3006 not in Unicode 2, in 2.1 */
+ 0x3021, 0x3029,
+ 0x3038, 0x303a, /* not in Unicode 2 or 2.1 */
+ 0x3400, 0x4db5, /* not in Unicode 2 or 2.1 */
+ 0x4e00, 0x9fbb, /* 0x9FA6..0x9FBB added for 4.1.0? */
+ 0xf900, 0xfa2d,
+ 0x20000, 0x2A6D6,
+ 0x2F800, 0x2FA1D,
+};
int
-isalpharune(Rune c)
+isideographicrune(Rune c) /* returns 1 if c lies inside one of the inclusive ranges in __isideographicr, otherwise 0 */
{
Rune *p;
- if(isupperrune(c) || islowerrune(c))
- return 1;
- p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2);
+ p = rbsearch(c, __isideographicr, nelem(__isideographicr)/2, 2); /* nelem/2 entries of 2 Runes each; the result may be 0 and is range-checked below */
if(p && c >= p[0] && c <= p[1])
return 1;
- p = bsearch(c, __alpha1, nelem(__alpha1), 1);
- if(p && c == p[0])
- return 1;
return 0;
}
-int
-istitlerune(Rune c)
-{
- return isupperrune(c) && islowerrune(c);
-}
-
-int
-isspacerune(Rune c)
-{
- Rune *p;
-
- p = bsearch(c, __space2, nelem(__space2)/2, 2);
- if(p && c >= p[0] && c <= p[1])
- return 1;
- return 0;
-}
+#include "runetypebody-5.0.0.h"
diff --git a/runetypebody-5.0.0.h b/runetypebody-5.0.0.h
new file mode 100644
index 0000000..67a645d
--- /dev/null
+++ b/runetypebody-5.0.0.h
@@ -0,0 +1,1361 @@
+/* generated automatically by mkrunetype.c from UnicodeData-5.0.0.txt */
+
+static Rune __isspacer[] = { /* inclusive [first, last] code point pairs in ascending order; isspacerune returns 1 for any c inside one of them */
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000,
+ 0xfeff, 0xfeff,
+};
+
+int
+isspacerune(Rune c) /* returns 1 if c lies inside one of the inclusive ranges in __isspacer, otherwise 0 */
+{
+ Rune *p;
+
+ p = rbsearch(c, __isspacer, nelem(__isspacer)/2, 2); /* nelem/2 entries of 2 Runes (first, last) each */
+ if(p && c >= p[0] && c <= p[1]) /* p may be 0; otherwise confirm c is within [p[0], p[1]] */
+ return 1;
+ return 0;
+}
+
+static Rune __isdigitr[] = { /* inclusive [first, last] code point pairs in ascending order; isdigitrune returns 1 for any c inside one of them */
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x07c0, 0x07c9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19d9,
+ 0x1b50, 0x1b59,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff,
+};
+
+int
+isdigitrune(Rune c) /* returns 1 if c lies inside one of the inclusive ranges in __isdigitr, otherwise 0 */
+{
+ Rune *p;
+
+ p = rbsearch(c, __isdigitr, nelem(__isdigitr)/2, 2); /* nelem/2 entries of 2 Runes (first, last) each */
+ if(p && c >= p[0] && c <= p[1]) /* p may be 0; otherwise confirm c is within [p[0], p[1]] */
+ return 1;
+ return 0;
+}
+
+static Rune __isalphar[] = { /* inclusive [first, last] code point pairs in ascending order; first table consulted by isalpharune */
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x037a, 0x037d,
+ 0x0388, 0x038a,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0513,
+ 0x0531, 0x0556,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x063a,
+ 0x0640, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x0712, 0x072f,
+ 0x074d, 0x076d,
+ 0x0780, 0x07a5,
+ 0x07ca, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x0904, 0x0939,
+ 0x0958, 0x0961,
+ 0x097b, 0x097f,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b6, 0x09b9,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d60, 0x0d61,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e87, 0x0e88,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ec0, 0x0ec4,
+ 0x0edc, 0x0edd,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x1050, 0x1055,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19a9,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2090, 0x2094,
+ 0x210a, 0x2113,
+ 0x2119, 0x211d,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2183, 0x2184,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2c6c,
+ 0x2c74, 0x2c77,
+ 0x2c80, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3006,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fbb,
+ 0xa000, 0xa48c,
+ 0xa717, 0xa71a,
+ 0xa800, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10340,
+ 0x10342, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x10900, 0x10915,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x12000, 0x1236e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+};
+
+static Rune __isalphas[] = { /* single code points in ascending order; isalpharune matches them by equality after the range table misses */
+ 0x00aa,
+ 0x00b5,
+ 0x00ba,
+ 0x02ee,
+ 0x0386,
+ 0x038c,
+ 0x0559,
+ 0x06d5,
+ 0x06ff,
+ 0x0710,
+ 0x07b1,
+ 0x07fa,
+ 0x093d,
+ 0x0950,
+ 0x09b2,
+ 0x09bd,
+ 0x09ce,
+ 0x0a5e,
+ 0x0abd,
+ 0x0ad0,
+ 0x0b3d,
+ 0x0b71,
+ 0x0b83,
+ 0x0b9c,
+ 0x0cbd,
+ 0x0cde,
+ 0x0dbd,
+ 0x0e84,
+ 0x0e8a,
+ 0x0e8d,
+ 0x0ea5,
+ 0x0ea7,
+ 0x0ebd,
+ 0x0ec6,
+ 0x0f00,
+ 0x10fc,
+ 0x1258,
+ 0x12c0,
+ 0x17d7,
+ 0x17dc,
+ 0x1f59,
+ 0x1f5b,
+ 0x1f5d,
+ 0x1fbe,
+ 0x2071,
+ 0x207f,
+ 0x2102,
+ 0x2107,
+ 0x2115,
+ 0x2124,
+ 0x2126,
+ 0x2128,
+ 0x214e,
+ 0x2d6f,
+ 0xfb1d,
+ 0xfb3e,
+ 0x10808,
+ 0x1083c,
+ 0x1083f,
+ 0x10a00,
+ 0x1d4a2,
+ 0x1d4bb,
+ 0x1d546,
+};
+
+int
+isalpharune(Rune c) /* returns 1 if c is inside a range of __isalphar or equals an entry of __isalphas, otherwise 0 */
+{
+ Rune *p;
+
+ p = rbsearch(c, __isalphar, nelem(__isalphar)/2, 2); /* range table: nelem/2 entries of 2 Runes (first, last) each */
+ if(p && c >= p[0] && c <= p[1]) /* p may be 0; otherwise confirm c is within [p[0], p[1]] */
+ return 1;
+ p = rbsearch(c, __isalphas, nelem(__isalphas), 1); /* singleton table: one Rune per entry */
+ if(p && c == p[0]) /* only an exact match counts */
+ return 1;
+ return 0;
+}
+
+static Rune __isupperr[] = { /* inclusive [first, last] code point pairs in ascending order; every code point in a range matches in isupperrune */
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0178, 0x0179,
+ 0x0181, 0x0182,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a6, 0x01a7,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b7, 0x01b8,
+ 0x01f6, 0x01f8,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0243, 0x0246,
+ 0x0388, 0x038a,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x04c0, 0x04c1,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2119, 0x211d,
+ 0x212a, 0x212d,
+ 0x2130, 0x2133,
+ 0x213e, 0x213f,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c62, 0x2c64,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8,
+};
+
+static Rune __isupperp[] = { /* inclusive [first, last] pairs, ascending; isupperrune matches only code points at an even offset from first (first, first+2, ...) */
+ 0x0100, 0x0136,
+ 0x0139, 0x0147,
+ 0x014a, 0x0176,
+ 0x017b, 0x017d,
+ 0x01a2, 0x01a4,
+ 0x01cd, 0x01db,
+ 0x01de, 0x01ee,
+ 0x01fa, 0x0232,
+ 0x0248, 0x024e,
+ 0x03d8, 0x03ee,
+ 0x0460, 0x0480,
+ 0x048a, 0x04be,
+ 0x04c3, 0x04cd,
+ 0x04d0, 0x0512,
+ 0x1e00, 0x1e94,
+ 0x1ea0, 0x1ef8,
+ 0x1f59, 0x1f5f,
+ 0x2124, 0x2128,
+ 0x2c67, 0x2c6b,
+ 0x2c80, 0x2ce2,
+};
+
+static Rune __isuppers[] = { /* single code points in ascending order; isupperrune matches them by equality as its last lookup */
+ 0x0184,
+ 0x01a9,
+ 0x01ac,
+ 0x01b5,
+ 0x01bc,
+ 0x01c4,
+ 0x01c7,
+ 0x01ca,
+ 0x01f1,
+ 0x01f4,
+ 0x0241,
+ 0x0386,
+ 0x038c,
+ 0x03f4,
+ 0x03f7,
+ 0x2102,
+ 0x2107,
+ 0x2115,
+ 0x2145,
+ 0x2183,
+ 0x2c60,
+ 0x2c75,
+ 0x1d49c,
+ 0x1d4a2,
+ 0x1d546,
+ 0x1d7ca,
+};
+
+int
+isupperrune(Rune c) /* returns 1 if c matches __isupperr (full ranges), __isupperp (alternating ranges) or __isuppers (singletons), otherwise 0 */
+{
+ Rune *p;
+
+ p = rbsearch(c, __isupperr, nelem(__isupperr)/2, 2); /* full ranges: nelem/2 entries of 2 Runes (first, last) each */
+ if(p && c >= p[0] && c <= p[1]) /* p may be 0; otherwise confirm c is within [p[0], p[1]] */
+ return 1;
+ p = rbsearch(c, __isupperp, nelem(__isupperp)/2, 2); /* alternating ranges, same 2-Rune entry layout */
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) /* inside the range AND at an even distance from its first code point */
+ return 1;
+ p = rbsearch(c, __isuppers, nelem(__isuppers), 1); /* singletons: one Rune per entry */
+ if(p && c == p[0]) /* only an exact match counts */
+ return 1;
+ return 0;
+}
+
+static Rune __islowerr[] = { /* inclusive [first, last] code point pairs in ascending order; every code point in a range matches in islowerrune */
+ 0x0061, 0x007a,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0137, 0x0138,
+ 0x0148, 0x0149,
+ 0x017e, 0x0180,
+ 0x018c, 0x018d,
+ 0x0199, 0x019b,
+ 0x01aa, 0x01ab,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01dc, 0x01dd,
+ 0x01ef, 0x01f0,
+ 0x0233, 0x0239,
+ 0x023f, 0x0240,
+ 0x024f, 0x0293,
+ 0x0295, 0x02af,
+ 0x037b, 0x037d,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03ef, 0x03f3,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x04ce, 0x04cf,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e95, 0x1e9b,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x210e, 0x210f,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x2170, 0x217f,
+ 0x24d0, 0x24e9,
+ 0x2c30, 0x2c5e,
+ 0x2c65, 0x2c66,
+ 0x2c76, 0x2c77,
+ 0x2ce3, 0x2ce4,
+ 0x2d00, 0x2d25,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+};
+
+static Rune __islowerp[] = { /* inclusive [first, last] pairs, ascending; islowerrune matches only code points at an even offset from first (first, first+2, ...) */
+ 0x0101, 0x0135,
+ 0x013a, 0x0146,
+ 0x014b, 0x0177,
+ 0x017a, 0x017c,
+ 0x0183, 0x0185,
+ 0x01a1, 0x01a5,
+ 0x01b4, 0x01b6,
+ 0x01cc, 0x01da,
+ 0x01df, 0x01ed,
+ 0x01f3, 0x01f5,
+ 0x01f9, 0x0231,
+ 0x0247, 0x024d,
+ 0x03d9, 0x03ed,
+ 0x0461, 0x0481,
+ 0x048b, 0x04bf,
+ 0x04c2, 0x04cc,
+ 0x04d1, 0x0513,
+ 0x1e01, 0x1e93,
+ 0x1ea1, 0x1ef9,
+ 0x2c68, 0x2c6c,
+ 0x2c81, 0x2ce1,
+};
+
+static Rune __islowers[] = { /* single code points in ascending order; islowerrune matches them by equality as its last lookup */
+ 0x00aa,
+ 0x00b5,
+ 0x00ba,
+ 0x0188,
+ 0x0192,
+ 0x0195,
+ 0x019e,
+ 0x01a8,
+ 0x01ad,
+ 0x01b0,
+ 0x01c6,
+ 0x01c9,
+ 0x023c,
+ 0x0242,
+ 0x0390,
+ 0x03f5,
+ 0x03f8,
+ 0x1fbe,
+ 0x2071,
+ 0x207f,
+ 0x210a,
+ 0x2113,
+ 0x212f,
+ 0x2134,
+ 0x2139,
+ 0x214e,
+ 0x2184,
+ 0x2c61,
+ 0x2c74,
+ 0x1d4bb,
+ 0x1d7cb,
+};
+
+int
+islowerrune(Rune c) /* returns 1 if c matches __islowerr (full ranges), __islowerp (alternating ranges) or __islowers (singletons), otherwise 0 */
+{
+ Rune *p;
+
+ p = rbsearch(c, __islowerr, nelem(__islowerr)/2, 2); /* full ranges: nelem/2 entries of 2 Runes (first, last) each */
+ if(p && c >= p[0] && c <= p[1]) /* p may be 0; otherwise confirm c is within [p[0], p[1]] */
+ return 1;
+ p = rbsearch(c, __islowerp, nelem(__islowerp)/2, 2); /* alternating ranges, same 2-Rune entry layout */
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) /* inside the range AND at an even distance from its first code point */
+ return 1;
+ p = rbsearch(c, __islowers, nelem(__islowers), 1); /* singletons: one Rune per entry */
+ if(p && c == p[0]) /* only an exact match counts */
+ return 1;
+ return 0;
+}
+
+static Rune __istitler[] = { /* inclusive [first, last] code point pairs in ascending order; every code point in a range matches in istitlerune */
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0178, 0x0179,
+ 0x0181, 0x0182,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a6, 0x01a7,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b7, 0x01b8,
+ 0x01f6, 0x01f8,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0243, 0x0246,
+ 0x0388, 0x038a,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x04c0, 0x04c1,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c62, 0x2c64,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+};
+/*
+ * Alternating ranges for istitlerune: pairs of first and last rune
+ * (inclusive). istitlerune accepts only runes at an even offset from
+ * first, i.e. every second code point of a range.
+ */
+static Rune __istitlep[] = {
+ 0x0100, 0x012e,
+ 0x0132, 0x0136,
+ 0x0139, 0x0147,
+ 0x014a, 0x0176,
+ 0x017b, 0x017d,
+ 0x01a2, 0x01a4,
+ 0x01cb, 0x01db,
+ 0x01de, 0x01ee,
+ 0x01f2, 0x01f4,
+ 0x01fa, 0x0232,
+ 0x0248, 0x024e,
+ 0x03d8, 0x03ee,
+ 0x0460, 0x0480,
+ 0x048a, 0x04be,
+ 0x04c3, 0x04cd,
+ 0x04d0, 0x0512,
+ 0x1e00, 0x1e94,
+ 0x1ea0, 0x1ef8,
+ 0x1f59, 0x1f5f,
+ 0x2c67, 0x2c6b,
+ 0x2c80, 0x2ce2,
+};
+/*
+ * Singletons for istitlerune: one rune per entry, matched by equality.
+ */
+static Rune __istitles[] = {
+ 0x0184,
+ 0x01a9,
+ 0x01ac,
+ 0x01b5,
+ 0x01bc,
+ 0x01c5,
+ 0x01c8,
+ 0x0241,
+ 0x0386,
+ 0x038c,
+ 0x03f7,
+ 0x2132,
+ 0x2183,
+ 0x2c60,
+ 0x2c75,
+};
+/*
+ * istitlerune returns 1 if c is listed in one of the three istitle
+ * tables and 0 otherwise. Every rbsearch result is re-checked against
+ * the entry it points at before it counts as a match.
+ */
+int
+istitlerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __istitler, nelem(__istitler)/2, 2); /* ranges: 2 Runes per entry */
+ if(p && c >= p[0] && c <= p[1])
+ return 1;
+ p = rbsearch(c, __istitlep, nelem(__istitlep)/2, 2); /* alternating ranges */
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) /* even offset from first only */
+ return 1;
+ p = rbsearch(c, __istitles, nelem(__istitles), 1); /* singletons: 1 Rune per entry */
+ if(p && c == p[0])
+ return 1;
+ return 0;
+}
+/*
+ * Range mappings for toupperrune: triples of first rune, last rune
+ * (inclusive) and delta. toupperrune adds delta - 1048576 to every rune
+ * in the range, so a delta below 1048576 (0x100000) moves the rune down.
+ */
+static Rune __toupperr[] = {
+ 0x0061, 0x007a, 1048544,
+ 0x00e0, 0x00f6, 1048544,
+ 0x00f8, 0x00fe, 1048544,
+ 0x0256, 0x0257, 1048371,
+ 0x028a, 0x028b, 1048359,
+ 0x037b, 0x037d, 1048706,
+ 0x03ad, 0x03af, 1048539,
+ 0x03b1, 0x03c1, 1048544,
+ 0x03c3, 0x03cb, 1048544,
+ 0x03cd, 0x03ce, 1048513,
+ 0x0430, 0x044f, 1048544,
+ 0x0450, 0x045f, 1048496,
+ 0x0561, 0x0586, 1048528,
+ 0x1f00, 0x1f07, 1048584,
+ 0x1f10, 0x1f15, 1048584,
+ 0x1f20, 0x1f27, 1048584,
+ 0x1f30, 0x1f37, 1048584,
+ 0x1f40, 0x1f45, 1048584,
+ 0x1f60, 0x1f67, 1048584,
+ 0x1f70, 0x1f71, 1048650,
+ 0x1f72, 0x1f75, 1048662,
+ 0x1f76, 0x1f77, 1048676,
+ 0x1f78, 0x1f79, 1048704,
+ 0x1f7a, 0x1f7b, 1048688,
+ 0x1f7c, 0x1f7d, 1048702,
+ 0x1f80, 0x1f87, 1048584,
+ 0x1f90, 0x1f97, 1048584,
+ 0x1fa0, 0x1fa7, 1048584,
+ 0x1fb0, 0x1fb1, 1048584,
+ 0x1fd0, 0x1fd1, 1048584,
+ 0x1fe0, 0x1fe1, 1048584,
+ 0x2170, 0x217f, 1048560,
+ 0x24d0, 0x24e9, 1048550,
+ 0x2c30, 0x2c5e, 1048528,
+ 0x2d00, 0x2d25, 1041312,
+ 0xff41, 0xff5a, 1048544,
+ 0x10428, 0x1044f, 1048536,
+};
+/*
+ * Alternating range mappings for toupperrune: triples of first rune,
+ * last rune (inclusive) and delta. Only runes at an even offset from
+ * first are mapped; toupperrune adds delta - 1048576 to them.
+ */
+static Rune __toupperp[] = {
+ 0x0101, 0x012f, 1048575,
+ 0x0133, 0x0137, 1048575,
+ 0x013a, 0x0148, 1048575,
+ 0x014b, 0x0177, 1048575,
+ 0x017a, 0x017e, 1048575,
+ 0x0183, 0x0185, 1048575,
+ 0x01a1, 0x01a5, 1048575,
+ 0x01b4, 0x01b6, 1048575,
+ 0x01ce, 0x01dc, 1048575,
+ 0x01df, 0x01ef, 1048575,
+ 0x01f9, 0x021f, 1048575,
+ 0x0223, 0x0233, 1048575,
+ 0x0247, 0x024f, 1048575,
+ 0x03d9, 0x03ef, 1048575,
+ 0x0461, 0x0481, 1048575,
+ 0x048b, 0x04bf, 1048575,
+ 0x04c2, 0x04ce, 1048575,
+ 0x04d1, 0x0513, 1048575,
+ 0x1e01, 0x1e95, 1048575,
+ 0x1ea1, 0x1ef9, 1048575,
+ 0x1f51, 0x1f57, 1048584,
+ 0x2c68, 0x2c6c, 1048575,
+ 0x2c81, 0x2ce3, 1048575,
+};
+/*
+ * Single-rune mappings for toupperrune: pairs of rune and delta.
+ * toupperrune adds delta - 1048576 to a rune that equals the first
+ * element of an entry.
+ */
+static Rune __touppers[] = {
+ 0x00b5, 1049319,
+ 0x00ff, 1048697,
+ 0x0131, 1048344,
+ 0x017f, 1048276,
+ 0x0180, 1048771,
+ 0x0188, 1048575,
+ 0x018c, 1048575,
+ 0x0192, 1048575,
+ 0x0195, 1048673,
+ 0x0199, 1048575,
+ 0x019a, 1048739,
+ 0x019e, 1048706,
+ 0x01a8, 1048575,
+ 0x01ad, 1048575,
+ 0x01b0, 1048575,
+ 0x01b9, 1048575,
+ 0x01bd, 1048575,
+ 0x01bf, 1048632,
+ 0x01c5, 1048575,
+ 0x01c6, 1048574,
+ 0x01c8, 1048575,
+ 0x01c9, 1048574,
+ 0x01cb, 1048575,
+ 0x01cc, 1048574,
+ 0x01dd, 1048497,
+ 0x01f2, 1048575,
+ 0x01f3, 1048574,
+ 0x01f5, 1048575,
+ 0x023c, 1048575,
+ 0x0242, 1048575,
+ 0x0253, 1048366,
+ 0x0254, 1048370,
+ 0x0259, 1048374,
+ 0x025b, 1048373,
+ 0x0260, 1048371,
+ 0x0263, 1048369,
+ 0x0268, 1048367,
+ 0x0269, 1048365,
+ 0x026b, 1059319,
+ 0x026f, 1048365,
+ 0x0272, 1048363,
+ 0x0275, 1048362,
+ 0x027d, 1059303,
+ 0x0280, 1048358,
+ 0x0283, 1048358,
+ 0x0288, 1048358,
+ 0x0289, 1048507,
+ 0x028c, 1048505,
+ 0x0292, 1048357,
+ 0x0345, 1048660,
+ 0x03ac, 1048538,
+ 0x03c2, 1048545,
+ 0x03cc, 1048512,
+ 0x03d0, 1048514,
+ 0x03d1, 1048519,
+ 0x03d5, 1048529,
+ 0x03d6, 1048522,
+ 0x03f0, 1048490,
+ 0x03f1, 1048496,
+ 0x03f2, 1048583,
+ 0x03f5, 1048480,
+ 0x03f8, 1048575,
+ 0x03fb, 1048575,
+ 0x04cf, 1048561,
+ 0x1d7d, 1052390,
+ 0x1e9b, 1048517,
+ 0x1fb3, 1048585,
+ 0x1fbe, 1041371,
+ 0x1fc3, 1048585,
+ 0x1fe5, 1048583,
+ 0x1ff3, 1048585,
+ 0x214e, 1048548,
+ 0x2184, 1048575,
+ 0x2c61, 1048575,
+ 0x2c65, 1037781,
+ 0x2c66, 1037784,
+ 0x2c76, 1048575,
+};
+/*
+ * toupperrune returns the mapping of c recorded in the toupper tables,
+ * or c itself when no entry matches. Table deltas are stored with a
+ * bias of 1048576 (0x100000), which is subtracted again here.
+ */
+Rune
+toupperrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __toupperr, nelem(__toupperr)/3, 3); /* ranges: 3 Runes per entry */
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576; /* remove the bias from the stored delta */
+ p = rbsearch(c, __toupperp, nelem(__toupperp)/3, 3); /* alternating ranges */
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) /* even offset from first only */
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __touppers, nelem(__touppers)/2, 2); /* single runes: 2 Runes per entry */
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+/*
+ * Range mappings for tolowerrune: triples of first rune, last rune
+ * (inclusive) and delta. tolowerrune adds delta - 1048576 to every rune
+ * in the range, so a delta below 1048576 (0x100000) moves the rune down.
+ */
+static Rune __tolowerr[] = {
+ 0x0041, 0x005a, 1048608,
+ 0x00c0, 0x00d6, 1048608,
+ 0x00d8, 0x00de, 1048608,
+ 0x0189, 0x018a, 1048781,
+ 0x01b1, 0x01b2, 1048793,
+ 0x0388, 0x038a, 1048613,
+ 0x038e, 0x038f, 1048639,
+ 0x0391, 0x03a1, 1048608,
+ 0x03a3, 0x03ab, 1048608,
+ 0x03fd, 0x03ff, 1048446,
+ 0x0400, 0x040f, 1048656,
+ 0x0410, 0x042f, 1048608,
+ 0x0531, 0x0556, 1048624,
+ 0x10a0, 0x10c5, 1055840,
+ 0x1f08, 0x1f0f, 1048568,
+ 0x1f18, 0x1f1d, 1048568,
+ 0x1f28, 0x1f2f, 1048568,
+ 0x1f38, 0x1f3f, 1048568,
+ 0x1f48, 0x1f4d, 1048568,
+ 0x1f68, 0x1f6f, 1048568,
+ 0x1f88, 0x1f8f, 1048568,
+ 0x1f98, 0x1f9f, 1048568,
+ 0x1fa8, 0x1faf, 1048568,
+ 0x1fb8, 0x1fb9, 1048568,
+ 0x1fba, 0x1fbb, 1048502,
+ 0x1fc8, 0x1fcb, 1048490,
+ 0x1fd8, 0x1fd9, 1048568,
+ 0x1fda, 0x1fdb, 1048476,
+ 0x1fe8, 0x1fe9, 1048568,
+ 0x1fea, 0x1feb, 1048464,
+ 0x1ff8, 0x1ff9, 1048448,
+ 0x1ffa, 0x1ffb, 1048450,
+ 0x2160, 0x216f, 1048592,
+ 0x24b6, 0x24cf, 1048602,
+ 0x2c00, 0x2c2e, 1048624,
+ 0xff21, 0xff3a, 1048608,
+ 0x10400, 0x10427, 1048616,
+};
+/*
+ * Alternating range mappings for tolowerrune: triples of first rune,
+ * last rune (inclusive) and delta. Only runes at an even offset from
+ * first are mapped; tolowerrune adds delta - 1048576 to them.
+ */
+static Rune __tolowerp[] = {
+ 0x0100, 0x012e, 1048577,
+ 0x0132, 0x0136, 1048577,
+ 0x0139, 0x0147, 1048577,
+ 0x014a, 0x0176, 1048577,
+ 0x017b, 0x017d, 1048577,
+ 0x01a2, 0x01a4, 1048577,
+ 0x01b3, 0x01b5, 1048577,
+ 0x01cd, 0x01db, 1048577,
+ 0x01de, 0x01ee, 1048577,
+ 0x01f8, 0x021e, 1048577,
+ 0x0222, 0x0232, 1048577,
+ 0x0248, 0x024e, 1048577,
+ 0x03d8, 0x03ee, 1048577,
+ 0x0460, 0x0480, 1048577,
+ 0x048a, 0x04be, 1048577,
+ 0x04c3, 0x04cd, 1048577,
+ 0x04d0, 0x0512, 1048577,
+ 0x1e00, 0x1e94, 1048577,
+ 0x1ea0, 0x1ef8, 1048577,
+ 0x1f59, 0x1f5f, 1048568,
+ 0x2c67, 0x2c6b, 1048577,
+ 0x2c80, 0x2ce2, 1048577,
+};
+/*
+ * Single-rune mappings for tolowerrune: pairs of rune and delta.
+ * tolowerrune adds delta - 1048576 to a rune that equals the first
+ * element of an entry.
+ */
+static Rune __tolowers[] = {
+ 0x0130, 1048377,
+ 0x0178, 1048455,
+ 0x0179, 1048577,
+ 0x0181, 1048786,
+ 0x0182, 1048577,
+ 0x0184, 1048577,
+ 0x0186, 1048782,
+ 0x0187, 1048577,
+ 0x018b, 1048577,
+ 0x018e, 1048655,
+ 0x018f, 1048778,
+ 0x0190, 1048779,
+ 0x0191, 1048577,
+ 0x0193, 1048781,
+ 0x0194, 1048783,
+ 0x0196, 1048787,
+ 0x0197, 1048785,
+ 0x0198, 1048577,
+ 0x019c, 1048787,
+ 0x019d, 1048789,
+ 0x019f, 1048790,
+ 0x01a0, 1048577,
+ 0x01a6, 1048794,
+ 0x01a7, 1048577,
+ 0x01a9, 1048794,
+ 0x01ac, 1048577,
+ 0x01ae, 1048794,
+ 0x01af, 1048577,
+ 0x01b7, 1048795,
+ 0x01b8, 1048577,
+ 0x01bc, 1048577,
+ 0x01c4, 1048578,
+ 0x01c5, 1048577,
+ 0x01c7, 1048578,
+ 0x01c8, 1048577,
+ 0x01ca, 1048578,
+ 0x01cb, 1048577,
+ 0x01f1, 1048578,
+ 0x01f2, 1048577,
+ 0x01f4, 1048577,
+ 0x01f6, 1048479,
+ 0x01f7, 1048520,
+ 0x0220, 1048446,
+ 0x023a, 1059371,
+ 0x023b, 1048577,
+ 0x023d, 1048413,
+ 0x023e, 1059368,
+ 0x0241, 1048577,
+ 0x0243, 1048381,
+ 0x0244, 1048645,
+ 0x0245, 1048647,
+ 0x0246, 1048577,
+ 0x0386, 1048614,
+ 0x038c, 1048640,
+ 0x03f4, 1048516,
+ 0x03f7, 1048577,
+ 0x03f9, 1048569,
+ 0x03fa, 1048577,
+ 0x04c0, 1048591,
+ 0x04c1, 1048577,
+ 0x1fbc, 1048567,
+ 0x1fcc, 1048567,
+ 0x1fec, 1048569,
+ 0x1ffc, 1048567,
+ 0x2126, 1041059,
+ 0x212a, 1040193,
+ 0x212b, 1040314,
+ 0x2132, 1048604,
+ 0x2183, 1048577,
+ 0x2c60, 1048577,
+ 0x2c62, 1037833,
+ 0x2c63, 1044762,
+ 0x2c64, 1037849,
+ 0x2c75, 1048577,
+};
+/*
+ * tolowerrune returns the mapping of c recorded in the tolower tables,
+ * or c itself when no entry matches. Table deltas are stored with a
+ * bias of 1048576 (0x100000), which is subtracted again here.
+ */
+Rune
+tolowerrune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __tolowerr, nelem(__tolowerr)/3, 3); /* ranges: 3 Runes per entry */
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576; /* remove the bias from the stored delta */
+ p = rbsearch(c, __tolowerp, nelem(__tolowerp)/3, 3); /* alternating ranges */
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) /* even offset from first only */
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __tolowers, nelem(__tolowers)/2, 2); /* single runes: 2 Runes per entry */
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+/*
+ * Range mappings for totitlerune: triples of first rune, last rune
+ * (inclusive) and delta. totitlerune adds delta - 1048576 to every rune
+ * in the range, so a delta below 1048576 (0x100000) moves the rune down.
+ */
+static Rune __totitler[] = {
+ 0x0061, 0x007a, 1048544,
+ 0x00e0, 0x00f6, 1048544,
+ 0x00f8, 0x00fe, 1048544,
+ 0x0256, 0x0257, 1048371,
+ 0x028a, 0x028b, 1048359,
+ 0x037b, 0x037d, 1048706,
+ 0x03ad, 0x03af, 1048539,
+ 0x03b1, 0x03c1, 1048544,
+ 0x03c3, 0x03cb, 1048544,
+ 0x03cd, 0x03ce, 1048513,
+ 0x0430, 0x044f, 1048544,
+ 0x0450, 0x045f, 1048496,
+ 0x0561, 0x0586, 1048528,
+ 0x1f00, 0x1f07, 1048584,
+ 0x1f10, 0x1f15, 1048584,
+ 0x1f20, 0x1f27, 1048584,
+ 0x1f30, 0x1f37, 1048584,
+ 0x1f40, 0x1f45, 1048584,
+ 0x1f60, 0x1f67, 1048584,
+ 0x1f70, 0x1f71, 1048650,
+ 0x1f72, 0x1f75, 1048662,
+ 0x1f76, 0x1f77, 1048676,
+ 0x1f78, 0x1f79, 1048704,
+ 0x1f7a, 0x1f7b, 1048688,
+ 0x1f7c, 0x1f7d, 1048702,
+ 0x1f80, 0x1f87, 1048584,
+ 0x1f90, 0x1f97, 1048584,
+ 0x1fa0, 0x1fa7, 1048584,
+ 0x1fb0, 0x1fb1, 1048584,
+ 0x1fd0, 0x1fd1, 1048584,
+ 0x1fe0, 0x1fe1, 1048584,
+ 0x2170, 0x217f, 1048560,
+ 0x24d0, 0x24e9, 1048550,
+ 0x2c30, 0x2c5e, 1048528,
+ 0x2d00, 0x2d25, 1041312,
+ 0xff41, 0xff5a, 1048544,
+ 0x10428, 0x1044f, 1048536,
+};
+/*
+ * Alternating range mappings for totitlerune: triples of first rune,
+ * last rune (inclusive) and delta. Only runes at an even offset from
+ * first are mapped; totitlerune adds delta - 1048576 to them.
+ */
+static Rune __totitlep[] = {
+ 0x0101, 0x012f, 1048575,
+ 0x0133, 0x0137, 1048575,
+ 0x013a, 0x0148, 1048575,
+ 0x014b, 0x0177, 1048575,
+ 0x017a, 0x017e, 1048575,
+ 0x0183, 0x0185, 1048575,
+ 0x01a1, 0x01a5, 1048575,
+ 0x01b4, 0x01b6, 1048575,
+ 0x01cc, 0x01dc, 1048575,
+ 0x01df, 0x01ef, 1048575,
+ 0x01f3, 0x01f5, 1048575,
+ 0x01f9, 0x021f, 1048575,
+ 0x0223, 0x0233, 1048575,
+ 0x0247, 0x024f, 1048575,
+ 0x03d9, 0x03ef, 1048575,
+ 0x0461, 0x0481, 1048575,
+ 0x048b, 0x04bf, 1048575,
+ 0x04c2, 0x04ce, 1048575,
+ 0x04d1, 0x0513, 1048575,
+ 0x1e01, 0x1e95, 1048575,
+ 0x1ea1, 0x1ef9, 1048575,
+ 0x1f51, 0x1f57, 1048584,
+ 0x2c68, 0x2c6c, 1048575,
+ 0x2c81, 0x2ce3, 1048575,
+};
+/*
+ * Single-rune mappings for totitlerune: pairs of rune and delta.
+ * totitlerune adds delta - 1048576 to a rune that equals the first
+ * element of an entry.
+ */
+static Rune __totitles[] = {
+ 0x00b5, 1049319,
+ 0x00ff, 1048697,
+ 0x0131, 1048344,
+ 0x017f, 1048276,
+ 0x0180, 1048771,
+ 0x0188, 1048575,
+ 0x018c, 1048575,
+ 0x0192, 1048575,
+ 0x0195, 1048673,
+ 0x0199, 1048575,
+ 0x019a, 1048739,
+ 0x019e, 1048706,
+ 0x01a8, 1048575,
+ 0x01ad, 1048575,
+ 0x01b0, 1048575,
+ 0x01b9, 1048575,
+ 0x01bd, 1048575,
+ 0x01bf, 1048632,
+ 0x01c4, 1048577,
+ 0x01c6, 1048575,
+ 0x01c7, 1048577,
+ 0x01c9, 1048575,
+ 0x01ca, 1048577,
+ 0x01dd, 1048497,
+ 0x01f1, 1048577,
+ 0x023c, 1048575,
+ 0x0242, 1048575,
+ 0x0253, 1048366,
+ 0x0254, 1048370,
+ 0x0259, 1048374,
+ 0x025b, 1048373,
+ 0x0260, 1048371,
+ 0x0263, 1048369,
+ 0x0268, 1048367,
+ 0x0269, 1048365,
+ 0x026b, 1059319,
+ 0x026f, 1048365,
+ 0x0272, 1048363,
+ 0x0275, 1048362,
+ 0x027d, 1059303,
+ 0x0280, 1048358,
+ 0x0283, 1048358,
+ 0x0288, 1048358,
+ 0x0289, 1048507,
+ 0x028c, 1048505,
+ 0x0292, 1048357,
+ 0x0345, 1048660,
+ 0x03ac, 1048538,
+ 0x03c2, 1048545,
+ 0x03cc, 1048512,
+ 0x03d0, 1048514,
+ 0x03d1, 1048519,
+ 0x03d5, 1048529,
+ 0x03d6, 1048522,
+ 0x03f0, 1048490,
+ 0x03f1, 1048496,
+ 0x03f2, 1048583,
+ 0x03f5, 1048480,
+ 0x03f8, 1048575,
+ 0x03fb, 1048575,
+ 0x04cf, 1048561,
+ 0x1d7d, 1052390,
+ 0x1e9b, 1048517,
+ 0x1fb3, 1048585,
+ 0x1fbe, 1041371,
+ 0x1fc3, 1048585,
+ 0x1fe5, 1048583,
+ 0x1ff3, 1048585,
+ 0x214e, 1048548,
+ 0x2184, 1048575,
+ 0x2c61, 1048575,
+ 0x2c65, 1037781,
+ 0x2c66, 1037784,
+ 0x2c76, 1048575,
+};
+/*
+ * totitlerune returns the mapping of c recorded in the totitle tables,
+ * or c itself when no entry matches. Table deltas are stored with a
+ * bias of 1048576 (0x100000), which is subtracted again here.
+ */
+Rune
+totitlerune(Rune c)
+{
+ Rune *p;
+
+ p = rbsearch(c, __totitler, nelem(__totitler)/3, 3); /* ranges: 3 Runes per entry */
+ if(p && c >= p[0] && c <= p[1])
+ return c + p[2] - 1048576; /* remove the bias from the stored delta */
+ p = rbsearch(c, __totitlep, nelem(__totitlep)/3, 3); /* alternating ranges */
+ if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) /* even offset from first only */
+ return c + p[2] - 1048576;
+ p = rbsearch(c, __totitles, nelem(__totitles)/2, 2); /* single runes: 2 Runes per entry */
+ if(p && c == p[0])
+ return c + p[1] - 1048576;
+ return c;
+}
+
diff --git a/utf.h b/utf.h
index 44052f4..02ba472 100644
--- a/utf.h
+++ b/utf.h
@@ -1,54 +1,233 @@
-#ifndef _UTF_H_
-#define _UTF_H_ 1
-#if defined(__cplusplus)
-extern "C" {
-#endif
+#ifndef _UTFH_
+#define _UTFH_ 1
+
+#include <stdint.h>
-typedef unsigned int Rune; /* 32 bits */
+typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/
enum
{
- UTFmax = 4, /* maximum bytes per rune */
- Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
- Runeself = 0x80, /* rune and UTF sequences are the same (<) */
- Runeerror = 0xFFFD, /* decoding error in UTF */
- Runemax = 0x10FFFF /* maximum rune value */
+ UTFmax = 4, /* maximum bytes per rune */
+ Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
+ Runeself = 0x80, /* rune and UTF sequences are the same (<) */
+ Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runemax = 0x10FFFF, /* maximum rune value */
};
-/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
-int chartorune(Rune *rune, char *str);
-int fullrune(char *str, int n);
-int isalpharune(Rune c);
-int islowerrune(Rune c);
-int isspacerune(Rune c);
-int istitlerune(Rune c);
-int isupperrune(Rune c);
-int runelen(long c);
-int runenlen(Rune *r, int nrune);
-Rune* runestrcat(Rune *s1, Rune *s2);
-Rune* runestrchr(Rune *s, Rune c);
-int runestrcmp(Rune *s1, Rune *s2);
-Rune* runestrcpy(Rune *s1, Rune *s2);
-Rune* runestrdup(Rune *s) ;
-Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2);
-long runestrlen(Rune *s);
-Rune* runestrncat(Rune *s1, Rune *s2, long n);
-int runestrncmp(Rune *s1, Rune *s2, long n);
-Rune* runestrncpy(Rune *s1, Rune *s2, long n);
-Rune* runestrrchr(Rune *s, Rune c);
-Rune* runestrstr(Rune *s1, Rune *s2);
-int runetochar(char *str, Rune *rune);
-Rune tolowerrune(Rune c);
-Rune totitlerune(Rune c);
-Rune toupperrune(Rune c);
-char* utfecpy(char *to, char *e, char *from);
-int utflen(char *s);
-int utfnlen(char *s, long m);
-char* utfrrune(char *s, long c);
-char* utfrune(char *s, long c);
-char* utfutf(char *s1, char *s2);
-
-#if defined(__cplusplus)
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * rune routines
+ */
+
+/*
+ * These routines were written by Rob Pike and Ken Thompson
+ * and first appeared in Plan 9.
+ * SEE ALSO
+ * utf (7)
+ * tcs (1)
+*/
+
+// runetochar copies (encodes) one rune, pointed to by r, to at most
+// UTFmax bytes starting at s and returns the number of bytes generated.
+
+int runetochar(char* s, const Rune* r);
+
+
+// chartorune copies (decodes) at most UTFmax bytes starting at s to
+// one rune, pointed to by r, and returns the number of bytes consumed.
+// If the input is not exactly in UTF format, chartorune will set *r
+// to Runeerror and return 1.
+//
+// Note: There is no special case for a "null-terminated" string. A
+// string whose first byte has the value 0 is the UTF8 encoding of the
+// Unicode value 0 (i.e., ASCII NUL). A byte value of 0 is illegal
+// anywhere else in a UTF sequence.
+
+int chartorune(Rune* r, const char* s);
+
+
+// charntorune is like chartorune, except that it will access at most
+// n bytes of s. If the UTF sequence is incomplete within n bytes,
+// charntorune will set *r to Runeerror and return 0. If it is complete
+// but not in UTF format, it will set *r to Runeerror and return 1.
+//
+// Added 2004-09-24 by Wei-Hwa Huang
+
+int charntorune(Rune* r, const char* s, int n);
+
+// isvalidcharntorune(str, n, r, consumed)
+// is a convenience function that calls "*consumed = charntorune(r, str, n)"
+// and returns an int (logically boolean) indicating whether the first
+// n bytes of str was a valid and complete UTF sequence.
+
+int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed);
+
+// runelen returns the number of bytes required to convert r into UTF.
+
+int runelen(Rune r);
+
+
+// runenlen returns the number of bytes required to convert the n
+// runes pointed to by r into UTF.
+
+int runenlen(const Rune* r, int n);
+
+
+// fullrune returns 1 if the string s of length n is long enough to be
+// decoded by chartorune, and 0 otherwise. This does not guarantee
+// that the string contains a legal UTF encoding. This routine is used
+// by programs that obtain input one byte at a time and need to know
+// when a full rune has arrived.
+
+int fullrune(const char* s, int n);
+
+// The following routines are analogous to the corresponding string
+// routines with "utf" substituted for "str", and "rune" substituted
+// for "chr".
+
+// utflen returns the number of runes that are represented by the UTF
+// string s. (cf. strlen)
+
+int utflen(const char* s);
+
+
+// utfnlen returns the number of complete runes that are represented
+// by the first n bytes of the UTF string s. If the last few bytes of
+// the string contain an incompletely coded rune, utfnlen will not
+// count them; in this way, it differs from utflen, which includes
+// every byte of the string. (cf. strnlen)
+
+int utfnlen(const char* s, long n);
+
+
+// utfrune returns a pointer to the first occurrence of rune r in the
+// UTF string s, or 0 if r does not occur in the string. The NUL
+// byte terminating a string is considered to be part of the string s.
+// (cf. strchr)
+
+const char* utfrune(const char* s, Rune r);
+
+
+// utfrrune returns a pointer to the last occurrence of rune r in the
+// UTF string s, or 0 if r does not occur in the string. The NUL
+// byte terminating a string is considered to be part of the string s.
+// (cf. strrchr)
+
+const char* utfrrune(const char* s, Rune r);
+
+
+// utfutf returns a pointer to the first occurrence of the UTF string
+// s2 as a UTF substring of s1, or 0 if there is none. If s2 is the
+// null string, utfutf returns s1. (cf. strstr)
+
+const char* utfutf(const char* s1, const char* s2);
+
+
+// utfecpy copies UTF sequences until a null sequence has been copied,
+// but writes no sequences beyond es1. If any sequences are copied,
+// s1 is terminated by a null sequence, and a pointer to that sequence
+// is returned. Otherwise, the original s1 is returned. (cf. strecpy)
+
+char* utfecpy(char *s1, char *es1, const char *s2);
+
+
+
+// These functions are rune-string analogues of the corresponding
+// functions in strcat (3).
+//
+// These routines first appeared in Plan 9.
+// SEE ALSO
+// memmove (3)
+// rune (3)
+// strcat (3)
+//
+// BUGS: The outcome of overlapping moves varies among implementations.
+
+Rune* runestrcat(Rune* s1, const Rune* s2);
+Rune* runestrncat(Rune* s1, const Rune* s2, long n);
+
+const Rune* runestrchr(const Rune* s, Rune c);
+
+int runestrcmp(const Rune* s1, const Rune* s2);
+int runestrncmp(const Rune* s1, const Rune* s2, long n);
+
+Rune* runestrcpy(Rune* s1, const Rune* s2);
+Rune* runestrncpy(Rune* s1, const Rune* s2, long n);
+Rune* runestrecpy(Rune* s1, Rune* es1, const Rune* s2);
+
+Rune* runestrdup(const Rune* s);
+
+const Rune* runestrrchr(const Rune* s, Rune c);
+long runestrlen(const Rune* s);
+const Rune* runestrstr(const Rune* s1, const Rune* s2);
+
+
+
+// The following routines test types and modify cases for Unicode
+// characters. Unicode defines some characters as letters and
+// specifies three cases: upper, lower, and title. Mappings among the
+// cases are also defined, although they are not exhaustive: some
+// upper case letters have no lower case mapping, and so on. Unicode
+// also defines several character properties, a subset of which are
+// checked by these routines. These routines are based on Unicode
+// version 5.0.0 (see runetypebody-5.0.0.h).
+//
+// NOTE: The routines are implemented in C, so the boolean functions
+// (e.g., isupperrune) return 0 for false and 1 for true.
+//
+//
+// toupperrune, tolowerrune, and totitlerune are the Unicode case
+// mappings. These routines return the character unchanged if it has
+// no defined mapping.
+
+Rune toupperrune(Rune r);
+Rune tolowerrune(Rune r);
+Rune totitlerune(Rune r);
+
+
+// isupperrune tests for upper case characters, including Unicode
+// upper case letters and targets of the toupper mapping. islowerrune
+// and istitlerune are defined analogously.
+
+int isupperrune(Rune r);
+int islowerrune(Rune r);
+int istitlerune(Rune r);
+
+
+// isalpharune tests for Unicode letters; this includes ideographs in
+// addition to alphabetic characters.
+
+int isalpharune(Rune r);
+
+
+// isdigitrune tests for digits. Non-digit numbers, such as Roman
+// numerals, are not included.
+
+int isdigitrune(Rune r);
+
+
+// isideographicrune tests for ideographic characters and numbers, as
+// defined by the Unicode standard.
+
+int isideographicrune(Rune r);
+
+
+// isspacerune tests for whitespace characters, including "C" locale
+// whitespace, Unicode defined whitespace, and the "zero-width
+// non-break space" character.
+
+int isspacerune(Rune r);
+
+
+// (The comments in this file were copied from the manpage files rune.3,
+// isalpharune.3, and runestrcat.3. Some formatting changes were also made
+// to conform to Google style. /JRM 11/11/05)
+
+#ifdef __cplusplus
}
#endif
+
#endif
diff --git a/utfdef.h b/utfdef.h
index 1ff4181..4b58ae8 100644
--- a/utfdef.h
+++ b/utfdef.h
@@ -1,33 +1,14 @@
-/*
- * compiler directive on Plan 9
- */
-#ifndef USED
-#define USED(x) if(x);else
-#endif
+#define uchar _utfuchar
+#define ushort _utfushort
+#define uint _utfuint
+#define ulong _utfulong
+#define vlong _utfvlong
+#define uvlong _utfuvlong
-/*
- * easiest way to make sure these are defined
- */
-#define uchar _fmtuchar
-#define ushort _fmtushort
-#define uint _fmtuint
-#define ulong _fmtulong
-#define vlong _fmtvlong
-#define uvlong _fmtuvlong
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
-typedef unsigned long long uvlong;
-typedef long long vlong;
-
-/*
- * nil cannot be ((void*)0) on ANSI C,
- * because it is used for function pointers
- */
-#undef nil
-#define nil 0
-
-#undef nelem
-#define nelem ((void*)0)
+#define nelem(x) (sizeof(x)/sizeof((x)[0]))
+#define nil ((void*)0)
diff --git a/utfecpy.c b/utfecpy.c
index cf3535f..e733a0f 100644
--- a/utfecpy.c
+++ b/utfecpy.c
@@ -7,24 +7,23 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
-#define _BSD_SOURCE 1 /* memccpy */
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
char*
-utfecpy(char *to, char *e, char *from)
+utfecpy(char *to, char *e, const char *from)
{
char *end;
if(to >= e)
return to;
- end = memccpy(to, from, '\0', e - to);
+ end = (char*)memccpy(to, from, '\0', e - to);
if(end == nil){
end = e-1;
while(end>to && (*--end&0xC0)==0x80)
diff --git a/utflen.c b/utflen.c
index 769805a..45653d5 100644
--- a/utflen.c
+++ b/utflen.c
@@ -7,17 +7,17 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
int
-utflen(char *s)
+utflen(const char *s)
{
int c;
long n;
@@ -34,4 +34,5 @@ utflen(char *s)
s += chartorune(&rune, s);
n++;
}
+ return 0;
}
diff --git a/utfnlen.c b/utfnlen.c
index 6680329..d673c82 100644
--- a/utfnlen.c
+++ b/utfnlen.c
@@ -7,22 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
int
-utfnlen(char *s, long m)
+utfnlen(const char *s, long m)
{
int c;
long n;
Rune rune;
- char *es;
+ const char *es;
es = s + m;
for(n = 0; s < es; n++) {
diff --git a/utfrrune.c b/utfrrune.c
index cff12b5..c0b89f5 100644
--- a/utfrrune.c
+++ b/utfrrune.c
@@ -7,21 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
+const
char*
-utfrrune(char *s, long c)
+utfrrune(const char *s, Rune c)
{
long c1;
Rune r;
- char *s1;
+ const char *s1;
if(c < Runesync) /* not part of utf sequence */
return strrchr(s, c);
@@ -42,4 +43,5 @@ utfrrune(char *s, long c)
s1 = s;
s += c1;
}
+ return 0;
}
diff --git a/utfrune.c b/utfrune.c
index 52b8359..913783f 100644
--- a/utfrune.c
+++ b/utfrune.c
@@ -7,17 +7,18 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
+const
char*
-utfrune(char *s, long c)
+utfrune(const char *s, Rune c)
{
long c1;
Rune r;
@@ -41,4 +42,5 @@ utfrune(char *s, long c)
return s;
s += n;
}
+ return 0;
}
diff --git a/utfutf.c b/utfutf.c
index 13c8502..ec49231 100644
--- a/utfutf.c
+++ b/utfutf.c
@@ -7,24 +7,25 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
-#include "plan9.h"
#include "utf.h"
+#include "utfdef.h"
/*
* Return pointer to first occurrence of s2 in s1,
* 0 if none
*/
+const
char*
-utfutf(char *s1, char *s2)
+utfutf(const char *s1, const char *s2)
{
- char *p;
+ const char *p;
long f, n1, n2;
Rune r;
@@ -34,7 +35,7 @@ utfutf(char *s1, char *s2)
return strstr(s1, s2);
n2 = strlen(s2);
- for(p=s1; p=utfrune(p, f); p+=n1)
+ for(p=s1; (p=utfrune(p, f)) != 0; p+=n1)
if(strncmp(p, s2, n2) == 0)
return p;
return 0;