diff options
author | Alexander Gutkin <agutkin@google.com> | 2014-03-04 17:22:31 +0000 |
---|---|---|
committer | Alexander Gutkin <agutkin@google.com> | 2014-03-04 17:22:31 +0000 |
commit | 96039b7897bb8b5853e91404d97abca72d933c69 (patch) | |
tree | 67f53abddaf6561b9d8389f94e31774caecb77e7 | |
parent | 439f3d1f87279a8be383ee01ef98cb9a5ca68573 (diff) | |
download | libutf-96039b7897bb8b5853e91404d97abca72d933c69.tar.gz |
Updating libutf to a newer version.
This brings us in sync with the google3 //third_party/utf
implementation.
Change-Id: I0f3e013304bab6d609ddf0b2619f3b5b8ec6f047
-rw-r--r-- | plan9.h | 29 | ||||
-rw-r--r-- | rune.c | 251 | ||||
-rw-r--r-- | runestrcat.c | 10 | ||||
-rw-r--r-- | runestrchr.c | 11 | ||||
-rw-r--r-- | runestrcmp.c | 8 | ||||
-rw-r--r-- | runestrcpy.c | 10 | ||||
-rw-r--r-- | runestrdup.c | 10 | ||||
-rw-r--r-- | runestrecpy.c | 10 | ||||
-rw-r--r-- | runestrlen.c | 8 | ||||
-rw-r--r-- | runestrncat.c | 12 | ||||
-rw-r--r-- | runestrncmp.c | 8 | ||||
-rw-r--r-- | runestrncpy.c | 8 | ||||
-rw-r--r-- | runestrrchr.c | 13 | ||||
-rw-r--r-- | runestrstr.c | 11 | ||||
-rw-r--r-- | runetype.c | 1139 | ||||
-rw-r--r-- | runetypebody-5.0.0.h | 1361 | ||||
-rw-r--r-- | utf.h | 269 | ||||
-rw-r--r-- | utfdef.h | 35 | ||||
-rw-r--r-- | utfecpy.c | 11 | ||||
-rw-r--r-- | utflen.c | 9 | ||||
-rw-r--r-- | utfnlen.c | 10 | ||||
-rw-r--r-- | utfrrune.c | 12 | ||||
-rw-r--r-- | utfrune.c | 10 | ||||
-rw-r--r-- | utfutf.c | 13 |
24 files changed, 1910 insertions, 1358 deletions
diff --git a/plan9.h b/plan9.h deleted file mode 100644 index e40e33e..0000000 --- a/plan9.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * compiler directive on Plan 9 - */ -#ifndef USED -#define USED(x) if(x);else -#endif - -/* - * easiest way to make sure these are defined - */ -#define uchar _utfuchar -#define ushort _utfushort -#define uint _utfuint -#define ulong _utfulong -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; - -/* - * nil cannot be ((void*)0) on ANSI C, - * because it is used for function pointers - */ -#undef nil -#define nil 0 - -#undef nelem -#define nelem(x) (sizeof (x)/sizeof (x)[0]) - @@ -7,14 +7,14 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" enum { @@ -23,7 +23,7 @@ enum Bit2 = 5, Bit3 = 4, Bit4 = 3, - Bit5 = 2, + Bit5 = 2, T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ @@ -32,19 +32,139 @@ enum T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ - Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ + Rune4 = (1<<(Bit4+3*Bitx))-1, + /* 0001 1111 1111 1111 1111 1111 */ Maskx = (1<<Bitx)-1, /* 0011 1111 */ Testx = Maskx ^ 0xFF, /* 1100 0000 */ - Bad = Runeerror + Bad = Runeerror, }; +/* + * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24 + * This is a slower but "safe" version of the old chartorune + * that works on strings that are not necessarily null-terminated. + * + * If you know for sure that your string is null-terminated, + * chartorune will be a bit faster. + * + * It is guaranteed not to attempt to access "length" + * past the incoming pointer. This is to avoid + * possible access violations. If the string appears to be + * well-formed but incomplete (i.e., to get the whole Rune + * we'd need to read past str+length) then we'll set the Rune + * to Bad and return 0. + * + * Note that if we have decoding problems for other + * reasons, we return 1 instead of 0. + */ int -chartorune(Rune *rune, char *str) +charntorune(Rune *rune, const char *str, int length) +{ + int c, c1, c2, c3; + long l; + + /* When we're not allowed to read anything */ + if(length <= 0) { + goto badlen; + } + + /* + * one character sequence (7-bit value) + * 00000-0007F => T1 + */ + c = *(uchar*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + // If we can't read more than one character we must stop + if(length <= 1) { + goto badlen; + } + + /* + * two character sequence (11-bit value) + * 0080-07FF => T2 Tx + */ + c1 = *(uchar*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + // If we can't read more than two characters we must stop + if(length <= 2) { + goto badlen; + } + + /* + * three character sequence (16-bit value) + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(uchar*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + if (length <= 3) + goto badlen; + + /* + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx + */ + c3 = *(uchar*)(str+3) ^ Tx; + if (c3 & Testx) + goto bad; + if (c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if (l <= Rune3) + goto bad; + *rune = l; + return 4; + } + + // Support for 5-byte or longer UTF-8 would go here, but + // since we don't have that, we'll just fall through to bad. + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +badlen: + *rune = Bad; + return 0; + +} + + +/* + * This is the older "unsafe" version, which works fine on + * null-terminated strings. + */ +int +chartorune(Rune *rune, const char *str) { int c, c1, c2, c3; long l; @@ -92,25 +212,26 @@ chartorune(Rune *rune, char *str) } /* - * four character sequence - * 10000-10FFFF => T4 Tx Tx Tx + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx */ - if(UTFmax >= 4) { - c3 = *(uchar*)(str+3) ^ Tx; - if(c3 & Testx) + c3 = *(uchar*)(str+3) ^ Tx; + if (c3 & Testx) + goto bad; + if (c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if (l <= Rune3) goto bad; - if(c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if(l <= Rune3) - goto bad; - if(l > Runemax) - goto bad; - *rune = l; - return 4; - } + *rune = l; + return 4; } /* + * Support for 5-byte or longer UTF-8 would go here, but + * since we don't have that, we'll just fall through to bad. + */ + + /* * bad decoding */ bad: @@ -119,9 +240,16 @@ bad: } int -runetochar(char *str, Rune *rune) +isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) { + *consumed = charntorune(rune, str, length); + return *rune != Runeerror || *consumed == 3; +} + +int +runetochar(char *str, const Rune *rune) { - long c; + /* Runes are signed, so convert to unsigned for range check. */ + unsigned long c; /* * one character sequence @@ -135,7 +263,7 @@ runetochar(char *str, Rune *rune) /* * two character sequence - * 00080-007FF => T2 Tx + * 0080-07FF => T2 Tx */ if(c <= Rune2) { str[0] = T2 | (c >> 1*Bitx); @@ -144,74 +272,79 @@ runetochar(char *str, Rune *rune) } /* - * three character sequence - * 00800-0FFFF => T3 Tx Tx + * If the Rune is out of range, convert it to the error rune. + * Do this test here because the error rune encodes to three bytes. + * Doing it earlier would duplicate work, since an out of range + * Rune wouldn't have fit in one or two bytes. */ - if(c > Runemax) + if (c > Runemax) c = Runeerror; - if(c <= Rune3) { + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + if (c <= Rune3) { str[0] = T3 | (c >> 2*Bitx); str[1] = Tx | ((c >> 1*Bitx) & Maskx); str[2] = Tx | (c & Maskx); return 3; } - + /* - * four character sequence - * 010000-1FFFFF => T4 Tx Tx Tx + * four character sequence (21-bit value) + * 10000-1FFFFF => T4 Tx Tx Tx */ - str[0] = T4 | (c >> 3*Bitx); + str[0] = T4 | (c >> 3*Bitx); str[1] = Tx | ((c >> 2*Bitx) & Maskx); str[2] = Tx | ((c >> 1*Bitx) & Maskx); - str[3] = Tx | (c & Maskx); + str[3] = Tx | (c & Maskx); return 4; } int -runelen(long c) +runelen(Rune rune) { - Rune rune; char str[10]; - rune = c; return runetochar(str, &rune); } int -runenlen(Rune *r, int nrune) +runenlen(const Rune *r, int nrune) { int nb, c; nb = 0; while(nrune--) { c = *r++; - if(c <= Rune1) + if (c <= Rune1) nb++; - else - if(c <= Rune2) + else if (c <= Rune2) nb += 2; - else - if(c <= Rune3 || c > Runemax) + else if (c <= Rune3) nb += 3; - else + else /* assert(c <= Rune4) */ nb += 4; } return nb; } int -fullrune(char *str, int n) +fullrune(const char *str, int n) { - int c; - - if(n <= 0) - return 0; - c = *(uchar*)str; - if(c < Tx) - return 1; - if(c < T3) - return n >= 2; - if(UTFmax == 3 || c < T4) - return n >= 3; - return n >= 4; + if (n > 0) { + int c = *(uchar*)str; + if (c < Tx) + return 1; + if (n > 1) { + if (c < T3) + return 1; + if (n > 2) { + if (c < T4 || n > 3) + return 1; + } + } + } + return 0; } diff --git a/runestrcat.c b/runestrcat.c index 65d4c0f..ccb7cde 100644 --- a/runestrcat.c +++ b/runestrcat.c @@ -7,19 +7,19 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" Rune* -runestrcat(Rune *s1, Rune *s2) +runestrcat(Rune *s1, const Rune *s2) { - runestrcpy(runestrchr(s1, 0), s2); + runestrcpy((Rune*)runestrchr(s1, 0), s2); return s1; } diff --git a/runestrchr.c b/runestrchr.c index 21fbeeb..7acca84 100644 --- a/runestrchr.c +++ b/runestrchr.c @@ -7,17 +7,18 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" +const Rune* -runestrchr(Rune *s, Rune c) +runestrchr(const Rune *s, Rune c) { Rune c0 = c; Rune c1; @@ -28,7 +29,7 @@ runestrchr(Rune *s, Rune c) return s-1; } - while(c1 = *s++) + while((c1 = *s++) != 0) if(c1 == c0) return s-1; return 0; diff --git a/runestrcmp.c b/runestrcmp.c index a368613..c274625 100644 --- a/runestrcmp.c +++ b/runestrcmp.c @@ -7,17 +7,17 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" int -runestrcmp(Rune *s1, Rune *s2) +runestrcmp(const Rune *s1, const Rune *s2) { Rune c1, c2; diff --git a/runestrcpy.c b/runestrcpy.c index 0659fc3..99c463d 100644 --- a/runestrcpy.c +++ b/runestrcpy.c @@ -7,22 +7,22 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" Rune* -runestrcpy(Rune *s1, Rune *s2) +runestrcpy(Rune *s1, const Rune *s2) { Rune *os1; os1 = s1; - while(*s1++ = *s2++) + while((*s1++ = *s2++) != 0) ; return os1; } diff --git a/runestrdup.c b/runestrdup.c index 8170e7b..345f2b0 100644 --- a/runestrdup.c +++ b/runestrdup.c @@ -7,22 +7,22 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> #include <stdlib.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" Rune* -runestrdup(Rune *s) +runestrdup(const Rune *s) { Rune *ns; - ns = malloc(sizeof(Rune)*(runestrlen(s) + 1)); + ns = (Rune*)malloc(sizeof(Rune)*(runestrlen(s) + 1)); if(ns == 0) return 0; diff --git a/runestrecpy.c b/runestrecpy.c index c543e22..d095e3a 100644 --- a/runestrecpy.c +++ b/runestrecpy.c @@ -7,22 +7,22 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" Rune* -runestrecpy(Rune *s1, Rune *es1, Rune *s2) +runestrecpy(Rune *s1, Rune *es1, const Rune *s2) { if(s1 >= es1) return s1; - while(*s1++ = *s2++){ + while((*s1++ = *s2++) != 0){ if(s1 == es1){ *--s1 = '\0'; break; diff --git a/runestrlen.c b/runestrlen.c index 0a13ecd..ebf76da 100644 --- a/runestrlen.c +++ b/runestrlen.c @@ -7,17 +7,17 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" long -runestrlen(Rune *s) +runestrlen(const Rune *s) { return runestrchr(s, 0) - s; diff --git a/runestrncat.c b/runestrncat.c index 9653637..3ad827e 100644 --- a/runestrncat.c +++ b/runestrncat.c @@ -7,23 +7,23 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" Rune* -runestrncat(Rune *s1, Rune *s2, long n) +runestrncat(Rune *s1, const Rune *s2, long n) { Rune *os1; os1 = s1; - s1 = runestrchr(s1, 0); - while(*s1++ = *s2++) + s1 = (Rune*)runestrchr(s1, 0); + while((*s1++ = *s2++) != 0) if(--n < 0) { s1[-1] = 0; break; diff --git a/runestrncmp.c b/runestrncmp.c index 5e9a3b6..060a425 100644 --- a/runestrncmp.c +++ b/runestrncmp.c @@ -7,17 +7,17 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" int -runestrncmp(Rune *s1, Rune *s2, long n) +runestrncmp(const Rune *s1, const Rune *s2, long n) { Rune c1, c2; diff --git a/runestrncpy.c b/runestrncpy.c index ffcb3e1..4deeaaf 100644 --- a/runestrncpy.c +++ b/runestrncpy.c @@ -7,17 +7,17 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" Rune* -runestrncpy(Rune *s1, Rune *s2, long n) +runestrncpy(Rune *s1, const Rune *s2, long n) { int i; Rune *os1; diff --git a/runestrrchr.c b/runestrrchr.c index 1b0edbb..c7fb3e1 100644 --- a/runestrrchr.c +++ b/runestrrchr.c @@ -7,24 +7,25 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" +const Rune* -runestrrchr(Rune *s, Rune c) +runestrrchr(const Rune *s, Rune c) { - Rune *r; + const Rune *r; if(c == 0) return runestrchr(s, 0); r = 0; - while(s = runestrchr(s, c)) + while((s = runestrchr(s, c)) != 0) r = s++; return r; } diff --git a/runestrstr.c b/runestrstr.c index f5fa997..fc5fd96 100644 --- a/runestrstr.c +++ b/runestrstr.c @@ -7,23 +7,24 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" /* * Return pointer to first occurrence of s2 in s1, * 0 if none */ +const Rune* -runestrstr(Rune *s1, Rune *s2) +runestrstr(const Rune *s1, const Rune *s2) { - Rune *p, *pa, *pb; + const Rune *p, *pa, *pb; int c0, c; c0 = *s2; @@ -7,1037 +7,22 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ -#include <stdarg.h> -#include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" -/* - * alpha ranges - - * only covers ranges not in lower||upper - */ -static -Rune __alpha2[] = -{ - 0x00d8, 0x00f6, /* Ø - ö */ - 0x00f8, 0x01f5, /* ø - ǵ */ - 0x0250, 0x02a8, /* ɐ - ʨ */ - 0x038e, 0x03a1, /* Ύ - Ρ */ - 0x03a3, 0x03ce, /* Σ - ώ */ - 0x03d0, 0x03d6, /* ϐ - ϖ */ - 0x03e2, 0x03f3, /* Ϣ - ϳ */ - 0x0490, 0x04c4, /* Ґ - ӄ */ - 0x0561, 0x0587, /* ա - և */ - 0x05d0, 0x05ea, /* א - ת */ - 0x05f0, 0x05f2, /* װ - ײ */ - 0x0621, 0x063a, /* ء - غ */ - 0x0640, 0x064a, /* ـ - ي */ - 0x0671, 0x06b7, /* ٱ - ڷ */ - 0x06ba, 0x06be, /* ں - ھ */ - 0x06c0, 0x06ce, /* ۀ - ێ */ - 0x06d0, 0x06d3, /* ې - ۓ */ - 0x0905, 0x0939, /* अ - ह */ - 0x0958, 0x0961, /* क़ - ॡ */ - 0x0985, 0x098c, /* অ - ঌ */ - 0x098f, 0x0990, /* এ - ঐ */ - 0x0993, 0x09a8, /* ও - ন */ - 0x09aa, 0x09b0, /* প - র */ - 0x09b6, 0x09b9, /* শ - হ */ - 0x09dc, 0x09dd, /* ড় - ঢ় */ - 0x09df, 0x09e1, /* য় - ৡ */ - 0x09f0, 0x09f1, /* ৰ - ৱ */ - 0x0a05, 0x0a0a, /* ਅ - ਊ */ - 0x0a0f, 0x0a10, /* ਏ - ਐ */ - 0x0a13, 0x0a28, /* ਓ - ਨ */ - 0x0a2a, 0x0a30, /* ਪ - ਰ */ - 0x0a32, 0x0a33, /* ਲ - ਲ਼ */ - 0x0a35, 0x0a36, /* ਵ - ਸ਼ */ - 0x0a38, 0x0a39, /* ਸ - ਹ */ - 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */ - 0x0a85, 0x0a8b, /* અ - ઋ */ - 0x0a8f, 0x0a91, /* એ - ઑ */ - 0x0a93, 0x0aa8, /* ઓ - ન */ - 0x0aaa, 0x0ab0, /* પ - ર */ - 0x0ab2, 0x0ab3, /* લ - ળ */ - 0x0ab5, 0x0ab9, /* વ - હ */ - 0x0b05, 0x0b0c, /* ଅ - ଌ */ - 0x0b0f, 0x0b10, /* ଏ - ଐ */ - 0x0b13, 0x0b28, /* ଓ - ନ */ - 0x0b2a, 0x0b30, /* ପ - ର */ - 0x0b32, 0x0b33, /* ଲ - ଳ */ - 0x0b36, 0x0b39, /* ଶ - ହ */ - 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */ - 0x0b5f, 0x0b61, /* ୟ - ୡ */ - 0x0b85, 0x0b8a, /* அ - ஊ */ - 0x0b8e, 0x0b90, /* எ - ஐ */ - 0x0b92, 0x0b95, /* ஒ - க */ - 0x0b99, 0x0b9a, /* ங - ச */ - 0x0b9e, 0x0b9f, /* ஞ - ட */ - 0x0ba3, 0x0ba4, /* ண - த */ - 0x0ba8, 0x0baa, /* ந - ப */ - 0x0bae, 0x0bb5, /* ம - வ */ - 0x0bb7, 0x0bb9, /* ஷ - ஹ */ - 0x0c05, 0x0c0c, /* అ - ఌ */ - 0x0c0e, 0x0c10, /* ఎ - ఐ */ - 0x0c12, 0x0c28, /* ఒ - న */ - 0x0c2a, 0x0c33, /* ప - ళ */ - 0x0c35, 0x0c39, /* వ - హ */ - 0x0c60, 0x0c61, /* ౠ - ౡ */ - 0x0c85, 0x0c8c, /* ಅ - ಌ */ - 0x0c8e, 0x0c90, /* ಎ - ಐ */ - 0x0c92, 0x0ca8, /* ಒ - ನ */ - 0x0caa, 0x0cb3, /* ಪ - ಳ */ - 0x0cb5, 0x0cb9, /* ವ - ಹ */ - 0x0ce0, 0x0ce1, /* ೠ - ೡ */ - 0x0d05, 0x0d0c, /* അ - ഌ */ - 0x0d0e, 0x0d10, /* എ - ഐ */ - 0x0d12, 0x0d28, /* ഒ - ന */ - 0x0d2a, 0x0d39, /* പ - ഹ */ - 0x0d60, 0x0d61, /* ൠ - ൡ */ - 0x0e01, 0x0e30, /* ก - ะ */ - 0x0e32, 0x0e33, /* า - ำ */ - 0x0e40, 0x0e46, /* เ - ๆ */ - 0x0e5a, 0x0e5b, /* ๚ - ๛ */ - 0x0e81, 0x0e82, /* ກ - ຂ */ - 0x0e87, 0x0e88, /* ງ - ຈ */ - 0x0e94, 0x0e97, /* ດ - ທ */ - 0x0e99, 0x0e9f, /* ນ - ຟ */ - 0x0ea1, 0x0ea3, /* ມ - ຣ */ - 0x0eaa, 0x0eab, /* ສ - ຫ */ - 0x0ead, 0x0eae, /* ອ - ຮ */ - 0x0eb2, 0x0eb3, /* າ - ຳ */ - 0x0ec0, 0x0ec4, /* ເ - ໄ */ - 0x0edc, 0x0edd, /* ໜ - ໝ */ - 0x0f18, 0x0f19, /* ༘ - ༙ */ - 0x0f40, 0x0f47, /* ཀ - ཇ */ - 0x0f49, 0x0f69, /* ཉ - ཀྵ */ - 0x10d0, 0x10f6, /* ა - ჶ */ - 0x1100, 0x1159, /* ᄀ - ᅙ */ - 0x115f, 0x11a2, /* ᅟ - ᆢ */ - 0x11a8, 0x11f9, /* ᆨ - ᇹ */ - 0x1e00, 0x1e9b, /* Ḁ - ẛ */ - 0x1f50, 0x1f57, /* ὐ - ὗ */ - 0x1f80, 0x1fb4, /* ᾀ - ᾴ */ - 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */ - 0x1fc2, 0x1fc4, /* ῂ - ῄ */ - 0x1fc6, 0x1fcc, /* ῆ - ῌ */ - 0x1fd0, 0x1fd3, /* ῐ - ΐ */ - 0x1fd6, 0x1fdb, /* ῖ - Ί */ - 0x1fe0, 0x1fec, /* ῠ - Ῥ */ - 0x1ff2, 0x1ff4, /* ῲ - ῴ */ - 0x1ff6, 0x1ffc, /* ῶ - ῼ */ - 0x210a, 0x2113, /* ℊ - ℓ */ - 0x2115, 0x211d, /* ℕ - ℝ */ - 0x2120, 0x2122, /* ℠ - ™ */ - 0x212a, 0x2131, /* K - ℱ */ - 0x2133, 0x2138, /* ℳ - ℸ */ - 0x3041, 0x3094, /* ぁ - ゔ */ - 0x30a1, 0x30fa, /* ァ - ヺ */ - 0x3105, 0x312c, /* ㄅ - ㄬ */ - 0x3131, 0x318e, /* ㄱ - ㆎ */ - 0x3192, 0x319f, /* ㆒ - ㆟ */ - 0x3260, 0x327b, /* ㉠ - ㉻ */ - 0x328a, 0x32b0, /* ㊊ - ㊰ */ - 0x32d0, 0x32fe, /* ㋐ - ㋾ */ - 0x3300, 0x3357, /* ㌀ - ㍗ */ - 0x3371, 0x3376, /* ㍱ - ㍶ */ - 0x337b, 0x3394, /* ㍻ - ㎔ */ - 0x3399, 0x339e, /* ㎙ - ㎞ */ - 0x33a9, 0x33ad, /* ㎩ - ㎭ */ - 0x33b0, 0x33c1, /* ㎰ - ㏁ */ - 0x33c3, 0x33c5, /* ㏃ - ㏅ */ - 0x33c7, 0x33d7, /* ㏇ - ㏗ */ - 0x33d9, 0x33dd, /* ㏙ - ㏝ */ - 0x4e00, 0x9fff, /* 一 - 鿿 */ - 0xac00, 0xd7a3, /* 가 - 힣 */ - 0xf900, 0xfb06, /* 豈 - st */ - 0xfb13, 0xfb17, /* ﬓ - ﬗ */ - 0xfb1f, 0xfb28, /* ײַ - ﬨ */ - 0xfb2a, 0xfb36, /* שׁ - זּ */ - 0xfb38, 0xfb3c, /* טּ - לּ */ - 0xfb40, 0xfb41, /* נּ - סּ */ - 0xfb43, 0xfb44, /* ףּ - פּ */ - 0xfb46, 0xfbb1, /* צּ - ﮱ */ - 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */ - 0xfd50, 0xfd8f, /* ﵐ - ﶏ */ - 0xfd92, 0xfdc7, /* ﶒ - ﷇ */ - 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */ - 0xfe70, 0xfe72, /* ﹰ - ﹲ */ - 0xfe76, 0xfefc, /* ﹶ - ﻼ */ - 0xff66, 0xff6f, /* ヲ - ッ */ - 0xff71, 0xff9d, /* ア - ン */ - 0xffa0, 0xffbe, /* ᅠ - ᄒ */ - 0xffc2, 0xffc7, /* ᅡ - ᅦ */ - 0xffca, 0xffcf, /* ᅧ - ᅬ */ - 0xffd2, 0xffd7, /* ᅭ - ᅲ */ - 0xffda, 0xffdc, /* ᅳ - ᅵ */ -}; - -/* - * alpha singlets - - * only covers ranges not in lower||upper - */ -static -Rune __alpha1[] = -{ - 0x00aa, /* ª */ - 0x00b5, /* µ */ - 0x00ba, /* º */ - 0x03da, /* Ϛ */ - 0x03dc, /* Ϝ */ - 0x03de, /* Ϟ */ - 0x03e0, /* Ϡ */ - 0x06d5, /* ە */ - 0x09b2, /* ল */ - 0x0a5e, /* ਫ਼ */ - 0x0a8d, /* ઍ */ - 0x0ae0, /* ૠ */ - 0x0b9c, /* ஜ */ - 0x0cde, /* ೞ */ - 0x0e4f, /* ๏ */ - 0x0e84, /* ຄ */ - 0x0e8a, /* ຊ */ - 0x0e8d, /* ຍ */ - 0x0ea5, /* ລ */ - 0x0ea7, /* ວ */ - 0x0eb0, /* ະ */ - 0x0ebd, /* ຽ */ - 0x1fbe, /* ι */ - 0x207f, /* ⁿ */ - 0x20a8, /* ₨ */ - 0x2102, /* ℂ */ - 0x2107, /* ℇ */ - 0x2124, /* ℤ */ - 0x2126, /* Ω */ - 0x2128, /* ℨ */ - 0xfb3e, /* מּ */ - 0xfe74, /* ﹴ */ -}; - -/* - * space ranges - */ -static -Rune __space2[] = -{ - 0x0009, 0x000a, /* tab and newline */ - 0x0020, 0x0020, /* space */ - 0x00a0, 0x00a0, /* */ - 0x2000, 0x200b, /* - */ - 0x2028, 0x2029, /*
-
*/ - 0x3000, 0x3000, /* */ - 0xfeff, 0xfeff, /* */ -}; - -/* - * lower case ranges - * 3rd col is conversion excess 500 - */ -static -Rune __toupper2[] = -{ - 0x0061, 0x007a, 468, /* a-z A-Z */ - 0x00e0, 0x00f6, 468, /* à-ö À-Ö */ - 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */ - 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */ - 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */ - 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */ - 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */ - 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */ - 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */ - 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */ - 0x0430, 0x044f, 468, /* а-я А-Я */ - 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */ - 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */ - 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */ - 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */ - 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */ - 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */ - 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */ - 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */ - 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */ - 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */ - 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */ - 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */ - 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */ - 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */ - 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */ - 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */ - 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */ - 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */ - 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */ - 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */ - 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */ - 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */ - 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */ - 0xff41, 0xff5a, 468, /* a-z A-Z */ -}; - -/* - * lower case singlets - * 2nd col is conversion excess 500 - */ -static -Rune __toupper1[] = -{ - 0x00ff, 621, /* ÿ Ÿ */ - 0x0101, 499, /* ā Ā */ - 0x0103, 499, /* ă Ă */ - 0x0105, 499, /* ą Ą */ - 0x0107, 499, /* ć Ć */ - 0x0109, 499, /* ĉ Ĉ */ - 0x010b, 499, /* ċ Ċ */ - 0x010d, 499, /* č Č */ - 0x010f, 499, /* ď Ď */ - 0x0111, 499, /* đ Đ */ - 0x0113, 499, /* ē Ē */ - 0x0115, 499, /* ĕ Ĕ */ - 0x0117, 499, /* ė Ė */ - 0x0119, 499, /* ę Ę */ - 0x011b, 499, /* ě Ě */ - 0x011d, 499, /* ĝ Ĝ */ - 0x011f, 499, /* ğ Ğ */ - 0x0121, 499, /* ġ Ġ */ - 0x0123, 499, /* ģ Ģ */ - 0x0125, 499, /* ĥ Ĥ */ - 0x0127, 499, /* ħ Ħ */ - 0x0129, 499, /* ĩ Ĩ */ - 0x012b, 499, /* ī Ī */ - 0x012d, 499, /* ĭ Ĭ */ - 0x012f, 499, /* į Į */ - 0x0131, 268, /* ı I */ - 0x0133, 499, /* ij IJ */ - 0x0135, 499, /* ĵ Ĵ */ - 0x0137, 499, /* ķ Ķ */ - 0x013a, 499, /* ĺ Ĺ */ - 0x013c, 499, /* ļ Ļ */ - 0x013e, 499, /* ľ Ľ */ - 0x0140, 499, /* ŀ Ŀ */ - 0x0142, 499, /* ł Ł */ - 0x0144, 499, /* ń Ń */ - 0x0146, 499, /* ņ Ņ */ - 0x0148, 499, /* ň Ň */ - 0x014b, 499, /* ŋ Ŋ */ - 0x014d, 499, /* ō Ō */ - 0x014f, 499, /* ŏ Ŏ */ - 0x0151, 499, /* ő Ő */ - 0x0153, 499, /* œ Œ */ - 0x0155, 499, /* ŕ Ŕ */ - 0x0157, 499, /* ŗ Ŗ */ - 0x0159, 499, /* ř Ř */ - 0x015b, 499, /* ś Ś */ - 0x015d, 499, /* ŝ Ŝ */ - 0x015f, 499, /* ş Ş */ - 0x0161, 499, /* š Š */ - 0x0163, 499, /* ţ Ţ */ - 0x0165, 499, /* ť Ť */ - 0x0167, 499, /* ŧ Ŧ */ - 0x0169, 499, /* ũ Ũ */ - 0x016b, 499, /* ū Ū */ - 0x016d, 499, /* ŭ Ŭ */ - 0x016f, 499, /* ů Ů */ - 0x0171, 499, /* ű Ű */ - 0x0173, 499, /* ų Ų */ - 0x0175, 499, /* ŵ Ŵ */ - 0x0177, 499, /* ŷ Ŷ */ - 0x017a, 499, /* ź Ź */ - 0x017c, 499, /* ż Ż */ - 0x017e, 499, /* ž Ž */ - 0x017f, 200, /* ſ S */ - 0x0183, 499, /* ƃ Ƃ */ - 0x0185, 499, /* ƅ Ƅ */ - 0x0188, 499, /* ƈ Ƈ */ - 0x018c, 499, /* ƌ Ƌ */ - 0x0192, 499, /* ƒ Ƒ */ - 0x0199, 499, /* ƙ Ƙ */ - 0x01a1, 499, /* ơ Ơ */ - 0x01a3, 499, /* ƣ Ƣ */ - 0x01a5, 499, /* ƥ Ƥ */ - 0x01a8, 499, /* ƨ Ƨ */ - 0x01ad, 499, /* ƭ Ƭ */ - 0x01b0, 499, /* ư Ư */ - 0x01b4, 499, /* ƴ Ƴ */ - 0x01b6, 499, /* ƶ Ƶ */ - 0x01b9, 499, /* ƹ Ƹ */ - 0x01bd, 499, /* ƽ Ƽ */ - 0x01c5, 499, /* Dž DŽ */ - 0x01c6, 498, /* dž DŽ */ - 0x01c8, 499, /* Lj LJ */ - 0x01c9, 498, /* lj LJ */ - 0x01cb, 499, /* Nj NJ */ - 0x01cc, 498, /* nj NJ */ - 0x01ce, 499, /* ǎ Ǎ */ - 0x01d0, 499, /* ǐ Ǐ */ - 0x01d2, 499, /* ǒ Ǒ */ - 0x01d4, 499, /* ǔ Ǔ */ - 0x01d6, 499, /* ǖ Ǖ */ - 0x01d8, 499, /* ǘ Ǘ */ - 0x01da, 499, /* ǚ Ǚ */ - 0x01dc, 499, /* ǜ Ǜ */ - 0x01df, 499, /* ǟ Ǟ */ - 0x01e1, 499, /* ǡ Ǡ */ - 0x01e3, 499, /* ǣ Ǣ */ - 0x01e5, 499, /* ǥ Ǥ */ - 0x01e7, 499, /* ǧ Ǧ */ - 0x01e9, 499, /* ǩ Ǩ */ - 0x01eb, 499, /* ǫ Ǫ */ - 0x01ed, 499, /* ǭ Ǭ */ - 0x01ef, 499, /* ǯ Ǯ */ - 0x01f2, 499, /* Dz DZ */ - 0x01f3, 498, /* dz DZ */ - 0x01f5, 499, /* ǵ Ǵ */ - 0x01fb, 499, /* ǻ Ǻ */ - 0x01fd, 499, /* ǽ Ǽ */ - 0x01ff, 499, /* ǿ Ǿ */ - 0x0201, 499, /* ȁ Ȁ */ - 0x0203, 499, /* ȃ Ȃ */ - 0x0205, 499, /* ȅ Ȅ */ - 0x0207, 499, /* ȇ Ȇ */ - 0x0209, 499, /* ȉ Ȉ */ - 0x020b, 499, /* ȋ Ȋ */ - 0x020d, 499, /* ȍ Ȍ */ - 0x020f, 499, /* ȏ Ȏ */ - 0x0211, 499, /* ȑ Ȑ */ - 0x0213, 499, /* ȓ Ȓ */ - 0x0215, 499, /* ȕ Ȕ */ - 0x0217, 499, /* ȗ Ȗ */ - 0x0253, 290, /* ɓ Ɓ */ - 0x0254, 294, /* ɔ Ɔ */ - 0x025b, 297, /* ɛ Ɛ */ - 0x0260, 295, /* ɠ Ɠ */ - 0x0263, 293, /* ɣ Ɣ */ - 0x0268, 291, /* ɨ Ɨ */ - 0x0269, 289, /* ɩ Ɩ */ - 0x026f, 289, /* ɯ Ɯ */ - 0x0272, 287, /* ɲ Ɲ */ - 0x0283, 282, /* ʃ Ʃ */ - 0x0288, 282, /* ʈ Ʈ */ - 0x0292, 281, /* ʒ Ʒ */ - 0x03ac, 462, /* ά Ά */ - 0x03cc, 436, /* ό Ό */ - 0x03d0, 438, /* ϐ Β */ - 0x03d1, 443, /* ϑ Θ */ - 0x03d5, 453, /* ϕ Φ */ - 0x03d6, 446, /* ϖ Π */ - 0x03e3, 499, /* ϣ Ϣ */ - 0x03e5, 499, /* ϥ Ϥ */ - 0x03e7, 499, /* ϧ Ϧ */ - 0x03e9, 499, /* ϩ Ϩ */ - 0x03eb, 499, /* ϫ Ϫ */ - 0x03ed, 499, /* ϭ Ϭ */ - 0x03ef, 499, /* ϯ Ϯ */ - 0x03f0, 414, /* ϰ Κ */ - 0x03f1, 420, /* ϱ Ρ */ - 0x0461, 499, /* ѡ Ѡ */ - 0x0463, 499, /* ѣ Ѣ */ - 0x0465, 499, /* ѥ Ѥ */ - 0x0467, 499, /* ѧ Ѧ */ - 0x0469, 499, /* ѩ Ѩ */ - 0x046b, 499, /* ѫ Ѫ */ - 0x046d, 499, /* ѭ Ѭ */ - 0x046f, 499, /* ѯ Ѯ */ - 0x0471, 499, /* ѱ Ѱ */ - 0x0473, 499, /* ѳ Ѳ */ - 0x0475, 499, /* ѵ Ѵ */ - 0x0477, 499, /* ѷ Ѷ */ - 0x0479, 499, /* ѹ Ѹ */ - 0x047b, 499, /* ѻ Ѻ */ - 0x047d, 499, /* ѽ Ѽ */ - 0x047f, 499, /* ѿ Ѿ */ - 0x0481, 499, /* ҁ Ҁ */ - 0x0491, 499, /* ґ Ґ */ - 0x0493, 499, /* ғ Ғ */ - 0x0495, 499, /* ҕ Ҕ */ - 0x0497, 499, /* җ Җ */ - 0x0499, 499, /* ҙ Ҙ */ - 0x049b, 499, /* қ Қ */ - 0x049d, 499, /* ҝ Ҝ */ - 0x049f, 499, /* ҟ Ҟ */ - 0x04a1, 499, /* ҡ Ҡ */ - 0x04a3, 499, /* ң Ң */ - 0x04a5, 499, /* ҥ Ҥ */ - 0x04a7, 499, /* ҧ Ҧ */ - 0x04a9, 499, /* ҩ Ҩ */ - 0x04ab, 499, /* ҫ Ҫ */ - 0x04ad, 499, /* ҭ Ҭ */ - 0x04af, 499, /* ү Ү */ - 0x04b1, 499, /* ұ Ұ */ - 0x04b3, 499, /* ҳ Ҳ */ - 0x04b5, 499, /* ҵ Ҵ */ - 0x04b7, 499, /* ҷ Ҷ */ - 0x04b9, 499, /* ҹ Ҹ */ - 0x04bb, 499, /* һ Һ */ - 0x04bd, 499, /* ҽ Ҽ */ - 0x04bf, 499, /* ҿ Ҿ */ - 0x04c2, 499, /* ӂ Ӂ */ - 0x04c4, 499, /* ӄ Ӄ */ - 0x04c8, 499, /* ӈ Ӈ */ - 0x04cc, 499, /* ӌ Ӌ */ - 0x04d1, 499, /* ӑ Ӑ */ - 0x04d3, 499, /* ӓ Ӓ */ - 0x04d5, 499, /* ӕ Ӕ */ - 0x04d7, 499, /* ӗ Ӗ */ - 0x04d9, 499, /* ә Ә */ - 0x04db, 499, /* ӛ Ӛ */ - 0x04dd, 499, /* ӝ Ӝ */ - 0x04df, 499, /* ӟ Ӟ */ - 0x04e1, 499, /* ӡ Ӡ */ - 0x04e3, 499, /* ӣ Ӣ */ - 0x04e5, 499, /* ӥ Ӥ */ - 0x04e7, 499, /* ӧ Ӧ */ - 0x04e9, 499, /* ө Ө */ - 0x04eb, 499, /* ӫ Ӫ */ - 0x04ef, 499, /* ӯ Ӯ */ - 0x04f1, 499, /* ӱ Ӱ */ - 0x04f3, 499, /* ӳ Ӳ */ - 0x04f5, 499, /* ӵ Ӵ */ - 0x04f9, 499, /* ӹ Ӹ */ - 0x1e01, 499, /* ḁ Ḁ */ - 0x1e03, 499, /* ḃ Ḃ */ - 0x1e05, 499, /* ḅ Ḅ */ - 0x1e07, 499, /* ḇ Ḇ */ - 0x1e09, 499, /* ḉ Ḉ */ - 0x1e0b, 499, /* ḋ Ḋ */ - 0x1e0d, 499, /* ḍ Ḍ */ - 0x1e0f, 499, /* ḏ Ḏ */ - 0x1e11, 499, /* ḑ Ḑ */ - 0x1e13, 499, /* ḓ Ḓ */ - 0x1e15, 499, /* ḕ Ḕ */ - 0x1e17, 499, /* ḗ Ḗ */ - 0x1e19, 499, /* ḙ Ḙ */ - 0x1e1b, 499, /* ḛ Ḛ */ - 0x1e1d, 499, /* ḝ Ḝ */ - 0x1e1f, 499, /* ḟ Ḟ */ - 0x1e21, 499, /* ḡ Ḡ */ - 0x1e23, 499, /* ḣ Ḣ */ - 0x1e25, 499, /* ḥ Ḥ */ - 0x1e27, 499, /* ḧ Ḧ */ - 0x1e29, 499, /* ḩ Ḩ */ - 0x1e2b, 499, /* ḫ Ḫ */ - 0x1e2d, 499, /* ḭ Ḭ */ - 0x1e2f, 499, /* ḯ Ḯ */ - 0x1e31, 499, /* ḱ Ḱ */ - 0x1e33, 499, /* ḳ Ḳ */ - 0x1e35, 499, /* ḵ Ḵ */ - 0x1e37, 499, /* ḷ Ḷ */ - 0x1e39, 499, /* ḹ Ḹ */ - 0x1e3b, 499, /* ḻ Ḻ */ - 0x1e3d, 499, /* ḽ Ḽ */ - 0x1e3f, 499, /* ḿ Ḿ */ - 0x1e41, 499, /* ṁ Ṁ */ - 0x1e43, 499, /* ṃ Ṃ */ - 0x1e45, 499, /* ṅ Ṅ */ - 0x1e47, 499, /* ṇ Ṇ */ - 0x1e49, 499, /* ṉ Ṉ */ - 0x1e4b, 499, /* ṋ Ṋ */ - 0x1e4d, 499, /* ṍ Ṍ */ - 0x1e4f, 499, /* ṏ Ṏ */ - 0x1e51, 499, /* ṑ Ṑ */ - 0x1e53, 499, /* ṓ Ṓ */ - 0x1e55, 499, /* ṕ Ṕ */ - 0x1e57, 499, /* ṗ Ṗ */ - 0x1e59, 499, /* ṙ Ṙ */ - 0x1e5b, 499, /* ṛ Ṛ */ - 0x1e5d, 499, /* ṝ Ṝ */ - 0x1e5f, 499, /* ṟ Ṟ */ - 0x1e61, 499, /* ṡ Ṡ */ - 0x1e63, 499, /* ṣ Ṣ */ - 0x1e65, 499, /* ṥ Ṥ */ - 0x1e67, 499, /* ṧ Ṧ */ - 0x1e69, 499, /* ṩ Ṩ */ - 0x1e6b, 499, /* ṫ Ṫ */ - 0x1e6d, 499, /* ṭ Ṭ */ - 0x1e6f, 499, /* ṯ Ṯ */ - 0x1e71, 499, /* ṱ Ṱ */ - 0x1e73, 499, /* ṳ Ṳ */ - 0x1e75, 499, /* ṵ Ṵ */ - 0x1e77, 499, /* ṷ Ṷ */ - 0x1e79, 499, /* ṹ Ṹ */ - 0x1e7b, 499, /* ṻ Ṻ */ - 0x1e7d, 499, /* ṽ Ṽ */ - 0x1e7f, 499, /* ṿ Ṿ */ - 0x1e81, 499, /* ẁ Ẁ */ - 0x1e83, 499, /* ẃ Ẃ */ - 0x1e85, 499, /* ẅ Ẅ */ - 0x1e87, 499, /* ẇ Ẇ */ - 0x1e89, 499, /* ẉ Ẉ */ - 0x1e8b, 499, /* ẋ Ẋ */ - 0x1e8d, 499, /* ẍ Ẍ */ - 0x1e8f, 499, /* ẏ Ẏ */ - 0x1e91, 499, /* ẑ Ẑ */ - 0x1e93, 499, /* ẓ Ẓ */ - 0x1e95, 499, /* ẕ Ẕ */ - 0x1ea1, 499, /* ạ Ạ */ - 0x1ea3, 499, /* ả Ả */ - 0x1ea5, 499, /* ấ Ấ */ - 0x1ea7, 499, /* ầ Ầ */ - 0x1ea9, 499, /* ẩ Ẩ */ - 0x1eab, 499, /* ẫ Ẫ */ - 0x1ead, 499, /* ậ Ậ */ - 0x1eaf, 499, /* ắ Ắ */ - 0x1eb1, 499, /* ằ Ằ */ - 0x1eb3, 499, /* ẳ Ẳ */ - 0x1eb5, 499, /* ẵ Ẵ */ - 0x1eb7, 499, /* ặ Ặ */ - 0x1eb9, 499, /* ẹ Ẹ */ - 0x1ebb, 499, /* ẻ Ẻ */ - 0x1ebd, 499, /* ẽ Ẽ */ - 0x1ebf, 499, /* ế Ế */ - 0x1ec1, 499, /* ề Ề */ - 0x1ec3, 499, /* ể Ể */ - 0x1ec5, 499, /* ễ Ễ */ - 0x1ec7, 499, /* ệ Ệ */ - 0x1ec9, 499, /* ỉ Ỉ */ - 0x1ecb, 499, /* ị Ị */ - 0x1ecd, 499, /* ọ Ọ */ - 0x1ecf, 499, /* ỏ Ỏ */ - 0x1ed1, 499, /* ố Ố */ - 0x1ed3, 499, /* ồ Ồ */ - 0x1ed5, 499, /* ổ Ổ */ - 0x1ed7, 499, /* ỗ Ỗ */ - 0x1ed9, 499, /* ộ Ộ */ - 0x1edb, 499, /* ớ Ớ */ - 0x1edd, 499, /* ờ Ờ */ - 0x1edf, 499, /* ở Ở */ - 0x1ee1, 499, /* ỡ Ỡ */ - 0x1ee3, 499, /* ợ Ợ */ - 0x1ee5, 499, /* ụ Ụ */ - 0x1ee7, 499, /* ủ Ủ */ - 0x1ee9, 499, /* ứ Ứ */ - 0x1eeb, 499, /* ừ Ừ */ - 0x1eed, 499, /* ử Ử */ - 0x1eef, 499, /* ữ Ữ */ - 0x1ef1, 499, /* ự Ự */ - 0x1ef3, 499, /* ỳ Ỳ */ - 0x1ef5, 499, /* ỵ Ỵ */ - 0x1ef7, 499, /* ỷ Ỷ */ - 0x1ef9, 499, /* ỹ Ỹ */ - 0x1f51, 508, /* ὑ Ὑ */ - 0x1f53, 508, /* ὓ Ὓ */ - 0x1f55, 508, /* ὕ Ὕ */ - 0x1f57, 508, /* ὗ Ὗ */ - 0x1fb3, 509, /* ᾳ ᾼ */ - 0x1fc3, 509, /* ῃ ῌ */ - 0x1fe5, 507, /* ῥ Ῥ */ - 0x1ff3, 509, /* ῳ ῼ */ -}; - -/* - * upper case ranges - * 3rd col is conversion excess 500 - */ -static -Rune __tolower2[] = -{ - 0x0041, 0x005a, 532, /* A-Z a-z */ - 0x00c0, 0x00d6, 532, /* À-Ö à-ö */ - 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */ - 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */ - 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */ - 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */ - 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */ - 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */ - 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */ - 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */ - 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */ - 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */ - 0x0410, 0x042f, 532, /* А-Я а-я */ - 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */ - 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */ - 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */ - 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */ - 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */ - 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */ - 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */ - 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */ - 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */ - 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */ - 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */ - 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */ - 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */ - 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */ - 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */ - 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */ - 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */ - 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */ - 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */ - 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */ - 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */ - 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */ - 0xff21, 0xff3a, 532, /* A-Z a-z */ -}; - -/* - * upper case singlets - * 2nd col is conversion excess 500 - */ static -Rune __tolower1[] = -{ - 0x0100, 501, /* Ā ā */ - 0x0102, 501, /* Ă ă */ - 0x0104, 501, /* Ą ą */ - 0x0106, 501, /* Ć ć */ - 0x0108, 501, /* Ĉ ĉ */ - 0x010a, 501, /* Ċ ċ */ - 0x010c, 501, /* Č č */ - 0x010e, 501, /* Ď ď */ - 0x0110, 501, /* Đ đ */ - 0x0112, 501, /* Ē ē */ - 0x0114, 501, /* Ĕ ĕ */ - 0x0116, 501, /* Ė ė */ - 0x0118, 501, /* Ę ę */ - 0x011a, 501, /* Ě ě */ - 0x011c, 501, /* Ĝ ĝ */ - 0x011e, 501, /* Ğ ğ */ - 0x0120, 501, /* Ġ ġ */ - 0x0122, 501, /* Ģ ģ */ - 0x0124, 501, /* Ĥ ĥ */ - 0x0126, 501, /* Ħ ħ */ - 0x0128, 501, /* Ĩ ĩ */ - 0x012a, 501, /* Ī ī */ - 0x012c, 501, /* Ĭ ĭ */ - 0x012e, 501, /* Į į */ - 0x0130, 301, /* İ i */ - 0x0132, 501, /* IJ ij */ - 0x0134, 501, /* Ĵ ĵ */ - 0x0136, 501, /* Ķ ķ */ - 0x0139, 501, /* Ĺ ĺ */ - 0x013b, 501, /* Ļ ļ */ - 0x013d, 501, /* Ľ ľ */ - 0x013f, 501, /* Ŀ ŀ */ - 0x0141, 501, /* Ł ł */ - 0x0143, 501, /* Ń ń */ - 0x0145, 501, /* Ņ ņ */ - 0x0147, 501, /* Ň ň */ - 0x014a, 501, /* Ŋ ŋ */ - 0x014c, 501, /* Ō ō */ - 0x014e, 501, /* Ŏ ŏ */ - 0x0150, 501, /* Ő ő */ - 0x0152, 501, /* Œ œ */ - 0x0154, 501, /* Ŕ ŕ */ - 0x0156, 501, /* Ŗ ŗ */ - 0x0158, 501, /* Ř ř */ - 0x015a, 501, /* Ś ś */ - 0x015c, 501, /* Ŝ ŝ */ - 0x015e, 501, /* Ş ş */ - 0x0160, 501, /* Š š */ - 0x0162, 501, /* Ţ ţ */ - 0x0164, 501, /* Ť ť */ - 0x0166, 501, /* Ŧ ŧ */ - 0x0168, 501, /* Ũ ũ */ - 0x016a, 501, /* Ū ū */ - 0x016c, 501, /* Ŭ ŭ */ - 0x016e, 501, /* Ů ů */ - 0x0170, 501, /* Ű ű */ - 0x0172, 501, /* Ų ų */ - 0x0174, 501, /* Ŵ ŵ */ - 0x0176, 501, /* Ŷ ŷ */ - 0x0178, 379, /* Ÿ ÿ */ - 0x0179, 501, /* Ź ź */ - 0x017b, 501, /* Ż ż */ - 0x017d, 501, /* Ž ž */ - 0x0181, 710, /* Ɓ ɓ */ - 0x0182, 501, /* Ƃ ƃ */ - 0x0184, 501, /* Ƅ ƅ */ - 0x0186, 706, /* Ɔ ɔ */ - 0x0187, 501, /* Ƈ ƈ */ - 0x018b, 501, /* Ƌ ƌ */ - 0x0190, 703, /* Ɛ ɛ */ - 0x0191, 501, /* Ƒ ƒ */ - 0x0193, 705, /* Ɠ ɠ */ - 0x0194, 707, /* Ɣ ɣ */ - 0x0196, 711, /* Ɩ ɩ */ - 0x0197, 709, /* Ɨ ɨ */ - 0x0198, 501, /* Ƙ ƙ */ - 0x019c, 711, /* Ɯ ɯ */ - 0x019d, 713, /* Ɲ ɲ */ - 0x01a0, 501, /* Ơ ơ */ - 0x01a2, 501, /* Ƣ ƣ */ - 0x01a4, 501, /* Ƥ ƥ */ - 0x01a7, 501, /* Ƨ ƨ */ - 0x01a9, 718, /* Ʃ ʃ */ - 0x01ac, 501, /* Ƭ ƭ */ - 0x01ae, 718, /* Ʈ ʈ */ - 0x01af, 501, /* Ư ư */ - 0x01b3, 501, /* Ƴ ƴ */ - 0x01b5, 501, /* Ƶ ƶ */ - 0x01b7, 719, /* Ʒ ʒ */ - 0x01b8, 501, /* Ƹ ƹ */ - 0x01bc, 501, /* Ƽ ƽ */ - 0x01c4, 502, /* DŽ dž */ - 0x01c5, 501, /* Dž dž */ - 0x01c7, 502, /* LJ lj */ - 0x01c8, 501, /* Lj lj */ - 0x01ca, 502, /* NJ nj */ - 0x01cb, 501, /* Nj nj */ - 0x01cd, 501, /* Ǎ ǎ */ - 0x01cf, 501, /* Ǐ ǐ */ - 0x01d1, 501, /* Ǒ ǒ */ - 0x01d3, 501, /* Ǔ ǔ */ - 0x01d5, 501, /* Ǖ ǖ */ - 0x01d7, 501, /* Ǘ ǘ */ - 0x01d9, 501, /* Ǚ ǚ */ - 0x01db, 501, /* Ǜ ǜ */ - 0x01de, 501, /* Ǟ ǟ */ - 0x01e0, 501, /* Ǡ ǡ */ - 0x01e2, 501, /* Ǣ ǣ */ - 0x01e4, 501, /* Ǥ ǥ */ - 0x01e6, 501, /* Ǧ ǧ */ - 0x01e8, 501, /* Ǩ ǩ */ - 0x01ea, 501, /* Ǫ ǫ */ - 0x01ec, 501, /* Ǭ ǭ */ - 0x01ee, 501, /* Ǯ ǯ */ - 0x01f1, 502, /* DZ dz */ - 0x01f2, 501, /* Dz dz */ - 0x01f4, 501, /* Ǵ ǵ */ - 0x01fa, 501, /* Ǻ ǻ */ - 0x01fc, 501, /* Ǽ ǽ */ - 0x01fe, 501, /* Ǿ ǿ */ - 0x0200, 501, /* Ȁ ȁ */ - 0x0202, 501, /* Ȃ ȃ */ - 0x0204, 501, /* Ȅ ȅ */ - 0x0206, 501, /* Ȇ ȇ */ - 0x0208, 501, /* Ȉ ȉ */ - 0x020a, 501, /* Ȋ ȋ */ - 0x020c, 501, /* Ȍ ȍ */ - 0x020e, 501, /* Ȏ ȏ */ - 0x0210, 501, /* Ȑ ȑ */ - 0x0212, 501, /* Ȓ ȓ */ - 0x0214, 501, /* Ȕ ȕ */ - 0x0216, 501, /* Ȗ ȗ */ - 0x0386, 538, /* Ά ά */ - 0x038c, 564, /* Ό ό */ - 0x03e2, 501, /* Ϣ ϣ */ - 0x03e4, 501, /* Ϥ ϥ */ - 0x03e6, 501, /* Ϧ ϧ */ - 0x03e8, 501, /* Ϩ ϩ */ - 0x03ea, 501, /* Ϫ ϫ */ - 0x03ec, 501, /* Ϭ ϭ */ - 0x03ee, 501, /* Ϯ ϯ */ - 0x0460, 501, /* Ѡ ѡ */ - 0x0462, 501, /* Ѣ ѣ */ - 0x0464, 501, /* Ѥ ѥ */ - 0x0466, 501, /* Ѧ ѧ */ - 0x0468, 501, /* Ѩ ѩ */ - 0x046a, 501, /* Ѫ ѫ */ - 0x046c, 501, /* Ѭ ѭ */ - 0x046e, 501, /* Ѯ ѯ */ - 0x0470, 501, /* Ѱ ѱ */ - 0x0472, 501, /* Ѳ ѳ */ - 0x0474, 501, /* Ѵ ѵ */ - 0x0476, 501, /* Ѷ ѷ */ - 0x0478, 501, /* Ѹ ѹ */ - 0x047a, 501, /* Ѻ ѻ */ - 0x047c, 501, /* Ѽ ѽ */ - 0x047e, 501, /* Ѿ ѿ */ - 0x0480, 501, /* Ҁ ҁ */ - 0x0490, 501, /* Ґ ґ */ - 0x0492, 501, /* Ғ ғ */ - 0x0494, 501, /* Ҕ ҕ */ - 0x0496, 501, /* Җ җ */ - 0x0498, 501, /* Ҙ ҙ */ - 0x049a, 501, /* Қ қ */ - 0x049c, 501, /* Ҝ ҝ */ - 0x049e, 501, /* Ҟ ҟ */ - 0x04a0, 501, /* Ҡ ҡ */ - 0x04a2, 501, /* Ң ң */ - 0x04a4, 501, /* Ҥ ҥ */ - 0x04a6, 501, /* Ҧ ҧ */ - 0x04a8, 501, /* Ҩ ҩ */ - 0x04aa, 501, /* Ҫ ҫ */ - 0x04ac, 501, /* Ҭ ҭ */ - 0x04ae, 501, /* Ү ү */ - 0x04b0, 501, /* Ұ ұ */ - 0x04b2, 501, /* Ҳ ҳ */ - 0x04b4, 501, /* Ҵ ҵ */ - 0x04b6, 501, /* Ҷ ҷ */ - 0x04b8, 501, /* Ҹ ҹ */ - 0x04ba, 501, /* Һ һ */ - 0x04bc, 501, /* Ҽ ҽ */ - 0x04be, 501, /* Ҿ ҿ */ - 0x04c1, 501, /* Ӂ ӂ */ - 0x04c3, 501, /* Ӄ ӄ */ - 0x04c7, 501, /* Ӈ ӈ */ - 0x04cb, 501, /* Ӌ ӌ */ - 0x04d0, 501, /* Ӑ ӑ */ - 0x04d2, 501, /* Ӓ ӓ */ - 0x04d4, 501, /* Ӕ ӕ */ - 0x04d6, 501, /* Ӗ ӗ */ - 0x04d8, 501, /* Ә ә */ - 0x04da, 501, /* Ӛ ӛ */ - 0x04dc, 501, /* Ӝ ӝ */ - 0x04de, 501, /* Ӟ ӟ */ - 0x04e0, 501, /* Ӡ ӡ */ - 0x04e2, 501, /* Ӣ ӣ */ - 0x04e4, 501, /* Ӥ ӥ */ - 0x04e6, 501, /* Ӧ ӧ */ - 0x04e8, 501, /* Ө ө */ - 0x04ea, 501, /* Ӫ ӫ */ - 0x04ee, 501, /* Ӯ ӯ */ - 0x04f0, 501, /* Ӱ ӱ */ - 0x04f2, 501, /* Ӳ ӳ */ - 0x04f4, 501, /* Ӵ ӵ */ - 0x04f8, 501, /* Ӹ ӹ */ - 0x1e00, 501, /* Ḁ ḁ */ - 0x1e02, 501, /* Ḃ ḃ */ - 0x1e04, 501, /* Ḅ ḅ */ - 0x1e06, 501, /* Ḇ ḇ */ - 0x1e08, 501, /* Ḉ ḉ */ - 0x1e0a, 501, /* Ḋ ḋ */ - 0x1e0c, 501, /* Ḍ ḍ */ - 0x1e0e, 501, /* Ḏ ḏ */ - 0x1e10, 501, /* Ḑ ḑ */ - 0x1e12, 501, /* Ḓ ḓ */ - 0x1e14, 501, /* Ḕ ḕ */ - 0x1e16, 501, /* Ḗ ḗ */ - 0x1e18, 501, /* Ḙ ḙ */ - 0x1e1a, 501, /* Ḛ ḛ */ - 0x1e1c, 501, /* Ḝ ḝ */ - 0x1e1e, 501, /* Ḟ ḟ */ - 0x1e20, 501, /* Ḡ ḡ */ - 0x1e22, 501, /* Ḣ ḣ */ - 0x1e24, 501, /* Ḥ ḥ */ - 0x1e26, 501, /* Ḧ ḧ */ - 0x1e28, 501, /* Ḩ ḩ */ - 0x1e2a, 501, /* Ḫ ḫ */ - 0x1e2c, 501, /* Ḭ ḭ */ - 0x1e2e, 501, /* Ḯ ḯ */ - 0x1e30, 501, /* Ḱ ḱ */ - 0x1e32, 501, /* Ḳ ḳ */ - 0x1e34, 501, /* Ḵ ḵ */ - 0x1e36, 501, /* Ḷ ḷ */ - 0x1e38, 501, /* Ḹ ḹ */ - 0x1e3a, 501, /* Ḻ ḻ */ - 0x1e3c, 501, /* Ḽ ḽ */ - 0x1e3e, 501, /* Ḿ ḿ */ - 0x1e40, 501, /* Ṁ ṁ */ - 0x1e42, 501, /* Ṃ ṃ */ - 0x1e44, 501, /* Ṅ ṅ */ - 0x1e46, 501, /* Ṇ ṇ */ - 0x1e48, 501, /* Ṉ ṉ */ - 0x1e4a, 501, /* Ṋ ṋ */ - 0x1e4c, 501, /* Ṍ ṍ */ - 0x1e4e, 501, /* Ṏ ṏ */ - 0x1e50, 501, /* Ṑ ṑ */ - 0x1e52, 501, /* Ṓ ṓ */ - 0x1e54, 501, /* Ṕ ṕ */ - 0x1e56, 501, /* Ṗ ṗ */ - 0x1e58, 501, /* Ṙ ṙ */ - 0x1e5a, 501, /* Ṛ ṛ */ - 0x1e5c, 501, /* Ṝ ṝ */ - 0x1e5e, 501, /* Ṟ ṟ */ - 0x1e60, 501, /* Ṡ ṡ */ - 0x1e62, 501, /* Ṣ ṣ */ - 0x1e64, 501, /* Ṥ ṥ */ - 0x1e66, 501, /* Ṧ ṧ */ - 0x1e68, 501, /* Ṩ ṩ */ - 0x1e6a, 501, /* Ṫ ṫ */ - 0x1e6c, 501, /* Ṭ ṭ */ - 0x1e6e, 501, /* Ṯ ṯ */ - 0x1e70, 501, /* Ṱ ṱ */ - 0x1e72, 501, /* Ṳ ṳ */ - 0x1e74, 501, /* Ṵ ṵ */ - 0x1e76, 501, /* Ṷ ṷ */ - 0x1e78, 501, /* Ṹ ṹ */ - 0x1e7a, 501, /* Ṻ ṻ */ - 0x1e7c, 501, /* Ṽ ṽ */ - 0x1e7e, 501, /* Ṿ ṿ */ - 0x1e80, 501, /* Ẁ ẁ */ - 0x1e82, 501, /* Ẃ ẃ */ - 0x1e84, 501, /* Ẅ ẅ */ - 0x1e86, 501, /* Ẇ ẇ */ - 0x1e88, 501, /* Ẉ ẉ */ - 0x1e8a, 501, /* Ẋ ẋ */ - 0x1e8c, 501, /* Ẍ ẍ */ - 0x1e8e, 501, /* Ẏ ẏ */ - 0x1e90, 501, /* Ẑ ẑ */ - 0x1e92, 501, /* Ẓ ẓ */ - 0x1e94, 501, /* Ẕ ẕ */ - 0x1ea0, 501, /* Ạ ạ */ - 0x1ea2, 501, /* Ả ả */ - 0x1ea4, 501, /* Ấ ấ */ - 0x1ea6, 501, /* Ầ ầ */ - 0x1ea8, 501, /* Ẩ ẩ */ - 0x1eaa, 501, /* Ẫ ẫ */ - 0x1eac, 501, /* Ậ ậ */ - 0x1eae, 501, /* Ắ ắ */ - 0x1eb0, 501, /* Ằ ằ */ - 0x1eb2, 501, /* Ẳ ẳ */ - 0x1eb4, 501, /* Ẵ ẵ */ - 0x1eb6, 501, /* Ặ ặ */ - 0x1eb8, 501, /* Ẹ ẹ */ - 0x1eba, 501, /* Ẻ ẻ */ - 0x1ebc, 501, /* Ẽ ẽ */ - 0x1ebe, 501, /* Ế ế */ - 0x1ec0, 501, /* Ề ề */ - 0x1ec2, 501, /* Ể ể */ - 0x1ec4, 501, /* Ễ ễ */ - 0x1ec6, 501, /* Ệ ệ */ - 0x1ec8, 501, /* Ỉ ỉ */ - 0x1eca, 501, /* Ị ị */ - 0x1ecc, 501, /* Ọ ọ */ - 0x1ece, 501, /* Ỏ ỏ */ - 0x1ed0, 501, /* Ố ố */ - 0x1ed2, 501, /* Ồ ồ */ - 0x1ed4, 501, /* Ổ ổ */ - 0x1ed6, 501, /* Ỗ ỗ */ - 0x1ed8, 501, /* Ộ ộ */ - 0x1eda, 501, /* Ớ ớ */ - 0x1edc, 501, /* Ờ ờ */ - 0x1ede, 501, /* Ở ở */ - 0x1ee0, 501, /* Ỡ ỡ */ - 0x1ee2, 501, /* Ợ ợ */ - 0x1ee4, 501, /* Ụ ụ */ - 0x1ee6, 501, /* Ủ ủ */ - 0x1ee8, 501, /* Ứ ứ */ - 0x1eea, 501, /* Ừ ừ */ - 0x1eec, 501, /* Ử ử */ - 0x1eee, 501, /* Ữ ữ */ - 0x1ef0, 501, /* Ự ự */ - 0x1ef2, 501, /* Ỳ ỳ */ - 0x1ef4, 501, /* Ỵ ỵ */ - 0x1ef6, 501, /* Ỷ ỷ */ - 0x1ef8, 501, /* Ỹ ỹ */ - 0x1f59, 492, /* Ὑ ὑ */ - 0x1f5b, 492, /* Ὓ ὓ */ - 0x1f5d, 492, /* Ὕ ὕ */ - 0x1f5f, 492, /* Ὗ ὗ */ - 0x1fbc, 491, /* ᾼ ᾳ */ - 0x1fcc, 491, /* ῌ ῃ */ - 0x1fec, 493, /* Ῥ ῥ */ - 0x1ffc, 491, /* ῼ ῳ */ -}; - -/* - * title characters are those between - * upper and lower case. ie DZ Dz dz - */ -static -Rune __totitle1[] = -{ - 0x01c4, 501, /* DŽ Dž */ - 0x01c6, 499, /* dž Dž */ - 0x01c7, 501, /* LJ Lj */ - 0x01c9, 499, /* lj Lj */ - 0x01ca, 501, /* NJ Nj */ - 0x01cc, 499, /* nj Nj */ - 0x01f1, 501, /* DZ Dz */ - 0x01f3, 499, /* dz Dz */ -}; - -static Rune* -bsearch(Rune c, Rune *t, int n, int ne) +Rune* +rbsearch(Rune c, Rune *t, int n, int ne) { Rune *p; int m; while(n > 1) { - m = n/2; + m = n >> 1; p = t + m*ne; if(c >= p[0]) { t = p; @@ -1050,102 +35,36 @@ bsearch(Rune c, Rune *t, int n, int ne) return 0; } -Rune -tolowerrune(Rune c) -{ - Rune *p; - - p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); - if(p && c >= p[0] && c <= p[1]) - return c + p[2] - 500; - p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); - if(p && c == p[0]) - return c + p[1] - 500; - return c; -} - -Rune -toupperrune(Rune c) -{ - Rune *p; - - p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); - if(p && c >= p[0] && c <= p[1]) - return c + p[2] - 500; - p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); - if(p && c == p[0]) - return c + p[1] - 500; - return c; -} - -Rune -totitlerune(Rune c) -{ - Rune *p; - - p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2); - if(p && c == p[0]) - return c + p[1] - 500; - return c; -} - -int -islowerrune(Rune c) -{ - Rune *p; - - p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); - if(p && c >= p[0] && c <= p[1]) - return 1; - p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); - if(p && c == p[0]) - return 1; - return 0; -} - -int -isupperrune(Rune c) -{ - Rune *p; - - p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); - if(p && c >= p[0] && c <= p[1]) - return 1; - p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); - if(p && c == p[0]) - return 1; - return 0; -} +/* + * The "ideographic" property is hard to extract from UnicodeData.txt, + * so it is hard coded here. + * + * It is defined in the Unicode PropList.txt file, for example + * PropList-3.0.0.txt. Unlike the UnicodeData.txt file, the format of + * PropList changes between versions. This property appears relatively static; + * it is the same in version 4.0.1, except that version defines some >16 bit + * chars as ideographic as well: 20000..2a6d6, and 2f800..2Fa1d. + */ +static Rune __isideographicr[] = { + 0x3006, 0x3007, /* 3006 not in Unicode 2, in 2.1 */ + 0x3021, 0x3029, + 0x3038, 0x303a, /* not in Unicode 2 or 2.1 */ + 0x3400, 0x4db5, /* not in Unicode 2 or 2.1 */ + 0x4e00, 0x9fbb, /* 0x9FA6..0x9FBB added for 4.1.0? */ + 0xf900, 0xfa2d, + 0x20000, 0x2A6D6, + 0x2F800, 0x2FA1D, +}; int -isalpharune(Rune c) +isideographicrune(Rune c) { Rune *p; - if(isupperrune(c) || islowerrune(c)) - return 1; - p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2); + p = rbsearch(c, __isideographicr, nelem(__isideographicr)/2, 2); if(p && c >= p[0] && c <= p[1]) return 1; - p = bsearch(c, __alpha1, nelem(__alpha1), 1); - if(p && c == p[0]) - return 1; return 0; } -int -istitlerune(Rune c) -{ - return isupperrune(c) && islowerrune(c); -} - -int -isspacerune(Rune c) -{ - Rune *p; - - p = bsearch(c, __space2, nelem(__space2)/2, 2); - if(p && c >= p[0] && c <= p[1]) - return 1; - return 0; -} +#include "runetypebody-5.0.0.h" diff --git a/runetypebody-5.0.0.h b/runetypebody-5.0.0.h new file mode 100644 index 0000000..67a645d --- /dev/null +++ b/runetypebody-5.0.0.h @@ -0,0 +1,1361 @@ +/* generated automatically by mkrunetype.c from UnicodeData-5.0.0.txt */ + +static Rune __isspacer[] = { + 0x0009, 0x000d, + 0x0020, 0x0020, + 0x0085, 0x0085, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000, + 0xfeff, 0xfeff, +}; + +int +isspacerune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __isspacer, nelem(__isspacer)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + return 0; +} + +static Rune __isdigitr[] = { + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x07c0, 0x07c9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0x1b50, 0x1b59, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff, +}; + +int +isdigitrune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __isdigitr, nelem(__isdigitr)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + return 0; +} + +static Rune __isalphar[] = { + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x037a, 0x037d, + 0x0388, 0x038a, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x0513, + 0x0531, 0x0556, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0640, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x0712, 0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07ca, 0x07ea, + 0x07f4, 0x07f5, + 0x0904, 0x0939, + 0x0958, 0x0961, + 0x097b, 0x097f, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b6, 0x09b9, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e46, + 0x0e81, 0x0e82, + 0x0e87, 0x0e88, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ec0, 0x0ec4, + 0x0edc, 0x0edd, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1d00, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2090, 0x2094, + 0x210a, 0x2113, + 0x2119, 0x211d, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2183, 0x2184, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2c6c, + 0x2c74, 0x2c77, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa717, 0xa71a, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa840, 0xa873, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10340, + 0x10342, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x10900, 0x10915, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x12000, 0x1236e, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, +}; + +static Rune __isalphas[] = { + 0x00aa, + 0x00b5, + 0x00ba, + 0x02ee, + 0x0386, + 0x038c, + 0x0559, + 0x06d5, + 0x06ff, + 0x0710, + 0x07b1, + 0x07fa, + 0x093d, + 0x0950, + 0x09b2, + 0x09bd, + 0x09ce, + 0x0a5e, + 0x0abd, + 0x0ad0, + 0x0b3d, + 0x0b71, + 0x0b83, + 0x0b9c, + 0x0cbd, + 0x0cde, + 0x0dbd, + 0x0e84, + 0x0e8a, + 0x0e8d, + 0x0ea5, + 0x0ea7, + 0x0ebd, + 0x0ec6, + 0x0f00, + 0x10fc, + 0x1258, + 0x12c0, + 0x17d7, + 0x17dc, + 0x1f59, + 0x1f5b, + 0x1f5d, + 0x1fbe, + 0x2071, + 0x207f, + 0x2102, + 0x2107, + 0x2115, + 0x2124, + 0x2126, + 0x2128, + 0x214e, + 0x2d6f, + 0xfb1d, + 0xfb3e, + 0x10808, + 0x1083c, + 0x1083f, + 0x10a00, + 0x1d4a2, + 0x1d4bb, + 0x1d546, +}; + +int +isalpharune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __isalphar, nelem(__isalphar)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = rbsearch(c, __isalphas, nelem(__isalphas), 1); + if(p && c == p[0]) + return 1; + return 0; +} + +static Rune __isupperr[] = { + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0178, 0x0179, + 0x0181, 0x0182, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a6, 0x01a7, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b7, 0x01b8, + 0x01f6, 0x01f8, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0243, 0x0246, + 0x0388, 0x038a, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x04c0, 0x04c1, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f68, 0x1f6f, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fb8, 0x1fbc, + 0x1fc8, 0x1fcc, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffc, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2119, 0x211d, + 0x212a, 0x212d, + 0x2130, 0x2133, + 0x213e, 0x213f, + 0x2160, 0x216f, + 0x24b6, 0x24cf, + 0x2c00, 0x2c2e, + 0x2c62, 0x2c64, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49e, 0x1d49f, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8, +}; + +static Rune __isupperp[] = { + 0x0100, 0x0136, + 0x0139, 0x0147, + 0x014a, 0x0176, + 0x017b, 0x017d, + 0x01a2, 0x01a4, + 0x01cd, 0x01db, + 0x01de, 0x01ee, + 0x01fa, 0x0232, + 0x0248, 0x024e, + 0x03d8, 0x03ee, + 0x0460, 0x0480, + 0x048a, 0x04be, + 0x04c3, 0x04cd, + 0x04d0, 0x0512, + 0x1e00, 0x1e94, + 0x1ea0, 0x1ef8, + 0x1f59, 0x1f5f, + 0x2124, 0x2128, + 0x2c67, 0x2c6b, + 0x2c80, 0x2ce2, +}; + +static Rune __isuppers[] = { + 0x0184, + 0x01a9, + 0x01ac, + 0x01b5, + 0x01bc, + 0x01c4, + 0x01c7, + 0x01ca, + 0x01f1, + 0x01f4, + 0x0241, + 0x0386, + 0x038c, + 0x03f4, + 0x03f7, + 0x2102, + 0x2107, + 0x2115, + 0x2145, + 0x2183, + 0x2c60, + 0x2c75, + 0x1d49c, + 0x1d4a2, + 0x1d546, + 0x1d7ca, +}; + +int +isupperrune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __isupperr, nelem(__isupperr)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = rbsearch(c, __isupperp, nelem(__isupperp)/2, 2); + if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) + return 1; + p = rbsearch(c, __isuppers, nelem(__isuppers), 1); + if(p && c == p[0]) + return 1; + return 0; +} + +static Rune __islowerr[] = { + 0x0061, 0x007a, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0137, 0x0138, + 0x0148, 0x0149, + 0x017e, 0x0180, + 0x018c, 0x018d, + 0x0199, 0x019b, + 0x01aa, 0x01ab, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01dc, 0x01dd, + 0x01ef, 0x01f0, + 0x0233, 0x0239, + 0x023f, 0x0240, + 0x024f, 0x0293, + 0x0295, 0x02af, + 0x037b, 0x037d, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03ef, 0x03f3, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x04ce, 0x04cf, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e95, 0x1e9b, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x210e, 0x210f, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2170, 0x217f, + 0x24d0, 0x24e9, + 0x2c30, 0x2c5e, + 0x2c65, 0x2c66, + 0x2c76, 0x2c77, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, +}; + +static Rune __islowerp[] = { + 0x0101, 0x0135, + 0x013a, 0x0146, + 0x014b, 0x0177, + 0x017a, 0x017c, + 0x0183, 0x0185, + 0x01a1, 0x01a5, + 0x01b4, 0x01b6, + 0x01cc, 0x01da, + 0x01df, 0x01ed, + 0x01f3, 0x01f5, + 0x01f9, 0x0231, + 0x0247, 0x024d, + 0x03d9, 0x03ed, + 0x0461, 0x0481, + 0x048b, 0x04bf, + 0x04c2, 0x04cc, + 0x04d1, 0x0513, + 0x1e01, 0x1e93, + 0x1ea1, 0x1ef9, + 0x2c68, 0x2c6c, + 0x2c81, 0x2ce1, +}; + +static Rune __islowers[] = { + 0x00aa, + 0x00b5, + 0x00ba, + 0x0188, + 0x0192, + 0x0195, + 0x019e, + 0x01a8, + 0x01ad, + 0x01b0, + 0x01c6, + 0x01c9, + 0x023c, + 0x0242, + 0x0390, + 0x03f5, + 0x03f8, + 0x1fbe, + 0x2071, + 0x207f, + 0x210a, + 0x2113, + 0x212f, + 0x2134, + 0x2139, + 0x214e, + 0x2184, + 0x2c61, + 0x2c74, + 0x1d4bb, + 0x1d7cb, +}; + +int +islowerrune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __islowerr, nelem(__islowerr)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = rbsearch(c, __islowerp, nelem(__islowerp)/2, 2); + if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) + return 1; + p = rbsearch(c, __islowers, nelem(__islowers), 1); + if(p && c == p[0]) + return 1; + return 0; +} + +static Rune __istitler[] = { + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0178, 0x0179, + 0x0181, 0x0182, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a6, 0x01a7, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b7, 0x01b8, + 0x01f6, 0x01f8, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0243, 0x0246, + 0x0388, 0x038a, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x04c0, 0x04c1, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f68, 0x1f6f, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fb8, 0x1fbc, + 0x1fc8, 0x1fcc, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffc, + 0x2160, 0x216f, + 0x24b6, 0x24cf, + 0x2c00, 0x2c2e, + 0x2c62, 0x2c64, + 0xff21, 0xff3a, + 0x10400, 0x10427, +}; + +static Rune __istitlep[] = { + 0x0100, 0x012e, + 0x0132, 0x0136, + 0x0139, 0x0147, + 0x014a, 0x0176, + 0x017b, 0x017d, + 0x01a2, 0x01a4, + 0x01cb, 0x01db, + 0x01de, 0x01ee, + 0x01f2, 0x01f4, + 0x01fa, 0x0232, + 0x0248, 0x024e, + 0x03d8, 0x03ee, + 0x0460, 0x0480, + 0x048a, 0x04be, + 0x04c3, 0x04cd, + 0x04d0, 0x0512, + 0x1e00, 0x1e94, + 0x1ea0, 0x1ef8, + 0x1f59, 0x1f5f, + 0x2c67, 0x2c6b, + 0x2c80, 0x2ce2, +}; + +static Rune __istitles[] = { + 0x0184, + 0x01a9, + 0x01ac, + 0x01b5, + 0x01bc, + 0x01c5, + 0x01c8, + 0x0241, + 0x0386, + 0x038c, + 0x03f7, + 0x2132, + 0x2183, + 0x2c60, + 0x2c75, +}; + +int +istitlerune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __istitler, nelem(__istitler)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = rbsearch(c, __istitlep, nelem(__istitlep)/2, 2); + if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) + return 1; + p = rbsearch(c, __istitles, nelem(__istitles), 1); + if(p && c == p[0]) + return 1; + return 0; +} + +static Rune __toupperr[] = { + 0x0061, 0x007a, 1048544, + 0x00e0, 0x00f6, 1048544, + 0x00f8, 0x00fe, 1048544, + 0x0256, 0x0257, 1048371, + 0x028a, 0x028b, 1048359, + 0x037b, 0x037d, 1048706, + 0x03ad, 0x03af, 1048539, + 0x03b1, 0x03c1, 1048544, + 0x03c3, 0x03cb, 1048544, + 0x03cd, 0x03ce, 1048513, + 0x0430, 0x044f, 1048544, + 0x0450, 0x045f, 1048496, + 0x0561, 0x0586, 1048528, + 0x1f00, 0x1f07, 1048584, + 0x1f10, 0x1f15, 1048584, + 0x1f20, 0x1f27, 1048584, + 0x1f30, 0x1f37, 1048584, + 0x1f40, 0x1f45, 1048584, + 0x1f60, 0x1f67, 1048584, + 0x1f70, 0x1f71, 1048650, + 0x1f72, 0x1f75, 1048662, + 0x1f76, 0x1f77, 1048676, + 0x1f78, 0x1f79, 1048704, + 0x1f7a, 0x1f7b, 1048688, + 0x1f7c, 0x1f7d, 1048702, + 0x1f80, 0x1f87, 1048584, + 0x1f90, 0x1f97, 1048584, + 0x1fa0, 0x1fa7, 1048584, + 0x1fb0, 0x1fb1, 1048584, + 0x1fd0, 0x1fd1, 1048584, + 0x1fe0, 0x1fe1, 1048584, + 0x2170, 0x217f, 1048560, + 0x24d0, 0x24e9, 1048550, + 0x2c30, 0x2c5e, 1048528, + 0x2d00, 0x2d25, 1041312, + 0xff41, 0xff5a, 1048544, + 0x10428, 0x1044f, 1048536, +}; + +static Rune __toupperp[] = { + 0x0101, 0x012f, 1048575, + 0x0133, 0x0137, 1048575, + 0x013a, 0x0148, 1048575, + 0x014b, 0x0177, 1048575, + 0x017a, 0x017e, 1048575, + 0x0183, 0x0185, 1048575, + 0x01a1, 0x01a5, 1048575, + 0x01b4, 0x01b6, 1048575, + 0x01ce, 0x01dc, 1048575, + 0x01df, 0x01ef, 1048575, + 0x01f9, 0x021f, 1048575, + 0x0223, 0x0233, 1048575, + 0x0247, 0x024f, 1048575, + 0x03d9, 0x03ef, 1048575, + 0x0461, 0x0481, 1048575, + 0x048b, 0x04bf, 1048575, + 0x04c2, 0x04ce, 1048575, + 0x04d1, 0x0513, 1048575, + 0x1e01, 0x1e95, 1048575, + 0x1ea1, 0x1ef9, 1048575, + 0x1f51, 0x1f57, 1048584, + 0x2c68, 0x2c6c, 1048575, + 0x2c81, 0x2ce3, 1048575, +}; + +static Rune __touppers[] = { + 0x00b5, 1049319, + 0x00ff, 1048697, + 0x0131, 1048344, + 0x017f, 1048276, + 0x0180, 1048771, + 0x0188, 1048575, + 0x018c, 1048575, + 0x0192, 1048575, + 0x0195, 1048673, + 0x0199, 1048575, + 0x019a, 1048739, + 0x019e, 1048706, + 0x01a8, 1048575, + 0x01ad, 1048575, + 0x01b0, 1048575, + 0x01b9, 1048575, + 0x01bd, 1048575, + 0x01bf, 1048632, + 0x01c5, 1048575, + 0x01c6, 1048574, + 0x01c8, 1048575, + 0x01c9, 1048574, + 0x01cb, 1048575, + 0x01cc, 1048574, + 0x01dd, 1048497, + 0x01f2, 1048575, + 0x01f3, 1048574, + 0x01f5, 1048575, + 0x023c, 1048575, + 0x0242, 1048575, + 0x0253, 1048366, + 0x0254, 1048370, + 0x0259, 1048374, + 0x025b, 1048373, + 0x0260, 1048371, + 0x0263, 1048369, + 0x0268, 1048367, + 0x0269, 1048365, + 0x026b, 1059319, + 0x026f, 1048365, + 0x0272, 1048363, + 0x0275, 1048362, + 0x027d, 1059303, + 0x0280, 1048358, + 0x0283, 1048358, + 0x0288, 1048358, + 0x0289, 1048507, + 0x028c, 1048505, + 0x0292, 1048357, + 0x0345, 1048660, + 0x03ac, 1048538, + 0x03c2, 1048545, + 0x03cc, 1048512, + 0x03d0, 1048514, + 0x03d1, 1048519, + 0x03d5, 1048529, + 0x03d6, 1048522, + 0x03f0, 1048490, + 0x03f1, 1048496, + 0x03f2, 1048583, + 0x03f5, 1048480, + 0x03f8, 1048575, + 0x03fb, 1048575, + 0x04cf, 1048561, + 0x1d7d, 1052390, + 0x1e9b, 1048517, + 0x1fb3, 1048585, + 0x1fbe, 1041371, + 0x1fc3, 1048585, + 0x1fe5, 1048583, + 0x1ff3, 1048585, + 0x214e, 1048548, + 0x2184, 1048575, + 0x2c61, 1048575, + 0x2c65, 1037781, + 0x2c66, 1037784, + 0x2c76, 1048575, +}; + +Rune +toupperrune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __toupperr, nelem(__toupperr)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 1048576; + p = rbsearch(c, __toupperp, nelem(__toupperp)/3, 3); + if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) + return c + p[2] - 1048576; + p = rbsearch(c, __touppers, nelem(__touppers)/2, 2); + if(p && c == p[0]) + return c + p[1] - 1048576; + return c; +} + +static Rune __tolowerr[] = { + 0x0041, 0x005a, 1048608, + 0x00c0, 0x00d6, 1048608, + 0x00d8, 0x00de, 1048608, + 0x0189, 0x018a, 1048781, + 0x01b1, 0x01b2, 1048793, + 0x0388, 0x038a, 1048613, + 0x038e, 0x038f, 1048639, + 0x0391, 0x03a1, 1048608, + 0x03a3, 0x03ab, 1048608, + 0x03fd, 0x03ff, 1048446, + 0x0400, 0x040f, 1048656, + 0x0410, 0x042f, 1048608, + 0x0531, 0x0556, 1048624, + 0x10a0, 0x10c5, 1055840, + 0x1f08, 0x1f0f, 1048568, + 0x1f18, 0x1f1d, 1048568, + 0x1f28, 0x1f2f, 1048568, + 0x1f38, 0x1f3f, 1048568, + 0x1f48, 0x1f4d, 1048568, + 0x1f68, 0x1f6f, 1048568, + 0x1f88, 0x1f8f, 1048568, + 0x1f98, 0x1f9f, 1048568, + 0x1fa8, 0x1faf, 1048568, + 0x1fb8, 0x1fb9, 1048568, + 0x1fba, 0x1fbb, 1048502, + 0x1fc8, 0x1fcb, 1048490, + 0x1fd8, 0x1fd9, 1048568, + 0x1fda, 0x1fdb, 1048476, + 0x1fe8, 0x1fe9, 1048568, + 0x1fea, 0x1feb, 1048464, + 0x1ff8, 0x1ff9, 1048448, + 0x1ffa, 0x1ffb, 1048450, + 0x2160, 0x216f, 1048592, + 0x24b6, 0x24cf, 1048602, + 0x2c00, 0x2c2e, 1048624, + 0xff21, 0xff3a, 1048608, + 0x10400, 0x10427, 1048616, +}; + +static Rune __tolowerp[] = { + 0x0100, 0x012e, 1048577, + 0x0132, 0x0136, 1048577, + 0x0139, 0x0147, 1048577, + 0x014a, 0x0176, 1048577, + 0x017b, 0x017d, 1048577, + 0x01a2, 0x01a4, 1048577, + 0x01b3, 0x01b5, 1048577, + 0x01cd, 0x01db, 1048577, + 0x01de, 0x01ee, 1048577, + 0x01f8, 0x021e, 1048577, + 0x0222, 0x0232, 1048577, + 0x0248, 0x024e, 1048577, + 0x03d8, 0x03ee, 1048577, + 0x0460, 0x0480, 1048577, + 0x048a, 0x04be, 1048577, + 0x04c3, 0x04cd, 1048577, + 0x04d0, 0x0512, 1048577, + 0x1e00, 0x1e94, 1048577, + 0x1ea0, 0x1ef8, 1048577, + 0x1f59, 0x1f5f, 1048568, + 0x2c67, 0x2c6b, 1048577, + 0x2c80, 0x2ce2, 1048577, +}; + +static Rune __tolowers[] = { + 0x0130, 1048377, + 0x0178, 1048455, + 0x0179, 1048577, + 0x0181, 1048786, + 0x0182, 1048577, + 0x0184, 1048577, + 0x0186, 1048782, + 0x0187, 1048577, + 0x018b, 1048577, + 0x018e, 1048655, + 0x018f, 1048778, + 0x0190, 1048779, + 0x0191, 1048577, + 0x0193, 1048781, + 0x0194, 1048783, + 0x0196, 1048787, + 0x0197, 1048785, + 0x0198, 1048577, + 0x019c, 1048787, + 0x019d, 1048789, + 0x019f, 1048790, + 0x01a0, 1048577, + 0x01a6, 1048794, + 0x01a7, 1048577, + 0x01a9, 1048794, + 0x01ac, 1048577, + 0x01ae, 1048794, + 0x01af, 1048577, + 0x01b7, 1048795, + 0x01b8, 1048577, + 0x01bc, 1048577, + 0x01c4, 1048578, + 0x01c5, 1048577, + 0x01c7, 1048578, + 0x01c8, 1048577, + 0x01ca, 1048578, + 0x01cb, 1048577, + 0x01f1, 1048578, + 0x01f2, 1048577, + 0x01f4, 1048577, + 0x01f6, 1048479, + 0x01f7, 1048520, + 0x0220, 1048446, + 0x023a, 1059371, + 0x023b, 1048577, + 0x023d, 1048413, + 0x023e, 1059368, + 0x0241, 1048577, + 0x0243, 1048381, + 0x0244, 1048645, + 0x0245, 1048647, + 0x0246, 1048577, + 0x0386, 1048614, + 0x038c, 1048640, + 0x03f4, 1048516, + 0x03f7, 1048577, + 0x03f9, 1048569, + 0x03fa, 1048577, + 0x04c0, 1048591, + 0x04c1, 1048577, + 0x1fbc, 1048567, + 0x1fcc, 1048567, + 0x1fec, 1048569, + 0x1ffc, 1048567, + 0x2126, 1041059, + 0x212a, 1040193, + 0x212b, 1040314, + 0x2132, 1048604, + 0x2183, 1048577, + 0x2c60, 1048577, + 0x2c62, 1037833, + 0x2c63, 1044762, + 0x2c64, 1037849, + 0x2c75, 1048577, +}; + +Rune +tolowerrune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __tolowerr, nelem(__tolowerr)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 1048576; + p = rbsearch(c, __tolowerp, nelem(__tolowerp)/3, 3); + if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) + return c + p[2] - 1048576; + p = rbsearch(c, __tolowers, nelem(__tolowers)/2, 2); + if(p && c == p[0]) + return c + p[1] - 1048576; + return c; +} + +static Rune __totitler[] = { + 0x0061, 0x007a, 1048544, + 0x00e0, 0x00f6, 1048544, + 0x00f8, 0x00fe, 1048544, + 0x0256, 0x0257, 1048371, + 0x028a, 0x028b, 1048359, + 0x037b, 0x037d, 1048706, + 0x03ad, 0x03af, 1048539, + 0x03b1, 0x03c1, 1048544, + 0x03c3, 0x03cb, 1048544, + 0x03cd, 0x03ce, 1048513, + 0x0430, 0x044f, 1048544, + 0x0450, 0x045f, 1048496, + 0x0561, 0x0586, 1048528, + 0x1f00, 0x1f07, 1048584, + 0x1f10, 0x1f15, 1048584, + 0x1f20, 0x1f27, 1048584, + 0x1f30, 0x1f37, 1048584, + 0x1f40, 0x1f45, 1048584, + 0x1f60, 0x1f67, 1048584, + 0x1f70, 0x1f71, 1048650, + 0x1f72, 0x1f75, 1048662, + 0x1f76, 0x1f77, 1048676, + 0x1f78, 0x1f79, 1048704, + 0x1f7a, 0x1f7b, 1048688, + 0x1f7c, 0x1f7d, 1048702, + 0x1f80, 0x1f87, 1048584, + 0x1f90, 0x1f97, 1048584, + 0x1fa0, 0x1fa7, 1048584, + 0x1fb0, 0x1fb1, 1048584, + 0x1fd0, 0x1fd1, 1048584, + 0x1fe0, 0x1fe1, 1048584, + 0x2170, 0x217f, 1048560, + 0x24d0, 0x24e9, 1048550, + 0x2c30, 0x2c5e, 1048528, + 0x2d00, 0x2d25, 1041312, + 0xff41, 0xff5a, 1048544, + 0x10428, 0x1044f, 1048536, +}; + +static Rune __totitlep[] = { + 0x0101, 0x012f, 1048575, + 0x0133, 0x0137, 1048575, + 0x013a, 0x0148, 1048575, + 0x014b, 0x0177, 1048575, + 0x017a, 0x017e, 1048575, + 0x0183, 0x0185, 1048575, + 0x01a1, 0x01a5, 1048575, + 0x01b4, 0x01b6, 1048575, + 0x01cc, 0x01dc, 1048575, + 0x01df, 0x01ef, 1048575, + 0x01f3, 0x01f5, 1048575, + 0x01f9, 0x021f, 1048575, + 0x0223, 0x0233, 1048575, + 0x0247, 0x024f, 1048575, + 0x03d9, 0x03ef, 1048575, + 0x0461, 0x0481, 1048575, + 0x048b, 0x04bf, 1048575, + 0x04c2, 0x04ce, 1048575, + 0x04d1, 0x0513, 1048575, + 0x1e01, 0x1e95, 1048575, + 0x1ea1, 0x1ef9, 1048575, + 0x1f51, 0x1f57, 1048584, + 0x2c68, 0x2c6c, 1048575, + 0x2c81, 0x2ce3, 1048575, +}; + +static Rune __totitles[] = { + 0x00b5, 1049319, + 0x00ff, 1048697, + 0x0131, 1048344, + 0x017f, 1048276, + 0x0180, 1048771, + 0x0188, 1048575, + 0x018c, 1048575, + 0x0192, 1048575, + 0x0195, 1048673, + 0x0199, 1048575, + 0x019a, 1048739, + 0x019e, 1048706, + 0x01a8, 1048575, + 0x01ad, 1048575, + 0x01b0, 1048575, + 0x01b9, 1048575, + 0x01bd, 1048575, + 0x01bf, 1048632, + 0x01c4, 1048577, + 0x01c6, 1048575, + 0x01c7, 1048577, + 0x01c9, 1048575, + 0x01ca, 1048577, + 0x01dd, 1048497, + 0x01f1, 1048577, + 0x023c, 1048575, + 0x0242, 1048575, + 0x0253, 1048366, + 0x0254, 1048370, + 0x0259, 1048374, + 0x025b, 1048373, + 0x0260, 1048371, + 0x0263, 1048369, + 0x0268, 1048367, + 0x0269, 1048365, + 0x026b, 1059319, + 0x026f, 1048365, + 0x0272, 1048363, + 0x0275, 1048362, + 0x027d, 1059303, + 0x0280, 1048358, + 0x0283, 1048358, + 0x0288, 1048358, + 0x0289, 1048507, + 0x028c, 1048505, + 0x0292, 1048357, + 0x0345, 1048660, + 0x03ac, 1048538, + 0x03c2, 1048545, + 0x03cc, 1048512, + 0x03d0, 1048514, + 0x03d1, 1048519, + 0x03d5, 1048529, + 0x03d6, 1048522, + 0x03f0, 1048490, + 0x03f1, 1048496, + 0x03f2, 1048583, + 0x03f5, 1048480, + 0x03f8, 1048575, + 0x03fb, 1048575, + 0x04cf, 1048561, + 0x1d7d, 1052390, + 0x1e9b, 1048517, + 0x1fb3, 1048585, + 0x1fbe, 1041371, + 0x1fc3, 1048585, + 0x1fe5, 1048583, + 0x1ff3, 1048585, + 0x214e, 1048548, + 0x2184, 1048575, + 0x2c61, 1048575, + 0x2c65, 1037781, + 0x2c66, 1037784, + 0x2c76, 1048575, +}; + +Rune +totitlerune(Rune c) +{ + Rune *p; + + p = rbsearch(c, __totitler, nelem(__totitler)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 1048576; + p = rbsearch(c, __totitlep, nelem(__totitlep)/3, 3); + if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1)) + return c + p[2] - 1048576; + p = rbsearch(c, __totitles, nelem(__totitles)/2, 2); + if(p && c == p[0]) + return c + p[1] - 1048576; + return c; +} + @@ -1,54 +1,233 @@ -#ifndef _UTF_H_ -#define _UTF_H_ 1 -#if defined(__cplusplus) -extern "C" { -#endif +#ifndef _UTFH_ +#define _UTFH_ 1 + +#include <stdint.h> -typedef unsigned int Rune; /* 32 bits */ +typedef signed int Rune; /* Code-point values in Unicode 4.0 are 21 bits wide.*/ enum { - UTFmax = 4, /* maximum bytes per rune */ - Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ - Runeself = 0x80, /* rune and UTF sequences are the same (<) */ - Runeerror = 0xFFFD, /* decoding error in UTF */ - Runemax = 0x10FFFF /* maximum rune value */ + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF, /* maximum rune value */ }; -/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */ -int chartorune(Rune *rune, char *str); -int fullrune(char *str, int n); -int isalpharune(Rune c); -int islowerrune(Rune c); -int isspacerune(Rune c); -int istitlerune(Rune c); -int isupperrune(Rune c); -int runelen(long c); -int runenlen(Rune *r, int nrune); -Rune* runestrcat(Rune *s1, Rune *s2); -Rune* runestrchr(Rune *s, Rune c); -int runestrcmp(Rune *s1, Rune *s2); -Rune* runestrcpy(Rune *s1, Rune *s2); -Rune* runestrdup(Rune *s) ; -Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2); -long runestrlen(Rune *s); -Rune* runestrncat(Rune *s1, Rune *s2, long n); -int runestrncmp(Rune *s1, Rune *s2, long n); -Rune* runestrncpy(Rune *s1, Rune *s2, long n); -Rune* runestrrchr(Rune *s, Rune c); -Rune* runestrstr(Rune *s1, Rune *s2); -int runetochar(char *str, Rune *rune); -Rune tolowerrune(Rune c); -Rune totitlerune(Rune c); -Rune toupperrune(Rune c); -char* utfecpy(char *to, char *e, char *from); -int utflen(char *s); -int utfnlen(char *s, long m); -char* utfrrune(char *s, long c); -char* utfrune(char *s, long c); -char* utfutf(char *s1, char *s2); - -#if defined(__cplusplus) +#ifdef __cplusplus +extern "C" { +#endif + +/* + * rune routines + */ + +/* + * These routines were written by Rob Pike and Ken Thompson + * and first appeared in Plan 9. + * SEE ALSO + * utf (7) + * tcs (1) +*/ + +// runetochar copies (encodes) one rune, pointed to by r, to at most +// UTFmax bytes starting at s and returns the number of bytes generated. + +int runetochar(char* s, const Rune* r); + + +// chartorune copies (decodes) at most UTFmax bytes starting at s to +// one rune, pointed to by r, and returns the number of bytes consumed. +// If the input is not exactly in UTF format, chartorune will set *r +// to Runeerror and return 1. +// +// Note: There is no special case for a "null-terminated" string. A +// string whose first byte has the value 0 is the UTF8 encoding of the +// Unicode value 0 (i.e., ASCII NULL). A byte value of 0 is illegal +// anywhere else in a UTF sequence. + +int chartorune(Rune* r, const char* s); + + +// charntorune is like chartorune, except that it will access at most +// n bytes of s. If the UTF sequence is incomplete within n bytes, +// charntorune will set *r to Runeerror and return 0. If it is complete +// but not in UTF format, it will set *r to Runeerror and return 1. +// +// Added 2004-09-24 by Wei-Hwa Huang + +int charntorune(Rune* r, const char* s, int n); + +// isvalidcharntorune(str, n, r, consumed) +// is a convenience function that calls "*consumed = charntorune(r, str, n)" +// and returns an int (logically boolean) indicating whether the first +// n bytes of str was a valid and complete UTF sequence. + +int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed); + +// runelen returns the number of bytes required to convert r into UTF. + +int runelen(Rune r); + + +// runenlen returns the number of bytes required to convert the n +// runes pointed to by r into UTF. + +int runenlen(const Rune* r, int n); + + +// fullrune returns 1 if the string s of length n is long enough to be +// decoded by chartorune, and 0 otherwise. This does not guarantee +// that the string contains a legal UTF encoding. This routine is used +// by programs that obtain input one byte at a time and need to know +// when a full rune has arrived. + +int fullrune(const char* s, int n); + +// The following routines are analogous to the corresponding string +// routines with "utf" substituted for "str", and "rune" substituted +// for "chr". + +// utflen returns the number of runes that are represented by the UTF +// string s. (cf. strlen) + +int utflen(const char* s); + + +// utfnlen returns the number of complete runes that are represented +// by the first n bytes of the UTF string s. If the last few bytes of +// the string contain an incompletely coded rune, utfnlen will not +// count them; in this way, it differs from utflen, which includes +// every byte of the string. (cf. strnlen) + +int utfnlen(const char* s, long n); + + +// utfrune returns a pointer to the first occurrence of rune r in the +// UTF string s, or 0 if r does not occur in the string. The NULL +// byte terminating a string is considered to be part of the string s. +// (cf. strchr) + +const char* utfrune(const char* s, Rune r); + + +// utfrrune returns a pointer to the last occurrence of rune r in the +// UTF string s, or 0 if r does not occur in the string. The NULL +// byte terminating a string is considered to be part of the string s. +// (cf. strrchr) + +const char* utfrrune(const char* s, Rune r); + + +// utfutf returns a pointer to the first occurrence of the UTF string +// s2 as a UTF substring of s1, or 0 if there is none. If s2 is the +// null string, utfutf returns s1. (cf. strstr) + +const char* utfutf(const char* s1, const char* s2); + + +// utfecpy copies UTF sequences until a null sequence has been copied, +// but writes no sequences beyond es1. If any sequences are copied, +// s1 is terminated by a null sequence, and a pointer to that sequence +// is returned. Otherwise, the original s1 is returned. (cf. strecpy) + +char* utfecpy(char *s1, char *es1, const char *s2); + + + +// These functions are rune-string analogues of the corresponding +// functions in strcat (3). +// +// These routines first appeared in Plan 9. +// SEE ALSO +// memmove (3) +// rune (3) +// strcat (2) +// +// BUGS: The outcome of overlapping moves varies among implementations. + +Rune* runestrcat(Rune* s1, const Rune* s2); +Rune* runestrncat(Rune* s1, const Rune* s2, long n); + +const Rune* runestrchr(const Rune* s, Rune c); + +int runestrcmp(const Rune* s1, const Rune* s2); +int runestrncmp(const Rune* s1, const Rune* s2, long n); + +Rune* runestrcpy(Rune* s1, const Rune* s2); +Rune* runestrncpy(Rune* s1, const Rune* s2, long n); +Rune* runestrecpy(Rune* s1, Rune* es1, const Rune* s2); + +Rune* runestrdup(const Rune* s); + +const Rune* runestrrchr(const Rune* s, Rune c); +long runestrlen(const Rune* s); +const Rune* runestrstr(const Rune* s1, const Rune* s2); + + + +// The following routines test types and modify cases for Unicode +// characters. Unicode defines some characters as letters and +// specifies three cases: upper, lower, and title. Mappings among the +// cases are also defined, although they are not exhaustive: some +// upper case letters have no lower case mapping, and so on. Unicode +// also defines several character properties, a subset of which are +// checked by these routines. These routines are based on Unicode +// version 3.0.0. +// +// NOTE: The routines are implemented in C, so the boolean functions +// (e.g., isupperrune) return 0 for false and 1 for true. +// +// +// toupperrune, tolowerrune, and totitlerune are the Unicode case +// mappings. These routines return the character unchanged if it has +// no defined mapping. + +Rune toupperrune(Rune r); +Rune tolowerrune(Rune r); +Rune totitlerune(Rune r); + + +// isupperrune tests for upper case characters, including Unicode +// upper case letters and targets of the toupper mapping. islowerrune +// and istitlerune are defined analogously. + +int isupperrune(Rune r); +int islowerrune(Rune r); +int istitlerune(Rune r); + + +// isalpharune tests for Unicode letters; this includes ideographs in +// addition to alphabetic characters. + +int isalpharune(Rune r); + + +// isdigitrune tests for digits. Non-digit numbers, such as Roman +// numerals, are not included. + +int isdigitrune(Rune r); + + +// isideographicrune tests for ideographic characters and numbers, as +// defined by the Unicode standard. + +int isideographicrune(Rune r); + + +// isspacerune tests for whitespace characters, including "C" locale +// whitespace, Unicode defined whitespace, and the "zero-width +// non-break space" character. + +int isspacerune(Rune r); + + +// (The comments in this file were copied from the manpage files rune.3, +// isalpharune.3, and runestrcat.3. Some formatting changes were also made +// to conform to Google style. /JRM 11/11/05) + +#ifdef __cplusplus } #endif + #endif @@ -1,33 +1,14 @@ -/* - * compiler directive on Plan 9 - */ -#ifndef USED -#define USED(x) if(x);else -#endif +#define uchar _utfuchar +#define ushort _utfushort +#define uint _utfuint +#define ulong _utfulong +#define vlong _utfvlong +#define uvlong _utfuvlong -/* - * easiest way to make sure these are defined - */ -#define uchar _fmtuchar -#define ushort _fmtushort -#define uint _fmtuint -#define ulong _fmtulong -#define vlong _fmtvlong -#define uvlong _fmtuvlong typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; typedef unsigned long ulong; -typedef unsigned long long uvlong; -typedef long long vlong; - -/* - * nil cannot be ((void*)0) on ANSI C, - * because it is used for function pointers - */ -#undef nil -#define nil 0 - -#undef nelem -#define nelem ((void*)0) +#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#define nil ((void*)0) @@ -7,24 +7,23 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ -#define _BSD_SOURCE 1 /* memccpy */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" char* -utfecpy(char *to, char *e, char *from) +utfecpy(char *to, char *e, const char *from) { char *end; if(to >= e) return to; - end = memccpy(to, from, '\0', e - to); + end = (char*)memccpy(to, from, '\0', e - to); if(end == nil){ end = e-1; while(end>to && (*--end&0xC0)==0x80) @@ -7,17 +7,17 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" int -utflen(char *s) +utflen(const char *s) { int c; long n; @@ -34,4 +34,5 @@ utflen(char *s) s += chartorune(&rune, s); n++; } + return 0; } @@ -7,22 +7,22 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" int -utfnlen(char *s, long m) +utfnlen(const char *s, long m) { int c; long n; Rune rune; - char *es; + const char *es; es = s + m; for(n = 0; s < es; n++) { @@ -7,21 +7,22 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" +const char* -utfrrune(char *s, long c) +utfrrune(const char *s, Rune c) { long c1; Rune r; - char *s1; + const char *s1; if(c < Runesync) /* not part of utf sequence */ return strrchr(s, c); @@ -42,4 +43,5 @@ utfrrune(char *s, long c) s1 = s; s += c1; } + return 0; } @@ -7,17 +7,18 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" +const char* -utfrune(char *s, long c) +utfrune(const char *s, Rune c) { long c1; Rune r; @@ -41,4 +42,5 @@ utfrune(char *s, long c) return s; s += n; } + return 0; } @@ -7,24 +7,25 @@ * or modification of this software and in all copies of the supporting * documentation for such software. * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include <stdarg.h> #include <string.h> -#include "plan9.h" #include "utf.h" +#include "utfdef.h" /* * Return pointer to first occurrence of s2 in s1, * 0 if none */ +const char* -utfutf(char *s1, char *s2) +utfutf(const char *s1, const char *s2) { - char *p; + const char *p; long f, n1, n2; Rune r; @@ -34,7 +35,7 @@ utfutf(char *s1, char *s2) return strstr(s1, s2); n2 = strlen(s2); - for(p=s1; p=utfrune(p, f); p+=n1) + for(p=s1; (p=utfrune(p, f)) != 0; p+=n1) if(strncmp(p, s2, n2) == 0) return p; return 0; |