# These are tests of the UnicodeSetBuilder. # The format is: # ; # The second field is empty if the formats are the same. # It is the error code if an error is expected. # # The differences in format are that the new format has: # && and -- defined as in ICU4C Regex # ~~ defined as symmetric difference, as in UTS#18 # Various quirks for compatibility with Java Regex @showerror \p{Nd} ; @hide # Check - and --; & and && [[a-z] ; doSetNoCloseError [[a-z]-[b-m][s]] ; [[a-z]--[b-m][s]] ; [[a-z]-[[b-m][s]]] [a-z--b-ms] ; [[a-z]-[[b-m][s]]] [[a-z]&[b-m][s]] ; [[a-z]&&[b-m][s]] ; [[a-z]&[[b-m][s]]] [a-z&&b-ms] ; [[a-z]&[[b-m][s]]] # Basic [ab[^a-z]e] ; [b\x{AC00}cd-mf] ; [^abc] ; # Properties [\p{Nd}] ; [\P{Nd}] ; [\p{gc=Nd}] ; [\P{gc=Nd}] ; [\p{gc≠Nd}] ; [\P{gc=Nd}] [\P{gc≠Nd}] ; [\p{gc=Nd}] \p{Nd} ; \P{Nd} ; \p{gc=Nd} ; \P{gc=Nd} ; \p{gc≠Nd} ; \P{gc=Nd} \P{gc≠Nd} ; \p{gc=Nd} [[:Nd:]] ; [[:^Nd:]] ; [[:gc=Nd:]] ; [[:^gc=Nd:]] ; [[:gc≠Nd:]] ; [[:^gc=Nd:]] [[:^gc≠Nd:]] ; [[:gc=Nd:]] [:Nd:] ; [:^Nd:] ; [:gc=Nd:] ; [:^gc=Nd:] ; [:gc≠Nd:] ; [:^gc=Nd:] [:^gc≠Nd:] ; [:gc=Nd:] # more set-like objects \s ; [:white_space:] \S ; [:^white-space:] \w ; [[:alphabetic:][:Nd:]] \W ; [^[:alphabetic:][:Nd:]] \d ; [:Nd:] \D ; [:^Nd:] [\s] ; [:white_space:] [\S] ; [:^white-space:] [\w] ; [[:alphabetic:][:Nd:]] [\W] ; [^[:alphabetic:][:Nd:]] [\d] ; [:Nd:] [\D] ; [:^Nd:] # characters \N{FULL STOP} ; [.] [\N{FULL STOP}] \x{61} ; [a] [\x{61}] ; [a] [\a] ; [a] [\[] # quirks [:abc] ; [abc\:] [abc:] ; [abc\:] [-a] ; [a\-] [~a] ; [a\~] [&a] ; [a\&] [--a] ; error [&&a] ; error [a&b] ; [ab\&] [a-[b]] ; [ab\-] [[a]-b] ; [ab\-] [a&[b]] ; [ab\&] [[a]&b] ; [ab\&] [a&&] ; error [a--] ; error [a-b-z] ; [a-b\-z] [a-b&z] ; [a-b\&z] # Logically \N{...} and \x{...}, etc should act like a literal # and \s, \w, \p{...} etc. 
# should act like a set
[\N{FULL STOP}-a] ; [.-a]
[a-\N{FULL STOP}] ; [.-a]
[[ab]&\S] ; [[ab]&[:^whitespace:]]
[\S&[ab]] ; [[ab]&[:^whitespace:]]
[\s-z] ; [[:whitespace:]\-z]
[\s&z] ; [[:whitespace:]\&z]
[abc-\p{xx}] ; error