// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * Copyright (c) 1997-2014, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************** * * File UCNVSELTST.C * * Modification History: * Name Description * MOHAMED ELDAWY Creation ******************************************************************** */ /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ #include "ucnvseltst.h" #include #include "unicode/utypes.h" #include "unicode/ucnvsel.h" #include "unicode/ustring.h" #include "unicode/utf8.h" #include "cmemory.h" #include "cstring.h" #include "propsvec.h" #define FILENAME_BUFFER 1024 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING static void TestSelector(void); static void TestUPropsVector(void); void addCnvSelTest(TestNode** root); /* Declaration required to suppress compiler warnings. */ void addCnvSelTest(TestNode** root) { addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector"); } static const char **gAvailableNames = NULL; static int32_t gCountAvailable = 0; static UBool getAvailableNames() { int32_t i; if (gAvailableNames != NULL) { return TRUE; } gCountAvailable = ucnv_countAvailable(); if (gCountAvailable == 0) { log_data_err("No converters available.\n"); return FALSE; } gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *)); if (gAvailableNames == NULL) { log_err("unable to allocate memory for %ld available converter names\n", (long)gCountAvailable); return FALSE; } for (i = 0; i < gCountAvailable; ++i) { gAvailableNames[i] = ucnv_getAvailableName(i); } return TRUE; } static void releaseAvailableNames() { uprv_free((void *)gAvailableNames); gAvailableNames = NULL; gCountAvailable = 0; } static const char ** getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { const char **names; int32_t i; *pCount = 0; if (count <= 0) { return NULL; } names = (const char **)uprv_malloc(count * sizeof(char *)); if (names == NULL) { log_err("memory allocation error for %ld pointers\n", (long)count); return NULL; } if (step == 0 && count > 0) { step = 1; } for (i = 0; i < count; ++i) { if (0 <= start && start < gCountAvailable) { names[i] = gAvailableNames[start]; start += step; ++*pCount; } } return names; } #if 0 /* * ucnvsel_open() does not support "no encodings": * Given 0 encodings it will open a selector for all available ones. */ static const char ** getNoEncodings(int32_t *pCount) { *pCount = 0; return NULL; } #endif static const char ** getOneEncoding(int32_t *pCount) { return getEncodings(1, 0, 1, pCount); } static const char ** getFirstEvenEncodings(int32_t *pCount) { return getEncodings(0, 2, 25, pCount); } static const char ** getMiddleEncodings(int32_t *pCount) { return getEncodings(gCountAvailable - 12, 1, 22, pCount); } static const char ** getLastEncodings(int32_t *pCount) { return getEncodings(gCountAvailable - 1, -1, 25, pCount); } static const char ** getSomeEncodings(int32_t *pCount) { /* 20 evenly distributed */ return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); } static const char ** getEveryThirdEncoding(int32_t *pCount) { return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); } static const char ** getAllEncodings(int32_t *pCount) { return getEncodings(0, 1, gCountAvailable, pCount); } typedef const char **GetEncodingsFn(int32_t *); static GetEncodingsFn *const getEncodingsFns[] = { getOneEncoding, getFirstEvenEncodings, getMiddleEncodings, getLastEncodings, getSomeEncodings, getEveryThirdEncoding, getAllEncodings }; static FILE *fopenOrError(const char *filename) { int32_t needLen; FILE *f; char fnbuf[FILENAME_BUFFER]; const char* directory = ctest_dataSrcDir(); needLen = (int32_t)(uprv_strlen(directory) + uprv_strlen(TDSRCPATH) + uprv_strlen(filename) + 1); if(needLen > FILENAME_BUFFER) { log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n", filename, needLen, FILENAME_BUFFER); return NULL; } strcpy(fnbuf, directory); strcat(fnbuf, TDSRCPATH); strcat(fnbuf, filename); f = fopen(fnbuf, "rb"); if(f == NULL) { log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); } return f; } typedef struct TestText { char *text, *textLimit; char *limit; int32_t number; } TestText; static void text_reset(TestText *tt) { tt->limit = tt->text; tt->number = 0; } static char * text_nextString(TestText *tt, int32_t *pLength) { char *s = tt->limit; if (s == tt->textLimit) { /* we already delivered the last string */ return NULL; } else if (s == tt->text) { /* first string */ if ((tt->textLimit - tt->text) >= 3 && s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf ) { s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ } } else { /* skip the string terminator */ ++s; ++tt->number; } /* find the end of this string */ tt->limit = uprv_strchr(s, 0); *pLength = (int32_t)(tt->limit - s); return s; } static UBool text_open(TestText *tt) { FILE *f; char *s; int32_t length; uprv_memset(tt, 0, sizeof(TestText)); f = fopenOrError("ConverterSelectorTestUTF8.txt"); if(!f) { return FALSE; } fseek(f, 0, SEEK_END); length = (int32_t)ftell(f); fseek(f, 0, SEEK_SET); tt->text = (char *)uprv_malloc(length + 1); if (tt->text == NULL) { fclose(f); return FALSE; } if (length != (int32_t)fread(tt->text, 1, length, f)) { log_err("error reading %ld bytes from test text file\n", (long)length); length = 0; uprv_free(tt->text); } fclose(f); tt->textLimit = tt->text + length; *tt->textLimit = 0; /* replace all Unicode '#' (U+0023) with NUL */ for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} text_reset(tt); return TRUE; } static void text_close(TestText *tt) { uprv_free(tt->text); } static int32_t findIndex(const char* converterName) { int32_t i; for (i = 0 ; i < gCountAvailable; i++) { if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { return i; } } return -1; } static UBool * getResultsManually(const char** encodings, int32_t num_encodings, const char *utf8, int32_t length, const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { UBool* resultsManually; int32_t i; resultsManually = (UBool*) uprv_malloc(gCountAvailable); uprv_memset(resultsManually, 0, gCountAvailable); for(i = 0 ; i < num_encodings ; i++) { UErrorCode status = U_ZERO_ERROR; /* get unicode set for that converter */ USet* set; UConverter* test_converter; UChar32 cp; int32_t encIndex, offset; set = uset_openEmpty(); test_converter = ucnv_open(encodings[i], &status); ucnv_getUnicodeSet(test_converter, set, whichSet, &status); if (excludedCodePoints != NULL) { uset_addAll(set, excludedCodePoints); } uset_freeze(set); offset = 0; cp = 0; encIndex = findIndex(encodings[i]); /* * The following is almost, but not entirely, the same as * resultsManually[encIndex] = * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); * They might be different if the set contains strings, * or if the utf8 string contains an illegal sequence. * * The UConverterSelector does not currently handle strings that can be * converted, and it treats an illegal sequence as convertible * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. */ resultsManually[encIndex] = TRUE; while(offset= 0 && !uset_contains(set, cp)) { resultsManually[encIndex] = FALSE; break; } } uset_close(set); ucnv_close(test_converter); } return resultsManually; } /* closes res but does not free resultsManually */ static void verifyResult(UEnumeration* res, const UBool *resultsManually) { UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool)); const char* name; UErrorCode status = U_ZERO_ERROR; int32_t i; /* fill the bool for the selector results! */ uprv_memset(resultsFromSystem, 0, gCountAvailable); while ((name = uenum_next(res,NULL, &status)) != NULL) { resultsFromSystem[findIndex(name)] = TRUE; } for(i = 0 ; i < gCountAvailable; i++) { if(resultsManually[i] != resultsFromSystem[i]) { log_err("failure in converter selector\n" "converter %s had conflicting results -- manual: %d, system %d\n", gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); } } uprv_free(resultsFromSystem); uenum_close(res); } static UConverterSelector * serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) { char *new_buffer; int32_t ser_len, ser_len2; /* preflight */ ser_len = ucnvsel_serialize(sel, NULL, 0, status); if (*status != U_BUFFER_OVERFLOW_ERROR) { log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)); return sel; } new_buffer = (char *)uprv_malloc(ser_len); *status = U_ZERO_ERROR; ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); if (U_FAILURE(*status) || ser_len != ser_len2) { log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); uprv_free(new_buffer); return sel; } ucnvsel_close(sel); uprv_free(*buffer); *buffer = new_buffer; sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); if (U_FAILURE(*status)) { log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); return NULL; } return sel; } static void TestSelector() { TestText text; USet* excluded_sets[3] = { NULL }; int32_t i, testCaseIdx; if (!getAvailableNames()) { return; } if (!text_open(&text)) { releaseAvailableNames(); } excluded_sets[0] = uset_openEmpty(); for(i = 1 ; i < 3 ; i++) { excluded_sets[i] = uset_open(i*30, i*30+500); } for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++) { int32_t excluded_set_id; int32_t num_encodings; const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); if (getTestOption(QUICK_OPTION) && num_encodings > 25) { uprv_free((void *)encodings); continue; } /* * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) * * This loop was replaced by the following statement because * the loop made the test run longer without adding to the code coverage. * The handling of the exclusion set is independent of the * set of encodings, so there is no need to test every combination. */ excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets); { UConverterSelector *sel_rt, *sel_fb; char *buffer_fb = NULL; UErrorCode status = U_ZERO_ERROR; sel_rt = ucnvsel_open(encodings, num_encodings, excluded_sets[excluded_set_id], UCNV_ROUNDTRIP_SET, &status); if (num_encodings == gCountAvailable) { /* test the special "all converters" parameter values */ sel_fb = ucnvsel_open(NULL, 0, excluded_sets[excluded_set_id], UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { /* test that a NULL set gives the same results as an empty set */ sel_fb = ucnvsel_open(encodings, num_encodings, NULL, UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); } else { sel_fb = ucnvsel_open(encodings, num_encodings, excluded_sets[excluded_set_id], UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); } if (U_FAILURE(status)) { log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status)); ucnvsel_close(sel_rt); uprv_free((void *)encodings); continue; } text_reset(&text); for (;;) { UBool *manual_rt, *manual_fb; static UChar utf16[10000]; char *s; int32_t length8, length16; s = text_nextString(&text, &length8); if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) { break; } manual_rt = getResultsManually(encodings, num_encodings, s, length8, excluded_sets[excluded_set_id], UCNV_ROUNDTRIP_SET); manual_fb = getResultsManually(encodings, num_encodings, s, length8, excluded_sets[excluded_set_id], UCNV_ROUNDTRIP_AND_FALLBACK_SET); /* UTF-8 with length */ status = U_ZERO_ERROR; verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt); verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb); /* UTF-8 NUL-terminated */ verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status); if (U_FAILURE(status)) { log_err("error converting the test text (string %ld) to UTF-16 - %s\n", (long)text.number, u_errorName(status)); } else { if (text.number == 0) { sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); } if (U_SUCCESS(status)) { /* UTF-16 with length */ verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt); verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb); /* UTF-16 NUL-terminated */ verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt); verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb); } } uprv_free(manual_rt); uprv_free(manual_fb); } ucnvsel_close(sel_rt); ucnvsel_close(sel_fb); uprv_free(buffer_fb); } uprv_free((void *)encodings); } releaseAvailableNames(); text_close(&text); for(i = 0 ; i < 3 ; i++) { uset_close(excluded_sets[i]); } } /* Improve code coverage of UPropsVectors */ static void TestUPropsVector() { UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR; UPropsVectors *pv = upvec_open(100, &errorCode); if (pv != NULL) { log_err("Should have returned NULL if UErrorCode is an error."); return; } errorCode = U_ZERO_ERROR; pv = upvec_open(-1, &errorCode); if (pv != NULL || U_SUCCESS(errorCode)) { log_err("Should have returned NULL if column is less than 0.\n"); return; } errorCode = U_ZERO_ERROR; pv = upvec_open(100, &errorCode); if (pv == NULL || U_FAILURE(errorCode)) { log_err("Unable to open UPropsVectors.\n"); return; } if (upvec_getValue(pv, 0, 1) != 0) { log_err("upvec_getValue should return 0.\n"); } if (upvec_getRow(pv, 0, NULL, NULL) == NULL) { log_err("upvec_getRow should not return NULL.\n"); } if (upvec_getArray(pv, NULL, NULL) != NULL) { log_err("upvec_getArray should return NULL.\n"); } upvec_close(pv); }