/*
 * kmp_str.cpp -- String manipulation routines.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_str.h"

#include <stdarg.h> // va_*
#include <stdio.h> // vsnprintf()
#include <stdlib.h> // malloc(), realloc()

#include "kmp.h"
#include "kmp_i18n.h"

/* String buffer.

   Usage:

   // Declare buffer and initialize it.
   kmp_str_buf_t buffer;
   __kmp_str_buf_init( & buffer );

   // Print to buffer.
   __kmp_str_buf_print(& buffer, "Error in file \"%s\" line %d\n", "foo.c", 12);
   __kmp_str_buf_print(& buffer, " <%s>\n", line);

   // Use buffer contents. buffer.str is a pointer to data, buffer.used is a
   // number of printed characters (not including terminating zero).
   write( fd, buffer.str, buffer.used );

   // Free buffer.
   __kmp_str_buf_free( & buffer );

   // Alternatively, you can detach allocated memory from buffer:
   __kmp_str_buf_detach( & buffer );
   return buffer.str; // That memory should be freed eventually.

   Notes:

   * Buffer users may use buffer.str and buffer.used. Users should not change
     any fields of buffer directly.
   * buffer.str is never NULL. If buffer is empty, buffer.str points to empty
     string ("").
   * For performance reasons, buffer uses stack memory (buffer.bulk) first. If
     stack memory is exhausted, buffer allocates memory on heap by malloc(), and
     reallocates it by realloc() as amount of used memory grows.
   * Buffer doubles amount of allocated memory each time it is exhausted.
*/

// TODO: __kmp_str_buf_print() can use thread local memory allocator.
#define KMP_STR_BUF_INVARIANT(b) \ { \ KMP_DEBUG_ASSERT((b)->str != NULL); \ KMP_DEBUG_ASSERT((b)->size >= sizeof((b)->bulk)); \ KMP_DEBUG_ASSERT((b)->size % sizeof((b)->bulk) == 0); \ KMP_DEBUG_ASSERT((unsigned)(b)->used < (b)->size); \ KMP_DEBUG_ASSERT( \ (b)->size == sizeof((b)->bulk) ? (b)->str == &(b)->bulk[0] : 1); \ KMP_DEBUG_ASSERT((b)->size > sizeof((b)->bulk) ? (b)->str != &(b)->bulk[0] \ : 1); \ } void __kmp_str_buf_clear(kmp_str_buf_t *buffer) { KMP_STR_BUF_INVARIANT(buffer); if (buffer->used > 0) { buffer->used = 0; buffer->str[0] = 0; } KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_clear void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, size_t size) { KMP_STR_BUF_INVARIANT(buffer); KMP_DEBUG_ASSERT(size >= 0); if (buffer->size < (unsigned int)size) { // Calculate buffer size. do { buffer->size *= 2; } while (buffer->size < (unsigned int)size); // Enlarge buffer. if (buffer->str == &buffer->bulk[0]) { buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size); if (buffer->str == NULL) { KMP_FATAL(MemoryAllocFailed); } KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1); } else { buffer->str = (char *)KMP_INTERNAL_REALLOC(buffer->str, buffer->size); if (buffer->str == NULL) { KMP_FATAL(MemoryAllocFailed); } } } KMP_DEBUG_ASSERT(buffer->size > 0); KMP_DEBUG_ASSERT(buffer->size >= (unsigned)size); KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_reserve void __kmp_str_buf_detach(kmp_str_buf_t *buffer) { KMP_STR_BUF_INVARIANT(buffer); // If internal bulk is used, allocate memory and copy it. 
if (buffer->size <= sizeof(buffer->bulk)) { buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size); if (buffer->str == NULL) { KMP_FATAL(MemoryAllocFailed); } KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1); } } // __kmp_str_buf_detach void __kmp_str_buf_free(kmp_str_buf_t *buffer) { KMP_STR_BUF_INVARIANT(buffer); if (buffer->size > sizeof(buffer->bulk)) { KMP_INTERNAL_FREE(buffer->str); } buffer->str = buffer->bulk; buffer->size = sizeof(buffer->bulk); buffer->used = 0; KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_free void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, size_t len) { KMP_STR_BUF_INVARIANT(buffer); KMP_DEBUG_ASSERT(str != NULL); KMP_DEBUG_ASSERT(len >= 0); __kmp_str_buf_reserve(buffer, buffer->used + len + 1); buffer->str[buffer->used] = '\0'; KMP_STRNCAT_S(buffer->str + buffer->used, len + 1, str, len); __kmp_type_convert(buffer->used + len, &(buffer->used)); KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_cat void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) { KMP_DEBUG_ASSERT(dest); KMP_DEBUG_ASSERT(src); KMP_STR_BUF_INVARIANT(dest); KMP_STR_BUF_INVARIANT(src); if (!src->str || !src->used) return; __kmp_str_buf_reserve(dest, dest->used + src->used + 1); dest->str[dest->used] = '\0'; KMP_STRNCAT_S(dest->str + dest->used, src->used + 1, src->str, src->used); dest->used += src->used; KMP_STR_BUF_INVARIANT(dest); } // __kmp_str_buf_catbuf // Return the number of characters written int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, va_list args) { int rc; KMP_STR_BUF_INVARIANT(buffer); for (;;) { int const free = buffer->size - buffer->used; int size; // Try to format string. { /* On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() crashes if it is called for the second time with the same args. To prevent the crash, we have to pass a fresh intact copy of args to vsnprintf() on each iteration. 
Unfortunately, standard va_copy() macro is not available on Windows* OS. However, it seems vsnprintf() does not modify args argument on Windows* OS. */ #if !KMP_OS_WINDOWS va_list _args; va_copy(_args, args); // Make copy of args. #define args _args // Substitute args with its copy, _args. #endif // KMP_OS_WINDOWS rc = KMP_VSNPRINTF(buffer->str + buffer->used, free, format, args); #if !KMP_OS_WINDOWS #undef args // Remove substitution. va_end(_args); #endif // KMP_OS_WINDOWS } // No errors, string has been formatted. if (rc >= 0 && rc < free) { buffer->used += rc; break; } // Error occurred, buffer is too small. if (rc >= 0) { // C99-conforming implementation of vsnprintf returns required buffer size size = buffer->used + rc + 1; } else { // Older implementations just return -1. Double buffer size. size = buffer->size * 2; } // Enlarge buffer. __kmp_str_buf_reserve(buffer, size); // And try again. } KMP_DEBUG_ASSERT(buffer->size > 0); KMP_STR_BUF_INVARIANT(buffer); return rc; } // __kmp_str_buf_vprint // Return the number of characters written int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) { int rc; va_list args; va_start(args, format); rc = __kmp_str_buf_vprint(buffer, format, args); va_end(args); return rc; } // __kmp_str_buf_print /* The function prints specified size to buffer. Size is expressed using biggest possible unit, for example 1024 is printed as "1k". 
*/ void __kmp_str_buf_print_size(kmp_str_buf_t *buf, size_t size) { char const *names[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"}; int const units = sizeof(names) / sizeof(char const *); int u = 0; if (size > 0) { while ((size % 1024 == 0) && (u + 1 < units)) { size = size / 1024; ++u; } } __kmp_str_buf_print(buf, "%" KMP_SIZE_T_SPEC "%s", size, names[u]); } // __kmp_str_buf_print_size void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) { fname->path = NULL; fname->dir = NULL; fname->base = NULL; if (path != NULL) { char *slash = NULL; // Pointer to the last character of dir. char *base = NULL; // Pointer to the beginning of basename. fname->path = __kmp_str_format("%s", path); // Original code used strdup() function to copy a string, but on Windows* OS // Intel(R) 64 it causes assertion id debug heap, so I had to replace // strdup with __kmp_str_format(). if (KMP_OS_WINDOWS) { __kmp_str_replace(fname->path, '\\', '/'); } fname->dir = __kmp_str_format("%s", fname->path); slash = strrchr(fname->dir, '/'); if (KMP_OS_WINDOWS && slash == NULL) { // On Windows* OS, if slash not found, char first = (char)TOLOWER(fname->dir[0]); // look for drive. if ('a' <= first && first <= 'z' && fname->dir[1] == ':') { slash = &fname->dir[1]; } } base = (slash == NULL ? fname->dir : slash + 1); fname->base = __kmp_str_format("%s", base); // Copy basename *base = 0; // and truncate dir. 
} } // kmp_str_fname_init void __kmp_str_fname_free(kmp_str_fname_t *fname) { __kmp_str_free(&fname->path); __kmp_str_free(&fname->dir); __kmp_str_free(&fname->base); } // kmp_str_fname_free int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern) { int dir_match = 1; int base_match = 1; if (pattern != NULL) { kmp_str_fname_t ptrn; __kmp_str_fname_init(&ptrn, pattern); dir_match = strcmp(ptrn.dir, "*/") == 0 || (fname->dir != NULL && __kmp_str_eqf(fname->dir, ptrn.dir)); base_match = strcmp(ptrn.base, "*") == 0 || (fname->base != NULL && __kmp_str_eqf(fname->base, ptrn.base)); __kmp_str_fname_free(&ptrn); } return dir_match && base_match; } // __kmp_str_fname_match // Get the numeric fields from source location string. // For clang these fields are Line/Col of the start of the construct. // For icc these are LineBegin/LineEnd of the construct. // Function is fast as it does not duplicate string (which involves memory // allocation), and parses the string in place. void __kmp_str_loc_numbers(char const *Psource, int *LineBeg, int *LineEndOrCol) { char *Str; KMP_DEBUG_ASSERT(LineBeg); KMP_DEBUG_ASSERT(LineEndOrCol); // Parse Psource string ";file;func;line;line_end_or_column;;" to get // numbers only, skipping string fields "file" and "func". // Find 1-st semicolon. KMP_DEBUG_ASSERT(Psource); #ifdef __cplusplus Str = strchr(CCAST(char *, Psource), ';'); #else Str = strchr(Psource, ';'); #endif // Check returned pointer to see if the format of Psource is broken. if (Str) { // Find 2-nd semicolon. Str = strchr(Str + 1, ';'); } if (Str) { // Find 3-rd semicolon. Str = strchr(Str + 1, ';'); } if (Str) { // Read begin line number. *LineBeg = atoi(Str + 1); // Find 4-th semicolon. Str = strchr(Str + 1, ';'); } else { // Broken format of input string, cannot read the number. *LineBeg = 0; } if (Str) { // Read end line or column number. *LineEndOrCol = atoi(Str + 1); } else { // Broken format of input string, cannot read the number. 
*LineEndOrCol = 0; } } kmp_str_loc_t __kmp_str_loc_init(char const *psource, bool init_fname) { kmp_str_loc_t loc; loc._bulk = NULL; loc.file = NULL; loc.func = NULL; loc.line = 0; loc.col = 0; if (psource != NULL) { char *str = NULL; char *dummy = NULL; char *line = NULL; char *col = NULL; // Copy psource to keep it intact. loc._bulk = __kmp_str_format("%s", psource); // Parse psource string: ";file;func;line;col;;" str = loc._bulk; __kmp_str_split(str, ';', &dummy, &str); __kmp_str_split(str, ';', &loc.file, &str); __kmp_str_split(str, ';', &loc.func, &str); __kmp_str_split(str, ';', &line, &str); __kmp_str_split(str, ';', &col, &str); // Convert line and col into numberic values. if (line != NULL) { loc.line = atoi(line); if (loc.line < 0) { loc.line = 0; } } if (col != NULL) { loc.col = atoi(col); if (loc.col < 0) { loc.col = 0; } } } __kmp_str_fname_init(&loc.fname, init_fname ? loc.file : NULL); return loc; } // kmp_str_loc_init void __kmp_str_loc_free(kmp_str_loc_t *loc) { __kmp_str_fname_free(&loc->fname); __kmp_str_free(&(loc->_bulk)); loc->file = NULL; loc->func = NULL; } // kmp_str_loc_free /* This function is intended to compare file names. On Windows* OS file names are case-insensitive, so functions performs case-insensitive comparison. On Linux* OS it performs case-sensitive comparison. Note: The function returns *true* if strings are *equal*. */ int __kmp_str_eqf( // True, if strings are equal, false otherwise. char const *lhs, // First string. char const *rhs // Second string. ) { int result; #if KMP_OS_WINDOWS result = (_stricmp(lhs, rhs) == 0); #else result = (strcmp(lhs, rhs) == 0); #endif return result; } // __kmp_str_eqf /* This function is like sprintf, but it *allocates* new buffer, which must be freed eventually by __kmp_str_free(). The function is very convenient for constructing strings, it successfully replaces strdup(), strcat(), it frees programmer from buffer allocations and helps to avoid buffer overflows. 
Examples: str = __kmp_str_format("%s", orig); //strdup() doesn't care about buffer size __kmp_str_free( & str ); str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), doesn't care // about buffer size. __kmp_str_free( & str ); str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string. __kmp_str_free( & str ); Performance note: This function allocates memory with malloc() calls, so do not call it from performance-critical code. In performance-critical code consider using kmp_str_buf_t instead, since it uses stack-allocated buffer for short strings. Why does this function use malloc()? 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). There are no reasons in using __kmp_allocate() for strings due to extra overhead while cache-aligned memory is not necessary. 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread structure. We need to perform string operations during library startup (for example, in __kmp_register_library_startup()) when no thread structures are allocated yet. So standard malloc() is the only available option. */ char *__kmp_str_format( // Allocated string. char const *format, // Format string. ... // Other parameters. ) { va_list args; int size = 512; char *buffer = NULL; int rc; // Allocate buffer. buffer = (char *)KMP_INTERNAL_MALLOC(size); if (buffer == NULL) { KMP_FATAL(MemoryAllocFailed); } for (;;) { // Try to format string. va_start(args, format); rc = KMP_VSNPRINTF(buffer, size, format, args); va_end(args); // No errors, string has been formatted. if (rc >= 0 && rc < size) { break; } // Error occurred, buffer is too small. if (rc >= 0) { // C99-conforming implementation of vsnprintf returns required buffer // size. size = rc + 1; } else { // Older implementations just return -1. size = size * 2; } // Enlarge buffer and try again. 
buffer = (char *)KMP_INTERNAL_REALLOC(buffer, size); if (buffer == NULL) { KMP_FATAL(MemoryAllocFailed); } } return buffer; } // func __kmp_str_format void __kmp_str_free(char **str) { KMP_DEBUG_ASSERT(str != NULL); KMP_INTERNAL_FREE(*str); *str = NULL; } // func __kmp_str_free /* If len is zero, returns true iff target and data have exact case-insensitive match. If len is negative, returns true iff target is a case-insensitive substring of data. If len is positive, returns true iff target is a case-insensitive substring of data or vice versa, and neither is shorter than len. */ int __kmp_str_match(char const *target, int len, char const *data) { int i; if (target == NULL || data == NULL) { return FALSE; } for (i = 0; target[i] && data[i]; ++i) { if (TOLOWER(target[i]) != TOLOWER(data[i])) { return FALSE; } } return ((len > 0) ? i >= len : (!target[i] && (len || !data[i]))); } // __kmp_str_match // If data contains all of target, returns true, otherwise returns false. // len should be the length of target bool __kmp_str_contains(char const *target, int len, char const *data) { int i = 0, j = 0, start = 0; if (target == NULL || data == NULL) { return FALSE; } while (target[i]) { if (!data[j]) return FALSE; if (TOLOWER(target[i]) != TOLOWER(data[j])) { j = start + 1; start = j; i = 0; } else { if (i == 0) start = j; j++; i++; } } return i == len; } // __kmp_str_contains int __kmp_str_match_false(char const *data) { int result = __kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) || __kmp_str_match("0", 1, data) || __kmp_str_match(".false.", 2, data) || __kmp_str_match(".f.", 2, data) || __kmp_str_match("no", 1, data) || __kmp_str_match("disabled", 0, data); return result; } // __kmp_str_match_false int __kmp_str_match_true(char const *data) { int result = __kmp_str_match("true", 1, data) || __kmp_str_match("on", 2, data) || __kmp_str_match("1", 1, data) || __kmp_str_match(".true.", 2, data) || __kmp_str_match(".t.", 2, data) || __kmp_str_match("yes", 
1, data) || __kmp_str_match("enabled", 0, data); return result; } // __kmp_str_match_true void __kmp_str_replace(char *str, char search_for, char replace_with) { char *found = NULL; found = strchr(str, search_for); while (found) { *found = replace_with; found = strchr(found + 1, search_for); } } // __kmp_str_replace void __kmp_str_split(char *str, // I: String to split. char delim, // I: Character to split on. char **head, // O: Pointer to head (may be NULL). char **tail // O: Pointer to tail (may be NULL). ) { char *h = str; char *t = NULL; if (str != NULL) { char *ptr = strchr(str, delim); if (ptr != NULL) { *ptr = 0; t = ptr + 1; } } if (head != NULL) { *head = h; } if (tail != NULL) { *tail = t; } } // __kmp_str_split /* strtok_r() is not available on Windows* OS. This function reimplements strtok_r(). */ char *__kmp_str_token( char *str, // String to split into tokens. Note: String *is* modified! char const *delim, // Delimiters. char **buf // Internal buffer. ) { char *token = NULL; #if KMP_OS_WINDOWS // On Windows* OS there is no strtok_r() function. Let us implement it. if (str != NULL) { *buf = str; // First call, initialize buf. } *buf += strspn(*buf, delim); // Skip leading delimiters. if (**buf != 0) { // Rest of the string is not yet empty. token = *buf; // Use it as result. *buf += strcspn(*buf, delim); // Skip non-delimiters. if (**buf != 0) { // Rest of the string is not yet empty. **buf = 0; // Terminate token here. *buf += 1; // Advance buf to start with the next token next time. } } #else // On Linux* OS and OS X*, strtok_r() is available. Let us use it. 
token = strtok_r(str, delim, buf); #endif return token; } // __kmp_str_token int __kmp_basic_str_to_int(char const *str) { int result; char const *t; result = 0; for (t = str; *t != '\0'; ++t) { if (*t < '0' || *t > '9') break; result = (result * 10) + (*t - '0'); } return result; } int __kmp_str_to_int(char const *str, char sentinel) { int result, factor; char const *t; result = 0; for (t = str; *t != '\0'; ++t) { if (*t < '0' || *t > '9') break; result = (result * 10) + (*t - '0'); } switch (*t) { case '\0': /* the current default for no suffix is bytes */ factor = 1; break; case 'b': case 'B': /* bytes */ ++t; factor = 1; break; case 'k': case 'K': /* kilo-bytes */ ++t; factor = 1024; break; case 'm': case 'M': /* mega-bytes */ ++t; factor = (1024 * 1024); break; default: if (*t != sentinel) return (-1); t = ""; factor = 1; } if (result > (INT_MAX / factor)) result = INT_MAX; else result *= factor; return (*t != 0 ? 0 : result); } // __kmp_str_to_int /* The routine parses input string. It is expected it is a unsigned integer with optional unit. Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" or "m" for megabytes, ..., "yb" or "y" for yottabytes. :-) Unit name is case-insensitive. The routine returns 0 if everything is ok, or error code: -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown unit *size is set to zero. */ void __kmp_str_to_size( // R: Error code. char const *str, // I: String of characters, unsigned number and unit ("b", // "kb", etc). size_t *out, // O: Parsed number. size_t dfactor, // I: The factor if none of the letters specified. char const **error // O: Null if everything is ok, error message otherwise. ) { size_t value = 0; size_t factor = 0; int overflow = 0; int i = 0; int digit; KMP_DEBUG_ASSERT(str != NULL); // Skip spaces. while (str[i] == ' ' || str[i] == '\t') { ++i; } // Parse number. 
if (str[i] < '0' || str[i] > '9') { *error = KMP_I18N_STR(NotANumber); return; } do { digit = str[i] - '0'; overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10); value = (value * 10) + digit; ++i; } while (str[i] >= '0' && str[i] <= '9'); // Skip spaces. while (str[i] == ' ' || str[i] == '\t') { ++i; } // Parse unit. #define _case(ch, exp) \ case ch: \ case ch - ('a' - 'A'): { \ size_t shift = (exp)*10; \ ++i; \ if (shift < sizeof(size_t) * 8) { \ factor = (size_t)(1) << shift; \ } else { \ overflow = 1; \ } \ } break; switch (str[i]) { _case('k', 1); // Kilo _case('m', 2); // Mega _case('g', 3); // Giga _case('t', 4); // Tera _case('p', 5); // Peta _case('e', 6); // Exa _case('z', 7); // Zetta _case('y', 8); // Yotta // Oops. No more units... } #undef _case if (str[i] == 'b' || str[i] == 'B') { // Skip optional "b". if (factor == 0) { factor = 1; } ++i; } if (!(str[i] == ' ' || str[i] == '\t' || str[i] == 0)) { // Bad unit *error = KMP_I18N_STR(BadUnit); return; } if (factor == 0) { factor = dfactor; } // Apply factor. overflow = overflow || (value > (KMP_SIZE_T_MAX / factor)); value *= factor; // Skip spaces. while (str[i] == ' ' || str[i] == '\t') { ++i; } if (str[i] != 0) { *error = KMP_I18N_STR(IllegalCharacters); return; } if (overflow) { *error = KMP_I18N_STR(ValueTooLarge); *out = KMP_SIZE_T_MAX; return; } *error = NULL; *out = value; } // __kmp_str_to_size void __kmp_str_to_uint( // R: Error code. char const *str, // I: String of characters, unsigned number. kmp_uint64 *out, // O: Parsed number. char const **error // O: Null if everything is ok, error message otherwise. ) { size_t value = 0; int overflow = 0; int i = 0; int digit; KMP_DEBUG_ASSERT(str != NULL); // Skip spaces. while (str[i] == ' ' || str[i] == '\t') { ++i; } // Parse number. 
if (str[i] < '0' || str[i] > '9') { *error = KMP_I18N_STR(NotANumber); return; } do { digit = str[i] - '0'; overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10); value = (value * 10) + digit; ++i; } while (str[i] >= '0' && str[i] <= '9'); // Skip spaces. while (str[i] == ' ' || str[i] == '\t') { ++i; } if (str[i] != 0) { *error = KMP_I18N_STR(IllegalCharacters); return; } if (overflow) { *error = KMP_I18N_STR(ValueTooLarge); *out = (kmp_uint64)-1; return; } *error = NULL; *out = value; } // __kmp_str_to_unit // end of file //