1 /* 2 * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com> 3 * 4 * libcbor is free software; you can redistribute it and/or modify 5 * it under the terms of the MIT license. See LICENSE for details. 6 */ 7 8 #include "unicode.h" 9 10 #define UTF8_ACCEPT 0 11 #define UTF8_REJECT 1 12 13 static const uint8_t utf8d[] = { 14 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..1f */ 17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..3f */ 20 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..5f */ 23 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..7f */ 26 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 28 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 80..9f */ 29 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 30 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 31 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..bf */ 32 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 33 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..df */ 35 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 36 0x3, 0x3, 0x4, 0x3, 0x3, /* e0..ef */ 37 0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 38 0x8, 0x8, 0x8, 0x8, 0x8, /* f0..ff */ 39 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 40 0x6, 0x1, 0x1, 0x1, 0x1, /* s0..s0 */ 41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 43 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1..s2 */ 44 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s3..s4 */ 47 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 48 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 49 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s5..s6 */ 50 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 51 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s7..s8 */ 53 }; 54 55 /* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann 56 * <bjoern@hoehrmann.de> */ 57 /* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */ 58 uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) { 59 uint32_t type = utf8d[byte]; 60 61 *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6) 62 : (0xff >> type) & (byte); 63 64 *state = utf8d[256 + *state * 16 + type]; 65 return *state; 66 } 67 68 size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length, 69 struct _cbor_unicode_status* status) { 70 *status = 71 (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK}; 72 uint32_t codepoint, state = UTF8_ACCEPT, res; 73 size_t pos = 0, count = 0; 74 75 for (; pos < source_length; pos++) { 76 res = _cbor_unicode_decode(&state, &codepoint, source[pos]); 77 78 if (res == UTF8_ACCEPT) { 79 count++; 80 } else if (res == UTF8_REJECT) { 81 goto error; 82 } 83 } 84 85 /* Unfinished multibyte codepoint */ 86 if (state != UTF8_ACCEPT) goto error; 87 88 return count; 89 90 error: 91 *status = (struct _cbor_unicode_status){.location = pos, 92 .status = _CBOR_UNICODE_BADCP}; 93 return -1; 94 } 95