1 /* 2 * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com> 3 * 4 * libcbor is free software; you can redistribute it and/or modify 5 * it under the terms of the MIT license. See LICENSE for details. 6 */ 7 8 #include "unicode.h" 9 #include <stdint.h> 10 11 #define UTF8_ACCEPT 0 12 #define UTF8_REJECT 1 13 14 static const uint8_t utf8d[] = { 15 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..1f */ 18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..3f */ 21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..5f */ 24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..7f */ 27 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 29 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 80..9f */ 30 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 31 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 32 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..bf */ 33 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 35 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..df */ 36 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 37 0x3, 0x3, 0x4, 0x3, 0x3, /* e0..ef */ 38 0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 39 0x8, 0x8, 0x8, 0x8, 0x8, /* f0..ff */ 40 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 41 0x6, 0x1, 0x1, 0x1, 0x1, /* s0..s0 */ 42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 44 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1..s2 */ 45 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s3..s4 */ 48 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 49 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 50 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s5..s6 */ 51 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 52 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s7..s8 */ 54 }; 55 56 /* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann 57 * <bjoern@hoehrmann.de> */ 58 /* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */ 59 uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) { 60 uint32_t type = utf8d[byte]; 61 62 *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6) 63 : (0xff >> type) & (byte); 64 65 *state = utf8d[256 + *state * 16 + type]; 66 return *state; 67 } 68 69 uint64_t _cbor_unicode_codepoint_count(cbor_data source, uint64_t source_length, 70 struct _cbor_unicode_status* status) { 71 *status = 72 (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK}; 73 uint32_t codepoint, state = UTF8_ACCEPT, res; 74 uint64_t pos = 0, count = 0; 75 76 for (; pos < source_length; pos++) { 77 res = _cbor_unicode_decode(&state, &codepoint, source[pos]); 78 79 if (res == UTF8_ACCEPT) { 80 count++; 81 } else if (res == UTF8_REJECT) { 82 goto error; 83 } 84 } 85 86 /* Unfinished multibyte codepoint */ 87 if (state != UTF8_ACCEPT) goto error; 88 89 return count; 90 91 error: 92 *status = (struct _cbor_unicode_status){.location = pos, 93 .status = _CBOR_UNICODE_BADCP}; 94 return 0; 95 } 96