xref: /freebsd/contrib/libcbor/src/cbor/strings.h (revision abd872540f24cfc7dbd1ea29b6918c7082a22108)
110ff414cSEd Maste /*
210ff414cSEd Maste  * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
310ff414cSEd Maste  *
410ff414cSEd Maste  * libcbor is free software; you can redistribute it and/or modify
510ff414cSEd Maste  * it under the terms of the MIT license. See LICENSE for details.
610ff414cSEd Maste  */
710ff414cSEd Maste 
810ff414cSEd Maste #ifndef LIBCBOR_STRINGS_H
910ff414cSEd Maste #define LIBCBOR_STRINGS_H
1010ff414cSEd Maste 
1110ff414cSEd Maste #include "cbor/cbor_export.h"
1210ff414cSEd Maste #include "cbor/common.h"
1310ff414cSEd Maste 
1410ff414cSEd Maste #ifdef __cplusplus
1510ff414cSEd Maste extern "C" {
1610ff414cSEd Maste #endif
1710ff414cSEd Maste 
1810ff414cSEd Maste /*
1910ff414cSEd Maste  * ============================================================================
2010ff414cSEd Maste  * String manipulation
2110ff414cSEd Maste  * ============================================================================
2210ff414cSEd Maste  */
2310ff414cSEd Maste 
245d3e7166SEd Maste /** Returns the length of the underlying string in bytes
2510ff414cSEd Maste  *
265d3e7166SEd Maste  * There can be fewer Unicode characters than bytes (see
275d3e7166SEd Maste  * `cbor_string_codepoint_count`). For definite strings only.
2810ff414cSEd Maste  *
295d3e7166SEd Maste  * @param item a definite string
3010ff414cSEd Maste  * @return length of the string. Zero if no data has been attached yet
3110ff414cSEd Maste  */
325d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT size_t cbor_string_length(const cbor_item_t *item);
3310ff414cSEd Maste 
3410ff414cSEd Maste /** The number of codepoints in this string
3510ff414cSEd Maste  *
36*abd87254SEd Maste  * Might differ from `cbor_string_length` if there are multibyte codepoints.
37*abd87254SEd Maste  * If the string data is not valid UTF-8, returns 0.
3810ff414cSEd Maste  *
395d3e7166SEd Maste  * @param item A string
4010ff414cSEd Maste  * @return The number of codepoints in this string
4110ff414cSEd Maste  */
425d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT size_t
435d3e7166SEd Maste cbor_string_codepoint_count(const cbor_item_t *item);
4410ff414cSEd Maste 
4510ff414cSEd Maste /** Is the string definite?
4610ff414cSEd Maste  *
475d3e7166SEd Maste  * @param item a string
4810ff414cSEd Maste  * @return Is the string definite?
4910ff414cSEd Maste  */
505d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_is_definite(
515d3e7166SEd Maste     const cbor_item_t *item);
5210ff414cSEd Maste 
5310ff414cSEd Maste /** Is the string indefinite?
5410ff414cSEd Maste  *
555d3e7166SEd Maste  * @param item a string
5610ff414cSEd Maste  * @return Is the string indefinite?
5710ff414cSEd Maste  */
585d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_is_indefinite(
595d3e7166SEd Maste     const cbor_item_t *item);
6010ff414cSEd Maste 
6110ff414cSEd Maste /** Get the handle to the underlying string
6210ff414cSEd Maste  *
6310ff414cSEd Maste  * Definite items only. Modifying the data is allowed. In that case, the caller
6410ff414cSEd Maste  * takes responsibility for the effect on items this item might be a part of.
6510ff414cSEd Maste  *
665d3e7166SEd Maste  * @param item A definite string
675d3e7166SEd Maste  * @return The address of the underlying string.
685d3e7166SEd Maste  * @return `NULL` if no data have been assigned yet.
6910ff414cSEd Maste  */
705d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT cbor_mutable_data
715d3e7166SEd Maste cbor_string_handle(const cbor_item_t *item);
7210ff414cSEd Maste 
7310ff414cSEd Maste /** Set the handle to the underlying string
7410ff414cSEd Maste  *
75*abd87254SEd Maste  * The data is assumed to be a valid UTF-8 string. If the string is non-empty
76*abd87254SEd Maste  * and invalid, `cbor_string_codepoint_count` will return 0.
7710ff414cSEd Maste  *
7810ff414cSEd Maste  * \rst
7910ff414cSEd Maste  * .. warning:: Using a pointer to a stack allocated constant is a common
8010ff414cSEd Maste  *  mistake. Lifetime of the string will expire when it goes out of scope and
8110ff414cSEd Maste  *  the CBOR item will be left inconsistent.
8210ff414cSEd Maste  * \endrst
8310ff414cSEd Maste  *
845d3e7166SEd Maste  * @param item A definite string
8510ff414cSEd Maste  * @param data The memory block. The caller gives up the ownership of the block.
8610ff414cSEd Maste  * libcbor will deallocate it when appropriate using its free function
8710ff414cSEd Maste  * @param length Length of the data block
8810ff414cSEd Maste  */
8910ff414cSEd Maste CBOR_EXPORT void cbor_string_set_handle(
9010ff414cSEd Maste     cbor_item_t *item, cbor_mutable_data CBOR_RESTRICT_POINTER data,
9110ff414cSEd Maste     size_t length);
9210ff414cSEd Maste 
9310ff414cSEd Maste /** Get the handle to the array of chunks
9410ff414cSEd Maste  *
9510ff414cSEd Maste  * Manipulations with the memory block (e.g. sorting it) are allowed, but the
9610ff414cSEd Maste  * validity and the number of chunks must be retained.
9710ff414cSEd Maste  *
985d3e7166SEd Maste  * @param item An indefinite string
9910ff414cSEd Maste  * @return array of #cbor_string_chunk_count definite strings
10010ff414cSEd Maste  */
1015d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT cbor_item_t **cbor_string_chunks_handle(
1025d3e7166SEd Maste     const cbor_item_t *item);
10310ff414cSEd Maste 
10410ff414cSEd Maste /** Get the number of chunks this string consists of
10510ff414cSEd Maste  *
1065d3e7166SEd Maste  * @param item An indefinite string
10710ff414cSEd Maste  * @return The chunk count. 0 for freshly created items.
10810ff414cSEd Maste  */
1095d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT size_t
1105d3e7166SEd Maste cbor_string_chunk_count(const cbor_item_t *item);
11110ff414cSEd Maste 
11210ff414cSEd Maste /** Appends a chunk to the string
11310ff414cSEd Maste  *
11410ff414cSEd Maste  * Indefinite strings only.
11510ff414cSEd Maste  *
11610ff414cSEd Maste  * May realloc the chunk storage.
11710ff414cSEd Maste  *
1185d3e7166SEd Maste  * @param item An indefinite string
1195d3e7166SEd Maste  * @param chunk A definite string item. Its reference count will be increased
1205d3e7166SEd Maste  * by one.
1215d3e7166SEd Maste  * @return `true` on success. `false` on memory allocation failure. In that
1225d3e7166SEd Maste  * case, the refcount of @p `chunk` is not increased and the @p `item` is left
1235d3e7166SEd Maste  * intact.
12410ff414cSEd Maste  */
1255d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_add_chunk(cbor_item_t *item,
1265d3e7166SEd Maste                                                        cbor_item_t *chunk);
12710ff414cSEd Maste 
12810ff414cSEd Maste /** Creates a new definite string
12910ff414cSEd Maste  *
13010ff414cSEd Maste  * The handle is initialized to `NULL` and length to 0
13110ff414cSEd Maste  *
1325d3e7166SEd Maste  * @return Reference to the new string item. The item's reference count is
1335d3e7166SEd Maste  * initialized to one.
1345d3e7166SEd Maste  * @return `NULL` if memory allocation fails
13510ff414cSEd Maste  */
1365d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT cbor_item_t *cbor_new_definite_string(void);
13710ff414cSEd Maste 
13810ff414cSEd Maste /** Creates a new indefinite string
13910ff414cSEd Maste  *
14010ff414cSEd Maste  * The chunks array is initialized to `NULL` and chunk count to 0
14110ff414cSEd Maste  *
1425d3e7166SEd Maste  * @return Reference to the new string item. The item's reference count is
1435d3e7166SEd Maste  * initialized to one.
1445d3e7166SEd Maste  * @return `NULL` if memory allocation fails
14510ff414cSEd Maste  */
1465d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT cbor_item_t *cbor_new_indefinite_string(void);
14710ff414cSEd Maste 
14810ff414cSEd Maste /** Creates a new string and initializes it
14910ff414cSEd Maste  *
150*abd87254SEd Maste  * The data from `val` will be copied to a newly allocated memory block.
151*abd87254SEd Maste  *
152*abd87254SEd Maste  * Note that valid UTF-8 strings do not contain null bytes, so this routine is
153*abd87254SEd Maste  * correct for all valid inputs. If the input is not guaranteed to be valid,
154*abd87254SEd Maste  * use `cbor_build_stringn` instead.
15510ff414cSEd Maste  *
15610ff414cSEd Maste  * @param val A null-terminated UTF-8 string
1575d3e7166SEd Maste  * @return Reference to the new string item. The item's reference count is
1585d3e7166SEd Maste  * initialized to one.
1595d3e7166SEd Maste  * @return `NULL` if memory allocation fails
16010ff414cSEd Maste  */
1615d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT cbor_item_t *cbor_build_string(const char *val);
16210ff414cSEd Maste 
16310ff414cSEd Maste /** Creates a new string and initializes it
16410ff414cSEd Maste  *
165*abd87254SEd Maste  * The data from `val` will be copied to a newly allocated memory block.
16610ff414cSEd Maste  *
167*abd87254SEd Maste  * All @p `length` bytes will be stored in the string, even if there are null
168*abd87254SEd Maste  * bytes or invalid UTF-8 sequences.
169*abd87254SEd Maste  *
170*abd87254SEd Maste  * @param val A UTF-8 string, at least @p `length` bytes long
1715d3e7166SEd Maste  * @param length Length (in bytes) of the string passed in @p `val`.
1725d3e7166SEd Maste  * @return Reference to the new string item. The item's reference count is
1735d3e7166SEd Maste  * initialized to one.
1745d3e7166SEd Maste  * @return `NULL` if memory allocation fails
17510ff414cSEd Maste  */
1765d3e7166SEd Maste _CBOR_NODISCARD CBOR_EXPORT cbor_item_t *cbor_build_stringn(const char *val,
1775d3e7166SEd Maste                                                             size_t length);
17810ff414cSEd Maste 
17910ff414cSEd Maste #ifdef __cplusplus
18010ff414cSEd Maste }
18110ff414cSEd Maste #endif
18210ff414cSEd Maste 
18310ff414cSEd Maste #endif  // LIBCBOR_STRINGS_H
184