xref: /freebsd/contrib/libcbor/src/cbor/strings.h (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 /*
2  * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
3  *
4  * libcbor is free software; you can redistribute it and/or modify
5  * it under the terms of the MIT license. See LICENSE for details.
6  */
7 
8 #ifndef LIBCBOR_STRINGS_H
9 #define LIBCBOR_STRINGS_H
10 
11 #include "cbor/cbor_export.h"
12 #include "cbor/common.h"
13 
14 #ifdef __cplusplus
15 extern "C" {
16 #endif
17 
18 /*
19  * ============================================================================
20  * String manipulation
21  * ============================================================================
22  */
23 
24 /** Returns the length of the underlying string in bytes
25  *
26  * There can be fewer Unicode characters than bytes (see
27  * `cbor_string_codepoint_count`). For definite strings only.
28  *
29  * @param item a definite string
30  * @return length of the string. Zero if no chunk has been attached yet
31  */
32 _CBOR_NODISCARD CBOR_EXPORT size_t cbor_string_length(const cbor_item_t* item);
33 
34 /** The number of codepoints in this string
35  *
36  * Might differ from `cbor_string_length` if there are multibyte codepoints.
37  * If the string data is not valid UTF-8, returns 0.
38  *
39  * @param item A string
40  * @return The number of codepoints in this string, or 0 for invalid UTF-8
41  */
42 _CBOR_NODISCARD CBOR_EXPORT size_t
43 cbor_string_codepoint_count(const cbor_item_t* item);
44 
45 /** Is the string definite?
46  *
47  * @param item a string
48  * @return `true` if the string is definite, `false` otherwise
49  */
50 _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_is_definite(
51     const cbor_item_t* item);
52 
53 /** Is the string indefinite?
54  *
55  * @param item a string
56  * @return `true` if the string is indefinite, `false` otherwise
57  */
58 _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_is_indefinite(
59     const cbor_item_t* item);
60 
61 /** Get the handle to the underlying string
62  *
63  * Definite items only. Modifying the data is allowed. In that case, the caller
64  * takes responsibility for the effect on items this item might be a part of.
65  *
66  * @param item A definite string
67  * @return The address of the underlying string.
68  * @return `NULL` if no data has been assigned yet.
69  */
70 _CBOR_NODISCARD CBOR_EXPORT cbor_mutable_data
71 cbor_string_handle(const cbor_item_t* item);
72 
73 /** Set the handle to the underlying string
74  *
75  * The data is assumed to be a valid UTF-8 string. If the string is non-empty
76  * and invalid, `cbor_string_codepoint_count` will return 0.
77  *
78  * \rst
79  * .. warning::
80  *   Using a pointer to a stack-allocated constant is a common mistake.
81  *   Lifetime of the string will expire when it goes out of scope and the CBOR
82  *   item will be left inconsistent.
83  * \endrst
84  *
85  * @param item A definite string
86  * @param data The memory block. The caller gives up the ownership of the block;
87  * libcbor will deallocate it when appropriate using its free function.
88  * @param length Length of the data block
89  */
90 CBOR_EXPORT void cbor_string_set_handle(
91     cbor_item_t* item, cbor_mutable_data CBOR_RESTRICT_POINTER data,
92     size_t length);
93 
94 /** Get the handle to the array of chunks
95  *
96  * Manipulations with the memory block (e.g. sorting it) are allowed, but the
97  * validity and the number of chunks must be retained.
98  *
99  * @param item An indefinite string
100  * @return array of #cbor_string_chunk_count definite strings
101  */
102 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t** cbor_string_chunks_handle(
103     const cbor_item_t* item);
104 
105 /** Get the number of chunks this string consists of
106  *
107  * @param item An indefinite string
108  * @return The chunk count. 0 for freshly created items.
109  */
110 _CBOR_NODISCARD CBOR_EXPORT size_t
111 cbor_string_chunk_count(const cbor_item_t* item);
112 
113 /** Appends a chunk to the string
114  *
115  * Indefinite strings only.
116  *
117  * May reallocate the chunk storage.
118  *
119  * @param item An indefinite string
120  * @param chunk A definite string item. Its reference count will be increased
121  * by one.
122  * @return `true` on success. `false` on memory allocation failure. In that
123  * case, the refcount of @p `chunk` is not increased and the @p `item` is left
124  * intact.
125  */
126 _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_add_chunk(cbor_item_t* item,
127                                                        cbor_item_t* chunk);
128 
129 /** Creates a new definite string
130  *
131  * The handle is initialized to `NULL` and the length to 0.
132  *
133  * @return Reference to the new string item. The item's reference count is
134  * initialized to one.
135  * @return `NULL` if memory allocation fails
136  */
137 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t* cbor_new_definite_string(void);
138 
139 /** Creates a new indefinite string
140  *
141  * The chunks array is initialized to `NULL` and the chunk count to 0.
142  *
143  * @return Reference to the new string item. The item's reference count is
144  * initialized to one.
145  * @return `NULL` if memory allocation fails
146  */
147 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t* cbor_new_indefinite_string(void);
148 
149 /** Creates a new string and initializes it
150  *
151  * The data from @p `val` will be copied to a newly allocated memory block.
152  *
153  * Note that valid UTF-8 strings do not contain null bytes, so this routine is
154  * correct for all valid inputs. If the input is not guaranteed to be valid,
155  * use `cbor_build_stringn` instead.
156  *
157  * @param val A null-terminated UTF-8 string
158  * @return Reference to the new string item. The item's reference count is
159  * initialized to one.
160  * @return `NULL` if memory allocation fails
161  */
162 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t* cbor_build_string(const char* val);
163 
164 /** Creates a new string and initializes it
165  *
166  * The data from @p `val` will be copied to a newly allocated memory block.
167  *
168  * All @p `length` bytes will be stored in the string, even if there are null
169  * bytes or invalid UTF-8 sequences.
170  *
171  * @param val A UTF-8 string, at least @p `length` bytes long
172  * @param length Length (in bytes) of the string passed in @p `val`.
173  * @return Reference to the new string item. The item's reference count is
174  * initialized to one.
175  * @return `NULL` if memory allocation fails
176  */
177 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t* cbor_build_stringn(const char* val,
178                                                             size_t length);
179 
180 #ifdef __cplusplus
181 }
182 #endif
183 
184 #endif  // LIBCBOR_STRINGS_H
185