xref: /freebsd/contrib/libcbor/src/cbor/strings.h (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 /*
2  * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
3  *
4  * libcbor is free software; you can redistribute it and/or modify
5  * it under the terms of the MIT license. See LICENSE for details.
6  */
7 
8 #ifndef LIBCBOR_STRINGS_H
9 #define LIBCBOR_STRINGS_H
10 
11 #include "cbor/cbor_export.h"
12 #include "cbor/common.h"
13 
14 #ifdef __cplusplus
15 extern "C" {
16 #endif
17 
18 /*
19  * ============================================================================
20  * String manipulation
21  * ============================================================================
22  */
23 
24 /** Returns the length of the underlying string in bytes
25  *
26  * There can be fewer Unicode characters than bytes (see
27  * `cbor_string_codepoint_count`). For definite strings only.
28  *
29  * @param item a definite string
30  * @return length of the string. Zero if no chunk has been attached yet
31  */
32 _CBOR_NODISCARD CBOR_EXPORT size_t cbor_string_length(const cbor_item_t *item);
33 
34 /** The number of codepoints in this string
35  *
36  * Might differ from `cbor_string_length` if there are multibyte codepoints.
37  * If the string data is not valid UTF-8, returns 0.
38  *
39  * @param item A string
40  * @return The number of codepoints in this string, or 0 for invalid UTF-8
41  */
42 _CBOR_NODISCARD CBOR_EXPORT size_t
43 cbor_string_codepoint_count(const cbor_item_t *item);
44 
44 /** Is the string definite?
45  *
46  * @param item a string
47  * @return `true` if the string is definite, `false` otherwise
48  */
49 _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_is_definite(
50     const cbor_item_t *item);
52 
52 /** Is the string indefinite?
53  *
54  * @param item a string
55  * @return `true` if the string is indefinite, `false` otherwise
56  */
57 _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_is_indefinite(
58     const cbor_item_t *item);
60 
60 /** Get the handle to the underlying string
61  *
62  * Definite items only. Modifying the data is allowed. In that case, the caller
63  * takes responsibility for the effect on items this item might be a part of.
64  *
65  * @param item A definite string
66  * @return The address of the underlying string.
67  * @return `NULL` if no data have been assigned yet.
68  */
69 _CBOR_NODISCARD CBOR_EXPORT cbor_mutable_data
70 cbor_string_handle(const cbor_item_t *item);
72 
72 /** Set the handle to the underlying string
73  *
74  * The data is assumed to be a valid UTF-8 string. If the string is non-empty
75  * and invalid, `cbor_string_codepoint_count` will return 0.
76  *
77  * \rst
78  * .. warning:: Using a pointer to a stack-allocated constant is a common
79  *  mistake. Lifetime of the string will expire when it goes out of scope and
80  *  the CBOR item will be left inconsistent.
81  * \endrst
82  *
83  * @param item A definite string
84  * @param data The memory block. The caller gives up the ownership of the block.
85  * libcbor will deallocate it when appropriate using its free function.
86  * @param length Length of the data block
87  */
88 CBOR_EXPORT void cbor_string_set_handle(
89     cbor_item_t *item, cbor_mutable_data CBOR_RESTRICT_POINTER data,
90     size_t length);
92 
92 /** Get the handle to the array of chunks
93  *
94  * Manipulations with the memory block (e.g. sorting it) are allowed, but the
95  * validity and the number of chunks must be retained.
96  *
97  * @param item An indefinite string
98  * @return array of #cbor_string_chunk_count definite strings
99  */
100 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t **cbor_string_chunks_handle(
101     const cbor_item_t *item);
103 
103 /** Get the number of chunks this string consists of
104  *
105  * @param item An indefinite string
106  * @return The chunk count. 0 for freshly created items.
107  */
108 _CBOR_NODISCARD CBOR_EXPORT size_t
109 cbor_string_chunk_count(const cbor_item_t *item);
111 
111 /** Appends a chunk to the string
112  *
113  * Indefinite strings only.
114  *
115  * May reallocate the chunk storage.
116  *
117  * @param item An indefinite string
118  * @param chunk A definite string item. Its reference count will be increased
119  * by one on success.
120  * @return `true` on success. `false` on memory allocation failure. In that
121  * case, the refcount of @p `chunk` is not increased and the @p `item` is left
122  * intact.
123  */
124 _CBOR_NODISCARD CBOR_EXPORT bool cbor_string_add_chunk(cbor_item_t *item,
125                                                        cbor_item_t *chunk);
127 
128 /** Creates a new definite string
129  *
130  * The handle is initialized to `NULL` and the length to 0.
131  *
132  * @return Reference to the new string item. The item's reference count is
133  * initialized to one.
134  * @return `NULL` if memory allocation fails
135  */
136 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t *cbor_new_definite_string(void);
137 
138 /** Creates a new indefinite string
139  *
140  * The chunks array is initialized to `NULL` and the chunk count to 0.
141  *
142  * @return Reference to the new string item. The item's reference count is
143  * initialized to one.
144  * @return `NULL` if memory allocation fails
145  */
146 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t *cbor_new_indefinite_string(void);
147 
148 /** Creates a new string and initializes it
149  *
150  * The data from `val` will be copied to a newly allocated memory block.
151  *
152  * Note that valid UTF-8 strings do not contain null bytes, so this routine is
153  * correct for all valid inputs. If the input is not guaranteed to be valid,
154  * use `cbor_build_stringn` (which takes an explicit length) instead.
155  *
156  * @param val A null-terminated UTF-8 string
157  * @return Reference to the new string item. The item's reference count is
158  * initialized to one.
159  * @return `NULL` if memory allocation fails
160  */
161 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t *cbor_build_string(const char *val);
162 
162 /** Creates a new string and initializes it
163  *
164  * The data from `val` will be copied to a newly allocated memory block.
165  *
166  * All @p `length` bytes will be stored in the string, even if there are null
167  * bytes or invalid UTF-8 sequences.
168  *
169  * @param val A UTF-8 string, at least @p `length` bytes long
170  * @param length Length (in bytes) of the string passed in @p `val`.
171  * @return Reference to the new string item. The item's reference count is
172  * initialized to one.
173  * @return `NULL` if memory allocation fails
174  */
175 _CBOR_NODISCARD CBOR_EXPORT cbor_item_t *cbor_build_stringn(const char *val,
176                                                             size_t length);
178 
179 #ifdef __cplusplus
180 }
181 #endif
182 
183 #endif  // LIBCBOR_STRINGS_H
184