xref: /freebsd/contrib/kyua/utils/text/operations.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1 // Copyright 2012 The Kyua Authors.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 //   notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 //   notice, this list of conditions and the following disclaimer in the
12 //   documentation and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors
14 //   may be used to endorse or promote products derived from this software
15 //   without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #include "utils/text/operations.ipp"
30 
31 #include <sstream>
32 
33 #include "utils/format/macros.hpp"
34 #include "utils/sanity.hpp"
35 
36 namespace text = utils::text;
37 
38 
39 /// Replaces XML special characters from an input string.
40 ///
41 /// The list of XML special characters is specified here:
42 ///     http://www.w3.org/TR/xml11/#charsets
43 ///
44 /// \param in The input to quote.
45 ///
46 /// \return A quoted string without any XML special characters.
47 std::string
48 text::escape_xml(const std::string& in)
49 {
50     std::ostringstream quoted;
51 
52     for (std::string::const_iterator it = in.begin();
53          it != in.end(); ++it) {
54         unsigned char c = (unsigned char)*it;
55         if (c == '"') {
56             quoted << "&quot;";
57         } else if (c == '&') {
58             quoted << "&amp;";
59         } else if (c == '<') {
60             quoted << "&lt;";
61         } else if (c == '>') {
62             quoted << "&gt;";
63         } else if (c == '\'') {
64             quoted << "&apos;";
65         } else if ((c >= 0x01 && c <= 0x08) ||
66                    (c >= 0x0B && c <= 0x0C) ||
67                    (c >= 0x0E && c <= 0x1F) ||
68                    (c >= 0x7F && c <= 0x84) ||
69                    (c >= 0x86 && c <= 0x9F)) {
70             // for RestrictedChar characters, escape them
71             // as '&amp;#[decimal ASCII value];'
72             // so that in the XML file we will see the escaped
73             // character.
74             quoted << "&amp;#" << static_cast< std::string::size_type >(*it)
75                    << ";";
76         } else {
77             quoted << *it;
78         }
79     }
80     return quoted.str();
81 }
82 
83 
84 /// Surrounds a string with quotes, escaping the quote itself if needed.
85 ///
86 /// \param text The string to quote.
87 /// \param quote The quote character to use.
88 ///
89 /// \return The quoted string.
90 std::string
91 text::quote(const std::string& text, const char quote)
92 {
93     std::ostringstream quoted;
94     quoted << quote;
95 
96     std::string::size_type start_pos = 0;
97     std::string::size_type last_pos = text.find(quote);
98     while (last_pos != std::string::npos) {
99         quoted << text.substr(start_pos, last_pos - start_pos) << '\\';
100         start_pos = last_pos;
101         last_pos = text.find(quote, start_pos + 1);
102     }
103     quoted << text.substr(start_pos);
104 
105     quoted << quote;
106     return quoted.str();
107 }
108 
109 
110 /// Fills a paragraph to the specified length.
111 ///
112 /// This preserves any sequence of spaces in the input and any possible
113 /// newlines.  Sequences of spaces may be split in half (and thus one space is
114 /// lost), but the rest of the spaces will be preserved as either trailing or
115 /// leading spaces.
116 ///
117 /// \param input The string to refill.
118 /// \param target_width The width to refill the paragraph to.
119 ///
120 /// \return The refilled paragraph as a sequence of independent lines.
121 std::vector< std::string >
122 text::refill(const std::string& input, const std::size_t target_width)
123 {
124     std::vector< std::string > output;
125 
126     std::string::size_type start = 0;
127     while (start < input.length()) {
128         std::string::size_type width;
129         if (start + target_width >= input.length())
130             width = input.length() - start;
131         else {
132             if (input[start + target_width] == ' ') {
133                 width = target_width;
134             } else {
135                 const std::string::size_type pos = input.find_last_of(
136                     " ", start + target_width - 1);
137                 if (pos == std::string::npos || pos < start + 1) {
138                     width = input.find_first_of(" ", start + target_width);
139                     if (width == std::string::npos)
140                         width = input.length() - start;
141                     else
142                         width -= start;
143                 } else {
144                     width = pos - start;
145                 }
146             }
147         }
148         INV(width != std::string::npos);
149         INV(start + width <= input.length());
150         INV(input[start + width] == ' ' || input[start + width] == '\0');
151         output.push_back(input.substr(start, width));
152 
153         start += width + 1;
154     }
155 
156     if (input.empty()) {
157         INV(output.empty());
158         output.push_back("");
159     }
160 
161     return output;
162 }
163 
164 
165 /// Fills a paragraph to the specified length.
166 ///
167 /// See the documentation for refill() for additional details.
168 ///
169 /// \param input The string to refill.
170 /// \param target_width The width to refill the paragraph to.
171 ///
172 /// \return The refilled paragraph as a string with embedded newlines.
173 std::string
174 text::refill_as_string(const std::string& input, const std::size_t target_width)
175 {
176     return join(refill(input, target_width), "\n");
177 }
178 
179 
180 /// Replaces all occurrences of a substring in a string.
181 ///
182 /// \param input The string in which to perform the replacement.
183 /// \param search The pattern to be replaced.
184 /// \param replacement The substring to replace search with.
185 ///
186 /// \return A copy of input with the replacements performed.
187 std::string
188 text::replace_all(const std::string& input, const std::string& search,
189                   const std::string& replacement)
190 {
191     std::string output;
192 
193     std::string::size_type pos, lastpos = 0;
194     while ((pos = input.find(search, lastpos)) != std::string::npos) {
195         output += input.substr(lastpos, pos - lastpos);
196         output += replacement;
197         lastpos = pos + search.length();
198     }
199     output += input.substr(lastpos);
200 
201     return output;
202 }
203 
204 
205 /// Splits a string into different components.
206 ///
207 /// \param str The string to split.
208 /// \param delimiter The separator to use to split the words.
209 ///
210 /// \return The different words in the input string as split by the provided
211 /// delimiter.
212 std::vector< std::string >
213 text::split(const std::string& str, const char delimiter)
214 {
215     std::vector< std::string > words;
216     if (!str.empty()) {
217         std::string::size_type pos = str.find(delimiter);
218         words.push_back(str.substr(0, pos));
219         while (pos != std::string::npos) {
220             ++pos;
221             const std::string::size_type next = str.find(delimiter, pos);
222             words.push_back(str.substr(pos, next - pos));
223             pos = next;
224         }
225     }
226     return words;
227 }
228 
229 
230 /// Converts a string to a boolean.
231 ///
232 /// \param str The string to convert.
233 ///
234 /// \return The converted string, if the input string was valid.
235 ///
236 /// \throw std::value_error If the input string does not represent a valid
237 ///     boolean value.
238 template<>
239 bool
240 text::to_type(const std::string& str)
241 {
242     if (str == "true")
243         return true;
244     else if (str == "false")
245         return false;
246     else
247         throw value_error(F("Invalid boolean value '%s'") % str);
248 }
249 
250 
251 /// Identity function for to_type, for genericity purposes.
252 ///
253 /// \param str The string to convert.
254 ///
255 /// \return The input string.
256 template<>
257 std::string
258 text::to_type(const std::string& str)
259 {
260     return str;
261 }
262