// Copyright 2012 The Kyua Authors.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*b0d29bc4SBrooks Davis
29*b0d29bc4SBrooks Davis #include "utils/text/operations.ipp"
30*b0d29bc4SBrooks Davis
31*b0d29bc4SBrooks Davis #include <sstream>
32*b0d29bc4SBrooks Davis
33*b0d29bc4SBrooks Davis #include "utils/format/macros.hpp"
34*b0d29bc4SBrooks Davis #include "utils/sanity.hpp"
35*b0d29bc4SBrooks Davis
36*b0d29bc4SBrooks Davis namespace text = utils::text;
37*b0d29bc4SBrooks Davis
38*b0d29bc4SBrooks Davis
/// Replaces XML special characters from an input string.
///
/// The list of XML special characters is specified here:
///     http://www.w3.org/TR/xml11/#charsets
///
/// The five markup characters (", &, <, > and ') are replaced by their
/// predefined entities.  Bytes that fall in the RestrictedChar ranges are
/// replaced by the literal text "&amp;#N;", where N is the decimal value of the
/// byte, so that the escape sequence itself is what shows up when the XML is
/// rendered.  Every other byte is copied verbatim.
///
/// \param in The input to quote.
///
/// \return A quoted string without any XML special characters.
std::string
text::escape_xml(const std::string& in)
{
    std::ostringstream quoted;

    for (std::string::const_iterator it = in.begin();
         it != in.end(); ++it) {
        // Work on the unsigned value of the byte: plain char may be signed,
        // in which case bytes >= 0x80 would compare (and print) as negative.
        const unsigned char c = static_cast< unsigned char >(*it);
        if (c == '"') {
            quoted << "&quot;";
        } else if (c == '&') {
            quoted << "&amp;";
        } else if (c == '<') {
            quoted << "&lt;";
        } else if (c == '>') {
            quoted << "&gt;";
        } else if (c == '\'') {
            quoted << "&apos;";
        } else if ((c >= 0x01 && c <= 0x08) ||
                   (c >= 0x0B && c <= 0x0C) ||
                   (c >= 0x0E && c <= 0x1F) ||
                   (c >= 0x7F && c <= 0x84) ||
                   (c >= 0x86 && c <= 0x9F)) {
            // For RestrictedChar characters, escape them as
            // '&amp;#[decimal value];' so that in the XML file we will see
            // the escaped character.  The value is taken from the unsigned
            // byte so that 0x80-0x9F print as 128-159 rather than as a
            // sign-extended number.
            quoted << "&amp;#" << static_cast< unsigned int >(c) << ";";
        } else {
            quoted << *it;
        }
    }
    return quoted.str();
}
82*b0d29bc4SBrooks Davis
83*b0d29bc4SBrooks Davis
/// Surrounds a string with quotes, escaping the quote itself if needed.
///
/// Every occurrence of the quote character inside the text is prefixed with a
/// backslash.  No other character (including the backslash itself) is
/// modified.
///
/// \param text The string to quote.
/// \param quote The quote character to use.
///
/// \return The quoted string.
std::string
text::quote(const std::string& text, const char quote)
{
    std::ostringstream quoted;
    quoted << quote;

    // start_pos marks the beginning of the chunk not yet copied; last_pos is
    // the next quote character to escape.
    std::string::size_type start_pos = 0;
    std::string::size_type last_pos = text.find(quote);
    while (last_pos != std::string::npos) {
        // Copy everything up to (but excluding) the quote and emit the
        // backslash; the quote itself is copied as the first character of the
        // next chunk because start_pos is left pointing at it.
        quoted << text.substr(start_pos, last_pos - start_pos) << '\\';
        start_pos = last_pos;
        last_pos = text.find(quote, start_pos + 1);
    }
    quoted << text.substr(start_pos);

    quoted << quote;
    return quoted.str();
}
108*b0d29bc4SBrooks Davis
109*b0d29bc4SBrooks Davis
/// Fills a paragraph to the specified length.
///
/// This preserves any sequence of spaces in the input and any possible
/// newlines.  Sequences of spaces may be split in half (and thus one space is
/// lost), but the rest of the spaces will be preserved as either trailing or
/// leading spaces.
///
/// Lines are only ever broken at a space.  A word that is longer than the
/// target width is kept whole, so the resulting line exceeds the target width.
///
/// \param input The string to refill.
/// \param target_width The width to refill the paragraph to.
///
/// \return The refilled paragraph as a sequence of independent lines.  An
/// empty input yields a single empty line.
std::vector< std::string >
text::refill(const std::string& input, const std::size_t target_width)
{
    std::vector< std::string > output;

    std::string::size_type start = 0;
    while (start < input.length()) {
        std::string::size_type width;
        if (start + target_width >= input.length()) {
            // The remainder of the input fits in a single line.
            width = input.length() - start;
        } else {
            if (input[start + target_width] == ' ') {
                // A space sits right after a full-width line: break there.
                width = target_width;
            } else {
                // Look backwards for the last space that still fits.
                const std::string::size_type pos = input.find_last_of(
                    " ", start + target_width - 1);
                if (pos == std::string::npos || pos < start + 1) {
                    // No usable space within this line: the word is too long,
                    // so extend the line up to the next space (or to the end
                    // of the input if there is none).
                    width = input.find_first_of(" ", start + target_width);
                    if (width == std::string::npos)
                        width = input.length() - start;
                    else
                        width -= start;
                } else {
                    width = pos - start;
                }
            }
        }
        INV(width != std::string::npos);
        INV(start + width <= input.length());
        INV(input[start + width] == ' ' || input[start + width] == '\0');
        output.push_back(input.substr(start, width));

        // Skip the space at which the line was broken.
        start += width + 1;
    }

    if (input.empty()) {
        INV(output.empty());
        output.push_back("");
    }

    return output;
}
163*b0d29bc4SBrooks Davis
164*b0d29bc4SBrooks Davis
165*b0d29bc4SBrooks Davis /// Fills a paragraph to the specified length.
166*b0d29bc4SBrooks Davis ///
167*b0d29bc4SBrooks Davis /// See the documentation for refill() for additional details.
168*b0d29bc4SBrooks Davis ///
169*b0d29bc4SBrooks Davis /// \param input The string to refill.
170*b0d29bc4SBrooks Davis /// \param target_width The width to refill the paragraph to.
171*b0d29bc4SBrooks Davis ///
172*b0d29bc4SBrooks Davis /// \return The refilled paragraph as a string with embedded newlines.
173*b0d29bc4SBrooks Davis std::string
refill_as_string(const std::string & input,const std::size_t target_width)174*b0d29bc4SBrooks Davis text::refill_as_string(const std::string& input, const std::size_t target_width)
175*b0d29bc4SBrooks Davis {
176*b0d29bc4SBrooks Davis return join(refill(input, target_width), "\n");
177*b0d29bc4SBrooks Davis }
178*b0d29bc4SBrooks Davis
179*b0d29bc4SBrooks Davis
/// Replaces all occurrences of a substring in a string.
///
/// Matches are searched left to right and do not overlap: the search resumes
/// right after the end of the previous match.
///
/// \param input The string in which to perform the replacement.
/// \param search The pattern to be replaced.  If empty, no replacement is
///     performed.
/// \param replacement The substring to replace search with.
///
/// \return A copy of input with the replacements performed.
std::string
text::replace_all(const std::string& input, const std::string& search,
                  const std::string& replacement)
{
    // An empty pattern matches at every offset without consuming anything, so
    // the loop below would never advance.  Treat it as "nothing to replace".
    if (search.empty())
        return input;

    std::string output;

    std::string::size_type pos, lastpos = 0;
    while ((pos = input.find(search, lastpos)) != std::string::npos) {
        output.append(input, lastpos, pos - lastpos);
        output += replacement;
        lastpos = pos + search.length();
    }
    output.append(input, lastpos, std::string::npos);

    return output;
}
203*b0d29bc4SBrooks Davis
204*b0d29bc4SBrooks Davis
/// Splits a string into different components.
///
/// Consecutive, leading and trailing delimiters produce empty words.  An
/// empty input string produces no words at all.
///
/// \param str The string to split.
/// \param delimiter The separator to use to split the words.
///
/// \return The different words in the input string as split by the provided
/// delimiter.
std::vector< std::string >
text::split(const std::string& str, const char delimiter)
{
    std::vector< std::string > words;
    if (!str.empty()) {
        // First word: everything up to the first delimiter (or the whole
        // string if there is none, as substr clamps the npos length).
        std::string::size_type pos = str.find(delimiter);
        words.push_back(str.substr(0, pos));
        while (pos != std::string::npos) {
            // Step over the delimiter found and extract up to the next one.
            ++pos;
            const std::string::size_type next = str.find(delimiter, pos);
            words.push_back(str.substr(pos, next - pos));
            pos = next;
        }
    }
    return words;
}
228*b0d29bc4SBrooks Davis
229*b0d29bc4SBrooks Davis
230*b0d29bc4SBrooks Davis /// Converts a string to a boolean.
231*b0d29bc4SBrooks Davis ///
232*b0d29bc4SBrooks Davis /// \param str The string to convert.
233*b0d29bc4SBrooks Davis ///
234*b0d29bc4SBrooks Davis /// \return The converted string, if the input string was valid.
235*b0d29bc4SBrooks Davis ///
236*b0d29bc4SBrooks Davis /// \throw std::value_error If the input string does not represent a valid
237*b0d29bc4SBrooks Davis /// boolean value.
238*b0d29bc4SBrooks Davis template<>
239*b0d29bc4SBrooks Davis bool
to_type(const std::string & str)240*b0d29bc4SBrooks Davis text::to_type(const std::string& str)
241*b0d29bc4SBrooks Davis {
242*b0d29bc4SBrooks Davis if (str == "true")
243*b0d29bc4SBrooks Davis return true;
244*b0d29bc4SBrooks Davis else if (str == "false")
245*b0d29bc4SBrooks Davis return false;
246*b0d29bc4SBrooks Davis else
247*b0d29bc4SBrooks Davis throw value_error(F("Invalid boolean value '%s'") % str);
248*b0d29bc4SBrooks Davis }
249*b0d29bc4SBrooks Davis
250*b0d29bc4SBrooks Davis
/// Identity function for to_type, for genericity purposes.
///
/// \param str The string to convert.
///
/// \return The input string.
template<>
std::string
text::to_type(const std::string& str)
{
    return str;
}
262