xref: /freebsd/contrib/kyua/utils/cmdline/parser.cpp (revision 497a13601fb74c264e28eff7cd3f9ac74f0f212b)
1 // Copyright 2010 The Kyua Authors.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 //   notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 //   notice, this list of conditions and the following disclaimer in the
12 //   documentation and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors
14 //   may be used to endorse or promote products derived from this software
15 //   without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #include "utils/cmdline/parser.hpp"
30 
31 #if defined(HAVE_CONFIG_H)
32 #   include "config.h"
33 #endif
34 
35 extern "C" {
36 #include <getopt.h>
37 }
38 
#include <cstdlib>
#include <cstring>
#include <limits>
#include <new>
42 
43 #include "utils/auto_array.ipp"
44 #include "utils/cmdline/exceptions.hpp"
45 #include "utils/cmdline/options.hpp"
46 #include "utils/format/macros.hpp"
47 #include "utils/noncopyable.hpp"
48 #include "utils/sanity.hpp"
49 
50 namespace cmdline = utils::cmdline;
51 
52 namespace {
53 
54 
55 /// Auxiliary data to call getopt_long(3).
56 struct getopt_data : utils::noncopyable {
57     /// Plain-text representation of the short options.
58     ///
59     /// This string follows the syntax expected by getopt_long(3) in the
60     /// argument to describe the short options.
61     std::string short_options;
62 
63     /// Representation of the long options as expected by getopt_long(3).
64     utils::auto_array< ::option > long_options;
65 
66     /// Auto-generated identifiers to be able to parse long options.
67     std::map< int, const cmdline::base_option* > ids;
68 };
69 
70 
71 /// Converts a cmdline::options_vector to a getopt_data.
72 ///
73 /// \param options The high-level definition of the options.
74 /// \param [out] data An object containing the necessary data to call
75 ///     getopt_long(3) and interpret its results.
76 static void
77 options_to_getopt_data(const cmdline::options_vector& options,
78                        getopt_data& data)
79 {
80     data.short_options.clear();
81     data.long_options.reset(new ::option[options.size() + 1]);
82 
83     int cur_id = 512;
84 
85     for (cmdline::options_vector::size_type i = 0; i < options.size(); i++) {
86         const cmdline::base_option* option = options[i];
87         ::option& long_option = data.long_options[i];
88 
89         long_option.name = option->long_name().c_str();
90         if (option->needs_arg())
91             if (option->arg_is_optional())
92                 long_option.has_arg = optional_argument;
93             else
94                 long_option.has_arg = required_argument;
95         else
96             long_option.has_arg = no_argument;
97 
98         int id = -1;
99         if (option->has_short_name()) {
100             data.short_options += option->short_name();
101             if (option->needs_arg())
102                 data.short_options += option->arg_is_optional() ? "::" : ":";
103             id = option->short_name();
104         } else {
105             id = cur_id++;
106         }
107         long_option.flag = NULL;
108         long_option.val = id;
109         data.ids[id] = option;
110     }
111 
112     ::option& last_long_option = data.long_options[options.size()];
113     last_long_option.name = NULL;
114     last_long_option.has_arg = 0;
115     last_long_option.flag = NULL;
116     last_long_option.val = 0;
117 }
118 
119 
120 /// Converts an argc/argv pair to an args_vector.
121 ///
122 /// \param argc The value of argc as passed to main().
123 /// \param argv The value of argv as passed to main().
124 ///
125 /// \return An args_vector with the same contents of argc/argv.
126 static cmdline::args_vector
127 argv_to_vector(int argc, const char* const argv[])
128 {
129     PRE(argv[argc] == NULL);
130     cmdline::args_vector args;
131     for (int i = 0; i < argc; i++)
132         args.push_back(argv[i]);
133     return args;
134 }
135 
136 
/// Creates a mutable version of argv.
///
/// Every word is duplicated with strdup(3) so that the copy can be handed to
/// functions that take a non-const argv.
///
/// \param argc The value of argc as passed to main().
/// \param argv The value of argv as passed to main().
///
/// \return A new NULL-terminated argv, with mutable buffers.  The returned
/// array must be released using the free_mutable_argv() function.
///
/// \throw std::bad_alloc If any of the words cannot be duplicated.  Nothing is
///     leaked in that case.
static char**
make_mutable_argv(const int argc, const char* const* argv)
{
    char** mutable_argv = new char*[argc + 1];
    for (int i = 0; i < argc; i++) {
        mutable_argv[i] = ::strdup(argv[i]);
        if (mutable_argv[i] == NULL) {
            // A NULL entry in the middle of the array would be taken as the
            // terminator by free_mutable_argv(), leaking the later words and
            // truncating the command line.  Undo the partial copy and report
            // the failure instead.
            for (int j = 0; j < i; j++)
                ::free(mutable_argv[j]);
            delete [] mutable_argv;
            throw std::bad_alloc();
        }
    }
    mutable_argv[argc] = NULL;
    return mutable_argv;
}
153 
154 
/// Destroys an array created by make_mutable_argv().
///
/// Each word up to the NULL terminator is released with free(3) and then the
/// array itself is deleted.
///
/// \param argv A dynamically-allocated argv as returned by make_mutable_argv().
static void
free_mutable_argv(char** argv)
{
    for (char** entry = argv; *entry != NULL; ++entry)
        ::free(*entry);
    delete [] argv;
}
168 
169 
170 /// Finds the name of the offending option after a getopt_long error.
171 ///
172 /// \param data Our internal getopt data used for the call to getopt_long.
173 /// \param getopt_optopt The value of getopt(3)'s optopt after the error.
174 /// \param argv The argv passed to getopt_long.
175 /// \param getopt_optind The value of getopt(3)'s optind after the error.
176 ///
177 /// \return A fully-specified option name (i.e. an option name prefixed by
178 ///     either '-' or '--').
179 static std::string
180 find_option_name(const getopt_data& data, const int getopt_optopt,
181                  char** argv, const int getopt_optind)
182 {
183     PRE(getopt_optopt >= 0);
184 
185     if (getopt_optopt == 0) {
186         return argv[getopt_optind - 1];
187     } else if (getopt_optopt < std::numeric_limits< char >::max()) {
188         INV(getopt_optopt > 0);
189         const char ch = static_cast< char >(getopt_optopt);
190         return F("-%s") % ch;
191     } else {
192         for (const ::option* opt = &data.long_options[0]; opt->name != NULL;
193              opt++) {
194             if (opt->val == getopt_optopt)
195                 return F("--%s") % opt->name;
196         }
197         UNREACHABLE;
198     }
199 }
200 
201 
202 }  // anonymous namespace
203 
204 
205 /// Constructs a new parsed_cmdline.
206 ///
207 /// Use the cmdline::parse() free functions to construct.
208 ///
209 /// \param option_values_ A mapping of long option names to values.  This
210 ///     contains a representation of the options provided by the user.  Note
211 ///     that each value is actually a collection values: a user may specify a
212 ///     flag multiple times, and depending on the case we want to honor one or
213 ///     the other.  For those options that support no argument, the argument
214 ///     value is the empty string.
215 /// \param arguments_ The list of non-option arguments in the command line.
216 cmdline::parsed_cmdline::parsed_cmdline(
217     const std::map< std::string, std::vector< std::string > >& option_values_,
218     const cmdline::args_vector& arguments_) :
219     _option_values(option_values_),
220     _arguments(arguments_)
221 {
222 }
223 
224 
225 /// Checks if the given option has been given in the command line.
226 ///
227 /// \param name The long option name to check for presence.
228 ///
229 /// \return True if the option has been given; false otherwise.
230 bool
231 cmdline::parsed_cmdline::has_option(const std::string& name) const
232 {
233     return _option_values.find(name) != _option_values.end();
234 }
235 
236 
237 /// Gets the raw value of an option.
238 ///
239 /// The raw value of an option is a collection of strings that represent all the
240 /// values passed to the option on the command line.  It is up to the consumer
241 /// if he wants to honor only the last value or all of them.
242 ///
243 /// The caller has to use get_option() instead; this function is internal.
244 ///
245 /// \pre has_option(name) must be true.
246 ///
247 /// \param name The option to query.
248 ///
249 /// \return The value of the option as a plain string.
250 const std::vector< std::string >&
251 cmdline::parsed_cmdline::get_option_raw(const std::string& name) const
252 {
253     std::map< std::string, std::vector< std::string > >::const_iterator iter =
254         _option_values.find(name);
255     INV_MSG(iter != _option_values.end(), F("Undefined option --%s") % name);
256     return (*iter).second;
257 }
258 
259 
260 /// Returns the non-option arguments found in the command line.
261 ///
262 /// \return The arguments, if any.
263 const cmdline::args_vector&
264 cmdline::parsed_cmdline::arguments(void) const
265 {
266     return _arguments;
267 }
268 
269 
270 /// Parses a command line.
271 ///
272 /// \param args The command line to parse, broken down by words.
273 /// \param options The description of the supported options.
274 ///
275 /// \return The parsed command line.
276 ///
277 /// \pre args[0] must be the program or command name.
278 ///
279 /// \throw cmdline::error See the description of parse(argc, argv, options) for
280 ///     more details on the raised errors.
281 cmdline::parsed_cmdline
282 cmdline::parse(const cmdline::args_vector& args,
283                const cmdline::options_vector& options)
284 {
285     PRE_MSG(args.size() >= 1, "No progname or command name found");
286 
287     utils::auto_array< const char* > argv(new const char*[args.size() + 1]);
288     for (args_vector::size_type i = 0; i < args.size(); i++)
289         argv[i] = args[i].c_str();
290     argv[args.size()] = NULL;
291     return parse(static_cast< int >(args.size()), argv.get(), options);
292 }
293 
294 
295 /// Parses a command line.
296 ///
297 /// \param argc The number of arguments in argv, without counting the
298 ///     terminating NULL.
299 /// \param argv The arguments to parse.  The array is NULL-terminated.
300 /// \param options The description of the supported options.
301 ///
302 /// \return The parsed command line.
303 ///
304 /// \pre args[0] must be the program or command name.
305 ///
306 /// \throw cmdline::missing_option_argument_error If the user specified an
307 ///     option that requires an argument, but no argument was provided.
308 /// \throw cmdline::unknown_option_error If the user specified an unknown
309 ///     option (i.e. an option not defined in options).
310 /// \throw cmdline::option_argument_value_error If the user passed an invalid
311 ///     argument to a supported option.
312 cmdline::parsed_cmdline
313 cmdline::parse(const int argc, const char* const* argv,
314                const cmdline::options_vector& options)
315 {
316     PRE_MSG(argc >= 1, "No progname or command name found");
317 
318     getopt_data data;
319     options_to_getopt_data(options, data);
320 
321     std::map< std::string, std::vector< std::string > > option_values;
322 
323     for (cmdline::options_vector::const_iterator iter = options.begin();
324          iter != options.end(); iter++) {
325         const cmdline::base_option* option = *iter;
326         if (option->needs_arg() && option->has_default_value() &&
327             !option->arg_is_optional()) {
328             option_values[option->long_name()].push_back(
329                 option->default_value());
330         }
331     }
332 
333     args_vector args;
334 
335     int mutable_argc = argc;
336     char** mutable_argv = make_mutable_argv(argc, argv);
337     const int old_opterr = ::opterr;
338     try {
339         int ch;
340 
341         ::opterr = 0;
342 
343         while ((ch = ::getopt_long(mutable_argc, mutable_argv,
344                                    ("+:" + data.short_options).c_str(),
345                                    data.long_options.get(), NULL)) != -1) {
346             if (ch == ':' ) {
347                 const std::string name = find_option_name(
348                     data, ::optopt, mutable_argv, ::optind);
349                 throw cmdline::missing_option_argument_error(name);
350             } else if (ch == '?') {
351                 const std::string name = find_option_name(
352                     data, ::optopt, mutable_argv, ::optind);
353                 throw cmdline::unknown_option_error(name);
354             }
355 
356             const std::map< int, const cmdline::base_option* >::const_iterator
357                 id = data.ids.find(ch);
358             INV(id != data.ids.end());
359             const cmdline::base_option* option = (*id).second;
360 
361             if (option->needs_arg()) {
362                 if (::optarg != NULL) {
363                     option->validate(::optarg);
364                     option_values[option->long_name()].push_back(::optarg);
365                 } else {
366                     if (option->arg_is_optional())
367                         option_values[option->long_name()].push_back(
368                             option->default_value());
369                     else
370                         INV(option->has_default_value());
371                 }
372             } else {
373                 option_values[option->long_name()].push_back("");
374             }
375         }
376         args = argv_to_vector(mutable_argc - optind, mutable_argv + optind);
377 
378         ::opterr = old_opterr;
379         ::optind = GETOPT_OPTIND_RESET_VALUE;
380 #if defined(HAVE_GETOPT_WITH_OPTRESET)
381         ::optreset = 1;
382 #endif
383     } catch (...) {
384         free_mutable_argv(mutable_argv);
385         ::opterr = old_opterr;
386         ::optind = GETOPT_OPTIND_RESET_VALUE;
387 #if defined(HAVE_GETOPT_WITH_OPTRESET)
388         ::optreset = 1;
389 #endif
390         throw;
391     }
392     free_mutable_argv(mutable_argv);
393 
394     return parsed_cmdline(option_values, args);
395 }
396