xref: /freebsd/contrib/kyua/utils/cmdline/parser.cpp (revision 517e52b6c21ccff22c46df0dcd15c19baee3d86c)
1 // Copyright 2010 The Kyua Authors.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 //   notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 //   notice, this list of conditions and the following disclaimer in the
12 //   documentation and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors
14 //   may be used to endorse or promote products derived from this software
15 //   without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #include "utils/cmdline/parser.hpp"
30 
31 #if defined(HAVE_CONFIG_H)
32 #   include "config.h"
33 #endif
34 
35 extern "C" {
36 #include <getopt.h>
37 }
38 
#include <cstdlib>
#include <cstring>
#include <limits>
#include <new>
42 
43 #include "utils/auto_array.ipp"
44 #include "utils/cmdline/exceptions.hpp"
45 #include "utils/cmdline/options.hpp"
46 #include "utils/format/macros.hpp"
47 #include "utils/noncopyable.hpp"
48 #include "utils/sanity.hpp"
49 
50 namespace cmdline = utils::cmdline;
51 
52 namespace {
53 
54 
/// Auxiliary data to call getopt_long(3).
///
/// Bundles the three pieces of information that options_to_getopt_data()
/// computes from the high-level option definitions and that parse() later
/// hands to getopt_long(3) or uses to interpret its results.
struct getopt_data : utils::noncopyable {
    /// Plain-text representation of the short options.
    ///
    /// This string follows the syntax expected by getopt_long(3) in the
    /// argument to describe the short options: one character per option,
    /// followed by ':' when the option requires an argument.
    std::string short_options;

    /// Representation of the long options as expected by getopt_long(3).
    ///
    /// The array has one entry per option plus a trailing all-zeros entry that
    /// marks its end.
    utils::auto_array< ::option > long_options;

    /// Auto-generated identifiers to be able to parse long options.
    ///
    /// Maps the value stored in the 'val' field of each long option to the
    /// option definition it stands for.  The key is the short option character
    /// when the option has one and a generated number otherwise.
    std::map< int, const cmdline::base_option* > ids;
};
69 
70 
71 /// Converts a cmdline::options_vector to a getopt_data.
72 ///
73 /// \param options The high-level definition of the options.
74 /// \param [out] data An object containing the necessary data to call
75 ///     getopt_long(3) and interpret its results.
76 static void
77 options_to_getopt_data(const cmdline::options_vector& options,
78                        getopt_data& data)
79 {
80     data.short_options.clear();
81     data.long_options.reset(new ::option[options.size() + 1]);
82 
83     int cur_id = 512;
84 
85     for (cmdline::options_vector::size_type i = 0; i < options.size(); i++) {
86         const cmdline::base_option* option = options[i];
87         ::option& long_option = data.long_options[i];
88 
89         long_option.name = option->long_name().c_str();
90         if (option->needs_arg())
91             long_option.has_arg = required_argument;
92         else
93             long_option.has_arg = no_argument;
94 
95         int id = -1;
96         if (option->has_short_name()) {
97             data.short_options += option->short_name();
98             if (option->needs_arg())
99                 data.short_options += ':';
100             id = option->short_name();
101         } else {
102             id = cur_id++;
103         }
104         long_option.flag = NULL;
105         long_option.val = id;
106         data.ids[id] = option;
107     }
108 
109     ::option& last_long_option = data.long_options[options.size()];
110     last_long_option.name = NULL;
111     last_long_option.has_arg = 0;
112     last_long_option.flag = NULL;
113     last_long_option.val = 0;
114 }
115 
116 
117 /// Converts an argc/argv pair to an args_vector.
118 ///
119 /// \param argc The value of argc as passed to main().
120 /// \param argv The value of argv as passed to main().
121 ///
122 /// \return An args_vector with the same contents of argc/argv.
123 static cmdline::args_vector
124 argv_to_vector(int argc, const char* const argv[])
125 {
126     PRE(argv[argc] == NULL);
127     cmdline::args_vector args;
128     for (int i = 0; i < argc; i++)
129         args.push_back(argv[i]);
130     return args;
131 }
132 
133 
/// Creates a mutable version of argv.
///
/// Every argument is duplicated into its own heap buffer so that the result
/// can be handed to interfaces that take a non-const argv.
///
/// \param argc The value of argc as passed to main().
/// \param argv The value of argv as passed to main().
///
/// \return A new argv, with mutable buffers.  The returned array is
/// NULL-terminated and must be released using the free_mutable_argv()
/// function.
///
/// \throw std::bad_alloc If the array or any of the copies cannot be
///     allocated.  Nothing is leaked in that case.
static char**
make_mutable_argv(const int argc, const char* const* argv)
{
    char** mutable_argv = new char*[argc + 1];
    for (int i = 0; i < argc; i++) {
        mutable_argv[i] = ::strdup(argv[i]);
        if (mutable_argv[i] == NULL) {
            // strdup(3) reports memory exhaustion by returning NULL.  A NULL
            // in the middle of the array would be mistaken for its terminator,
            // so undo the partial work and fail loudly instead.
            for (int j = 0; j < i; j++)
                ::free(mutable_argv[j]);
            delete [] mutable_argv;
            throw std::bad_alloc();
        }
    }
    mutable_argv[argc] = NULL;
    return mutable_argv;
}
150 
151 
/// Destroys an argv created by make_mutable_argv().
///
/// Each string up to the NULL terminator is released with free(3) and then
/// the array itself is deleted.
///
/// \param argv A dynamically-allocated, NULL-terminated argv as returned by
///     make_mutable_argv().
static void
free_mutable_argv(char** argv)
{
    for (char** word = argv; *word != NULL; ++word)
        ::free(*word);
    delete [] argv;
}
165 
166 
167 /// Finds the name of the offending option after a getopt_long error.
168 ///
169 /// \param data Our internal getopt data used for the call to getopt_long.
170 /// \param getopt_optopt The value of getopt(3)'s optopt after the error.
171 /// \param argv The argv passed to getopt_long.
172 /// \param getopt_optind The value of getopt(3)'s optind after the error.
173 ///
174 /// \return A fully-specified option name (i.e. an option name prefixed by
175 ///     either '-' or '--').
176 static std::string
177 find_option_name(const getopt_data& data, const int getopt_optopt,
178                  char** argv, const int getopt_optind)
179 {
180     PRE(getopt_optopt >= 0);
181 
182     if (getopt_optopt == 0) {
183         return argv[getopt_optind - 1];
184     } else if (getopt_optopt < std::numeric_limits< char >::max()) {
185         INV(getopt_optopt > 0);
186         const char ch = static_cast< char >(getopt_optopt);
187         return F("-%s") % ch;
188     } else {
189         for (const ::option* opt = &data.long_options[0]; opt->name != NULL;
190              opt++) {
191             if (opt->val == getopt_optopt)
192                 return F("--%s") % opt->name;
193         }
194         UNREACHABLE;
195     }
196 }
197 
198 
199 }  // anonymous namespace
200 
201 
/// Constructs a new parsed_cmdline.
///
/// Use the cmdline::parse() free functions to construct.
///
/// \param option_values_ A mapping of long option names to values.  This
///     contains a representation of the options provided by the user.  Note
///     that each value is actually a collection of values: a user may specify
///     a flag multiple times, and depending on the case we want to honor one
///     or the other.  For those options that support no argument, the argument
///     value is the empty string.
/// \param arguments_ The list of non-option arguments in the command line.
cmdline::parsed_cmdline::parsed_cmdline(
    const std::map< std::string, std::vector< std::string > >& option_values_,
    const cmdline::args_vector& arguments_) :
    _option_values(option_values_),
    _arguments(arguments_)
{
}
220 
221 
222 /// Checks if the given option has been given in the command line.
223 ///
224 /// \param name The long option name to check for presence.
225 ///
226 /// \return True if the option has been given; false otherwise.
227 bool
228 cmdline::parsed_cmdline::has_option(const std::string& name) const
229 {
230     return _option_values.find(name) != _option_values.end();
231 }
232 
233 
234 /// Gets the raw value of an option.
235 ///
236 /// The raw value of an option is a collection of strings that represent all the
237 /// values passed to the option on the command line.  It is up to the consumer
238 /// if he wants to honor only the last value or all of them.
239 ///
240 /// The caller has to use get_option() instead; this function is internal.
241 ///
242 /// \pre has_option(name) must be true.
243 ///
244 /// \param name The option to query.
245 ///
246 /// \return The value of the option as a plain string.
247 const std::vector< std::string >&
248 cmdline::parsed_cmdline::get_option_raw(const std::string& name) const
249 {
250     std::map< std::string, std::vector< std::string > >::const_iterator iter =
251         _option_values.find(name);
252     INV_MSG(iter != _option_values.end(), F("Undefined option --%s") % name);
253     return (*iter).second;
254 }
255 
256 
/// Returns the non-option arguments found in the command line.
///
/// \return A reference to the arguments given at construction time, which
/// may be empty.
const cmdline::args_vector&
cmdline::parsed_cmdline::arguments(void) const
{
    return _arguments;
}
265 
266 
267 /// Parses a command line.
268 ///
269 /// \param args The command line to parse, broken down by words.
270 /// \param options The description of the supported options.
271 ///
272 /// \return The parsed command line.
273 ///
274 /// \pre args[0] must be the program or command name.
275 ///
276 /// \throw cmdline::error See the description of parse(argc, argv, options) for
277 ///     more details on the raised errors.
278 cmdline::parsed_cmdline
279 cmdline::parse(const cmdline::args_vector& args,
280                const cmdline::options_vector& options)
281 {
282     PRE_MSG(args.size() >= 1, "No progname or command name found");
283 
284     utils::auto_array< const char* > argv(new const char*[args.size() + 1]);
285     for (args_vector::size_type i = 0; i < args.size(); i++)
286         argv[i] = args[i].c_str();
287     argv[args.size()] = NULL;
288     return parse(static_cast< int >(args.size()), argv.get(), options);
289 }
290 
291 
292 /// Parses a command line.
293 ///
294 /// \param argc The number of arguments in argv, without counting the
295 ///     terminating NULL.
296 /// \param argv The arguments to parse.  The array is NULL-terminated.
297 /// \param options The description of the supported options.
298 ///
299 /// \return The parsed command line.
300 ///
301 /// \pre args[0] must be the program or command name.
302 ///
303 /// \throw cmdline::missing_option_argument_error If the user specified an
304 ///     option that requires an argument, but no argument was provided.
305 /// \throw cmdline::unknown_option_error If the user specified an unknown
306 ///     option (i.e. an option not defined in options).
307 /// \throw cmdline::option_argument_value_error If the user passed an invalid
308 ///     argument to a supported option.
309 cmdline::parsed_cmdline
310 cmdline::parse(const int argc, const char* const* argv,
311                const cmdline::options_vector& options)
312 {
313     PRE_MSG(argc >= 1, "No progname or command name found");
314 
315     getopt_data data;
316     options_to_getopt_data(options, data);
317 
318     std::map< std::string, std::vector< std::string > > option_values;
319 
320     for (cmdline::options_vector::const_iterator iter = options.begin();
321          iter != options.end(); iter++) {
322         const cmdline::base_option* option = *iter;
323         if (option->needs_arg() && option->has_default_value())
324             option_values[option->long_name()].push_back(
325                 option->default_value());
326     }
327 
328     args_vector args;
329 
330     int mutable_argc = argc;
331     char** mutable_argv = make_mutable_argv(argc, argv);
332     const int old_opterr = ::opterr;
333     try {
334         int ch;
335 
336         ::opterr = 0;
337 
338         while ((ch = ::getopt_long(mutable_argc, mutable_argv,
339                                    ("+:" + data.short_options).c_str(),
340                                    data.long_options.get(), NULL)) != -1) {
341             if (ch == ':' ) {
342                 const std::string name = find_option_name(
343                     data, ::optopt, mutable_argv, ::optind);
344                 throw cmdline::missing_option_argument_error(name);
345             } else if (ch == '?') {
346                 const std::string name = find_option_name(
347                     data, ::optopt, mutable_argv, ::optind);
348                 throw cmdline::unknown_option_error(name);
349             }
350 
351             const std::map< int, const cmdline::base_option* >::const_iterator
352                 id = data.ids.find(ch);
353             INV(id != data.ids.end());
354             const cmdline::base_option* option = (*id).second;
355 
356             if (option->needs_arg()) {
357                 if (::optarg != NULL) {
358                     option->validate(::optarg);
359                     option_values[option->long_name()].push_back(::optarg);
360                 } else
361                     INV(option->has_default_value());
362             } else {
363                 option_values[option->long_name()].push_back("");
364             }
365         }
366         args = argv_to_vector(mutable_argc - optind, mutable_argv + optind);
367 
368         ::opterr = old_opterr;
369         ::optind = GETOPT_OPTIND_RESET_VALUE;
370 #if defined(HAVE_GETOPT_WITH_OPTRESET)
371         ::optreset = 1;
372 #endif
373     } catch (...) {
374         free_mutable_argv(mutable_argv);
375         ::opterr = old_opterr;
376         ::optind = GETOPT_OPTIND_RESET_VALUE;
377 #if defined(HAVE_GETOPT_WITH_OPTRESET)
378         ::optreset = 1;
379 #endif
380         throw;
381     }
382     free_mutable_argv(mutable_argv);
383 
384     return parsed_cmdline(option_values, args);
385 }
386