xref: /freebsd/contrib/kyua/engine/scheduler.cpp (revision 19261079b74319502c6ffa1249920079f0f69a72)
1 // Copyright 2014 The Kyua Authors.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 //   notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 //   notice, this list of conditions and the following disclaimer in the
12 //   documentation and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors
14 //   may be used to endorse or promote products derived from this software
15 //   without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #include "engine/scheduler.hpp"
30 
31 extern "C" {
32 #include <unistd.h>
33 }
34 
35 #include <cstdio>
36 #include <cstdlib>
37 #include <fstream>
38 #include <memory>
39 #include <stdexcept>
40 
41 #include "engine/config.hpp"
42 #include "engine/exceptions.hpp"
43 #include "engine/requirements.hpp"
44 #include "model/context.hpp"
45 #include "model/metadata.hpp"
46 #include "model/test_case.hpp"
47 #include "model/test_program.hpp"
48 #include "model/test_result.hpp"
49 #include "utils/config/tree.ipp"
50 #include "utils/datetime.hpp"
51 #include "utils/defs.hpp"
52 #include "utils/env.hpp"
53 #include "utils/format/macros.hpp"
54 #include "utils/fs/directory.hpp"
55 #include "utils/fs/exceptions.hpp"
56 #include "utils/fs/operations.hpp"
57 #include "utils/fs/path.hpp"
58 #include "utils/logging/macros.hpp"
59 #include "utils/noncopyable.hpp"
60 #include "utils/optional.ipp"
61 #include "utils/passwd.hpp"
62 #include "utils/process/executor.ipp"
63 #include "utils/process/status.hpp"
64 #include "utils/sanity.hpp"
65 #include "utils/stacktrace.hpp"
66 #include "utils/stream.hpp"
67 #include "utils/text/operations.ipp"
68 
69 namespace config = utils::config;
70 namespace datetime = utils::datetime;
71 namespace executor = utils::process::executor;
72 namespace fs = utils::fs;
73 namespace logging = utils::logging;
74 namespace passwd = utils::passwd;
75 namespace process = utils::process;
76 namespace scheduler = engine::scheduler;
77 namespace text = utils::text;
78 
79 using utils::none;
80 using utils::optional;
81 
82 
83 /// Timeout for the test case cleanup operation.
84 ///
85 /// TODO(jmmv): This is here only for testing purposes.  Maybe we should expose
86 /// this setting as part of the user_config.
87 datetime::delta scheduler::cleanup_timeout(60, 0);
88 
89 
90 /// Timeout for the test case listing operation.
91 ///
92 /// TODO(jmmv): This is here only for testing purposes.  Maybe we should expose
93 /// this setting as part of the user_config.
94 datetime::delta scheduler::list_timeout(300, 0);
95 
96 
97 namespace {
98 
99 
/// Magic exit status to indicate that the test case was probably skipped.
///
/// The test case was skipped if and only if the subprocess returns this exit
/// code and we find the skipped_cookie file on disk.
static const int exit_skipped = 84;


/// Name of the text file containing the skip reason for the test case.
///
/// The file is created inside the control directory handed to the subprocess
/// body, right before the subprocess exits with the exit_skipped code.
/// However, there is no guarantee that the file is there when that code is
/// seen (say if the test really decided to exit with code exit_skipped on its
/// own).
///
/// Both the characters and the pointer itself are constant so that this
/// file-level name cannot be re-pointed at run time by mistake.
static const char* const skipped_cookie = "skipped.txt";
114 
115 
116 /// Mapping of interface names to interface definitions.
117 typedef std::map< std::string, std::shared_ptr< scheduler::interface > >
118     interfaces_map;
119 
120 
121 /// Mapping of interface names to interface definitions.
122 ///
123 /// Use register_interface() to add an entry to this global table.
124 static interfaces_map interfaces;
125 
126 
127 /// Scans the contents of a directory and appends the file listing to a file.
128 ///
129 /// \param dir_path The directory to scan.
130 /// \param output_file The file to which to append the listing.
131 ///
132 /// \throw engine::error If there are problems listing the files.
133 static void
134 append_files_listing(const fs::path& dir_path, const fs::path& output_file)
135 {
136     std::ofstream output(output_file.c_str(), std::ios::app);
137     if (!output)
138         throw engine::error(F("Failed to open output file %s for append")
139                             % output_file);
140     try {
141         std::set < std::string > names;
142 
143         const fs::directory dir(dir_path);
144         for (fs::directory::const_iterator iter = dir.begin();
145              iter != dir.end(); ++iter) {
146             if (iter->name != "." && iter->name != "..")
147                 names.insert(iter->name);
148         }
149 
150         if (!names.empty()) {
151             output << "Files left in work directory after failure: "
152                    << text::join(names, ", ") << '\n';
153         }
154     } catch (const fs::error& e) {
155         throw engine::error(F("Cannot append files listing to %s: %s")
156                             % output_file % e.what());
157     }
158 }
159 
160 
161 /// Maintenance data held while a test is being executed.
162 ///
163 /// This data structure exists from the moment when a test is executed via
164 /// scheduler::spawn_test() or scheduler::impl::spawn_cleanup() to when it is
165 /// cleaned up with result_handle::cleanup().
166 ///
167 /// This is a base data type intended to be extended for the test and cleanup
168 /// cases so that each contains only the relevant data.
169 struct exec_data : utils::noncopyable {
170     /// Test program data for this test case.
171     const model::test_program_ptr test_program;
172 
173     /// Name of the test case.
174     const std::string test_case_name;
175 
176     /// Constructor.
177     ///
178     /// \param test_program_ Test program data for this test case.
179     /// \param test_case_name_ Name of the test case.
180     exec_data(const model::test_program_ptr test_program_,
181               const std::string& test_case_name_) :
182         test_program(test_program_), test_case_name(test_case_name_)
183     {
184     }
185 
186     /// Destructor.
187     virtual ~exec_data(void)
188     {
189     }
190 };
191 
192 
193 /// Maintenance data held while a test is being executed.
194 struct test_exec_data : public exec_data {
195     /// Test program-specific execution interface.
196     const std::shared_ptr< scheduler::interface > interface;
197 
198     /// User configuration passed to the execution of the test.  We need this
199     /// here to recover it later when chaining the execution of a cleanup
200     /// routine (if any).
201     const config::tree user_config;
202 
203     /// Whether this test case still needs to have its cleanup routine executed.
204     ///
205     /// This is set externally when the cleanup routine is actually invoked to
206     /// denote that no further attempts shall be made at cleaning this up.
207     bool needs_cleanup;
208 
209     /// The exit_handle for this test once it has completed.
210     ///
211     /// This is set externally when the test case has finished, as we need this
212     /// information to invoke the followup cleanup routine in the right context,
213     /// as indicated by needs_cleanup.
214     optional< executor::exit_handle > exit_handle;
215 
216     /// Constructor.
217     ///
218     /// \param test_program_ Test program data for this test case.
219     /// \param test_case_name_ Name of the test case.
220     /// \param interface_ Test program-specific execution interface.
221     /// \param user_config_ User configuration passed to the test.
222     test_exec_data(const model::test_program_ptr test_program_,
223                    const std::string& test_case_name_,
224                    const std::shared_ptr< scheduler::interface > interface_,
225                    const config::tree& user_config_) :
226         exec_data(test_program_, test_case_name_),
227         interface(interface_), user_config(user_config_)
228     {
229         const model::test_case& test_case = test_program->find(test_case_name);
230         needs_cleanup = test_case.get_metadata().has_cleanup();
231     }
232 };
233 
234 
235 /// Maintenance data held while a test cleanup routine is being executed.
236 ///
237 /// Instances of this object are related to a previous test_exec_data, as
238 /// cleanup routines can only exist once the test has been run.
239 struct cleanup_exec_data : public exec_data {
240     /// The exit handle of the test.  This is necessary so that we can return
241     /// the correct exit_handle to the user of the scheduler.
242     executor::exit_handle body_exit_handle;
243 
244     /// The final result of the test's body.  This is necessary to compute the
245     /// right return value for a test with a cleanup routine: the body result is
246     /// respected if it is a "bad" result; else the result of the cleanup
247     /// routine is used if it has failed.
248     model::test_result body_result;
249 
250     /// Constructor.
251     ///
252     /// \param test_program_ Test program data for this test case.
253     /// \param test_case_name_ Name of the test case.
254     /// \param body_exit_handle_ If not none, exit handle of the body
255     ///     corresponding to the cleanup routine represented by this exec_data.
256     /// \param body_result_ If not none, result of the body corresponding to the
257     ///     cleanup routine represented by this exec_data.
258     cleanup_exec_data(const model::test_program_ptr test_program_,
259                       const std::string& test_case_name_,
260                       const executor::exit_handle& body_exit_handle_,
261                       const model::test_result& body_result_) :
262         exec_data(test_program_, test_case_name_),
263         body_exit_handle(body_exit_handle_), body_result(body_result_)
264     {
265     }
266 };
267 
268 
269 /// Shared pointer to exec_data.
270 ///
271 /// We require this because we want exec_data to not be copyable, and thus we
272 /// cannot just store it in the map without move constructors.
273 typedef std::shared_ptr< exec_data > exec_data_ptr;
274 
275 
276 /// Mapping of active PIDs to their maintenance data.
277 typedef std::map< int, exec_data_ptr > exec_data_map;
278 
279 
280 /// Enforces a test program to hold an absolute path.
281 ///
282 /// TODO(jmmv): This function (which is a pretty ugly hack) exists because we
283 /// want the interface hooks to receive a test_program as their argument.
284 /// However, those hooks run after the test program has been isolated, which
285 /// means that the current directory has changed since when the test_program
286 /// objects were created.  This causes the absolute_path() method of
287 /// test_program to return bogus values if the internal representation of their
288 /// path is relative.  We should fix somehow: maybe making the fs module grab
289 /// its "current_path" view at program startup time; or maybe by grabbing the
290 /// current path at test_program creation time; or maybe something else.
291 ///
292 /// \param program The test program to modify.
293 ///
294 /// \return A new test program whose internal paths are absolute.
295 static model::test_program
296 force_absolute_paths(const model::test_program program)
297 {
298     const std::string& relative = program.relative_path().str();
299     const std::string absolute = program.absolute_path().str();
300 
301     const std::string root = absolute.substr(
302         0, absolute.length() - relative.length());
303 
304     return model::test_program(
305         program.interface_name(),
306         program.relative_path(), fs::path(root),
307         program.test_suite_name(),
308         program.get_metadata(), program.test_cases());
309 }
310 
311 
312 /// Functor to list the test cases of a test program.
313 class list_test_cases {
314     /// Interface of the test program to execute.
315     std::shared_ptr< scheduler::interface > _interface;
316 
317     /// Test program to execute.
318     const model::test_program _test_program;
319 
320     /// User-provided configuration variables.
321     const config::tree& _user_config;
322 
323 public:
324     /// Constructor.
325     ///
326     /// \param interface Interface of the test program to execute.
327     /// \param test_program Test program to execute.
328     /// \param user_config User-provided configuration variables.
329     list_test_cases(
330         const std::shared_ptr< scheduler::interface > interface,
331         const model::test_program* test_program,
332         const config::tree& user_config) :
333         _interface(interface),
334         _test_program(force_absolute_paths(*test_program)),
335         _user_config(user_config)
336     {
337     }
338 
339     /// Body of the subprocess.
340     void
341     operator()(const fs::path& /* control_directory */)
342     {
343         const config::properties_map vars = scheduler::generate_config(
344             _user_config, _test_program.test_suite_name());
345         _interface->exec_list(_test_program, vars);
346     }
347 };
348 
349 
350 /// Functor to execute a test program in a child process.
351 class run_test_program {
352     /// Interface of the test program to execute.
353     std::shared_ptr< scheduler::interface > _interface;
354 
355     /// Test program to execute.
356     const model::test_program _test_program;
357 
358     /// Name of the test case to execute.
359     const std::string& _test_case_name;
360 
361     /// User-provided configuration variables.
362     const config::tree& _user_config;
363 
364     /// Verifies if the test case needs to be skipped or not.
365     ///
366     /// We could very well run this on the scheduler parent process before
367     /// issuing the fork.  However, doing this here in the child process is
368     /// better for two reasons: first, it allows us to continue using the simple
369     /// spawn/wait abstraction of the scheduler; and, second, we parallelize the
370     /// requirements checks among tests.
371     ///
372     /// \post If the test's preconditions are not met, the caller process is
373     /// terminated with a special exit code and a "skipped cookie" is written to
374     /// the disk with the reason for the failure.
375     ///
376     /// \param skipped_cookie_path File to create with the skip reason details
377     ///     if this test is skipped.
378     void
379     do_requirements_check(const fs::path& skipped_cookie_path)
380     {
381         const model::test_case& test_case = _test_program.find(
382             _test_case_name);
383 
384         const std::string skip_reason = engine::check_reqs(
385             test_case.get_metadata(), _user_config,
386             _test_program.test_suite_name(),
387             fs::current_path());
388         if (skip_reason.empty())
389             return;
390 
391         std::ofstream output(skipped_cookie_path.c_str());
392         if (!output) {
393             std::perror((F("Failed to open %s for write") %
394                          skipped_cookie_path).str().c_str());
395             std::abort();
396         }
397         output << skip_reason;
398         output.close();
399 
400         // Abruptly terminate the process.  We don't want to run any destructors
401         // inherited from the parent process by mistake, which could, for
402         // example, delete our own control files!
403         ::_exit(exit_skipped);
404     }
405 
406 public:
407     /// Constructor.
408     ///
409     /// \param interface Interface of the test program to execute.
410     /// \param test_program Test program to execute.
411     /// \param test_case_name Name of the test case to execute.
412     /// \param user_config User-provided configuration variables.
413     run_test_program(
414         const std::shared_ptr< scheduler::interface > interface,
415         const model::test_program_ptr test_program,
416         const std::string& test_case_name,
417         const config::tree& user_config) :
418         _interface(interface),
419         _test_program(force_absolute_paths(*test_program)),
420         _test_case_name(test_case_name),
421         _user_config(user_config)
422     {
423     }
424 
425     /// Body of the subprocess.
426     ///
427     /// \param control_directory The testcase directory where files will be
428     ///     read from.
429     void
430     operator()(const fs::path& control_directory)
431     {
432         const model::test_case& test_case = _test_program.find(
433             _test_case_name);
434         if (test_case.fake_result())
435             ::_exit(EXIT_SUCCESS);
436 
437         do_requirements_check(control_directory / skipped_cookie);
438 
439         const config::properties_map vars = scheduler::generate_config(
440             _user_config, _test_program.test_suite_name());
441         _interface->exec_test(_test_program, _test_case_name, vars,
442                               control_directory);
443     }
444 };
445 
446 
447 /// Functor to execute a test program in a child process.
448 class run_test_cleanup {
449     /// Interface of the test program to execute.
450     std::shared_ptr< scheduler::interface > _interface;
451 
452     /// Test program to execute.
453     const model::test_program _test_program;
454 
455     /// Name of the test case to execute.
456     const std::string& _test_case_name;
457 
458     /// User-provided configuration variables.
459     const config::tree& _user_config;
460 
461 public:
462     /// Constructor.
463     ///
464     /// \param interface Interface of the test program to execute.
465     /// \param test_program Test program to execute.
466     /// \param test_case_name Name of the test case to execute.
467     /// \param user_config User-provided configuration variables.
468     run_test_cleanup(
469         const std::shared_ptr< scheduler::interface > interface,
470         const model::test_program_ptr test_program,
471         const std::string& test_case_name,
472         const config::tree& user_config) :
473         _interface(interface),
474         _test_program(force_absolute_paths(*test_program)),
475         _test_case_name(test_case_name),
476         _user_config(user_config)
477     {
478     }
479 
480     /// Body of the subprocess.
481     ///
482     /// \param control_directory The testcase directory where cleanup will be
483     ///     run from.
484     void
485     operator()(const fs::path& control_directory)
486     {
487         const config::properties_map vars = scheduler::generate_config(
488             _user_config, _test_program.test_suite_name());
489         _interface->exec_cleanup(_test_program, _test_case_name, vars,
490                                  control_directory);
491     }
492 };
493 
494 
495 /// Obtains the right scheduler interface for a given test program.
496 ///
497 /// \param name The name of the interface of the test program.
498 ///
499 /// \return An scheduler interface.
500 std::shared_ptr< scheduler::interface >
501 find_interface(const std::string& name)
502 {
503     const interfaces_map::const_iterator iter = interfaces.find(name);
504     PRE(interfaces.find(name) != interfaces.end());
505     return (*iter).second;
506 }
507 
508 
509 }  // anonymous namespace
510 
511 
512 void
513 scheduler::interface::exec_cleanup(
514     const model::test_program& /* test_program */,
515     const std::string& /* test_case_name */,
516     const config::properties_map& /* vars */,
517     const utils::fs::path& /* control_directory */) const
518 {
519     // Most test interfaces do not support standalone cleanup routines so
520     // provide a default implementation that does nothing.
521     UNREACHABLE_MSG("exec_cleanup not implemented for an interface that "
522                     "supports standalone cleanup routines");
523 }
524 
525 
526 /// Internal implementation of a lazy_test_program.
527 struct engine::scheduler::lazy_test_program::impl : utils::noncopyable {
528     /// Whether the test cases list has been yet loaded or not.
529     bool _loaded;
530 
531     /// User configuration to pass to the test program list operation.
532     config::tree _user_config;
533 
534     /// Scheduler context to use to load test cases.
535     scheduler::scheduler_handle& _scheduler_handle;
536 
537     /// Constructor.
538     ///
539     /// \param user_config_ User configuration to pass to the test program list
540     ///     operation.
541     /// \param scheduler_handle_ Scheduler context to use when loading test
542     ///     cases.
543     impl(const config::tree& user_config_,
544          scheduler::scheduler_handle& scheduler_handle_) :
545         _loaded(false), _user_config(user_config_),
546         _scheduler_handle(scheduler_handle_)
547     {
548     }
549 };
550 
551 
552 /// Constructs a new test program.
553 ///
554 /// \param interface_name_ Name of the test program interface.
555 /// \param binary_ The name of the test program binary relative to root_.
556 /// \param root_ The root of the test suite containing the test program.
557 /// \param test_suite_name_ The name of the test suite this program belongs to.
558 /// \param md_ Metadata of the test program.
559 /// \param user_config_ User configuration to pass to the scheduler.
560 /// \param scheduler_handle_ Scheduler context to use to load test cases.
561 scheduler::lazy_test_program::lazy_test_program(
562     const std::string& interface_name_,
563     const fs::path& binary_,
564     const fs::path& root_,
565     const std::string& test_suite_name_,
566     const model::metadata& md_,
567     const config::tree& user_config_,
568     scheduler::scheduler_handle& scheduler_handle_) :
569     test_program(interface_name_, binary_, root_, test_suite_name_, md_,
570                  model::test_cases_map()),
571     _pimpl(new impl(user_config_, scheduler_handle_))
572 {
573 }
574 
575 
576 /// Gets or loads the list of test cases from the test program.
577 ///
578 /// \return The list of test cases provided by the test program.
579 const model::test_cases_map&
580 scheduler::lazy_test_program::test_cases(void) const
581 {
582     _pimpl->_scheduler_handle.check_interrupt();
583 
584     if (!_pimpl->_loaded) {
585         const model::test_cases_map tcs = _pimpl->_scheduler_handle.list_tests(
586             this, _pimpl->_user_config);
587 
588         // Due to the restrictions on when set_test_cases() may be called (as a
589         // way to lazily initialize the test cases list before it is ever
590         // returned), this cast is valid.
591         const_cast< scheduler::lazy_test_program* >(this)->set_test_cases(tcs);
592 
593         _pimpl->_loaded = true;
594 
595         _pimpl->_scheduler_handle.check_interrupt();
596     }
597 
598     INV(_pimpl->_loaded);
599     return test_program::test_cases();
600 }
601 
602 
603 /// Internal implementation for the result_handle class.
604 struct engine::scheduler::result_handle::bimpl : utils::noncopyable {
605     /// Generic executor exit handle for this result handle.
606     executor::exit_handle generic;
607 
608     /// Mutable pointer to the corresponding scheduler state.
609     ///
610     /// This object references a member of the scheduler_handle that yielded
611     /// this result_handle instance.  We need this direct access to clean up
612     /// after ourselves when the result is destroyed.
613     exec_data_map& all_exec_data;
614 
615     /// Constructor.
616     ///
617     /// \param generic_ Generic executor exit handle for this result handle.
618     /// \param [in,out] all_exec_data_ Global object keeping track of all active
619     ///     executions for an scheduler.  This is a pointer to a member of the
620     ///     scheduler_handle object.
621     bimpl(const executor::exit_handle generic_, exec_data_map& all_exec_data_) :
622         generic(generic_), all_exec_data(all_exec_data_)
623     {
624     }
625 
626     /// Destructor.
627     ~bimpl(void)
628     {
629         LD(F("Removing %s from all_exec_data") % generic.original_pid());
630         all_exec_data.erase(generic.original_pid());
631     }
632 };
633 
634 
635 /// Constructor.
636 ///
637 /// \param pbimpl Constructed internal implementation.
638 scheduler::result_handle::result_handle(std::shared_ptr< bimpl > pbimpl) :
639     _pbimpl(pbimpl)
640 {
641 }
642 
643 
644 /// Destructor.
645 scheduler::result_handle::~result_handle(void)
646 {
647 }
648 
649 
650 /// Cleans up the test case results.
651 ///
652 /// This function should be called explicitly as it provides the means to
653 /// control any exceptions raised during cleanup.  Do not rely on the destructor
654 /// to clean things up.
655 ///
656 /// \throw engine::error If the cleanup fails, especially due to the inability
657 ///     to remove the work directory.
658 void
659 scheduler::result_handle::cleanup(void)
660 {
661     _pbimpl->generic.cleanup();
662 }
663 
664 
665 /// Returns the original PID corresponding to this result.
666 ///
667 /// \return An exec_handle.
668 int
669 scheduler::result_handle::original_pid(void) const
670 {
671     return _pbimpl->generic.original_pid();
672 }
673 
674 
675 /// Returns the timestamp of when spawn_test was called.
676 ///
677 /// \return A timestamp.
678 const datetime::timestamp&
679 scheduler::result_handle::start_time(void) const
680 {
681     return _pbimpl->generic.start_time();
682 }
683 
684 
685 /// Returns the timestamp of when wait_any_test returned this object.
686 ///
687 /// \return A timestamp.
688 const datetime::timestamp&
689 scheduler::result_handle::end_time(void) const
690 {
691     return _pbimpl->generic.end_time();
692 }
693 
694 
695 /// Returns the path to the test-specific work directory.
696 ///
697 /// This is guaranteed to be clear of files created by the scheduler.
698 ///
699 /// \return The path to a directory that exists until cleanup() is called.
700 fs::path
701 scheduler::result_handle::work_directory(void) const
702 {
703     return _pbimpl->generic.work_directory();
704 }
705 
706 
707 /// Returns the path to the test's stdout file.
708 ///
709 /// \return The path to a file that exists until cleanup() is called.
710 const fs::path&
711 scheduler::result_handle::stdout_file(void) const
712 {
713     return _pbimpl->generic.stdout_file();
714 }
715 
716 
717 /// Returns the path to the test's stderr file.
718 ///
719 /// \return The path to a file that exists until cleanup() is called.
720 const fs::path&
721 scheduler::result_handle::stderr_file(void) const
722 {
723     return _pbimpl->generic.stderr_file();
724 }
725 
726 
727 /// Internal implementation for the test_result_handle class.
728 struct engine::scheduler::test_result_handle::impl : utils::noncopyable {
729     /// Test program data for this test case.
730     model::test_program_ptr test_program;
731 
732     /// Name of the test case.
733     std::string test_case_name;
734 
735     /// The actual result of the test execution.
736     const model::test_result test_result;
737 
738     /// Constructor.
739     ///
740     /// \param test_program_ Test program data for this test case.
741     /// \param test_case_name_ Name of the test case.
742     /// \param test_result_ The actual result of the test execution.
743     impl(const model::test_program_ptr test_program_,
744          const std::string& test_case_name_,
745          const model::test_result& test_result_) :
746         test_program(test_program_),
747         test_case_name(test_case_name_),
748         test_result(test_result_)
749     {
750     }
751 };
752 
753 
754 /// Constructor.
755 ///
756 /// \param pbimpl Constructed internal implementation for the base object.
757 /// \param pimpl Constructed internal implementation.
758 scheduler::test_result_handle::test_result_handle(
759     std::shared_ptr< bimpl > pbimpl, std::shared_ptr< impl > pimpl) :
760     result_handle(pbimpl), _pimpl(pimpl)
761 {
762 }
763 
764 
765 /// Destructor.
766 scheduler::test_result_handle::~test_result_handle(void)
767 {
768 }
769 
770 
771 /// Returns the test program that yielded this result.
772 ///
773 /// \return A test program.
774 const model::test_program_ptr
775 scheduler::test_result_handle::test_program(void) const
776 {
777     return _pimpl->test_program;
778 }
779 
780 
781 /// Returns the name of the test case that yielded this result.
782 ///
783 /// \return A test case name
784 const std::string&
785 scheduler::test_result_handle::test_case_name(void) const
786 {
787     return _pimpl->test_case_name;
788 }
789 
790 
791 /// Returns the actual result of the test execution.
792 ///
793 /// \return A test result.
794 const model::test_result&
795 scheduler::test_result_handle::test_result(void) const
796 {
797     return _pimpl->test_result;
798 }
799 
800 
801 /// Internal implementation for the scheduler_handle.
802 struct engine::scheduler::scheduler_handle::impl : utils::noncopyable {
803     /// Generic executor instance encapsulated by this one.
804     executor::executor_handle generic;
805 
806     /// Mapping of exec handles to the data required at run time.
807     exec_data_map all_exec_data;
808 
809     /// Collection of test_exec_data objects.
810     typedef std::vector< const test_exec_data* > test_exec_data_vector;
811 
812     /// Constructor.
813     impl(void) : generic(executor::setup())
814     {
815     }
816 
817     /// Destructor.
818     ///
819     /// This runs any pending cleanup routines, which should only happen if the
820     /// scheduler is abruptly terminated (aka if a signal is received).
821     ~impl(void)
822     {
823         const test_exec_data_vector tests_data = tests_needing_cleanup();
824 
825         for (test_exec_data_vector::const_iterator iter = tests_data.begin();
826              iter != tests_data.end(); ++iter) {
827             const test_exec_data* test_data = *iter;
828 
829             try {
830                 sync_cleanup(test_data);
831             } catch (const std::runtime_error& e) {
832                 LW(F("Failed to run cleanup routine for %s:%s on abrupt "
833                      "termination")
834                    % test_data->test_program->relative_path()
835                    % test_data->test_case_name);
836             }
837         }
838     }
839 
840     /// Finds any pending exec_datas that correspond to tests needing cleanup.
841     ///
842     /// \return The collection of test_exec_data objects that have their
843     /// needs_cleanup property set to true.
844     test_exec_data_vector
845     tests_needing_cleanup(void)
846     {
847         test_exec_data_vector tests_data;
848 
849         for (exec_data_map::const_iterator iter = all_exec_data.begin();
850              iter != all_exec_data.end(); ++iter) {
851             const exec_data_ptr data = (*iter).second;
852 
853             try {
854                 test_exec_data* test_data = &dynamic_cast< test_exec_data& >(
855                     *data.get());
856                 if (test_data->needs_cleanup) {
857                     tests_data.push_back(test_data);
858                     test_data->needs_cleanup = false;
859                 }
860             } catch (const std::bad_cast& e) {
861                 // Do nothing for cleanup_exec_data objects.
862             }
863         }
864 
865         return tests_data;
866     }
867 
868     /// Cleans up a single test case synchronously.
869     ///
870     /// \param test_data The data of the previously executed test case to be
871     ///     cleaned up.
872     void
873     sync_cleanup(const test_exec_data* test_data)
874     {
875         // The message in this result should never be seen by the user, but use
876         // something reasonable just in case it leaks and we need to pinpoint
877         // the call site.
878         model::test_result result(model::test_result_broken,
879                                   "Test case died abruptly");
880 
881         const executor::exec_handle cleanup_handle = spawn_cleanup(
882             test_data->test_program, test_data->test_case_name,
883             test_data->user_config, test_data->exit_handle.get(),
884             result);
885         generic.wait(cleanup_handle);
886     }
887 
888     /// Forks and executes a test case cleanup routine asynchronously.
889     ///
890     /// \param test_program The container test program.
891     /// \param test_case_name The name of the test case to run.
892     /// \param user_config User-provided configuration variables.
893     /// \param body_handle The exit handle of the test case's corresponding
894     ///     body.  The cleanup will be executed in the same context.
895     /// \param body_result The result of the test case's corresponding body.
896     ///
897     /// \return A handle for the background operation.  Used to match the result
898     /// of the execution returned by wait_any() with this invocation.
899     executor::exec_handle
900     spawn_cleanup(const model::test_program_ptr test_program,
901                   const std::string& test_case_name,
902                   const config::tree& user_config,
903                   const executor::exit_handle& body_handle,
904                   const model::test_result& body_result)
905     {
906         generic.check_interrupt();
907 
908         const std::shared_ptr< scheduler::interface > interface =
909             find_interface(test_program->interface_name());
910 
911         LI(F("Spawning %s:%s (cleanup)") % test_program->absolute_path() %
912            test_case_name);
913 
914         const executor::exec_handle handle = generic.spawn_followup(
915             run_test_cleanup(interface, test_program, test_case_name,
916                              user_config),
917             body_handle, cleanup_timeout);
918 
919         const exec_data_ptr data(new cleanup_exec_data(
920             test_program, test_case_name, body_handle, body_result));
921         LD(F("Inserting %s into all_exec_data (cleanup)") % handle.pid());
922         INV_MSG(all_exec_data.find(handle.pid()) == all_exec_data.end(),
923                 F("PID %s already in all_exec_data; not properly cleaned "
924                   "up or reused too fast") % handle.pid());;
925         all_exec_data.insert(exec_data_map::value_type(handle.pid(), data));
926 
927         return handle;
928     }
929 };
930 
931 
932 /// Constructor.
933 scheduler::scheduler_handle::scheduler_handle(void) : _pimpl(new impl())
934 {
935 }
936 
937 
938 /// Destructor.
939 scheduler::scheduler_handle::~scheduler_handle(void)
940 {
941 }
942 
943 
944 /// Queries the path to the root of the work directory for all tests.
945 ///
946 /// \return A path.
947 const fs::path&
948 scheduler::scheduler_handle::root_work_directory(void) const
949 {
950     return _pimpl->generic.root_work_directory();
951 }
952 
953 
954 /// Cleans up the scheduler state.
955 ///
956 /// This function should be called explicitly as it provides the means to
957 /// control any exceptions raised during cleanup.  Do not rely on the destructor
958 /// to clean things up.
959 ///
960 /// \throw engine::error If there are problems cleaning up the scheduler.
961 void
962 scheduler::scheduler_handle::cleanup(void)
963 {
964     _pimpl->generic.cleanup();
965 }
966 
967 
968 /// Checks if the given interface name is valid.
969 ///
970 /// \param name The name of the interface to validate.
971 ///
972 /// \throw engine::error If the given interface is not supported.
973 void
974 scheduler::ensure_valid_interface(const std::string& name)
975 {
976     if (interfaces.find(name) == interfaces.end())
977         throw engine::error(F("Unsupported test interface '%s'") % name);
978 }
979 
980 
981 /// Registers a new interface.
982 ///
983 /// \param name The name of the interface.  Must not have yet been registered.
984 /// \param spec Interface specification.
985 void
986 scheduler::register_interface(const std::string& name,
987                               const std::shared_ptr< interface > spec)
988 {
989     PRE(interfaces.find(name) == interfaces.end());
990     interfaces.insert(interfaces_map::value_type(name, spec));
991 }
992 
993 
994 /// Returns the names of all registered interfaces.
995 ///
996 /// \return A collection of interface names.
997 std::set< std::string >
998 scheduler::registered_interface_names(void)
999 {
1000     std::set< std::string > names;
1001     for (interfaces_map::const_iterator iter = interfaces.begin();
1002          iter != interfaces.end(); ++iter) {
1003         names.insert((*iter).first);
1004     }
1005     return names;
1006 }
1007 
1008 
1009 /// Initializes the scheduler.
1010 ///
1011 /// \pre This function can only be called if there is no other scheduler_handle
1012 /// object alive.
1013 ///
1014 /// \return A handle to the operations of the scheduler.
1015 scheduler::scheduler_handle
1016 scheduler::setup(void)
1017 {
1018     return scheduler_handle();
1019 }
1020 
1021 
1022 /// Retrieves the list of test cases from a test program.
1023 ///
1024 /// This operation is currently synchronous.
1025 ///
1026 /// This operation should never throw.  Any errors during the processing of the
1027 /// test case list are subsumed into a single test case in the return value that
1028 /// represents the failed retrieval.
1029 ///
1030 /// \param test_program The test program from which to obtain the list of test
1031 /// cases.
1032 /// \param user_config User-provided configuration variables.
1033 ///
1034 /// \return The list of test cases.
1035 model::test_cases_map
1036 scheduler::scheduler_handle::list_tests(
1037     const model::test_program* test_program,
1038     const config::tree& user_config)
1039 {
1040     _pimpl->generic.check_interrupt();
1041 
1042     const std::shared_ptr< scheduler::interface > interface = find_interface(
1043         test_program->interface_name());
1044 
1045     try {
1046         const executor::exec_handle exec_handle = _pimpl->generic.spawn(
1047             list_test_cases(interface, test_program, user_config),
1048             list_timeout, none);
1049         executor::exit_handle exit_handle = _pimpl->generic.wait(exec_handle);
1050 
1051         const model::test_cases_map test_cases = interface->parse_list(
1052             exit_handle.status(),
1053             exit_handle.stdout_file(),
1054             exit_handle.stderr_file());
1055 
1056         exit_handle.cleanup();
1057 
1058         if (test_cases.empty())
1059             throw std::runtime_error("Empty test cases list");
1060 
1061         return test_cases;
1062     } catch (const std::runtime_error& e) {
1063         // TODO(jmmv): This is a very ugly workaround for the fact that we
1064         // cannot report failures at the test-program level.
1065         LW(F("Failed to load test cases list: %s") % e.what());
1066         model::test_cases_map fake_test_cases;
1067         fake_test_cases.insert(model::test_cases_map::value_type(
1068             "__test_cases_list__",
1069             model::test_case(
1070                 "__test_cases_list__",
1071                 "Represents the correct processing of the test cases list",
1072                 model::test_result(model::test_result_broken, e.what()))));
1073         return fake_test_cases;
1074     }
1075 }
1076 
1077 
1078 /// Forks and executes a test case asynchronously.
1079 ///
1080 /// Note that the caller needn't know if the test has a cleanup routine or not.
1081 /// If there indeed is a cleanup routine, we trigger it at wait_any() time.
1082 ///
1083 /// \param test_program The container test program.
1084 /// \param test_case_name The name of the test case to run.
1085 /// \param user_config User-provided configuration variables.
1086 ///
1087 /// \return A handle for the background operation.  Used to match the result of
1088 /// the execution returned by wait_any() with this invocation.
1089 scheduler::exec_handle
1090 scheduler::scheduler_handle::spawn_test(
1091     const model::test_program_ptr test_program,
1092     const std::string& test_case_name,
1093     const config::tree& user_config)
1094 {
1095     _pimpl->generic.check_interrupt();
1096 
1097     const std::shared_ptr< scheduler::interface > interface = find_interface(
1098         test_program->interface_name());
1099 
1100     LI(F("Spawning %s:%s") % test_program->absolute_path() % test_case_name);
1101 
1102     const model::test_case& test_case = test_program->find(test_case_name);
1103 
1104     optional< passwd::user > unprivileged_user;
1105     if (user_config.is_set("unprivileged_user") &&
1106         test_case.get_metadata().required_user() == "unprivileged") {
1107         unprivileged_user = user_config.lookup< engine::user_node >(
1108             "unprivileged_user");
1109     }
1110 
1111     const executor::exec_handle handle = _pimpl->generic.spawn(
1112         run_test_program(interface, test_program, test_case_name,
1113                          user_config),
1114         test_case.get_metadata().timeout(),
1115         unprivileged_user);
1116 
1117     const exec_data_ptr data(new test_exec_data(
1118         test_program, test_case_name, interface, user_config));
1119     LD(F("Inserting %s into all_exec_data") % handle.pid());
1120     INV_MSG(
1121         _pimpl->all_exec_data.find(handle.pid()) == _pimpl->all_exec_data.end(),
1122         F("PID %s already in all_exec_data; not cleaned up or reused too fast")
1123         % handle.pid());;
1124     _pimpl->all_exec_data.insert(exec_data_map::value_type(handle.pid(), data));
1125 
1126     return handle.pid();
1127 }
1128 
1129 
1130 /// Waits for completion of any forked test case.
1131 ///
1132 /// Note that if the terminated test case has a cleanup routine, this function
1133 /// is the one in charge of spawning the cleanup routine asynchronously.
1134 ///
1135 /// \return The result of the execution of a subprocess.  This is a dynamically
1136 /// allocated object because the scheduler can spawn subprocesses of various
1137 /// types and, at wait time, we don't know upfront what we are going to get.
1138 scheduler::result_handle_ptr
1139 scheduler::scheduler_handle::wait_any(void)
1140 {
1141     _pimpl->generic.check_interrupt();
1142 
1143     executor::exit_handle handle = _pimpl->generic.wait_any();
1144 
1145     const exec_data_map::iterator iter = _pimpl->all_exec_data.find(
1146         handle.original_pid());
1147     exec_data_ptr data = (*iter).second;
1148 
1149     utils::dump_stacktrace_if_available(data->test_program->absolute_path(),
1150                                         _pimpl->generic, handle);
1151 
1152     optional< model::test_result > result;
1153     try {
1154         test_exec_data* test_data = &dynamic_cast< test_exec_data& >(
1155             *data.get());
1156         LD(F("Got %s from all_exec_data") % handle.original_pid());
1157 
1158         test_data->exit_handle = handle;
1159 
1160         const model::test_case& test_case = test_data->test_program->find(
1161             test_data->test_case_name);
1162 
1163         result = test_case.fake_result();
1164 
1165         if (!result && handle.status() && handle.status().get().exited() &&
1166             handle.status().get().exitstatus() == exit_skipped) {
1167             // If the test's process terminated with our magic "exit_skipped"
1168             // status, there are two cases to handle.  The first is the case
1169             // where the "skipped cookie" exists, in which case we never got to
1170             // actually invoke the test program; if that's the case, handle it
1171             // here.  The second case is where the test case actually decided to
1172             // exit with the "exit_skipped" status; in that case, just fall back
1173             // to the regular status handling.
1174             const fs::path skipped_cookie_path = handle.control_directory() /
1175                 skipped_cookie;
1176             std::ifstream input(skipped_cookie_path.c_str());
1177             if (input) {
1178                 result = model::test_result(model::test_result_skipped,
1179                                             utils::read_stream(input));
1180                 input.close();
1181 
1182                 // If we determined that the test needs to be skipped, we do not
1183                 // want to run the cleanup routine because doing so could result
1184                 // in errors.  However, we still want to run the cleanup routine
1185                 // if the test's body reports a skip (because actions could have
1186                 // already been taken).
1187                 test_data->needs_cleanup = false;
1188             }
1189         }
1190         if (!result) {
1191             result = test_data->interface->compute_result(
1192                 handle.status(),
1193                 handle.control_directory(),
1194                 handle.stdout_file(),
1195                 handle.stderr_file());
1196         }
1197         INV(result);
1198 
1199         if (!result.get().good()) {
1200             append_files_listing(handle.work_directory(),
1201                                  handle.stderr_file());
1202         }
1203 
1204         if (test_data->needs_cleanup) {
1205             INV(test_case.get_metadata().has_cleanup());
1206             // The test body has completed and we have processed it.  If there
1207             // is a cleanup routine, trigger it now and wait for any other test
1208             // completion.  The caller never knows about cleanup routines.
1209             _pimpl->spawn_cleanup(test_data->test_program,
1210                                   test_data->test_case_name,
1211                                   test_data->user_config, handle, result.get());
1212             test_data->needs_cleanup = false;
1213 
1214             // TODO(jmmv): Chaining this call is ugly.  We'd be better off by
1215             // looping over terminated processes until we got a result suitable
1216             // for user consumption.  For the time being this is good enough and
1217             // not a problem because the call chain won't get big: the majority
1218             // of test cases do not have cleanup routines.
1219             return wait_any();
1220         }
1221     } catch (const std::bad_cast& e) {
1222         const cleanup_exec_data* cleanup_data =
1223             &dynamic_cast< const cleanup_exec_data& >(*data.get());
1224         LD(F("Got %s from all_exec_data (cleanup)") % handle.original_pid());
1225 
1226         // Handle the completion of cleanup subprocesses internally: the caller
1227         // is not aware that these exist so, when we return, we must return the
1228         // data for the original test that triggered this routine.  For example,
1229         // because the caller wants to see the exact same exec_handle that was
1230         // returned by spawn_test.
1231 
1232         const model::test_result& body_result = cleanup_data->body_result;
1233         if (body_result.good()) {
1234             if (!handle.status()) {
1235                 result = model::test_result(model::test_result_broken,
1236                                             "Test case cleanup timed out");
1237             } else {
1238                 if (!handle.status().get().exited() ||
1239                     handle.status().get().exitstatus() != EXIT_SUCCESS) {
1240                     result = model::test_result(
1241                         model::test_result_broken,
1242                         "Test case cleanup did not terminate successfully");
1243                 } else {
1244                     result = body_result;
1245                 }
1246             }
1247         } else {
1248             result = body_result;
1249         }
1250 
1251         // Untrack the cleanup process.  This must be done explicitly because we
1252         // do not create a result_handle object for the cleanup, and that is the
1253         // one in charge of doing so in the regular (non-cleanup) case.
1254         LD(F("Removing %s from all_exec_data (cleanup) in favor of %s")
1255            % handle.original_pid()
1256            % cleanup_data->body_exit_handle.original_pid());
1257         _pimpl->all_exec_data.erase(handle.original_pid());
1258 
1259         handle = cleanup_data->body_exit_handle;
1260     }
1261     INV(result);
1262 
1263     std::shared_ptr< result_handle::bimpl > result_handle_bimpl(
1264         new result_handle::bimpl(handle, _pimpl->all_exec_data));
1265     std::shared_ptr< test_result_handle::impl > test_result_handle_impl(
1266         new test_result_handle::impl(
1267             data->test_program, data->test_case_name, result.get()));
1268     return result_handle_ptr(new test_result_handle(result_handle_bimpl,
1269                                                     test_result_handle_impl));
1270 }
1271 
1272 
1273 /// Forks and executes a test case synchronously for debugging.
1274 ///
1275 /// \pre No other processes should be in execution by the scheduler.
1276 ///
1277 /// \param test_program The container test program.
1278 /// \param test_case_name The name of the test case to run.
1279 /// \param user_config User-provided configuration variables.
1280 /// \param stdout_target File to which to write the stdout of the test case.
1281 /// \param stderr_target File to which to write the stderr of the test case.
1282 ///
1283 /// \return The result of the execution of the test.
1284 scheduler::result_handle_ptr
1285 scheduler::scheduler_handle::debug_test(
1286     const model::test_program_ptr test_program,
1287     const std::string& test_case_name,
1288     const config::tree& user_config,
1289     const fs::path& stdout_target,
1290     const fs::path& stderr_target)
1291 {
1292     const exec_handle exec_handle = spawn_test(
1293         test_program, test_case_name, user_config);
1294     result_handle_ptr result_handle = wait_any();
1295 
1296     // TODO(jmmv): We need to do this while the subprocess is alive.  This is
1297     // important for debugging purposes, as we should see the contents of stdout
1298     // or stderr as they come in.
1299     //
1300     // Unfortunately, we cannot do so.  We cannot just read and block from a
1301     // file, waiting for further output to appear... as this only works on pipes
1302     // or sockets.  We need a better interface for this whole thing.
1303     {
1304         std::auto_ptr< std::ostream > output = utils::open_ostream(
1305             stdout_target);
1306         *output << utils::read_file(result_handle->stdout_file());
1307     }
1308     {
1309         std::auto_ptr< std::ostream > output = utils::open_ostream(
1310             stderr_target);
1311         *output << utils::read_file(result_handle->stderr_file());
1312     }
1313 
1314     INV(result_handle->original_pid() == exec_handle);
1315     return result_handle;
1316 }
1317 
1318 
1319 /// Checks if an interrupt has fired.
1320 ///
1321 /// Calls to this function should be sprinkled in strategic places through the
1322 /// code protected by an interrupts_handler object.
1323 ///
1324 /// This is just a wrapper over signals::check_interrupt() to avoid leaking this
1325 /// dependency to the caller.
1326 ///
1327 /// \throw signals::interrupted_error If there has been an interrupt.
1328 void
1329 scheduler::scheduler_handle::check_interrupt(void) const
1330 {
1331     _pimpl->generic.check_interrupt();
1332 }
1333 
1334 
1335 /// Queries the current execution context.
1336 ///
1337 /// \return The queried context.
1338 model::context
1339 scheduler::current_context(void)
1340 {
1341     return model::context(fs::current_path(), utils::getallenv());
1342 }
1343 
1344 
1345 /// Generates the set of configuration variables for a test program.
1346 ///
1347 /// \param user_config The configuration variables provided by the user.
1348 /// \param test_suite The name of the test suite.
1349 ///
1350 /// \return The mapping of configuration variables for the test program.
1351 config::properties_map
1352 scheduler::generate_config(const config::tree& user_config,
1353                            const std::string& test_suite)
1354 {
1355     config::properties_map props;
1356 
1357     try {
1358         props = user_config.all_properties(F("test_suites.%s") % test_suite,
1359                                            true);
1360     } catch (const config::unknown_key_error& unused_error) {
1361         // Ignore: not all test suites have entries in the configuration.
1362     }
1363 
1364     // TODO(jmmv): This is a hack that exists for the ATF interface only, so it
1365     // should be moved there.
1366     if (user_config.is_set("unprivileged_user")) {
1367         const passwd::user& user =
1368             user_config.lookup< engine::user_node >("unprivileged_user");
1369         props["unprivileged-user"] = user.name;
1370     }
1371 
1372     return props;
1373 }
1374