// Copyright 2014 The Kyua Authors.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "utils/text/regex.hpp"

extern "C" {
#include <sys/types.h>

#include <regex.h>
}

#include "utils/auto_array.ipp"
#include "utils/defs.hpp"
#include "utils/format/macros.hpp"
#include "utils/noncopyable.hpp"
#include "utils/sanity.hpp"
#include "utils/text/exceptions.hpp"

namespace text = utils::text;


namespace {


static void throw_regex_error(const int, const ::regex_t*, const std::string&)
    UTILS_NORETURN;


/// Constructs and raises a regex_error.
///
/// \param error The error code returned by regcomp(3) or regexec(3).
/// \param preg The native regex object that caused this error.
/// \param prefix Error message prefix string.
///
/// \throw regex_error The constructed exception.
static void
throw_regex_error(const int error, const ::regex_t* preg,
                  const std::string& prefix)
{
    char buffer[1024];

    // TODO(jmmv): Would be nice to handle the case where the message does
    // not fit in the temporary buffer.
    (void)::regerror(error, preg, buffer, sizeof(buffer));

    throw text::regex_error(F("%s: %s") % prefix % buffer);
}


}  // anonymous namespace


/// Internal implementation for regex_matches.
struct utils::text::regex_matches::impl : utils::noncopyable {
    /// String on which we are matching.
    ///
    /// In theory, we could take a reference here instead of a copy, and make
    /// it a requirement for the caller to ensure that the lifecycle of the
    /// input string outlasts the lifecycle of the regex_matches.  However, that
    /// contract is very easy to break with hardcoded strings (as we do in
    /// tests).  Just go for the safer case here.
    const std::string _string;

    /// Maximum number of matching groups we expect, including the full match.
    ///
    /// In other words, this is the size of the _matches array.
    const std::size_t _nmatches;

    /// Native regular expression match representation.
    utils::auto_array< ::regmatch_t > _matches;

    /// Constructor.
    ///
    /// This executes the regex on the given string and sets up the internal
    /// class state based on the results.
    ///
    /// \param preg The native regex object.
    /// \param str The string on which to execute the regex.
    /// \param ngroups Number of capture groups in the regex.  This is an upper
    ///     bound and may be greater than the actual matches.
    ///
    /// \throw regex_error If the call to regexec(3) fails.
    impl(const ::regex_t* preg, const std::string& str,
         const std::size_t ngroups) :
        _string(str),
        _nmatches(ngroups + 1),
        _matches(new ::regmatch_t[_nmatches])
    {
        const int error = ::regexec(preg, _string.c_str(), _nmatches,
                                    _matches.get(), 0);
        if (error == REG_NOMATCH) {
            _matches.reset(NULL);
        } else if (error != 0) {
            throw_regex_error(error, preg,
                              F("regexec on '%s' failed") % _string);
        }
    }

    /// Destructor.
    ~impl(void)
    {
    }
};


/// Constructor.
///
/// \param pimpl Constructed implementation of the object.
text::regex_matches::regex_matches(std::shared_ptr< impl > pimpl) :
    _pimpl(pimpl)
{
}


/// Destructor.
text::regex_matches::~regex_matches(void)
{
}


/// Returns the number of matches in this object.
///
/// Note that this does not correspond to the number of groups provided at
/// construction time.  The returned value here accounts for only the returned
/// valid matches.
///
/// \return Number of matches, including the full match.
std::size_t
text::regex_matches::count(void) const
{
    std::size_t total = 0;
    if (_pimpl->_matches.get() != NULL) {
        for (std::size_t i = 0; i < _pimpl->_nmatches; ++i) {
            if (_pimpl->_matches[i].rm_so != -1)
                ++total;
        }
        INV(total <= _pimpl->_nmatches);
    }
    return total;
}


/// Gets a match.
///
/// \param index Number of the match to get.  Index 0 always contains the match
///     of the whole regex.
///
/// \pre There regex must have matched the input string.
/// \pre index must be lower than count().
///
/// \return The textual match.
std::string
text::regex_matches::get(const std::size_t index) const
{
    PRE(*this);
    PRE(index < count());

    const ::regmatch_t* match = &_pimpl->_matches[index];

    return std::string(_pimpl->_string.c_str() + match->rm_so,
                       match->rm_eo - match->rm_so);
}


/// Checks if there are any matches.
///
/// \return True if the object contains one or more matches; false otherwise.
text::regex_matches::operator bool(void) const
{
    return _pimpl->_matches.get() != NULL;
}


/// Internal implementation for regex.
struct utils::text::regex::impl : utils::noncopyable {
    /// Native regular expression representation.
    ::regex_t _preg;

    /// Number of capture groups in the regular expression.  This is an upper
    /// bound and does NOT include the default full string match.
    std::size_t _ngroups;

    /// Constructor.
    ///
    /// This compiles the given regular expression.
    ///
    /// \param regex_ The regular expression to compile.
    /// \param ngroups Number of capture groups in the regular expression.  This
    ///     is an upper bound and does NOT include the default full string
    ///     match.
    /// \param ignore_case Whether to ignore case during matching.
    ///
    /// \throw regex_error If the call to regcomp(3) fails.
    impl(const std::string& regex_, const std::size_t ngroups,
         const bool ignore_case) :
        _ngroups(ngroups)
    {
        const int flags = REG_EXTENDED | (ignore_case ? REG_ICASE : 0);
        const int error = ::regcomp(&_preg, regex_.c_str(), flags);
        if (error != 0)
            throw_regex_error(error, &_preg, F("regcomp on '%s' failed")
                              % regex_);
    }

    /// Destructor.
    ~impl(void)
    {
        ::regfree(&_preg);
    }
};


/// Constructor.
///
/// \param pimpl Constructed implementation of the object.
text::regex::regex(std::shared_ptr< impl > pimpl) : _pimpl(pimpl)
{
}


/// Destructor.
text::regex::~regex(void)
{
}


/// Compiles a new regular expression.
///
/// \param regex_ The regular expression to compile.
/// \param ngroups Number of capture groups in the regular expression.  This is
///     an upper bound and does NOT include the default full string match.
/// \param ignore_case Whether to ignore case during matching.
///
/// \return A new regular expression, ready to match strings.
///
/// \throw regex_error If the regular expression is invalid and cannot be
///     compiled.
text::regex
text::regex::compile(const std::string& regex_, const std::size_t ngroups,
                     const bool ignore_case)
{
    return regex(std::shared_ptr< impl >(new impl(regex_, ngroups,
                                                  ignore_case)));
}


/// Matches the regular expression against a string.
///
/// \param str String to match the regular expression against.
///
/// \return A new regex_matches object with the results of the match.
text::regex_matches
text::regex::match(const std::string& str) const
{
    std::shared_ptr< regex_matches::impl > pimpl(new regex_matches::impl(
        &_pimpl->_preg, str, _pimpl->_ngroups));
    return regex_matches(pimpl);
}


/// Compiles and matches a regular expression once.
///
/// This is syntactic sugar to simplify the instantiation of a new regex object
/// and its subsequent match on a string.
///
/// \param regex_ The regular expression to compile and match.
/// \param str String to match the regular expression against.
/// \param ngroups Number of capture groups in the regular expression.
/// \param ignore_case Whether to ignore case during matching.
///
/// \return A new regex_matches object with the results of the match.
text::regex_matches
text::match_regex(const std::string& regex_, const std::string& str,
                  const std::size_t ngroups, const bool ignore_case)
{
    return regex::compile(regex_, ngroups, ignore_case).match(str);
}