1 // class template regex -*- C++ -*-
 
    3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
 
    5 // This file is part of the GNU ISO C++ Library.  This library is free
 
    6 // software; you can redistribute it and/or modify it under the
 
    7 // terms of the GNU General Public License as published by the
 
    8 // Free Software Foundation; either version 3, or (at your option)
 
   11 // This library is distributed in the hope that it will be useful,
 
   12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 
   13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
   14 // GNU General Public License for more details.
 
   16 // Under Section 7 of GPL version 3, you are granted additional
 
   17 // permissions described in the GCC Runtime Library Exception, version
 
   18 // 3.1, as published by the Free Software Foundation.
 
   20 // You should have received a copy of the GNU General Public License and
 
   21 // a copy of the GCC Runtime Library Exception along with this program;
 
   22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
   23 // <http://www.gnu.org/licenses/>.
 
   26  *  @file bits/regex.tcc
 
   27  *  This is an internal header file, included by other library headers.
 
   28  *  Do not attempt to use it directly. @headername{regex}
 
   31 // See below __regex_algo_impl to get what this is talking about. The default
 
   32 // value 1 indicated a conservative optimization without giving up worst case
 
   34 #ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
 
   35 #define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
 
   38 namespace std _GLIBCXX_VISIBILITY(default)
 
   42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   44   // Result of merging regex_match and regex_search.
 
   46   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
 
   47   // the other one if possible, for test purpose).
 
   49   // That __match_mode is true means regex_match, else regex_search.
 
   50   template<typename _BiIter, typename _Alloc,
 
   51       typename _CharT, typename _TraitsT,
 
   52       _RegexExecutorPolicy __policy,
 
   55     __regex_algo_impl(_BiIter                              __s,
 
   57              match_results<_BiIter, _Alloc>&      __m,
 
   58              const basic_regex<_CharT, _TraitsT>& __re,
 
   59              regex_constants::match_flag_type     __flags)
 
   61       if (__re._M_automaton == nullptr)
 
   64       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
 
   66       __res.resize(__re._M_automaton->_M_sub_count() + 2);
 
   67       for (auto& __it : __res)
 
   70       // This function decide which executor to use under given circumstances.
 
   71       // The _S_auto policy now is the following: if a NFA has no
 
   72       // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
 
   73       // quantifiers (*, +, ?), the BFS executor will be used, other wise
 
   74       // DFS executor. This is because DFS executor has a exponential upper
 
   75       // bound, but better best-case performace. Meanwhile, BFS executor can
 
   76       // effectively prevent from exponential-long time matching (which must
 
   77       // contains many quantifiers), but it's slower in average.
 
   79       // For simple regex, BFS executor could be 2 or more times slower than
 
   82       // Of course, BFS executor cannot handle back-references.
 
   84       if (!__re._M_automaton->_M_has_backref
 
   85      && (__policy == _RegexExecutorPolicy::_S_alternate
 
   86          || __re._M_automaton->_M_quant_count
 
   87        > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
 
   89      _Executor<_BiIter, _Alloc, _TraitsT, false>
 
   90        __executor(__s, __e, __m, __re, __flags);
 
   92        __ret = __executor._M_match();
 
   94        __ret = __executor._M_search();
 
   98      _Executor<_BiIter, _Alloc, _TraitsT, true>
 
   99        __executor(__s, __e, __m, __re, __flags);
 
  101        __ret = __executor._M_match();
 
  103        __ret = __executor._M_search();
 
  107      for (auto __it : __res)
 
  109          __it.first = __it.second = __e;
 
  110      auto& __pre = __res[__res.size()-2];
 
  111      auto& __suf = __res[__res.size()-1];
 
  114          __pre.matched = false;
 
  117          __suf.matched = false;
 
  124          __pre.second = __res[0].first;
 
  125          __pre.matched = (__pre.first != __pre.second);
 
  126          __suf.first = __res[0].second;
 
  128          __suf.matched = (__suf.first != __suf.second);
 
  134 _GLIBCXX_END_NAMESPACE_VERSION
 
  137 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
  139   template<typename _Ch_type>
 
  140   template<typename _Fwd_iter>
 
  141     typename regex_traits<_Ch_type>::string_type
 
  142     regex_traits<_Ch_type>::
 
  143     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
 
  145       typedef std::ctype<char_type> __ctype_type;
 
  146       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
 
  148       static const char* __collatenames[] =
 
  241      "left-square-bracket",
 
  243      "right-square-bracket",
 
  273      "left-curly-bracket",
 
  275      "right-curly-bracket",
 
  282       //static const char* __digraphs[] =
 
  308       std::string __s(__last - __first, '?');
 
  309       __fctyp.narrow(__first, __last, '?', &*__s.begin());
 
  311       for (unsigned int __i = 0; *__collatenames[__i]; __i++)
 
  312    if (__s == __collatenames[__i])
 
  313      return string_type(1, __fctyp.widen(static_cast<char>(__i)));
 
  315       //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
 
  317       //    const char* __now = __digraphs[__i];
 
  320       //   string_type ret(__s.size(), __fctyp.widen('?'));
 
  321       //   __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
 
  325       return string_type();
 
  328   template<typename _Ch_type>
 
  329   template<typename _Fwd_iter>
 
  330     typename regex_traits<_Ch_type>::char_class_type
 
  331     regex_traits<_Ch_type>::
 
  332     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
 
  334       typedef std::ctype<char_type> __ctype_type;
 
  335       typedef std::ctype<char> __cctype_type;
 
  336       typedef const pair<const char*, char_class_type> _ClassnameEntry;
 
  337       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
 
  338       const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
 
  340       static _ClassnameEntry __classnames[] =
 
  342    {"d", ctype_base::digit},
 
  343    {"w", {ctype_base::alnum, _RegexMask::_S_under}},
 
  344    {"s", ctype_base::space},
 
  345    {"alnum", ctype_base::alnum},
 
  346    {"alpha", ctype_base::alpha},
 
  347    {"blank", {0, _RegexMask::_S_blank}},
 
  348    {"cntrl", ctype_base::cntrl},
 
  349    {"digit", ctype_base::digit},
 
  350    {"graph", ctype_base::graph},
 
  351    {"lower", ctype_base::lower},
 
  352    {"print", ctype_base::print},
 
  353    {"punct", ctype_base::punct},
 
  354    {"space", ctype_base::space},
 
  355    {"upper", ctype_base::upper},
 
  356    {"xdigit", ctype_base::xdigit},
 
  359       std::string __s(__last - __first, '?');
 
  360       __fctyp.narrow(__first, __last, '?', &__s[0]);
 
  361       __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
 
  362       for (_ClassnameEntry* __it = __classnames;
 
  363       __it < *(&__classnames + 1);
 
  366      if (__s == __it->first)
 
  370               & (ctype_base::lower | ctype_base::upper)) != 0))
 
  371        return ctype_base::alpha;
 
  378   template<typename _Ch_type>
 
  380     regex_traits<_Ch_type>::
 
  381     isctype(_Ch_type __c, char_class_type __f) const
 
  383       typedef std::ctype<char_type> __ctype_type;
 
  384       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
 
  386       return __fctyp.is(__f._M_base, __c)
 
  388    || ((__f._M_extended & _RegexMask::_S_under)
 
  389        && __c == __fctyp.widen('_'))
 
  391    || ((__f._M_extended & _RegexMask::_S_blank)
 
  392        && (__c == __fctyp.widen(' ')
 
  393        || __c == __fctyp.widen('\t')));
 
  396   template<typename _Ch_type>
 
  398     regex_traits<_Ch_type>::
 
  399     value(_Ch_type __ch, int __radix) const
 
  401       std::basic_istringstream<char_type> __is(string_type(1, __ch));
 
  405       else if (__radix == 16)
 
  408       return __is.fail() ? -1 : __v;
 
  411   template<typename _Bi_iter, typename _Alloc>
 
  412   template<typename _Out_iter>
 
  413     _Out_iter match_results<_Bi_iter, _Alloc>::
 
  414     format(_Out_iter __out,
 
  415       const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
 
  416       const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
 
  417       match_flag_type __flags) const
 
  419       _GLIBCXX_DEBUG_ASSERT( ready() );
 
  420       regex_traits<char_type> __traits;
 
  421       typedef std::ctype<char_type> __ctype_type;
 
  423    __fctyp(use_facet<__ctype_type>(__traits.getloc()));
 
  425       auto __output = [&](size_t __idx)
 
  427      auto& __sub = _Base_type::operator[](__idx);
 
  429        __out = std::copy(__sub.first, __sub.second, __out);
 
  432       if (__flags & regex_constants::format_sed)
 
  434      for (; __fmt_first != __fmt_last;)
 
  435        if (*__fmt_first == '&')
 
  440        else if (*__fmt_first == '\\')
 
  442        if (++__fmt_first != __fmt_last
 
  443            && __fctyp.is(__ctype_type::digit, *__fmt_first))
 
  444          __output(__traits.value(*__fmt_first++, 10));
 
  449          *__out++ = *__fmt_first++;
 
  455          auto __next = std::find(__fmt_first, __fmt_last, '$');
 
  456          if (__next == __fmt_last)
 
  459          __out = std::copy(__fmt_first, __next, __out);
 
  461          auto __eat = [&](char __ch) -> bool
 
  471          if (++__next == __fmt_last)
 
  478        __output(_Base_type::size()-2);
 
  479          else if (__eat('\''))
 
  480        __output(_Base_type::size()-1);
 
  481          else if (__fctyp.is(__ctype_type::digit, *__next))
 
  483          long __num = __traits.value(*__next, 10);
 
  484          if (++__next != __fmt_last
 
  485              && __fctyp.is(__ctype_type::digit, *__next))
 
  488              __num += __traits.value(*__next++, 10);
 
  490          if (0 <= __num && __num < this->size())
 
  495          __fmt_first = __next;
 
  497      __out = std::copy(__fmt_first, __fmt_last, __out);
 
  502   template<typename _Out_iter, typename _Bi_iter,
 
  503       typename _Rx_traits, typename _Ch_type>
 
  505     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
 
  506          const basic_regex<_Ch_type, _Rx_traits>& __e,
 
  507          const _Ch_type* __fmt,
 
  508          regex_constants::match_flag_type __flags)
 
  510       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
 
  511       _IterT __i(__first, __last, __e, __flags);
 
  515      if (!(__flags & regex_constants::format_no_copy))
 
  516        __out = std::copy(__first, __last, __out);
 
  520      sub_match<_Bi_iter> __last;
 
  521      auto __len = char_traits<_Ch_type>::length(__fmt);
 
  522      for (; __i != __end; ++__i)
 
  524          if (!(__flags & regex_constants::format_no_copy))
 
  525        __out = std::copy(__i->prefix().first, __i->prefix().second,
 
  527          __out = __i->format(__out, __fmt, __fmt + __len, __flags);
 
  528          __last = __i->suffix();
 
  529          if (__flags & regex_constants::format_first_only)
 
  532      if (!(__flags & regex_constants::format_no_copy))
 
  533        __out = std::copy(__last.first, __last.second, __out);
 
  538   template<typename _Bi_iter,
 
  542     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
 
  543     operator==(const regex_iterator& __rhs) const
 
  545       return (_M_match.empty() && __rhs._M_match.empty())
 
  546    || (_M_begin == __rhs._M_begin
 
  547        && _M_end == __rhs._M_end
 
  548        && _M_pregex == __rhs._M_pregex
 
  549        && _M_flags == __rhs._M_flags
 
  550        && _M_match[0] == __rhs._M_match[0]);
 
  553   template<typename _Bi_iter,
 
  556     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
 
  557     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
 
  560       // In all cases in which the call to regex_search returns true,
 
  561       // match.prefix().first shall be equal to the previous value of
 
  562       // match[0].second, and for each index i in the half-open range
 
  563       // [0, match.size()) for which match[i].matched is true,
 
  564       // match[i].position() shall return distance(begin, match[i].first).
 
  566       if (_M_match[0].matched)
 
  568      auto __start = _M_match[0].second;
 
  569      auto __prefix_first = _M_match[0].second;
 
  570      if (_M_match[0].first == _M_match[0].second)
 
  572          if (__start == _M_end)
 
  574          _M_match = value_type();
 
  579          if (regex_search(__start, _M_end, _M_match, *_M_pregex,
 
  581                   | regex_constants::match_not_null
 
  582                   | regex_constants::match_continuous))
 
  584              _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
 
  585              auto& __prefix = _M_match.at(_M_match.size());
 
  586              __prefix.first = __prefix_first;
 
  587              __prefix.matched = __prefix.first != __prefix.second;
 
  589              _M_match._M_begin = _M_begin;
 
  596      _M_flags |= regex_constants::match_prev_avail;
 
  597      if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
 
  599          _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
 
  600          auto& __prefix = _M_match.at(_M_match.size());
 
  601          __prefix.first = __prefix_first;
 
  602          __prefix.matched = __prefix.first != __prefix.second;
 
  604          _M_match._M_begin = _M_begin;
 
  607        _M_match = value_type();
 
  612   template<typename _Bi_iter,
 
  615     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
 
  616     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
 
  617     operator=(const regex_token_iterator& __rhs)
 
  619       _M_position = __rhs._M_position;
 
  620       _M_subs = __rhs._M_subs;
 
  622       _M_suffix = __rhs._M_suffix;
 
  623       _M_has_m1 = __rhs._M_has_m1;
 
  624       _M_normalize_result();
 
  628   template<typename _Bi_iter,
 
  632     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
 
  633     operator==(const regex_token_iterator& __rhs) const
 
  635       if (_M_end_of_seq() && __rhs._M_end_of_seq())
 
  637       if (_M_suffix.matched && __rhs._M_suffix.matched
 
  638      && _M_suffix == __rhs._M_suffix)
 
  640       if (_M_end_of_seq() || _M_suffix.matched
 
  641      || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
 
  643       return _M_position == __rhs._M_position
 
  644    && _M_n == __rhs._M_n
 
  645    && _M_subs == __rhs._M_subs;
 
  648   template<typename _Bi_iter,
 
  651     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
 
  652     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
 
  655       _Position __prev = _M_position;
 
  656       if (_M_suffix.matched)
 
  657    *this = regex_token_iterator();
 
  658       else if (_M_n + 1 < _M_subs.size())
 
  661      _M_result = &_M_current_match();
 
  667      if (_M_position != _Position())
 
  668        _M_result = &_M_current_match();
 
  669      else if (_M_has_m1 && __prev->suffix().length() != 0)
 
  671          _M_suffix.matched = true;
 
  672          _M_suffix.first = __prev->suffix().first;
 
  673          _M_suffix.second = __prev->suffix().second;
 
  674          _M_result = &_M_suffix;
 
  677        *this = regex_token_iterator();
 
  682   template<typename _Bi_iter,
 
  686     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
 
  687     _M_init(_Bi_iter __a, _Bi_iter __b)
 
  690       for (auto __it : _M_subs)
 
  696       if (_M_position != _Position())
 
  697    _M_result = &_M_current_match();
 
  700      _M_suffix.matched = true;
 
  701      _M_suffix.first = __a;
 
  702      _M_suffix.second = __b;
 
  703      _M_result = &_M_suffix;
 
  709 _GLIBCXX_END_NAMESPACE_VERSION