std::regex and ignored flags

After learning basic C++, I concentrated on std::regex, creating two console applications: 1. renrem and 2. bfind.
I also decided to write some convenience functions that make working with regex as simple as possible, using nothing but std; I named them RFC (= regular expression function).

There are a few strange things that always surprise me, but this one ruined all my attempts and both of these console applications.

One of the important functions is count_match, which counts the number of matches within a string. Here is the complete code:

/// Counts how many times a regular expression matches inside a string.
///
/// @param user_string  Text to search.
/// @param user_pattern Regular expression in ECMAScript syntax.
/// @param flags        Perl-style modifier letters: 'i' makes matching
///                     case-insensitive, 'g' ("global") counts every match.
///                     Any other letter is ignored, so the default "o"
///                     selects neither option.
/// @return With 'g': the number of non-overlapping matches.
///         Without 'g': 1 if the pattern matches at least once, otherwise 0.
/// @throws std::regex_error if user_pattern is not a valid expression.
unsigned int count_match( const std::string& user_string, const std::string& user_pattern, const std::string& flags = "o" ){

    const bool flags_has_i = flags.find( 'i' ) != std::string::npos;
    const bool flags_has_g = flags.find( 'g' ) != std::string::npos;

    // Always name the grammar explicitly and OR the icase option into it.
    const std::regex::flag_type regex_flag = flags_has_i
        ? ( std::regex_constants::ECMAScript | std::regex_constants::icase )
        : std::regex_constants::ECMAScript;
    // format_first_only is a *format* flag (used by std::regex_replace); it has no effect on a search:
//    std::regex_constants::match_flag_type search_flag = flags_has_g ? std::regex_constants::match_default : std::regex_constants::format_first_only;
    const std::regex rx( user_pattern, regex_flag );

    // Without 'g' only the existence of a match matters: one search is enough.
    if( !flags_has_g ){
        return std::regex_search( user_string, rx ) ? 1u : 0u;
    }

    // std::sregex_iterator walks the ORIGINAL string, so anchors and word
    // boundaries keep their meaning, no suffix copies are made, and
    // zero-length matches advance instead of looping forever.
    unsigned int counter = 0;
    const std::sregex_iterator last;
    for( std::sregex_iterator it( user_string.begin(), user_string.end(), rx ); it != last; ++it ){
        ++counter;
    }
    return counter;

}

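First of all, as you can see, the line for search_flag is commented out, because it is ignored by std::regex_search, and I do not know why, since it is accepted by std::regex_replace. So std::regex_search ignores format_first_only, whereas std::regex_replace honors it. Let that go.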

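The main problem is that the icase flag is also ignored when the pattern is a character class, []. More precisely, when the pattern contains only capital letters or only small letters: [A-Z] or [a-z].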

Suppose the string is s = "ONE TWO THREE four five six seven"

The output with std::regex:

// Results the post reports for count_match built on std::regex.
// The "gi" call prints 3 here: the case-insensitive flag has no effect on [A-Z].
std::cout << count_match( s, "[A-Z]+" ) << '\n';          // 1 => First match
std::cout << count_match( s, "[A-Z]+", "g" ) << '\n';     // 3 => Global match
std::cout << count_match( s, "[A-Z]+", "gi" ) << '\n';    // 3 => Global match plus insensitive  

whereas exactly the same test in Perl, in the D language, and in C++ with Boost outputs:

// Results the post reports for the same three calls in the other
// implementations it compares against: here the "gi" call prints 7,
// i.e. the case-insensitive flag is honored.
std::cout << count_match( s, "[A-Z]+" ) << '\n';          // 1 => First match
std::cout << count_match( s, "[A-Z]+", "g" ) << '\n';     // 3 => Global match
std::cout << count_match( s, "[A-Z]+", "gi" ) << '\n';    // 7 => Global match plus insensitive  

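I know about the regex flavors: PCRE, and ECMAScript 262, which C++ uses. But I have no idea why a flag is ignored by the only search function that C++ has, since std::regex_iterator and std::regex_token_iterator use this function internally.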

In short, I cannot use those two applications and RFC with the std library because of this!

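So if anyone knows which rule this follows (perhaps it is valid behavior under ECMAScript 262), or if I am wrong somewhere, please tell me. Thanks.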


gcc version 6.3.0 20170519 (Ubuntu/Linaro 6.3.0-18ubuntu2~16.04)
clang version 3.8.0-2ubuntu4  

Perl:

# Count the matches of [A-Z]+ in the first argument; with /g the `while` loop
# visits every match, incrementing $c once per match. Expected result: 3.
perl -le '++$c while $ARGV[0] =~ m/[A-Z]+/g; print $c ;' "ONE TWO THREE four five six seven" # 3
# Same count with /i added (case-insensitive). Expected result: 7.
perl -le '++$c while $ARGV[0] =~ m/[A-Z]+/gi; print $c ;' "ONE TWO THREE four five six seven" # 7

D code:

/**
 * Counts the matches of `user_pattern` in `user_string`.
 *
 * Params:
 *   user_string  = text to search
 *   user_pattern = regular expression source
 *   flags        = flag letters handed unchanged to `regex`; the letter "g"
 *                  additionally selects what this function returns
 *
 * Returns: the number of matches when `flags` contains "g"; otherwise 1 if
 *          there is at least one match and 0 if there is none.
 */
uint count_match( ref const (char[]) user_string, const (char[]) user_pattern, const (char[]) flags ){

    auto compiled = regex( user_pattern, flags );

    // matchAll yields every match, so the total is tallied first and
    // clamped afterwards when "g" was not requested.
    uint total = 0;
    foreach( hit; matchAll( user_string, compiled ) )
        total += 1;

    if( flags.indexOf( "g" ) == -1 )
        return total > 0 ? 1 : 0;

    return total;
} 

Output:

writeln( count_match( s, "[A-Z]+", "g" ) );  // 3
writeln( count_match( s, "[A-Z]+", "gi" ) ); // 7  

JavaScript code:

// Text under test: three upper-case words followed by four lower-case words.
var s = "ONE TWO THREE four five six seven";

// Same pattern twice: global only, then global + case-insensitive.
var rx1 = /[A-Z]+/g;
var rx2 = /[A-Z]+/gi;

// A global RegExp remembers lastIndex, so repeated exec() calls walk through
// the string and return null once no further match exists.
var counter;

for ( counter = 0; rx1.exec( s ) !== null; ++counter ) {}
document.write( counter + "<br>" ); // 3

for ( counter = 0; rx2.exec( s ) !== null; ++counter ) {}
document.write( counter ); // 7
Hide result

Update: I also tested with gcc 7.1.0. With 6.3.0 the output is 1 3 3, but with 7.1.0 the output is 1 3 7.

clang also produces the correct output. Thanks to igor-tandetnik.

0
2

, , ECMAScript, Igor Tandetnik, gcc 7.1.0 .

:

// Prints "yes" when the regex object carries the icase option, "no" otherwise.
// `==` binds tighter than `&`, so the mask has to be parenthesized before the comparison.
std::cout << ( ( rx.flags() & std::regex_constants::icase ) == std::regex_constants::icase ? "yes" : "no" ) << '\n';  

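If the icase flag is set, the condition is true; otherwise it is false. So the flag itself is stored; it is the matching that ignored it.
With gcc 7.1.0 the matching works correctly.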

clang , clang 3.8.0 . 3.7.1 .

Output with clang 3.8.0:

// Results the post reports for clang 3.8.0: the "gi" call prints 7,
// i.e. the case-insensitive flag is honored.
std::cout << count_match( s, "[A-Z]+" ) << '\n';          // 1 => First match
std::cout << count_match( s, "[A-Z]+", "g" ) << '\n';     // 3 => Global match
std::cout << count_match( s, "[A-Z]+", "gi" ) << '\n';    // 7 => Global match plus insensitive

enter image description here

, - clang 3.2 . .

, ,

0

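> First of all, as you can see, the line for search_flag is commented out, because it is ignored by std::regex_search, and I do not know why, since it is accepted by std::regex_replace.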

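Look at the name: format_first_only. Note the word "format". regex_replace does "format" its output, so the flag is meaningful there; a search has nothing to "format".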

regex_match regex_search ; ( regex_match ). , , ; , .


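> The main problem is that the icase flag is also ignored when the pattern is a character class → []. More precisely, when the pattern contains only capital letters or only small letters: [A-Z] or [a-z]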

icase , , ​​ .

  • It appears to have been fixed in libstdc++ between GCC 6.3 (2016) and GCC 7.1 (2017).
  • It appears to have been fixed in libc++ between Clang 3.2 (2012) and Clang 3.3 (2013).
0

Source: https://habr.com/ru/post/1685290/


All Articles