C++11 regex and GCC

According to https://gcc.gnu.org/onlinedocs/libstdc++/manual/status.html#status.iso.2011, the regex support of the C++11 standard should be complete in GCC. Can someone explain to me why this simple example

#include <iostream>
#include <string>
#include <regex>


using namespace std;


// Demo driver: runs three sample URLs through one regular expression and, for
// each URL, prints every sub-match (index 0 is the whole match) wrapped in
// square brackets, followed by a newline.
int main ()
{
    std::string string_array[] = {"http://www.cplusplus.com/reference/regex/regex_match/",
                                  "tcp://192.168.2.1:1234/hello/how/are/you",
                                  "https://mail.google.com/mail/u/0/?tab=wm#inbox/15178022db56df29?projector=1"};

    // Seven capture groups; they appear intended as scheme, slashes, host,
    // port, path, query and fragment, in that order.
    const std::regex url_pattern("^(?:([A-Za-z]+):)?(\\/{0,3})([0-9.\\-A-Za-z]+)(?::(\\d+))?(?:\\/([^?#]*))?(?:\\?([^#]*))?(?:#(.*))?$");

    for (const std::string& url : string_array)
    {
        std::smatch groups;
        // The boolean result is deliberately ignored: a failed match leaves
        // `groups` empty, so only the trailing newline is printed.
        std::regex_match(url, groups, url_pattern);

        for (const auto& group : groups)
        {
            std::cout << "[" << group << "] ";
        }

        std::cout << std::endl;
    }
    return 0;
}

leads to this result (note, for example, the incorrectly parsed port number in the second line, although there seem to be many other errors as well)

[http://www.cplusplus.com/reference/regex/regex_match/] [http] [//] [www.cplusplus.com/reference/regex] [] [regex_match/] [] [] 
[tcp://192.168.2.1:1234/hello/how/are/you] [tcp] [//] [192.168.2.1:1234/hello/how/are/you] [] [] [] [] 
[https://mail.google.com/mail/u/0/?tab=wm#inbox/15178022db56df29?projector=1] [https] [//] [mail.google.com/mail/u/0/?tab=wm] [] [] [] [inbox/15178022db56df29?projector=1] 

while its Python equivalent

import re

# Sample URLs to be split into their components.
string_array = [
    "http://www.cplusplus.com/reference/regex/regex_match/",
    "tcp://192.168.2.1:1234/hello/how/are/you",
    "https://mail.google.com/mail/u/0/?tab=wm#inbox/15178022db56df29?projector=1",
]

# Same pattern text as the C++ program; the doubled backslashes in this
# non-raw string literal collapse to single regex escapes.
e = re.compile("^(?:([A-Za-z]+):)?(\\/{0,3})([0-9.\\-A-Za-z]+)(?::(\\d+))?(?:\\/([^?#]*))?(?:\\?([^#]*))?(?:#(.*))?$")

# Print the tuple of capture groups (group 0, the whole match, is not part
# of groups()) for every URL.
for url in string_array:
    m = e.match(url)
    print(m.groups())

prints this correctly?

('http', '//', 'www.cplusplus.com', None, 'reference/regex/regex_match/', None, None)
('tcp', '//', '192.168.2.1', '1234', 'hello/how/are/you', None, None)
('https', '//', 'mail.google.com', None, 'mail/u/0/', 'tab=wm', 'inbox/15178022db56df29?projector=1')

I am using GCC 5.3.0 on Arch Linux.

edit:

I changed the program as follows, so that it also checks the regex's syntax_option_type flags:

#include <iostream>
#include <string>
#include <regex>


using namespace std;


// Demo driver: for each of three sample URLs, prints whether the regular
// expression matched and then every sub-match in square brackets; finally
// reports which regex grammar the pattern object says it was compiled with.
int main ()
{
    std::string string_array[] = {"http://www.cplusplus.com/reference/regex/regex_match/",
                                  "tcp://192.168.2.1:1234/hello/how/are/you",
                                  "https://mail.google.com/mail/u/0/?tab=wm#inbox/15178022db56df29?projector=1"};
    const std::regex url_pattern("^(?:([A-Za-z]+):)?(\\/{0,3})([0-9.\\-A-Za-z]+)(?::(\\d+))?(?:\\/([^?#]*))?(?:\\?([^#]*))?(?:#(.*))?$");

    for (const std::string& url : string_array)
    {
        std::smatch groups;
        const bool matched = std::regex_match(url, groups, url_pattern);
        std::cout << "match: " << matched << std::endl;

        // No newline after the groups: the next "match:" line continues on
        // the same output line.
        for (const auto& group : groups)
        {
            std::cout << "[" << group.str() << "] ";
        }
    }

    std::cout << std::endl;

    // Map the pattern's flags to a message; stays null (nothing printed) when
    // the flags equal none of the four grammars checked here.
    const char* syntax_message = nullptr;
    switch (url_pattern.flags())
    {
        case std::regex_constants::basic:
            syntax_message = "POSIX syntax was used";
            break;
        case std::regex_constants::awk:
            syntax_message = "POSIX awk syntax was used";
            break;
        case std::regex_constants::ECMAScript:
            syntax_message = "ECMA syntax was used";
            break;
        case std::regex_constants::egrep:
            syntax_message = "POSIX egrep syntax was used";
            break;
        default:
            break;
    }
    if (syntax_message != nullptr)
    {
        std::cout << syntax_message << std::endl;
    }

    return 0;
}

and, unexpectedly, this is the output I get:

match: 1
[http://www.cplusplus.com/reference/regex/regex_match/] [http] [//] [www.cplusplus.com/reference/regex] [] [regex_match/] [] [] match: 1
[tcp://192.168.2.1:1234/hello/how/are/you] [tcp] [//] [192.168.2.1:1234/hello/how/are/you] [] [] [] [] match: 1
[https://mail.google.com/mail/u/0/?tab=wm#inbox/15178022db56df29?projector=1] [https] [//] [mail.google.com/mail/u/0/?tab=wm] [] [] [] [inbox/15178022db56df29?projector=1] 
ECMA syntax was used

This really seems to be a compiler bug.

+4
source share
2 answers

:

groups() in Python returns only the capture groups (as a tuple), starting from group 1:

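Return a tuple containing all the subgroups of the match, from 1 up to however many groups are in the pattern.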

In C++, match_results additionally holds the whole match as its 0-th element (see the reference):

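If successful, it is not empty and contains a series of sub_match objects: the first sub_match element corresponds to the entire match, and, if the regex expression contained sub-expressions to be matched (i.e., parentheses-delimited groups), their corresponding sub-matches are stored as successive sub_match elements in the match_results object.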

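The \- in the 3rd group is not treated as an escaped literal hyphen; instead, it seems to create a range between . and A (= [.-A]), which also contains / and : and therefore lets the host group swallow the port and path.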

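Escaping the hyphen with a backslash is not supported inside POSIX bracket expressions, so it cannot be relied on here. Instead, place the hyphen where it cannot form a range, e.g. at the end of the character class ([0-9.A-Za-z-]+).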

So, in Python use

e = re.compile("^(?:([A-Za-z]+):)?(\\/{0,3})([0-9.A-Za-z-]+)(?::(\\d+))?(?:\\/([^?#]*))?(?:\\?([^#]*))?(?:#(.*))?$");

and in C++:

regex e("^(?:([A-Za-z]+):)?(\\/{0,3})([0-9.A-Za-z-]+)(?::(\\d+))?(?:\\/([^?#]*))?(?:\\?([^#]*))?(?:#(.*))?$");
// ...
for(int i=0; i<3; i++)
{
    smatch sm;
    regex_match (string_array[i],sm,e);

    for (unsigned i=1; i<sm.size(); ++i) // Here, start with the second element
    {
        cout << "[" << sm[i] << "] ";
    }

    cout << endl;
}

Now the Python and C++ programs produce the same results.

0

.

cout << "[" << sm[i] << "] ";

cout << "[" << sm[i].str() << "] ";

.

0

Source: https://habr.com/ru/post/1620427/


All Articles