I have a very simple path design that I am trying to analyze using boost spirit.lex.
We have the following grammar:
token := [az]+ path := (token : path) | (token)
So, we are just talking about lowercase ASCII strings.
I have three examples: "xyz", "abc: xyz", "abc: xyz:".
The first two should be considered valid. A third that has a hind gut should not be considered valid. Unfortunately, I recognize the three parsers as valid. Grammar should not allow an empty token, but, apparently, the spirit does just that. What am I missing to let go of the third?
In addition, if you read the code below, in the comments there is another version of the parser, which requires that all paths end in half-columns. I can get the appropriate behavior when I activate these lines (that is, the deviation is "abc: xyz :;"), but this is not quite what I want.
Does anyone have any ideas?
Thanks.
#include <boost/config/warning_disable.hpp> #include <boost/spirit/include/qi.hpp> #include <boost/spirit/include/lex_lexertl.hpp> #include <boost/spirit/include/phoenix_operator.hpp> #include <iostream> #include <string> using namespace boost::spirit; using boost::phoenix::val; template<typename Lexer> struct PathTokens : boost::spirit::lex::lexer<Lexer> { PathTokens() { identifier = "[az]+"; separator = ":"; this->self.add (identifier) (separator) (';') ; } boost::spirit::lex::token_def<std::string> identifier, separator; }; template <typename Iterator> struct PathGrammar : boost::spirit::qi::grammar<Iterator> { template <typename TokenDef> PathGrammar(TokenDef const& tok) : PathGrammar::base_type(path) { using boost::spirit::_val; path = (token >> tok.separator >> path)[std::cerr << _1 << "\n"] | //(token >> ';')[std::cerr << _1 << "\n"] (token)[std::cerr << _1 << "\n"] ; token = (tok.identifier) [_val=_1] ; } boost::spirit::qi::rule<Iterator> path; boost::spirit::qi::rule<Iterator, std::string()> token; }; int main() { typedef std::string::iterator BaseIteratorType; typedef boost::spirit::lex::lexertl::token<BaseIteratorType, boost::mpl::vector<std::string> > TokenType; typedef boost::spirit::lex::lexertl::lexer<TokenType> LexerType; typedef PathTokens<LexerType>::iterator_type TokensIterator; typedef std::vector<std::string> Tests; Tests paths; paths.push_back("abc"); paths.push_back("abc:xyz"); paths.push_back("abc:xyz:"); /* paths.clear(); paths.push_back("abc;"); paths.push_back("abc:xyz;"); paths.push_back("abc:xyz:;"); */ for ( Tests::iterator iter = paths.begin(); iter != paths.end(); ++iter ) { std::string str = *iter; std::cerr << "*****" << str << "*****\n"; PathTokens<LexerType> tokens; PathGrammar<TokensIterator> grammar(tokens); BaseIteratorType first = str.begin(); BaseIteratorType last = str.end(); bool r = boost::spirit::lex::tokenize_and_parse(first, last, tokens, grammar); std::cerr << r << " " << (first==last) << "\n"; } }
source share