I am building a parser for a command language that I've pieced together from various samples. I've read the Boost Spirit Qi and Lex docs, and I think I understand the basics, but from what I've read I should avoid attributes and use utree instead. The documentation I've found on utree is not much help, though. Given the code below, I have the following questions:
- How do I annotate the parser to create an AST using utree?
- How do I walk the utree after it is built to discover what was parsed, both for token-only commands such as SET DEBUG ON and for commands with values such as LOAD "file.ext" or SET DRIVE C:? (There is a sketch of the kind of traversal I have in mind just after this list.)
- I want to add a comment character ("!"). How can I ignore everything after it, except when it occurs inside a quoted string?
- Why doesn't my error handler get called when I give it invalid input?
- How can I make the command tokens case-insensitive without changing the contents of a quoted string?
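To make the utree-walking question concrete, this is roughly the traversal I imagine writing once the grammar actually fills a utree. It is only a sketch of what I am hoping for, not code I have working; I'm assuming the parse of something like SET DEBUG ON comes back as a utree list, and that the stream operator from support_utree.hpp can print the scalar nodes:

#include <boost/spirit/include/support_utree.hpp>
#include <iostream>
#include <string>

// Sketch only: recursively dump whatever the parse put into the utree
void dump (boost::spirit::utree const& Node, int Depth = 0)
{
    using boost::spirit::utree;
    using boost::spirit::utree_type;

    std::string indent (Depth * 2, ' ');

    if (Node.which () == utree_type::list_type)
    {
        // A list node: report its size and recurse into each child
        std::cout << indent << "list of " << Node.size () << " item(s)" << std::endl;
        for (utree::const_iterator i = Node.begin (); i != Node.end (); ++i)
            dump (*i, Depth + 1);
    }
    else
    {
        // A scalar node (symbol, string, int, ...): print it directly
        std::cout << indent << Node << std::endl;
    }
}

If that is the wrong way to inspect a utree, that is exactly the kind of guidance I'm after.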
#include <Windows.h>
#include <conio.h>
#include <string>
#include <vector>
#include <iostream>

#define BOOST_SPIRIT_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_utree.hpp>   // boost::spirit::utree

using namespace std;
using namespace boost::spirit;
using boost::spirit::utree;

//
// Tokens used by the command grammar
//
template <typename Lexer>
struct command_tokens : lex::lexer <Lexer>
{
    command_tokens () :
        //
        // Verbs, with abbreviation (just enough characters to make each unique)
        //
        boot       ("B(O(O(T)?)?)?"),
        exit       ("E(X(I(T)?)?)?"),
        help       ("H(E(L(P)?)?)?"),
        dash_help  ("-H(E(L(P)?)?)?"),
        slash_help ("\\/H(E(L(P)?)?)?"),
        load       ("L(O(A(D)?)?)?"),
        quit       ("Q(U(I(T)?)?)?"),
        set        ("SE(T)?"),
        show       ("SH(O(W)?)?"),
        //
        // Nouns, with abbreviation (the minimum number of characters is usually 3,
        // but may be more to ensure uniqueness)
        //
        debug ("DEB(U(G)?)?"),
        drive ("DRI(V(E)?)?"),
        trace ("TRA(C(E)?)?"),
        //
        // Qualifiers
        //
        on  ("ON"),
        off ("OFF"),
        //
        // Tokens to pass back to the grammar
        //
        quoted_string ("...")
    {
        using namespace boost::spirit::lex;

        //
        // Associate the tokens with the lexer
        //
        this->self
            = boot
            | exit
            | help
            | dash_help
            | slash_help
            | load
            | quit
            | set
            | show
            | debug
            | drive
            | trace
            | off
            | on
            | quoted_string
            ;

        //
        // Define whitespace to ignore: space, tab, newline
        //
        this->self ("WS") = lex::token_def <> ("[ \t\n]+");
    }

    lex::token_def <>       boot;
    lex::token_def <>       dash_help;
    lex::token_def <>       debug;
    lex::token_def <string> drive;
    lex::token_def <>       exit;
    lex::token_def <>       help;
    lex::token_def <>       load;
    lex::token_def <>       off;
    lex::token_def <>       on;
    lex::token_def <>       quit;
    lex::token_def <string> quoted_string;
    lex::token_def <>       set;
    lex::token_def <>       show;
    lex::token_def <>       slash_help;
    lex::token_def <>       trace;
};

//
// Display parse error
//
struct error_handler_
{
    template <typename, typename, typename>
    struct result { typedef void type; };

    template <typename Iterator>
    void operator () (qi::info const& What, Iterator Err_pos, Iterator Last) const
    {
        cout << "Error! Expecting " << What
             << " here: \"" << string (Err_pos, Last) << "\"" << endl;
    }
};

boost::phoenix::function <error_handler_> const error_handler = error_handler_ ();

//
// Grammar describing the valid commands
//
template <typename Iterator, typename Lexer>
struct command_grammar : qi::grammar <Iterator>
{
    template <typename TokenDef>
    command_grammar (command_tokens <TokenDef> const& Tok)
        : command_grammar::base_type (start)
    {
        using qi::on_error;
        using qi::fail;
        using qi::char_;

        start = +commands;

        commands =
            ( boot_command
            | exit_command
            | help_command
            | load_command
            | set_command
            | show_command
            );

        boot_command = Tok.boot;
        exit_command = Tok.exit | Tok.quit;
        help_command = Tok.help | Tok.dash_help | Tok.slash_help;
        load_command = Tok.load >> Tok.quoted_string;
        set_command  = Tok.set;
        show_command = Tok.show;

        set_property   = debug_property | drive_property | trace_property;
        debug_property = Tok.debug >> on_off;
        drive_property = Tok.drive >> char_ ("A-Z") >> char_ (":");
        trace_property = Tok.trace >> on_off;
        on_off         = Tok.on | Tok.off;

        BOOST_SPIRIT_DEBUG_NODE (start);
        BOOST_SPIRIT_DEBUG_NODE (commands);
        BOOST_SPIRIT_DEBUG_NODE (boot_command);
        BOOST_SPIRIT_DEBUG_NODE (exit_command);
        BOOST_SPIRIT_DEBUG_NODE (help_command);
        BOOST_SPIRIT_DEBUG_NODE (load_command);
        BOOST_SPIRIT_DEBUG_NODE (quit_command);
        BOOST_SPIRIT_DEBUG_NODE (set_command);
        BOOST_SPIRIT_DEBUG_NODE (show_command);
        BOOST_SPIRIT_DEBUG_NODE (set_property);
        BOOST_SPIRIT_DEBUG_NODE (debug_property);
        BOOST_SPIRIT_DEBUG_NODE (drive_property);
        BOOST_SPIRIT_DEBUG_NODE (trace_property);
        BOOST_SPIRIT_DEBUG_NODE (target_property);

        on_error <fail> (start, error_handler (_4, _3, _2));
    }

    qi::rule <Iterator> start;
    qi::rule <Iterator> commands;
    qi::rule <Iterator> boot_command;
    qi::rule <Iterator> exit_command;
    qi::rule <Iterator> help_command;
    qi::rule <Iterator> load_command;
    qi::rule <Iterator> quit_command;
    qi::rule <Iterator> set_command;
    qi::rule <Iterator> show_command;
    qi::rule <Iterator> set_property;
    qi::rule <Iterator> debug_property;
    qi::rule <Iterator, string ()> drive_property;
    qi::rule <Iterator> target_property;
    qi::rule <Iterator> trace_property;
    qi::rule <Iterator> on_off;
};

int main (int Argc, PCHAR Argv [])
{
    typedef std::string::iterator base_iterator_type;
    typedef lex::lexertl::token <base_iterator_type> token_type;
    typedef lex::lexertl::lexer <token_type> lexer_type;
    typedef command_tokens <lexer_type> command_tokens;
    typedef command_tokens::iterator_type iterator_type;
    typedef command_grammar <iterator_type, command_tokens::lexer_def> command_grammar;

    command_tokens  tokens;
    command_grammar commands (tokens);

    string input = "SET DRIVE C:";
    string::iterator it = input.begin ();
    iterator_type iter = tokens.begin (it, input.end ());
    iterator_type end  = tokens.end ();

    string ws ("WS");

    bool result = lex::tokenize_and_phrase_parse (it, input.end (), tokens, commands,
                                                  qi::in_state (ws) [tokens.self]);

    if (result)
    {
        cout << "Parse succeeded" << endl;
    }
    else
    {
        string rest (it, input.end ());
        cout << "Parse failed" << endl;
        cout << "Stopped at " << rest << endl;
    }

    return 0;
}   // End of main
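For the comment character, the only approach I have thought of so far is to let the lexer swallow comments in the same "WS" state the grammar already skips, i.e. replacing the whitespace definition in command_tokens's constructor with something like the following. This is untested: the "![^\n]*" pattern is my guess at lexertl regex syntax, and my (possibly wrong) assumption is that a "!" inside quotes never reaches the skipper because quoted_string is matched as a single token in the default state.

// Tentative: skip whitespace and, additionally, everything from "!" to end of line
this->self ("WS")
    = lex::token_def <> ("[ \t\n]+")
    | lex::token_def <> ("![^\n]*")
    ;

Confirmation either way on that assumption would be appreciated.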