// Copyright (c) 2001-2010 Hartmut Kaiser // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // This example is the equivalent to the following lex program: /* //[wcp_flex_version %{ int c = 0, w = 0, l = 0; %} word [^ \t\n]+ eol \n %% {word} { ++w; c += yyleng; } {eol} { ++c; ++l; } . { ++c; } %% main() { yylex(); printf("%d %d %d\n", l, w, c); } //] */ // Its purpose is to do the word count function of the wc command in UNIX. It // prints the number of lines, words and characters in a file. // // The example additionally demonstrates how to use the add_pattern(...)(...) // syntax to define lexer patterns. These patterns are essentially parameter- // less 'macros' for regular expressions, allowing to simplify their // definition. // #define BOOST_SPIRIT_LEXERTL_DEBUG #define BOOST_VARIANT_MINIMIZE_SIZE #include //[wcp_includes #include #include #include #include #include //] #include #include #include "example.hpp" //[wcp_namespaces using namespace boost::spirit; using namespace boost::spirit::ascii; //] /////////////////////////////////////////////////////////////////////////////// // Token definition: We use the lexertl based lexer engine as the underlying // lexer type. /////////////////////////////////////////////////////////////////////////////// //[wcp_token_ids enum tokenids { IDANY = lex::min_token_id + 10 }; //] //[wcp_token_definition template struct word_count_tokens : lex::lexer { word_count_tokens() { // define patterns (lexer macros) to be used during token definition // below this->self.add_pattern ("WORD", "[^ \t\n]+") ; // define tokens and associate them with the lexer word = "{WORD}"; // reference the pattern 'WORD' as defined above // this lexer will recognize 3 token types: words, newlines, and // everything else this->self.add (word) // no token id is needed here ('\n') // characters are usable as tokens as well (".", IDANY) // string literals will not be esacped by the library ; } // the token 'word' exposes the matched string as its parser attribute lex::token_def word; }; //] /////////////////////////////////////////////////////////////////////////////// // Grammar definition /////////////////////////////////////////////////////////////////////////////// //[wcp_grammar_definition template struct word_count_grammar : qi::grammar { template word_count_grammar(TokenDef const& tok) : word_count_grammar::base_type(start) , c(0), w(0), l(0) { using boost::phoenix::ref; using boost::phoenix::size; start = *( tok.word [++ref(w), ref(c) += size(_1)] | lit('\n') [++ref(c), ++ref(l)] | qi::token(IDANY) [++ref(c)] ) ; } std::size_t c, w, l; qi::rule start; }; //] /////////////////////////////////////////////////////////////////////////////// //[wcp_main int main(int argc, char* argv[]) { /*< Define the token type to be used: `std::string` is available as the type of the token attribute >*/ typedef lex::lexertl::token< char const*, boost::mpl::vector > token_type; /*< Define the lexer type to use implementing the state machine >*/ typedef lex::lexertl::lexer lexer_type; /*< Define the iterator type exposed by the lexer type >*/ typedef word_count_tokens::iterator_type iterator_type; // now we use the types defined above to create the lexer and grammar // object instances needed to invoke the parsing process word_count_tokens word_count; // Our lexer word_count_grammar g (word_count); // Our parser // read in the file int memory std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); char const* first = str.c_str(); char const* last = &first[str.size()]; /*< Parsing is done based on the the token stream, not the character stream read from the input. The function `tokenize_and_parse()` wraps the passed iterator range `[first, last)` by the lexical analyzer and uses its exposed iterators to parse the toke stream. >*/ bool r = lex::tokenize_and_parse(first, last, word_count, g); if (r) { std::cout << "lines: " << g.l << ", words: " << g.w << ", characters: " << g.c << "\n"; } else { std::string rest(first, last); std::cerr << "Parsing failed\n" << "stopped at: \"" << rest << "\"\n"; } return 0; } //]