//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is the equivalent to the following lex program:
//
//    %{
//    /* INITIAL is the default start state. COMMENT is our new */
//    /* state where we remove comments.                        */
//    %}
//
//    %s COMMENT
//    %%
//    "//".*        ;
//    "/*"          BEGIN COMMENT;
//    .             ECHO;
//    [\n]          ECHO;
//    <COMMENT>"*/" BEGIN INITIAL;
//    <COMMENT>.    ;
//    <COMMENT>[\n] ;
//    %%
//
//    main()
//    {
//      yylex();
//    }
//
//  Its purpose is to strip comments out of C code.
//
//  Additionally this example demonstrates the use of lexer states to structure
//  the lexer definition.

#define BOOST_SPIRIT_LEXERTL_DEBUG

// NOTE(review): the original include targets were lost to angle-bracket
// stripping during extraction; reconstructed from the upstream Boost Spirit
// lex examples — confirm against the repository copy.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//  lexer type.
/////////////////////////////////////////////////////////////////////////////// enum tokenids { IDANY = lex::min_token_id + 10, IDEOL = lex::min_token_id + 11 }; /////////////////////////////////////////////////////////////////////////////// // Simple custom semantic action function object used to print the matched // input sequence for a particular token template struct echo_input_functor { echo_input_functor (std::basic_ostream& os_) : os(os_) {} // This is called by the semantic action handling code during the lexing template void operator()(Iterator const& b, Iterator const& e , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)& , std::size_t&, Context&) const { os << std::string(b, e); } std::basic_ostream& os; }; template inline echo_input_functor echo_input(std::basic_ostream& os) { return echo_input_functor(os); } /////////////////////////////////////////////////////////////////////////////// // Another simple custom semantic action function object used to switch the // state of the lexer struct set_lexer_state { set_lexer_state(char const* state_) : state(state_) {} // This is called by the semantic action handling code during the lexing template void operator()(Iterator const&, Iterator const& , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)& , std::size_t&, Context& ctx) const { ctx.set_state_name(state.c_str()); } std::string state; }; /////////////////////////////////////////////////////////////////////////////// template struct strip_comments_tokens : lex::lexer { strip_comments_tokens() : strip_comments_tokens::base_type(lex::match_flags::match_default) { // define tokens and associate them with the lexer cppcomment = "\\/\\/[^\n]*"; // '//[^\n]*' ccomment = "\\/\\*"; // '/*' endcomment = "\\*\\/"; // '*/' any = "."; eol = "\n"; // The following tokens are associated with the default lexer state // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is // strictly optional. 
this->self = cppcomment | ccomment [ set_lexer_state("COMMENT") ] | eol [ echo_input(std::cout) ] | any [ echo_input(std::cout) ] ; // The following tokens are associated with the lexer state 'COMMENT'. this->self("COMMENT") = endcomment [ set_lexer_state("INITIAL") ] | "\n" | "." ; } lex::token_def<> cppcomment, ccomment, endcomment, any, eol; }; /////////////////////////////////////////////////////////////////////////////// int main(int argc, char* argv[]) { // iterator type used to expose the underlying input stream typedef std::string::iterator base_iterator_type; // lexer type typedef lex::lexertl::actor_lexer > lexer_type; // now we use the types defined above to create the lexer and grammar // object instances needed to invoke the parsing process strip_comments_tokens strip_comments; // Our lexer // No parsing is done alltogether, everything happens in the lexer semantic // actions. std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1])); base_iterator_type first = str.begin(); bool r = lex::tokenize(first, str.end(), strip_comments); if (!r) { std::string rest(first, str.end()); std::cerr << "Lexical analysis failed\n" << "stopped at: \"" << rest << "\"\n"; } return 0; }