【问题】
使用 ANTLR v3,完整的语法代码如下:
grammar preprocess;
//lexer grammar preprocess;
options{
	language=Java;
	output = AST;
}
@lexer::header {
//package com.mm.antlrv3demo;
import java.io.*;
import java.util.*;
}
@parser::header {
//package com.mm.antlrv3demo;
}
@lexer::members {
    //public static TokenStreamSelector selector; // must be assigned externally
    // NOTE(review): ifState and ifStates are not referenced anywhere else in the visible grammar.
    protected static Integer ifState = 1; // -1: no-else false, 0:false, 1: true
    protected static List ifStates = new ArrayList(); // holds nested if conditions
    // macro name -> List whose element 0 is the macro body and whose remaining elements are the
    // parameter names (filled in by DIRECTIVE); static, so it is shared by every lexer instance
    protected static Map defines = new Hashtable(); // holds the defines
    // parameter name -> one-element List holding the argument text given at the call site (filled in by IDENTIFIER)
    protected Map defineArgs = new Hashtable(); // holds the args for a macro call
    /*
    public void uponEOF() throws TokenStreamException, CharStreamException {
        try {
            selector.pop(); // return to old lexer/stream
            selector.retry();
        } catch (NoSuchElementException e) {
            // return a real EOF if nothing in stack
        }
    }
    */
	
	// Snapshot of a suspended input: the stream itself plus the marker returned by mark() on it.
	class SaveStruct {
      SaveStruct(CharStream input){
        this.input = input;
        this.marker = input.mark();
      }
      public CharStream input;
      public int marker;
     }
     // Suspended inputs, innermost last: pushed by INCLUDE, DIRECTIVE and IDENTIFIER, popped in nextToken().
     Stack<SaveStruct> includes = new Stack<SaveStruct>();
     
	// class SaveStruct_defines {
      // SaveStruct(CharStream input){
        // this.input = input;
        // this.marker = input.mark();
      // }
      // public CharStream input;
      // public int marker;
     // }
     // Stack<SaveStruct_defines> definesSaveStruct = new Stack<SaveStruct_defines>();
 
    // Returns the next token. When the current input (an included file or a macro body) hits EOF
    // and a suspended input is waiting on the stack, that input is restored and lexing continues there.
     public Token nextToken(){
       Token token = super.nextToken();
 
       if(token.getType() == Token.EOF && !includes.empty()){
        // We've got EOF and have non empty stack: resume the most recently suspended input
        // at the position recorded when it was pushed.
         SaveStruct ss = includes.pop();
         setCharStream(ss.input);
         input.rewind(ss.marker);
         //this should be used instead of super [like below] to handle exits from nested includes
         //it matters, when the 'include' token is the last in previous stream (using super, lexer 'crashes' returning EOF token)
         token = this.nextToken();
       }
 
      // Skip first token after switching on another input.
      // You need to use this rather than super as there may be nested include files
      // NOTE(review): presumably the token produced by a rule that called reset() carries a
      // negative start index, which is what this test detects - TODO confirm against the ANTLR 3 runtime.
       if(((CommonToken)token).getStartIndex() < 0)
         token = this.nextToken();
 
       return token;
     }
}
// Comments: a line comment up to and including its newline, or a non-greedy block comment.
// Both alternatives call skip(), so no token is produced.
// NOTE(review): the line-comment alternative requires a terminating newline, so a line comment
// on the very last line of the input without a trailing newline does not match it.
COMMENT
    :   ('//' ~('\n'|'\r')* '\r'? '\n') {skip();}
    |   ('/*' ( options {greedy=false;} : . )* '*/') {skip();}
    ;
// and lexer rule
// Lexer rule for an include directive followed by a quoted file name.
// Suspends the current input on the includes stack and continues lexing from the named file;
// the nextToken() override resumes the suspended input once the file reaches EOF.
// Throws Error (with the underlying exception as its cause) when the file cannot be opened.
INCLUDE    :    '#include' (WS)? f=STRING 
{
    // strip the surrounding double quotes matched by STRING
    String name = f.getText();
    name = name.substring(1,name.length()-1);
    try {
        // open the included file first, so that a failure leaves the include stack untouched
        CharStream included = new ANTLRFileStream(name);

        // save current lexer's state
        includes.push(new SaveStruct(input));

        // switch on new input stream
        setCharStream(included);
        reset();
    } catch(Exception fnf) {
        // keep the original exception as the cause so the real reason is not lost
        throw new Error("Cannot open file " + name, fnf);
    }
};
/*
fragment
NON_CR_LF	:	~('\r'|'\n');
fragment
TAB_SPACE
	:	(' ' | '\t');
*/
//DIRECTIVE 	:	('#define' WS* defineMacro=ID WS* defineText=STRING)
//DIRECTIVE 	:	('#define' WS* defineMacro=ID WS* defineText=( NON_CR_LF+ | (NON_CR_LF* (TAB_SPACE+ '\\' '\r'? '\n' NON_CR_LF+)*) ) )
// Body of a define: any run of characters other than CR/LF, where a backslash directly
// followed by an optional CR and an LF (a line continuation) is also consumed.
// The backslash itself stays in the matched text; DIRECTIVE removes it afterwards.
fragment
//MACRO_TEXT :    ( (('\\'){skip();System.out.println("skip line tail back slash");} '\r'? '\n')
//MACRO_TEXT :    ( ('\\'{$channel=HIDDEN;System.out.println("set back slash to hidden");} '\r'? '\n')
//MACRO_TEXT :    ( (('\\'){setText("");System.out.println("set back slash to empty");} '\r'? '\n')
MACRO_TEXT :    (('\\' '\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT :    (('\\' '\r'? '\n') | (~('\n')))*;
//MACRO_TEXT :    (('\\' '\n') | (~('\n')))*;
//MACRO_TEXT :    (('\\' '\n') | (~('\n' | '\r')))*;
//MACRO_TEXT :    ( ('\\' '\r'? '\n') | (~('\r'|'\n')))* -> ( ('\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT :    (('\\'{setText("");} '\r'? '\n') | (~('\r'|'\n')))*;
/*
MACRO_TEXT :    ((('\\' '\r'? '\n') | (~('\r'|'\n')))*)
	{
		String origMultiLineStr = getText();
		String newMultiLineStr = origMultiLineStr.replace("\\", "");
		setText(newMultiLineStr);
	};
*/
//MACRO_TEXT :    ( (('\\' '\r'? '\n')=>('\r' '\n')) | (~('\r'|'\n')))*;
// Lexer rule for a define line: macro name, optional parenthesized parameter list, optional body.
// The macro is stored in the defines table as a List: element 0 is the body text, elements 1..n are
// the parameter names. The directive itself produces no token (skip()).
DIRECTIVE
@init{
    List args = new ArrayList();
    boolean condition = true; // NOTE(review): never read in this rule
    
    String arg0Text = "";
    String arg1Text = "";
    String definedContent = "";
    String defineId = "";
}
:	('#define' WS* defineMacro=RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                // NOTE(review): replace() drops every backslash in the body, including those inside
                // string literals, not only the line-continuation ones.
                definedContent = definedContent.replace("\\", ""); // remove the trailing backslash of multi-line defines
                args.set(0, definedContent);
                
        
	        //process the define content, to check whether it contain the previous define
	        //if yes, then process it
	        
	        // NOTE(review): the input is switched to the macro body here, before this rule has matched
	        // its own line terminator below, so that terminator is then looked for in the macro body.
	        // This lines up with the reported errors of the form: mismatched character ... expecting newline at 1:0.
	        // save current lexer's state
	        SaveStruct ss = new SaveStruct(input);
	        includes.push(ss);
	        // switch on new input stream
	        setCharStream(new ANTLRStringStream(definedContent));
	        reset();
            }
        )? '\r'? '\n'
    {
        // register the macro under its name and drop the directive from the token stream
        defineId = defineMacro.getText();
        defines.put(defineId, args );
        skip();
    }
    );
// Lexer rule for identifiers, with macro substitution.
// The identifier is looked up first in defineArgs (arguments of the macro call being expanded),
// then in defines. A parameterless entry replaces the token text; an entry with parameters
// consumes the call arguments and re-lexes the macro body as a new input.
// NOTE(review): the ArrayList assigned to define in @init is always overwritten by the lookup below.
IDENTIFIER @init{
    List define = new ArrayList();
    List foundArgs = new ArrayList();
    
    String callArg0Text = "";
    String callArg1Text = "";
} :
    identifier=RAW_IDENTIFIER
    {
        // see if this is a macro argument
        define = (List)defineArgs.get(identifier.getText());
        if (define==null) {
            // see if this is a macro call
            define = (List)defines.get(identifier.getText());
        }
    }
    // gated predicate: only a macro that declares parameters (list size > 1) takes an argument list
    ( {(define!=null) && (define.size()>1)}?=> (WS|COMMENT)?
        // take in arguments if macro call requires them
        '('
        callArg0=EXPR
        {
            callArg0Text = callArg0.getText(); 
            foundArgs.add(callArg0Text);
        }
        ( COMMA callArg1=EXPR 
        {
            callArg1Text = callArg1.getText();
            foundArgs.add(callArg1Text);
        }
        )*
        { foundArgs.size()==define.size()-1 }? // better have right amount
        ')'
    | {!((define!=null) && (define.size()>1))}?=>
    )
{
if (define!=null) {
    String defineText = (String)define.get(0);
    if (define.size()==1) {
        //only have one value in list -> the defineText is the define para content -> just need replace directly
        // NOTE(review): the replacement text is not lexed again, so a parameterless macro whose
        // body names another macro is not expanded any further on this path.
        setText(defineText);
    } else {
        //add new dict pair: (para, call value)
        // NOTE(review): entries put into defineArgs are not removed anywhere in the visible grammar.
        for (int i=0;i<foundArgs.size();++i) {
            // treat macro arguments similar to local defines
            List arg = new ArrayList();
            arg.add((String)foundArgs.get(i));
            defineArgs.put( (String)define.get(1+i), arg );
        }
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);
        // switch on new input stream
        setCharStream(new ANTLRStringStream(defineText));
        reset();
    }
}
};
// Plain C-style identifier; a fragment, so it only appears as part of DIRECTIVE and IDENTIFIER.
fragment RAW_IDENTIFIER : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;
NUMBER : ('0'..'9') ('0'..'9'|'a'..'z'|'A'..'Z'|'_')* ; // allow alpha suffixes on numbers (i.e. L:long)
// group symbols into categories to parse EXPR
LEFT  : '(' | '[' | '{' ;
RIGHT : ')' | ']' | '}' ;
COMMA : ',' ;
OPERATOR : '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '`' | '|' | '~' ;
// One macro-call argument, used by IDENTIFIER: optional whitespace, an optional number or
// identifier, then optionally a bracketed group (with comma-separated nested EXPRs), a string or
// an operator, followed by another EXPR. A top-level COMMA or RIGHT is not consumed here.
fragment EXPR // allow just about anything without being ambiguous
    : (WS)? (NUMBER|IDENTIFIER)?
            (
                        ( LEFT EXPR ( COMMA EXPR )* RIGHT
            | STRING
            | OPERATOR // quotes, COMMA, LEFT, and RIGHT not in here
            )
            EXPR
        )?
    ;
//INT :	'0'..'9'+    ;
// Decimal literal with a fractional part and/or an exponent.
// NOTE(review): the third alternative (digits followed by an exponent without sign) overlaps with
// NUMBER, which also accepts digits followed by letters and digits - confirm which rule wins.
FLOAT
    :   ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
    |   '.' ('0'..'9')+ EXPONENT?
    |   ('0'..'9')+ EXPONENT
    ;
// A single whitespace character (space, tab, CR or LF), routed to the hidden channel.
WS  :   ( ' '
        | '\t'
        | '\r'
        | '\n'
        ) {$channel=HIDDEN;}
    ;
//RestSymbo	:	'{' | '}' | '&' | ';' | ',' | '+' | '-' | ')' | '(' | '~' | '/' | '`' | '$' | '@' | '%' | '^' | '#' | '\\' ;
// Double-quoted string literal: escape sequences or any character except backslash and double quote.
STRING
    :  '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
    ;
// Single-quoted character literal holding exactly one escape sequence or one plain character.
CHAR:  '\'' ( ESC_SEQ | ~('\''|'\\') ) '\''
    ;
// Exponent part of FLOAT: e or E, an optional sign, then one or more digits.
fragment
EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
// One hexadecimal digit, used by UNICODE_ESC.
fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
// Escape sequences accepted inside STRING and CHAR: single-character, unicode and octal escapes.
fragment
ESC_SEQ
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   UNICODE_ESC
    |   OCTAL_ESC
    ;
// Backslash followed by one to three octal digits; the three-digit form starts with 0..3.
fragment
OCTAL_ESC
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;
// Backslash, the letter u, then exactly four hexadecimal digits.
fragment
UNICODE_ESC
    :   '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
    ;
    
// Parser entry rule: zero or more include tokens.
header
	:	include*;
// A single INCLUDE token as produced by the lexer.
include	:	INCLUDE;//'#include ' '<' ID ('.h' | '.ddl') '>';
其中,这部分的代码:
//process the define content, to check whether it contain the previous define
//if yes, then process it
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRStringStream(definedContent));
reset();
是后来加进去的,目的是处理多层(嵌套)的宏定义,即形如:
#define B C
#define A B
的内容。
上面的完整的代码,用于处理内容:
| …… #define get_dev_var_value(a,b,c)    _get_dev_var_value((a),(b),METHODID(c)) //#define GET_DEV_VAR_VALUE(a,b) _get_dev_var_value((a),0,METHODID(b))   { LABEL "Message"; HELP "TEST"; DEFINITION { ……                   …… } } | 
结果变成了:
| line 1:0 mismatched character '_' expecting '\n'
line 1:0 mismatched character '"' expecting '\n'
line 1:19 mismatched character '<EOF>' expecting '"'
line 1:0 mismatched character 'R' expecting '\n'
line 1:0 mismatched character '0' expecting '\n'
line 1:0 mismatched character '"' expecting '\n'
line 1:26 mismatched character '<EOF>' expecting '"'
 
 get_dev_var_value((a),(b),METHODID(c))   { LABEL "Message"; HELP "TEST"; DEFINITION     {                           } } | 
即,多层(嵌套)的宏定义不仅没有被去除,而且宏的内容(即 MACRO_TEXT 匹配到的内容)也没有被去除。
【解决过程】
1.antlr语法换成:
// Variant 1 of the define rule: the switch to the macro body is moved into the final action,
// i.e. after the line terminator of the directive has been matched.
// NOTE(review): the macro body is pushed as a new input on every define, so it is lexed as
// ordinary text at the definition site - this matches the macro bodies showing up in the reported output.
DIRECTIVE
@init{
    List args = new ArrayList();
    boolean condition = true; // NOTE(review): never read in this rule
    
    String arg0Text = "";
    String arg1Text = "";
    String definedContent = "";
    String defineId = "";
}
:	('#define' WS* defineMacro=RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                definedContent = definedContent.replace("\\", ""); // remove the trailing backslash of multi-line defines
                args.set(0, definedContent);
            }
        )? '\r'? '\n'
    {
        defineId = defineMacro.getText();
        defines.put(defineId, args );
        skip();
        
        //process the define content, to check whether it contain the previous define
        //if yes, then process it
        
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);
        // switch on new input stream (definedContent is still the empty string when the define has no body)
        setCharStream(new ANTLRStringStream(definedContent));
        reset();
    }
    );结果是:
| _get_dev_var_value((a),(b),METHODID(c))_get_dev_var_value((a),(0),METHODID(b)) METHOD message_methode LABEL "Message"; HELP "TEST"; DEFINITION     {                                    } | 
2.再换成:
// Variant 2 of the define rule: the switch to the macro body is placed in the @after section.
// NOTE(review): when the generated lexer runs @after relative to emitting or skipping the token
// is not visible here - confirm in the generated code.
DIRECTIVE
@init{
    List args = new ArrayList();
    boolean condition = true; // NOTE(review): never read in this rule
    
    String arg0Text = "";
    String arg1Text = "";
    String definedContent = "";
    String defineId = "";
}
@after{
	//process the define content, to check whether it contain the previous define
	//if yes, then process it
	
// save current lexer's state
	SaveStruct ss = new SaveStruct(input);
	includes.push(ss);
	// switch on new input stream
	setCharStream(new ANTLRStringStream(definedContent));
	reset();
}
:	('#define' WS* defineMacro=RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                definedContent = definedContent.replace("\\", ""); // remove the trailing backslash of multi-line defines
                args.set(0, definedContent);
            }
        )? '\r'? '\n'
    {
        // register the macro under its name and drop the directive from the token stream
        defineId = defineMacro.getText();
        defines.put(defineId, args );
        skip();
    }
    );结果是:
|   METHOD message_methode ……                   } } | 
3.又换成:
// Variant 3 of the define rule: @after is left empty and the switch to the macro body is done
// in a trailing action placed after the parenthesized rule body.
DIRECTIVE
@init{
    List args = new ArrayList();
    boolean condition = true; // NOTE(review): never read in this rule
    
    String arg0Text = "";
    String arg1Text = "";
    String definedContent = "";
    String defineId = "";
}
@after{
}
:	('#define' WS* defineMacro=RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
        (
            ( '(' // get arguments if you find them (no spaces before left paren)
                (WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
                ( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
              ')'
            | ' '|'\t'|'\f'
            )
            ( options{greedy=true;}: ' '|'\t'|'\f' )*
            // store the text verbatim - tokenize when called
            macroText=MACRO_TEXT 
            {
                definedContent = macroText.getText();
                definedContent = definedContent.replace("\\", ""); // remove the trailing backslash of multi-line defines
                args.set(0, definedContent);
            }
        )? '\r'? '\n'
    {
        // register the macro under its name and drop the directive from the token stream
        defineId = defineMacro.getText();
        defines.put(defineId, args );
        skip();
    }
    )
    {
	//process the define content, to check whether it contain the previous define
	//if yes, then process it
	
	// save current lexer's state
	SaveStruct ss = new SaveStruct(input);
	includes.push(ss);
	// switch on new input stream
	setCharStream(new ANTLRStringStream(definedContent));
	reset();
    };结果是:
|   METHOD message_methode …… _get_dev_var_value((a),(0),METHODID(b));          } } | 
4.再去试试,用:
// Variant 4: the identifier rule with a debug print added to the lookup.
// NOTE(review): the else branch runs when the name was found in defineArgs, i.e. when the
// identifier is an argument of the macro call being expanded, so the printed text is the
// argument value rather than a macro body.
IDENTIFIER @init{
    List define = new ArrayList();
    List foundArgs = new ArrayList();
    
    String callArg0Text = "";
    String callArg1Text = "";
} :
    identifier=RAW_IDENTIFIER
    {
        // see if this is a macro argument
        define = (List)defineArgs.get(identifier.getText());
        if (define==null) {
            // see if this is a macro call
            define = (List)defines.get(identifier.getText());
        }
        else
        {
        	//is normal macro replacement
        	System.out.println("normal define call=" + (String)define.get(0));
        }
    }
    // gated predicate: only a macro that declares parameters (list size > 1) takes an argument list
    ( {(define!=null) && (define.size()>1)}?=> (WS|COMMENT)?
        // take in arguments if macro call requires them
        '('
        callArg0=EXPR
        {
            callArg0Text = callArg0.getText(); 
            foundArgs.add(callArg0Text);
        }
        ( COMMA callArg1=EXPR 
        {
            callArg1Text = callArg1.getText();
            foundArgs.add(callArg1Text);
        }
        )*
        { foundArgs.size()==define.size()-1 }? // better have right amount
        ')'
    | {!((define!=null) && (define.size()>1))}?=>
    )
{
if (define!=null) {
    String defineText = (String)define.get(0);
    if (define.size()==1) {
        //only have one value in list -> the defineText is the define para content -> just need replace directly
        setText(defineText);
    } else {
        //add new dict pair: (para, call value)
        // NOTE(review): entries put into defineArgs are not removed anywhere in the visible grammar.
        for (int i=0;i<foundArgs.size();++i) {
            // treat macro arguments similar to local defines
            List arg = new ArrayList();
            arg.add((String)foundArgs.get(i));
            defineArgs.put( (String)define.get(1+i), arg );
        }
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);
        // switch on new input stream
        setCharStream(new ANTLRStringStream(defineText));
        reset();
    }
}
};处理:
| …… #define get_dev_var_value(d,e,f) _get_dev_var_value((d),(e),METHODID(f)) #define GET_DEV_VAR_VALUE(a,b) get_dev_var_value(a,0,b) //#define GET_DEV_VAR_VALUE(a,b) _get_dev_var_value((a),0,METHODID(b))   { …… GET_DEV_VAR_VALUE("Choose between \n 3, 5, 9, 17 or 33 points \n No of points:", count);          …… } | 
得到:
| normal define call=a normal define call=b normal define call="Choose between \n 3, 5, 9, 17 or 33 points \n No of points:" normal define call= count normal define call=a normal define call=0 normal define call=b normal define call="Choose between \n 3, 5, 9, 17 or 33 points \n No of points:" normal define call= transfer_function normal define call=a normal define call=0 normal define call=b 
 
 _get_dev_var_value((d),(e),METHODID(f))_get_dev_var_value((a),(0),METHODID(b)) METHOD message_methode ……                   } | 
【总结】
截止目前还是没有搞定。