1.真正去实现的时候,可以参考:
Tips on designing a preprocessor for C++ using Antlr
中的例子,去添加对应的action code。
2.参考:
How to do preprocessing in antlr v4?
->
A list of all available downlads at Soft Gems
->
Windows Resource File Parser + Converter
下载到 312KB的rc-converter.zip。
3.又从:
[antlr-interest] completed an ANTLR-based cpp preprocessor (#include, #if, #define, etc)
拷贝了一份代码,供后续参考:
//copy from
//http://www.antlr3.org/pipermail/antlr-interest/2004-July/008778.html
//name to:
//cppLexer.g
// Author: Eric Mahurin
// License: just give me credit
options {
language="Java";
}
{
import java.io.*;
import java.util.*;
import antlr.*;
/**
 * Command-line driver: lexes standard input with cppLexer and echoes the text of
 * every token it receives to standard output.
 */
class cpp implements cppLexerTokenTypes {
    /** Selector shared with the lexers; nested lexers are pushed onto it and popped at their EOF. */
    public static TokenStreamSelector selector = new TokenStreamSelector();

    /**
     * Reads tokens from the selector until the EOF token and prints each token's text.
     * Any exception is reported on standard error and ends the run.
     */
    public static void main(String[] args) {
        try {
            // the root lexer reads stdin; further lexers are stacked on the selector as needed
            cppLexer rootLexer = new cppLexer(new DataInputStream(System.in));
            rootLexer.selector = selector;
            selector.select(rootLexer);
            Token tok = selector.nextToken();
            while (tok.getType() != Token.EOF_TYPE) {
                System.out.print(tok.getText());
                tok = selector.nextToken();
            }
        } catch(Exception e) {
            System.err.println("exception: "+e);
        }
    }
}
}
// Lexer that also acts as the preprocessor: its rules interpret directives and
// expand macros while tokenizing. The state below that is static is shared by every
// cppLexer instance, including the sub-lexers created for included files and macro bodies.
class cppLexer extends Lexer;
options {
testLiterals = false;
k = 4;
}
// ENDIF has no rule of its own; DIRECTIVE assigns it as a token type to tell the
// token-discarding loops where to stop.
tokens {
ENDIF ;
}
{
public static TokenStreamSelector selector; // must be assigned externally
// State of the innermost conditional. -1 also covers "a branch was already taken",
// so a later else/elsif is not activated (only state 0 can switch to 1).
protected static Integer ifState = 1; // -1: no-else false, 0:false, 1: true
protected static List ifStates = new ArrayList(); // holds nested if conditions
// macro name -> List whose element 0 is the macro text and whose remaining
// elements are the formal argument names
protected static Map defines = new Hashtable(); // holds the defines
// per-instance: formal argument name -> single-element List holding the actual text
protected Map defineArgs = new Hashtable(); // holds the args for a macro call
// Called when this lexer reaches end of input: switch back to the lexer that was
// active before this one and retry the token request there.
public void uponEOF() throws TokenStreamException, CharStreamException {
try {
selector.pop(); // return to old lexer/stream
selector.retry();
} catch (NoSuchElementException e) {
// return a real EOF if nothing in stack
}
}
}
// Preprocessor directive: '#' followed by include, define, undef, ifdef, ifndef,
// else, elsif or endif.
//  - include: pushes a sub-lexer for the named file onto the selector.
//  - define / undef: add or remove an entry in the shared defines map.
//  - ifdef / ifndef / else / elsif / endif: maintain ifState and ifStates; a branch
//    that is not taken is discarded by pulling tokens from the selector until a token
//    of type ENDIF arrives (produced by a matching endif, or by an else/elsif that
//    becomes active).
// Changes from the earlier version: the discard loops also stop at end of input (an
// unterminated conditional used to loop forever), and an endif without an open
// conditional is caught via IndexOutOfBoundsException and reported.
DIRECTIVE {
    List args = new ArrayList();   // element 0: macro text, elements 1..n: formal argument names
    boolean condition = true;      // ifdef/ifndef polarity, then the evaluated result
} : '#'
  ( "include" (WS)? includeFile:STRING {
        if (ifState==1) {
            // found this in examples/java/includeFile
            String name = includeFile.getText();
            // strip the surrounding quote characters
            name = name.substring(1,name.length()-1);
            try {
                cppLexer sublexer = new cppLexer(new DataInputStream(new FileInputStream(name)));
                sublexer.defines = defines; // want defines to be persistent
                sublexer.setFilename(name);
                selector.push(sublexer);
                selector.retry();
            } catch (FileNotFoundException fnf) {
                System.err.println("cannot find file "+name);
            }
        }
    }
  | "define" WS defineMacro:RAW_IDENTIFIER
    {
        args.add(""); // first element will hold the macro text
    }
    (
        ( '(' // get arguments if you find them (no spaces before left paren)
          (WS)? defineArg0:RAW_IDENTIFIER (WS)? {args.add(defineArg0.getText());}
          ( COMMA (WS)? defineArg1:RAW_IDENTIFIER (WS)? {args.add(defineArg1.getText());} )*
          ')'
        | ' '|'\t'|'\f'
        )
        ( options{greedy=true;}: ' '|'\t'|'\f' )*
        // store the text verbatim - tokenize when called
        defineText:MACRO_TEXT {args.set(0,defineText.getText());}
    )? '\n' {newline();}
    { if (ifState==1) {
        defines.put( defineMacro.getText(), args );
        $setType(Token.SKIP);
    }}
  | "undef" WS undefMacro:RAW_IDENTIFIER { if (ifState==1) {
        defines.remove(undefMacro.getText());
        $setType(Token.SKIP);
    }}
  | ("ifdef"|"ifndef"{condition=false;})
    WS ifMacro:RAW_IDENTIFIER
    {
        // remember the enclosing state so endif can restore it
        ifStates.add(ifState);
        if (ifState==1) {
            condition = (defines.containsKey(ifMacro.getText())==condition);
            ifState = condition?1:0;
        } else {
            // enclosing region is inactive, so no branch of this conditional may activate
            ifState = -1;
        }
        if (ifState==1) {
            $setType(Token.SKIP);
        } else {
            // gobble up tokens until ENDIF (could be caused by else)
            for (;;) {
                try {
                    Token skipped = selector.nextToken();
                    // also stop at end of input so an unterminated conditional cannot loop forever
                    if (skipped.getType()==ENDIF || skipped.getType()==Token.EOF_TYPE) break;
                } catch (TokenStreamRetryException r) {
                    // just continue if someone tried retry
                }
            }
            // retry in case we switched lexers
            selector.retry();
        }
    }
  |
    ( "else" // treat like elsif (true)
    | "elsif" WS elsifMacro:RAW_IDENTIFIER {
          condition=defines.containsKey(elsifMacro.getText());
      }
    )
    {
        if (ifState==1) {
            // previous if/elsif was taken - discard rest
            ifState = -1;
            for (;;) {
                try {
                    Token skipped = selector.nextToken();
                    // also stop at end of input so an unterminated conditional cannot loop forever
                    if (skipped.getType()==ENDIF || skipped.getType()==Token.EOF_TYPE) break;
                } catch (TokenStreamRetryException r) {
                    // just continue if someone tried retry
                }
            }
            // retry in case we switched lexers
            selector.retry();
        } else if (ifState==0 && condition) {
            // "elsif" (true) or "else"
            $setType(ENDIF);
            ifState = 1;
        }
    }
  | "endif" {
        condition = (ifState==1);
        try {
            // return to previous if state
            ifState = (Integer)ifStates.remove(ifStates.size()-1);
            if (condition) {
                $setType(Token.SKIP);
            } else {
                // tell if/else/elsif to stop discarding tokens
                $setType(ENDIF);
            }
        } catch (IndexOutOfBoundsException e) {
            // endif with no if: the supertype is caught because the concrete exception
            // raised by removing index -1 differs between JDK versions
            System.err.println("endif without matching ifdef/ifndef");
        }
    }
  );
// Identifier with macro expansion.
// The name is looked up first among the arguments of the macro call this lexer is
// expanding (defineArgs) and then, only when a token is being created, among the
// shared defines. If the definition has formal arguments, a parenthesised argument
// list with the same number of entries must follow. A matching definition is then
// either substituted as plain text (when invoked from another rule, no token is
// created) or tokenized by a sub-lexer pushed onto the selector.
// Change from the earlier version: the macro text is read through a StringReader
// instead of the deprecated StringBufferInputStream, which keeps only the low byte
// of each character.
IDENTIFIER options {testLiterals=true;} {
    List define = new ArrayList();
    List args = new ArrayList();   // actual argument texts of a macro call
} :
    identifier:RAW_IDENTIFIER
    {
        // see if this is a macro argument
        define = (List)defineArgs.get(identifier.getText());
        if (_createToken && define==null) {
            // see if this is a macro call
            define = (List)defines.get(identifier.getText());
        }
    }
    ( { (define!=null) && (define.size()>1) }? (WS|COMMENT)?
      // take in arguments if macro call requires them
      '('
      callArg0:EXPR {args.add(callArg0.getText());}
      ( COMMA callArg1:EXPR {args.add(callArg1.getText());} )*
      { args.size()==define.size()-1 }? // better have right amount
      ')'
    | { !((define!=null) && (define.size()>1)) }?
    )
    { if (define!=null) {
        String defineText = (String)define.get(0);
        if (!_createToken) {
            // just substitute text if called from EXPR - no token created
            $setText(defineText);
        } else {
            // create a new lexer to handle the macro text
            cppLexer sublexer = new cppLexer(new StringReader(defineText));
            for (int i=0;i<args.size();++i) {
                // treat macro arguments similar to local defines
                List arg = new ArrayList();
                arg.add((String)args.get(i));
                // formal names start at index 1; index 0 is the macro text
                sublexer.defineArgs.put( (String)define.get(1+i), arg );
            }
            selector.push(sublexer);
            // retry in new lexer
            selector.retry();
        }
    }};
// Quoted literal, double- or single-quoted. Inside the quotes a backslash consumes
// the following character, so an escaped quote does not end the literal.
STRING
: '"' ( '\\' . | ~('\\'|'"') )* '"' // double quoted string
| '\'' ( '\\' . | ~('\\'|'\'') )* '\'' // single quoted string
;
// Body of a define: everything up to, but not including, the terminating newline.
// A backslash immediately before a newline continues the body onto the next line;
// the '!' suffix is ANTLR 2 syntax for leaving that backslash out of the matched text.
// NOTE(review): any other backslash is matched by the ~'\n' alternative - confirm
// the k=4 lookahead resolves this as intended.
protected MACRO_TEXT :
( '\\'! '\n' {newline();} // escaped newline
| ~'\n'
)*;
// A single whitespace character. The SKIP assignment is commented out, so
// whitespace is delivered as ordinary tokens rather than dropped.
WS :
( ' '
| '\t'
| '\f'
| '\n' {newline();}
) { /*$setType(Token.SKIP);*/ };
// Line comment (including its terminating newline) or block comment. Line counting
// is kept up to date via newline(). The SKIP assignment is commented out, so
// comments are delivered as tokens rather than dropped.
COMMENT :
( "//" (~'\n')* '\n' {newline();} // single line comment
| "/*" ( options{greedy=false;} : '\n' {newline();} | ~('\n') )* "*/" // multi-line comment
) { /*$setType(Token.SKIP);*/ };
// Plain identifier shape: a letter or underscore followed by letters, digits or
// underscores. Declared protected, so it is only matched when referenced by another
// rule (DIRECTIVE and IDENTIFIER) and performs no macro lookup itself.
protected RAW_IDENTIFIER : ('a'..'z'|'A'..'Z'|'_')
('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;
// Number: a digit followed by digits, letters or underscores, which allows
// alphabetic suffixes on numbers (e.g. L for long).
NUMBER : ('0'..'9') ('0'..'9'|'a'..'z'|'A'..'Z'|'_')* ;
// group symbols into categories to parse EXPR
// LEFT and RIGHT are any opening / closing bracket; the two rules are independent,
// so nothing here checks that an opening bracket is closed by the same kind.
LEFT : '(' | '[' | '{' ;
RIGHT : ')' | ']' | '}' ;
COMMA : ',' ;
// every remaining punctuation character, one character per token
OPERATOR : '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '`' | '|' | '~' ;
// Text of one macro-call argument (referenced from IDENTIFIER).
// Shape: optional whitespace, an optional number or identifier, then optionally a
// bracketed group / string / operator followed by another EXPR. COMMA and RIGHT are
// only accepted inside a LEFT ... RIGHT group, never at this level, which leaves
// the caller free to split arguments on COMMA and to stop at the closing ')'.
protected EXPR // allow just about anything without being ambiguous
: (WS)? (NUMBER|IDENTIFIER)?
(
( LEFT EXPR ( COMMA EXPR )* RIGHT
| STRING
| OPERATOR // quotes, COMMA, LEFT, and RIGHT not in here
)
EXPR
)?
;
4.参考:
How to do preprocessing in antlr v4?
看提到了:
TokenStreamRewriter
不过后来没继续弄。
5.后来是别的同事,搞定了include的,包括递归调用的功能,完整代码如下:
(同时此处后又加了对于define的简单替换)
grammar preprocess;
//lexer grammar preprocess;
options{
language=Java;
}
@lexer::header {
//package com.mm.antlrv3demo;
import java.io.*;
import java.util.*;
}
@parser::header {
//package com.mm.antlrv3demo;
}
@lexer::members {
    // Snapshot of an interrupted input: the stream itself plus a mark() taken on it
    // at construction time, so the position can be rewound to later.
    class SaveStruct {
        public CharStream input;
        public int marker;

        SaveStruct(CharStream input){
            this.input = input;
            this.marker = input.mark();
        }
    }

    // macro name -> replacement text, shared by all lexer instances
    static Map defines = new Hashtable(); // holds the defines
    // inputs suspended by an include, innermost on top
    Stack<SaveStruct> includes = new Stack<SaveStruct>();

    // Overridden so that EOF of an included file resumes the including input
    // instead of ending the token stream.
    public Token nextToken(){
        Token token = super.nextToken();
        boolean endOfIncludedInput = token.getType() == Token.EOF && !includes.empty();
        if (endOfIncludedInput) {
            // restore the suspended input at the position recorded for it
            SaveStruct saved = includes.pop();
            setCharStream(saved.input);
            input.rewind(saved.marker);
            // Recurse through this override rather than super: when the include was the
            // last thing in the resumed stream, super would hand back EOF even though
            // outer includes are still waiting on the stack.
            token = this.nextToken();
        }
        // Skip first token after switching on another input (it has a negative start
        // index). Again via this override, as there may be nested include files.
        if (((CommonToken) token).getStartIndex() < 0) {
            token = this.nextToken();
        }
        return token;
    }
}
// Line and block comments; both are dropped via skip().
// The line-comment alternative requires a terminating '\n', so a // comment on the
// very last line of the input with no trailing newline does not match it.
COMMENT
: ('//' ~('\n'|'\r')* '\r'? '\n') {skip();}
| ('/*' ( options {greedy=false;} : . )* '*/') {skip();}
;
// and lexer rule
// Include directive: suspends the current input and continues lexing from the named
// file. The suspended input is kept on the includes stack; the nextToken() override
// is what resumes it when the included file reaches EOF.
// Changes from the earlier version: the file is opened before the current state is
// pushed, so a failed open does not leave a stale stack entry, and the thrown Error
// now carries the underlying exception as its cause.
INCLUDE : '#include' (WS)? f=STRING
{
    String name = f.getText();
    // strip the surrounding quote characters
    name = name.substring(1,name.length()-1);
    try {
        // open the new input first; nothing has been modified yet if this fails
        CharStream included = new ANTLRFileStream(name);
        // save current lexer's state
        SaveStruct ss = new SaveStruct(input);
        includes.push(ss);
        // switch on new input stream
        setCharStream(included);
        reset();
    } catch(Exception fnf) { throw new Error("Cannot open file " + name, fnf); }
};
// Simple define of the form: #define NAME "text". Only a STRING is accepted as the
// value. The pair is logged, stored in the defines map, and the directive is skipped.
DIRECTIVE : ('#define' WS* defineMacro=ID WS* defineText=STRING)
{
    final String name = defineMacro.getText();
    // stored exactly as matched by STRING, i.e. with its quote characters
    final String replacement = defineText.getText();
    System.out.println("Found macro: " + name + "=" + replacement);
    defines.put(name, replacement);
    skip();
};
// Identifier. When its text is a key in the defines map, the text is replaced by the
// stored macro value (plain one-to-one substitution, no arguments).
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
{
    String idStr = getText();
    // defines is a Hashtable, which cannot hold null values, so a non-null
    // result is equivalent to the key being present
    Object replacement = defines.get(idStr);
    if (replacement != null) {
        String macroValue = (String) replacement;
        System.out.println("Found macro reference, so replce " + idStr + " to " + macroValue);
        setText(macroValue);
    }
};
// Integer literal: one or more decimal digits (no sign, no suffix).
INT : '0'..'9'+
;
// Floating-point literal in one of three shapes: digits '.' optional-digits with an
// optional exponent, '.' digits with an optional exponent, or digits with a
// mandatory exponent.
FLOAT
: ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
| '.' ('0'..'9')+ EXPONENT?
| ('0'..'9')+ EXPONENT
;
// A single whitespace character, placed on the HIDDEN channel instead of being
// discarded.
WS : ( ' '
| '\t'
| '\r'
| '\n'
) {$channel=HIDDEN;}
;
// One punctuation character from the listed set.
// NOTE(review): presumably a catch-all so other C text still lexes; several common
// characters (for example '<', '>', '=', '*', '[', ']', '.', ':') are not listed -
// confirm whether that is intended.
RestSymbo
: '{' | '}' | '&' | ';' | ',' | '+' | '-' | ')' | '(' | '~' | '/' | '`' | '$' | '@' | '%' | '^' | '#' | '\\' ;
// Double-quoted string literal; a backslash is only valid as the start of an
// ESC_SEQ.
STRING
: '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
;
// Single-quoted character literal holding exactly one character or one ESC_SEQ.
CHAR: '\'' ( ESC_SEQ | ~('\''|'\\') ) '\''
;
// Exponent part of a FLOAT: 'e' or 'E', an optional sign, then one or more digits.
fragment
EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
// One hexadecimal digit, either case.
fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
// Backslash escape: one of the listed single-character escapes, a unicode escape,
// or an octal escape.
fragment
ESC_SEQ
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UNICODE_ESC
| OCTAL_ESC
;
// Octal escape with one to three octal digits; the three-digit form restricts its
// first digit to 0..3.
fragment
OCTAL_ESC
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
// Unicode escape: backslash, 'u', then exactly four hexadecimal digits.
fragment
UNICODE_ESC
: '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
// Parser entry rule: zero or more include directives.
header
: include*;
// An include is just the INCLUDE lexer token; the commented-out tail is an earlier
// parser-level formulation of the directive.
include : INCLUDE;//'#include ' '<' ID ('.h' | '.ddl') '>';
【总结】
最终还是通过antlr语法,配合手动添加的action code(java代码),来实现递归处理include的逻辑。
转载请注明:在路上 » 【记录】尝试用antlr处理C代码中的#include