The PL1
grammar that I provide here parses only 3 statements, but more importantly it
handles the lexical situations related to line breaks, comments, and quoted
string constants.
PL1GRAMMAR
(preliminary)
options {
  // Token matching ignores letter case.
  IGNORE_CASE = true;

  // Tracing/debug output of the generated code is switched off.
  DEBUG_PARSER = false;
  DEBUG_LOOKAHEAD = false;
  DEBUG_TOKEN_MANAGER = false;

  // Default lookahead depth used at choice points in the grammar.
  LOOKAHEAD = 2;
  FORCE_LA_CHECK = true;

  // No special Unicode processing of the input stream.
  JAVA_UNICODE_ESCAPE = false;
  UNICODE_INPUT = false;

  //COMMON_TOKEN_ACTION = true;

  // Generate an instance-based (non-static) parser.
  STATIC = false;
}
PARSER_BEGIN(PL1)
/**
 * Parser for the preliminary PL/1 grammar defined in this file.
 *
 * <p>The grammar recognises optionally labelled {@code proc}, {@code end} and
 * {@code put list} statements; see the {@code Input()} production.
 */
public class PL1 {
  /**
   * Main entry point: parses PL/1 source text read from standard input.
   *
   * @param args command-line arguments (not used)
   * @throws ParseException if the input does not match the grammar
   */
  public static void main(String[] args) throws ParseException {
    PL1 parser = new PL1(System.in);
    parser.Input();
  }
}
PARSER_END(PL1)
TOKEN_MGR_DECLS : {
  // Disabled declaration, kept for reference. It would set the initial lexical
  // state to START_OF_LINE, but no such state is declared in this grammar.
  // The whole statement must stay commented out: leaving its second half
  // uncommented produces code that does not compile.
  // int BOF = (curLexState = START_OF_LINE);
}
/* WHITE SPACE */
// Runs of blanks, tabs, form feeds and line terminators become special tokens,
// so they never reach the parser productions.
SPECIAL_TOKEN :
{
  < SPACE: ( [" ", "\t", "\r", "\f", "\n"] )+ >
}

// A comment opener is kept as the start of the pending match (MORE) and
// switches the token manager into the corresponding comment state.
<DEFAULT>
MORE :
{
  "//" : IN_SINGLE_LINE_COMMENT
| "/*" : IN_MULTI_LINE_COMMENT
}

// A single-line comment runs to the end of the line (the line terminator is
// optional so that a comment on the last line of the input is accepted too).
<IN_SINGLE_LINE_COMMENT>
SPECIAL_TOKEN :
{
  < SINGLE_LINE_COMMENT: ( ~["\n", "\r"] )* ( "\n" | "\r" | "\r\n" )? > : DEFAULT
}

// A multi-line comment ends at the first "*/".
<IN_MULTI_LINE_COMMENT>
SPECIAL_TOKEN :
{
  < MULTI_LINE_COMMENT: "*/" > : DEFAULT
}

// Any other character inside a comment is appended to the pending match.
// NOTE(review): IN_PATTERN is listed here, but no rule in this grammar
// switches into that state.
<IN_SINGLE_LINE_COMMENT, IN_MULTI_LINE_COMMENT, IN_PATTERN>
MORE :
{
  < ~[] >
}
/* Strings */
// An opening double quote starts a string constant; the text is accumulated
// with MORE until one of the WITHIN_STRING tokens below ends it.
MORE :
{
  "\"" : WITHIN_STRING
}

// Two consecutive double quotes inside a string do not end it: they are
// appended to the pending match and lexing stays in WITHIN_STRING.
<WITHIN_STRING>
MORE :
{
  < "\"\"" > : WITHIN_STRING
}

// Terminators. These are declared before the catch-all rule below so that,
// for a one-character match, they take precedence over it.
//  - a newline inside the string yields UNTERMINATED_STRING_LITERAL;
//  - a single double quote closes the string and yields STRING_LITERAL.
<WITHIN_STRING>
TOKEN :
{
  < UNTERMINATED_STRING_LITERAL: "\n" > : DEFAULT
| < STRING_LITERAL: "\"" > : DEFAULT
}

// Every other character is part of the string body.
<WITHIN_STRING>
MORE :
{
  < ( ~[] ) >
}
/* SEPARATORS */
// One token per punctuation character; declaration order is preserved.
TOKEN :
{
    < LPAREN:    "(" >
  | < RPAREN:    ")" >
  | < LBRACE:    "{" >
  | < RBRACE:    "}" >
  | < LBRACKET:  "[" >
  | < RBRACKET:  "]" >
  | < SEMICOLON: ";" >
  | < COMMA:     "," >
  | < HASH:      "#" >
  | < DOT:       "." >
  | < DOLLAR:    "$" >
}
/* OPERATORS */
// Single- and two-character operator tokens; declaration order is preserved.
TOKEN :
{
    < ASSIGN:    "="  >
  | < GT:        ">"  >
  | < LT:        "<"  >
  | < BANG:      "!"  >
  | < TILDE:     "~"  >
  | < HOOK:      "?"  >
  | < COLON:     ":"  >
  | < LE:        "<=" >
  | < GE:        ">=" >
  | < NE:        "<>" >
  | < PLUS:      "+"  >
  | < MINUS:     "-"  >
  | < STAR:      "*"  >
  | < SLASH:     "/"  >
  | < BIT_AND:   "&"  >
  | < PERCENT:   "%"  >
  | < BACKSLASH: "\\" >
  | < EXPO:      "^"  >
}
// A statement label is a name immediately followed by ":". The colon is part
// of the regular expression, so a name is only lexed as a label when the colon
// is present.
// NOTE: because of the "+", the name needs at least two characters before the
// colon, and no white space is accepted between the name and the colon.
TOKEN :
{
  < STATEMENT_LABEL: ["A"-"Z"] ( ["A"-"Z"] | "_" | ["0"-"9"] )+ <COLON> >
  {
    // Exclude the ":" from the label token: push it back onto the input so it
    // is lexed again as a separate COLON token, and drop it from the image.
    input_stream.backup(1);
    matchedToken.image =
        matchedToken.image.substring(0, matchedToken.image.length() - 1);
  }
}
// Keyword tokens used by the statement productions of this grammar.
TOKEN :
{
    < PROC:      "proc"    >
  | < OPTIONS:   "options" >
  | < MAIN:      "main"    >
  | < END:       "end"     >
  | < STATEMNT1: "aaa"     >
  | < PUT:       "put"     >
  | < LIST:      "list"    >
}
// Catch-all: any single character not matched by an earlier rule becomes a
// token of its own.
TOKEN :
{
  < ANYTHING_ELSE: ( ~[] ) >
}
/**
 * Root production: zero or more statements, each optionally preceded by a
 * label, followed by end of input.
 */
void Input() :
{
  // Printed when the production is entered.
  System.out.println("Start");
}
{
  (
    ( LabelStatement() )?
    BasicStatement()
  )*
  <EOF>
}
/** A statement label: the label name token followed by its colon. */
void LabelStatement() :
{}
{
  <STATEMENT_LABEL>
  <COLON>
}
/** One statement (proc, end or put) terminated by a semicolon. */
void BasicStatement() :
{}
{
  (
      ProcStatement()
    | EndStatement()
    | PutStatement()
  )
  <SEMICOLON>
}
/** Procedure header of the form {@code proc options(main)}. */
void ProcStatement() :
{}
{
  <PROC>
  <OPTIONS> <LPAREN> <MAIN> <RPAREN>
}
/** The {@code end} statement: just the keyword. */
void EndStatement() :
{}
{
  <END>
}
/** Output statement of the form {@code put list ("text")} with one string constant. */
void PutStatement() :
{}
{
  <PUT> <LIST>
  <LPAREN> <STRING_LITERAL> <RPAREN>
}
Test data1:
hello1.pl1
Hello2: proc
options(main);
put
list ("Hello, world!");
list ("Hello, world!");
end;
Test data2:
Hello2.pl1
Hello2: proc
options(main);
/***********************
*
* by Ali R+
SARAL
*
************************/
put
list ("Hello, world!");
// single line comment
end;