Sunday, 14 April 2013

PL1 grammar (preliminary)

This is the beginning of my work on converting batch PL1 programs to COBOL.  As usual, the first step is to write a JavaCC grammar for PL1.  Currently, no PL1 JavaCC grammars are freely available on the internet, so I will be writing one over the coming days or months, depending on my workload.

The PL1 grammar provided here parses only three statements, but more importantly it handles the lexical issues related to line breaks, comments and quoted string constants.

PL1GRAMMAR (preliminary)

options {
    // Lexical analysis: all token matching is case-insensitive.
    IGNORE_CASE = true;
    JAVA_UNICODE_ESCAPE = false;
    UNICODE_INPUT = false;
    //COMMON_TOKEN_ACTION = true;

    // Parser generation: two tokens of default lookahead, with choice
    // conflicts checked at every choice point; the generated parser and
    // token manager use instance (non-static) members.
    LOOKAHEAD = 2;
    FORCE_LA_CHECK = true;
    STATIC = false;

    // Diagnostic tracing: all switched off.
    DEBUG_PARSER = false;
    DEBUG_LOOKAHEAD = false;
    DEBUG_TOKEN_MANAGER = false;
}

PARSER_BEGIN(PL1)

/** Simple brace matcher. */
public class PL1 {

  /** Main entry point. */
  public static void main(String args[]) throws ParseException {

    PL1 parser = new PL1(System.in);
    parser.Input();
  }
}

PARSER_END(PL1)

// Extra declarations for the generated token manager; none are active at present.
TOKEN_MGR_DECLS : {
// Disabled: would make the token manager start in a START_OF_LINE lexical
// state, but no such state is defined in the grammar shown here.
//            int BOF = (curLexState = START_OF_LINE);
}

/*
 * White space. A run of blanks, tabs, form feeds and line terminators is
 * collected into one special token, so it is never handed to the parser.
 */
SPECIAL_TOKEN :
{
    < SPACE: ( [" ", "\t", "\n", "\r", "\f"] )+ >
}

/*
 * Comment openers. The delimiter is kept as the start of the pending token
 * (MORE) and lexing continues in the matching comment state.
 */
<DEFAULT> MORE :
{
    < "//" > : IN_SINGLE_LINE_COMMENT
  | < "/*" > : IN_MULTI_LINE_COMMENT
}

/*
 * Remainder of a "//" comment: every character up to the end of the line,
 * plus the line terminator when there is one. Lexing then resumes in DEFAULT.
 */
<IN_SINGLE_LINE_COMMENT> SPECIAL_TOKEN :
{
    < SINGLE_LINE_COMMENT: ( ~["\r", "\n"] )* ( "\r\n" | "\r" | "\n" )? > : DEFAULT
}

/*
 * The closing delimiter completes a block comment; the whole comment becomes
 * one special token and lexing resumes in DEFAULT.
 */
<IN_MULTI_LINE_COMMENT> SPECIAL_TOKEN :
{
    < MULTI_LINE_COMMENT: "*/" > : DEFAULT
}

/*
 * Any other character inside a comment is appended to the pending comment
 * token. In the single-line state SINGLE_LINE_COMMENT is declared earlier and
 * always matches at least as much, so this rule mainly serves block comments.
 *
 * The state list no longer names IN_PATTERN: no rule in this grammar ever
 * switches to that state, so listing it only declared a dead lexical state.
 */
<IN_SINGLE_LINE_COMMENT, IN_MULTI_LINE_COMMENT> MORE :
{
    < ~[] >
}

/* Strings */

/*
 * An opening double quote starts a string literal; the rest of the literal is
 * collected in the WITHIN_STRING state.
 */
<DEFAULT> MORE :
{
    < "\"" > : WITHIN_STRING
}

/*
 * A doubled quote inside a string stands for one quote character: keep
 * collecting and stay in WITHIN_STRING. It is preferred over the single
 * closing quote because it is the longer match.
 */
<WITHIN_STRING> MORE :
{
    < "\"\"" >
}

/*
 * The two ways a string ends: a line feed (unterminated literal) or the
 * closing quote. These must stay ahead of the catch-all rule below, because
 * the earlier rule wins when two rules match the same single character.
 */
<WITHIN_STRING> TOKEN :
{
    < UNTERMINATED_STRING_LITERAL: "\n" > : DEFAULT
  | < STRING_LITERAL: "\"" > : DEFAULT
}

/* Every other character is appended to the string being collected. */
<WITHIN_STRING> MORE :
{
    < ~[] >
}

/* Separators: single-character punctuation tokens. */
TOKEN :
{
    < LPAREN    : "(" >
  | < RPAREN    : ")" >
  | < LBRACE    : "{" >
  | < RBRACE    : "}" >
  | < LBRACKET  : "[" >
  | < RBRACKET  : "]" >
  | < SEMICOLON : ";" >
  | < COMMA     : "," >
  | < HASH      : "#" >
  | < DOT       : "." >
  | < DOLLAR    : "$" >
}

/*
 * Operators. The two-character operators (<=, >=, <>) can be listed after
 * their one-character prefixes because the token manager always prefers the
 * longest match.
 */
TOKEN :
{
    < ASSIGN    : "=" >
  | < GT        : ">" >
  | < LT        : "<" >
  | < BANG      : "!" >
  | < TILDE     : "~" >
  | < HOOK      : "?" >
  | < COLON     : ":" >
  | < LE        : "<=" >
  | < GE        : ">=" >
  | < NE        : "<>" >
  | < PLUS      : "+" >
  | < MINUS     : "-" >
  | < STAR      : "*" >
  | < SLASH     : "/" >
  | < BIT_AND   : "&" >
  | < PERCENT   : "%" >
  | < BACKSLASH : "\\" >
  | < EXPO      : "^" >
}

/*
 * Statement label: a letter followed by any number of letters, digits or
 * underscores, immediately followed by ":". The colon is only used to
 * recognise the label; it is pushed back so that it is delivered as a
 * separate COLON token.
 *
 * The trailing part is now optional ("*" instead of "+"), so one-letter
 * labels such as "A:" are recognised too.
 */
TOKEN :
{
    < STATEMENT_LABEL: ["A"-"Z"] ( ["A"-"Z"] | "_" | ["0"-"9"] )* <COLON> >
    {
        // Exclude the ":" from the label token: return it to the input
        // stream and trim it from the token image.
        input_stream.backup(1);
        matchedToken.image = matchedToken.image.substring(0, matchedToken.image.length() - 1);
        // The token position was filled in before this action ran and still
        // counts the colon; move the end column back to the label's last
        // character.
        matchedToken.endColumn--;
    }
}

/*
 * Keywords used by the statements this preliminary grammar supports.
 * STATEMNT1 is not referenced by any production in this grammar.
 */
TOKEN :
{
    < PROC      : "proc" >
  | < OPTIONS   : "options" >
  | < MAIN      : "main" >
  | < END       : "end" >
  | < STATEMNT1 : "aaa" >
  | < PUT       : "put" >
  | < LIST      : "list" >
}

/*
 * Catch-all: any single character that no earlier rule claims becomes an
 * ANYTHING_ELSE token. It has to remain the last rule of the DEFAULT state so
 * that every other one-character token takes precedence over it.
 */
TOKEN :
{
    < ANYTHING_ELSE: ~[] >
}

/**
 * Root production: any number of statements, each optionally preceded by a
 * label, followed by end of input. Prints "Start" when parsing begins.
 */
void Input() :
{
  System.out.println("Start");
}
{
  (
    [ LabelStatement() ]
    BasicStatement()
  )*
  <EOF>
}

/** A label prefix: the label name followed by its colon. */
void LabelStatement() : {}
{
  <STATEMENT_LABEL>
  <COLON>
}

/** One statement (proc, end or put) terminated by a semicolon. */
void BasicStatement() : {}
{
  (
      ProcStatement()
    | EndStatement()
    | PutStatement()
  )
  <SEMICOLON>
}

/** Procedure header of the form: proc options(main) */
void ProcStatement() : {}
{
  <PROC>
  <OPTIONS> <LPAREN> <MAIN> <RPAREN>
}

/** The end statement: the single keyword "end". */
void EndStatement() : {}
{
  <END>
}

/** Output statement of the form: put list ("string literal") */
void PutStatement() : {}
{
  <PUT> <LIST>
  <LPAREN> <STRING_LITERAL> <RPAREN>
}


Test data1:
hello1.pl1

Hello2: proc options(main);
     put
                  list ("Hello, world!");
end;


Test data2:
Hello2.pl1

Hello2: proc options(main);
/***********************
*
* by Ali R+ SARAL
*
************************/
     put
                    list ("Hello, world!");                            

                // single line comment
end;