Jump to content

User:Haus/Hanzo/lexer

From Wikipedia, the free encyclopedia

Here's a (somewhat dated) version of the guts of a JFlex specification for parsing ship infoboxes:


...
%%
%{
  // State shared between the rule actions of the generated scanner.

  /** Text most recently captured by the NAME rule; null until a name has been read. */
  private String name = null;

  /** Text most recently captured by the VALUE rule (SHIPBOX may append to it); null until then. */
  private String value = null;

  /** Number of "{{" openers seen that have not yet been matched by a "}}". */
  private int comment_count = 0;
%}
// Keep the yyline / yychar position counters up to date.
%line
%char
// Match the literal text in the rules regardless of letter case.
%caseless
// Scan Unicode input.
%unicode
// Generate a main() so the scanner can be run directly on input files.
%standalone
//%debug

// Lexical states used by the rules: inside an infobox, reading a field
// name, and reading a field value.
%state SHIPBOX, NAME, VALUE

// Macro definitions. Of these, the rules in this specification reference only
// WhiteSpace (and LineTerminator through it); STRING_TEXT refers to
// WHITE_SPACE_CHAR.
ALPHA=[A-Za-z]
DIGIT=[0-9]
// Space, tab and backspace only. Deliberately excludes \012 (octal for line
// feed), which would make this "non-newline" class match newlines.
NONNEWLINE_WHITE_SPACE_CHAR=[\ \t\b]
// Line feed, space, tab and backspace (\n already covers \012).
WHITE_SPACE_CHAR=[\n\ \t\b]
STRING_TEXT=(\\\"|[^\n\"]|\\{WHITE_SPACE_CHAR}+\\)*
// One line break in any of the CR, LF or CRLF forms.
LineTerminator = \r|\n|\r\n
// Any character that is not part of a line break.
InputCharacter = [^\r\n]
// A line break, space, tab or form feed.
WhiteSpace     = {LineTerminator} | [ \t\f]

%% 
<YYINITIAL> {

/* Start of a ship infobox: the template header, any number of "|" and one
   whitespace character. Both header spellings share the action below; it
   counts the opening braces, switches to SHIPBOX and returns 1. */
"{{Infobox Ship"[|]*{WhiteSpace} |
"{{Ship table"[|]*{WhiteSpace} {
   comment_count++;
   yybegin(SHIPBOX);
   return 1;
   }

/* Any other text: a run of non-newline characters plus the newlines that
   follow it is echoed to standard output and reported as 100.
   NOTE(review): the matched text already ends with its newlines and println
   appends one more line break; confirm the extra break is intended. */
[^\n]*[\n]* {
    //printlns replaced to preserve UTF-8
    final String passthrough = yytext();
    System.out.println(passthrough);
    return 100;
   }
}


<SHIPBOX> {

/* A nested template opens: only the nesting depth is tracked. */
"{{" { comment_count++; }

/* A template closes (optionally preceded by pipes). When the depth returns
   to zero the infobox itself has ended: flush the pending name/value pair,
   emit the box and go back to plain-text scanning. */
[\|]*"}}" {
	comment_count--;
	Utility.Assert(comment_count >= 0);
	if (comment_count == 0) {
		// Same guard as the "|" rule: a box without a complete field has no pair to print.
		if (name != null && value != null) {
			ShipBox.printnv(name, value);
		}
		ShipBox.printbox();
		// Forget the pair so the first "|" of a later infobox cannot print it again.
		name = null;
		value = null;
		yybegin(YYINITIAL);
	}
}

/* A pipe starts the next field: print the pair collected so far, if there
   is one, then read the new field's name. */
\| {
	if (name != null && value != null) {
		ShipBox.printnv(name, value);
	}
	yybegin(NAME);
}

/* Any other single character continues the current value. Text seen before
   any value exists is skipped; appending to a null String would otherwise
   produce the literal text "null". */
[^\|] {
	if (value != null) {
		value += yytext();
	}
}
}

<NAME> {
/* Field name: everything up to and including the next "=" is stored in
   name, then the scanner moves on to read the value.
   NOTE(review): [^=] also matches "|" and line breaks, so a field written
   without "=" runs on into the following one; confirm this is acceptable. */
[^=]*"=" {
	yybegin(VALUE);
	name = yytext();
      }
}

<VALUE> {
/* Field value: the rest of the current line together with the line-break
   characters that end it is stored in value, then scanning resumes in
   SHIPBOX.
   NOTE(review): the rule needs at least one line-break character, and it
   also consumes any "}}" found on the same line; confirm both are intended. */
[^\n\r]*[\n\r]+ {
	yybegin(SHIPBOX);
	value = yytext();
      }
}