%{ /* Lex code for the formalised english parser in WebKB-1 */

/* Input-skipping helpers; they are defined after the rules section. */
void fe_skipCcomments();
void fe_skipHTMLComments();
void fe_skipTag();
void fe_skipSingleQuotedString();
void fe_skipDoubleQuotedString();
%}

/* Character classes.  ALPHA only lists lower-case letters.
   NOTE(review): one rule below matches "Something" with a capital letter, so
   the scanner is presumably generated case-insensitively -- confirm against
   the build options. */
ALPHA       [a-z]
DIGIT       [0-9]
/* ANY: any character, newline included ('.' alone does not match newline). */
ANY         (.|\n)
SPACE       [ \t\n]
/* S: a non-empty run of white-space characters and/or literal "&nbsp;"
   entities.  The keyword rules require it after every word. */
S           ([ \t\n ]|"&nbsp;")+

/* Identifiers.
   IDENT1: first character, a letter optionally preceded by '#' or '!'.
   IDENTn: one continuation unit: a letter, a digit, one of _ - / & @ # ~,
           an apostrophe followed by a letter, the sequence "://", or a '.'
           or '?' followed by one character of the listed class (which
           includes white space, so an identifier can run across ". "). */
IDENT1      ({ALPHA}|"#"{ALPHA}|"!"{ALPHA})
IDENTn      ({ALPHA}|{DIGIT}|"_"|"-"|"/"|"&"|"@"|"#"|"~"|"'"[a-z]|[.?][a-z0-9?#~ \t\n ]|"://")
IDENT       {IDENT1}({IDENTn})*
/* NUMBER: optional sign, digits, optional fractional part ("12." is accepted). */
NUMBER      ("+"|"-")?{DIGIT}+("."{DIGIT}*)?
/* Operators.  IDENT_OPER is not referenced by any rule visible in this file. */
OTHER_OPER  ("=>"|"<=>"|"="|"=<"|">="|">")
IDENT_OPER  ({IDENT}|"!="|"<="|"=<"|{OTHER_OPER})

/* Bracketed spans: ANNOT1 runs from "(^" to "^)", DELIM1 from "$(" to ")$".
   Neither is referenced by any rule visible in this file. */
ANNOT1      ("(^"([^\)]|(\*[^)]))*"^)")
DELIM1      ("$("([^\)]|(\)[^\$]))*")$")

/* Quoted strings in which a backslash escapes the next character.
   QUOTED2_STR (double quotes) feeds the STRING rule; QUOTED1_BODY (single
   quotes) is not referenced by any rule visible in this file. */
QUOTED2_STR \"([^"\\]|(\\{ANY}))*\"
QUOTED1_BODY '([^'\\]|(\\{ANY}))*'

/* Exclusive start condition; no rule visible in this file enters or uses it. */
%x INQUOTE1

%%
{S}	            ; /* white space and "&nbsp;" between tokens yield no token */
"//"[^\n]*          ;/* skip C++ line comments */
"/*"                fe_skipCcomments(); /* the helper consumes the rest of the C comment */
"<!--"              ;/* only the delimiter is dropped: the fe_skipHTMLComments() call is disabled, so the comment content is parsed */
"-->"               ; /* closing delimiter of an HTML comment, dropped as well */
"<!"		    fe_skipTag(); /* any other construct starting with "<!" is handed to fe_skipTag() */
"<"{ALPHA}	    fe_skipTag(); /* "<" directly followed by a letter: handed to fe_skipTag() */
"!="                return NOT_EQUAL;
"=<"	            return LTE; /* also listed in OTHER_OPER; this earlier rule wins the tie */
"<="	            return IMPLIED_BY;
{OTHER_OPER}        {/*if ((value=AliasAssocArray[fetext])) unputs(value);
                       else*/ return OTHER_OPERATOR;
                    }
"or"{S}             return OTHER_OPERATOR;

     /* Keyword rules: every pattern includes the white space that follows the word(s). */
     /* The longest match wins, so a multi-word phrase beats its one-word prefix. */
"named"{S}                return NAMED;

"less"{S}"than"{S}        return '<'; /* spelled-out comparisons map to the operator characters */
"more"{S}"than"{S}        return '>';
"which"{S}                ; /* these words and phrases produce no token */
"whom"{S}                 ;
"who"{S}                  ;
"that"{S}                 ;
"there"{S}"is"{S}         ;
"is"{S}"there"{S}         ;
"is"{S}"a"{S}             return IS_A;
"is"{S}"an"{S}            return IS_A;
"is"{S}"the"{S}           return HAS_FOR; /* several phrasings share the HAS_FOR token */
"is"{S}                   return HAS_FOR;
"are"{S}"the"{S}          return HAS_FOR;
"are"{S}                  return HAS_FOR;
"has"{S}"for"{S}          return HAS_FOR;
"have"{S}"for"{S}         return HAS_FOR;
"for"{S}                  return HAS_FOR;
"with"{S}"the"{S}         return WITH;
"with"{S}                 return WITH;
"can"{S}"be"{S}"the"{S}   return CAN_HAVE_FOR;
"can"{S}"be"{S}           return CAN_HAVE_FOR;
"can"{S}"have"{S}"for"{S} return CAN_HAVE_FOR;
"may"{S}"be"{S}"the"{S}   return MAY_HAVE_FOR;
"may"{S}"be"{S}           return MAY_HAVE_FOR;
"may"{S}"have"{S}"for"{S} return MAY_HAVE_FOR;


     /* Conjunction, articles and quantifiers. */
"and"{S}                  {/*printf("flex-AND\n");*/return AND;}
"at"{S}"least"{S}         return AT_LEAST;
"at"{S}"most"{S}          return AT_MOST;
"at"{S}                   return AT;
"an"{S}                   return A;
"a"{S}                    return A;
"a"{S}"typical"{S}        return MOST; /* longer than the "a" rule above, so it is preferred; same token as "most" */
"certain"{S}              return CERTAIN;
"some"{S}                 return SOME;
"several"{S}              return SEVERAL;
"the"{S}                  return THE;
"all"{S}                  return ALL;
"any"{S}                  return EVERY; /* "any" and "every" share one token */
"every"{S}                return EVERY;
"most"{S}                 return MOST;
"mostly"{S}               return MOSTLY;

     /* Collection constructors. */
"group"{S}"of"{S}         return GROUP_OF;
"bag"{S}"of"{S}           return BAG_OF;
"set"{S}"of"{S}           return SET_OF;
"sequence"{S}"of"{S}      return SEQUENCE_OF;
"alternative"{S}          return ALTERNATIVE;
"together"{S}             return TOGETHER;

     /* Prepositions.  When "between" is followed by ':' it is returned as an IDENTIFIER: */
     /* yyless(7) keeps the seven letters of "between" and rescans the remainder. */
"of"{S}                   return OF;
"between:"{S}             {yyless(7);return IDENTIFIER;}
"between"{S}":"           {yyless(7);return IDENTIFIER;}
"between"{S}              return BETWEEN;
"from"{S}                 return FROM;
"to"{S}                   return TO;

     /* Order-of-magnitude words. */
"dozens"{S}               return DOZENS;
"hundreds"{S}             return HUNDREDS;
"thousands"{S}            return THOUSANDS;
"millions"{S}             return MILLIONS;
"billions"{S}             return BILLIONS;

     /* Qualifying adjectives. */
"good"{S}                 return GOOD;
"bad"{S}                  return BAD;
"important"{S}            return IMPORTANT;
"small"{S}                return SMALL;
"big"{S}                  return BIG;
"great"{S}                return GREAT;
"few"{S}                  return FEW;
"many"{S}                 return MANY;

     /* "Something" is overwritten in place with "Thing" and returned as an identifier; feleng is not adjusted. */
"Something"            {fetext[0]='T';fetext[1]='h';fetext[2]='i';fetext[3]='n';
                        fetext[4]='g';fetext[5]='\0'; return IDENTIFIER;
                       }


{QUOTED2_STR}             return STRING; /* fetext still contains the surrounding double quotes */

{NUMBER}                  return NUMERAL;
{IDENT}             {/*if ((value=AliasAssocArray[fetext])) unputs(value);
                       else printf("flex-term:%s.\n",fetext);*/
                      /* IDENTn accepts a '.' or '?' that is followed by white
                         space, so a match can end with a sentence terminator
                         and the blank after it.  In that case keep only the
                         identifier proper: yyless(n) retains the first n
                         characters of the match (it re-terminates fetext and
                         updates feleng) and hands the rest back to the
                         scanner, so the punctuation is scanned as a token of
                         its own. */
                      if ((feleng>2) && isspace((unsigned char)fetext[feleng-1])
                          && ((fetext[feleng-2]=='.')||(fetext[feleng-2]=='?')))
                      { yyless(feleng-2); }
                      return IDENTIFIER;
                    }

"_,_"               {/*printf("flex:_,_\n");*/ return BOB; }
"_and_"             {/*printf("flex:_and_\n");*/return BOB; }
.                   {/*printf("flex:%c-%s.\n",fetext[0],fetext);*/
                       /* any other single character is its own token code */
                       return fetext[0];}
%%



/* Consume input up to and including the closing star-slash of a C comment.
   The scanner calls this once the opening delimiter has been matched.
   Returns silently when the input ends before the comment is closed:
   yyinput() reports end of input as EOF or, depending on the flex version,
   as 0, and both values stop the scan.  UNTESTED. */
void fe_skipCcomments()
{ int c;
  unput(' '); /* just to remove the error message "unused fct()"; the blank
                 is read back, and ignored, by the first yyinput() below */
  c=yyinput();
  while ((c!=0)&&(c!=EOF))
  { if (c=='*')
    { c=yyinput();          /* a run of stars keeps taking this branch ... */
      if (c=='/') return;   /* ... until a slash closes the comment */
    }
    else c=yyinput();
  }
}
/* Consume input up to and including the "-->" that closes an HTML comment.
   The comment ends at the first '>' that is directly preceded by at least
   two dashes; a lone "->" inside the comment does not end it.
   Returns silently at end of input (yyinput() yielding EOF or 0).
   Not called at present: the call in the "<!--" rule is commented out.
   UNTESTED. */
void fe_skipHTMLComments()
{ int c;
  int dashes=0;  /* length of the dash run that ends at the previous character */
  while (((c=yyinput())!=0)&&(c!=EOF))
  { if (c=='-') dashes++;
    else if ((c=='>')&&(dashes>=2)) return;
    else dashes=0;
  }
}
/* Consume the rest of a markup tag, up to and including its closing '>'.
   The scanner calls this after the start of the tag ("<!" or "<" plus a
   letter) has been matched.  Quoted attribute values are skipped as a whole,
   so a '>' inside quotes does not end the tag.
   Returns silently at end of input (yyinput() yielding EOF or 0). */
void fe_skipTag()
{ int c=yyinput();
  while ((c!='>')&&(c!=0)&&(c!=EOF))
  { if (c=='\'') fe_skipSingleQuotedString();
    else if (c=='"') fe_skipDoubleQuotedString();
    /* Always fetch a fresh character: after a quoted value has been skipped
       c still holds the opening quote and must not be examined again. */
    c=yyinput();
  }
}
/* Consume the remainder of a single-quoted string, closing quote included.
   The opening quote has already been read by the caller.  A backslash
   escapes the next character, so an escaped quote does not end the string.
   Returns silently at end of input (yyinput() yielding EOF or 0). */
void fe_skipSingleQuotedString()
{ int c;
  for (;;)
  { c=yyinput();
    if ((c==0)||(c==EOF)||(c=='\'')) return;
    if (c=='\\')
    { c=yyinput();                  /* drop the escaped character */
      if ((c==0)||(c==EOF)) return;
    }
  }
}
/* Consume the remainder of a double-quoted string, closing quote included.
   The opening quote has already been read by the caller.  A backslash
   escapes the next character, so an escaped quote does not end the string.
   Nothing is printed while skipping.
   Returns silently at end of input (yyinput() yielding EOF or 0). */
void fe_skipDoubleQuotedString()
{ int c;
  for (;;)
  { c=yyinput();
    if ((c==0)||(c==EOF)||(c=='"')) return;
    if (c=='\\')
    { c=yyinput();                  /* drop the escaped character */
      if ((c==0)||(c==EOF)) return;
    }
  }
}
