Saturday, May 12, 2012

Recursive Decent Parser


Sample Output when syntax is correct

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<ctype.h>

#define MAX   100

#define ID           261           //identifier
#define ARRAY        262           //array
#define COMMA        265           //','
#define SMCOLON      266           //';'
#define PNTR         267           //pointer
#define OBRCS        268           //'{'
#define CBRCS        269           //'}'
#define INVALID      270           //not a valid type for our tokens

#define FAILURE      0
#define SUCCESS      1

//this program will now detect Structure declaration like " struct abc{int a,*v,p[]; float e,*o,y[]; char r,*t,w[];}; "
//grammer:
//     str_stmt      ->     struct sname{ dec_list };
//     dec_list      ->     dec_stmt rest_stmts
//     rest_stmts    ->     dec_stmt rest_stmts | NULL
//     dec_stmt      ->     type var_list;
//     type          ->     int | float | char
//     var_list      ->     var rest_list
//     rest_list     ->     , var_list | NULL
//     sname         ->     ID
//     var                  ->     ID
//                                |ARRAY
//                                |PNTR

char str[MAX];             //our input string
char token[MAX];           //contains a token
int token_type;            //the type of token
char *s;                   //pointer to str
void get_token();          //function to get next token

int str_stmt();            //struct statement
int dec_list();            //declaration of struct members
int rest_stmts();          //rest of dec_stmts if there
int dec_stmt();            //function to verify declaration syntax
int type();                //function to check type
int var_list();            //function to check varaible syntax
int rest_list();           //fuction to check variable list,rest_list and var_list work               recursively and call each other

int main()
{
       while(1)
       {
              s=str;
              printf("\nENTER DECLARTATION SYNTAX: ");
              gets(s);

              if( str_stmt() )
                     printf("\n\n***** SYNTAX CORRECT *****\n\n");
              else
                     printf("\n\n***** SYNTAX INCORRECT *****\n\n");       
       }

       return 0;
}

int str_stmt()
{
       get_token();
       if(strcmp(token,"struct") == 0)
       {
              printf("\tMATCHED KEYWORD    : %s",token);
              get_token();
              if(token_type == ID)
              {
                     printf("\tMATCHED IDENTIFIER : %s",token);
                     get_token();
                     if(token_type == OBRCS)
                     {
                           printf("\tMATCHED OBRCS      : %s",token);
                           if(dec_list())
                                  return SUCCESS;
                     }
                     else
                           printf("\tExpecting '{'");
              }
              else
                     printf("\tExpecting struct name.");
       }
       else
              printf("\tExpecting struct keyword");

       return FAILURE;
}

int dec_list()
{
       get_token();
       if(dec_stmt())
       {
              printf("\n\t\t\t\t\t-> MATCHED DEC_STMT");
              if(rest_stmts())
                     return SUCCESS;
       }
       else
              printf("\n\t\t\t\t\t-> DEC_STMT INCORRECT");
       return FAILURE;
}

int rest_stmts()
{
       get_token();
       if(token_type == CBRCS)
       {
              printf("\tMATCHED CBRCS      : %s",token);
              get_token();
              if(token_type == SMCOLON)
              {
                     printf("\tMATCHED SMCOLON    : %s",token);
                     return SUCCESS;
              }
              else
                     printf("\tExpecting ';'");
       }
       else if(dec_stmt())
       {
              printf("\n\t\t\t\t\t-> MATCHED DEC_STMT");
              if(rest_stmts())
                     return SUCCESS;
       }
       else
              printf("\n\t\t\t\t\t->Expecting valid dec_stmt or '}'.");

       return FAILURE;
}

int dec_stmt()
{
       if( (strcmp(token,"int") == 0) ||(strcmp(token,"float") == 0) || (strcmp(token,"char") == 0) )
       {
              printf("\tMATCHED TYPE       : %s",token);
              if(var_list() )
                     return SUCCESS;
       }
       else
              printf("\tExpecting correct TYPE");

       return FAILURE;
}

int var_list()
{
       get_token();
       if(token_type == ID)
       {
              printf("\tMATCHED IDENTIFIER : %s",token);
              if( rest_list() )
                     return SUCCESS;
       }
       else if(token_type == ARRAY)
       {
              printf("\tMATCHED ARRAY-ID   : %s",token);
              if( rest_list() )
                     return SUCCESS;
       }
       else if(token_type == PNTR)
       {
              printf("\tMATCHED POINTER    : %s",token);
              if( rest_list() )
                     return SUCCESS;
       }
       else
              printf("\tExpecting an ID, POINTER or ARRAY");

       return FAILURE;
}

int rest_list()
{
       get_token();

       if(token_type == COMMA)
       {
              printf("\tMATCHED COMMA      : %s",token);
              if( var_list() )
                     return SUCCESS;
       }
       else if(token_type == SMCOLON)
       {
              printf("\tMATCHED SMCOLON    : %s",token);
              return SUCCESS;
       }
       else
              printf("\tExpecting ',' or ';'");
      
       return FAILURE;
}

void get_token()
{
       //before writing this function, remember this is the base of whole program
       //have in mind what we need : ID ARRAY ( ) , ; =
       //rest are invalid for our needs right now
       char* tostr_tt(int token_type);

       int i = 0;                        //index for token string

       while(*s == ' ' || *s == '\t')    //eat white spaces
              s++;

       if(*s == '*')  //pointer
       {
              token[i++] = *s++;
              if(isalpha(*s) || *s == '_')
              {
                     token_type=PNTR;
                     while(isalnum(*s) || *s == '_')   //while token is a valid ID
                           token[i++] = *s++;
              }
              else
                     token_type=INVALID;

              token[i] = '\0';
       }
       else if( isalpha(*s) || *s == '_')       //if current token starts with an alphaber or _ it is an ID
       {
              token_type=ID;
              while(isalnum(*s) || *s == '_')   //while token is a valid ID
              {
                     token[i++] = *s++;
              }

              token[i] = '\0';
       }
       else
       {
              if(*s == ',')
                     token_type = COMMA;
              else if(*s == ';')
                     token_type = SMCOLON;
              else if(*s == '{')
                     token_type = OBRCS;
              else if(*s == '}')
                     token_type = CBRCS;
              else
                     token_type = INVALID;
              token[i++] = *s++;
              token[i] = '\0';
       }

       if(token_type == ID)  //array is ID[]
       {
              if(*s == '[')
              {
                     token_type = INVALID;
                     token[i++] = '[';
                     s++;
                     if(*s == ']')
                     {
                           token_type = ARRAY;
                           token[i++] = ']';
                           s++;
                     }
                     token[i] = '\0';
              }
       }

       printf("\nTOKEN : %s\t\tTYPE:%-10s",token,tostr_tt(token_type));
}

char* tostr_tt(int token_type)
{
       switch(token_type)
       {
              case ID:             return "ID";         break;
              case ARRAY:          return "ARRAY";      break;
              case COMMA:          return "COMMA";      break;
              case SMCOLON:        return "SMCOLON";    break;
              case PNTR:           return "POINTER";    break;
              case OBRCS:          return "OBRACES";    break;
              case CBRCS:          return "CBRACES";    break;
              case INVALID:        return "INVALID";    break;
              default:             return "ERROR";      break;

       }     
}

No comments:

Post a Comment