由于老师要求,最近在做 OceanBase 存储过程的实现(OceanBase 0.4 以前是不支持存储过程的)。实现的主要步骤包括:
1、语法解析
2、词法解析
3、具体执行语法树的步骤
现在先来说说语法解析吧。这一块主要使用 flex(词法分析器生成工具)和 bison(语法分析器生成工具),这两个工具配合起来对用户输入的存储过程语句进行解析。
下面具体说说该怎么实现对 SQL 语句的分析。
1、首先建立一个 lex 文件(flex 的词法规则文件):
/*
 * Lexer for the stored-procedure front end (flex input).
 *
 * Splits the text held in globalinputtext into the tokens declared by the
 * bison grammar (generated header: prosql.tab.hpp).  Every value-carrying
 * token stores a strdup()'ed copy of its text in yylval.strval; comparison
 * and shift operators store a small operator code in yylval.subtok.
 *
 * Token names are upper case: several of them (INT, DOUBLE, IF, ELSE, FOR,
 * WHILE) would otherwise collide with C/C++ keywords.  Keyword patterns may
 * stay lower case because the scanner is built case-insensitive.
 */
%option noyywrap nodefault yylineno case-insensitive

%{
#include "prosql.tab.hpp"
/* NOTE(review): the original listed five system headers whose names were
 * lost; these are the ones the actions below actually need. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int oldstate;   /* start condition to restore when a C-style comment ends */

extern "C" int yylex();
extern "C" void yyerror(const char *s, ...);

extern char globalinputtext[10000];
extern int readinputforlexer(char *buffer, int *numbytesread, int maxbytestoread);

/* Read scanner input from the in-memory buffer instead of yyin. */
#undef YY_INPUT
#define YY_INPUT(b, r, s) readinputforlexer(b, &r, s)
%}

%x COMMENT

%%

  /* keywords */
create      { return CREATE; }
procedure   { return PROCEDURE; }
sql         { return SQL; }
declare     { return DECLARE; }
set         { return SET; }
begin       { return BEGINT; }
end         { return END; }
int         { return INT; }
varchar     { return VARCHAR; }
date        { return DATE; }
time        { return TIME; }
double      { return DOUBLE; }
if          { return IF; }
then        { return THEN; }
else        { return ELSE; }
endif       { return ENDIF; }
for         { return FOR; }
when        { return WHEN; }
while       { return WHILE; }

  /* integer literal, kept as text */
[0-9]+      { yylval.strval = strdup(yytext); return INTNUM; }

  /* floating-point literal, kept as text */
[0-9]+"."[0-9]* |
"."[0-9]+ |
[0-9]+e[-+]?[0-9]+ |
[0-9]+"."[0-9]*e[-+]?[0-9]+ |
"."[0-9]*e[-+]?[0-9]+   { yylval.strval = strdup(yytext); return APPROXNUM; }

  /* booleans: strval is a char *, so store the text "1"/"0" rather than
   * the integers 1/0, which would be dereferenced as pointers later */
true        { yylval.strval = strdup("1"); return BOOL; }
false       { yylval.strval = strdup("0"); return BOOL; }

  /* quoted strings; the quotes are kept in the token text */
'(\\.|''|[^'\n])*' |
\"(\\.|\"\"|[^"\n])*\"  { yylval.strval = strdup(yytext); return STRING; }

'(\\.|[^'\n])*$         { yyerror("unterminated string %s", yytext); }
\"(\\.|[^"\n])*$        { yyerror("unterminated string %s", yytext); }

  /* hex and bit strings */
x'[0-9a-f]+' |
0x[0-9a-f]+             { yylval.strval = strdup(yytext); return STRING; }

0b[01]+ |
b'[01]+'                { yylval.strval = strdup(yytext); return STRING; }

  /* single-character operators and punctuation */
[-+&~|^/%*(),.;!]       { return yytext[0]; }

"&&"        { return ANDOP; }
"||"        { return OR; }

  /* comparison operators.
   * NOTE(review): every pattern containing '<' was lost from the original
   * text; "<" (1), "<=" (5), "<>" and "<<" (1) are reconstructed from the
   * gaps in the surviving subtok values -- confirm against the real file. */
"<"         { yylval.subtok = 1; return COMPARISON; }
">"         { yylval.subtok = 2; return COMPARISON; }
"!=" |
"<>"        { yylval.subtok = 3; return COMPARISON; }
"="         { yylval.subtok = 4; return COMPARISON; }
"<="        { yylval.subtok = 5; return COMPARISON; }
">="        { yylval.subtok = 6; return COMPARISON; }
"<=>"       { yylval.subtok = 12; return COMPARISON; }

"<<"        { yylval.subtok = 1; return SHIFT; }
">>"        { yylval.subtok = 2; return SHIFT; }

  /* identifiers */
[a-z][a-z0-9_]*         { yylval.strval = strdup(yytext); return NAME; }

  /* back-quoted identifier: copy without the leading quote, then cut off
   * the trailing one (it sits at index yyleng-2 of the copy) */
`[^`/\\.\n]+`           { yylval.strval = strdup(yytext + 1);
                          yylval.strval[yyleng - 2] = 0;
                          return NAME; }

`[^`\n]*$               { yyerror("unterminated quoted name %s", yytext); }

  /* user variables: everything after the '@' is kept */
@[0-9a-z_.$]+ |
@\"[^"\n]+\" |
@`[^`\n]+` |
@'[^'\n]+'              { yylval.strval = strdup(yytext + 1); return USERVAR; }

@\"[^"\n]*$             { yyerror("unterminated quoted user variable %s", yytext); }
@`[^`\n]*$              { yyerror("unterminated quoted user variable %s", yytext); }
@'[^'\n]*$              { yyerror("unterminated quoted user variable %s", yytext); }

":="        { return ASSIGN; }

  /* comments */
#.*             ;
"--"[ \t].*     ;

"/*"            { oldstate = YY_START; BEGIN COMMENT; }
<COMMENT>"*/"   { BEGIN oldstate; }
<COMMENT>.|\n   ;
  /* an <<EOF>> action has to end the scan itself, otherwise the scanner
   * keeps hitting end-of-input in the same state */
<COMMENT><<EOF>> { yyerror("unclosed comment"); yyterminate(); }

  /* everything else */
[ \t\n]         /* white space */
.               { yyerror("mystery character '%c'", *yytext); }

%%
这一部分呢就是对 每个我们自定义的满足正则的识别
接下来是 bison 的语法文件,对上面识别出来的词进行语法识别:
/*
 * Grammar for the stored-procedure front end (bison input).
 *
 * Parses "create procedure name(params) begin ... end" and rebuilds it as
 * one annotated string: each recognised part is re-emitted followed by a
 * tag such as "(create)", "(declare)", "(set)" or "(sql)".  After a
 * successful parse the string is reachable through getsql().
 *
 * Every semantic value is a heap-allocated C string (strval).  Intermediate
 * strings built in the actions are not freed.
 */
%{
/* NOTE(review): the original listed five system headers whose names were
 * lost; these are the ones this file actually needs. */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *parsetreeroot = NULL;     /* result of the last successful parse */

extern "C" int yylex();
extern "C" int yyparse();
extern "C" void yyerror(const char *s, ...);

char globalinputtext[10000];    /* text handed to the lexer */
int globalreadoffset;           /* how much of it has been consumed so far */
int readinputforlexer(char *buffer, int *numbytesread, int maxbytestoread);

/*
 * Return a newly malloc'ed string holding s1 followed by s2.
 * A NULL argument is treated as the empty string.  The inputs are not
 * modified or freed; the caller owns the result.  Exits on allocation
 * failure.
 */
char *mystrcat(const char *s1, const char *s2)
{
    if (s1 == NULL)
        s1 = "";
    if (s2 == NULL)
        s2 = "";

    size_t len1 = strlen(s1);
    size_t len2 = strlen(s2);
    /* cast kept: this file is compiled as C++ (see extern "C" above) */
    char *p1 = (char *)malloc(len1 + len2 + 1);
    if (p1 == NULL) {
        fprintf(stderr, "error: out of memory\n");
        exit(EXIT_FAILURE);
    }
    memcpy(p1, s1, len1);
    memcpy(p1 + len1, s2, len2 + 1);    /* copies the terminating NUL too */
    return p1;
}
%}

%locations

%union {
    int intval;
    double floatval;
    char *strval;
    int subtok;
}

/* tokens whose text the lexer stores in yylval.strval */
%token <strval> NAME
%token <strval> STRING
%token <strval> INTNUM
%token <strval> BOOL
%token <strval> APPROXNUM
%token <strval> USERVAR

%type <strval> stmt_root create_stmt para_list definition data_type pro_block pro_parameters declare_list set_list
%type <strval> assign_var pro_body pro_stmt_list sql_stmt expr

%right ASSIGN
%left OR
%left XOR
%left ANDOP
%left NOT '!'
%left BETWEEN
%left <subtok> COMPARISON /* = <> < > <= >= <=> */
%left '|'
%left '&'
%left <subtok> SHIFT /* << >> */
%left '+' '-'
%left '*' '/' '%' MOD
%left '^'

%token CREATE
%token PROCEDURE
%token PRONAME
%token DECLARE
%token SET
%token BEGINT
%token END
%token SQL
%token INT
%token VARCHAR
%token DATE
%token TIME
%token DOUBLE
%token IF
%token NOT
%token EXISTS
%token THEN
%token ELSE
%token ENDIF
%token FOR
%token WHEN
%token WHILE

%start stmt_root

%%

stmt_root: create_stmt pro_block
    {
        $$ = mystrcat($1, $2);
        parsetreeroot = $$;
    }
;

create_stmt: CREATE PROCEDURE NAME '(' para_list ')'
    {
        char *temp = mystrcat("create procedure ", $3);
        temp = mystrcat(temp, "(");
        temp = mystrcat(temp, $5);
        $$ = mystrcat(temp, ")(create)\n");
    }
;

/*
opt_if_not_exists:
      { $$ = 0; }
    | IF NOT EXISTS { $$ = 1; }
    ;
*/

para_list: definition
    {
        $$ = $1;
    }
| definition ',' para_list
    {
        char *temp = mystrcat($1, ",");
        $$ = mystrcat(temp, $3);
    }
;

definition: USERVAR data_type
    {
        char *temp = mystrcat($1, " ");
        $$ = mystrcat(temp, $2);
    }
;

/* strdup so that every semantic value is a writable heap string; note that
 * the varchar length is parsed but not emitted */
data_type: DATE                     { $$ = strdup("date"); }
    | TIME                          { $$ = strdup("time"); }
    | VARCHAR '(' INTNUM ')'        { $$ = strdup("varchar"); }
    | INT                           { $$ = strdup("int"); }
    | DOUBLE                        { $$ = strdup("double"); }
    ;

pro_block: BEGINT pro_parameters pro_body END
    {
        char *temp = mystrcat("begin\n", $2);
        temp = mystrcat(temp, $3);
        $$ = mystrcat(temp, "end");
    }
;

pro_parameters: declare_list ';'
    {
        $$ = mystrcat($1, ";(declare)\n");
    }
| pro_parameters declare_list ';'
    {
        char *temp = mystrcat($1, $2);
        $$ = mystrcat(temp, ";(declare)\n");
    }
| pro_parameters set_list ';'
    {
        char *temp = mystrcat($1, $2);
        $$ = mystrcat(temp, ";(set)\n");
    }
;

/* The empty alternatives need an explicit value: with no action, $$ is left
 * indeterminate and would then be handed to mystrcat().
 * NOTE(review): the empty alternatives of declare_list, set_list and
 * sql_stmt look likely to produce parser conflicts -- check with bison -v. */
declare_list: /* empty */
    {
        $$ = strdup("");
    }
| DECLARE definition
    {
        $$ = mystrcat("declare ", $2);
    }
| declare_list ',' definition
    {
        char *temp = mystrcat($1, ",");
        $$ = mystrcat(temp, $3);
    }
;

set_list: /* empty */
    {
        $$ = strdup("");
    }
| SET assign_var
    {
        $$ = mystrcat("set ", $2);
    }
| set_list ',' assign_var
    {
        char *temp = mystrcat($1, ",");
        $$ = mystrcat(temp, $3);
    }
;

assign_var: USERVAR COMPARISON expr
    {
        /* the lexer reports "=" as COMPARISON with subtok 4; any other
         * comparison operator is not an assignment */
        if ($2 != 4) {
            yyerror("expected '=' in assignment to %s", $1);
            YYERROR;
        }
        char *temp = mystrcat($1, "=");
        $$ = mystrcat(temp, $3);
    }
;

expr: NAME          { $$ = $1; }
    | STRING        { $$ = $1; }
    | INTNUM        { $$ = $1; }
    | APPROXNUM     { $$ = $1; }
    | BOOL          { $$ = $1; }
    ;

pro_body: pro_stmt_list
    {
        $$ = $1;
    }
;

pro_stmt_list: sql_stmt
    {
        $$ = $1;
    }
| pro_stmt_list sql_stmt
    {
        $$ = mystrcat($1, $2);
    }
;

sql_stmt: /* empty */
    {
        $$ = strdup("");
    }
| SQL NAME ';'
    {
        $$ = mystrcat($2, ";(sql)\n");
    }
;

%%

/*
int main(int argc, char* argv[])
{
    yyparse();
}
*/

/*
 * Input callback used by the lexer's YY_INPUT: copy up to maxbytestoread
 * bytes of globalinputtext, starting at globalreadoffset, into buffer.
 * Stores the number of bytes copied in *numbytesread (0 once the text is
 * exhausted), advances globalreadoffset by that amount and returns 0.
 */
int readinputforlexer(char *buffer, int *numbytesread, int maxbytestoread)
{
    int bytesremaining = (int)strlen(globalinputtext) - globalreadoffset;
    int numbytestoread = maxbytestoread;

    /* never report a negative count, whatever the offset or request is */
    if (bytesremaining < 0)
        bytesremaining = 0;
    if (numbytestoread > bytesremaining)
        numbytestoread = bytesremaining;
    if (numbytestoread < 0)
        numbytestoread = 0;

    if (numbytestoread > 0)
        memcpy(buffer, globalinputtext + globalreadoffset, (size_t)numbytestoread);

    *numbytesread = numbytestoread;
    globalreadoffset += numbytestoread;
    return 0;
}

/*
 * Print "error: <message>" to stderr.  s is a printf-style format; the
 * lexer calls this with arguments (e.g. "mystery character '%c'"), so the
 * variable arguments have to be formatted rather than ignored.
 */
void yyerror(const char *s, ...)
{
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "error: ");
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap);
}

/* Same as yyerror(), but prefixes the message with the lexer's line number. */
void zzerror(const char *s, ...)
{
    extern int yylineno;
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "%d: error: ", yylineno);
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap);
}

/* Always returns 1. */
int yywrap(void)
{
    return 1;
}

/* Return the string built by the last successful parse (NULL before any). */
char *getsql()
{
    return parsetreeroot;
}
这部分就是对上一个识别出来的词 进行顺序上的确定,构成一个完整的语法
这些需要在 Linux 环境下进行编译和调试,先用 bison 生成语法分析器和头文件,再用 flex 生成词法分析器:
bison -d 文件名
flex 文件名