User:Antigng-bot/regex/regexFrontEnd

维基百科,自由的百科全书
#include <stdio.h>
#include "regexFrontEnd.h"
#include "regexBackEnd.h"
int opStack[8192]={0};
int opStackTop=0;
static int opStackPush(unsigned int op)
{
	opStack[opStackTop++]=op;
	return opStackTop>4096;
}
static unsigned int opStackPop()
{
	if(opStackTop)
	{
		opStackTop--;
		return opStack[opStackTop];
	}
	else return 0;
}
static unsigned int opStackGetTop()
{
	if(opStackTop) return opStack[opStackTop-1];
	else return 0;
}
static int opStackIsNull()
{
	return opStackTop==0;
}

static int doReversePolishConversion(struct _regexToken tok)
{
	struct _regexToken pseudo_tok;
	pseudo_tok.type=T_Regex_Bi_OP;
	switch(tok.type)
	{
	case T_Regex_EOL:
		{
			unsigned int op=0;
			while(!opStackIsNull())
			{
				if((op=opStackPop())=='(') 
				{
					fprintf(stderr,"\tParser: Brackets don't match. Missing ')'s.\n");
					return 1;
				}
				else
				{
					pseudo_tok.value.ch=op;
					if(genNFANodeToQueue(pseudo_tok)) return 1;
				}
			}
			if(genNFANodeToQueue(tok)) return 1;
		}
		break;
	case T_Regex_Ch:
	case T_Regex_Gen:
	case T_Regex_Range:
	case T_Regex_Uni_OP:
		if(genNFANodeToQueue(tok)) return 1;
		break;
	case T_Regex_Bi_OP:
		switch(tok.value.ch)
		{
		case '|':
			while(!opStackIsNull())
			{
				switch(opStackGetTop())
				{
				case '|':
				case '&':
					pseudo_tok.value.ch=opStackPop();
					if(genNFANodeToQueue(pseudo_tok)) return 1;
					break;
				default:
					goto _exitSymbol;
				}
			}
_exitSymbol:
			if(opStackPush('|'))
			{
				fprintf(stderr,"\tParser: Stack overflow.\n");
				return 1;
			}
			break;
		case '&':
			while(!opStackIsNull())
			{
				if((opStackGetTop()=='&'))
				{
					pseudo_tok.value.ch=opStackPop();
					if(genNFANodeToQueue(pseudo_tok)) return 1;
				}
				else break;
			}
			if(opStackPush('&'))
			{
				fprintf(stderr,"\tParser: Stack overflow.\n");
				return 1;
			}
			break;
		default:
			fprintf(stderr,"\tParser: Unknown operator.\n");
			return 1;
		}
		break;
	case T_Regex_Bra:
		if(opStackPush('('))
		{
			fprintf(stderr,"\tParser: Stack overflow.\n");
			return 1;
		}
		break;
	case T_Regex_Ket:
		{
			int matched=0;
			unsigned int op=0;
			while(!opStackIsNull())
			{
				if((op=opStackPop())=='(')
				{
					matched=1;
					break;
				}
				else
				{
					pseudo_tok.value.ch=op;
					if(genNFANodeToQueue(pseudo_tok)) return 1;
				}
			}
			if(!matched)
			{
				fprintf(stderr,"\tParser: Brackets don't match. Unmatched ')' found.\n");
				return 1;
			}
			break;
		}
		break;
	}
	return 0;
}
int prevType=-1;
static int regexPatternParser(struct _regexToken tok)
{
	struct _regexToken pseudo_tok;
	pseudo_tok.type=T_Regex_Bi_OP;
	pseudo_tok.value.ch='&';
	switch(tok.type)
	{
	case T_Regex_Ch:
	case T_Regex_Gen:
	case T_Regex_Range:
	case T_Regex_Bra:
		switch(prevType)
		{
		case T_Regex_Ch:
		case T_Regex_Gen:
		case T_Regex_Range:
		case T_Regex_Uni_OP:
		case T_Regex_Ket:
			if(doReversePolishConversion(pseudo_tok)) return 1;
			break;
		}
		break;
	}
	prevType=tok.type;
	return doReversePolishConversion(tok);
}
static void throwError(int count,unsigned int symbol)
{
	char symText[16];
	if(!symbol)
	{
		sprintf(symText,"EOL");
	}
	else if((symbol>=32)&&(symbol<=126))
	{
		sprintf(symText,"'%c'",symbol);
	}
	else
	{
		sprintf(symText,"'\\u%04x'",symbol);
	}
	fprintf(stderr,"Lexer: Error at char number %d: %s\n",count,symText);
	return;
}
int regexPatternLexer(const unsigned int *regexPattern)
{
	struct _regexToken tok;
	unsigned int uch=0;
	int count=0;
	int subCount=0;
	int state=0;
	tok.type=tok.value.ch=0;
	uch=regexPattern[count];
	while(uch)
	{
		int isSet=0;
		switch(state)
		{
		case 0:
			switch(uch)
			{
			case '\\':
				state=1;
				break;
			case '%':
				state=2;
				break;
			case '[':
				subCount=0;
				state=3;
				break;
			case '*':
				tok.type=T_Regex_Uni_OP;
				tok.value.ch='*';
				isSet=1;
				break;
			case '+':
				tok.type=T_Regex_Uni_OP;
				tok.value.ch='+';
				isSet=1;
				break;
			case '?':
				tok.type=T_Regex_Uni_OP;
				tok.value.ch='?';
				isSet=1;
				break;
			case '|':
				tok.type=T_Regex_Bi_OP;
				tok.value.ch='|';
				isSet=1;
				break;
			case '(':
				tok.type=T_Regex_Bra;
				tok.value.ch='(';
				isSet=1;
				break;
			case ')':
				tok.type=T_Regex_Ket;
				tok.value.ch=')';
				isSet=1;
				break;
			default:
				tok.type=T_Regex_Ch;
				tok.value.ch=uch;
				isSet=1;
				break;
			}
			break;
		case 1:
			tok.type=T_Regex_Ch;
			isSet=1;
			switch(uch)
			{
			case 'r':
				tok.value.ch='\r';
				break;
			case 'n':
				tok.value.ch='\n';
				break;
			case 't':
				tok.value.ch='\t';
				break;
			case 'f':
				tok.value.ch='\f';
				break;
			case 'a':
				tok.value.ch='\a';
				break;
			default:
				tok.value.ch=uch;
				break;
			}
			state=0;
			break;
		case 2:
			switch(uch)
			{
			case 'a':
				tok.type=T_Regex_Gen;
				tok.value.ch='a';
				isSet=1;
				break;
			case 'd':
				tok.type=T_Regex_Gen;
				tok.value.ch='d';
				isSet=1;
				break;
			case 's':
				tok.type=T_Regex_Gen;
				tok.value.ch='s';
				isSet=1;
				break;
			default:
				throwError(count,uch);
				return 1;
				break;
			}
			state=0;
			break;
		case 3:
			switch(uch)
			{
			case '\\':
				state=4;
				break;
			case '%':
				throwError(count,uch);
				return 1;
				break;
			case '[':
				throwError(count,uch);
				return 1;
				break;
			case ']':
				throwError(count,uch);
				return 1;
				break;
			case '*':
				throwError(count,uch);
				return 1;
				break;
			case '+':
				throwError(count,uch);
				return 1;
				break;
			case '?':
				throwError(count,uch);
				return 1;
				break;
			case '|':
				throwError(count,uch);
				return 1;
				break;
			case '(':
				throwError(count,uch);
				return 1;
				break;
			case ')':
				throwError(count,uch);
				return 1;
				break;
			case '-':
				throwError(count,uch);
				return 1;
				break;
			default:
				(tok.value.range.pairs)[subCount].start=uch;
				state=5;
				break;
			}
			break;
		case 4:
			switch(uch)
			{
			case 'r':
				(tok.value.range.pairs)[subCount].start='\r';
				break;
			case 'n':
				(tok.value.range.pairs)[subCount].start='\n';
				break;
			case 't':
				(tok.value.range.pairs)[subCount].start='\t';
				break;
			case 'f':
				(tok.value.range.pairs)[subCount].start='\f';
				break;
			case 'a':
				(tok.value.range.pairs)[subCount].start='\a';
				break;
			default:
				(tok.value.range.pairs)[subCount].start=uch;
				break;
			}
			state=5;
			break;
		case 5:
			switch(uch)
			{
			case '%':
				throwError(count,uch);
				return 1;
				break;
			case '[':
				throwError(count,uch);
				return 1;
				break;
			case '*':
				throwError(count,uch);
				return 1;
				break;
			case '+':
				throwError(count,uch);
				return 1;
				break;
			case '?':
				throwError(count,uch);
				return 1;
				break;
			case '|':
				throwError(count,uch);
				return 1;
				break;
			case '(':
				throwError(count,uch);
				return 1;
				break;
			case ')':
				throwError(count,uch);
				return 1;
				break;
			case '-':
				state=6;
				break;
			case ']':
				(tok.value.range.pairs)[subCount].end=(tok.value.range.pairs)[subCount].start;
				subCount++;
				tok.value.range.count=subCount;
				tok.type=T_Regex_Range;
				isSet=1;
				state=0;
				break;
			case '\\':
				(tok.value.range.pairs)[subCount].end=(tok.value.range.pairs)[subCount].start;
				subCount++;
				state=4;
				break;
			default:
				(tok.value.range.pairs)[subCount].end=(tok.value.range.pairs)[subCount].start;
				subCount++;
				(tok.value.range.pairs)[subCount].start=uch;
				break;
			}
			if(subCount>62)
			{
				throwError(count,uch);
				return 1;
			}
			break;
		case 6:
			switch(uch)
			{
			case '\\':
				state=7;
				break;
			case '%':
				throwError(count,uch);
				return 1;
				break;
			case '[':
				throwError(count,uch);
				return 1;
				break;
			case ']':
				throwError(count,uch);
				return 1;
				break;
			case '*':
				throwError(count,uch);
				return 1;
				break;
			case '+':
				throwError(count,uch);
				return 1;
				break;
			case '?':
				throwError(count,uch);
				return 1;
				break;
			case '|':
				throwError(count,uch);
				return 1;
				break;
			case '(':
				throwError(count,uch);
				return 1;
				break;
			case ')':
				throwError(count,uch);
				return 1;
				break;
			case '-':
				throwError(count,uch);
				return 1;
				break;
			default:
				(tok.value.range.pairs)[subCount].end=uch;
				state=8;
				break;
			}
			break;
		case 7:
			switch(uch)
			{
			case 'r':
				(tok.value.range.pairs)[subCount].end='\r';
				break;
			case 'n':
				(tok.value.range.pairs)[subCount].end='\n';
				break;
			case 't':
				(tok.value.range.pairs)[subCount].end='\t';
				break;
			case 'f':
				(tok.value.range.pairs)[subCount].end='\f';
				break;
			case 'a':
				(tok.value.range.pairs)[subCount].end='\a';
				break;
			default:
				(tok.value.range.pairs)[subCount].end=uch;
				break;
			}
			state=8;
			break;
		case 8:
			switch(uch)
			{
			case '\\':
				subCount++;
				state=4;
				break;
			case '%':
				throwError(count,uch);
				return 1;
				break;
			case '[':
				throwError(count,uch);
				return 1;
				break;
			case '*':
				throwError(count,uch);
				return 1;
				break;
			case '+':
				throwError(count,uch);
				return 1;
				break;
			case '?':
				throwError(count,uch);
				return 1;
				break;
			case '|':
				throwError(count,uch);
				return 1;
				break;
			case '(':
				throwError(count,uch);
				return 1;
				break;
			case ')':
				throwError(count,uch);
				return 1;
				break;
			case '-':
				throwError(count,uch);
				return 1;
				break;
			case ']':
				subCount++;
				tok.value.range.count=subCount;
				tok.type=T_Regex_Range;
				isSet=1;
				state=0;
				break;
			default:
				subCount++;
				(tok.value.range.pairs)[subCount].start=uch;
				state=5;
				break;
			}
			if(subCount>62)
			{
				throwError(count,uch);
				return 1;
			}
			break;
		}
		if(isSet)
		{
			if(regexPatternParser(tok))
			{
				throwError(count,uch);
				return 2;
			}
		}
		count++;
		uch=regexPattern[count];
	}
	tok.type=T_Regex_EOL;
	tok.value.ch=0;
	if(state||(regexPatternParser(tok)))
	{
		throwError(count,0);
		return 1+!state;
	}
	return 0;
}