Your case is very simple. You can have three types of tokens in your data stream:
- digits
- a separator
- an "error" token
Everything else is a parse error (you can even drop the last token type and treat the error character as a parse error as well).
First you'll need a lexer function that reads the data stream and returns tokens. Then you need a parsing function that fetches tokens using the lexer function and interprets them according to a grammar of your choice. The parsing function knows which symbols to expect and can either interpret them or generate a parse error.
Here is a primitive lexer:
// Non-numeric tokens. They are negative so that they can share the lexer's
// int return value with numbers, which are always >= 0.
enum Token { EndOfStream=-1, EndOfLine=-2, Separator=-3, Error=-4 };

// Reads the next token from `stream`.
//
// Returns the value of a run of decimal digits (>= 0), or one of the negative
// Token constants: Separator for ',', EndOfLine for '\n', EndOfStream when the
// input is exhausted and Error for any other character. Spaces and tabs
// between tokens are skipped.
//
// NOTE: the accumulated value is not range-checked; a digit run too long for
// an int overflows.
int lexer(std::istream &stream){
    int value = 0;
    bool hasValue = false;
    while(true){
        const int ch = stream.peek(); // look at the next char without consuming it
        if(ch == std::istream::traits_type::eof()){
            // eofbit is only raised once a read actually hits the end, so the
            // end of input has to be detected on the peeked character itself
            // rather than by calling eof() before peeking.
            if(hasValue) return value; // a number running up to the end of input
            // eofbit set: genuine end of data. Otherwise the stream failed.
            return stream.eof() ? EndOfStream : Error;
        }
        if(ch>='0' && ch<='9'){
            // a digit: fold it into the number being built and consume it
            value = value*10 + (ch-'0');
            hasValue = true;
            stream.get();
            continue;
        }
        // Not a digit: if a number was in progress it ends here. The character
        // stays in the stream and is tokenised by the next call.
        if(hasValue) return value;
        stream.get(); // consume the character that was peeked
        switch(ch){
            case ' ': case '\t': break; // white space - ignore and keep scanning
            case '\n': return EndOfLine;
            case ',': return Separator;
            default: return Error;
        }
    }
}
// Non-numeric tokens. They are negative so that they can share the lexer's
// int return value with numbers, which are always >= 0.
enum Token { EndOfStream=-1, EndOfLine=-2, Separator=-3, Error=-4 };

// Reads the next token from `stream`.
//
// Returns the value of a run of decimal digits (>= 0), or one of the negative
// Token constants: Separator for ',', EndOfLine for '\n', EndOfStream when the
// input is exhausted and Error for any other character. Spaces and tabs
// between tokens are skipped.
//
// NOTE: the accumulated value is not range-checked; a digit run too long for
// an int overflows.
int lexer(std::istream &stream){
    int value = 0;
    bool hasValue = false;
    while(true){
        const int ch = stream.peek(); // look at the next char without consuming it
        if(ch == std::istream::traits_type::eof()){
            // eofbit is only raised once a read actually hits the end, so the
            // end of input has to be detected on the peeked character itself
            // rather than by calling eof() before peeking.
            if(hasValue) return value; // a number running up to the end of input
            // eofbit set: genuine end of data. Otherwise the stream failed.
            return stream.eof() ? EndOfStream : Error;
        }
        if(ch>='0' && ch<='9'){
            // a digit: fold it into the number being built and consume it
            value = value*10 + (ch-'0');
            hasValue = true;
            stream.get();
            continue;
        }
        // Not a digit: if a number was in progress it ends here. The character
        // stays in the stream and is tokenised by the next call.
        if(hasValue) return value;
        stream.get(); // consume the character that was peeked
        switch(ch){
            case ' ': case '\t': break; // white space - ignore and keep scanning
            case '\n': return EndOfLine;
            case ',': return Separator;
            default: return Error;
        }
    }
}
To copy to clipboard, switch view to plain text mode
The parser is really a finite state machine:
enum State { NumberOrSeparatorOrEnd, Number, SeparatorOrEnd, NumberOrEnd };
void parser(){
//...
int token;
State state = NumberOrEnd;
do {
token = lexer(stream);
switch(state){
case NumberOrEnd:
if(token==EndOfLine || token==EndOfStream) return;
else if(token>=0){
processNumber(token);
state = SeparatorOrEnd; // expect a separator or end
} else {
error = true;
return; // parse error
}
break;
case SeparatorOrEnd:
if(token==Separator){
state = Number;
} else if(token==EndOfLine || token==EndOfStream){
return;
} else {
error = true; return; // parse error
}
break;
//...
}
}
//...
}
enum State { NumberOrSeparatorOrEnd, Number, SeparatorOrEnd, NumberOrEnd };
void parser(){
//...
int token;
State state = NumberOrEnd;
do {
token = lexer(stream);
switch(state){
case NumberOrEnd:
if(token==EndOfLine || token==EndOfStream) return;
else if(token>=0){
processNumber(token);
state = SeparatorOrEnd; // expect a separator or end
} else {
error = true;
return; // parse error
}
break;
case SeparatorOrEnd:
if(token==Separator){
state = Number;
} else if(token==EndOfLine || token==EndOfStream){
return;
} else {
error = true; return; // parse error
}
break;
//...
}
}
//...
}
To copy to clipboard, switch view to plain text mode
And there you have a complete, extensible parser.
Bookmarks