// blob: cfb64af58187e1772d6a6a584a816150abcbcb90 (plain) (tree)
// token.cpp
// Revision 25-jul-2004
#include "token.h"
#include <cctype>
#include <sstream>
using std::isalpha;
using std::isdigit;
using std::isxdigit;
using std::isspace;
// Sets up a tokenizer over source: the str member is initialised from
// source, scanning starts at offset 0 and limit records how many
// characters are available.
Tokenizer::Tokenizer (const std::string & source) :
	str (source),
	pos (0),
	limit (source.length () )
{
}
// Returns the character at the current position without consuming it,
// or '\0' when the position has reached the end of the text.
unsigned char Tokenizer::peek ()
{
	return (pos < limit) ? str [pos] : '\0';
}
// Looks ahead, past any run of ' ' characters, and returns the first
// character that is not a space. Nothing is consumed: pos is left
// untouched. Returns '\0' when only spaces (or nothing) remain.
unsigned char Tokenizer::peeknospace ()
{
	for (std::string::size_type scan= pos; scan < limit; ++scan)
	{
		const unsigned char candidate= str [scan];
		if (candidate != ' ')
			return candidate;
	}
	return '\0';
}
// Consumes and returns the character at the current position.
// At the end of the text nothing is consumed and '\0' is returned.
unsigned char Tokenizer::nextchar ()
{
	if (pos >= limit)
		return '\0';
	const unsigned char current= str [pos];
	++pos;
	return current;
}
// Skips any run of ' ' characters, then consumes and returns the first
// character that is not a space. When the text is exhausted while
// skipping, pos is left at the end and '\0' is returned.
unsigned char Tokenizer::nextcharnospace ()
{
	for ( ; pos < limit; ++pos)
	{
		const unsigned char current= str [pos];
		if (current != ' ')
		{
			// Step over the character being handed back.
			++pos;
			return current;
		}
	}
	return '\0';
}
// Moves the current position one character back so that the character
// just consumed will be read again. Does nothing at the start of the text.
void Tokenizer::ungetchar ()
{
	if (pos > 0)
	{
		pos-= 1;
	}
}
//namespace {
// Tells whether c may appear inside an identifier: a character that
// isalpha or isdigit accepts, or an underscore.
// The value is converted to unsigned char before it reaches the <cctype>
// classifiers, because their behaviour is undefined for negative arguments
// other than EOF. The tokenizer passes bytes it read as unsigned char, so
// where plain char is signed any byte above 0x7F arrives here negative.
inline bool isident (char c)
{
	const unsigned char uc= static_cast <unsigned char> (c);
	return std::isalpha (uc) || std::isdigit (uc) || c == '_';
}
//}
// Extracts the next token from the text, starting at the current position,
// and advances past it.
// Returns:
// - Token (EndLine) when nextchar () yields '\0' (end of the text).
// - Token (Blank, run) for a run of whitespace characters.
// - Token (Literal, text) for a double-quoted string: the enclosing quotes
//   are dropped and a doubled quote inside the string yields one quote.
// - Token (n) for a run of decimal digits whose value does not exceed
//   BlIntegerMax.
// - Token (Plain, text) for everything else: '&' prefixed numbers, the
//   operators = < > (possibly merged with a following operator character),
//   identifiers, other numeric text and any single character that is not
//   recognised by the rules above.
Tokenizer::Token Tokenizer::get ()
{
unsigned char c= nextchar ();
if (c == '\0')
return Token (EndLine);
// Text of the token being built. Note that this local hides the member
// of the same name; the member is still reached through nextchar (),
// peek () and the other scanning helpers.
std::string str;
if (isspace (c) )
{
// Whitespace: collect the whole run, then give back the character that
// ended it (there is nothing to give back at the end of the text).
do {
str+= c;
} while (isspace (c= nextchar () ) );
if (c != '\0')
ungetchar ();
return Token (Blank, str);
}
switch (c)
{
case '"':
// String literal. The opening quote is not stored:
//str+= c;
while ( (c= nextchar () ) != '\0')
{
if (c == '"')
{
// A quote followed by another quote stands for one quote character;
// any other follower means the literal is closed. The follower is
// only peeked, so it stays available for the next token.
c= peek ();
if (c != '"')
break;
nextchar ();
}
str+= c;
}
// Reaching the end of the text also ends the literal.
return Token (Literal, str);
case '&':
// Hex, octal or binary number
str= '&';
c= nextchar ();
if (c == 'x' || c == 'X') // Binary
{
str+= c;
while ( (c= nextchar () ) == '0' || c == '1')
str+= c;
// Give back the character that ended the digit run.
if (c != 0)
ungetchar ();
}
else if (c == 'o' || c == 'O') // Octal
{
str+= c;
while ( (c= nextchar () ) >= '0' && c <= '7')
str+= c;
if (c != 0)
ungetchar ();
}
else // Hexadecimal
{
// The 'h' marker is optional: a bare '&' is also read as hexadecimal.
if (c == 'h' || c == 'H')
{
str+= c;
c= nextchar ();
}
while (isxdigit (c) )
{
str+= c;
c= nextchar ();
}
if (c != 0)
ungetchar ();
}
return Token (Plain, str);
case '=':
{
// '=' is merged with a following '>' or '<', even when spaces
// separate the two characters; the spaces are dropped.
str= c;
c= peeknospace ();
if (c == '>' || c == '<')
{
str+= c;
(void) nextcharnospace ();
}
}
return Token (Plain, str);
case '<':
// '<' is merged with a following '=' or '>', skipping spaces.
str= c;
c= peeknospace ();
if (c == '=' || c == '>')
{
str+= c;
(void) nextcharnospace ();
}
return Token (Plain, str);
case '>':
// '>' is merged with a following '=' or '<', skipping spaces.
str= c;
c= peeknospace ();
if (c == '=' || c == '<')
{
str+= c;
(void) nextcharnospace ();
}
return Token (Plain, str);
default:
; // Handled by the tests that follow the switch.
}
if (isalpha (c) )
{
// Identifier: a letter followed by characters accepted by isident (),
// with an optional trailing '$' that is kept as part of the name.
do
{
str+= c;
} while ( isident (c= nextchar () ) );
if (c == '$')
str+= c;
else
if (c != '\0')
ungetchar ();
return Token (Plain, str);
}
if (isdigit (c) || c == '.')
{
// Number: optional digits, optional fraction, optional exponent.
// nofloat stays true while nothing but decimal digits has been seen.
bool nofloat= true;
while (isdigit (c) )
{
str+= c;
c= nextchar ();
}
if (c == '.')
{
str+= '.';
nofloat= false;
while ( isdigit (c= nextchar () ) )
str+= c;
}
if (c == 'e' || c == 'E')
{
str+= c;
nofloat= false;
c= nextchar ();
if (! isdigit (c) && c != '+' && c != '-')
{
// Data such as 1E: no exponent follows, so any identifier characters
// that come next are appended and the result is returned as text.
while (isident (c) )
{
str+= c;
c= nextchar ();
}
if (c != 0)
ungetchar ();
return Token (Plain, str);
}
if (c == '+' || c == '-')
{
str+= c;
c= nextchar ();
}
while (isdigit (c) )
{
str+= c;
c= nextchar ();
}
}
if (c != 0)
{
// Identifier characters glued to the number turn the whole thing
// into a plain text token.
if (isident (c) )
{
do {
str+= c;
c= nextchar ();
} while (isident (c) );
if (c != 0)
ungetchar ();
return Token (Plain, str);
}
ungetchar ();
}
if (nofloat)
{
// Disabled alternative that converts through a string stream.
#if 0
std::istringstream iss (str);
BlInteger n;
iss >> n;
if (iss)
{
iss.get ();
//if (! iss)
if (iss.eof () )
return Token (n);
}
#else
// Manual decimal conversion. The loop is abandoned as soon as the next
// step would take the value above BlIntegerMax.
BlInteger n= 0;
std::string::size_type i;
const std::string::size_type l= str.size ();
for (i= 0; i < l; ++i)
{
BlInteger digit= str [i] - '0';
if (n > (BlIntegerMax / 10) )
break;
n*= 10;
if (n > BlIntegerMax - digit)
break;
n+= digit;
}
// Every digit was consumed, so the value fits.
if (i == l)
return Token (n);
#endif
}
// Numbers with a fraction or exponent, and integers too large for the
// conversion above, are returned as text.
return Token (Plain, str);
}
// If nothing else:
str= c;
return Token (Plain, str);
}
// Consumes everything that remains in the text and returns it.
// Characters outside quotes are copied as they are. A quoted section is
// copied as its opening quote, its content (a doubled quote inside it
// yields one quote character) and a '\0' in place of the closing quote.
// A quoted section that is still open at the end of the text is
// terminated by the '\0' in the same way.
std::string Tokenizer::getrest ()
{
	std::string rest;
	for (;;)
	{
		const char ch= nextchar ();
		if (ch == '\0')
			break;
		rest+= ch;
		if (ch != '"')
			continue;

		// Inside a quoted section: copy up to the closing quote.
		for (;;)
		{
			const char inner= nextchar ();
			if (inner == '\0')
				break;
			if (inner == '"')
			{
				// Only a second quote right behind this one keeps the
				// section open; it is consumed and one quote is stored.
				if (peek () != '"')
					break;
				nextchar ();
			}
			rest+= inner;
		}
		rest+= '\0';
	}
	return rest;
}
// End of token.cpp
|