aboutsummaryrefslogblamecommitdiffstats
path: root/token.cpp
blob: cfb64af58187e1772d6a6a584a816150abcbcb90 (plain) (tree)


























































































































































































































































































































                                                                     
// token.cpp
// Revision 25-jul-2004

#include "token.h"
#include <cctype>
#include <sstream>

using std::isalpha;
using std::isdigit;
using std::isxdigit;
using std::isspace;

Tokenizer::Tokenizer (const std::string & source) :
	str (source), pos (0), limit (source.size () )
{
}

unsigned char Tokenizer::peek ()
{
	if (pos < limit)
		return str [pos];
	else
		return '\0';
}

unsigned char Tokenizer::peeknospace ()
{
	unsigned char c= '\0';
	std::string::size_type p= pos;
	while (p < limit && (c= str [p] ) == ' ')
		++p;
	if (p < limit)
		return c;
	else
		return '\0';
}

unsigned char Tokenizer::nextchar ()
{
	if (pos < limit)
		return str [pos++];
	else
		return '\0';
}

unsigned char Tokenizer::nextcharnospace ()
{
	unsigned char c= '\0';
	while (pos < limit && (c= str [pos] ) == ' ')
		++pos;
	if (pos < limit)
	{
		++pos;
		return c;
	}
	else
		return '\0';
}

void Tokenizer::ungetchar ()
{
	if (pos > 0)
		--pos;
}

//namespace {

inline bool isident (char c)
{
	return isalpha (c) || isdigit (c) || c == '_';
}

//}

Tokenizer::Token Tokenizer::get ()
{
	unsigned char c= nextchar ();
	if (c == '\0')
		return Token (EndLine);

	std::string str;
	if (isspace (c) )
	{
		do {
			str+= c;
		} while (isspace (c= nextchar () ) );
		if (c != '\0')
			ungetchar ();
		return Token (Blank, str);
	}

	switch (c)
	{
	case '"':
		//str+= c;
		while ( (c= nextchar () ) != '\0')
		{
			if (c == '"')
			{
				c= peek ();
				if (c != '"')
					break;
				nextchar ();
			}
			str+= c;
		}
		return Token (Literal, str);
	case '&':
		// Hex, octal or binary number
		str= '&';
		c= nextchar ();
		if (c == 'x' || c == 'X') // Binary
		{
			str+= c;
			while ( (c= nextchar () ) == '0' || c == '1')
				str+= c;
			if (c != 0)
				ungetchar ();
		}
		else if (c == 'o' || c == 'O') // Octal
		{
			str+= c;
			while ( (c= nextchar () ) >= '0' && c <= '7')
				str+= c;
			if (c != 0)
				ungetchar ();
		}
		else // Hexadecimal
		{
			if (c == 'h' || c == 'H')
			{
				str+= c;
				c= nextchar ();
			}
			while (isxdigit (c) )
			{
				str+= c;
				c= nextchar ();
			}
			if (c != 0)
				ungetchar ();
		}
		return Token (Plain, str);
	case '=':
		{
			str= c;
			c= peeknospace ();
			if (c == '>' || c == '<')
			{
				str+= c;
				(void) nextcharnospace ();
			}
		}
		return Token (Plain, str);
	case '<':
		str= c;
		c= peeknospace ();
		if (c == '=' || c == '>')
		{
			str+= c;
			(void) nextcharnospace ();
		}
		return Token (Plain, str);
	case '>':
		str= c;
		c= peeknospace ();
		if (c == '=' || c == '<')
		{
			str+= c;
			(void) nextcharnospace ();
		}
		return Token (Plain, str);
	default:
		; // Later.
	}

	if (isalpha (c) )
	{
		do
		{
			str+= c;
		} while ( isident (c= nextchar () ) );
		if (c == '$')
			str+= c;
		else
			if (c != '\0')
				ungetchar ();
		return Token (Plain, str);
	}

	if (isdigit (c) || c == '.')
	{
		bool nofloat= true;
		while (isdigit (c) )
		{
			str+= c;
			c= nextchar ();
		}
		if (c == '.')
		{
			str+= '.';
			nofloat= false;
			while ( isdigit (c= nextchar () ) )
				str+= c;
		}
		if (c == 'e' || c == 'E')
		{
			str+= c;
			nofloat= false;
			c= nextchar ();
			if (! isdigit (c) && c != '+' && c != '-')
			{
				// Data such as 1E
				while (isident (c) )
				{
					str+= c;
					c= nextchar ();
				}
				if (c != 0)
					ungetchar ();
				return Token (Plain, str);
			}
			if (c == '+' || c == '-')
			{
				str+= c;
				c= nextchar ();
			}
			while (isdigit (c) )
			{
				str+= c;
				c= nextchar ();
			}
		}
		if (c != 0)
		{
			if (isident (c) )
			{
				do {
					str+= c;
					c= nextchar ();
				} while (isident (c) );
				if (c != 0)
					ungetchar ();
				return Token (Plain, str);
			}
			ungetchar ();
		}
		if (nofloat)
		{
			#if 0
			std::istringstream iss (str);
			BlInteger n;
			iss >> n;
			if (iss)
			{
				iss.get ();
				//if (! iss)
				if (iss.eof () )
					return Token (n);
			}
			#else
			BlInteger n= 0;
			std::string::size_type i;
			const std::string::size_type l= str.size ();
			for (i= 0; i < l; ++i)
			{
				BlInteger digit= str [i] - '0';
				if (n > (BlIntegerMax / 10) )
					break;
				n*= 10;
				if (n > BlIntegerMax - digit)
					break;
				n+= digit;
			}
			if (i == l)
				return Token (n);
			#endif
		}
		return Token (Plain, str);
	}

	// If nothing else:
	str= c;
	return Token (Plain, str);
}

std::string Tokenizer::getrest ()
{
	std::string r;
	char c;
	while ( (c= nextchar () ) != 0)
	{
		if (c == '"')
		{
			r+= c;
			while ( (c= nextchar () ) != '\0')
			{
				if (c == '"')
				{
					c= peek ();
					if (c != '"')
						break;
					nextchar ();
				}
				r+= c;
			}
			r+= '\0';
		}
		else
			r+= c;
	}
	return r;
}

// Fin de token.cpp
Un proyecto texto-plano.xyz