aboutsummaryrefslogtreecommitdiffstats
path: root/token.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'token.cpp')
-rw-r--r--token.cpp315
1 files changed, 315 insertions, 0 deletions
diff --git a/token.cpp b/token.cpp
new file mode 100644
index 0000000..cfb64af
--- /dev/null
+++ b/token.cpp
@@ -0,0 +1,315 @@
+// token.cpp
+// Revision 25-jul-2004
+
+#include "token.h"
+#include <cctype>
+#include <sstream>
+
+using std::isalpha;
+using std::isdigit;
+using std::isxdigit;
+using std::isspace;
+
+Tokenizer::Tokenizer (const std::string & source) :
+ str (source), pos (0), limit (source.size () )
+{
+}
+
+unsigned char Tokenizer::peek ()
+{
+ if (pos < limit)
+ return str [pos];
+ else
+ return '\0';
+}
+
+unsigned char Tokenizer::peeknospace ()
+{
+ unsigned char c= '\0';
+ std::string::size_type p= pos;
+ while (p < limit && (c= str [p] ) == ' ')
+ ++p;
+ if (p < limit)
+ return c;
+ else
+ return '\0';
+}
+
+unsigned char Tokenizer::nextchar ()
+{
+ if (pos < limit)
+ return str [pos++];
+ else
+ return '\0';
+}
+
+unsigned char Tokenizer::nextcharnospace ()
+{
+ unsigned char c= '\0';
+ while (pos < limit && (c= str [pos] ) == ' ')
+ ++pos;
+ if (pos < limit)
+ {
+ ++pos;
+ return c;
+ }
+ else
+ return '\0';
+}
+
+void Tokenizer::ungetchar ()
+{
+ if (pos > 0)
+ --pos;
+}
+
+//namespace {
+
+inline bool isident (char c)
+{
+ return isalpha (c) || isdigit (c) || c == '_';
+}
+
+//}
+
+Tokenizer::Token Tokenizer::get ()
+{
+ unsigned char c= nextchar ();
+ if (c == '\0')
+ return Token (EndLine);
+
+ std::string str;
+ if (isspace (c) )
+ {
+ do {
+ str+= c;
+ } while (isspace (c= nextchar () ) );
+ if (c != '\0')
+ ungetchar ();
+ return Token (Blank, str);
+ }
+
+ switch (c)
+ {
+ case '"':
+ //str+= c;
+ while ( (c= nextchar () ) != '\0')
+ {
+ if (c == '"')
+ {
+ c= peek ();
+ if (c != '"')
+ break;
+ nextchar ();
+ }
+ str+= c;
+ }
+ return Token (Literal, str);
+ case '&':
+ // Hex, octal or binary number
+ str= '&';
+ c= nextchar ();
+ if (c == 'x' || c == 'X') // Binary
+ {
+ str+= c;
+ while ( (c= nextchar () ) == '0' || c == '1')
+ str+= c;
+ if (c != 0)
+ ungetchar ();
+ }
+ else if (c == 'o' || c == 'O') // Octal
+ {
+ str+= c;
+ while ( (c= nextchar () ) >= '0' && c <= '7')
+ str+= c;
+ if (c != 0)
+ ungetchar ();
+ }
+ else // Hexadecimal
+ {
+ if (c == 'h' || c == 'H')
+ {
+ str+= c;
+ c= nextchar ();
+ }
+ while (isxdigit (c) )
+ {
+ str+= c;
+ c= nextchar ();
+ }
+ if (c != 0)
+ ungetchar ();
+ }
+ return Token (Plain, str);
+ case '=':
+ {
+ str= c;
+ c= peeknospace ();
+ if (c == '>' || c == '<')
+ {
+ str+= c;
+ (void) nextcharnospace ();
+ }
+ }
+ return Token (Plain, str);
+ case '<':
+ str= c;
+ c= peeknospace ();
+ if (c == '=' || c == '>')
+ {
+ str+= c;
+ (void) nextcharnospace ();
+ }
+ return Token (Plain, str);
+ case '>':
+ str= c;
+ c= peeknospace ();
+ if (c == '=' || c == '<')
+ {
+ str+= c;
+ (void) nextcharnospace ();
+ }
+ return Token (Plain, str);
+ default:
+ ; // Later.
+ }
+
+ if (isalpha (c) )
+ {
+ do
+ {
+ str+= c;
+ } while ( isident (c= nextchar () ) );
+ if (c == '$')
+ str+= c;
+ else
+ if (c != '\0')
+ ungetchar ();
+ return Token (Plain, str);
+ }
+
+ if (isdigit (c) || c == '.')
+ {
+ bool nofloat= true;
+ while (isdigit (c) )
+ {
+ str+= c;
+ c= nextchar ();
+ }
+ if (c == '.')
+ {
+ str+= '.';
+ nofloat= false;
+ while ( isdigit (c= nextchar () ) )
+ str+= c;
+ }
+ if (c == 'e' || c == 'E')
+ {
+ str+= c;
+ nofloat= false;
+ c= nextchar ();
+ if (! isdigit (c) && c != '+' && c != '-')
+ {
+ // Data such as 1E
+ while (isident (c) )
+ {
+ str+= c;
+ c= nextchar ();
+ }
+ if (c != 0)
+ ungetchar ();
+ return Token (Plain, str);
+ }
+ if (c == '+' || c == '-')
+ {
+ str+= c;
+ c= nextchar ();
+ }
+ while (isdigit (c) )
+ {
+ str+= c;
+ c= nextchar ();
+ }
+ }
+ if (c != 0)
+ {
+ if (isident (c) )
+ {
+ do {
+ str+= c;
+ c= nextchar ();
+ } while (isident (c) );
+ if (c != 0)
+ ungetchar ();
+ return Token (Plain, str);
+ }
+ ungetchar ();
+ }
+ if (nofloat)
+ {
+ #if 0
+ std::istringstream iss (str);
+ BlInteger n;
+ iss >> n;
+ if (iss)
+ {
+ iss.get ();
+ //if (! iss)
+ if (iss.eof () )
+ return Token (n);
+ }
+ #else
+ BlInteger n= 0;
+ std::string::size_type i;
+ const std::string::size_type l= str.size ();
+ for (i= 0; i < l; ++i)
+ {
+ BlInteger digit= str [i] - '0';
+ if (n > (BlIntegerMax / 10) )
+ break;
+ n*= 10;
+ if (n > BlIntegerMax - digit)
+ break;
+ n+= digit;
+ }
+ if (i == l)
+ return Token (n);
+ #endif
+ }
+ return Token (Plain, str);
+ }
+
+ // If nothing else:
+ str= c;
+ return Token (Plain, str);
+}
+
+std::string Tokenizer::getrest ()
+{
+ std::string r;
+ char c;
+ while ( (c= nextchar () ) != 0)
+ {
+ if (c == '"')
+ {
+ r+= c;
+ while ( (c= nextchar () ) != '\0')
+ {
+ if (c == '"')
+ {
+ c= peek ();
+ if (c != '"')
+ break;
+ nextchar ();
+ }
+ r+= c;
+ }
+ r+= '\0';
+ }
+ else
+ r+= c;
+ }
+ return r;
+}
+
+// Fin de token.cpp
Un proyecto texto-plano.xyz