/*************************************************************** bwb_str.c String-Management Routines for Bywater BASIC Interpreter Copyright (c) 1993, Ted A. Campbell Bywater Software email: tcamp@delphi.com Copyright and Permissions Information: All U.S. and international rights are claimed by the author, Ted A. Campbell. This software is released under the terms of the GNU General Public License (GPL), which is distributed with this software in the file "COPYING". The GPL specifies the terms under which users may copy and use the software in this distribution. A separate license is available for commercial distribution, for information on which you should contact the author. ***************************************************************/ /*---------------------------------------------------------------*/ /* NOTE: Modifications marked "JBV" were made by Jon B. Volkoff, */ /* 11/1995 (eidetics@cerf.net). */ /* */ /* Those additionally marked with "DD" were at the suggestion of */ /* Dale DePriest (daled@cadence.com). */ /* */ /* Version 3.00 by Howard Wulf, AF5NE */ /* */ /* Version 3.10 by Howard Wulf, AF5NE */ /* */ /* Version 3.20 by Howard Wulf, AF5NE */ /* */ /*---------------------------------------------------------------*/ /*************************************************************** BASIC allows embedded NUL (0) characters. C str*() does not. ALL the StringType code should use mem*() and ->length. ALL the StringType code should prevent string overflow. ***************************************************************/ #include "bwbasic.h" static int CharListToSet (char *pattern, int start, int stop); static int IndexOf (char *buffer, char find, int start); /*************************************************************** FUNCTION: str_btob() DESCRIPTION: This C function assigns a bwBASIC string structure to another bwBASIC string structure. 
***************************************************************/

/*
** str_btob() makes d an independent copy of s.
**
**   d   destination string.  Any buffer it previously held is
**       released, except My->MaxLenBuffer, My->ConsoleOutput and
**       My->ConsoleInput, which are never freed here.
**   s   source string.  If s->length exceeds MAXLEN it is clamped
**       to MAXLEN after WARN_STRING_TOO_LONG is raised.
**
** Returns TRUE on success.  Returns FALSE, after raising
** WARN_OUT_OF_MEMORY, when the new buffer cannot be allocated; the
** destination is left untouched in that case.
**
** The copy is built in a fresh buffer BEFORE the old destination
** buffer is released.  This keeps the call safe when d and s are
** the same string, or are distinct strings sharing one buffer;
** releasing first would either lose the contents or copy from
** freed memory.
*/
int
str_btob (StringType * d, StringType * s)
{
  char *fresh;

  assert (d != NULL);
  assert (s != NULL);
  /* assert( s->length >= 0 ); */
  assert (My != NULL);

  if (s->length > MAXLEN)
  {
    WARN_STRING_TOO_LONG;       /* str_btob */
    s->length = MAXLEN;
  }

  /* build the copy first; calloc() zero-fills, so the extra byte is NulChar */
  fresh = (char *) calloc (s->length + 1 /* NulChar */ , sizeof (char));
  if (fresh == NULL)
  {
    WARN_OUT_OF_MEMORY;
    return FALSE;
  }
  if (s->length > 0)
  {
    assert (s->sbuffer != NULL);
    bwb_memcpy (fresh, s->sbuffer, s->length);
  }
  fresh[s->length] = NulChar;

  /* now it is safe to let go of whatever the destination held */
  if (d->sbuffer != NULL)
  {
    if (d->sbuffer == My->MaxLenBuffer)
    {
      /*
       ** this occurs when setting the return value of a multi-line string user function
       **
       ** FUNCTION INKEY$
       **   DIM A$
       **   LINE INPUT A$
       **   LET INKEY$ = LEFT$( A$, 1 )
       ** END FUNCTION
       **
       ** the shared buffer is simply detached, never freed
       */
    }
    else if (d->sbuffer == My->ConsoleOutput)
    {
      /* this should never occur, but let's make sure we don't crash if it does */
      WARN_INTERNAL_ERROR;
      /* continue processing */
    }
    else if (d->sbuffer == My->ConsoleInput)
    {
      /* this should never occur, but let's make sure we don't crash if it does */
      WARN_INTERNAL_ERROR;
      /* continue processing */
    }
    else
    {
      free (d->sbuffer);
    }
  }

  /* install the copy; when d == s the length assignment is a no-op */
  d->sbuffer = fresh;
  d->length = s->length;
  return TRUE;
}

/***************************************************************

        FUNCTION:       str_cmp()

        DESCRIPTION:    This C function performs the equivalent of
                        the C strcmp() function, using BASIC strings.

***************************************************************/ int str_cmp (StringType * a, StringType * b) { assert (a != NULL); assert (b != NULL); assert( My != NULL ); assert( My->CurrentVersion != NULL ); if (a->length > MAXLEN) { WARN_STRING_TOO_LONG; /* str_cmp */ a->length = MAXLEN; } if (b->length > MAXLEN) { WARN_STRING_TOO_LONG; /* str_cmp */ b->length = MAXLEN; } if (a->sbuffer == NULL) { if (b->sbuffer == NULL) { return 0; } if (b->length == 0) { return 0; } return 1; } assert( a->sbuffer != NULL ); a->sbuffer[a->length] = NulChar; if (b->sbuffer == NULL) { if (a->sbuffer == NULL) { return 0; } if (a->length == 0) { return 0; } return -1; } assert( b->sbuffer != NULL ); b->sbuffer[b->length] = NulChar; if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT) { /* case insensitive */ return bwb_stricmp (a->sbuffer, b->sbuffer); /* NOTE: embedded NUL characters terminate comparison */ } else { /* case sensitive */ return bwb_strcmp (a->sbuffer, b->sbuffer); /* NOTE: embedded NUL characters terminate comparison */ } } /*************************************************************** MATCH ***************************************************************/ int str_match (char *A, int A_Length, char *B, int B_Length, int I_Start) { /* SYNTAX: j% = MATCH( a$, b$, i% ) MATCH returns the position of the first occurrence of a$ in b$ starting with the character position given by the third parameter. A zero is returned if no MATCH is found. The following pattern-matching features are available: # matches any digit (0-9). ! matches any upper-or lower-case letter. ? matches any character. \ serves as an escape character indicating the following character does not have special meaning. For example, a ? signifies any character is a MATCH unless preceded by a \. a$ and b$ must be strings. If either of these arguments are numeric, an error occurs. If i% is real, it is converted to an integer. If i% is a string, an error occurs. 
If i% is negative or zero, a run-time error occurs. When i% is greater than the length of b$, zero is returned. If b$ is a null string, a 0 is returned. If b$ is not null, but a$ is null, a 1 is returned. Examples: MATCH( "is", "Now is the", 1) returns 5 MATCH( "##", "October 8, 1876", 1) returns 12 MATCH( "a?", "character", 4 ) returns 5 MATCH( "\#", "123#45", 1) returns 4 MATCH( "ABCD", "ABC", 1 ) returns 0 MATCH( "\#1\\\?", "1#1\?2#", 1 ) returns 2 */ int a; /* current position in A$ */ int b; /* current position in B$ */ assert (A != NULL); assert (B != NULL); if (I_Start <= 0) { return 0; } if (I_Start > B_Length) { return 0; } if (B_Length <= 0) { return 0; } if (A_Length <= 0) { return 1; } I_Start--; /* BASIC to C */ for (b = I_Start; b < B_Length; b++) { int n; /* number of characters in A$ matched with B$ */ n = 0; for (a = 0; a < A_Length; a++) { int bn; bn = b + n; if (A[a] == '#' && bwb_isdigit (B[bn])) { n++; } else if (A[a] == '!' && bwb_isalpha (B[bn])) { n++; } else if (A[a] == '?') { n++; } else if (a < (A_Length - 1) && A[a] == '\\' && A[a + 1] == B[bn]) { n++; a++; } else if (A[a] == B[bn]) { n++; } else { break; } } if (a == A_Length) { b++; /* C to BASIC */ return b; } } return 0; } /*************************************************************** FUNCTION: str_like() DESCRIPTION: This C function performs the equivalent of the BASIC LIKE operator, using BASIC strings. ***************************************************************/ /* inspired by http://www.blackbeltcoder.com/Articles/net/implementing-vbs-like-operator-in-c */ /* KNOWN ISSUES: To match the character '[', use "[[]". To match the character '?', use "[?]". To match the character '*', use "[*]". Does not match "" with "[]" or "[!]". 
*/ #define CHAR_SET '*' #define CHAR_CLR ' ' static char charList[256]; static int IndexOf (char *buffer, char find, int start) { int buffer_count; int buffer_length; assert (buffer != NULL); assert( My != NULL ); assert( My->CurrentVersion != NULL ); buffer_length = bwb_strlen (buffer); if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT) { find = bwb_toupper (find); } for (buffer_count = start; buffer_count < buffer_length; buffer_count++) { char theChar; theChar = buffer[buffer_count]; if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT) { theChar = bwb_toupper (theChar); } if (theChar == find) { /* FOUND */ return buffer_count; } } /* NOT FOUND */ return -1; } static int CharListToSet (char *pattern, int start, int stop) { /* Converts a string of characters to a HashSet of characters. If the string contains character ranges, such as A-Z, all characters in the range are also added to the returned set of characters. */ int pattern_Count; assert (pattern != NULL); bwb_memset (charList, CHAR_CLR, 256); if (start > stop) { /* ERROR */ return FALSE; } /* Leading '-' */ while (pattern[start] == '-') { /* Match character '-' */ charList[0x00FF & pattern[start]] = CHAR_SET; start++; if (start > stop) { /* DONE */ return TRUE; } } /* Trailing '-' */ while (pattern[stop] == '-') { /* Match character '-' */ charList[0x00FF & pattern[stop]] = CHAR_SET; stop--; if (start > stop) { /* DONE */ return TRUE; } } for (pattern_Count = start; pattern_Count <= stop; pattern_Count++) { if (pattern[pattern_Count] == '-') { /* Character range */ char startChar; char endChar; char theChar; if (pattern_Count > start) { startChar = pattern[pattern_Count - 1]; if (startChar == '-') { /* ERROR */ return FALSE; } if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT) { startChar = bwb_toupper (startChar); } } else { /* ERROR */ return FALSE; } if (pattern_Count < stop) { endChar = pattern[pattern_Count + 1]; if (endChar == '-') { /* ERROR */ return FALSE; } if 
(My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT) { endChar = bwb_toupper (endChar); } if (endChar < startChar) { /* ERROR */ return FALSE; } } else { /* ERROR */ return FALSE; } /* Although the startChar has already been set, and the endChar will be set on the next loop, we go ahead and set them here too. Not the most efficient, but easy to understand, and we do not have to do anything special for edge cases such as [A-A] and [A-B]. */ for (theChar = startChar; theChar <= endChar; theChar++) { charList[0x00FF & theChar] = CHAR_SET; } } else { /* Single character */ char theChar; theChar = pattern[pattern_Count]; if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT) { theChar = bwb_toupper (theChar); } charList[0x00FF & theChar] = CHAR_SET; } } return TRUE; } int IsLike (char *buffer, int *buffer_count, int buffer_Length, char *pattern, int *pattern_count, int pattern_Length) { /* Implement's VB's Like operator logic. */ /* if matched then buffer_count is updated pattern_count is updated returns TRUE else returns FALSE end if */ int bc; int pc; assert (buffer != NULL); assert (buffer_count != NULL); assert (pattern != NULL); assert (pattern_count != NULL); assert( My != NULL ); assert( My->CurrentVersion != NULL ); bc = *buffer_count; pc = *pattern_count; /* Loop through pattern string */ while (pc < pattern_Length) { /* Get next pattern character */ if (pattern[pc] == '[') { /* Character list */ /* [] and [!] are special */ char IsExclude; IsExclude = CHAR_CLR; pc++; /* pc is first character after '[' */ if (pattern[pc] == '!') { pc++; IsExclude = CHAR_SET; } /* pc is first character after '[' */ if (pattern[pc] == ']') { /* [] and [!] are special */ /* pc is first character after '[' and is a ']' */ pc++; if (IsExclude == CHAR_CLR) { /* [] */ /* matches "" */ } else { /* [!] */ /* same as '?' 
*/ if (bc >= buffer_Length) { /* we have completed the buffer without completing the pattern */ return FALSE; } bc++; } } else { /* Build character list */ /* pc is first character after '[' and is not a ']' */ int stop_count; stop_count = IndexOf (pattern, ']', pc); /* stop_count is the character ']' */ if (stop_count < 0) { /* NOT FOUND */ return FALSE; } /* pc is first character after '[' */ /* stop_count is the character ']' */ CharListToSet (pattern, pc, stop_count - 1); pc = stop_count + 1; /* pc is first character after ']' */ if (bc >= buffer_Length) { /* we have completed the buffer without completing the pattern */ return FALSE; } if (charList[0x00FF & buffer[bc]] == IsExclude) { /* not matched */ return FALSE; } bc++; } } else if (pattern[pc] == '?' /* LIKE char */ ) { /* Matches a single character */ pc++; if (bc >= buffer_Length) { /* Check for end of string */ /* we have completed the buffer without completing the pattern */ return FALSE; } bc++; } else if (pattern[pc] == '#' /* LIKE digit */ ) { /* Matches a single digit */ pc++; if (bc >= buffer_Length) { /* Check for end of string */ /* we have completed the buffer without completing the pattern */ return FALSE; } if (bwb_isdigit (buffer[bc])) { bc++; } else { /* not matched */ return FALSE; } } else if (pattern[pc] == '*' /* LIKE chars */ ) { /* Zero or more characters */ while (pattern[pc] == '*' /* LIKE chars */ ) { pc++; } if (pc == pattern_Length) { /* Matches all remaining characters */ bc = buffer_Length; pc = pattern_Length; break; } else { int p; int b; int IsMatched; p = pc; b = bc; IsMatched = FALSE; while (b <= buffer_Length && IsMatched == FALSE) { int last_b; last_b = b; IsMatched = IsLike (buffer, &b, buffer_Length, pattern, &p, pattern_Length); if (IsMatched == FALSE) { /* not matched, try again begining at next buffer position */ p = pc; b = last_b + 1; } } if (IsMatched == FALSE) { /* not matched */ return FALSE; } pc = p; bc = b; } } else { char pattChar; char buffChar; pattChar 
= pattern[pc]; if (bc >= buffer_Length) { /* Check for end of string */ /* we have completed the buffer without completing the pattern */ return FALSE; } buffChar = buffer[bc]; if (My->CurrentVersion->OptionFlags & OPTION_COMPARE_TEXT) { pattChar = bwb_toupper (pattChar); buffChar = bwb_toupper (buffChar); } if (pattChar == buffChar) { /* matched specified character */ pc++; bc++; } else { /* not matched */ return FALSE; } } } /* Return true if all characters matched */ if (pc < pattern_Length) { /* not matched */ return FALSE; } if (bc < buffer_Length) { /* not matched */ return FALSE; } /* all characters matched */ *buffer_count = bc; *pattern_count = pc; return TRUE; } /* EOF */