summaryrefslogtreecommitdiff
path: root/src/core/scanner.c
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2008-05-15 23:28:07 +0000
committerPaul Buetow <paul@buetow.org>2008-05-15 23:28:07 +0000
commitbe839900419c7a74c4a46efd279d0ca16b35dc1f (patch)
tree1355c8f238d1c58ffd5cb8803bcc2adf987e79aa /src/core/scanner.c
parent33c945e58f86267b0d3bdca4c3421155e11eb0d9 (diff)
Moved stuff into trunk.
Diffstat (limited to 'src/core/scanner.c')
-rw-r--r--src/core/scanner.c391
1 files changed, 391 insertions, 0 deletions
diff --git a/src/core/scanner.c b/src/core/scanner.c
new file mode 100644
index 0000000..85f0780
--- /dev/null
+++ b/src/core/scanner.c
@@ -0,0 +1,391 @@
+/*:*
+ *: File: ./src/core/scanner.c
+ *: A simple interpreter
+ *:
+ *: WWW : http://fype.buetow.org
+ *: E-Mail : fype@dev.buetow.org
+ *:
+ *: Copyright (c) 2005 2006 2007 2008, Paul Buetow (http://www.pblabs.net)
+ *: All rights reserved.
+ *:
+ *: Redistribution and use in source and binary forms, with or without modi-
+ *: fication, are permitted provided that the following conditions are met:
+ *: * Redistributions of source code must retain the above copyright
+ *: notice, this list of conditions and the following disclaimer.
+ *: * Redistributions in binary form must reproduce the above copyright
+ *: notice, this list of conditions and the following disclaimer in the
+ *: documentation and/or other materials provided with the distribution.
+ *: * Neither the name of P. B. Labs nor the names of its contributors may
+ *: be used to endorse or promote products derived from this software
+ *: without specific prior written permission.
+ *:
+ *: THIS SOFTWARE IS PROVIDED BY Paul Buetow AS IS'' AND ANY EXPRESS OR
+ *: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *: WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *: DISCLAIMED. IN NO EVENT SHALL Paul Buetow BE LIABLE FOR ANY DIRECT,
+ *: INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *: (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ *: SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ *: HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ *: STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ *: IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *: POSSIBILITY OF SUCH DAMAGE.
+ *:*/
+
+#include "scanner.h"
+
+#include <ctype.h>
+#include <string.h>
+
+const char const *KEYWORDS[] = {
+ "if",
+ "else",
+ "elsif",
+ "while",
+ "ret",
+ "const",
+};
+
+const char const *OPERATORS[] = {
+ "!",
+ "!=",
+ "(",
+ ")",
+ "*",
+ "+",
+ "++",
+ ",",
+ "-",
+ "--",
+ ".",
+ "/",
+ "\\",
+ ":",
+ "==",
+ ";",
+ "<<",
+ "<",
+ "<=",
+ "=",
+ ">=",
+ ">>",
+ ">",
+ "{",
+ "}",
+};
+
+const char TOKENENDS[] = "{}();:,.";
+int CODESTRING_INDEX = 0;
+
+Scanner*
+scanner_new(List *p_list_token, Tupel *p_tupel_argv) {
+ Scanner *p_scanner = malloc(sizeof(Scanner));
+
+ Dat *p_dat_string = p_tupel_argv->a;
+
+ if (dat_empty(p_dat_string))
+ ERROR("No source given");
+
+ if (argv_checkopts("e", p_tupel_argv)) {
+ p_scanner->c_codestring = dat_pop(p_dat_string);
+ p_scanner->c_filename = NULL;
+ p_scanner->fp = NULL;
+
+ } else {
+ p_scanner->c_codestring = NULL;
+ p_scanner->c_filename = dat_pop(p_dat_string);
+ p_scanner->fp = fopen(p_scanner->c_filename, "r");
+
+ if (!p_scanner->fp)
+ ERROR("Could not open '%s' for reading!", p_scanner->c_filename);
+ }
+
+ p_scanner->p_list_token = p_list_token;
+
+ p_scanner->i_current_line_nr = 1;
+ p_scanner->i_current_pos_nr = 0;
+
+ p_scanner->i_num_keywords = sizeof(KEYWORDS) / sizeof(char const *);
+ p_scanner->i_num_operators = sizeof(OPERATORS) / sizeof(char const *);
+ p_scanner->i_num_tokenends = strlen(TOKENENDS);
+
+ return p_scanner;
+}
+
+void
+scanner_delete(Scanner *p_scanner) {
+ if (p_scanner->fp)
+ fclose(p_scanner->fp);
+ free(p_scanner);
+}
+
+void
+scanner_post_task(Scanner *p_scanner) {
+ List *p_list_token = scanner_get_list_token(p_scanner);
+ ListIterator *p_iter = listiterator_new(p_list_token);
+
+ Token *pt_last[] = { NULL, NULL };
+ TokenType tt_last[] = { TT_NONE, TT_NONE };
+
+ while (listiterator_has_next(p_iter)) {
+ ListElem *p_le = listiterator_next_elem(p_iter);
+ Token *p_token = p_le->p_val;
+ TokenType tt_cur = token_get_tt(p_token);
+
+ if (pt_last[0]) {
+ if (tt_cur == TT_INTEGER && tt_last[1] == TT_DOT
+ && tt_last[0] == TT_INTEGER) {
+
+ token_ref_down(pt_last[0]);
+ token_ref_down(pt_last[1]);
+
+ char *c_2 = token_get_val(p_token);
+ char *c_0 = token_get_val(pt_last[0]);
+ int i_len = strlen(c_2) + strlen(c_0) + 1;
+ char *c_new = calloc(i_len+1, sizeof(char));
+
+ sprintf(c_new, "%s.%s", c_0, c_2);
+ free(c_2);
+ c_new[i_len] = 0;
+
+ token_set_val(p_token, c_new);
+ token_set_tt(p_token, TT_DOUBLE);
+ token_set_dval(p_token, atof(c_new));
+
+ list_remove_elem(p_list_token, p_le->p_prev);
+ list_remove_elem(p_list_token, p_le->p_prev);
+
+ pt_last[0] = pt_last[1] = NULL;
+ tt_last[0] = tt_last[1] = TT_NONE;
+ }
+ }
+
+ tt_last[0] = tt_last[1];
+ tt_last[1] = tt_cur;
+
+ pt_last[0] = pt_last[1];
+ pt_last[1] = p_token;
+ }
+
+ listiterator_delete(p_iter);
+}
+
+_Bool
+_scanner_has_next_char(Scanner *p_scanner) {
+ if (p_scanner->fp)
+ return !feof(p_scanner->fp);
+
+ return p_scanner->c_codestring[CODESTRING_INDEX] != 0;
+}
+
+char
+_scanner_get_next_char(Scanner *p_scanner) {
+ if (p_scanner->fp)
+ return fgetc(p_scanner->fp);
+
+ return (p_scanner->c_codestring[CODESTRING_INDEX++]);
+}
+
+void
+scanner_run(Fype *p_fype) {
+ Scanner *p_scanner = scanner_new(p_fype->p_list_token, p_fype->p_tupel_argv);
+
+ int i_token_len = 0;
+ char *c_token = malloc(sizeof(char));
+
+ c_token[0] = 0;
+
+ while ( _scanner_has_next_char(p_scanner) ) {
+ //char c = fgetc(fp);
+ char c = _scanner_get_next_char(p_scanner);
+ ++p_scanner->i_current_pos_nr;
+
+ switch (c) {
+ case '#':
+ {
+ c = _scanner_get_next_char(p_scanner);
+ ++p_scanner->i_current_pos_nr;
+ _Bool b_multi_comment = c == '*';
+
+ do {
+ c = _scanner_get_next_char(p_scanner);
+ ++p_scanner->i_current_pos_nr;
+ if (c == '\n') {
+ ++p_scanner->i_current_line_nr;
+ p_scanner->i_current_pos_nr = 0;
+
+ if (!b_multi_comment)
+ break;
+
+ } else if (b_multi_comment && c == '*' &&
+ _scanner_has_next_char(p_scanner)) {
+ if ( (c = _scanner_get_next_char(p_scanner)) == '#')
+ break;
+
+ else if (c == '\n')
+ ++p_scanner->i_current_line_nr;
+ }
+
+ } while ( _scanner_has_next_char(p_scanner) );
+ }
+
+ break;
+
+ case '"':
+ if (i_token_len) {
+ TokenType tt_cur = scanner_get_tt_cur(c_token);
+ scanner_add_token(p_scanner, &c_token, &i_token_len, tt_cur);
+ }
+ {
+ int i_num_nl = 0;
+ do {
+ // c = fgetc(fp);
+ c = _scanner_get_next_char(p_scanner);
+ if ( c == '\n' ) {
+ ++i_num_nl;
+ p_scanner->i_current_pos_nr = 0;
+
+ i_token_len += 2;
+ c_token = realloc(c_token, sizeof(char) * (i_token_len + 1));
+ c_token[i_token_len-2] = '\\';
+ c_token[i_token_len-1] = 'n';
+ c_token[i_token_len] = 0;
+
+ } else if (c == '"') {
+ if (i_token_len && c_token[i_token_len-1] == '\\')
+ c_token[i_token_len-1] = '"';
+
+ else
+ break;
+
+ } else {
+ ++i_token_len;
+ c_token = realloc(c_token, sizeof(char) * (i_token_len + 1));
+ c_token[i_token_len-1] = c;
+ c_token[i_token_len] = 0;
+ }
+
+ //} while ( !feof(fp) );
+ } while ( _scanner_has_next_char(p_scanner) );
+
+ scanner_add_token(p_scanner, &c_token, &i_token_len, TT_STRING);
+
+ if (i_num_nl)
+ p_scanner->i_current_line_nr += i_num_nl;
+ }
+
+ break;
+
+ case '\n':
+ case '\t':
+ case ' ':
+ if (i_token_len) {
+ TokenType tt_cur = scanner_get_tt_cur(c_token);
+ scanner_add_token(p_scanner, &c_token, &i_token_len, tt_cur);
+ }
+
+ if (c == '\n') {
+ ++p_scanner->i_current_line_nr;
+ p_scanner->i_current_pos_nr = 0;
+ }
+
+ break;
+
+ default:
+ if (i_token_len) {
+ char d = c_token[i_token_len-1];
+ if ((!isalpha(d) && !isdigit(d) /*&& d != '-'*/) &&
+ (isalpha(c) || isdigit(c))) {
+
+ TokenType tt_cur = scanner_get_tt_cur(c_token);
+ scanner_add_token(p_scanner, &c_token, &i_token_len, tt_cur);
+
+ } else {
+ for (int i = 0; i < p_scanner->i_num_tokenends; ++i) {
+ if (TOKENENDS[i] == c) {
+ TokenType tt_cur = scanner_get_tt_cur(c_token);
+ scanner_add_token(p_scanner, &c_token, &i_token_len, tt_cur);
+ break;
+ }
+ }
+ }
+ }
+
+ ++i_token_len;
+ c_token = realloc(c_token, sizeof(char) * i_token_len + 1);
+ c_token[i_token_len-1] = c;
+ c_token[i_token_len] = 0;
+ }
+ }
+
+ if (argv_checkopts("e", p_fype->p_tupel_argv) && i_token_len) {
+ TokenType tt_cur = scanner_get_tt_cur(c_token);
+ scanner_add_token(p_scanner, &c_token, &i_token_len, tt_cur);
+ }
+
+ scanner_post_task(p_scanner);
+
+ char *c_filename = scanner_get_filename(p_scanner);
+ scanner_delete(p_scanner);
+
+ if (argv_checkopts("TV", p_fype->p_tupel_argv))
+ list_iterate(p_fype->p_list_token, token_print_cb);
+
+ char *c_basename = NULL;
+ if (c_filename) {
+ int i_len = strlen(c_filename) - 3;
+ c_basename = calloc(i_len+1, sizeof(char));
+ strncpy(c_basename, c_filename, i_len);
+ c_basename[i_len] = 0;
+
+ } else {
+ char *c_basename = calloc(1, sizeof(char));
+ c_basename[0] = 0;
+ }
+
+ p_fype->c_basename = c_basename;
+
+ /*
+ c_token = calloc(2, sizeof(char*));
+ c_token[0] = ';';
+ c_token[1] = '\0';
+ i_token_len = 1;
+
+ scanner_add_token(p_scanner, &c_token, &i_token_len, TT_STRING);
+ */
+}
+
+void
+scanner_add_token(Scanner *p_scanner, char **cc_token, int *p_token_len,
+ TokenType tt_cur) {
+
+ List *p_list_token = scanner_get_list_token(p_scanner);
+ Token *p_token = token_new(*cc_token, tt_cur, p_scanner->i_current_line_nr,
+ p_scanner->i_current_pos_nr, p_scanner->c_filename);
+
+ list_add_back(p_list_token, p_token);
+ token_ref_up(p_token);
+
+ *cc_token = malloc(sizeof(char));
+ (*cc_token)[0] = 0;
+ *p_token_len = 0;
+}
+
+TokenType
+scanner_get_tt_cur(char *c_token) {
+ if (isdigit(c_token[0]))
+ return TT_INTEGER;
+
+ if (c_token[0] == '-' && 1 < strlen(c_token) && isdigit(c_token[1]))
+ return TT_INTEGER;
+
+ TokenType tt_cur = get_tt(c_token);
+
+ return tt_cur == TT_NONE ? TT_IDENT : tt_cur;
+}
+
+void
+scanner_cleanup_list_token_cb(void *p_void) {
+ Token *p_token = p_void;
+ token_delete(p_token);
+}