amath/app/main/lexer.cpp

256 lines
5.7 KiB
C++

/*
* Copyright (c) 2015-2016 Carsten Sonne Larsen
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "clib.h"
#include "main/lexer.h"
#include "main/token.h"
#include "localize/lex.h"
#include "localize/kword.h"
#include "system/program.h"
/**
 * Create a lexer for the given input text.
 *
 * AllocAndCopy fills this->input from the argument (presumably with a heap
 * copy, since the destructor releases it with delete []). The read cursor
 * is placed at the start of that buffer and the token list starts out empty.
 *
 * @param input Text to be tokenized.
 */
Lexer::Lexer(const char *input)
{
    // No tokens exist until Tokenize() has been run.
    current = NOMEM;
    first = NOMEM;

    AllocAndCopy(&this->input, input);

    // Start reading at the beginning of the lexer's own buffer.
    str = this->input;
    pos = 0;
}
/**
 * Release the input buffer and the first token, if any.
 *
 * Only the first token is deleted here; the remaining tokens are presumably
 * released through Token's own destructor (not visible in this file).
 */
Lexer::~Lexer()
{
    delete [] input;

    if (first == NOMEM) {
        return;
    }

    delete first;
}
char* Lexer::GetInput()
{
return input;
}
/**
 * Get the head of the token list.
 *
 * @return The first token produced by Tokenize(), or NOMEM when Tokenize()
 *         has not been run yet. The lexer deletes it in its destructor.
 */
Token* Lexer::GetFirstToken()
{
    return this->first;
}
void Lexer::Tokenize()
{
pos = 0;
str = input;
first = NOMEM;
current = NOMEM;
GetNextToken();
first = current;
do {
Token* last = current;
GetNextToken();
last->next = current;
} while (current->symbol != symend);
}
/**
 * Read the next token from the input and store it in current.
 *
 * Leading characters accepted by ShouldSkip() are discarded first. Then,
 * in order: end of input yields symend; otherwise the operator, quoted
 * identifier, number and literal scanners are tried; if none of them
 * matches, a single character is consumed as symunknown.
 */
void Lexer::GetNextToken()
{
    // Skip spaces and non-visible characters
    while (*str != 0 && ShouldSkip(*str)) {
        // Test the character being skipped before advancing. Testing after
        // the increment would look at the following character instead and
        // leave a single space between two tokens uncounted.
        if (Program->Language->CharIsSpace(*str)) {
            pos++;
        }

        str++;
    }

    if (*str == 0) {
        current = new Token(current, symend, pos);
        return;
    }

    // Each scanner stores its token in current and advances str and pos.
    if (GetOperator() || GetQuotedIdent() || GetDigitValue() || GetLitteral()) {
        return;
    }

    // Nothing matched: consume one character as an unknown token.
    current = new Token(current, symunknown, pos);
    str++;
    pos++;
}
/**
 * Try to read a single character operator at the current position.
 *
 * The operators table is searched front to back for an entry whose
 * character equals the current input character.
 *
 * @return true if an operator token was created and one character was
 *         consumed; false if no entry matched (nothing is consumed).
 */
bool Lexer::GetOperator()
{
    static const unsigned int count = sizeof(operators) / sizeof(operatordef);

    // Locate the first table entry for the current character.
    unsigned int index = 0;
    while (index < count && operators[index].chr != *str) {
        index++;
    }

    if (index == count) {
        return false;
    }

    current = new Token(current, operators[index].symbol, pos);
    str++;
    pos++;
    return true;
}
/**
 * Try to read a double quoted identifier at the current position.
 *
 * The identifier must be non-empty, must not contain control characters
 * and must be closed by a second double quote. The token text is the
 * content between the quotes.
 *
 * @return true if a symqident token was created; false otherwise, in
 *         which case the read position is left unchanged.
 */
bool Lexer::GetQuotedIdent()
{
    if (*str != '"') {
        return false;
    }

    char *start = str;
    const unsigned int startPos = pos;
    unsigned int len = 0;

    // Step over the opening quote and scan until the closing quote,
    // the end of input or a control character.
    str++;
    while (*str != 0 && *str != '"' && !Program->Language->CharIsCntrl(*str)) {
        str++;
        len++;
    }

    // Empty or unterminated: rewind so other scanners can try.
    if (len == 0 || *str != '"') {
        str = start;
        return false;
    }

    // Temporary NUL-terminated copy of the text between the quotes.
    // It is released right after the token is built, so Token presumably
    // keeps its own copy.
    char *ident = new char[len + 1];
    MemCopy(ident, start + 1, len);
    ident[len] = 0;
    current = new Token(current, symqident, ident, startPos);
    delete [] ident;

    // Step over the closing quote. In total len + 2 characters were
    // consumed: the content plus both quotes.
    str++;
    pos += len + 2;
    return true;
}
bool Lexer::GetLitteral()
{
const char *start = str;
const unsigned int startPos = pos;
int unsigned len = 0;
bool found = true;
Symbol ksymbol;
while (found) {
if (Program->Language->CharIsAlpha(*str)) {
str++;
len++;
} else if (len != 0 && Program->Language->CharIsDigit(*str)) {
str++;
len++;
} else {
found = false;
}
}
if (len == 0) {
return false;
}
char *ident = new char[len + 1];
MemCopy(ident, start, len);
ident[len] = 0;
if ((ksymbol = FindKeyword(ident))) {
current = new Token(current, ksymbol, startPos);
} else {
current = new Token(current, symident, ident, startPos);
}
pos += len;
delete [] ident;
return true;
}
/**
 * Try to read a number at the current position.
 *
 * The input parser of the program decides whether the text is a number
 * and how far it extends. The parsed value itself is discarded; only the
 * matched text is stored in the token.
 *
 * @return true if a symnumber token was created; false if the parser did
 *         not consume any input.
 */
bool Lexer::GetDigitValue()
{
    unsigned int len;
    char *end;

    // Only the numeral parser can determine if next token is a value
    Number *number = Program->Input->Parse(str, &len, &end);

    // Only the extent of the match is needed, not the value.
    delete number;

    if (str == end) {
        return false;
    }

    // Temporary NUL-terminated copy of the matched text.
    char *text = new char[len + 1];
    MemCopy(text, str, len);
    text[len] = '\0';
    current = new Token(current, symnumber, text, pos);

    // Allocated with new[], so it must be released with delete [].
    delete [] text;

    pos += len;
    str = end;
    return true;
}
/**
 * Look up the symbol of a keyword.
 *
 * The lookup is delegated to the language object of the program.
 *
 * @param ident NUL-terminated text to look up.
 * @return The symbol reported by the language object. GetLitteral() treats
 *         a zero result as "not a keyword".
 */
Symbol Lexer::FindKeyword(const char *ident)
{
    const Symbol symbol = Program->Language->FindKeyword(ident);
    return symbol;
}
/**
 * Find the text belonging to a symbol.
 *
 * The keywords table is searched first, then the operators table. The
 * first matching entry wins.
 *
 * @param symbol Symbol to look up.
 * @return The keyword name, or the address of the operator character, or
 *         NOMEM if the symbol is in neither table.
 *
 * NOTE(review): for operators the result points at a single char member of
 * the table entry. Whether it can be read as a NUL-terminated string
 * depends on the layout of operatordef, which is not visible here - confirm
 * before treating it as a string.
 */
char* Lexer::FindKeyword(Symbol symbol)
{
    static const unsigned int kwcount = sizeof(keywords) / sizeof(keyworddef);
    static const unsigned int ocount = sizeof(operators) / sizeof(operatordef);

    // Keywords take precedence over operators.
    unsigned int k = 0;
    while (k < kwcount && keywords[k].symbol != symbol) {
        k++;
    }

    if (k < kwcount) {
        return (char*)keywords[k].name;
    }

    unsigned int o = 0;
    while (o < ocount && operators[o].symbol != symbol) {
        o++;
    }

    if (o < ocount) {
        return (char*)&(operators[o].chr);
    }

    return NOMEM;
}
/**
 * Decide whether a character is ignored between tokens.
 *
 * Line feed is never skipped, even though it may classify as a control or
 * space character. Any other control or space character is skipped.
 *
 * @param character Character to test.
 * @return true if the character should be skipped.
 */
bool Lexer::ShouldSkip(char character)
{
    return character != '\n' &&
           (Program->Language->CharIsCntrl(character) ||
            Program->Language->CharIsSpace(character));
}