git.haldean.org ubik / e2d2d42 libubik / codegen / token.l
e2d2d42

Tree @e2d2d42 (Download .tar.gz)

token.l @e2d2d42raw · history · blame

/*
 * token.l: ubik language tokenizer
 * Copyright (C) 2016, Haldean Brown
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

%{

#include "ubik/ast.h"
#include "ubik/string.h"
#include "ubik/parse.h"
#include "grammar.h"

/* these are generated by generated code, and they're innocuous, so we
 * suppress them. */
#pragma GCC diagnostic ignored "-Wunused-parameter"
/* this is less innocuous, but there's not much I can do about it. */
#pragma GCC diagnostic ignored "-Wsign-compare"

/* Set to a nonzero value to make TOKEN and TSTRING print every token they
 * return to stdout. */
#define PRINT_TOKENS 0

/* Rule action for tokens that carry no text: stores the token code itself in
 * yylval->token and returns it from the scanner. The expansion is a complete
 * brace-enclosed block, so rules use it without a trailing semicolon. */
#define TOKEN(x) { \
        if (PRINT_TOKENS) printf(#x "\n"); \
        yylval->token = x; \
        return x; }
/* Rule action for tokens that carry their matched text: stores a copy of
 * yytext made by ubik_strdup(yytext, ctx->region) in yylval->string and
 * returns the given token code. `ctx` is not declared in this file; it has to
 * be in scope wherever the macro expands (presumably supplied by the yylex
 * declaration -- TODO confirm). */
#define TSTRING(token) { \
        yylval->string = ubik_strdup(yytext, ctx->region); \
        if (PRINT_TOKENS) printf(#token ": %s\n", yytext); \
        return token; \
}

/* Hook that flex runs before the action of every matched rule. It records the
 * location of the match in the scanner's yylloc: both line fields are set to
 * yylineno, the columns span [yycolumn, yycolumn + yyleng - 1], and yycolumn
 * is then advanced past the match. The trailing semicolon is part of the
 * macro, so the expansion is a complete statement by itself. */
#define YY_USER_ACTION do { \
        yyget_lloc(yyscanner)->first_line = yyget_lloc(yyscanner)->last_line = yylineno; \
        yyget_lloc(yyscanner)->first_column = yycolumn; \
        yyget_lloc(yyscanner)->last_column = yycolumn + yyleng - 1; \
        yycolumn += yyleng; } while (0);

/* Type of the per-scanner "extra" pointer: the parse context. */
#define YY_EXTRA_TYPE struct ubik_parse_context *

/* Replaces flex's default input reader: the scanner's buffer is refilled by
 * get_input(), using the parse context stored as the scanner's extra data. */
#define YY_INPUT(buf, result, max_size) result = get_input(yyg->yyextra_r, buf, max_size);

/* Reads a string literal from the scanner input; called from the '"' rule
 * after the opening quote has been matched. */
char *read_string_literal(struct ubik_parse_context *ctx, yyscan_t scanner);
/* Reads up to max_size bytes from ctx->source_stream into buf (see YY_INPUT). */
static size_t get_input(struct ubik_parse_context *ctx, char *buf, size_t max_size);

%}

/* Scanner configuration:
 *   reentrant        all scanner state lives behind a yyscan_t handle
 *   noyywrap         no yywrap() is called at end of input
 *   yylineno         flex maintains the current line number
 *   nounput          do not generate the unused unput() helper
 *   bison-bridge     yylex receives a pointer to the parser's yylval
 *   bison-locations  yylex also receives a pointer to the parser's yylloc
 */
%option reentrant noyywrap yylineno nounput
%option bison-bridge bison-locations

/* NAMECHARS is the (possibly empty) tail of any identifier. NAME is an
 * identifier whose first character is a lowercase letter, '_' or '-'. */
NAMECHARS [a-zA-Z0-9/_\-']*
NAME      [a-z_\-]{NAMECHARS}

%%

\n                           { /* A new line begins: restart the column counter
                                * that YY_USER_ACTION advances on every match,
                                * otherwise columns keep accumulating across
                                * lines. 0 is the value flex gives the column
                                * of a freshly initialised buffer. */
                               yycolumn = 0; }
[ ]

#.*                          /* swallow comments */

        /* Punctuation and operators. flex picks the longest match, so "?:",
         * "=>" and "->" win over their one-character prefixes. A lone "_" is
         * also matched by {NAME} further down with the same length; flex then
         * prefers the rule listed first, so it is reported as INTERFACE. */
"?:"                         TOKEN(TEST)
"!"                          TOKEN(IMMEDIATE)
":"                          TOKEN(BIND)
"^"                          TOKEN(TYPE)
"=>"                         TOKEN(IMPLIES)
"->"                         TOKEN(GOES_TO)
"\\"                         TOKEN(LAMBDA)
"="                          TOKEN(IS)
"("                          TOKEN(OPEN_PAR)
")"                          TOKEN(CLOSE_PAR)
"\."                         TOKEN(MEMBER)
"{"                          TOKEN(OPEN_SCOPE)
"}"                          TOKEN(CLOSE_SCOPE)
"|"                          TOKEN(GIVEN)
"'"                          TOKEN(EXISTS)
"?"                          TOKEN(COND)
"*"                          TOKEN(SPLAT)
"+"                          TOKEN(ADD)
"~"                          TOKEN(DEFINES)
"_"                          TOKEN(INTERFACE)
"`"                          TOKEN(IMPORT)
"/"                          TOKEN(DIV)
"%"                          TOKEN(MOD)
([0-9]+(\.[0-9]*)?|\.[0-9]+) TSTRING(NUMBER) /* copy the text: yytext is overwritten by the next match */
{NAME}                       TSTRING(NAME) /* identifier starting with a-z, '_' or '-' */
{NAME}:{NAME}                TSTRING(QUALIFIED_NAME) /* name:name with no spaces around ':' */
{NAME}:[A-Z]{NAMECHARS}      TSTRING(QUALIFIED_TYPE_NAME) /* name:Type, type part starts uppercase */
[A-Z]{NAMECHARS}             TSTRING(TYPE_NAME) /* identifier starting with an uppercase letter */
"\""                         { /* Only the opening quote is matched here; the
                                * helper pulls the remainder of the literal
                                * straight from the scanner input. */
                               yylval->string = read_string_literal(ctx, yyscanner);
                               return STRING; }

.                            { /* Any character no other rule accepts: report
                                * it and hand its code to the parser. */
                               printf("%d: invalid character '%s'\n",
                                      yylineno, yytext);
                               /* The parser treats non-positive token codes as
                                * end of input, so keep bytes >= 0x80 positive
                                * on platforms where plain char is signed. */
                               return (unsigned char) yytext[0]; }


%%

/*
 * Reads the remainder of a string literal from the scanner's input.
 *
 * Called after the opening '"' has already been matched. Characters are
 * pulled one at a time with input() until an unescaped '"' is seen; that
 * closing quote is consumed but not stored.
 *
 * Escape handling:
 *   \n \r \t   become newline, carriage return and tab
 *   \" \\      become a literal quote and a single backslash
 *   \<other>   is kept verbatim (backslash followed by the character)
 *
 * If the input ends before the closing quote, a diagnostic is printed and
 * whatever was read so far is returned.
 *
 * ctx:     parse context; the buffer is allocated against ctx->region.
 * scanner: the reentrant scanner to read from.
 *
 * Returns a NUL-terminated buffer obtained from ubik_ralloc/ubik_realloc.
 *
 * NOTE(review): the results of ubik_ralloc/ubik_realloc are not checked,
 * exactly as before; confirm how they report allocation failure.
 */
char *
read_string_literal(struct ubik_parse_context *ctx, yyscan_t scanner)
{
        char *res;
        int c;
        int escaped;
        size_t n;
        size_t cap;

        n = 0;
        cap = 128;
        escaped = 0;
        ubik_ralloc((void**) &res, cap, sizeof(char), ctx->region);

        for (;;)
        {
                c = input(scanner);

                /* Depending on the flex version, input() reports end of
                 * input as EOF or as 0. Without this check an unterminated
                 * literal would loop forever while the buffer keeps
                 * growing. */
                if (c == EOF || c == '\0')
                {
                        printf("unterminated string literal\n");
                        break;
                }

                if (escaped)
                {
                        /* The backslash that introduced this escape is
                         * already stored at res[n - 1]; recognised escapes
                         * overwrite it in place. */
                        escaped = 0;
                        switch (c)
                        {
                        case 'n':
                                res[n - 1] = '\n';
                                continue;
                        case 'r':
                                res[n - 1] = '\r';
                                continue;
                        case 't':
                                res[n - 1] = '\t';
                                continue;
                        case '"':
                                res[n - 1] = '"';
                                continue;
                        case '\\':
                                /* res[n - 1] is already the backslash. */
                                continue;
                        default:
                                /* Unknown escape: keep the backslash and
                                 * append the character below. */
                                break;
                        }
                }
                else if (c == '"')
                {
                        break;
                }
                else if (c == '\\')
                {
                        escaped = 1;
                }

                /* Grow early enough that one byte always remains free for
                 * the terminating NUL. */
                if (n + 1 == cap)
                {
                        cap *= 2;
                        ubik_realloc(
                                (void**) &res, cap, sizeof(char), ctx->region);
                }
                res[n++] = (char) c;
        }

        res[n] = '\0';
        return res;
}

/*
 * Input hook behind YY_INPUT: asks ubik_stream_read to fill buf with up to
 * max_size bytes from the parse context's source stream and hands its
 * result back to the scanner as a size_t.
 */
static size_t
get_input(struct ubik_parse_context *ctx, char *buf, size_t max_size)
{
        return ubik_stream_read(buf, ctx->source_stream, max_size);
}