328 lines
7.2 KiB
Diff
328 lines
7.2 KiB
Diff
|
From edf250c633bef40e7e37dafc9fc393dd2ad9074f Mon Sep 17 00:00:00 2001
|
||
|
From: Michael Forney <mforney@mforney.org>
|
||
|
Date: Tue, 10 Apr 2018 13:37:14 -0700
|
||
|
Subject: [PATCH] m4: Use hand-written lexer to avoid cycle in bootstrap
|
||
|
|
||
|
---
|
||
|
tokenizer.c | 191 +++++++++++++++++++++++++++++++++++++++++
|
||
|
tokenizer.l | 109 -----------------------
|
||
|
2 files changed, 191 insertions(+), 109 deletions(-)
|
||
|
create mode 100644 tokenizer.c
|
||
|
delete mode 100644 tokenizer.l
|
||
|
|
||
|
diff --git a/tokenizer.c b/tokenizer.c
|
||
|
new file mode 100644
|
||
|
index 00000000000..fa19fc65035
|
||
|
--- /dev/null
|
||
|
+++ b/tokenizer.c
|
||
|
@@ -0,0 +1,191 @@
|
||
|
+/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */
|
||
|
+/*
|
||
|
+ * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org>
|
||
|
+ *
|
||
|
+ * Permission to use, copy, modify, and distribute this software for any
|
||
|
+ * purpose with or without fee is hereby granted, provided that the above
|
||
|
+ * copyright notice and this permission notice appear in all copies.
|
||
|
+ *
|
||
|
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||
|
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||
|
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||
|
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||
|
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||
|
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||
|
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||
|
+ */
|
||
|
+#include "parser.h"
|
||
|
+#include <assert.h>
|
||
|
+#include <ctype.h>
|
||
|
+#include <errno.h>
|
||
|
+#include <limits.h>
|
||
|
+#include <stdbool.h>
|
||
|
+#include <stdio.h>
|
||
|
+#include <stdlib.h>
|
||
|
+#include <stdint.h>
|
||
|
+
|
||
|
+extern void m4_warnx(const char *, ...);
|
||
|
+extern int mimic_gnu;
|
||
|
+extern int32_t yylval;
|
||
|
+static const char *yypos;
|
||
|
+
|
||
|
+/*
+ * Select the expression that subsequent yylex() calls will tokenize.
+ *
+ * Only the pointer is stored; the text is not copied, so `s' has to stay
+ * valid and unmodified for as long as yylex() is being called on it.
+ * yylex() treats a '\0' byte as the end of the input.
+ */
+void
+yy_scan_string(const char *s)
+{
+	yypos = s;	/* scanning restarts at the first character of s */
+}
|
||
|
+
|
||
|
+/*
+ * Convert the numeric token at yytext to its value.
+ *
+ * The conversion is done by strtol() with base 0, so a "0x"/"0X" prefix
+ * selects hexadecimal and a leading "0" selects octal.  yylen is the length
+ * of the token and is used only to print it: the token is a slice of the
+ * lexer input, not a NUL-terminated copy.
+ *
+ * A warning is issued through m4_warnx() when strtol() reports a range
+ * error or when the value does not fit in an int32_t; the (converted)
+ * value is returned in either case.
+ */
+static int32_t
+number(const char *yytext, size_t yylen)
+{
+	long value;
+	bool clamped, too_wide;
+
+	errno = 0;
+	value = strtol(yytext, NULL, 0);
+
+	/* strtol() saturates at LONG_MIN/LONG_MAX and sets ERANGE. */
+	clamped = errno == ERANGE && (value == LONG_MAX || value == LONG_MIN);
+	/* long may be wider than 32 bits, so check the target range too. */
+	too_wide = value > INT32_MAX || value < INT32_MIN;
+
+	if (clamped || too_wide)
+		m4_warnx("numeric overflow in expr: %.*s", (int)yylen, yytext);
+
+	return value;
+}
|
||
|
+
|
||
|
+/*
+ * Convert a radix literal of the form 0rBASE:DIGITS (or 0R...) to a number.
+ *
+ * yytext points at the leading '0' and yylen is the length of the token.
+ * The token is a slice of the lexer input rather than a NUL-terminated
+ * copy, so the digit loop is bounded by yylen and never looks at whatever
+ * follows the token (an operator, a parenthesis, ...).
+ *
+ * BASE is read as a decimal number and must lie in 1..36.  DIGITS are
+ * 0-9 followed by a-z or A-Z (case-insensitive, 'a' is 10) and each one
+ * must be smaller than BASE.
+ *
+ * Returns the value, keeping only the low 32 bits if it is wider than
+ * that.  If the token is malformed, a warning is issued through m4_warnx()
+ * and 0 is returned.
+ */
+static int32_t
+parse_radix(const char *yytext, size_t yylen)
+{
+	const char *end = yytext + yylen;
+	char *next;
+	long base;
+	uint32_t value = 0;
+	int d;
+
+	/* Too short to hold even the "0r" prefix plus a base. */
+	if (yylen <= 2)
+		goto bad;
+
+	/*
+	 * Force base 10: with automatic base detection "010" would be read
+	 * as octal and "08" would stop in the middle of the base.
+	 */
+	base = strtol(yytext + 2, &next, 10);
+	if (base < 1 || base > 36 || next >= end || *next != ':')
+		goto bad;
+
+	for (next++; next < end; next++) {
+		if (*next >= '0' && *next <= '9')
+			d = *next - '0';
+		else if (*next >= 'a' && *next <= 'z')
+			d = *next - 'a' + 10;
+		else if (*next >= 'A' && *next <= 'Z')
+			d = *next - 'A' + 10;
+		else
+			goto bad;
+		if (d >= base)
+			goto bad;
+		/* Unsigned arithmetic: wraps instead of overflowing. */
+		value = value * (uint32_t)base + (uint32_t)d;
+	}
+	return (int32_t)value;
+
+bad:
+	m4_warnx("error in number %.*s", (int)yylen, yytext);
+	return 0;
+}
|
||
|
+
|
||
|
+/*
+ * Octal counterpart of isdigit(): non-zero if c is one of the characters
+ * '0' through '7', zero for anything else (including negative values).
+ */
+static int
+isodigit(int c)
+{
+	if (c < '0')
+		return 0;
+	return c <= '7';
+}
|
||
|
+
|
||
|
+/*
+ * Return the next token of the expression selected with yy_scan_string().
+ *
+ * Blanks, tabs and newlines are skipped.  The result is one of:
+ *   - NUMBER, with the value stored in yylval, for decimal, octal
+ *     (leading 0), hexadecimal (0x/0X) and, when mimic_gnu is set,
+ *     radix (0rBASE:DIGITS) literals;
+ *   - LE, GE, LSHIFT, RSHIFT, EQ, NE, LAND, LOR for the two-character
+ *     operators, and EXPONENT for "**" when mimic_gnu is set;
+ *   - ERROR for "0x" not followed by a hex digit or a malformed radix
+ *     literal;
+ *   - '\0' at the end of the input (the position is not advanced);
+ *   - otherwise the current character itself, which is consumed.
+ */
+int
+yylex(void)
+{
+	const char *start;
+
+	/* White space separates tokens but is never returned. */
+	while (*yypos == ' ' || *yypos == '\t' || *yypos == '\n')
+		++yypos;
+	start = yypos;
+
+	switch (*yypos) {
+	case '<':
+		switch (yypos[1]) {
+		case '=':
+			yypos += 2;
+			return LE;
+		case '<':
+			yypos += 2;
+			return LSHIFT;
+		}
+		break;
+	case '>':
+		switch (yypos[1]) {
+		case '=':
+			yypos += 2;
+			return GE;
+		case '>':
+			yypos += 2;
+			return RSHIFT;
+		}
+		break;
+	case '=':
+		if (yypos[1] != '=')
+			break;
+		yypos += 2;
+		return EQ;
+	case '!':
+		if (yypos[1] != '=')
+			break;
+		yypos += 2;
+		return NE;
+	case '&':
+		if (yypos[1] != '&')
+			break;
+		yypos += 2;
+		return LAND;
+	case '|':
+		if (yypos[1] != '|')
+			break;
+		yypos += 2;
+		return LOR;
+	case '*':
+		/* "**" is an operator only in GNU mode. */
+		if (!mimic_gnu || yypos[1] != '*')
+			break;
+		yypos += 2;
+		return EXPONENT;
+	case '0':
+		/* Step over the '0'; the next character picks the format. */
+		switch (*++yypos) {
+		case 'x':
+		case 'X':
+			if (!isxdigit((unsigned char)*++yypos))
+				return ERROR;
+			while (isxdigit((unsigned char)*yypos))
+				++yypos;
+			break;
+		case 'r':
+		case 'R':
+			/* Outside GNU mode this is just "0" followed by 'r'. */
+			if (!mimic_gnu)
+				break;
+			if (!isdigit((unsigned char)*++yypos))
+				return ERROR;
+			while (isdigit((unsigned char)*yypos))
+				++yypos;
+			if (*yypos != ':')
+				return ERROR;
+			if (!isalnum((unsigned char)*++yypos))
+				return ERROR;
+			while (isalnum((unsigned char)*yypos))
+				++yypos;
+			yylval = parse_radix(start, yypos - start);
+			return NUMBER;
+		default:
+			/*
+			 * yypos already sits on the character after the '0'
+			 * and that character has not been examined yet, so
+			 * test before advancing.  Advancing first would
+			 * swallow it ("0+1" would lex as a single number)
+			 * and would step past the terminating NUL for a
+			 * lone "0".
+			 */
+			while (isodigit(*yypos))
+				++yypos;
+			break;
+		}
+		yylval = number(start, yypos - start);
+		return NUMBER;
+	case '\0':
+		return '\0';
+	}
+
+	/* Decimal literal; a leading '0' was handled above. */
+	if (isdigit((unsigned char)*yypos)) {
+		do
+			++yypos;
+		while (isdigit((unsigned char)*yypos));
+		yylval = number(start, yypos - start);
+		return NUMBER;
+	}
+
+	/* Any other character is its own token. */
+	return *yypos++;
+}
|
||
|
diff --git a/tokenizer.l b/tokenizer.l
|
||
|
deleted file mode 100644
|
||
|
index 94f02fb6085..00000000000
|
||
|
--- a/tokenizer.l
|
||
|
+++ /dev/null
|
||
|
@@ -1,109 +0,0 @@
|
||
|
-%{
|
||
|
-/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */
|
||
|
-/*
|
||
|
- * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org>
|
||
|
- *
|
||
|
- * Permission to use, copy, modify, and distribute this software for any
|
||
|
- * purpose with or without fee is hereby granted, provided that the above
|
||
|
- * copyright notice and this permission notice appear in all copies.
|
||
|
- *
|
||
|
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||
|
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||
|
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||
|
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||
|
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||
|
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||
|
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||
|
- */
|
||
|
-#include "parser.h"
|
||
|
-#include <assert.h>
|
||
|
-#include <stdlib.h>
|
||
|
-#include <errno.h>
|
||
|
-#include <stdint.h>
|
||
|
-#include <limits.h>
|
||
|
-
|
||
|
-extern void m4_warnx(const char *, ...);
|
||
|
-extern int mimic_gnu;
|
||
|
-extern int32_t yylval;
|
||
|
-
|
||
|
-int32_t number(void);
|
||
|
-int32_t parse_radix(void);
|
||
|
-%}
|
||
|
-
|
||
|
-delim [ \t\n]
|
||
|
-ws {delim}+
|
||
|
-hex 0[xX][0-9a-fA-F]+
|
||
|
-oct 0[0-7]*
|
||
|
-dec [1-9][0-9]*
|
||
|
-radix 0[rR][0-9]+:[0-9a-zA-Z]+
|
||
|
-
|
||
|
-%option noyywrap
|
||
|
-
|
||
|
-%%
|
||
|
-{ws} {/* just skip it */}
|
||
|
-{hex}|{oct}|{dec} { yylval = number(); return(NUMBER); }
|
||
|
-{radix} { if (mimic_gnu) {
|
||
|
- yylval = parse_radix(); return(NUMBER);
|
||
|
- } else {
|
||
|
- return(ERROR);
|
||
|
- }
|
||
|
- }
|
||
|
-"<=" { return(LE); }
|
||
|
-">=" { return(GE); }
|
||
|
-"<<" { return(LSHIFT); }
|
||
|
-">>" { return(RSHIFT); }
|
||
|
-"==" { return(EQ); }
|
||
|
-"!=" { return(NE); }
|
||
|
-"&&" { return(LAND); }
|
||
|
-"||" { return(LOR); }
|
||
|
-"**" { if (mimic_gnu) { return (EXPONENT); } }
|
||
|
-. { return yytext[0]; }
|
||
|
-%%
|
||
|
-
|
||
|
-int32_t
|
||
|
-number()
|
||
|
-{
|
||
|
- long l;
|
||
|
-
|
||
|
- errno = 0;
|
||
|
- l = strtol(yytext, NULL, 0);
|
||
|
- if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) ||
|
||
|
- l > INT32_MAX || l < INT32_MIN)
|
||
|
- m4_warnx("numeric overflow in expr: %s", yytext);
|
||
|
- return l;
|
||
|
-}
|
||
|
-
|
||
|
-int32_t
|
||
|
-parse_radix()
|
||
|
-{
|
||
|
- long base;
|
||
|
- char *next;
|
||
|
- long l;
|
||
|
- int d;
|
||
|
-
|
||
|
- l = 0;
|
||
|
- base = strtol(yytext+2, &next, 0);
|
||
|
- if (base > 36 || next == NULL) {
|
||
|
- m4_warnx("error in number %s", yytext);
|
||
|
- } else {
|
||
|
- next++;
|
||
|
- while (*next != 0) {
|
||
|
- if (*next >= '0' && *next <= '9')
|
||
|
- d = *next - '0';
|
||
|
- else if (*next >= 'a' && *next <= 'z')
|
||
|
- d = *next - 'a' + 10;
|
||
|
- else {
|
||
|
- assert(*next >= 'A' && *next <= 'Z');
|
||
|
- d = *next - 'A' + 10;
|
||
|
- }
|
||
|
- if (d >= base) {
|
||
|
- m4_warnx("error in number %s", yytext);
|
||
|
- return 0;
|
||
|
- }
|
||
|
- l = base * l + d;
|
||
|
- next++;
|
||
|
- }
|
||
|
- }
|
||
|
- return l;
|
||
|
-}
|
||
|
-
|
||
|
--
|
||
|
2.17.0
|
||
|
|