iglunix/base/om4/m4-hand-written-lexer.patch
2021-09-30 14:06:21 +01:00

327 lines
7.2 KiB
Diff

From edf250c633bef40e7e37dafc9fc393dd2ad9074f Mon Sep 17 00:00:00 2001
From: Michael Forney <mforney@mforney.org>
Date: Tue, 10 Apr 2018 13:37:14 -0700
Subject: [PATCH] m4: Use hand-written lexer to avoid cycle in bootstrap
---
tokenizer.c | 191 +++++++++++++++++++++++++++++++++++++++++
tokenizer.l | 109 -----------------------
2 files changed, 191 insertions(+), 109 deletions(-)
create mode 100644 tokenizer.c
delete mode 100644 tokenizer.l
diff --git a/tokenizer.c b/tokenizer.c
new file mode 100644
index 00000000000..fa19fc65035
--- /dev/null
+++ b/tokenizer.c
@@ -0,0 +1,191 @@
+/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */
+/*
+ * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include "parser.h"
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+extern void m4_warnx(const char *, ...);
+extern int mimic_gnu;
+extern int32_t yylval;
+static const char *yypos;
+
+void
+yy_scan_string(const char *s)
+{
+ yypos = s;
+}
+
+static int32_t
+number(const char *yytext, size_t yylen)
+{
+ long l;
+
+ errno = 0;
+ l = strtol(yytext, NULL, 0);
+ if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) ||
+ l > INT32_MAX || l < INT32_MIN)
+ m4_warnx("numeric overflow in expr: %.*s", (int)yylen, yytext);
+ return l;
+}
+
+static int32_t
+parse_radix(const char *yytext, size_t yylen)
+{
+ long base;
+ char *next;
+ long l;
+ int d;
+
+ l = 0;
+ base = strtol(yytext+2, &next, 0);
+ if (base > 36 || next == NULL) {
+ m4_warnx("error in number %.*s", (int)yylen, yytext);
+ } else {
+ next++;
+ while (*next != 0) {
+ if (*next >= '0' && *next <= '9')
+ d = *next - '0';
+ else if (*next >= 'a' && *next <= 'z')
+ d = *next - 'a' + 10;
+ else {
+ assert(*next >= 'A' && *next <= 'Z');
+ d = *next - 'A' + 10;
+ }
+ if (d >= base) {
+ m4_warnx("error in number %.*s", (int)yylen, yytext);
+ return 0;
+ }
+ l = base * l + d;
+ next++;
+ }
+ }
+ return l;
+}
+
+static int
+isodigit(int c)
+{
+ return c >= '0' && c <= '7';
+}
+
+int yylex(void)
+{
+ const char *start;
+
+next:
+ start = yypos;
+ switch (*yypos) {
+ case ' ':
+ case '\t':
+ case '\n':
+ ++yypos;
+ goto next;
+ case '<':
+ switch (yypos[1]) {
+ case '=':
+ yypos += 2;
+ return LE;
+ case '<':
+ yypos += 2;
+ return LSHIFT;
+ }
+ break;
+ case '>':
+ switch (yypos[1]) {
+ case '=':
+ yypos += 2;
+ return GE;
+ case '>':
+ yypos += 2;
+ return RSHIFT;
+ }
+ break;
+ case '=':
+ if (yypos[1] != '=')
+ break;
+ yypos += 2;
+ return EQ;
+ case '!':
+ if (yypos[1] != '=')
+ break;
+ yypos += 2;
+ return NE;
+ case '&':
+ if (yypos[1] != '&')
+ break;
+ yypos += 2;
+ return LAND;
+ case '|':
+ if (yypos[1] != '|')
+ break;
+ yypos += 2;
+ return LOR;
+ case '*':
+ if (!mimic_gnu || yypos[1] != '*')
+ break;
+ yypos += 2;
+ return EXPONENT;
+ case '0':
+ switch (*++yypos) {
+ case 'x':
+ case 'X':
+ if (!isxdigit(*++yypos))
+ return ERROR;
+ do ++yypos;
+ while (isxdigit(*yypos));
+ break;
+ case 'r':
+ case 'R':
+ if (!mimic_gnu)
+ break;
+ if (!isdigit(*++yypos))
+ return ERROR;
+ do ++yypos;
+ while (isdigit(*yypos));
+ if (*yypos != ':')
+ return ERROR;
+ if (!isalnum(*++yypos))
+ return ERROR;
+ do ++yypos;
+ while (isalnum(*yypos));
+ yylval = parse_radix(start, yypos - start);
+ return NUMBER;
+ default:
+ do ++yypos;
+ while (isodigit(*yypos));
+ break;
+ }
+ yylval = number(start, yypos - start);
+ return NUMBER;
+ case '\0':
+ return '\0';
+ }
+ if (isdigit(*yypos)) {
+ do ++yypos;
+ while (isdigit(*yypos));
+ yylval = number(start, yypos - start);
+ return NUMBER;
+ }
+
+ return *yypos++;
+}
diff --git a/tokenizer.l b/tokenizer.l
deleted file mode 100644
index 94f02fb6085..00000000000
--- a/tokenizer.l
+++ /dev/null
@@ -1,109 +0,0 @@
-%{
-/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */
-/*
- * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#include "parser.h"
-#include <assert.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <stdint.h>
-#include <limits.h>
-
-extern void m4_warnx(const char *, ...);
-extern int mimic_gnu;
-extern int32_t yylval;
-
-int32_t number(void);
-int32_t parse_radix(void);
-%}
-
-delim [ \t\n]
-ws {delim}+
-hex 0[xX][0-9a-fA-F]+
-oct 0[0-7]*
-dec [1-9][0-9]*
-radix 0[rR][0-9]+:[0-9a-zA-Z]+
-
-%option noyywrap
-
-%%
-{ws} {/* just skip it */}
-{hex}|{oct}|{dec} { yylval = number(); return(NUMBER); }
-{radix} { if (mimic_gnu) {
- yylval = parse_radix(); return(NUMBER);
- } else {
- return(ERROR);
- }
- }
-"<=" { return(LE); }
-">=" { return(GE); }
-"<<" { return(LSHIFT); }
-">>" { return(RSHIFT); }
-"==" { return(EQ); }
-"!=" { return(NE); }
-"&&" { return(LAND); }
-"||" { return(LOR); }
-"**" { if (mimic_gnu) { return (EXPONENT); } }
-. { return yytext[0]; }
-%%
-
-int32_t
-number()
-{
- long l;
-
- errno = 0;
- l = strtol(yytext, NULL, 0);
- if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) ||
- l > INT32_MAX || l < INT32_MIN)
- m4_warnx("numeric overflow in expr: %s", yytext);
- return l;
-}
-
-int32_t
-parse_radix()
-{
- long base;
- char *next;
- long l;
- int d;
-
- l = 0;
- base = strtol(yytext+2, &next, 0);
- if (base > 36 || next == NULL) {
- m4_warnx("error in number %s", yytext);
- } else {
- next++;
- while (*next != 0) {
- if (*next >= '0' && *next <= '9')
- d = *next - '0';
- else if (*next >= 'a' && *next <= 'z')
- d = *next - 'a' + 10;
- else {
- assert(*next >= 'A' && *next <= 'Z');
- d = *next - 'A' + 10;
- }
- if (d >= base) {
- m4_warnx("error in number %s", yytext);
- return 0;
- }
- l = base * l + d;
- next++;
- }
- }
- return l;
-}
-
--
2.17.0