psyclpc/src/util/indent/lexi.c

585 lines
15 KiB
C

/*
* Copyright (c) 1985 Sun Microsystems, Inc.
* Copyright (c) 1980 The Regents of the University of California.
* Copyright (c) 1976 Board of Trustees of the University of Illinois.
* All rights reserved.
*
* Redistribution and use in source and binary forms are permitted
* provided that the above copyright notice and this paragraph are
* duplicated in all such forms and that any documentation,
* advertising materials, and other materials related to such
* distribution and use acknowledge that the software was developed
* by the University of California, Berkeley, the University of Illinois,
* Urbana, and Sun Microsystems, Inc. The name of either University
* or Sun Microsystems may not be used to endorse or promote products
* derived from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef lint
/* SCCS version identification string; omitted when `lint` is defined. */
static char sccsid[] = "@(#)lexi.c 5.11 (Berkeley) 9/15/88";
#endif /* not lint */
/*
* Here we have the token scanner for indent. It scans off one token and puts
* it in the global variable "token". It returns a code, indicating the type
* of token scanned.
*/
#include "indent_globs.h"
#include "ctype.h"
/*
 * Character classes stored in chartype[].  A table value of 0 means the
 * character belongs to neither class.
 */
#define alphanum 1 /* letters, digits, '_' and '$' */
#define opchar 3 /* characters that can start or continue an operator */
/*
 * Classes of reserved word.  Each entry of a keyword table (struct templ)
 * carries one of these, and lexi() switches on it to decide which token
 * code to return.  The example keywords below are taken from specials[].
 */
enum rwcodes {
rw_break, /* break, goto, return */
rw_switch, /* switch */
rw_case, /* case, default */
rw_struct_like, /* struct, enum, union */
rw_decl, /* type and storage-class keywords (int, static, ...) */
rw_sp_paren, /* if, while, for */
rw_sp_nparen, /* do, else */
rw_sizeof /* sizeof */
};
/*
 * One keyword table entry: the spelling of a reserved word and its class.
 * Tables are terminated by an entry whose rwd is a null pointer.
 */
struct templ {
char *rwd; /* NUL-terminated keyword text, or 0 for the terminator */
enum rwcodes rwcode; /* class of the keyword */
};
/*
 * Table of additional keywords registered through addkey().  It stays a
 * null pointer until the first keyword is added.
 */
struct templ *user_specials = 0;
/*
 * user_specials_max is the number of entries currently allocated for
 * user_specials; user_specials_idx is the number of entries in use.
 */
unsigned int user_specials_max, user_specials_idx;
/*
 * Built-in reserved words and the class assigned to each of them.  lexi()
 * walks this table from the top when classifying an identifier, and
 * addkey() consults it to reject duplicates.  The table ends with an entry
 * whose rwd is a null pointer.
 */
struct templ specials[] =
{
    {"switch", rw_switch},
    {"case", rw_case},
    {"break", rw_break},
    {"struct", rw_struct_like},
    {"union", rw_struct_like},
    {"enum", rw_struct_like},
    {"default", rw_case},
    {"int", rw_decl},
    {"char", rw_decl},
    {"float", rw_decl},
    {"double", rw_decl},
    /* NOTE(review): "long" and "short" were already disabled here; the
     * reason is not visible in this file, so they stay disabled.
     {"long", rw_decl},
     {"short", rw_decl}, */
    {"typedef", rw_decl},       /* was misspelled "typdef" */
    {"unsigned", rw_decl},
    {"register", rw_decl},
    {"static", rw_decl},
    {"global", rw_decl},
    {"extern", rw_decl},
    {"void", rw_decl},
    {"va_dcl", rw_decl},
    {"goto", rw_break},
    {"return", rw_break},
    {"if", rw_sp_paren},
    {"while", rw_sp_paren},
    {"for", rw_sp_paren},
    {"else", rw_sp_nparen},
    {"do", rw_sp_nparen},
    {"sizeof", rw_sizeof},
    {0, 0}                      /* terminator */
};
/*
 * Classification of the 128 ASCII characters, indexed by character code:
 * 1 (alphanum) for characters that may appear in identifiers and numbers,
 * 3 (opchar) for operator characters, 0 for everything else.
 */
char chartype[128] =
{ /* this is used to facilitate the decision of
* what type (alphanumeric, operator) each
* character is */
0, 0, 0, 0, 0, 0, 0, 0, /* 000-007: control characters */
0, 0, 0, 0, 0, 0, 0, 0, /* 010-017: control characters */
0, 0, 0, 0, 0, 0, 0, 0, /* 020-027: control characters */
0, 0, 0, 0, 0, 0, 0, 0, /* 030-037: control characters */
0, 3, 0, 0, 1, 3, 3, 0, /* 040-047: space ! " # $ % & ' */
0, 0, 3, 3, 0, 3, 0, 3, /* 050-057: ( ) * + , - . and slash */
1, 1, 1, 1, 1, 1, 1, 1, /* 060-067: digits 0-7 */
1, 1, 0, 0, 3, 3, 3, 3, /* 070-077: 8 9 : ; < = > ? */
0, 1, 1, 1, 1, 1, 1, 1, /* 100-107: @ A-G */
1, 1, 1, 1, 1, 1, 1, 1, /* 110-117: H-O */
1, 1, 1, 1, 1, 1, 1, 1, /* 120-127: P-W */
1, 1, 1, 0, 0, 0, 3, 1, /* 130-137: X Y Z [ backslash ] ^ _ */
0, 1, 1, 1, 1, 1, 1, 1, /* 140-147: backquote a-g */
1, 1, 1, 1, 1, 1, 1, 1, /* 150-157: h-o */
1, 1, 1, 1, 1, 1, 1, 1, /* 160-167: p-w */
1, 1, 1, 0, 3, 0, 3, 0 /* 170-177: x y z { | } ~ DEL */
};
enum codes
lexi()
{
/* used to walk through the token */
char *tok;
int unary_delim; /* this is set to 1 if the current token
*
* forces a following operator to be unary */
static enum codes last_code; /* the last token type returned */
static int l_struct; /* set to 1 if the last token was 'struct' */
int code; /* internal code to be returned */
char qchar; /* the delimiter character for a string */
unary_delim = false;
parser_state_tos->col_1 = parser_state_tos->last_nl; /* tell world that this token started in
* column 1 iff the last thing scanned was nl */
parser_state_tos->last_nl = false;
while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
parser_state_tos->col_1 = false; /* leading blanks imply token is not in column
* 1 */
if (++buf_ptr >= buf_end)
fill_buffer();
}
token = buf_ptr;
/* Scan an alphanumeric token */
if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
/*
* we have a character or number
*/
register char *j; /* used for searching thru list of
*
* reserved words */
register struct templ *p;
if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
int seendot = 0,
seenexp = 0;
if (*buf_ptr == '0' &&
(buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
buf_ptr += 2;
while (isxdigit(*buf_ptr))
buf_ptr++;
}
else
while (1) {
if (*buf_ptr == '.')
if (seendot)
break;
else
seendot++;
buf_ptr++;
if (!isdigit(*buf_ptr) && *buf_ptr != '.')
if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
break;
else {
seenexp++;
seendot++;
buf_ptr++;
if (*buf_ptr == '+' || *buf_ptr == '-')
buf_ptr++;
}
}
if (*buf_ptr == 'L' || *buf_ptr == 'l')
buf_ptr++;
}
else
while (chartype[*buf_ptr] == alphanum) { /* copy it over */
buf_ptr++;
if (buf_ptr >= buf_end)
fill_buffer();
}
token_end = buf_ptr;
while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
if (++buf_ptr >= buf_end)
fill_buffer();
}
parser_state_tos->its_a_keyword = false;
parser_state_tos->sizeof_keyword = false;
if (l_struct) { /* if last token was 'struct', then this token
* should be treated as a declaration */
l_struct = false;
last_code = ident;
parser_state_tos->last_u_d = true;
return (decl);
}
parser_state_tos->last_u_d = false; /* Operator after indentifier is binary */
last_code = ident; /* Remember that this is the code we will
* return */
/*
* This loop will check if the token is a keyword.
*/
for (p = specials; (j = p->rwd) != 0; p++) {
tok = token; /* point at scanned token */
if (*j++ != *tok++ || *j++ != *tok++)
continue; /* This test depends on the fact that
* identifiers are always at least 1 character
* long (ie. the first two bytes of the
* identifier are always meaningful) */
if (tok >= token_end)
break; /* If its a 1 or 2 character identifier */
while (tok < token_end && *tok++ == *j++)
if (*j == 0 && tok == token_end)
goto found_keyword; /* I wish that C had a multi-level
* break... */
}
if (p->rwd) { /* we have a keyword */
found_keyword:
parser_state_tos->its_a_keyword = true;
parser_state_tos->last_u_d = true;
switch (p->rwcode) {
case rw_switch: /* it is a switch */
return (swstmt);
case rw_case: /* a case or default */
return (casestmt);
case rw_struct_like: /* a "struct" */
if (parser_state_tos->p_l_follow)
break; /* inside parens: cast */
l_struct = true;
/*
* Next time around, we will want to know that we have had a
* 'struct'
*/
case rw_decl: /* one of the declaration keywords */
if (parser_state_tos->p_l_follow) {
parser_state_tos->cast_mask |= 1 << parser_state_tos->p_l_follow;
break; /* inside parens: cast */
}
last_code = decl;
return (decl);
case rw_sp_paren: /* if, while, for */
return (sp_paren);
case rw_sp_nparen: /* do, else */
return (sp_nparen);
case rw_sizeof:
parser_state_tos->sizeof_keyword = true;
default: /* all others are treated like any other
* identifier */
return (ident);
} /* end of switch */
} /* end of if (found_it) */
if (*buf_ptr == '(' && parser_state_tos->tos <= 1 && parser_state_tos->ind_level == 0) {
register char *tp = buf_ptr;
while (tp < buf_end)
if (*tp++ == ')' && *tp == ';')
goto not_proc;
parser_state_tos->procname = token;
parser_state_tos->procname_end = token_end;
parser_state_tos->in_parameter_declaration = 1;
not_proc:;
}
/*
* The following hack attempts to guess whether or not the current
* token is in fact a declaration keyword -- one that has been
* typedefd
*/
if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
&& !parser_state_tos->p_l_follow
&& !parser_state_tos->block_init
&& (parser_state_tos->last_token == rparen || parser_state_tos->last_token == semicolon ||
parser_state_tos->last_token == decl ||
parser_state_tos->last_token == lbrace || parser_state_tos->last_token == rbrace)) {
parser_state_tos->its_a_keyword = true;
parser_state_tos->last_u_d = true;
last_code = decl;
return decl;
}
if (last_code == decl) /* if this is a declared variable, then
* following sign is unary */
parser_state_tos->last_u_d = true; /* will make "int a -1" work */
last_code = ident;
return (ident); /* the ident is not in the list */
} /* end of procesing for alpanum character */
/* l l l Scan a non-alphanumeric token */
/* If it is not a one character token, token_end will get changed
later. */
token_end = buf_ptr + 1;
if (++buf_ptr >= buf_end)
fill_buffer();
switch (*token) {
case '\n':
unary_delim = parser_state_tos->last_u_d;
parser_state_tos->last_nl = true; /* remember that we just had a newline */
code = (had_eof ? 0 : newline);
/*
* if data has been exausted, the newline is a dummy, and we should
* return code to stop
*/
break;
case '\'': /* start of quoted character */
case '"': /* start of string */
qchar = *token;
/* Find out how big the literal is so we can set token_end. */
/* Invariant: before loop test buf_ptr points to the next */
/* character that we have not yet checked. */
while (*buf_ptr != qchar && *buf_ptr != 0 && *buf_ptr != '\n')
{
if (*buf_ptr == '\\')
{
buf_ptr++;
if (buf_ptr >= buf_end)
fill_buffer ();
if (*buf_ptr == '\n')
++line_no;
if (*buf_ptr == 0)
break;
}
buf_ptr++;
if (buf_ptr >= buf_end)
fill_buffer ();
}
if (*buf_ptr == '\n' || *buf_ptr == 0)
{
diag (1,
qchar == '\''
? "Unterminated character constant"
: "Unterminated string constant"
);
}
else
{
/* Advance over end quote char. */
buf_ptr++;
if (buf_ptr >= buf_end)
fill_buffer ();
}
code = ident;
break;
case ('('):
if (lpc && *buf_ptr == '{') {
buf_ptr++;
}
case ('['):
unary_delim = true;
code = lparen;
break;
case (')'):
case (']'):
code = rparen;
break;
case '#':
unary_delim = parser_state_tos->last_u_d;
code = preesc;
break;
case '?':
unary_delim = true;
code = question;
break;
case (':'):
if (lpc && *buf_ptr == ':') {
buf_ptr++;
code = unary_op;
unary_delim = true;
break;
}
code = colon;
unary_delim = true;
break;
case (';'):
unary_delim = true;
code = semicolon;
break;
case ('{'):
unary_delim = true;
/* This check is made in the code for '='. No one who writes
initializers without '=' these days deserves to have indent
work on their code (besides which, uncommenting this would
screw up anything which assumes that parser_state_tos->block_init really
means you are in an initializer. */
/*
* if (parser_state_tos->in_or_st) parser_state_tos->block_init = 1;
*/
/* The following neat hack causes the braces in structure
initializations to be treated as parentheses, thus causing
initializations to line up correctly, e.g.
struct foo bar =
{{a,
b,
c},
{1,
2}};
If lparen is returned, token can be used to distinguish
between '{' and '(' where necessary. */
code = parser_state_tos->block_init ? lparen : lbrace;
break;
case ('}'):
if (lpc && *buf_ptr == ')') {
buf_ptr++;
code = rparen;
break;
}
unary_delim = true;
/* The following neat hack is explained under '{' above. */
code = parser_state_tos->block_init ? rparen : rbrace;
break;
case 014: /* a form feed */
unary_delim = parser_state_tos->last_u_d;
parser_state_tos->last_nl = true; /* remember this so we can set 'parser_state_tos->col_1'
* right */
code = form_feed;
break;
case (','):
unary_delim = true;
code = comma;
break;
case '.':
unary_delim = false;
code = period;
break;
case '-':
case '+': /* check for -, +, --, ++ */
code = (parser_state_tos->last_u_d ? unary_op : binary_op);
unary_delim = true;
if (*buf_ptr == token[0]) {
/* check for doubled character */
buf_ptr++;
/* buffer overflow will be checked at end of loop */
if (last_code == ident || last_code == rparen) {
code = (parser_state_tos->last_u_d ? unary_op : postop);
/* check for following ++ or -- */
unary_delim = false;
}
}
else if (*buf_ptr == '=')
/* check for operator += */
buf_ptr++;
else if (*buf_ptr == '>') {
/* check for operator -> */
buf_ptr++;
if (!pointer_as_binop) {
unary_delim = false;
code = unary_op;
parser_state_tos->want_blank = false;
}
}
break; /* buffer overflow will be checked at end of
* switch */
case '=':
if (parser_state_tos->in_or_st)
parser_state_tos->block_init = 1;
if (*buf_ptr == '=') /* == */
buf_ptr++;
code = binary_op;
unary_delim = true;
break;
/* can drop thru!!! */
case '>':
case '<':
case '!': /* ops like <, <<, <=, !=, etc */
if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
if (++buf_ptr >= buf_end)
fill_buffer();
}
code = (parser_state_tos->last_u_d ? unary_op : binary_op);
unary_delim = true;
break;
default:
if (token[0] == '/' && *buf_ptr == '*') {
/* it is start of comment */
if (++buf_ptr >= buf_end)
fill_buffer();
code = comment;
unary_delim = parser_state_tos->last_u_d;
break;
}
while (*(buf_ptr - 1) == *buf_ptr || *buf_ptr == '=') {
/*
* handle ||, &&, etc, and also things as in int *****i
*/
if (++buf_ptr >= buf_end)
fill_buffer();
}
code = (parser_state_tos->last_u_d ? unary_op : binary_op);
unary_delim = true;
} /* end of switch */
if (code != newline) {
l_struct = false;
last_code = code;
}
token_end = buf_ptr;
if (buf_ptr >= buf_end) /* check for input buffer empty */
fill_buffer();
parser_state_tos->last_u_d = unary_delim;
return (code);
}
/*
 * Add the given keyword to the user keyword table (user_specials), using
 * val as the keyword type.
 *
 * key - NUL-terminated keyword text.  The pointer itself is stored, not a
 *       copy, so the string must stay valid for as long as the table is
 *       in use.
 * val - class to record for the keyword.
 *
 * Nothing is added when key is already present in specials[] or in
 * user_specials.  The table is always left terminated by an entry whose
 * rwd is a null pointer.
 */
addkey(key, val)
    char *key;
    enum rwcodes val;
{
    register struct templ *p;
    unsigned int i;

    /* Already a built-in keyword? */
    for (p = specials; p->rwd != 0; p++)
        if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
            return;

    /* Already registered by an earlier call? */
    if (user_specials != 0)
        for (i = 0; i < user_specials_idx; i++)
            if (user_specials[i].rwd != 0
                && strcmp(user_specials[i].rwd, key) == 0)
                return;

    if (user_specials == 0)
    {
        user_specials = (struct templ *) xmalloc (5 * sizeof (struct templ));
        /* NOTE(review): xmalloc is defined elsewhere; this check is kept in
         * case it can return a null pointer. */
        if (user_specials == 0)
        {
            fputs ("indent: out of memory\n", stderr);
            exit (1);
        }
        user_specials_max = 5;
        user_specials_idx = 0;
    }
    else if (user_specials_idx + 1 >= user_specials_max)
    {
        /*
         * Two slots are needed: one for the new entry and one for the
         * terminator written after it.  Growing only once the table was
         * completely full let the terminator land one element past the
         * end of the allocation.
         */
        user_specials_max += 5;
        user_specials = (struct templ *) xrealloc ((char *) user_specials,
                                                   user_specials_max
                                                   * sizeof (struct templ));
    }

    p = &user_specials[user_specials_idx++];
    p->rwd = key;
    p->rwcode = val;
    /* keep the table terminated */
    p[1].rwd = 0;
    p[1].rwcode = 0;
    return;
}