/*-------------------------------------------------------------------------- Copyright (c) Working Knowledge, Inc. 1992. All rights reserved. Name : lex.c Purpose: Version: 1.0 Notes : --------------------------------------------------------------------------*/ /*-------------------------------------- --------------------------------------*/ /*-- --*/ /*-------------------------------------------------------------------------- Directives and Pragmas --------------------------------------------------------------------------*/ #include #include #include #include #include "base.h" /*-------------------------------------------------------------------------- Function Definitions --------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------- lex() - Source code analysis, 1st pass ---------------------------------------------------------------------------- Return: SUCCESS or FAILURE Notes : lex controls the initial analysis of each source file. Main calls it once for each module. Lex: - Declares and initializes the major data structures for module data - Reads the module - Calls functions to parse the module - Collects the basic module measures - Creates the internal representation of the module for pass 2 --------------------------------------------------------------------------*/ int lex (void) { /*-------------------------------------- Data declarations --------------------------------------*/ struct parse pa, *paptr = &pa; struct symbol *syptr = NULL; char fn_sc[FILENAME_MAX]; FILE * in1; int i; int result = SUCCESS; /*-------------------------------------- Preliminaries --------------------------------------*/ /*-- Get the source file name --*/ strcpy(fn_sc, getenvl("fn_sc")); /*-- Trace for function start --*/ #if defined(TRACE_F1) fprintf(DEBUG_OUT,"\nSTART lex for %s",fn_sc); EXIT_FS #endif /*-- Open the file of source code --*/ in1 = fopen(fn_sc,"r"); if (in1 == NULL) { fprintf(ERR_OUT,"\nERROR: (lex) Cannot open input file %s.", fn_sc); result = FAILURE; } /*-------------------------------------- Data Initializations --------------------------------------*/ init_pa (paptr); keep_counts(NULL,'I'); syptr = symlist(Append); /*-------------------------------------- Read and process the file of source code --------------------------------------*/ while(fgets(pa.line,MAXLINESIZE,in1)) { #if defined(TRACE_D2) fprintf(DEBUG_OUT,"\nLEX, input: %s",pa.line); #endif LS_LINE_FIX /* Language specific adjustments to input line */ /*-------------------------------------- Parse one buffer of source text --------------------------------------*/ pa.ss = 0; while(get_token(paptr,syptr)) { keep_counts(syptr,'L'); /* We won't keep comment and newline on the symbol list */ if (syptr->tktype1 == Comment || syptr->tktype1 == Newline) symlist(Remove); /* Process the list for a statement and then clear it. */ if (syptr->tktype2 == Stmntend) { keep_counts(NULL,'S'); symlist(Clear); } /* Get ready for the next get_token */ syptr = symlist(Append); } /* Buffer loop */ } /* Module loop */ /*-------------------------------------- Clean-up and quit --------------------------------------*/ symlist(Free); keep_counts(NULL,'F'); fclose(in1); #if defined(TRACE_F2) fprintf(DEBUG_OUT,"\nLEX parse complete for: %s",fn_sc); EXIT_FS #endif return (result); } /*-------------------------------------------------------------------------- get_token() - Find the next token ---------------------------------------------------------------------------- Return: Length of the token; 0 if no token found. Changes fields in struct pa. Notes : get_token parses a source text line to find a token. lex calls get_token and it - Finds a token - Puts the token in syptr->token. --------------------------------------------------------------------------*/ int get_token (struct parse *paptr, struct symbol *syptr) { /*-------------------------------------- Data Declarations --------------------------------------*/ static int in_comnt = 0; static int in_stmnt = 0; int i; /* General Index */ char *cptr; #if defined(TRACE_F2) fprintf(DEBUG_OUT,"\nSTART get_token"); EXIT_FS #endif #if defined(TRACE_D2) fprintf(DEBUG_OUT,"\nss:%d ts:%d line:%s", paptr->ss, paptr->ts, paptr->line); #endif /* Set starting point for scan. */ cptr = paptr->line + paptr->ss; /*-------------------------------------- Find a token --------------------------------------*/ syptr->tksize = 0; while (*cptr) { /*-- New line --*/ if ( *cptr == '\n') { paptr->ts = cptr - paptr->line; syptr->tksize = 1; syptr->tktype1 = Newline; syptr->tktype2 = NULL; break; } /*-- Visible characters --*/ /* The sections for comments, names, etc. are set up to allow more than one to be tried. e.g., for SAS, this allows & to be tried as a name, and then if scan_name decides it's an operator, the code will fall through to scan_punct, which serves as a catch-all routine. Note that if scan_punct can't recognize the token it gets counted as white space. */ if (isgraph(*cptr)) { paptr->ts = cptr - paptr->line; /*-- Comments --*/ if ( (in_comnt) || (LS_COMNTSTART) ) { syptr->tksize = scan_comnt(cptr, &(syptr->tktype1), &(syptr->tktype2)); if (syptr->tksize) break; } /*-- Names --*/ if (LS_NAMESTART) { syptr->tksize = scan_name(cptr, &(syptr->tktype1), &(syptr->tktype2)); if (syptr->tksize) break; } /*-- Numbers --*/ if (isxdigit(*cptr) || *cptr == '.') { syptr->tksize = scan_nmconst(cptr, &(syptr->tktype1), &(syptr->tktype2)); if (syptr->tksize) break; } /*-- Character literals --*/ if (*cptr == '\'' || *cptr == '\"') { syptr->tksize = scan_chconst(cptr, &(syptr->tktype1), &(syptr->tktype2)); if (syptr->tksize) break; } /*-- Punctuation --*/ syptr->tksize = scan_punct(cptr, &(syptr->tktype1), &(syptr->tktype2)); if (syptr->tksize) break; } cptr++; } /*-------------------------------------- Record results and return --------------------------------------*/ /* Either we have a token, or we've run out of chars to look at. */ /* syptr->tksize is 0 if we found nothing; (+) if we found something. */ if (syptr->tksize != 0) { /* Note where we are, to provide context for the next token */ if (syptr->tktype2 == Stmntend || syptr->tktype2 == Comntend) { in_comnt = 0; in_stmnt = 0; } else if (syptr->tktype1 == Comment) { in_comnt = 1; in_stmnt = 0; } else if (syptr->tktype1 != Newline){ in_comnt = 0; in_stmnt = 1; } /* Exit for language specific action based on token line position */ LS_TOK_POS /* Count leading space and set next start location */ syptr->tkspace = paptr->ts - paptr->ss; paptr->ss = paptr->ts + syptr->tksize; /* Move the token to syptr->token */ syptr->tksize = (syptr->tksize > MAXTOKENSIZE) ? MAXTOKENSIZE : syptr->tksize; strncpy(syptr->token, cptr, syptr->tksize); syptr->token[syptr->tksize] = '\0'; } #if defined(TRACE_D2) fprintf(DEBUG_OUT,"\ntktype1: %d tktype2: %d tksize: %d token: %s", syptr->tktype1, syptr->tktype2, syptr->tksize, syptr->token); #endif return (syptr->tksize); }