/* ctags.c -- Generate tags for subsequent use by Brief or VI editors.

   Usage: ctags source1.c source2.c ... >tags

   This program will perform a simple parsing of one or more C source
   files and write a "tags" file to stdout. This file is then used in
   conjunction with tagging commands build into VI and available (as 
   macros) from the Solution Systems BBS. The tags file will contain
   a line for each procedure in the source file. Each line has the form:

   <procedure name> <file name> <search criteria>

   The search criteria contains the entire source line containing the
   procedure name to reduce the possibility of the search finding the
   wrong line.

   This has been compiled under Microsoft C V4.0 and will also compile
   under Unix V on an AT&T 3B1.	When using Microsoft C, link with
   ssetargv.obj to enable wild card expansion of command line arguments.

   01/11/87	Initial release

   02/01/87	Misc. minor enhansements.

   04/22/87	Tag #defined names in addition to function names.

   Paul Verket */

#define	LINT_ARGS

#include <stdio.h>
#include <ctype.h>

#ifndef SEEK_SET
#	define SEEK_SET	0
#endif

typedef enum {T_WORD, T_BRACEEXP, T_COMMA, T_SEMI, T_PREPROCESS, 
	T_OPENPAREN, T_CLOSEPAREN, T_OPENBRACE, T_CLOSEBRACE, T_NL, 
	T_EOF} TOKEN;

main(argc, argv)
int	argc;
char	*argv[];
{
	FILE	*in_file;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s file [file...] [>tags]\n", argv[0]);
		exit(1);
		}

	/* Cycle through each source-file argument on the command line. */
	while (argc-- > 1) {
		in_file = fopen((++argv)[0], "r");
		if (in_file == NULL) {
			perror(argv[0]);
			exit(1);
			}
		find_functions(argv[0], in_file);
		fclose(in_file);
		}

	exit(0);
	}

find_functions(filename, in_file)
char	filename[];
FILE	*in_file;
{
	TOKEN	gettoken(),
		curr_token;
	enum	{NEUTRAL, NAME, FN_NAME, INPAREN, INBRACE, CHECK_DEFINE, 
		RECORD_DEFINE, PREPROCESSOR} 
			state = NEUTRAL;
	char	word[132],
		function[132];
	long	line_start,
		defn_start;	/* ftell() of the procedure line */
	int	paren_cnt,
		brace_cnt;

	while ((curr_token = gettoken(word, &line_start, in_file)) != T_EOF) 
	    switch((int) state) {
	    	/* The "home" state. If a "word" is found, assume that it is
		   a procedure name. If T_PREPROCESS, look for #define names
		   and toss the rest of the line since macro definitions look 
		   like procedures. If an open brace is found, start gobbling 
		   up the text contained within the braces. Keep a brace count 
		   to handle nested braces. */
		case NEUTRAL:
			switch ((int) curr_token) {
				case T_WORD:
					state = NAME;
					/* Note that the parens may start on
					   the next line, so store the offset
					   now. */
					defn_start = line_start;
					continue;
				case T_PREPROCESS:
					state = CHECK_DEFINE;
					defn_start = line_start;
					continue;
				case T_OPENBRACE:
					state = INBRACE;
					brace_cnt = 1;
					continue;
				default:
					continue;
				}
		/* All subsequent "word"s will be assumed to be the real
		   function name until an open paren is found. If something
		   other than a word or paren is found, then this wasn't
		   a function name after all. */
		case NAME:
			switch ((int) curr_token) {
				case T_WORD:
				case T_NL:
					defn_start = line_start;
					continue;
				case T_OPENPAREN:
					state = INPAREN;
					strcpy(function, word);
					paren_cnt = 1;
					continue;
				default:
					state = NEUTRAL;
					continue;
				}
		/* Eat up all the stuff within parens until the close paren
		   is found. Keep a counter to handle nested parens. */
		case INPAREN:
			switch ((int) curr_token) {
				case T_OPENPAREN:
					paren_cnt++;
					continue;
				case T_CLOSEPAREN:
					if (--paren_cnt == 0)
						state = FN_NAME;
					continue;
				default:
					continue;
				}
		/* If a comma or a semicolon is found, then this was a false
		   alarm. If an opening brace or another word is found, then
		   we found a procedure definition. */
		case FN_NAME:
			switch ((int) curr_token) {
				case T_COMMA:
				case T_SEMI:
					state = NEUTRAL;
					continue;
				case T_NL:
					continue;
				case T_OPENBRACE:
					state = INBRACE;
					brace_cnt = 1;
					printf("%s %s ", function, filename);
					print_defn_line(in_file, defn_start);
					continue;
				default:
					state = NEUTRAL;
					printf("%s %s ", function, filename);
					print_defn_line(in_file, defn_start);
					continue;
				}
		/* Loop until the closing brace is found. Keep a counter to
		   handle nested braces. */
		case INBRACE:
			switch((int) curr_token) {
				case T_OPENBRACE:
					brace_cnt++;
					continue;
				case T_CLOSEBRACE:
					if (--brace_cnt == 0) 
						state = NEUTRAL;
					continue;
				default:
					continue;
				}
		/* Check preprocessor lines for #define statements */
		case CHECK_DEFINE:
			switch ((int) curr_token) {
				case T_WORD:
					if (0 == strcmp(word, "define"))
						state = RECORD_DEFINE;
					else state = PREPROCESSOR;
					continue;
				default:
					state = PREPROCESSOR;
					continue;
				}
		/* Record the defined name in the same way as function names */
		case RECORD_DEFINE:
			state = PREPROCESSOR; /* toss the rest */
			printf("%s %s ", word, filename);
			print_defn_line(in_file, defn_start);
			continue;
		/* Handle the preprocessor line until a new-line is found.
		   The tokenizer tosses escaped new lines. */
		case PREPROCESSOR:
			switch ((int) curr_token) {
				case T_NL:
					state = NEUTRAL;
					continue;
				default:
					continue;
				}
		}
	}

/* Break up input file into tokens. Take care with characters inside quotes
   and comments that might cause trouble. (like braces and parens!) */
static TOKEN gettoken(word, line_start, in_file)
char	*word;
long	*line_start;
FILE	*in_file;
{
	enum {NEUTRAL, INQUOTE, INSQUOTE, INWORD, INCOMMENT} 
		state = NEUTRAL;
	static int	col_count = 0;
	int	c,
		c2;
	char	*w;

	w = word;
	while ((c = getc(in_file)) != EOF) {
		/* Keep a column count to aid in finding preprocessor lines.
		   Keep the ftell() of the start of the line for use when a
		   source line is to be printed. */
		if (c == '\n') {
			col_count = 0;
			*line_start = ftell(in_file);
			}
		else col_count++;

		switch((int) state) {
			/* The "home" state. Quoted strings and comments are
			   stripped. Words consisting of letters, digits and
			   the underscore are gathered. */
			case NEUTRAL:
				switch(c) {
					case '(':
						return T_OPENPAREN;
					case ')':
						return T_CLOSEPAREN;
					case '#':
						if (col_count == 1)
							return T_PREPROCESS;
						continue;
					case '\n':
						return T_NL;
					case '"':
						state = INQUOTE;
						continue;
					case '\'':
						state = INSQUOTE;
						continue;
					case '{':
						return T_OPENBRACE;
					case '}':	/*}*/
						return T_CLOSEBRACE;
					case '/':	/* start of comment? */
						if ((c2 = getc(in_file)) == '*') {
							state = INCOMMENT;
							col_count++;
							continue;
							}
						else {
							ungetc(c2, in_file);
							continue;
							}
					case ';':
						return T_SEMI;
					case ',':
						return T_COMMA;
					case '\\': /* toss the escape */
						getc(in_file);
						continue;
					default:
						if (isalnum(c) || c == '_') {
							state = INWORD;
							*w++ = c;
							}
						continue;
					}
			/* Stay in this state, tossing characters, until the
			   closing marker. */
			case INCOMMENT:
				switch(c) {
					case '*':	/* end of comment? */
						if ((c2 = getc(in_file)) == '/') {
							state = NEUTRAL;
							col_count++;
							continue;
							}
						else {
							ungetc(c2, in_file);
							continue;
							}
					default:
						continue;
					}
			case INQUOTE:
				switch(c) {
					case '"':
						state = NEUTRAL;
						continue;
					case '\\': /* toss the escape */
						getc(in_file);
						continue;
					default:
						continue;
					}
			case INSQUOTE:
				switch(c) {
					case '\'':
						state = NEUTRAL;
						continue;
					case '\\': /* toss the escape */
						getc(in_file);
						continue;
					default:
						continue;
					}
			/* Gather up the word. */
			case INWORD:
				if (isalnum(c) || c == '_') {
					*w++ = c;
					continue;
					}
				else	{
					ungetc(c, in_file);
					*w = NULL;
					col_count--;
					return T_WORD;
					}
			}
		}
	
	return T_EOF;
	}

/* Use the previously stored ftell() of the start of line to dump the source
   line. */
print_defn_line(in_file, line_start)
FILE	*in_file;
long	line_start;
{
	long	current_position;
	int	c;

	current_position = ftell(in_file);
	fseek(in_file, line_start, SEEK_SET);

	printf("?^");
	while ((c = getc(in_file)) != EOF && c != '\n') putchar(c);
	printf("$?\n");

	fseek(in_file, current_position, SEEK_SET);
	}
