/* Revision-control identification string ($Id$ keyword) for this file. */
char *parsers_rcs = "$Id: parsers.c,v 1.7 1997/04/17 22:34:25 ACJC Exp $";
/* Written and copyright by the Anonymous Coders and Junkbusters Corporation.
 * Will be made available under the GNU General Public License.
 * This software comes with NO WARRANTY.
 */

#include <stdio.h>
#include <sys/types.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include <string.h>

#ifdef REGEX
#include <regex.h>
#endif

#include "jcc.h"

/* Request-line commands understood by convert_url().
 * Each entry holds the command prefix (including its trailing blank),
 * the length of that prefix, and the function that parses the rest of
 * the line.  convert_url() compares with strncmp(), so these prefixes
 * are matched case-sensitively.  The all-NULL entry ends the table.
 */
struct parsers url_patterns[] = {
	{ "GET ", 		 4,	url_http		},
	{ "POST ",		 5,	url_http		},
	{ "CONNECT ",		 8,	url_https		},
	{  NULL,		 0,	NULL			}
};

/* Header-line prefixes and the parser function attached to each.
 * Each entry holds the prefix text, the length of that text, and the
 * handler.  The prefixes are written in lowercase because match()
 * lower-cases the candidate line before comparing.  The all-NULL
 * entry ends the table.
 * NOTE(review): presumably this is the table handed to sed() for
 * client requests -- confirm in the caller.
 */
struct parsers client_patterns[] = {
	{ "get ",		 4,	intercept_url		},
	{ "referer:",		 8,	client_referer		},
	{ "user-agent:",	11,	client_uagent		},
	{ "ua-",		 3,	client_ua		},
	{ "from:",		 5,	client_from		},
	{ "cookie:",		 7,	client_send_cookie	},
	{ "x-forwarded-for:",	16,	client_x_forwarded	},
	{ "proxy-connection:",	17,	zap			},
	{ NULL,			 0,	NULL			}
};

/* URL basenames that intercept_url() answers locally instead of
 * forwarding.  intercept_url() compares the whole basename with
 * strcmp(); the length field is kept equal to the string length so
 * the table stays consistent with the other pattern tables.
 */
struct parsers intercept_patterns[] = {
	{ "show-proxy-args",    15,	show_proxy_args		},
	{ NULL,			 0,	NULL			}
};

/* Pattern table with a single entry that routes "set-cookie:" lines
 * to server_set_cookie().  The all-NULL entry ends the table.
 * NOTE(review): presumably handed to sed() for server responses --
 * confirm in the caller.
 */
struct parsers server_patterns[] = {
	{ "set-cookie:",	11,	server_set_cookie	},
	{ NULL,			 0,	NULL			}
};

/* NULL-terminated list of header "adder" functions.  Each one takes
 * a char * output position and returns an int; sed() walks a list of
 * this shape, calling every function and advancing its output
 * position by the value returned.
 * NOTE(review): presumably passed to sed() as more_headers for client
 * requests -- confirm in the caller.
 */
int (*add_client_headers[])() = {
	client_cookie_adder,
	client_x_forwarded_adder,
	client_xtra_adder,
	NULL
};

/* NULL-terminated list of header adder functions; currently empty,
 * so a walk over it calls nothing.
 * NOTE(review): presumably the server-side counterpart of
 * add_client_headers -- confirm in the caller.
 */
int (*add_server_headers[])() = {
	NULL
};

/* Find the first entry of pats whose str is a prefix of buf.
 *
 * buf is lower-cased character by character before comparing, the
 * pattern text is used as written, so patterns must be lowercase to
 * match.  The table must end with an entry whose str is NULL.
 *
 * Returns a pointer to the matching entry, or NULL if none matches.
 */
struct parsers *
match(buf, pats)
char *buf;
struct parsers *pats;
{
	struct parsers *v;

	for(v = pats; v->str != NULL; v++) {
		const char *q = v->str;
		const char *p = buf;

		/* <ctype.h> functions need a value representable as
		 * unsigned char; a plain (possibly negative) char is not.
		 */
		while(*q && (*q == tolower((unsigned char)*p))) {
			q++, p++;
		}

		/* pattern exhausted => buf starts with it */
		if(*q == '\0') {
			return(v);
		}
	}
	return(NULL);
}

/* Read a header from fd into buf (capacity n), one byte per read().
 *
 * The original author chose byte-at-a-time reads so as not to
 * interfere with the select()/read() interface of the main loop.
 *
 * Carriage returns are discarded.  The header is complete when two
 * newlines arrive with nothing but CRs between them; the buffer is
 * then NUL terminated, fd_in_body[fd] is set to 1 and the number of
 * bytes stored is returned.
 *
 * Returns -1 if a read() does not deliver exactly one byte or if the
 * buffer fills up before the end of the header is seen.
 */
int
read_header(int fd, char *buf, int n)
{
	char *cur   = buf;		/* where the next byte lands  */
	char *limit = buf + n - 1;	/* keeps room for the NUL     */
	int saw_newline = 0;		/* previous kept byte was LF  */

	while(cur < limit) {
		if(read(fd, cur, 1) != 1) break;

		if(*cur == '\r') {
			/* drop the CR: the next byte overwrites it */
			continue;
		}

		if(*cur == '\n') {
			if(saw_newline) {
				cur++;
				*cur = '\0';        /* terminate the buffer   */
				fd_in_body[fd] = 1; /* indicate end of header */
				return(cur - buf);  /* return header length   */
			}
			saw_newline = 1;
		} else {
			saw_newline = 0;
		}

		cur++;
	}

	/* header too long, or the read failed: bail out either way */
	return(-1);
}

/* add, delete or modifiy lines in the HTTP header streams
 * assumes that on entry, buf[] contains a complete NULL terminated header;
 * and on return, it contains a complete, possibly modified, header.
 */
int
sed(fd, buf, max, n, pats, more_headers)
char *buf;
struct parsers *pats[];
int (*more_headers[])();
{
	static char  tmp[64*1024];
	static char  hdr[64*1024];
	char *p, *t, *eol;
	int len;
	struct parsers *v;
	int (**f)();

	p = buf;	/* the original header */
	t = tmp;	/* holding area for modified header */

	while(*p) {
		if((eol = strchr(p, '\n')) == NULL) {
			/* no end of line ?? */
			return(-1);
		}

		/* this is the blank line that terminates the header */
		if(eol == p) break;

		*eol = '\0';

		/* copy header line into a place where it can be modified */
		strcpy(hdr, p);

		if(DEBUG(HDR)) fprintf(log, "scan: %s", hdr);

		if((v = match(hdr, pats))) {
			len = v->parser(v, hdr);
			hdr[len] = '\0';
		}

		if(DEBUG(HDR)) fprintf(log, "\n");

		/* copy non-empty header lines to the tmp area */
		if(*hdr) {
			char *h = hdr;
			while(*t = *h++) t++;
			*t++ = '\r'; *t++ = '\n';
		}

		/* bump the pointer to the next line */
		p = eol + 1;
	}

	/* put additional headers into tmp area */
	for(f = more_headers; *f ; f++) {
		t += (*f)(t);
	}

	/* add the blank line that terminates the header */
	*t++ = '\r'; *t++ = '\n';

	/* overwrite the original header with the sanitized version */
	len = t - tmp;

	/* verify that new header will fit into the buffer */
	if(len >= max) return(-1);

	memcpy(buf, tmp, len);

	/* null terminate the buffer */
	buf[len] = '\0';

	return(len);
}

/* Split a request line into host, port and path.
 *
 * buf is compared (case-sensitively, via strncmp) against each
 * command prefix in url_patterns.  On the first hit the entry's
 * parser is called with buf, a pointer to the first character after
 * the command, and the three output arguments; its result is returned
 * unchanged.  Returns -1 if no command prefix matches.
 */
int
convert_url(buf, host, port, path)
char *buf, *host, *path;
int *port;
{
	struct parsers *cmd = url_patterns;

	while(cmd->str != NULL) {
		if(strncmp(buf, cmd->str, cmd->len) == 0) {
			/* second argument: first char past the command */
			return(cmd->parser(buf, buf + cmd->len, host, port, path));
		}
		cmd++;
	}

	if(DEBUG(HDR)) {
		fprintf(log,
			"%s: error: unrecognized URL command = '%s'\n", prog, buf);
	}

	return(-1);
}

/* intercept_url() doesn't modify the url.  It looks at the "basename"
 * of the url -- the text between the last '/' and the last blank on
 * the line -- and sets the global `interceptor' accordingly: to the
 * matching intercept_patterns handler, or to NULL when nothing
 * matches.  The comment this replaces states that when the pointer is
 * set the proxy answers the request itself instead of forwarding it.
 *
 * The line is cut at its last blank while the basename is examined
 * and restored before returning, so no copy of the tail is needed
 * (the previous fixed-size copy could overflow on a long line).
 *
 * Returns the length of s, or 0 when the line has no blank or no '/'
 * before its last blank.  The incoming v is ignored and reused as the
 * table cursor.
 */
int
intercept_url(struct parsers *v, char *s)
{
	char *base;
	char *p;

	p = strrchr(s, ' ');

	if(p == NULL) {
		if(DEBUG(HDR)) fprintf(log,
			"%s: improper URL '%s'\n", prog, s);
		return(0);
	}

	/* temporarily end the string where the url ends */
	*p = '\0';

	base = strrchr(s, '/');

	if(base == NULL) {
		*p = ' ';	/* put the line back together */
		if(DEBUG(HDR)) fprintf(log,
			"%s: improper URL '%s'\n", prog, s);
		return(0);
	}

	/* first char past the slash */
	base++;

	interceptor = NULL;

	for(v = intercept_patterns; v->str; v++) {
		if(strcmp(base, v->str) == 0) {
			interceptor = v->parser;
			break;
		}
	}

	*p = ' ';	/* put the line back together */

	return(strlen(s));
}

/* here begins the family of parser functions that reformat header lines */

/* parses and modifies buf in-place as follows:
 *
 *	buf = "GET http://somehost[:someport]/path otherstuff"
 *
 * becomes
 *
 *	host = somehost
 *	port = someport (default value = 80)
 *	path = /path
 *	buf  = "GET /path otherstuff"
 *
 * buf is rewritten only when forward_host is NULL; when forwarding to
 * another proxy the full URL is left in place.
 *
 * b points at the first character after the command in buf.  The
 * "http://" scheme is matched case-insensitively.  host and path are
 * written without any size check, so the caller must supply buffers
 * at least as large as buf.
 *
 * Returns the resulting length of buf, or -1 if the scheme is not
 * "http://" or no '/' follows the host.
 */

int
url_http(buf, b, host, port, path)
char *buf, *b, *host, *path;
int *port;
{
	const char *scheme = "http://";
	char *p, *t, *b_sav = b;

	/* cast: <ctype.h> functions are undefined for negative chars */
	while(*scheme && (*scheme == tolower((unsigned char)*b))) {
		scheme++, b++;
	}

	if(*scheme != '\0') {
		if(DEBUG(HDR)) fprintf(log,
			"%s: error: unrecognized HTTP = '%s'\n", prog, buf);
		return(-1);
	}

	/* b points at the first char past the "http://" */

	if((p = strchr(b, '/')) == NULL) {
		if(DEBUG(HDR)) fprintf(log,
			"%s: error no host in URL = '%s'\n", prog, buf);
		return(-1);
	}

	/* cut at the slash just long enough to copy host[:port] */
	*p = '\0';

	strcpy(host, b);

	*p = '/';

	t = p;

	while((*t) && (*t != ' ') && (*t != '\r') && (*t != '\n')) *path++ = *t++;

	*path = '\0';

	/* if passing to another proxy, don't elide the target host info */
	if(forward_host == NULL) {
		/* source and destination overlap inside buf, which
		 * strcpy() does not permit; memmove() does.
		 */
		memmove(b_sav, p, strlen(p) + 1);
	}

	*port = 80;

	if((p = strchr(host, ':'))) {
		*p++ = '\0';
		if(isdigit((unsigned char)*p)) {
			*port = atoi(p);
		}
	}
	return(strlen(buf));
}

/* parses and modifies buf in-place as follows:
 *
 *	buf = "CONNECT somehost[:someport] otherstuff"
 *
 * becomes
 *
 *	host = somehost
 *	port = someport (default value = 80)
 *	path = ""
 *	buf  = ""
 *
 * buf is emptied only when forward_host is NULL; when forwarding to
 * another proxy it is left untouched.
 *
 * b points at the first character after the command in buf.  The
 * global SSL is set to 1 as soon as the function is entered, even if
 * parsing then fails.  host is written without any size check.
 *
 * Returns the resulting length of buf, or -1 if no blank follows the
 * host.
 */

int
url_https(buf, b, host, port, path)
char *buf, *b, *host, *path;
int *port;
{
	char *p;

	SSL = 1;

	*path = '\0';

	if((p = strchr(b, ' ')) == NULL) {
		if(DEBUG(HDR)) fprintf(log,
			"%s: error no host in URL = '%s'\n", prog, buf);
		return(-1);
	}

	/* cut at the blank just long enough to copy host[:port] */
	*p = '\0';

	strcpy(host, b);

	*p = ' ';

	*port = 80;

	if((p = strchr(host, ':'))) {
		*p++ = '\0';
		/* cast: <ctype.h> functions are undefined for negative chars */
		if(isdigit((unsigned char)*p)) {
			*port = atoi(p);
		}
	}

	/* if passing to another proxy, don't elide the target host info */
	if(forward_host == NULL) {
		*buf = '\0';
	}

	return(strlen(buf));
}

/* Header parser that deletes the line unconditionally: it reports a
 * new length of 0, which makes sed() drop the line.  Neither argument
 * is examined.
 */
int
zap(struct parsers *v, char *s)
{
	if(DEBUG(HDR)) {
		fprintf(log, " crunch!");
	}

	return(0);
}

/* Header parser that deletes the line unconditionally by reporting a
 * new length of 0.  Neither argument is examined.  Used by
 * server_set_cookie() to discard cookies.
 */
int
crumble(struct parsers *v, char *s)
{
	if(DEBUG(HDR)) {
		fprintf(log, " crunch!");
	}

	return(0);
}

/* Header parser for "referer:" lines, steered by the global `referer':
 *
 *	NULL		delete the line
 *	starts with '.'	keep the line as sent
 *	starts with '@'	keep the line if send_user_cookie is set,
 *			otherwise delete it
 *	anything else	replace the line with "Referer: <referer>"
 *
 * Returns the new length of s; 0 means the line is deleted.
 */
int
client_referer(struct parsers *v, char *s)
{
	if(referer != NULL) {
		switch(*referer) {
		case '.':
			return(strlen(s));

		case '@':
			if(send_user_cookie) {
				return(strlen(s));
			}
			break;	/* not allowed: delete below */

		default:
			if(DEBUG(HDR)) fprintf(log, " modified");
			sprintf(s, "Referer: %s", referer);
			return(strlen(s));
		}
	}

	if(DEBUG(HDR)) fprintf(log, " crunch!");
	return(0);
}

/* Header parser for "user-agent:" lines, steered by the global
 * `uagent':
 *
 *	NULL		replace the line with DEFAULT_USER_AGENT
 *	starts with '.'	keep the line as sent
 *	starts with '@'	keep the line if send_user_cookie is set,
 *			otherwise replace it with DEFAULT_USER_AGENT
 *	anything else	replace the line with "User-Agent: <uagent>"
 *
 * Returns the new length of s.
 */
int
client_uagent(struct parsers *v, char *s)
{
	if(uagent != NULL) {
		if(*uagent == '.') {
			return(strlen(s));
		}

		if(*uagent != '@') {
			if(DEBUG(HDR)) fprintf(log, " modified");
			sprintf(s, "User-Agent: %s", uagent);
			return(strlen(s));
		}

		/* '@': the user's own value is tied to the cookie policy */
		if(send_user_cookie) {
			return(strlen(s));
		}
	}

	/* nothing configured, or '@' without permission */
	strcpy(s, DEFAULT_USER_AGENT);
	if(DEBUG(HDR)) fprintf(log, " default");
	return(strlen(s));
}

/* Header parser for "ua-" lines, steered by the global `uagent' with
 * the same conventions client_uagent() uses:
 *
 *	NULL		delete the line
 *	starts with '.'	keep the line as sent
 *	starts with '@'	keep the line if send_user_cookie is set,
 *			otherwise delete it
 *	anything else	delete the line
 *
 * The tests used to be written with != instead of ==, which let the
 * line through for every custom uagent value and left the final
 * branch unreachable.
 *
 * Returns the new length of s; 0 means the line is deleted.
 */
int
client_ua(struct parsers *v, char *s)
{
	if(uagent != NULL) {
		if(*uagent == '.') {
			return(strlen(s));
		}

		if((*uagent == '@') && send_user_cookie) {
			return(strlen(s));
		}
	}

	if(DEBUG(HDR)) fprintf(log, " crunch!");
	return(0);
}

/* Header parser for "from:" lines, steered by the global `from':
 *
 *	NULL		delete the line
 *	starts with '.'	keep the line as sent
 *	anything else	replace the line with "From: <from>"
 *
 * Returns the new length of s; 0 means the line is deleted.
 */
int
client_from(struct parsers *v, char *s)
{
	if(from != NULL) {
		if(*from != '.') {
			sprintf(s, "From: %s", from);
			if(DEBUG(HDR)) fprintf(log, " modified");
		}
		return(strlen(s));
	}

	/* if not set, zap it */
	if(DEBUG(HDR)) fprintf(log, " crunch!");
	return(0);
}

/* Header parser for "cookie:" lines.
 *
 * When send_user_cookie is set, the cookie value (the text after the
 * field name and any blanks or tabs that follow it) is appended to
 * cookie_list.  The line itself is always deleted; the collected
 * cookies are expected to be emitted later by client_cookie_adder().
 *
 * The value used to be located at the fixed offset v->len + 1, which
 * pointed past the terminator for a bare "Cookie:" and dropped a
 * character when no blank followed the colon.
 *
 * Assumes v->len is the length of the matched field name, so that
 * s + v->len is still inside the string.
 *
 * Always returns 0.
 */
int
client_send_cookie(struct parsers *v, char *s)
{
	char *value;

	if(send_user_cookie) {
		value = s + v->len;
		while((*value == ' ') || (*value == '\t')) value++;

		enlist(cookie_list, value);
	} else {
		if(DEBUG(HDR)) fprintf(log, " crunch!");
	}

	/* always return zero here.  the cookie header will be sent
	 * at the end of the other headers.
	 */
	return(0);
}

/* Header parser for "set-cookie:" lines.
 *
 * When jarfile is set, writes "<connected_to>\t<cookie value>\n" to
 * `jar'.  The cookie value is the text after the field name and any
 * blanks or tabs that follow it; it used to be taken at the fixed
 * offset v->len + 1, which pointed past the terminator for a bare
 * "Set-Cookie:".
 *
 * The line is then deleted (via crumble()) when accept_server_cookie
 * is 0, and kept unchanged otherwise.
 *
 * Assumes v->len is the length of the matched field name, so that
 * s + v->len is still inside the string.
 *
 * Returns the new length of s; 0 means the line is deleted.
 */
int
server_set_cookie(struct parsers *v, char *s)
{
	if(jarfile) {
		char *value = s + v->len;

		while((*value == ' ') || (*value == '\t')) value++;

		fprintf(jar, "%s\t%s\n", connected_to, value);
	}

	if(accept_server_cookie == 0) return(crumble(v, s));

	return(strlen(s));
}

/* Append a copy of str to a NULL-terminated array of strings.
 *
 * The copy (made with strdup) goes into the first NULL slot.  The
 * slot after it becomes the new terminator: whatever pointer it held
 * is passed to free() and the slot is set to NULL.
 *
 * No capacity is passed in, so the caller must guarantee the array
 * has room for one more entry plus the terminator, and that the slot
 * after the old terminator holds either NULL or a pointer that may be
 * freed.  The result of strdup() is stored without being checked.
 */
void
enlist(char **list, char *str)
{
	char **slot;

	/* locate the current terminator */
	for(slot = list; *slot != NULL; slot++)
		;

	*slot = strdup(str);
	slot++;

	/* free(NULL) is a no-op, so no test is needed */
	free(*slot);
	*slot = NULL;
}

/* Header adder: writes one "Cookie: ...\r\n" line at p.
 *
 * The value is every string in cookie_list followed by every string
 * in wafer_list (the latter passed through url_encode() with
 * cookie_code_map), joined with "; ".  If that value is empty, only
 * a NUL is stored at p and 0 is returned.
 *
 * The line is assembled directly in the output buffer.  It used to be
 * built in a BUFSIZ-sized array on the stack first, which cookies
 * sent by the client could overflow.  p itself still comes without a
 * size, so the caller must provide enough room.
 *
 * NOTE(review): ownership of the string returned by url_encode() is
 * not visible here -- confirm whether it needs to be freed.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
client_cookie_adder(char *p)
{
	static const char prefix[] = "Cookie: ";
	char **s, *value, *t;

	*p = '\0';

	/* leave a gap for the field name; it is filled in at the
	 * end, once we know there is a value to send
	 */
	value = p + (sizeof(prefix) - 1);
	t     = value;

	for(s = cookie_list; *s ; s++) {
		if(t > value) {
			*t++ = ';' ;
			*t++ = ' ' ;
		}
		strcpy(t, *s);
		t += strlen(t);
	}

	for(s = wafer_list;  *s ; s++) {
		if(t > value) {
			*t++ = ';' ;
			*t++ = ' ' ;
		}
		strcpy(t, url_encode(cookie_code_map, *s));
		t += strlen(t);
	}

	/* nothing collected: p still holds the empty string */
	if(t == value) {
		return(0);
	}

	memcpy(p, prefix, sizeof(prefix) - 1);

	*t++ = '\r';
	*t++ = '\n';
	*t   = '\0';

	return(t - p);
}

/* Header adder: writes every string in the NULL-terminated xtra_list
 * at p, each followed by CRLF.
 *
 * p is NUL terminated up front and the length is derived from the
 * write position.  With an empty list the function used to write
 * nothing and then measure whatever happened to be at p.
 *
 * p comes without a size, so the caller must provide enough room.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
client_xtra_adder(char *p)
{
	char **s, *t;

	t  = p;
	*t = '\0';

	for(s = xtra_list; *s; s++) {
		sprintf(t, "%s\r\n", *s);
		t += strlen(t);
	}

	return(t - p);
}

/* Handler for "x-forwarded-for:" lines: when add_forwarded is set,
 * copies the string at p into the global x_forwarded.  The copy is
 * not bounds-checked.
 *
 * Always returns 0, so the line is removed from the header; the
 * information is expected to be re-sent at the end of the header by
 * client_x_forwarded_adder().
 *
 * NOTE(review): this function is registered in client_patterns, and
 * sed() calls pattern parsers as parser(entry, line).  With this
 * one-parameter signature p receives the table entry rather than the
 * header text.  The likely fix is the (struct parsers *v, char *s)
 * signature the other parsers use, but the declaration in jcc.h has
 * to change with it -- confirm and fix together.
 */
int
client_x_forwarded(char *p)
{
	if(add_forwarded != 0) {
		strcpy(x_forwarded, p);
	}

	return(0);
}

/* Header adder for the forwarding chain.
 *
 * Does nothing and returns 0 when add_forwarded is 0 (p is not
 * touched in that case).  Otherwise writes one line at p:
 *
 *	"<x_forwarded>, <remote_ip>\r\n"	if x_forwarded is non-empty
 *	"X-Forwarded-For: <remote_ip>\r\n"	if it is empty
 *
 * p comes without a size, so the caller must provide enough room.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
client_x_forwarded_adder(char *p)
{
	if(add_forwarded != 0) {
		if(*x_forwarded != '\0') {
			sprintf(p, "%s, %s\r\n", x_forwarded, remote_ip);
		} else {
			sprintf(p, "X-Forwarded-For: %s\r\n", remote_ip);
		}
		return(strlen(p));
	}

	return(0);
}

/* Look up host/port/path in the block list.
 *
 * The list is walked starting at blist->next.  An entry matches when
 *   - its port is 0 or equals port, and
 *   - its domain is empty or domaincmp() reports a match against the
 *     host split into components, and
 *   - its path is NULL or matches path (regexec() when REGEX is
 *     defined, otherwise a strncmp() over the entry's pathlen).
 *
 * A host that does not fit the local BUFSIZ work buffer is rejected
 * before it is copied; it used to overflow the stack.  With the copy
 * bounded, the component vector (BUFSIZ slots) is large enough too.
 *
 * Returns the first matching entry, or NULL if there is none or the
 * host is too long to examine.
 */
struct block_spec *
block_url(char *host, int port, char *path)
{
	struct block_spec *b, c[1];
	char buf[BUFSIZ], *vec[BUFSIZ];

	if(strlen(host) >= sizeof(buf)) {
		return(NULL);
	}

	/* dsplit() modifies its input, so work on a copy */
	strcpy(buf, host);

	c->url->dcnt = dsplit(buf, vec);
	c->url->dvec = vec;

	for(b = blist->next; b ; b = b->next) {
		if((b->url->port == 0) || (b->url->port == port)) {
			if((b->url->domain[0] == '\0') || (domaincmp(b->url, c->url) == 0)) {
				if((b->url->path == NULL) ||
#ifdef REGEX
				   (regexec(b->url->preg, path, 0, NULL, 0) == 0)
#else
				   (strncmp(b->url->path, path, b->url->pathlen) == 0)
#endif
				) {
					return(b);
				}
			}
		}
	}
	return(NULL);
}

/* Look up host/port/path in the cookie list.
 *
 * The list is walked starting at clist->next.  An entry matches when
 *   - its port is 0 or equals port, and
 *   - its domain is empty or domaincmp() reports a match against the
 *     host split into components, and
 *   - its path is NULL or matches path (regexec() when REGEX is
 *     defined, otherwise a strncmp() over the entry's pathlen).
 *
 * A host that does not fit the local BUFSIZ work buffer is rejected
 * before it is copied; it used to overflow the stack.  With the copy
 * bounded, the component vector (BUFSIZ slots) is large enough too.
 *
 * Returns the first matching entry, or NULL if there is none or the
 * host is too long to examine.
 */
struct cookie_spec *
cookie_url(char *host, int port, char *path)
{
	struct cookie_spec *b, c[1];
	char buf[BUFSIZ], *vec[BUFSIZ];

	if(strlen(host) >= sizeof(buf)) {
		return(NULL);
	}

	/* dsplit() modifies its input, so work on a copy */
	strcpy(buf, host);

	c->url->dcnt = dsplit(buf, vec);
	c->url->dvec = vec;

	for(b = clist->next; b ; b = b->next) {
		if((b->url->port == 0) || (b->url->port == port)) {
			if((b->url->domain[0] == '\0') || (domaincmp(b->url, c->url) == 0)) {
				if((b->url->path == NULL) ||
#ifdef REGEX
				   (regexec(b->url->preg, path, 0, NULL, 0) == 0)
#else
				   (strncmp(b->url->path, path, b->url->pathlen) == 0)
#endif
				) {
					return(b);
				}
			}
		}
	}
	return(NULL);
}

/* Split a domain name into its dot-separated components, in place.
 *
 * Every character of domain is lower-cased and every '.' is replaced
 * by a NUL.  vec[0] is set to the start of domain and each following
 * slot to the character after a dot, so an empty string still yields
 * one (empty) component.
 *
 * vec comes without a size: the caller must supply one more slot
 * than there are dots in domain.
 *
 * Returns the number of components stored in vec.
 */
int
dsplit(char *domain, char **vec)
{
	int cnt = 0;
	char *p;

	vec[cnt++] = domain;

	for(p = domain; *p ; p++) {
		if(*p == '.') {
			*p = '\0';
			vec[cnt++] = p + 1;
		} else {
			/* cast: <ctype.h> functions are undefined
			 * for negative char values
			 */
			*p = (char)tolower((unsigned char)*p);
		}
	}
	return(cnt);
}

/* the "pattern" is a domain that may contain a '*' as a wildcard.
 * the "fqdn" is the domain name against which the patterns are compared.
 * both are given as component vectors (dvec/dcnt), as built by dsplit().
 *
 * domaincmp("a.b.c" , "a.b.c")	=> 0 (MATCH)
 * domaincmp("a*.b.c", "a.b.c")	=> 0 (MATCH)
 * domaincmp("b.c"   , "a.b.c")	=> 0 (MATCH)
 *
 * components are compared pairwise starting from the rightmost one.
 * fqdn characters are lower-cased before comparing, pattern characters
 * are used as written.  a '*' in a pattern component accepts whatever
 * remains of the corresponding fqdn component.  a pattern with more
 * components than the fqdn does not match; a pattern with a dcnt of
 * zero matches everything.
 *
 * returns 0 on a match and 1 otherwise.
 */

int
domaincmp(struct url_spec *pattern, struct url_spec *fqdn)
{
	char **pv, **fv;	/* vectors  */
	int    pn,   fn;	/* counters */
	char  *p,   *f;		/* chars    */

	pv = pattern->dvec;
	pn = pattern->dcnt;

	fv = fqdn->dvec;
	fn = fqdn->dcnt;

	while((pn > 0) && (fn > 0)) {
		p = pv[--pn];
		f = fv[--fn];

		/* casts: <ctype.h> functions are undefined
		 * for negative char values
		 */
		while(*p && *f && (*p == tolower((unsigned char)*f))) {
			p++, f++;
		}

		/* stopped on a difference that is not a wildcard */
		if((*p != tolower((unsigned char)*f)) && (*p != '*')) return(1);
	}

	/* pattern components left over: it is longer than the fqdn */
	if(pn > 0) return(1);

	return(0);
}

