
/*-
 * Copyright (c) 1995 The Apache Group. All rights reserved.
 * 
 *
 * Apache httpd license
 * ====================
 * 
 *
 * This is the license for the Apache Server. It covers all the
 * files which come in this distribution, and should never be removed.
 * 
 * The "Apache Group" has based this server, called "Apache", on
 * public domain code distributed under the name "NCSA httpd 1.3".
 * 
 * NCSA httpd 1.3 was placed in the public domain by the National Center 
 * for Supercomputing Applications at the University of Illinois 
 * at Urbana-Champaign.
 * 
 * As requested by NCSA we acknowledge,
 * 
 *  "Portions developed at the National Center for Supercomputing
 *   Applications at the University of Illinois at Urbana-Champaign."
 *
 * Copyright on the sections of code added by the "Apache Group" belong
 * to the "Apache Group" and/or the original authors. The "Apache Group" and
 * authors hereby grant permission for their code, along with the
 * public domain NCSA code, to be distributed under the "Apache" name.
 * 
 * Reuse of "Apache Group" code outside of the Apache distribution should
 * be acknowledged with the following quoted text, to be included with any new
 * work;
 * 
 * "Portions developed by the "Apache Group", taken with permission 
 *  from the Apache Server   http://www.apache.org/apache/   "
 *
 *
 * Permission is hereby granted to anyone to redistribute Apache under
 * the "Apache" name. We do not grant permission for the resale of Apache, but
 * we do grant permission for vendors to bundle Apache free with other software,
 * or to charge a reasonable price for redistribution, provided it is made
 * clear that Apache is free. Permission is also granted for vendors to 
 * sell support for for Apache. We explicitly forbid the redistribution of 
 * Apache under any other name.
 * 
 * The "Apache Group" makes no promises to support "Apache". Users and
 * sellers of Apache support, and users of "Apache Group" code, should be 
 * aware that they use "Apache" or portions of the "Apache Group" code at 
 * their own risk. While every effort has been made to ensure that "Apache"
 * is robust and safe to use, we will not accept responsibility for any
 * damage caused, or loss of data or income which results from its use.
 * 
 */



/*
 * http_mime_db.c: keeps track of MIME types the client is willing to
 * accept, and contains code to handle type arbitration.
 *
 * rst
 */

#include "httpd.h"

/*
 * TO DO --- error code 406.  Unfortunately, the specification for
 *           a 406 reply in the current draft standard is unworkable;
 *           we return 404 for these pending a workable spec. 
 *
 * MAYBE --- Server-side inclusion of multi-variant entities ---
 *           see send_included_file in http_includes.c, which is
 *           juuust like send_file and send_node, except everything
 *           is *slightly* different.
 *
 * --- rst
 */

/* Size, in bytes, of every fixed-length string field in accept_rec and
 * var_rec (the terminating NUL included).
 */
#define MIME_TYPE_LEN 64
#define NMIME 40		/* Number of accept_foos we can handle
				 * from the client
				 * without going to malloc()
				 */

#define NVAR 10			/* Number of map entries; used as the
				 * initial nalloc of avail_vars
				 */

/* Record of available info on a media type specified by the client
 * (we also use 'em for encodings and languages).
 *
 * get_entry() fills one of these in from a single header entry:
 * quality defaults to 1.0, max_bytes and level default to 0.0, and
 * the "q"/"qs", "mxb" and "level" parameters override those defaults.
 */

typedef struct accept_rec {
    char type_name[MIME_TYPE_LEN];	/* Lowercased by get_entry() */
    float quality;			/* From "q" or "qs" */
    float max_bytes;			/* From "mxb"; best_match only
					 * enforces it when > 0
					 */
    float level;			/* From "level" */
} accept_rec;

/* Record of available info on a particular variant
 *
 * Note that a few of these fields are updated by the actual negotiation
 * code.  These are:
 *
 * quality --- initialized to the value of qs, and subsequently jiggered
 *             to reflect the client's preferences.  In particular, it
 *             gets zeroed out if the variant has an unacceptable content
 *             encoding, or if it is in a language which the client
 *             doesn't accept and some other variant *is* in a language
 *             the client accepts.
 *
 * level_matched --- initialized to zero.  Set to the value of level
 *             if the client actually accepts this media type at that
 *             level (and *not* if it got in on a wildcard).  See level_cmp
 *             below.
 */

typedef struct var_rec {
    char type_name[MIME_TYPE_LEN];	/* Media type of the variant */
    char file_name[MIME_TYPE_LEN];	/* "uri:" value from a type map, or
					 * the directory entry name
					 */
    char content_encoding[MIME_TYPE_LEN];
    char lang[MIME_TYPE_LEN];
    float level;		/* Auxiliary to content-type... */
    float qs;			/* Source quality, copied from the
				 * parsed content-type entry
				 */
    float bytes;		/* Content length; 0 means "not known yet",
				 * see find_content_length
				 */
    int lang_index;		/* Index into accept_langs, or -1 */
    int is_pseudo_html;		/* text/html, *or* the INCLUDES_MAGIC_TYPEs */

    /* Above are all written-once properties of the variant.  The
     * two fields below are changed during negotiation:
     */
    
    float quality;	
    float level_matched;
} var_rec;

/* Lists of records of both types.
 *
 * The list itself does not know its element size; callers hand the
 * record size to new_elt(), which indexes recs as raw bytes.
 */

typedef struct rec_list {
    int nactive;		/* Number of records currently in use */
    int nalloc;			/* Number of records recs has room for */
    int is_malloced;		/* Set by new_elt() once recs has been
				 * moved to malloc()ed storage
				 */
    char *recs;			/* Raw storage for the records */
} rec_list;

/* Statically allocated initial storage for each list.  A list keeps
 * using its static array until new_elt() finds nactive == nalloc, at
 * which point the records are moved to malloc()ed memory.
 *
 * Each array is dimensioned by the same constant that is recorded as
 * the list's nalloc, so the declared capacity and the real capacity
 * always agree.
 */

accept_rec basic_accepts[NMIME];
rec_list accepts = {0, NMIME, 0, (void *)basic_accepts};

accept_rec basic_accept_encodings[NMIME];
rec_list accept_encodings = {0, NMIME, 0, (void *)basic_accept_encodings};

accept_rec basic_accept_langs[NMIME];
rec_list accept_langs = {0, NMIME, 0, (void *)basic_accept_langs};

/* Variants: the list advertises room for NVAR records, so that is all
 * we reserve (the array used to be NMIME long, of which only the first
 * NVAR slots could ever be used).
 */
var_rec basic_avail_vars[NVAR];
rec_list avail_vars = {0, NVAR, 0, (void *)basic_avail_vars};

/* Helpers for var_recs.
 *
 * clean_var_rec: put a variant record back into its "nothing known"
 * state --- every string empty, every number zero, and no language
 * index (-1).
 */

void clean_var_rec (var_rec *mime_info)
{
    /* Numeric properties */
    mime_info->lang_index = -1;
    mime_info->bytes = 0;
    mime_info->is_pseudo_html = 0;
    mime_info->level = mime_info->level_matched = 0.0;
    mime_info->qs = mime_info->quality = 0.0;

    /* String properties: only the first byte needs clearing */
    *mime_info->lang = '\0';
    *mime_info->content_encoding = '\0';
    *mime_info->file_name = '\0';
    *mime_info->type_name = '\0';
}

/* Copy what a parsed content-type entry tells us into a variant record:
 * the type name, the level, and the quality (which serves both as the
 * variant's qs and as the starting value of its negotiated quality).
 * Also works out whether the type counts as "pseudo HTML".
 */

void set_mime_fields (var_rec *var, accept_rec *mime_info)
{
    int html_like = 0;

    strcpy (var->type_name, mime_info->type_name);
    var->level = mime_info->level;
    var->quality = var->qs = mime_info->quality;

    if (strcmp (var->type_name, "text/html") == 0)
        html_like = 1;
    else if (strcmp (var->type_name, INCLUDES_MAGIC_TYPE) == 0)
        html_like = 1;
    else if (strcmp (var->type_name, INCLUDES_MAGIC_TYPE3) == 0)
        html_like = 1;

    var->is_pseudo_html = html_like;
}

/*****************************************************************
 *
 * Configuration vars...
 */

/* When zero, negotiation_common() forces a "Pragma: no-cache" header
 * on every negotiated response that is not a CGI script.
 */
int cache_negotiated_docs = 0;


/*****************************************************************
 *
 * Reinitialization --- as when starting a new transaction after
 * an old one is over.  (Not yet, but certainly for HTTP/1.1).
 */

/* Empty all four negotiation lists (client accepts, encodings and
 * languages, plus the available variants).  Only the active counts
 * are reset; whatever storage the lists hold is kept.
 */
void reset_mime_db()
{
    avail_vars.nactive = 0;
    accept_langs.nactive = 0;
    accept_encodings.nactive = 0;
    accepts.nactive = 0;
}

/* Empty a single list by resetting its active count; the list's
 * storage and allocated capacity are left untouched.
 */
void clear_rec_list (rec_list *recs)
{
    recs->nactive = 0;
}

/*****************************************************************
 *
 * Manipulating rec_lists
 */

/* Reserve the next free record in a list and return a pointer to it.
 *
 *   acc          the list to extend
 *   record_size  size in bytes of one record in this list
 *   out          passed to die() if more memory cannot be obtained
 *
 * When the list is full its capacity is doubled.  The first time that
 * happens the records are copied out of the original storage into
 * malloc()ed memory; after that the block is grown with realloc().
 *
 * The list is only updated once the new storage has actually been
 * obtained, so a failed allocation never leaves the list pointing at
 * NULL with an inflated nalloc, and never loses the old block.
 * die() is expected not to return (TODO confirm); should it return,
 * NULL is handed back instead of a pointer computed from NULL.
 */
void *new_elt (rec_list *acc, int record_size, FILE *out)
{
    if (acc->nactive == acc->nalloc) {

        /* Need to add space... */
        int old_bytes = acc->nactive * record_size;
        char *grown;

        if (acc->is_malloced) {
            grown = realloc (acc->recs, old_bytes * 2);
        }
        else {
            grown = malloc (old_bytes * 2);
            if (grown) memcpy (grown, acc->recs, old_bytes);
        }

        if (!grown) {
            die (SERVER_ERROR,"httpd: malloc failed in add_accept", out);
            return NULL;
        }

        acc->recs = grown;
        acc->nalloc *= 2;
        acc->is_malloced = 1;
    }

    return acc->recs + (record_size * acc->nactive++);
}


/* Append a copy of *accept_info to a list of accept_recs. */
void add_accept (rec_list *recs, accept_rec *accept_info, FILE *out)
{
    accept_rec *slot;

    slot = (accept_rec *)new_elt(recs, sizeof(accept_rec), out);
    memcpy (slot, accept_info, sizeof(accept_rec));
}

/* Append a copy of *mime_info to a list of var_recs. */
void add_var (rec_list *recs, var_rec *mime_info, FILE *out)
{
    var_rec *slot;

    slot = (var_rec *)new_elt(recs, sizeof(var_rec), out);
    memcpy (slot, mime_info, sizeof(var_rec));
}

/*****************************************************************
 *
 * Parsing (lists of) media types and their parameters, as seen in
 * HTTP header lines and elsewhere.
 */

/* Convert a NUL-terminated string to lower case, in place.
 *
 * Each byte is passed to tolower() as an unsigned char: handing it a
 * plain char that happens to be negative (any byte >= 0x80 where char
 * is signed) is undefined behaviour.
 */
void lowercase_string (char *cp)
{
    for (; *cp; ++cp)
        *cp = (char) tolower ((unsigned char) *cp);
}

/* Retrieve a token, spacing over it and returning a pointer to
 * the first non-white byte afterwards.  Note that these tokens
 * are delimited by semis and commas; and can also be delimited
 * by whitespace at the caller's option.
 *
 *   token         destination buffer, tok_len bytes long
 *   accept_line   text to read the token from
 *   accept_white  nonzero: whitespace is part of the token;
 *                 zero: whitespace ends the token
 *   tok_len       size of the destination buffer
 *   out           passed to die() if the token does not fit
 *
 * A double-quoted section is copied verbatim up to and including the
 * closing quote, so delimiters inside quotes do not end the token.
 *
 * The last byte of the buffer is reserved for the terminating NUL, so
 * at most tok_len - 1 bytes of token text are accepted.  (The length
 * check used to let that last byte be filled too, and the NUL was then
 * stored one byte past the end of the buffer.)  die() is expected not
 * to return --- TODO confirm.
 */

char *get_token (char *token, char *accept_line, int accept_white,
                 int tok_len, FILE *out)
{
    char *token_end = token + tok_len - 1;	/* slot kept for the NUL */

    /* Find first non-white byte */

    while (*accept_line && isspace((unsigned char)*accept_line))
        ++accept_line;

    /* copy into the 'token' array */

    while (*accept_line
           && (accept_white || !isspace((unsigned char)*accept_line))
           && *accept_line != ';' && *accept_line != ',')
    {
        if (token >= token_end)
            die (SERVER_ERROR,"httpd: mime type too long in accept", out);

        *token = *accept_line++;

        if (*token++ == '"')
            while (*accept_line) {
                if (token >= token_end)
                    die (SERVER_ERROR,"httpd: mime parm too long in accept", out);

                *token = *accept_line++;
                if (*token++ == '"') break;
            }
    }

    *token = '\0';

    /* Advance accept_line pointer to the next non-white byte */

    while (*accept_line && isspace((unsigned char)*accept_line))
        ++accept_line;

    return accept_line;
}

/*
 * Parse one entry --- a media type followed by any ";name=value"
 * parameters --- starting at accept_line, and store what we understand
 * in *result.  Returns a pointer just past the entry, including the
 * comma that ends it if there is one.
 *
 * Parameters understood: "q" or "qs" (quality), "mxb" (max_bytes) and
 * "level".  Anything else, and any parameter without a value, is
 * silently skipped.
 */

char *get_entry (accept_rec *result, char *accept_line, FILE *out)
{
    /* Defaults, used when the entry carries no parameters */

    result->level = 0.0;
    result->max_bytes = 0.0;
    result->quality = 1.0;

    /* The media type itself.  Whitespace ends the token, which is what
     * lets the old space-separated format
     *
     *    Accept: text/html text/plain moo/zot
     *
     * work with no special casing: parameters are only looked for when
     * the type is followed by a semicolon.
     */

    accept_line = get_token (result->type_name, accept_line, 0,
                             MIME_TYPE_LEN, out);

    /* Type names are compared with strcmp() later on */
    lowercase_string (result->type_name);

    /* KLUDGE!!! HTML is taken to be level 2.0 unless the browser
     * *explicitly* says otherwise; the magic include types carry
     * their own implied levels.
     */

    if (strcmp (result->type_name, "text/html") == 0
        && result->level == 0.0)
    {
        result->level = 2.0;
    }
    else if (strcmp (result->type_name, INCLUDES_MAGIC_TYPE) == 0) {
        result->level = 2.0;
    }
    else if (strcmp (result->type_name, INCLUDES_MAGIC_TYPE3) == 0) {
        result->level = 3.0;
    }

    /* Parameters, one per semicolon */

    while (*accept_line == ';') {
        char parm[MIME_TYPE_LEN];
        char *value;

        accept_line = get_token (parm, accept_line + 1, 1,
                                 MIME_TYPE_LEN, out);

        /* Lowercase the name, stopping at whitespace or '=' */

        value = parm;
        while (*value && !isspace(*value) && *value != '=') {
            *value = tolower(*value);
            ++value;
        }

        if (*value == '\0') continue;	/* Name only, no value: skip */

        *value++ = '\0';		/* parm now holds just the name */

        while (*value && (isspace(*value) || *value == '='))
            ++value;

        if (*value == '"') ++value;	/* Step over an opening quote */

        if (strcmp (parm, "q") == 0 || strcmp (parm, "qs") == 0)
            result->quality = atof(value);
        else if (strcmp (parm, "mxb") == 0)
            result->max_bytes = atof(value);
        else if (strcmp (parm, "level") == 0)
            result->level = atof(value);
    }

    if (*accept_line == ',') ++accept_line;

    return accept_line;
}
		 

/*****************************************************************
 *
 * Dealing with header lines ...
 */

/* Parse every entry on a header line and append each one, in order,
 * to the given list.
 */
void do_header_line (rec_list *accept_recs, char *accept_line, FILE *out)
{
    accept_rec entry;
    char *rest;

    for (rest = accept_line; *rest != '\0'; ) {
        rest = get_entry (&entry, rest, out);
        add_accept (accept_recs, &entry, out);
    }
}

/*****************************************************************
 *
 * Handling header lines from clients...
 */

/* Record the entries of a client header line in the 'accepts' list. */
void note_client_accept (char *accept_line, FILE *out)
{
    do_header_line (&accepts, accept_line, out);
}

/* Record the entries of a client header line in the 'accept_langs'
 * list; their order is what find_lang_index() later reports.
 */
void note_client_accept_lang (char *accept_line, FILE *out)
{
    do_header_line (&accept_langs, accept_line, out);
}

/* Record the entries of a client header line in the
 * 'accept_encodings' list.
 */
void note_client_accept_enc (char *accept_line, FILE *out)
{
    do_header_line (&accept_encodings, accept_line, out);
}

/* Round out the 'accepts' list before negotiating.
 *
 * If the client sent no Accept info at all, it is treated as having
 * asked for anything (star/star at quality 1.0).  An entry for
 * CGI_MAGIC_TYPE is then always appended, so that we are willing to
 * run a CGI script if we find one: at a vanishingly small quality
 * normally, or at a huge one when prefer_scripts is set (e.g., POST).
 */

void maybe_add_default_encodings(int prefer_scripts, FILE *out)
{
    accept_rec anything =
      { "*/*", 1.0, 0.0, 0.0 };
    accept_rec script =
      { CGI_MAGIC_TYPE, 1e-20, 0.0, 0.0 };

    if (accepts.nactive == 0)
        add_accept (&accepts, &anything, out);

    if (prefer_scripts)
        script.quality = 1e20;

    add_accept (&accepts, &script, out);
}

/*****************************************************************
 *
 * Parsing type-map files, in Roy's meta/http format augmented with
 * #-comments.
 */

/* Reading RFC822-style header lines, ignoring #-comments and
 * handling continuations.
 *
 *   buffer  receives the header line, continuations appended
 *   len     size of buffer in bytes
 *   map     stream to read from
 *
 * Returns
 *   header_eof   nothing more to read;
 *   header_sep   a blank line was read (ends the current variant);
 *   header_seen  buffer holds a header line.
 *
 * The first line is stored with its newline; each continuation line is
 * appended without its leading whitespace and followed by a newline.
 *
 * Nothing is ever stored beyond buffer[len - 1]: the initial read is
 * limited by len (it used to be limited by MAX_STRING_LEN no matter
 * what size the caller passed), and the newline after a continuation
 * is dropped when there is no room left for it.
 */

enum header_state { header_eof, header_seen, header_sep };

enum header_state get_header_line (char *buffer, int len, FILE *map)
{
    char *buf_end = buffer + len;
    char *cp;
    int c;

    /* Get a noncommented line */

    do {
        if (fgets(buffer, len, map) == NULL)
            return header_eof;
    } while (buffer[0] == '#');

    /* If blank, just return it --- this ends information on this variant */

    for (cp = buffer; *cp && isspace ((unsigned char)*cp); ++cp)
        continue;

    if (*cp == '\0') return header_sep;

    /* If non-blank, go looking for header lines, but note that we still
     * have to treat comments specially...
     */

    cp += strlen(cp);

    while ((c = getc(map)) != EOF)
    {
        if (c == '#') {
            /* Comment line */
            while ((c = getc(map)) != EOF && c != '\n')
                continue;
        } else if (isspace(c)) {
            /* Leading whitespace.  POSSIBLE continuation line
             * Also, possibly blank --- if so, we ungetc() the final newline
             * so that we will pick up the blank line the next time 'round.
             */

            while (c != EOF && c != '\n' && isspace(c))
                c = getc(map);

            ungetc (c, map);

            if (c == '\n') return header_seen; /* Blank line */

            /* Continuation */

            while (cp < buf_end - 2 && (c = getc(map)) != EOF && c != '\n')
                *cp++ = c;

            /* Only add the newline if the NUL still fits after it */
            if (cp < buf_end - 1) *cp++ = '\n';
            *cp = '\0';
        } else {

            /* Line beginning with something other than whitespace */

            ungetc (c, map);
            return header_seen;
        }
    }

    return header_seen;
}

/* Stripping out RFC822 comments.
 *
 * Every "(...)" comment in hdr is overwritten with blanks, parentheses
 * included, so the string keeps its length.  Text inside double quotes
 * is left alone.  A comment with no closing ')' is blanked through to
 * the end of the string.
 *
 * A quoted string with no closing quote simply ends the scan at the
 * terminating NUL (the scan used to step over that NUL and carry on
 * into whatever followed the string in memory).
 */

void strip_paren_comments (char *hdr)
{
    while (*hdr) {
        if (*hdr == '"') {
            while (*++hdr && *hdr != '"')
                continue;
            if (*hdr) ++hdr;	/* Step over the closing quote, if any */
        }
        else if (*hdr == '(') {
            while (*hdr && *hdr != ')') *hdr++ = ' ';

            if (*hdr) *hdr++ = ' ';
        }
        else ++hdr;
    }
}

/* Getting to a header body from the header.
 *
 * Lowercases the header name (everything before the first ':') in
 * place and returns a pointer to the first non-white byte after the
 * colon.  A line with no ':' or with nothing after it is logged and
 * reported through die() as a type map syntax error; die() is expected
 * not to return --- TODO confirm.
 *
 * The name is lowercased with a separate increment: the previous
 * "*cp++ = tolower(*cp)" both modified and read cp with no sequencing
 * between the two, which is undefined behaviour.
 */

char *lcase_header_name_return_body (char *header, FILE *out)
{
    char *cp;

    for (cp = header; *cp && *cp != ':'; ++cp)
        *cp = (char) tolower ((unsigned char) *cp);

    if (!*cp) {
        log_reason ("Syntax error in type map --- no ':'", header);
        die (SERVER_ERROR, "Syntax error in type map", out);
    }

    /* Step over the ':' and any whitespace following it */
    do ++cp; while (*cp && isspace ((unsigned char) *cp));

    if (!*cp) {
        log_reason ("Syntax error in type map --- no header body", header);
        die (SERVER_ERROR, "Syntax error in type map", out);
    }

    return cp;
}

void read_type_map (char *map_name, FILE *out)
{
    char buffer[MAX_STRING_LEN];
    FILE *map = fopen (map_name, "r");
    struct var_rec mime_info;
    enum header_state hstate;

    if (map == NULL) {
        log_reason("cannot access type map file",map_name);
        unmunge_name(map_name);
        die(FORBIDDEN,map_name,out);
    }

    clean_var_rec (&mime_info);
    
    do {
	hstate = get_header_line (buffer, MAX_STRING_LEN, map);
	
	if (hstate == header_seen) {
	    char *body = lcase_header_name_return_body (buffer, out);
	    
	    strip_paren_comments (body);
	    
	    if (!strncmp (buffer, "uri:", 4)) {
	        get_token (mime_info.file_name, body, 0, MIME_TYPE_LEN, out);
	    }
	    else if (!strncmp (buffer, "content-type:", 13)) {
		struct accept_rec accept_info;
		
		get_entry (&accept_info, body, out);
		set_mime_fields (&mime_info, &accept_info);
	    }
	    else if (!strncmp (buffer, "content-length:", 15)) {
		mime_info.bytes = atoi(body);
	    }
	    else if (!strncmp (buffer, "content-language:", 17)) {
		get_token (mime_info.lang, body, 0, MIME_TYPE_LEN, out);
		lowercase_string (mime_info.lang);
	    }
	    else if (!strncmp (buffer, "content-encoding:", 17)) {
		get_token (mime_info.content_encoding, body, 0,
			   MIME_TYPE_LEN, out);
		lowercase_string (mime_info.content_encoding);
	    }
	} else {
	    if (mime_info.quality > 0)
	        add_var (&avail_vars, &mime_info, out);
	    
	    clean_var_rec(&mime_info);
	}
    } while (hstate != header_eof);
    
    fclose (map);
}

/*****************************************************************
 *
 * Same, except we use a filtered directory listing as the map...
 */

/* Build the list of available variants from a directory listing.
 *
 *   file_name  path of the requested (nonexistent) file; trailing
 *              slashes are stripped from it in place
 *   name_len   size of the file_name buffer
 *   out        passed to die() on errors
 *
 * Every entry in file_name's directory called "<last component>.<ext>"
 * whose content type can be determined becomes a variant in
 * avail_vars.  If one of those entries is a type map, the variants
 * gathered so far are discarded, file_name is rewritten to name the
 * map, and the map is read instead.
 *
 * Returns without doing anything if file_name is empty or contains no
 * '/' beyond its first byte.
 */
void read_types_multi (char *file_name, int name_len, FILE *out)
{
    char *filp;
    DIR *dirp;
    struct DIR_TYPE *dir_entry;
    int prefix_len, dir_len;
    struct var_rec mime_info;
    struct accept_rec accept_info;

    /* An empty name has no last byte to start scanning back from */
    if (file_name[0] == '\0') return;

    clean_var_rec (&mime_info);

    filp = &file_name[strlen(file_name) - 1];

    /* Strip trailing slashes, but never walk off the front of the
     * buffer when the name consists of nothing but slashes.
     */
    while (filp > file_name && *filp == '/') *filp-- = '\0';
    while (*filp != '/' && filp > file_name) --filp;

    if (filp == file_name) return; /* Weird, weird... file_name has no '/'s.
                                    * Multi will fail, which makes about
                                    * as much sense as anything else that
                                    * could happen.
                                    */

    /* Temporarily cut the name at the last slash to open the directory */
    *filp = '\0';
    dirp = opendir (file_name);
    *filp++ = '/';

    dir_len = filp - file_name;
    prefix_len = strlen(filp);

    if (dirp == NULL) {
        log_reason("cannot read directory for multi",file_name);
        unmunge_name(file_name);
        die(FORBIDDEN,file_name,out);
    }

    while ((dir_entry = readdir (dirp)) != NULL) {

        /* Do we have a match? */

        if (strncmp (dir_entry->d_name, filp, prefix_len)) continue;
        if (dir_entry->d_name[prefix_len] != '.') continue;

        if (strlen (dir_entry->d_name) + 1 > MIME_TYPE_LEN) {
            log_reason ("Directory entry too long in multiviews",
                        dir_entry->d_name);
            die (SERVER_ERROR, "Directory entry too long in multiviews", out);
        }

        /* Yep.  Do we know what it is, or does it have some random
         * junk extension (e.g., emacs backup)?
         */

        content_encoding[0] = '\0';
        set_content_type_and_parms (dir_entry->d_name);

        if (!content_type[0]) continue;

        /* If it's a map file, we use that instead of the map
         * we're building...
         */

        if (!strcmp (content_type, MAP_FILE_MAGIC_TYPE)) {
            int new_name_len = dir_len + strlen(dir_entry->d_name) + 1;

            if (new_name_len > name_len) {
                log_reason ("Map file name too long in multiviews",
                            dir_entry->d_name);
                die(SERVER_ERROR, "Map file name too long in multiviews", out);
            }

            /* Copy the entry's name out BEFORE closing the directory:
             * the storage readdir() handed us belongs to the stream and
             * must not be touched once the stream is closed.
             */
            strcpy (filp, dir_entry->d_name);
            closedir(dirp);

            clear_rec_list (&avail_vars);
            read_type_map (file_name, out);
            return;
        }

        /* Have reasonable content-type --- gather info
         * and note its availability.
         */

        strcpy (mime_info.file_name, dir_entry->d_name);

        get_entry (&accept_info, content_type, out);

        set_mime_fields (&mime_info, &accept_info);
        strcpy (mime_info.content_encoding, content_encoding);

        add_var (&avail_vars, &mime_info, out);
    }

    closedir(dirp);
}


/****************************************************************
 *
 * Utility routine to deal with filenames... yields the real
 * (translated, if necessary) pathname of the file named by a
 * particular variant.
 */

/* Work out the real pathname of the file a variant refers to, and
 * leave it in dest (a buffer of dlen bytes).
 *
 *   file_name  pathname of the map (or of the original request)
 *   rec        the variant; rec->file_name is what gets resolved
 *
 * Three cases:
 *   - rec->file_name starts with '/': it is copied to dest and passed
 *     through translate_name();
 *   - it is a plain name (no leading '.', no '/'): it replaces the
 *     last component of file_name, with no translation needed;
 *   - anything else: file_name is run through unmunge_name() first,
 *     its last component is replaced, and the result goes back
 *     through translate_name().
 *
 * Names that do not fit in dest, and translated names that are not a
 * STD_DOCUMENT, are logged and reported through die().
 */
void substitute_mapped_name (char *dest, int dlen,
                             char *file_name, var_rec *rec, 
                             FILE *out)
{
    char *target = rec->file_name;

    if (target[0] == '/') {

        /* Absolute URI */

        if (strlen(target) + 1 > dlen) {
            log_reason("map entry too long",file_name);
            die(SERVER_ERROR, "map entry too long", out);
        }
        strcpy (dest, target);

        if (translate_name (dest, out) != STD_DOCUMENT) {
            log_reason ("can only map real files", dest);
            die (SERVER_ERROR, "non-STD_DOCUMENT listed in map file", out);
        }
    }
    else if (strlen (file_name) + 1 > dlen) {
        log_reason("file name too long",file_name);
        die(SERVER_ERROR, "file name too long", out);
    }
    else {

        /* Relative URI.  If it names a file in the same directory we
         * can splice it straight onto the translated pathname;
         * otherwise (the nasty general case) we have to untranslate
         * first and retranslate afterwards.
         */

        int same_dir = (target[0] != '.' && !strchr(target, '/'));
        char *tail;

        strcpy (dest, file_name);
        if (!same_dir)
            unmunge_name (dest);

        /* Find the start of the last path component */

        tail = &dest[strlen(dest) - 1];

        while (*tail == '/') *tail-- = '\0';
        while (*tail != '/' && tail > dest) --tail;
        ++tail;

        if ((tail - dest) + strlen (target) + 1 > dlen) {
            log_reason("file name too long after mapping",file_name);
            die(SERVER_ERROR, "file name too long after mapping", out);
        }

        strcpy (tail, target);

        if (!same_dir && translate_name (dest, out) != STD_DOCUMENT) {
            log_reason ("can only map real files", dest);
            die (SERVER_ERROR, "non-STD_DOCUMENT listed in map file", out);
        }
    }
}

/*****************************************************************
 * And now for the code you've been waiting for... actually
 * finding a match to the client's requirements.
 */

/* Matching MIME types ... the star/star and foo/star wildcard
 * conventions are implemented here.  Using strcmp() is legit, because
 * everything has already been smashed to lowercase.
 *
 * Returns 1 if the variant's media type is acceptable under this
 * accept entry, 0 if not:
 *
 *   - an accept type starting with '*' matches anything;
 *   - an accept type ending in '*' matches any variant type that
 *     starts with the text before the star, the '/' included (so
 *     "text/star" matches text/plain but not textual/anything);
 *   - otherwise the types must be equal (text/html also stands for
 *     the two INCLUDES_MAGIC types) AND the accepted level must be at
 *     least the variant's level.
 *
 * Note also that if we get an exact match on the media type, we update
 * level_matched for use in level_cmp below...
 *
 * An empty accept type (e.g. from a stray comma in the header) matches
 * nothing.  An exact type match whose level is too high is an explicit
 * non-match; that path used to fall off the end of the function and
 * return an indeterminate value.
 */

int mime_match (accept_rec *accept, var_rec *avail)
{
    char *accept_type = accept->type_name;
    char *avail_type = avail->type_name;
    int len = strlen(accept_type);

    if (len == 0)			/* Nothing to match against */
        return 0;

    if (accept_type[0] == '*')		/* Anything matches star/star */
        return 1;

    if (accept_type[len - 1] == '*')	/* Compare up to and including '/' */
        return !strncmp (accept_type, avail_type, len - 1);

    if (!strcmp (accept_type, avail_type)
        || (!strcmp (accept_type, "text/html")
            && (!strcmp(avail_type, INCLUDES_MAGIC_TYPE)
                || !strcmp(avail_type, INCLUDES_MAGIC_TYPE3)))) {
        if (accept->level >= avail->level) {
            avail->level_matched = avail->level;
            return 1;
        }
    }

    return 0;
}

/* This code implements a piece of the tie-breaking algorithm between
 * variants of equal quality.  This piece is the treatment of variants
 * of the same base media type, but different levels.  What we want to
 * return is the variant at the highest level that the client explicitly
 * claimed to accept.
 *
 * If all the variants available are at a higher level than that, or if
 * the client didn't say anything specific about this media type at all
 * and these variants just got in on a wildcard, we prefer the lowest
 * level, on grounds that that's the one that the client is least likely
 * to choke on.
 *
 * (This is all motivated by treatment of levels in HTML --- we only
 * want to give level 3 to browsers that explicitly ask for it; browsers
 * that don't, including HTTP/0.9 browsers that only get the implicit
 * "Accept: * / *" [space added to avoid confusing cpp --- no, that
 * syntax doesn't really work] should get HTML2 if available).
 *
 * (Note that this code only comes into play when we are choosing among
 * variants of equal quality, where the draft standard gives us a fair
 * bit of leeway about what to do.  It ain't specified by the standard;
 * rather, it is a choice made by this server about what to do in cases
 * where the standard does not specify a unique course of action).
 */

/* Compare two variants by level.  Returns 1 if var1 is to be
 * preferred, -1 if var2 is, and 0 if the levels tie or the two
 * variants are not comparable at all (levels only mean something
 * between variants of matching media type; all "pseudo HTML" types
 * count as matching each other).
 */
int level_cmp (var_rec *var1, var_rec *var2)
{
    /* Comparable? */

    if (var1->is_pseudo_html) {
        if (!var2->is_pseudo_html)
            return 0;
    }
    else if (strcmp (var1->type_name, var2->type_name) != 0) {
        return 0;
    }

    /* The higher explicitly-matched level wins... */

    if (var2->level_matched < var1->level_matched) return 1;
    if (var2->level_matched > var1->level_matched) return -1;

    /* ...and when that doesn't decide it, the LOWER level does. */

    if (var2->level > var1->level) return 1;
    if (var2->level < var1->level) return -1;

    return 0;			/* Tied */
}

/* Finding languages.  Returns the position in accept_langs of the
 * first entry that is a prefix of lang, or -1 if there is none.
 *
 * Only the leading substring given by the client is compared --- this
 * is what lets "en" match all subvariants of English.  strncmp() is
 * legit because case has been ditched already.
 */

int find_lang_index (char *lang)
{
    accept_rec *wanted = (accept_rec *)accept_langs.recs;
    int count = accept_langs.nactive;
    int idx = 0;

    while (idx < count) {
        char *prefix = wanted[idx].type_name;

        if (strncmp (lang, prefix, strlen(prefix)) == 0)
            return idx;

        ++idx;
    }

    return -1;
}

void find_lang_indexes ()
{
    var_rec *var_recs = (var_rec*)avail_vars.recs;
    int i;
    int found_any = 0;

    if (accept_langs.nactive == 0) {
	
	/* Client doesn't care */

	for (i = 0; i < avail_vars.nactive; ++i)
	    var_recs[i].lang_index = -1;

	return;
    }
    
    for (i = 0; i < avail_vars.nactive; ++i)
	if (var_recs[i].quality > 0) {
	    int index = find_lang_index (var_recs[i].lang);

	    var_recs[i].lang_index = index;
	    if (index >= 0) found_any = 1;
	}

    /* If we have any variants in a language acceptable to the client,
     * blow away everything that isn't.
     */
    
    if (found_any)
	for (i = 0; i < avail_vars.nactive; ++i) 
	    if (var_recs[i].lang_index < 0)
		var_recs[i].quality = 0;
}

/* Finding content encodings.  Note that we assume that the client
 * accepts the trivial encodings.  Strcmp() is legit because... aw, hell.
 */

/* Returns 1 if enc names a trivial content encoding --- the empty
 * string, "7bit", "8bit" or "binary" --- and 0 otherwise.  The
 * comparison is exact (case included).
 */
int is_identity_encoding (char *enc)
{
    static const char *const trivial[] = { "7bit", "8bit", "binary" };
    unsigned int k;

    if (*enc == '\0')
        return 1;

    for (k = 0; k < sizeof trivial / sizeof trivial[0]; ++k) {
        if (strcmp (enc, trivial[k]) == 0)
            return 1;
    }

    return 0;
}

/* Returns 1 if the client can take content encoding enc: either it is
 * one of the trivial encodings, or it is listed by name in
 * accept_encodings.  Returns 0 otherwise.
 */
int find_encoding (char *enc)
{
    accept_rec *listed = (accept_rec *)accept_encodings.recs;
    int k = 0;

    if (is_identity_encoding(enc))
        return 1;

    while (k < accept_encodings.nactive) {
        if (strcmp (enc, listed[k].type_name) == 0)
            return 1;
        ++k;
    }

    return 0;
}

void do_encodings ()
{
    var_rec *var_recs = (var_rec*)avail_vars.recs;
    int i;

    /* Lose any variant with an unacceptable content encoding */

    for (i = 0; i < avail_vars.nactive; ++i)
	if (var_recs[i].quality > 0
	    && !find_encoding (var_recs[i].content_encoding))
	    
	    var_recs[i].quality = 0;
}

/* Content length of a variant.  If it is not known yet (bytes is
 * zero), the variant's real pathname is worked out and stat()ed, and
 * on success the size is remembered in the variant for next time.
 * Returns whatever bytes holds afterwards; len is not used.
 */

int find_content_length(var_rec *variant, char *file_name, int len, FILE *out)
{
    char path[MAX_STRING_LEN];
    struct stat info;

    if (variant->bytes != 0)
        return variant->bytes;

    substitute_mapped_name (path, MAX_STRING_LEN,
                            file_name, variant, out);

    if (stat (path, &info) >= 0)
        variant->bytes = info.st_size;

    return variant->bytes;
}

/* The main event. */

var_rec *best_match(char *file_name, int len, FILE *out)
{
    int i, j;
    var_rec *best = NULL;
    float best_quality = 0.0;
    int levcmp;
    
    accept_rec *accept_recs = (accept_rec *)accepts.recs;
    var_rec *avail_recs = (var_rec *)avail_vars.recs;

    /* Nuke variants which are unsuitable due to a content encoding,
     * or possibly a language, which the client doesn't accept.
     * (If we haven't *got* a variant in a language the client accepts,
     * find_lang_indexes keeps 'em all, so we still wind up serving
     * something...).
     */
    
    do_encodings();
    find_lang_indexes();
    
    for (i = 0; i < accepts.nactive; ++i) {

	accept_rec *type = &accept_recs[i];
	
	for (j = 0; j < avail_vars.nactive; ++j) {
	    
	    var_rec *variant = &avail_recs[j];
	    float q = type->quality * variant->quality;
		
	    /* If we've already rejected this variant, don't waste time */
	    
	    if (q == 0.0) continue;	
	    
	    /* If media types don't match, forget it.
	     * (This includes the level check).
	     */
	    
	    if (!mime_match(type, variant)) continue;

	    /* Check maxbytes */
		
	    if (type->max_bytes > 0
		&& (find_content_length(variant, file_name, len, out)
		    > type->max_bytes))
		continue;
		
	    /* If it lasted this far, consider it ---
	     * If better quality than our current best, take it.
	     * If equal quality, *maybe* take it.
	     *
	     * Note that the current http draft specifies no particular
	     * behavior for variants which tie in quality; the server
	     * can, at its option, return a 300 response listing all
	     * of them (and perhaps the others), or choose one of the
	     * tied variants by whatever means it likes.  This server
	     * breaks ties as follows, in order:
	     *
	     * By order of languages in Accept-language, to give the
	     * client a way to specify a language preference.  I'd prefer
	     * to give this precedence over media type, but the standard
	     * doesn't allow for that.
	     *
	     * By level preference, as defined by level_cmp above.
	     *
	     * By order of Accept: header matched, so that the order in
	     * which media types are named by the client functions as a
	     * preference order, if the client didn't give us explicit
	     * quality values.
	     *
	     * Finally, by content_length, so that among variants which
	     * have the same quality, language and content_type (including
	     * level) we ship the one that saps the least bandwidth.
	     */
		
	    if (q > best_quality
		|| (q == best_quality
		    && (variant->lang_index < best->lang_index
			|| (variant->lang_index == best->lang_index
			    && ((levcmp = level_cmp (variant, best)) == 1
				|| (levcmp == 0
				    && !strcmp (variant->type_name,
						best->type_name)
				    && (find_content_length(variant, file_name,
							    len, out)
					<
					find_content_length(best, file_name,
							    len, out))))))))
	    {
		best = variant;
		best_quality = q;
	    }
	}
    }

    return best;
}

/*****************************************************************
 *
 * Security.
 */

/* Redo the existence and access checks for a pathname that
 * negotiation has just substituted.  finfo receives the stat() result.
 * A missing file is reported through die() as NOT_FOUND, and a file
 * the configuration denies as FORBIDDEN.
 */
void recheck_filename (char *file_name, int allow_opts,
                       struct stat *finfo, FILE *out)
{
    char allow_options = allow_opts;
    int allow = 1;

    /* The pathname changed, so the earlier checks no longer apply.
     * For now nothing of that earlier work is reused.
     */

    if (stat (file_name, finfo) < 0) {
        log_reason("map file entry does not exist",file_name);
        unmunge_name(file_name);
        die(NOT_FOUND,file_name,out);
    }

    evaluate_access (file_name, finfo, M_GET, &allow, &allow_options, out);

    if (allow)
        return;

    log_reason("client denied by server configuration",file_name);
    unmunge_name(file_name);
    die(FORBIDDEN,file_name,out);
}

/****************************************************************
 *
 * Executive...
 */

/* The part of negotiation shared by type maps and multiviews; runs
 * once avail_vars has been filled in.
 *
 * Picks the best variant, overwrites file_name (a buffer of name_len
 * bytes) with that variant's real pathname, and redoes the access
 * checks on it.  Unless the variant is a CGI script or
 * cache_negotiated_docs is set, a "Pragma: no-cache" header is forced.
 *
 * Returns the chosen variant, or NULL if none is acceptable.
 */
var_rec *negotiation_common (char *file_name, int name_len, int prefer_script,
                             int allow_options, struct stat *finfo, FILE *out)
{
    char mapped[MAX_STRING_LEN];
    var_rec *winner;

    maybe_add_default_encodings(prefer_script, out);

    winner = best_match(file_name, name_len, out);
    if (winner == NULL)
        return NULL;

    substitute_mapped_name (mapped, MAX_STRING_LEN, file_name, winner, out);

    if (strlen(mapped) + 1 <= name_len) {
        strcpy (file_name, mapped);
    } else {
        log_reason("file name too long after mapping",file_name);
        die(SERVER_ERROR, "file name too long after mapping", out);
    }

    recheck_filename (file_name, allow_options, finfo, out);

    /* If not a CGI script, toss on Pragma: no-cache */

    if (!cache_negotiated_docs
        && strcmp (winner->type_name, CGI_MAGIC_TYPE) != 0)
    {
        force_header ("Pragma: no-cache\015\012", out);
    }

    return winner;
}

/* Negotiate using the type map file named by file_name.
 *
 * On success file_name names the chosen variant, content_type holds
 * its media type, content_encoding holds its encoding (left empty for
 * the trivial encodings), and 1 is returned.  Returns 0 if no variant
 * is acceptable.
 */
int handle_map_file (char *file_name, int name_len, int prefer_script,
                     int allow_options, struct stat *finfo, FILE *out)
{
    var_rec *chosen;

    read_type_map (file_name, out);

    chosen = negotiation_common (file_name, name_len, prefer_script,
                                 allow_options, finfo, out);
    if (chosen == NULL)
        return 0;

    strcpy (content_type, chosen->type_name);

    /* Only a nontrivial encoding is passed on --- some browsers
     * might get confused otherwise.
     */

    if (!is_identity_encoding (chosen->content_encoding))
        strcpy (content_encoding, chosen->content_encoding);
    else
        content_encoding[0] = '\0';

    return 1;
}

/* Negotiate among the files in file_name's directory that share its
 * base name ("multiviews").  Only attempted when allow_options has
 * OPT_MULTI set.
 *
 * Returns 1 with file_name rewritten to the chosen variant, or 0 if
 * multiviews are not allowed here or no variant is acceptable.
 */
int handle_multi (char *file_name, int name_len, int prefer_script,
                  int allow_options, struct stat *finfo, FILE *out)
{
    var_rec *chosen;

    if ((allow_options & OPT_MULTI) == 0) {
        log_reason ("No file(s) found to satisfy this request", file_name);
        return 0;
    }

    read_types_multi (file_name, name_len, out);

    chosen = negotiation_common (file_name, name_len, prefer_script,
                                 allow_options, finfo, out);
    if (chosen == NULL)
        return 0;

    /* Clear it out, so set_content_type won't get confused later.
     * Sigh...
     */
    *content_encoding = '\0';

    return 1;
}
