/* -*- mode: c; c-file-style: "gnu" -*-
 * http.c -- Miscellaneous HTTP request handling functions
 * Copyright (C) 2002, 2003, 2004 Gergely Nagy <algernon@bonehunter.rulez.org>
 *
 * This file is part of Thy.
 *
 * Thy is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 dated June, 1991.
 *
 * Thy is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 * License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/** @file http.c
 * HTTP request parsing and processing routines.
 *
 * This module deals with parsing HTTP requests: picking out the
 * headers the server is interested in, resolving the filename
 * associated with a URL, and related request handling.
 */

#include "system.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include <sys/types.h>
#include <time.h>

#include "compat/compat.h"
#include "bh-libs/list.h"

#include "cgi.h"
#include "config.h"
#include "http.h"
#include "misc.h"
#include "options.h"
#include "thy.h"
#include "types.h"

/** @internal Convert a HTTP header to a CGI environment variable.
 * Given a HTTP-header in LINE, split it into a name and value field,
 * then set up a HTTP_NAME=VALUE environment variable using cgi_addenv.
 *
 * The name is everything before the first colon, upper-cased, with
 * every '-' and ' ' replaced by '_'.  The value is everything after
 * the colon, with leading spaces and tabs skipped (so "Name:value",
 * "Name: value" and a bare "Name:" are all handled).  Lines without
 * a colon are silently ignored.
 *
 * @param req is the HTTP request structure to work with.
 * @param line is the HTTP request line.
 */
static void
_http_line2cgienv (request_t *req, const char *line)
{
  char *tmp, *value, *name;
  size_t i, namelen;

  value = strchr (line, ':');
  if (!value)
    return;

  namelen = (size_t)(value - line);
  tmp = bhc_strndup (line, namelen);
  for (i = 0; i < namelen; i++)
    if (tmp[i] == '-' || tmp[i] == ' ')
      tmp[i] = '_';
    else
      /* <ctype.h> functions need an unsigned char value. */
      tmp[i] = (char) toupper ((unsigned char) tmp[i]);

  /* Step over the colon and any whitespace before the value, instead
     of blindly assuming the separator is exactly ": ". */
  value++;
  while (*value == ' ' || *value == '\t')
    value++;

  /* On failure the contents of NAME are undefined: do not use it. */
  if (asprintf (&name, "HTTP_%s", tmp) < 0)
    {
      free (tmp);
      return;
    }

  cgi_addenv (req, name, value);
  free (name);
  free (tmp);
}

/** @internal Whitespace-cleaning strdup().
 * Skips the leading spaces and tabs of STR, duplicates what is left,
 * and cuts the copy at the first space or tab found in it.  The
 * result is therefore the first whitespace-delimited word of STR.
 *
 * @param str is the string to duplicate.
 *
 * @returns A newly allocated buffer with the cleaned string.
 */
static char *
_http_strdup (const char *str)
{
  char *copy;

  /* Drop the leading blanks before copying... */
  str += strspn (str, " \t");
  copy = bhc_strdup (str);

  /* ...and end the copy where the next blank (if any) starts. */
  copy[strcspn (copy, " \t")] = '\0';

  return copy;
}

/** @internal Parse stuff[;q=value] constructs.
 * Splits SRC at commas, and puts each element into a char array.
 * Anything from the first ';' of an element onwards (the [;q=value]
 * part) is ignored, and each element is cleaned up with
 * _http_strdup().  Empty elements are skipped, as strtok() does not
 * return them.
 *
 * @param src is the construct to parse.
 *
 * @returns A NULL-terminated array of char * elements, all newly
 * allocated.  Both the elements and the array itself are to be freed
 * by the caller.
 *
 * @note Uses strtok(), so it must not be called while another
 * strtok() scan is in progress.
 */
static char **
_http_parse_q (const char *src)
{
  char **res;
  size_t c = 0, a = 5;
  char *element, *params, *dsrc = bhc_strdup (src);

  res = (char **)bhc_calloc (a, sizeof (char *));

  for (element = strtok (dsrc, ","); element != NULL;
       element = strtok (NULL, ","))
    {
      /* Cut the parameters off. */
      if ((params = strchr (element, ';')) != NULL)
        params[0] = '\0';

      /* Grow BEFORE storing, and always keep one slot spare for the
         terminating NULL, so that neither write can land outside
         the array. */
      if (c + 1 >= a)
        {
          a *= 2;
          XSREALLOC (res, char *, a);
        }
      res[c++] = _http_strdup (element);
    }
  res[c] = NULL;

  free (dsrc);

  return res;
}

/** @internal Parse a HTTP header line.
 * Every header except Authorization is first exported to the CGI
 * environment with _http_line2cgienv().  After that, the headers
 * known to the server are decoded and stored in REQ; unknown headers
 * are otherwise ignored.
 *
 * Header names are matched case-insensitively, as a prefix that
 * includes the colon and a single space ("Host: ").
 *
 * When a string-valued header is repeated, the last one wins and the
 * previous copy is freed.  This relies on the string fields of REQ
 * being either NULL or heap-allocated, which is how
 * http_request_parse() sets them up.
 *
 * @param line is the HTTP header line to parse.
 * @param req is the structure to put the result into.
 */
static void
_http_parse_line (const char *line, request_t *req)
{
  /* Credentials must never leak into the CGI environment. */
  if (strncasecmp (line, "Authorization: ", 15))
    _http_line2cgienv (req, line);

  if (!strncasecmp (line, "User-Agent: ", 12))
    {
      free (req->ua);
      req->ua = bhc_strdup (&line[12]);
      return;
    }

  if (!strncasecmp (line, "Referer: ", 9))
    {
      free (req->referer);
      req->referer = bhc_strdup (&line[9]);
      return;
    }

  if (!strncasecmp (line, "If-Modified-Since: ", 19))
    {
      strptime (&line[19], "%a, %d %b %Y %H:%M:%S",
                &(req->modified_since));
      return;
    }
  if (!strncasecmp (line, "If-Unmodified-Since: ", 21))
    {
      strptime (&line[21], "%a, %d %b %Y %H:%M:%S",
                &(req->unmodified_since));
      return;
    }

  if (!strncasecmp (line, "Host: ", 6))
    {
      /* An empty Host header is treated as if it was not sent. */
      if (strlen (line) < 7)
        return;

      free (req->host);
      req->host = bhc_strdup (&line[6]);
      return;
    }

  if (!strncasecmp (line, "Content-Type: ", 14))
    {
      free (req->content_type);
      req->content_type = bhc_strdup (&line[14]);
      return;
    }

  if (!strncasecmp (line, "Content-Length: ", 16))
    {
      req->content_length = atoi (&line[16]);
      return;
    }

  if (!strncasecmp (line, "Range: bytes=", 13))
    {
      /* Try "start-end" first; if that does not match in full, fall
         back to using the leading number as the start only. */
      if (sscanf (&line[13], SIZET_FORMAT "-" SIZET_FORMAT,
                  (size_t *)&req->range_start,
                  (size_t *)&req->range_end) != 2)
        req->range_start = atoi (&line[13]);
      return;
    }

  if (!strncasecmp (line, "Connection: ", 12))
    {
      /* Connection options are case-insensitive, "close" included. */
      if (!strncasecmp (&line[12], "close", 5))
        req->keepalive = 0;
      if (!strncasecmp (&line[12], "Keep-Alive", 10))
        req->keepalive = 1;
      return;
    }

  if (!strncasecmp (line, "Authorization: ", 15))
    {
      const char *cred = &line[15];

      /* Only accept "Basic" when it is a complete word, and locate
         the token by skipping the separating whitespace rather than
         by a fixed offset, which would point past the end of the
         line if nothing followed the scheme. */
      if (!strncasecmp (cred, "Basic", 5) &&
          (cred[5] == ' ' || cred[5] == '\t'))
        {
          cred += 5;
          while (*cred == ' ' || *cred == '\t')
            cred++;

          free (req->auth_token);
          req->auth_token = bhc_strdup (cred);
        }
      return;
    }

  if (!strncasecmp (line, "Accept-Encoding: ", 17))
    {
      char **encodings = _http_parse_q (&line[17]);
      int i, found = 0;

      if (!encodings)
        return;

      /* The first supported encoding listed is the one used; the
         rest of the list is only walked to free it. */
      for (i = 0; encodings[i]; i++)
        {
          if (!found)
            {
              if (!strcasecmp (encodings[i], "gzip"))
                {
                  found = 1;
                  req->encoding = CONTENT_ENCODING_GZIP;
                }
              if (!strcasecmp (encodings[i], "deflate"))
                {
                  found = 1;
                  req->encoding = CONTENT_ENCODING_DEFLATE;
                }
            }
          free (encodings[i]);
        }
      free (encodings);
      return;
    }

  if (!strncasecmp (line, "If-Range: ", 10))
    {
      free (req->if_range);
      req->if_range = bhc_strdup (&line[10]);
      return;
    }

  if (!strncasecmp (line, "Expect: ", 8))
    {
      free (req->expect);
      req->expect = bhc_strdup (&line[8]);
      return;
    }

  if (!strncasecmp (line, "If-Match: ", 10))
    {
      char *value;
      char *tmp = bhc_strdup (&line[10]);

      /* One list entry per comma separated value.  Each cleaned-up
         value is freed right after it was handed to the list. */
      for (value = strtok (tmp, ","); value != NULL;
           value = strtok (NULL, ","))
        {
          char *v = _http_strdup (value);

          bhl_list_append_string (req->if_match, v);
          free (v);
        }
      free (tmp);
      return;
    }

  if (!strncasecmp (line, "If-None-Match: ", 15))
    {
      char *value;
      char *tmp = bhc_strdup (&line[15]);

      for (value = strtok (tmp, ","); value != NULL;
           value = strtok (NULL, ","))
        {
          char *v = _http_strdup (value);

          bhl_list_append_string (req->if_none_match, v);
          free (v);
        }
      free (tmp);
      return;
    }

  if (!strncasecmp (line, "Upgrade: ", 9))
    {
      char *upgrade = _http_strdup (&line[9]);

      if (!strcasecmp (upgrade, "http/1.1"))
        req->upgrade = THY_UPGRADE_HTTP11;
#if THY_OPTION_TLS
      if (!strcasecmp (upgrade, "tls/1.0"))
        req->upgrade = THY_UPGRADE_TLS10;
#endif

      free (upgrade);
      return;
    }
}

/** Resolve an URL.
 * Find the path corresponding to a given URL.  Three mappings are
 * tried, in this order:
 *  - the user directory, if userdirs are enabled, the URL starts
 *    with "/~", and userdir() comes up with a directory;
 *  - the longest alias whose name is a prefix of the URL;
 *  - the webroot, followed by "/HOST" when virtual hosting is on and
 *    a host was supplied, followed by the URL itself.
 *
 * @param url is the URL to resolve.
 * @param host is the virtual host the request came for, if any.
 * @param absuri is the absolute URL of the request.
 *
 * @returns The resolved path in a newly allocated string, or NULL if
 * an error occurred (no URL, an URL that does not start with '/', or
 * a failure to build the aliased path).
 */
char *
http_url_resolve (const char *url, const char *host, const char *absuri)
{
  thy_mappable_config_t *config;
  char *resolved, *dalias = NULL;
  size_t len, reslen, urllen, ai;
  size_t hostlen, aliaslen = 0;

  if (!url)
    return NULL;

  /* Is this an absolute path? */
  if (url[0] != '/')
    return NULL;

  config = config_get_mapped (absuri, NULL);

  urllen = strlen (url);
  hostlen = (host) ? strlen (host) : 0;

  /* Is this an userdir thing? */
  if (config->options.userdir == THY_BOOL_TRUE && url[1] == '~')
    {
      int i = userpos (url);
      char *ud;

      /* Setup resolved: the user's directory, followed by the part
         of the URL starting at the offset userpos() returned. */
      if ((ud = userdir (url, config->userdir)) != NULL)
        {
          size_t urlen, udlen;

          urlen = urllen - i;
          udlen = strlen (ud);
          resolved = (char *)bhc_malloc (urlen + udlen + 1);
          memcpy (resolved, ud, udlen);
          memcpy (resolved + udlen, &url[i], urlen);
          resolved[urlen + udlen] = 0;
          free (ud);
          free (config);
          return resolved;
        }
    }

  /* Might this be an alias?  The longest matching one wins. */
  for (ai = 0; ai < bhl_list_size (config->aliases); ai++)
    {
      pair_t *alias;
      size_t alen;

      bhl_list_get (config->aliases, ai, (void **)&alias);
      alen = strlen (alias->field);
      if (alen <= aliaslen)
        {
          free (alias);
          continue;
        }
      /* strncmp() stops at the end of URL, so an alias longer than
         the URL cannot make us read past the URL's terminator. */
      if (strncmp (url, alias->field, alen) == 0)
        {
          aliaslen = alen;
          /* NOTE(review): this keeps alias->value past the free()
             below, so it presumes the string is not owned by the
             pair_t bhl_list_get() handed out -- confirm. */
          dalias = alias->value;
        }
      free (alias);
    }
  /* Right, now lets see if we found something... */
  if (aliaslen > 0 && dalias)
    {
      /* RESOLVED is undefined if asprintf() fails. */
      if (asprintf (&resolved, "%s%s", dalias, &url[aliaslen]) < 0)
        resolved = NULL;
      free (config);
      return resolved;
    }

  len = strlen (config->webroot) + urllen;
  if (config->options.vhosting == THY_BOOL_TRUE)
    len += hostlen;

  /* Two extra bytes: the '/' in front of the host, and the
     terminating NUL, which the zeroing allocation provides. */
  resolved = (char *)bhc_calloc (1, len + 2);

  reslen = strlen (config->webroot);
  memcpy (resolved, config->webroot, reslen);
  if (host != NULL && config->options.vhosting == THY_BOOL_TRUE)
    {
      resolved[reslen++] = '/';
      memcpy (resolved + reslen, host, hostlen);
      reslen += hostlen;
    }

  memcpy (resolved + reslen, url, urllen);

  free (config);
  return resolved;
}

/** Parse a full HTTP request.
 * Resets REQ, parses the request line (method, URL, protocol
 * version), then splits up the header section into lines and parses
 * each one separately with _http_parse_line().
 *
 * Header parsing stops at the first empty line, at the end of the
 * buffer, or after as many lines as there are newlines in the buffer
 * minus two (the request line and the closing empty line), whichever
 * comes first.  Anything following the empty line is left alone.
 *
 * @param req is the output structure, which must be preallocated.
 * @param buffer should contain the full HTTP request.  It is
 * modified in place: the parsed parts get NUL-terminated.
 *
 * @returns HTTP_PARSE_OK on success, HTTP_PARSE_PREMATURE if the
 * request was not complete.
 *
 * @note As a side-effect, it also fills in REQ.  REQ may be
 * partially filled in when HTTP_PARSE_PREMATURE is returned.
 */
parse_result_t
http_request_parse (request_t *req, char *buffer)
{
  char *p;
  int lines = -2;

  req->host = NULL;
  req->ua = NULL;
  req->referer = NULL;
  memset (&(req->modified_since), 0, sizeof (struct tm));
  memset (&(req->unmodified_since), 0, sizeof (struct tm));
  req->http_major = 0;
  req->http_minor = 0;
  req->url = NULL;
  req->range_start = 0;
  req->range_end = 0;
  req->keepalive = 1;
  req->content_type = NULL;
  req->content_length = 0;
  req->query_string = NULL;
  req->path_info = NULL;
  req->cgienvlen = 0;
  req->cgienvalloc = 10;
  req->cgienv = (char **)bhc_calloc (req->cgienvalloc, sizeof (char *));
#if THY_OPTION_TRACE
  req->raw = bhc_strdup (buffer);
#else
  req->raw = NULL;
#endif
  req->auth_realm = NULL;
  req->auth_token = NULL;
  req->auth_file = NULL;
  req->resolved = NULL;
  req->file = NULL;
  req->encoding = CONTENT_ENCODING_NONE;
  req->if_range = NULL;
  req->if_match = bhl_list_init (0, NULL);
  req->if_none_match = bhl_list_init (0, NULL);
  req->expect = NULL;
  req->upgrade = THY_UPGRADE_NONE;

  /* Count the newlines in a single pass.  LINES started at -2, so it
     ends up as the number of header lines expected. */
  for (p = buffer; *p; p++)
    if (*p == '\n')
      lines++;

  /* ignoring leading spaces */
  while (isspace ((unsigned char) *buffer))
    buffer++;

  /* parsing the method */
  p = buffer;
  while (*buffer && !isspace ((unsigned char) *buffer))
    buffer++;
  if (*buffer)
    *(buffer++) = '\0';

  req->method = HTTP_METHOD_UNKNOWN;
  req->method_str = bhc_strdup (p);
  if (!strcasecmp ("GET", p))
    req->method = HTTP_METHOD_GET;
  if (!strcasecmp ("HEAD", p))
    req->method = HTTP_METHOD_HEAD;
  if (!strcasecmp ("POST", p))
    req->method = HTTP_METHOD_POST;
  if (!strcasecmp ("OPTIONS", p))
    req->method = HTTP_METHOD_OPTIONS;
#if THY_OPTION_TRACE
  if (!strcasecmp ("TRACE", p))
    req->method = HTTP_METHOD_TRACE;
#endif
#if THY_OPTION_DEBUG
  if (!strcasecmp ("QUIT", p))
    exit (1);
#endif

  /* the spaces between method and url */
  while (isspace ((unsigned char) *buffer))
    buffer++;

  p = buffer;
  while (*buffer && !isspace ((unsigned char) *buffer))
    buffer++;
  if (!*buffer)
    return HTTP_PARSE_PREMATURE;

  *(buffer++) = '\0';
  req->url = bhc_strdup (p);

  /* the spaces between url and 'HTTP' */
  while (isspace ((unsigned char) *buffer))
    buffer++;

  p = buffer;
  while (*buffer != '/' && *buffer)
    buffer++;
  if (!*buffer)
    return HTTP_PARSE_PREMATURE;
  *(buffer++) = '\0';

  if (strcasecmp ("HTTP", p))
    return HTTP_PARSE_PREMATURE;

  /* the major number of protocol; the character ending it (the dot)
     is overwritten with a NUL */
  p = buffer;
  while (isdigit ((unsigned char) *buffer))
    buffer++;
  if (!*buffer)
    return HTTP_PARSE_PREMATURE;
  *(buffer++) = '\0';
  req->http_major = atoi (p);

  /* the minor number of protocol */
  p = buffer;
  while (isdigit ((unsigned char) *buffer))
    buffer++;
  if (!*buffer)
    return HTTP_PARSE_PREMATURE;
  *(buffer++) = '\0';
  req->http_minor = atoi (p);

  /* skip the rest of the line */
  while (isspace ((unsigned char) *buffer))
    buffer++;

  while (*buffer && lines-- != 0)
    {
      /* An empty line ends the headers.  Whatever follows (a body,
         or the next pipelined request) must not be parsed as if it
         was a header of this request. */
      if (*buffer == '\r' || *buffer == '\n')
        break;

      /* go to the end of the line and zero-terminate it */
      p = buffer;
      while (*buffer && *buffer != '\r' && *buffer != '\n')
        buffer++;

      /* Only step over a line ending that is really there: when the
         line ran into the end of the buffer, stay on the NUL instead
         of walking past it.  A CR LF pair counts as one ending. */
      if (*buffer)
        {
          char eol = *buffer;

          *(buffer++) = '\0';
          if (eol == '\r' && *buffer == '\n')
            buffer++;
        }

      _http_parse_line (p, req);
    }

  return HTTP_PARSE_OK;
}
