(* $Id: http_client.mli 126 2004-05-25 20:56:41Z gerd $
 * ----------------------------------------------------------------------
 *
 *)

(**********************************************************************)
(* HTTP/1.1 client                                                    *)
(* written by Gerd Stolpmann                                          *)
(**********************************************************************)

(* Implements much of HTTP/1.1.
 * Implements the following advanced features:
 *  - chunked messages
 *  - persistent connections
 *  - connections in pipelining mode ("full duplex" connections)
 *  - modular authentication methods, currently Basic and Digest
 *  - event-driven implementation; allows concurrent service for
 *    several network connections 
 * Left out:
 *  - multipart messages, including multipart/byterange
 *  - content encoding (compression)    (1)
 *  - content digests specified by RFC 2068 and 2069   (1)
 *  - content negotiation   (1)
 *  - conditional and partial GET   (1)
 *  - following code 303 redirections automatically    (1)
 *  - client-side caching   (1)
 *  - HTTP/0.9 compatibility
 *
 * (1) These features can be implemented on top of this module if really needed,
 *     but there is no special support for them.
 *)

(* TODO list:
 * - Currently all data are kept in main memory. It should be possible to
 *   upload from files, and to download into files. (like the Cgi module)
 *   The MIME decoder from Netstring should be used.
 * - digest authentication: should be updated from RFC 2069 to RFC 2617.
 *)

(* RESTRICTED THREAD-SAFETY - NEW SINCE RELEASE 0.3:
 *
 * The module can be compiled such that it is thread-safe. In particular,
 * one has to use the netclient_mt.cm[x]a archive, and thread-safety is
 * restricted to the following kinds of usage:
 * - The golden rule is that threads must not share pipeline objects.
 *   If every thread uses its own pipeline, every thread will have its own
 *   set of state variables.
 *   It is not detected if two threads errornously share a pipeline,
 *   neither by an error message nor by implicit serialization. Strange
 *   things may happen.
 * - The same applies to the other objects: "get", "trace", "options",
 *   "head", "post", "put", "delete", "basic_auth_method", and 
 *   "digest_auth_method". But sharing these objects would make no sense
 *   at all.
 * - Of course, it would be possible to use lots of mutexes to make
 *   the module fully thread-safe. Perhaps in the next release.
 * - The Convenience module serializes; see below.
 *)


(**********************************************************************
 *** NOTE: At the end of this interface specification there is a    ***
 *** SIMPLIFIED interface that is sufficient for many applications. ***
 *** The simplified interface is recommended for beginners.         ***
 **********************************************************************)



exception Header_is_incomplete;;
exception Body_is_incomplete;;
exception Body_maybe_complete;;

exception Bad_message of string;;
  (* The server sent a message which cannot be interpreted. The string
   * indicates the reason.
   *)

exception Http_error of (int * string);;
  (* The server sent an error message. The left component of the pair is
   * the error code, the right component is the error text.
   *)

exception No_reply;;
  (* There was no response to the request because some other request failed
   * earlier and it was not allowed to send the request again.
   *)


exception Http_protocol of exn;;
  (* The request could not be processed because the exception condition 
   * was raised.
   *)

type secret;;
  (* You cannot call methods requiring a parameter of type secret *)

type buffer_type;;
  (* internally used *)

type token;;
  (* internally used *)

type 'message_class how_to_reconnect =
    Send_again         (* Send the request automatically again *)
  | Request_fails      (* Drop the request *)
  | Inquire of ('message_class -> bool)
                       (* If the function return 'true' send again, otherwise
			* drop the request.
			*)
;;


type 'message_class how_to_redirect =
    Redirect           (* Perform the redirection *)
  | Do_not_redirect    (* No redirection *)
  | Redirect_inquire of ('message_class -> bool)
                       (* If the function return 'true' redirect, otherwise
			* do not redirect
			*)
;;


type synchronization =
    Sync_with_handshake_before_request_body of float
      (* After the request header is sent, the client waits for the specified
       * amount of time (seconds) until a "100 CONTINUE" response is returned
       * by the server.
       * This mode is recommended for POST and PUT requests if there might
       * be reasons that the request is denied, e.g. because authorization
       * is required.
       *)
  | Sync
      (* The next request begins after the response of the last request has
       * been received.
       *)
  | Pipeline of int
      (* The client is allowed to send several requests without waiting
       * for responses. The number is the maximum number of unreplied
       * requests that are allowed. A typical value: 5.
       * If you increase this value, the risk becomes higher that requests
       * must be repeatedly sent to the server in the case the connection
       * crashes. Increasing is recommended if you send a bigger number of
       * HEAD requests to the server. Decreasing is recommended if you send
       * large POST or PUT requests to the server.
       *)

      (* General note: The first request/response round is always done in
       * Sync mode, because the protocol version of the other side
       * is not known at that moment. Sync_with_handshake_before_request_body
       * and Pipelined require HTTP/1.1.
       *)
;;


type http_options = 
    { synchronization : synchronization;
        (* Default: Pipeline 5. *)
      maximum_connection_failures : int;
        (* This option limits the number of connection attempts that crash
	 * immediately.
	 * Default: 5
	 *)
      maximum_message_errors : int;
        (* This option limits the number of protocol errors tolerated per
	 * request. If a request leads to a protocol error, the connection
	 * is shut down, the server is connected again, and the request is
	 * tried again (if the kind of the message allows retransmission).
	 * If a request repeatedly fails, this option limits the number
	 * of retransmissions.
	 * Default: 2
	 *)
      inhibit_persistency : bool;
        (* This option turns persistent connections off.
	 * Default: false
	 * It is normally not necessary to apply this option.
	 *)
      connection_timeout : float;
        (* If there is no network transmission for this period of time,
	 * the connection is shut down, and tried again.
	 * Default: 300.0 (seconds)
	 * It may be necessary to increase this value if HTTP is used for
	 * batch applications that contact extremely slow services.
	 *)
      number_of_parallel_connections : int;
	(* The client keeps up to this number of parallel connections to
	 * a single content server or proxy.
	 * Default: 2
	 * You may increase this value if you are mainly connected with
	 * an HTTP/1.0 proxy.
	 *)
      verbose_status : bool;
      verbose_request_header : bool;
      verbose_response_header : bool;
      verbose_request_contents : bool;
      verbose_response_contents : bool;
      verbose_connection : bool;
        (* Enable various debugging message types.
	 * 'status':             reports about status of received documents
	 * 'request_header':     prints the header sent to the server
	 * 'request_contents':   prints the document sent to the server
	 * 'response_header':    prints the header of the answer from the server
	 * 'response_contents':  prints the document received from the server
	 * 'connection':         reports many connection events; authentication,
	 *                       too.
	 *)
    }
;;


(*************************************************************)
(***** class message: core functionality of all messages *****)
(*************************************************************)

class virtual message :
  object ('self)
    (* "message" contains data of a request, and if successful, data of the
     * response.
     *)

    method virtual prepare : bool -> unit

    (* public state: *)

    method is_served : bool
	(* true if request/response cycle has been done, or if two many
	 * errors have occurred
	 *)
    method using_proxy : bool
        (* Whether a proxy is contacted or not. *)
    method get_host : unit -> string
	(* host name of the content server *)
    method get_port : unit -> int
	(* port number of the content server *)
    method get_uri : unit -> string
	(* the full URI of this message: http://server:port/path. 
	 * The /path component is omitted for an OPTIONS message "*".
	 *)
    method get_req_body : unit -> string
	(* What has been sent as body in the (last) request *)
    method get_req_header : unit -> (string * string) list
	(* What has been sent as header in the (last) request. Returns
	 * (key, value) pairs, where the keys are all in lowercase.
	 *
	 * NOTE ABOUT THE REQUEST HEADER:
	 * The header of a "message" object is initially empty. You can
	 * set header entries using "set_req_header" before the request.
	 * When sending the request some header entries are automatically
	 * added, such as "host", "content-length" and "authorization".
	 *)
    method assoc_req_header : string -> string
	(* Query a specific header entry. The name of the entry must be
	 * given in lowercase characters. 
	 *)
    method assoc_multi_req_header : string -> string list
        (* Return all header values for a given field name (header entries
	 * which allow several values separated by commas can also be 
	 * transmitted by several header lines with the same name).
	 *)
    method set_req_header : string -> string -> unit
	(* Set the request header entry with given "name" to "value". *)
    method get_req_uri : unit -> string
	(* Get the "request URI". This value is only set after doing the
	 * request.
	 * Note: This is the string actually sent in the request. It may
	 * be a full URI (http://.../), or only the path, depending on the
	 * type of the request.
	 *)
    method get_req_method : unit -> string
	(* Get the name of the request method. This value is only set after 
	 * doing the request.
	 *)
    method get_resp_header : unit -> (string * string) list
	(* Get the header of the last response. The keys are in lowercase
	 * characters again.
	 *
	 * If there was not a valid response at all, the exception
	 * Http_protocol e is raised, where e describes the reason.
	 *)
    method assoc_resp_header : string -> string
	(* Query a specific header entry of the response. The name of the
	 * entry must be given in lowercase characters.
	 *
	 * If there was not a valid response at all, the exception
	 * Http_protocol e is raised, where e describes the reason.
	 *)
    method assoc_multi_resp_header : string -> string list
        (* Return all response header values for a given field name (header 
	 * entries which allow several values separated by commas can also be 
	 * transmitted by several header lines with the same name).
	 *
	 * If there was not a valid response at all, the exception
	 * Http_protocol e is raised, where e describes the reason.
	 *)
    method get_resp_body : unit -> string
	(* Returns the body of the last response if the response status
	 * is OK (i.e. the code is in the range 200 to 299).
	 *
	 * Otherwise, Http_error (code, body) is raised where 'code' is
	 * the response code and 'body' is the body of the (errorneous)
	 * response.
	 *
	 * If there was not a valid response at all, the exception
	 * Http_protocol e is raised, where e describes the reason.
	 *)
    method dest_status : unit -> (string * int * string)
	(* Returns the status line of the last response (but status lines
	 * with code 100 are ignored).
	 * The returned triple is (http_string, code, text)
	 *
	 * If there was not a valid response at all, the exception
	 * Http_protocol e is raised, where e describes the reason.
	 *)

    (* connection control: *)

    method get_reconnect_mode : message how_to_reconnect
	(* Get what to do if the server needs to be reconnected, i.e.
	 * if the request must be sent repeatedly.
	 * By default, this is 'Send_again' if the request is idempotent,
	 * and 'Request_fails' otherwise.
	 *)

    method set_reconnect_mode : message how_to_reconnect -> unit

    method get_redirect_mode : message how_to_redirect
      (* By default, the redirect mode is Redirect for GET and HEAD requests,
       * and Do_not_redirect otherwise.
       *)

    method set_redirect_mode : message how_to_redirect -> unit

    (* proxy control: *)

    method no_proxy : unit -> unit
	(* Forces that this request is done without proxy. *)

    method is_proxy_allowed : unit -> bool
	(* Returns if this object would allow a proxy *)

    (* methods for convenience: *)

    method dump_header : string -> (string * string) list -> unit
	(* Writes the given header list to stderr. Every line is
	 * prefixed by the given string.
	 *)

    (* private state: *)

    method init_query : secret -> string -> unit

    method set_served : secret -> unit
    method set_unserved : secret -> unit
    method get_request : secret -> string
    method set_response : buffer_type -> bool -> unit

    method get_error_counter : secret -> int
    method set_error_counter : secret -> int -> unit
    method set_error_exception : secret -> exn -> unit


    (* private methods: *)

    method decode_header : secret -> unit
    method decode_header_at : secret -> buffer_type -> int -> 
                                ( (string * string) list * int )
    method decode_body : secret -> bool -> bool -> unit
    method body_is_complete : secret -> (string * string) list -> 
                                buffer_type -> int -> int
    method received_body_is_complete : secret -> (int * int)
  end
;;


(*****************************************)
(***** message types by http methods *****)
(*****************************************)

(* For all method classes: the query string MUST NOT contain usernames and
 * passwords!
 *)

class get : string ->            (* The query, "http://server/path" *)
  object
    inherit message
    method prepare : bool -> unit
  end
;;


class trace : string -> int ->     
  (* (1) The query, "http://server/path" 
   * (2) maximum number of hops
   *)
  object
    inherit message
    method prepare : bool -> unit
  end
;;


class options : string ->        (* The query, "http://server/path" *)
  object
    inherit message
    method prepare : bool -> unit
  end
;;


class head : string ->           (* The query, "http://server/path" *)
  object
    inherit message
    method prepare : bool -> unit
  end
;;


class post : string -> (string * string) list ->
  (* (1) The query, "http://server/path"
   * (2) The parameters that are transferred using the mime type
   *     application/x-www-form-urlencoded
   *)
  object
    inherit message
    method prepare : bool -> unit
  end
;;


class post_raw : string -> string ->
  (* (1) The query, "http://server/path"
   * (2) The body to be transferred
   *)
  object
    inherit message
    method prepare : bool -> unit
  end
;;


class put : string -> string ->
  (* (1) The query, "http://server/path"
   * (2) The body to be transferred
   *)
  object
    inherit message
    method prepare : bool -> unit
  end
;;


class delete : string ->         (* The query, "http://server/path" *)
  object
    inherit message
    method prepare : bool -> unit
  end
;;



(**********************************)
(***** Authentication methods *****)
(**********************************)

class basic_auth_method :
  object
    val mutable current_realm : string
    method name : string 
    method set_realm : string -> string -> string -> unit
	(* set_realm realm user password:
	 * adds that (user,password) should be used for the given realm
	 *)
    method get_credentials : unit -> (string * string)
	(* get (user,password) for the current realm or raise Not_found.
	 * This method may be overridden. For example, an interactive
	 * application may open a dialoge box to get the credentials of
	 * an unknown realm.
	 *)
    (* The following methods should not be used from outside *)
    method www_authenticate : message -> token list -> unit
    method set_authorization : message -> string -> unit
    method update : message -> token list -> unit
  end
;;


class digest_auth_method :
  object
    inherit basic_auth_method
  end
;;



(**********************************************)
(***** class pipeline: the http processor *****)
(**********************************************)

class pipeline :
  object
    (* A "pipeline" object is a FIFO queue of messages. It is called
     * "pipeline" because it is processed asynchronously: Requests may be
     * sent to the HTTP server independently of whether responses of the
     * previous requests already arrived or not.
     * Furthermore, a "pipeline" object may keep connections to several
     * servers at once. (More exactly, it has a FIFO queue for every
     * server it is connected with.)
     * The "pipeline" object keeps track what is happening, so you need
     * not to care about the details of communications. The API is
     * simple: Create a "pipeline" object, do some setup (add authentication
     * methods; configure the proxy to use), add the requests, and 
     * "run" the pipeline. The rest is done automatically. To get the results,
     * you can either memorize the requests you wanted to know yourself
     * and ask every request object about the reply of the server; or
     * you can specify that a callback function should be called once
     * the request is processed (with positive or negative result).
     * It is possible to add further requests to the pipeline within
     * these callback functions.
     *
     * If you want to have several pipelines, or some cooperation with
     * other network services, you may specify a Unixqueue.event_system.
     * For example, to have two pipelines working concurrently:
     *
     * let ues = Unixqueue.create_unix_event_system() in
     * let p1 = new pipeline in
     * let p2 = new pipeline in
     * p1 # set_event_system ues;
     * p2 # set_event_system ues;
     * Unixqueue.run ues             (* run p1 and p2 in parallel *)
     *
     * This works not only with pipelines, but with every network client
     * or server which is compatible with the Unixqueue design.
     *)

    method set_event_system : Unixqueue.event_system -> unit

    method add_authentication_method : basic_auth_method -> unit
	(* adds an authentication method *)
    method set_proxy : string -> int -> unit
	(* set_proxy name port:
	 * sets that a proxy 'name' listening on 'port' should be used
	 *)
    method set_proxy_auth : string -> string -> unit
	(* sets user and password for the proxy. Only the "basic" authentication
	 * method is implemented.
	 *)
    method avoid_proxy_for : string list -> unit
	(* sets a list of host names or domain suffixes for which no proxy
	 * should be used. 
	 * e.g. [ "localhost"; ".our.net" ]
	 *)
    method set_proxy_from_environment : unit -> unit
	(* Inspect the environment variables "http_proxy" and "no_proxy"
	 * and set the proxy options from them.
	 *)
    method reset : unit -> unit
	(* Empties the pipeline and closes any open connection.
	 * The currently active operation is interrupted, and every request
	 * with response is set to No_reply (i.e. you get the exception
	 * No_reply if you try to access the response).
	 * If there are callbacks for these requests, the callback
	 * functions are invoked.
	 * The queues of open requests and replies are cleared. All
	 * connections to all servers are closed.
	 *)
    method add : message -> unit
	(* Adds the message at the end of the pipeline. The state of the
	 * message is set to "unserved".
	 *)
    method add_with_callback : message -> (message -> unit) -> unit
	(* Adds the message at the end of the pipeline. The state of the
	 * message is set to "unserved".
	 * After the message has been processed, the callback function
	 * is called. This function is called for every message that
	 * leaves the pipeline, it does not matter whether processing
	 * was successful or not. Invoke 'dest_status' on the message
	 * to get what happened; either some status information from the
	 * server is available (perhaps OK status), or an exception is
	 * raised. This exception is a deferred exception coming from
	 * some unusual situation.
	 *)
    method run : unit -> unit
      (* Runs through the requests in the pipeline. If a request can be
       * fulfilled, i.e. the server sends a response, the state of the
       * request is set and the request is removed from the pipeline.
       * If a request cannot be fulfilled (no response, bad response, 
       * network error), the exception is stored in the 'message'
       * object and will be raised once the state of the object is
       * queried.
       * Under certain conditions (serious network errors) 'run' does
       * not catch the exception; it simply cleans its own state up
       * (aborting the errorneous network connection). In this case,
       * simply invoke 'run' again to continue.
       * 'run' terminates normally if the pipeline becomes empty.
       *
       * The engine handles the following HTTP return codes itself:
       * 100: This is an intermediate return code and simply ignored.
       * 301: If the method is GET or HEAD, the redirection is followed.
       * 302: If the method is GET or HEAD, the redirection is followed.
       * 401: Content server authentication
       * 407: Proxy server authentication
       *
       * All other return codes remain uninterpreted, it is up to the
       * caller of this function to react on them.
       *
       *)

    method get_options : http_options
    method set_options : http_options -> unit
      (* Get/Set the available options for the HTTP engine. 
       * The new options will take into effect immediately.
       *)

    method number_of_open_messages : int
      (* Returns the number of messages which are still in the pipeline. *)

    method number_of_open_connections : int
      (* Returns the number of connections which are open at the same time *)

  end
;;


(**************************************************)
(***** Convenience module for simple purposes *****)
(**************************************************)

(* Do 'open Http_client.Convenience' for simple applications.
 *
 * - The environment variables "http_proxy" and "no_proxy" determine 
 *   the proxy settings. "http_proxy" must be an http-URL that contains
 *   the proxy's name, its port, and optionally user and password.
 *   E.g. "http://eric:eric'spassword@proxy:8080/".
 *   The variable "no_proxy" is a comma-separated list of hosts and
 *   domains for which no proxy must be used.
 *   E.g. "localhost, sun, moon, .intra.net"
 * - There is a default behaviour to authenticate. Both "basic" and "digest"
 *   methods are enabled. Two global variables, http_user and http_password
 *   set the user and password if the URL does not specify them. In the case
 *   that user and password are included in the URL, these values are always
 *   used.
 * - There is a default error behaviour. If a request fails, it is automatically
 *   repeated. The variable http_trials specifies the number of times a request
 *   is submitted at most.
 *   Requests are not repeated if there is a HTTP return code that indicates
 *   a normal operating condition.
 *   POST and DELETE requests are never repeated.
 *)

(* RESTRICTED THREAD SAFETY - NEW SINCE RELEASE 0.3:
 *
 * The Convenience module is fully thread-safe with the exception of the
 * exported variables (http_trials, http_user, and http_password). Note
 * that all threads share the same pipeline, and access to the pipeline
 * is serialized.
 * The latter simply means that it always works, but that threads may 
 * block each other (i.e. the program slows down if more than one thread
 * wants to open http connections at the same time).
 *)


module Convenience :
    sig
      val http_trials : int ref
        (* number of times every request is tried. Default: 3 *)

      val http_user : string ref
	(* The default user if authentication is required *)

      val http_password : string ref
	(* The default password if authentication is required *)

      val http_get_message : string -> message
	(* Does a "GET" request with the given URL and returns the message.
	 * The URL may contain a user and a password, as in
	 * "http://user:password@server.com/path". If authentication is
	 * required by the server, the user and password values from the
	 * URL are taken, if present. Otherwise, http_user and http_password
	 * are used. If http_user is "", authentication fails always.
	 *)
	  
      val http_head_message : string -> message
	(* Does a "HEAD" request with the given URL and returns the reply.
	 * See also http_get_message.
	 *)

      val http_post_message : string -> (string * string) list -> message
	(* Does a "POST" request with the given URL and returns the reply.
	 * The list contains the parameters send with the POST request.
	 * See also http_get_message.
	 *)

      val http_put_message : string -> string -> message
	(* Does a "PUT" request with the given URL and returns the reply.
         * The second argument contains the contents to be put.
	 * See also http_get_message.
	 *)

      val http_delete_message : string -> message
	(* Does a "DELETE" request with the given URL and returns the reply.
	 * See also http_get_message.
	 *)

      val http_get : string -> string
	(* Does a "GET" request with the given URL and returns the message
	 * body. See also http_get_message.
         *)

      val http_post : string -> (string * string) list -> string
	(* Does a "POST" request with the given URL and returns the message
	 * The list contains the parameters send with the POST request.
	 * body. See also http_get_message.
         *)

      val http_put : string -> string -> string
	(* Does a "PUT" request with the given URL and returns the message
	 * body. The second argument contains the contents to be put.
         * See also http_get_message.
         *)

      val http_delete : string -> string
	(* Does a "DELETE" request with the given URL and returns the message
	 * body. See also http_get_message.
         *)

      val http_verbose : unit -> unit
	(* Turns on debug messages on stderr. *)

    end


