/*
 $Id: PULLPAGE,v 1.3 2001/02/17 22:26:25 rick Exp $
 * Rick Younie <younie@debian.org>
 *
 * PULLPAGE - pull in a web page, HTTP/1.0, with rxsock
 *
 * Typical Usage:
 *
 *   call value 'REGINA_MACROS', '/usr/lib/searchscripts','SYSTEM'
 *   ret = PULLPAGE(site, url)
 *   parse var ret error ',' buffer
 *
 *   error:
 *      -3  - couldn't load the socket functions
 *      -2  - connect() or select() timeout
 *      -1  - Control-C
 *       0  - no error
 *       else the symbolic error name - e.g. ECONNREFUSED
 *           (see errno.h in kernel source)
 *
 * Description:
 *
 *   call value 'REGINA_MACROS', '/usr/lib/searchscripts','SYSTEM'
 *     - this allows your script to see this function
 *
 *   ret = PULLPAGE(site, url, timeout, headertoo, dots)
 *     - site, e.g. 'debian.org'
 *     - url, e.g. '/index.html'
 *     - timeout in seconds, default 60
 *     - headertoo 1=return web page header, 0=don't (default)
 *     - dots 1=show progress dots, 0=don't (default)
 *
 *     e.g. ret = PULLPAGE('debian.org', '/index.html')
 *          parse var ret error ',' body
 *
 *     e.g. crlf2 = '0d 0a 0d 0a'x
 *          ret = PULLPAGE('debian.org', '/index.html', 30, 1, 1)
 *          parse var ret error ',' header (crlf2) body
 *
 *   http_proxy - uses this environment variable if it is set
 *
 */
    TRACE OFF

    /* status codes returned ahead of the ',' in the result string */
    NO_SOCKET_LIBRARY = -3
    CONNECT_SELECT_TIMEOUT = -2
    CTRLC = -1

    /* allow ^C bailout */
    SIGNAL ON HALT NAME CTRL_C

    crlf = '0d 0a'x
    crlf2 = crlf || crlf
    /* CR, ESC [ K, CR - written to stdout to clear the row of dots */
    wipe = '0d 1b'x || '[K' || '0d'x

    parse arg site, page, timeout, headertoo, dots

    /* timeout in seconds: missing, non-numeric or non-positive -> 60. */
    /* (0 would trip the connect timeout check at once and a negative  */
    /* value is not a usable select timeout.)                          */
    if \datatype(timeout,'NUM') then timeout = 60
    else if timeout <= 0 then timeout = 60

    /* the request path always starts with '/'; an empty page becomes '/' */
    if \abbrev(page,'/') then page = '/'page

    /* header is returned only when explicitly asked for with '1' */
    if headertoo <> '1' then headertoo = 0

    /* progress dots are shown only when dots is '1'; default is 0 */
    if dots <> '1' then dots = 0

    /* use proxy if it is set in environment */
    http_proxy = value('http_proxy',,'SYSTEM')

    /* accumulates everything received from the server */
    sockbuff = ''

    /* inner loop error gets error from ERRNO; outer loop returns a    */
    /* manually set error because ERRNO isn't meaningful at that point */
    /* Both loops run exactly once; they exist so that LEAVE can act   */
    /* as a structured "bail out" to one of the two error paths.       */
    do outer = 1 to 1

        /* register the rxsock functions if they are not loaded yet */
        if RxFuncQuery("SockLoadFuncs") <> 0 then do
            call RxFuncAdd 'SockLoadFuncs','rxsock','SockLoadFuncs'
            if RxFuncErrmsg() <> '' then do
                err = NO_SOCKET_LIBRARY
                leave outer
            end
            call SockLoadFuncs
        end

        do inner = 1 to 1

            /* get IP of remote */
            /* NOTE(review): a failed lookup falls through to err = ERRNO; */
            /* confirm rxsock sets ERRNO (not only H_ERRNO) in that case.  */
            server.!family = "AF_INET"
            if http_proxy = '' then do
                server.!port = 80
                if SockGetHostByName(site, 'server.!') <> 1 then leave inner
            end
            /* using proxy instead of direct connect */
            else do
                /* accept [http://]host[:port][/...]; the scheme prefix */
                /* is optional and any trailing path is dropped so the  */
                /* port is a clean number                               */
                proxy = http_proxy
                if translate(left(proxy, 7)) == 'HTTP://'
                    then proxy = substr(proxy, 8)
                parse var proxy proxy '/' .
                parse var proxy proxy ':' proxyport
                /* no usable port given: fall back to the http default */
                if \datatype(proxyport, 'W') then proxyport = 80
                server.!port = proxyport
                if SockGetHostByName(proxy, 'server.!') <> 1 then leave inner
            end

            /* make socket */
            socket = SockSocket("AF_INET","SOCK_STREAM","IPPROTO_TCP")
            if socket = -1 then leave inner

            /* set non-blocking */
            if SockIoctl(socket, 'FIONBIO', 1) <> 0 then leave inner

            /* connect, with t/o check: the non-blocking connect is  */
            /* re-issued until it succeeds, fails or the timeout     */
            /* expires                                               */
            /* NOTE(review): this polls without sleeping, so it spins */
            /* the CPU while the connection is pending.               */
            call time 'R'
            do while SockConnect(socket,"server.!") <> 0
                /* EISCONN on a retry means the pending connect has */
                /* completed - treat it as success                  */
                if ERRNO = 'EISCONN' then leave
                if ERRNO <> 'EINPROGRESS',
                 & ERRNO <> 'EALREADY'
                    then leave inner
                if time('E') > timeout then do
                    err = CONNECT_SELECT_TIMEOUT
                    leave outer
                end
            end

            /* set blocking */
            if SockIoctl(socket, 'FIONBIO', 0) <> 0 then leave inner

            /* send request; via a proxy the request line carries the */
            /* absolute URL instead of just the path                  */
            if http_proxy = '' then sendstring = 'GET' page 'HTTP/1.0'crlf
            else sendstring = 'GET http://'site || page 'HTTP/1.0'crlf
            sendstring = sendstring || 'Host:' site || crlf ||,
                        'Accept: *'||'/'||'*' || crlf || crlf

            /* a failed send is reported through ERRNO like any other */
            /* socket error instead of waiting for the select timeout */
            if SockSend(socket, sendstring) < 0 then leave inner

            num_dots = 1

            /* get the data */
            do forever

                /* caller wants progress indicator? */
                if dots then do
                    /* print a dot for each chunk pulled */
                    if num_dots < 60 then do
                        call charout 'stdout', '.'
                        num_dots = num_dots + 1
                    end
                    else do
                        call charout 'stdout',wipe
                        num_dots = 1
                    end
                end

                /* SockSelect rewrites the stem with the ready sockets, */
                /* so the read set is re-armed before every call        */
                socket.0 = 1
                socket.1 = socket
                ready = SockSelect('socket.','','',timeout)

                /* -1 error; 0 timeout */
                if ready < 0 then leave inner
                if ready = 0 | socket.0 = 0 then do
                    err = CONNECT_SELECT_TIMEOUT
                    leave outer
                end

                bytes = SockRecv(socket, 'chunk', 512)

                /* -1 = error; 0 = done */
                if bytes = -1 then leave inner
                if bytes = 0 then do  /* only good exit */
                    err = 0
                    leave outer
                end
                else sockbuff = sockbuff || chunk /* another chunk */

            end

        end inner

        /* reached only via 'leave inner': report the socket error name */
        err = ERRNO

    end outer

CLEANUPANDRETURN:

    /* close the socket, but only if one was actually created */
    if symbol('socket') <> 'LIT'
        then if socket >= 0 then call SockClose socket

    /* lose the header? */
    /* The header normally ends at the first CRLF CRLF.  Some servers  */
    /* terminate header lines with a bare LF, so an LF LF that comes   */
    /* before any CRLF CRLF is accepted as the separator too; without  */
    /* this the whole body of such a response would be discarded.      */
    if \headertoo then do
        lf2 = '0a 0a'x
        crlfcut = pos(crlf2, sockbuff)
        lfcut = pos(lf2, sockbuff)
        if lfcut > 0 & (crlfcut = 0 | lfcut < crlfcut)
            then sockbuff = substr(sockbuff, lfcut + length(lf2))
            else parse var sockbuff . (crlf2) sockbuff
    end

    /* result format: error ',' data */
    return err','sockbuff

CTRL_C:
    /* HALT (Control-C) handler, installed by SIGNAL ON HALT NAME CTRL_C. */
    /* Closes the socket if one has been created, then returns error -1   */
    /* with an empty data part.                                           */
    if symbol('socket') = 'VAR' then do
        if socket >= 0 then call SockClose socket
    end
    return '-1' || ','
