/*		Handle a Retrieve request from a WWW client	HTRetrieve.c
**		===========================================
**
** Authors
**	CTB	Carl Barker, Brunel
**	DMX	Daniel Martin
**	TBL	Tim Berners-Lee, CERN, Geneva	timbl@info.cern.ch
**
** History:
**	29 Apr 91 (TBL)	Split from daemon.c
**	5 Sept 91 (DMX)	Added path simplification to prevent '..'ing to an
**			incorrect directory.
**			Added '\r' as space for telnetting to socket.
**	10 Sep 91 (TBL)	Reject request and log if fails authorisation
**	26 Jan 92 (TBL) Added some of CTB's code for directory read.
**	23 Apr 93 (TBL) keyword untangling passed to lower level
**	31 Oct 93 (AL)	Added /htbin stuff.
*/

/* (c) CERN WorldWideWeb project 1990,91. See Copyright.html for details */

/* Build-time switches and size constants.
** None of these names is referenced in the code visible in this file;
** presumably they are consumed by the headers included below or are
** left over from daemon.c -- TODO confirm.
*/
#define USE_PLAINTEXT	/* Makes retrieval of PostScript easier for now, */
			/* at the cost of output that is not good SGML */

#define BUFFER_SIZE 4096	/* Arbitrary size, chosen for efficiency */
#define INFINITY 512		/* Maximum file name length @@ FIXME */
				/* NOTE(review): <math.h> defines a macro */
				/* of the same name -- possible clash if */
				/* that header is ever included; confirm */

#include "HTUtils.h"
#include "HTFormat.h"
#include "tcp.h"

#ifdef RULES			/* Use rules? */
#include "HTRules.h"
#endif
#include "HTParse.h"

#include "HTFile.h"
#include "HTDaemon.h"		/* calls back to HTTP daemon */
#include "HTMLGen.h"		/* For HTML generator */
#include "HTWriter.h"		/* For making streams to net and disk */

#include "HTAccess.h"
#include "HTScript.h"		/* /htbin script calls */
#include "HTAAUtil.h"		/* FREE() macro */
#include "HTRules.h"		/* HTSearchScript */

/* Symbols defined in other modules. */
extern int WWW_TraceFlag;	/* Controls diagnostic output */
extern FILE * logfile;		/* Log file output (defined elsewhere) */
/* Superseded declaration, kept for reference:
**	extern char HTClientHost[16];	-- client name to be output
*/
extern int HTWriteASCII PARAMS((int soc, char * s));	/* In HTDaemon.c */

/* Superseded definition, kept for reference:
**	PUBLIC FILE * logfile = 0;	-- log file if any
*/
extern char *HTClientHost;	/* Peer internet address */





/*	Server-side replacement for the HTML presenter
**	----------------------------------------------
**
**	In a server, "presenting" HTML means emitting HTML markup.
**	Defining HTML_new here keeps the client-oriented presentation
**	code in libwww from being linked in.
**
** On entry,
**	anchor		anchor handed on to HTStreamStack()
**	format_out	output format requested from HTStreamStack()
**	stream		stream handed on to HTStreamStack()
**
** On exit,
**	returns		the result of HTMLGenerator() on the stream that
**			HTStreamStack() built for WWW_HTML -> format_out,
**			or NULL if HTStreamStack() returned no stream.
*/
PUBLIC HTStructured* HTML_new ARGS3(
	HTParentAnchor *, 	anchor,
	HTFormat,		format_out,
	HTStream*,		stream)
{
    HTStream * html_sink;

    html_sink = HTStreamStack(WWW_HTML, format_out, stream, anchor);

    /* Without a sink there is nothing for the generator to write to */
    return html_sink ? HTMLGenerator(html_sink) : NULL;
}


/*	Dummy hypertext-object symbols @@@@
**
**	Two functions with empty bodies and a pointer that is initialised
**	to NULL.
**	NOTE(review): presumably these exist only so that references to
**	them from the library resolve at link time without pulling in the
**	client-side hypertext code -- confirm against libwww.
*/

PUBLIC void HText_select() {}		/* Does nothing */
PUBLIC void HText_selectAnchor() {}	/* Does nothing */
PUBLIC void * HTMainAnchor = NULL;	/* Never set in this file */



/*	Retrieve a document
**	-------------------
**
**	Splits an optional "?keywords" part off the request, simplifies
**	the remaining address, refuses addresses that still contain a
**	".." component, and then either calls a script (for "/htbin/"
**	addresses and for searches) or loads the document to the client.
**
** On entry,
**	arg	Requested address, optionally followed by '?' and
**		keywords or form data.  NOTE: the '?' is overwritten with
**		a NUL in the caller's buffer, so the buffer must be
**		writable in spite of the CONST qualifier.
**	soc	Socket the reply is written to.
**
** On exit,
**	returns	the result of HTLoadError() when the request is refused
**		or a search cannot be carried out,
**		the result of HTCallScript() when a script is called,
**		HT_LOADED otherwise.
**
**	The simplified address, the keyword vector and the string pool
**	behind it are held in static variables and released at the start
**	of the next call.
*/
PUBLIC int HTRetrieve ARGS2(CONST char *,  arg,
			    int,	   soc)
{
    static char * arg2 = NULL;		/* Simplified argument -- autofreed */
    char * keywords=strchr(arg, '?');	/* Query part, if any */
    static char ** keywordvec = NULL;	/* NULL-terminated word vector */
    static char * pool = NULL;	/* Contains all the strings in keywordvec */
    size_t arg2_len;
    int status;

    FREE(keywordvec);	/* From previous call */
    FREE(pool);		/* 	- " -	      */

    if (keywords) {
	*(keywords++) = (char)0;	/* Cut the address at the '?' */
	if (!*keywords) keywords = NULL;	/* Nothing after the '?' */
	else {
	    char *cur;
	    char *word;
	    int cnt = 1;		/* One for NULL terminator */
	    int equal_signs = 0;	/* Number of = signs in form req */
	    BOOL form_value = NO;	/* Are we parsing form field value */
	                                /* or name? */

	    /*
	    ** Count keywords, form fields and values (sometimes
	    ** counts too much but that's ok), and allocate a vector
	    ** for field names, values and keywords
	    */
	    for (cur=keywords;  *cur;  cur++) {
		if ('+' == *cur  ||  '=' == *cur  ||  '&' == *cur) {
		    cnt++;
		    if ('=' == *cur) equal_signs++;
		}
	    }

	    /*
	    ** Copy keyword string so that after each equal sign
	    ** there is space for an extra NULL.
	    */
	    if (!(pool = (char*)malloc(strlen(keywords) + equal_signs + 1)))
		outofmem(__FILE__, "HTRetrieve");
	    if (equal_signs > 0) {
		char *dest = pool;
		for (cur=keywords; *cur; cur++, dest++) {
		    *dest = *cur;
		    if ('=' == *cur)
			*(++dest) = ' ';	/* Make room for NULL */
		}
		*dest = (char)0;		/* Terminate */
	    }
	    else strcpy(pool, keywords);

	    if (!(keywordvec = (char**)malloc((cnt+1)*sizeof(char*))))
		outofmem(__FILE__, "HTRetrieve");

	    /*
	    ** Put form field names, values and keywords
	    ** to vector unescaped.
	    */
	    word=pool;
	    for (cur=pool, cnt=0;  *cur;  cur++) {
		switch (*cur) {
		  case '=':
		    /*
		    ** The NUL goes into the padding byte after the '=',
		    ** so the field name keeps its trailing '='.
		    */
		    *(++cur) = (char)0;
		    HTUnEscape(word);
		    keywordvec[cnt++] = word;
		    form_value = YES;
		    word = cur + 1;
		    break;
		  case '&':
		    /* End of a field; a field name follows */
		    *cur = (char)0;
		    HTUnEscape(word);
		    keywordvec[cnt++] = word;
		    form_value = NO;
		    word = cur + 1;
		    break;
		  case '+':
		    /* A space inside a form value, otherwise a separator */
		    if (form_value)
			*cur = ' ';
		    else {
			*cur = (char)0;
			HTUnEscape(word);
			keywordvec[cnt++] = word;
			word = cur + 1;
		    }
		    break;
		  default:
		    break;
		} /* switch */
	    } /* for every keywords character */
	    /* Put last keyword and terminate vector */
	    HTUnEscape(word);	
	    keywordvec[cnt++] = word;
	    keywordvec[cnt] = NULL;
	}
    }

    StrAllocCopy(arg2, arg);	/* Also frees the one from previous call */
    HTSimplify(arg2);	/* Remove ".." etc  (DMX) */
    
    /*
    ** HTSimplify will leave in a "/../" at the top, which can
    ** be a security hole. Fixed 931001 TBL
    **
    ** A ".." component that is the whole address, that starts it, or
    ** that ends it climbs a directory just as well and contains no
    ** "/../", so those forms are refused too.
    */ 
    arg2_len = strlen(arg2);
    if (strstr(arg2, "/../")
	|| 0 == strcmp(arg2, "..")
	|| 0 == strncmp(arg2, "../", 3)
	|| (arg2_len >= 3  &&  0 == strcmp(arg2 + arg2_len - 3, "/.."))) {
	if (TRACE) printf("HTRetrieve: Illegal attempt to use /../ %s\n",
			  arg);
	return HTLoadError(HTASCIIWriter(soc), 403,
			   "URL containing /../ disallowed.");
    }
    else if (0 == strncmp(arg2, "/htbin/", 7)) {
	/* Call a script to generate a document */
	status = HTCallScript(arg2, keywordvec, soc);
	return status;
    }
    else if (keywords) {
	if (HTSearchScript) {		/* Search enabled */
	    char *search_script = NULL;
	    char *translated = HTTranslate(arg2);
	    char *filename = NULL;

	    if (translated) {
		if ((filename = HTParse(translated, "",
					PARSE_PATH | PARSE_PUNCTUATION))) {
		    /* Build "/htbin/<search script>/<file name>" */
		    StrAllocCopy(search_script, "/htbin/");
		    StrAllocCat(search_script, HTSearchScript);
		    /*
		    ** NOTE(review): the separator test looks at arg2
		    ** although filename is what gets appended -- confirm
		    ** that this is intended.
		    */
		    if (arg2[0] != '/')
			StrAllocCat(search_script, "/");
		    StrAllocCat(search_script, filename);

		    status = HTCallScript(search_script, keywordvec, soc);
		    free(translated);
		    free(filename);
		    free(search_script);
		    return status;
		}
		free(translated);
	    }
	    if (TRACE) printf("HTHandle: search failed--sth invalid %s\n",arg);
	    return HTLoadError(HTASCIIWriter(soc),500,"Search not possible.");
	}
	else {
	    if (TRACE) printf("HTHandle: can't perform search %s\n",
			      arg);
	    return
		HTLoadError(HTASCIIWriter(soc), 403,
			    "Sorry, this server does not perform searches.");
	}
    } /* If keywords given */
    else {  /* Load the document normally into the client */
	HTStream * client = HTASCIIWriter(soc);

	/* The result of the load is not examined here */
	HTLoadToStream(arg2, NO, client);
	return HT_LOADED;
    }

} /* Retrieve */

