/* ===================
 *  extract-title.c
 * ===================
 *
 * Usage:
 *	extract-title www_dir filepath
 *
 * It extracts the TITLE of the HTML document, if it finds it among the
 * first few lines of the file. Otherwise it returns the file name starting
 * from the WWW home directory, i.e. it returns 'filepath' if title was 
 * not found.
 * www_dir (WWW home dir) must be given to tell extract-title the absolute
 * path of the directory below which filepath is located.
 *
 * What I'm trying to say is that if I access URL
 *     http://www1.cern.ch/EPAC/x.html
 * the rule system maps it to /CERN_WWW/CERNWeb/EPAC/x.html so I call
 * extract-title as follows:
 *	  extract-title /CERN_WWW/CERNWeb EPAC/x.html
 *
 * This way (if the TITLE is missing) the client won't see the strange
 * /CERN_WWW/CERNWeb in the anchor name (this of course wouldn't reduce
 * functionality but is only cosmetic).
 *
 * -------------------------------------------------------------------------
 * This software, including the above comments, was written by Ari Luotonen
 * from CERN. Many big thanks go to him for sharing this with me. I have
 * merely added this statement, and included his code in our server
 * extension package.
 *  -- Chris (neuss@igd.fhg.de)
 * -------------------------------------------------------------------------
 */

 
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
typedef int BOOL;
#define TRUE 1
#define FALSE 0
 
/* Maximum number of fgets() reads examined while looking for the title. */
#define MAX_SCAN_LINES 20

/*
 * Return non-zero if the string `s` begins with `prefix`, ignoring the
 * case of letters.  HTML tag names are case-insensitive, so "<title>"
 * must be recognised as well as "<TITLE>".  The comparison stops at the
 * terminating NUL of `s`, so it never reads past the end of the string.
 */
static int
starts_with_nocase(const char *s, const char *prefix)
{
    while (*prefix) {
        if (tolower((unsigned char)*s) != tolower((unsigned char)*prefix))
            return 0;
        s++;
        prefix++;
    }
    return 1;
}

/*
 * Print the TITLE of an HTML document on one line of stdout.
 *
 * argv[1] is the directory and argv[2] the file path below it; the file
 * opened is "argv[1]/argv[2]" (no slash is inserted when argv[1] is empty).
 * Only the first MAX_SCAN_LINES reads of the file are examined.  Markup
 * nested inside the title is dropped, and a line break inside the title
 * text is replaced by a single space.
 *
 * If no <TITLE> tag is seen in the scanned part of the file, argv[2] is
 * printed instead.  If a title starts but its closing tag is not seen,
 * the text collected so far is printed as the title.
 *
 * Exit status: 0 on success, 1 on wrong usage, 2 if the path is too long
 * for the internal buffer or the file cannot be opened.
 */
int
main(int argc, char **argv)
{
    FILE *fp;
    char filename[256];
    char buffer[1024];
    const char *prog;
    char *p;
    size_t dir_len;
    size_t name_len;
    size_t len;
    int in_title = 0;       /* Between <TITLE> and </TITLE> */
    int in_tag = 0;         /* Inside some other tag, looking for its '>' */
    int had_newline;
    int lines = 0;

    /* argv[0] may legally be NULL; never hand that to "%s". */
    prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "extract-title";

    if (argc != 3) {
        fprintf(stderr,
                "\n"
                "This program takes an HTML document and extracts to its stdout\n"
                "the TITLE of the document, all in one line.\n\n"
                "Usage:\n"
                "\t%s directory filename\n\n", prog);
        exit(1);
    }

    /* Room is needed for directory + '/' + file path + terminating NUL. */
    dir_len = strlen(argv[1]);
    name_len = strlen(argv[2]);
    if (dir_len + 1 + name_len + 1 > sizeof filename) {
        fprintf(stderr, "%s: Path \"%s/%s\" is too long\n",
                prog, argv[1], argv[2]);
        exit(2);
    }
    strcpy(filename, argv[1]);
    if (*filename)
        strcat(filename, "/");
    strcat(filename, argv[2]);

    if (!(fp = fopen(filename, "r"))) {
        fprintf(stderr, "%s: Unable to open file \"%s\"\n",
                prog, filename);
        exit(2);
    }

    while (lines++ < MAX_SCAN_LINES &&
           fgets(buffer, sizeof buffer, fp) != NULL) {
        /*
         * Strip the line terminator only if there really is one: an
         * over-long line or a last line without newline must not lose
         * its final character.
         */
        len = strlen(buffer);
        had_newline = (len > 0 && buffer[len - 1] == '\n');
        if (had_newline) {
            buffer[--len] = '\0';
            if (len > 0 && buffer[len - 1] == '\r')
                buffer[--len] = '\0';
        }

        p = buffer;
        while (*p) {
            if (in_tag) {
                p = strchr(p, '>');
                if (p == NULL)
                    break;              /* Tag continues on the next line */
                p++;
                in_tag = 0;
            }

            /* Copy (inside the title) or skip text up to the next tag. */
            while (*p && *p != '<') {
                if (in_title)
                    fputc(*p, stdout);
                p++;
            }

            if (!*p) {
                /* A real line break inside the title becomes one space. */
                if (in_title && had_newline)
                    fputc(' ', stdout);
                break;
            }

            if (!in_title && starts_with_nocase(p, "<TITLE>")) {
                p += 7;
                in_title = 1;
            }
            else if (in_title && starts_with_nocase(p, "</TITLE>")) {
                fputc('\n', stdout);
                fclose(fp);
                return 0;
            }
            else {
                in_tag = 1;             /* Some other tag: skip to its '>' */
            }
        } /* while stuff in buffer */
    } /* while not EOF and not very many lines read */

    fclose(fp);

    if (in_title)
        fputc('\n', stdout);        /* Unterminated title: finish the line */
    else
        printf("%s\n", argv[2]);    /* No title found: use the file name */

    return 0;
}
