// $Id: fetch.c,v 1.42 2008/12/10 22:53:33 lynx Exp $ // vim:syntax=lpc
//
// generic HTTP GET client, mostly used for RSS -
// but we could fetch any page or data with it, really
// tobij even allowed the object to have the URL as its object name. fancy!  ;)

// Per-file debug switch: when Dfetch is defined, its value replaces the
// DEBUG setting for the rest of this file.
#ifdef Dfetch
# undef DEBUG
# define DEBUG Dfetch
#endif

#include <ht/http.h>
#include <net.h>
#include <uniform.h>
#include <services.h>
#include <regexp.h>

virtual inherit NET_PATH "output"; // virtual: in case we get inherited..
inherit NET_PATH "connect";
//inherit NET_PATH "place/master";

inherit NET_PATH "queue";

// Request headers sent with every fetch. Names are stored in lower case so
// that the same header cannot show up twice; HTTP header names are
// case-insensitive by spec.
volatile mapping rheaders = ([ "user-agent": SERVER_VERSION ]);

// Response headers: 'headers' is filled while a reply is being read,
// 'fheaders' holds the headers of the last finished transfer.
volatile mapping headers;
volatile mapping fheaders;

// Status line of the reply: numeric code and the text following it.
volatile int http_status;
volatile string http_message;

// Connection state: target port, "request in progress" flag, and whether
// the URL asked for https.
volatile int port;
volatile int fetching;
volatile int ssl;

// buffer is scratch space: first the request header text, then the body
// of the reply as it arrives. fetched is the body of the last finished
// transfer.
volatile string buffer;
volatile string fetched;

// Pieces of the request: full URL, host to connect to (thehost), the host
// and resource as split out of the URL in real_logon(), and the method.
volatile string url;
volatile string thehost;
volatile string host;
volatile string resource;
volatile string method;

// Request body: either a ready-made string or a mapping of form fields.
volatile mixed rbody;

// When set, body lines are handed to the callbacks as they arrive.
volatile int stream;

// Input handlers, declared up front because they hand over to one another
// through next_input_to(): status line, then header lines, then body lines.
int parse_status(string all);
int parse_header(string all);
int buffer_content(string data);

// Query the host this object connects to (0 until connect() has parsed it).
string qHost() {
	return thehost;
}

// Request a URL.
//
//   murl - the URL to fetch
//   meth - request method; "GET" when omitted
//   body - optional request body: a string is sent as is, a non-empty
//          mapping is turned into a form query string by real_logon()
//   hdrs - optional extra request headers, merged into rheaders
//   strm - when set, body lines are passed to the queued callbacks as
//          they arrive instead of being collected first
//
// A connection is only opened when no request is currently in progress.
varargs void fetch(string murl, string meth, mixed body, mapping hdrs, int strm) {
	method = meth || "GET";
	rbody = body;
	stream = strm;
	if (hdrs) {
		// rheaders is keyed by lower-case names (sReqHeader() folds
		// them too). Merging the caller's spelling unchanged would let
		// "Content-Type" and "content-type" coexist and both be sent.
		foreach (mixed key, mixed value : hdrs)
			rheaders[stringp(key) ? lower_case(key) : key] = value;
	}
	if (url != murl) {
		// accept.c does this for us:
		//url = replace(murl, ":/", "://");
		// so we can use this method also in a normal way
		url = murl;
		// resource may need to be re-parsed (other params)
		resource = 0;
		// re-parse the hostname?
		//thehost = port = 0;
	}
	P3(("%O: fetch(%O)\n", ME, url))
	unless (fetching) connect();
}

// Hands back this object itself.
object load() {
	return ME;
}

// Set HTTP Basic credentials to be sent with the following requests.
//
// The scheme is written "Basic", the spelling RFC 7617 uses. The name is
// case-insensitive by spec, but a server that compares it literally would
// reject the previous lower-case "basic".
void sAuth(string user, string password) {
	rheaders["authorization"] = "Basic "+ encode_base64(user +":"+ password);
}

// Replace the User-Agent request header; returns the value that was set.
string sAgent(string a) {
	rheaders["user-agent"] = a;
	return a;
}

// net/place/news code follows.

// Work out host, port and TLS use from the URL, then open the connection.
//
// thehost and port are determined once and reused on later calls; the
// scheme is looked at on every call. Marks a request as being in progress
// (fetching) and takes that mark back if the URL turns out to be unusable.
void connect() {
	mixed t;
	string h;

	fetching = 1;
	ssl = 0;
	// Derive the scheme every time. It used to be evaluated only while
	// thehost was still unknown, although ssl is cleared on each call, so
	// any re-fetch of an https URL went out without TLS.
	if (stringp(url) && sscanf(url, "http%s://%s/%!s", t, h) && h) {
		ssl = t == "s";
		unless (thehost) thehost = h;
	} else unless (thehost) {
		// Also covers a URL without a "/" after the host, where only
		// the scheme part matches and no host comes out.
		P0(("%O couldn't parse %O\n", ME, url))
		// Nothing is in progress after all. Leaving the flag set would
		// make every later fetch()/refetch() skip connect().
		fetching = 0;
		return;
	}
	//thehost = lower_case(thehost); // why? who needs that?
	P4(("URL, THEHOST: %O, %O\n", url, thehost))
	unless (port) {
		// split an explicit ":port" off the host, else use the default
		// port of the scheme
		unless (sscanf(thehost, "%s:%d", thehost, port) == 2)
		    port = ssl? HTTPS_SERVICE: HTTP_SERVICE;
		rheaders["host"] = thehost;
	}
	P2(("Resolving %O and connecting.\n", thehost))
	::connect(thehost, port);
}

// Some servers treat header names as case sensitive, so spell them the
// customary way: the first character and every character that follows a
// dash are upper-cased ("content-type" becomes "Content-Type"). That covers
// most header names, not all of them.
string http_header_capitalize(string name) {
	// called with each matched piece, returns its replacement
	closure raise = (: return upper_case($1); :);

	// flag 1: replace every match, not just the first
	return regreplace(name, "(^.|-.)", raise, 1);
}

// Send the HTTP request once the connection is up.
//
// Resets the per-transfer state, builds the request from method, resource,
// rheaders and rbody, emits it and arranges for the status line to be read
// by parse_status().
//
// Returns -1 when the inherited logon() reports failure, -3 when there is
// no URL, and 0 after the request has been sent.
varargs int real_logon(int failure) {
	string scheme;

	headers = ([ ]);
	http_status = 500;
	http_message = "(failure)";	// used by debug only

	unless(::logon(failure)) return -1;
	unless (url) return -3;
	unless (resource) {
		sscanf(url, "%s://%s/%s", scheme, host, resource);
		// No path came out of the URL: ask for the root instead of
		// concatenating a 0 into the request line below.
		unless (resource) resource = "";
	}

	string body = "";
	if (stringp(rbody)) {
	    body = rbody;
	} else if (mappingp(rbody) && sizeof(rbody)) {
	    body = make_query_string(rbody);
	    unless (rheaders["content-type"])
		rheaders["content-type"] = "application/x-www-form-urlencoded";
	}
	if (strlen(body)) {
	    // kept as a string like every other header value
	    rheaders["content-length"] = to_string(strlen(body));
	} else {
	    // rheaders outlives the request: drop the length left behind by
	    // an earlier request with a body, or it would be announced for a
	    // request that carries none.
	    m_delete(rheaders, "content-length");
	}

	// buffer doubles as scratch space for the header text
	buffer = "";
	foreach (string key, string value : rheaders) {
	    buffer += http_header_capitalize(key) + ": " + value + "\r\n";
	}

	// we won't need connection: close w/ http/1.0
	//emit("Connection: close\r\n\r\n");		
	P2(("%O fetching /%s from %O\n", ME, resource, host))
	P4(("%O using %O\n", ME, buffer))
	emit(method + " /"+ resource +" HTTP/1.0\r\n"
	     + buffer + "\r\n" + body);

	// from here on buffer collects the body of the reply
	buffer = "";
	next_input_to(#'parse_status);
	return 0; // duh.
}

// Connection callback.
//
//   failure - passed on to the inherited logon() or to real_logon()
//   sub     - when set, only the inherited logon() runs and its result is
//             returned (used when called from xmlrpc.c, where TLS is not
//             possible anyway)
//
// Otherwise the request is sent by real_logon(): directly for plain
// connections, as the callback of the TLS setup when ssl is set. Returns 0
// in both of those cases.
varargs int logon(int failure, int sub) {
// net/connect disables telnet for all robots and circuits
#if 0 //__EFUN_DEFINED__(enable_telnet)
	// when fetching the spiegel rss feed, telnet_neg() occasionally
	// crashes. fixing that would be cool, but why have the telnet
	// machine enabled at all?
	enable_telnet(0);
#endif
	if (sub) return ::logon(failure);

	unless (ssl) {
		real_logon(failure);
		return 0;
	}
	tls_init_connection(ME, #'real_logon);
	return 0; // duh.
}

// Input handler for the first line of the reply, the HTTP status line.
//
// Stores the numeric code in http_status and the text after it in
// http_message, reports anything other than R_OK, and hands the following
// lines to parse_header(). Always returns 1.
int parse_status(string all) {
	string prot;
	string state;

	sscanf(all, "%s%t%s", prot, state);
	// A line without whitespace leaves state unset, and handing that to
	// sscanf() would raise a runtime error. In that case http_status
	// keeps the 500 preset by real_logon() and is reported below.
	if (stringp(state))
		sscanf(state, "%d%t%s", http_status, http_message);
	if (http_status != R_OK) {
		P0(("%O got %O %O from %O\n", ME,
		    http_status, http_message, host));
		monitor_report("_failure_unsupported_code_HTTP",
		    S("http/fetch'ing %O returned %O %O", url || ME,
		       http_status, http_message));
	}
	next_input_to(#'parse_header);
	return 1;
}

// Input handler for the header lines of the reply.
//
// Each "name: value" line is stored in headers under the lower-cased name;
// lines that do not fit that shape are skipped. The empty line that ends
// the header section switches input over to buffer_content().
// Always returns 1.
int parse_header(string all) {
	string name, content;

	if (all == "") {
		// we only want this for status 200
		P2(("%O now waiting for http body\n", ME))
		next_input_to(#'buffer_content);
		return 1;
	}

	P2(("http/fetch::parse_header %O\n",  all))
	if (sscanf(all, "%s:%1.0t%s", name, content) == 2)
		headers[lower_case(name)] = content;
	next_input_to(#'parse_header);
	return 1;
}

// Input handler for the lines of the reply body.
//
// In stream mode every line goes straight to all queued callbacks as
// (line, headers, http_status, 1); otherwise the line is appended to
// buffer, followed by a newline. Always returns 1.
int buffer_content(string data) {
	P2(("%O body %O\n", ME, data))
	if (stream) {
		mixed *waiter;
		foreach (waiter : qToArray(ME)) {
			// Pass the headers of the reply being read. fheaders is
			// only assigned in disconnected(), so while streaming it
			// still holds the previous transfer's headers, or 0 on
			// the first one.
			funcall(waiter[0], data, waiter[1] ? headers : copy(headers), http_status, 1);
		}
	} else {
		buffer += data + "\n";
	}
	next_input_to(#'buffer_content);
	return 1;
}

// Called when the connection is gone, which for HTTP/1.0 marks the end of
// the reply.
//
//   remainder - input that had not been delivered as a full line yet
//
// Finalizes the transfer: records the fetch time, remembers Last-Modified
// for a conditional request next time, publishes body and headers as
// fetched/fheaders and calls the queued callbacks as
// (fetched, headers, http_status). Returns 1.
disconnected(remainder) {
	P2(("%O got disconnected.. %O\n", ME, remainder))
	// headers only exists between real_logon() and the end of this
	// function. If the request was never sent, or this is a second
	// disconnect, start from an empty mapping instead of indexing 0.
	unless (mappingp(headers)) headers = ([ ]);
	headers["_fetchtime"] = isotime(ctime(time()), 1);
	if (headers["last-modified"])
	    rheaders["if-modified-since"] = headers["last-modified"];
	//if (headers["etag"])
	//    rheaders["if-none-match"] = headers["etag"]; // heise does not work with etag

	if (stream) {
		// the streamed lines were already delivered; only the tail
		// is left
		fetched = remainder;
	} else {
		fetched = buffer;
		// buffer is 0 in the same situations as headers above
		if (remainder)
			fetched = stringp(fetched) ? fetched + remainder : remainder;
	}
	fheaders = headers;
	buffer = headers = 0;
	switch (http_status) {
	default:
		// every status except R_NOTMODIFIED: notify all waiters
		mixed *waiter;
		while (qSize(ME)) {
			waiter = shift(ME);
			P2(("%O calls back.. body is %O\n", ME, fetched))
			funcall(waiter[0], fetched, waiter[1] ? fheaders : copy(fheaders), http_status);
		}
		if (http_status == R_OK) break;
		// doesn't seem to get here when HTTP returns 301 or 302. strange.
		// fall thru
	case R_NOTMODIFIED:
		// reset the queue; for R_NOTMODIFIED this drops the waiters
		// without calling them
		qDel(ME);
		qInit(ME, 150, 5);
	}
	fetching = 0;
	return 1;       // presume this disc was expected
}

// Access the fetched body, optionally through a callback.
//
//   cb         - callback; without one the call is a plain query
//   force      - when content is already there, call cb right away
//   willbehave - when set, cb gets the header mapping itself rather than
//                a copy
//
// When nothing has been fetched yet, the callback is queued for the end of
// the transfer. When content is there and force is not set, cb is not
// called. Returns the fetched body in every case.
varargs string content(closure cb, int force, int willbehave) {
	unless (cb) return fetched;

	unless (fetched) {
		enqueue(ME, ({ cb, willbehave }));
		return fetched;
	}

	if (force) {
		mapping hdrs = willbehave ? fheaders : copy(fheaders);

		funcall(cb, fetched, hdrs);
	}
	return fetched;
}

// Headers of the last finished transfer: the mapping itself when the
// caller sets willbehave, a copy of it otherwise.
varargs mapping headers(int willbehave) {
	if (willbehave) return fheaders;
	return copy(fheaders);
}

// Look up one header of the last finished transfer; 0 when there are no
// headers yet. The key is used as given.
string qHeader(mixed key) {
	return mappingp(fheaders) ? fheaders[key] : 0;
}

// Query a request header; the name is matched regardless of case.
string qReqHeader(string key) {
	string name = lower_case(key);

	return rheaders[name];
}

// Set a request header; the name is stored in lower case.
void sReqHeader(string key, string value) {
	string name = lower_case(key);

	rheaders[name] = value;
}

// Queue a callback and fetch the current URL again, unless a request is
// already in progress.
//
//   cb         - callback to queue
//   willbehave - stored with the callback; when set, the callback is given
//                the header mapping itself rather than a copy
varargs void refetch(closure cb, int willbehave) {
	mixed *waiter = ({ cb, willbehave });

	enqueue(ME, waiter);
	if (fetching) return;
	connect();
}

// Object setup: create the callback queue and initialize it for this
// object. disconnected() re-initializes the queue with the same two
// numbers; what they mean is defined by the inherited queue code.
protected create() {
	qCreate();

	qInit(ME, 150, 5);
}