#!/usr/bin/perl

#redirect to archive.org
#
#if archive.org doesn't have the date specified, it returns a 302 with the nearest date
#this script takes the Location header from the 302 response and returns it to Squid

use IO::Handle;
use LWP::UserAgent;

# Unbuffer STDOUT so each reply line is written out as soon as it is printed.
$| = 1;

# Set to 1 to append every input and output line to the debug log.
$debug = 0;

# How far back in time (in years) the requested archive.org snapshot should be.
my $years_back = 4;

# archive.org timestamp (YYYYMMDDhhmmss) for midnight of today's date,
# $years_back years ago. Computed once, when the helper starts.
my @now = localtime time;
$datestring = sprintf "%04d%02d%02d000000",
    $now[5] + 1900 - $years_back,    # localtime years are offset from 1900
    $now[4] + 1,                     # localtime months are 0-based
    $now[3];

# Open the debug log only when debugging is enabled. If it cannot be opened,
# report why on STDERR and turn debugging off rather than printing to a
# closed handle for the rest of the run.
if ($debug == 1) {
    if (open(DEBUG, '>>', '/usr/local/squid/var/logs/time_machine_debug.log')) {
        DEBUG->autoflush(1);
    }
    else {
        warn "time_machine: cannot open debug log: $!\n";
        $debug = 0;
    }
}

# Main request loop: read one request line at a time from STDIN/ARGV, print
# exactly one reply line per request.
#
# - Lines that already mention archive.org are echoed back unchanged.
# - Otherwise the first whitespace-separated field is taken as the URL, its
#   scheme is stripped, and archive.org is asked for the snapshot at
#   $datestring. If archive.org redirects to the nearest snapshot, that
#   Location is the reply; if it serves the requested timestamp directly, the
#   archive URL itself is the reply.
# - If the lookup fails, or the line holds no usable URL, the input line is
#   echoed back unchanged.

# A single user agent is reused for every request; its configuration never
# changes, so there is no need to rebuild it per line.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);

while (my $line = <>) {
    chomp $line;
    print DEBUG "INPUT- $line\n" if $debug == 1;

    # Default reply: hand the line back untouched.
    my $reply = $line;

    if ($line !~ m/archive\.org/) {
        my ($url) = split(" ", $line);

        # Strip the scheme. The limit of 2 keeps any later "//" in the path
        # or query string intact instead of truncating the URL there.
        my (undef, $target) = defined $url ? split(m{//}, $url, 2) : ();

        # Only rewrite when there is something after "scheme://"; tokens
        # without "//" (or empty lines) fall through to the default reply.
        if (defined $target && length $target) {
            my $archive_url = "http://web.archive.org/web/$datestring/$target";
            my $response    = $ua->get($archive_url);

            if ($response->is_success) {
                # previous() is undef when archive.org answered without a
                # redirect; in that case the URL we asked for is the answer.
                my $previous = $response->previous;
                my $location = $previous ? $previous->header('Location') : undef;
                $reply = defined $location ? $location : $archive_url;
            }
        }
    }

    chomp $reply;
    print "$reply\n";
    print DEBUG "OUTPUT- $reply\n" if $debug == 1;
}

close(DEBUG) if $debug == 1;
