#!/usr/bin/env python
import datetime
import HTMLParser
import os.path
import urllib

class Parse(HTMLParser.HTMLParser):
	"""Mirror PHD Comics strips listed in an archive index page.

	The parser looks for dates written as ``month/day/year`` inside a ``<b>``
	element that is itself inside an ``<a>`` whose first attribute is an
	``href`` pointing at the comic archive page.  For every such date it
	works out the image filename used by the site, downloads the image into
	``phdarchive/`` and creates ``nice-archive/YYYY-MM-DD.gif`` as a symlink
	to it.  When the image is already present as a regular file, or the
	download does not return a GIF, a small text file holding the image
	filename is written at the link path instead.
	"""

	# Only anchors whose href starts with this prefix are comic links.
	_ARCHIVE_LINK_PREFIX = "http://www.phdcomics.com/comics/archive.php?comicid="

	# Filenames used for 22-25 of the month at the end of 1997, keyed by day.
	_LATE_1997_FILENAMES = {
		"22": "phddead.gif",
		"23": "phdthank1.gif",
		"24": "phdthank2.gif",
		"25": "phdthank3.gif",
	}

	# Dates whose image does not follow the regular naming scheme, keyed by
	# the (year, month, day) strings exactly as they appear in the listing.
	_SPECIAL_FILENAMES = {
		("2000", "6", "20"): "phdsummer2000.gif",
		("2000", "6", "28"): "phd062800b.gif",
		("2001", "3", "15"): "phdhaiku1.gif",
		("2001", "3", "24"): "phd032401b.gif",
		("2001", "9", "11"): "phd091201s.gif",
		("2002", "5", "31"): "phd052802s.gif",
		("2002", "6", "6"): "phd052902s.gif",
		("2002", "6", "16"): "phd061002s.gif",
		("2003", "3", "31"): "phd013103s.gif",
		("2005", "3", "24"): "phd032205s.gif",
		("2005", "3", "26"): "phd031905s.gif",
		("2005", "3", "29"): "phd032405s.gif",
		("2005", "3", "31"): "phd032505s.gif",
		("2005", "4", "2"): "phd031105s.gif",
		("2005", "4", "5"): "phd032005s.gif",
		("2005", "4", "7"): "phd032705s.gif",
		("2005", "4", "9"): "phd031205_ads.gif",
		("2006", "5", "11"): "phd112506ss.gif",
	}

	def __init__(self):
		# Track whether we are inside a comic link and a <b> nested in it.
		self.__in_a = False
		self.__in_b = False
		HTMLParser.HTMLParser.__init__(self)

	def handle_starttag(self, tag, attrs):
		"""Enter the "comic link" / "bold inside comic link" states."""
		if tag == "a":
			# The href must be the first attribute; a valueless href is
			# reported as None by the parser and can never match.
			if attrs and attrs[0][0] == "href":
				href = attrs[0][1]
				if href is not None and href.startswith(self._ARCHIVE_LINK_PREFIX):
					self.__in_a = True
		elif self.__in_a and tag == "b":
			self.__in_b = True

	def handle_endtag(self, tag):
		"""Leave the states entered by handle_starttag."""
		if tag == "b":
			self.__in_b = False
		if tag == "a":
			self.__in_a = False

	def _archive_filename(self, month, day, year):
		"""Return the image filename the site uses for the given date.

		All three arguments are the strings split out of the listing.
		Raises ValueError for non-numeric parts and KeyError for a late-1997
		date that has no known filename.
		"""
		if year == "1997":
			if int(month) < 11 or (int(month) == 11 and int(day) < 22):
				return "phd%02i%02i.gif" % (int(month), int(day))
			return self._LATE_1997_FILENAMES[day]
		if year == "1998":
			if int(month) < 4:
				return "phd%02i%02i.gif" % (int(month), int(day))
			if int(month) < 6:
				return "phd%02i%02is.gif" % (int(month), int(day))
			return "phd%02i%02i%02is.gif" % (int(month), int(day), int(year[-2:]))
		if year == "1999" and day == "0":
			return "phdthisweek1.gif"
		special = self._SPECIAL_FILENAMES.get((year, month, day))
		if special is not None:
			return special
		return "phd%02i%02i%02is.gif" % (int(month), int(day), int(year[-2:]))

	def _write_placeholder(self, link, fn):
		"""Write a text file at *link* that names the image file *fn*."""
		with open(link, "w") as f:
			f.write(fn + "\n")

	def handle_data(self, data):
		"""Fetch the comic for a ``month/day/year`` date found in the listing.

		Does nothing unless the text is inside a <b> within a comic link.
		Any error is reported on stdout and re-raised.  A partially
		downloaded image is removed, but an image that already existed
		before this call is never deleted.
		"""
		if not (self.__in_a and self.__in_b):
			return
		try:
			month, day, year = data.split("/")
		except ValueError:
			# The date parts do not exist yet, so report the raw text.
			print("Exception on %r" % (data,))
			raise

		filename = None
		downloading = False
		try:
			fn = self._archive_filename(month, day, year)
			url = "http://www.phdcomics.com/comics/archive/" + fn
			filename = "phdarchive/" + fn
			link = "nice-archive/%04i-%02i-%02i.gif" % (int(year), int(month), int(day))
			if os.path.isfile(filename) and not os.path.islink(filename):
				print("File already exists %s %s %s" % (day, month, year))
				self._write_placeholder(link, fn)

			elif not os.path.isfile(filename) and not os.path.islink(filename):
				# From here on the file at `filename` is ours to clean up.
				downloading = True
				filename, headers = urllib.urlretrieve(url, filename)
				if headers["Content-Type"] != "image/gif":
					# The server answered with something other than the
					# image (for instance an error page): discard it.
					print("Error with %s %s %s" % (day, month, year))
					os.remove(filename)
					self._write_placeholder(link, fn)
				else:
					if os.path.isfile(link):
						os.remove(link)
					os.symlink("../%s" % filename, link)

		except Exception:
			print("Exception on %s %s %s" % (day, month, year))
			# Only remove what this call started to download.
			if downloading and os.path.isfile(filename):
				os.remove(filename)
			raise

if __name__ == "__main__":
	# Parse the saved archive listing; every comic date found in it triggers
	# a download (see Parse.handle_data).
	p = Parse()
	# Read the whole listing in one go and make sure the file is closed
	# even if parsing raises.
	with open("archive_list.php", "r") as listing:
		p.feed(listing.read())

