Protoball:Count Game Tab Sources Usage.py

From Protoball
Jump to navigation Jump to search
# Count how often each newspaper abbreviation is cited as a source, per year,
# in the Protoball "Games Tab" tables of one region, and print the result as a
# plain-text table (one row per journal, one column per year).
#
# The code is written to run unchanged on Python 2.7 and Python 3: output goes
# through sys.stdout.write instead of the print statement, and the URL opener
# is imported from whichever module the running interpreter provides.
import re
import sys

# Base URL of the Games Tab pages; the region name is appended to it.
GAMES_TAB_URL = "http://protoball.org/Games_Tab:"

# Region whose games tables are analysed when the script is run.
DEFAULT_REGION = "Greater_New_York_City"

# Source abbreviations searched for in the last cell of every table row.
JOURNALS = (
    "BDE", "BE&KCD", "NDA", "NYC", "NYDT", "NYH", "NYMN",
    "NYSM", "NYT", "PSOT", "SG", "SOT", "TS", "WSOT",
)

# Label of the synthetic row that accumulates every journal hit.
TOTAL_LABEL = "all"

# A year is taken to be the first run of four digits in the date cell.
YEAR_RE = re.compile(r"\d{4}")


def fetch_rows(region):
    """Download the Games Tab page for *region* and extract its data rows.

    Every ``table.nice`` element on the page is scanned; the first ``<tr>``
    of each table is skipped (presumably the header row -- verify against the
    live page).  Rows without any ``<td>`` cell are ignored.

    Returns a list of ``(date_text, source_text)`` pairs holding the text of
    the first and the last cell of each row.
    """
    # Imported locally so that the pure counting/formatting helpers can be
    # imported and tested without the network stack or bs4 being available.
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    from bs4 import BeautifulSoup

    response = urlopen(GAMES_TAB_URL + region.replace(" ", "_"))
    try:
        html = response.read()
    finally:
        response.close()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(html, "html.parser")

    rows = []
    for table in soup.select("table.nice"):
        for row in table.select("tr")[1:]:
            cells = row.find_all("td")
            if cells:
                rows.append((cells[0].get_text(), cells[-1].get_text()))
    return rows


def count_sources(rows, journal_names=JOURNALS):
    """Tally journal mentions per year.

    *rows* is an iterable of ``(date_text, source_text)`` pairs.  The year of
    a row is the first four-digit run in ``date_text``; rows without one are
    skipped instead of aborting the whole report.  A journal counts once per
    row in which its abbreviation appears as a whole word in ``source_text``.

    Returns a dict mapping every name in *journal_names*, plus ``"all"``, to
    a ``{year: count}`` dict.  The ``"all"`` entry is incremented once per
    journal hit, so it equals the column sum of the individual journals.
    """
    # Compile once up front; escape the names so that characters such as "&"
    # can never be interpreted as regex syntax.
    patterns = [
        (name, re.compile(r"\b%s\b" % re.escape(name)))
        for name in journal_names
    ]

    counts = {name: {} for name in journal_names}
    counts[TOTAL_LABEL] = {}

    for date_text, source_text in rows:
        found = YEAR_RE.search(date_text)
        if found is None:
            continue
        year = found.group(0)

        # Only the real journal names are searched for: the "all" label is a
        # total, not an abbreviation, and must not match the English word.
        for name, pattern in patterns:
            if pattern.search(source_text):
                for label in (name, TOTAL_LABEL):
                    per_year = counts[label]
                    per_year[year] = per_year.get(year, 0) + 1
    return counts


def collect_years(counts):
    """Return the sorted list of every year that occurs in *counts*."""
    return sorted({year for per_year in counts.values() for year in per_year})


def format_table(counts, years):
    """Render *counts* as a text table with one column per entry of *years*.

    Rows are ordered by label.  The label column is at least five characters
    wide and grows to fit the longest label; every year column is five
    characters wide, right-aligned, and left blank where a journal has no
    entry for that year.  Cells are separated by a single space.  The result
    has no trailing newline.
    """
    labels = sorted(counts)
    label_width = max([5] + [len(label) for label in labels])

    header = [" " * label_width] + ["%5s" % year for year in years]
    lines = [" ".join(header)]

    for label in labels:
        per_year = counts[label]
        cells = ["%-*s" % (label_width, label)]
        for year in years:
            if year in per_year:
                cells.append("%5s" % per_year[year])
            else:
                cells.append("     ")
        lines.append(" ".join(cells))
    return "\n".join(lines)


def main():
    """Fetch the default region's games tables and print the usage table."""
    counts = count_sources(fetch_rows(DEFAULT_REGION))
    sys.stdout.write(format_table(counts, collect_years(counts)) + "\n")


if __name__ == "__main__":
    main()