Protoball:Count Game Tab Sources Usage.py
Jump to navigation
Jump to search
# Count how often each newspaper abbreviation is cited, per year, in the
# "sources" column of a Protoball "Games Tab" page, and print the result as a
# fixed-width table (one row per abbreviation plus an "all" total row, one
# column per year).
#
# The code avoids print statements and imports the URL opener defensively so
# the same file runs under both Python 2 (the original target) and Python 3.

import re
import sys

# Page suffix appended to "http://protoball.org/Games_Tab:".
REGION = "Greater_New_York_City"

# Newspaper abbreviations searched for in each row's source cell.
JOURNALS = (
    "BDE", "BE&KCD", "NDA", "NYC", "NYDT", "NYH", "NYMN",
    "NYSM", "NYT", "PSOT", "SG", "SOT", "TS", "WSOT",
)

# Key of the row that accumulates every citation regardless of journal.
TOTAL_KEY = "all"

YEAR_RE = re.compile(r"\d{4}")

# Width of each year column; matches the "%-5s" / "%5s" formats of the
# original script.
# NOTE(review): the literal padding strings were whitespace-collapsed in the
# copy of the script this was rewritten from; 5-wide padding is assumed.
COLUMN_WIDTH = 5


def fetch_rows(region):
    """Download the Games Tab page for *region* and extract its data rows.

    Returns a list of ``(date_text, source_text)`` tuples: the text of the
    first and last ``<td>`` of every row (except the first row) of every
    ``table.nice`` on the page.  Rows without any ``<td>`` are skipped.
    """
    # Imported here so the pure counting/formatting helpers below can be
    # used (and tested) without network access or bs4 installed.
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    from bs4 import BeautifulSoup

    url = "http://protoball.org/Games_Tab:" + region.replace(" ", "_")
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(html, "html.parser")

    rows = []
    for table in soup.select("table.nice"):
        # The first <tr> of each table is skipped, as in the original script
        # (presumably a header row -- confirm against the page markup).
        for row in table.select("tr")[1:]:
            cells = row.find_all("td")
            if not cells:
                continue
            rows.append((cells[0].get_text(), cells[-1].get_text()))
    return rows


def count_sources(rows, journal_names=JOURNALS):
    """Tally journal citations per year.

    *rows* is an iterable of ``(date_text, source_text)`` pairs.  The year of
    a row is the first run of four digits in ``date_text``; rows without one
    are skipped.  A journal is counted once per row when its abbreviation
    appears in ``source_text`` as a whole word.

    Returns ``{name: {year: count}}`` with one entry per journal name plus
    ``TOTAL_KEY``.  The total is incremented once per matching journal, so a
    row citing two journals adds two to the total for its year.
    """
    # Compile once, and escape the names so characters that are special in a
    # regular expression are matched literally.
    patterns = [
        (name, re.compile(r"\b%s\b" % re.escape(name)))
        for name in journal_names
    ]

    counts = dict((name, {}) for name in journal_names)
    counts[TOTAL_KEY] = {}

    for date_text, source_text in rows:
        found = YEAR_RE.search(date_text)
        if found is None:
            continue
        year = found.group(0)
        # Only the real journal names are searched for; the total row is
        # never matched against the text itself.
        for name, pattern in patterns:
            if pattern.search(source_text):
                for key in (name, TOTAL_KEY):
                    bucket = counts[key]
                    bucket[year] = bucket.get(year, 0) + 1
    return counts


def collect_years(counts):
    """Return the sorted list of every year that has at least one count."""
    years = set()
    for per_year in counts.values():
        years.update(per_year)
    return sorted(years)


def format_table(counts):
    """Render *counts* (as returned by ``count_sources``) as a text table.

    The first line holds the year headings; each following line holds a
    label and its per-year counts, blank where a year has no count.  Columns
    are separated by a single space.  The label column is ``COLUMN_WIDTH``
    wide, or wider if a label is longer.  The result has no trailing newline.
    """
    years = collect_years(counts)
    labels = sorted(counts)
    label_width = max([COLUMN_WIDTH] + [len(label) for label in labels])
    blank = " " * COLUMN_WIDTH

    header = [" " * label_width] + [year.rjust(COLUMN_WIDTH) for year in years]
    lines = [" ".join(header)]

    for label in labels:
        per_year = counts[label]
        cells = [label.ljust(label_width)]
        for year in years:
            if year in per_year:
                cells.append(str(per_year[year]).rjust(COLUMN_WIDTH))
            else:
                cells.append(blank)
        lines.append(" ".join(cells))
    return "\n".join(lines)


def main():
    """Fetch the page for ``REGION`` and print its citation table."""
    counts = count_sources(fetch_rows(REGION))
    sys.stdout.write(format_table(counts) + "\n")


if __name__ == "__main__":
    main()