import pycurl
from StringIO import StringIO
import re
import sys
def getpage(url):
    """Download *url* with pycurl and return the response body.

    Args:
        url: Address to fetch; handed to libcurl unchanged.

    Returns:
        The body collected in a StringIO buffer, or None when the
        transfer itself fails (pycurl.error raised by perform()).
    """
    c = pycurl.Curl()
    resp = StringIO()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, resp.write)
        try:
            c.perform()
        except pycurl.error:
            # Only transfer failures map to None; anything else
            # (KeyboardInterrupt, programming errors) must propagate.
            return None
    finally:
        # Always release the libcurl handle, on success and on failure.
        c.close()
    return resp.getvalue()
def scanpage(html, keywords):
    """Count case-insensitive occurrences of each keyword in *html*.

    Args:
        html: Text to search.
        keywords: Iterable of keyword strings. Surrounding whitespace
            (e.g. the trailing newline left by readlines()) is stripped.
            Each keyword is passed to re.findall as a regular-expression
            pattern, so regex metacharacters keep their special meaning.

    Returns:
        A dict mapping every non-blank stripped keyword to the number of
        non-overlapping matches found in *html*.
    """
    count = {}
    for kwd in keywords:
        pattern = kwd.strip()
        if not pattern:
            # An empty pattern matches at every position and would add a
            # meaningless '' entry with count len(html) + 1.
            continue
        count[pattern] = len(re.findall(pattern, html, re.I))
    return count
if __name__ == '__main__':
    # Script entry point: read a URL list and a keyword list (one entry per
    # line), fetch every URL, and print a dict of per-URL keyword counts.
    if len(sys.argv) < 3:
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement this script otherwise uses.
        print('Usage: python scanner.py [url_file] [kwd_file]')
        # Non-zero status so shells and callers can detect the misuse.
        sys.exit(1)

    # Context managers close the files even if reading raises.
    with open(sys.argv[1], 'r') as f:
        urls = f.readlines()
    with open(sys.argv[2], 'r') as f:
        keywords = f.readlines()

    res = {}
    for line in urls:
        url = line.strip()
        if not url:
            # Blank lines in the URL file are not URLs; skip them.
            continue
        html = getpage(url)
        if html is None:
            # One unreachable URL should not discard the remaining ones;
            # report it and keep going.
            sys.stderr.write('Failed to fetch %s\n' % url)
            continue
        res[url] = scanpage(html, keywords)
    print(res)