- Code: Select all
import urllib
import urlparse
import re
# import peewee
import json
from peewee import *
#from peewee import MySQLDatabase ('cpan', user='root',passwd='rimbaud')
db = MySQLDatabase('cpan', user='root',passwd='rimbaud')
class User(Model):
name = TextField()
cname = TextField()
email = TextField()
url = TextField()
class Meta:
database = db # this model uses the cpan database
User.create_table() #ensure table is created
url = "http://search.cpan.org/author/?W"
html = urllib.urlopen(url).read()
for lk, capname, name in re.findall('<a href="(/~.*?/)"><b>(.*?)</b></a><br/><small>(.*?)</small>', html):
alk = urlparse.urljoin(url, lk)
data = { 'url':alk, 'name':name, 'cname':capname }
phtml = urllib.urlopen(alk).read()
memail = re.search('<a href="mailto:(.*?)">', phtml)
if memail:
data['email'] = memail.group(1)
data = json.load('email') #your json data file here
for entry in data: #assuming your data is an array of JSON objects
user = User.create(name=entry["name"], cname=entry["cname"],
email=entry["email"], url=entry["url"])
user.save()
but this fails... see
- Code: Select all
)
martin@linux-70ce:~/perl> python cpan_100.py
Traceback (most recent call last):
File "cpan_100.py", line 47, in <module>
data = json.load('email') #your json data file here
File "/usr/lib/python2.7/json/__init__.py", line 286, in load
return loads(fp.read(),
AttributeError: 'str' object has no attribute 'read'
martin@linux-70ce:~/perl>
guess that i have made something wrong: We should try out to write like so:
- Code: Select all
data = []
- Code: Select all
entry = { 'url':alk, 'name':name, 'cname':capname }
and probably
- Code: Select all
data.append(entry.copy())
if you have some ideas - i would be glad to hear from you