I am trying to get names of restaurants, their addresses and their phone numbers.
My code keeps on getting stuck in the 2nd definition. The first def works fine. I am not sure why as I cant identify any mistake. The loop just does not go through.
I would appreciate someone to comment if I am doing an obvious mistake.
Thanks
from urllib2 import urlopen
from csv import writer
def get_urls_of_restaurant():
list_urls = []
n = 0
nn = 0
for i in range(6):
url = urlopen('http://www.go.co.tz/index.php/restaurants/masaki?start=' + str(nn)).readlines() #open URL whis lists restaurants
while n < len(url):
if '<h2 class="contentheading">' in url[n]:
list_urls.append(url[n+1].split('"')[1])
n += 1
n = 0
nn += 3
list_urls.reverse()
print "Geting urls done! Get %s" %len(list_urls) + ' urls.'
return list_urls
def open_url_and_write_data(list_urls):
n = len(list_urls)-1
csv_file = open('restdar_guide.csv', 'wb')
file_writer = writer(csv_file, delimiter=';')
file_writer.writerow(['Name'] + ['address'] + ['phone'])
while n >= 0:
print 'Reading %s' % str(int(len(list_urls))-n) + " element of %s" % len(list_urls) + " element's..."
url = urlopen('http://www.go.co.tz' + list_urls[n]).readlines()
num_str = 0
list_write = []
while num_str < len(url):
if '<title>' in url[num_str]:
list_write.append(url[num_str].split('<')[0][7:])
if 'Location:</strong>' in url[num_str]:
list_write.append(url[num_str].split('<')[1][9:])
else:
list_write.append('unknown')
if '<li><strong>Tel:</strong>' in url[num_str]:
list_write.append(url[num_str].split('<')[2][10:])
else:
list_write.append('unknown')
file_writer.writerow([list_write[0]] + [list_write[1]] + [list_write[2]])
n -= 1
csv_file.close()
print 'Done!'
list_urls = get_urls_of_restaurant()
open_url_and_write_data(list_urls)