Unable to save the complete web fetched values into csv document - python

import requests
from bs4 import BeautifulSoup
import csv
import time

def fin_car(url):
    x = {}
    y = []
    page = ''
    while page == '':
        try:
            page = requests.get(url)
        except:
            print("Connection refused by the server..")
            print("Let me sleep for 5 seconds")
            print("ZZzzzz...")
            time.sleep(5)
            print("Was a nice sleep, now let me continue...")
            continue
    #page = requests.get(url)
    soup = BeautifulSoup(page.content, 'lxml')

    Precio = []
    price = soup.find('div', 'price').text
    Precio = (str(price).strip())
    print(Precio)

    #-------------------------------------------------------------#
    #            Tipo Propiedad          Tipo de oferta            #
    #-------------------------------------------------------------#
    Tipo_Propiedad = []
    Tipo_de_oferta = []
    T_1 = soup.find('div', 'box').h1.text
    text = (str(T_1).strip())
    l = text.find(' ')
    m = text.find(' ', l+1)
    n = text.find(' ', m+1)
    Tipo_Propiedad = text[0:l]
    Tipo_de_oferta = text[m+1:n]
    print(Tipo_Propiedad)
    print(Tipo_de_oferta)

    #-------------------------------------------------------------#
    #        Departamento    Ciudad    Zona    Barrio              #
    #-------------------------------------------------------------#
    Departamento = []
    Ciudad = []
    Zona = []
    Barrio = []
    first = soup.find('div', 'breadcrumb left')
    link = first.find('div')
    a_link = link.findAll('a')
    box1 = []
    for row in a_link:
        box1.append(row.text)
    Departamento = (box1[1:2].pop())
    Ciudad = (box1[2:3].pop())
    Zona = (box1[3:4].pop())
    Barrio = (box1[4:5])
    print(Departamento)
    print(Ciudad)
    print(Zona)
    print(Barrio)

    #-------------------------------------------------------------#
    #      Área    Habitaciones    Baños    Parqueaderos           #
    #-------------------------------------------------------------#
    box_2 = soup.find('div', 'features clearfix')
    box_2_1 = box_2.findAll('span')
    box2 = []
    Área = []
    Habitaciones = []
    Baños = []
    Parqueaderos = []
    for row2 in box_2_1:
        box2.append(str(row2.text).strip())
    for i in box_2_1:
        a = box2[0:1].pop()
        b = box2[1:2].pop()
        c = box2[2:3].pop()
        d = box2[3:4].pop()
        a1 = a[0:a.find(' ')]
        Área = (a1)
        Habitaciones = (b.rstrip()[-1])
        Baños = (c.rstrip()[-1])
        Parqueaderos = (d)
    print(Área)
    print(Habitaciones)
    print(Baños)
    print(Parqueaderos)

    #-------------------------------------------------------------#
    #  Área_Privada  Área_Const  Antigüedad  Admón  Estrato        #
    #  Estado  Piso_No                                             #
    #-------------------------------------------------------------#
    box_3 = soup.find('div', 'row features_2 ')
    box_3_1 = box_3.findAll('li')
    Área_Privada = []
    Área_Const = []
    Antigüedad = []
    Admón = []
    Estrato = []
    Estado = []
    Piso_No = []
    for li in box_3_1:
        heading_words = li.b.text.split()
        target_content = str(li.br.next_sibling).strip()
        if "privada:" in heading_words:
            Área_Privada = (target_content)
        elif "Const.:" in heading_words:
            Área_Const = (target_content)
        elif "Antigüedad:" in heading_words:
            Antigüedad = (target_content)
        elif "Admón:" in heading_words:
            Admón = (target_content)
        elif "Estrato:" in heading_words:
            Estrato = (target_content)
        elif "Estado:" in heading_words:
            Estado = (target_content)
        elif "Piso" in heading_words:
            Piso_No = (target_content)
    print(Área_Privada)
    print(Área_Const)
    print(Antigüedad)
    print(Admón)
    print(Estrato)
    print(Estado)
    print(Piso_No[0:1])

    #-------------------------------------------------------------#
    #     Actualizado    Visitas    Código_FincaRaiz               #
    #-------------------------------------------------------------#
    box4 = soup.find('div', 'box_content row')
    box4_1 = box4.findAll('span')
    vis = []
    Actualizado = []
    Visitas = []
    Código_FincaRaiz = []
    for i in box4_1:
        vis.append((str(i.text).strip()))
    for j in box4_1:
        e = vis[0:1].pop()
        f = vis[2:3].pop()
        Actualizado = e
        Código_FincaRaiz = f
    url = "https://www.fincaraiz.com.co/WebServices/Statistics.asmx/GetAdvertVisits?idAdvert={}&idASource=40&idType=1001".format(Código_FincaRaiz)
    page1 = requests.get(url)
    soup1 = BeautifulSoup(page1.content, 'lxml')
    visit1 = soup1.find('double').text
    Visitas = (visit1)
    print(Actualizado)
    print(Visitas)
    print(Código_FincaRaiz)

    x['Código FincaRaiz'] = Código_FincaRaiz
    x['Departamento'] = Departamento
    x['Ciudad'] = Ciudad
    x['Zona'] = Zona
    x['Barrio'] = Barrio
    x['Tipo Propiedad'] = Tipo_Propiedad
    x['Tipo de oferta'] = Tipo_de_oferta
    x['Precio'] = Precio
    x['Área'] = Área
    x['Área Privada'] = Área_Privada
    x['Área Const.'] = Área_Const
    x['Antigüedad'] = Antigüedad
    x['Baños'] = Baños
    x['Habitaciones'] = Habitaciones
    x['Parqueaderos'] = Parqueaderos
    x['Admón'] = Admón
    x['Estrato'] = Estrato
    x['Estado'] = Estado
    x['Piso No.'] = Piso_No
    x['Actualizado'] = Actualizado
    x['Visitas'] = Visitas
    y.append(x)

x = {}
y = []
filename = 'Fincar.csv'
with open(filename, 'w', newline='') as f:
    w = csv.DictWriter(f, ['Código FincaRaiz', 'Departamento', 'Ciudad', 'Zona', 'Barrio', 'Tipo Propiedad', 'Tipo de oferta',
                           'Precio', 'Área', 'Área Privada', 'Área Const.', 'Antigüedad', 'Baños', 'Habitaciones',
                           'Parqueaderos', 'Admón', 'Estrato', 'Estado', 'Piso No.', 'Actualizado', 'Visitas'])
    w.writeheader()
    for x in y:
        w.writerow(x)

tab = []
xen = []
key_value = 'https://www.fincaraiz.com.co'
for i in range(2, 6):
    tab.append('https://www.fincaraiz.com.co/finca-raiz/?ad=30|{}||||1||||||||||||||||||||||1|||1||||||'.format(i))
for j in tab:
    page = requests.get(j)
    soup = BeautifulSoup(page.content, 'lxml')
    index = soup.findAll('div', 'span-title')
    for i in index:
        xen.append(i.find('a').get('href'))
for j in xen:
    url = (key_value + j)
    fin_car(url)
I've tried to fetch values from a list of pages and save them to a CSV document, but the CSV only stores the last value.
I've tried multiple approaches, but it always gives the same output.
Also, the columns with blank values need to be filled with a nil value, but right now they only contain the [] symbol.
I'm new to Python and find it difficult to save the results. I need your support to achieve this task.
How should I proceed?

def fin_car(url):
    x = {}
    y = []
    ...

x = {}
y = []
These values are in different scopes. Assigning x inside fin_car doesn't affect it outside. You could change that using a global statement, but much better is to return from the function.
Even if you were changing the outside values of x and y, you only call fin_car long after writing to the CSV. The order of events in your code matters.
I suggest:
def fin_car(url):
    x = {}
    ...
    return x

with open...:
    w = csv.DictWriter(...)
    ...
    for j in tab:
        ...
    for j in xen:
        url = ...
        w.writerow(fin_car(url))
You don't need y at all.
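Putting that together, here is a minimal runnable sketch of the pattern, with the scraping stubbed out and a trimmed, placeholder field list (the real code would keep the full list and the BeautifulSoup parsing). It also answers the second part of the question: DictWriter's restval argument fills fields missing from a row, so blank columns get 'nil' instead of the printed form of an empty list.

import csv

def fin_car(url):
    # Stub for the real scraper: parse one listing and return a flat dict.
    # Leave a key out (or set it to '') when the page has no value for it.
    return {'Código FincaRaiz': '12345', 'Precio': '100000'}  # 'Barrio' intentionally missing

fieldnames = ['Código FincaRaiz', 'Precio', 'Barrio']  # trimmed for the sketch
urls = ['https://www.fincaraiz.com.co/example-1',      # placeholder listing URLs
        'https://www.fincaraiz.com.co/example-2']

with open('Fincar.csv', 'w', newline='') as f:
    w = csv.DictWriter(f, fieldnames, restval='nil')   # restval fills missing fields
    w.writeheader()
    for url in urls:
        w.writerow(fin_car(url))                       # write each row as it is scraped

The [] in your output comes from initialising every field as an empty list and never reassigning it when the page lacks that value; start from '' (or omit the key and let restval fill it) instead.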

Related

Python adding parameters to a function

I'm struggling with a problem that is probably simple to solve. The code below performs 3 requests to an API:
To get all the project IDs (data)
To return the excluded IDs (data2)
To return the concluded projects (data3)
After that, I created a new function, Listas(), whose result is the unique elements from list 1 that did not appear in list 2 or 3.
How can I use this result as a parameter to the teste() function?
Right now I'm passing teste(ids), but ids is the first data request (all the project IDs), and I need to use the Listas() result as the argument to teste.
import requests
import xlwt
import openpyxl

def Sults():
    headers = {
        "Authorization": "xxxxx",
        "Content-Type": "application/json;charset=UTF-8"
    }
    global id
    global id_exclude
    global id_conclude
    global nomeUnidade
    global dataInicio
    global dataFinal
    global responsavel
    global dtConclusao
    id = []
    id_exclude = []
    id_conclude = []
    nomeUnidade = []
    dataInicio = []
    dataFinal = []
    responsavel = []
    dtConclusao = []
    for count1 in range(3):
        url = "https://api/api/v1/implantacao/projeto?start={}&dtInicio=2022-01-01T18:02:55Z".format(count1)
        response = requests.get(url, headers=headers)
        data = response.json()
        url2 = "https://api/api/v1/implantacao/projeto?start={}&dtInicio=2022-01-01T18:02:55Z&concluido=false".format(count1)
        response2 = requests.get(url2, headers=headers)
        data2 = response2.json()
        url3 = "https://api/api/v1/implantacao/projeto?start={}&dtInicio=2022-01-01T18:02:55Z&concluido=true".format(count1)
        response3 = requests.get(url3, headers=headers)
        data3 = response3.json()
        # print(data)
        ids = unidades(data)
        Excel()
        unidades2(data2)
        unidades3(data3)
        Listas()
        teste(ids)

def unidades(data):
    #sheet.write(0, 5, "Modelo")
    for i in data['data']:
        id.append(i['id'])                            # append to each global list
        nomeUnidade.append(i['nome'])
        dataInicio.append(i['dtInicio'])
        dataFinal.append(i['dtFim'])
        responsavel.append(i['responsavel']['nome'])
    return id
    # note: the returns below are unreachable; a function exits at the first return
    return nomeUnidade
    return dataInicio
    return dataFinal
    return responsavel

def Excel():
    wb = openpyxl.Workbook()
    sheet = wb.active
    for i in range(len(id)):
        sheet.cell(row=i+1, column=1).value = id[i]
        sheet.cell(row=i+1, column=2).value = nomeUnidade[i]
        sheet.cell(row=i+1, column=3).value = dataInicio[i]
        sheet.cell(row=i+1, column=4).value = dataFinal[i]
        sheet.cell(row=i+1, column=5).value = responsavel[i]
    wb.save("NewUNIDADES6.xlsx")

def unidades2(data2):
    for j in data2['data']:
        id_exclude.append(j['id'])
    return id_exclude

def unidades3(data3):
    for k in data3['data']:
        id_conclude.append(k['id'])
    return id_conclude

def Listas():
    result = list(set(id) - set(id_exclude) - set(id_conclude))
    print("Implantação:")
    print(result)

def teste(result):
    listID = (result)
    print("List ID:")
    print(listID)
    headers = {
        "Authorization": "O3BldGxhbmQ7MTU5NTAxNTI1NTI1OA==",
        "Content-Type": "application/json;charset=UTF-8"
    }
    length = len(listID)
    nome = []
    codigoTarefa = []
    situacaoTarefa = []
    faseNome = []
    global ID_List
    ID_List = []  # ends up like [254, 254, 254, 255, 255, ...]: one entry per task row
    for li in range(length):
        for count in range(4):
            # start in the url is the counter: it goes up to 4, starting from 0 (API pagination)
            url = "https://api/api/v1/implantacao/projeto/{}/tarefa?start={}&limit=50".format(listID[li], count)
            print("Print")
            print(url)
            response = requests.get(url, headers=headers)
            data = response.json()
            unidades10(data, nome, codigoTarefa, situacaoTarefa, faseNome, listID[li])
    #print(nome)
    wb = openpyxl.Workbook()
    sheet = wb.active
    for i in range(len(nome)):
        sheet.cell(row=i+1, column=1).value = ID_List[i]
        sheet.cell(row=i+1, column=2).value = nome[i]
        sheet.cell(row=i+1, column=3).value = codigoTarefa[i]
        sheet.cell(row=i+1, column=4).value = situacaoTarefa[i]
        sheet.cell(row=i+1, column=5).value = faseNome[i]
    wb.save("TarefasNEW.xlsx")
    #print(codigoTarefa)
    #print(situacaoTarefa)
    #print(faseNome)

def unidades10(data, nome, codigoTarefa, situacaoTarefa, faseNome, ID):
    # ID is the listID[li] value passed in from teste()
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet("BACKOFFICE")
    coluna = 1
    for i in data['data']:
        nome.append(i['nome'])
        codigoTarefa.append(i['codigo'])
        situacaoTarefa.append(i['situacao'])
        faseNome.append(i['fase']['nome'])
        ID_List.append(ID)  # append the project id alongside each task row
        coluna += 1

if __name__ == '__main__':
    Sults()
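The change the question asks about is small: have Listas() return its result instead of only printing it, then pass that return value to teste(). A minimal sketch of the call flow, with stand-in lists in place of the three API responses (passing the lists as parameters rather than through globals, which also makes the data flow easier to follow):

def Listas(ids, ids_exclude, ids_conclude):
    # unique elements of the first list that appear in neither of the others
    return list(set(ids) - set(ids_exclude) - set(ids_conclude))

def teste(result):
    print("List ID:")
    print(result)

remaining = Listas([1, 2, 3, 4], [2], [4])  # stand-in data for data, data2, data3
teste(remaining)                            # prints [1, 3] (set order may vary)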

How can I read this barcode

How can I read this barcode in Python:
This is my code:
from time import time  # the snippet uses time() without showing the import

# KImage, qrdecode, scale_image, sharpen and autocontrast are the poster's own
# helpers, not shown in the question.
def do_one(image):
    print(image)
    image = KImage(image)
    l = time()

    def tick():
        # seconds elapsed since the previous call
        nonlocal l
        n = time()
        t = n - l
        l = n
        return t

    union = set()
    first = []
    first_t = 0
    total_t = 0
    results = []
    for scalar in [0.5, 0.2, 0.1, 1, 1.5]:
        tick()
        if scalar != 1:
            image_scaled = scale_image(image.image, scalar=scalar).convert("L")
        else:
            image_scaled = image.image.convert("L")
        res = qrdecode(image_scaled)
        st = tick()
        union.update(res)
        total_t += st
        if res:
            first = res
            first_t = total_t
        for sharpness in [0.1, 0.5, 1]:
            tick()
            image_scaled_sharp = sharpen(image_scaled, sharpness)
            res = qrdecode(image_scaled_sharp)
            t = tick()
            union.update(res)
            total_t += st + t
            if res:
                first = res
                first_t = total_t
        tick()
        image_scaled_autocontrast = autocontrast(image_scaled)
        res = qrdecode(image_scaled_autocontrast)
        t = tick()
        union.update(res)
        total_t += st + t
        if res:
            first = res
            first_t = total_t
    results.append({"file": image.filename,
                    "what": "do-all-the-things",
                    "result": list(union),
                    "time": total_t})
    # queue.put(results)
    return results
I am able to read several barcodes, but I have not been able to read this one, even after making further improvements and changes. Does anyone know how I can read this barcode?
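For comparison, a common baseline is the pyzbar package with a little Pillow preprocessing. This is an assumption about the stack, since the question's qrdecode helper isn't shown, but the same grayscale-plus-autocontrast pass often rescues marginal scans:

from PIL import Image, ImageOps
from pyzbar.pyzbar import decode

def read_barcode(path):
    # grayscale + autocontrast is a cheap first pass for low-contrast images
    img = ImageOps.autocontrast(Image.open(path).convert("L"))
    return [d.data.decode("utf-8") for d in decode(img)]

print(read_barcode("barcode.png"))  # hypothetical input file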

How to use append in a second for loop

So I am using append to extend my list of scraped apartments. In this code I run into a problem, because I have created a second for loop to change the page on the website. The first for loop gives a new page to the next for loop to scrape. But when it's done with one page, it just overwrites the last list. What am I doing wrong?
for page in range(1, 4):  # gives a new page to scrape
    r = requests.get(url + str(page))
    soup = bs(r.content)
    apartments = soup.select(".ListPage__cardContainer__39dKQ")
    base_path = "https://www.etuovi.com"
    x = []
    apartment_list = []
    for index, apartment in enumerate(apartments):
        if index == 2:  # just to not scrape every item
            break
        relative_path = apartment.a['href']
        full_path = base_path + relative_path
        id_number = apartment.a['id']
        apartment_list.append(get_apartment_data(full_path))  # this works for one page
    x.append(apartment_list)  # tried to make this work; think one list should be enough
And the functions:
def get_content_value(info_list_data):
    if info_list_data.find("li"):
        return [li.get_text(" ", strip=True).replace("\xa0", "").replace("€", "").replace("/ kk", "").replace("\n", "")
                for li in info_list_data.find_all("li")]
    else:
        return info_list_data.get_text(" ", strip=True).replace("\xa0", "").replace("€", "").replace("/ kk", "").replace("\n", "")
Last:
def get_apartment_data(url):
    r = requests.get(url)
    soup = bs(r.content)
    all_info_list = soup.find_all(class_="CompactInfoRow__infoRow__2hjs_ flexboxgrid__row__wfmuy")
    for info_list in all_info_list:
        info_list.prettify()
    info = {}
    for index, info_list in enumerate(all_info_list):
        content_key = info_list.find(class_="flexboxgrid__col-xs-12__1I1LS flexboxgrid__col-sm-4__3RH7g ItemHeader__itemHeader__32xAv").get_text(" ", strip=True)
        content_value = get_content_value(info_list.find(class_="flexboxgrid__col-xs-12__1I1LS flexboxgrid__col-sm-8__2jfMv CompactInfoRow__content__3jGt4"))
        info[content_key] = content_value
    return info
for page in range(1, 4):  # gives a new page to scrape
    r = requests.get(url + str(page))
    soup = bs(r.content)
    apartments = soup.select(".ListPage__cardContainer__39dKQ")
    base_path = "https://www.etuovi.com"
    x = []
    apartment_list = []
    for index, apartment in enumerate(apartments):
        if index == 2:  # just to not scrape every item
            break
        relative_path = apartment.a['href']
        full_path = base_path + relative_path
        id_number = apartment.a['id']
        apartment_list.append(get_apartment_data(full_path))
    x.append(apartment_list.copy())
You need to use the copy() method to make an independent copy. Otherwise, every time you change apartment_list, it changes inside your x list too; they behave like twin lists.
More generally:
x = []
lst = [1,2,3]
x.append(lst)
print (x)
lst[0] = 0
x.append(lst)
print (x)
Output:
[[1,2,3]]
[[0,2,3],[0,2,3]]
The right way is:
x = []
lst = [1,2,3]
x.append(lst.copy())
print (x)
lst[0] = 0
x.append(lst.copy())
print (x)
Output:
[[1,2,3]]
[[1,2,3],[0,2,3]]
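Note that in the code as posted, x and apartment_list are also re-initialised inside the page loop, so even copied lists only survive one iteration. If the goal is a single list of apartments across all pages, creating the list once before the page loop avoids the overwrite entirely. A sketch, with a placeholder listing URL and a stubbed get_apartment_data:

import requests
from bs4 import BeautifulSoup as bs

def get_apartment_data(url):
    return {"url": url}  # stub: the question's real parser goes here

base_path = "https://www.etuovi.com"
list_url = "https://www.etuovi.com/myytavat-asunnot?sivu="  # placeholder listing URL
all_apartments = []  # created once, before the page loop, so pages accumulate

for page in range(1, 4):
    soup = bs(requests.get(list_url + str(page)).content, "html.parser")
    for apartment in soup.select(".ListPage__cardContainer__39dKQ")[:2]:  # first 2 per page
        all_apartments.append(get_apartment_data(base_path + apartment.a["href"]))

print(len(all_apartments))  # rows from all pages, not just the last one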

ValueError: dict contains fields not in fieldnames even with if statement

I'm trying to pull all of the 2016 NY Times articles that have the word "economy" in them using the Times' API. I get the following error message at the end of my code:
ValueError: dict contains fields not in fieldnames: 'abstract'
And here is my code:
from nytimesarticle import articleAPI
api = articleAPI('0282db2f333f4f4095edd19f0660c978')

articles = api.search(q = 'economy',
                      fq = {'headline':'economy', 'source':['Reuters', 'AP', 'The New York Times']},
                      begin_date = 20151231)

def parse_articles(articles):
    news = []
    for i in articles['response']['docs']:
        dic = {}
        dic['id'] = i['_id']
        if i['abstract'] is not None:
            dic['abstract'] = i['abstract'].encode("utf8")
        dic['headline'] = i['headline']['main'].encode("utf8")
        dic['desk'] = i['news_desk']
        dic['date'] = i['pub_date'][0:10]  # cutting time of day
        dic['section'] = i['section_name']
        if i['snippet'] is not None:
            dic['snippet'] = i['snippet'].encode("utf8")
        dic['source'] = i['source']
        dic['type'] = i['type_of_material']
        dic['url'] = i['web_url']
        dic['word_count'] = i['word_count']
        locations = []
        for x in range(0, len(i['keywords'])):
            if 'glocations' in i['keywords'][x]['name']:
                locations.append(i['keywords'][x]['value'])
        dic['locations'] = locations
        subjects = []
        for x in range(0, len(i['keywords'])):
            if 'subject' in i['keywords'][x]['name']:
                subjects.append(i['keywords'][x]['value'])
        dic['subjects'] = subjects
        news.append(dic)
    return(news)

def get_articles(date, query):
    all_articles = []
    for i in range(0, 100):
        articles = api.search(q = query,
                              fq = {'source':['Reuters', 'AP', 'The New York Times']},
                              begin_date = 20151231,
                              end_date = 20160715,
                              sort = 'oldest',
                              page = str(i))
        articles = parse_articles(articles)
        all_articles = all_articles + articles
    return(all_articles)

econ_all = []
for i in range(2015, 2016):
    print 'Processing ' + str(i) + '...'
    econ_year = get_articles(str(i), 'economy')
    econ_all = econ_all + econ_year

import csv
keys = econ_all[0].keys()
with open('econ-mentions.csv', 'wb') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(econ_all)
It seems my if statement should prevent the error. Also, if I use "writerow", as I've seen sometimes mentioned on here, I get the entire list of details without creating the csv. Any help would be appreciated!
I am not sure what your problem was, but this code creates a file econ-mentions.csv with content.
from nytimesarticle import articleAPI

def parse_articles(articles):
    news = []
    for i in articles['response']['docs']:
        dic = {}
        dic['id'] = i['_id']
        if i['abstract'] is not None:
            dic['abstract'] = i['abstract'].encode("utf8")
        dic['headline'] = i['headline']['main'].encode("utf8")
        dic['desk'] = i['news_desk']
        dic['date'] = i['pub_date'][0:10]  # cutting time of day
        dic['section'] = i['section_name']
        if i['snippet'] is not None:
            dic['snippet'] = i['snippet'].encode("utf8")
        dic['source'] = i['source']
        dic['type'] = i['type_of_material']
        dic['url'] = i['web_url']
        dic['word_count'] = i['word_count']
        locations = []
        for x in range(0, len(i['keywords'])):
            if 'glocations' in i['keywords'][x]['name']:
                locations.append(i['keywords'][x]['value'])
        dic['locations'] = locations
        subjects = []
        for x in range(0, len(i['keywords'])):
            if 'subject' in i['keywords'][x]['name']:
                subjects.append(i['keywords'][x]['value'])
        dic['subjects'] = subjects
        news.append(dic)
    return(news)

def get_articles(date, query):
    all_articles = []
    for i in range(0, 100):
        articles = api.search(q = query,
                              fq = {'source':['Reuters', 'AP', 'The New York Times']},
                              begin_date = 20151231,
                              end_date = 20160715,
                              sort = 'oldest',
                              page = str(i))
        articles = parse_articles(articles)
        all_articles = all_articles + articles
    return(all_articles)

if __name__ == "__main__":
    api = articleAPI('0282db2f333f4f4095edd19f0660c978')
    articles = api.search(q = 'economy',
                          fq = {'headline':'economy', 'source':['Reuters', 'AP', 'The New York Times']},
                          begin_date = 20151231)
    econ_all = []
    for i in range(2015, 2016):
        print 'Processing ' + str(i) + '...'
        econ_year = get_articles(str(i), 'economy')
        econ_all = econ_all + econ_year

    import csv
    keys = econ_all[0].keys()
    with open('econ-mentions.csv', 'wb') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(econ_all)
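For the record, the error comes from building the header from econ_all[0].keys(): because of the if statement, the first article may lack 'abstract' while a later one has it, so the writer meets a field it was never told about. A small sketch of two ways around that, using stand-in rows and a trimmed field list:

import csv

rows = [{'id': 1}, {'id': 2, 'abstract': 'only this row has one'}]  # stand-in for econ_all

# Option 1: declare every field the parser can emit, instead of rows[0].keys().
fieldnames = ['id', 'abstract']

with open('econ-mentions.csv', 'w') as output_file:
    # Option 2: extrasaction='ignore' drops unexpected fields instead of raising;
    # restval='' fills fields that are missing from a given row.
    writer = csv.DictWriter(output_file, fieldnames, restval='', extrasaction='ignore')
    writer.writeheader()
    writer.writerows(rows)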

Python - Variable being printed over string

I am using Python 2.7 and I have a problem that I haven't encountered before: when I print a certain string and then a variable on the same line, the variable is printed over the string. E.g. the script contains print 'IP Rating = ', ipRating and the output in the command prompt is 'IP20ating = '. I have no idea why this is happening, and I have the same code for various variables and strings in the same script that all come out as expected. I have tried renaming the variable and changing the string, but there is still no difference. Has anybody encountered this error before, or have any ideas why it might be happening? I can post the code if requested.
Many thanks :)
EDIT
Here is the code. I know I may have repeated myself a few times and there are unnecessary libraries in there, but the way I work is by importing all libraries I might need and then removing unnecessary code at the end.
from bs4 import BeautifulSoup as Soup
from bs4 import BeautifulSoup
from urllib import urlopen
import webbrowser
import httplib
import urllib2
import urllib
import string
import mylib
import xlrd
import glob
import xlwt
import bs4
import sys
import os
import re

print '\nStarting Web Search'

found = False
while found == False:
    excelFile = "F:\\len\\web sheets completed\\csv formatted\\imported\\re-imported\\Import Corrections\\saxby web spreadsheet.xls"
    try:
        inFi = xlrd.open_workbook(excelFile)
        found = True
    except IOError:
        print 'File not found.'

inFi = xlrd.open_workbook(excelFile)
inWS = inFi.sheet_by_index(0)
headers = mylib.getHeader(inWS)
supplyHead = mylib.findHeader('Supplier Part Ref', headers)

saxbeginurl = "http://www.saxbylighting.com/index.php?pg=search&ser="
badLink = "index.php?pg=search&ser=10180&next=0"
resLink = "http://www.saxbylighting.com/images/ProductImages/Zoomed/"
overCount = 0

for t in range(524, 534):
    projection = 0
    ipRating = 0
    diameter = 0
    width = 0
    weight = 0
    length = 0
    height = 0
    i = 0
    w = 0
    l = 0
    h = 0
    d = 0
    p = 0
    x = 0
    iP = 0
    wei = 0
    imgStock = str(inWS.cell(t, supplyHead).value.encode('latin-1'))
    overCount = overCount + 1
    print '\n', imgStock
    if imgStock == '3TRAWI':
        url = 'http://www.saxbylighting.com/index.php?pg=details&prod=53'
    elif imgStock == '10313':
        url = 'http://www.saxbylighting.com/index.php?pg=details&prod=204'
    else:
        url = saxbeginurl + imgStock
    html_page = urllib2.urlopen(url)
    soup = BeautifulSoup(html_page)
    img_tags = soup.find_all("img")
    the_image_tag = soup.find("img", src='/images/dhl_logo.png')
    try:
        for dataSheet in soup.find('div', {'class': 'panes'}):
            #print dataSheet, ' -- ', str(i)
            i = i + 1
            if i == 4:
                reqData = str(dataSheet).split('<img', 1)[0]
                first_Data = reqData.replace('<br/>', '\n')
                second_Data = first_Data.replace('<b>', '')
                third_Data = second_Data.replace('</b>', '')
                fourth_Data = third_Data.replace(':', ': ')
                dataList = fourth_Data.split('\n')
                #print dataList
                for information in dataList:
                    if 'Weight' in dataList[wei]:
                        pre_Weight = dataList[wei]
                        sec_weight = str(pre_Weight).replace('Weight :', '')
                        weight = sec_weight.replace(' ', '')
                    wei += 1
                    if 'IP' in dataList[iP]:
                        ipRating = str(dataList[iP])
                    iP += 1
                for product_Dimensions in dataList:
                    if 'Product dimensions :' in dataList[x]:
                        #print dataList[x]
                        dimensionList = str(dataList[x]).replace('mm', 'mm:')
                        #print dimensionList
                        prelim_Dimensions = dimensionList.replace('Product dimensions :', '')
                        first_Dimensions = prelim_Dimensions.replace('cm', '0mm')
                        sec_Dimensions = first_Dimensions.replace(' ', ' ')
                        third_Dimensions = sec_Dimensions.strip()
                        dimenList = third_Dimensions.split('mm:')
                        #print dimenList
                        for project in dimenList:
                            if 'Proj' in dimenList[p]:
                                pre_pro = str(dimenList[p]).replace('Proj', '')
                                sec_pro = pre_pro.replace(':', '')
                                thro_pro = sec_pro.replace(' ', '')
                                projection = thro_pro
                            elif p == len(dimenList):
                                print 'Projection not found'
                            p += 1
                        for diamet in dimenList:
                            if 'dia' in dimenList[d]:
                                pre_dia = str(dimenList[d]).replace('dia', '')
                                sec_dia = pre_dia.replace(':', '')
                                third_dia = sec_dia.replace(' ', '')
                                diameter = third_dia
                            elif d == len(dimenList):
                                print 'Diameter not found'
                            d += 1
                        for heig in dimenList:
                            if 'H:' in dimenList[h]:
                                pre_hei = str(dimenList[h]).replace('H', '')
                                sec_hei = pre_hei.replace(':', '')
                                third_hei = sec_hei.replace(' ', '')
                                height = third_hei
                            elif h == len(dimenList):
                                print 'Height not found'
                            h += 1
                        for lent in dimenList:
                            if 'L:' in dimenList[l]:
                                pre_leng = str(dimenList[l]).replace('L', '')
                                sec_leng = pre_leng.replace(':', '')
                                third_leng = sec_leng.replace(' ', '')
                                length = third_leng
                            elif l == len(dimenList):
                                print 'Length not found'
                            l += 1
                        for wid in dimenList:
                            if 'W:' in dimenList[w]:
                                pre_wid = str(dimenList[w]).replace('W', '')
                                sec_wid = pre_wid.replace(':', '')
                                third_wid = sec_wid.replace(' ', '')
                                width = third_wid
                            elif w == len(dimenList):
                                print 'Width not found'
                            w += 1
                    x += 1
        print 'IP Rating = ', ipRating
        print 'Weight = ', weight
        print 'Projection = ', projection, 'mm'
        print 'Diameter = ', diameter, 'mm'
        print 'Length = ', length, 'mm'
        print 'Height = ', height, 'mm'
        print 'Width = ', width, 'mm'
    except TypeError:
        print 'Type Error... skipping this product and carrying on.'
Here is an example output
IP44ating =
Weight = .51KGS
Projection = 35 mm
Diameter = 0 mm
Length = 0 mm
Height = 90 mm
Width = 120 mm
I strongly suspect that your data ipRating that you think is IP20 is actually \rIP20. That is: you have a stray carriage return character (ASCII 13, 0x0D) at the start of the variable. The carriage return moves the print position back to the start of the line, and the variable then overwrites what you printed before.
You can test whether this is the problem by adding the line:
ipRating = ipRating.replace("\r", "")
before your print statement.
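A quick way to see the effect with a made-up value; repr() makes hidden control characters visible:
ipRating = '\rIP20'                      # stand-in for the scraped value
print 'IP Rating = ', ipRating           # the '\r' jumps back to column 0: IP20ating =
print repr(ipRating)                     # shows the hidden character: '\rIP20'
print 'IP Rating = ', ipRating.strip()   # strip() removes stray '\r' and '\n' too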
This is the proper way to do what you're doing:
print('IP Rating = %s' % ipRating)
or
print('IP Rating = %d' % ipRating)
That is just one example from all the print statements you have at the end of your code.
If you're putting a string variable in print, use %s; for an integer, use %d. If you have any more questions, just ask.
