I'm new to Python and would like to know how I can write a new CSV file that contains a simple list. I will then use this file in an Excel worksheet.
My ENTIRE code:
import csv
import urllib
from bs4 import BeautifulSoup
# Download the page and keep its raw HTML.
sock = urllib.urlopen("http://www.fatm.com.es/Datos_Equipo.asp?Cod=03CA0007")
htmlSource = sock.read()
sock.close()
# Parse the HTML, then locate the form with id "FORM1" and the first table inside it.
soup = BeautifulSoup(htmlSource)
form = soup.find("form", {'id': "FORM1"})
table = form.find("table")
# entidad: stripped text of every <td> in the table.
entidad = [item.text.strip() for item in table.find_all('td')]
# valores: the "value" attribute of every <input> in the form.
valores = [item.get('value') for item in form.find_all('input')]
# lista is bound to the same list object as entidad (no copy is made),
# so the inserts below modify entidad as well.
lista = entidad
i = 0
x = 1
# Runs for i = 0..10. Presumably the three statements below all belong to the
# loop body (indentation was lost) -- in that case valores[i] is inserted at
# index 2*i + 1, i.e. right after the i-th label. TODO confirm.
while i <=10:
lista.insert(i+x, valores[i])
i += 1
x += 1
print lista
# NOTE(review): writerows() receives the flat list of strings, so every string
# is treated as one row; the file object is never bound to a name or closed here.
w = csv.writer(file(r'C:\Python27\yo.csv','wb'),dialect='excel')
w.writerows(lista)
lista = [u'Club',
u'CLUB TENIS DE MESA PORTUENSE',
u'Nombre Equipo',
u'C.T.M. PORTUENSE',
u'Telefono fijo',
u'630970055',
u'Telefono Movil',
u'630970055',
u'E_Mail',
u'M.LOPEZ_70#HOTMAIL.COM',
u'Local de Juego',
u'INSTITUTO MAR DE CADIZ',
u'Categoria',
u'DIVISION HONOR ANDALUZA',
u'Grupo',
u'GRUPO 9',
u'Delegado',
u'SANCHEZ SANTANO, JOSE MARIA',
u'*Dias de Juego',
u'SABADO',
u'*Hora de Juego',
u'17:00']
My results: an empty CSV file. :(
Thanks in advance!!!
Here you go:
import csv
lista = [
    u'Club',
    u'CLUB TENIS DE MESA PORTUENSE',
    u'Nombre Equipo',
    u'C.T.M. PORTUENSE',
    u'Telefono fijo',
    u'630970055',
    u'Telefono Movil',
    u'630970055',
    u'E_Mail',
    u'M.LOPEZ_70#HOTMAIL.COM',
    u'Local de Juego',
    u'INSTITUTO MAR DE CADIZ',
    u'Categoria',
    u'DIVISION HONOR ANDALUZA',
    u'Grupo',
    u'GRUPO 9',
    u'Delegado',
    u'SANCHEZ SANTANO, JOSE MARIA',
    u'*Dias de Juego',
    u'SABADO',
    u'*Hora de Juego',
    u'17:00',
]

# lista alternates label, value, label, value, ...: the even positions become
# the header line and the odd positions become the single data row.
header = lista[::2]
row = lista[1::2]

# Write the two lines; the file is closed when the with-block ends.
with open('file.csv', 'w') as out:
    w = csv.writer(out, dialect='excel')
    w.writerow(header)
    w.writerow(row)
As a side note, here is what I would do in your place: create one list for your column names, like:
header = ['col_name1', 'col_name2', ... ]
and a list of lists for the values like:
values = [
[row1_val1, row1_val2, ...],
[row2_val1, row2_val2, ...],
...
]
Then you can do:
w.writerow(header)
for row in values:
w.writerow(row)
Check the documentation of the csv module; there is a way to write all the rows in one go (`w.writerows(values)`), although I have never used it myself.
Related
Good evening. I have a problem: I have to write two Python files. The first one, TestFiltrador, filters the columns of a .csv file (exactly 2 columns), and the other one, Testdefiltrado, prints those 2 columns, but I get this error.
TestFiltraor:
# Record type for one data row of the CSV file; the field order is the column
# order of the file.
Almacenamiento_de_datos = namedtuple(
    'DESAPARECIDOS_INDIA_2018_2020',
    'Tipo_de_sangre,Nombres,Genero,Cuerpo,Distrito,'
    'DENUNCIADO,Estatura,Edad,Cuando_Desparecio',
)


def Archivo_Csv(Archivo):
    """Read a semicolon-delimited CSV file into a list of records.

    The first line is treated as a header and skipped. Every following line
    must have exactly nine columns; ``Estatura`` is converted to ``float`` and
    ``Edad`` to ``int``, the other columns are kept as strings.

    Args:
        Archivo: path of the CSV file (read as UTF-8).

    Returns:
        A list of ``Almacenamiento_de_datos`` namedtuples, one per data row
        (empty when the file has no data rows).

    Raises:
        ValueError: if a row does not have nine columns or a numeric column
            cannot be converted.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(Archivo, encoding='utf-8', newline='') as f:
        lineador = csv.reader(f, delimiter=";")
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(lineador, None)
        # Square brackets only: this is a list comprehension that yields one
        # record per row, not a list wrapping a single generator object.
        lista = [
            Almacenamiento_de_datos(
                Tipo_de_sangre,
                Nombres,
                Genero,
                Cuerpo,
                Distrito,
                DENUNCIADO,
                float(Estatura),
                int(Edad),
                Cuando_Desparecio,
            )
            for (Tipo_de_sangre, Nombres, Genero, Cuerpo, Distrito,
                 DENUNCIADO, Estatura, Edad, Cuando_Desparecio) in lineador
        ]
    return lista
# Opens the CSV file, stores each row in the namedtuple Almacenamiento_de_datos
# and saves the result in the variable lista.
def Todos_Los_nombres(lista):
    """Return the (Nombres, Edad) pair of every record in *lista*.

    Args:
        lista: iterable of records exposing ``Nombres`` and ``Edad``
            attributes (for example namedtuples).

    Returns:
        A list of ``(Nombres, Edad)`` tuples in input order; empty when
        *lista* is empty.
    """
    # namedtuple fields are read as attributes (record.Nombres), not with
    # string subscripts, and next() is for advancing iterators, not for
    # selecting fields.
    return [(registro.Nombres, registro.Edad) for registro in lista]
# Iterates over lista and finds the columns Nombres and (int) Edad.
This would be the code to call it:
def TestFiltrador(Desaparecidoscsv):
    """Print whatever Todos_Los_nombres returns for *Desaparecidoscsv*."""
    print("Los nombres y sus edad son : ", Todos_Los_nombres(Desaparecidoscsv))
## Nom: iterates over the result of the first code above and prints it.
This is the error I get:
Nom = next(i['Nombres'], i ['Edad'])
TypeError: 'generator' object is not subscriptable
How can I fix this?
I hope you can help me. Thanks for reading my question.
You're creating a generator expression here:
# The inner parentheses form a generator expression, so lista ends up as a
# list that holds a single generator object.
lista = [
(
Almacenamiento_de_datos(
Tipo_de_sangre,
Nombres,
Genero,
Cuerpo,
Distrito,
DENUNCIADO,
float(Estatura),
int(Edad),
Cuando_Desparecio,
)
for Tipo_de_sangre, Nombres, Genero, Cuerpo, Distrito, DENUNCIADO, Estatura, Edad, Cuando_Desparecio in lineador
)
]
If you want a list of namedtuples, do this:
# Without the extra parentheses this is a list comprehension: it builds one
# Almacenamiento_de_datos per row read from lineador.
lista = [
Almacenamiento_de_datos(
Tipo_de_sangre,
Nombres,
Genero,
Cuerpo,
Distrito,
DENUNCIADO,
float(Estatura),
int(Edad),
Cuando_Desparecio,
)
for Tipo_de_sangre, Nombres, Genero, Cuerpo, Distrito, DENUNCIADO, Estatura, Edad, Cuando_Desparecio in lineador
]
I have the following output:
datos=['Venta Casas CARRETERA NACIONAL, Nuevo León', 'Publicado el 29 de Abr', 'ABEDUL DE LADERAS', '3 Recámaras', '4.5 Baños', '300m² de Construcción', '300m² de Terreno', '2 Plantas', ' 81-1255-3166', ' Preguntale al vendedor', 'http://zuhausebienesraices.nocnok.com/', "INFOSTATS_ADOAnuncios('5', '30559440');"]
And I would like to assign a different variable to each item if it is in the list otherwise it will be 0. For example:
recamara= the string from the list that has the word "Recámara"
bano= the string from the list that has the string "Baño"
and so on. And if the word "Baño" is not in the list then bano= 0
If you are using Python you can use list comprehension to do this.
datos = ['Venta Casas CARRETERA NACIONAL, Nuevo León', 'Publicado el 29 de Abr', 'ABEDUL DE LADERAS', '3 Recámaras', '4.5 Baños', '300m² de Construcción', '300m² de Terreno', '2 Plantas', ' 81-1255-3166', ' Preguntale al vendedor', 'http://zuhausebienesraices.nocnok.com/', "INFOSTATS_ADOAnuncios('5', '30559440');"]

# Every entry of datos that contains "Casas".
casas_list = [texto for texto in datos if "Casas" in texto]
print(casas_list)
print(len(casas_list))

# Entries mentioning bedrooms and bathrooms, respectively.
recamara = [texto for texto in datos if "Recámara" in texto]
bano = [texto for texto in datos if "Baño" in texto]

# When nothing matched, the name is rebound to 0; otherwise the first match is
# printed (print the whole list instead if more than one match is expected).
if recamara:
    print(recamara[0])
else:
    recamara = 0

if bano:
    print(bano[0])
else:
    bano = 0
I have a problem with my CSV file: it is being saved with a blank line between each row, and I don't know why. How do I solve this problem? I'm asking because I couldn't find any answer or solution to this.
Here is the code:
import csv
import random
def dict_ID_aeropuertos():
    """Map the first column of AeropuertosArg.csv to its second column.

    When a first-column value appears more than once, the first occurrence
    is the one that is kept.
    """
    dic_ID = {}
    with open('AeropuertosArg.csv') as archivo_csv:
        for fila in csv.reader(archivo_csv):
            clave, nombre = fila[0], fila[1]
            if clave not in dic_ID:
                dic_ID[clave] = nombre
    return dic_ID
# Builds random (r1, r2) pairs of IDs taken from dict_ID_aeropuertos() and
# writes them to rutasAeropuertos.csv.
def ruteoAleatorio():
dic_ID = dict_ID_aeropuertos()
lista_ID = list(dic_ID.keys())
cont = 0
lista_rutas = []
while (cont < 50):
# The two IDs are drawn independently, so they can coincide; such draws are
# rejected by the r1 != r2 check.
r1 = random.choice(lista_ID)
r2 = random.choice(lista_ID)
if (r1 != r2):
t = (r1,r2)
# NOTE(review): indentation was lost; presumably cont is incremented only when
# a new pair is appended, so the loop ends after 50 distinct pairs -- confirm.
if (t not in lista_rutas):
lista_rutas.append(t)
cont += 1
# The output file is opened in text mode without newline=''.
with open('rutasAeropuertos.csv', 'w') as archivo_rutas:
escribir = csv.writer(archivo_rutas)
escribir.writerows(lista_rutas)
archivo_rutas.close()
ruteoAleatorio()
Here is the CSV file AeropuertosArg.csv:
1,Aeroparque Jorge Newbery,Ciudad Autonoma de Buenos Aires,Ciudad Autonoma de Buenos Aires,-34.55803,-58.417009
2,Aeropuerto Internacional Ministro Pistarini,Ezeiza,Buenos Aires,-34.815004,-58.5348284
3,Aeropuerto Internacional Ingeniero Ambrosio Taravella,Cordoba,Cordoba,-31.315437,-64.21232
4,Aeropuerto Internacional Gobernador Francisco Gabrielli,Ciudad de Mendoza,Mendoza,-32.827864,-68.79849
5,Aeropuerto Internacional Teniente Luis Candelaria,San Carlos de Bariloche,Rio Negro,-41.146714,-71.16203
6,Aeropuerto Internacional de Salta Martin Miguel de Guemes,Ciudad de Salta,Salta,-24.84423,-65.478412
7,Aeropuerto Internacional de Puerto Iguazu,Puerto Iguazu,Misiones,-25.731778,-54.476181
8,Aeropuerto Internacional Presidente Peron,Ciudad de Neuquen,Neuquen,-38.952137,-68.140484
9,Aeropuerto Internacional Malvinas Argentinas,Ushuaia,Tierra del Fuego,-54.842237,-68.309701
10,Aeropuerto Internacional Rosario Islas Malvinas,Rosario,Santa Fe,-32.916887,-60.780391
11,Aeropuerto Internacional Comandante Armando Tola,El Calafate,Santa Cruz,-50.283977,-72.053641
12,Aeropuerto Internacional General Enrique Mosconi,Comodoro Rivadavia,Chubut,-45.789435,-67.467498
13,Aeropuerto Internacional Teniente General Benjamin Matienzo,San Miguel de Tucuman,Tucuman,-26.835888,-65.108361
14,Aeropuerto Comandante Espora,Bahia Blanca,Buenos Aires,-38.716152,-62.164955
15,Aeropuerto Almirante Marcos A. Zar,Trelew,Chubut,-43.209957,-65.273405
16,Aeropuerto Internacional de Resistencia,Resistencia,Chaco,-27.444926,-59.048739
17,Aeropuerto Internacional Astor Piazolla,Mar del Plata,Buenos Aires,-37.933205,-57.581518
18,Aeropuerto Internacional Gobernador Horacio Guzman,San Salvador de Jujuy,Jujuy,-24.385987,-65.093755
19,Aeropuerto Internacional Piloto Civil Norberto Fernandez,Rio Gallegos,Santa Cruz,-51.611788,-69.306315
20,Aeropuerto Domingo Faustino Sarmiento,San Juan,San Juan,-31.571814,-68.422568
Your problem is that the csv module's writerows has its own newline logic, which interferes with the default newline behaviour of open():
Fix like this:
with open('rutasAeropuertos.csv', 'w', newline='' ) as archivo_rutas:
# ^^^^^^^^^^
This is also documented in the example in the documentation: csv.writer(csvfile, dialect='excel', **fmtparams):
If csvfile is a file object, it should be opened with newline='' [1]
with a link to a footnote telling you:
[1] If newline='' is not specified, newlines embedded inside quoted fields will not be interpreted correctly, and on platforms that use \r\n linendings on write an extra \r will be added. It should always be safe to specify newline='', since the csv module does its own (universal) newline handling.
You are using Windows, which uses \r\n line endings, so an extra \r is added, and that leads to your "wrong" output.
Full code with some optimizations:
import csv
import random
def dict_ID_aeropuertos():
    """Map the first column of AeropuertosArg.csv to its second column.

    Blank lines and lines with fewer than two columns are skipped. When a
    first-column value appears more than once, the first occurrence wins.

    Returns:
        dict mapping first-column strings to second-column strings.
    """
    dic_ID = {}
    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for files passed to csv.reader.
    with open('AeropuertosArg.csv', newline='') as archivo_csv:
        for linea in csv.reader(archivo_csv):
            if len(linea) < 2:
                # csv.reader yields [] for a blank line; indexing it would
                # raise IndexError.
                continue
            dic_ID.setdefault(linea[0], linea[1])
    return dic_ID
def ruteoAleatorio():
    """Write up to 50 distinct random routes to rutasAeropuertos.csv.

    A route is an ordered pair (r1, r2) of two different IDs taken from
    dict_ID_aeropuertos(). With n IDs only n * (n - 1) ordered pairs exist,
    so fewer than 50 routes are written when there are not enough IDs.
    """
    dic_ID = dict_ID_aeropuertos()
    lista_ID = list(dic_ID)
    # Cap the target at the number of pairs that can exist; otherwise the loop
    # below never ends for small inputs, and random.sample raises ValueError
    # when there are fewer than two IDs.
    objetivo = min(50, len(lista_ID) * (len(lista_ID) - 1))
    lista_rutas = set()  # a set only holds unique values
    while len(lista_rutas) < objetivo:
        r1, r2 = random.sample(lista_ID, k=2)  # draw 2 different ones
        lista_rutas.add((r1, r2))  # duplicates are ignored by the set
    # newline='' prevents the extra blank line between rows on Windows.
    with open('rutasAeropuertos.csv', 'w', newline='') as archivo_rutas:
        escribir = csv.writer(archivo_rutas)
        escribir.writerows(lista_rutas)
ruteoAleatorio()
Output:
9,3
16,10
15,6
[snipp lots of values]
13,14
13,7
20,4
I am importing an excel worksheet that has the following columns name:
N° Pedido
1234
6424
4563
The column name has a special character (°). Because of that, I can't merge this with another DataFrame or rename the column. I don't get any error message; the name just stays the same. What should I do?
This is the code I am using and the result of the Dataframes:
import pandas as pd
import numpy as np
# Importing the spreadsheets. Each raw-string path has to stay on a single
# line: a string literal cannot contain a line break.
CRM = pd.ExcelFile(r'C:\Users\Michel\Desktop\Relatorio de Vendas\relatorio_vendas_CRM.xlsx', encoding= 'utf-8')
protheus = pd.ExcelFile(r'C:\Users\Michel\Desktop\Relatorio de Vendas\relatorio_vendas_protheus.xlsx', encoding= 'utf-8')
# Turning the sheets into DataFrames
df_crm = CRM.parse('190_pedido_export (33)')
df_protheus = protheus.parse('Relatorio de Pedido de Venda')
# Converting the protheus fields to float
def turn_to_float(x):
    """Return *x* converted to a Python float.

    Raises:
        ValueError: if *x* is a string that does not represent a number.
        TypeError: if *x* is of a type float() cannot convert.
    """
    # np.float was only an alias of the builtin float; it was deprecated in
    # NumPy 1.20 and later removed, so the builtin is used directly.
    return float(x)
# Convert the TES, Qtde and Valor columns with turn_to_float.
df_protheus["TES"] = df_protheus["TES"].apply(turn_to_float)
df_protheus["Qtde"] = df_protheus["Qtde"].apply(turn_to_float)
df_protheus["Valor"] = df_protheus["Valor"].apply(turn_to_float)
# Removing the non-sale TES codes from protheus
# removing values with wrong code 6
# Rows whose TES equals 513.0 are dropped first, then rows whose TES equals 576.0.
df_protheus_1 = df_protheus[df_protheus.TES != 513.0]
df_protheus_2 = df_protheus_1[df_protheus_1.TES != 576.0]
**df_crm.columns = df_crm.columns.str.replace('N° Pedido', 'teste')
df_crm.columns**
Orçamento Origem N° Pedido Nº Pedido ERP Estabelecimento Tipo de
Pedido Classificação(Tipo) Aplicação Conta CNPJ/CPF Contato ...
Aprovação Parcial Antecipa Entrega Desconto da Tabela de Preço
Desconto do Cliente Desconto Informado Observações Observações NF Vl
Total Bruto Vl Total Completo
0 20619.0 23125 NaN Optitex 1 - Venda NaN Industrialização/Revenda
XAVIER E ARAUJO LTDA ME 7970626000170 NaN ... N N 0 0 0
Note that I used other codes for the bold part with the same result:
#renomeando tabela para dar Merge
#df_crm['proc'] = df_crm['N\xc2\xb0 Pedido']
#df_crm['N Pedido'] = df_crm['N° Pedido']
#df_crm.drop('N° Pedido',inplace=True,axis=1)
#df_crm
#df_crm['N Pedido'] = df_crm['N° Pedido']
#df.drop('N° Pedido',inplace=True,axis=1)
#df_crm
#df_crm_1 = df_crm.rename(columns={"N°Pedido": "teste"})
#df_crm_1
Thanks for posting the link to the Google Sheet. I downloaded it and loaded it via pandas:
df = pd.read_excel(r'~\relatorio_vendas_CRM.xlsx', encoding = 'utf-8')
# Remove the degree sign (U+00B0) from every column name.
df.columns = df.columns.str.replace('°', '')
# Remove the masculine ordinal indicator (U+00BA) from every column name.
df.columns = df.columns.str.replace('º', '')
Note that the two replace statements are replacing different characters, although they look very similar.
Help from: Why do I get a SyntaxError for a Unicode escape in my file path?
I was able to copy the values into another column. You could try that
# Copy the column under a name without the special character, then drop the
# original column in place.
df['N Pedido'] = df['N° Pedido']
df.drop('N° Pedido',inplace=True,axis=1)
Hi, I'm trying to read a JSON file and save its contents in a list, so that I can append to it and create a PDF with ReportLab. I have the following code, but I have several problems. The first is that I would like to have a 2×n list, so that its columns and rows are always dynamic according to the JSON.
If anyone can help me, I would be very grateful.
import json
# Accumulators that the loop below fills from the JSON file.
json_data = []
attributesName = []
testTable = { "attributes":[] }
attributesValue = []
# Output PDF document and the stylesheet used for its paragraph.
path="prueba2.pdf"
doc = SimpleDocTemplate(path, pagesize=letter)
styleSheet = getSampleStyleSheet()
text = []
# Load prueba.json and walk every entry of each item's 'data' list.
with open("prueba.json") as json_file:
document = json.load(json_file)
for item in document:
for data_item in item['data']:
# NOTE(review): the '[' opened on the next line is never closed.
attributesName.append([str(data_item['name'])
attributesValue.append([data_item['value']])
# NOTE(review): testTable is a dict whose only key is "attributes"; it is
# indexed here with the list attributesName and further down with 0.
testTable[attributesName].extend({data_item['name'], data_item['value']})
print attributesName[0]
print testTable[0]
parts = []
# text is still the empty list created above when it is formatted into the paragraph.
p = Paragraph('''<para align=left fontsize=9>{0}</para>'''.format(text), styleSheet["BodyText"])
parts.append(p)
doc.build(parts)
I implemented the following, but it prints the list like this:
[[['RFC', 'NOMBRE', 'APELLIDO PATERNO', 'APELLIDO MATERNO', 'FECHA NACIMIENTO', 'CALLE', 'No. EXTERI
OR', 'No. INTERIOR', 'C.P.', 'ENTIDAD', 'MUNICIPIO', 'COLONIA', 'DOCUMENTO']], [['MORR910304FL2', 'R
JOSE', 'MONTIEL', 'ROBLES', '1992-02-04', 'AMOR', '4', '2', '55064', 'EDO DE MEX', 'ECATEPEC', 'INDUSTRIAL', 'Documento']]]
I want something like this:
[['RFC'], ['22232446']]
[['NOMBRE'], ['22239952']]
[['APELLIDO'], ['22245430']]
Try replacing your code with the following:
# NOTE(review): attributesName, attributesValue and tabla are not defined in
# this snippet; presumably they are lists created earlier -- confirm.
with open("prueba.json") as json_file:
document = json.load(json_file)
for item in document:
for data_item in item['data']:
# Names and values are appended as plain strings, not as one-element lists.
attributesName.append(str(data_item["name"]))
attributesValue.append(str(data_item["value"]))
# NOTE(review): indentation was lost, so it is unclear whether this append
# runs once per data_item, once per item, or once overall -- confirm.
tabla.append([[attributesName],[attributesValue]])
print attributesName
print attributesValue
# Print each entry of tabla on its own line.
for Y in tabla:
print(Y)