I am working on a simple program that takes a list of filenames and transforms it into an import file (.impex). The import file has three sections, each of which requires their own header. After printing the header to an output file, I use a for loop to iterate over the list of filenames and extract the necessary information from them to generate the import entry row.
input a .csv containing filenames, like such:
3006419_3006420_ENG_FRONT.jpg
desired output a .impex file formatted thusly:
header-1 line-1
header-1 line-2
header-1 line-3
;E3006419_3006420_FRONT_Image_Container;
header-2 line-1
;3006419_3006420D_ENG_FRONT-92x92;cloudfronturl;3006419_3006420D_ENG_FRONT.jpg;image/jpg;;;100Wx100H;E3006419_3006420_FRONT_Image_Container
header-3 line-1
;3006420;E3006419_3006420_FRONT_Image_Container
Here is my code:
file = open(sys.argv[1])
output = open('output.impex','w+') #define our output impex file
#write variables and header for media container creation
output.write('{}\n{}\n{}\n'.format('header-1 line-1','header-1 line-2','header-1 line-3'))
#write media container creation impex rows
for line in file:
nameAndExtension = line.split('.')
name = nameAndExtension[0]
extension = nameAndExtension[1]
elements = name.split('_')
parentSKU = elements[0]
childSKU = elements[1]
lang = elements[2]
angle = elements[3]
output.write(";E" + parentSKU + "_" + childSKU + "_" + angle + '_Image_Container;\n')
#write header for media creation
output.write('{}\n'.format('header-2 line-1'))
#write media creation impex rows
for line in file:
nameAndExtension = line.split('.')
name = nameAndExtension[0]
extension = nameAndExtension[1]
elements = name.split('_')
parentSKU = elements[0]
childSKU = elements[1]
lang = elements[2]
angle = elements[3]
output.write('{}\n'.format(';' + name + '-92x92;https://' + cloudfront + '.cloudfront.net/92x92/product-images/ACTIVE-HYBRIS/' + line + ';' + line + ';image/' + extension + ';;100Wx100H'))
#write header for product association
output.write('{}\n'.format('header-3 line-1'))
for line in file:
nameAndExtension = line.split('.')
name = nameAndExtension[0]
extension = nameAndExtension[1]
elements = name.split('_')
parentSKU = elements[0]
childSKU = elements[1]
lang = elements[2]
angle = elements[3]
output.write(childSKU + ";E" + parentSKU + "_" + childSKU + "_" + angle + '_Image_Container;\n')
My output looks like this:
header-1 line-1
header-1 line-2
header-1 line-3
;E3006419_3006420_FRONT_Image_Container;
;E3006419_3006421_FRONT_Image_Container;
;E3006419_3006422_FRONT_Image_Container;
;E3006419_3006423_FRONT_Image_Container;
;E3006419_3006424_FRONT_Image_Container;
header-2 line-1
header-3 line-1
So you can see it's doing the first for-loop correctly, but not writing anything for the second or third for loops. And I know it could be more nicely done with a clean function.
Just change each for loop to open the file beforehand:
file = open(sys.argv[1])
for line in file:
nameAndExtension = line.split('.')
# etc
Related
I am trying to append the parameters passed to a function to a specific place in an existing text file.
txt file:
query{
text:"",
source_language:"",
target_language:"",
},
data_type:[16],
params{
client:"xyz"
}
python:
def function(text,source_language,target_language):
f = open("file.txt", "w");
f.write( 'text:' + text + '\n' )
f.write( 'source_language:' + source_language + '\n' )
f.write( 'target_language:' + target_language + '\n' )
f.close()
But, its not working. Is there a way to append the parameters directly into the file including " " and ,. I am trying to add just the parameters into the existing file with data at the specified position.
Solution
In revision to your comments, considering that this is only being applied to the example above and need only to alter those specific three lines, this will accomplish the task (included if location: in case you don't match keyword it won't erase your file by open('w')
def change_text(new_text):
content[1] = list(content[1])
y = list(new_text)
content[1] = content[1][:6] + y + content[1][-2:]
content[1] = ''.join(content[1])
def change_source_l(new_source_l):
content[2] = list(content[2])
y = list(new_source_l)
content[2] = content[2][:17] + y + content[2][-2:]
content[2] = ''.join(content[2])
def change_target_l(new_target_l):
content[3] = list(content[3])
y = list(new_target_l)
content[3] = content[3][:17] + y + content[3][-2:]
content[3] = ''.join(content[3])
filename = open('query.txt', 'r')
content = filename.read()
content = content.split()
filename.close()
name = open('query.txt', 'w')
change_text('something')
change_source_l('this')
change_target_l('that')
name.write('\n'.join(content))
name.close()
Output
(xenial)vash#localhost:~/python/LPTHW$ cat query.txt
query{
text:"something",
source_language:"this",
target_language:"that",
},
data_type:[16],
params{
client:"xyz"
Open file in r+ mode
Use .seek method from Python file I/O and then write your content.
I know that there are some topics on this that tell us to use .strip() or .rstrip() function to do this but it's not working for me.
I have a programme that appends a new line to the csv file but unfortunately it generates a trailing comma...
I have tried to remove it with the .strip() function in python but it isn't working well, I am doing something wrong?
This is an example of what happen when I input 'T123' for Bike_No and '05/08/2017' for Purchase_Date
from datetime import datetime
td= datetime.now()
initial_bike_detaillist=[]
deBatt =100
deKM = 0.00
deDate = str(td)[8:10] + "/"+ str(td)[5:7] + "/"+ str(td)[0:4]
print("Option 4: Add abicycle \n")
Bike_No=input("Bike No. :")
Purchase_Date=str(input("Purchase Date:"))
initial_bike_detaillist=[str(Bike_No),str(Purchase_Date),str(deBatt),str(deDate),str(deKM)]#because there is no write function for int
filename="Assignment_Data1.csv"
file=open(filepath + filename,"a")
file.write("\n")
for k in initial_bike_detaillist:
file.write("{},".format(k))
print("Bicycle ({}) has been created".format(Bike_No))
file.close()
file=open(filepath + filename,"r")
for line in file:
line.strip()
print(line)
expected output=
Bike No.,Purchase Date,Batt %,Last Maintenance,KM since Last
T101,10/04/2016,55,10/01/2017,25.08
T102,01/07/2016,10,15/05/2017,30.94
T103,15/11/2016,94,13/06/2017,83.16
T104,25/04/2017,58,10/01/2017,25.08
T105,24/05/2017,5,20/06/2017,93.80
T123,04/04/2017,100,05/08/2017,0.0
actual output:
Bike No.,Purchase Date,Batt %,Last Maintenance,KM since Last
T101,10/04/2016,55,10/01/2017,25.08
T102,01/07/2016,10,15/05/2017,30.94
T103,15/11/2016,94,13/06/2017,83.16
T104,25/04/2017,58,10/01/2017,25.08
T105,24/05/2017,5,20/06/2017,93.80
T123,04/04/2017,100,05/08/2017,0.0,
`
Instead of this line :
for k in initial_bike_detaillist:
file.write("{},".format(k))
use following line :
file.write(','.join(initial_bike_detaillist))
Your Code :
from datetime import datetime
td = datetime.now()
initial_bike_detaillist = []
deBatt = 100
deKM = 0.00
deDate = str(td)[8:10] + "/" + str(td)[5:7] + "/" + str(td)[0:4]
print("Option 4: Add abicycle \n")
Bike_No = input("Bike No. :")
Purchase_Date = str(input("Purchase Date:"))
initial_bike_detaillist = [str(Bike_No), str(Purchase_Date), str(deBatt), str(deDate),
str(deKM)] # because there is no write function for int
filename = "Assignment_Data1.csv"
file = open(filepath + filename, "a")
file.write("\n")
# for k in initial_bike_detaillist:
# file.write("{},".format(k))
file.write(','.join(initial_bike_detaillist)) # use this line .
print("Bicycle ({}) has been created".format(Bike_No))
file.close()
file = open(filepath + filename, "r")
for line in file:
# line.strip() # Then, not need this line
print(line)
This is the code. However, this code can only parse 4 characters of Arabian only. I want it to parse dynamically. So, the number of characters does not matter. Therefore, it can parse 1 character, 2 character or more based on the number of existing characters.
import xml.etree.ElementTree as ET
import os, glob
import csv
from time import time
#read xml path
xml_path = glob.glob('D:\1. Thesis FINISH!!!\*.xml')
#create file declaration for saving the result
file = open("parsing.csv","w")
#file = open("./%s" % ('parsing.csv'), 'w')
#create variable of starting time
t0=time()
#create file header
file.write('wordImage_id'+'|'+'paw1'+'|'+'paw2'+'|' + 'paw3' + '|' + 'paw4' + '|'+'font_size'+'|'+'font_style'+
'|'+'font_name'+'|'+'specs_effect'+'|'+'specs_height'+'|'+'specs_height'
+'|'+'specs_width'+'|'+'specs_encoding'+'|'+'generation_filtering'+
'|'+'generation_renderer'+'|'+'generation_type' + '\n')
for doc in xml_path:
print 'Reading file - ', os.path.basename(doc)
tree = ET.parse(doc)
#tree = ET.parse('D:\1. Thesis FINISH!!!\Image_14_AdvertisingBold_13.xml')
root = tree.getroot()
#get wordimage id
image_id = root.attrib['id']
#get paw 1 and paw 2
paw1 = root[0][0].text
paw2 = root[0][1].text
paw3 = root[0][2].text
paw4 = root[0][3].text
#get properties of font
for font in root.findall('font'):
size = font.get('size')
style = font.get('fontStyle')
name = font.get('name')
#get properties of specs
for specs in root.findall('specs'):
effect = specs.get('effect')
height = specs.get('height')
width = specs.get('width')
encoding = specs.get('encoding')
#get properties for generation
for generation in root.findall('generation'):
filtering = generation.get('filtering')
renderer = generation.get('renderer')
types = generation.get('type')
#save the result in csv
file.write(image_id + '|' + paw1 + '|' + paw2 + '|' + paw3 + '|' + paw4 + '|' + size + '|' +
style + '|' + name + '|' + effect + '|' + height + '|'
+ width + '|' + encoding + '|' + filtering + '|' + renderer + '|' + types + '\n')
#close the file
file.close()
#print time execution
print("process done in %0.3fs." % (time() - t0))
so I have this python file which looks out for all the "label" tags in a XML file and does some modification with it. label is a string containing at max three lines. the code is manipulating XML file.
#1 label="Number of Packets Transmitted by the Source
Node of the Path to the Destination Node Of
the Path"
#2 label="Number of Packets Transmitted by the Source
node of the path to the destination node of
the path"
notice in label #2 words in second and third line are not in upper case which is not what I want. I want help in correcting logic of my program such that I should not write label twice.
import os
from io import StringIO, BytesIO
def splitAndMakeTitleCase(line):
# does something not relevant to context
fileList = open("AllFiles")
for fileStr in fileList:
fileName = fileStr.rstrip('\n')
openFile = open(fileName)
openNewFile = open(fileName+'TitleCase.xml','w')
lines = openFile.readlines()
for lineIndex in range(0,len(lines)):
line = lines[lineIndex]
skip = 0
if "label=" in line and "const" not in line:
segs = line.split('"')
if len(segs) >= 3:
pass
else:
openNewFile.write(lines[lineIndex])
secondTitleCaseLine = splitAndMakeTitleCase(lines[lineIndex + 1])
skip = lineIndex + 1
openNewFile.write(secondTitleCaseLine)
if '"' not in lines[lineIndex + 1]:
thirdTitleCaseLine = splitAndMakeTitleCase(lines[lineIndex + 2])
skip = lineIndex + 1
openNewFile.write(thirdTitleCaseLine)
openNewFile.write(lines[lineIndex])
openFile.close()
openNewFile.close()
#cmd = "mv " + fileName + "TitleCase.xml " + fileName
#os.system(cmd)
In your for loop you have the first if and then within that you do some printing to the file. Then after that you do another print of the line to the file. I think that you probably want that last line in a else like this:
for fileStr in fileList:
fileName = fileStr.rstrip('\n')
openFile = open(fileName)
openNewFile = open(fileName+'TitleCase.xml','w')
lines = openFile.readlines()
for lineIndex in range(0,len(lines)):
line = lines[lineIndex]
skip = 0
if "label=" in line and "const" not in line:
segs = line.split('"')
if len(segs) >= 3:
pass
else:
openNewFile.write(lines[lineIndex])
secondTitleCaseLine = splitAndMakeTitleCase(lines[lineIndex + 1])
skip = lineIndex + 1
openNewFile.write(secondTitleCaseLine)
if '"' not in lines[lineIndex + 1]:
thirdTitleCaseLine = splitAndMakeTitleCase(lines[lineIndex + 2])
skip = lineIndex + 1
openNewFile.write(thirdTitleCaseLine)
else:
openNewFile.write(lines[lineIndex])
openFile.close()
openNewFile.close()
#cmd = "mv " + fileName + "TitleCase.xml " + fileName
#os.system(cmd)
i'm having a problem with the python's csv reader. The problem is that i want to open and read different csv files, but he keeps on reading always the same one.
from csv import reader
alphabet = ["a", "b", "c"]
for letter in alphabet:
csv_file = open('/home/desktop/csv/' + letter + '.csv', 'r')
csv_data = reader(csv_file)
The problem is that he seems to open the other files, but he keep on reading always the first file.
Is there a way to "clean" the reader or make him read another file? I even tried to close the csv file, but it didn't work.
The full code is this
from csv import reader
#Orario
orario_csv_file = '/home/andrea/Scrivania/orario.csv'
orario_csv = open(orario_csv_file)
orario_data = reader(orario_csv)
orario = []
#Corsi
corsi = ["EDILIZIA", "EDILE-ARCHIT", "ELETTRONICA", "TECNOLOGIE_DI_INTERNET", "INFORMATICA", "GESTIONALE", "ENERGETICA", "MECCANICA", "CIVILE_ED_AMBIENTALE", "MEDICA", "ENGINEERING_SCIENCES"]
giorni = ["Lun", "Mar", "Mer", "Gio", "Ven"]
for row in orario_data:
orario.append(row)
for corso in corsi:
nome_corso_file = '/home/andrea/Scrivania/xml/' + corso + '.xml'
nome_corso_xml = open(nome_corso_file, 'wt')
nome_corso_xml.write('<?xml version="1.0"?>' + "\n")
nome_corso_xml.write('<orario>' + "\n")
nome_csv = corso + '_csv'
nome_csv = '/home/andrea/Scrivania/csv/' + corso + '.csv'
nome_corso_csv = open(nome_csv, 'rt')
corso_data = reader(nome_corso_csv)
nome_corso_xml.write(' <corso name="' + corso + '">' + "\n")
for a in range(0, 3):
nome_corso_xml.write(' <anno num="' + str(a+1) + '">' + "\n")
for j in range(1, 6):
nome_corso_xml.write(' <giorno name="' + orario[2][j] + '">' + "\n")
for i in range(3, 12):
lez = orario[i + a*12][j]
if lez == "":
nome_corso_xml.write(' <lezione>' + "-" + '</lezione>' + "\n")
else:
for riga in corso_data:
if riga[0] == lez:
if riga[2] == "":
nome_corso_xml.write(' <lezione name="' + lez + '">' + riga[1] + '</lezione>' + "\n")
else:
for g in range(0, len(riga)):
if riga[g].lower() == orario[2][j].lower():
nome_corso_xml.write(' <lezione name="' + lez + '">' + riga[g+1] + '</lezione>' + "\n")
nome_corso_csv.seek(0)
nome_corso_xml.write(' </giorno>' + "\n")
nome_corso_xml.write(' </anno>' + "\n")
nome_corso_xml.write(' </corso>' + "\n")
nome_corso_xml.write('</orario>' + "\n")
nome_corso_xml.close()
He open the "EDILIZIA.csv" and compile the "EDILIZIA.xml", then he should open the "EDILE-ARCHIT.csv" and compile its xml, but when he read, he keeps on reading from "EDILIZIA.csv"
Here's the .csv files that you need.
http://pastebin.com/kJhL8HpK
If you try to make it read first EDILIZIA.csv and then EDILE-ARCHIT.csv he'll keep on using always the EDILIZIA.csv to compile the xml, but he should firt open EDILIZIA.csv, compile the EDILIZIA.xml, then read the EDILE-ARCHIT.csv and compile the EDILE-ARCHIT.xml.
If you take a look at the final xmls, you'll see that the EDILE-ARCHIT.xml will only display the common subjects of EDILIZIA.csv and EDILE-ARCHIT.csv
It took a long time to figure out what you are doing here. To tell the truth your code is a mess - there are many unused variables and lines that make no sense at all. Anyway, your code reads the appropriate csv file each time, thus the error is not where you thought it was.
If I am right, orario.csv contains the timetable of each course (stored in corsi list) for three semesters or years, and the corso.csv files contain the room where subjects are held. So you want to merge the information into an XML file.
You only forgot one thing: to proceed in orario.csv. Your code wants to merge the very first three anno with the current corso. To fix it, you have to make two changes.
First in this for loop header:
for corso in corsi:
Modify to:
for num, corso in enumerate(corsi):
And when you assign lez:
lez = orario[i + a*12][j]
Modify to:
lez = orario[i + a*12*(num+1)][j]
Now it should work.
This code produces exactly the same result, but it uses Python's XML module to build the output file:
from csv import reader
import xml.etree.cElementTree as ET
import xml.dom.minidom as DOM
corsi = ["EDILIZIA", "EDILE-ARCHIT", "ELETTRONICA", "TECNOLOGIE_DI_INTERNET", "INFORMATICA", "GESTIONALE", "ENERGETICA", "MECCANICA", "CIVILE_ED_AMBIENTALE", "MEDICA", "ENGINEERING_SCIENCES"]
with open('orario.csv', 'r') as orario_csv:
orario = reader(orario_csv)
orario_data = [ row for row in orario ]
for num, corso in enumerate(corsi):
with open(corso + '.csv', 'r') as corso_csv:
corso_raw = reader(corso_csv)
corso_data = [ row for row in corso_raw ]
root_elem = ET.Element('orario')
corso_elem = ET.SubElement(root_elem, 'corso')
corso_elem.set('name', corso)
for anno in range(0, 3):
anno_elem = ET.SubElement(corso_elem, 'anno')
anno_elem.set('num', str(anno + 1))
for giorno in range(1, 6):
giorno_elem = ET.SubElement(anno_elem, 'giorno')
giorno_elem.set('name', orario_data[2][giorno])
for lezione in range(3, 12):
lez = orario_data[lezione + anno * 12 * (num + 1)][giorno]
if lez == '':
lezione_elem = ET.SubElement(giorno_elem, 'lezione')
lezione_elem.text = '-'
else:
for riga in corso_data:
if riga[0] == lez:
if riga[2] == '':
lezione_elem = ET.SubElement(giorno_elem, 'lezione')
lezione_elem.set('name', lez)
lezione_elem.text = riga[1]
else:
for g in range(0, len(riga)):
if riga[g].lower() == orario_data[2][giorno].lower():
lezione_elem = ET.SubElement(giorno_elem, 'lezione')
lezione_elem.set('name', lez)
lezione_elem.text = riga[g + 1]
with open(corso + '_new.xml', 'w') as corso_xml:
xml_data = DOM.parseString(ET.tostring(root_elem, method = 'xml')).toprettyxml(indent = ' ')
corso_xml.write(xml_data)
Cheers.
I think I may have spotted the cause of your problem.
The second item in your corsi list ends with a full stop. This means that you will be looking for the file "EDILE-ARCHIT..csv", which is almost does not exist. When you try and open the file, the open() call will throw an exception, and your program will terminate.
Try removing the trailing full stop, and running it again.