I am trying to run this Python file, but it won't give me the JSON file (data.json). I am not sure if I need to specify more explicitly where and how it should write the output.
import xlrd
from collections import OrderedDict
import json

excel_file_path = 'checks.xlsx'
wb = xlrd.open_workbook(excel_file_path)
sh = wb.sheet_by_index(0)

data_list = []
for rownum in range(1, sh.nrows):
    data = OrderedDict()
    row_values = sh.row_values(rownum)
    data['Code'] = row_values[0]
    data['Name'] = row_values[1]
    data['Amount'] = row_values[2]
    data['Date'] = row_values[3]
    data['MailingAddress'] = row_values[4]
    data['MailingAddress2'] = row_values[5]
    data['MailingCity'] = row_values[6]
    data['MailingState'] = row_values[7]
    data['MailingZip'] = row_values[8]
    data_list.append(data)

j = json.dumps(data_list, ensure_ascii=False)
with open('data.json', 'w+') as f:
    f.write(j)
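Since no traceback is mentioned, the file may simply be landing in a different working directory than the one being checked. Below is a minimal sketch of the same conversion, assuming checks.xlsx keeps the same nine columns; it uses absolute paths and prints where the file was written. Also note that xlrd 2.0 and later only read .xls files, so an older xlrd (or openpyxl/pandas) is needed for .xlsx input.

import json
import os
import xlrd
from collections import OrderedDict

# Assumption: same nine-column layout as in the question.
excel_file_path = os.path.abspath('checks.xlsx')
json_file_path = os.path.abspath('data.json')

wb = xlrd.open_workbook(excel_file_path)
sh = wb.sheet_by_index(0)

headers = ['Code', 'Name', 'Amount', 'Date', 'MailingAddress',
           'MailingAddress2', 'MailingCity', 'MailingState', 'MailingZip']

data_list = []
for rownum in range(1, sh.nrows):            # skip the header row
    row_values = sh.row_values(rownum)
    data_list.append(OrderedDict(zip(headers, row_values)))

with open(json_file_path, 'w') as f:
    json.dump(data_list, f, ensure_ascii=False, indent=2)

print('Wrote', json_file_path)               # shows exactly where the file went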
I want to split a CSV file into 2 lists using the column names.
CSV file:
Molecule Name,SMILES
ZINC53 (Aspirin),CC(=O)Oc1ccccc1C(=O)O
ZINC7460 (Vatalanib),Clc1ccc(Nc2nnc(Cc3ccncc3)c3ccccc23)cc1
ZINC1493878 (Sorafenib),CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1
Code:
import csv

namelist = list()
smileslist = list()
with open('./file.csv', 'r') as f:
    f = csv.reader(f, delimiter=',')
    columns = next(f)
    type_col1 = columns.index("Molecule Name")
    type_col2 = columns.index("SMILES")
    for column in f:
        if type_col1 == 'Molecule Name':
            namelist.append(column)
        elif type_col2 == 'SMILES':
            smileslist.append(column)
With the pandas library you can do it as easily as:
import pandas as pd
df = pd.read_csv("./file.csv")
namelist = df["Molecule Name"].tolist()
smileslist = df["SMILES"].tolist()
print(namelist)
print(smileslist)
Or if you prefer using the csv reader, you can do it as follows:
import csv

namelist = list()
smileslist = list()
with open("./file.csv", "r") as f:
    f = csv.reader(f, delimiter=',')
    columns = next(f)
    index_col1 = columns.index("Molecule Name")
    index_col2 = columns.index("SMILES")
    for column in f:
        namelist.append(column[index_col1])
        smileslist.append(column[index_col2])
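For reference, csv.DictReader does the same lookup by header name without tracking column indexes by hand; a minimal sketch assuming the same file.csv layout:

import csv

namelist = []
smileslist = []
with open("./file.csv", newline="") as f:
    reader = csv.DictReader(f)              # uses the header row as keys
    for row in reader:
        namelist.append(row["Molecule Name"])
        smileslist.append(row["SMILES"])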
Here's my code:
import glob
import itertools
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1

os.chdir("PATH/pdf")
extension = 'pdf'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]

valeur = []
n = 1
for i in all_filenames:
    fp = open(i, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    fields = resolve1(doc.catalog["AcroForm"])["Fields"]
    for i in fields:
        field = resolve1(i)
        name, value = field.get("T"), field.get("V")
        filehehe = "{0}:{1}".format(name, value)
        values = resolve1(value)
        names = resolve1(name)
        valeur.append(values)
    n = n + 1

with open('test.csv', 'wb') as f:
    for i in valeur:
        f.write(i)
The goal here is to pick up some information from the PDFs. Here's the output:
As you can see, the format is not pretty. I'm not very familiar with open(), so I'm kind of stuck.
I would like to have a distinct row for each PDF, with each piece of information in its own cell. Something like this:
Try storing the data from each PDF file in a separate list, and add that list to the valeur list you already have.
Use the csv module, as @martineau rightly suggested.
You can try the code below.
import csv

valeur = []
# your code
n = 1
for i in all_filenames:
    temp_list = []
    fp = open(i, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    fields = resolve1(doc.catalog["AcroForm"])["Fields"]
    for i in fields:
        field = resolve1(i)
        name, value = field.get("T"), field.get("V")
        filehehe = "{0}:{1}".format(name, value)
        values = resolve1(value)
        names = resolve1(name)
        temp_list.append(values)
    n = n + 1
    valeur.append(temp_list)

# Finally, when you have the required data, you can write to the csv file like this.
with open('mycsv.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    for val in valeur:
        wr.writerow(val)
With this, the output would look like this:
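One extra detail: with pdfminer, the resolved form values may come back as bytes rather than str, which is often why the raw output looks ugly. A small helper sketch (assuming UTF-8 text with a Latin-1 fallback) that could be applied before appending to temp_list:

def to_text(v):
    # AcroForm values may be bytes, str, or None depending on the PDF.
    if v is None:
        return ""
    if isinstance(v, bytes):
        try:
            return v.decode("utf-8")
        except UnicodeDecodeError:
            return v.decode("latin-1")
    return str(v)

# Inside the field loop, instead of appending `values` directly:
# temp_list.append(to_text(values))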
Trying to auto-sort point values from greatest to least, from .txt to .csv.
I'm trying to sort lines like this: "email#email.com:stuffhere | PointsTotal = 1440"
This is what I currently have:
import csv
import glob

allTxtFiles = glob.glob("txt\\*.txt")
for t in allTxtFiles:
    inputFile = open(t, 'r').readlines()
    endlines = []
    sortedLines = []
    for e in inputFile:
        minNum = e.split("|")
        minNum[4] = minNum[4].replace("PointsTotal = ", '')
        minNum[4] = minNum[4].strip()
        try:
            minNum[4] = int(minNum[4])
            sortedLines.append(minNum)
        except:
            endlines.append(minNum)
    sortedLines.sort(key=lambda x: int(x[4]), reverse=True)
    sortedLines.extend(endlines)
    with open("sorted\\" + t.replace("txt\\", '') + ".csv", 'a+', newline="") as outfile:
        writer = csv.writer(outfile)
        for s in sortedLines:
            writer.writerow(s)
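A sketch of one way to do the sort that does not depend on PointsTotal sitting at a fixed split position: it pulls the number out with a regular expression and keeps lines without a score at the end. It assumes the same "txt" input folder and a "sorted" output folder that already exists.

import csv
import glob
import os
import re

POINTS_RE = re.compile(r"PointsTotal\s*=\s*(\d+)")

for path in glob.glob(os.path.join("txt", "*.txt")):
    with open(path) as infile:
        lines = [line.strip() for line in infile if line.strip()]

    scored, unscored = [], []
    for line in lines:
        match = POINTS_RE.search(line)
        if match:
            scored.append((int(match.group(1)), line))
        else:
            unscored.append(line)

    scored.sort(reverse=True)                       # greatest points first

    out_path = os.path.join("sorted", os.path.basename(path) + ".csv")
    with open(out_path, "w", newline="") as outfile:
        writer = csv.writer(outfile)
        for points, line in scored:
            writer.writerow([part.strip() for part in line.split("|")])
        for line in unscored:
            writer.writerow([part.strip() for part in line.split("|")])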
I am trying to convert xls to JSON, but when I execute the code it's not giving me the data inside the xls sheet; it's only giving me the JSON structure.
Below is the code I am running. I am not able to understand what further modification I should make so that I get a proper JSON file.
Please note: the input is a binary stream and the output is also a stream, not a file.
#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict

wb = xlrd.open_workbook(file_contents=sys.stdin.read())

for sheet_index in range(wb.nsheets):
    # print sheet_index
    sh = wb.sheet_by_index(sheet_index)
    # print "Processing sheet no ", sheet_index
    attributes = sh.row_values(0)
    # print attributes
    rows_list = []
    attr_list = []
    # print attr_list[0]
    for rownum in range(1, sh.nrows):
        row_val_list = sh.row_values(rownum)
        row_dict = OrderedDict()
        for index in range(len(attr_list)):
            row_dict[attr_list[index]] = row_val_list[index]
        # row_dict['ID'] = row_val_list[0]
        # row_dict['Name'] = row_val_list[1]
        # rows_list.append(row_dict)
        # json_data = simplejson.dumps(rows_list)
        # sys.stdout.write(json_data)
        rows_list.append(row_dict)
    json_data = simplejson.dumps(rows_list)
    sys.stdout.write(json_data)
    # json_data = simplejson.dumps(rows_list)
    # sys.stdout.write(json_data)
Any help is much appreciated.
Here is the correct working Python code:
#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict

wb = xlrd.open_workbook(file_contents=sys.stdin.read())
# print "Sheets are .... ", wb.nsheets

for sheet_index in range(wb.nsheets):
    sh = wb.sheet_by_index(sheet_index)
    if sh.nrows == 0:
        continue
    attr_list = sh.row_values(0)
    rows_list = []
    for rownum in range(1, sh.nrows):
        row_values = sh.row_values(rownum)
        row_dict = OrderedDict()
        for index in range(len(attr_list)):
            row_dict[attr_list[index]] = row_values[index]
        rows_list.append(row_dict)
    json_data = simplejson.dumps(rows_list)
    sys.stdout.write(json_data)
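One thing to watch: this loop writes one JSON array per sheet, so a workbook with several sheets produces several JSON documents back to back, which is not a single valid JSON document. If one combined document is wanted, a sketch that keys the rows by sheet name instead (same assumptions about the stdin/stdout stream):

#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict

wb = xlrd.open_workbook(file_contents=sys.stdin.read())
result = OrderedDict()

for sheet_index in range(wb.nsheets):
    sh = wb.sheet_by_index(sheet_index)
    if sh.nrows == 0:
        continue
    attr_list = sh.row_values(0)               # header row becomes the keys
    rows_list = []
    for rownum in range(1, sh.nrows):
        row_values = sh.row_values(rownum)
        rows_list.append(OrderedDict(zip(attr_list, row_values)))
    result[sh.name] = rows_list                # one entry per sheet

sys.stdout.write(simplejson.dumps(result))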
I am able to convert xlsx to csv in the case of a single Excel sheet.
How can I do the same in the case of multiple sheets in a single Excel file?
I have tried:
workBook = xlrd.open_workbook(filePath)
sheet_names = workBook.sheet_names()
lenth = len(sheet_names)
for i in range(0, lenth):
    sheet = workBook.sheet_by_name(sheet_names[i])
    yourcsvFile = open(csvPath, 'wb')
    wr = csv.writer(yourcsvFile, quoting=csv.QUOTE_ALL)
    for rownum in xrange(sheet.nrows):
        wr.writerow(sheet.row_values(rownum))
    yourcsvFile.close()
Try this:
import sys
import xlrd
import csv

filePath = sys.argv[1]  # user input file
csvPath = sys.argv[2]

workBook = xlrd.open_workbook(filePath)
sheet_names = workBook.sheet_names()
list_sheet = []
lenth = len(sheet_names)
for i in range(0, lenth):
    sheet = workBook.sheet_by_name(sheet_names[i])
    list_sheet.append(sheet)

yourcsvFile = open(csvPath, 'wb')
wr = csv.writer(yourcsvFile, quoting=csv.QUOTE_ALL)
total_row = list_sheet[0].ncols
for k in xrange(0, 1):
    for rownum in xrange(list_sheet[k].nrows):
        wr.writerow(list_sheet[k].row_values(rownum))
if len(sheet_names) > 1:
    for k in xrange(1, len(list_sheet)):
        if list_sheet[k].ncols != total_row:
            continue
        for rownum in xrange(1, list_sheet[k].nrows):
            wr.writerow(list_sheet[k].row_values(rownum))
yourcsvFile.close()
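The snippet above is Python 2 (xrange, opening the CSV in 'wb'). Under Python 3 the same idea would look roughly like this, still writing the first sheet with its header and skipping the header row of the remaining sheets:

import csv
import sys
import xlrd

filePath = sys.argv[1]
csvPath = sys.argv[2]

workBook = xlrd.open_workbook(filePath)
sheets = [workBook.sheet_by_name(name) for name in workBook.sheet_names()]

with open(csvPath, 'w', newline='') as csvFile:
    wr = csv.writer(csvFile, quoting=csv.QUOTE_ALL)
    expected_cols = sheets[0].ncols
    for rownum in range(sheets[0].nrows):        # first sheet, header included
        wr.writerow(sheets[0].row_values(rownum))
    for sheet in sheets[1:]:                     # remaining sheets, skip header
        if sheet.ncols != expected_cols:
            continue
        for rownum in range(1, sheet.nrows):
            wr.writerow(sheet.row_values(rownum))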