Excel to Json Python file - python

I am trying to run this python file but it won't give me the JSON file (data.json). I am not sure if I need to specify it more where and how to put it out.
import xlrd
from collections import OrderedDict
import json
excel_file_path = 'checks.xlsx'
wb = xlrd.open_workbook (excel_file_path)
sh = wb.sheet_by_index(0)
data_list = []
for rownum in range(1, sh.nrows):
data = OrderedDict ()
row_values = sh.row_values (rownum)
data ['Code'] = row_values[0]
data ['Name'] = row_values[1]
data ['Amount'] = row_values[2]
data ['Date'] = row_values[3]
data ['MailingAddress'] = row_values[4]
data ['MailingAddress2'] = row_values[5]
data ['MailingCity'] = row_values[6]
data ['MailingState'] = row_values[7]
data ['MailingZip'] = row_values[8]
data_list.append(data)
j = json.dumps (data_list, ensure_ascii = False)
with open('data.json', 'w+') as f:
f.write(j)

Related

Split csv file into 2 list depending upon column name using python

I want to split csv file into 2 lists using column name
CSV file:
Molecule Name,SMILES
ZINC53 (Aspirin),CC(=O)Oc1ccccc1C(=O)O
ZINC7460 (Vatalanib),Clc1ccc(Nc2nnc(Cc3ccncc3)c3ccccc23)cc1
ZINC1493878 (Sorafenib),CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1
Code:
namelist = list()
smileslist = list()
with open('./file.csv', 'r') as f:
f = csv.reader(f, delimiter=',')
columns = next(f)
type_col1 = columns.index("Molecule Name")
type_col2 = columns.index("SMILES")
for column in f:
if type_col1 == 'Molecule Name':
namelist.append(column)
elif type_col2 == 'SMILES':
smileslist.append(column)
With pandas library you can do it as easily as :
import pandas as pd
df = pd.read_csv("./file.csv")
namelist = df["Molecule Name"].tolist()
smileslist = df["SMILES"].tolist()
print(namelist)
print(smileslist)
Or if you prefer using the csv reader you can do it as follow :
import csv
namelist = list()
smileslist = list()
with open("./file.csv", "r") as f:
f = csv.reader(f, delimiter=',')
columns = next(f)
index_col1 = columns.index("Molecule Name")
index_col2 = columns.index("SMILES")
for column in f:
namelist.append(column[index_col1])
smileslist.append(column[index_col2])

How to build specific format with open()?

Here's my code:
import glob
import itertools
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
os.chdir("PATH/pdf")
extension = 'pdf'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
valeur = []
n = 1
for i in all_filenames:
fp = open(i, "rb")
parser = PDFParser(fp)
doc = PDFDocument(parser)
fields = resolve1(doc.catalog["AcroForm"])["Fields"]
for i in fields:
field = resolve1(i)
name, value = field.get("T"), field.get("V")
filehehe = "{0}:{1}".format(name,value)
values = resolve1(value)
names = resolve1(name)
valeur.append(values)
n = n+1
with open('test.csv','wb') as f:
for i in valeur:
f.write(i)
The goal here is to pick up some informations in PDF. Here's the output :
As you can see, the format is not pretty. I'm not very familiar with open() so I'm kind of stuck.
I would like to have distinct rows for each PDF with each informations having her own cell. Something like that :
Try to store the data from each pdf file in a separate list. And add this list to the valeur list which you have.
Use csv module as #martineau rightly suggested.
You can try the with below code.
import csv
valeur = []
#your code
n = 1
for i in all_filenames:
temp_list = []
fp = open(i, "rb")
parser = PDFParser(fp)
doc = PDFDocument(parser)
fields = resolve1(doc.catalog["AcroForm"])["Fields"]
for i in fields:
field = resolve1(i)
name, value = field.get("T"), field.get("V")
filehehe = "{0}:{1}".format(name,value)
values = resolve1(value)
names = resolve1(name)
temp_list.append(values)
n = n+1
valeur.append(temp_list)
#Finally when you have the required data, you can write to csv file like this.
with open('mycsv.csv', 'w', newline='') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
for val in valeur:
wr.writerow(val)
With this, the output would be like this

script to autosort point vaules not working

Trying to autosort point values from greasest to least, from .txt to .csv
Im trying to sort this: "email#email.com:stuffhere | PointsTotal = 1440"
this is what I currently got;
import csv
import glob
allTxtFiles = glob.glob("txt\\*.txt")
for t in allTxtFiles:
inputFile = open(t,'r').readlines()
endlines = []
sortedLines = []
for e in inputFile:
minNum = e.split("|")
minNum[4] = minNum[4].replace("PointsTotal = ",'')
minNum[4] = minNum[4].strip()
try:
minNum[4] = int(minNum[4])
sortedLines.append(minNum)
except:
endlines.append(minNum)
sortedLines.sort(key=lambda x: int(x[4]),reverse=True)
sortedLines.extend(endlines)
with open("sorted\\"+t.replace("txt\\",'')+".csv",'a+',newline="") as outfile:
writer = csv.writer(outfile)
for s in sortedLines:
writer.writerow(s)

convert xls to json in python

I am trying to convert xls to json and but when I am executing the code it's not giving me the data inside xls sheet, it's only giving me the json structure.
Below is the code which I am running, I am not able to understand what modification I should further make in this so that I can get a perfect json file.
Please note - input is in the form of binary stream and output is also in the form of a stream and not file.
#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict
wb = xlrd.open_workbook(file_contents=sys.stdin.read())
for sheet_index in range(wb.nsheets):
# print sheet_index
sh = wb.sheet_by_index(sheet_index)
# print "Processing sheet no ", sheet_index
attributes = sh.row_values(0)
#print attributes
rows_list = []
attr_list = []
# print attr_list[0]
for rownum in range(1,sh.nrows):
row_val_list = sh.row_values(rownum)
row_dict = OrderedDict()
for index in range(len(attr_list)):
row_dict[attr_list[index]] = row_val_list[index]
#row_dict['ID'] = row_val_list[0]
#row_dict['Name'] = row_val_list[1]
#rows_list.append(row_dict)
#json_data = simplejson.dumps(rows_list)
#sys.stdout.write(json_data)
rows_list.append(row_dict)
json_data = simplejson.dumps(rows_list)
sys.stdout.write(json_data)
# json_data = simplejson.dumps(rows_list)
#sys.stdout.write(json_data)
~
Any help is much appreciated
here is the correct working python code
#!/usr/bin/python -u
import sys
import xlrd
import simplejson
from collections import OrderedDict
wb = xlrd.open_workbook(file_contents=sys.stdin.read())
#print "Sheets are .... ", wb.nsheets
for sheet_index in range(wb.nsheets):
sh = wb.sheet_by_index(sheet_index)
if sh.nrows == 0:
continue
attr_list = sh.row_values(0)
rows_list = []
for rownum in range(1,sh.nrows):
row_values = sh.row_values(rownum)
row_dict = OrderedDict()
for index in range(len(attr_list)):
row_dict[attr_list[index]] = row_values[index]
rows_list.append(row_dict)
json_data = simplejson.dumps(rows_list)
sys.stdout.write(json_data)

How to convert multiple xlsx sheet to csv using python

I can able to convert xlsx to csv in the case of single excel sheet.
How can i do the same in the case of multiple sheet in single excel file?
I have tried:
workBook = xlrd.open_workbook(filePath)
sheet_names = workBook.sheet_names()
lenth = len(sheet_names)
for i in range(0,lenth):
sheet = workBook.sheet_by_name(sheet_names[i])
yourcsvFile = open(csvPath, 'wb')
wr = csv.writer(yourcsvFile, quoting=csv.QUOTE_ALL)
for rownum in xrange(sheet.nrows):
wr.writerow(sheet.row_values(rownum))
yourcsvFile.close()
Try this
import sys
import xlrd
import csv
filePath = sys.argv[1] # user input file
csvPath = sys.argv[2]
workBook = xlrd.open_workbook(filePath)
sheet_names = workBook.sheet_names()
list_sheet = []
lenth = len(sheet_names)
for i in range(0,lenth):
sheet = workBook.sheet_by_name(sheet_names[i])
list_sheet.append(sheet)
yourcsvFile = open(csvPath, 'wb')
wr = csv.writer(yourcsvFile, quoting=csv.QUOTE_ALL)
total_row = list_sheet[0].ncols
for k in xrange(0,1):
for rownum in xrange(list_sheet[k].nrows):
wr.writerow(list_sheet[k].row_values(rownum))
if len(sheet_names) > 1:
for k in xrange(1,len(list_sheet)):
if list_sheet[k].ncols != total_row:
continue
for rownum in xrange(1,list_sheet[k].nrows):
wr.writerow(list_sheet[k].row_values(rownum))
yourcsvFile.close()

Categories