Trying to autosort point values from greatest to least, from .txt to .csv
I'm trying to sort lines like this: "email#email.com:stuffhere | PointsTotal = 1440"
This is what I currently have:
import csv
import glob

# Text that precedes the score inside a line's points field.
POINTS_LABEL = "PointsTotal = "


def split_points(line):
    """Split one "|"-separated line and pull out its point total.

    The field carrying the ``PointsTotal = `` label is located by content
    rather than by a fixed position, so a line works whatever number of "|"
    separators it contains.

    Args:
        line: One raw line of an input file, with or without its line ending.

    Returns:
        A ``(fields, points)`` tuple. ``fields`` is the list of "|"-separated
        fields in which the labelled field has been replaced by its integer
        value (or by its stripped text when that text is not an integer).
        ``points`` is that integer, or ``None`` when the line has no usable
        point total.
    """
    fields = line.rstrip("\r\n").split("|")
    for pos, field in enumerate(fields):
        if POINTS_LABEL not in field:
            continue
        text = field.replace(POINTS_LABEL, "").strip()
        try:
            fields[pos] = int(text)
        except ValueError:
            fields[pos] = text
            return fields, None
        return fields, fields[pos]
    return fields, None


allTxtFiles = glob.glob("txt\\*.txt")
for t in allTxtFiles:
    ranked = []    # (points, fields) for lines with an integer total
    endlines = []  # lines without a usable total; written after the ranked ones
    with open(t, "r") as inputFile:
        for e in inputFile:
            if not e.strip():
                continue  # blank lines carry no data
            fields, points = split_points(e)
            if points is None:
                endlines.append(fields)
            else:
                ranked.append((points, fields))

    # Highest total first; list.sort is stable, so ties keep their file order.
    ranked.sort(key=lambda entry: entry[0], reverse=True)
    sortedLines = [fields for _, fields in ranked]
    sortedLines.extend(endlines)

    # 'a+' appends: an existing output file is extended, not replaced.
    with open("sorted\\"+t.replace("txt\\",'')+".csv",'a+',newline="") as outfile:
        writer = csv.writer(outfile)
        writer.writerows(sortedLines)
Related
I want to split csv file into 2 lists using column name
CSV file:
Molecule Name,SMILES
ZINC53 (Aspirin),CC(=O)Oc1ccccc1C(=O)O
ZINC7460 (Vatalanib),Clc1ccc(Nc2nnc(Cc3ccncc3)c3ccccc23)cc1
ZINC1493878 (Sorafenib),CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1
Code:
# Accumulators intended to hold one list per CSV column.
namelist = list()
smileslist = list()
with open('./file.csv', 'r') as f:
    f = csv.reader(f, delimiter=',')
    # The first row is the header; index() returns each column's integer position.
    columns = next(f)
    type_col1 = columns.index("Molecule Name")
    type_col2 = columns.index("SMILES")
    # Each iteration yields one whole data row (a list of cell strings).
    for column in f:
        # NOTE(review): type_col1 / type_col2 are integer positions, so these
        # comparisons against the header strings are never true and neither
        # list receives anything.
        if type_col1 == 'Molecule Name':
            namelist.append(column)
        elif type_col2 == 'SMILES':
            smileslist.append(column)
With the pandas library you can do it as easily as:
import pandas as pd
# Load the CSV once, then pull each named column out as a plain Python list.
df = pd.read_csv("./file.csv")
namelist, smileslist = (
    df[header].tolist() for header in ("Molecule Name", "SMILES")
)
# Show the two lists, names first.
for values in (namelist, smileslist):
    print(values)
Or, if you prefer using the csv reader, you can do it as follows:
import csv
# One list per column of interest, filled in row order.
namelist = []
smileslist = []
# newline="" hands line-ending handling to the csv module, which is what it
# expects in order to parse quoted fields that contain line breaks.
with open("./file.csv", "r", newline="") as f:
    reader = csv.reader(f, delimiter=',')
    # The header row gives the position of each wanted column.
    columns = next(reader)
    index_col1 = columns.index("Molecule Name")
    index_col2 = columns.index("SMILES")
    for row in reader:
        if not row:
            continue  # csv.reader yields [] for a blank line
        namelist.append(row[index_col1])
        smileslist.append(row[index_col2])
Here's my code:
import glob
import itertools
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
# Work from the folder that holds the PDFs so the glob pattern below is relative to it.
os.chdir("PATH/pdf")
extension = 'pdf'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
# Every field value from every PDF is collected into this one flat list.
valeur = []
# n is incremented below but not read anywhere in the visible code.
n = 1
for i in all_filenames:
    # NOTE(review): fp is not closed anywhere in the visible code.
    fp = open(i, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    # Form fields are looked up under the catalog's "AcroForm" entry.
    fields = resolve1(doc.catalog["AcroForm"])["Fields"]
    # NOTE(review): the inner loop reuses the outer loop's variable name i.
    for i in fields:
        field = resolve1(i)
        # "T" and "V" are read from each field (presumably field name and value - confirm).
        name, value = field.get("T"), field.get("V")
        filehehe = "{0}:{1}".format(name,value)
        values = resolve1(value)
        names = resolve1(name)
        valeur.append(values)
    n = n+1
# The values are written one after another with no separator or line break
# between them, into a file opened in binary mode.
with open('test.csv','wb') as f:
    for i in valeur:
        f.write(i)
The goal here is to pick up some information from PDF files. Here's the output:
As you can see, the format is not pretty. I'm not very familiar with open(), so I'm kind of stuck.
I would like to have a distinct row for each PDF, with each piece of information in its own cell. Something like this:
Try to store the data from each PDF file in a separate list, and add that list to the valeur list you already have.
Use the csv module, as @martineau rightly suggested.
You can try the code below.
import csv
# One inner list per PDF; each inner list becomes one CSV row.
valeur = []
#your code
n = 1
for i in all_filenames:
    temp_list = []
    # Keep the PDF open only while its fields are being read, so the handle
    # is released before the next file is opened.
    with open(i, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser)
        fields = resolve1(doc.catalog["AcroForm"])["Fields"]
        # A distinct loop name avoids clobbering the file name held in i.
        for field_ref in fields:
            field = resolve1(field_ref)
            temp_list.append(resolve1(field.get("V")))
    n = n+1
    valeur.append(temp_list)
#Finally when you have the required data, you can write to csv file like this.
# newline='' lets the csv writer control line endings itself.
with open('mycsv.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerows(valeur)
With this, the output would be like this
The code runs with no errors, and the first IF is working!
But the other IF statements are not working.
I am trying to open a CSV file and export a JSON file, so I am going through all the rows in the CSV file and appending data.
The problem is that the code only goes through the first IF, so as a result only one object is getting updated. All the other IF statements need to be executed, but they aren't!
def make_json(File1, jsonFilePath):
    """Read '1.csv' into a list of row dicts and dump that list as JSON.

    NOTE(review): File1 is not used; the input path is hard-coded to '1.csv'.
    NOTE(review): only data1[0] (the first CSV row) is inspected below, so at
    most one row ever gets the shift fields.
    """
    data1 = []
    with open('1.csv', encoding='utf-8') as csvf:
        csvReader = csv.DictReader(csvf)
        for rows in csvReader:
            data1.append(rows)
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        # Each branch adds start/end times and a daysOfWeek string to the
        # first row when its "Shift Pattern" matches.
        if data1[0]["Shift Pattern"] == "DA7":
            data1[0]["shiftHourStart"] = "07:00:00"
            data1[0]["shiftHourEnd"] = "17:30:00"
            data1[0]["daysOfWeek"] = "[0,1,2,3]"
        if data1[0]["Shift Pattern"] == "DB7":
            data1[0]["shiftHourStart"] = "07:00:00"
            data1[0]["shiftHourEnd"] = "17:30:00"
            data1[0]["daysOfWeek"] = "[3,4,5,6]"
        if data1[0]["Shift Pattern"] == "NB30":
            data1[0]["shiftHourStart"] = "18:30:00"
            data1[0]["shiftHourEnd"] = "05:00:00"
            data1[0]["daysOfWeek"] = "[3,4,5,6]"
        if data1[0]["Shift Pattern"] == "S30":
            data1[0]["shiftHourStart"] = "13:30:00"
            data1[0]["shiftHourEnd"] = "22:00:00"
            data1[0]["daysOfWeek"] = "[1,2,3,4,5]"
        # The whole list, including rows left unchanged, is serialised.
        jsonf.write(simplejson.dumps(data1, indent=4))
So you want to loop through the data1 list and check every object?
Then you are missing the loop:
def make_json(File1, jsonFilePath):
    """Tag every row of '1.csv' with its shift details and dump the rows as JSON.

    Rows whose "Shift Pattern" is DA7, DB7, NB30 or S30 receive the keys
    shiftHourStart, shiftHourEnd and daysOfWeek; all other rows are written
    unchanged.

    NOTE(review): File1 is not used; the input path is hard-coded to '1.csv'.

    Args:
        File1: Unused.
        jsonFilePath: Path of the JSON file to write.
    """
    # Shift pattern -> (start time, end time, days-of-week string).
    shift_details = {
        "DA7": ("07:00:00", "17:30:00", "[0,1,2,3]"),
        "DB7": ("07:00:00", "17:30:00", "[3,4,5,6]"),
        "NB30": ("18:30:00", "05:00:00", "[3,4,5,6]"),
        "S30": ("13:30:00", "22:00:00", "[1,2,3,4,5]"),
    }
    with open('1.csv', encoding='utf-8') as csvf:
        data1 = list(csv.DictReader(csvf))
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        for record in data1:
            details = shift_details.get(record["Shift Pattern"])
            if details is None:
                continue
            (
                record["shiftHourStart"],
                record["shiftHourEnd"],
                record["daysOfWeek"],
            ) = details
        jsonf.write(simplejson.dumps(data1, indent=4))
In this question:
First I read the scores from the csv file and then
I saved an item in the following code in the lw list.
I want to write the lw list in a csv file.
How can I do this?
I read scores from a csv file called alaki.csv:
mandana,5,7,3,15
hamid,3,9,4,20,9,1,8,16,0,5,2,4,7,2,1
sina,19,10,19,6,8,14,3
sara,0,5,20,14
soheila,13,2,5,1,3,10,12,4,13,17,7,7
ali,1,9
sarvin,0,16,16,13,19,2,17,8
import csv
# For the average
from statistics import mean
import operator
from collections import Counter
def calculate_average_of_averages(input_file_name, output_file_name):
    """Compute each person's mean score and the mean of those means.

    Each input row is read as a name followed by that person's scores.

    NOTE(review): the output file is opened and a csv.writer is created, but
    no write call is made in the visible code, so lw never reaches the file.
    """
    #output_file_name=chert.csv
    with open(input_file_name) as d:
        se = csv.reader(d)
        # Maps each name to the mean of that row's scores.
        l = {}
        for ldf in se:
            name = ldf[0]
            lsd = mean([float(sd) for sd in ldf[1:]])
            l[name] = lsd
    with open(output_file_name,'w') as ff:
        fd = csv.writer(ff)
        a = list(l.values())
        # lw ends up holding a single element: the mean of all per-name means.
        lw = []
        m = mean(a)
        lw.append(m)
calculate_average_of_averages('alaki.csv','chert.csv')
output in csv file:
8.401530612244898
please help me
How about this:
import csv
# For the average
from statistics import mean
import operator
from collections import Counter
def calculate_average_of_averages(input_file_name, output_file_name):
    """Write each person's mean score, then the mean of those means.

    Each non-blank input row is a name followed by that person's scores.
    The output has one ``name,mean`` line per person, in input order,
    followed by a final ``average_of_average,<value>`` line.

    Args:
        input_file_name: Path of the CSV file of names and scores.
        output_file_name: Path of the text file to write.

    Raises:
        statistics.StatisticsError: If a row has a name but no scores, or
            the input contains no data rows.
        ValueError: If a score cannot be converted to float.
    """
    #output_file_name=chert.csv
    averages = {}
    with open(input_file_name, newline='') as d:
        for row in csv.reader(d):
            # csv.reader yields [] for a blank line (e.g. a trailing one).
            if not row:
                continue
            name, *scores = row
            averages[name] = mean([float(score) for score in scores])
    overall = mean(list(averages.values()))
    # The summary line is kept out of the dict so it cannot overwrite a
    # person who happens to be called "average_of_average".
    with open(output_file_name,'w') as ff:
        for name, value in averages.items():
            ff.write("{},{}\n".format(name, value))
        ff.write("{},{}\n".format("average_of_average", overall))
calculate_average_of_averages('alaki.csv','chert.csv')
output looks like:
mandana,7.5
hamid,6.066666666666666
sina,11.285714285714286
sara,9.75
soheila,7.833333333333333
ali,5.0
sarvin,11.375
average_of_average,8.401530612244898
to output just average_of_average
replace the write block:
with open(output_file_name,'w') as ff:
    # A text file's write() accepts only str; the stored average is a float.
    ff.write(str(l['average_of_average']))
You can use the pandas library by adding these 2 lines
import csv
import pandas as pd
# For the average
from statistics import mean
import operator
from collections import Counter
def calculate_average_of_averages(input_file_name, output_file_name):
    """Write the mean of all per-person mean scores to a CSV file via pandas.

    Each non-blank input row is a name followed by that person's scores.
    The output is a one-row table with the single column "yourColumn".

    Args:
        input_file_name: Path of the CSV file of names and scores.
        output_file_name: Output path. ".csv" is appended only when the name
            does not already end with it, so 'chert.csv' is written as
            'chert.csv' rather than 'chert.csv.csv'.

    Raises:
        statistics.StatisticsError: If a row has a name but no scores, or
            the input contains no data rows.
        ValueError: If a score cannot be converted to float.
    """
    per_name = {}
    with open(input_file_name, newline='') as d:
        for row in csv.reader(d):
            # csv.reader yields [] for a blank line.
            if not row:
                continue
            per_name[row[0]] = mean([float(sd) for sd in row[1:]])
    lw = [mean(list(per_name.values()))]
    if output_file_name.lower().endswith(".csv"):
        target = output_file_name
    else:
        target = output_file_name + ".csv"
    pd.DataFrame(lw, columns=["yourColumn"]).to_csv(target)
calculate_average_of_averages('alaki.csv','chert.csv')
I am not sure if CSV writer is necessary to write just one line.
import csv
from statistics import mean
def calculate_mean_of_means(input_file, output_file):
    """Write the mean of every person's mean score as a one-cell CSV row.

    Each input row is a name followed by integer scores. When a name appears
    more than once, the later row replaces the earlier one.

    Args:
        input_file: Path of the CSV file of names and scores.
        output_file: Path of the CSV file that receives the single value.
    """
    with open(input_file, newline='') as source:
        scores_by_name = {
            record[0]: list(map(int, record[1:]))
            for record in csv.reader(source)
        }

    # Add up the per-person means in input order, then divide by head count.
    running_total = 0
    for scores in scores_by_name.values():
        running_total = running_total + mean(scores)
    result_row = [running_total / len(scores_by_name)]

    with open(output_file, 'w', newline='') as target:
        csv.writer(target).writerow(result_row)
calculate_mean_of_means('alaki.csv', 'chert.csv')
I have the following code to read the three components of two waves and write each wave to a CSV file.
I am still a beginner, so the code is very long. At the very least, I don't want to type the wave name (EHMH011604150003.EW1, EHMH011604150003.NS1, ...) six times in the part that reads the files. How can I use the variable that I defined as "name" there?
Any tips on making the code look smarter are also appreciated.
Thank you
from pathlib import Path
import os
import numpy as np
import csv
#p =Path('D:/Jobs_2020/RJA/')
#p2 = p/'20160415波形'
#p3 = p2/'kik'
name = 'EHMH011604150003'
# Function 'getKiK-net'
def Convert2Acc(data):
    """Convert the text of one waveform file into acceleration values in gal.

    The whitespace-separated token after 'Factor' is expected to look like
    '<scale>(gal)/<factor>', and every token after 'Memo.' is a raw sample.
    The first sample is subtracted from all samples before scaling.

    Args:
        data: Full text content of the waveform file.

    Returns:
        A float64 NumPy array: (raw - raw[0]) * scale / factor.
    """
    words = data.split()
    # Scale factor, e.g. '2(gal)/4' -> scale '2', factor '4'.
    scale_text, factor_text = words[words.index('Factor') + 1].split('(gal)/')
    # Strong-motion samples start right after the 'Memo.' token.
    first_sample = words.index('Memo.') + 1
    raw = np.array(words[first_sample:], dtype=np.float64)
    zeroed = raw - raw[0]
    return zeroed * float(scale_text) / float(factor_text)
# Read data files
# Each of the six blocks below reads one component file in full; the file
# name is spelled out in every path instead of being built from `name`.
rfile_EW1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.EW1'
fr_EW1 = open(rfile_EW1, 'r')
EW1_gal = fr_EW1.read()
fr_EW1.close()
rfile_NS1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.NS1'
fr_NS1 = open(rfile_NS1, 'r')
NS1_gal = fr_NS1.read()
fr_NS1.close()
rfile_UD1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.UD1'
fr_UD1 = open(rfile_UD1, 'r')
UD1_gal = fr_UD1.read()
fr_UD1.close()
rfile_EW2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.EW2'
fr_EW2 = open(rfile_EW2, 'r')
EW2_gal = fr_EW2.read()
fr_EW2.close()
rfile_NS2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.NS2'
fr_NS2 = open(rfile_NS2, 'r')
NS2_gal = fr_NS2.read()
fr_NS2.close()
rfile_UD2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.UD2'
fr_UD2 = open(rfile_UD2, 'r')
UD2_gal = fr_UD2.read()
fr_UD2.close()
# Store data in array
# _Acc: 2D Array
# Each list holds the converted EW, NS and UD components of one wave, in that order.
_Acc1 = [Convert2Acc(EW1_gal), Convert2Acc(NS1_gal), Convert2Acc(UD1_gal)]
Acc1 = np.array(_Acc1).T # Acc: Transposed 2D array to write to .csv file
_Acc2 = [Convert2Acc(EW2_gal), Convert2Acc(NS2_gal), Convert2Acc(UD2_gal)]
Acc2 = np.array(_Acc2).T # Acc: Transposed 2D array to write to .csv file
# Write to .csv file
# Output files are named '<name>-1.csv' and '<name>-2.csv'.
with open(str(name)+'-1'+'.csv', 'w') as file:
    writer = csv.writer(file, lineterminator='\n')
    writer.writerows(Acc1)
with open(str(name)+'-2'+'.csv', 'w') as file:
    writer = csv.writer(file, lineterminator='\n')
    writer.writerows(Acc2)
Something like:
rfile_EW1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\'+name+'.EW1'
should work.