Script to autosort point values not working - Python

I'm trying to automatically sort point values from greatest to least, reading from .txt files and writing to .csv.
I'm trying to sort lines like this: "email#email.com:stuffhere | PointsTotal = 1440"
This is what I currently have:
import csv
import glob
# For every .txt file matched under txt\, split each line on "|", read the
# "PointsTotal = N" value from field 4, sort the rows by it (largest first)
# and write them to sorted\<file name>.csv.
# NOTE(review): indentation was lost in this listing; the nesting is inferred.
allTxtFiles = glob.glob("txt\\*.txt")
for t in allTxtFiles:
# The handle returned by open() is never closed explicitly.
inputFile = open(t,'r').readlines()
endlines = []
sortedLines = []
for e in inputFile:
minNum = e.split("|")
# NOTE(review): the sample line in the question contains a single "|", so
# split("|") yields only two fields (indices 0 and 1). Index 4 would raise
# IndexError for such a line, and these two lines sit before the try.
minNum[4] = minNum[4].replace("PointsTotal = ",'')
minNum[4] = minNum[4].strip()
try:
minNum[4] = int(minNum[4])
sortedLines.append(minNum)
# Bare except: any failure while converting/appending sends the row to
# endlines, which is appended unsorted after the sorted rows.
except:
endlines.append(minNum)
sortedLines.sort(key=lambda x: int(x[4]),reverse=True)
sortedLines.extend(endlines)
# t still ends in ".txt", so the output name becomes "<name>.txt.csv".
# Mode 'a+' appends: running the script again adds rows to an existing file.
with open("sorted\\"+t.replace("txt\\",'')+".csv",'a+',newline="") as outfile:
writer = csv.writer(outfile)
for s in sortedLines:
writer.writerow(s)

Related

Split csv file into 2 list depending upon column name using python

I want to split csv file into 2 lists using column name
CSV file:
Molecule Name,SMILES
ZINC53 (Aspirin),CC(=O)Oc1ccccc1C(=O)O
ZINC7460 (Vatalanib),Clc1ccc(Nc2nnc(Cc3ccncc3)c3ccccc23)cc1
ZINC1493878 (Sorafenib),CNC(=O)c1cc(Oc2ccc(NC(=O)Nc3ccc(Cl)c(C(F)(F)F)c3)cc2)ccn1
Code:
# Goal (from the question): collect the "Molecule Name" and "SMILES" columns
# of ./file.csv into two separate lists.
namelist = list()
smileslist = list()
with open('./file.csv', 'r') as f:
# f is rebound from the file object to the csv reader.
f = csv.reader(f, delimiter=',')
# First row is the header; the two index() calls return integer positions.
columns = next(f)
type_col1 = columns.index("Molecule Name")
type_col2 = columns.index("SMILES")
# Despite its name, "column" is one whole data row (a list of cell strings).
for column in f:
# NOTE(review): type_col1/type_col2 are integers, so comparing them with the
# header strings is never true and neither list receives anything. Even if a
# branch ran, it would append the whole row rather than a single cell.
if type_col1 == 'Molecule Name':
namelist.append(column)
elif type_col2 == 'SMILES':
smileslist.append(column)
With pandas library you can do it as easily as :
import pandas as pd
# Load the CSV once, then pull each named column out as a plain Python list.
df = pd.read_csv("./file.csv")
namelist, smileslist = (
    df[header].tolist() for header in ("Molecule Name", "SMILES")
)
print(namelist)
print(smileslist)
Or if you prefer using the csv reader you can do it as follow :
import csv
# Collect the "Molecule Name" and "SMILES" columns of ./file.csv into two lists.
namelist = []
smileslist = []
# newline="" is what the csv module expects, so that it can handle line
# endings embedded in quoted fields itself.
with open("./file.csv", "r", newline="") as f:
    reader = csv.reader(f, delimiter=',')
    # The header row tells us where each wanted column lives.
    columns = next(reader)
    index_col1 = columns.index("Molecule Name")
    index_col2 = columns.index("SMILES")
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError.
            continue
        namelist.append(row[index_col1])
        smileslist.append(row[index_col2])

How to build specific format with open()?

Here's my code:
import glob
import itertools
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
# Collect the AcroForm field values of every *.pdf file in PATH/pdf and dump
# them into test.csv.
# NOTE(review): indentation was lost in this listing; the nesting is inferred.
os.chdir("PATH/pdf")
extension = 'pdf'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
valeur = []
n = 1
for i in all_filenames:
# The PDF handle is not closed anywhere in this listing.
fp = open(i, "rb")
parser = PDFParser(fp)
doc = PDFDocument(parser)
fields = resolve1(doc.catalog["AcroForm"])["Fields"]
# NOTE(review): this inner loop reuses the name i, rebinding the outer loop
# variable that holds the file name.
for i in fields:
field = resolve1(i)
# "T" and "V" are read from each field dict as its name and value.
name, value = field.get("T"), field.get("V")
# filehehe, names and n are assigned but not read again in this listing.
filehehe = "{0}:{1}".format(name,value)
values = resolve1(value)
names = resolve1(name)
# Every value goes into one flat list, so there is no per-PDF grouping.
valeur.append(values)
n = n+1
# The file is opened in binary mode and each value is passed to f.write()
# as-is, with no separator or newline between values.
with open('test.csv','wb') as f:
for i in valeur:
f.write(i)
The goal here is to extract some information from PDF files. Here's the output:
As you can see, the format is not pretty. I'm not very familiar with open(), so I'm kind of stuck.
I would like to have a distinct row for each PDF, with each piece of information in its own cell. Something like this:
Try storing the data from each PDF file in a separate list, and append that list to the valeur list you already have.
Use the csv module, as @martineau rightly suggested.
You can try the code below.
import csv
# One inner list of field values per PDF; each becomes one CSV row.
valeur = []
#your code
for pdf_name in all_filenames:
    temp_list = []
    # Keep the PDF open while its objects are resolved, and close it
    # afterwards even if parsing fails.
    with open(pdf_name, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser)
        fields = resolve1(doc.catalog["AcroForm"])["Fields"]
        # A distinct loop name avoids rebinding the file-name variable.
        for field_ref in fields:
            field = resolve1(field_ref)
            # "V" holds the field's value; resolve it before storing.
            temp_list.append(resolve1(field.get("V")))
    valeur.append(temp_list)
#Finally when you have the required data, you can write to csv file like this.
with open('mycsv.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    # writerows emits one CSV row per PDF.
    wr.writerows(valeur)
With this, the output would be like this

Only first IF statement is working! All other IF not working

The code runs without errors, and the first IF statement works, but the other IF statements do not.
I am trying to open a CSV file and export a JSON file, so I go through all rows in the CSV file and append the data.
The problem is that the code only ever goes through the first IF, so only one object gets updated. All the other IF statements need to be executed too, but they are not!
# Reads the CSV rows into data1 as dicts, sets shift fields on a row and
# writes data1 to jsonFilePath as JSON.
# NOTE(review): the File1 argument is not used; the input path is the literal
# '1.csv'.
def make_json(File1, jsonFilePath):
data1 = []
with open('1.csv', encoding='utf-8') as csvf:
csvReader = csv.DictReader(csvf)
for rows in csvReader:
data1.append(rows)
  
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
            # Every check below indexes data1[0], so only the first CSV row is
            # ever examined or updated; there is no loop over data1 here.
            if data1[0]["Shift Pattern"] == "DA7": 
                data1[0]["shiftHourStart"] = "07:00:00"
                data1[0]["shiftHourEnd"] = "17:30:00"
                data1[0]["daysOfWeek"] = "[0,1,2,3]"
            if data1[0]["Shift Pattern"] == "DB7": 
                data1[0]["shiftHourStart"] = "07:00:00"
                data1[0]["shiftHourEnd"] = "17:30:00"
                data1[0]["daysOfWeek"] = "[3,4,5,6]"
            if data1[0]["Shift Pattern"] == "NB30":  
                data1[0]["shiftHourStart"] = "18:30:00"
                data1[0]["shiftHourEnd"] = "05:00:00"
                data1[0]["daysOfWeek"] = "[3,4,5,6]"
            if data1[0]["Shift Pattern"] == "S30":  
                data1[0]["shiftHourStart"] = "13:30:00"
                data1[0]["shiftHourEnd"] = "22:00:00"
                data1[0]["daysOfWeek"] = "[1,2,3,4,5]"
            # The whole list (all rows) is serialized, updated or not.
            jsonf.write(simplejson.dumps(data1, indent=4))
        
So you want to loop through the data1 list and check every object?
Then you are missing the loop:
def make_json(File1, jsonFilePath):
    """Convert a CSV of shifts into a JSON file with shift times filled in.

    Every row of the CSV at ``File1`` is read as a dict. Rows whose
    "Shift Pattern" is one of the known patterns get "shiftHourStart",
    "shiftHourEnd" and "daysOfWeek" set; other rows are left unchanged.
    All rows are then written to ``jsonFilePath`` as an indented JSON list.

    Args:
        File1: Path of the input CSV (must have a "Shift Pattern" column).
        jsonFilePath: Path of the JSON file to write.

    Raises:
        KeyError: If a row has no "Shift Pattern" column.
    """
    # pattern -> (shiftHourStart, shiftHourEnd, daysOfWeek)
    # daysOfWeek is kept as a string, as in the original output.
    shift_settings = {
        "DA7": ("07:00:00", "17:30:00", "[0,1,2,3]"),
        "DB7": ("07:00:00", "17:30:00", "[3,4,5,6]"),
        "NB30": ("18:30:00", "05:00:00", "[3,4,5,6]"),
        "S30": ("13:30:00", "22:00:00", "[1,2,3,4,5]"),
    }

    # Read the file the caller asked for (previously '1.csv' was hard-coded
    # and the File1 argument was ignored).
    with open(File1, encoding='utf-8', newline='') as csvf:
        data1 = list(csv.DictReader(csvf))

    for row in data1:
        settings = shift_settings.get(row["Shift Pattern"])
        if settings is None:
            continue
        row["shiftHourStart"], row["shiftHourEnd"], row["daysOfWeek"] = settings

    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        jsonf.write(simplejson.dumps(data1, indent=4))

How can I write just one float item in csv?

In this question:
First I read the scores from the csv file and then
I saved an item in the following code in the lw list.
I want to write the lw list in a csv file.
How can I do this?
I read scores from a csv file called alaki.csv:
mandana,5,7,3,15
hamid,3,9,4,20,9,1,8,16,0,5,2,4,7,2,1
sina,19,10,19,6,8,14,3
sara,0,5,20,14
soheila,13,2,5,1,3,10,12,4,13,17,7,7
ali,1,9
sarvin,0,16,16,13,19,2,17,8
import csv
# For the average
from statistics import mean
import operator
from collections import Counter
# Reads "name,score,score,..." rows from input_file_name, averages each
# person's scores, then averages those averages.
# NOTE(review): indentation was lost in this listing; the nesting is inferred.
def calculate_average_of_averages(input_file_name, output_file_name):
#output_file_name=chert.csv
with open(input_file_name) as d:
se = csv.reader(d)
# l maps each name (first cell) to the mean of the remaining cells.
l = {}
for ldf in se:
name = ldf[0]
lsd = mean([float(sd) for sd in ldf[1:]])
l[name] = lsd
with open(output_file_name,'w') as ff:
# NOTE(review): the writer fd is created but never called in this listing,
# so lw is computed and then nothing is written to the output file.
fd = csv.writer(ff)
a = list(l.values())
# lw ends up as a one-element list holding the average of the averages.
lw = []
m = mean(a)
lw.append(m)
calculate_average_of_averages('alaki.csv','chert.csv')
output in csv file:
8.401530612244898
please help me
How about this:
import csv
# For the average
from statistics import mean
import operator
from collections import Counter
def calculate_average_of_averages(input_file_name, output_file_name):
    """Write each person's average score plus the average of those averages.

    Each input row has the form ``name,score,score,...``. The output file
    gets one ``name,average`` line per person, followed by a final
    ``average_of_average,<value>`` line.

    Args:
        input_file_name: Path of the CSV file with the scores.
        output_file_name: Path of the file to write (e.g. chert.csv).
    """
    l = {}
    with open(input_file_name, newline='') as d:
        for ldf in csv.reader(d):
            if not ldf:
                # Blank lines come back as [] and have no name to index.
                continue
            l[ldf[0]] = mean([float(sd) for sd in ldf[1:]])
    # Computed before the key is added, so it only covers real names.
    l["average_of_average"] = mean(list(l.values()))
    with open(output_file_name, 'w') as ff:
        for name, value in l.items():
            ff.write("{},{}\n".format(name, value))
calculate_average_of_averages('alaki.csv','chert.csv')
output looks like:
mandana,7.5
hamid,6.066666666666666
sina,11.285714285714286
sara,9.75
soheila,7.833333333333333
ali,5.0
sarvin,11.375
average_of_average,8.401530612244898
to output just average_of_average
replace the write block:
with open(output_file_name,'w') as ff:
    # write() only accepts str, and the stored average is a float.
    ff.write(str(l['average_of_average']))
You can use the pandas library by adding these 2 lines
import csv
import pandas as pd
# For the average
from statistics import mean
import operator
from collections import Counter
def calculate_average_of_averages(input_file_name, output_file_name):
    """Write the average of all per-person averages to a CSV using pandas.

    Each input row has the form ``name,score,score,...``. The result is
    written as a one-row table with the column "yourColumn" (pandas also
    writes its index column).

    Args:
        input_file_name: Path of the CSV file with the scores.
        output_file_name: Output path. ".csv" is appended only when the name
            does not already end with it, so 'chert.csv' is written as
            'chert.csv' rather than 'chert.csv.csv'.
    """
    l = {}
    with open(input_file_name, newline='') as d:
        for ldf in csv.reader(d):
            if not ldf:
                # Blank lines come back as [] and have no name to index.
                continue
            l[ldf[0]] = mean([float(sd) for sd in ldf[1:]])
    lw = [mean(list(l.values()))]
    if not output_file_name.lower().endswith(".csv"):
        output_file_name += ".csv"
    pd.DataFrame(lw, columns=["yourColumn"]).to_csv(output_file_name)
calculate_average_of_averages('alaki.csv','chert.csv')
I am not sure if CSV writer is necessary to write just one line.
import csv
from statistics import mean
def calculate_mean_of_means(input_file, output_file):
    """Write the mean of every person's mean score as a single CSV cell.

    Each input row has the form ``name,score,score,...``.

    Args:
        input_file: Path of the CSV file with the scores.
        output_file: Path of the CSV file that receives the single value.
    """
    ls = {}
    with open(input_file, newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # Blank lines come back as [] and have no name to index.
                continue
            # float() accepts decimal scores such as "3.5" as well as
            # integers; int() rejected them.
            ls[row[0]] = [float(i) for i in row[1:]]
    total_means = sum(mean(score) for score in ls.values())
    mean_of_means = [total_means / len(ls)]
    with open(output_file, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerow(mean_of_means)
calculate_mean_of_means('alaki.csv', 'chert.csv')

How to use a variable in path?

I have the following code to read three components of two waves and extract each of them to a csv file.
I am still a beginner, so the code is very long. At the very least, I don't want to type the wave name (EHMH011604150003.EW1, EHMH011604150003.NS1, ...) six times in the part that reads the files. How can I use the variable that I defined as "name" there?
Any tips on making the code look smart is also appreciated.
Thank you
from pathlib import Path
import os
import numpy as np
import csv
#p =Path('D:/Jobs_2020/RJA/')
#p2 = p/'20160415波形'
#p3 = p2/'kik'
name = 'EHMH011604150003'
# Function 'getKiK-net'
def Convert2Acc(data):
    """Convert the text of one wave-component file into acceleration in gal.

    The text is split on whitespace. The token after 'Factor' must look like
    ``<scale>(gal)/<factor>``, and every token after 'Memo.' is read as a
    raw sample.

    Args:
        data: Full text content of the file.

    Returns:
        A float64 NumPy array: each raw sample minus the first sample,
        multiplied by scale / factor.

    Raises:
        ValueError: If 'Factor' or 'Memo.' is missing, or a token is not
            numeric.
    """
    tokens = data.split()
    # Scale factor
    scale, factor = tokens[tokens.index('Factor') + 1].split('(gal)/')
    # Strong motion: raw samples are everything after 'Memo.'
    rdata = np.array(tokens[tokens.index('Memo.') + 1:], dtype=np.float64)
    # Offset by the first sample, then convert the unit into gal.
    return (rdata - rdata[0]) * float(scale) / float(factor)
# Read data filess
# Each component file is read fully into a string; the with-blocks close the
# handles once the text has been read.
rfile_EW1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.EW1'
with open(rfile_EW1, 'r') as fr_EW1:
    EW1_gal = fr_EW1.read()

rfile_NS1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.NS1'
with open(rfile_NS1, 'r') as fr_NS1:
    NS1_gal = fr_NS1.read()

rfile_UD1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.UD1'
with open(rfile_UD1, 'r') as fr_UD1:
    UD1_gal = fr_UD1.read()

rfile_EW2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.EW2'
with open(rfile_EW2, 'r') as fr_EW2:
    EW2_gal = fr_EW2.read()

rfile_NS2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.NS2'
with open(rfile_NS2, 'r') as fr_NS2:
    NS2_gal = fr_NS2.read()

rfile_UD2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.UD2'
with open(rfile_UD2, 'r') as fr_UD2:
    UD2_gal = fr_UD2.read()
# Store data in arrays.
# _Acc: list of the three converted components, in the order EW, NS, UD.
# Acc: the same data transposed, so each CSV row holds one sample of all
# three components.
_Acc1 = [Convert2Acc(EW1_gal), Convert2Acc(NS1_gal), Convert2Acc(UD1_gal)]
Acc1 = np.array(_Acc1).T
_Acc2 = [Convert2Acc(EW2_gal), Convert2Acc(NS2_gal), Convert2Acc(UD2_gal)]
Acc2 = np.array(_Acc2).T
# Write to .csv files named <name>-1.csv and <name>-2.csv.
with open(str(name)+'-1'+'.csv', 'w') as file:
    writer = csv.writer(file, lineterminator='\n')
    writer.writerows(Acc1)
with open(str(name)+'-2'+'.csv', 'w') as file:
    writer = csv.writer(file, lineterminator='\n')
    writer.writerows(Acc2)
Something like:
# The wave name is interpolated into the path instead of being typed out.
rfile_EW1 = f'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\{name}.EW1'
should work.

Categories