How to use a variable in path? - python

I have the following code to read three components of two waves and extract each of them to a csv file.
I am still a beginner and so the code is very long, at least I don't want to type the wave name(EHMH011604150003.EW1 EHMH011604150003.NS1 ...) 6 times in my read part. how can I put the Variable that I defined as "name" there?
Any tips on making the code look smart is also appreciated.
Thank you
from pathlib import Path
import os
import numpy as np
import csv
#p =Path('D:/Jobs_2020/RJA/')
#p2 = p/'20160415波形'
#p3 = p2/'kik'
name = 'EHMH011604150003'
# Function 'getKiK-net'
def Convert2Acc(data):
tokens = data.split()
# Scale factor
(Scale, Factor) = tokens[tokens.index('Factor')+1].split('(gal)/')
# Strong motion
items = tokens[tokens.index('Memo.')+1:]
rdata = np.array(items, dtype=np.float64) # rdata: raw data
acc_gal = (rdata - rdata[0]) * float(Scale) / float(Factor)
return acc_gal # acc_gal: Acc. converted unit into gal
# Read data filess
rfile_EW1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.EW1'
fr_EW1 = open(rfile_EW1, 'r')
EW1_gal = fr_EW1.read()
fr_EW1.close()
rfile_NS1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.NS1'
fr_NS1 = open(rfile_NS1, 'r')
NS1_gal = fr_NS1.read()
fr_NS1.close()
rfile_UD1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.UD1'
fr_UD1 = open(rfile_UD1, 'r')
UD1_gal = fr_UD1.read()
fr_UD1.close()
rfile_EW2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.EW2'
fr_EW2 = open(rfile_EW2, 'r')
EW2_gal = fr_EW2.read()
fr_EW2.close()
rfile_NS2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.NS2'
fr_NS2 = open(rfile_NS2, 'r')
NS2_gal = fr_NS2.read()
fr_NS2.close()
rfile_UD2 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\EHMH011604150003.UD2'
fr_UD2 = open(rfile_UD2, 'r')
UD2_gal = fr_UD2.read()
fr_UD2.close()
# Store data in array
# _Acc: 2D Array
_Acc1 = [Convert2Acc(EW1_gal), Convert2Acc(NS1_gal), Convert2Acc(UD1_gal)]
Acc1 = np.array(_Acc1).T # Acc: Transposed 2D array to write to .csv file
_Acc2 = [Convert2Acc(EW2_gal), Convert2Acc(NS2_gal), Convert2Acc(UD2_gal)]
Acc2 = np.array(_Acc2).T # Acc: Transposed 2D array to write to .csv file
# Write to .csv file
with open(str(name)+'-1'+'.csv', 'w') as file:
writer = csv.writer(file, lineterminator='\n')
writer.writerows(Acc1)
with open(str(name)+'-2'+'.csv', 'w') as file:
writer = csv.writer(file, lineterminator='\n')
writer.writerows(Acc2)

Something like:
rfile_EW1 = 'D:\\Jobs_2020\\RJA\\20160415波形\\kik\\'+name+'.EW1'
should work.

Related

TypeError: 'numpy.uint8' object is not iterable and put csv file didn't work

im try to put my loop numpy data Modus_citra into csv file, but i've tried using numpy and using normal write csv didn't work
import glob
import cv2
from os import listdir
from os.path import isfile, join
import os
import numpy as np
from sklearn.utils.validation import check_array
import csv
import pandas as pd
def find_mode(np_array) :
vals,counts = np.unique(np_array, return_counts=True)
index = np.argmax(counts)
return(vals[index])
folder = ("C:/Users/ROG FLOW/Desktop/Untuk SIDANG TA/Sudah Aman/testbikincsv/folderdatacitra/*.jpg")
for file in glob.glob(folder):
image = cv2.imread(file)
rows = image.shape[0]
cols = image.shape[1]
middlex = cols/2
middley = rows/2
middle = [middlex,middley]
titikawalx = middlex - 10
titikawaly = middley - 10
titikakhirx = middlex + 10
titikakhiry = middley + 10
crop = image[int(titikawaly):int(titikakhiry), int(titikawalx):int(titikakhirx)]
c = cv2.cvtColor(crop, cv2.COLOR_BGR2HSV)
H,S,V = cv2.split(c)
Modus_citra = (find_mode(H))
data = (Modus_citra)
with open("foo.csv", 'w') as file:
writer = csv.writer(file)
writer.writerows(data)
error = 'numpy.uint8' object is not iterable
foo.csv = from pictures
60
40
19
11
please can someone help me ? i Appreciate every help
According to the edit, you can try:
folder = "C:/Users/ROG FLOW/Desktop/Untuk SIDANG TA/Sudah Aman/testbikincsv/folderdatacitra/*.jpg"
all_data = []
for file in glob.glob(folder):
# ...
Modus_citra = find_mode(H)
all_data.append(Modus_citra) # <-- add the numpy.uint8 to the all_data list
# after the loop write the data to the CSV file:
with open("foo.csv", "w") as file:
writer = csv.writer(file)
for data in all_data:
writer.writerow([data])

Writing to a CSV without pandas

I am writing a list of numbers to a CSV.
However it is putting each number into a different cell.
I can not figure out why.
What I tried
I was using csv.writerow() which puts them all into the same row.
But I need them in columns.
Attempting to fix that I switched to csv.writerows() which puts them columns, but every single number is in a new row separated from the next.
Does anyone know why this is?
Code
class readingJ1Average:
def readingJ1(filepath):
with open(filepath, 'r') as f:
j1 = f.readlines()[46:47]
#Coverting list to a string
j1_join = ('\n'.join(j1))
#Pulling only average
j1_value = j1_join[5:16]
#Appending to a list
j1_list.append(j1_value)
def readingJ2(filepath):
with open(filepath, 'r') as f:
j2 = f.readlines()[47:48]
print(j2)
#Coverting list to a string
j2_join = ('\n'.join(j2))
#Pulling only average
j2_value = j2_join[5:16]
#Appending to a list
j2_list.append(j2_value)
def readingJ3(filepath):
with open(filepath, 'r') as f:
j3 = f.readlines()[48:49]
#Coverting list to a string
j3_join = ('\n'.join(j3))
#Pulling only average
j3_value = j3_join[5:16]
#Appending to a list
j3_list.append(j3_value)
def readingJ4(filepath):
with open(filepath, 'r') as f:
j4 = f.readlines()[48:49]
#Coverting list to a string
j4_join = ('\n'.join(j4))
#Pulling only average
j4_value = j4_join[5:16]
#Appending to a list
j4_list.append(j4_value)
def readingJ5(filepath):
with open(filepath, 'r') as f:
j5 = f.readlines()[49:50]
#Coverting list to a string
j5_join = ('\n'.join(j5))
#Pulling only average
j5_value = j5_join[5:16]
#Appending to a list
j5_list.append(j5_value)
def readingJ6(filepath):
with open(filepath, 'r') as f:
j6 = f.readlines()[50:51]
#Coverting list to a string
j6_join = ('\n'.join(j6))
#Pulling only average
j6_value = j6_join[5:16]
#Appending to a list
j6_list.append(j6_value)
def readingJ7(filepath):
with open(filepath, 'r') as f:
j7 = f.readlines()[51:52]
#Coverting list to a string
j7_join = ('\n'.join(j7))
#Pulling only average
j7_value = j7_join[5:16]
#Appending to a list
j7_list.append(j7_value)
#Beginning main code
j1_list = []
j2_list = []
j3_list = []
j4_list = []
j5_list = []
j6_list = []
j7_list = []
for file in os.listdir():
#check if file is in text format or not
if file.endswith(".ls"):
filepath = f"{path}\{file}"
#calling the read function
readingJ1Average.readingJ1(filepath)
for file in os.listdir():
#check if file is in text format or not
if file.endswith(".ls"):
filepath = f"{path}\{file}"
#calling the read function
readingJ1Average.readingJ2(filepath)
for file in os.listdir():
#check if file is in text format or not
if file.endswith(".ls"):
filepath = f"{path}\{file}"
#calling the read function
readingJ1Average.readingJ3(filepath)
for file in os.listdir():
#check if file is in text format or not
if file.endswith(".ls"):
filepath = f"{path}\{file}"
#calling the read function
readingJ1Average.readingJ4(filepath)
for file in os.listdir():
#check if file is in text format or not
if file.endswith(".ls"):
filepath = f"{path}\{file}"
#calling the read function
readingJ1Average.readingJ5(filepath)
for file in os.listdir():
#check if file is in text format or not
if file.endswith(".ls"):
filepath = f"{path}\{file}"
#calling the read function
readingJ1Average.readingJ6(filepath)
for file in os.listdir():
#check if file is in text format or not
if file.endswith(".ls"):
filepath = f"{path}\{file}"
#calling the read function
readingJ1Average.readingJ7(filepath)
with open('C:/Users/DunningJ3/Desktop/sample.csv', 'w') as wf:
write = csv.writer(wf)
write.writerows(j1_list)
#TXT file to Excel
The fast answer is that you need to convert the row to string instead on keeping it as a list or to use a matrix. But first you need to keep things simple, cleaning all code smells and following best practices, otherwise will be hard to figure out a solution.
The csv.writerows() is expecting a list of rows but you want to transpose them, so we can solve this by using a matrix or an array of string. For both the new item (a list of numbers or a string) is generated per row of the original csv taking the same position.
Let's say the original csv is "A" and contains items in the form of "a(ij)". You will build a new "A' " where it's items are "a'(ji)" and csv.writerows() is expecting:
[
[a'(00), a'(01), ..., a'(0i)]
[a'(10), a'(11), ..., a'(1i)]
...
[a'(j0), a'(j1), ..., a'(ji)
]
This is kind transposing a matrix btw
import csv
matrix = []
def init_matrix(total_lines):
for i in range(total_lines):
matrix.append([])
def readAll(filepath, csv_separator):
with open(filepath, 'r') as f:
lines = f.readlines()
total_rows = len(lines)
total_cols = len(lines[0].split(csv_separator))
print('Total Rows ', total_rows)
print('Total Cols ', total_cols)
init_matrix(total_cols)
for j in range(total_rows):
line = lines[j].rstrip()
elements = line.split(csv_separator)
for i in range(total_cols):
matrix[i].append(elements[i])
def main():
filepath = f"{'test.csv'}"
readAll(filepath, ',')
with open('result.csv', 'w') as wf:
write = csv.writer(wf)
write.writerows(matrix)
main()
And here the sample test.csv file
a,1,2,3,4,5,6,7,8,9,0
b,1,2,3,4,5,6,7,8,9,0
c,1,2,3,4,5,6,7,8,9,0
d,1,2,3,4,5,6,7,8,9,0
e,1,2,3,4,5,6,7,8,9,0
f,1,2,3,4,5,6,7,8,9,0
g,1,2,3,4,5,6,7,8,9,0
The output will be
a,b,c,d,e,f,g
1,1,1,1,1,1,1
2,2,2,2,2,2,2
3,3,3,3,3,3,3
4,4,4,4,4,4,4
5,5,5,5,5,5,5
6,6,6,6,6,6,6
7,7,7,7,7,7,7
8,8,8,8,8,8,8
9,9,9,9,9,9,9
0,0,0,0,0,0,0

How to build specific format with open()?

Here's my code:
import glob
import itertools
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
os.chdir("PATH/pdf")
extension = 'pdf'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
valeur = []
n = 1
for i in all_filenames:
fp = open(i, "rb")
parser = PDFParser(fp)
doc = PDFDocument(parser)
fields = resolve1(doc.catalog["AcroForm"])["Fields"]
for i in fields:
field = resolve1(i)
name, value = field.get("T"), field.get("V")
filehehe = "{0}:{1}".format(name,value)
values = resolve1(value)
names = resolve1(name)
valeur.append(values)
n = n+1
with open('test.csv','wb') as f:
for i in valeur:
f.write(i)
The goal here is to pick up some informations in PDF. Here's the output :
As you can see, the format is not pretty. I'm not very familiar with open() so I'm kind of stuck.
I would like to have distinct rows for each PDF with each informations having her own cell. Something like that :
Try to store the data from each pdf file in a separate list. And add this list to the valeur list which you have.
Use csv module as #martineau rightly suggested.
You can try the with below code.
import csv
valeur = []
#your code
n = 1
for i in all_filenames:
temp_list = []
fp = open(i, "rb")
parser = PDFParser(fp)
doc = PDFDocument(parser)
fields = resolve1(doc.catalog["AcroForm"])["Fields"]
for i in fields:
field = resolve1(i)
name, value = field.get("T"), field.get("V")
filehehe = "{0}:{1}".format(name,value)
values = resolve1(value)
names = resolve1(name)
temp_list.append(values)
n = n+1
valeur.append(temp_list)
#Finally when you have the required data, you can write to csv file like this.
with open('mycsv.csv', 'w', newline='') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
for val in valeur:
wr.writerow(val)
With this, the output would be like this

script to autosort point vaules not working

Trying to autosort point values from greasest to least, from .txt to .csv
Im trying to sort this: "email#email.com:stuffhere | PointsTotal = 1440"
this is what I currently got;
import csv
import glob
allTxtFiles = glob.glob("txt\\*.txt")
for t in allTxtFiles:
inputFile = open(t,'r').readlines()
endlines = []
sortedLines = []
for e in inputFile:
minNum = e.split("|")
minNum[4] = minNum[4].replace("PointsTotal = ",'')
minNum[4] = minNum[4].strip()
try:
minNum[4] = int(minNum[4])
sortedLines.append(minNum)
except:
endlines.append(minNum)
sortedLines.sort(key=lambda x: int(x[4]),reverse=True)
sortedLines.extend(endlines)
with open("sorted\\"+t.replace("txt\\",'')+".csv",'a+',newline="") as outfile:
writer = csv.writer(outfile)
for s in sortedLines:
writer.writerow(s)

Defining a list within the code versus reading it from a file

I am trying to count the number of specific words in a given report. Does anyone know why defining a list within the code makes the second part of the following code run faster than reading the list from a file? Is there a solution? The list contains the same words is a lot longer than two words in the following example.
# Example code: Within code list
import csv
import glob
import re
import time
TARGET_FILES = r'C:/Users/s170760/Desktop/Reports_Cleaned/*.*'
OUTPUT_FILE = r'C:/Users/s170760/Desktop/Parser.csv'
OUTPUT_FIELDS = ['file name', 'create']
create = {'agile', 'skills'}
def main():
f_out = open(OUTPUT_FILE, 'w')
wr = csv.writer(f_out, lineterminator='\n')
wr.writerow(OUTPUT_FIELDS)
file_list = glob.glob(TARGET_FILES)
for file in file_list:
print(file)
with open(file, 'r', encoding='UTF-8', errors='ignore') as f_in:
doc = f_in.read()
doc = doc.lower()
output_data = get_data(doc)
output_data[0] = file
wr.writerow(output_data)
def get_data(doc):
_odata = [0] * 2
tokens = re.findall('\w(?:[-\w]*\w)?', doc)
for token in tokens:
if token in create:
_odata[1] += 1
return _odata
Here is the other way:
# Example code: Reading list from a file
import csv
import glob
import re
import time
TARGET_FILES = r'C:/Users/s170760/Desktop/Reports_Cleaned/*.*'
OUTPUT_FILE = r'C:/Users/s170760/Desktop/Parser.csv'
OUTPUT_FIELDS = ['file name', 'create']
create = open('C:/Users/s170760/Desktop/Create.txt', 'r').read().splitlines()
def main():
f_out = open(OUTPUT_FILE, 'w')
wr = csv.writer(f_out, lineterminator='\n')
wr.writerow(OUTPUT_FIELDS)
file_list = glob.glob(TARGET_FILES)
for file in file_list:
print(file)
with open(file, 'r', encoding='UTF-8', errors='ignore') as f_in:
doc = f_in.read()
doc = doc.lower()
output_data = get_data(doc)
output_data[0] = file
wr.writerow(output_data)
def get_data(doc):
_odata = [0] * 2
tokens = re.findall('\w(?:[-\w]*\w)?', doc)
for token in tokens:
if token in create:
_odata[1] += 1
return _odata
As pointed out by Mark in the comments, the first code snippet uses a set of strings, while the second code snippet loads a file into a list of strings.
Why sets are faster than lists in this use case, is well explained in this Stack Overflow answer. Parsing the output of open to a set can indeed solve your problem.
So replace:
create = open('C:/Users/s170760/Desktop/Create.txt', 'r').read().splitlines()
With:
create = set(open('C:/Users/s170760/Desktop/Create.txt', 'r').read().splitlines())

Categories