I'm trying to write my looped NumPy data Modus_citra into a CSV file, but I've tried both NumPy and a plain csv write, and neither worked.
import glob
import cv2
from os import listdir
from os.path import isfile, join
import os
import numpy as np
from sklearn.utils.validation import check_array
import csv
import pandas as pd
def find_mode(np_array):
    vals, counts = np.unique(np_array, return_counts=True)
    index = np.argmax(counts)
    return vals[index]
folder = ("C:/Users/ROG FLOW/Desktop/Untuk SIDANG TA/Sudah Aman/testbikincsv/folderdatacitra/*.jpg")
for file in glob.glob(folder):
    image = cv2.imread(file)
    rows = image.shape[0]
    cols = image.shape[1]
    middlex = cols/2
    middley = rows/2
    middle = [middlex, middley]
    titikawalx = middlex - 10
    titikawaly = middley - 10
    titikakhirx = middlex + 10
    titikakhiry = middley + 10
    crop = image[int(titikawaly):int(titikakhiry), int(titikawalx):int(titikakhirx)]
    c = cv2.cvtColor(crop, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(c)
    Modus_citra = find_mode(H)
    data = Modus_citra
    with open("foo.csv", 'w') as file:
        writer = csv.writer(file)
        writer.writerows(data)
The error: TypeError: 'numpy.uint8' object is not iterable

What I want foo.csv to contain (the mode from each picture):
60
40
19
11
Please, can someone help me? I appreciate every bit of help.
Based on your edit, you can try:
folder = "C:/Users/ROG FLOW/Desktop/Untuk SIDANG TA/Sudah Aman/testbikincsv/folderdatacitra/*.jpg"
all_data = []
for file in glob.glob(folder):
# ...
Modus_citra = find_mode(H)
all_data.append(Modus_citra) # <-- add the numpy.uint8 to the all_data list
# after the loop write the data to the CSV file:
with open("foo.csv", "w") as file:
writer = csv.writer(file)
for data in all_data:
writer.writerow([data])
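Alternatively, since all_data ends up holding plain integers, NumPy can write the one-column file directly (a minimal sketch; numpy.savetxt writes each element of a 1-D array on its own line):

import numpy as np

# each mode value lands on its own line, matching the expected foo.csv
np.savetxt("foo.csv", np.array(all_data), fmt="%d")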
I am writing a list of numbers to a CSV file, but it is putting each number into a different cell, and I cannot figure out why.

What I tried

I was using csv.writerow(), which puts them all into the same row, but I need them in columns. Attempting to fix that, I switched to csv.writerows(), which does put them in columns, but every single number is in a new row, separated from the next.
Does anyone know why this is?
Code
class readingJ1Average:
    def readingJ1(filepath):
        with open(filepath, 'r') as f:
            j1 = f.readlines()[46:47]
            # Converting list to a string
            j1_join = '\n'.join(j1)
            # Pulling only average
            j1_value = j1_join[5:16]
            # Appending to a list
            j1_list.append(j1_value)

    def readingJ2(filepath):
        with open(filepath, 'r') as f:
            j2 = f.readlines()[47:48]
            print(j2)
            # Converting list to a string
            j2_join = '\n'.join(j2)
            # Pulling only average
            j2_value = j2_join[5:16]
            # Appending to a list
            j2_list.append(j2_value)

    def readingJ3(filepath):
        with open(filepath, 'r') as f:
            j3 = f.readlines()[48:49]
            # Converting list to a string
            j3_join = '\n'.join(j3)
            # Pulling only average
            j3_value = j3_join[5:16]
            # Appending to a list
            j3_list.append(j3_value)

    def readingJ4(filepath):
        with open(filepath, 'r') as f:
            j4 = f.readlines()[48:49]
            # Converting list to a string
            j4_join = '\n'.join(j4)
            # Pulling only average
            j4_value = j4_join[5:16]
            # Appending to a list
            j4_list.append(j4_value)

    def readingJ5(filepath):
        with open(filepath, 'r') as f:
            j5 = f.readlines()[49:50]
            # Converting list to a string
            j5_join = '\n'.join(j5)
            # Pulling only average
            j5_value = j5_join[5:16]
            # Appending to a list
            j5_list.append(j5_value)

    def readingJ6(filepath):
        with open(filepath, 'r') as f:
            j6 = f.readlines()[50:51]
            # Converting list to a string
            j6_join = '\n'.join(j6)
            # Pulling only average
            j6_value = j6_join[5:16]
            # Appending to a list
            j6_list.append(j6_value)

    def readingJ7(filepath):
        with open(filepath, 'r') as f:
            j7 = f.readlines()[51:52]
            # Converting list to a string
            j7_join = '\n'.join(j7)
            # Pulling only average
            j7_value = j7_join[5:16]
            # Appending to a list
            j7_list.append(j7_value)
# Beginning main code
j1_list = []
j2_list = []
j3_list = []
j4_list = []
j5_list = []
j6_list = []
j7_list = []

for file in os.listdir():
    # check if file is in text format or not
    if file.endswith(".ls"):
        filepath = f"{path}\{file}"
        # calling the read function
        readingJ1Average.readingJ1(filepath)

for file in os.listdir():
    # check if file is in text format or not
    if file.endswith(".ls"):
        filepath = f"{path}\{file}"
        # calling the read function
        readingJ1Average.readingJ2(filepath)

for file in os.listdir():
    # check if file is in text format or not
    if file.endswith(".ls"):
        filepath = f"{path}\{file}"
        # calling the read function
        readingJ1Average.readingJ3(filepath)

for file in os.listdir():
    # check if file is in text format or not
    if file.endswith(".ls"):
        filepath = f"{path}\{file}"
        # calling the read function
        readingJ1Average.readingJ4(filepath)

for file in os.listdir():
    # check if file is in text format or not
    if file.endswith(".ls"):
        filepath = f"{path}\{file}"
        # calling the read function
        readingJ1Average.readingJ5(filepath)

for file in os.listdir():
    # check if file is in text format or not
    if file.endswith(".ls"):
        filepath = f"{path}\{file}"
        # calling the read function
        readingJ1Average.readingJ6(filepath)

for file in os.listdir():
    # check if file is in text format or not
    if file.endswith(".ls"):
        filepath = f"{path}\{file}"
        # calling the read function
        readingJ1Average.readingJ7(filepath)

with open('C:/Users/DunningJ3/Desktop/sample.csv', 'w') as wf:
    write = csv.writer(wf)
    write.writerows(j1_list)
# TXT file to Excel
The quick answer is that you need to convert each row to a string instead of keeping it as a list, or to use a matrix. But first, keep things simple: clean up the code smells and follow best practices, otherwise it will be hard to figure out a solution.
csv.writerows() expects a list of rows, but you want to transpose them, so we can solve this with a matrix or an array of strings. Either way, each new item (a list of numbers, or a string) is built from the elements that occupy the same position in every row of the original CSV.
Let's say the original CSV is "A" and contains items of the form a(ij). You build a new "A'" whose items are a'(ji), and csv.writerows() expects:
[
    [a'(00), a'(01), ..., a'(0i)],
    [a'(10), a'(11), ..., a'(1i)],
    ...
    [a'(j0), a'(j1), ..., a'(ji)]
]
This is essentially transposing a matrix, by the way.
import csv

matrix = []

def init_matrix(total_lines):
    for i in range(total_lines):
        matrix.append([])

def readAll(filepath, csv_separator):
    with open(filepath, 'r') as f:
        lines = f.readlines()
        total_rows = len(lines)
        total_cols = len(lines[0].split(csv_separator))
        print('Total Rows ', total_rows)
        print('Total Cols ', total_cols)
        init_matrix(total_cols)
        for j in range(total_rows):
            line = lines[j].rstrip()
            elements = line.split(csv_separator)
            for i in range(total_cols):
                matrix[i].append(elements[i])

def main():
    filepath = 'test.csv'
    readAll(filepath, ',')
    with open('result.csv', 'w', newline='') as wf:  # newline='' avoids blank rows on Windows
        write = csv.writer(wf)
        write.writerows(matrix)

main()
And here is the sample test.csv file:
a,1,2,3,4,5,6,7,8,9,0
b,1,2,3,4,5,6,7,8,9,0
c,1,2,3,4,5,6,7,8,9,0
d,1,2,3,4,5,6,7,8,9,0
e,1,2,3,4,5,6,7,8,9,0
f,1,2,3,4,5,6,7,8,9,0
g,1,2,3,4,5,6,7,8,9,0
The output will be
a,b,c,d,e,f,g
1,1,1,1,1,1,1
2,2,2,2,2,2,2
3,3,3,3,3,3,3
4,4,4,4,4,4,4
5,5,5,5,5,5,5
6,6,6,6,6,6,6
7,7,7,7,7,7,7
8,8,8,8,8,8,8
9,9,9,9,9,9,9
0,0,0,0,0,0,0
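For the record, the same transposition can be written much more compactly with csv.reader and zip() (a minimal sketch, assuming the same test.csv as above):

import csv

# zip(*rows) regroups the j-th element of every row into one tuple,
# which is exactly the transposed layout writerows() expects
with open('test.csv', newline='') as f:
    rows = list(csv.reader(f))

with open('result.csv', 'w', newline='') as f:
    csv.writer(f).writerows(zip(*rows))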
Here's my code:
import glob
import itertools
import sys, os
import six
import csv
import numpy as np
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1

os.chdir("PATH/pdf")
extension = 'pdf'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
valeur = []
n = 1
for i in all_filenames:
    fp = open(i, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    fields = resolve1(doc.catalog["AcroForm"])["Fields"]
    for i in fields:
        field = resolve1(i)
        name, value = field.get("T"), field.get("V")
        filehehe = "{0}:{1}".format(name, value)
        values = resolve1(value)
        names = resolve1(name)
        valeur.append(values)
    n = n + 1
with open('test.csv', 'wb') as f:
    for i in valeur:
        f.write(i)
The goal here is to pick up some information from the PDFs. Here's the output:
As you can see, the format is not pretty. I'm not very familiar with open(), so I'm kind of stuck.
I would like a distinct row for each PDF, with each piece of information in its own cell. Something like this:
Try storing the data from each PDF file in a separate list, and add that list to the valeur list you already have.
Use the csv module, as @martineau rightly suggested.
You can try the code below.
import csv

valeur = []
# your code
n = 1
for i in all_filenames:
    temp_list = []
    fp = open(i, "rb")
    parser = PDFParser(fp)
    doc = PDFDocument(parser)
    fields = resolve1(doc.catalog["AcroForm"])["Fields"]
    for i in fields:
        field = resolve1(i)
        name, value = field.get("T"), field.get("V")
        filehehe = "{0}:{1}".format(name, value)
        values = resolve1(value)
        names = resolve1(name)
        temp_list.append(values)
    n = n + 1
    valeur.append(temp_list)

# Finally, when you have the required data, you can write it to the csv file like this:
with open('mycsv.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    for val in valeur:
        wr.writerow(val)
With this, the output will have one row per PDF, with each value in its own cell.
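One caveat (an assumption, depending on your PDFs): pdfminer often returns form-field values as bytes, so you may want to decode them before appending. A hypothetical helper, assuming UTF-8 text:

# hypothetical helper; swap in whatever encoding your PDFs actually use
def to_text(value):
    if isinstance(value, bytes):
        return value.decode('utf-8', errors='ignore')
    return value

# then, inside the loop above:
# temp_list.append(to_text(values))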
I am trying to count the number of specific words in a given report. Does anyone know why defining the list within the code makes the second part of the following code run faster than reading the list from a file? Is there a solution? The actual list contains the same kind of words but is a lot longer than the two in the following example.
# Example code: Within code list
import csv
import glob
import re
import time
TARGET_FILES = r'C:/Users/s170760/Desktop/Reports_Cleaned/*.*'
OUTPUT_FILE = r'C:/Users/s170760/Desktop/Parser.csv'
OUTPUT_FIELDS = ['file name', 'create']
create = {'agile', 'skills'}
def main():
    f_out = open(OUTPUT_FILE, 'w')
    wr = csv.writer(f_out, lineterminator='\n')
    wr.writerow(OUTPUT_FIELDS)
    file_list = glob.glob(TARGET_FILES)
    for file in file_list:
        print(file)
        with open(file, 'r', encoding='UTF-8', errors='ignore') as f_in:
            doc = f_in.read()
        doc = doc.lower()
        output_data = get_data(doc)
        output_data[0] = file
        wr.writerow(output_data)

def get_data(doc):
    _odata = [0] * 2
    tokens = re.findall(r'\w(?:[-\w]*\w)?', doc)
    for token in tokens:
        if token in create:
            _odata[1] += 1
    return _odata
Here is the other way:
# Example code: Reading list from a file
import csv
import glob
import re
import time
TARGET_FILES = r'C:/Users/s170760/Desktop/Reports_Cleaned/*.*'
OUTPUT_FILE = r'C:/Users/s170760/Desktop/Parser.csv'
OUTPUT_FIELDS = ['file name', 'create']
create = open('C:/Users/s170760/Desktop/Create.txt', 'r').read().splitlines()
def main():
    f_out = open(OUTPUT_FILE, 'w')
    wr = csv.writer(f_out, lineterminator='\n')
    wr.writerow(OUTPUT_FIELDS)
    file_list = glob.glob(TARGET_FILES)
    for file in file_list:
        print(file)
        with open(file, 'r', encoding='UTF-8', errors='ignore') as f_in:
            doc = f_in.read()
        doc = doc.lower()
        output_data = get_data(doc)
        output_data[0] = file
        wr.writerow(output_data)

def get_data(doc):
    _odata = [0] * 2
    tokens = re.findall(r'\w(?:[-\w]*\w)?', doc)
    for token in tokens:
        if token in create:
            _odata[1] += 1
    return _odata
As pointed out by Mark in the comments, the first code snippet uses a set of strings, while the second code snippet loads a file into a list of strings.
Why sets are faster than lists in this use case is well explained in this Stack Overflow answer. Converting the output of open to a set can indeed solve your problem.
So replace:
create = open('C:/Users/s170760/Desktop/Create.txt', 'r').read().splitlines()
With:
create = set(open('C:/Users/s170760/Desktop/Create.txt', 'r').read().splitlines())
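The difference matters because a membership test on a list scans every element (O(n)), while a set hashes the lookup key (O(1) on average). A quick, self-contained timing sketch (the word list here is made up for illustration):

import timeit

words = [str(i) for i in range(100000)]
as_list = words
as_set = set(words)

# worst case for the list: the sought word sits at the very end
print(timeit.timeit(lambda: '99999' in as_list, number=1000))  # linear scan, slow
print(timeit.timeit(lambda: '99999' in as_set, number=1000))   # hash lookup, fast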