Load data from csv into numpy array - python

I am trying to load data in a csv file (with delimiter ',') into a numpy array. Example of a line is: 81905.75578271,81906.6205052,50685.487931,.... (1000 columns).
I have this code, but it does not seem to work properly: when the function returns, the debugger cannot recognize the data, and when I check x_train.shape it returns 0:
def load_data(path):
# return np.loadtxt(path,dtype=int,delimiter=',')
file = open(path,'r')
data = []
for line in file:
array_vals = line.split(",")
array = []
for val in array_vals:
if not val:
array.append(float(val))
data.append(np.asarray(array))
return np.asarray(data)
x_train = load_data(path)

This should give you your required output.
import numpy as np
def load_data(path):
    """Load a comma-separated numeric file into a NumPy array.

    Args:
        path: Name or path of the CSV file to read.

    Returns:
        The array built by ``np.loadtxt`` from the file's rows and columns.
    """
    return np.loadtxt(path, delimiter=',')

Related

How to write csv inside a loop python

I have got the outputs I want for the CSV file, but I don't know how to write them into a CSV file because the output is a NumPy array.
def find_mode(np_array):
    """Return the most frequent value found in ``np_array``.

    ``np.unique`` yields the distinct values in sorted order and
    ``np.argmax`` picks the first maximum, so when several values share the
    highest count the smallest of them is returned.
    """
    vals, counts = np.unique(np_array, return_counts=True)
    return vals[np.argmax(counts)]
folder = ("C:/Users/ROG FLOW/Desktop/Untuk SIDANG TA/Sudah Aman/testbikincsv/folderdatacitra/*.jpg")
for file in glob.glob(folder):
a = cv2.imread(file)
rows = a.shape[0]
cols = a.shape[1]
middlex = cols/2
middley = rows/2
middle = [middlex,middley]
titikawalx = middlex - 10
titikawaly = middley - 10
titikakhirx = middlex + 10
titikakhiry = middley + 10
crop = a[int(titikawaly):int(titikakhiry), int(titikawalx):int(titikakhirx)]
c = cv2.cvtColor(crop, cv2.COLOR_BGR2HSV)
H,S,V = cv2.split(c)
hsv_split = np.concatenate((H,S,V),axis=1)
Modus_citra = (find_mode(H)) #how to put this in csv
My output is Modus_citra, which is a NumPy value of type np.uint8. I am trying to put it in a CSV file, but I am still confused about how to write it because the result is produced inside a loop.
Can someone help me write it into a CSV file? I appreciate any help.
Run your loop, and put the data into lists
e.g. mydata = [[result1], [result2], [result3]] (one inner list per CSV row)
Then use csv.writer(f).writerows(mydata), where f is the open output file, to write your list into CSV rows
https://docs.python.org/3/library/csv.html#csv.csvwriter.writerows
You can save your NumPy arrays to CSV files using the savetxt() function. This function takes a filename and array as arguments and saves the array into CSV format. You must also specify the delimiter; this is the character used to separate each variable in the file, most commonly a comma. For example:
import numpy as np
my_array = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# np.savetxt writes the file and returns None, so keep the file name (not the
# call's return value) in my_file.
my_file = 'randomtext.csv'
np.savetxt(my_file, my_array, delimiter=',', fmt='%d')
# Read the file back to show what was actually written.
print(np.loadtxt(my_file, delimiter=',', dtype=int))

Reading data in exponential format in python (numpy)

I am trying to read the data, but its first column has values in exponential format, which is preventing me from reading the file. Here is a minimal working example of my code, and here is the link to the data file for trying out the code:
import numpy as np
filename ="0 A.dat"
data = np.loadtxt(filename, delimiter=',', skiprows=3)
but I am getting this error
ValueError: could not convert string to float:
You can read them with pandas:
import pandas as pd
data = pd.read_csv(filename, delimiter=',', skiprows=3)
import numpy as np
def yesfloat(string):
    """Return True if ``string`` can be parsed by ``float()``, else False."""
    try:
        float(string)
    except ValueError:
        return False
    # Return an explicit True instead of the parsed value: float("0") is 0.0,
    # which is falsy and would make a valid zero look like "not a float".
    return True
# Parse '0 A.dat' line by line: split each line on commas and convert every
# field that yesfloat() accepts to float, keeping the other fields as text.
data = []
with open('0 A.dat', 'r') as f:
    for line in f:
        fields = line.rstrip().split(",")
        data.append([float(field) if yesfloat(field) else field for field in fields])
# dtype='O' keeps rows of differing length and mixed str/float content intact.
data = np.array(data, dtype='O')
data
I don't know if that is the answer you are looking for, but I tried it with your data and it returned this:
array([list(['% Version 1.00']), list(['%']),
list(['%freq[Hz]\tTrc1_S21[dB]\tTrc2_S21[U]\tTrc3_S21[U]\tTrc4_S21[U]']),
...,
list([9998199819.981998, -22.89936928953151, 0.07161954135843378, -0.0618770495057106, -0.03606368601322174, '']),
list([9999099909.991, -22.91188769540125, 0.07151639513438152, -0.06464007496833801, -0.03059829212725163, '']),
list([10000000000.0, -22.92596306398167, 0.07140059761720122, -0.0669037401676178, -0.02493862248957157, ''])],
dtype=object)

How to write continuous outputs in a single txt file

I am working with multiple data files (File_1, File_2, .....). I want the desired outputs for each data file to be saved in the same txt file as row values of a new column.
I tried the following code for my first data file (File_1). The desired outputs (Av_Age_btwn_0_to_5, Av_Age_btwn_5_to_10) are stored as row values of a column in the output txt file (Result.txt). Now, I want these outputs to be stored as row values of a next column of the same txt file when I work with File_2. Then for File_3, in a similar manner, I want the outputs in the next column and so on.
import numpy as np
data=np.loadtxt('C:/Users/Hrihaan/Desktop/File_1.txt')
Age=data[:,0]
Age_btwn_0_to_5=Age[(Age<5) & (Age>0)]
Age_btwn_5_to_10=Age[(Age<10) & (Age>=5)]
Av_Age_btwn_0_to_5=np.mean(Age_btwn_0_to_5)
Av_Age_btwn_5_to_10=np.mean(Age_btwn_5_to_10)
np.savetxt('/Users/Hrihaan/Desktop/Result.txt', (Av_Age_btwn_0_to_5, Av_Age_btwn_5_to_10), delimiter=',')
Any help would be appreciated.
If I understand correctly, each of your files is a column, and you want to combine them into a matrix (one file per column).
Maybe something like this could work?
import numpy as np
# Simulate some dummy data
def simulate_data(n_files):
    """Write ``n_files`` dummy input files named /tmp/File_<i>.txt.

    Each file receives 100 integers produced by ``np.random.randint(0, 10, 100)``,
    saved one per line in integer format.
    """
    for i in range(n_files):
        ages = np.random.randint(0, 10, 100)
        np.savetxt("/tmp/File_{}.txt".format(i), ages, fmt='%i')
# Your file processing
def process(age):
    """Return the mean of the ages in (0, 5) and in [5, 10) as a 2-tuple.

    Args:
        age: NumPy array of ages; boolean masks are applied to it directly.

    Returns:
        ``(av_age_btwn_0_to_5, av_age_btwn_5_to_10)``. The first mean covers
        values strictly between 0 and 5, the second covers values from 5
        (inclusive) up to 10 (exclusive).
    """
    age_btwn_0_to_5 = age[(age < 5) & (age > 0)]
    age_btwn_5_to_10 = age[(age < 10) & (age >= 5)]
    av_age_btwn_0_to_5 = np.mean(age_btwn_0_to_5)
    av_age_btwn_5_to_10 = np.mean(age_btwn_5_to_10)
    return (av_age_btwn_0_to_5, av_age_btwn_5_to_10)
n_files = 5
simulate_data(n_files)
results = []
for i in range(n_files):
    # Load data
    data = np.loadtxt('/tmp/File_{}.txt'.format(i))
    # Process your file and extract your information
    data_processed = process(data)
    # Store the result
    results.append(data_processed)
results = np.asarray(results)
# Transpose so that each input file becomes one column of the output file.
np.savetxt('/tmp/Result.txt', results.T, delimiter=',', fmt='%.3f')
In the end, you have something like that:
2.649,2.867,2.270,2.475,2.632
7.080,6.920,7.288,7.231,6.880
Is it what you're looking for?
import numpy as np
# Three example columns of equal length
age = np.arange(10)
time = np.arange(10)
mean = np.arange(10)
# Place the three 1-D arrays side by side as the columns of one 2-D array
output = np.column_stack((age, time, mean))
np.savetxt('FooFile.txt', output, delimiter=',', fmt='%s')
# ^^^^^^^^ --> Use this keyword argument if you want to save it as int. For simplicity just don't use it.
output:
0,0,0
1,1,1
2,2,2
3,3,3
4,4,4
5,5,5
6,6,6
7,7,7
8,8,8
9,9,9

Numpy ValueError: setting an array element with a sequence reading in list

I have this code that reads numbers and is meant to calculate std and %rms using numpy
import numpy as np
import glob
import os
values = []
line_number = 6
road = '/Users/allisondavis/Documents/HCl'
for pbpfile in glob.glob(os.path.join(road, 'pbpfile*')):
lines = open(pbpfile, 'r').readlines()
while line_number < len(lines) :
variables = lines[line_number].split()
values.append(variables)
line_number = line_number + 3
a = np.asarray(values).astype(np.float)
std = np.std(a)
rms = std * 100
print rms
However I keep getting the error code:
Traceback (most recent call last):
File "rmscalc.py", line 17, in <module>
a = np.asarray(values).astype(np.float)
ValueError: setting an array element with a sequence.
Any idea how to fix this? I am new to python/numpy. If I print my values it looks something like this:
[[1,2,3,4],[2,4,5,6],[1,3,5,6]]
I can think of a modification to your code which can potentially fix your problem:
Initialize values as a numpy array, and use numpy append or concatenate:
values = np.array([], dtype=float)
Then inside loop:
values = np.append(values, [variables], axis=0)
# or
variables = np.array(lines[line_number].split(), dtype=float)
values = np.concatenate((values, variables), axis=0)
Alternatively, if your files are .csv (or any other type Pandas can read):
import pandas as pd
# Replace `read_csv` with your appropriate file reader
a = pd.concat([pd.read_csv(pbpfile)
for pbpfile in glob.glob(os.path.join(road, 'pbpfile*'))]).values
# or
a = np.concatenate([pd.read_csv(pbpfile).values
for pbpfile in glob.glob(os.path.join(road, 'pbpfile*'))], axis=0)

Substitute function for pandas read_table not working

I am new to Python!
Due to some reasons, I can not use pandas in my environment. So I am writing the pandas read_table() by myself. Basically I am converting one code which is using pandas.read_table by myself. The code using pandas which has to be replaced is as follows :
import pandas as pd
import numpy as np
import scipy as sp
data_file = pd.read_table(r'records.csv', sep = ';', header=None)
id = np.unique(data_file[0])
tags = np.unique(data_file[1])
number_of_rows = len(id)
number_of_columns = len(tags)
words_indices, letter_indices = {}, {}
for i in range(len(tags)):
words_indices[tags[i]] = i
for i in range(len(id)):
letter_indices[id[i]] = i
#scipy sparse matrix
Vector = sp.lil_matrix((number_of_rows, number_of_columns))
#adds data into the sparse matrix
for line in data_file.values:
u, i , r = map(str,line)
Vector[letter_indices[u], words_indices[i]] = r
The csv file is having some 100 records of this format :
REC000034232657,CRC FIX OE Resubmit,0.0073410, 45
Now, I have replaced the pandas.read_table as follows by directly reading it from database rather than from .csv file :
def fetch_table(**kwargs):
    """Run the SQL given as ``qrystr`` and return every fetched row.

    Uses a ``conn`` connection object that is defined outside this function.

    Args:
        **kwargs: Must contain ``qrystr``, the query text to execute.

    Returns:
        The result of ``cursor.fetchall()``, or None when
        ``pyodbc.ProgrammingError`` is raised (the error is printed).

    Raises:
        KeyError: If the ``qrystr`` keyword argument is missing.
    """
    qry = kwargs['qrystr']
    try:
        cursor = conn.cursor()
        cursor.execute(qry)
        return cursor.fetchall()
    except pyodbc.ProgrammingError as e:
        print ("Exception occured as :", type(e) , e)
        # Make the failure result explicit: callers must check for None
        # before iterating over the returned rows.
        return None
# pandas alternate code
total_col = 0
count = 0
dict_csv = {}
stmt = "select * from tickets;"
fetched_rows = fetch_table(qrystr = stmt)
for row in fetched_rows:
total_col = len(row)
break
for i in range(0,total_col):
dict_csv[i] = []
for row in fetched_rows:
for i in range(0,total_col):
dict_csv[i].append(row[i])
# End of pandas alternate code
Rest of the code just continues to be same as the earlier chunk of code except that instead of data_file (returned by pd.read_table()), now I am using dict_csv , so the for loop in the earlier code which adds data to the sparse matrix is changed to :
for line in data_file.values:
u, i , r = map(str,line)
Vector[letter_indices[u], words_indices[i]] = r
However, I am getting below TypeError for that :
Traceback (most recent call last):
File "C:\Python32\my_scripts\ds.py", line 132, in <module>
for line in dict_csv.values:
TypeError: 'builtin_function_or_method' object is not iterable
I understand that dict_csv.values is not returning an iterable list, can anybody please point me what mistake I am making.
Also the integer 45 is coming as Decimal(45), how can I get rid of that ?
Thanks a lot

Categories