Reading data in exponential format in Python (numpy)

I am trying to read a data file, but its first column holds values in exponential format, which is keeping me from reading the file. Here is a minimal working example of my code, and here is the link to the data file for trying out the code:
import numpy as np
filename ="0 A.dat"
data = np.loadtxt(filename, delimiter=',', skiprows=3)
but I am getting this error:
ValueError: could not convert string to float:

You can read them with pandas:
import pandas as pd
data = pd.read_csv(filename, delimiter=',', skiprows=3)
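Looking at the parsed output further down, every data row ends with an empty string, which suggests a trailing comma on each line; np.loadtxt then fails while trying to convert that empty final field to a float. A minimal sketch of a workaround, assuming the file really has five data columns followed by a trailing delimiter:
import numpy as np

# Assumption: five real columns plus a trailing comma, so we select
# columns 0-4 explicitly and the empty sixth field is never parsed.
data = np.loadtxt("0 A.dat", delimiter=",", skiprows=3, usecols=range(5))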

import numpy as np

def yesfloat(string):
    """True if the given string parses as a float, else False."""
    try:
        float(string)
        return True   # returning float(string) directly would misreport 0.0 as False
    except ValueError:
        return False

data = []
with open('0 A.dat', 'r') as f:
    for line in f.readlines():
        fields = line.rstrip().split(",")
        # Convert numeric fields to float; keep everything else (headers) as text.
        data.append([float(s) if yesfloat(s) else s for s in fields])
data = np.array(data, dtype='O')
data
I don't know if that is the answer you are looking for, but I tried it with your data and it returned this:
array([list(['% Version 1.00']), list(['%']),
list(['%freq[Hz]\tTrc1_S21[dB]\tTrc2_S21[U]\tTrc3_S21[U]\tTrc4_S21[U]']),
...,
list([9998199819.981998, -22.89936928953151, 0.07161954135843378, -0.0618770495057106, -0.03606368601322174, '']),
list([9999099909.991, -22.91188769540125, 0.07151639513438152, -0.06464007496833801, -0.03059829212725163, '']),
list([10000000000.0, -22.92596306398167, 0.07140059761720122, -0.0669037401676178, -0.02493862248957157, ''])],
dtype=object)
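If you then need a plain float array rather than an object array, a minimal sketch, assuming the first three rows are header text and each data row ends with the empty string seen above:
import numpy as np

# Assumption: rows 0-2 are headers; row[:-1] drops the trailing '' field.
numeric = np.array([row[:-1] for row in data[3:]], dtype=np.float64)
print(numeric.shape)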

Related

Pandas csv dataframe to json array

I am reading a csv file and trying to convert the data into a json array, but I am getting the error "only size-1 arrays can be converted to Python scalars".
The csv file contents are:
4.4.4.4
5.5.5.5
My code is below:
import json
import numpy as np
import pandas as pd

df1 = pd.read_csv('/Users/Documents/datasetfiles/test123.csv', header=None)
df1.head(5)
         0
0  4.4.4.4
1  5.5.5.5
df_to_array = np.array(df1)
app_json = json.dumps(df_to_array, default=int)
I need the output to be:
["4.4.4.4", "5.5.5.5", "3.3.3.3"]
As other answers mentioned, just use list: json.dumps(list(df[0]))
FYI, the data shape is your problem:
if you absolutely must use numpy, then transpose the array first:
json.dumps(list(df_to_array.transpose()[0]))
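Putting that together, a minimal sketch (with a small hypothetical frame standing in for test123.csv):
import json
import numpy as np
import pandas as pd

df1 = pd.DataFrame(["4.4.4.4", "5.5.5.5"])  # stand-in for pd.read_csv(..., header=None)

# np.array(df1) has shape (2, 1); transposing gives (1, 2), so row 0
# holds the values, and a plain list of them serializes cleanly.
df_to_array = np.array(df1)
print(json.dumps(list(df_to_array.transpose()[0])))  # ["4.4.4.4", "5.5.5.5"]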
Given test.csv:
4.4.4.4
5.5.5.5
Doing:
import json

with open('test.csv') as f:
    data = f.read().splitlines()

print(data)
print(json.dumps(data))
Output:
['4.4.4.4', '5.5.5.5']
["4.4.4.4", "5.5.5.5"]
You're overcomplicating things by using pandas if this is all you want to do~
import json
import pandas as pd

df1 = pd.read_csv('/Users/Documents/datasetfiles/test123.csv', header=None)
df1.head(5)
         0
0  4.4.4.4
1  5.5.5.5
df_to_array = list(df1[0])
app_json = json.dumps(df_to_array, default=int)
print(app_json)
["4.4.4.4", "5.5.5.5"]

Filtering a pandas data frame

Suppose we have a pandas data frame df with a column ids and 5 rows. In the code below, why do I still get the length of the filtered data frame to be 5?
import bz2
import pickle
import sys
import _pickle as cPickle
import pandas as pd
from downcast import reduce

def load(filename):
    """
    Load from filename using pickle

    #param filename: name of file to load from
    #type filename: str
    """
    try:
        f = bz2.BZ2File(filename, 'rb')
    except OSError:
        sys.stderr.write('File ' + filename + ' cannot be read\n')
        return
    myobj = cPickle.load(f)
    f.close()
    return myobj

df = pd.DataFrame({"ids": [1, 2, 3, 4, 5]})
print(df.shape)
sfile = bz2.BZ2File('df_list_small', 'w')
pickle.dump(df, sfile)
sfile.close()  # close so the compressed stream is flushed before reloading
This gives a shape of (5, 1).
df_new = load('df_list_small')
df_new = reduce(df_new)
all_groups = {ident: group for ident, group in df_new.groupby('ids')}
ids = 1
df_test = all_groups[ids]
print(df_test.shape)
This gives a shape of (1, 1).
So maybe it works only for certain files?
I figured it out. The filtered data frame would have the same dimensions as the original one only because they are equal; if I had used a different id, the dimensions of the filtered data frame would have been different.
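For reference, a minimal sketch of the same filtering without the pickle round trip, using a plain boolean mask:
import pandas as pd

df = pd.DataFrame({"ids": [1, 2, 3, 4, 5]})
df_test = df[df["ids"] == 1]  # keep only the rows whose id matches
print(df_test.shape)          # (1, 1)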

Python Pandas .iloc: select columns by number

I want to select column numbers, e.g. 1,3,2. The param.txt file contains only such an entry.
import pandas as pd
import numpy as np

df = pd.read_csv('sample1.csv')
with open('param.txt') as f:
    s = f.read()
b = df.iloc[:, [s]]
print(b.to_string(index=False))
When I run the script I get:
raise IndexError(f".iloc requires numeric indexers, got {arr}")
IndexError: .iloc requires numeric indexers, got ['1,3,2']
How do I simply convert such a form to numeric?
Thanks for any help.
This should work, assuming f.read() returns "1,3,2":
import pandas as pd
import numpy as np

df = pd.read_csv('sample1.csv')
with open('param.txt') as f:
    s = f.read()          # a string such as "1,3,2"
s = s.split(",")          # split on the commas: ["1", "3", "2"]
s = [int(x) for x in s]   # convert the entries from str to int: [1, 3, 2]
b = df.iloc[:, s]         # no extra brackets needed, since s is already a list
print(b.to_string(index=False))
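Equivalently, the parsing can be collapsed into a single comprehension; a minimal sketch:
import pandas as pd

df = pd.read_csv('sample1.csv')
with open('param.txt') as f:
    cols = [int(x) for x in f.read().split(",")]  # "1,3,2" -> [1, 3, 2]
print(df.iloc[:, cols].to_string(index=False))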

How to delete a specific row from the initial data based on a value in another array?

I have a value, [474], in the form of an array. I want to search for this value in the 1st column of my data and delete that complete row, so my data size will shrink by 1 row.
import numpy as np
import io
from numpy import genfromtxt

data = io.StringIO("""\
ID,1,2,3,4,5,6
5362,0.974,-0.404,-0.763,0.868,-0.5,0.16
485,-0.659,0.531,0.623,0.402,0.772,0.506
582,0.045,0.994,0.762,-0.036,0.117,-0.355
99,0.777,0.537,0.391,0.456,0.329,0.108
75,-0.44,0.522,0.856,-0.04,0.656,-0.935
474,0.357,0.81,0.135,0.389,0.055,0.224
594,-0.291,0.031,0.742,-0.332,0.815,0.983
597,0.968,-0.357,0.591,0.892,0.375,0.88
124,0.737,0.611,0.764,0.289,0.298,-0.705
635,0.883,0.96,-0.987,0.29,0.997,0.186
""")
data = genfromtxt(data, delimiter=',', skip_header=1, dtype=np.float64)
print(data)
diff = [474]
print(diff)
[474]
The below row should get deleted from the original array.
474,0.357,0.81,0.135,0.389,0.055,0.224
idx = np.where(data[:, 0] == 474)
cleaneddata = np.delete(data, idx, 0)  # the original referenced an undefined data2
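Since the value to remove arrives as an array ([474]), a sketch that handles a whole list of IDs at once, reusing data and diff from above:
mask = ~np.isin(data[:, 0], diff)  # True for rows whose ID is NOT in diff
cleaneddata = data[mask]
print(cleaneddata.shape)           # one row fewer than data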
Try this one:
import pandas as pd

data = genfromtxt(data, delimiter=',', skip_header=1, dtype=np.float64)
data = pd.DataFrame(data)
data[data[0] != 474]  # keeps every row whose first column is not 474

Load data from csv into numpy array

I am trying to load data in a csv file (with delimiter ',') into a numpy array. Example of a line is: 81905.75578271,81906.6205052,50685.487931,.... (1000 columns).
I have this code, but it does not seem to work properly: on exit from the function the debugger cannot recognize the data, and when I call x_train.shape it returns 0:
def load_data(path):
    # return np.loadtxt(path, dtype=int, delimiter=',')
    file = open(path, 'r')
    data = []
    for line in file:
        array_vals = line.split(",")
        array = []
        for val in array_vals:
            if not val:  # note: this keeps only EMPTY fields, so every row array stays empty
                array.append(float(val))
        data.append(np.asarray(array))
    return np.asarray(data)
x_train = load_data(path)
This should give you your required output:
import numpy as np

def load_data(path):
    return np.loadtxt(path, delimiter=',')
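A quick usage check, assuming the same path as in the question:
x_train = load_data(path)
print(x_train.shape)  # should now report (n_rows, 1000) instead of 0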
