I want to select columns by their numbers, e.g. 1,3,2. The param.txt file contains only that single entry.
import pandas as pd
import numpy as np
# Load the CSV whose columns are to be selected by position.
df = pd.read_csv('sample1.csv')
# Read the whole parameter file into one string.
with open('param.txt') as f:
s = f.read()
# NOTE: s is still a single str here, so [s] is a one-element list holding
# that string rather than a list of integer positions.
b = df.iloc[:, [s]]
# Print the selected columns without the index column.
print(b.to_string(index=False))
When I run the script, I get this error:
raise IndexError(f".iloc requires numeric indexers, got {arr}")
IndexError: .iloc requires numeric indexers, got ['1,3,2']
How can I simply convert this form to numeric indexers?
Thank you for any help.
This should work assuming f.read() returns "1,2,3"
import pandas as pd
import numpy as np
# Select DataFrame columns by the 0-based positions listed in param.txt.
df = pd.read_csv('sample1.csv')

with open('param.txt') as f:
    # The file is expected to hold one comma-separated entry such as "1,3,2".
    raw = f.read()

# Split on commas and convert each piece to int; blank pieces (trailing comma
# or empty file) are skipped because int('') would raise ValueError.
s = [int(piece) for piece in raw.split(",") if piece.strip()]

# s is already a list of ints, so it is passed to .iloc without extra brackets.
b = df.iloc[:, s]
print(b.to_string(index=False))
Related
This code was originally made in a .ipynb file.
I am getting the error "TypeError: list indices must be integers or slices, not str"
and I can't seem to figure out how to fix this problem.
The expected result is that the Unix timestamps in the dataframe are translated to (Year-Month), and that the most recent date is used for the file name.
import numpy as np
import pandas as pd
import time
from datetime import datetime
import os
import re
# Placeholder frame; it is replaced a few lines below.
df = pd.DataFrame()
# List the 'input' directory and keep the .csv files whose name contains 'export_'.
files = os.listdir('input')
arr = [i for i in files if i.endswith('.csv') and 'export_' in i]
df = pd.DataFrame({'filename':arr})
res = []
# The code that gives the error.
for i in df.index:
# re.findall returns a list of the digit runs in the file name, as strings.
unix_code = re.findall('\d+', arr[i])
for x in unix_code:
# NOTE: x is an element of unix_code (a str), not a position, so unix_code[x]
# indexes a list with a string. The built name is also never assigned.
"facturatie_vzs_" + datetime.utcfromtimestamp(unix_code[x]).strftime('%Y-%m') + ".csv"
res.append(i)
Use a list comprehension with `and` for the logical AND of scalars (`&` is the bitwise AND used with arrays), test for the substring with the `in` operator, and finally pass the resulting list to the DataFrame constructor:
# Collect the exported CSV files and decode the Unix timestamp in each name.
files = os.listdir('input')
# for test
# files = ['export_1656662723.csv', 'export_sss1654071237.csv']

# `and` is the logical AND for scalars; `in` tests for the substring.
arr = [i for i in files if i.endswith('.csv') and 'export_' in i]
df = pd.DataFrame({'filename': arr})

# Raw string so that \d reaches the regex engine unchanged; the extracted digits
# are cast to numbers before conversion so that `unit='s'` treats them as epoch
# seconds rather than as date strings.
seconds = pd.to_numeric(df['filename'].str.extract(r'(\d+)', expand=False))
df['timestamp'] = pd.to_datetime(seconds, unit='s')
print(df)
filename timestamp
0 export_1656662723.csv 2022-07-01 08:05:23
1 export_sss1654071237.csv 2022-06-01 08:13:57
I have an h5 file that contains a nested data structure as follows:
Measures/Gait/Joint/Back/Abduction/Maximum
Measures/Gait/Joint/Back/Abduction/Minimum
I am looking to put the contents of the nested "Minimum" and "Maximum" data structures into a DataFrame (2 columns x 25 rows).
I tried to run this code, but it doesn't work:
import pandas as pd
import h5py
import hdfdict
# Open the file read-only and print each name yielded by iterating over it.
f = h5py.File("walking5.h5",'r')
for name in f:
print(name)
# Load the same file through hdfdict and show the keys of the result.
res = hdfdict.load("walking5.h5")
print(res.keys())
# NOTE: each inner expression starts from the list literal ['Measures'] and
# then indexes that list with the string 'Gait'; nothing here indexes `res`.
list3 = [['Measures']['Gait']['Joint']['Back']['Abduction']['Maximum'],['Measures']['Gait']['Joint']['Back']['Abduction']['Minimum']]
df1 = []
# NOTE(review): the loop variable is called np, the usual alias for numpy.
for np in list3:
df = pd.DataFrame (str(res[np]))
df.columns = [str(np)]
df1.append(df)
I don't know why this code doesn't work.
I have received this error:
TypeError: list indices must be integers or slices, not str
I am trying to read the data, but its first column has values in exponential format, which prevents me from reading the file. Here is a minimal working example of my code, and here is the link to the data file for trying out the code:
import numpy as np
# Data file to load (the name contains a space).
filename ="0 A.dat"
# Skip the first three lines and parse the remaining comma-separated fields.
data = np.loadtxt(filename, delimiter=',', skiprows=3)
but I am getting this error
ValueError: could not convert string to float:
You can read them with pandas:
import pandas as pd
# Skip the first three lines and read the rest as comma-separated values.
data = pd.read_csv(filename, delimiter=',', skiprows=3)
import numpy as np
def yesfloat(string):
    """Return True if ``string`` can be parsed by ``float()``, else False.

    The result is always a real ``bool``. Returning the parsed number itself
    would make valid inputs such as ``"0"`` or ``"0.0"`` look falsy.

    Args:
        string: Text to test, e.g. one field of a comma-separated line.

    Returns:
        bool: True when ``float(string)`` succeeds, False when it raises
        ``ValueError``.
    """
    try:
        float(string)
    except ValueError:
        return False
    return True
# Parse '0 A.dat' line by line into rows of mixed float/str fields.
with open('0 A.dat', 'r') as handle:
    lines = handle.readlines()

rows = []
for line in lines:
    fields = line.rstrip().split(",")
    # Fields accepted by yesfloat() are converted to float; the rest stay text.
    rows.append([float(field) if yesfloat(field) else field for field in fields])

# Object dtype allows rows of differing lengths and mixed float/str values.
data = np.array(rows, dtype='O')
data
I don't know if that is the answer you are looking for, but I tried it with your data and it returned this:
array([list(['% Version 1.00']), list(['%']),
list(['%freq[Hz]\tTrc1_S21[dB]\tTrc2_S21[U]\tTrc3_S21[U]\tTrc4_S21[U]']),
...,
list([9998199819.981998, -22.89936928953151, 0.07161954135843378, -0.0618770495057106, -0.03606368601322174, '']),
list([9999099909.991, -22.91188769540125, 0.07151639513438152, -0.06464007496833801, -0.03059829212725163, '']),
list([10000000000.0, -22.92596306398167, 0.07140059761720122, -0.0669037401676178, -0.02493862248957157, ''])],
dtype=object)
I'm trying to retrieve a string from an Excel sheet, split it into words, and then print it or write it back into a new string. However, when I retrieve the data using pandas and try to split it, an error occurs saying that a DataFrame doesn't support the split function.
the excel sheet has this line in it:
I expect an output like this:
import numpy
import pandas as pd
# Load the sheet; read_excel returns a DataFrame, not a plain string.
df = pd.read_excel('eng.xlsx')
txt = df
# NOTE: txt is the DataFrame itself, so .split is looked up on the DataFrame.
x = txt.split()
print(x)
AttributeError: 'DataFrame' object has no attribute 'split'
That's because you are applying split() function on a DataFrame and that's not possible.
import pandas as pd
import numpy as np
def append_nan(x, max_len):
    """Return ``x`` padded with NaN up to ``max_len`` items.

    A new list is always returned; the list passed in is left untouched, so
    padding one row cannot silently change data that the caller still holds.

    Args:
        x: Sequence of items (e.g. the words of one sentence).
        max_len: Target length. When ``x`` already has at least this many
            items, nothing is appended.

    Returns:
        list: The items of ``x`` followed by ``max_len - len(x)`` NaN values.
    """
    # Multiplying a list by a non-positive count yields [], so no length check
    # is needed for inputs that are already long enough.
    return list(x) + [np.nan] * (max_len - len(x))
# Split the sentences of the first column into one word per column.
# I define here a dataframe for the example
# df = pd.DataFrame(['This is my first sentence', 'This is a second sentence with more words'])
# header=None tells pandas that the sheet has no header row.
# (read_excel has no `index` argument, so none is passed.)
df = pd.read_excel('your_file.xlsx', header=None)
col_names = df.columns.values.tolist()
df_output = df.copy()

# Split your strings
df_output[col_names[0]] = df[col_names[0]].apply(lambda x: x.split(' '))
# Get the maximum length of all your sentences (0 when the sheet has no rows)
max_len = max(map(len, df_output[col_names[0]]), default=0)
# Append NaN values so that every row has the same number of entries
df_output[col_names[0]] = df_output[col_names[0]].apply(lambda x: append_nan(x, max_len))
# Create column names and build your dataframe
column_names = ["word_" + str(d) for d in range(max_len)]
df_output = pd.DataFrame(list(df_output[col_names[0]]), columns=column_names)
# Then you can save it
df_output.to_excel('output.xlsx')
The phone number "+49374425070" is converted to "49374425070.0" when I use "to_csv".
# Write the frame as tab-separated text, without the index and without quoting.
source.to_csv('CVV'+'_source.txt',sep = '\t', index = False ,encoding='utf-8',quoting=csv.QUOTE_NONE)
import pandas as pd
import csv
#Source
# Load the source CSV; no dtype is given, so pandas infers the column types.
source = pd.read_csv('source2.csv') #source csv name
# Write it back out as tab-separated text, without the index and without quoting.
source.to_csv('CVV'+'_source.txt',sep = '\t', index = False ,encoding='utf-8',quoting=csv.QUOTE_NONE)#float_format='%.0f'
# NOTE(review): only CVV_source.txt is written in the lines shown here.
print('archive.txt and source.txt generated')
You want to read the column as a string rather than letting pandas infer a numeric type (int64/float64). To do this, use something like this:
# dtype=str for the phone column keeps the number as text, exactly as written
# in the file, instead of letting pandas parse it as a number.
pd.read_csv('sample.csv', dtype={'phone': str})
This will work in Pandas >= 0.9.1.