Array of arrays from a dat file

Array of arrays from a dat file - python

I have a dat file like this:
# donnees t,x,y pour uid=12345
3.949218750000000000e+00 1.442828613651609082e+00 -8.307779446951960578e-01
1.125000000000000000e+00 1.005202962469742722e+00 5.323792795519278753e-01
3.281250000000000000e-01 1.133892308149918815e+00 1.321436062441114778e+00
5.449218750000000000e+00 -1.568679619747660459e+00 1.225514134192944526e+00
.....
.....
.....
And I would like to extract and read the data in a specific way. I have to define each column as T, X, and Y, in arrays, and then the function I use must return an array containing the 3 arrays when typing return T,X,Y
I've tried this for now:
def lecture(fichier):
    """Read a whitespace-separated t, x, y data file and return its columns.

    Args:
        fichier: Path of the data file. Lines starting with ``#`` are
            treated as comments by ``np.loadtxt`` and skipped.

    Returns:
        A tuple ``(T, X, Y)`` of 1-D arrays holding columns 0, 1 and 2.
    """
    # Read the file that was passed in rather than a hard-coded name.
    # ndmin=2 keeps a single-row file two-dimensional so the column
    # slices below always work.
    data = np.loadtxt(fichier, usecols=(0, 1, 2), ndmin=2)
    print(data, data.shape)
    # data[i] would select row i; the columns are data[:, i].
    T = data[:, 0]
    X = data[:, 1]
    Y = data[:, 2]
    return T, X, Y
But it returns me 3 arrays, not an array containing the 3 arrays.
Any idea about how to proceed?
EDIT: here is how I did it:
def lecture(fichier):
    """Parse a t, x, y text file by hand and return its three columns.

    Blank lines and lines starting with ``#`` are ignored, so the header
    does not have to be exactly one line long.

    Args:
        fichier: Path of the whitespace-separated data file.

    Returns:
        A tuple ``(T, X, Y)`` of 1-D float arrays, one per column.
    """
    with open(fichier, 'r') as f:
        rows = [
            line.split()
            for line in f
            if line.strip() and not line.lstrip().startswith('#')
        ]
    T = np.array([float(r[0]) for r in rows])
    X = np.array([float(r[1]) for r in rows])
    Y = np.array([float(r[2]) for r in rows])
    print(T.shape)
    return T, X, Y

Maybe you want this:
import numpy as np
def lecture(fichier='test.txt'):
    """Load a data file and wrap every value of its first three rows in a list.

    Args:
        fichier: Path of the whitespace-separated data file; defaults to
            ``'test.txt'``.

    Returns:
        A tuple ``(T, X, Y)`` where each item is built from one *row* of
        the file (rows 0, 1 and 2) as a list of one-element lists.
    """
    data = np.loadtxt(fichier, usecols=(0, 1, 2))

    def wrap(row):
        # Turn [a, b, c] into [[a], [b], [c]].
        return [[value] for value in row]

    T = wrap(data[0])
    X = wrap(data[1])
    Y = wrap(data[2])
    return T, X, Y
# Call the function, then show the whole returned tuple and its first item.
data = lecture()
print(data)
print(data[0])
Output:
#complete data
([[3.94921875], [1.442828613651609], [-0.8307779446951961]],
[[1.125], [1.0052029624697427], [0.5323792795519279]],
[[0.328125], [1.1338923081499188], [1.3214360624411148]])
#data[0]
[[3.94921875], [1.442828613651609], [-0.8307779446951961]]

Related

How to fix ' 'str' object has no attribute 'tell' ' error in Python when trying to read xpt files?

I have five .xpt data files which I am trying to read in Python. I want to get the data in those files into an array. This is for a machine learning project. I am trying that with this code but am getting an error on the line 'with xport.Reader(fname) as reader: '.
Though I am still not sure if fixing this error would fix the whole code or not. Is there any other method I can use?
# imports to read the xpt file data
import xport
import numpy as np
import os
#read the xpt file data
FN = ["BMX.XPT", "BMX_B.XPT", "BMX_C.XPT", "BMX_D.XPT", "BMX_E.XPT"]
# Read one XPT file through xport.Reader.
# Returns Z, a dict mapping each row's "SEQN" value to that row's remaining
# values (in the order of H), and H, the sorted column names without "SEQN".
# NOTE(review): indentation was lost in this listing; the nesting has to be
# inferred from the statements themselves.
def get_data(fname):
Z={}
H=None
# fname is handed to xport.Reader as-is (a path string).
with xport.Reader(fname) as reader:
for row in reader:
if H is None:
# The column names are taken once, from the first row.
# NOTE(review): remove()/sort() need a list, i.e. Python 2's keys();
# on Python 3 keys() is a view without these methods - confirm version.
H=row.keys()
H.remove("SEQN")
H.sort()
Z[row["SEQN"]] = [row[k] for k in H]
return Z,H
# call get_data method on each file
# D collects one dict per file (as returned by get_data);
# VN accumulates the prefixed column names of all files.
D,VN =[],[]
for fn in FN:
fn_full = os.path.join("../Data/", fn)
X,H = get_data(fn_full)
# Prefix every column name with the file's base name, e.g. "BMX:<column>".
s = fn.replace(".XPT", "")
H = [s + ":" + x for x in H]
D.append(X)
VN += H
## The sequence numbers that are in all data sets
# Intersect the key sets of all per-file dicts, then sort the result.
KY = set(D[0].keys())
for d in D[1:]:
KY &= set(d.keys())
KY = list(KY)
KY.sort()
def to_float(x):
    """Convert *x* to ``float``, returning NaN when it cannot be converted.

    Args:
        x: Any value; typically a number or a numeric string.

    Returns:
        ``float(x)``, or ``float("nan")`` if the conversion fails.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # TypeError covers non-numeric objects such as None, which
        # float() rejects with a different exception than bad strings.
        return float("nan")
## Merge the data
# Build one row per common sequence number by concatenating that key's
# values from every per-file dict in D.
Z = []
for ky in KY:
z = []
# NOTE(review): map() is lazy on Python 3, so this call only extends z on
# Python 2; the explicit loop in the comment below is the portable form.
map(z.extend, (d[ky] for d in D))
## equivalent to
## for d in D:
## z.extend(d[ky])
z = [to_float(a) for a in z]
## equivalent to
## map(to_float, z)
Z.append(z)
# Convert the list of rows into a NumPy array.
Z = np.array(Z)

It seems the input to xport.Reader should be a file object instead of a string. You should open the file yourself instead of passing the file name as a parameter.
E.g.
with xport.Reader(open(fname, 'rb')) as reader:
....

Writing random numbers to csv-file

I want to output a csv-file where the lines look like this:
x,y,0,1 or x,y,1,0 where x and y are random integers from 1 to 5 with weighted probabilities. Unfortunately I get just this as a csv-file:
4,4,0,1
0,1
0,1
3,4,1,0
1,0
0,1
1,0
0,1
1,0
0,1
My python code looks like this:
import csv
import random
import numpy
# Two groups, each holding two lists of five percentage weights.
l = [[[10,20,50,10,10],[20,20,20,20,20]],[[10,20,50,10,10],[20,20,20,20,20]]]
# NOTE(review): "wb" is the Python 2 csv idiom; on Python 3 csv.writer needs
# a text-mode file - confirm the target version.
with open("test.csv", "wb") as f:
writer = csv.writer(f)
for i in range(10):
# Pick one of the two groups at random.
x = random.randint(0,1)
# h is the same list object as l[x], not a copy.
h = l[x]
d =[]
while len(h) > 0:
# pop(0) removes the weight list from h, and therefore from l[x] too.
a=numpy.array(h.pop(0))
# Draw an integer from 1..5 using the weights (scaled to probabilities).
d.append(numpy.random.choice(range(1, 6), p=a/100.0))
# Two-element marker with a 1 at the index of the chosen group.
c = [0] * 2
c[x]=1
writer.writerow(d+c)
What do I do wrong?

You pop items from the lists in l. Once they are empty, while len(h) > 0: is never True and the while loop doesn't run. Try copying the sublists instead
import csv
import random
import numpy
# Two groups, each holding one list of percentage weights per random column.
l = [[[10,20,50,10,10],[20,20,20,20,20]],[[10,20,50,10,10],[20,20,20,20,20]]]
# csv.writer needs a text-mode file opened with newline="" on Python 3;
# the binary mode "wb" only works with the Python 2 csv module.
with open("test.csv", "w", newline="") as f:
    writer = csv.writer(f)
    for i in range(10):
        x = random.randint(0, 1)
        h = l[x][:]  # <-- copy list
        d = []
        while len(h) > 0:
            # Popping from the copy leaves the weight lists in l untouched.
            a = numpy.array(h.pop(0))
            d.append(numpy.random.choice(range(1, 6), p=a/100.0))
        # Two-element marker with a 1 at the index of the chosen group.
        c = [0] * 2
        c[x] = 1
        writer.writerow(d + c)
Or enumerate the sublist directly
import csv
import random
import numpy
# Two groups, each holding one list of percentage weights per random column.
l = [[[10,20,50,10,10],[20,20,20,20,20]],[[10,20,50,10,10],[20,20,20,20,20]]]
# csv.writer needs a text-mode file opened with newline="" on Python 3;
# the binary mode "wb" only works with the Python 2 csv module.
with open("test.csv", "w", newline="") as f:
    writer = csv.writer(f)
    for i in range(10):
        x = random.randint(0, 1)
        d = []
        for h in l[x]:  # <-- enumerate list
            # Iterating reads the weight lists without removing them from l.
            a = numpy.array(h)
            d.append(numpy.random.choice(range(1, 6), p=a/100.0))
        # Two-element marker with a 1 at the index of the chosen group.
        c = [0] * 2
        c[x] = 1
        writer.writerow(d + c)

How do I create a function to find average above median?

I want to find the average of all scores above and below the median (not including the median), but I have no idea how to go about doing this.
import collections
def main():
    """Read ``state_meet.txt`` and print the median all-around score.

    Each non-blank line is split on commas into
    ``gymnastics_school, participant_name, all_around_points_earned``;
    the third field is converted to ``float``.
    """
    names = ["gymnastics_school", "participant_name", "all_around_points_earned"]
    Data = collections.namedtuple("Data", names)
    data = []
    values = []
    with open('state_meet.txt', 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines, e.g. at the end of the file
            items = line.split(',')
            items[2] = float(items[2])
            data.append(Data(*items))
            values.append(items[2])
    print("summary of data:")
    if not values:
        return  # no scores, so there is no median to report
    sorted_data = sorted(values)
    mid = len(sorted_data) // 2
    if len(sorted_data) % 2 == 0:
        # True division: floor division would truncate a fractional median.
        median_val = (sorted_data[mid - 1] + sorted_data[mid]) / 2
    else:
        median_val = sorted_data[mid]
    print(" median score", median_val) #median

We now have statistics as part of the standard library:
import statistics
# Sample data: the integers 0 through 9.
nums = [*range(10)]
med = statistics.median(nums)
# Mean of the values strictly above, then strictly below, the median.
hi_avg = statistics.mean(n for n in nums if n > med)
lo_avg = statistics.mean(n for n in nums if med > n)

You can use the built-in functions filter and sum. For example
# On Python 3 filter() returns a lazy iterator that has no len(),
# so collect the scores above the median into a list instead.
above_med = [x for x in values if x > median_val]
# Guard against division by zero when no score exceeds the median.
if above_med:
    print(" average of scores above median ", sum(above_med)/len(above_med))
Edited:
As suggested by @ChrisP, you can also use the standard-library module statistics, available since Python 3.4.

Here is an example:
import numpy as np
# Convert the input once, take its median, then average the entries
# that are strictly greater than that median.
data_array = np.array(data)
med = np.median(data)
ave_above_med = np.mean(data_array[data_array > med])
so the function would be:
import numpy as np
def average_above_med(data):
    """Return the mean of the values in *data* strictly above its median.

    Args:
        data: A sequence of numbers.

    Returns:
        The mean of the selected values, or NaN when no value is
        strictly greater than the median.
    """
    data_array = np.array(data)
    # Reuse the converted array instead of converting ``data`` a second time.
    med = np.median(data_array)
    above = data_array[data_array > med]
    if above.size == 0:
        # Averaging an empty selection would emit a RuntimeWarning.
        return float("nan")
    return above.mean()
This is a test of that:
test_data = [1, 5, 66, 7, 5]
print(average_above_med(test_data))
which displays:
36.5
Hope this helps.

Reading from a python file

I have a text file that got some antonyms in the format:
able || unable
unable || able
abaxial || adaxial
adaxial || abaxial
and I need to check whether one word is an antonym of another or not.
What i did is a code like this:
def antonyms():
    """Return the pair parsed from the last line of ``antonyms.txt``.

    Returns:
        A tuple ``(x, y)`` with the raw text before and after ``||`` on
        the final line; surrounding spaces and the newline are kept.
    """
    with open('antonyms.txt', 'r+') as f:
        for line in f:
            a = line.split('||')
            x = a[0]
            y = a[1]
    # The return sits after the loop, so x and y hold only the values
    # assigned during the final iteration.
    return x, y
but what I have got is only the last pairs, then I tried to indent return a bit so I did
def antonyms():
    """Return the pair parsed from the first line of ``antonym_adjectives.txt``.

    Returns:
        A tuple ``(x, y)`` with the raw text before and after ``||`` on
        the first line; surrounding spaces and the newline are kept.
    """
    with open('antonym_adjectives.txt', 'r+') as f:
        for line in f:
            a = line.split('||')
            x = a[0]
            y = a[1]
            # The return is inside the loop, so the function exits during
            # the first iteration.
            return x, y
But again I got first pairs only
How can I get all of the pairs?
and how can I do something like:
>>> antonyms(x, y)
to tell me if they are True or False?

Because no answer answers BOTH your questions:
Firstly: if you return, the program will stop there. So you want to yield your antonym so the whole function becomes a generator:
def antonyms():
    """Yield every ``(word, antonym)`` pair listed in ``antonyms.txt``.

    Each line is split at the first ``||``. Both halves are stripped, so
    neither the padding spaces nor the trailing newline end up in the
    yielded strings. Lines without ``||`` are skipped.

    Yields:
        Tuples ``(x, y)`` of stripped strings, one per valid line.
    """
    # The file is only read, so plain read mode inside a with block is
    # enough and guarantees it is closed.
    with open('antonyms.txt', 'r') as f:
        for line in f:
            x, sep, y = line.partition('||')
            if not sep:
                continue  # blank or malformed line
            yield x.strip(), y.strip()
To use this function to check for is_antonym(a,b):
def is_antonym(a, b):
    """Return True if ``antonyms()`` produces the pair ``(a, b)``.

    Args:
        a: The first word.
        b: The candidate antonym of *a*.

    Returns:
        ``True`` when some yielded pair matches ``(a, b)``, else ``False``.
    """
    # Strip both sides so leftover padding or a trailing newline in the
    # yielded strings cannot defeat the comparison.
    return any(a == x.strip() and b == y.strip() for x, y in antonyms())
Other answers have good tips too:
A good replacement for instance would be: [x,y] = line.split(' || ')

Your problem is that with return you are getting out of the function.
Instead of x = a[0] and y = a[1], append those values to a list, and then return that list.
the_array = []
for line in f:
a = line.split('||')
the_array.append((a[0],a[1]))
return the_array

It would make more sense to write a function that gets the antonym of a given word:
def antonym(word):
    """Return the antonym of *word* from ``antonym_adjectives.txt``.

    Args:
        word: The word to look up; it may appear on either side of ``||``.

    Returns:
        The word on the other side of the first matching line, or
        ``None`` when *word* does not occur in the file.
    """
    # Read-only access is sufficient; lines without '||' are skipped
    # instead of raising on unpacking.
    with open('antonym_adjectives.txt', 'r') as f:
        for line in f:
            a, sep, b = line.partition('||')
            if not sep:
                continue
            a = a.strip()
            b = b.strip()
            if a == word:
                return b
            if b == word:
                return a
    return None
You can then write antonym(x) == y to check if x and y are antonyms. (However this assumes each word has a single unique antonym).
This reads the file from the beginning each time. If your list of antonyms is manageable in size it might make more sense to read it in to memory as an array or dictionary.
If you can't assume that each word has a single unique antonym, you could turn this into a generator that will return all the antonyms for a given word.
def antonym(word):
    """Yield every antonym of *word* found in ``antonym_adjectives.txt``.

    Args:
        word: The word to look up; it may appear on either side of ``||``.

    Yields:
        The word on the other side of each line that contains *word*.
    """
    # Read-only access is sufficient; lines without '||' are skipped
    # instead of raising on unpacking.
    with open('antonym_adjectives.txt', 'r') as f:
        for line in f:
            a, sep, b = line.partition('||')
            if not sep:
                continue
            a = a.strip()
            b = b.strip()
            if a == word:
                yield b
            if b == word:
                yield a
Then y in antonym(x) will tell you whether x and y are antonyms, and list(antonym(x)) will give you the list of all the antonyms of x.

You could use yield:
def antonyms():
    """Yield each ``(word, antonym)`` pair read from ``antonyms.txt``.

    Lines are split at the first ``||`` and both halves are stripped of
    surrounding whitespace. Lines without ``||`` are skipped.

    Yields:
        Tuples ``(x, y)`` of stripped strings, one per valid line.
    """
    # A with block closes the file even if the consumer stops iterating early
    # and the generator is closed.
    with open('antonyms.txt', 'r') as f:
        for line in f:
            x, sep, y = line.partition('||')
            if not sep:
                continue  # blank or malformed line
            yield x.strip(), y.strip()
for a,b in antonyms():
# rest of code here
By the way, you can assign directly to x and y:
x,y = line.split('||')
A simple check if a and b are antonyms could be:
(a == 'un' + b) or (b == 'un' + a)

that is my code ..
def antonyms(first, second):
    """Return True if ``text.txt`` lists *second* as the antonym of *first*.

    Args:
        first: The word expected on the left of ``||``.
        second: The word expected on the right of ``||``.

    Returns:
        ``True`` when some line consists of exactly ``first || second``
        (ignoring surrounding whitespace), otherwise ``False``.
    """
    with open('text.txt', 'r') as f:
        for line in f:
            lst = [s.strip() for s in line.split('||')]
            # Only well-formed two-part lines are compared, so values from
            # an earlier line can never leak into the check.
            if len(lst) == 2 and first == lst[0] and second == lst[1]:
                return True
    return False

I don't think list is a good data structure for this problem. After you've read all the antonym pairs into a list, you still have to search the whole list to find the antonym of a word. A dict would be more efficient.
# Map every word to its antonym, in both directions.
antonym = {}
with open('antonym_adjectives.txt') as infile:
    for line in infile:
        x, sep, y = line.partition('||')
        if not sep:
            continue  # blank or malformed line
        # Strip padding and the trailing newline so lookups by the bare
        # word succeed.
        x, y = x.strip(), y.strip()
        antonym[x] = y
        antonym[y] = x
Now you can just look up an antonym in the dict:
# Look the word up directly; a missing key means it is not in the table.
try:
    opposite = antonym[word]
except KeyError:
    print("%s not found" %word)

Python 3 IndexError: list index out of range, working from CSV type file

I get the "IndexError: list index out of range" when trying this code:
import numpy as np
import matplotlib.pyplot as plt
import math
# Ask for the data file name and echo it back.
nameFile = input('Enter raw data txt filename to plot >>>')
print('plotting file {}:'.format(nameFile))
# Read the whole file and split it into lines on '\n'.
with open(nameFile, 'r') as readFile:
sepFile = readFile.read().split('\n')
x = []
y = []
for line in sepFile:
# Split on single spaces; cols[2] and cols[3] are the third and fourth fields.
cols = line.split(' ')
X = float(cols[2])
Y = float(cols[3])
x.append(X)
y.append(Y)
# NOTE(review): this plots the scalars X and Y (the last values read), not
# the lists x and y - confirm intent.
plt.plot(X, Y)
plt.show()
Im pulling from a CSV type file that is formatted as such:
1 6887.79 245441
2 6883.52 234833
3 6879.26 231316
4 6874.99 243267
5 6870.72 254817
6 6866.46 267046
7 6862.19 277514
8 6857.92 282874
9 6853.65 283163
10 6849.39 285764 ....etc etc
I have used similar code on files like this so as to why its not working is probably so simple I am gonna laugh at myself.

Python uses 0-based indexing. So cols[0] is the first value in cols, cols[1] is the second value, and so on.
So:
for line in sepFile:
    # split() with no argument also copes with repeated spaces or tabs.
    cols = line.split()
    if len(cols) < 3:
        # Skip short lines, e.g. the empty string left after the final
        # newline, which would otherwise raise IndexError.
        continue
    X = float(cols[1]) # <-- change 2 to 1
    Y = float(cols[2]) # <-- change 3 to 2
    x.append(X)
    y.append(Y)
plt.plot(x, y) # <-- change X to x, Y to y
plt.show()

The list starts at 0, not at 1.
Try:
X = float(cols[1])
Y = float(cols[2])

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Array of arrays from a dat file - python

Related

How to fix ' 'str' object has no attribute 'tell' ' error in Python when trying to read xpt files?

Writing random numbers to csv-file

How do I create a function to find average above median?

Reading from a python file

Python 3 IndexError: list index out of range, working from CSV type file

Categories

Resources