I want to output a csv-file where the lines look like this:
x,y,0,1 or x,y,1,0 where x and y are random integers from 1 to 5 with weighted probabilities. Unfortunately I get just this as a csv-file:
4,4,0,1
0,1
0,1
3,4,1,0
1,0
0,1
1,0
0,1
1,0
0,1
My python code looks like this:
import csv
import random
import numpy
l = [[[10,20,50,10,10],[20,20,20,20,20]],[[10,20,50,10,10],[20,20,20,20,20]]]
with open("test.csv", "wb") as f:
writer = csv.writer(f)
for i in range(10):
x = random.randint(0,1)
h = l[x]
d =[]
while len(h) > 0:
a=numpy.array(h.pop(0))
d.append(numpy.random.choice(range(1, 6), p=a/100.0))
c = [0] * 2
c[x]=1
writer.writerow(d+c)
What do I do wrong?
You pop items from the lists in l. Once they are empty, while len(h) > 0: is never True and the while loop doesn't run. Try copying the sublists instead
import csv
import random
import numpy
l = [[[10,20,50,10,10],[20,20,20,20,20]],[[10,20,50,10,10],[20,20,20,20,20]]]
with open("test.csv", "wb") as f:
writer = csv.writer(f)
for i in range(10):
x = random.randint(0,1)
h = l[x][:] # <-- copy list
d =[]
while len(h) > 0:
a=numpy.array(h.pop(0))
d.append(numpy.random.choice(range(1, 6), p=a/100.0))
c = [0] * 2
c[x]=1
writer.writerow(d+c)
Or enumerate the sublist directly
import csv
import random
import numpy
l = [[[10,20,50,10,10],[20,20,20,20,20]],[[10,20,50,10,10],[20,20,20,20,20]]]
with open("test.csv", "wb") as f:
writer = csv.writer(f)
for i in range(10):
x = random.randint(0,1)
d =[]
for h in l[x]: # <-- enumerate list
a=numpy.array(h)
d.append(numpy.random.choice(range(1, 6), p=a/100.0))
c = [0] * 2
c[x]=1
writer.writerow(d+c)
Related
I started out with a 4d list, something like
tokens = [[[["a"], ["b"], ["c"]], [["d"]]], [[["e"], ["f"], ["g"]],[["h"], ["i"], ["j"], ["k"], ["l"]]]]
So I converted this to a csv file using the code
import csv
def export_to_csv(tokens):
csv_list = [["A", "B", "C", word]]
for h_index, h in enumerate(tokens):
for i_index, i in enumerate(h):
for j_index, j in enumerate(i):
csv_list.append([h_index, i_index, j_index, j])
with open('TEST.csv', 'w') as f:
# using csv.writer method from CSV package
write = csv.writer(f)
write.writerows(csv_list)
But now I want to do the reverse process, want to convert a csv file obtained in this format, back to the list format mentioned above.
Assuming you wanted your csv file to look something like this (there were a couple typos in the posted code):
A,B,C,word
0,0,0,a
0,0,1,b
0,0,2,c
...
here's one solution:
import csv
def import_from_csv(filename):
retval = []
with open(filename) as fh:
reader = csv.reader(fh)
# discard header row
next(reader)
# process data rows
for (x,y,z,word) in reader:
x = int(x)
y = int(y)
z = int(z)
retval.extend([[[]]] * (x + 1 - len(retval)))
retval[x].extend([[]] * (y + 1 - len(retval[x])))
retval[x][y].extend([0] * (z + 1 - len(retval[x][y])))
retval[x][y][z] = [word]
return retval
def import_from_csv(file):
import ast
import csv
data = []
# Read the CSV file
with open(file) as fp:
reader = csv.reader(fp)
# Skip the first line, which contains the headers
next(reader)
for line in reader:
# Read the first 3 elements of the line
a, b, c = [int(i) for i in line[:3]]
# When we read it back, everything comes in as strings. Use
# `literal_eval` to convert it to a Python list
value = ast.literal_eval(line[3])
# Extend the list to accomodate the new element
data.append([[[]]]) if len(data) < a + 1 else None
data[a].append([[]]) if len(data[a]) < b + 1 else None
data[a][b].append([]) if len(data[a][b]) < c + 1 else None
data[a][b][c] = value
return data
# Test
assert import_from_csv("TEST.csv") == tokens
First, I'd make writing this construction in a CSV format independent from dimensions:
import csv
def deep_iter(seq):
for i, val in enumerate(seq):
if type(val) is list:
for others in deep_iter(val):
yield i, *others
else:
yield i, val
with open('TEST.csv', 'w') as f:
csv.writer(f).writerows(deep_iter(tokens))
Next, we can use the lexicographic order of the indices to recreate the structure. All we have to do is sequentially move deeper into the output list according to the indices of a word. We stop at the penultimate index to get the last list, because the last index is pointing only at the place of the word in this list and doesn't matter due to the natural ordering:
with open('TEST.csv', 'r') as f:
rows = [*csv.reader(f)]
res = []
for r in rows:
index = r[:-2] # skip the last index and word
e = res
while index:
i = int(index.pop(0)) # get next part of a current index
if i < len(e):
e = e[i]
else:
e.append([]) # add new record at this level
e = e[-1]
e.append(r[-1]) # append the word to the corresponding list
The list has 10,000,000 random numbers in it, and I can't figure out how to get the code to go to the next row when I reach the 16,384 column limit.
Current code:
import csv
import random
values = []
for i in range(1,10000000):
n = random.randint(0, 1000000)
values.append(n)
print(n)
file = open('randresults.csv', 'w')
with file:
wr = csv.writer(file, dialect = 'excel')
wr.writerow(values)
file.close()
I tried using wr.writerows(values) but it gives me Error: iterable expected, not float.
After every 16384 values you should write the values array, reset the values array and write it again on the next loop. you need a counter to count the columns
import csv
import random
values = []
file = open('randresults.csv', 'w')
wr = csv.writer(file, dialect = 'excel')
col_count = 0
for i in range(1,10000000):
col_count = col_count + 1
n = random.randint(0, 1000000)
values.append(n)
if col_count == 16384:
wr.writerow(values)
col_count = 0
values = []
# write the remaining values, an incomplete row
wr.writerow(values)
file.close()
import csv
f = open("savewl_ssj500k22_Minfreq1-lowercaseWords_1.csv", "r")
csvF = csv.reader(f, delimiter="\t")
s = 0
sez = []
sezB = []
for q in f:
s = s + 1
if s > 3:
l = q.split(",")
x = l[1]
y = l[0]
sezB.append(y)
sezB.append(int(x))
sez.append(sezB)
print(sez)
f.close()
How to get it work to get all rows from .csv in list or sez saved
from this code I get: MemoryError
in file is 77214 lines of something like this : je,17031
Every loop you are appending sezB which is growing by itself.
so you are apparently grows by O(number of lines ^2).
This is something like this pattern (just for the explanation):
[[1,2], [1,2,3,4], [1,2,3,4,5,6], .....]
I guess you wanted to reset sezB to [] every loop.
Your code can be simplified to
import csv
s = 0
sez = []
sezB = []
with open("savewl_ssj500k22_Minfreq1-lowercaseWords_1.csv", "r") as f:
csvF = csv.reader(f, delimiter="\t")
for q in f:
s += 1
if s > 3:
l = q.split(",")
x, y = l[:2]
sezB.extend([x, int(y)])
sez.append(sezB)
print(sez)
As you can see, you constantly add 2 more element to the sezB list, which is not that much, but you also keep adding the resulting sezB list to the sez list.
So since the file has 77214 lines, sez will need to hold about 6 trillion (5962079010) strings, which is way too many to be stored into memory...
I am writing a function to a CSV file (which is working), however it is cutting off halfway on one of the final rows. I know it is probably something to do with the closing of the file, but I thought I did it correctly.
Any suggestions where it may be going wrong?
from itertools import combinations as cb
import csv
import numpy as np
with open("usableReviewScores.csv") as f:
reader=csv.reader(f)
next(reader, None) # skip header
data=[filter(None,i) for i in reader]
writer = csv.writer(open("alexData1.csv", 'wb'))
def avgg(x):
ll=[float(i) for i in x[1:]] #take review no and convert to float
n=len(ll)
avg_list=[x[0]] #start result list with ref no.
final_list=[]
a = 0
b = []
c = []
d = []
global min_val
global max_val
min_val = 0
max_val = 0
for i in range(4,5):
for j in cb(ll,i):
# print(j)
c = i
avg_list.append(sum(j)/i)
final_list.append(sum(j)/i)
a = sum(final_list)/len(final_list)
min_val = min(final_list)
max_val = max(final_list)
d = np.std(final_list)
return (avg_list, "avg", a, "min", min_val, "max", max_val,
"Num of reviews", c, "std", d, "Total Reviews", n)
for x in data:
print(avgg(x))
for x in data:
writer.writerow(avgg(x))
You say that it's probably to do with the closing of the file. Well you don't actually close your output file at all. So I'm guessing that this is a symptom of file-system caching and the cache not being properly flushed because the file isn't closed
You should use with open(filename) as handle: for the writing as well as for your input:
with open("alexData1.csv", 'wb') as outfile:
writer = csv.writer(outfile)
for x in data:
writer.writerow(avgg(x))
I am trying to create random digits and have them stored in a file and I did some googling and came across the pickle function. I used it exactly how the tutorial did and now I need to know how to store all of the codes that I create in there? Here is my code
import string
import pickle
from random import randint
data = list(string.ascii_lowercase)
[data.append(n) for n in range(0, 10)]
x = [str(data[randint(0, len(data)-1)]) for n in range(0, 21)]
y = ''.join(x)
print (y)
inUse = []
inUse.append(y)
pickle.dump(inUse, open("data.pkl", "wb"))
inUse = pickle.load(open("data.pkl", "rb"))
Your way of generating x is overly convoluted
import string
import random
data = string.ascii_lowercase + string.digits
x = ''.join(random.choice(data) for n in range(20))
Now, you can simply print x to a file like this
with open("data.txt", "a")) as fout:
print(x, file=fout)
If you wish to append N codes to the file
with open("data.txt", "a")) as fout:
for i in range(N):
x = ''.join(random.choice(data) for n in range(20))
print(x, file=fout)
In the below line -
y = ''.join(x)
Lets say x is a list of random characters like - `['a', 'x', 'c', 'j']
After the above line executes, you will get y = 'axcj'
You can use pickle , to serialize the list object itself, so you would not even need y or inUse lists.
The code would look like -
import string
import pickle
from random import randint
data = list(string.ascii_lowercase)
[data.append(n) for n in range(0, 10)]
x = [str(data[randint(0, len(data)-1)]) for n in range(0, 21)]
pickle.dump(x, open("data.pkl", "ab"))
x = pickle.load(open("data.pkl", "rb"))
Please note the ab file mode, it is for appending to file, instead of overwriting it.