Python two-dimensional list: exporting as delimited text file - python

I have a 2-D list, myList[r][c], where there are r rows of c columns each.
I am trying to export it into a text file, with each of the column elements delimited by pipes | , and an ampersand & at the end of each row.
# Every cell of the r-by-c table starts out as an empty list ([]), not a string.
myList = [[[] for a in range(c)] for b in range(r)]
{a bunch of code populating myList}
f = open("myfile.txt","w")
for x in range(0,r):
# Text of the current output row, built up cell by cell.
thisRow = ''
for y in range(0,c):
appendThis = myList[x][y]
# While a cell is still a list, list + str is undefined and raises the reported TypeError.
thisRow += appendThis + "|"
f.write(thisRow)
f.write("&")
# NOTE(review): `f.close` without parentheses only references the method; close() is never called.
f.close
...but I get TypeError: can only concatenate list (not "str") to list on the line where I add the pipe character.

The csv module is made for this. Python 2.7 example:
import csv
# Build a 7x5 demo table where cell (b, a) holds b*10 + a.
r = 7
c = 5
myList = []
for b in range(r):
    myList.append([b * 10 + a for a in range(c)])

# Pipe-delimited output; every record is terminated by '&' plus CRLF.
with open("myfile.txt", "wb") as f:
    w = csv.writer(f, delimiter='|', lineterminator='&\r\n')
    for row in myList:
        w.writerow(row)
Output:
0|1|2|3|4&
10|11|12|13|14&
20|21|22|23|24&
30|31|32|33|34&
40|41|42|43|44&
50|51|52|53|54&
60|61|62|63|64&

Python has great support for CSV. The following is adapted straight from an example:
import csv
with open('myfile.txt', 'wb') as csvfile:
    # Fields are separated by '|' and every record ends with '&'.
    mywriter = csv.writer(csvfile, delimiter='|', lineterminator='&',
                          quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # writerows() emits one record per inner list. writerow(myList) would
    # treat the whole 2-D list as a single record, with each inner list
    # stringified into one field.
    mywriter.writerows(myList)
Note the use of lineterminator: if you want each line to end with an & followed by a new line, you can use '&\r\n' as the terminator.
In case I have got your rows and columns mixed up, please note that you can do something like
# Write each inner list of myList as its own record.
for line in myList:
mywriter.writerow(line)

So here you have a working version.
The problem was in the first line, where you had:
myList = [[[] for a in range(c)] for b in range(r)]
but that just creates a 2-D list in which every cell is itself an empty list. I have replaced it with a list whose cells simply contain their own column index. (It also now adds a newline at the end of each row.)
# Each cell holds its own column index, so every cell can be rendered as text.
myList = [[a for a in range(c)] for b in range(r)]
# The with-block guarantees the file is flushed and closed; the previous
# `f.close` (no parentheses) never actually called close().
with open("myfile.txt", "w") as f:
    for row in myList:
        # Same layout as before: every cell followed by "|", then "&" and a newline.
        f.write("".join(str(cell) + "|" for cell in row))
        f.write("&\n")

Related

How to transform a csv file into a multi-dimensional list using Python?

I started out with a 4d list, something like
tokens = [[[["a"], ["b"], ["c"]], [["d"]]], [[["e"], ["f"], ["g"]],[["h"], ["i"], ["j"], ["k"], ["l"]]]]
So I converted this to a csv file using the code
import csv
def export_to_csv(tokens):
    """Flatten a nested token list into the file ``TEST.csv``.

    Writes a header row ``A,B,C,word`` followed by one row per entry found
    three levels deep in *tokens*: the three enumeration indices that lead
    to the entry, then the entry itself.

    Args:
        tokens: List nested (at least) three levels deep. Each third-level
            entry is written unchanged into the ``word`` column, so a list
            entry such as ``["a"]`` is stored as its string form.
    """
    # The header label must be the string "word"; the bare name `word`
    # was undefined and raised NameError.
    csv_list = [["A", "B", "C", "word"]]
    for h_index, h in enumerate(tokens):
        for i_index, i in enumerate(h):
            for j_index, j in enumerate(i):
                csv_list.append([h_index, i_index, j_index, j])
    # newline='' lets the csv module control line endings itself.
    with open('TEST.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(csv_list)
But now I want to do the reverse process, want to convert a csv file obtained in this format, back to the list format mentioned above.
Assuming you wanted your csv file to look something like this (there were a couple typos in the posted code):
A,B,C,word
0,0,0,a
0,0,1,b
0,0,2,c
...
here's one solution:
import csv
def import_from_csv(filename):
    """Rebuild the nested token list from a CSV with rows ``x,y,z,word``.

    The first row is treated as a header and discarded. Every other row
    stores ``[word]`` at ``result[x][y][z]``, growing the nested lists as
    needed. Slots skipped at the innermost level are filled with ``0``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The nested list reconstructed from the rows.

    Raises:
        ValueError: If a data row does not have exactly four fields or an
            index field is not an integer.
    """
    retval = []
    with open(filename, newline="") as fh:
        reader = csv.reader(fh)
        # discard header row
        next(reader)
        # process data rows
        for x, y, z, word in reader:
            x, y, z = int(x), int(y), int(z)
            # Grow each level with *fresh* lists. Multiplying a nested list
            # (e.g. [[[]]] * n) repeats one shared inner list, so rows whose
            # indices skipped ahead ended up writing into several slots at once.
            retval.extend([[]] for _ in range(x + 1 - len(retval)))
            retval[x].extend([] for _ in range(y + 1 - len(retval[x])))
            # The 0 placeholders are immutable, so repetition is safe here.
            retval[x][y].extend([0] * (z + 1 - len(retval[x][y])))
            retval[x][y][z] = [word]
    return retval
def import_from_csv(file):
    """Rebuild the nested list from a CSV whose rows are ``a,b,c,value``.

    The header row is skipped. The fourth column is parsed with
    ``ast.literal_eval`` (so ``"['a']"`` becomes ``['a']``) and stored at
    ``data[a][b][c]``. Each level grows by at most one slot per row, so the
    rows are expected to arrive in index order.
    """
    import ast
    import csv

    data = []
    with open(file) as fp:
        rows = csv.reader(fp)
        # The first line only carries the column headers.
        next(rows)
        for row in rows:
            # The three leading columns are the integer indices.
            a, b, c = (int(field) for field in row[:3])
            # Everything is read back as text; literal_eval turns the last
            # column into a Python object again.
            value = ast.literal_eval(row[3])
            # Extend each level to accommodate the new element.
            if len(data) <= a:
                data.append([[[]]])
            if len(data[a]) <= b:
                data[a].append([[]])
            if len(data[a][b]) <= c:
                data[a][b].append([])
            data[a][b][c] = value
    return data
# Test
assert import_from_csv("TEST.csv") == tokens
First, I'd make writing this construction in a CSV format independent from dimensions:
import csv
def deep_iter(seq):
    """Yield one ``(index, ..., leaf)`` tuple per leaf of a nested list.

    Recurses into every element that is a list; any other element is a
    leaf. Each yielded tuple holds the index at every nesting level that
    leads to the leaf, followed by the leaf itself.

    Args:
        seq: Iterable whose list elements are walked recursively.

    Yields:
        Tuples of indices ending with the leaf value, in traversal order.
    """
    for i, val in enumerate(seq):
        # isinstance() also recognises list subclasses, unlike `type(val) is list`.
        if isinstance(val, list):
            for others in deep_iter(val):
                yield (i, *others)
        else:
            yield i, val
# newline='' leaves line endings to the csv module; without it the writer's
# own '\r\n' can be translated again and show up as blank rows on Windows.
with open('TEST.csv', 'w', newline='') as f:
    csv.writer(f).writerows(deep_iter(tokens))
Next, we can use the lexicographic order of the indices to recreate the structure. All we have to do is sequentially move deeper into the output list according to the indices of a word. We stop at the penultimate index to get the last list, because the last index is pointing only at the place of the word in this list and doesn't matter due to the natural ordering:
# Load every CSV row up front.
with open('TEST.csv', 'r') as f:
    rows = list(csv.reader(f))

res = []
for row in rows:
    node = res
    # Navigate with every index column except the last one; the final index
    # and the word itself (row[-2:]) are not used for navigation.
    for part in row[:-2]:
        pos = int(part)
        if pos >= len(node):
            node.append([])  # open a new record at this level
            node = node[-1]
        else:
            node = node[pos]
    node.append(row[-1])  # the word goes into the innermost list reached

Why is my code writing something twice while reading a file

I'm working on a code that sends mails to the persons given in the text file.
This is the text file:
X,y#gmail.com
Z,v#gmail.com
This is my code:
with open("mail_list.txt","r",encoding ="utf-8") as file:
a = file.read()
b = a.split("\n")
# d collects one list of comma-separated fields per line of the file.
d = []
for i in b:
c = i.split(",")
d.append(c)
for x in d:
# This inner loop runs once per field of x, so the two prints below are
# repeated for every field in the row; y itself is never used.
for y in x:
print(x[0])
print(x[1])
The output should be:
X
y#gmail.com
Z
v#gmail.com
But instead it is:
X
y#gmail.com
X
y#gmail.com
Z
v#gmail.com
Z
v#gmail.com
Why is that?
How can I fix it?
You're iterating over the columns in every row, but not using the column value:
for x in d:
# y is each field of the current row in turn, so printing y emits every field once.
for y in x:
print(y)
Please have a look at this solution. I believe it is more elegant and efficient than the current one. Rather than splitting the whole file's text on line breaks yourself, read the file as a list of lines (one entry per \n-terminated line) and then process each line as your requirements dictate.
with open('mail_list.txt') as f:
    lines = f.readlines()
for line in lines:
    # readlines() keeps each line's trailing newline; strip it so the
    # address is not printed with an extra blank line after it.
    line = line.strip()
    if not line:
        continue  # a blank line has no name/address pair to print
    info = line.split(',')
    print(info[0])
    print(info[1])
You need to only iterate on list d.
with open("mail_list.txt", "r", encoding="utf-8") as file:
    a = file.read()
# splitlines() yields no trailing empty entry when the file ends with a
# newline; blank lines are skipped because they have no second field.
d = [line.split(",") for line in a.splitlines() if line]
for x in d:
    print(x[0])
    print(x[1])
To make it simpler, you can read your file line by line and process it at the same time. The strip() method removes any leading (spaces at the beginning) and trailing (spaces or EOL at the end) characters.
with open("mail_list.txt", "r", encoding="utf-8") as file:
    for line in file:
        # A blank line has no comma, so indexing field 1 would raise IndexError.
        if not line.strip():
            continue
        line_s = line.split(",")
        print(line_s[0])
        # The last field still carries the end-of-line character.
        print(line_s[1].strip())

Error in concatenation and one more error

I'm trying to import a csv file and then output the continuous series from the file into a new csv file
the contents of the file are like
1
5
6
7
8
and so on
here for example the output would be ['1,1','5,5','6,8']
The error i'm getting is
>>> gaps = [[s, e] for s, e in zip(nums, nums[1:]) if s+1 < e]
TypeError: can only concatenate str (not "int") to str
Also for some reason after I do str1 = str1.replace(i, '')
it turns str1 into
['2855']'2856']'3250']'3251']'3252']'3253']'3254']'3255']'3256']'3257']'3258']'3259']'3260']'3261']'3262']'3263']'3264']'3265']'3278']'3279']'3280']'3281']'3299']'3312']'3314']'3331']'3332']'3333']'3334']'3405']'3406']'3407']'3408']'3500']'4849']'4850']'5567']'5568']'5569']'6000']
2856]3250]3251]3252]3253]3254]3255]3256]3257]3258]3259]3260]3261]3262]3263]3264]3265]3278]3279]3280]3281]3299]3312]3314]3331]3332]3333]3334]3405]3406]3407]3408]3500]4849]4850]5567]5568]5569]6000]
instead of giving just
2856]3250]3251]3252]3253]3254]3255]3256]3257]3258]3259]3260]3261]3262]3263]3264]3265]3278]3279]3280]3281]3299]3312]3314]3331]3332]3333]3334]3405]3406]3407]3408]3500]4849]4850]5567]5568]5569]6000]
The code:
with open('Book1.csv', newline='') as f:
reader = csv.reader(f)
# data is a list of rows; each row is itself a list of strings.
data = list(reader)
# Joining str(row) for every row gives text such as "['2855']['2856']...".
str1 = ''.join(str(e) for e in data)
bad_chars = ["[","'"]
for i in bad_chars :
str1 = str1.replace(i, '')
# NOTE(review): the result of this split is discarded.
str1.split("]",-1)
# x is a list of strings, not integers.
x = list((str1.split("]")))
def ranges(nums):
nums = sorted(set(nums))
# With string elements, s+1 raises the reported TypeError.
gaps = [[s, e] for s, e in zip(nums, nums[1:]) if s+1 < e]
edges = iter(nums[:1] + sum(gaps, []) + nums[-1:])
return list(zip(edges, edges))
print(ranges(x))
Try this:
def ranges(nums):
    """Collapse numbers into inclusive ``(start, end)`` runs of consecutive values.

    Duplicates are ignored and the input does not need to be sorted. An
    empty input gives an empty list.
    """
    ordered = sorted(set(nums))
    # Run boundaries in order: first value, both sides of every jump, last value.
    boundaries = ordered[:1]
    for left, right in zip(ordered, ordered[1:]):
        if left + 1 < right:
            # A jump: `left` closes the current run and `right` opens the next.
            boundaries.extend([left, right])
    boundaries = boundaries + ordered[-1:]
    # Zipping one iterator with itself pairs the boundaries up two at a time.
    paired = iter(boundaries)
    return list(zip(paired, paired))
# Convert the first field of every row to int while reading. Blank rows
# come back from csv.reader as empty lists and are skipped, since indexing
# them would raise IndexError.
with open('Book1.csv', newline='') as f:
    data = [int(row[0]) for row in csv.reader(f) if row]
print(ranges(data))
The problem with your code was that it made the task more complex than necessary by joining the lists of strings into one big string and then removing the bad characters from it. Instead, you can convert the first field of each row to an integer as you read it, which saves that work, as I did.
Also, the ranges function raised the error because it tried to add the string s to 1, which is an integer. What you didn't realise was that the list x still contained strings.

Read file line by line and create lists

I have a file which contains lines of the form
2.484 5.234
6.123 1.461
1.400 9.381
I would like to read these into python lists x containing the first value of each line and y containing the second value of each line.
How can I achieve this? Here is my attempt:
x = []
y = []
with open(filename) as file_:
for line in file_:
# line is the whole text of the row (a str), so unpacking it into two names
# only succeeds when the string is exactly two characters long.
a, b = line
x.append(a)
y.append(b)
a, b = line
cannot work because you're trying to unpack a string into 2 elements (unless the string itself is 2 elements long, which isn't the case)
you want to split the line, convert each part to float, and unpack the result like this:
a, b = map(float,line.split())
in that case split() without arguments takes care of multiple spaces, linefeeds, tabs... like awk would do so it's pretty easy.
You can try this:
# Close the file deterministically and build real lists of floats
# (on Python 3 a bare map() is a lazy iterator, not a list).
# split() with no arguments already discards the trailing newline.
with open('filename.txt') as fh:
    data = [[float(v) for v in line.split()] for line in fh]
You can do that
x = []
y = []
with open("file.txt", "r") as ins:
    for line in ins:
        # split() with no arguments splits on any run of whitespace.
        elt = line.split()
        x.append(elt[0])
        y.append(elt[1])
# The call form works on Python 3, where `print x` is a syntax error,
# and prints the same thing on Python 2 for a single argument.
print(x)
print(y)

string manipulation and adding values based on row they are

I have a comma-delimited text file, and for each line I am trying to generate every pairwise combination of its items, tagging both items of each pair with the number of the line they came from.
Here is an example (you can download it here too if you want https://gist.github.com/anonymous/4107418c63b88c6da44281a8ae7a321f)
"A,B "
"AFD,DNGS,SGDH "
"NHYG,QHD,lkd,uyete"
"AFD,TTT"
I want to have it like this
A_1 B_1
AFD_2 DNGS_2
AFD_2 SGDH_2
DNGS_2 SGDH_2
NHYG_3 QHD_3
NHYG_3 lkd_3
NHYG_3 uyete_3
QHD_3 lkd_3
QHD_3 uyete_3
lkd_3 uyete_3
AFD_4 TTT_4
It means, A_1 and B_1 are coming from the first row
AFD_2 & DNGS_2 are coming from the second row , etc etc
I have tried to do it but I cannot figure it out
#!/usr/bin/python
import itertools
# Intended output container.
# NOTE(review): out is a dict, and the last line calls out.write(), which dicts do not have.
out = {}
# name of the input file
file_name = 'data.txt'
# read all the lines, numbering them from 0
for n, line in enumerate(open(file_name).readlines()):
# split each line on tab characters; the result is a list
item1 = line.split('\t')
# NOTE(review): item1 is a list here, and lists have no split() method
item2 = item1.split(',')
# iterate over all combinations of 2 strings
for i in itertools.combinations(item2,2):
# write the pair as tab-separated text
out.write('\t'.join(i))
Output Answer 1
"A_1, B "_1
"AFD_2, DNGS_2
"AFD_2, SGDH "_2
DNGS_2, SGDH "_2
"NHYG_3, QHD_3
"NHYG_3, lkd_3
"NHYG_3, uyete"_3
QHD_3, lkd_3
QHD_3, uyete"_3
lkd_3, uyete"_3
"AFD_4, TTT"_4
answer 2
"A_1 B "_1
"AFD_2 DNGS_2
"AFD_2 SGDH "_2
DNGS_2 SGDH "_2
"NHYG_3 QHD_3
"NHYG_3 lkd_3
"NHYG_3 uyete"_3
QHD_3 lkd_3
QHD_3 uyete"_3
lkd_3 uyete"_3
"AFD_4 TTT"_4
Try this
#!/usr/bin/python
from itertools import combinations
# Collect, per input line, every 2-item combination with the 1-based line
# number appended to both items.
with open('data1.txt') as f:
    result = []
    for n, line in enumerate(f, start=1):
        items = line.strip().split(',')
        pairs = []
        for pair in combinations(items, 2):
            pairs.append(['%s_%d' % (name, n) for name in pair])
        result.append(pairs)

# Print one pair per output line.
for group in result:
    for pair in group:
        print(',\t'.join(pair))
You need a list of list of lists to represent each pair. You can build them using a list comprehension in a loop.
I wasn't sure what you wanted as your actual output format, but this prints your expected output.
If there are quotes in the input file, the simple fix is
items = line.replace("\"", "").strip().split(',')
in the code above. Note that this removes every double quote on the line, so it would break if there were other double quotes inside the data. If you know there aren't any, it's OK.
Otherwise, create a small function to strip the quotes. This example also writes to a file.
#!/usr/bin/python
from itertools import combinations
def remquotes(s):
    """Return *s* without one leading and/or one trailing double quote.

    Quotes inside the string are left alone. An empty string is returned
    unchanged.
    """
    beg, end = 0, len(s)
    # startswith()/endswith() are safe on the empty string, where indexing
    # s[0] raised IndexError (e.g. for a blank input line).
    if s.startswith('"'):
        beg = 1
    if s.endswith('"'):
        end = -1
    return s[beg:end]
# Collect, per input line, every 2-item combination with the 1-based line
# number appended to both items; surrounding quotes are removed first.
with open('data1.txt') as f:
    result = []
    for n, line in enumerate(f, start=1):
        unquoted = remquotes(line.strip())
        items = unquoted.strip().split(',')
        pairs = []
        for pair in combinations(items, 2):
            pairs.append(['%s_%d' % (name, n) for name in pair])
        result.append(pairs)

# Echo each pair to stdout and write it to out.txt, one pair per line.
with open('out.txt', 'w') as fout:
    for group in result:
        for pair in group:
            linestr = ',\t'.join(pair)
            print(linestr)
            fout.write(linestr + '\n')
Similar to the other answer provided, with one addition: based on the comments, it looks like you actually wish to write to a tab-delimited text file instead of a dictionary.
#!/usr/bin/python
import itertools
file_name = 'data.txt'
out_file = 'out.txt'
with open(file_name) as infile, open(out_file, "w") as out:
    # Lines are numbered from 1 in the output tags.
    for n, line in enumerate(infile, start=1):
        suffix = "_" + str(n)
        row = [field + suffix for field in line.strip().split(",")]
        # One tab-separated pair per output line.
        out.writelines('\t'.join(pair) + '\n'
                       for pair in itertools.combinations(row, 2))
The following seems to work with a minimal amount of code:
import itertools
input_filename = 'data.txt'
output_filename = 'split_data.txt'
with open(input_filename, 'rt') as inp, open(output_filename, 'wt') as outp:
    for n, line in enumerate(inp, 1):
        # Drop every double quote, then tag each trimmed item with the line number.
        unquoted = line.replace('"', '')
        items = ['{}_{}'.format(x.strip(), n) for x in unquoted.split(',')]
        for combo in itertools.combinations(items, 2):
            outp.write('\t'.join(combo) + '\n')

Categories