I'm trying to append a random number as a third column (row[2]) to every line of a CSV file: I read the rows from the original file and write them to a new CSV file with the random number appended.
However, I get the same random number on every line when I run the script.
The input file (read.csv) contains rows such as:
car,golf
When I write this data to the new CSV file with the extra column appended, I get the same number on every line:
car,golf,1777
car,bmw,1777
car,m3,1777
How can I fix this so that each line gets its own random number?
# Rows collected for the output file.
data = []
with open("read.csv", "r") as the_file:
sid_row = 5000
# NOTE(review): indentation was lost in this listing. The value in `sid` is
# not redrawn inside the `for row in reader` loop below, so every row built
# there receives the same number.
for i in range(sid_row):
line = str(random.randint(1,5000))
sid = line
reader = csv.reader(the_file, delimiter=",")
for row in reader:
try:
# Each output row is the first two input columns followed by `sid`.
new_row = [row[0], row[1],sid]
data.append(new_row)
except IndexError as error:
# Rows with fewer than two columns are reported and not added to `data`.
print(error)
pass
# Write the collected rows to the new file.
with open("Random.csv", "w+") as Ran_file:
writer = csv.writer(Ran_file, delimiter=",")
for new_row in data:
writer.writerow(new_row)
You need a new random number each time, for each row you're processing, something like:
import csv
import random

# Build the output rows, drawing a fresh random number for every input row.
data = []
with open("read.csv", "r", newline="") as the_file:
    reader = csv.reader(the_file, delimiter=",")
    for row in reader:
        try:
            first, second = row[0], row[1]
        except IndexError as error:
            # Rows with fewer than two columns (e.g. blank lines) are
            # reported and skipped.
            print(error)
            continue
        # randint is called inside the loop, so each row gets its own value.
        data.append([first, second, str(random.randint(1, 5000))])

# newline="" stops the csv module from emitting extra blank lines between
# rows on platforms that translate "\n" on write.
with open("Random.csv", "w", newline="") as ran_file:
    writer = csv.writer(ran_file, delimiter=",")
    writer.writerows(data)
Related
I have put together code to compare and match data between two CSVs and collate that data into a new CSV. The CSVs have similar data but the column names and their positions are different.
When I Run and Debug, it throws the following error at line 42:
Exception has occurred: TypeError
'dict_keys' object is not subscriptable
File "D:\Documents\Python\iris\accountmanagement2.py", line 42, in <module>
file1_columns[0]: file2_columns[1],
TypeError: 'dict_keys' object is not subscriptable
This is the code I have put together for this task. I tried using the type(data1[0].keys()) but that just says 'type' object is not subscriptable.
Really need some advice or ideas as I am completely stumped :)
import csv
# specify the file paths for the two CSV files
file1 = "D:\Documents\Python\iris\esr.csv"
file2 = "D:\Documents\Python\iris\iris.csv"
def define_columns(file_path):
    """Return the header row (column names) of the CSV file at *file_path*.

    Args:
        file_path: Path of the CSV file to inspect.

    Returns:
        The first row of the file as a list of strings, or an empty list
        when the file contains no rows.
    """
    # newline="" lets the csv module handle line endings itself.
    with open(file_path, "r", newline="") as f:
        reader = csv.reader(f)
        # The default avoids a StopIteration on an empty file.
        return next(reader, [])
# Define the keys of the two files
file1_keys = define_columns(file1)
file2_keys = define_columns(file2)
# read the data from the first CSV file
data1 = []
with open(file1, "r") as f:
reader = csv.DictReader(f)
for row in reader:
# Convert data to ASCII
for key in file1_keys:
row[key] = row[key].encode("ascii", "ignore").decode()
data1.append(row)
# read the data from the second CSV file
data2 = []
with open(file2, "r") as f:
reader = csv.DictReader(f)
for row in reader:
# Convert data to ASCII
for key in file2_keys:
row[key] = row[key].encode("ascii", "ignore").decode()
data2.append(row)
# Collect the column names of the two files from the first row of each.
# NOTE(review): in Python 3, dict.keys() returns a view object that cannot be
# indexed; the subscripts in col_map below are what raise
# "'dict_keys' object is not subscriptable". Wrapping each call in list(...)
# would make the names indexable by position.
file1_columns = data1[0].keys()
file2_columns = data2[0].keys()
# Map selected file1 column names (chosen by position) to file2 column names.
col_map = {
file1_columns[0]: file2_columns[1],
file1_columns[2]: file2_columns[5],
file1_columns[1]: file2_columns[6],
file1_columns[4]: file2_columns[7]
}
# match the data from the two files based on a specific column (e.g. "ID")
matching_data = []
non_matching_data1 = []
non_matching_data2 = []
for row1 in data1:
matched = False
for row2 in data2:
if row1[col_map[file1_columns[0]]] == row2[col_map[file2_columns[1]]]:
matching_data.append({**row1, **row2})
matched = True
break
if not matched:
non_matching_data1.append(row1)
for row2 in data2:
matched = False
for row1 in data1:
if row1[col_map[file1_columns[0]]] == row2[col_map[file2_columns[1]]]:
matched = True
break
if not matched:
non_matching_data2.append(row2)
# create a new CSV file with the matched data
with open("matched_data.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(col_map.keys())+list(col_map.values()))
writer.writeheader()
for row in matching_data:
writer.writerow(row)
# create a new CSV file with the non-matching data from file1
with open("non_matching_data1.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(data1[0].keys()))
writer.writeheader()
for row in non_matching_data1:
writer.writerow(row)
# create a new CSV file with the non-matching data from file2
with open("non_matching_data2.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(data2[0].keys()))
writer.writeheader()
for row in non_matching_data2:
writer.writerow(row)
scenario:
I'm trying to extract tweets from twitter, which is working fine,
next I'm trying to merge 10 files into 1(say file = QW).
for line in file:
my_row = [line]
filename = line.rstrip()+"_tweets"+".csv"
if(os.path.exists(filename)):
f = open(filename, "rt")
reader = csv.reader(f, delimiter="\t")
for line in enumerate(reader):
my_row.append(line)
writer.writerow(my_row)
else:
print(""+filename+ " doesnt exist")
my csv file looks like this
and then I will process that one file
problem: I want to read specific column of that CSV(QW) file
I tried row[0]
for row in input_file:
name_list = [] ;score = 0;
name_list.append(row[0])
print(name_list)
for a in row:
if a.find(skill_input) > 0 :
score = score+1;
name_list.append(score)
print(name_list)
writer.writerow([name_list])
and at that point I get an error
my csv file looks like this
name_list.append(row[0])
IndexError: list index out of range
Try this
# Open the file in a with-block so it is closed when the loop finishes.
with open("csvfile.csv") as csv_file:
    for line in csv_file:
        # Drop the line ending first so it does not end up in the last field;
        # the split returns a list such as ["1", "50", "60"].
        csv_row = line.rstrip("\r\n").split(your_delimiter)
        # Skip rows that are too short to have a k-th column (this is what
        # raised "list index out of range") or whose k-th column is empty.
        if len(csv_row) <= k or not csv_row[k]:
            continue
        name_list.append(csv_row[k])  # k-th column of the row
I want to add a new column to an existing file. But it gets a little complicated with the additional loops i add.
input file:
testfile.csv
col1,col2,col3
1,2,3
3,4,5
4,6,7
output i want:
USA_testfile.csv
col1,col2,col3,country
1,2,3,USA
3,4,5,USA
4,6,7,USA
UK_testfile.csv
col1,col2,col3,country
1,2,3,UK
3,4,5,UK
4,6,7,UK
This is what i have tried:
import csv
import sys
country_list= ['USA', 'UK']
def add_col(csv_file):
for country in country_list:
with open(csv_file, 'rb') as fin:
with open(country+"_timeline_outfile_"+csv_file, 'wb') as fout:
writer = csv.writer(fout, lineterminator='\n')
reader = csv.reader(fin)
all_rows =[]
row = next(reader)
row.append('country')
all_rows.append(row)
print all_rows
for row in reader:
row.append(country)
all_rows.append(row)
writer.writerows(all_rows)
add_col(sys.argv[1])
And this is the error i got:
File "write_to_csv.py", line 33, in add_col
writer.writerows(all_rows)
ValueError: I/O operation on closed file
I was trying to follow this post here
import csv

countries = ['USA', 'UK']

# Read the input once; the with-block closes the file as soon as the rows are
# loaded. (Python 3: csv files are opened in text mode with newline=''.)
with open('testfile.csv', newline='') as source:
    data = list(csv.reader(source))

for country in countries:
    with open('{0}_testfile.csv'.format(country), 'w', newline='') as f:
        writer = csv.writer(f)
        for i, row in enumerate(data):
            # The first row is the header and gets the new column's name;
            # every other row gets the country value.
            extra = 'country' if i == 0 else country
            writer.writerow(row + [extra])
I couldn't reproduce your error, but i cleaned your code a bit.
There is no reason to reopen the input file for every country.
def add_col(csv_file, countries=None):
    """Write one copy of *csv_file* per country with a ``country`` column added.

    For each country a file named ``<country>_timeline_outfile_<csv_file>``
    is created. Its header row is the input header plus ``country``, and every
    data row has the country value appended.

    Args:
        csv_file: Name of the input CSV file; also used to build the output
            file names.
        countries: Iterable of country names. Defaults to the module-level
            ``country_list``.
    """
    if countries is None:
        countries = country_list

    # Read the input a single time instead of rewinding it for each country.
    # (Python 3: csv files are opened in text mode with newline="".)
    with open(csv_file, newline="") as fin:
        rows = list(csv.reader(fin))
    if not rows:
        # Nothing to copy, not even a header row.
        return
    header, body = rows[0], rows[1:]

    for country in countries:
        out_name = country + "_timeline_outfile_" + csv_file
        with open(out_name, "w", newline="") as fout:
            writer = csv.writer(fout, lineterminator="\n")
            # Build new lists so the rows read above are not mutated and can
            # be reused for the next country.
            writer.writerow(header + ["country"])
            writer.writerows(row + [country] for row in body)
I'm using this information (downloaded the file to my computer) http://www.genome.gov/admin/gwascatalog.txt
and wrote this
import csv
path = '/Users/mtchavez/Documents/ALS/Diseasome/GWAS.txt'
read_file = open(path)
reader = csv.reader(read_file, delimiter = '\t')
fieldnames = reader.next()
rows = list(reader)
read_file.close()
write_file = open('datatest.csv', 'wb')
writer = csv.writer(write_file, delimiter = '\t')
writer.writerow(('disease', 'genes'))
disease_gene = dict()
for row in rows:
disease = row[7]
reported_genes = row[13]
but I get an error message:
File "data1.py", line 18, in <module>
disease = row[7]
IndexError: list index out of range
There is an empty line at the end of this csv file and it will create an empty row. Delete the last line and the code works fine.
Try filtering for empty lines:
for row in rows:
    # An empty line in the file is read as an empty row; skip it so the
    # index lookups below cannot fail on it.
    if not row:
        continue
    disease = row[7]
    reported_genes = row[13]
Or more specifically, filter for the desired length:
for row in rows:
    # Skip any record that does not have the expected number of fields.
    if len(row) != EXPECTED_LENGTH_OF_RECORD:
        continue
    disease = row[7]
    reported_genes = row[13]
I'm trying to add a new column by copying col#3 and then append #hotmail to the new column
Here is the script. The only problem is that it does not finish processing the input file: the output file has only 61409 rows, whereas the input file has 61438 rows.
Also, there is an error message (the input file does not have empty line at the end):
email = row[3]
IndexError: list index out of range
inFile = 'c:\\Python27\\scripts\\intake.csv'
outFile = 'c:\\Python27\\scripts\\final.csv'
with open(inFile, 'rb') as fp_in1, open(outFile, 'wb') as fp_out1:
writer = csv.writer(fp_out1, delimiter=",")
reader = csv.reader(fp_in1, delimiter=",")
for col in reader:
del col[6:]
writer.writerow(col)
headers = next(reader)
writer.writerow(headers + ['email2'])
for row in reader:
if len(row) > 3:
email = email.split('#', 1)[0] + '#hotmail.com'
writer.writerow(row + [email])
It looks like you edited the code you received in your earlier answer.
Change
email = email.split('#', 1)[0] + '#hotmail.com'
to
email = row[3].split('#', 1)[0] + '#hotmail.com'