List index out of range using csv module - python

I'm using this information (downloaded the file to my computer) http://www.genome.gov/admin/gwascatalog.txt
and wrote this
import csv
path = '/Users/mtchavez/Documents/ALS/Diseasome/GWAS.txt'
read_file = open(path)
reader = csv.reader(read_file, delimiter = '\t')
fieldnames = reader.next()
rows = list(reader)
read_file.close()
write_file = open('datatest.csv', 'wb')
writer = csv.writer(write_file, delimiter = '\t')
writer.writerow(('disease', 'genes'))
disease_gene = dict()
for row in rows:
disease = row[7]
reported_genes = row[13]
but I get an error message:
File "data1.py", line 18, in <module>
disease = row[7]
IndexError: list index out of range

There is an empty line at the end of this csv file and it will create an empty row. Delete the last line and the code works fine.

Try filtering for empty lines:
for row in rows:
if not row: continue
disease = row[7]
reported_genes = row[13]
Or more specifically, filter for the desired length:
for row in rows:
if len(row) != EXPECTED_LENGTH_OF_RECORD: continue
disease = row[7]
reported_genes = row[13]

Related

CSV data matching: 'dict_keys' object is not subscriptable

I have put the together code to compare and match data between two CSVs and collate that data into a new CSV. The CSVs have similar data but the column names and their positions are different.
When I Run and Debug, it throws the following error at line 42:
Exception has occurred: TypeError
'dict_keys' object is not subscriptable
File "D:\Documents\Python\iris\accountmanagement2.py", line 42, in <module>
file1_columns[0]: file2_columns[1],
TypeError: 'dict_keys' object is not subscriptable
This is the code I have put together for this task. I tried using the type(data1[0].keys()) but that just says 'type' object is not subscriptable.
Really need some advice or ideas as I am completely stumped :)
import csv
# specify the file paths for the two CSV files
file1 = "D:\Documents\Python\iris\esr.csv"
file2 = "D:\Documents\Python\iris\iris.csv"
def define_columns(file_path):
with open(file_path, "r") as f:
reader = csv.reader(f)
# get the first row (column headers)
columns = next(reader)
return columns
# Define the keys of the two files
file1_keys = define_columns(file1)
file2_keys = define_columns(file2)
# read the data from the first CSV file
data1 = []
with open(file1, "r") as f:
reader = csv.DictReader(f)
for row in reader:
# Convert data to ASCII
for key in file1_keys:
row[key] = row[key].encode("ascii", "ignore").decode()
data1.append(row)
# read the data from the second CSV file
data2 = []
with open(file2, "r") as f:
reader = csv.DictReader(f)
for row in reader:
# Convert data to ASCII
for key in file2_keys:
row[key] = row[key].encode("ascii", "ignore").decode()
data2.append(row)
# Define the columns name of the two files
file1_columns = data1[0].keys()
file2_columns = data2[0].keys()
col_map = {
file1_columns[0]: file2_columns[1],
file1_columns[2]: file2_columns[5],
file1_columns[1]: file2_columns[6],
file1_columns[4]: file2_columns[7]
}
# match the data from the two files based on a specific column (e.g. "ID")
matching_data = []
non_matching_data1 = []
non_matching_data2 = []
for row1 in data1:
matched = False
for row2 in data2:
if row1[col_map[file1_columns[0]]] == row2[col_map[file2_columns[1]]]:
matching_data.append({**row1, **row2})
matched = True
break
if not matched:
non_matching_data1.append(row1)
for row2 in data2:
matched = False
for row1 in data1:
if row1[col_map[file1_columns[0]]] == row2[col_map[file2_columns[1]]]:
matched = True
break
if not matched:
non_matching_data2.append(row2)
# create a new CSV file with the matched data
with open("matched_data.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(col_map.keys())+list(col_map.values()))
writer.writeheader()
for row in matching_data:
writer.writerow(row)
# create a new CSV file with the non-matching data from file1
with open("non_matching_data1.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(data1[0].keys()))
writer.writeheader()
for row in non_matching_data1:
writer.writerow(row)
# create a new CSV file with the non-matching data from file2
with open("non_matching_data2.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(data2[0].keys()))
writer.writeheader()
for row in non_matching_data2:
writer.writerow(row)

Search value in column in CSV file using Python

I need to scan through line by line in time column time in CSV file, and see if there was activity in another column during 5 hours ago, then add column with value 1.
Here is my idea:
import csv
from collections import namedtuple
from contextlib import closing
light3 = pd.read_csv('G:/light3.csv')
light3.Time = pd.to_datetime(light3.Time)
m = light3.Time - DateOffset(hours = 5)
def search():
item = light3[(light3['Time']> m)| (light3['Time']== m)]
raw_data = 'G:/light3.csv'
failure = 'No matching item could be found with that item code. Please try again.'
check = False
with open('G:/project/test.pcap_z/light.csv','r') as csvinput:
with open('G:/light1.csv', 'w') as csvoutput:
writer = csv.writer(csvoutput, lineterminator='\n')
reader = csv.reader(csvinput)
read_data = csv.DictReader(csvinput, delimiter=';')
item_data = namedtuple(read_data['Time'], read_data.Time)
all = [ ]
row = next(reader)
row.append('f')
all.append(row)
while check == False:
for row in reader:
if row.Item == item:
row.append('f')
all.append(row)
row.append(1)
writer.writerows(all)

Append A Random Number in every line in CSV file python

I'm trying to append a random number in every line in csv file in row[2] i get the information from the original file then i write it to a new csv file + i append the second row with the random number
but i get the same random number in every line when i run the script (
i have the read file which contain exp:
car,golf
when i write this data to new csv file and append the second row i get the same number for every line
car,golf,1777
car,bmw,1777
car,m3,1777
how can i fix this so i can have random number in every line
data = []
with open("read.csv", "r") as the_file:
sid_row = 5000
for i in range(sid_row):
line = str(random.randint(1,5000))
sid = line
reader = csv.reader(the_file, delimiter=",")
for row in reader:
try:
new_row = [row[0], row[1],sid]
data.append(new_row)
except IndexError as error:
print(error)
pass
with open("Random.csv", "w+") as Ran_file:
writer = csv.writer(Ran_file, delimiter=",")
for new_row in data:
writer.writerow(new_row)
You need a new random number each time, for each row you're processing, something like:
data = []
with open("read.csv", "r") as the_file:
reader = csv.reader(the_file, delimiter=",")
for row in reader:
try:
line = str(random.randint(1,5000))
sid = line
new_row = [row[0], row[1],sid]
data.append(new_row)
except IndexError as error:
print(error)
pass
with open("Random.csv", "w+") as Ran_file:
writer = csv.writer(Ran_file, delimiter=",")
for new_row in data:
writer.writerow(new_row)

CSV file error while trying to access it

scenario:
I'm trying to extract tweets from twitter, which is working fine,
next I'm trying to merge 10 files into 1(say file = QW).
for line in file:
my_row = [line]
filename = line.rstrip()+"_tweets"+".csv"
if(os.path.exists(filename)):
f = open(filename, "rt")
reader = csv.reader(f, delimiter="\t")
for line in enumerate(reader):
my_row.append(line)
writer.writerow(my_row)
else:
print(""+filename+ " doesnt exist")
my csv file looks like this
and then I will process that one file
problem: I want to read specific column of that CSV(QW) file
I tried row[0]
for row in input_file:
name_list = [] ;score = 0;
name_list.append(row[0])
print(name_list)
for a in row:
if a.find(skill_input) > 0 :
score = score+1;
name_list.append(score)
print(name_list)
writer.writerow([name_list])
and that point I get an error
my csv file looks like this
name_list.append(row[0])
IndexError: list index out of range
Try this
for line in open("csvfile.csv"):
csv_row = line.split(your_delimiter) #returns a list ["1","50","60"]
if not csv_row[k]:
continue
name_list.append(csv_row[k]) # Or csv_row[k] for specific kth column

Loop creates unwanted duplicate

I am trying to pull in data from an input file and iterate over a symbol file to create output for an output file but my code is creating an unwanted duplicate in the output file. The input file is very big so I need to filter the input first before I reference it against the symbol (city/state) file to generate the output.
i_file = ('InputFile.csv')
o_file = ('OutputFile.csv')
symbol_file = ('SymbolFile.csv')
City = 'Tampa'
State = 'FL'
with open(symbol_file, 'r') as symfile:
with open(i_file, 'r') as infile:
with open(o_file, 'w') as outfile:
reader = csv.reader(infile)
symbol = csv.reader(symfile)
writer = csv.writer(outfile, delimiter = ',')
for row in reader:
if (row[2] == city and row[3] == state):
for line in symbol:
if (row[4] == line[0]):
nline = ([str(city)] + [str(line[3])])
writer.writerow(nline)
symfile.seek(0)
I only want one line for every line in the input file IF there is a matching line in the symbol file.
Try it like this then:
i_file = 'InputFile.csv'
o_file = 'OutputFile.csv'
symbol_file = 'SymbolFile.csv'
city = 'Tampa'
state = 'FL'
# load the symbols from the symbol file and store them in a dictionary
symbols = {}
with open(symbol_file, 'r') as symfile:
for line in csv.reader(symfile):
# key is line[0] which is the thing we match against
# value is line[3] which appears to be the only thing of interest later
symbols[line[0]] = line[3]
# now read the other files
with open(i_file, 'r') as infile, open(o_file, 'w') as outfile:
reader = csv.reader(infile)
writer = csv.writer(outfile, delimiter = ',')
for row in reader:
# check if `row[4] in symbols`
# which essentially checks whether row[4] is equal to a line[0] in the symbols file
if row[2] == city and row[3] == state and row[4] in symbols:
# get the symbol
symbol = symbols[row[4]]
# write output
nline = [city, symbol]
writer.writerow(nline)

Categories