Index Error: Index out of bounds when using numpy in Python

I have code that works fine for small CSVs of data but errors out when I run large CSVs through it. In essence this code is supposed to place 3 CSVs' worth of data into 3 separate dictionaries, combine those dictionaries into a master dictionary, and then perform arithmetic operations on that dictionary. The input CSVs look something like this:
time A B C D
0 3 4 6 4
.001 4 6 7 8
.002 4 6 7 3
The code that I am using is displayed below. The error occurs within lines 47 and 65, where I am trying to perform arithmetic with the dictionary. Any explanation as to why this is going on is greatly appreciated.
import numpy

Xcoord = {}
time = []
with open('Nodal_QuardnetsX2.csv', 'r') as f:
    f.readline()  # Skips first line
    for line in f:
        values = [s.strip() for s in line.split(',')]
        Xcoord[values[0]] = map(float, values[1:])
        time.append(values[0])

Ycoord = {}
with open('Nodal_QuardnetsY2.csv', 'r') as f:
    f.readline()  # Skips first line
    for line in f:
        values = [s.strip() for s in line.split(',')]
        Ycoord[values[0]] = map(float, values[1:])

Zcoord = {}
with open('Nodal_QuardnetsZ2.csv', 'r') as f:
    f.readline()  # Skips first line
    for line in f:
        values = [s.strip() for s in line.split(',')]
        Zcoord[values[0]] = map(float, values[1:])

# Create a master dictionary of the form {'key': [[x, y, z], [x, y, z]]}
CoordCombo = {}
for key in Xcoord.keys():
    CoordnateList = zip(Xcoord[key], Ycoord[key], Zcoord[key])
    CoordCombo[key] = CoordnateList

counter = 0
keycount1 = 0
keycount2 = 0.001
difference = []
NodalDisplacements = {}

# Find the difference between the x, y, and z coordinates relative to that point in time
while keycount2 <= float(values[0]):
    Sub = numpy.subtract(CoordCombo[str(keycount2)][counter], CoordCombo[str(keycount1)][counter])
    counter = counter + 1
    difference.append(Sub)
    NodalDisplacements[keycount1] = Sub
    keycount1 = keycount1 + 0.001
    keycount2 = keycount2 + 0.001

counter = 0
keycount3 = 0
keycount4 = 0.001
Sum = []
breakpoint = float(values[0]) - 0.001

while keycount4 <= breakpoint:
    Add = numpy.sum(NodalDisplacements[keycount4][counter], NodalDisplacements[keycount3][counter])
    Sum.append(Add)
    keycount3 = keycount3 + 0.001
    keycount4 = keycount4 + 0.001
    counter = counter + 1
    if counter == 2:
        counter = 0

print Sum

Probably a line of your CSV file does not contain 5 elements, or the line is empty.
In your reading loop I would suggest checking for that, for example:
for line in f:
    line = line.strip()
    if not line:
        continue
    if len(values) != N_COLS:
        continue  # or error...
    # other ...
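Wired into the first reading block from the question, a minimal sketch of that check could look like the following. N_COLS = 5 (time plus A-D) is an assumption based on the sample rows, and list(...) around map is added only so the values stay indexable under Python 3 as well:

N_COLS = 5  # time + A, B, C, D -- assumed from the sample data

Xcoord = {}
time = []
with open('Nodal_QuardnetsX2.csv', 'r') as f:
    f.readline()  # skip the header line
    for line in f:
        line = line.strip()
        if not line:
            continue  # skip empty lines
        values = [s.strip() for s in line.split(',')]
        if len(values) != N_COLS:
            continue  # or raise an error for malformed rows
        Xcoord[values[0]] = list(map(float, values[1:]))
        time.append(values[0])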

Related

Python - Read file I/O - Find average of each day's temperature records

I have to write a Python function which records temperatures for different days. The temperatures for the same day are stored on the same line. The first day is considered to be day 1, and each subsequent line of the file records the following days in sequential order (e.g. the 3rd line of data is collected from the 3rd day). If there was no data collected for a given day then the entire line will be blank. For example, the text file contains the following inputs for 6 days:
23 24.5
25
22.25 22.5

23.4
25.2 20.0
This file contains data collected for 6 days.
I am to define a function temp_record which takes a filename as a parameter. It reads the data from the parameter file and analyses the temperatures. The function should return a list of average temperatures per day. For example, the function returns the following list for the above text file:
[23.75, 25.0, 22.375, 0, 23.4, 22.6]
I wrote some code, but it doesn't seem to work for all test cases and I'm not sure what went wrong. Can someone help?
Here is the code I wrote:
def temp_record(filename):
    input_file = open(filename, 'r')
    contents = input_file.read().split("\n")
    sum_val = 0
    lis = []
    for string in contents:
        split_str = string.split(" ")
        for i in range(len(split_str)):
            if split_str[i] == '':
                split_str[i] = 0
            else:
                split_str[i] = float(split_str[i])
        ans = (sum(split_str) / len(split_str))
        if ans == 0.0:
            ans = 0
        lis.append(ans)
    return lis
When you do contents = input_file.read().split("\n") you get an additional empty element at the end of the contents list, and it gets computed as 0.
You can fix this like this:
def temp_record(filename):
    input_file = open(filename, 'r')
    # read all lines
    contents = input_file.readlines()
    sum_val = 0
    lis = []
    for string in contents:
        # lines end in \n, use rstrip to remove it
        split_str = string.rstrip().split(" ")
        for i in range(len(split_str)):
            if split_str[i] == '':
                split_str[i] = 0
            else:
                split_str[i] = float(split_str[i])
        ans = (sum(split_str) / len(split_str))
        if ans == 0.0:
            ans = 0
        lis.append(ans)
    return lis
but this can be much shorter:
def temp_record(filename):
    result = []
    with open(filename, 'r') as fp:
        for line in fp:
            temps = line.split()
            avg_temp = sum(map(float, temps)) / len(temps) if temps else 0
            result.append(avg_temp if avg_temp > 0 else 0)
    return result
or even shorter if you want to play code golf:
def temp_record2(filename):
    with open(filename, 'r') as fp:
        return list(map(lambda x: x if x > 0 else int(x),
                        [sum(map(float, line.split())) / len(line.split()) if line.split() else 0 for line in fp]))
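As a quick check (not part of the original answer), running either version on the sample file from the question — assuming it is saved as temps.txt with the blank line for day 4 — should return the list the question expects:

print(temp_record('temps.txt'))  # 'temps.txt' is a hypothetical filename for the sample data
# [23.75, 25.0, 22.375, 0, 23.4, 22.6]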
Perhaps the hidden test that fails is with an input like:
-1 1
0
30
The first two days do have recorded temperatures, but their average is 0. Following the format of using floats for all other averages, the average should be 0.0, not 0 (as that would imply no temperature was collected for the day, when in fact one was).
If this is the issue, it could be fixed like this:
def temp_record(filename):
    input_file = open(filename, 'r')
    contents = input_file.read().split("\n")
    sum_val = 0
    lis = []
    for string in contents:
        split_str = string.split(" ")
        for i in range(len(split_str)):
            if split_str[i] == '':
                split_str[i] = 0
            else:
                split_str[i] = float(split_str[i])
        ans = (sum(split_str) / len(split_str))
        if string == '':
            ans = 0
        lis.append(ans)
    return lis
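As a sanity check (not from the original answer), on the three-line input above this version should return 0.0 for the days whose readings average to zero and a float for day 3 — assuming the file has no trailing blank line, which would otherwise re-introduce the extra-element issue described in the first answer:

print(temp_record('temps2.txt'))  # hypothetical file containing the lines "-1 1", "0", "30"
# [0.0, 0.0, 30.0]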

Python: How to read space-delimited data of different lengths from a text file and parse it

I have space-delimited data in a text file that looks like the following:
0 1 2 3
1 2 3
3 4 5 6
1 3 5
1
2 3 5
3 5
Each line has a different length.
I need to read it starting from line 2 ('1 2 3'),
parse it, and get the following information:
Number of unique data = (1,2,3,4,5,6)=6
Count of each data:
count data (1)=3
count data (2)=2
count data (3)=5
count data (4)=1
count data (5)=4
count data (6)=1
Number of lines=6
Sort the data in descending order:
data (3)
data (5)
data (1)
data (2)
data (4)
data (6)
I did this:
import csv

file = open('data.txt')
csvreader = csv.reader(file)
header = []
header = next(csvreader)
print(header)
rows = []
for row in csvreader:
    rows.append(row)
print(rows)
After this step, what should I do to get the expected results?
I would do something like this:
from collections import Counter

with open('data.txt', 'r') as file:
    lines = file.readlines()

lines = lines[1:]  # skip first line

data = []
for line in lines:
    data += line.strip().split(" ")

counter = Counter(data)

print(f'unique data: {list(counter.keys())}')
print(f'count data: {list(sorted(counter.most_common(), key=lambda x: x[0]))}')
print(f'number of lines: {len(lines)}')
print(f'sort data: {[x[0] for x in counter.most_common()]}')
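On the sample data above, this should print roughly the following (the counts match the results the question asks for):

unique data: ['1', '2', '3', '4', '5', '6']
count data: [('1', 3), ('2', 2), ('3', 5), ('4', 1), ('5', 4), ('6', 1)]
number of lines: 6
sort data: ['3', '5', '1', '2', '4', '6']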
A simple brute force approach:
nums = []
counts = {}

for row in open('data.txt'):
    if row[0] == '0':
        continue
    nums.extend([int(k) for k in row.rstrip().split()])
print(nums)

for n in nums:
    if n not in counts:
        counts[n] = 1
    else:
        counts[n] += 1
print(counts)

ordering = list(sorted(counts.items(), key=lambda k: -k[1]))
print(ordering)
Here is another approach:
def getData(infile):
    """Read file lines and return lines 1 thru end."""
    lnes = []
    with open(infile, 'r') as data:
        lnes = data.readlines()
    return lnes[1:]

def parseData(ld):
    """Parse data and print desired results."""
    unique_symbols = set()
    all_symbols = dict()
    for l in ld:
        symbols = l.strip().split()
        for s in symbols:
            unique_symbols.add(s)
            cnt = all_symbols.pop(s, 0)
            cnt += 1
            all_symbols[s] = cnt
    print(f'Number of Unique Symbols = {len(unique_symbols)}')
    print(f'Number of Lines Processed = {len(ld)}')
    for symb in unique_symbols:
        print(f'Number of {symb} = {all_symbols[symb]}')
    print(f"Descending Sort of Symbols = {', '.join(sorted(list(unique_symbols), reverse=True))}")
On executing:
infile = r'spaced_text.txt'
parseData(getData(infile))
Produces:
Number of Unique Symbols = 6
Number of Lines Processed = 6
Number of 2 = 2
Number of 5 = 4
Number of 3 = 5
Number of 1 = 3
Number of 6 = 1
Number of 4 = 1
Descending Sort of Symbols = 6, 5, 4, 3, 2, 1
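Note that the question's "sort the data in descending order" is by count (3, 5, 1, 2, 4, 6), while this answer sorts the symbol names themselves. A small tweak of my own — reusing the all_symbols dict already built inside parseData — would sort by frequency instead:

    by_count = sorted(unique_symbols, key=lambda sym: all_symbols[sym], reverse=True)
    print(f"Symbols by Descending Count = {', '.join(by_count)}")

For the sample data this should give 3, 5, 1, 2, 4, 6 (the relative order of the two symbols with count 1 may vary, since sets are unordered).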

Why doesn't my script display the newly created file?

I am writing a Python script that opens a file and, for each test case, should write the total number of items gathered in that case and display the total amount ($) that it costs.
When running my code:
f = open("shopping.txt", "r")
outFile = open("results.txt", "w")
t = int(f.readline().strip())
for z in range(t):
# Assuming prices are unique
myList = {}
items = int(f.readline().strip())
ind = 1
# Read each line for each item
for i in range(items):
p, w = map(int, f.readline().strip().split())
myList[p] = [w, ind]
ind+=1
weights = []
F = int(f.readline().strip())
for i in range(F):
weights.append(int(f.readline().strip()))
RES = []
values = []
for weight in weights:
sortedPrice = sorted(myList.keys())[::-1]
m = 0
p = 0
tmp = []
# Grabbing all possible results using greedy method
# Max price stored into values array and item # in RES array.
for i in range(len(myList)):
R = []
s = 0
p = 0
if myList[sortedPrice[i]][0]<=weight:
s=myList[sortedPrice[i]][0]
p=sortedPrice[i]
R+=myList[sortedPrice[i]][1],
for j in range(i+1, len(myList)):
if myList[sortedPrice[j]][0]+s<=weight:
s+=myList[sortedPrice[j]][0]
p+=sortedPrice[j]
R+=myList[sortedPrice[j]][1],
if m<p:
m = p
tmp = R
tmp.sort()
RES.append(tmp)
values.append(m)
outFile.write("Test Case %d\n" %(z+1))
outFile.write("Total Price: %d\n" %(sum(values)))
outFile.write("Member Items:\n")
for i in range(len(RES)):
outFile.write("%d: %s" %(i+1, " ".join(map(str, RES[i]))))
f.close()
outFile.close()
I get the result:
Test Case 1
Total Price: 0
Member Items:
Test Case 2
Total Price: 0
Member Items:
When I expected something like this:
Test Case1
Total Price 72
Member Items
1: 1
Test Case2
Total Price 568
Member Items
1: 3 4
2: 3 6
3: 3 6
4: 3 4 6
I am relatively new to programming in general, so I would appreciate any insight anyone could give on my code. Adding to this, my guess is that the sum() and/or the map calls may be breaking and not working as intended, as I'm writing to the file to get the total value and items of each case.
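There is no answer attached to this question here, but as a quick standalone check of that guess (a sketch of my own, not from the post): sum() and " ".join(map(str, ...)) behave as expected on plain lists, so if the snippet below prints correctly, the zero totals more likely mean values and RES are never being populated with nonzero data.

values = [72]   # hypothetical data matching the expected "Total Price 72"
RES = [[1]]     # hypothetical member items for one weight
print("Total Price: %d" % sum(values))               # -> Total Price: 72
print("%d: %s" % (1, " ".join(map(str, RES[0]))))    # -> 1: 1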

Iterating on a file and comparing values using python

I have a section of code that opens files containing information with wavenumber and intensity like this:
500.21506 -0.00134
500.45613 0.00231
500.69720 -0.00187
500.93826 0.00129
501.17933 -0.00049
501.42040 0.00028
501.66147 0.00114
501.90253 -0.00036
502.14360 0.00247
My code attempts to parse the information between two given wavelengths, lowwav and highwav. I would like to print only the intensities of the wavenumbers that fall between lowwav and highwav. My entire code looks like this:
import datetime
import glob

path = '/Users/140803/*'
files = glob.glob(path)

for line in open('sfit4.ctl', 'r'):
    x = line.strip()
    if x.startswith('band.1.nu_start'):
        a, b = x.split('=')
        b = float(b)
        b = "{0:.3f}".format(b)
        lowwav = b
    if x.startswith('band.1.nu_stop'):
        a, b = x.split('=')
        b = float(b)
        b = "{0:.3f}".format(b)
        highwav = b

with open('\\_spec_final.t15', 'w') as f:
    with open('info.txt', 'rt') as infofile:
        for count, line in enumerate(infofile):
            lat = float(line[88:94])
            lon = float(line[119:127])
            year = int(line[190:194])
            month = int(line[195:197])
            day = int(line[198:200])
            hour = int(line[201:203])
            minute = int(line[204:206])
            second = int(line[207:209])
            dur = float(line[302:315])
            numpoints = float(line[655:660])
            fov = line[481:497]  # field of view?
            sza = float(line[418:426])
            snr = 0.0000
            roe = 6396.2
            res = 0.5000
            lowwav = float(lowwav)
            highwav = float(highwav)
            spacebw = (highwav - lowwav) / numpoints
            d = datetime.datetime(year, month, day, hour, minute, second)
            f.write('{:>12.5f}{:>12.5f}{:>12.5f}{:>12.5f}{:>8.1f}'.format(sza, roe, lat, lon, snr))  # line 1
            f.write("\n")
            f.write('{:>10d}{:>5d}{:>5d}{:>5d}{:>5d}{:>5d}'.format(year, month, day, hour, minute, second))  # line 2
            f.write("\n")
            f.write(('{:%Y/%m/%d %H:%M:%S}'.format(d)) + "UT Solar Azimuth:" + ('{:>6.3f}'.format(sza)) + " Resolution:" + ('{:>6.4f}'.format(res)) + " Duration:" + ('{:>6.2f}'.format(dur)))  # line 3
            f.write("\n")
            f.write('{:>21.13f}{:>26.13f}{:>24.17e}{:>12f}'.format(lowwav, highwav, spacebw, numpoints))  # line 4
            f.write("\n")
            with open(files[count], 'r') as g:
                for line in g:
                    wave_no, tensity = [float(item) for item in line.split()]
                    if lowwav <= wave_no <= highwav:
                        f.write(str(tensity) + '\n')
            g.close()

f.close()
infofile.close()
Right now, everything works fine except the last part where I compare wavelengths and print out the intensities corresponding to wavelengths between lowwav and highwav. No intensities are printing into the output file.
The problem is that when you iterate over the file g you are effectively moving its "file pointer". So a second loop over g starts at the end of the file and doesn't produce any values.
Secondly, you are producing all these nums lists, but every iteration of the loop shadows the previous value, making it unreachable.
Either you collect all the values first and then iterate over those:
with open(files[count], 'r') as g:
    all_nums = []
    for line in g:
        all_nums.append([float(item) for item in line.split()])
    for nums in all_nums:
        if (lowwav - nums[0]) < 0 or (highwav - nums[0]) > 0:
            f.write(str(nums[1]))
            f.write('\n')
        else:
            break
Or just do everything inside the first loop (this should be more efficient):
with open(files[count], 'r') as g:
    for line in g:
        nums = [float(item) for item in line.split()]
        if (lowwav - nums[0]) < 0 or (highwav - nums[0]) > 0:
            f.write(str(nums[1]))
            f.write('\n')
        else:
            break
Also note that the break statement will stop processing the values the first time the condition is false; you probably want to remove it.
This said, note that your code prints all values where nums[0] is either bigger than lowwav or smaller than highwav, which means that if lowwav < highwav every value will be printed. You probably want to use and in place of or if you want to check whether they are between lowwav and highwav. Moreover, in Python you can just write lowwav < nums[0] < highwav for this.
I would personally use the following:
with open(files[count], 'r') as g:
    for line in g:
        wave_no, intensity = [float(item) for item in line.split()]
        if lowwav < wave_no < highwav:
            f.write(str(intensity) + '\n')
Split each line on whitespace and unpack the split list into two names, wavelength and intensity.
[line.split() for line in r] makes
500.21506 -0.00134
500.45613 0.00231
into
[['500.21506', '-0.00134'], ['500.45613', '0.00231']]
This listcomp [(wavelength, intensity) for wavelength,intensity in lol if low <= float(wavelength) <= high] returns
[('500.21506', '-0.00134'), ('500.45613', '0.00231')]
If you join them back with [' '.join((w, i)) for w, i in [('500.21506', '-0.00134'), ('500.45613', '0.00231')]] you get ['500.21506 -0.00134', '500.45613 0.00231']
Use the listcomp to filter by wavelength, then join wavelength and intensity back into a string and write it to the file.
with open('data.txt', 'r') as r, open('\\_spec_final.t15', 'w') as w:
    lol = (line.split() for line in r)
    intensities = (' '.join((wavelength, intensity)) for wavelength, intensity in lol if low <= float(wavelength) <= high)
    w.writelines(intensities)
If you want to print to the terminal instead, do print(list(intensities)) rather than w.writelines(intensities). Note that intensities is a generator, so it can only be consumed once.
Contents of data.txt:
500.21506 -0.00134
500.45613 0.00231
500.69720 -0.00187
500.93826 0.00129
501.17933 -0.00049
501.42040 0.00028
501.66147 0.00114
501.90253 -0.00036
502.14360 0.00247
Output when low is 500 and high is 500.5:
['500.21506 -0.00134', '500.45613 0.00231']

Splitting a dictionary and writing it to different CSV files in Python

I want to split a Python dictionary and write it to different files based on NO_OF_LINES_PER_FILE and the size of the dictionary.
Input
NO_OF_LINES_PER_FILE
So if NO_OF_LINES_PER_FILE = 2 and the size of the dictionary is 10, I want the dictionary to be split into 5 files (each file will have 2 rows).
Script
import csv

NO_OF_LINES_PER_FILE = 2
s = {"2222": ["1", "2", "3"], "3456": ["2", "3", "4"], "5634": ["4", "5"], "23543": ["456", "3"], "29587": ["4", "5"], "244": ["23", "34"], "455": ["3", "4"], "244221": ["5"], "23232345": ["2323", "43"]}

def again(c, h, NO_OF_LINES_PER_FILE1):
    f3 = open('num_' + str(h) + '.csv', 'at')
    if c == 1:
        ceh = 2
    else:
        ceh = c
    print ceh
    v = 0
    for w in s:
        v = v + 1
        if v < ceh:
            pass
        elif v > NO_OF_LINES_PER_FILE1:
            print "yes"
            NO_OF_LINES_PER_FILE1 = NO_OF_LINES_PER_FILE1 + 1
            h = NO_OF_LINES_PER_FILE1 + 1
            again(c, h, NO_OF_LINES_PER_FILE1)
        else:
            writer = csv.writer(f3, delimiter=',', lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerow(s[w])
            c = c + 1

def split():
    f3 = open('has_' + str(NO_OF_LINES_PER_FILE) + '.csv', 'at')
    writer = csv.writer(f3, delimiter=',', lineterminator='\n', quoting=csv.QUOTE_ALL)
    c = 0
    for w in s:
        if c >= NO_OF_LINES_PER_FILE:
            NO_OF_LINES_PER_FILE1 = NO_OF_LINES_PER_FILE + 1
            h = NO_OF_LINES_PER_FILE
            again(c, h, NO_OF_LINES_PER_FILE1)
            break
        else:
            #print NO_OF_LINES_PER_FILE
            writer = csv.writer(f3, delimiter=',', lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerow(s[w])
            c = c + 1

split()
But this script is not working and creates too many files.
In the above script NO_OF_LINES_PER_FILE = 2 and the size of dictionary s is 9,
so I want 5 files: the first four files will contain 2 rows each and the fifth file will contain 1 row.
How can I solve this problem?
My method is to flatten the dict first, then split the flattened dict into sublists of the length you want:
import csv

flatDict = [i for i in s.items()]
splitFlatDict = [flatDict[i:i + NO_OF_LINES_PER_FILE] for i in xrange(0, len(flatDict), NO_OF_LINES_PER_FILE)]

for i, rows in enumerate(splitFlatDict):
    with open(str(i) + '.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerows(rows)
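The answer above is Python 2 (xrange, opening the file in 'wb' mode). A rough Python 3 equivalent of the same idea — my own sketch, not part of the original answer, with s and NO_OF_LINES_PER_FILE taken from the question's script — would be:

import csv

flat = list(s.items())
chunks = [flat[i:i + NO_OF_LINES_PER_FILE] for i in range(0, len(flat), NO_OF_LINES_PER_FILE)]

for i, rows in enumerate(chunks):
    # newline='' prevents csv from writing blank lines between rows on Windows
    with open(str(i) + '.csv', 'w', newline='') as f:
        csv.writer(f).writerows(rows)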
