Finding Maximum Value in CSV File - python

Have an assignment of finding average and maximum rainfall in file "BoulderWeatherData.csv". Have found the average using this code:
rain = open("BoulderWeatherData.csv", "r")
data = rain.readline()
print(rain)
data = rain.readlines()
total = 0
linecounter = 0
for rain in data:
linecounter = linecounter + 1
print("The number of lines is", linecounter)
for line in data:
r = line.split(",")
total = total + float(r[4])
print(total)
average = float(total / linecounter)
print("The average rainfall is ", "%.2f" % average)
However, can't seem to find maximum using this same process. Attempted using max, function but the answer that must be obtained is float number, which can not be iterated through max.
Any help would be appreciated.

This is my prefered way of handling this.
#!/usr/bin/env python3
rain = open("BoulderWeatherData.csv","r")
average = 0.0
total = 0
maxt = 0.0
for line in rain:
try:
p = float(line.split(",")[4])
average += p
total += 1
maxt = max(maxt,p)
except:
pass
average = average / float(total)
print("Average:",average)
print("Maximum:",maxt)
This will output:
Average: 0.05465272591486193
Maximum: 1.98

import csv
INPUT = "BoulderWeatherData.csv"
PRECIP = 4 # 5th column
with open(INPUT, "rU") as inf:
incsv = csv.reader(inf)
header = next(incsv, None) # skip header row
precip = [float(row[PRECIP]) for row in incsv]
avg_precip = sum(precip, 0.) / (1 and len(precip)) # prevent div-by-0
max_precip = max(precip)
print(
"Avg precip: {:0.3f} in/day, max precip: {:0.3f} in/day"
.format(avg_precip, max_precip)
)
returns
Avg precip: 0.055 in/day, max precip: 1.980 in/day

max=0
for line in data:
r = line.split(",")
if float(r[4]) > max:
max=float(r[4])
print(max)
something like that

You're already accumulating total across loop iterations.
To keep track of a maxvalue, it's basically the same thing, except instead of adding you're maxing:
total = 0
maxvalue = 0
for line in data:
r = line.split(",")
value = float(r[4])
total = total + value
maxvalue = max(maxvalue, value)
print(total)
print(maxvalue)
Or, if you don't want to use the max function:
for line in data:
r = line.split(",")
value = float(r[4])
total = total + value
if value > maxvalue:
maxvalue = value

This code will attempt to find the maximum value, and the average value, of floats stored in the 5th position in a .csv.
rainval = []
Initializes the empty array where we will store values.
with open ("BoulderWeatherData.csv", "r") as rain:
Opens the .csv file and names it "rain".
for lines in rain:
This reads every line in rain until the end of the file.
rainval += [float(lines.strip().split(",")[4])]
We append the float value found in the fifth position (fourth index) of the line.
We repeat the above for every line located in the .csv file.
print (sorted(rainval)[len(rainval)])
This sorts the values in the rainval array and then takes the last (greatest) value, and prints it. This is the maximum value and is better than max because it can handle floats and not just ints.
print (sum(rainval)/len(rainval))
This prints the average rainfall.
Alternatively, if we don't want to use arrays:
maxrain = -float("inf")
total, count = 0, 0
with open ("test.txt", "r") as rain:
for lines in rain:
temp = float(lines.strip().split(",")[4])
if maxrain < temp:
maxrain = temp
total += temp
count += 1
print (maxrain)
print (total/count)

Related

Python - Read file I/O - Find average of each day temperature records

I have to write a Python function which records temperatures for different days. The temperature for the same day is stored on the same line.The first day is considered to be day 1, and each subsequent line of the file records the following days in sequential order (e.g. the 3rd line of data is collected from the 3rd day). If there was no data collected for a given day then the entire line will be blank. For example, The text file contains the following inputs for 6 days:
23 24.5
25
22.25 22.5
23.4
25.2 20.0
This file contains data collected for 6 days.
I am to define a function temp_record which takes a filename as a parameter. It reads the data from the parameter file and analyses the temperatures. The function should return a list of average temperatures per day. For example, the function returns the following list for the above text file:
[23.75, 25.0, 22.375, 0, 23.4, 22.6]
I wrote a code but it doesn't seem to work for all case types and I'm not sure what went wrong. Can someone help?
Here is the code I wrote:
def temp_record(filename):
input_file = open(filename,'r')
contents = input_file.read().split("\n")
sum_val = 0
lis = []
for string in contents:
split_str = string.split(" ")
for i in range(len(split_str)):
if split_str[i] == '':
split_str[i] = 0
else:
split_str[i] = float(split_str[i])
ans = (sum(split_str)/len(split_str))
if ans == 0.0:
ans = 0
lis.append(ans)
return lis
When you do contents = input_file.read().split("\n") you get an additional element in contents list that gets computed to 0.
You can fix this like this:
def temp_record(filename):
input_file = open(filename, 'r')
# read all lines
contents = input_file.readlines()
sum_val = 0
lis = []
for string in contents:
# lines end in \n use rstrip to remove it
split_str = string.rstrip().split(" ")
for i in range(len(split_str)):
if split_str[i] == '':
split_str[i] = 0
else:
split_str[i] = float(split_str[i])
ans = (sum(split_str) / len(split_str))
if ans == 0.0:
ans = 0
lis.append(ans)
return lis
but this can be much shorter:
def temp_record(filename):
result = []
with open(filename, 'r') as fp:
for line in fp:
temps = line.split()
avg_temp = sum(map(float, temps)) / len(temps) if temps else 0
result.append(avg_temp if avg_temp > 0 else 0)
return result
or even shorter if you want to play golfcode:
def temp_record2(filename):
with open(filename, 'r') as fp:
return list(map(lambda x: x if x > 0 else int(x), [sum(map(float, line.split())) / len(line.split()) if line.split() else 0 for line in fp]))
Perhaps the hidden test that fails is with an input like:
-1 1
0
30
The first two days do have recorded temperatures, but their average is 0. Following the format of using floats for all other averages, the average should be 0.0, not 0 (as that would imply no temperature was collected for the day, when in fact one was).
If this is the issue, this could be fixed:
def temp_record(filename):
input_file = open(filename,'r')
contents = input_file.read().split("\n")
sum_val = 0
lis = []
for string in contents:
split_str = string.split(" ")
for i in range(len(split_str)):
if split_str[i] == '':
split_str[i] = 0
else:
split_str[i] = float(split_str[i])
ans = (sum(split_str)/len(split_str))
if string == '':
ans = 0
lis.append(ans)
return lis

Calculate some averages in .txt python

I have a .txt-file called ecc.txt. It contains more than 8000 lines of numbers. I want to count the average of every 360 lines in that file.
Here is the code:
import math
f = open(r'ecc.txt').read()
data = []
for line in data:
sum = 0
for i in range (len(data)):
if i % 360 != 0:
sum = sum + ecc[i]
else:
average = sum / 360
print(average)
sum = 0
When I am running it, nothing happens. I didn't get any results. The code just running and end without any result.
Is there something wrong with this code?
Thank you.
avg_dict = {}
with open('ecc.txt') as f:
data = f.read().split(' ')
sum = 0
i = 0
for str_number in data:
sum += int(str_number)
i += 1
if i % 360 == 0:
avg_dict[i] = sum/360
sum = 0
I've assumed that your file text has an empty space as separator. Otherwise, you can change the sep value in the split method. If there is not separator change data as:
data = list(f.read())
You code would work with some changes:
import math
data=[]
with open(r'ecc.txt') as f:
for i in f:
data.append(int(i))
for line in data:
sum = 0
for i in range (len(data)):
if i%360 !=0:
sum = sum + ecc[i]
else:
average = sum/360
print(average)
sum=0
Be aware though, that this code doesn't include values for each 360th element (i guess it's not a problem for an average) and also you don't have average for last elements

Iterating on a file and comparing values using python

I have a section of code that opens files containing information with wavenumber and intensity like this:
500.21506 -0.00134
500.45613 0.00231
500.69720 -0.00187
500.93826 0.00129
501.17933 -0.00049
501.42040 0.00028
501.66147 0.00114
501.90253 -0.00036
502.14360 0.00247
My code attempts to parse the information between two given wavelengths: lowwav and highwav. I would like to print only the intensities of the wavenumbers that fall between lowwav and highwav. My entire code looks like:
import datetime
import glob
path = '/Users/140803/*'
files = glob.glob(path)
for line in open('sfit4.ctl', 'r'):
x = line.strip()
if x.startswith('band.1.nu_start'):
a,b = x.split('=')
b = float(b)
b = "{0:.3f}".format(b)
lowwav = b
if x.startswith('band.1.nu_stop'):
a,b = x.split('=')
b = float(b)
b = "{0:.3f}".format(b)
highwav = b
with open('\\_spec_final.t15', 'w') as f:
with open('info.txt', 'rt') as infofile:
for count, line in enumerate(infofile):
lat = float(line[88:94])
lon = float(line[119:127])
year = int(line[190:194])
month = int(line[195:197])
day = int(line[198:200])
hour = int(line[201:203])
minute = int(line[204:206])
second = int(line[207:209])
dur = float(line[302:315])
numpoints = float(line[655:660])
fov = line[481:497] # field of view?
sza = float(line[418:426])
snr = 0.0000
roe = 6396.2
res = 0.5000
lowwav = float(lowwav)
highwav = float(highwav)
spacebw = (highwav - lowwav)/ numpoints
d = datetime.datetime(year, month, day, hour, minute, second)
f.write('{:>12.5f}{:>12.5f}{:>12.5f}{:>12.5f}{:>8.1f}'.format(sza,roe,lat,lon,snr)) # line 1
f.write("\n")
f.write('{:>10d}{:>5d}{:>5d}{:>5d}{:>5d}{:>5d}'.format(year,month,day,hour,minute,second)) # line 2
f.write("\n")
f.write( ('{:%Y/%m/%d %H:%M:%S}'.format(d)) + "UT Solar Azimuth:" + ('{:>6.3f}'.format(sza)) + " Resolution:" + ('{:>6.4f}'.format(res)) + " Duration:" + ('{:>6.2f}'.format(dur))) # line 3
f.write("\n")
f.write('{:>21.13f}{:>26.13f}{:>24.17e}{:>12f}'.format(lowwav,highwav,spacebw,numpoints)) # line 4
f.write("\n")
with open(files[count], 'r') as g:
for line in g:
wave_no, tensity = [float(item) for item in line.split()]
if lowwav <= wave_no <= highwav :
f.write(str(tensity) + '\n')
g.close()
f.close()
infofile.close()
Right now, everything works fine except the last part where I compare wavelengths and print out the intensities corresponding to wavelengths between lowwav and highwav. No intensities are printing into the output file.
The problem is that when you iterate over the file g you are effectively moving its "file pointer". So the second loop finds the file at the beginning and doesn't produce any value.
Secondly, you are producing all these nums lists, but every iteration of the lop shadows the previous value, making it unreachable.
Either you want to collected all the values and then iterate on those:
with open(files[count], 'r') as g:
all_nums = []
for line in g:
all_nums.append([float(item) for item in line.split()])
for nums in all_nums:
if (lowwav - nums[0]) < 0 or (highwav - nums[0]) > 0 :
f.write(str(nums[1]))
f.write('\n')
else: break
Or just do everything inside the first loop (this should be more efficient):
with open(files[count], 'r') as g:
for line in g:
nums = [float(item) for item in line.split()]
if (lowwav - nums[0]) < 0 or (highwav - nums[0]) > 0 :
f.write(str(nums[1]))
f.write('\n')
else: break
Also note that the break statement will stop the processing of the values when the condition is false for the first time, you probably want to remove it.
This said, note that your code prints all values where nums[0] that either are bigger than lowwav, or smaller than highwav, which means that if lowwav < highwav every number value will be printed. You probably want to use and in place of or if you want to check whether they are between lowwav and highwav. Moreover in python you could just write lowwav < nums[0] < highwav for this.
I would personally use the following:
with open(files[count], 'r') as g:
for line in g:
wave_no, intensity = [float(item) for item in line.split()]
if lowwav < wave_no < highwav:
f.write(str(intensity)+'\n')
Split each line by white space, unpack the split list to two names wavelength and intensity.
[line.split() for line in r] makes
500.21506 -0.00134
500.45613 0.00231
to
[['500.21506', '-0.00134'], ['500.45613', '0.00231']]
This listcomp [(wavelength, intensity) for wavelength,intensity in lol if low <= float(wavelength) <= high] returns
[('500.21506', '-0.00134'), ('500.45613', '0.00231')]
If you join them back [' '.join((w, i)) for w,i in [('500.21506', '-0.00134'), ('500.45613', '0.00231')] you get ['500.21506 -0.00134', '500.45613 0.00231']
Use listcomp to filter out wavelength. And join wavelength and intensity back to string and write to file.
with open('data.txt', 'r') as r, open('\\_spec_final.t15', 'w') as w:
lol = (line.split() for line in r)
intensities = (' '.join((wavelength, intensity)) for wavelength,intensity in lol if low <= float(wavelength) <= high)
w.writelines(intensities)
If you want to output to terminal do print(list(intensities)) instead of w.writelines(intensities)
Contents of data.txt;
500.21506 -0.00134
500.45613 0.00231
500.69720 -0.00187
500.93826 0.00129
501.17933 -0.00049
501.42040 0.00028
501.66147 0.00114
501.90253 -0.00036
502.14360 0.00247
Output when low is 500 and high is 50`;
['500.21506 -0.00134', '500.45613 0.00231']

get value of one column by another column in csv file python

I have my csv file like this:
ID Value Amount
---- ------- -------
A 3 2
A 4 4
B 3 6
C 5 5
A 3 2
B 10 1
I want sum of column "Value" or "Amount" by the column "ID". I want the output that for 'A' it should give me sum of all values which is related to A means [3+4+3].
My Code:
import csv
file = open(datafile.csv)
rows=csv.DictReader(file)
summ=0.0
count=0
for r in rows:
summ=summ+int(r['Value'])
count=count+1
print "Mean for column Value is: ",(summ/count)
file.close()
You can use a defaultdict of list to group the data by the ID column. Then use sum() to produce the totals.
from collections import defaultdict
with open('datafile.csv') as f:
d = defaultdict(list)
next(f) # skip first header line
next(f) # skip second header line
for line in f:
id_, value, amount = line.split()
d[id_].append((int(value), int(amount)))
# sum and average of column Value by ID
for id_ in d:
total = sum(t[0] for t in d[id_])
average = total / float(len(d[id_]))
print('{}: sum = {}, avg = {:.2f}'.format(id_, total, average))
Output for your input data:
A: sum = 10, avg = 3.33
C: sum = 5, avg = 5.00
B: sum = 13, avg = 6.50
It can also be done with a standard Python dictionary. The solution is very similar:
with open('datafile.csv') as f:
d = {}
next(f) # skip first header line
next(f) # skip second header line
for line in f:
id_, value, amount = line.split()
d[id_] = d.get(id_, []) + [(int(value), int(amount))]
# sum and average of column Value by ID
for id_ in d:
total = sum(t[0] for t in d[id_])
average = total / float(len(d[id_]))
print('{}: sum = {}, avg = {:.2f}'.format(id_, total, average))

looping until the number of cells changed is neglible

This is probably a simple question, but it's driving me crazy! I have a python code that performs cellular automata on a land use grid. I've made a dictionary of cell id: land use code imported from a text file. I've also import of the adjacent neighbors of each cell from a text file. For each cell in the nested loop, I pick out the highest value, count the highest value of the neighboring cells. If this value is greater than the processing cell and occurred more than 4 times, then I update the dictionary for that cell id. The land use codes are ranked in priority. You will see < 6 in the code below...6 is water and wetlands which I do not want to be changed. The first time I run the code, 7509 cells changed land use based on adjacent neighbors land uses. I can comment out the reading the dictionary text file and run it again, then around 5,000 cells changed. Run it again, then even less and so on. What I would like to do is run this in a loop until only 0.0001 of the total cells change, after that break the loop.
I've tried several times using iterators like "for r in range(999)---something big; If End_Sim > count: break". But it breaks after the first one, because the count goes back to zero. I've tried putting the count = 0 inside the loop and it adds up...I want it to start back over every time so the number of cells gets less and less. I'm stump...hopefully this is trivial to somebody!
Here's my code (it's a clean slate...I've deleted my failed attempts to create the number of simulations loop):
import sys, string, csv
#Creating a dictionary of FID: LU_Codes from external txt file
text_file = open("H:\SWAT\NC\FID_Whole_Copy.txt", "rb")
#Lines = text_file.readlines()
FID_GC_dict = dict()
reader = csv.reader(text_file, delimiter='\t')
for line in reader:
FID_GC_dict[line[0]] = int(line[1])
text_file.close()
#Importing neighbor list file for each FID value
Neighbors_file = open("H:\SWAT\NC\Pro_NL_Copy.txt","rb")
Entries = Neighbors_file.readlines()
Neighbors_file.close()
Neighbors_List = map(string.split, Entries)
#print Neighbors_List
#creates a list of the current FID
FID = [x[0] for x in Neighbors_List]
#Calculate when to end of one sweep
Tot_Cells = len(FID)
End_Sim = int(0.0001*Tot_Cells)
gridList = []
for nlist in Neighbors_List:
row = []
for item in nlist:
row.append(FID_GC_dict[item])
gridList.append(row)
#print gridList
#Performs cellular automata rules on land use grid codes
i = iter(FID)
count = 0
for glist in gridList:
Cur_FID = i.next()
Cur_GC = glist[0]
glist.sort()
lr_Value = glist[-1]
if lr_Value < 6:
tie_LR = glist.count(lr_Value)
if tie_LR >= 4 and lr_Value > Cur_GC:
FID_GC_dict[Cur_FID] = lr_Value
#print "The updated gridcode for FID ", Cur_FID, "is ", FID_GC_dict[Cur_FID]
count += 1
print count
Thanks for any help!
use a while loop:
cnt_total = 1234 # init appropriately
cnt_changed = cnt_total
p = 0.001
while (cnt_changed > cnt_total*p):
# your code here
# remember to update the cnt_changed variable
Try with the while break statements
initialization stuff
while(1):
...
if x < 0.0001:
break
...
http://docs.python.org/tutorial/controlflow.html#break-and-continue-statements-and-else-clauses-on-loops
I fixed the code so the simulations stop once the number of cells change is less than 0.0001 of the total cells. I had the while loop in the wrong place. Here's the code if anyone is interested in cellular automata.
import sys, string, csv
#Creating a dictionary of FID: LU_Codes from external txt file
text_file = open("H:\SWAT\NC\FID_Whole_Copy.txt", "rb")
#Lines = text_file.readlines()
FID_GC_dict = dict()
reader = csv.reader(text_file, delimiter='\t')
for line in reader:
FID_GC_dict[line[0]] = int(line[1])
text_file.close()
#Importing neighbor list file for each FID value
Neighbors_file = open("H:\SWAT\NC\Pro_NL_Copy.txt","rb")
Entries = Neighbors_file.readlines()
Neighbors_file.close()
Neighbors_List = map(string.split, Entries)
#print Neighbors_List
#creates a list of the current FID
FID = [x[0] for x in Neighbors_List]
#print FID
#Calculate when to end the simulations (neglible change in land use)
tot_cells = len(FID)
end_sim = tot_cells
p = 0.0001
#Performs cellular automata rules on land use grid codes
while (end_sim > tot_cells*p):
gridList = []
for nlist in Neighbors_List:
row = []
for item in nlist:
row.append(FID_GC_dict[item])
gridList.append(row)
#print gridList
i = iter(FID)
count = 0
for glist in gridList:
Cur_FID = i.next()
Cur_GC = glist[0]
glist.sort()
lr_Value = glist[-1]
if lr_Value < 6:
tie_LR = glist.count(lr_Value)
if tie_LR >= 4 and lr_Value > Cur_GC:
FID_GC_dict[Cur_FID] = lr_Value
print "The updated gridcode for FID ", Cur_FID, "is ", FID_GC_dict[Cur_FID]
count += 1
end_sim = count
print count

Categories