Why doesn't my script display the newly created file? - python

I am writing a Python script that opens a file and for each test case, it should write the total number of items gathered in that case and display the total amount $ that it costs.
When running my code:
f = open("shopping.txt", "r")
outFile = open("results.txt", "w")
t = int(f.readline().strip())
for z in range(t):
# Assuming prices are unique
myList = {}
items = int(f.readline().strip())
ind = 1
# Read each line for each item
for i in range(items):
p, w = map(int, f.readline().strip().split())
myList[p] = [w, ind]
ind+=1
weights = []
F = int(f.readline().strip())
for i in range(F):
weights.append(int(f.readline().strip()))
RES = []
values = []
for weight in weights:
sortedPrice = sorted(myList.keys())[::-1]
m = 0
p = 0
tmp = []
# Grabbing all possible results using greedy method
# Max price stored into values array and item # in RES array.
for i in range(len(myList)):
R = []
s = 0
p = 0
if myList[sortedPrice[i]][0]<=weight:
s=myList[sortedPrice[i]][0]
p=sortedPrice[i]
R+=myList[sortedPrice[i]][1],
for j in range(i+1, len(myList)):
if myList[sortedPrice[j]][0]+s<=weight:
s+=myList[sortedPrice[j]][0]
p+=sortedPrice[j]
R+=myList[sortedPrice[j]][1],
if m<p:
m = p
tmp = R
tmp.sort()
RES.append(tmp)
values.append(m)
outFile.write("Test Case %d\n" %(z+1))
outFile.write("Total Price: %d\n" %(sum(values)))
outFile.write("Member Items:\n")
for i in range(len(RES)):
outFile.write("%d: %s" %(i+1, " ".join(map(str, RES[i]))))
f.close()
outFile.close()
I get the result:
Test Case 1
Total Price: 0
Member Items:
Test Case 2
Total Price: 0
Member Items:
When I expected something like this:
Test Case1
Total Price 72
Member Items
1: 1
Test Case2
Total Price 568
Member Items
1: 3 4
2: 3 6
3: 3 6
4: 3 4 6
I am relatively new to programming in general so if there is any insight anyone could give for my code, I would appreciate it. Adding to this, my guess is that the sum() and/or the map commands may be breaking and not working as intended, as I'm writing to the file to get the total value and items of the case.

Related

Python - Read file I/O - Find average of each day temperature records

I have to write a Python function which records temperatures for different days. The temperature for the same day is stored on the same line.The first day is considered to be day 1, and each subsequent line of the file records the following days in sequential order (e.g. the 3rd line of data is collected from the 3rd day). If there was no data collected for a given day then the entire line will be blank. For example, The text file contains the following inputs for 6 days:
23 24.5
25
22.25 22.5
23.4
25.2 20.0
This file contains data collected for 6 days.
I am to define a function temp_record which takes a filename as a parameter. It reads the data from the parameter file and analyses the temperatures. The function should return a list of average temperatures per day. For example, the function returns the following list for the above text file:
[23.75, 25.0, 22.375, 0, 23.4, 22.6]
I wrote a code but it doesn't seem to work for all case types and I'm not sure what went wrong. Can someone help?
Here is the code I wrote:
def temp_record(filename):
input_file = open(filename,'r')
contents = input_file.read().split("\n")
sum_val = 0
lis = []
for string in contents:
split_str = string.split(" ")
for i in range(len(split_str)):
if split_str[i] == '':
split_str[i] = 0
else:
split_str[i] = float(split_str[i])
ans = (sum(split_str)/len(split_str))
if ans == 0.0:
ans = 0
lis.append(ans)
return lis
When you do contents = input_file.read().split("\n") you get an additional element in contents list that gets computed to 0.
You can fix this like this:
def temp_record(filename):
input_file = open(filename, 'r')
# read all lines
contents = input_file.readlines()
sum_val = 0
lis = []
for string in contents:
# lines end in \n use rstrip to remove it
split_str = string.rstrip().split(" ")
for i in range(len(split_str)):
if split_str[i] == '':
split_str[i] = 0
else:
split_str[i] = float(split_str[i])
ans = (sum(split_str) / len(split_str))
if ans == 0.0:
ans = 0
lis.append(ans)
return lis
but this can be much shorter:
def temp_record(filename):
result = []
with open(filename, 'r') as fp:
for line in fp:
temps = line.split()
avg_temp = sum(map(float, temps)) / len(temps) if temps else 0
result.append(avg_temp if avg_temp > 0 else 0)
return result
or even shorter if you want to play golfcode:
def temp_record2(filename):
with open(filename, 'r') as fp:
return list(map(lambda x: x if x > 0 else int(x), [sum(map(float, line.split())) / len(line.split()) if line.split() else 0 for line in fp]))
Perhaps the hidden test that fails is with an input like:
-1 1
0
30
The first two days do have recorded temperatures, but their average is 0. Following the format of using floats for all other averages, the average should be 0.0, not 0 (as that would imply no temperature was collected for the day, when in fact one was).
If this is the issue, this could be fixed:
def temp_record(filename):
input_file = open(filename,'r')
contents = input_file.read().split("\n")
sum_val = 0
lis = []
for string in contents:
split_str = string.split(" ")
for i in range(len(split_str)):
if split_str[i] == '':
split_str[i] = 0
else:
split_str[i] = float(split_str[i])
ans = (sum(split_str)/len(split_str))
if string == '':
ans = 0
lis.append(ans)
return lis

Python: How to read space delimited data with different length in text file and parse it

I have space delimited data in a text file look like the following:
0 1 2 3
1 2 3
3 4 5 6
1 3 5
1
2 3 5
3 5
each line has different length.
I need to read it starting from line 2 ('1 2 3')
and parse it and get the following information:
Number of unique data = (1,2,3,4,5,6)=6
Count of each data:
count data (1)=3
count data (2)=2
count data (3)=5
count data (4)=1
count data (5)=4
count data (6)=1
Number of lines=6
Sort the data in descending order:
data (3)
data (5)
data (1)
data (2)
data (4)
data (6)
I did this:
file=open('data.txt')
csvreader=csv.reader(file)
header=[]
header=next(csvreader)
print(header)
rows=[]
for row in csvreader:
rows.append(row)
print(rows)
After this step, what should I do to get the expected results?
I would do something like this:
from collections import Counter
with open('data.txt', 'r') as file:
lines = file.readlines()
lines = lines[1:] # skip first line
data = []
for line in lines:
data += line.strip().split(" ")
counter = Counter(data)
print(f'unique data: {list(counter.keys())}')
print(f'count data: {list(sorted(counter.most_common(), key=lambda x: x[0]))}')
print(f'number of lines: {len(lines)}')
print(f'sort data: {[x[0] for x in counter.most_common()]}')
A simple brute force approach:
nums = []
counts = {}
for row in open('data.txt'):
if row[0] == '0':
continue
nums.extend( [int(k) for k in row.rstrip().split()] )
print(nums)
for n in nums:
if n not in counts:
counts[n] = 1
else:
counts[n] += 1
print(counts)
ordering = list(sorted(counts.items(), key=lambda k: -k[1]))
print(ordering)
Here is another approach
def getData(infile):
""" Read file lines and return lines 1 thru end"""
lnes = []
with open(infile, 'r') as data:
lnes = data.readlines()
return lnes[1:]
def parseData(ld):
""" Parse data and print desired results """
unique_symbols = set()
all_symbols = dict()
for l in ld:
symbols = l.strip().split()
for s in symbols:
unique_symbols.add(s)
cnt = all_symbols.pop(s, 0)
cnt += 1
all_symbols[s] = cnt
print(f'Number of Unique Symbols = {len(unique_symbols)}')
print(f'Number of Lines Processed = {len(ld)}')
for symb in unique_symbols:
print(f'Number of {symb} = {all_symbols[symb]}')
print(f"Descending Sort of Symbols = {', '.join(sorted(list(unique_symbols), reverse=True))}")
On executing:
infile = r'spaced_text.txt'
parseData(getData(infile))
Produces:
Number of Unique Symbols = 6
Number of Lines Processed = 6
Number of 2 = 2
Number of 5 = 4
Number of 3 = 5
Number of 1 = 3
Number of 6 = 1
Number of 4 = 1
Descending Sort of Symbols = 6, 5, 4, 3, 2, 1

The most efficient way to find the point where maximum number of intervals overlap with python

Consider I have a log register for users' entry and exit times from some server. I need to find the time at which there are maximum sessions. If there are more than one possible answer, the smallest should be chosen. The input contains the number of sessions in the first line.
Example
Input:
5
4 5
0 3
1 9
7 8
2 6
Output:
2
I tried this script:
from collections import Counter, OrderedDict
load = Counter()
with open("input.txt", "r") as f:
n = int(f.readline())
for i in range(n):
session = f.readline()
session = session.split()
load.update(range(int(session[0]), int(session[1])+1))
load = load.most_common()
i = 0
max = load[0][1]
candidates = []
while load[i][1] == max:
candidates.append(load[i][0])
i += 1
print(min(candidates))
First, I use Counter() to count the occurrences of all points. Second, I use load = load.most_common() to order the resulting dict by occurrences. Finally I find the minimum value of all keys with the corresponding maximum value (= # of occurrences).
In fact, if Counter() returned a dict ordered by key, it would be much simpler.
Anyway, it is my home task and it runs more than 1 second (given time limit) on one of the test inputs. What could be done to speed it up? I've read about interval trees but I'm not sure if it is relevant.
Let's say ins and outs are the log in and log out times:
ins = [4,0,1,7,2]
outs = [5,3,9,8,6]
Combine them in one sorted list with the sign of the number indicating whether it is an "arrival" (positive) or "departure" (negative):
times = sorted(ins + [-x for x in outs], key=abs)
Now, walk through the list and count the "arrivals" and "departures" as they happen:
lmax = -1
logged = 0
for t in times:
if t >= 0:
logged += 1
if logged > lmax:
tmax = t
lmax = logged
else:
logged -= 1
print(tmax, lmax)
#2 3
The fast solution for this is just storing +1, -1 on the enter/exit times - then sort the dict-keys and sum over it incrementally, followed by getting the maximal value:
data = """5
4 5
0 3
1 9
7 8
2 6"""
with open("input.txt", "w") as f:
f.write(data)
d = {}
with open("input.txt", "r") as f:
next(f)
for line in f:
if line.strip():
start, stop = map(int,line.strip().split())
d.setdefault(start,0)
d[start] += 1
d.setdefault(stop,0)
d[stop] -= 1
maxx = 0
s = 0
max_idx = 0
# iteratively summ over sorted times from dict
for idx,key in enumerate(sorted(d)):
s += d[key]
if maxx < s: # remembert new max_idx and max
maxx = s
max_idx = idx
print(max_idx)
You can use a defaultdict(int) if that is still too slow to solve your challenge.

I'm getting index out of list Error [duplicate]

This question already has answers here:
Does "IndexError: list index out of range" when trying to access the N'th item mean that my list has less than N items?
(7 answers)
Closed 5 years ago.
def calcDistance(x1, y1, x2, y2):
distance = sqrt((x1-x2)**2 + (y1-y2)**2)
return distance
def make_dict():
return defaultdict(make_dict)
# Capture 1 input from the command line.
# NOTE: sys.argv[0] is the name of the python file
# Try "print sys.argv" (without the quotes) to see the sys.argv list
# 1 input --> the sys.argv list should have 2 elements.
if (len(sys.argv) == 2):
print "\tOK. 1 command line argument was passed."
# Now, we'll store the command line inputs to variables
myFile = str(sys.argv[1])
else:
print 'ERROR: You passed', len(sys.argv)-1, 'input parameters.'
quit()
# Create an empty list:
cities = []
# Create an empty dictionary to hold our (x,y) coordinate info:
myCoordinates = {}
# Open our file:
myFile = '%s.csv' % (myFile)
with open(myFile, 'rb') as csvfile:
spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
for row in spamreader:
# Only read rows that do NOT start with the "%" character.
if (row[0][0] != '%'):
# print row
id = int(row[0])
isHome = int(row[1])
x = float(row[2])
y = float(row[3])
myCoordinates[id] = {'x': x, 'y': y}
# print myCoordinates[id]['x']
# print myCoordinates[id]['y']
if (isHome == 1):
# Store this id as the home city
homeCity = id
cities.append(id)
print homeCity
print cities
# Create a TSP tour.
# VERSION 1 -- Using range() and for() loops:
myTour = []
for i in range(homeCity, len(cities)+1):
myTour.append(i)
for i in range(1, homeCity+1):
myTour.append(i)
print myTour
# VERSION 2 -- Using only range()
'''
firstPart = range(homeCity, len(cities)+1)
secondPart = range(1, homeCity+1)
myTour = firstPart + secondPart
print myTour
'''
tau = defaultdict(make_dict)
for i in cities:
# print "distance[%d][%d] = 0" % (i, i)
tau[i][i] = 0
for j in range(i+1, len(cities)+1):
# print "distance[%d][%d] > 0" % (i, j)
tau[i][j] = calcDistance(myCoordinates[i]['x'], myCoordinates[i]['y'], myCoordinates[j]['x'], myCoordinates[j]['y'])
# print "distance[%d][%d] = distance[%d][%d]" % (j, i, i, j)
tau[j][i] = tau[i][j]
# FIXME -- Edit the code below...
# Calculate the total distance of our TSP solution:
i = myTour[i]
for myIndex in range(1, len(myTour)+1):
j = myTour[myIndex]
print j
Function to calculate cost based on distance. Need to be modified.
def cost(rate,j):
cost = rate * j
cost = cost(1000,j)
print cost
Also I need to calculate cost based on distance traveled. with myIndex i am getting an error of list index out of range. I am not knowing what exactly is going there. The j is like total distance calculated.
List in python have 0 based index. If you add n elements to a list the indexes are from 0 to n-1. But you are running the loop from 1 to n. So, it getting list index out of range error.
You should do this-
for myIndex in range(0, len(myTour)):
j = myTour[myIndex]
print(j)
If you are getting list index out of range error then change the loop where you are getting the error and accessing a list using 1-based indexing, from range(1,len(some_list)+1) to range(0,len(some_list)). Or you can simply write range(len(some_list)). When there is no start value passed in the range function it starts from 0 by default.
To calculate cost try this -
for myIndex in range(0, len(myTour)):
j = myTour[myIndex]
cost = rate * j
print(cost)
Set the value of rate before starting the loop.

Index Error: Index out of bounds when using numpy in python

I have a code that works fine when I have small CSV's of data but errors out when I try to run large CSV's through it. In essence this code is supposed to place 3 CSV's worth of data into 3 separate dictionaries, combine those dictionaries into a master dictionary, and then preform arithmetic operations on dictionary. The input CSV's look something like this:
time A B C D
0 3 4 6 4
.001 4 6 7 8
.002 4 6 7 3
The code that I am using is the code displayed below. The error occurs within the lines 47 and 65 where I am try to preform arithmetic with the dictionary. Any explanation as to why this is going on is greatly appreciated.
import numpy
Xcoord = {}
time = []
with open ('Nodal_QuardnetsX2.csv', 'r') as f:
f.readline() # Skips first line
for line in f:
values = [s.strip()for s in line.split(',')]
Xcoord[values[0]] = map(float, values[1:])
time.append(values[0])
Ycoord = {}
with open ('Nodal_QuardnetsY2.csv', 'r') as f:
f.readline() # Skips first line
for line in f:
values = [s.strip()for s in line.split(',')]
Ycoord[values[0]] = map(float, values[1:])
Zcoord = {}
with open ('Nodal_QuardnetsZ2.csv', 'r') as f:
f.readline() # Skips first line
for line in f:
values = [s.strip()for s in line.split(',')]
Zcoord[values[0]] = map(float, values[1:])
# Create a master dictionary of the form {'key':[[x, y, z], [x, y, z]}
CoordCombo = {}
for key in Xcoord.keys():
CoordnateList = zip(Xcoord[key], Ycoord[key], Zcoord[key])
CoordCombo[key] = CoordnateList
counter = 0
keycount1 = 0
keycount2 = 0.001
difference = []
NodalDisplacements = {}
#Find the difference between the x, y, and z quardnets relative to that point in time
while keycount2 <= float(values[0]):
Sub = numpy.subtract(CoordCombo[str(keycount2)][counter], CoordCombo[str(keycount1)][counter])
counter = counter + 1
difference.append(Sub)
NodalDisplacements[keycount1] = Sub
keycount1 = keycount1 + 0.001
keycount2 = keycount2 + 0.001
counter = 0
keycount3 = 0
keycount4 = 0.001
Sum = []
breakpoint = float(values[0])-0.001
while keycount4 <= breakpoint:
Add = numpy.sum(NodalDisplacements[keycount4][counter], NodalDisplacements[keycount3][counter])
Sum.append(Add)
keycount3 = keycount3 + 0.001
keycount4 = keycount4 + 0.001
counter = counter + 1
if counter == 2:
counter = 0
print Sum
probably a line of your csv file does not contain 5 elements or the line is empty.
In your logic I would suggest to use
for line in f:
line = line.strip()
if not line: continue
if len(values) != N_COLS: continue # or error...
# other ...

Categories