I have a problem. I'm trying to print a serie of lists in python to have it with a vertical align. My code is:
def show():
book = "data.txt"
f = open(book,'r')
line = f.readlines()
z = ''
l = []
x = []
i = 0
starting = '{:>4} {:>15} {:>15}'.format('Name', "Gender", "Year")
for p in line:
p = p.replace(',',' ')
x = p.index(' ')
name = p[0:x]
a = p.index('e 1')
gender = p[x:a+1]
year = p[(a+2):]
if len(name) == 3:
line_new = '{:>2} {:>15} {:>15}'.format(name, gender, year)
line_new = '{:>5} {:>15} {:>15}'.format(name, gender, year)
The problem is that I'm trying to have something like:
I want to put all the names of the left (and I don't have problems) then, under Gender, I want to create an equal list of Genders all on the same vertical and same thing for year
Untested, but try this:
import itertools
with open("data.txt") as data:
pep = [line.strip().split(',') for line in data]
widths = [len(max(r, key=len)) for r in itertools.izip_longest(*pep, fillvalue="")]
print "%-{0}%s%-{1}%s%-{2}%s".format(widths[0], widths[1], widths[2])\
%("Name", "Gender", "Year")
print "\n".join(["%-{0}%s%-{1}%s%-{2}%s".format(widths[0], widths[1], widths[2])\
%(attr[0], attr[1], attr[2]) for attr in pep])
I am unable to get the last 3 digits of the id number.
from datetime import datetime
def days_to_birthday(date):
datetime_object = datetime.strptime(date, "%Y-%m-%d")
date = datetime_object.date()
num_days = date.timetuple().tm_yday
return num_days
fo = open("Data.txt", 'r') # File containg data
content = [i.rsplit() for i in fo.readlines()]
names = [content[i][0] for i in range(len(content))]
dates = [content[i][1] for i in range(len(content))]
gender = [content[i][2] for i in range(len(content))]
id_numbers = []
mydict = dict(zip(dates, gender))
for i in mydict:
x = days_to_birthday(i)
if mydict.get(i) == "F":x += 500
x = str(x)
if len(x) < 3:x = x.zfill(3)
i = i.split('-')
out = i[0] + x
for i in range(len(names)):
print(f"{names[i]} {id_numbers[i]}" )
Running your code would raise SyntaxError: 'return' outside function.
Because Python is a whitespace sensitive language, return num_days must be further indented so it applies within the days_to_birthday function.
I am doing an assignment where I have a csvfile and I have to generate matplot lib graphs. I have a column full of genres of different movies, with the genres seperated by | (vertical bar). I have to create a dictionary of these genres, once, without repeating, to assign them to the appropriate movies. How will I go about doing that?
this is what I have so far:
import csv
from matplotlib import pyplot as plt
dp = open("C:\Users\jayan\OneDrive\Desktop\IMDB_movie_metadata_for_assignment_6.csv",'r', encoding='utf8').read()
with open("C:\Users\jayan\OneDrive\Desktop\IMDB_movie_metadata_for_assignment_6.csv", errors = 'ignore') as csvfile:
for line in csvfile:
fields = line.split(",")
newField = (fields[4]).split("|")
newerField = fields[16].strip()
movies = (fields[0])
genre_dictionary = {tuple(newField):(movies)}
I will suppose your csv has two columns: genres and movies. Tell me if it's not the case. You can do something like:
def find_col_ind(columns):
ind = -1
ind_col_genres = -1
ind_col_movie = -1
for col in columns:
ind += 1
if col == 'movie_title':
ind_col_movie = ind
elif col == 'genres':
ind_col_genres = ind
return ind_col_genres, ind_col_movie
def create_dict(filename):
with open(filename, 'r') as csvfile:
data = dict()
is_first = True
for line in csvfile:
columns = line.split(',')
if is_first:
ind_col_genres, ind_col_movie = find_col_ind(columns)
is_first = False
genres = columns[ind_col_genres].split('|')
movie = columns[ind_col_movie]
for genre in genres:
if genre in data:
data[genre].append(movie.strip('\nÂ\xa0 '))
data[genre] = [movie.strip('\nÂ\xa0 ')]
return data
if __name__ == "__main__":
data = create_dict('test.csv')
f = open('studMarks.txt', 'r')
marks = 0
# Sort out names, split the words then sort which order
for line in f:
words = line.split()
fname = words[0]
lname = words[1]
f = open('studMarks.txt', 'r')
sum = 0
count = 0
for line in f:
count += 1
sum += float(line.split()[2])
n = []
average = sum/count
When using the for loop it seems to display a value of 64.3, which I believe is for the total of the whole student list and average for all marks.
I need to produce the an output which displays the student names and average on the same line. I can do for the names but I cannot do it for the average as I keep getting errors. I don't know what to input in.
Below is the full solution. The with open line is a context manager and ensures that the file will get closed as soon as you exit the block. You should get used to using this style as it's the safe way to do I/O. The rest is just bog standard Python.
with open('studMarks.txt', 'r') as f:
for line in f:
words = line.split()
fname = words[0]
lname = words[1]
score = int(words[2])
key = f'{fname} {lname}'
count_key = f'{fname} {lname}_count'
latest_score = score + (marks.get(key)[0] if marks.get(key) else 0)
latest_count = 1 + (marks.get(key)[1] if marks.get(key) else 0)
marks[key] = (latest_score, latest_count )
for name, value in marks.items():
print(f'{name} : {value[0]/value[1]}')
This is an interesting problem.
From what I understand you have a text file that looks like this:
Johnny Ly 90 100 Adam Best 80 30 Tim Smith 10 20 in a file called studentMarks2.txt
and want output like this:
Johnny_Ly 95.0 Adam_Best 55.0 Tim_Smith 15.0
if that is true then it can be done using code like this without pandas or csv
though those would make this a lot easier.
fileContents = []
with open('studMarks2.txt','r') as f:
fileContents = f.read().split()
students = dict()
names = []
for content in fileContents:
if content.isnumeric():
studentKey = '_'.join(names)
currentScore = students.get(studentKey,[])
newScore = currentScore + [float(content)]
if len(names) == 2:
for student,scores in students.items():
avg = sum(scores)/len(scores)
print(student,avg,end=' ')
Broken down
This part reads the contents and splits on white space
fileContents = []
with open('studMarks2.txt','r') as f:
fileContents = f.read().split()
this part then iterates through the contents
storing the names as keys in a dictionary and putting the scores in a list
students = dict()
names = []
for content in fileContents:
if content.isnumeric():
studentKey = '_'.join(names)
currentScore = students.get(studentKey,[])
newScore = currentScore + [float(content)]
if len(names) == 2:
Lastly it iterates over the dictionary and output the avg on one line
for student,scores in students.items():
avg = sum(scores)/len(scores)
print(student,avg,end=' ')
This the link to see the image of the text file I need help with feature one, click here to see the imageFor some reason when I print out the average of the list, it only shows the number from the text file. I don't know where my mistake is. I think the issue might be when am appending the list.
f = open("votes.txt")
lines = f.read().split('\n')
issueA = {}
voteAa = {}
voteBa = {}
for line in lines:
col = line.split(' ')
issue = col[0]
voteA = float(col[1])
voteB = float(col[2])
if voteA in voteAa:
voteAa[issue] = [voteA]
if voteB in voteBa:
voteBa[issue] = [voteB]
choice = int(input("Which choice? "))
if choice == 1:
for issue in voteAa:
sumVote = sum(voteAa[issue])
avg = sumVote / len(voteAa[issue])
print("avg is ",sumVote)
how about this
f = open("votes.txt")
lines = f.read().split('\n')
issues = []
voteAa = []
voteBa = []
for line in lines:
col = line.split(' ')
issue = col[0]
voteA = float(col[1])
voteB = float(col[2])
avgA = voteAa.count(0)/len(issues) * 1.0
avgB = voteBa.count(0)/len(issues) * 1.0
if (avgA > avgB):
print('B higher than A'
elif (avgB > avbA):
print('A higher than B')
Since you need average by issue
remove for issue loop
and compute average in one line with special mean function like below
avg = mean(voteAa.values())
or, if you prefer keep it close to your code
sumVote = sum(voteAa.values())
avg = sumVote / len(voteAa)
also correct print line to
print("avg is ", avg)
Also you should just collect votes not need for if in the first loop.
So resulting script is
f = open("votes.txt")
lines = f.read().split('\n')
issueA = {}
voteAa = {}
voteBa = {}
for line in lines:
col = line.split()
issue = col[0]
voteA = float(col[1])
voteB = float(col[2])
voteAa[issue] = voteA
sumVote = sum(voteAa.values())
avg = sumVote / len(voteAa)
print("avg is ", avg)
I tried to keep close to your original code, and did not say replace dictionaries with arrays, it could be simplified even further if you like, say with pandas or even standard https://docs.python.org/3/library/csv.html
Would anyone be able to help me with the below? I'm trying to create a program that can open the "notepad.txt" file and calculate the average price for the month of October.
infile = open('notepad.txt', 'r')
def clean_data():
line1 = infile.readline()
split1 = line1.rstrip('\n')
items = split1[0].split('-')
del items[0]
del items[0]
master = []
master = master + split1 + items
master = list(map(float, master))
line1 = infile.readline()
this prints and returns the average
def clean_data(infile):
lines = infile.readlines()
total = 0.0
num = 0
for line in lines:
spl = line.strip().split(":")
total += float(spl[len(spl)-1])
num += 1
average = total/num
return average
def sum_data():
n,c = 0,0
with open('notepad.txt', 'r') as infile:
x = infile.readline()
# for october 10
if x[:3]=='10-' and x[6:10]=='2010';
n += float(x[12:])
c += 1
If you want to use Pandas:
from io import StringIO
import pandas as pd
notepadtxt = StringIO("""10-15-2012:3.886
df = pd.read_csv(notepadtxt, sep='\:',header=None, engine='python')
df[0] = pd.to_datetime(df[0])
The following vanilla Python code should suffice:
infile = open('notepad.txt', 'r')
def clean_data():
data = []
for line in infile:
values = []
for value in data:
avg_price = sum(values)/len(values)