Creating a histogram with .txt file using Python? - python

Here's the code I tried. It gave me a syntax error highlighting 'data'. Any help? The .txt file has 4 columns if that's of any help.
# Reads 'hsp.txt' and collects one integer per data line into a list.
def file():
file = open('hsp.txt', 'r')
# NOTE(review): the next line holds two statements with no separator between them;
# this is the reported SyntaxError. They need to be on separate lines.
col = [] data = file.readlines()
# Starts at index 1 and stops before len(data)-1, so the first and the last line are not read.
for i in range(1,len(data)-1):
# Takes the comma-separated field at index 5 and converts it via float, then int.
# NOTE(review): the question says the file has 4 columns; index 5 would then be out of range — confirm.
col.append(int(float(data[i].split(',')[5])))
return col
def hist(col, bins=11):
    """Count how many times each integer value occurs in ``col``.

    Args:
        col: Iterable of integers, each expected to lie in ``range(bins)``.
        bins: Number of counters to allocate. Defaults to 11, i.e. the
            values 0 through 10.

    Returns:
        A list of length ``bins`` whose element ``i`` is the number of
        times ``i`` appears in ``col``.

    Raises:
        IndexError: If a value falls outside ``range(bins)``.
    """
    handspan = [0] * bins
    for value in col:
        # A negative index would silently wrap around and increment the
        # wrong counter, so reject it the same way an oversized value is.
        if not 0 <= value < bins:
            raise IndexError(f"value {value} is outside range(0, {bins})")
        handspan[value] += 1
    return handspan
col = file()
handspan = hist(col)
print(col)
print(handspan)

It is because your line
col = [] data = file.readlines()
should be on two separate lines:
col = []
data = file.readlines()

You can try this, it worked for me.
Since this is a histogram, it yields a dictionary mapping each word to its count.
Better answers are welcome!
import string
def list_from_file(filename):
    """Return the whitespace-separated words of a text file.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of the words in the file, in order of appearance.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, 'r') as myfile:
        # str.split() already returns a fresh list, so no copy loop is needed.
        return myfile.read().split()
def myhist(col):
    """Build a case-insensitive word-frequency histogram.

    Args:
        col: Iterable of word strings.

    Returns:
        A dict mapping each lower-cased word, with surrounding punctuation
        and whitespace removed, to the number of times it occurs.
    """
    # Built once instead of on every iteration.
    strippable = string.punctuation + string.whitespace
    hist = {}
    for word in col:
        word = word.lower().strip(strippable)
        # Tokens made only of punctuation strip down to '' and are not words.
        if not word:
            continue
        hist[word] = hist.get(word, 0) + 1
    return hist
col = list_from_file('em.txt')
colf = myhist(col)
print(colf)

Related

Dictionary from csv file to group different names in a column

I am doing an assignment where I have a CSV file and I have to generate matplotlib graphs. I have a column full of genres of different movies, with the genres separated by | (vertical bar). I have to create a dictionary of these genres, each listed once without repeats, to assign them to the appropriate movies. How would I go about doing that?
this is what I have so far:
import csv
from matplotlib import pyplot as plt
# Reads the whole CSV into `dp`; `dp` is not used again in this snippet.
# NOTE(review): "\U" inside a normal string literal starts a unicode escape in Python 3;
# a raw string (r"...") or forward slashes would avoid that — confirm.
dp = open("C:\Users\jayan\OneDrive\Desktop\IMDB_movie_metadata_for_assignment_6.csv",'r', encoding='utf8').read()
# Opens the same file a second time, ignoring undecodable bytes.
with open("C:\Users\jayan\OneDrive\Desktop\IMDB_movie_metadata_for_assignment_6.csv", errors = 'ignore') as csvfile:
for line in csvfile:
# Plain comma split: a quoted field that itself contains a comma will be cut in two.
fields = line.split(",")
# Field 4 is split on "|" (presumably the genres column — confirm against the CSV header).
newField = (fields[4]).split("|")
newerField = fields[16].strip()
movies = (fields[0])
# Builds a one-entry dict keyed by the tuple of genres; each assignment replaces the previous dict.
genre_dictionary = {tuple(newField):(movies)}
print(genre_dictionary)
I will suppose your csv has two columns: genres and movies. Tell me if it's not the case. You can do something like:
def find_col_ind(columns):
    """Locate the 'genres' and 'movie_title' columns in a CSV header row.

    Args:
        columns: List of header cell strings.

    Returns:
        A tuple ``(ind_col_genres, ind_col_movie)``. An index is -1 when
        the corresponding column name is not present.
    """
    ind_col_genres = -1
    ind_col_movie = -1
    for ind, col in enumerate(columns):
        # The last header cell still carries the line's trailing newline
        # when the row came from a plain split(','), so strip before comparing.
        name = col.strip()
        if name == 'movie_title':
            ind_col_movie = ind
        elif name == 'genres':
            ind_col_genres = ind
    return ind_col_genres, ind_col_movie
def create_dict(filename):
    """Group movie titles by genre from a CSV file.

    The first row must be a header containing the columns ``genres`` and
    ``movie_title``. Each genres cell holds one or more genre names
    separated by ``|``.

    Args:
        filename: Path of the CSV file.

    Returns:
        A dict mapping each genre to the list of movie titles that carry
        it, in file order. An empty file yields an empty dict.

    Raises:
        ValueError: If the header lacks ``genres`` or ``movie_title``.
    """
    import csv  # local import keeps this snippet self-contained

    data = dict()
    # newline='' is what the csv module expects from the file object.
    with open(filename, 'r', newline='') as csvfile:
        # csv.reader honours quoting, so a title such as "Monsters, Inc."
        # is not cut in two the way a plain split(',') would cut it.
        reader = csv.reader(csvfile)
        header = [name.strip() for name in next(reader, [])]
        if not header:
            return data
        try:
            ind_col_genres = header.index('genres')
            ind_col_movie = header.index('movie_title')
        except ValueError:
            raise ValueError(
                "header must contain 'genres' and 'movie_title' columns"
            ) from None
        needed = max(ind_col_genres, ind_col_movie)
        for columns in reader:
            # Blank or truncated rows have nothing to contribute.
            if len(columns) <= needed:
                continue
            movie = columns[ind_col_movie].strip('\nÂ\xa0 ')
            for genre in columns[ind_col_genres].split('|'):
                data.setdefault(genre, []).append(movie)
    return data
if __name__ == "__main__":
data = create_dict('test.csv')
print(data)

How do I find the mean?

# Computes the mean of the values found from character 6 onward on each line after the header.
def get_mean_temperature(filename):
with open(filename) as f:
lst = f.read().splitlines()
# Drop the header line.
lst.pop(0)
result = 0
count = 0
for element in lst:
count += 1
# NOTE(review): int() rejects strings such as "28.0" with ValueError; float() accepts them.
el = int(element[6:])
result += el
print(result)
# NOTE(review): count is 0 when the file holds only a header, which raises ZeroDivisionError here.
mn_tem = result / count
# NOTE(review): `mmn_tem` is never assigned (the variable above is `mn_tem`), so this raises NameError.
return mmn_tem
if __name__ == "__main__":
filename = "temp_log.txt"
with open(filename, "w") as f:
f.write("DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n")
mean_temperature = get_mean_temperature(filename)
print(f"{mean_temperature:.1f}")
This is the code that I am trying to fix. What I have to do here is find the mean of the temperatures given in the text file, which in this case contains "DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n"
Each line of the text is formatted as MM-DD TT.T
Please help me get this code to work.
from statistics import mean
# Sample log: a header line followed by "MM-DD TT.T" records.
data = "DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n"
# Skip the header, split each record into fields, drop empty lines and
# keep the second field (the temperature) as a float.
temperatures = [
    float(fields[1])
    for fields in map(str.split, data.splitlines()[1:])
    if fields
]
temperatures_mean = mean(temperatures)
print(temperatures)
print(temperatures_mean)
Output:
[28.0, 33.5, 27.0]
29.5
Or, as your original function:
from statistics import mean
def get_mean_temperature(filepath):
    """Return the mean temperature recorded in a log file.

    The first line of the file is a header. Every following non-blank
    line is expected to be ``MM-DD TT.T``, i.e. a date and a temperature
    separated by whitespace.

    Args:
        filepath: Path of the log file.

    Returns:
        The arithmetic mean of the temperatures as a float.

    Raises:
        statistics.StatisticsError: If the file has no data lines.
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If a temperature field is not a number.
    """
    with open(filepath, "r") as f:
        next(f, None)  # skip the header line
        # Splitting first and testing the resulting list also skips lines
        # that contain only whitespace, not just empty ones.
        temperatures = [
            float(fields[1]) for fields in map(str.split, f) if fields
        ]
    return mean(temperatures)

Can somebody explain and solve my Value Error when reading in from a text file

For some reason my code (below) raises a ValueError which I cannot understand. Please review my code too. You can find the project I am trying to do at
http://www.ocr.org.uk/Images/226767-unit-j276-03-programming-project-task-1-sample-non-exam-assessment.pdf
fileid = "details for nea.txt"
ID = []
surname = []
forename = []
dob = []
addr = []
addrT = []
addrTh = []
addrF = []
addrFi = []
homNum = []
gend = []
tutor = []
schoolEm = []
# Loads records from the file named by `fileid` into the module-level lists.
def Read():
file = open(fileid, "r")
# NOTE(review): readline() returns a single line as one string, not a list of lines.
Record = file.readline()
# NOTE(review): iterating over a string yields one character at a time, so each
# `line` below is a single character. Splitting it cannot produce 13 values, which
# would explain the reported ValueError. Iterating over the file object itself
# (or over readlines()) would yield whole lines instead.
for line in Record:
line = line.strip()
# Expects exactly 13 comma-separated fields; any other count raises ValueError.
A,B,C,D,E,F,G,H,I,J,K,L,M = line.split(',')
ID.append(A)
surname.append(B)
forename.append(C)
dob.append(D)
addr.append(E)
addrT.append(F)
addrTh.append(G)
addrF.append(H)
addrFi.append(I)
homNum.append(J)
gend.append(K)
tutor.append(L)
schoolEm.append(M)
file.close()
def Save():
    """Write every stored record back to ``fileid``, one line per record.

    Reads the module-level parallel lists (``ID``, ``surname``, ...,
    ``schoolEm``) and writes each record as 13 comma-separated fields
    followed by a newline. The file is overwritten.
    """
    columns = (ID, surname, forename, dob, addr, addrT, addrTh, addrF,
               addrFi, homNum, gend, tutor, schoolEm)
    # zip(*columns) regroups the parallel lists into one tuple per record.
    records = [",".join(fields) + "\n" for fields in zip(*columns)]
    # The context manager closes the file even if writing fails.
    with open(fileid, "w") as file:
        file.writelines(records)
Read()
print(ID)
print(surname)
The Text File I used goes as following:
01,abe,fat,01/02/02,5,Stoney Lane,Stur,Dorset,DR101LM,0123,M,C,email#sc. The lists titled addr, addrT represent the different lines of address.
put last three lines inside main. Value error should go away

Python: Average Price per Year

Would anyone be able to help me with the below? I'm trying to create a program that can open the "notepad.txt" file and calculate the average price for the month of October.
notepad.txt
10-15-2012:3.886
10-22-2012:3.756
10-29-2012:3.638
infile = open('notepad.txt', 'r')
# Attempts to parse one line of the module-level `infile` into a list of floats.
def clean_data():
line1 = infile.readline()
# rstrip returns a string (the line without its trailing newline), not a list.
split1 = line1.rstrip('\n')
# NOTE(review): split1 is a str, so split1[0] is only its first character; `items`
# then has a single element and the second `del items[0]` below raises IndexError.
items = split1[0].split('-')
del items[0]
del items[0]
master = []
# NOTE(review): list + str is not supported and raises TypeError.
master = master + split1 + items
master = list(map(float, master))
print(master)
# NOTE(review): `total` is not defined anywhere in this snippet.
print(total)
line1 = infile.readline()
clean_data()
This prints and returns the average:
def clean_data(infile):
    """Print and return the average price found in a price log.

    Each non-blank line is expected to end with ``:price``; the text
    after the last colon is parsed as a float.

    Args:
        infile: An open text file, or any iterable of line strings.

    Returns:
        The mean of all prices.

    Raises:
        ZeroDivisionError: If there are no non-blank lines.
        ValueError: If the text after the last colon is not a number.
    """
    # Blank lines (for example a trailing empty line) carry no price and
    # would otherwise make float('') fail.
    prices = [
        float(line.strip().split(":")[-1]) for line in infile if line.strip()
    ]
    average = sum(prices) / len(prices)
    print(average)
    return average
def sum_data(filename='notepad.txt', month=10, year=None):
    """Print and return the average price for one month of a price log.

    Each line of the file is expected to look like ``MM-DD-YYYY:price``.

    Args:
        filename: Path of the log file. Defaults to ``'notepad.txt'``.
        month: Month number to average. Defaults to 10 (October).
        year: Four-digit year to restrict the average to, or ``None`` to
            accept that month in any year.

    Returns:
        The mean price of the matching lines.

    Raises:
        ValueError: If no line matches, or a line's date or price is not
            numeric.
    """
    total, count = 0.0, 0
    with open(filename, 'r') as infile:
        # Walk every line; a single readline() would only ever look at the
        # first record.
        for line in infile:
            date_part, sep, price = line.strip().partition(':')
            if not sep:
                continue  # blank line or no price separator
            fields = date_part.split('-')
            if len(fields) != 3:
                continue
            # Parsing the fields avoids fixed character offsets, which
            # break as soon as the line layout shifts by one column.
            if int(fields[0]) != int(month):
                continue
            if year is not None and int(fields[2]) != int(year):
                continue
            total += float(price)
            count += 1
    if not count:
        raise ValueError('no records found for the requested month')
    average = total / count
    print(average)
    return average
If you want to use Pandas:
from io import StringIO
import pandas as pd
# Sample contents of notepad.txt: one "MM-DD-YYYY:price" record per line.
notepadtxt = StringIO("""10-15-2012:3.886
10-22-2012:3.756
10-29-2012:3.638""")
# A plain ':' separator needs no escaping; '\:' is an invalid escape
# sequence in a normal string literal and triggers a warning on recent
# Python versions.
df = pd.read_csv(notepadtxt, sep=':', header=None)
# Spell out the date layout so month and day are never swapped.
df[0] = pd.to_datetime(df[0], format='%m-%d-%Y')
df = df.set_index(0)
# Group the rows by calendar month and average each group. Period 'M'
# avoids the resample offset alias 'M', which newer pandas deprecates.
monthly_mean = df.groupby(df.index.to_period('M')).mean()
october_mean = monthly_mean.values[0][0]
print(october_mean)
Output:
3.7600000000000002
The following vanilla Python code should suffice:
infile = open('notepad.txt', 'r')
def clean_data(lines=None):
    """Print and return the average price in a ``date:price`` log.

    Args:
        lines: Iterable of ``date:price`` line strings. When omitted, the
            module-level ``infile`` file object is read.

    Returns:
        The mean of all prices.

    Raises:
        ZeroDivisionError: If there are no non-blank lines.
        IndexError: If a non-blank line contains no ``:``.
    """
    if lines is None:
        lines = infile
    # Blank lines (such as a trailing empty line) hold no record; skipping
    # them avoids an IndexError on the missing price field.
    values = [
        float(line.strip().split(':')[1]) for line in lines if line.strip()
    ]
    avg_price = sum(values) / len(values)
    print(avg_price)
    return avg_price
clean_data()
infile.close()

In python, how can I search a specific column for a specific value in a txt file then return the specific value's row?

First of all, I have recently started studying python. So I am a beginner.
1111 1 3
1112 1 2
1113 2 3
1114 1 7
1115 7 2
Assume I have these values in the text file. As in the title, I want to search a specific column for a specific value in the txt file then return the specific value's row except for the searched "value".
Example:
Search the first column for 1113 value.
Then return 2 3 as:
x = 2
y = 3
Try something like this:
# Scan file.txt for the row whose first column is 1113 and read the other
# two columns into x and y.
with open('file.txt', 'r') as f:
    for line in f:
        fields = line.split()
        # Compare the whole first field; startswith('1113') would also
        # match longer values such as 11130.
        if fields and fields[0] == '1113':
            x = int(fields[1])
            y = int(fields[2])
Replace 'file.txt' with your file name and '1113' with the value you are looking for. Good luck!
You could try this:
import sys
# Print the second and third columns of the first row whose first column
# is 1113, then stop the program. `filename` must be set beforehand.
with open(filename, "r") as f:
    for line in f:
        # split() with no argument collapses runs of whitespace and drops
        # the trailing newline, so parts[2] prints cleanly.
        parts = line.split()
        if parts and parts[0] == "1113":
            print("x={0} y={1}".format(parts[1], parts[2]))
            sys.exit(0)
There is a csv module which will do all the dirty work for you:
import csv
def find(filename, number):
    """Look up a row by its first column in a space-separated file.

    Args:
        filename: Path of a text file with three space-separated columns.
        number: Value to look for in the first column; compared as text.

    Returns:
        A tuple ``(col2, col3)`` of strings from the first matching row,
        or ``None`` when no row matches.
    """
    wanted = str(number)
    with open(filename, newline='') as handle:
        reader = csv.DictReader(handle,
                                fieldnames=['col1', 'col2', 'col3'],
                                delimiter=' ',
                                # tolerate more than one space between columns
                                skipinitialspace=True)
        for line in reader:
            if line['col1'] == wanted:
                return (line['col2'], line['col3'])
    return None
if __name__ == '__main__':
(x, y) = find('data.txt', 1113) or (None, None)
print(x, y)
I'd prefer:
# Find the row whose first column equals search_text and read the other two
# columns into x and y. `FileName` must be set beforehand.
search_text = '1113'
with open(FileName) as f:
    for i in f:
        fields = i.split()
        # Compare the whole first field, not its first character, and take
        # x and y from the split fields rather than from characters of the
        # raw line.
        if fields and fields[0] == search_text:
            x = int(fields[1])
            y = int(fields[2])
This will allow you to search for any value on any column.
import csv
def find(reader, col, val):
    """Return the first row whose cell at index ``col`` equals ``val``.

    Args:
        reader: Iterable of rows, each a list of strings (for example a
            ``csv.reader``).
        col: Index of the column to search.
        val: Value to look for.

    Returns:
        The first matching row.

    Raises:
        ValueError: If no row has ``val`` in column ``col``.
    """
    for row in reader:
        try:
            cell = row[col]
        except IndexError:
            # Blank or short rows simply cannot match.
            continue
        if cell == val:
            return row
    # `col` is a column index, so the message says "column".
    raise ValueError('Value {} not found in column {}'.format(val, col))
def findexcept(reader, col, val):
    """Return the matching row without the searched column.

    Args:
        reader: Iterable of rows, passed through to ``find``.
        col: Index of the column to search and then leave out.
        val: Value to look for in that column.

    Returns:
        A new list holding the matching row's cells except the one at
        ``col``.

    Raises:
        ValueError: Propagated from ``find`` when nothing matches.
    """
    # Pop from a copy so the row object handed back by find() is untouched.
    remaining = list(find(reader, col, val))
    remaining.pop(col)
    return remaining
with open('searchdata.txt') as f:
lines = (line.strip() for line in f)
c = csv.reader(lines, delimiter=' ', skipinitialspace=True)
try:
x = findexcept(c, 0, '1114')
print(x)
except ValueError as ve:
print('Error: {}'.format(ve))

Categories