Here's the code I tried. It gave me a syntax error highlighting 'data'. Any help? The .txt file has 4 columns if that's of any help.
# Question snippet as posted (indentation was lost in the paste): reads one
# comma-separated field per line of 'hsp.txt' and tallies the values.
def file():
file = open('hsp.txt', 'r')
# NOTE(review): two statements share this line without a separator; this is
# the SyntaxError reported at 'data'.
col = [] data = file.readlines()
# Starts at index 1 and stops before the final index, so neither the first
# nor the last line of the file is processed.
for i in range(1,len(data)-1):
# NOTE(review): index 5 is the sixth field, but the post says the file has
# 4 columns, which would raise IndexError - confirm the intended column.
col.append(int(float(data[i].split(',')[5])))
return col
# Tallies col into a list of 11 counters, using each value as the list index;
# a value above 10 raises IndexError.
def hist(col):
handspan = []
for i in range(11):
handspan.append(0)
for i in (col):
handspan[i] += 1
return handspan
# Driver: load the column, build the tally, print both.
col = file()
handspan = hist(col)
print(col)
print(handspan)
It is because your line
col = [] data = file.readlines()
should be on two separate lines:
col = []
data = file.readlines()
You can try this, it worked for me.
Since it is a histogram, it yields a dictionary.
Better answers are welcome!
import string
def list_from_file(filename):
    """Return every whitespace-separated token of a text file, in order.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of strings obtained by splitting the whole file content on
        runs of whitespace. An empty file yields an empty list.
    """
    # The context manager guarantees the handle is closed after reading.
    with open(filename, 'r') as myfile:
        return myfile.read().split()
def myhist(col):
    """Count case-insensitive word frequencies.

    Each item is lower-cased and stripped of leading/trailing punctuation
    and whitespace before it is counted.

    Args:
        col: Iterable of word strings.

    Returns:
        A dict mapping each normalised word to its number of occurrences.
        Items that are empty after stripping (for example a lone "--") are
        skipped instead of being counted under the empty-string key.
    """
    # Built once: the set of characters to strip does not change per word.
    strip_chars = string.punctuation + string.whitespace
    hist = {}
    for word in col:
        word = word.lower().strip(strip_chars)
        if not word:
            continue
        hist[word] = hist.get(word, 0) + 1
    return hist
# Driver: tokenise 'em.txt', build the word histogram and print the dict.
col = list_from_file('em.txt')
colf = myhist(col)
print(colf)
Related
I am doing an assignment where I have a CSV file and I have to generate matplotlib graphs. I have a column full of genres of different movies, with the genres separated by | (vertical bar). I have to create a dictionary of these genres, each listed once without repeats, so that I can assign them to the appropriate movies. How would I go about doing that?
this is what I have so far:
import csv
from matplotlib import pyplot as plt
# Question snippet as posted (indentation was lost in the paste).
# NOTE(review): in a normal (non-raw) string literal "\U" begins a Unicode
# escape, so "C:\Users..." is rejected by Python 3 at compile time; a raw
# string (r"...") or forward slashes avoids that.
# `dp` holds the whole file text but is not used again in this snippet.
dp = open("C:\Users\jayan\OneDrive\Desktop\IMDB_movie_metadata_for_assignment_6.csv",'r', encoding='utf8').read()
with open("C:\Users\jayan\OneDrive\Desktop\IMDB_movie_metadata_for_assignment_6.csv", errors = 'ignore') as csvfile:
for line in csvfile:
# Plain split on ',' - quoted fields that contain commas are not handled.
fields = line.split(",")
# Field 4 is split on '|' into the individual genre names.
newField = (fields[4]).split("|")
# `newerField` is assigned but not used again in this snippet.
newerField = fields[16].strip()
movies = (fields[0])
# A new one-entry dict is bound to genre_dictionary each time this statement
# runs, keyed by the tuple of genres; nothing is accumulated across lines.
genre_dictionary = {tuple(newField):(movies)}
# NOTE(review): whether this print sits inside the loop cannot be told from
# the flattened paste.
print(genre_dictionary)
I will suppose your csv has two columns: genres and movies. Tell me if it's not the case. You can do something like:
def find_col_ind(columns):
    """Locate the 'genres' and 'movie_title' columns in a CSV header row.

    Args:
        columns: Sequence of header cell strings.

    Returns:
        A tuple ``(ind_col_genres, ind_col_movie)`` of zero-based positions.
        A name that is absent is reported as ``-1``; if a name occurs more
        than once, the last occurrence wins.
    """
    ind_col_genres = -1
    ind_col_movie = -1
    for ind, col in enumerate(columns):
        # A header produced by line.split(',') keeps the trailing newline on
        # its last cell, so compare against the stripped name.
        name = col.strip()
        if name == 'movie_title':
            ind_col_movie = ind
        elif name == 'genres':
            ind_col_genres = ind
    return ind_col_genres, ind_col_movie
def create_dict(filename):
    """Group movie titles by genre from a CSV file with a header row.

    The header must contain 'genres' and 'movie_title' columns (located with
    find_col_ind). Each genres cell holds one or more genre names separated
    by '|'; the title is appended to the list of every genre it names.

    Args:
        filename: Path of the CSV file.

    Returns:
        A dict mapping each genre string to the list of titles, in file
        order. An empty file yields an empty dict.

    Raises:
        ValueError: If the header lacks either required column.
    """
    import csv  # local import: this snippet has no import section of its own

    data = dict()
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        header = next(reader, None)
        if header is None:
            return data
        ind_col_genres, ind_col_movie = find_col_ind(header)
        if ind_col_genres < 0 or ind_col_movie < 0:
            # Using -1 as an index would silently read the last column.
            raise ValueError(
                "header must contain 'genres' and 'movie_title' columns"
            )
        needed = max(ind_col_genres, ind_col_movie)
        for columns in reader:
            if len(columns) <= needed:
                continue  # blank or truncated row
            movie = columns[ind_col_movie].strip('\nÂ\xa0 ')
            for genre in columns[ind_col_genres].split('|'):
                data.setdefault(genre, []).append(movie)
    return data
if __name__ == "__main__":
    # Script entry point: build the genre -> titles mapping and show it.
    data = create_dict('test.csv')
    print(data)
# Question snippet as posted (indentation was lost in the paste): averages
# the temperature column of a small "MM-DD TT.T" log file.
def get_mean_temperature(filename):
with open(filename) as f:
lst = f.read().splitlines()
# Drop the header line.
lst.pop(0)
result = 0
count = 0
for element in lst:
count += 1
# element[6:] is the text after "MM-DD ", e.g. "28.0".
# NOTE(review): int() rejects a string such as "28.0" with ValueError;
# float() accepts it.
el = int(element[6:])
result += el
print(result)
mn_tem = result / count
# NOTE(review): `mmn_tem` is never assigned (the line above binds `mn_tem`),
# so this return would raise NameError.
return mmn_tem
if __name__ == "__main__":
filename = "temp_log.txt"
# Writes the sample log that the function then reads back.
with open(filename, "w") as f:
f.write("DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n")
mean_temperature = get_mean_temperature(filename)
print(f"{mean_temperature:.1f}")
This is the code that I am trying to get working. What I have to do here is find the mean of the temperatures given in the text file, which in this case contains "DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n"
Each line of the text is formatted as MM-DD TT.T
Please help me get this code to work.
from statistics import mean
# Sample log: a header row followed by "MM-DD TT.T" readings.
data = "DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n"

# Drop the header, ignore empty rows, and keep the second field as a float.
temperatures = list(
    map(lambda row: float(row.split()[1]), filter(None, data.split("\n")[1:]))
)
temperatures_mean = mean(temperatures)

# One value per output line: the readings first, then their mean.
print(temperatures, temperatures_mean, sep="\n")
Output:
[28.0, 33.5, 27.0]
29.5
Or, as your original function:
from statistics import mean
def get_mean_temperature(filepath):
    """Return the mean of the temperatures logged in a text file.

    The first line is treated as a header and skipped. Every following
    non-blank line is split on whitespace and its second field is parsed as
    the temperature.

    Args:
        filepath: Path of the log file.

    Returns:
        The arithmetic mean of the temperatures.

    Raises:
        statistics.StatisticsError: If the file has no data rows.
    """
    with open(filepath, "r") as f:
        rows = f.read().splitlines()[1:]
    # Testing row.strip() also skips whitespace-only lines, which would
    # otherwise fail with IndexError on split()[1].
    temperatures = [float(row.split()[1]) for row in rows if row.strip()]
    return mean(temperatures)
For some reason my code (below) raises a ValueError which I cannot understand. Please evaluate my code too. You can find the project I am trying to do at
http://www.ocr.org.uk/Images/226767-unit-j276-03-programming-project-task-1-sample-non-exam-assessment.pdf
# Question snippet as posted (indentation was lost in the paste).
# Path of the comma-separated records file shared by Read() and Save().
fileid = "details for nea.txt"
# One module-level list per field; index i across all lists is one record.
ID = []
surname = []
forename = []
dob = []
addr = []
addrT = []
addrTh = []
addrF = []
addrFi = []
homNum = []
gend = []
tutor = []
schoolEm = []
# Loads the file into the module-level lists above.
def Read():
file = open(fileid, "r")
# NOTE(review): readline() returns ONE line as a single string, so the loop
# below iterates over its characters, not over the lines of the file.
Record = file.readline()
for line in Record:
line = line.strip()
# With `line` being one character, split(',') yields a single item and the
# 13-name unpacking raises ValueError - the error the post asks about.
A,B,C,D,E,F,G,H,I,J,K,L,M = line.split(',')
ID.append(A)
surname.append(B)
forename.append(C)
dob.append(D)
addr.append(E)
addrT.append(F)
addrTh.append(G)
addrF.append(H)
addrFi.append(I)
homNum.append(J)
gend.append(K)
tutor.append(L)
schoolEm.append(M)
file.close()
# Rewrites the file from the module-level lists, one comma-joined record per
# line; opening with "w" truncates the existing file first.
def Save():
Record = []
file = open(fileid,"w")
for i in range(len(ID)):
Record.append(ID[i] +","+surname[i]+","+forename[i]+","+dob[i]+","+addr[i]+","+addrT[i]+","+addrTh[i]+","+addrF[i]+","+addrFi[i]+","+homNum[i]+","+gend[i]+","+tutor[i]+","+schoolEm[i]+"\n")
file.writelines(Record)
file.close()
# Driver: load the records and show two of the columns.
Read()
print(ID)
print(surname)
The Text File I used goes as following:
01,abe,fat,01/02/02,5,Stoney Lane,Stur,Dorset,DR101LM,0123,M,C,email#sc. The lists titled addr, addrT represent the different lines of address.
put last three lines inside main. Value error should go away
Would anyone be able to help me with the below? I'm trying to create a program that can open the "notepad.txt" file and calculate the average price for the month of October.
notepad.txt
10-15-2012:3.886
10-22-2012:3.756
10-29-2012:3.638
# Question snippet as posted (indentation was lost in the paste).
infile = open('notepad.txt', 'r')
def clean_data():
line1 = infile.readline()
# Despite the name, split1 is still one string: the line minus its newline.
split1 = line1.rstrip('\n')
# NOTE(review): split1[0] is the first CHARACTER of the line, so this split
# yields a one-item list and the second `del items[0]` below would raise
# IndexError.
items = split1[0].split('-')
del items[0]
del items[0]
master = []
# NOTE(review): list + str is not supported (TypeError); split1 is a str.
master = master + split1 + items
master = list(map(float, master))
print(master)
# NOTE(review): `total` is never assigned anywhere in this snippet.
print(total)
line1 = infile.readline()
clean_data()
this prints and returns the average
def clean_data(infile):
    """Print and return the average of the prices in an open text file.

    Each non-blank line is expected to look like ``MM-DD-YYYY:price``; the
    text after the last ':' is parsed as a float.

    Args:
        infile: Open text file (any object providing ``readlines()``).

    Returns:
        The arithmetic mean of all prices as a float.

    Raises:
        ZeroDivisionError: If the input contains no price lines.
        ValueError: If a price field is not a valid number.
    """
    total = 0.0
    num = 0
    for line in infile.readlines():
        line = line.strip()
        if not line:
            # A blank line (typically the last one) has no price to parse.
            continue
        total += float(line.split(":")[-1])
        num += 1
    average = total / num
    print(average)
    return average
def sum_data(filename='notepad.txt', month=10, year=None):
    """Print and return the average price for one month of a price log.

    Every line of the file is expected to look like ``MM-DD-YYYY:price``.
    Blank lines and lines without a ':' or a three-part date are ignored.

    Args:
        filename: Path of the log file. Defaults to 'notepad.txt'.
        month: Month number to average. Defaults to 10 (October).
        year: Optional four-digit year; ``None`` accepts any year.

    Returns:
        The mean price of the matching lines as a float.

    Raises:
        ValueError: If no line matches the requested month/year, or if a
            date or price field is not numeric.
    """
    total = 0.0
    count = 0
    with open(filename, 'r') as infile:
        for line in infile:
            # Split on the separator rather than on fixed character offsets.
            date_part, sep, price = line.strip().partition(':')
            if not sep:
                continue
            fields = date_part.split('-')
            if len(fields) != 3:
                continue
            mm, _dd, yyyy = fields
            if int(mm) != month:
                continue
            if year is not None and int(yyyy) != year:
                continue
            total += float(price)
            count += 1
    if count == 0:
        raise ValueError('no prices found for the requested period')
    average = total / count
    print(average)
    return average
If you want to use Pandas:
from io import StringIO
import pandas as pd
# Inline stand-in for notepad.txt: one "MM-DD-YYYY:price" record per line.
notepadtxt = StringIO("""10-15-2012:3.886
10-22-2012:3.756
10-29-2012:3.638""")

# A plain ':' separator needs neither a regex nor the python engine, and it
# avoids writing the invalid string escape '\:'.
df = pd.read_csv(notepadtxt, sep=':', header=None)
# State the month-first layout explicitly so the dates are never guessed.
df[0] = pd.to_datetime(df[0], format='%m-%d-%Y')
df = df.set_index(0)

# Average per calendar month. Grouping on monthly periods avoids the
# resample('M') alias, which newer pandas releases deprecate.
monthly_mean = df.groupby(df.index.to_period('M')).mean()
print(monthly_mean.values[0][0])
Output:
3.7600000000000002
The following vanilla Python code should suffice:
# Module-level handle that clean_data() reads through the global name `infile`.
infile = open('notepad.txt', 'r')
def clean_data(source=None):
    """Print and return the average price found in a ``date:price`` listing.

    Args:
        source: Optional iterable of text lines. When omitted, the
            module-level ``infile`` handle is read.

    Returns:
        The mean of the price fields (the text after the first ':') as a
        float.

    Raises:
        ZeroDivisionError: If there are no price lines.
    """
    lines = infile if source is None else source
    values = []
    for line in lines:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines, e.g. at the end of the file
        values.append(float(line.split(':')[1]))
    avg_price = sum(values) / len(values)
    print(avg_price)
    return avg_price
# Run the calculation, then release the module-level file handle.
clean_data()
infile.close()
First of all, I have recently started studying python. So I am a beginner.
1111 1 3
1112 1 2
1113 2 3
1114 1 7
1115 7 2
Assume I have these values in the text file. As in the title, I want to search a specific column for a specific value in the txt file then return the specific value's row except for the searched "value".
Example:
Search the first column for 1113 value.
Then return 2 3 as:
x = 2
y = 3
Try something like this:
# Look up the row whose first field is '1113' and keep its next two values.
x = y = None  # stay None when no row matches
with open('file.txt', 'r') as f:
    for line in f:
        fields = line.split()
        # Compare the whole first field: a prefix test would also accept
        # longer keys such as '11130'.
        if fields and fields[0] == '1113':
            x = int(fields[1])
            y = int(fields[2])
            break  # the first matching row is the answer
Replace 'file.txt' with your file name and '1113' with the value you are looking for. Good luck!
You could try this:
import sys
# `filename` must be defined before this snippet runs.
with open(filename, "r") as f:
    for line in f:
        # split() with no argument tolerates repeated blanks and drops the
        # trailing newline, so the last field prints cleanly.
        parts = line.split()
        if parts and parts[0] == "1113":
            print("x={0} y={1}".format(parts[1], parts[2]))
            sys.exit(0)
There is a csv module which will do all the dirty work for you:
import csv
def find(filename, number):
    """Return the 2nd and 3rd fields of the first row whose 1st field matches.

    Args:
        filename: Path of a space-delimited text file with three columns.
        number: Key to look for in the first column; compared as
            ``str(number)``.

    Returns:
        A ``(col2, col3)`` tuple of strings, or ``None`` when no row matches.
    """
    wanted = str(number)
    # newline='' lets the csv module do its own line-ending handling.
    with open(filename, newline='') as file:
        reader = csv.DictReader(
            file,
            fieldnames=['col1', 'col2', 'col3'],
            delimiter=' ',
            # Without this, two blanks in a row produce an empty field and
            # shift the remaining columns.
            skipinitialspace=True,
        )
        for line in reader:
            if line['col1'] == wanted:
                return (line['col2'], line['col3'])
    return None
if __name__ == '__main__':
    # find() returns None when the key is absent; fall back to a
    # (None, None) pair so the unpacking still works.
    (x, y) = find('data.txt', 1113) or (None, None)
    print(x, y)
I'd prefer:
search_text = '1113'
x = y = None  # remain None if search_text is not found
# `FileName` must already hold the path of the data file.
with open(FileName) as f:
    for i in f:
        fields = i.split()
        # Match on the whole first field and convert the following fields,
        # not single characters of the line.
        if fields and fields[0] == search_text:
            x = int(fields[1])
            y = int(fields[2])
            break
This will allow you to search for any value on any column.
import csv
def find(reader, col, val):
    """Return the first row whose field at index ``col`` equals ``val``.

    Args:
        reader: Iterable of rows (for example a ``csv.reader``).
        col: Column index to inspect in each row.
        val: Value to match, compared with ``==``.

    Returns:
        The first matching row, exactly as produced by ``reader``.

    Raises:
        ValueError: If no row has ``val`` at index ``col``.
    """
    for row in reader:
        # Rows too short to have this column (e.g. blank lines) cannot match.
        if -len(row) <= col < len(row) and row[col] == val:
            return row
    raise ValueError('Value {} not found in column {}'.format(val, col))
def findexcept(reader, col, val):
    """Return the matching row with the searched column removed.

    Args:
        reader: Iterable of rows, passed through to find().
        col: Index of the column to search and then drop.
        val: Value to look for in that column.

    Returns:
        A new list holding the remaining fields of the first matching row.

    Raises:
        ValueError: Propagated from find() when no row matches.
    """
    r = find(reader, col, val)
    remaining = list(r)  # copy, so the row handed back by find() is untouched
    remaining.pop(col)
    return remaining
with open('searchdata.txt') as f:
    # Strip each line first so trailing blanks do not become empty fields.
    lines = (line.strip() for line in f)
    c = csv.reader(lines, delimiter=' ', skipinitialspace=True)
    # The reader is lazy, so the search has to run while the file is open.
    try:
        x = findexcept(c, 0, '1114')
        print(x)
    except ValueError as ve:
        print('Error: {}'.format(ve))