I am learning Python, and I made a script which searches for several lines that contain the "keyword" and then write/print in a new file the (previously) selected list-line (I used a second argument to select the line from the list).
Everything went well until I tried to add a statement in case my selected list-line is > than the actual len(list) then the selected list_line = len(list); for whatever reason, it does not work.
Can anyone please point out to why it is not working, this my script. Thanks a million for the help. (Here is a link with an example of the files that I am using as an input)
import sys
import re
filename = sys.argv[1]
line_select = int(sys.argv[2])
newfile = str(filename) + ".3d"
openold = open(filename,"r")
opennew = open(newfile,"w")
rline = openold.readlines()
energies = []
line_number = 0
for line in rline:
line_number += 1
if re.search( r"SCF Done", line ):
words = line.split()
energy = float( words[4] )
energies.append(str(line_number) + " : " + "The energy of the molecule is %f kcal mol-1" % energy)
len_list = len(energies)
if line_select > len_list:
line_select = len_list
print >>opennew, energies[line_select]
openold.close()
opennew.close()
The last element of the energies list is actually energies[len_list-1], since Python indexes start from 0.
So if you want to print "the last element of energies", you need to initialize line_select to one less than the list length:
if line_select >= len_list:
line_select = len_list-1
Related
so -----2-----3----5----2----3----- would become -----4-----5----7----4----5-----
if the constant was 2 and etc. for every individual line in the text file.
This would involve splitting recognising numbers in between strings and adding a constant to them e.g ---15--- becomes ---17--- not ---35---.
(basically getting a guitar tab and adding a constant to every fret number)
Thanks. Realised this started out vague and confusing so sorry about that.
lets say the file is:
-2--3--5---7--1/n-6---3--5-1---5
and im adding 2, it should become:
-4--5--7---9--3/n-8---5--7-3---7
Change the filename to something relevant and this code will work. Anything below new_string needs to be change for what you need, eg writing to a file.
def addXToAllNum(int: delta, str: line):
values = [x for x in s.split('-') if x.isdigit()]
values = [str(int(x) + delta) for x in values]
return '--'.join(values)
new_string = '' # change this section to save to new file
for line in open('tabfile.txt', 'r'):
new_string += addXToAllNum(delta, line) + '\n'
## general principle
s = '-4--5--7---9--3 -8---5--7-3---7'
addXToAllNum(2, s) #6--7--9--11--10--7--9--5--9
This takes all numbers and increments by the shift regardless of the type of separating characters.
import re
shift = 2
numStr = "---1----9---15---"
print("Input: " + numStr)
resStr = ""
m = re.search("[0-9]+", numStr)
while (m):
resStr += numStr[:m.start(0)]
resStr += str(int(m.group(0)) + shift)
numStr = numStr[m.end(0):]
m = re.search("[0-9]+", numStr)
resStr += numStr
print("Result:" + resStr)
Hi You Can use that to betwine every line in text file add -
rt = ''
f = open('a.txt','r')
app = f.readlines()
for i in app:
rt+=str(i)+'-'
print " ".join(rt.split())
import re
c = 2 # in this example, the increment constant value is 2
with open ('<your file path here>', 'r+') as file:
new_content = re.sub (r'\d+', lambda m : str (int (m.group (0)) + c), file.read ())
file.seek (0)
file.write (new_content)
Problem is to return the name of the event that has the highest number of participants in this text file:
#Beyond the Imposter Syndrome
32 students
4 faculty
10 industries
#Diversifying Computing Panel
15 students
20 faculty
#Movie Night
52 students
So I figured I had to split it into a dictionary with the keys as the event names and the values as the sum of the integers at the beginning of the other lines. I'm having a lot of trouble and I think I'm making it too complicated than it is.
This is what I have so far:
def most_attended(fname):
'''(str: filename, )'''
d = {}
f = open(fname)
lines = f.read().split(' \n')
print lines
indexes = []
count = 0
for i in range(len(lines)):
if lines[i].startswith('#'):
event = lines[i].strip('#').strip()
if event not in d:
d[event] = []
print d
indexes.append(i)
print indexes
if not lines[i].startswith('#') and indexes !=0:
num = lines[i].strip().split()[0]
print num
if num not in d[len(d)-1]:
d[len(d)-1] += [num]
print d
f.close()
import sys
from collections import defaultdict
from operator import itemgetter
def load_data(file_name):
events = defaultdict(int)
current_event = None
for line in open(file_name):
if line.startswith('#'):
current_event = line[1:].strip()
else:
participants_count = int(line.split()[0])
events[current_event] += participants_count
return events
if __name__ == '__main__':
if len(sys.argv) < 2:
print('Usage:\n\t{} <file>\n'.format(sys.argv[0]))
else:
events = load_data(sys.argv[1])
print('{}: {}'.format(*max(events.items(), key=itemgetter(1))))
Here's how I would do it.
with open("test.txt", "r") as f:
docText = f.read()
eventsList = []
#start at one because we don't want what's before the first #
for item in docText.split("#")[1:]:
individualLines = item.split("\n")
#get the sum by finding everything after the name, name is the first line here
sumPeople = 0
#we don't want the title
for line in individualLines[1:]:
if not line == "":
sumPeople += int(line.split(" ")[0]) #add everything before the first space to the sum
#add to the list a tuple with (eventname, numpeopleatevent)
eventsList.append((individualLines[0], sumPeople))
#get the item in the list with the max number of people
print(max(eventsList, key=lambda x: x[1]))
Essentially you first want to split up the document by #, ignoring the first item because that's always going to be empty. Now you have a list of events. Now for each event you have to go through, and for every additional line in that event (except the first) you have to add that lines value to the sum. Then you create a list of tuples like (eventname) (numPeopleAtEvent). Finally you use max() to get the item with the maximum number of people.
This code prints ('Movie Night', 104) obviously you can format it to however you like
Similar answers to the ones above.
result = {} # store the results
current_key = None # placeholder to hold the current_key
for line in lines:
# find what event we are currently stripping data for
# if this line doesnt start with '#', we can assume that its going to be info for the last seen event
if line.startswith("#"):
current_key = line[1:]
result[current_key] = 0
elif current_key:
# pull the number out of the string
number = [int(s) for s in line.split() if s.isdigit()]
# make sure we actually got a number in the line
if len(number) > 0:
result[current_key] = result[current_key] + number[0]
print(max(result, key=lambda x: x[1]))
This will print "Movie Night".
Your problem description says that you want to find the event with highest number of participants. I tried a solution which does not use list or dictionary.
Ps: I am new to Python.
bigEventName = ""
participants = 0
curEventName = ""
curEventParticipants = 0
# Use RegEx to split the file by lines
itr = re.finditer("^([#\w+].*)$", lines, flags = re.MULTILINE)
for m in itr:
if m.group(1).startswith("#"):
# Whenever a new group is encountered, check if the previous sum of
# participants is more than the recent event. If so, save the results.
if curEventParticipants > participants:
participants = curEventParticipants
bigEventName = curEventName
# Reset the current event name and sum as 0
curEventName = m.group(1)[1:]
curEventParticipants = 0
elif re.match("(\d+) .*", m.group(1)):
# If it is line which starts with number, extract the number and sum it
curEventParticipants += int(re.search("(\d+) .*", m.group(1)).group(1))
# This nasty code is needed to take care of the last event
bigEventName = curEventName if curEventParticipants > participants else bigEventName
# Here is the answer
print("Event: ", bigEventName)
You can do it without a dictionary and maybe make it a little simpler if just using lists:
with open('myfile.txt', 'r') as f:
lines = f.readlines()
lines = [l.strip() for l in lines if l[0] != '#'] # remove comment lines and '\n'
highest = 0
event = ""
for l in lines:
l = l.split()
if int(l[0]) > highest:
highest = int(l[0])
event = l[1]
print (event)
My code looks like this:
Right now my code outputs two text file named absorbance.txt and energy.txt separately. I need to modify it so that it outputs only one file named combined.txt such that every line of combined.txt has two values separated by comma. The first value must be from absorbance.txt and second must be from energy.txt. ( I apologize if anyone is confused by my writting, Please comment if you need more clarification)
g = open("absorbance.txt", "w")
h = open("Energy.txt", "w")
ask = easygui.fileopenbox()
f = open( ask, "r")
a = f.readlines()
bg = []
wavelength = []
for string in a:
index_j = 0
comma_count = 0
for j in string:
index_j += 1
if j == ',':
comma_count += 1
if comma_count == 1:
slicing_point = index_j
t = string[slicing_point:]
new_str = string[:(slicing_point- 1)]
new_energ = (float(1239.842 / int (float(new_str))) * 8065.54)
print >>h, new_energ
import math
list = []
for i in range(len(ref)):
try:
ans = ((float (ref[i]) - float (bg[i])) / (float(sample[i]) - float(bg[i])))
print ans
base= 10
final_ans = (math.log(ans, base))
except:
ans = -1 * ((float (ref[i]) - float (bg[i])) / (float(sample[i]) - float(bg[i])))
print ans
base= 10
final_ans = (math.log(ans, base))
print >>g, final_ans
Similar to Robert's approach, but aiming to keep control flow as simple as possible.
absorbance.txt:
Hello
How are you
I am fine
Does anybody want a peanut?
energy.txt:
123
456
789
Code:
input_a = open("absorbance.txt")
input_b = open("energy.txt")
output = open("combined.txt", "w")
for left, right in zip(input_a, input_b):
#use rstrip to remove the newline character from the left string
output.write(left.rstrip() + ", " + right)
input_a.close()
input_b.close()
output.close()
combined.txt:
Hello, 123
How are you, 456
I am fine, 789
Note that the fourth line of absorbance.txt was not included in the result, because energy.txt does not have a fourth line to go with it.
You can open both text files and append them to the new text file as shown below. This is what I gave based on your question, not necessarily the code your provided.
combined = open("Combined.txt","w")
with open(r'Engery.txt', "rU") as EnergyLine:
with open(r'Absorbance.txt', "rU") as AbsorbanceLine:
for line in EnergyLine:
Eng = line[:-1]
for line2 in AbsorbanceLine:
Abs = line2[:-1]
combined.write("%s,%s\n" %(Eng,Abs))
break
combined.close()
I wrote a python script to treat text files.
The input is a file with several lines. At the beginning of each line, there is a number (1, 2, 3... , n). Then an empty line and the last line on which some text is written.
I need to read through this file to delete some lines at the beginning and some in the end (say number 1 to 5 and then number 78 to end). I want to write the remaining lines on a new file (in a new directory) and renumber the first numbers written on these lines (in my example, 6 would become 1, 7 2 etc.)
I wrote the following:
def treatFiles(oldFile,newFile,firstF, startF, lastF):
% firstF is simply an index
% startF corresponds to the first line I want to keep
% lastF corresponds to the last line I want to keep
numberFToDeleteBeginning = int(startF) - int(firstF)
with open(oldFile) as old, open(newFile, 'w') as new:
countLine = 0
for line in old:
countLine += 1
if countLine <= numberFToDeleteBeginning:
pass
elif countLine > int(lastF) - int(firstF):
pass
elif line.split(',')[0] == '\n':
newLineList = line.split(',')
new.write(line)
else:
newLineList = [str(countLine - numberFToDeleteBeginning)] + line.split(',')
del newLineList[1]
newLine = str(newLineList[0])
for k in range(1, len(newLineList)):
newLine = newLine + ',' + str(newLineList[k])
new.write(newLine)
if __name__ == '__main__':
from sys import argv
import os
os.makedirs('treatedFiles')
new = 'treatedFiles/' + argv[1]
treatFiles(argv[1], argv[2], newFile, argv[3], argv[4], argv[5])
My code works correctly but is far too slow (I have files of about 10Gb to treat and it's been running for hours).
Does anyone know how I can improve it?
I would get rid of the for loop in the middle and the expensive .split():
from itertools import islice
def treatFiles(old_file, new_file, index, start, end):
with open(old_file, 'r') as old, open(new_file, 'w') as new:
sliced_file = islice(old, start - index, end - index)
for line_number, line in enumerate(sliced_file, start=1):
number, rest = line.split(',', 1)
if number == '\n':
new.write(line)
else:
new.write(str(line_number) + ',' + rest)
Also, convert your three numerical arguments to integers before passing them into the function:
treatFiles(argv[1], argv[2], newFile, int(argv[3]), int(argv[4]), int(argv[5]))
This is probably a simple question, but it's driving me crazy! I have a python code that performs cellular automata on a land use grid. I've made a dictionary of cell id: land use code imported from a text file. I've also import of the adjacent neighbors of each cell from a text file. For each cell in the nested loop, I pick out the highest value, count the highest value of the neighboring cells. If this value is greater than the processing cell and occurred more than 4 times, then I update the dictionary for that cell id. The land use codes are ranked in priority. You will see < 6 in the code below...6 is water and wetlands which I do not want to be changed. The first time I run the code, 7509 cells changed land use based on adjacent neighbors land uses. I can comment out the reading the dictionary text file and run it again, then around 5,000 cells changed. Run it again, then even less and so on. What I would like to do is run this in a loop until only 0.0001 of the total cells change, after that break the loop.
I've tried several times using iterators like "for r in range(999)---something big; If End_Sim > count: break". But it breaks after the first one, because the count goes back to zero. I've tried putting the count = 0 inside the loop and it adds up...I want it to start back over every time so the number of cells gets less and less. I'm stump...hopefully this is trivial to somebody!
Here's my code (it's a clean slate...I've deleted my failed attempts to create the number of simulations loop):
import sys, string, csv
#Creating a dictionary of FID: LU_Codes from external txt file
text_file = open("H:\SWAT\NC\FID_Whole_Copy.txt", "rb")
#Lines = text_file.readlines()
FID_GC_dict = dict()
reader = csv.reader(text_file, delimiter='\t')
for line in reader:
FID_GC_dict[line[0]] = int(line[1])
text_file.close()
#Importing neighbor list file for each FID value
Neighbors_file = open("H:\SWAT\NC\Pro_NL_Copy.txt","rb")
Entries = Neighbors_file.readlines()
Neighbors_file.close()
Neighbors_List = map(string.split, Entries)
#print Neighbors_List
#creates a list of the current FID
FID = [x[0] for x in Neighbors_List]
#Calculate when to end of one sweep
Tot_Cells = len(FID)
End_Sim = int(0.0001*Tot_Cells)
gridList = []
for nlist in Neighbors_List:
row = []
for item in nlist:
row.append(FID_GC_dict[item])
gridList.append(row)
#print gridList
#Performs cellular automata rules on land use grid codes
i = iter(FID)
count = 0
for glist in gridList:
Cur_FID = i.next()
Cur_GC = glist[0]
glist.sort()
lr_Value = glist[-1]
if lr_Value < 6:
tie_LR = glist.count(lr_Value)
if tie_LR >= 4 and lr_Value > Cur_GC:
FID_GC_dict[Cur_FID] = lr_Value
#print "The updated gridcode for FID ", Cur_FID, "is ", FID_GC_dict[Cur_FID]
count += 1
print count
Thanks for any help!
use a while loop:
cnt_total = 1234 # init appropriately
cnt_changed = cnt_total
p = 0.001
while (cnt_changed > cnt_total*p):
# your code here
# remember to update the cnt_changed variable
Try with the while break statements
initialization stuff
while(1):
...
if x < 0.0001:
break
...
http://docs.python.org/tutorial/controlflow.html#break-and-continue-statements-and-else-clauses-on-loops
I fixed the code so the simulations stop once the number of cells change is less than 0.0001 of the total cells. I had the while loop in the wrong place. Here's the code if anyone is interested in cellular automata.
import sys, string, csv
#Creating a dictionary of FID: LU_Codes from external txt file
text_file = open("H:\SWAT\NC\FID_Whole_Copy.txt", "rb")
#Lines = text_file.readlines()
FID_GC_dict = dict()
reader = csv.reader(text_file, delimiter='\t')
for line in reader:
FID_GC_dict[line[0]] = int(line[1])
text_file.close()
#Importing neighbor list file for each FID value
Neighbors_file = open("H:\SWAT\NC\Pro_NL_Copy.txt","rb")
Entries = Neighbors_file.readlines()
Neighbors_file.close()
Neighbors_List = map(string.split, Entries)
#print Neighbors_List
#creates a list of the current FID
FID = [x[0] for x in Neighbors_List]
#print FID
#Calculate when to end the simulations (neglible change in land use)
tot_cells = len(FID)
end_sim = tot_cells
p = 0.0001
#Performs cellular automata rules on land use grid codes
while (end_sim > tot_cells*p):
gridList = []
for nlist in Neighbors_List:
row = []
for item in nlist:
row.append(FID_GC_dict[item])
gridList.append(row)
#print gridList
i = iter(FID)
count = 0
for glist in gridList:
Cur_FID = i.next()
Cur_GC = glist[0]
glist.sort()
lr_Value = glist[-1]
if lr_Value < 6:
tie_LR = glist.count(lr_Value)
if tie_LR >= 4 and lr_Value > Cur_GC:
FID_GC_dict[Cur_FID] = lr_Value
print "The updated gridcode for FID ", Cur_FID, "is ", FID_GC_dict[Cur_FID]
count += 1
end_sim = count
print count