Python sys.argv and input combination with a definition NOT working - python

I have a python script like below that I was trying to run in shell. I want to use either argv or input in order to give specific inputs called index1 and index2 (ACCGTCG and TTCCAGC) and a file name to process. I tried two ways (with sys.argv and input separately) as below but I get no output.
With sys.argv:
#!/usr/bin/python
import sys
from Bio import SeqIO
def dual_index_positions():
script=sys.argv[0]
index1=sys.argv[1]
index2=sys.argv[2]
input_file=sys.argv[3]
count=0
with open(input_file, "r") as Fastq:
for record in SeqIO.parse(Fastq,'fastq'):
if index1 in record.seq and index2 in record.seq:
print(record.name)
ind1_rec=record.seq.find(index1)
ind2_rec=record.seq.find(index2)
rp_ind2=ind2_rec+len(index2)
dist=(ind1_rec)-(rp_ind2)
print('Index1 and index2 positions are '+ str(ind1_rec+1) + ' and ' + str(ind2_rec+1) + ' respectively' +
'; distance is: ' + str(dist))
count+=1
print('The total number of hits is: '+ str(count))
if __name__ == '__dual_index_positions__':
dual_index_positions()
with taking inputs:
#!/usr/bin/python
from Bio import SeqIO
def dual_index_positions():
input_file=input('please enter your input_file: ')
index1=str(input('please enter your index 1: '))
index2=str(input('please enter your index 2: '))
count=0
with open(input_file, "r") as Fastq:
for record in SeqIO.parse(Fastq,'fastq'):
if index1 in record.seq and index2 in record.seq:
print(record.name)
ind1_rec=record.seq.find(index1)
ind2_rec=record.seq.find(index2)
rp_ind2=ind2_rec+len(index2)
dist=(ind1_rec)-(rp_ind2)
print('Index1 and index2 positions are '+ str(ind1_rec+1) + ' and ' + str(ind2_rec+1) + ' respectively' +
'; distance is: ' + str(dist))
count+=1
print('The total number of hits is: '+ str(count))
if __name__ == '__dual_index_positions__':
dual_index_positions()
Can somebody help me with this to find where is the bug? Thank you in advance.

Instead of this
if __name__ == '__dual_index_positions__':
dual_index_positions()
Use this
if __name__ == '__main__':
dual_index_positions()
__name__ == '__dual_index_positions__' the RHS of that is not the function name you have to call, that is the name of the module and by default it has the value __main__

Related

NameError: name 'variable' is not defined. Already declared variable but still error

I tried to extract data from a text file (cisco switch logs) and convert it to CSV so I can create a table and sort out the data & create graphs out of it. So here is my code:
import pandas as pd
import csv
import time
from datetime import datetime
import os
import glob
import sys
pathh = glob.glob("C:\\Users\\Taffy R. Mantang\\Desktop\\PR logs\\*\\")
#This part of the code opens all the text with the name ISW-1.txt inside the PR logs folder
for x in pathh:
# Detect the line number in text file to know where the row begin
phrase = "Shelf Panel CPUID Power CPU(5s) CPU(1m) CPU(5m) Peak PhyMem FreeMem Mem"
file = open("{0}".format(x) + "\\ISW-1.txt")
for number, line in enumerate(file):
if phrase in line:
sh_pro = number
break
file.close()
#Convert the text file to CSV from the row determined earlier
with open("{0}".format(x) + '\\ISW-1.txt', 'r') as rf:
r = csv.reader(rf, skipinitialspace=True, delimiter=' ')
rows = list(r)
heada = rows[sh_pro]
heada.insert(0, " ")
print(heada)
#to mark the last row
skipprocessor = sh_pro + 4
for i in range(7):
if i == 0:
print(rows[skipprocessor + i])
if i == 2:
sub_heada = rows[skipprocessor + i]
sub_heada.insert(0, " ")
sub_heada.insert(1, " ")
sub_heada.insert(2, " ")
print(rows[skipprocessor + i])
if i == 4:
sub_heada = rows[skipprocessor + i]
sub_heada.insert(0, " ")
sub_heada.insert(1, " ")
sub_heada.insert(2, " ")
print(rows[skipprocessor + i])
if i == 6:
sub_heada = rows[skipprocessor + i]
sub_heada.insert(0, " ")
sub_heada.insert(1, " ")
sub_heada.insert(2, " ")
print(rows[skipprocessor + i])
Previously it worked and it printed the output successfully. However while I was experimenting with exporting the output to an excel table, suddenly there was an error saying:
Traceback (most recent call last):
File "C:\Users\Taffy R. Mantang\PycharmProjects\pythonProject\main.py", line 26, in
heada = rows[sh_pro]
NameError: name 'sh_pro' is not defined
I traced back and undo everything but it still gives the same error.
I tried to remove an indent on line 26, it managed to print(heada). but messed up the if else code down below it and not print out the rest below.
What exactly is the problem? Help :'''((
sh_pro is not defined because you are not hitting the condition if phrase in line:, I would suggest:
for number, line in enumerate(file):
if phrase in line:
sh_pro = number
break
file.close()
#Convert the text file to CSV from the row determined earlier
with open("{0}".format(x) + '\\ISW-1.txt', 'r') as rf:
r = csv.reader(rf, skipinitialspace=True, delimiter=' ')
rows = list(r)
try:
heada = rows[sh_pro]
except NameError:
# error handling
In order to declare sh_pro, the condition if phrase in line: in your for cycle should return True. So if your condition returns False then your interpreter never meets such name as sh_pro. You can try to modify your code in a way that sh_pro is declared before you want to start working with it.
for number, line in enumerate(file):
if phrase in line:
sh_pro = number
break
file.close()

Python output file formatting - text

This is my first post so please be gentle, im trying to write two text files generated from some user input, the file "hostname - VLAN_config" is fine, i use that file elsewhere in my code, what i do need however is another file (in this case "hostname - Trunk_config" to be formatted in a certain manner in order to use it for another part of my code.
The below code produces a file that looks like this;
", 1, 2, 3"
but i need it to generate a text file that looks like;
1, 2, 3
print ('VLANS')
print('-----------------------------------------------')
print(' ')
condition = True
while (condition == True):
vlan = raw_input('Specify a VLAN id: ')
name = raw_input('What name for this VLAN: ')
print(' ')
with open(hostname + ' - VLAN_config', "ab") as f:
f.write('vlan ' + vlan)
f.write('\n')
f.write('name ' + name)
f.write('\n')
with open(hostname + ' - Trunk_config', "ab") as f:
f.write(', ' + vlan)
test = raw_input('Would you like to add another? [Y] ')
if test == (''):
condition = True
elif test == ('n'):
condition = False
else:
test = raw_input('Invalid input, more?')
if test == ('y'):
condition = True
elif test == ('n'):
condition = False
any help will be much appreciated!
So the only problem is the first comma? Why don't you add a test to check if you are writing the first value?
Other solution: write ', ' only after the user said they want to add another input.

Python parsing csv file to store in file

I have not used Python in years and trying to get back into it. I have a Input_file (.csv) that I want to parse and store the output in a output.csv or .txt
I have managed to parse the .csv file using this code, and for the most part the it works but I cant get it save to save to file (Issue 1) without getting the below error (error 1)
import csv
import re
import itertools
file_name = 'PhoneCallData1.txt'
try:
lol = list(csv.reader(open(file_name, 'r'), delimiter=' '))
count =0
except:
print('File cannot be opened:',file_name)
exit()
try:
fout = open('output.txt','w')
except:
Print("File cannot be written to:","OutputFile")
exit()
d = dict()
for item in itertools.chain(lol): # Lists all items (field) in the CSV file.
count +=1 # counter to keep track of row im looping through
if lol[count][3] is None:
print("value is not blank")
count +=1
else:
try:
check_date = re.search(r'(\d+/\d+/\d+)', lol[count][3]) # check to determine if date is a date
except:
continue
check_cost = re.compile(r'($+\d*)', lol[count][9]) # check to determine if value is a cost
if check_date ==TRUE:
try:
key =lol[count][3] # If is a date value, store key
except ValueError:
continue
if check_cost==TRUE:
value = lol[count][9] # if is a cost ($) store value
d[key] = value
print (d[key])
# fout.write((d[key])
# What if there is no value in the cell?
# I keep getting "IndexError: list index out of range", anyone know why?
# Is there a better way to do this?
# I only want to store the destination and the charge
and now comes the complicated part. The file I need to parse has a number of irrelevant rows of data before and in between the required data.
Data Format
What I want to do;
I want to iterate over two columns of data, and only store the rows that have a date or cost in them, dis-guarding the rest of the data.
import csv
import re
import itertools
lol = list(csv.reader(open('PhoneCallData1.txt', 'r'), delimiter=' '))
count =0
d = dict()
for item in itertools.chain(lol): #Lists all items (field) in the CSV file.
count +=1 # counter to keep track of row im looping through
check_date = re.search(r'(\d+/\d+/\d+)', lol[count][3]) #check to determine
check_cost = re.compile(r'($+\d*)', lol[count][9]) #check to determine if value is a cost
if check_date ==TRUE:
key =lol[count][3] #If is a date value, store key
if check_cost==TRUE:
value = lol[count][9] #if is a cost ($) store value
d[key] = value
print (d[key])
#What if there is no value in the cell?
# I keep getting "IndexError: list index out of range", anyone know why?
# Is there a better way to do this?
# I only want to store the destination and the charges
What I have tried;
I tried to index the data after I loaded it, but that didn't seem to work.
I created this to only look at rows at that were more than a certain length, but its terrible code. I was hoping for something more practical and reusable.
import re
with open('PhoneCallData1.txt','r') as f, open('sample_output.txt','w') as fnew:
for line in f:
if len(line) > 50:
print(line)
fnew.write(line + '\n')
Import csv
lol = list(csv.reader(open('PhoneCallData1.txt', 'rb'), delimiter='\t'))
#d = dict()
#key = lol[5][0] # cell A7
#value = lol[5][3] # cell D7
#d[key] = value # add the entry to the dictionary
Keep getting index out of bounds errors
import re
import csv
match=re.search(r'(\d+/\d+/\d+)','testing date 11/12/2017')
print match.group(1)
Trying to use regex to search for the date in the first column of data.
NOTE: I wanted to try Pandas but I feel I need to start here. Any Help would be awesome.
answer to if next record need to be parsed must be specific, and I have answer a similar question, in the same way, finite-state machine may help
main code is:
state = 'init'
output = []
# for line loop:
if state == 'init': # seek for start parsing
# check if start parsing
state = 'start'
elif state == 'start': # start parsing now
# parsing
# check if need to end parsing
state = 'init'
import csv
import re
import itertools
import timeit
start_time = timeit.default_timer()
# code you want to evaluate
file_name = 'PhoneCallData.txt'
try:
lol = list(csv.reader(open(file_name, 'r'), delimiter=' '))
except:
print('File cannot be opened:', file_name)
exit()
try:
fout = open('output.txt','w')
except:
Print("File cannot be written to:","OutputFile")
exit()
# I could assign key value pairs and store in dictionry. Then print, search,ect on the dictionary. Version2
# d = dict()
count =0
total = 0
for row in lol: # Lists all items (field) in the CSV file.
#print(len(row))
count +=1 # counter to keep track of row im looping through
if len(row) == 8:
if row[2].isdigit():
# Remove the $ and convert to float
cost = re.sub('[$]', '', row[7])
# Assign total value
try:
# Calculate total for verification purposes
total = total + float(cost)
total = round(total, 2)
except:
continue
string = str(row[2] + " : " + (row[7]) + " : " + str(total) + "\n")
print (string)
fout.write(string)
if len(row) == 9:
if row[2].isdigit():
# Remove the $ and convert to float
cost = re.sub('[$]', '', row[8])
# Assign total value
try:
# Calculate total for verification purposes
total = total + float(cost)
total = round(total, 2)
except:
continue
string = str(row[2] + " : " + row[8] + " : " + str(total) + "\n")
print(string)
fout.write(string)
if len(row) == 10:
# print (row[2] +":"+ row[9])
# Remove the $ and convert to float
cost = re.sub('[$]', '', row[9])
# Assign total value
try:
# Calculate total for verification purposes
total = total + float(cost)
total = round(total, 2)
except:
continue
string = str(row[2] + " : " + row[9] + " : " + str(total) + "\n")
print(string)
fout.write(string)
# Convert to string so I can print and store in file
count_string = str(count)
total_string = str(total)
total_string.split('.', 2)
# Write to screen
print (total_string + " Total\n")
print("Rows parsed :" + count_string)
# write to file
fout.write(count_string + " Rows were parsed\n")
fout.write(total_string + " Total")
# Calcualte time spent on task
elapsed = timeit.default_timer() - start_time
round_elapsed = round(elapsed, 2)
string_elapsed = str(round_elapsed)
fout.write(string_elapsed)
print(string_elapsed + " seconds")
fout.close()

Check which label or name a part (from krn) has in music21

I want to extract 2 parts from 4 voice krn score and save them as a midi file.
I can load the files:
s = converter.parse('/something.krn')
I can get some basic info like this:
s.metadata.title
In v2, I want to store the part of s that has a label "Cantus". Any idea how to check for a label? They have a label in krn.
Once I have the number of the part, I can get it with
s.parts[i]
The krn file is defined like this:
**kern **kern **kern **kern **kern
*Ibass *Itenor *Itenor *Icalto *Icant
!Bassus !Tenor 2 !Tenor 1 !Altus !Cantus
I am guessing labels is not the correct name, as I can't find this in music21 documentation, perhaps the name of the part?
I can't seem to find the property in the music21 documentation.
I was finally able to do it this way:
import sys
from music21 import *
import os
# input ("Please make sure that you have places all the krn files in a subdirectory called data. Press enter to continue")
for filename in os.listdir('./data'):
s = converter.parse('./data/' + filename)
sys.stdout.write('Processing ' + filename + '... ')
numcant = -1
nums = list()
try:
length = len(s.parts)
except:
length = 0
if (length > 0):
for num in range(0,length):
# sys.stdout.write(s.parts[num].flat.getElementsByClass('SpineComment')[0].comment + ' - ')
if (s.parts[num].flat.getElementsByClass('SpineComment')[0].comment == "Cantus"):
numcant = num
# print "cant "
# print numcant
else:
# print "nums"
nums.append(num)
# print num
else:
# sys.stdout.write(' - no parts present.')
sys.stdout.write('\n')
try:
length = len(nums)
except:
length = 0
if (length > 0):
sys.stdout.write('\n')
if (numcant != -1):
for num in nums:
sys.stdout.write(' - ' + filename[:-4] + '_' + str(num) + '.mid written.\n')
# print "cantus present"
s2 = stream.Stream()
s2.insert(0, s.parts[num])
s2.insert(0, s.parts[numcant])
# write the midi file
s2.write('midi', './midi/' + filename[:-4] + '_' + str(num) + '.mid')
# sys.stdout.write('I')
else:
sys.stdout.write(' - no cantus specified for this file.\n')
else:
sys.stdout.write(' - not enough parts in this file.\n')
sys.stdout.write('\n')

Combine two Python codes

I am very new in python, but I have been able to make few useful python codes (at least useful for my work). I would like to combine two of my codes, but I have a hard time making it work, I think I am completely lost in how the code should looks like.
The first code basically takes a file, read it, extract to columns from it, and then write the columns in a new file. I repeat this with several files:
import sys
import re
filetowrite = sys.argv[1]
filetoread = sys.argv[2]
newfile = str(filetowrite) + ".txt"
openold = open(filetoread,"r")
opennew = open(newfile,"w")
rline = openold.readlines()
number = int(len(rline))
start = 0
for i in range (len(rline)) :
if "2theta" in rline[i] :
start = i
opennew.write ("q" + "\t" + "I" + "\n")
opennew.write ("1/A" + "\t" + "1/cm" + "\n")
opennew.write (str(filetowrite) + "\t" + str(filetowrite) + "\n")
for line in rline[start + 1 : number] :
words = line.split()
word1 = (words[1])
word2 = (words[2])
opennew.write (word1 + "\t" + word2 + "\n")
openold.close()
opennew.close()
The second code takes the new previously created files and combine them in a way in which the columns are next to each other in the final file.
import sys
from itertools import izip
filenames = sys.argv[2:]
filetowrite = sys.argv[1]
newfile = str(filetowrite) + ".txt"
opennew = open(newfile, "w")
files = map(open, filenames)
for lines in izip(*files):
opennew.write(('\t'.join(i.strip() for i in lines))+"\n")
Any help in how to proceed to make a single code out of these two codes is highly appreciated.
All the best
Make each file into a function in one larger file, then call the functions as necessary. Make use of __main__ to do that.
import sys
import re
from itertools import izip
def func_for_file1():
# All of the code from File 1 goes here.
def func_for_file2():
# ALl of the code from File 2 goes here.
def main():
# Decide what order you want to call these methods.
func_for_file1()
func_for_file2()
if __name__ == '__main__':
main()

Categories