How to select many folders and files to work on - Python

The script below runs on two files within the same folder.
Each time I have to run a different case, I have to replace the path in the script with the right folder (e.g. 11221 instead of 11220) and the right file names within that folder (e.g. 11221_S1 instead of 11220_S1) and run the script again.
Is there a way to make the script select all the folders contained in /mypath and, inside each folder, select the two files I need for the script to run? That way I would not need to manually replace the text in the script for each file.
Thank you in advance,
Luca
import sys

infile = open("mypath/11220/11220_S1.vcf")
outfile = open('/mypath/11220/11220_S1.csv', 'w')
outfile2 = open('/mypath/11220_S1.txt', 'w')
for line in infile:
    data = line.split()
    if data[0] == "#CHROM":
        #print line
        outfile.write(str(data[0]) + '\t' + str(data[1]) + '\t' + str(data[3]) + '\t' + str(data[4]) + '\t' + str('SDP') + '\t' + str('DP') + '\t' + str('RD') + '\t' + str('AD') + '\t' + str('FREQ') + '\t' + str('PVALUE') + '\t' + '\n')
        outfile2.write(str("chrom") + '\t' + str("position") + '\n')
    if data[0] == "chr17":
        tag = data[9].split(":")
        #print tag[3]
        outfile.write(str(data[0]) + '\t' + str(data[1]) + '\t' + str(data[3]) + '\t' + str(data[4]) + '\t' + str(tag[2]) + '\t' + str(tag[3]) + '\t' + str(tag[4]) + '\t' + str(tag[5]) + '\t' + str(tag[6]) + '\t' + str(tag[7]) + '\t' + '\n')
        outfile2.write(str(data[0]) + '\t' + str(data[1]) + '\n')
outfile.close()
outfile2.close()

infile = open("mypath/11220/11220_S2.vcf")
outfile = open('/mypath/11220/11220_S2.csv', 'w')
outfile2 = open('/mypath/11220_S2.txt', 'w')
for line in infile:
    data = line.split()
    if data[0] == "#CHROM":
        #print line
        outfile.write(str(data[0]) + '\t' + str(data[1]) + '\t' + str(data[3]) + '\t' + str(data[4]) + '\t' + str('SDP') + '\t' + str('DP') + '\t' + str('RD') + '\t' + str('AD') + '\t' + str('FREQ') + '\t' + str('PVALUE') + '\t' + '\n')
        outfile2.write(str("chrom") + '\t' + str("position") + '\n')
    if data[0] == "chr17":
        tag = data[9].split(":")
        #print tag[3]
        outfile.write(str(data[0]) + '\t' + str(data[1]) + '\t' + str(data[3]) + '\t' + str(data[4]) + '\t' + str(tag[2]) + '\t' + str(tag[3]) + '\t' + str(tag[4]) + '\t' + str(tag[5]) + '\t' + str(tag[6]) + '\t' + str(tag[7]) + '\t' + '\n')
        outfile2.write(str(data[0]) + '\t' + str(data[1]) + '\n')
outfile.close()
outfile2.close()

Rather than hardcoding each file path, you could use glob (https://docs.python.org/2/library/glob.html) for Unix shell-style path matching. Rough examples of how glob can be used:
import glob
filepath = glob.glob('mypath/11220/*.vcf')[0]
infile = open(filepath, "r")
or
import glob
filepath = glob.glob('mypath/1122*/*.vcf')[0]
infile = open(filepath, "r")
and so on.
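For example, here is a rough sketch of a loop over every folder under /mypath, assuming each folder (e.g. 11220) contains the two VCFs named after it (11220_S1.vcf and 11220_S2.vcf), with your parsing loop dropped in the middle:
import glob
import os

for folder in sorted(glob.glob('/mypath/*/')):
    sample = os.path.basename(os.path.normpath(folder))  # e.g. "11220"
    for suffix in ('S1', 'S2'):
        vcf = os.path.join(folder, '%s_%s.vcf' % (sample, suffix))
        if not os.path.exists(vcf):
            continue  # skip folders that do not have this file
        infile = open(vcf)
        outfile = open(os.path.join(folder, '%s_%s.csv' % (sample, suffix)), 'w')
        outfile2 = open('/mypath/%s_%s.txt' % (sample, suffix), 'w')
        # ... the same "for line in infile:" parsing loop as in your script ...
        infile.close()
        outfile.close()
        outfile2.close()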
Best of luck to you!

Related

Tweepy error with exporting array content

I am looking to extract tweets and write them to a CSV file; however, I cannot figure out how to get it to generate a file. I am using Tweepy to extract the tweets. I would like the CSV file to contain the following columns: user, date, tweet, likes, retweets, total, eng rate, rating, tweet ID.
import tweepy
import csv

auth = tweepy.OAuthHandler("", "")
auth.set_access_token("", "")
api = tweepy.API(auth)

try:
    api.verify_credentials()
    print("Authentication OK")
except:
    print("Error during authentication")

def timeline(username):
    tweets = api.user_timeline(screen_name=username, count='100', tweet_mode="extended")
    for status in tweets:
        eng = round(((status.favorite_count + status.retweet_count)/status.user.followers_count)*100, 2)
        if (not status.retweeted) and ('RT #' not in status.full_text) and (eng <= 0.02):
            print(status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Low' + ',Tweet ID: ' + str(status.id))
        elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.02 < eng <= 0.09):
            print(status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Good' + ',Tweet ID: ' + str(status.id))
        elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.09 < eng <= 0.33):
            print(status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: High' + ',Tweet ID: ' + str(status.id))
        elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.33 < eng):
            print(status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Very High' + ',Tweet ID: ' + str(status.id))

tweet = timeline("twitter")
with open('tweet.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow([tweet])
You can look at https://docs.python.org/3/library/csv.html for information on how to generate a CSV file in Python. Quick example:
import csv

with open('some_output.csv', 'w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["field1", "field2", "field3"])
Your timeline function does not return a value, but you are trying to use its result, which will be None. Also, it looks like the tweet value will be a list of strings, and the writerow method of csv.writer expects a list of items, not a list of lists. I have modified your code (the function is renamed get_tweets here) to address those issues. Let me know if it works.
def get_tweets(username):
    tweets = api.user_timeline(screen_name=username, count=100)
    tweets_for_csv = [tweet.text for tweet in tweets]
    print(tweets_for_csv)
    return tweets_for_csv

tweet = get_tweets("fazeclan")
with open('tweet.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(tweet)
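If you would rather have one tweet per row instead of all tweets on a single row, a small variation on the code above (still assuming the get_tweets function defined there) is to use writerows:
import csv

tweets = get_tweets("fazeclan")
with open('tweet.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(["tweet"])               # header row
    writer.writerows([[t] for t in tweets])  # one row per tweet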

How to put data together into a file?

I would like to collect different types of data in a file. Here is part of the code.
val = str(float(data[-1]))
val_dB = float(val)
val_dB = math.log(val_dB, 10) * 10
myfile = open('../../../MLI_values/mli_value.txt', 'a')
myfile.write(date_ID + " " + val + val_dB + "\n")
myfile.close()
But it gives back an error:
myfile.write(date_ID + " " + val + val_dB + "\n")
TypeError: cannot concatenate 'str' and 'float' objects
How can I fix this so the values end up together, in columns, in the file?
Change:
myfile.write(date_ID + " " + val + val_dB + "\n")
to:
myfile.write(date_ID + " " + val + " " + str(val_dB) + "\n")
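More generally, str.format handles the float-to-string conversion for you and makes it easier to keep the values in columns; a small sketch with the same variables, tab-separated:
# Tab-separated columns; format() converts the float val_dB to text automatically.
myfile.write("{}\t{}\t{:.3f}\n".format(date_ID, val, val_dB))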

for loop inside a string

I'm trying to write a script in Python to make my job easier.
I need to use os.system to call some functions of an external piece of software.
Is there a way to insert a for loop inside this string, without having to write obs_dir[n] every time?
import os
obs_dir = ['18185','18186','18187','19926','19987','19994','19995','20045','20046','20081']
xid = ['src21']
i=0
os.system("pset combine_spectra src_arfs=/"
          + obs_dir[0] + "/" + xid[i] + "_" + obs_dir[0] + "_spectrum.arf,"
          + "/" + obs_dir[1] + "/" + xid[i] + "_" + obs_dir[1] + "_spectrum.arf,"
          + "/" + obs_dir[2] + "/" + xid[i] + "_" + obs_dir[2] + "_spectrum.arf,"
          + "/" + obs_dir[3] + "/" + xid[i] + "_" + obs_dir[3] + "_spectrum.arf,"
          + "/" + obs_dir[4] + "/" + xid[i] + "_" + obs_dir[4] + "_spectrum.arf,"
          + "/" + obs_dir[5] + "/" + xid[i] + "_" + obs_dir[5] + "_spectrum.arf,"
          + "/" + obs_dir[6] + "/" + xid[i] + "_" + obs_dir[6] + "_spectrum.arf,"
          + "/" + obs_dir[7] + "/" + xid[i] + "_" + obs_dir[7] + "_spectrum.arf,"
          + "/" + obs_dir[8] + "/" + xid[i] + "_" + obs_dir[8] + "_spectrum.arf,"
          + "/" + obs_dir[9] + "/" + xid[i] + "_" + obs_dir[9] + "_spectrum.arf")
You can create the required command by first iterating over the list (obs_dir) and building up the string.
Ex:
import os

obs_dir = ['18185','18186','18187','19926','19987','19994','19995','20045','20046','20081']
xid = ['src21']

s = "pset combine_spectra src_arfs="
for i in obs_dir:
    s += "/{0}/{1}_{0}_spectrum.arf,".format(i, xid[0])
s = s.rstrip(',')
print s
#os.system(s)
I think this might be what you want
import os

obs_dir = ['18185','18186','18187','19926','19987','19994','19995','20045','20046','20081']
xid = ['src21']

str_cmd = "pset combine_spectra src_arfs="
separator = ""
for dir in obs_dir:
    str_cmd += separator + "/" + dir + "/" + xid[0] + "_" + dir + "_spectrum.arf"
    separator = ","
os.system(str_cmd)
You have xid[i], but no i, so using xid[0],
"/{}/{}_{}_spectrum.arf".format(obs_dir[1],xid[0],obs_dir[1])
gives
'/18186/src21_18186_spectrum.arf'
So, format helps.
Also, join will help join these into a comma separated string:
",".join(['a', 'b'])
gives
'a,b'
Joining this together you get
s = ",".join(["/{}/{}_{}_spectrum.arf".format(o,xid[0],o) for o in obs_dir])
giving the parameter(s) you want
'/18185/src21_18185_spectrum.arf,/18186/src21_18186_spectrum.arf,/18187/src21_18187_spectrum.arf,/19926/src21_19926_spectrum.arf,/19987/src21_19987_spectrum.arf,/19994/src21_19994_spectrum.arf,/19995/src21_19995_spectrum.arf,/20045/src21_20045_spectrum.arf,/20046/src21_20046_spectrum.arf,/20081/src21_20081_spectrum.arf'
without a spare ',' on the end.
Then use it
os.system("pset combine_spectra src_arfs=" + s)
Not in the string, but we can build the string using features like list comprehension (in this case, a generator expression) and string joining:
obs_dir = ['18185','18186','18187','19926','19987','19994','19995','20045','20046','20081']
xid = ['src21']
i = 0
print("pset combine_spectra src_arfs=" +
      ",".join("/{0}/{1}_{0}_spectrum.arf".format(n, xid[i])
               for n in obs_dir))

Python 2.7 IndexError: list index out of range, converting file

I have code to convert a JMeter JTL file to CSV, but when I run it I get the following error on line 32: IndexError: list index out of range
This is the code:
import sys
import re
import datetime
import time

startTime = time.time()
cnt = 0
cnt2 = 0
failCnt = 0
reCompile = re.compile("\s([^\s]*?)=\"(.*?)\"")
delimiterCharacterOut = ","

def writeCSVLine(line):
    x = reCompile.findall(line)
    a = dict((row[0], row[1]) for row in x)
    try:
        a['ts1'] = str(int(int(a['ts'])/1000))
        x = str(datetime.datetime.fromtimestamp(float(a['ts1'])))[0:19]
        b = a['ts'] + ",\"" + x + "\"," + a['t'] + "," + a['lt'] + ",\"" + a['s'] + "\",\"" + a['lb'] + "\"," + a['rc'] + ",\"" + a['rm'] + "\",\"" + a['tn'] + "\",\"" + a['dt'] + "\"," + a['by'] + ",\"" + a['sc'] + "\"," + a['ec'] + ",\"" + a['ng'] + "\"," + a['na'] + ",\"" + a['hn'] + "\"," + a['in'] + "\n"
    except:
        return -1
    o.write(b)
    return 1

print "Splitting JTL file"

try:
    runArgv = sys.argv  #Save the command line
    jtlInfile = str(sys.argv[1])  #Name of JTL input file
    cvsOutfile = str(sys.argv[2])  #Name of CVS output file
    reFilter = str(sys.argv[3])  #Filter the labels (lb) for the filter
except:
    print "Error: Input format: <input file> <output file> <Filter by regular expression>"
    raise

try:
    f = open(jtlInfile, "r")
    o = open(cvsOutfile, "w")
except:
    raise

print "Filtering on regular expression : " + reFilter
cmpFilter = re.compile(reFilter)

# o.write("timestamp" + ",\"" + "datetime" + "\n")
o.write("timeStamp" + ",\"" + "datetime" + "\"," + "elapsed" + "," + "Latency" + ",\"" + "success" + "\",\"" + "label" + "\"," + "responseCode" + ",\"" + "responseMessage" + "\",\"" + "threadName" + "\",\"" + "dataType" + "\"," + "bytes" + ",\"" + "SampleCount" + "\"," + "ErrorCount" + ",\"" + "grpThreads" + "\"," + "allThreads" + ",\"" + "Hostname" + "\"," + "IdleTime" + "\n")

for line in f:
    try:
        if cmpFilter.search(line):
            returnVal = writeCSVLine(line)
            if returnVal < 0:
                failCnt += 1
            else:
                cnt2 += 1
    except:
        print 'Error in line : ', cnt, line
        raise
    cnt += 1

endTime = time.time()
print "Time taken : ", str(endTime - startTime)
print "Lines processed : ", cnt
print "Lines that passed the filter : ", cnt2
print "Lines skipped (error?) : ", failCnt

f.close()
o.close()
CMD log: (screenshot of the command-prompt session, not reproduced here)
The base tutorial is at: http://balasoftwaretesting.blogspot.com/2012/03/converting-jmeter-jtl-file-to-csv-file.html?spref=bl
From the sys.argv docs, sys.argv is the list of command line arguments passed to a Python script.
Your command line log shows that you ran python JtltoCsv_Jmeter.py with no arguments, which leaves sys.argv holding only the script name, so sys.argv[1] raises the IndexError. The tutorial provides a jtl file as an argument to JtltoCsv_Jmeter.py:
JtltoCsv_Jmeter.py C:\JtlToCsvConverter\input\sample.jtl
So it looks like maybe an error in copy/paste :)
Looking into the script, you need to pass 3 command line arguments:
Source JTL file
Target CSV file
Regular expression filter
So you need to execute the script like:
JtltoCsv_Jmeter.py example.jtl example.csv "(.*)"
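If you would like a clearer message than the raw IndexError when arguments are missing, one option (just a sketch, not part of the original script) is to parse the arguments with argparse instead of indexing sys.argv directly:
import argparse

parser = argparse.ArgumentParser(description="Convert a JMeter JTL file to CSV")
parser.add_argument("jtl_in", help="source JTL file")
parser.add_argument("csv_out", help="target CSV file")
parser.add_argument("label_filter", help="regular expression used to filter labels (lb)")
args = parser.parse_args()  # prints a usage message and exits if arguments are missing

jtlInfile = args.jtl_in
cvsOutfile = args.csv_out
reFilter = args.label_filter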
Also, there is an option to switch JMeter's results output format to CSV; to do so, use one of the following steps:
Add the jmeter.save.saveservice.output_format=csv line to the user.properties file (it lives under the /bin folder of your JMeter installation)
Pass the property value via -J command line argument as:
jmeter -Jjmeter.save.saveservice.output_format=csv
See Apache JMeter Properties Customization Guide for more information on JMeter properties and ways of passing, setting and overriding them.

Python directory traversal with os.path

I am trying to create a program which can recursively traverse multiple directories and print the file listing in a hierarchical way, like:
Folder
----x.c
----x.bin
----Folder
---------x.c
I tried to do it with a program like this (with file/folder details):
#!/usr/bin/python
import os

for item in os.listdir("."):
    if os.path.isdir(item):
        print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    elif os.path.isfile(item):
        print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    else:
        print "Nothing \n"
But I can't go into any directory. I tried something like this (A is a directory here):
#!/usr/bin/python
import os

for item in os.listdir("."):
    if os.path.isdir(item):
        print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    elif os.path.isfile(item):
        print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    else:
        print "Nothing"

for item in os.listdir("A"):
    if os.path.isdir("A"):
        print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    elif os.path.isfile("A"):
        print "--" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    else:
        print "Nothing"
The listing is wrong; I don't understand why I can't simply go from . into A, or how to do it. And it gets worse if I go into B (the second folder here):
#!/usr/bin/python
import os

for item in os.listdir("."):
    if os.path.isdir(item):
        print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    elif os.path.isfile(item):
        print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    else:
        print "dunno"

for item in os.listdir("A"):
    if os.path.isdir("A"):
        print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    elif os.path.isfile("A"):
        print "--" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    else:
        print "lulz"

for item in os.listdir("A/B"):
    if os.path.isfile("A/B"):
        print "---" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
    else:
        print 'Nothing'
I think you want to use os.walk
for (cur, dirs, files) in os.walk('.'):
    pass
This will give you the current directory, a list of directories in the current directory and a list of files in the current directory.
I think you want something like
for (cur, dirs, files) in os.walk('.'):
    depth = len(cur.split('/'))
    print "--" * depth, cur
    for fname in files:
        print "--" * (depth + 1), fname
Borrowed a bit from this answer: List directory tree structure using Python
import os

def list_files(path, spaceChar=' ', spaceWidth=4):
    for root, dirs, files in os.walk(path):
        level = root.replace(path, '').count(os.sep)
        indent = spaceChar * (spaceWidth * level)
        print('{}{}/'.format(indent, os.path.basename(root)))
        subindent = spaceChar * spaceWidth * (level + 1)
        for f in files:
            print('{}{}'.format(subindent, f))

list_files(".", "-", 3)
