If Else Range Search Python 3.5 - python

I am trying to use Python 3.5 to create a .xml document from csv. The .xml requires a specific schema, which I have been able to replicate through Python. My issue is that some of the schema needs to be changed, depending on input from the csv. If a specific product is listed in the csv, a couple of lines from the xml need to be left out. I have provided a basic example below; the issue lies in the middle of the code, where I try to call a defined variable string and do the if else statement on the range to eliminate the unneeded lines. No matter what I do the else is defaulted to; the if portion never solves true although the data in the source document matches the defined variable string, thus the two lines for the range are always left out -thanks in advance.
#! /usr/bin/env python
# coding= utf-8
import csv
csvFile = 'PRODUCT LIST.csv'
xmlFile = 'PRODUCT LIST.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
var1 = 'CocaCola'
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<ArrayOfProducts>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 12):
tags[i] = tags[i].replace(' ', '_')
xmlData.write(' <Product>' + "\n")
xmlData.write(' <Name />' + "\n")
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write(' <List>' + "\n")
for i in range(2, 3):
xmlData.write(' <List ' + "p:type=" + '"' + row[i] + '"' + ' ' + "xmlns:p=" '"xsi"' '>' "\n")
for i in range(3, 7):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
if i in range(3,4) == var1:
for i in range(7, 9):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
else:
pass
xmlData.write(' <Supported>' + "\n")
for i in range (9, 10):
xmlData.write(' <Manufacturer ' + "p:type=" + '"' + row[i] + '"' + '>' "\n")
for i in range(10, 11):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write(' </Manufacturer>' + "\n")
xmlData.write(' </Supported>' + "\n")
for i in range(11, 12):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write(' </Manufacturer>' + "\n")
xmlData.write(' </List>' + "\n")
xmlData.write(' </Product>' + "\n")
rowNum +=1
xmlData.write('</ArrayOfProducts>' + "\n")
xmlData.close()

Related

Tweepy error with exporting array content

I am looking to extract tweets and write them to a CSV file, however, I cannot figure out how to get it to generate a file. I am using Tweepy to extract the tweets. I would like the CSV file to contain the following cells: User, date, tweet, likes, retweets, total, eng rate, rating, tweet id
import tweepy
import csv
auth = tweepy.OAuthHandler("", "")
auth.set_access_token("", "")
api = tweepy.API(auth)
try:
api.verify_credentials()
print("Authentication OK")
except:
print("Error during authentication")
def timeline(username):
tweets = api.user_timeline(screen_name=username, count = '100', tweet_mode="extended")
for status in (tweets):
eng = round(((status.favorite_count + status.retweet_count)/status.user.followers_count)*100, 2)
if (not status.retweeted) and ('RT #' not in status.full_text) and (eng <= 0.02):
print (status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Low' + ',Tweet ID: ' + str(status.id))
elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.02 < eng <= 0.09):
print (status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Good' + ',Tweet ID: ' + str(status.id))
elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.09 < eng <= 0.33):
print (status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: High' + ',Tweet ID: ' + str(status.id))
elif (not status.retweeted) and ('RT #' not in status.full_text) and (0.33 < eng):
print (status.user.screen_name + ',' + str(status.created_at) + ',' + status.full_text + ",Likes: " + str(status.favorite_count) + ",Retweets: " + str(status.retweet_count) + ',Total: ' + str(status.favorite_count + status.retweet_count) + ',Engagement rate: ' + str(eng) + '%' + 'Rating: Very High' + ',Tweet ID: ' + str(status.id))
tweet = timeline("twitter")
with open('tweet.csv', 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow([tweet])
You can look at https://docs.python.org/3/library/csv.html for the info on how to generate a csv file in Python. Quick exmaple:
import csv
with open('some_output.csv', 'w') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["field1", "field2", "field3"])
Your function get_tweets does not return a value but you are trying to retrieve a value from that function which would result in None. Also it looks like tweet value will be list of strings. writerow method from csv.writer should get list of items and not list of lists. I have modified your code to address those issues. Let me know if it works.
def get_tweets(username):
tweets = api.user_timeline(screen_name=username, count=100)
tweets_for_csv = [tweet.text for tweet in tweets]
print(tweets_for_csv)
return tweets_for_csv
tweet = get_tweets("fazeclan")
with open('tweet.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(tweet)

How to put together datas into a file?

I would like to collect different type of datas into a file. Here is a part of the code.
val = str(float(data[-1]))
val_dB = float(val)
val_dB = math.log(val_dB, 10) * 10
myfile = open('../../../MLI_values/mli_value.txt', 'a')
myfile.write(date_ID + " " + val + val_dB + "\n")
myfile.close()
But it gives back an error:
myfile.write(date_ID + " " + val + val_dB + "\n")
TypeError: cannot concatenate 'str' and 'float' objects
How can I solve it to put them together? (into columns) into a file?
Change:
myfile.write(date_ID + " " + val + val_dB + "\n")
to:
myfile.write(date_ID + " " + val + " " + str(val_dB) + "\n")

How to select many folders and file to work on

the present script runs on two files within the same folder.
Each time I have to run a different case, I have to replace the path in the script with the right folder (i.e 11221 instead of 11220) and the right file names within that folder (i.e. 11221_S1 instead of 11220_S1) and run the script again.
Is there a way to make the script select all the folders contained in /mypath and inside that folder select the two files i need for the script to run? so i would not need to replace manually the text in the script for each file.
thank you in advance
Luca
import sys
infile=open("mypath/11220/11220_S1.vcf")
outfile=open('/mypath/11220/11220_S1.csv', 'w')
outfile2=open('/mypath/11220_S1.txt', 'w')
for line in infile:
data=line.split()
if data[0] == "#CHROM":
#print line
outfile.write(str(data[0]) + '\t' + str(data[1]) + '\t' +str(data[3]) + '\t' +str(data[4]) + '\t'+ str('SDP') + '\t'+ str('DP') + '\t'+ str('RD') + '\t'+ str('AD') + '\t'+ str('FREQ') + '\t'+ str('PVALUE') +'\t' + '\n')
outfile2.write(str("chrom") + '\t' + str("position") + '\n')
if data[0] == "chr17":
tag=data[9].split(":")
#print tag[3]
outfile.write(str(data[0]) + '\t' + str(data[1]) + '\t' +str(data[3]) + '\t' +str(data[4]) + '\t'+ str(tag[2]) + '\t'+ str(tag[3]) + '\t'+ str(tag[4]) + '\t'+ str(tag[5]) + '\t'+ str(tag[6]) + '\t'+ str(tag[7]) +'\t' + '\n')
outfile2.write(str(data[0]) + '\t' + str(data[1]) + '\n')
outfile.close()
outfile2.close()
infile=open("mypath/11220/11220_S2.vcf")
outfile=open('/mypath/11220/11220_S2.csv', 'w')
outfile2=open('/mypath/11220_S2.txt', 'w')
for line in infile:
data=line.split()
if data[0] == "#CHROM":
#print line
outfile.write(str(data[0]) + '\t' + str(data[1]) + '\t' +str(data[3]) + '\t' +str(data[4]) + '\t'+ str('SDP') + '\t'+ str('DP') + '\t'+ str('RD') + '\t'+ str('AD') + '\t'+ str('FREQ') + '\t'+ str('PVALUE') +'\t' + '\n')
outfile2.write(str("chrom") + '\t' + str("position") + '\n')
if data[0] == "chr17":
tag=data[9].split(":")
#print tag[3]
outfile.write(str(data[0]) + '\t' + str(data[1]) + '\t' +str(data[3]) + '\t' +str(data[4]) + '\t'+ str(tag[2]) + '\t'+ str(tag[3]) + '\t'+ str(tag[4]) + '\t'+ str(tag[5]) + '\t'+ str(tag[6]) + '\t'+ str(tag[7]) +'\t' + '\n')
outfile2.write(str(data[0]) + '\t' + str(data[1]) + '\n')
outfile.close()
outfile2.close()
Rather than hardcoding each file path, you could use glob (https://docs.python.org/2/library/glob.html) for Unix-like selections. Rough examples of how glob can be used:
import glob
filepath = glob.glob('mypath/11220/*.vcf')[0]
infile = open(logpath, "r")
or
import glob
filepath = glob.glob('mypath/1122*/*.vcf')[0]
infile = open(logpath, "r")
and so on.
Best of luck to you!

Converting CSV to XML

I'm currently trying to make the input file for a hydrologic model (HBV-light) compatible with external calibration software (PEST). HBV-light requires that it's input files be in XML format, while PEST can only read text files. My issue relates to writing a script that will automatically convert a parameter set written by PEST (in CSV format) to an XML file that can be read by HBV-light.
Here's a short example of a text file that can be written by PEST:
W,X,Y,Z
1,2,3,4
and this is how I'm attempting to organize the XML file:
<Parameters>
<GroupA>
<W>1</W>
<X>2</X>
</GroupA>
<GroupB>
<Y>3</Y>
<Z>4</Z>
</GroupB>
</Parameters>
I don't have very much programming experience whatsoever, but here is a python code that I wrote so far:
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(3, 5):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(3, 5):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
xmlData.write('</Catchment>' + "\n")
xmlData.close()
I can get the Group A (or CathmentParameters specifically) to be written, but the Group B section is NOT being written. Not sure what to do!
I think that the loop is wrong.
Try if this works for you
#! /usr/bin/env python
# coding= utf-8
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
for i in range(2, 4):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
rowNum +=1
xmlData.write('</Catchment>' + "\n")
xmlData.close()
I think the issue is in your range definition in the second part... range(3, 5) means elements 4 and 5, what you want is probably range(2,4) meaning elements 3 and 4.
The problem is that you iterate over the contents of the csv file twice - it appears that you need to "rewind" after your first loop. There is also a minor indexing issue, with the second range needing to be range(2,4) and not range(3,5) as was already pointed out.
I created a piece of code that appears to work. It can probably be improved upon by people who understand Python properly. Note - I added a couple of print statements to convince myself I understood what is happening. If you don't open the csvFile a second time (at "starting the second for loop"), then no rows get printed. That's your clue that this is the problem.
import csv
csvFile = 'myCSVfile.csv'
xmlFile = 'myXMLfile.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0" encoding="utf-8"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">' + "\n")
xmlData.write('<CatchmentParamters>' + "\n")
rowNum = 0
for row in csvData:
print "row is ", row
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(0, 2):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(0, 2):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</CatchmentParameters>' + "\n")
xmlData.write('<VegetationZone>' + "\n")
xmlData.write('<VegetationZoneParameters>' + "\n")
rowNum = 0
print "starting the second for loop"
csvData = csv.reader(open(csvFile))
for row in csvData:
print "row is now ", row
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(2, 4):
tags[i] = tags[i].replace(' ', '_')
else:
for i in range(2, 4):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
rowNum +=1
xmlData.write('</VegetationZoneParameters>' + "\n")
xmlData.write('</VegetationZone>' + "\n")
xmlData.write('</Catchment>' + "\n")
xmlData.close()
Using the above with the little test file you had given resulted in the following XML file:
<?xml version="1.0" encoding="utf-8"?>
<Catchment xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<CatchmentParamters>
<W>1</W>
<X>2</X>
</CatchmentParameters>
<VegetationZone>
<VegetationZoneParameters>
<Y>3</Y>
<Z>4</Z>
</VegetationZoneParameters>
</VegetationZone>
</Catchment>
Problem solved?

Python Directory traveling os.path

I try to create a program which can recursively traverse multiple directories and print the file listing in hieararchical way like :
Folder
----x.c
----x.bin
----Folder
---------x.c
I try to do with program like (with file/folders detail) :
#!/usr/bin/python
import os
for item in os.listdir(".") :
if os.path.isdir(item) :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile(item) :
print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "Nothing \n"
But i can't go in any directory i try with like (A is a directory here) :
#!/usr/bin/python
import os
for item in os.listdir(".") :
if os.path.isdir(item) :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile(item):
print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "Nothing"
for item in os.listdir("A") :
if os.path.isdir("A") :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile("A") :
print "--" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "Nothing"
The listing is wrong i don't understand why i can't just go from . to A and how to do it .And worst if i go on B (the second folder here):
#!/usr/bin/python
import os
for item in os.listdir(".") :
if os.path.isdir(item) :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile(item):
print item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "dunno"
for item in os.listdir("A") :
if os.path.isdir("A") :
print "-" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
elif os.path.isfile("A") :
print "--" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print "lulz"
for item in os.listdir("A/B") :
if os.path.isfile("A/B") :
print "---" + item + '\t' + str(os.stat(item).st_size) + "kb" + '\t' + str(os.stat(item).st_atime)
else :
print 'Nothing'
I think you want to use os.walk
for (cur, dirs, files) in os.walk('.'):
pass
This will give you the current directory, a list of directories in the current directory and a list of files in the current directory.
I think you want something like
for (cur, dirs, files) in os.walk('.'):
depth = len(cur.split('/'))
print "--" * depth, cur
for fname in files:
print "--" * (depth + 1), fname
Borrowed a bit from this answer: List directory tree structure using Python
import os
def list_files(path, spaceChar=' ', spaceWidth=4):
for root, dirs, files in os.walk(path):
level = root.replace(path, '').count(os.sep)
indent = spaceChar * (spaceWidth * level)
print('{}{}/'.format(indent, os.path.basename(root)))
subindent = spaceChar * spaceWidth * (level + 1)
for f in files:
print('{}{}'.format(subindent, f))
list_files(".", "-", 3)

Categories