Tweepy error with exporting array content - python

I am looking to extract tweets and write them to a CSV file, however, I cannot figure out how to get it to generate a file. I am using Tweepy to extract the tweets. I would like the CSV file to contain the following cells: User, date, tweet, likes, retweets, total, eng rate, rating, tweet id
import tweepy
import csv
# Authenticate against the Twitter API. Fill in the consumer key/secret and
# the access token/secret before running.
auth = tweepy.OAuthHandler("", "")
auth.set_access_token("", "")
api = tweepy.API(auth)
try:
    api.verify_credentials()
except Exception as exc:
    # Catch Exception instead of using a bare ``except`` so that Ctrl-C and
    # SystemExit still propagate, and show why the verification failed.
    print("Error during authentication:", exc)
else:
    print("Authentication OK")
def _engagement_rating(eng):
    """Map an engagement rate (in percent) to its rating label."""
    if eng <= 0.02:
        return 'Low'
    if eng <= 0.09:
        return 'Good'
    if eng <= 0.33:
        return 'High'
    return 'Very High'


def timeline(username):
    """Print one comma-separated summary line per original tweet of *username*.

    Up to 100 recent tweets are fetched through the module-level ``api``
    client. Retweets are skipped. Each printed line holds the screen name,
    creation date, full text, likes, retweets, their total, the engagement
    rate, its rating and the tweet id.

    The function only prints; it does not return anything.
    """
    tweets = api.user_timeline(screen_name=username, count=100, tweet_mode="extended")
    for status in tweets:
        # The text of a retweet starts with "RT @<user>"; skip those.
        if status.retweeted or 'RT @' in status.full_text:
            continue
        total = status.favorite_count + status.retweet_count
        followers = status.user.followers_count
        # An account without followers would otherwise divide by zero.
        eng = round(total / followers * 100, 2) if followers else 0.0
        fields = [
            status.user.screen_name,
            str(status.created_at),
            status.full_text,
            'Likes: ' + str(status.favorite_count),
            'Retweets: ' + str(status.retweet_count),
            'Total: ' + str(total),
            'Engagement rate: ' + str(eng) + '%',
            'Rating: ' + _engagement_rating(eng),
            'Tweet ID: ' + str(status.id),
        ]
        print(','.join(fields))
# timeline() has no return statement, so ``tweet`` is None here.
tweet = timeline("twitter")
with open('tweet.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    # Writes one row with a single cell holding the value of ``tweet``.
    writer.writerow([tweet])

See https://docs.python.org/3/library/csv.html for information on how to generate a CSV file in Python. Quick example:
import csv
# newline='' lets the csv module control the line endings itself; without it
# every row is followed by an extra blank line on Windows.
with open('some_output.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["field1", "field2", "field3"])

Your function timeline does not return a value, but you assign its result to a variable, so that variable ends up being None. It also looks like the tweet value should be a list of strings. The writerow method of csv.writer expects a flat list of items, not a list of lists. I have modified your code (renaming the function to get_tweets) to address those issues. Let me know if it works.
def get_tweets(username):
    """Return the texts of up to 100 recent tweets by *username*.

    The tweets are fetched through the module-level ``api`` client. The
    resulting list of strings is also printed before it is returned.
    """
    # tweet_mode="extended" asks for the untruncated text, which Tweepy
    # exposes as ``full_text`` (``text`` is cut off for long tweets).
    tweets = api.user_timeline(screen_name=username, count=100, tweet_mode="extended")
    tweets_for_csv = [tweet.full_text for tweet in tweets]
    print(tweets_for_csv)
    return tweets_for_csv
tweet = get_tweets("fazeclan")
# newline='' is what the csv module expects for its output file, and an
# explicit UTF-8 encoding keeps emoji and other non-ASCII tweet text from
# failing on platforms whose default encoding cannot represent them.
with open('tweet.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    # ``tweet`` is a flat list of strings: each tweet becomes one cell of
    # a single row.
    writer.writerow(tweet)

Related

Python Selenium Scraper error - ValueError: I/O operation on closed file

I am trying to run the following python code
Technology: Python, Selenium scraper
Device: Windows device
Getting error......
Traceback (most recent call last):
File "scraper.py", line 35, in for row in cp_url:
ValueError: I/O operation on closed file.
#!/usr/bin/python3
# Description: The Python code below will search selenium in Google.
import time
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# NOTE(review): the indentation of this snippet was lost when it was pasted.
# The nesting below is reconstructed; only the position of the outer loop is
# established by the traceback -- confirm the rest against the real script.

# Point executable_path at the chromedriver matching the installed Chrome.
driver = webdriver.Chrome(
    executable_path="D:\chromedriver.exe")
# NOTE(review): ``options`` is configured here but is not passed to
# webdriver.Chrome() anywhere in this snippet.
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
contents = []
filePath = 'output1.csv'
# Start from a clean output file: delete a previous one if it exists.
if os.path.exists(filePath):
    os.remove(filePath)
else:
    print("Can not delete the file as it doesn't exists")
# Write the header line. This handle is rebound further down without being
# closed first.
f = open("output1.csv", "a")
f.write("website," + "htmltag," + "type," + "id," + "classname," + "for," + "href," + "alt," + "type," + "src,"
+ "name," + "width," + "height," + "data-src,"+ 'inner-text,' + 'action,' + 'value,' + "\n")
with open('inputLinks1.csv', 'rt') as cp_csv:
    cp_url = csv.reader(cp_csv)
# The reported "ValueError: I/O operation on closed file" is raised on this
# loop: it runs after the ``with`` block above has closed inputLinks1.csv.
for row in cp_url:
    # First column of each input row is the URL to visit.
    links = row[0]
    contents.append(links)
    driver.get(links)
    with open('xpathtags.csv', 'rt') as cp2_csv:
        cp_url2 = csv.reader(cp2_csv)
        for row1 in cp_url2:
            print(row[0])
            # Each line of xpathtags.csv is "<label>;<xpath>".
            (xtype, xpathtext) = row1[0].split(';')
            print(xtype, xpathtext)
            contents.append(xtype)
            contents.append(xpathtext)
            elems = driver.find_elements_by_xpath(xpathtext)
            for elem in elems:
                # The output file is reopened in append mode for every element.
                f = open('output1.csv', 'a', encoding='utf-8')
                # One line per element; 'type' is read twice, matching the two
                # "type" columns of the header.
                f.write( links + ", "+ xtype + ","
                + str(elem.get_attribute('type')) + ', '
                + str(elem.get_attribute('id')) + ', '
                + str(elem.get_attribute('class')) + ', '
                + str(elem.get_attribute('for')) + ', '
                + str(elem.get_attribute('href')) + ', '
                + str(elem.get_attribute('alt')) + ', '
                + str(elem.get_attribute('type')) + ', '
                + str(elem.get_attribute('src')) + ', '
                + str(elem.get_attribute('name')) + ', '
                + str(elem.get_attribute('width')) + ', '
                + str(elem.get_attribute('height')) + ', '
                + str(elem.get_attribute('data-src')) + ', '
                + str(elem.get_attribute('innerText').strip()) + ', '
                + str(elem.get_attribute('action')) + ', '
                + str(elem.get_attribute('value')) + ', '
                + '\n')
                f.close()
driver.close()
I am using the following CSV files
A) inputlinks1.csv
www.flipkart.com
www.ebay.com
B) xpathtags.csv
Link;//a[@href]
Button;//button
Image;//img
Heading1;//h1
Heading2;//h2
Heading3;//h3
Heading4;//h4
C) Output.csv is a blank file
I am getting the following error
Traceback (most recent call last):
File "scraper.py", line 35, in <module>
for row in cp_url:
ValueError: I/O operation on closed file.
I can't test it, but I think your problem is that the indentation is wrong:
with open('inputLinks1.csv', 'rt') as cp_csv:
cp_url = csv.reader(cp_csv)
for row in cp_url:
# ...rest...
So you run the for-loop outside with...as..., and with...as... automatically closes the file as soon as its block ends.
You should run the for-loop inside with...as...
with open('inputLinks1.csv', 'rt') as cp_csv:
cp_url = csv.reader(cp_csv)
for row in cp_url:
# ...rest...
Or you could use standard open() and close()
cp_csv = open('inputLinks1.csv', 'rt')
cp_url = csv.reader(cp_csv)
for row in cp_url:
# ...rest...
cp_csv.close()
Had to make a few changes to your code to get it working.
After fixing indentation, it threw another error w.r.t inputlinks1.csv file.
Changed it to-
https://www.flipkart.com
https://www.ebay.com
And always try to use with open when handling files.
Code snippet:-
contents = []
filePath = 'output1.csv'

# Column headings of output1.csv, in the order the values are written.
header = ["website", "htmltag", "type", "id", "classname", "for", "href",
          "alt", "type", "src", "name", "width", "height", "data-src",
          "inner-text", "action", "value"]
# DOM attributes read from every matched element; lines up with header[2:].
attributes = ['type', 'id', 'class', 'for', 'href', 'alt', 'type', 'src',
              'name', 'width', 'height', 'data-src', 'innerText', 'action',
              'value']

try:
    # Mode "w" truncates an existing output file, so it does not have to be
    # deleted first. The file is opened once for the whole run, and
    # csv.writer takes care of quoting values that contain commas, quotes or
    # newlines (inner text frequently does).
    with open(filePath, 'w', newline='', encoding='utf-8') as f, \
            open('inputLinks1.csv', 'r') as cp_csv:
        writer = csv.writer(f)
        writer.writerow(header)
        for row in csv.reader(cp_csv):
            if not row:
                continue  # skip blank lines in the input file
            # The first column holds the URL; it must include the scheme,
            # e.g. https://www.ebay.com
            links = row[0]
            print(links)
            contents.append(links)
            driver.get(links)
            with open('xpathtags.csv', 'r') as cp2_csv:
                # Each line of xpathtags.csv is "<label>;<xpath>".
                for row1 in csv.reader(cp2_csv, delimiter=';'):
                    if not row1:
                        continue
                    xtype, xpathtext = row1
                    print(xtype, xpathtext)
                    contents.append(xtype)
                    contents.append(xpathtext)
                    # find_elements("xpath", ...) works with Selenium 3 and 4;
                    # find_elements_by_xpath was removed in Selenium 4.
                    for elem in driver.find_elements("xpath", xpathtext):
                        values = []
                        for name in attributes:
                            value = elem.get_attribute(name)
                            # innerText may be missing; only strip real text.
                            if name == 'innerText' and value is not None:
                                value = value.strip()
                            values.append(str(value))
                        writer.writerow([links, xtype] + values)
finally:
    # Always end the browser session, even if scraping failed part-way.
    driver.quit()

Extract values from specific parts of a text file

I have a text file from which I need to extract some values, but several times I get more than one value between the beginning and the last line that starts with 0089 (CONVENIO) in the quadrant.
With my code I can only make it write in the txt the title I defined and it keeps repeating the first agreement found, but I need it to scroll through the text and bring me new information from the other quadrants.
I need the loop as it can have multiple covenants, for this reason I can't directly "anchor" the lines in a size forecast.
import re
import os
# NOTE(review): the indentation of this snippet was lost when it was pasted.
# The nesting below is a best guess -- confirm it against the real script.

# Marker line that starts a quadrant of the report.
inicio = (' YM-INFRA-CASH MANAGMENT DEST.: 001-0001-CENTRAL ')
lista = []
contador = 3
# NOTE(review): ``banco`` is assigned but never used in this snippet.
banco = ' 0089'
convenio = ''
# NOTE(review): ``caminho`` is not defined anywhere in this snippet.
with open(caminho + '/Downloads/TESTE.txt', 'r') as arquivo:
    for line in arquivo:
        if line.strip() == inicio.strip():
            # Reads every remaining line of the file into a list, so the
            # outer ``for`` has nothing left to iterate over afterwards.
            localizar = arquivo.readlines()
            # ``inicio`` is reused as a scratch variable from here on and no
            # longer holds the marker text.
            inicio = localizar[contador]
            van = inicio[13:17]
            nomevan = inicio[20:50].strip()
            inicio = localizar[contador + 1]
            ag = inicio[13:17]
            nomeag = inicio[20:50].strip()
            inicio = localizar[contador + 2]
            cliente = inicio[13:50].strip()
            contadorum = 9
            # ``contadorum`` is never changed below, so this condition keeps
            # testing the same line.
            while localizar[contadorum][1:5] == '0033':
                # Fixed-width columns of the agreement line.
                convenio = localizar[contadorum][1:22].strip()
                narqrem = localizar[contadorum][22:34].strip()
                bytesrem = localizar[contadorum][34:51].strip()
                narqret = localizar[contadorum][51:63].strip()
                bytesret = localizar[contadorum][63:81].strip()
                totalbytes = localizar[contadorum][81:99].strip()
                percrateio = localizar[contadorum][99:112].strip()
                print(van, nomevan)
                print(ag, nomeag)
                print(cliente)
                print(convenio, narqrem, bytesrem, narqret, bytesret, totalbytes, percrateio)
                lista.append(convenio + narqrem + bytesrem + narqret + bytesret + totalbytes + percrateio +'\n')
                # Mode 'w' recreates the output file each time this runs.
                with open(caminho + '/Downloads/testefim.txt', 'w') as consolidado:
                    consolidado.write('CONVENIO' + ';' + 'N ARQ REMES' + ';' + 'BYTES REMES' + ';' + 'N ARQ.RET.' + ';' + 'BYTES RET.' + ';' + 'TOTAL BYTES' + ';' + '% RATEIO' + '\n')
                    for linha in lista:
                        # Writes the current variables, not ``linha``, so
                        # every row repeats the same values.
                        consolidado.write(convenio + ';' + narqrem + ';' + bytesrem + ';' + narqret + ';' + bytesret + ';' + totalbytes + ';' + percrateio + '\n')
                    # Redundant: the ``with`` block already closes the file.
                    consolidado.close()
        else:
            pass
# Redundant: the ``with`` block above already closed the file.
arquivo.close()

How to put together datas into a file?

I would like to collect different types of data into a file. Here is part of the code.
# Last element of ``data`` converted to float, kept as its string form.
val = str(float(data[-1]))
val_dB = float(val)
# 10 * log10(value)
val_dB = math.log(val_dB, 10) * 10
myfile = open('../../../MLI_values/mli_value.txt', 'a')
# ``val_dB`` is still a float here, so concatenating it to the strings is
# what raises the TypeError.
myfile.write(date_ID + " " + val + val_dB + "\n")
myfile.close()
But it gives back an error:
myfile.write(date_ID + " " + val + val_dB + "\n")
TypeError: cannot concatenate 'str' and 'float' objects
How can I solve it to put them together? (into columns) into a file?
Change:
myfile.write(date_ID + " " + val + val_dB + "\n")
to:
myfile.write(date_ID + " " + val + " " + str(val_dB) + "\n")

python read all files from a folder and write the file name and other info into a txt file

I have 30911 html files. I need to do webscraping and then save the info into a txt file named index.txt.
It should look like
filename1, title, t1, date, p1
filename2, title, t1, date, p1
filename3, title, t1, date, p2
and so on...
I only want filename, but output gave me path+filename.
Your problem is that filename actually holds the full file path. To get just the file name, you can use the os module:
os.path.basename('filepath')
so in order to write to the file:
indexFile.write(os.path.basename(filename)+ ', ' + title.get_text(strip=True) + ', '+ ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + parti_names + '\n')
You can use:
path = 'C:/Users/.../.../output/'

# Compiled once instead of once per file / per paragraph.
_PARTICIPANT_HEADING = re.compile(r"Executives|Corporate Participants")
_OPERATOR_HEADING = re.compile(r"(Operator)")


def find_participant(tag):
    """Match the <p> whose <strong> text names the participants section."""
    return tag.name == 'p' and tag.find("strong", text=_PARTICIPANT_HEADING)


def _participant_names(soup):
    """Return the participant paragraphs joined with commas.

    Collects the text of every <p> that follows the participants heading,
    stopping at the paragraph whose <strong> text contains "Operator". Each
    name is followed by a comma. Returns 'No participants' when the page has
    no participants heading.
    """
    participants = soup.find(find_participant)
    if participants is None:
        return 'No participants'
    names = []
    for parti in participants.find_next_siblings("p"):
        if parti.find("strong", text=_OPERATOR_HEADING):
            break
        names.append(parti.get_text(strip=True) + ",")
    return "".join(names)


# The index file is opened once for the whole run and closed by ``with``,
# instead of being reopened (and sometimes left open) for every HTML file.
with open('C:/Users/.../output1/' + 'index.txt', 'a+') as indexFile:
    # read html files
    for filename in glob.glob(os.path.join(path, '*.html')):
        with open(filename) as html_file:
            soup = bs4.BeautifulSoup(html_file.read(), "lxml")
        title = soup.find('h1')
        ticker = soup.find('p')
        d_date = soup.find_all('div', {"id": "a-body"})[0].find_all("p")[2]
        # os.path.basename drops the directory part, so only the file name
        # is written -- also for files without participants.
        indexFile.write(', '.join([
            os.path.basename(filename),
            title.get_text(strip=True),
            ticker.get_text(strip=True),
            d_date.get_text(strip=True),
            _participant_names(soup),
        ]) + '\n')
ntpath is another module used to get base name from path.
>>> import ntpath
>>> ntpath.basename('C:/Users/.../output1/' + 'index.txt')
'index.txt'

How to encrypt a .bin file

I need to encrypt 3 .bin files which contain 2 keys for Diffie-Hellman. I have no clue how to do that, all I could think of was what I did in the following Python file. I have an example what the output should look like but my code doesn't seem to produce the right keys. The output file server.ini is used by a client to connect to a server.
import base64
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# the nesting below is reconstructed. The use of raw_input means this is
# Python 2 code.

# Each entry pairs a key file with the section name used in server.ini.
fileList = [['game_key.bin', 'Game'], ['gate_key.bin', 'Gate'], ['auth_key.bin', 'Auth']]
iniList = []
for i in fileList:
    file = open(i[0], 'rb')
    # The first 64 bytes are taken as N, the following 64 bytes as X.
    n = list(file.read(64))
    x = list(file.read(64))
    file.close()
    # Reverse the byte order of both values before encoding them.
    n.reverse()
    x.reverse()
    # Two quoted, Base64-encoded lines per key file: Server.<name>.N and
    # Server.<name>.X.
    iniList.append(['Server.' + i[1] + '.N "' + base64.b64encode("".join(n)) + '"\n', 'Server.' + i[1] + '.X "' + base64.b64encode("".join(x)) + '"\n'])
# NOTE(review): assumed to run once, after the loop -- confirm.
iniList[0].append('\n')
#time for user Input
ip = '"' + raw_input('Hostname: ') + '"'
dispName = 'Server.DispName ' + '"' + raw_input('DispName: ') + '"' + '\n'
statusUrl = 'Server.Status ' + '"' + raw_input('Status URL: ') + '"' + '\n'
signupUrl = 'Server.Signup ' + '"' + raw_input('Signup URL: ') + '"' + '\n'
# range(1, 3) covers indices 1 and 2 only (Gate and Auth), so no Host line is
# added for index 0 (Game).
for l in range(1, 3):
    iniList[l].append('Server.' + fileList[l][1] + '.Host ' + ip + '\n\n')
for l in [[dispName], [statusUrl], [signupUrl]]:
    iniList.append(l)
# Write every collected line to server.ini in order.
outFile = open('server.ini', 'w')
for l in iniList:
    for i in l:
        outFile.write(i)
outFile.close()
The following was in my example file:
# Keys are Base64-encoded 512 bit RC4 keys, as generated by DirtSand's keygen
# command. Note that they MUST be quoted in the commands below, or the client
# won't parse them correctly!
I also tried it without inverting n and x

Categories