I'm using snscrape lib to scrape twitter data off twitter. I want to insert this data into my database but I seem to be failing no matter what method I try. when I use a loop and create a sql query after the loop to insert the values 1 by 1. I get an IndexError and a TypeError. When I try to append the data into a list. I can't loop in to each value 1 by 1. Now I'm stuck and don't know what to do.
method 1
class Tweet_list():
def tweets_list1(self):
dbname = '******'
user = '******'
password = '******'
host = '*******'
port = ****
cur = None
conn = None
try:
conn = psycopg2.connect(
dbname = dbname,
user = user,
password = password,
host = host,
port = port
)
cur = conn.cursor()
cur.execute('DROP TABLE IF EXISTS Machine_twitter')
create_table = '''CREATE TABLE IF NOT EXISTS Machine_twitter (
id int PRIMARY KEY,
Tweet text,
Tweet_id int,
Timestamp timestamp,
Replys int,
Retweets int,
Likes int,
Username char)'''
cur.execute(create_table)
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:TheHoopCentral').get_items()):
if i > 5:
break
insert_tweet = 'INSERT INTO Machine_twitter (Tweet, Tweet_id, Timestamp, Replys, Retweets, Likes, Username) VALUES (%s, %s, %s, %s,%s, %s, %s)'
insert_values = (tweet.content, tweet.id, tweet.date, tweet.replyCount, tweet.retweetCount, tweet.likeCount, tweet.user.username)
cur.execute(insert_tweet, insert_values)
conn.commit()
print('completed')
except Exception as error:
print(error)
finally:
if cur is not None:
cur.close()
if conn is not None:
conn.close()
tweets = Tweet_list()
tweets2 = Tweet_list()
tweets2.tweets_list1()
error
IndexError: list index out of range
method 2
def update_list1(self):
tweets_list2 = []
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:TheHoopCentral').get_items()):
if i > 100:
break
tweets_list2.append([tweet.content, tweet.id,tweet.likeCount, tweet.retweetCount, tweet.replyCount, tweet.user.username])
tweet_df = pd.DataFrame(tweets_list2, columns=('tweet', 'tweet id', 'likeCount', 'retweetCount', 'replyCount', 'username'))
tweet_df.head()
the problem with the second method is that after the list gets appended. I can't access the values(eg. tweet.content) so I can insert them into the database. I've tried every method under the sun but I'm failing miserably can somebody help.
This is the first time I'm creating an API for android retrofit. I modified this code according to the snippet I got online. The main functionality of the post method is to take the given parameters and store it in the sqlite3 database.
My schema of the following two tables:
sqlite> .schema spending
CREATE TABLE spending(
ID INTEGER PRIMARY KEY AUTOINCREMENT,
date TEXT ,
reason TEXT ,
amount INTEGER
);
CREATE TABLE receiving(
ID INTEGER PRIMARY KEY AUTOINCREMENT,
date TEXT ,
from_reason TEXT ,
amount INTEGER
);
from flask import Flask, request
from flask_restful import Resource, Api
from sqlalchemy import create_engine
from flask import jsonify
db_connect = create_engine('sqlite:///api.db')
app = Flask(__name__)
api = Api(app)
class AddSpending(Resource):
def add_spending(self):
try:
_json = request.json
_date = _json['date']
_reason = _json['reason']
_amount = _json['amount']
# validate the received values
if _date and _reason and _amount and request.method == 'POST':
#do not save password as a plain text
#_hashed_password = generate_password_hash(_password)
# save edits
sql = "INSERT INTO spending(date, reason, amount) VALUES(%s, %s, %d)"
data = (_date, _reason, _amount)
#conn = mysql.connect()
conn = db_connect.connect()
cursor = db_connect.cursor()
conn.cursor()
conn.execute(sql, data)
conn.commit()
#resp = jsonify('Spending added successfully!')
#resp.status_code = 200
return
else:
return 404
except Exception as e:
print(e)
finally:
cursor.close()
conn.close()
api.add_resource(AddSpending, '/spending_up',methods=['POST']) # Route_3
When a user passes data through this parameter. The data should be stored in the database
I think the problem is that you called you method as add_spending and shoud be named as post
change def add_spending(self) by def post(self)
the code for your api should look like that, without the methods='POST'
class AddSpending(Resource):
def post(self):
try:
_json = request.json
_date = _json['date']
_reason = _json['reason']
_amount = _json['amount']
# validate the received values
if _date and _reason and _amount and request.method == 'POST':
#do not save password as a plain text
#_hashed_password = generate_password_hash(_password)
# save edits
sql = "INSERT INTO spending(date, reason, amount) VALUES(%s, %s, %d)"
data = (_date, _reason, _amount)
#conn = mysql.connect()
conn = db_connect.connect()
cursor = db_connect.cursor()
conn.cursor()
conn.execute(sql, data)
conn.commit()
#resp = jsonify('Spending added successfully!')
#resp.status_code = 200
return
else:
return 404
except Exception as e:
print(e)
finally:
cursor.close()
conn.close()
api.add_resource(AddSpending, '/spending_up') # Route_3
UPDATE
I just tried with a code similar to yours and worked
ANOTHER UPDATE
your repo code
Though urls is properly defined, I do keep getting "global name 'urls' is not defined" and the url data is not inserted into MYSQL. Any suggestions on where? I'm making mistake here?
# ! /usr/bin/python
# Description : This script can collect the URLs from Tweets and Records them into research MYSQL DB.
from __future__ import print_function
import tweepy
import json
import MySQLdb
from dateutil import parser
WORDS = ['security']
# CREDENTAILS
CONSUMER_KEY = ""
CONSUMER_SECRET = ""
ACCESS_TOKEN = ""
ACCESS_TOKEN_SECRET = ""
HOST = "192.168.150.94"
USER = "root"
PASSWD = "blah"
DATABASE = "tweets"
def store_data(tweet_url):
db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
charset="utf8")
cursor = db.cursor()
insert_query = "INSERT INTO tweet_url (urls) VALUES (%s)"
cursor.execute(insert_query, (urls))
db.commit()
cursor.close()
db.close()
return
class StreamListener(tweepy.StreamListener):
def on_connect(self):
print("We are now connected to the streaming API.")
def on_error(self, status_code):
print('An Error has occured: ' + repr(status_code))
return False
def on_data(self, data):
try:
datajson = json.loads(data)
web_url = datajson['entities']['urls']
print(web_url)
for i in web_url:
web_urls = i['expanded_url']
urls = web_urls
print(urls)
store_data(urls)
except Exception as e:
print(e)
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
streamer = tweepy.Stream(auth=auth, listener=listener)
print("Tracking: " + str(WORDS))
streamer.filter(track=WORDS)
You just need to rename the parameter urls in the function store_data to tweet_url
def store_data(tweet_url):
db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
charset="utf8")
cursor = db.cursor()
insert_query = "INSERT INTO tweet_url (urls) VALUES (%s)"
cursor.execute(insert_query, (tweet_url))
The way you want to store data stays unclear. If you call store_data after the loop, it's only storing the last value, you should better store each value in a list:
def on_data(self, data):
try:
datajson = json.loads(data)
web_url = datajson['entities']['urls']
print(web_url)
urls = []
for i in web_url:
urls.append((i['expanded_url'],))
# stores a tuple to make it easy in the database insertion
print(urls)
store_data(urls)
except:
[...]
This way need another little fix inside store_data:
def store_data(urls):
db = MySQLdb.connect(host=HOST, user=USER, passwd=PASSWD, db=DATABASE,
charset="utf8")
cursor = db.cursor()
insert_query = "INSERT INTO tweet_url (urls) VALUES (%s)"
cursor.executemany(insert_query, urls)
db.commit()
cursor.close()
db.close()
return
Inside your function store_data() you are using urls which is not defined because what you pass to your function is tweet_url instead.
You need to either change your function argument to urls instead of tweet_url like this:
def store_data(urls):
# ...
Or change urls to tweet_url in your function body:
# ...
cursor.execute(insert_query, (tweet_url))
# ...
And make sure you fix the indentation inside on_data() method as below:
class StreamListener(tweepy.StreamListener):
# ...
def on_data(self, data):
try:
datajson = json.loads(data)
web_url = datajson['entities']['urls']
print(web_url)
for i in web_url:
web_urls = i['expanded_url']
urls = web_urls
print(urls)
store_data(urls)
except Exception as e:
print(e)
Getting an IndexError: list index out of range Error. New to Python, complete beginner and would appreciate some help to understand whats wrong.
Getting json from a timesheet api which i need to save to a mysql database
import requests
import urllib2
from urllib2 import urlopen
import json
import mysql.connector
site = 'https://api.site.com/Projects/?version=5'
hdr = {'Authorization': 'WRAP access_token="TOKEN"', 'Accept': 'application/json'}
req = urllib2.Request(site, headers=hdr)
try:
page = urllib2.urlopen(req)
except urllib2.HTTPError, e:
print e.fp.read()
response = urllib2.urlopen(req).read()
json_obj = json.loads(response.decode ('utf8'))
conn = mysql.connector.connect(host = "HOST_IP",
user = "USER",
passwd = "PASS",
db = "DB",
port=3306,
collation="utf8mb4_unicode_ci")
cursor = conn.cursor ()
for Project in json_obj["Projects"]:
cursor.execute("""
INSERT INTO project
(ID, Client_Name, Name, BusSector, ProjCat, SageCode)
VALUES
(%s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
Name = VALUES(Client_Name),
Name = VALUES(Name),
Name = VALUES(BusSector),
Name = VALUES(ProjCat),
Name = VALUES(SageCode);
""",(Project["ID"],Project["Client"]["Name"],Project["Name"],Project["CustomFields"][0]["Values"][0],Project["CustomFields"][2]["Values"][0],Project["CustomFields"][1]["Values"][0])
)
conn.commit()
cursor.close()
conn.close()
The output from Traceback is:
line 52, in <module> """,(Project["ID"],Project["Client"]["Name"],Project["Name"],Project["CustomFields"][0]["Values"][0],Project["CustomFields"][2]["Values"][0],Project["CustomFields"][1]["Values"][0])
I am using the following script to pull data from a third party tool, create a table in a MySQL database and populate it with the resulting data. The script runs through and I can see the print out of all of the requested data in the Python Shell window. However, when I open the database the table is created with the column names but there are no rows and no data. I have searched around and read that I do not need to use 'conn.commit' for a script that is just retrieving data. Is that the case here? If not does anyone see another reason why the data is not populating the table?
import httplib2, urllib, json, pprint, getpass, string, time, MySQLdb
def usage():
print "Usage: python26 mysql.py or ./mysql.py"
sys.exit(1)
if len(sys.argv) != 1:
usage()
# Connect to the database and create the tables
conn = MySQLdb.connect (host = "localhost",
user = "XXXXXXXXX",
passwd = "XXXXXXXX")
cursor = conn.cursor ()
cursor.execute ("DROP DATABASE IF EXISTS tenable")
cursor.execute ("CREATE DATABASE tenable")
cursor.execute ("USE tenable")
cursor.execute ("""
CREATE TABLE cumvulndata
(
offset BIGINT(10),
pluginName TEXT,
repositoryID SMALLINT(3),
severity TINYINT(2),
pluginID MEDIUMINT(8),
hasBeenMitigated TINYINT(1),
dnsName VARCHAR(255),
macAddress VARCHAR(40),
familyID INT(4),
recastRisk TINYINT(1),
firstSeen DATETIME,
ip VARCHAR(15),
acceptRisk TINYINT(1),
lastSeen DATETIME,
netbiosName VARCHAR(255),
port MEDIUMINT(5),
pluginText MEDIUMTEXT,
protocol TINYINT(3)
)
""")
#
# Security Center organizational user creds
user = 'XXXXXXXXX'
passwd = 'XXXXXXXX'
url = 'https://Security Center Server/request.php'
def SendRequest(url, headers, data):
http = httplib2.Http()
response, content = http.request(url,
'POST',
headers=headers,
body=urllib.urlencode(data))
if 'set-cookie' in response:
headers['Cookie'] = response['set-cookie']
return response, content
headers = {"Content-type": "application/x-www-form-urlencoded"}
input = {'password': passwd,
'username': user}
# Convert input to login JSON
inputjson = json.dumps(input)
data = {"request_id": "8",
"module": "auth",
"action": "login",
"input": inputjson}
# Send Login Request
response, content = SendRequest(url, headers, data)
# Decode JSON to python data structure
result = json.loads(content)
if result["error_code"] == 0:
print "SC4 Login Successful"
token = result['response']['token']
print "Session Token:",token
# Construct the cumulative vuln query JSON
cuminput = {'tool':'vulndetails',
'startOffset':'0',
'endOffset':sys.maxint,
'sortField':'ip',
'sortDir':'asc',
'sourceType':'cumulative',
'filters': [
{'filterName':'lastSeen',
'value':'31',
'operator':'<='},
{"filterName":"severity",
"value":"1,2,3",
"operator":"="}
]}
cuminputjson = json.dumps(cuminput)
#
cumdata = {"request_id": "1",
"module": "vuln",
"action": "query",
"input":cuminputjson,
"token": token}
# Send the cumulative JSON and then populate the table
cumresponse, content = SendRequest(url, headers, cumdata)
resultc = json.loads(content)
off = 0
print "\nFilling cumvulndata table with vulnerabilities from the cumulative database. Please wait..."
for result in resultc['response']['results']:
off += 1
cursor.execute ("""INSERT INTO cumvulndata (offset,pluginName,repositoryID,severity,pluginID,hasBeenMitigated,dnsName,macAddress,familyID,recastRisk,firstSeen,ip,acceptRisk,lastSeen,netbiosName,port,pluginText,protocol)
VALUES
(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,(FROM_UNIXTIME(%s)),%s,%s,(FROM_UNIXTIME(%s)),%s,%s,%s,%s)""", (off,result["pluginName"],result["repositoryID"],result["severity"],result["pluginID"],result["hasBeenMitigated"],result["dnsName"],result["macAddress"],result["familyID"],result["recastRisk"],result["firstSeen"],result["ip"],result["acceptRisk"],result["lastSeen"],result["netbiosName"],result["port"],result["pluginText"],result["protocol"]))
# Close the cursor and connection
cursor.close ()
conn.close ()
print "Done!!"
Try this
import httplib2, urllib, json, pprint, getpass, string, time, MySQLdb
import sys
def usage():
print "Usage: python26 mysql.py or ./mysql.py"
sys.exit(1)
if len(sys.argv) != 1:
usage()
# Connect to the database and create the tables
conn = MySQLdb.connect (host = "localhost",
user = "XXXXXXXXXX",
passwd = "XXXXXXXX")
cursor = conn.cursor ()
cursor.execute ("DROP DATABASE IF EXISTS tenable")
cursor.execute ("CREATE DATABASE tenable")
cursor.execute ("USE tenable")
cursor.execute ("""
CREATE TABLE cumvulndata
(
offset BIGINT(10),
pluginName TEXT,
repositoryID SMALLINT(3),
severity TINYINT(2),
pluginID MEDIUMINT(8),
hasBeenMitigated TINYINT(1),
dnsName VARCHAR(255),
macAddress VARCHAR(40),
familyID INT(4),
recastRisk TINYINT(1),
firstSeen DATETIME,
ip VARCHAR(15),
acceptRisk TINYINT(1),
lastSeen DATETIME,
netbiosName VARCHAR(255),
port MEDIUMINT(5),
pluginText MEDIUMTEXT,
protocol TINYINT(3)
)
""")
cursor.execute ("""INSERT INTO cumvulndata (offset,pluginName,repositoryID,severity,pluginID,hasBeenMitigated,dnsName,macAddress,familyID,recastRisk,firstSeen,ip,acceptRisk,lastSeen,netbiosName,port,pluginText,protocol)
VALUES
(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""", ('123','plugin','10','1','12','1',"dnsName","macAddress",'15','1','2011-2-2',"ip",'9','2012-5-2',"netbiosName",'123',"pluginText","2"))
#Commit the changes.
conn.commit()
cursor.close()
conn.close()
Please commit the changes then you will get the inserted data.