I'm using snscrape to scrape instagram. snscrape returns the data in tuple format but it creates the instagram data in a nested tuple. eg.
for b in enumerate(sninstagram.InstagramUserScraper(username='houston_2731').get_items()):
print[(b)]
output
(0, InstagramPost(url='https://www.instagram.com/p/CUdFfjEImHN/', date=datetime.datetime(2021, 9, 30, 17, 39, 20, tzinfo=datetime.timezone.utc), content='"Hardwork plus patience. A symbol of my sacrifice I\'m doing waiting." Nipsey Hussle \n\nIt\'s hard to believe what 5 months and a disciplined diet and hitting the gym hard can do. The first pic in the collage is me at a challenging point in my life. Depression and what not but I had to snap out of it and get in the gym and do the work. As I continue to embark on this fitness journey. I hope to inspire some to join me on this journey. \n\n#fitness #weightloss #muscles #gymmotivation #gymrat #intermittentfasting #fitnessmotivation #fitnessjourney #tenpercentbodyfat #shredded #fitnessgoals #hardwork #patience #discipline #dedication #hunger', thumbnailUrl='https://instagram.fjnb12-1.fna.fbcdn.net/v/t51.2885-15/243385646_584565779558058_6508985384396360110_n.webp?stp=dst-jpg_e35_s640x640_sh0.08&_nc_ht=instagram.fjnb12-1.fna.fbcdn.net&_nc_cat=106&_nc_ohc=nrtaOwxdg64AX8NQE-Z&edm=ABfd0MgBAAAA&ccb=7-4&oh=00_AT_xE-O75IP4MezdzoHM_WxAgbXiivb3aBFUMopAkxxJSA&oe=621D237E&_nc_sid=7bff83', displayUrl='https://instagram.fjnb12-1.fna.fbcdn.net/v/t51.2885-15/243385646_584565779558058_6508985384396360110_n.webp?stp=dst-jpg_e35&_nc_ht=instagram.fjnb12-1.fna.fbcdn.net&_nc_cat=106&_nc_ohc=nrtaOwxdg64AX8NQE-Z&edm=ABfd0MgBAAAA&ccb=7-4&oh=00_AT8JXpM2XKqA_d06LV10Qy_Jt1GYnvpjUEeVZZMRIdwgnQ&oe=621D237E&_nc_sid=7bff83', username='houston_2731', likes=1, comments=0, commentsDisabled=False, isVideo=False))
Now for this reason specifically this output cannot be inserted into the database because it creates a value error caused by the nested tuple because of its type. the database doesn't recognize its type and then fails. so now what I want to do is extract the nested tuple and use it as the main tuple. How do I go about doing that?
class insta():
def instagram(self):
dbname = '******'
user = '******'
password = '******'
host = '******'
port = ****
cur = None
conn = None
try:
conn = psycopg2.connect(
dbname = dbname,
user = user,
password = password,
host = host,
port = port
)
cur = conn.cursor()
cur.execute('DROP TABLE IF EXISTS Machine_instagram')
create_table = '''CREATE TABLE IF NOT EXISTS Machine_instagram (
id serial PRIMARY KEY,
url char,
date timestamp,
content char,
thumbnailUrl char,
displayUrl char,
username char,
likes int,
comments int,
commentsDisabled bool,
isVideo bool)'''
cur.execute(create_table)
for b in enumerate(sninstagram.InstagramUserScraper(username='houston_2731').get_items()):
insert_insta = 'INSERT INTO Machine_instagram (url, date, content,thumbnailUrl, displayUrl, username, likes, comments, commentsDisabled, isVideo) VALUES (%s, %s, %s, %s,%s, %s, %s, %s, %s, %s)'
insert_values = [(b)]
for records in insert_values:
cur.execute(insert_insta, records)
conn.commit()
print('completed')
except Exception as error:
print(error)
finally:
if cur is not None:
cur.close()
if conn is not None:
conn.close()
insta1 = insta()
insta1.instagram()
Related
I am writing a basic banking app to keep my skills fresh and am trying to implement a transaction table into the mix to of course keep track of transactions. This happens as I am trying to insert a new transaction programmatically using Python and psycopg[binary]. I have it the same as I do for creating new accounts and customers so I am at a loss for words after trying everything I can think of including using a formatted string for my parameters and the same problem was occurring. I will include a clip of the code that has the unexpected behavior as well as clips where the code is similar and follows the expected behavior. I even forced an error in the good code so that it would show that it has the data in a single tuple.
def create_transaction(self, transaction: Transaction) -> Transaction:
logging.info("Beginning DAL function create transaction")
sql = "insert into banking.transactions values (default, %s, %s, %s, %s) returning transaction_id;"
cursor = connection.cursor()
cursor.execute(sql, (transaction.account_id, transaction.transaction_type, transaction.amount, transaction.time_and_date))
connection.commit()
transaction_id = cursor.fetchone()[0]
transaction.transaction_id = transaction_id
logging.info("Finishing DAL function create transaction")
return transaction
def create_customer(self, customer: Customer) -> Customer:
logging.info("Beginning DAL function create customer")
sql = "insert into banking.customers values (default, %s, %s, %s, %s) returning customer_id;"
cursor = connection.cursor()
cursor.execute(sql, (customer.first_name, customer.last_name, customer.username, customer.password))
connection.commit()
customer_id = cursor.fetchone()[0]
customer.customer_id = customer_id
logging.info("Finishing DAL function create customer")
return customer
def create_account(self, account: BankAccount) -> BankAccount:
logging.info("Beginning DAL function create account")
sql = "insert into banking.bank_accounts values (default, %s, %s) returning account_id;"
cursor = connection.cursor()
cursor.execute(sql, (account.customer_id, account.balance))
connection.commit()
account_id = cursor.fetchone()[0]
account.account_id = account_id
logging.info("Finishing DAL function create account")
return account
query = 'insert into banking.bank_accounts values (default, %d, %s) returning account_id;'
params = (-1, 50.0)
query = 'insert into banking.transactions values (default, %s, %s, %s, %s) returning transaction_id;'
params = ((-1,), ('deposit',), (50.0,), '2022-11-28 21:56:22.000486')
def execute(
self: _Self,
query: Query,
params: Optional[Params] = None,
*,
prepare: Optional[bool] = None,
binary: Optional[bool] = None,
) -> _Self:
"""
Execute a query or command to the database.
"""
try:
with self._conn.lock:
self._conn.wait(
self._execute_gen(query, params, prepare=prepare, binary=binary)
)
except e.Error as ex:
raise ex.with_traceback(None)
E psycopg.errors.InvalidTextRepresentation: invalid input syntax for integer: "(-1)"
I have it the same as I do for creating new accounts and customers so I am at a loss for words after trying everything I can think of including using a formatted string for my parameters and the same problem was occurring. I have tried typecasting it to an integer before using the execute method but it happens within that line of code.
I'm using snscrape lib to scrape twitter data off twitter. I want to insert this data into my database but I seem to be failing no matter what method I try. when I use a loop and create a sql query after the loop to insert the values 1 by 1. I get an IndexError and a TypeError. When I try to append the data into a list. I can't loop in to each value 1 by 1. Now I'm stuck and don't know what to do.
method 1
class Tweet_list():
def tweets_list1(self):
dbname = '******'
user = '******'
password = '******'
host = '*******'
port = ****
cur = None
conn = None
try:
conn = psycopg2.connect(
dbname = dbname,
user = user,
password = password,
host = host,
port = port
)
cur = conn.cursor()
cur.execute('DROP TABLE IF EXISTS Machine_twitter')
create_table = '''CREATE TABLE IF NOT EXISTS Machine_twitter (
id int PRIMARY KEY,
Tweet text,
Tweet_id int,
Timestamp timestamp,
Replys int,
Retweets int,
Likes int,
Username char)'''
cur.execute(create_table)
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:TheHoopCentral').get_items()):
if i > 5:
break
insert_tweet = 'INSERT INTO Machine_twitter (Tweet, Tweet_id, Timestamp, Replys, Retweets, Likes, Username) VALUES (%s, %s, %s, %s,%s, %s, %s)'
insert_values = (tweet.content, tweet.id, tweet.date, tweet.replyCount, tweet.retweetCount, tweet.likeCount, tweet.user.username)
cur.execute(insert_tweet, insert_values)
conn.commit()
print('completed')
except Exception as error:
print(error)
finally:
if cur is not None:
cur.close()
if conn is not None:
conn.close()
tweets = Tweet_list()
tweets2 = Tweet_list()
tweets2.tweets_list1()
error
IndexError: list index out of range
method 2
def update_list1(self):
tweets_list2 = []
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:TheHoopCentral').get_items()):
if i > 100:
break
tweets_list2.append([tweet.content, tweet.id,tweet.likeCount, tweet.retweetCount, tweet.replyCount, tweet.user.username])
tweet_df = pd.DataFrame(tweets_list2, columns=('tweet', 'tweet id', 'likeCount', 'retweetCount', 'replyCount', 'username'))
tweet_df.head()
the problem with the second method is that after the list gets appended. I can't access the values(eg. tweet.content) so I can insert them into the database. I've tried every method under the sun but I'm failing miserably can somebody help.
I have been trying to return the values from two different tables, but can't seem to get the c.execute(query) function to return what I want it to. Currently my code will return the first c.fetchone()[0], but the second fetchone()[5] gives an error that it's out of range, which means it is probably still trying to get data from my 'clients' table which does not have 6 columns. I don't think I fully understand how MySQLdb works it's magic, but can't find any good examples of multi-table queries. Here is my code snippet below! Thanks!
c, conn = connection()
#check if already exists
x = c.execute("SELECT * FROM clients WHERE email = (%s)", (thwart(email),))
if int(x) > 0:
flash("That email already has an account, please try a new email or sign in.")
return render_template('register.html', form=form)
else:
c.execute("INSERT INTO clients (email, phone, password) VALUES (%s, %s, %s)", (thwart(email), thwart(phone), thwart(password)))
c.execute("SELECT cid FROM clients WHERE email = (%s)", (thwart(email),))
clientcid = c.fetchone()[0]
c.execute("INSERT INTO cpersonals (first_name, last_name, address, zip) VALUES (%s, %s, %s, %s)", (thwart(first_name), thwart(last_name), thwart(address), czip))
c.execute("SELECT reg_date FROM cpersonals WHERE cid = (%s)", (clientcid,))
reg_date = c.fetchone()[5]
rating = c.execute("SELECT rating FROM clients WHERE email = (%s)", (thwart(email),))
conn.commit()
flash("Thanks for registering!")
c.close()
conn.close()
Your query is SELECT reg_date FROM cpersonals .... You are only selecting one column. The reason fetchone()[5] fails is, there is no 6th column in the fetched record. Try 0 in place of 5.
Why were you using 5?
I am trying to store some TV information in a MySQLdb. I have tried about everything and I cannot get the variables to post. There is information in the variables as I am able to print the information.
My Code:
import pytvmaze
import MySQLdb
AddShow = pytvmaze.get_show(show_name='dexter')
MazeID = AddShow.maze_id
ShowName = "Show" + str(MazeID)
show = pytvmaze.get_show(MazeID, embed='episodes')
db = MySQLdb.connect("localhost","root","XXXXXXX","TVshows" )
cursor = db.cursor()
for episode in show.episodes:
Show = show.name
ShowStatus = show.status
ShowSummary = show.summary
Updated = show.updated
Season = episode.season_number
Episode = episode.episode_number
Title = episode.title
AirDate = episode.airdate
ShowUpdate = show.updated
EpisodeSummary = episode.summary
try:
sql = "INSERT INTO " + ShowName + " VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""" (Show,ShowStatus,ShowSummary,Updated,Season,Episode,Title,AirDate,ShowUpdate,EpisodeSummary)
cursor.execute(sql)
db.commit()
except:
db.rollback()
db.close()
Any thoughts? Thanks in advance.
EDIT - WORKING CODE
import pytvmaze
import MySQLdb
AddShow = pytvmaze.get_show(show_name='dexter')
MazeID = AddShow.maze_id
ShowNameandID = "Show" + str(MazeID)
show = pytvmaze.get_show(MazeID, embed='episodes')
db = MySQLdb.connect("localhost","root","letmein","TVshows" )
cursor = db.cursor()
for episode in show.episodes:
ShowName = show.name
ShowStatus = show.status
ShowSummary = show.summary
Updated = show.updated
Season = episode.season_number
Episode = episode.episode_number
Title = episode.title
AirDate = episode.airdate
ShowUpdate = show.updated
EpisodeSummary = episode.summary
sql = "INSERT INTO " + ShowNameandID + """ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
cursor.execute(sql, (ShowName, ShowStatus, ShowSummary, Updated, Season, Episode, Title, AirDate, ShowUpdate, EpisodeSummary))
db.commit()
print sql ##Great for debugging
db.close()
First of all, you've actually made things more difficult for yourself by catching all the exceptions via bare try/expect and then silently rolling back. Temporarily remove the try/except and see what the real error is, or log the exception in the except block. I bet the error would be related to a syntax error in the query since you would miss the quotes around the column value(s).
Anyway, arguably the biggest problem you have is how you pass the variables into the query. Currently, you are using string formatting, which is highly not recommended because of the SQL injection attack danger and problems with type conversions. Parameterize your query:
sql = """
INSERT INTO
{show}
VALUES
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
""".format(show=Show)
cursor.execute(sql, (ShowStatus, ShowSummary, Updated, Season, Episode, Title, AirDate, ShowUpdate, EpisodeSummary))
Note that it is not possible to parameterize the table name (Show in your case) - we are using string formatting for it - make sure you either trust your source, or escape it manually via MySQLdb.escape_string(), or validate it with a separate custom code.
I'm new to Python (learnt how to code with it in 2 days ago). I'm trying to get feeds from MySQL database and insert theme into other table. But nothing inserted.
Here is my code:
cnx = MySQLConnection(**db_config)
if cnx.is_connected():
print("Database connected successfully...")
cursor = cnx.cursor(dictionary=True)
cursor.execute("SELECT * from external_feeds WHERE discipline = 'ALL' AND actif = 1")
rows = cursor.fetchall()
insert_feed = ("INSERT INTO feeds "
"(categorie, urlflux, titreflux, photonews, textnews, date, titrenews, liensnews, slug, photo)"
"VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
for row in rows:
feed = feedparser.parse(row["url"])
feed_link = row["url"]
name = row["name"]
image = row["photo"]
category = row["discipline"]
x = len(feed.entries)
for i in range(x):
feed_title = feed.entries[i].title
print feed_title
feed_url = feed.entries[i].link
print feed_url
feed_published = feed.entries[i].published
dPubPretty = strftime(feed_published, gmtime())
feed_description = feed.entries[i].description
slug = re.sub('[^a-zA-Z0-9 \n\-]', '', feed_url)
slug = slug.replace('httpwww', '')
slug = slug.replace('http', '')
# print insert_feed
data_feed = (category, feed_link, name, None, feed_description, dPubPretty, feed_title, feed_url, slug, image)
try:
cursor.execute(insert_feed, data_feed)
cursor.commit()
except:
cnx.rollback()
cursor.close()
Is there anyone who can help me figure out where the problem is? I am completly new to this so I'm totally lost
I see that you are performing 'cursor.commit()' after inserting the data, which is incorrect, try using 'cnx.commit()'.