How can I put from how much to how much data to enter into my mysql database, api limits me to 1000 per day. So I would like if I enter 1000 for one session so that I can continue from 1000 and up and so on until I enter everything
mysql=mysql.connector.connect(
host = host_input,
user = user_input,
passwd = passwd_input,
database = database_input
)
with open('sadrzaj.json') as json_file:
data = json.load(json_file)
for p in data['filmovi']:
naslov = p['naslov']
iframe = p['iframe']
opis = p['opis']
movie = GetMovie(title=naslov, api_key='5f8abea5', plot='full')
info = movie.get_data('Title', 'imdbRating', 'Genre', 'Year', 'Runtime', 'Country', 'Plot', 'Poster', 'Type', 'Language')
imdbRating = info["imdbRating"]
genre = info["Genre"]
year = info["Year"]
runtime = info["Runtime"]
country = info["Country"]
poster = info["Poster"]
typee = info['Type']
language = info["Language"]
mycursor = mysql.cursor()
sql = "INSERT INTO serije_filmovi(naslov, iframe, opis, imdbRating, genre, years, runtime, country, poster, typee, language) values(%s,%s, %s, %s, %s, %s,%s, %s, %s, %s, %s)"
val = (naslov, iframe, opis, imdbRating, genre, year, runtime, country, poster, typee, language)
print(naslov)
mycursor.execute(sql,val)
mysql.commit();
Related
I'm using snscrape to scrape instagram. snscrape returns the data in tuple format but it creates the instagram data in a nested tuple. eg.
for b in enumerate(sninstagram.InstagramUserScraper(username='houston_2731').get_items()):
print[(b)]
output
(0, InstagramPost(url='https://www.instagram.com/p/CUdFfjEImHN/', date=datetime.datetime(2021, 9, 30, 17, 39, 20, tzinfo=datetime.timezone.utc), content='"Hardwork plus patience. A symbol of my sacrifice I\'m doing waiting." Nipsey Hussle \n\nIt\'s hard to believe what 5 months and a disciplined diet and hitting the gym hard can do. The first pic in the collage is me at a challenging point in my life. Depression and what not but I had to snap out of it and get in the gym and do the work. As I continue to embark on this fitness journey. I hope to inspire some to join me on this journey. \n\n#fitness #weightloss #muscles #gymmotivation #gymrat #intermittentfasting #fitnessmotivation #fitnessjourney #tenpercentbodyfat #shredded #fitnessgoals #hardwork #patience #discipline #dedication #hunger', thumbnailUrl='https://instagram.fjnb12-1.fna.fbcdn.net/v/t51.2885-15/243385646_584565779558058_6508985384396360110_n.webp?stp=dst-jpg_e35_s640x640_sh0.08&_nc_ht=instagram.fjnb12-1.fna.fbcdn.net&_nc_cat=106&_nc_ohc=nrtaOwxdg64AX8NQE-Z&edm=ABfd0MgBAAAA&ccb=7-4&oh=00_AT_xE-O75IP4MezdzoHM_WxAgbXiivb3aBFUMopAkxxJSA&oe=621D237E&_nc_sid=7bff83', displayUrl='https://instagram.fjnb12-1.fna.fbcdn.net/v/t51.2885-15/243385646_584565779558058_6508985384396360110_n.webp?stp=dst-jpg_e35&_nc_ht=instagram.fjnb12-1.fna.fbcdn.net&_nc_cat=106&_nc_ohc=nrtaOwxdg64AX8NQE-Z&edm=ABfd0MgBAAAA&ccb=7-4&oh=00_AT8JXpM2XKqA_d06LV10Qy_Jt1GYnvpjUEeVZZMRIdwgnQ&oe=621D237E&_nc_sid=7bff83', username='houston_2731', likes=1, comments=0, commentsDisabled=False, isVideo=False))
Now for this reason specifically this output cannot be inserted into the database because it creates a value error caused by the nested tuple because of its type. the database doesn't recognize its type and then fails. so now what I want to do is extract the nested tuple and use it as the main tuple. How do I go about doing that?
class insta():
def instagram(self):
dbname = '******'
user = '******'
password = '******'
host = '******'
port = ****
cur = None
conn = None
try:
conn = psycopg2.connect(
dbname = dbname,
user = user,
password = password,
host = host,
port = port
)
cur = conn.cursor()
cur.execute('DROP TABLE IF EXISTS Machine_instagram')
create_table = '''CREATE TABLE IF NOT EXISTS Machine_instagram (
id serial PRIMARY KEY,
url char,
date timestamp,
content char,
thumbnailUrl char,
displayUrl char,
username char,
likes int,
comments int,
commentsDisabled bool,
isVideo bool)'''
cur.execute(create_table)
for b in enumerate(sninstagram.InstagramUserScraper(username='houston_2731').get_items()):
insert_insta = 'INSERT INTO Machine_instagram (url, date, content,thumbnailUrl, displayUrl, username, likes, comments, commentsDisabled, isVideo) VALUES (%s, %s, %s, %s,%s, %s, %s, %s, %s, %s)'
insert_values = [(b)]
for records in insert_values:
cur.execute(insert_insta, records)
conn.commit()
print('completed')
except Exception as error:
print(error)
finally:
if cur is not None:
cur.close()
if conn is not None:
conn.close()
insta1 = insta()
insta1.instagram()
I'm wring a web scraping program to collect data from truecar.com
my database has 3 columns
and when I run the program I get an error which is this : list indext out of range
here is what I've done so far:
import mysql.connector
from bs4 import BeautifulSoup
import requests
import re
# take the car's name
requested_car_name = input()
# inject the car's name into the URL
my_request = requests.get('https://www.truecar.com/used-cars-for-sale/listings/' +
requested_car_name + '/location-holtsville-ny/?sort[]=best_match')
my_soup = BeautifulSoup(my_request.text, 'html.parser')
# ************ car_model column in database ******************
car_model = my_soup.find_all(
'span', attrs={'class': 'vehicle-header-make-model text-truncate'})
# we have a list of car models
car_list = []
for item in range(20):
# appends car_model to car_list
car_list.append(car_model[item].text)
car_string = ', '.join('?' * len(car_list))
# ************** price column in database *****************************
price = my_soup.find_all(
'div', attrs={'data-test': 'vehicleCardPricingBlockPrice'})
price_list = []
for item in range(20):
# appends price to price_list
price_list.append(price[item].text)
price_string = ', '.join('?' * len(price_list))
# ************** distance column in database ***************************
distance = my_soup.find_all('div', attrs={'data-test': 'vehicleMileage'})
distance_list = []
for item in range(20):
# appends distance to distance_list
distance_list.append(distance[item].text)
distance_string = ', '.join('?' * len(distance_list))
# check the connection
print('CONNECTING ...')
mydb = mysql.connector.connect(
host="xxxxx",
user="xxxxxx",
password="xxxxxx",
port='xxxxxx',
database='xxxxxx'
)
print('CONNECTED')
# checking the connection is done
my_cursor = mydb.cursor(buffered=True)
insert_command = 'INSERT INTO car_name (car_model, price, distance) VALUES (%s, %s, %s);' % (car_string, price_string, distance_string)
# values = (car_string, price_string, distance_string)
my_cursor.execute(insert_command, car_list, price_list, distance_list)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
and I have another problem that I can't insert a list into my columns and I have tried many ways but unfortunately I wasn't able to get it working
I think the problem is in this line:
IndexError Traceback (most recent call last)
<ipython-input-1-4a3930bf0f57> in <module>
23 for item in range(20):
24 # appends car_model to car_list
---> 25 car_list.append(car_model[item].text)
26
27 car_string = ', '.join('?' * len(car_list))
IndexError: list index out of range
I don't want it to insert the whole list to 1 row in database . I want the first 20 car's price, model, mileage in truecar.com in my database
Ya you are hard coding the length. Change how you are iterating through your soup elements. So:
import mysql.connector
from bs4 import BeautifulSoup
import requests
# take the car's name
requested_car_name = input('Enter car name: ')
# inject the car's name into the URL
my_request = requests.get('https://www.truecar.com/used-cars-for-sale/listings/' +
requested_car_name + '/location-holtsville-ny/?sort[]=best_match')
my_soup = BeautifulSoup(my_request.text, 'html.parser')
# ************ car_model column in database ******************
car_model = my_soup.find_all(
'span', attrs={'class': 'vehicle-header-make-model text-truncate'})
# we have a list of car models
car_list = []
for item in car_model:
# appends car_model to car_list
car_list.append(item.text)
# ************** price column in database *****************************
price = my_soup.find_all(
'div', attrs={'data-test': 'vehicleCardPricingBlockPrice'})
price_list = []
for item in price:
# appends price to price_list
price_list.append(item.text)
# ************** distance column in database ***************************
distance = my_soup.find_all('div', attrs={'data-test': 'vehicleMileage'})
distance_list = []
for item in distance:
# appends distance to distance_list
distance_list.append(item.text)
# check the connection
print('CONNECTING ...')
mydb = mysql.connector.connect(
host="xxxxx",
user="xxxxxx",
password="xxxxxx",
port='xxxxxx',
database='xxxxxx'
)
print('CONNECTED')
# checking the connection is done
my_cursor = mydb.cursor(buffered=True)
insert_command = 'INSERT INTO car_name (car_model, price, distance) VALUES (%s, %s, %s)'
values = list(zip(car_list, price_list, distance_list))
my_cursor.executemany(insert_command, values)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
ALTERNATE:
there's also the API where you can fetch the dat:
import mysql.connector
import requests
import math
# take the car's name
requested_car_name = input('Enter car name: ')
# inject the car's name into the URL
url = 'https://www.truecar.com/abp/api/vehicles/used/listings'
payload = {
'city': 'holtsville',
'collapse': 'true',
'fallback': 'true',
'include_incentives': 'true',
'include_targeted_incentives': 'true',
'make_slug': requested_car_name,
'new_or_used': 'u',
'per_page': '30',
'postal_code': '',
'search_event': 'true',
'sort[]': 'best_match',
'sponsored': 'true',
'state': 'ny',
'page':'1'}
jsonData = requests.get(url, params=payload).json()
total = jsonData['total']
total_pages = math.ceil(total/30)
total_pages_input = input('There are %s pages to iterate.\nEnter the number of pages to go through or type ALL: ' %total_pages)
if total_pages_input.upper() == 'ALL':
total_pages = total_pages
else:
total_pages = int(total_pages_input)
values = []
for page in range(1,total_pages+1):
if page == 1:
car_listings = jsonData['listings']
else:
payload.update({'page':'%s' %page})
jsonData = requests.get(url, params=payload).json()
car_listings = jsonData['listings']
for listing in car_listings:
vehicle = listing['vehicle']
ex_color = vehicle['exterior_color']
in_color = vehicle['interior_color']
location = vehicle['location']
price = vehicle['list_price']
make = vehicle['make']
model = vehicle['model']
mileage = vehicle['mileage']
style = vehicle['style']
year = vehicle['year']
engine = vehicle['engine']
accidentCount = vehicle['condition_history']['accidentCount']
ownerCount = vehicle['condition_history']['ownerCount']
isCleanTitle = vehicle['condition_history']['titleInfo']['isCleanTitle']
isFrameDamaged = vehicle['condition_history']['titleInfo']['isFrameDamaged']
isLemon = vehicle['condition_history']['titleInfo']['isLemon']
isSalvage = vehicle['condition_history']['titleInfo']['isSalvage']
isTheftRecovered = vehicle['condition_history']['titleInfo']['isTheftRecovered']
values.append((ex_color, in_color,location,price,make,model,mileage,
style,year,engine,accidentCount,ownerCount,isCleanTitle,isFrameDamaged,
isLemon, isSalvage,isTheftRecovered))
print('Completed: Page %s of %s' %(page,total_pages))
# check the connection
print('CONNECTING ...')
mydb = mysql.connector.connect(
host="xxxxx",
user="xxxxxx",
password="xxxxxx",
port='xxxxxx',
database='xxxxxx'
)
print('CONNECTED')
# checking the connection is done
my_cursor = mydb.cursor(buffered=True)
# create_command = ''' create table car_information (exterior_color varchar(255), interior_color varchar(255),location varchar(255),price varchar(255),make varchar(255),model varchar(255),mileage varchar(255),
# style varchar(255),year varchar(255),engine varchar(255),accidentCount varchar(255),ownerCount varchar(255),isCleanTitle varchar(255),isFrameDamaged varchar(255),
# isLemon varchar(255), isSalvage varchar(255),isTheftRecovered varchar(255))'''
# my_cursor.execute(create_command)
# print('created')
insert_command = '''INSERT INTO car_name (exterior_color, interior_color,location,price,make,model,mileage,
style,year,engine,accidentCount,ownerCount,isCleanTitle,isFrameDamaged,
isLemon, isSalvage,isTheftRecovered) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''
my_cursor.executemany(insert_command, values)
mydb.commit()
print(my_cursor.rowcount, "Record Inserted")
mydb.close()
the problem seems to be that the list of car models has less than 20 entries.
for item in range(20):
car_list.append(car_model[item].text)
this always tries to append exactly 20 items to the car list. if you have less than 20 entries, there is an error, because car_model[20].text does not exist when there are only 10 entries. you can try
for item in range(len(car_model)):
car_list.append(car_model[item].text)
I want to create a dataframe and update it to mysql.
If there is a duplicate key, it will be updated and if there is no duplicate key, it will be inserted.
user = 'test'
passw = '...'
host = '...'
port = '...'
database = '...'
conn = pymysql.connect(host=host,
port=port,
user=user,
password=passw,
database=database,
charset='utf8')
curs = conn.cursor()
data = list(dataframe.itertuples(index=False, name=None))
sql = "insert into naversbmapping(brand, startdate, enddate, cost, daycost) values (%s, %s, %s, %s, %s) on duplicate key update brand = %s, startdate = %s, enddate = %s, cost = %s, daycost = %s"
curs.executemany(sql, data)
conn.commit()
conn.close()
However, I get the following error. How do I fix it?
pymysql.err.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '%s, startdate = %s, enddate = %s, cost = %s, daycost = %s' at line 1")
)
You use following MySQL constriuct so that you don't need the data twice as you have the double number of values on your original, but are only sending it once
$sql = "INSERT INTO naversbmapping(brand, startdate, enddate, cost, daycost) VALUES (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE brand = VALUES(brand), startdate = VALUES(startdate), enddate = VALUES(enddate), cost = VALUES(cost), daycost = VALUES(daycost)")
I am trying to store some TV information in a MySQLdb. I have tried about everything and I cannot get the variables to post. There is information in the variables as I am able to print the information.
My Code:
import pytvmaze
import MySQLdb
AddShow = pytvmaze.get_show(show_name='dexter')
MazeID = AddShow.maze_id
ShowName = "Show" + str(MazeID)
show = pytvmaze.get_show(MazeID, embed='episodes')
db = MySQLdb.connect("localhost","root","XXXXXXX","TVshows" )
cursor = db.cursor()
for episode in show.episodes:
Show = show.name
ShowStatus = show.status
ShowSummary = show.summary
Updated = show.updated
Season = episode.season_number
Episode = episode.episode_number
Title = episode.title
AirDate = episode.airdate
ShowUpdate = show.updated
EpisodeSummary = episode.summary
try:
sql = "INSERT INTO " + ShowName + " VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""" (Show,ShowStatus,ShowSummary,Updated,Season,Episode,Title,AirDate,ShowUpdate,EpisodeSummary)
cursor.execute(sql)
db.commit()
except:
db.rollback()
db.close()
Any thoughts? Thanks in advance.
EDIT - WORKING CODE
import pytvmaze
import MySQLdb
AddShow = pytvmaze.get_show(show_name='dexter')
MazeID = AddShow.maze_id
ShowNameandID = "Show" + str(MazeID)
show = pytvmaze.get_show(MazeID, embed='episodes')
db = MySQLdb.connect("localhost","root","letmein","TVshows" )
cursor = db.cursor()
for episode in show.episodes:
ShowName = show.name
ShowStatus = show.status
ShowSummary = show.summary
Updated = show.updated
Season = episode.season_number
Episode = episode.episode_number
Title = episode.title
AirDate = episode.airdate
ShowUpdate = show.updated
EpisodeSummary = episode.summary
sql = "INSERT INTO " + ShowNameandID + """ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
cursor.execute(sql, (ShowName, ShowStatus, ShowSummary, Updated, Season, Episode, Title, AirDate, ShowUpdate, EpisodeSummary))
db.commit()
print sql ##Great for debugging
db.close()
First of all, you've actually made things more difficult for yourself by catching all the exceptions via bare try/expect and then silently rolling back. Temporarily remove the try/except and see what the real error is, or log the exception in the except block. I bet the error would be related to a syntax error in the query since you would miss the quotes around the column value(s).
Anyway, arguably the biggest problem you have is how you pass the variables into the query. Currently, you are using string formatting, which is highly not recommended because of the SQL injection attack danger and problems with type conversions. Parameterize your query:
sql = """
INSERT INTO
{show}
VALUES
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
""".format(show=Show)
cursor.execute(sql, (ShowStatus, ShowSummary, Updated, Season, Episode, Title, AirDate, ShowUpdate, EpisodeSummary))
Note that it is not possible to parameterize the table name (Show in your case) - we are using string formatting for it - make sure you either trust your source, or escape it manually via MySQLdb.escape_string(), or validate it with a separate custom code.
I'm new to Python (learnt how to code with it in 2 days ago). I'm trying to get feeds from MySQL database and insert theme into other table. But nothing inserted.
Here is my code:
cnx = MySQLConnection(**db_config)
if cnx.is_connected():
print("Database connected successfully...")
cursor = cnx.cursor(dictionary=True)
cursor.execute("SELECT * from external_feeds WHERE discipline = 'ALL' AND actif = 1")
rows = cursor.fetchall()
insert_feed = ("INSERT INTO feeds "
"(categorie, urlflux, titreflux, photonews, textnews, date, titrenews, liensnews, slug, photo)"
"VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
for row in rows:
feed = feedparser.parse(row["url"])
feed_link = row["url"]
name = row["name"]
image = row["photo"]
category = row["discipline"]
x = len(feed.entries)
for i in range(x):
feed_title = feed.entries[i].title
print feed_title
feed_url = feed.entries[i].link
print feed_url
feed_published = feed.entries[i].published
dPubPretty = strftime(feed_published, gmtime())
feed_description = feed.entries[i].description
slug = re.sub('[^a-zA-Z0-9 \n\-]', '', feed_url)
slug = slug.replace('httpwww', '')
slug = slug.replace('http', '')
# print insert_feed
data_feed = (category, feed_link, name, None, feed_description, dPubPretty, feed_title, feed_url, slug, image)
try:
cursor.execute(insert_feed, data_feed)
cursor.commit()
except:
cnx.rollback()
cursor.close()
Is there anyone who can help me figure out where the problem is? I am completly new to this so I'm totally lost
I see that you are performing 'cursor.commit()' after inserting the data, which is incorrect, try using 'cnx.commit()'.