I have the following view function used to scrape data:
def results(request):
    if request.method == 'POST':
        form = RoomForm(request.POST)
        if form.is_valid():
            form_city = form.cleaned_data['city'].title()
            form_country = form.cleaned_data['country'].title()
            form_arrival_date = form.cleaned_data['arrival_date']
            form_departure_date = form.cleaned_data['departure_date']
            form_pages_to_scrape = form.cleaned_data['pages_to_scrape']

            #launch scraper
            scraper = AIRBNB_scraper(city=form_city, country=form_country, arrival_date=str(form_arrival_date), departure_date=str(form_departure_date))
            scraped_dataframe = scraper.scrape_multiple_pages(last_page_selector_number=form_pages_to_scrape)
            scraped_dataframe_sorted = scraped_dataframe.sort_values('prices')
            print(scraped_dataframe_sorted)

            #convert scraped dataframe into lists
            prices = scraped_dataframe_sorted['prices'].tolist()
            listings_links = scraped_dataframe_sorted['listings_links'].tolist()
            listings_names = scraped_dataframe_sorted['listings_names'].tolist()
            photo_links = scraped_dataframe_sorted['photo_links'].tolist()

            dictionary = zip(prices, listings_links, listings_names, photo_links)
            context = {'dictionary': dictionary}

            return render(request, 'javascript/results.html', context)
On form submit, a post request is sent to this function using AJAX:
var frm = $('#login-form');
frm.submit(function () {
    $.ajax({
        type: "POST",
        url: "/results",
        data: frm.serialize(),
        success: function (data) {
            $("#table").html(data);
            $('#go_back').remove();
        },
        error: function(data) {
            $("#table").html("Something went wrong!");
        }
    });
    return false;
});
After that, the scraped data is displayed as an HTML table on the same page the form is on.
The problem is that the number of scraped items doubles every time the form is submitted. So, for example, if the number of scraped items on the first button click is sixteen, the output will be 16, but on the second run it will be 32, then 64, and so on.
It is like the app remembers previous form submits, but I don't see any reason why. I tried clearing, at the end of this function, the pandas dataframe used to store the scraped data and also the dictionary passed as context, but to no avail.
The form is:
class RoomForm(forms.Form):
    city = forms.CharField(max_length=100)
    country = forms.CharField(max_length=100)
    arrival_date = forms.DateField(widget=forms.DateInput(attrs=
        {
            'class':'datepicker'
        }), required=False)
    departure_date = forms.DateField(widget=forms.DateInput(attrs=
        {
            'class':'datepicker'
        }), required=False)
    pages_to_scrape = forms.IntegerField(label='Pages to scrape (max. 17)', min_value=0, max_value=17, widget=forms.NumberInput(attrs={'style':'width: 188px'}))
AIRBNB_scraper is:
import requests, bs4
import re
import pandas as pd

price_pattern = re.compile(r'\d*\s*?,?\s*?\d*\szł')
photo_link_pattern = re.compile(r'https.*\)')

prices = []
listings_links = []
photo_links = []
listings_names = []

class AIRBNB_scraper():

    def __init__(self, city, country, accomodation_type='homes', arrival_date='2018-03-25', departure_date='2018-04-10'):
        self.city = city
        self.country = country
        self.arrival_date = arrival_date
        self.departure_date = departure_date
        self.accomodation_type = accomodation_type

    def make_soup(self, page_number):
        url = 'https://www.airbnb.pl/s/'+ self.city +'--'+ self.country +'/'+ self.accomodation_type +'?query='+ self.city +'%2C%20'+ self.country +'&refinement_paths%5B%5D=%2F'+ self.accomodation_type +'&checkin=' + self.arrival_date + '&checkout=' + self.departure_date + '&section_offset=' + str(page_number)
        response = requests.get(url)
        soup = bs4.BeautifulSoup(response.text, "html.parser")
        return soup

    def get_listings(self, page_number):
        soup = self.make_soup(page_number)
        listings = soup.select('._f21qs6')
        number_of_listings = len(listings)
        print('\n' + "Number of listings found: " + str(number_of_listings))
        while number_of_listings != 18:
            print('\n' + str(number_of_listings) + ' is not correct number of listings, it should be 18. Trying again now.')
            soup = self.make_soup(page_number)
            listings = soup.find_all('div', class_='_f21qs6')
            number_of_listings = len(listings)
        print('\n' + "All fine! The number of listings is: " + str(number_of_listings) + '. Starting scraping now')
        return listings

    def scrape_listings_per_page(self, page_number):
        listings_to_scrape = self.get_listings(page_number)
        for listing in listings_to_scrape:
            #get price
            price_container = listing.find_all('span', class_='_hylizj6')
            price_search = re.search(price_pattern, str(price_container))
            price = price_search.group()
            #get listing_link
            listing_link = 'https://www.airbnb.pl' + listing.find('a', class_='_15ns6vh')['href']
            #get photo_link
            photo_link_node = listing.find('div', class_="_1df8dftk")['style']
            photo_link_search = re.search(photo_link_pattern, str(photo_link_node))
            #~ if photo_link_search:
                #~ print('Is regex match')
            #~ else:
                #~ print('No regex match')
            photo_link_before_strip = photo_link_search.group()
            photo_link = photo_link_before_strip[:-1] #remove ") at the end of link
            #get listing_name
            listing_name = listing.find('div', class_='_1rths372').text
            #append lists
            prices.append(price)
            listings_links.append(listing_link)
            photo_links.append(photo_link)
            listings_names.append(listing_name)

    def scrape_multiple_pages(self, last_page_selector_number):
        last_page_selector_number += 1
        for x in range(0, last_page_selector_number): #18
            self.scrape_listings_per_page(x)
            print('\n' + "INDEX OF PAGE BEING SCRAPED: " + str(x))
        scraped_data = pd.DataFrame({'prices': prices,
                                     'listings_links': listings_links,
                                     'photo_links': photo_links,
                                     'listings_names': listings_names})
        return scraped_data
You have module-level variables: prices, listings_links, etc. You append to these inside your AIRBNB_scraper instance but they are not part of that instance, and will persist between calls. You should make them instance attributes - define them as self.prices etc in the __init__ method.
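A minimal sketch of that change (only the parts of the class that need to move are shown; the extraction code inside scrape_listings_per_page stays exactly as in the question, just appending to the self. lists instead):

class AIRBNB_scraper():

    def __init__(self, city, country, accomodation_type='homes', arrival_date='2018-03-25', departure_date='2018-04-10'):
        self.city = city
        self.country = country
        self.arrival_date = arrival_date
        self.departure_date = departure_date
        self.accomodation_type = accomodation_type
        #fresh lists for every scraper instance, so nothing carries over between requests
        self.prices = []
        self.listings_links = []
        self.photo_links = []
        self.listings_names = []

    def scrape_listings_per_page(self, page_number):
        listings_to_scrape = self.get_listings(page_number)
        for listing in listings_to_scrape:
            #... extract price, listing_link, photo_link and listing_name as before ...
            self.prices.append(price)
            self.listings_links.append(listing_link)
            self.photo_links.append(photo_link)
            self.listings_names.append(listing_name)

    def scrape_multiple_pages(self, last_page_selector_number):
        last_page_selector_number += 1
        for x in range(0, last_page_selector_number):
            self.scrape_listings_per_page(x)
        return pd.DataFrame({'prices': self.prices,
                             'listings_links': self.listings_links,
                             'photo_links': self.photo_links,
                             'listings_names': self.listings_names})

Since results() creates a new AIRBNB_scraper on every POST, each request then starts with empty lists and the counts no longer accumulate.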
Related
I am trying to scrape all the possible data from this webpage: Gstaad 2017
Here is my code:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from selenium import webdriver
from selenium.webdriver.support.ui import Select

#Starts the driver and goes to our starting webpage
driver = webdriver.Chrome( "C:/Users/aldi/Downloads/chromedriver.exe")
driver.get('http://www.bvbinfo.com/Tournament.asp?ID=3294&Process=Matches')

#Imports HTML into python
page = requests.get('http://www.bvbinfo.com/Tournament.asp?ID=3294&Process=Matches')
soup = BeautifulSoup(driver.page_source, 'lxml')

stages = soup.find_all('div')
stages = driver.find_elements_by_class_name('clsTournBracketHeader')[-1].text

#TODO the first row (country quota matches) has no p tag and therefore it is not included in the data
rows = []
paragraphs = []
empty_paragraphs = []
for x in soup.find_all('p'):
    if len(x.get_text(strip=True)) != 0:
        paragraph = x.extract()
        paragraphs.append(paragraph)
    if len(x.get_text(strip=True)) == 0:
        empty_paragraph = x.extract()
        empty_paragraphs.append(empty_paragraph)

# players
home_team_player_1 = ''
home_team_player_2 = ''
away_team_player_1 = ''
away_team_player_2 = ''

for i in range(0, len(paragraphs)):
    #round and stage of the competition
    round_n = paragraphs[i].find('u').text
    paragraph_rows = paragraphs[i].text.split('\n')[1:-1]
    counter = 0
    for j in range(0,len(paragraph_rows)):

        #TODO tournament info, these can vary from tournament to tournament
        tournament_info = soup.find('td', class_ = 'clsTournHeader').text.strip().split()
        tournament_category = [' '.join(tournament_info[0 : 2])][0]
        tournament_prize_money = tournament_info[2]
        #TODO tournament city can also have two elements, not just one
        tournament_city = tournament_info[3]
        tournament_year = tournament_info[-1]
        tournament_days = tournament_info[-2][:-1].split("-")
        tournament_starting_day = tournament_days[0]
        tournament_ending_day = tournament_days[-1]
        tournament_month = tournament_info[-3]
        tournament_stars = [' '.join(tournament_info[5 : 7])][0]

        players = paragraphs[i].find_all('a', {'href':re.compile('.*player.*')})
        home_team_player_1 = players[counter+0].text
        home_team_player_2 = players[counter+1].text
        away_team_player_1 = players[counter+2].text
        away_team_player_2 = players[counter+3].text

        #matches
        match = paragraph_rows[j].split(":")[0].split()[-1].strip()

        #nationalities
        nationalities = ["United", "States"]
        if paragraph_rows[j].split("def.")[0].split("/")[1].split("(")[0].split(" ")[3] in nationalities:
            home_team_country = "United States"
        else:
            home_team_country = paragraph_rows[j].split("def.")[0].split("/")[1].split("(")[0].split(" ")[-2]
        if paragraph_rows[j].split("def.")[1].split("/")[1].split(" ")[3] in nationalities:
            away_team_country = "United States"
        else:
            away_team_country = paragraph_rows[j].split("def.")[1].split("/")[1].split("(")[0].split(" ")[-2]

        parentheses = re.findall(r'\(.*?\)', paragraph_rows[j])
        if "," in parentheses[0]:
            home_team_ranking = parentheses[0].split(",")[0]
            home_team_ranking = home_team_ranking[1:-1]
            home_team_qualification_round = parentheses[0].split(",")[1]
            home_team_qualification_round = home_team_qualification_round[1:-1]
        else:
            home_team_ranking = parentheses[0].split(",")[0]
            home_team_ranking = home_team_ranking[1:-1]
            home_team_qualification_round = None
        if "," in parentheses[1]:
            away_team_ranking = parentheses[1].split(",")[0]
            away_team_ranking = away_team_ranking[1:-1]
            away_team_qualification_round = parentheses[1].split(",")[1]
            away_team_qualification_round = away_team_qualification_round[1:-1]
        else:
            away_team_ranking = parentheses[1].split(",")[0]
            away_team_ranking = away_team_ranking[1:-1]
            match_duration = parentheses[2]
            match_duration = match_duration[1:-1]
            away_team_qualification_round = None

        # sets
        sets = re.findall(r'\).*?\(', paragraph_rows[j])
        sets = sets[1][1:-1]
        if len(sets.split(",")) == 2:
            score_set1 = sets.split(",")[0]
            score_set2 = sets.split(",")[1]
            score_set3 = None
        if len(sets.split(",")) == 3:
            score_set1 = sets.split(",")[0]
            score_set2 = sets.split(",")[1]
            score_set3 = sets.split(",")[2]

        row = { " home_team_player_1 ": home_team_player_1 ,
                " home_team_player_2": home_team_player_2,
                "away_team_player_1": away_team_player_1,
                "away_team_player_2": away_team_player_1,
                "match": match,
                "home_team_country": home_team_country,
                "away_team_country": away_team_country,
                "home_team_ranking": home_team_ranking,
                "away_team_ranking": away_team_ranking,
                "match_duration": match_duration,
                "home_team_qualification_round": home_team_qualification_round,
                "away_team_qualification_round": away_team_qualification_round,
                "score_set1": score_set1,
                "score_set2": score_set2,
                "score_set3": score_set3,
                "tournament_category": tournament_category,
                "tournament_prize_money": tournament_prize_money,
                "tournament_city": tournament_city,
                "tournament_year": tournament_year,
                "tournament_starting_day": tournament_starting_day,
                "tournament_ending_day": tournament_ending_day,
                "tournament_month": tournament_month,
                "tournament_stars": tournament_stars,
                "round_n": round_n
                }
        counter += 4
        rows.append(row)

data = pd.DataFrame(rows)
data.to_csv("beachvb.csv", index = False)
I am not really experienced in web scraping. I have just started as a self-taught programmer and find the HTML source code quite messy and poorly structured.
I want to improve my code in two ways:
1. Include all the missing matches (country quota matches, semifinals, bronze medal, and gold medal) and the respective category for each match (country quota matches, pool, winner's bracket, semifinals, bronze medal, and gold medal).
2. Iterate the code over more years and tournaments from the dropdown menu at the top of the webpage.
I have tried to iterate through different years, but my code does not work:
tournament_years = {"FIVB 2015", "FIVB 2016"}
dfs = []
for year in tournament_years:
    # select desired tournament
    box_year = Select(driver.find_element_by_xpath("/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[1]/td[2]/select"))
    box_year.select_by_visible_text(year)
    box_matches = Select(driver.find_element_by_xpath("/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[2]/td[2]/select"))
    box_matches.select_by_visible_text("Matches")
The main idea was to create a list of dataframes for each year and each tournament by adding a new loop at the beginning of the code.
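In full, the idea would be something like the sketch below, where scrape_current_tournament is a helper I would still have to write, wrapping the parsing code above (everything from soup = BeautifulSoup(driver.page_source, 'lxml') down to pd.DataFrame(rows)) so that it returns a dataframe for whatever tournament is currently displayed:

from selenium import webdriver
from selenium.webdriver.support.ui import Select
import pandas as pd

def scrape_current_tournament(driver):
    #would wrap the parsing code above and return a dataframe
    #for the currently selected tournament
    ...

driver = webdriver.Chrome("C:/Users/aldi/Downloads/chromedriver.exe")
driver.get('http://www.bvbinfo.com/Tournament.asp?ID=3294&Process=Matches')

tournament_years = ["FIVB 2015", "FIVB 2016"]
dfs = []
for year in tournament_years:
    #select the desired year and the "Matches" view in the dropdowns
    box_year = Select(driver.find_element_by_xpath("/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[1]/td[2]/select"))
    box_year.select_by_visible_text(year)
    box_matches = Select(driver.find_element_by_xpath("/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[2]/td[2]/select"))
    box_matches.select_by_visible_text("Matches")
    dfs.append(scrape_current_tournament(driver))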
If someone has a better idea and technique to do so, it is really appreciated!
I'm new to Python and I'm trying to code a comment scraper for YouTube that collects the most important information, which I put in a JSON file. But the number of comments and replies I get is not the same as on YouTube, and I don't know where my error is. I noticed that it doesn't write any data to the files if there are fewer than 20 comments, but I don't know what I have to change...
Example:
https://youtu.be/Re1m9O7q-9U here I get 102, but it should be 107
https://youtu.be/Q9Y5m1fQ7Fk here I get 423, but it should be 486
https://youtu.be/cMhE5BfmFkM here I get 1315, but it should be 2052
Here is the code:
import json
import requests
import tqdm


class YT_Comments:

    def __init__(self, api_key):
        self.api_key = api_key
        self.comment_int = 0

    def get_video_comments(self, video_id, limit):
        url = f"https://youtube.googleapis.com/youtube/v3/commentThreads?part=replies%2C%20snippet&order=relevance&videoId={video_id}&key={self.api_key}"
        vid_comments = []
        pc, npt = self._get_comments_per_page(url)
        if limit is not None and isinstance(limit, int):
            url += f"&maxResults={str(limit)}"
        while (npt is not None):
            nexturl = url + "&pageToken=" + npt
            pc, npt = self._get_comments_per_page(nexturl)
            vid_comments.append(pc)
        print(self.comment_int)
        print(len(vid_comments))
        return vid_comments

    def _get_comments_per_page(self, url):
        json_url = requests.get(url)
        data = json.loads(json_url.text)
        page_comments = []
        if "items" not in data:
            return page_comments, None
        item_data = data["items"]
        nextPageToken = data.get("nextPageToken", None)
        for item in tqdm.tqdm(item_data):
            try:
                kind = item["kind"]
                if kind == "youtube#comment" or "youtube#commentThread":
                    comment_text = item["snippet"]["topLevelComment"]["snippet"]["textOriginal"]
                    comment_author = item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"]
                    author_id = item["snippet"]["topLevelComment"]["snippet"]["authorChannelId"]["value"]
                    comment_like_count = item["snippet"]["topLevelComment"]["snippet"]["likeCount"]
                    comment_date = item["snippet"]["topLevelComment"]["snippet"]["publishedAt"]
                    comment = {"comment_text" : comment_text,
                               "comment_author" : comment_author,
                               "comment_author_id" : author_id,
                               "comment_like_count" : comment_like_count,
                               "comment_date" : comment_date}
                    replies_l = []
                    self.comment_int += 1
                    try:
                        replies = item["replies"]["comments"]
                        for reply in replies:
                            reply_txt = reply["snippet"]["textOriginal"]
                            reply_author = reply["snippet"]["authorDisplayName"]
                            reply_author_id = reply["snippet"]["authorChannelId"]["value"]
                            reply_like_count = reply["snippet"]["likeCount"]
                            reply_date = reply["snippet"]["publishedAt"]
                            reply_dict = {"text" : reply_txt,
                                          "author" : reply_author,
                                          "author_id" : reply_author_id,
                                          "likes" : reply_like_count,
                                          "date" : reply_date}
                            replies_l.append(reply_dict)
                            self.comment_int += 1
                    except KeyError:
                        replies_l.append(None)
                    comment_dict = {
                        "comment": comment,
                        "replies": replies_l,
                    }
                    page_comments.append(comment_dict)
            except KeyError:
                print("No Comments")
        return page_comments, nextPageToken
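For reference, this is the paging flow I am aiming for. It is only a simplified sketch, not my actual code; as far as I know, maxResults can be at most 100 per page for the commentThreads endpoint:

import requests

def get_all_comment_pages(api_key, video_id, max_results=100):
    #simplified paging over the commentThreads endpoint using nextPageToken
    base_url = ("https://youtube.googleapis.com/youtube/v3/commentThreads"
                f"?part=replies,snippet&videoId={video_id}&key={api_key}"
                f"&maxResults={max_results}")
    pages = []
    page_token = None
    while True:
        url = base_url if page_token is None else base_url + "&pageToken=" + page_token
        data = requests.get(url).json()
        pages.append(data.get("items", []))  #keep the current page, including the first one
        page_token = data.get("nextPageToken")
        if page_token is None:
            break
    return pages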
This is simple code for getting a URL with search parameters. It actually works, but I think it needs to be optimized.
def target_url(search_term, include_term, intext_term, target_site_in, page):
    base_template_0 = f'https://www.google.com/search?q={search_term}+"{include_term}"+intext:{intext_term}+site:{target_site_in}&hl=en&rlz='
    base_template_1 = f'https://www.google.com/search?q={search_term}+"{include_term}"+intext:{intext_term}&hl=en&rlz='
    base_template_2 = f'https://www.google.com/search?q={search_term}+"{include_term}"&hl=en&rlz='
    base_template_3 = f'https://www.google.com/search?q={search_term}&hl=en&rlz='

    search_term = search_term.replace(' ', '+')

    base_url_0 = base_template_0.format(search_term)
    base_url_1 = base_template_1.format(search_term)
    base_url_2 = base_template_2.format(search_term)
    base_url_3 = base_template_3.format(search_term)

    url_template_0 = base_url_0 + '&start={}'
    url_template_1 = base_url_1 + '&start={}'
    url_template_2 = base_url_2 + '&start={}'
    url_template_3 = base_url_3 + '&start={}'

    if page == 0 and search_term and include_term and intext_term and target_site:
        return base_url_0
    if page == 0 and search_term and include_term and intext_term:
        return base_url_1
    if page == 0 and search_term and include_term:
        return base_url_2
    if page == 0 and search_term:
        return base_url_3
    else:
        if search_term and include_term and intext_term and target_site:
            return url_template_0.format(page)
        if search_term and include_term and intext_term:
            return url_template_1.format(page)
        if search_term and include_term:
            return url_template_2.format(page)
        if search_term:
            return url_template_3.format(page)
Besides page, four parameters are used: search_term, include_term, intext_term, and target_site_in; depending on which of them are provided, a differently conditioned URL is returned.
Can you give me a better idea for optimizing this?
Instead of having multiple template strings and selecting between them, you can write a method that builds the final search query:
def get_search_query(search_term, include_term, intext_term, target_site_in):
    response = search_term.replace(' ', '+')
    if include_term:
        response = f"{response}+{include_term}"
    if intext_term:
        response = f"{response}+intext:{intext_term}"
    if target_site_in:
        response = f"{response}+site:{target_site_in}"
    return response
Now, in your method, you can call it:
def target_url(search_term, include_term, intext_term, target_site_in, page):
    query = get_search_query(search_term, include_term, intext_term, target_site_in)
    url = f'https://www.google.com/search?q={query}&hl=en&rlz='
    if page != 0:
        url = f"{url}&start={page}"
    return url
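A quick usage example of the refactored helpers (the search terms are only illustrative):

print(target_url("best pizza", "margherita", "delivery", "reddit.com", 0))
# https://www.google.com/search?q=best+pizza+margherita+intext:delivery+site:reddit.com&hl=en&rlz=
print(target_url("best pizza", None, None, None, 10))
# https://www.google.com/search?q=best+pizza&hl=en&rlz=&start=10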
I'm just a few hours into learning Python, so please go easy on me! I just want to scrape scores and scorers off a website, and I've been able to do that. However, I'm only getting one scorer (if there is one!); when there are multiple goal scorers I am only getting the first. I think the problem is in how I'm looking for multiple scorers under '# Home Scorers'.
My code:
from bs4 import BeautifulSoup
import requests
import pandas as pd

url = "https://www.skysports.com/football-results"

match_results = {}
match_details = {}
match_no = 0

response = requests.get(url)
data = response.text
soup = BeautifulSoup(data,'html.parser')

matches = soup.find_all('div',{'class':'fixres__item'})
for match in matches:
    try:
        match_url_get = match.find('a',{'class':'matches__item matches__link'}).get('href')
        match_url = match_url_get if match_url_get else "unknown"
        event_id = match_url[-6:]
        match_response = requests.get(match_url)
        match_data = match_response.text
        match_soup = BeautifulSoup(match_data,'html.parser')

        # Match Details
        match_date = match_soup.find('time',{'class':'sdc-site-match-header__detail-time'}).text
        match_location = match_soup.find('span',{'class':'sdc-site-match-header__detail-venue'}).text
        match_info = match_soup.find('p',{'class':'sdc-site-match-header__detail-fixture'}).text

        # Home Scores & Team
        home_details = match_soup.find_all('span',{'class':'sdc-site-match-header__team-name sdc-site-match-header__team-name--home'})
        for home_detail in home_details:
            home_team = home_detail.find('span',{'class':'sdc-site-match-header__team-name-block-target'}).text
        home_score_get = match_soup.find('span',{'class':'sdc-site-match-header__team-score-block','data-update':'score-home'})
        home_score = home_score_get.text if home_score_get else "none"

        # Home Scorers
        home_scorer_details = match_soup.find_all('ul',{'class':'sdc-site-match-header__team-synopsis','data-update':'synopsis-home'})
        for home_scorer_detail in home_scorer_details:
            goal_scorer_get = home_scorer_detail.find('li',{'class':'sdc-site-match-header__team-synopsis-line'})
            goal_scorer = goal_scorer_get.text if goal_scorer_get else "none"
            goal_score_minute_get = home_scorer_detail.find('span',{'class':'sdc-site-match-header__event-time'})
            goal_score_minute = goal_score_minute_get.text if goal_score_minute_get else "none"

        # Away Scores & Team
        away_details = match_soup.find_all('span',{'class':'sdc-site-match-header__team-name sdc-site-match-header__team-name--away'})
        for away_detail in away_details:
            away_team = away_detail.find('span',{'class':'sdc-site-match-header__team-name-block-target'}).text
        away_score_get = match_soup.find('span',{'class':'sdc-site-match-header__team-score-block','data-update':'score-away'})
        away_score = away_score_get.text if away_score_get else "none"

        # Away Scorers
        away_scorer_details = match_soup.find_all('ul',{'class':'sdc-site-match-header__team-synopsis','data-update':'synopsis-away'})
        for away_scorer_detail in away_scorer_details:
            away_goal_scorer_get = away_scorer_detail.find('li',{'class':'sdc-site-match-header__team-synopsis-line'})
            away_goal_scorer = away_goal_scorer_get.text if away_goal_scorer_get else "none"
            away_goal_score_minute_get = away_scorer_detail.find('span',{'class':'sdc-site-match-header__event-time'})
            away_goal_score_minute = away_goal_score_minute_get.text if away_goal_score_minute_get else "none"

        print("Match: ",event_id , "Match Date:", match_date, "Match Location:", match_location, "Match Info:", match_info, "\nResult: ", home_team, home_score, away_team, away_score)
        print("Home Scorer:", goal_scorer, "Minute:",goal_score_minute, "\nAway Scorer:", away_goal_scorer, "Minute:",away_goal_score_minute)
        print(match_date)

    except:
        pass

    match_no += 1
    match_results[match_no] = [event_id, home_team, home_score, away_team, away_score, match_url, match_date, match_location, match_info]
    match_details[match_no] = [event_id, goal_scorer, goal_score_minute, away_goal_scorer, away_goal_score_minute]

Period = "2021-22"
print("Total Matches: ", match_no)

match_results = pd.DataFrame.from_dict(match_results, orient='index', columns = ['Event_ID:', 'Home Team:','Home Score:','Away Team:','Away Score:','Link:','Match Date:','Match Location:','Match Info:'])
match_results.to_csv("Python/FL/Premier League Results (SkySports.com) " + Period + ".csv")

match_details = pd.DataFrame.from_dict(match_details, orient='index', columns = ['Event_ID:', 'Home Goal:','Home Goal Minute:','Away Goal:','Away Goal Minute:'])
match_details.to_csv("Python/FL/Premier League Details (SkySports.com) " + Period + ".csv")
So the bit that's not working correctly is:
# Home Scorers
home_scorer_details = match_soup.find_all('ul',{'class':'sdc-site-match-header__team-synopsis','data-update':'synopsis-home'})
for home_scorer_detail in home_scorer_details:
    goal_scorer_get = home_scorer_detail.find('li',{'class':'sdc-site-match-header__team-synopsis-line'})
    goal_scorer = goal_scorer_get.text if goal_scorer_get else "none"
    goal_score_minute_get = home_scorer_detail.find('span',{'class':'sdc-site-match-header__event-time'})
    goal_score_minute = goal_score_minute_get.text if goal_score_minute_get else "none"
Any ideas how I can return multiple rows for that bit?!
Thanks in advance :)
home_scorer_details only has 1 item, the unordered list itself.
To get all the scorers you need to get the items in that list.
The following code, which is pretty rough, will create a list of dictionaries where each dictionary has the name of the scorer and the minute(s) they scored.
You could use similar code to get all the away scorers.
Like I said, this code is rough and needs refining, but it should give you a start.
# Home Scorers
home_scorer_details = match_soup.find_all('ul',{'class':'sdc-site-match-header__team-synopsis','data-update':'synopsis-home'})
home_scorers = []
for home_scorer_detail in home_scorer_details[0].find_all('li'):
    goal_scorer = home_scorer_detail.text
    goal_score_minute_get = home_scorer_detail.find('span',{'class':'sdc-site-match-header__event-time'})
    goal_score_minute = goal_score_minute_get.text if goal_score_minute_get else "none"
    home_scorers.append({'scorer': goal_scorer, 'minute': goal_score_minute})
print(home_scorers)
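For completeness, the away side should work the same way, just switching to the synopsis-away list that already appears in your original code (untested sketch):

# Away Scorers
away_scorer_details = match_soup.find_all('ul',{'class':'sdc-site-match-header__team-synopsis','data-update':'synopsis-away'})
away_scorers = []
for away_scorer_detail in away_scorer_details[0].find_all('li'):
    away_goal_scorer = away_scorer_detail.text
    away_goal_score_minute_get = away_scorer_detail.find('span',{'class':'sdc-site-match-header__event-time'})
    away_goal_score_minute = away_goal_score_minute_get.text if away_goal_score_minute_get else "none"
    away_scorers.append({'scorer': away_goal_scorer, 'minute': away_goal_score_minute})
print(away_scorers)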
I'm trying to request, through the Snapchat Ads API, the results of a campaign by date (for example yesterday), sorted by country.
I found the request for the date:
"https://adsapi.snapchat.com/v1/campaigns/["id"]/stats?granularity=DAY&start_time=["date_start"]&end_time=["date_end"]"
And the request for the country:
"https://adsapi.snapchat.com/v1/campaigns/["id"]/stats?granularity=LIFETIME&dimension=GEO&pivots=country"
This is my code; at the moment I'm only requesting by country:
r = requests.get("https://adsapi.snapchat.com/v1/organizations/" + organization_id + "/adaccounts", headers = data)
#get all the add accounts
ad_list = json.loads(r.text)
ad_list = ad_list["adaccounts"]
account_list = {}
for obj in ad_list:
test = obj["adaccount"]
account_list[test["name"]] = {}
account_list[test["name"]]["id"] = test["id"]
account_list[test["name"]]["timezone"] = test["timezone"]
#get all the add accounts campaign's
for key in account_list:
r = requests.get("https://adsapi.snapchat.com/v1/adaccounts/" + account_list[key]["id"] + "/campaigns", headers = data)
ad_id = json.loads(r.text)
ad_id = ad_id["campaigns"]
for obj in ad_id:
test = obj["campaign"]
account_list[key][test["name"]] = {}
account_list[key][test["name"]]["type"] = "campaign"
account_list[key][test["name"]]["id"] = test["id"]
#get campaign stats by country
try:
if (req == "1"):
r = requests.get("https://adsapi.snapchat.com/v1/campaigns/" + test["id"] + "/stats?granularity=LIFETIME&dimension=GEO&pivots=country&fields=impressions,swipes,spend", headers = data)
stat = json.loads(r.text)
stat = stat["lifetime_stats"]
stat = stat[0]["lifetime_stat"]
dic = stat["dimension_stats"]
for country in dic:
account_list[key][test["name"]][country["country"]] = {}
for key2 in country:
account_list[key][test["name"]][country["country"]][key2] = country[key2]
except:
continue
But whatever I try, I can't find a way to request the per-country results between two dates. Is there a solution for this problem? Thanks in advance.
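One thing I considered trying (I have not verified that the API accepts this parameter combination) is putting the date-range parameters from the first URL and the GEO pivot from the second on the same stats call; date_start and date_end below are placeholder ISO 8601 timestamps, not variables from the code above:

#untested: combine the DAY granularity / date range with the GEO pivot on one stats request
url = ("https://adsapi.snapchat.com/v1/campaigns/" + test["id"] + "/stats"
       "?granularity=DAY"
       "&start_time=" + date_start +
       "&end_time=" + date_end +
       "&dimension=GEO&pivots=country"
       "&fields=impressions,swipes,spend")
r = requests.get(url, headers = data)
stats_by_day_and_country = json.loads(r.text)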