How can I get the "profile_pic_id" with Python

I want to get the "profile_pic_id" from the JSON response. I can already get the 'follower_count', 'following_count' and the 'username'. This is my code:
import requests
import json
import re

pk = input("")

def getEndpoint(idUser):
    info = []
    idUser = idUser.replace('\"', '')
    endPoint = 'https://i.instagram.com/api/v1/users/idUser/info/'
    res = requests.get(endPoint.replace('idUser', idUser))
    try:
        full_name = json.dumps(res.json()['user']['full_name']['profile_pic_url'])
        try:
            fullName = re.sub('[^a-zA-Z \n]', ' ', full_name).lower().replace(',', ' ').replace('\n', ' ').replace('\r', ' ')
            fullName = " ".join(fullName.split())
            info.append(fullName)
        except Exception as e:
            print(e)
            info.append('')
        followersCount = json.dumps(res.json()['user']['follower_count'])
        followingCount = json.dumps(res.json()['user']['following_count'])
        followingCount = json.dumps(res.json()['user']['profile_pic_url'])
        username = json.dumps(res.json()['user']['username']).replace('\"', '')
        info.append(username)
        info.append(followersCount)
        info.append(followingCount)
        info.append(profile_pic_url)
        return info
    except Exception as e:
        print(e)
        return None

print(getEndpoint(pk))
I expect the output to be followers, following and profile_pic_url, but the actual output is followers and following only.

The version below keeps the same structure but reads 'profile_pic_url' directly from res.json()['user'] and appends it to info:

import requests
import json
import re

# 'https://www.instagram.com/web/search/topsearch/?query={query}' to find out the pk
print("Enter your PK:")
pk = input("")

def getEndpoint(idUser):
    info = []
    idUser = idUser.replace('\"', '')
    endPoint = 'https://i.instagram.com/api/v1/users/idUser/info/'
    res = requests.get(endPoint.replace('idUser', idUser))
    try:
        full_name = json.dumps(res.json()['user']['full_name'])
        try:
            fullName = re.sub('[^a-zA-Z \n]', ' ', full_name).lower().replace(',', ' ').replace('\n', ' ').replace('\r', ' ')
            fullName = " ".join(fullName.split())
            info.append(fullName)
        except Exception as e:
            print(e)
            info.append('')
        followersCount = json.dumps(res.json()['user']['follower_count'])
        followingCount = json.dumps(res.json()['user']['following_count'])
        profile_pic_url = json.dumps(res.json()['user']['profile_pic_url'])
        username = json.dumps(res.json()['user']['username']).replace('\"', '')
        info.append(username)
        info.append(followersCount)
        info.append(followingCount)
        info.append(profile_pic_url)
        return info
    except Exception as e:
        print(e)
        return None

print(getEndpoint(pk))
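For reference, a tidier variant that reads the values straight from the parsed JSON instead of round-tripping them through json.dumps, assuming the same endpoint and response shape shown above (in practice the endpoint may also require extra request headers):

import requests

def get_profile(pk):
    # endpoint and field names are taken from the snippets above
    res = requests.get('https://i.instagram.com/api/v1/users/{}/info/'.format(pk))
    user = res.json()['user']
    return {
        'username': user['username'],
        'full_name': user['full_name'],
        'followers': user['follower_count'],
        'following': user['following_count'],
        'profile_pic_url': user['profile_pic_url'],
    }

print(get_profile(input("Enter your PK: ")))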

Related

Problem with XPath: I'm getting an invalid URL trying to scrape

I have the following problem with my code. I'm trying to scrape information from the following link:
sepomex
I'm trying to get the Estado, Municipio and a list of cp (código postal, btw), but I'm getting an invalid URL error. This is my code:
import os
import datetime
import requests
import lxml.html as html

HOME_URL = 'https://www.correosdemexico.gob.mx/SSLServicios/ConsultaCP/Descarga.aspx'

XPATH_ESTADOS = '//select[@id="DdlEstado"]/option[@value > 0]/text()'
XPATH_MUNICIPIOS = '//select[@id="DdlMunicipio"]/option[@value > 0]/text()'
XPATH_LOCALIDAD = '//table[@class="small"]/tbody/tr[@class="dgNormal"]/td/text()'

def parse_edos(estado, today):
    try:
        response = requests.get(estado)
        if response.status_code == 200:
            root = html.fromstring(response.content)
            try:
                municipios = root.xpath(XPATH_MUNICIPIOS)
                for municipio in municipios:
                    localidad = root.xpath(XPATH_LOCALIDAD)
            except IndexError:
                return
            with open(f'{today}/{estado}.txt', 'w', encoding='utf-8') as f:
                for i in localidad:
                    f.write(i + '\n')
        else:
            raise ValueError(f'Error: {response.status_code}')
    except ValueError as err:
        print(err)

def main():
    try:
        response = requests.get(HOME_URL)
        if response.status_code == 200:
            root = html.fromstring(response.content)
            estados = root.xpath(XPATH_ESTADOS)
            today = datetime.datetime.now().strftime('%Y-%m-%d')
            if not os.path.isdir(today):
                os.mkdir(today)
            for estado in estados:
                parse_edos(estado, today)
        else:
            raise ValueError(f'Error: {response.status_code}')
    except ValueError as err:
        print(err)

def run():
    main()

if __name__ == '__main__':
    run()
Sorry about my English :P
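A hedged observation: XPATH_ESTADOS returns the option texts, i.e. bare state names rather than URLs, and parse_edos then passes one of them straight to requests.get, which rejects anything without a scheme. A minimal sketch reproducing that error (the state name used here is hypothetical):

import requests

estado = "Aguascalientes"  # hypothetical option text returned by XPATH_ESTADOS
try:
    requests.get(estado)  # not a URL, so requests refuses it
except requests.exceptions.MissingSchema as e:
    print(e)  # Invalid URL 'Aguascalientes': No scheme supplied. ...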

How to change an argument programmatically on a connection error?

I am calling an API. While making requests I hit the maximum number of tries and get a connection error. I would like to edit the URL programmatically by incrementing the number in it. I know how to change arguments programmatically, but not how to change/increment one when I hit a connection error.
I am using Python with the requests library.
Code snippet:
from requests.auth import HTTPBasicAuth
import requests
from requests.exceptions import ConnectionError

def make_request(data, id=None):
    url = "http://server001.net:8080/?id="
    result = {}
    if id:
        response = requests.get(url + id, auth=HTTPBasicAuth('uname', 'pass'))
        return response
    else:
        for line in data:
            try:
                response = requests.get(url + line, auth=HTTPBasicAuth('uname', 'pass'))
                result = html_parser2(response)
                if result:
                    write_csv(result)
                else:
                    pass
            except ConnectionError as e:
                print(e)
Expected output
url = "http://server001.net:8080/?id="
url_edited = "http://server002.net:8080/?id="
The URL should change only if I hit the maximum number of tries, i.e. I get an exception; otherwise keep requesting the same URL.
One option is to enclose the try..except block in a while loop.
Besides, maybe you should put your first requests.get into a try..except block too.
Also try to avoid multiple unrelated operations in one try..except block, i.e. execute write_csv only after a successful connection.
def make_request(data, id=None):
    url = 'http://server001.net:8080/?id={}'
    connection_failed = False
    response = None
    if id:
        try:
            response = requests.get(url.format(id), auth=HTTPBasicAuth('uname', 'pass'))
        except ConnectionError as e:
            print('id = {}, e: {}'.format(id, e))
    else:
        for line in data:
            while not connection_failed:
                try:
                    response = requests.get(url.format(line), auth=HTTPBasicAuth('uname', 'pass'))
                except ConnectionError as e:
                    connection_failed = True
                    print('line = {}, e: {}'.format(id, e))
                else:
                    result = html_parser2(response)
                    if result:
                        write_csv(result)
    return response
def make_request(data, id=None):
    url = 'http://server001.net:8080/?id={}'
    response = None
    if id:
        try:
            response = requests.get(url.format(id), auth=HTTPBasicAuth('uname', 'pass'))
        except ConnectionError as e:
            print('id = {}, e: {}'.format(id, e))
    else:
        for line in data:
            try:
                response = requests.get(url.format(line), auth=HTTPBasicAuth('uname', 'pass'))
            except ConnectionError as e:
                print('line = {}, e: {}'.format(id, e))
            else:
                result = html_parser2(response)
                if result:
                    write_csv(result)
                break
    return response
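The question also asks to bump the number in the hostname (server001 → server002) once the retries are exhausted, which the snippets above don't cover. A minimal sketch of that idea, assuming the hostnames follow the serverNNN pattern and reusing the placeholder credentials from the question:

import re
import requests
from requests.auth import HTTPBasicAuth
from requests.exceptions import ConnectionError

def bump_server(url):
    # increment the numeric part of the host, e.g. server001 -> server002
    return re.sub(r'server(\d+)',
                  lambda m: 'server{:03d}'.format(int(m.group(1)) + 1),
                  url)

def get_with_failover(line, url='http://server001.net:8080/?id=', max_tries=3):
    for _ in range(max_tries):
        try:
            return requests.get(url + line, auth=HTTPBasicAuth('uname', 'pass'))
        except ConnectionError:
            pass
    # all tries failed: switch to the next server and try once more
    return requests.get(bump_server(url) + line, auth=HTTPBasicAuth('uname', 'pass'))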

How to auto follow someone with Twython

I am making a bot that automatically re-tweets and favorites anything containing the keyword 'Australia'.
I can successfully re-tweet and favorite tweets automatically, but I have no idea how to follow their authors automatically (follow everyone I re-tweet).
search_results = twitter.search(q='Australia', count=10)
try:
    for tweet in search_results["statuses"]:
        try:
            twitter.retweet(id=tweet["id_str"])
            twitter.create_favorite(id=tweet["id_str"])
            twitter.create_friendship(user_id=?????)
        except TwythonError as e:
            print(e)
except TwythonError as e:
    print(e)
After a lot of trying I found this. Try the code below:
twitter = Twython(consumer_key, consumer_secret, access_token, access_secret)
search_results = twitter.search(q='Australia', count=10)
try:
    for tweet in search_results["statuses"]:
        try:
            twitter.retweet(id=tweet["id_str"])
            twitter.create_favorite(id=tweet["id_str"])
            st = tweet["entities"]["user_mentions"]
            if st != []:
                twitter.create_friendship(screen_name=st[0]["screen_name"])
        except TwythonError as e:
            print(e)
except TwythonError as e:
    print(e)
Use screen_name to follow the respective handle.
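Note that user_mentions only lists the accounts mentioned in the tweet text, not its author. Since the question asks to follow everyone who gets re-tweeted, a hedged alternative is to follow the tweet's author, who is available under tweet["user"] in the standard status payload:

for tweet in search_results["statuses"]:
    try:
        twitter.retweet(id=tweet["id_str"])
        twitter.create_favorite(id=tweet["id_str"])
        # follow the author of the tweet rather than the accounts it mentions
        twitter.create_friendship(user_id=tweet["user"]["id_str"])
    except TwythonError as e:
        print(e)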

How to close the file after writing?

Please help me fix the script.
import urllib
import re
import os
import pprint
import requests
import bs4

def make_catalog():
    try:
        os.mkdir('GRAB')
    except FileExistsError:
        print('FileExistsError')
    except PermissionError:
        print('PermissionError')
    except Exception:
        print(Exception)

def change_catalog():
    try:
        os.chdir('GRAB')
    except PermissionError:
        print('PermissionError')
    except Exception:
        print(Exception)

def download_image(path, name):
    #urllib.URLopener().retrieve(prefix + path, name)
    img = urllib.request.urlopen(prefix + path).read()
    try:
        f = open(name, "wb")
        if f:
            print('open!!!')
        if f.write(img):
            print('write!!!')
    except OSError:
        print('OSError')
    except Exception:
        print(Exception)
    finally:
        f.close()

beginIndex = 5794
endIndex = 5800
prefix = "http://www.inpic.ru"
rep_chars = ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '-', ' ']

make_catalog()
change_catalog()

for i in range(beginIndex, endIndex):
    req = requests.get(prefix + '/image/' + str(i))
    if req.status_code == requests.codes.ok:
        #print(i, '\t', req.status_code, '\t', req, end='\n')
        soup = bs4.BeautifulSoup(req.content)
        #print(soup.prettify())
        name = soup.find("td", {"class": "post_title"}).contents[1].contents
        #author = soup.find("div", {"class": "date_author"}).contents[1].contents
        print('NAME: ', name[0])
        #print(author[0])
        #name[0] = re.sub('[\\\\/:*?"<>|-]', '_', name[0])
        for char in rep_chars:
            name[0] = name[0].replace(char, '_')
        print('newNAME: ', name[0])
        mainImagePath = soup.find("img", {"class": "image last"})["src"]
        mainImageExt = mainImagePath.split('.')[-1]
        manyImages = soup.findAll("img", {"class": "image"})
        print('MAINUMAGE: ', mainImagePath)
        print('MAINIMAGE EXT: ', mainImageExt)
        print('MANYIMAGE: \n')
        pprint.pprint(manyImages)
        if len(manyImages) > 1:
            print('CATALOG MAKE')
            try:
                os.mkdir(name[0])
            #except FileExistsError:
                #print('FileExistsError')
            except PermissionError:
                print('PermissionError')
            except Exception:
                print(Exception)
            os.chdir(name[0])
            #download_image(mainImagePath, str(name[0]) + '_0.' + mainImageExt)
            i = 0
            for name in manyImages:
                #print(name['src'], end='------------\n')
                download_image(name['src'], str(name['src']))
                i = i + 1
            os.chdir('../')
        else:
            print('IMAGE MAKE')
            download_image(mainImagePath, str(name[0]) + '.' + mainImageExt)
            print('mainImagePath', mainImagePath)
            print('name', str(name[0]) + '.' + mainImageExt)
    print('==================================')
The problem is that when saving the images from the group page
http://www.inpic.ru/image/5797/
the following error message is displayed:
Traceback (most recent call last):
File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\q.py", line 98, in <module>
download_image(name['src'], str(name['src']))
File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\q.py", line 46, in download_image
f.close()
UnboundLocalError: local variable 'f' referenced before assignment
You are trying to close a file that failed to open. f was never assigned to because the open() call raised an exception.
Instead of closing the file object in the finally handler, use it as a context manager:
def download_image(path, name):
    #urllib.URLopener().retrieve(prefix + path, name)
    img = urllib.request.urlopen(prefix + path).read()
    try:
        with open(name, "wb") as f:
            print('open!!!')
            f.write(img)
            print('write!!!')
    except OSError:
        print('OSError')
    except Exception:
        print(Exception)
Here the with statement will ensure that f is closed for you if it was opened successfully, whatever happens.
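Since the script already imports requests, the same function could also be written with it instead of urllib; a minimal sketch under that assumption (prefix is the module-level variable defined above):

def download_image(path, name):
    # same idea, but using requests (already imported) and a with block
    img = requests.get(prefix + path).content
    with open(name, "wb") as f:
        f.write(img)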

Python: Handling requests exceptions the right way

I recently switched from urllib2 to requests and I'm not sure how to deal with exceptions. What is best practice? My current code looks like this, but it is not doing any good:
try:
    response = requests.get(url)
except requests.ConnectionError , e:
    logging.error('ConnectionError = ' + str(e.code))
    return False
except requests.HTTPError , e:
    logging.error('HTTPError = ' + str(e.reason))
    return False
except requests.Timeout, e:
    logging.error('Timeout')
    return False
except requests.TooManyRedirects:
    logging.error('TooManyRedirects')
    return False
except Exception:
    import traceback
    logging.error('generic exception: ' + traceback.format_exc())
    return False
Since it looks bad as a comment, have you tried:
try:
    # some code
except Exception as e:
    print e
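In newer requests/Python 3 code, all of the exceptions above share requests.exceptions.RequestException as a base class, so one handler can cover them; a minimal sketch:

import logging
import requests

def fetch(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # promote 4xx/5xx responses to HTTPError
        return response
    except requests.exceptions.RequestException as e:
        # covers ConnectionError, HTTPError, Timeout, TooManyRedirects, ...
        logging.error('request failed: %s', e)
        return False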
