BadZipFile("File is not a zip file") - python

So I am pulling data from an api using the provided code:
#User Parameters
apiToken = apitokens
surveyId = j
fileFormat = 'json'
#Static Parameters
requestCheckProgress = 0
baseUrl = 'https://sdfs.asdfs.com/API/v3/responseexports/'
headers = {
'content-type': "application/json",
'x-api-token': apiToken,
}
#Creating Data Export
downloadRequestUrl = baseUrl
downloadRequestPayload = '{"format":"' + fileFormat + '","surveyId":"' + surveyId + '"}'
downloadRequestResponse = requests.request("POST", downloadRequestUrl, data=downloadRequestPayload, headers=headers)
progressId = downloadRequestResponse.json()['result']['id']
print (downloadRequestResponse.text)
#Checking on Data Export
while requestCheckProgress < 100:
requestCheckUrl = baseUrl + progressId
requestCheckResponse = requests.request("GET", requestCheckUrl, headers=headers)
requestCheckProgress = requestCheckResponse.json()['result']['percentComplete']
print ("Download is " + str(requestCheckProgress) + " complete")
#Downloading file
requestDownloadUrl = baseUrl + progressId + '/file'
requestDownload = requests.request("GET", requestDownloadUrl, headers=headers, stream=True)
with open('RequestFile.zip', "wb") as f:
for chunk in requestDownload.iter_content(chunk_size=1024):
f.write(chunk)
zipfile.ZipFile('RequestFile.zip').extractall("sdafsadf")
else:
print("error")
so basically before this code I am using some nested for loops to provide the apitokens and the surveyid for each survey. Therer are over 100 surveys being pulled. Sometimes, just randomly I will get thrown this:
zipfile.BadZipFile: File is not a zip file
and the download meter is at like 1350 percent. I can't seem to find what is wrong in their code. I am using python 3.6 and have tried using 3.5 too.

Related

Python Graph API request only retrieving 1 attachment from an email

I'm using a python script to send an API request to get the attachments of an email. The email I'm using has 4 attachments (plus pictures in the signature in the email). The python request only retrieves 1 attachment along with the pics from the signature. When using Postman with the exact same information, it retrieves all attachments along with the pics.
Any ideas on how I can get the other attachments?
import requests
url = 'https://graph.microsoft.com/v1.0/users/{{users email}}/messages/{{messageID}}/attachments'
body = None
head = {"Content-Type": "application/json;charset=UFT-8", "Authorization": "Bearer " + accessToken}
response1 = requests.get(url, data=body, headers=head)
response = response1.text
Below shows the response from the python script, with only 7 items, and the Postman response with 10 items.
Below code retrieves multiple attachments
(files being an array of attachment names)
def execute(accessToken, messageID, files, noAttachments):
import os
from os import path
import requests
import base64
import json
if noAttachments == "False":
url = 'https://graph.microsoft.com/v1.0/users/{{users email}}/messages/{{messageID}}/attachments'
body = {}
head = {"Authorization": "Bearer " + accessToken}
responseCode = requests.request("GET", url, headers=head, data = body)
response = responseCode.text
test = json.loads(responseCode.text.encode('utf8'))
x, contentBytes = response.split('"contentBytes":"',1)
if len(files) == 1:
imgdata = base64.b64decode(str(contentBytes))
filename = "C:/Temp/SAPCareAttachments/" + files[0]
with open(filename, 'wb') as f:
f.write(imgdata)
else:
for file in test["value"]:
imgdata = base64.b64decode(file["contentBytes"])
if file["name"] in files:
filename = "C:/Temp/" + file["name"]
with open(filename, 'wb') as f:
f.write(imgdata)
print(responseCode)

pandas .loc stops working when imbedded in loop

import requests
import json
import pandas as pd
CSV_output_df = pd.read_csv('output/DEMO_CSV.csv', index_col=None)
payload = {}
headers = {
'Authorization': 'Basic ***********************************************************'
}
for index, row in CSV_output_df.iterrows():
Package_label = CSV_output_df.loc[index, "Package Id"]
licenseNumber = CSV_output_df.loc[index, "licenseNumber"]
Package_label = str(Package_label)
licenseNumber = str(licenseNumber)
url = ("https://api-mi.metrc.com/packages/v1/" + Package_label + "?licenseNumber=" + licenseNumber)
response = requests.request("GET", url, headers=headers, data=payload)
json_data = (response.text.encode('utf8'))
json_data = str(json_data)
json_data = (json_data.strip('b'))
json_data = (json_data.strip("'"))
json_data = (json_data.strip('{'))
json_data = (json_data.strip('}'))
json_data = (json_data.replace('"Item":{', ''))
json_data = (json_data.split(','))
json_data_df = pd.DataFrame(json_data)
Id = json_data_df.loc[0, 0]
Id = Id.replace('"Id":', '')
CSV_output_df.loc[index, "api_id"] = Id
for index, row in CSV_output_df.iterrows():
api_id = CSV_output_df.loc[index, "api_id"]
licenseNumber = CSV_output_df.loc[index, "licenseNumber"]
api_id = str(api_id)
licenseNumber = str(licenseNumber)
url0 = ("https://api-mi.metrc.com/labtests/v1/results?packageId=" + api_id + "&licenseNumber=" + licenseNumber)
response0 = requests.request("GET", url0, headers=headers, data=payload)
json_data0 = (response0.text.encode('utf8'))
json_data0 = str(json_data0)
json_data0 = (json_data0.strip('b'))
json_data0 = (json_data0.strip("'"))
json_data0 = (json_data0.strip('{'))
json_data0 = (json_data0.strip('}'))
json_data0 = (json_data0.strip('['))
json_data0 = (json_data0.strip(']'))
json_data0 = (json_data0.split(','))
json_data_df0 = pd.DataFrame(json_data0)
data_point = (json_data_df0.loc[1187, 0])
Python noobie here and 1st-time poster. So the issue is, below command not working in my for loop but is working as a standalone command.
data_point = (json_data_df0.loc[1187, 0])
The traceback log is telling me
ValueError: 1187 is not in range
but there are 1326 rows in json_data_df0 and all values except 0, 0 do not work in the loop.
I think you are supposed to use .iloc if you want to access the columns/rows using integer. .loc is for accessing columns/rows using the label.
For your reference: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

How to parse json response from two different api calls in a nested for loop?

I want to parse response json from two different api calls ,but has same dictionary names with different values in a nested for loop
I was able to get from the forst api call, but failing on second api call with same dict as issues in both call but the key values are different.
def classtest(self):
apione = "http://jira.com/rest/1"
headers = {
'content-type': "application/json",
}
requestglobal = requests.request("GET", apione, headers=headers)
responseglobal = requestglobal.text
responseglobal = json.loads(responseglobal)
for i in responseglobal['issues']:
issue1 = i[key]
print(issue1)
apitwo = "https://jira.com/rest/2" + str(issue1)
requesttwo = requests.request("GET", apitwo, headers=headers)
responsetwo = requesttwo.text
responsetwo = json.loads(responsetwo)
for i in responsetwo['issues']:
issue2 = i[key] + str(issue1)
print(issue2)
apitthree = "https://jira.com/rest/3" + str(issue1) + str(issue3)
requestthree = requests.request("GET", apitthree, headers=headers)
responsethree = requestthree.text
responsethree = json.loads(responsetthree)
for i in responsethree['issues']:
issue3 = i[key] + str(issue2)
print(issue3)
print("something from thirdloop")
print("something from second loop")
print ("something from first for loop")
I want issues from all the 3 calls, as each call is interdependent

Getting ValueError: unknown url type: ' '

I have this code below that iterate through some tracks. And then for each track I want to use the musixmatch api to get and print the lyrics of the track based on the artist name and track name.
code that iterete trough some tracks and print the lyrics:
for i, v in tracks.items():
artist = tracks[i]['artist'].replace(" ", "+")
title = tracks[i]['title'].replace(" ", "+")
print(tracks)
print(song_lyric(title, artist))
The print(tracks) returns in this format:
{12: {'trackID': 12, 'title': 'Achtung Baby', 'number': '1', 'artist': 'U2', 'album': 'Achtung Baby', 'albumID': 2, 'duration': '291'}
When the code exuted the lyrics for the firsts tracks are printed, but then it appears an error:
Traceback (most recent call last):
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 239, in <module>
print(song_lyric(title, artist))
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 72, in song_lyric
lyrics_tracking(tracking_url)
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 79, in lyrics_tracking
request = urllib.request.Request(querystring)
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 329, in __init__
self.full_url = url
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 355, in full_url
self._parse()
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 384, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''
Do you know why this error is appearing?
The methods to get the lyrics from musixmatch are public available:
def song_lyric(song_name, artist_name):
while True:
querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
song_name) + "&q_artist=" + urllib.parse.quote(
artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
# matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
while True:
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
json_obj = json.loads(raw.decode('utf-8'))
body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
#print(tracking_url)
lyrics_tracking(tracking_url)
return (body + "\n\n" + copyright)
def lyrics_tracking(tracking_url):
while True:
querystring = tracking_url
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
print(raw)
Full working exemple that reproduces the error:
import requests
import json
import urllib.request, urllib.error, urllib.parse
import socket
apikey_musixmatch = '0b4a363bbd71974c2634837d5b5d1d9a' #generated for the example
apiurl_musixmatch = 'http://api.musixmatch.com/ws/1.1/'
api_key = "b088cbedecd40b35dd89e90f55227ac2" #generated for the example
def song_lyric(song_name, artist_name):
while True:
querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
song_name) + "&q_artist=" + urllib.parse.quote(
artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
# matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
while True:
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
json_obj = json.loads(raw.decode('utf-8'))
body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
print("Tracking_url====================" +tracking_url + "==================================")
lyrics_tracking(tracking_url)
return (body + "\n\n" + copyright)
def lyrics_tracking(tracking_url):
while True:
querystring = tracking_url
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
print(raw)
ID = 0
#get top artists from country
artists = {}
for i in range(2, 3):
artists_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=geo.gettopartists&country=spain&format=json&page=' + str(i) + '&api_key=' + api_key)
artists_data = artists_response.json()
for artist in artists_data["topartists"]["artist"]:
name = artist["name"]
url = artist["url"]
if ID > 1: continue
artists[ID] = {}
artists[ID]['ID'] = ID
artists[ID]['name'] = name
ID += 1
for i, v in artists.items():
chosen = artists[i]['name'].replace(" ", "+")
artist_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&format=json&artist=' + chosen + '&api_key=' + api_key)
artist_data = artist_response.json()
# get top albums of the artists
albums = {}
for i, v in artists.items():
chosen = artists[i]['name'].replace(" ", "+")
topalbums_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=artist.gettopalbums&format=json&artist=' + chosen + '&api_key=' + api_key + '&limit=5')
albums_data = topalbums_response.json()
for album in albums_data['topalbums']['album']:
name = album["name"]
url = album["url"]
albums[ID] = {}
albums[ID]['ID'] = ID
albums[ID]['artist'] = artists[i]['name']
albums[ID]['artistID'] = artists[i]['ID']
albums[ID]['name'] = name
ID += 1
# Get tracks of the album
tracks = {}
for i, v in albums.items():
artist = albums[i]['artist'].replace(" ", "+")
name = albums[i]['name'].replace(" ", "+")
album_response_data = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=album.getinfo&format=json&api_key=' + api_key + '&artist=' + artist + '&album=' + name)
album_response = album_response_data.json()
for album in album_response['album']['tracks']['track']:
title = album['name']
tracks[ID] = {}
tracks[ID]['trackID'] = ID
tracks[ID]['title'] = title
tracks[ID]['artist'] = albums[i]['artist']
tracks[ID]['album'] = albums[i]['name']
tracks[ID]['albumID'] = albums[i]['ID']
ID += 1
for i, v in tracks.items():
artist = tracks[i]['artist'].replace(" ", "+")
title = tracks[i]['title'].replace(" ", "+")
# print the lyric of each track
print(song_lyric(title, artist))
It seems like url is not correct. It happens here:
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
If you have ability to run that API locally and see what is returned into tracking_url, you can find out what is still wrong with it.
UPDATE:
I reproduced it, so the urllib.request cannot process empty string URL: "", that is why you need to check if the tracking_url != "" and only if its not empty string or None you need to request for the song.

Use of files on hard drives instead of url with python

I would like to modify this script to use offline files, if I download the files from url works, but if the same file as I withdraw from hard drives, does not open, someone helps me to understand why and how to do, thank you.
def INDEX():
TVLIST('https://www.*********/playlist/*******/test.m3u')
def TVLIST(url):
try:
m3u = getHtml(url)
parsem3u(m3u)
except:
addDir('Nothing found', '', '', '', Folder=False)
xbmcplugin.endOfDirectory(int(sys.argv[1]))
urlopen = urllib2.urlopen
Request = urllib2.Request
def getHtml(url, referer=None, hdr=None, data=None):
if not hdr:
req = Request(url, data, headers)
else:
req = Request(url, data, hdr)
if referer:
req.add_header('Referer', referer)
if data:
req.add_header('Content-Length', len(data))
response = urlopen(req)
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO( response.read())
f = gzip.GzipFile(fileobj=buf)
data = f.read()
f.close()
else:
data = response.read()
response.close()
return data
def parsem3u(html, sitechk=True):
match = re.compile('#.+,(.+?)\n(.+?)\n').findall(html)
txtfilter = txtfilter = GETFILTER()
txtfilter = txtfilter.split(',') if txtfilter else []
txtfilter = [f.lower().strip() for f in txtfilter]
i = 0
count = 0
for name, url in match:
status = ""
url = url.replace('\r','')
if not txtfilter or any(f in name.lower() for f in txtfilter):
if sitechk:
if i < 5:
try:
siteup = urllib.urlopen(url).getcode()
status = " [COLOR red]offline[/COLOR]" if siteup != 200 else " [COLOR green]online[/COLOR]"
except: status = " [COLOR red]offline[/COLOR]"
i += 1
addPlayLink(name+status, url, 3, uiptvicon)
count += 1
return count
I thought, was enough to put the local path
def INDEX():
TVLIST(r'c:\Desktop\IPTVLIST\M3U\playlist\test.m3u')
who explains why it does not work and how can I do? Thank you
As suggested by #languitar in the comments you would have file:// which of course it should work for windows, but moving to a platform like android, you have different file system there, you don't have C drive. So make sure you got an alternative location on the android.

Categories