I'm trying to run this code for my project, but it gives an error at the try statement, i.e. "unindent does not match any outer indentation level".
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser
DEVELOPER_KEY = "Replaced_my_API_key"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
def youtube_search(options):
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                    developerKey=DEVELOPER_KEY)
    search_response = youtube.search().list(
        q=options.q,
        part="id,snippet",
        maxResults=options.max_results
    ).execute()
    videos = []
    channels = []
    playlists = []
    for search_result in search_response.get("items", []):
        if search_result["id"]["kind"] == "youtube#video":
            videos.append("%s (%s)" % (search_result["snippet"]["title"],
                                       search_result["id"]["videoId"]))
        elif search_result["id"]["kind"] == "youtube#channel":
            channels.append("%s (%s)" % (search_result["snippet"]["title"],
                                         search_result["id"]["channelId"]))
        elif search_result["id"]["kind"] == "youtube#playlist":
            playlists.append("%s (%s)" % (search_result["snippet"]["title"],
                                          search_result["id"]["playlistId"]))
    print ("Videos:\n", "\n".join(videos), "\n")
    print ("Channels:\n", "\n".join(channels), "\n")
    print ("Playlists:\n", "\n".join(playlists), "\n")

if __name__ == "__main__":
    to_search = "Witcher 3"
    argparser.add_argument("--q", help="Search term", default=to_search)
    argparser.add_argument("--max-results", help="Max results",
                           default=25)
    args = argparser.parse_args()
**The error ("unindent does not match any outer indentation level") is raised at the following statement:**
    try:
        youtube_search(args)
    except HttpError, e:
        print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
This error is common if you are mixing indentation with spaces and tabs. Choose one and stick to it. You can try two things:
Colorize the whitespace in your editor. In vim you can do this with :set list. Find the offending line and correct it.
Give Python the option -tt in the shebang: #!/usr/bin/python -tt. This gives you extra warnings when you are mixing indentation in the same file.
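If you would rather locate the offending line programmatically, here is a small illustrative sketch (not part of the original question; the file to check is passed as the first argument) that prints every line whose leading whitespace mixes tabs and spaces:

# check_indent.py -- illustrative helper; pass the file to check as the first argument
import sys

with open(sys.argv[1]) as f:
    for lineno, line in enumerate(f, start=1):
        indent = line[:len(line) - len(line.lstrip())]
        # a line is suspect if its leading whitespace contains both a tab and a space
        if "\t" in indent and " " in indent:
            print("line %d mixes tabs and spaces: %r" % (lineno, indent))

Run it as python check_indent.py yourscript.py and re-indent the reported lines with spaces only.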
SpringerLink has changed its structure, and now the script doesn't work anymore. With it you could download all the chapters of a book at once instead of each chapter individually.
I installed the script and its dependencies on Linux,
from here http://milianw.de/code-snippets/take-2-download-script-for-springerlinkcom-ebooks and here https://github.com/milianw/springer_download
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import getopt
import urllib
import re
import tempfile
import shutil
import subprocess
# Set some kind of User-Agent so we don't get blocked by SpringerLink
class SpringerURLopener(urllib.FancyURLopener):
version = "Mozilla 5.0"
def pdfcat(fileList, bookTitlePath):
if findInPath("pdftk") != False:
command = [findInPath("pdftk")]
command.extend(fileList)
command.extend(["cat", "output", bookTitlePath])
subprocess.Popen(command, shell=False).wait()
elif findInPath("stapler") != False:
command = [findInPath("stapler"), "cat"]
command.extend(fileList)
command.append(bookTitlePath)
subprocess.Popen(command, shell=False).wait()
else:
error("You have to install pdftk (http://www.accesspdf.com/pdftk/) or stapler (http://github.com/hellerbarde/stapler).")
# validate CLI arguments and start downloading
def main(argv):
if not findInPath("iconv"):
error("You have to install iconv.")
#Test if convert is installed
if os.system("convert --version > /dev/null 2>&1")!=0:
error("You have to install the packet ImageMagick in order to use convert")
try:
opts, args = getopt.getopt(argv, "hl:c:n", ["help", "link=", "content=", "no-merge"])
except getopt.GetoptError:
error("Could not parse command line arguments.")
link = ""
hash = ""
merge = True
for opt, arg in opts:
if opt in ("-h", "--help"):
usage()
sys.exit()
elif opt in ("-c", "--content"):
if link != "":
usage()
error("-c and -l arguments are mutually exclusive")
hash = arg
elif opt in ("-l", "--link"):
if hash != "":
usage()
error("-c and -l arguments are mutually exclusive")
match = re.match("(https?://)?(www\.)?springer(link)?.(com|de)/(content|.*book)/(?P<hash>[a-z0-9\-]+)/?(\?[^/]*)?$", arg)
if not match:
usage()
error("Bad link given. See example link.")
hash = match.group("hash")
elif opt in ("-n", "--no-merge"):
merge = False
if hash == "":
usage()
error("Either a link or a hash must be given.")
if merge and not findInPath("pdftk") and not findInPath("stapler"):
error("You have to install pdftk (http://www.accesspdf.com/pdftk/) or stapler (http://github.com/hellerbarde/stapler).")
baseLink = "http://www.springerlink.com/content/" + hash + "/"
link = baseLink + "contents/"
chapters = list()
loader = SpringerURLopener();
curDir = os.getcwd()
bookTitle = ""
coverLink = ""
front_matter = False
while True:
# download page source
try:
print "fetching book information...\n\t%s" % link
page = loader.open(link,"MUD=MP").read()
except IOError, e:
error("Bad link given (%s)" % e)
if re.search(r'403 Forbidden', page):
error("Could not access page: 403 Forbidden error.")
if bookTitle == "":
match = re.search(r'<h1[^<]+class="title">(.+?)(?:<br/>\s*<span class="subtitle">(.+?)</span>\s*)?</h1>', page, re.S)
if not match or match.group(1).strip() == "":
error("Could not evaluate book title - bad link %s" % link)
else:
bookTitle = match.group(1).strip()
# remove tags, e.g. <sub>
bookTitle = re.sub(r'<[^>]*?>', '', bookTitle)
# subtitle
if match and match.group(2) and match.group(2).strip() != "":
bookTitle += " - " + match.group(2).strip()
# edition
#match = re.search(r'<td class="labelName">Edition</td><td class="labelValue">([^<]+)</td>', page)
#if match:
#bookTitle += " " + match.group(1).strip()
## year
#match = re.search(r'<td class="labelName">Copyright</td><td class="labelValue">([^<]+)</td>', page)
#if match:
#bookTitle += " " + match.group(1).strip()
## publisher
#match = re.search(r'<td class="labelName">Publisher</td><td class="labelValue">([^<]+)</td>', page)
#if match:
#bookTitle += " - " + match.group(1).strip()
# coverimage
match = re.search(r'<div class="coverImage" title="Cover Image" style="background-image: url\(/content/([^/]+)/cover-medium\.gif\)">', page)
if match:
coverLink = "http://www.springerlink.com/content/" + match.group(1) + "/cover-large.gif"
bookTitlePath = curDir + "/%s.pdf" % sanitizeFilename(bookTitle)
if bookTitlePath == "":
error("could not transliterate book title %s" % bookTitle)
if os.path.isfile(bookTitlePath):
error("%s already downloaded" % bookTitlePath)
print "\nNow Trying to download book '%s'\n" % bookTitle
#error("foo")
# get chapters
for match in re.finditer('href="([^"]+\.pdf)"', page):
chapterLink = match.group(1)
if chapterLink[:7] == "http://": # skip external links
continue
if re.search(r'front-matter.pdf', chapterLink):
if front_matter:
continue
else:
front_matter = True
if re.search(r'back-matter.pdf', chapterLink) and re.search(r'<a href="([^"#]+)"[^>]*>Next</a>', page):
continue
#skip backmatter if it is in list as second chapter - will be there at the end of the book also
if re.search(r'back-matter.pdf', chapterLink):
if len(chapters)<2:
continue
chapters.append(chapterLink)
# get next page
match = re.search(r'<a href="([^"#]+)"[^>]*>Next</a>', page)
if match:
link = "http://www.springerlink.com" + match.group(1).replace("&", "&")
else:
break
if len(chapters) == 0:
error("No chapters found - bad link?")
print "found %d chapters" % len(chapters)
# setup; set tempDir as working directory
tempDir = tempfile.mkdtemp()
os.chdir(tempDir)
i = 1
fileList = list()
for chapterLink in chapters:
if chapterLink[0] == "/":
chapterLink = "http://www.springerlink.com" + chapterLink
else:
chapterLink = baseLink + chapterLink
chapterLink = re.sub("/[^/]+/\.\.", "", chapterLink)
print "downloading chapter %d/%d" % (i, len(chapters))
localFile, mimeType = geturl(chapterLink, "%d.pdf" % i)
if mimeType.gettype() != "application/pdf":
os.chdir(curDir)
shutil.rmtree(tempDir)
error("downloaded chapter %s has invalid mime type %s - are you allowed to download %s?" % (chapterLink, mimeType.gettype(), bookTitle))
fileList.append(localFile)
i += 1
if coverLink != "":
print "downloading front cover from %s" % coverLink
localFile, mimeType = geturl(coverLink, "frontcover")
if os.system("convert %s %s.pdf" % (localFile, localFile)) == 0:
fileList.insert(0, localFile + ".pdf")
if merge:
print "merging chapters"
if len(fileList) == 1:
shutil.move(fileList[0], bookTitlePath)
else:
pdfcat(fileList, bookTitlePath)
# cleanup
os.chdir(curDir)
shutil.rmtree(tempDir)
print "book %s was successfully downloaded, it was saved to %s" % (bookTitle, bookTitlePath)
log("downloaded %s chapters (%.2fMiB) of %s\n" % (len(chapters), os.path.getsize(bookTitlePath)/2.0**20, bookTitle))
else: #HL: if merge=False
print "book %s was successfully downloaded, unmerged chapters can be found in %s" % (bookTitle, tempDir)
log("downloaded %s chapters of %s\n" % (len(chapters), bookTitle))
sys.exit()
# give a usage message
def usage():
print """Usage:
%s [OPTIONS]
Options:
-h, --help Display this usage message
-l LINK, --link=LINK defines the link of the book you intend to download
-c ISBN, --content=ISBN builds the link from a given ISBN (see below)
-n, --no-merge Only download the chapters but don't merge them into a single PDF.
You have to set exactly one of these options.
LINK:
The link to your the detail page of the ebook of your choice on SpringerLink.
It lists book metadata and has a possibly paginated list of the chapters of the book.
It has the form:
http://www.springerlink.com/content/ISBN/STUFF
Where: ISBN is a string consisting of lower-case, latin chars and numbers.
It alone identifies the book you intent do download.
STUFF is optional and looks like #section=... or similar. It will be stripped.
""" % os.path.basename(sys.argv[0])
# raise an error and quit
def error(msg=""):
if msg != "":
log("ERR: " + msg + "\n")
print "\nERROR: %s\n" % msg
sys.exit(2)
return None
# log to file
def log(msg=""):
logFile = open('springer_download.log', 'a')
logFile.write(msg)
logFile.close()
# based on http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
def findInPath(prog):
for path in os.environ["PATH"].split(os.pathsep):
exe_file = os.path.join(path, prog)
if os.path.exists(exe_file) and os.access(exe_file, os.X_OK):
return exe_file
return False
# based on http://mail.python.org/pipermail/python-list/2005-April/319818.html
def _reporthook(numblocks, blocksize, filesize, url=None):
#XXX Should handle possible filesize=-1.
try:
percent = min((numblocks*blocksize*100)/filesize, 100)
except:
percent = 100
if numblocks != 0:
sys.stdout.write("\b"*70)
sys.stdout.write("%-66s%3d%%" % (url, percent))
def geturl(url, dst):
downloader = SpringerURLopener()
if sys.stdout.isatty():
response = downloader.retrieve(url, dst,
lambda nb, bs, fs, url=url: _reporthook(nb,bs,fs,url), "MUD=MP")
sys.stdout.write("\n")
else:
response = downloader.retrieve(url, dst, None, "MUD=MP")
return response
def sanitizeFilename(filename):
p1 = subprocess.Popen(["echo", filename], stdout=subprocess.PIPE)
p2 = subprocess.Popen(["iconv", "-f", "UTF-8", "-t" ,"ASCII//TRANSLIT"], stdin=p1.stdout, stdout=subprocess.PIPE)
return re.sub("\s+", "_", p2.communicate()[0].strip().replace("/", "-"))
# start program
if __name__ == "__main__":
main(sys.argv[1:])
# kate: indent-width 4; replace-tabs on;
Expected: it should download the book.
Actual result: with the command ./springer_download.py -c "978-3-662-54804-2" I get ERROR: Could not evaluate book title - bad link http://www.springerlink.com/content/978-3-662-54804-2/contents/
The test
python2 ./springer_download.py -c "978-3-662-54804-2"
does not work either.
In the code above, the error occurs in this context:
match = re.search(r'<h2 class="MPReader_Profiles_SpringerLink_Content_PrimitiveHeadingControlName">([^<]+)</h2>', page)
if not match or match.group(1).strip() == "":
    error("Could not evaluate book title - bad link?")
else:
    bookTitle = match.group(1).strip()
print "\nThe book you are trying to download is called '%s'\n" % bookTitle
I would also be happy with alternatives like browser add-ons or the like, using the example https://link.springer.com/book/10.1007/978-3-662-54805-9#toc
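Not a fix for the script itself, but a minimal sketch of an alternative for the new link.springer.com site, assuming the whole-book PDF is still exposed under the /content/pdf/<DOI>.pdf pattern (true for many titles you are entitled to access) and that the requests library is installed:

import requests

# DOI taken from the example book URL in the question
doi = "10.1007/978-3-662-54805-9"
url = "https://link.springer.com/content/pdf/%s.pdf" % doi

resp = requests.get(url, stream=True)
resp.raise_for_status()   # a 403/404 here usually means no access or a changed URL pattern

with open(doi.split("/")[-1] + ".pdf", "wb") as out:
    for chunk in resp.iter_content(chunk_size=1 << 16):
        out.write(chunk)
print("saved %s" % out.name)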
The following script is an extract from
https://github.com/RittmanMead/obi-metrics-agent/blob/master/obi-metrics-agent.py
The script is written in Jython and hits the WebLogic admin console to extract metrics.
The problem is that it runs only once and does not keep looping.
Here's the script that I've extracted from the original for my purpose:
import calendar, time
import sys
import getopt
print '---------------------------------------'
# Check the arguments to this script are as expected.
# argv[0] is script name.
argLen = len(sys.argv)
if argLen -1 < 2:
print "ERROR: got ", argLen -1, " args, must be at least two."
print '$FMW_HOME/oracle_common/common/bin/wlst.sh obi-metrics-agent.py <AdminUserName> <AdminPassword> [<AdminServer_t3_url>] [<Carbon|InfluxDB>] [<target host>] [<target port>] [targetDB influx db>'
exit()
outputFormat='CSV'
url='t3://localhost:7001'
targetHost='localhost'
targetDB='obi'
targetPort='8086'
try:
wls_user = sys.argv[1]
wls_pw = sys.argv[2]
url = sys.argv[3]
outputFormat=sys.argv[4]
targetHost=sys.argv[5]
targetPort=sys.argv[6]
targetDB=sys.argv[7]
except:
print ''
print wls_user, wls_pw,url, outputFormat,targetHost,targetPort,targetDB
now_epoch = calendar.timegm(time.gmtime())*1000
if outputFormat=='InfluxDB':
import httplib
influx_msgs=''
connect(wls_user,wls_pw,url)
results = displayMetricTables('Oracle_BI*','dms_cProcessInfo')
while True:
for table in results:
tableName = table.get('Table')
rows = table.get('Rows')
rowCollection = rows.values()
iter = rowCollection.iterator()
while iter.hasNext():
row = iter.next()
rowType = row.getCompositeType()
keys = rowType.keySet()
keyIter = keys.iterator()
inst_name= row.get('Name').replace(' ','-')
try:
server= row.get('Servername').replace(' ','-').replace('/','_')
except:
try:
server= row.get('ServerName').replace(' ','-').replace('/','_')
except:
server='unknown'
try:
host= row.get('Host').replace(' ','-')
except:
host=''
while keyIter.hasNext():
columnName = keyIter.next()
value = row.get(columnName )
if columnName.find('.value')>0:
metric_name=columnName.replace('.value','')
if value is not None:
if outputFormat=='InfluxDB':
influx_msg= ('%s,server=%s,host=%s,metric_group=%s,metric_instance=%s value=%s %s') % (metric_name,server,host,tableName,inst_name, value,now_epoch*1000000)
influx_msgs+='\n%s' % influx_msg
conn = httplib.HTTPConnection('%s:%s' % (targetHost,targetPort))
## TODO pretty sure should be urlencoding this ...
a=conn.request("POST", ("/write?db=%s" % targetDB), influx_msg)
r=conn.getresponse()
if r.status != 204:
print 'Failed to send to InfluxDB! Error %s Reason %s' % (r.status,r.reason)
print influx_msg
#sys.exit(2)
else:
print 'Skipping None value %s,server=%s,host=%s,metric_group=%s,metric_instance=%s value=%s %s' % (metric_name,server,host,tableName,inst_name, value,now_epoch*1000000)
I've tried to use a while loop, but that just stopped the code from exiting without actually re-looping.
What I want to achieve is to loop it infinitely after connecting to WebLogic,
i.e. after this line
connect(wls_user,wls_pw,url)
and perhaps sleep for 5 seconds before re-running
Any and all help will be appreciated
Thanks
P
You can use this kind of condition for the loop:
mainLoop = 'true'
while mainLoop == 'true' :
and this for the pause between iterations:
java.lang.Thread.sleep(3 * 1000)
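Putting that together with what the question asks for (loop everything after connect() and pause between passes), a sketch of the skeleton could look like this; connect() and displayMetricTables() are the WLST calls already used in the script, and the existing per-table/per-row metric handling is assumed to stay unchanged inside the for loop:

connect(wls_user, wls_pw, url)                 # connect to WebLogic once

while True:                                    # repeat the collection forever
    now_epoch = calendar.timegm(time.gmtime()) * 1000   # refresh the timestamp each pass
    results = displayMetricTables('Oracle_BI*', 'dms_cProcessInfo')
    for table in results:
        # ... existing per-table / per-row metric handling from the script above ...
        pass
    time.sleep(5)                              # wait 5 seconds before the next pass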
I want to get the total number of views of a series of YouTube videos about a specific topic.
I have this code, but it doesn't work because I am not getting the correct data from the list "videos". How can I iterate over this list and get the total number of views for the whole list?
#!/usr/bin/python
import urllib2
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser
# Set DEVELOPER_KEY to the API key value from the APIs & auth > Registered apps
# tab of
# https://cloud.google.com/console
# Please ensure that you have enabled the YouTube Data API for your project.
DEVELOPER_KEY = "AIzaSyB..........I"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
def youtube_search(options):
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                    developerKey=DEVELOPER_KEY)
    # Call the search.list method to retrieve results matching the specified
    # query term.
    search_response = youtube.search().list(
        q=options.q,
        part="id,snippet",
        maxResults=options.max_results
    ).execute()
    videos = []
    channels = []
    playlists = []
    # Add each result to the appropriate list, and then display the lists of
    # matching videos, channels, and playlists.
    for search_result in search_response.get("items", []):
        if search_result["id"]["kind"] == "youtube#video":
            videos.append("%s (%s)" % (search_result["snippet"]["title"],
                                       search_result["id"]["videoId"]))
        elif search_result["id"]["kind"] == "youtube#channel":
            channels.append("%s (%s)" % (search_result["snippet"]["title"],
                                         search_result["id"]["channelId"]))
        elif search_result["id"]["kind"] == "youtube#playlist":
            playlists.append("%s (%s)" % (search_result["snippet"]["title"],
                                          search_result["id"]["playlistId"]))
    print "Videos:\n", "\n".join(videos), "\n"
    ###################################
    ###################################
    ###HERE IS THE PROBLEM##########
    ###################################
    for video in videos:
        source = video
        response = urllib2.urlopen(source)
        html = response.read() #Done, you have the whole HTML file in a gigantic string
        wordBreak = ['<','>']
        html = list(html)
        i = 0
        while i < len(html):
            if html[i] in wordBreak:
                html[i] = ' '
            i += 1
        #The block above is just to make the html.split() easier.
        html = ''.join(html)
        html = html.split()
        dataSwitch = False
        numOfViews = ''
        for element in html:
            if element == '/div':
                dataSwitch = False
            if dataSwitch:
                numOfViews += str(element)
            if element == 'class="watch-view-count"':
                dataSwitch = True
        print (numOfViews)
    ########################################
    ########################################
    #######################################
    print "Channels:\n", "\n".join(channels), "\n"
    print "Playlists:\n", "\n".join(playlists), "\n"

if __name__ == "__main__":
    argparser.add_argument("--q", help="Search term", default="la la land")
    argparser.add_argument("--max-results", help="Max results", default=25)
    args = argparser.parse_args()
    try:
        youtube_search(args)
    except HttpError, e:
        print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
The code in FileB.py works fine on its own, but fails at one point when I call it from another file. I found that it stops working at the search_response call in the code below.
FileA.py
from FileB import *
search = "stackoverflow"
searchF(search)
FileB.py
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser

search = "Google"

def searchF(search):
    DEVELOPER_KEY = "REPLACE_ME"
    YOUTUBE_API_SERVICE_NAME = "youtube"
    YOUTUBE_API_VERSION = "v3"
    print "searchF started"  # works

    def youtube_search(options):
        youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                        developerKey=DEVELOPER_KEY)
        search_response = youtube.search().list(
            q=options.q,
            type="video",
            part="id,snippet",
            maxResults=options.max_results
        ).execute()
        print "search_response executed"  # doesn't work
        search_videos = []
        for search_result in search_response.get("items", []):
            search_videos.append(search_result["id"]["videoId"])
        video_ids = ",".join(search_videos)
        video_response = youtube.videos().list(
            id=video_ids,
            part='snippet, contentDetails'
        ).execute()
        videos = []
        for video_result in video_response.get("items", []):
            videos.append("%s, (%s,%s)" % (video_result["snippet"]["title"],
                                           video_result["contentDetails"],
                                           video_result["contentDetails"]))
        find = "licensedContent': True"
        result = ', '.join(videos)
        print find in result

    if __name__ == "__main__":
        print "__main__"
        argparser.add_argument("--q", help="Search term", default=search)
        argparser.add_argument("--max-results", help="Max results", default=25)
        args = argparser.parse_args()
        try:
            youtube_search(args)
        except HttpError, e:
            print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
I changed if __name__ == "__main__": to if 1: and it kind of works, but I assume it's a horrible solution.
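A cleaner structure (a sketch, not the original code; identifiers kept from the question but trimmed to the search itself) keeps youtube_search and the argument parsing at module level, so from FileB import * followed by searchF(search) runs the search without touching the __main__ guard:

# FileB.py -- sketch of one possible restructure
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser

DEVELOPER_KEY = "REPLACE_ME"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

def youtube_search(query, max_results=25):
    # module-level function: build the client and run the search directly
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                    developerKey=DEVELOPER_KEY)
    search_response = youtube.search().list(
        q=query,
        type="video",
        part="id,snippet",
        maxResults=max_results
    ).execute()
    return [item["id"]["videoId"] for item in search_response.get("items", [])]

def searchF(search):
    # thin wrapper so FileA can keep calling searchF(search)
    print "searchF started"
    print youtube_search(search)

if __name__ == "__main__":
    # runs only when FileB.py is executed directly, never on import
    argparser.add_argument("--q", help="Search term", default="Google")
    args = argparser.parse_args()
    searchF(args.q)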
According to the official documentation, I have to authenticate with OAuth1 in order to use their API. I can't seem to get all the necessary parts to authenticate. Here's my code so far:
#!usr/bin/env python
#encoding=utf-8
import requests
import sys, getopt
import urllib2
LOCALE = 'zh-CN'
LANGUAGE = 'zh-CN'
def doRequest(imageUrl):
reqUrlA = 'https://api.cloudsightapi.com/image_requests/' # get token
reqUrlB = 'https://api.cloudsightapi.com/image_responses/' # get the final recognition result with token
headers = {
'Authorization' : 'CloudSight INSERT API KEY',
'Host' : 'api.cloudsightapi.com',
'Origin:' : 'https://cloudsightapi.com'
}
postData = {
'image_request[remote_image_url]' : imageUrl,
'image_request[locale]': LOCALE,
'image_request[language]': LANGUAGE
}
try:
response = requests.post(reqUrlA, headers=headers, data=postData)
except Exception, e:
print 'Error: connection error, please check your Internet and confirm the image url'
sys.exit()
if "error" in response.json():
# print "Error: %s" % response.json()["error"]
print "无法识别图片:请检查图片的连接是否合法"
print
sys.exit()
else:
token = response.json()['token']
# you may get some response with status 'not completed' for about some times before getting the final result
reqTimes = 20
isNotified = True
while reqTimes > 0:
try:
response = requests.get(reqUrlB + token, headers=headers)
except Exception, e:
print 'Error: connection error, please check your Internet and confirm the image url'
sys.exit()
status = response.json()['status']
if status == 'completed':
print 'RESULT: '
print '\timage url:', imageUrl
print '\timage name:', response.json()['name']
print
# return response.json()['name']
break
elif status == 'not completed':
if isNotified == True:
print 'recognition in progress'
isNotified = False
reqTimes -= 1
def usage():
print '''
usage:
cloudSightAPI ImageURL fdgdfgrtrgd
type `cloudSightAPI -h` to get help
'''
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], 'h')
for op, value in opts:
if op == '-h':
usage()
sys.exit()
if len(args) == 0:
usage()
sys.exit()
except getopt.GetoptError as e:
print 'Error: using invalid parameter -%s' % e.opt
usage()
sys.exit()
imageUrl = sys.argv[1]
doRequest(imageUrl)
if __name__ == '__main__':
main()
doRequest ("INSERT IMAGE URL")
According to the official documentation, I need to get the oauth_nonce and oauth_token in order to create a signature that would allow me to use the API. How would I go about getting these details? Is there an OAuth helper available?
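If the endpoint really does require OAuth1, one common route in Python is to let requests_oauthlib build the nonce, timestamp, and signature for you instead of assembling them by hand. A minimal sketch, assuming you have a consumer key/secret (and, if required, a token/secret) from CloudSight; all key values below are placeholders:

# Sketch only: OAuth1 signing with requests_oauthlib (pip install requests_oauthlib)
import requests
from requests_oauthlib import OAuth1

auth = OAuth1(client_key="CONSUMER_KEY",
              client_secret="CONSUMER_SECRET",
              resource_owner_key="TOKEN",            # omit if the API only needs key/secret
              resource_owner_secret="TOKEN_SECRET")

resp = requests.post("https://api.cloudsightapi.com/image_requests",
                     data={"image_request[remote_image_url]": "http://example.com/image.jpg",
                           "image_request[locale]": "en-US"},
                     auth=auth)
print resp.status_code, resp.text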
Ended up doing it in Ruby. It was a lot easier!
https://github.com/cloudsight/cloudsight-ruby
Hope it will work; sorry for the indentation. Don't forget to import requests.
import requests  # as noted above, requests must be imported

def quest(imageUrl):
    fa = ''
    LOCALE = 'en-US'
    LANGUAGE = 'en-US'
    reqUrlB = 'https://api.cloudsightapi.com/image_responses/'
    header = {
        'Authorization' : 'CloudSight API_KEY_HERE',
        'Host' : 'api.cloudsightapi.com',
        'Origin:' : 'https://cloudsightapi.com'
    }
    footer = postData = {
        'image_request[remote_image_url]' : imageUrl,
        'image_request[locale]': LOCALE,
        'image_request[language]': LANGUAGE
    }
    r = requests.post("https://api.cloudsightapi.com/image_requests", headers=header, data=footer)
    print r
    token = r.json()['token']
    status = 'lol'
    while True:
        response = requests.get(reqUrlB + token, headers=header)
        status = response.json()['status']
        if status == 'completed':
            name = response.json()['name']
            fa = name
            break
    return fa
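For completeness, a call would look like this (with a hypothetical image URL):

print quest("https://example.com/photo.jpg")

The function keeps polling the image_responses endpoint until CloudSight reports the request as completed, and then returns the recognised name.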