sys.argv list index out of range error [duplicate] - python

This question already has answers here:
What does "sys.argv[1]" mean? (What is sys.argv, and where does it come from?)
(9 answers)
Closed 7 months ago.
I am trying to run this script that grabs rss feeds on the environment "Thonny" but I just keep receiving this error of "IndexError: List index out of range"
Traceback (most recent call last):
File "C:\Users\uri\rssfeedfour.py", line 11, in <module>
url = sys.argv[1]
IndexError: list index out of range
How do I resolve this to keep from getting this error over and over again. Im not sure how to solve this as I am a beginner. Do I need to define it, if so how? or could I take it out and go a different direction? Here is the code.
import feedparser
import time
from subprocess import check_output
import sys
#feed_name = 'TRIBUNE'
#url = 'http://chicagotribune.feedsportal.com/c/34253/f/622872/index.rss'
feed_name = sys.argv[1]
url = sys.argv[2]
db = 'http://feeds.feedburner.com/TheHackersNews'
limit = 12 * 3600 * 1000
current_time_millis = lambda: int(round(time.time() * 1000))
current_timestamp = current_time_millis()
def post_is_in_db(title):
with open(db, 'r') as database:
for line in database:
if title in line:
return True
return False
def post_is_in_db_with_old_timestamp(title):
with open(db, 'r') as database:
for line in database:
if title in line:
ts_as_string = line.split('|', 1)[1]
ts = long(ts_as_string)
if current_timestamp - ts > limit:
return True
return False
#
# get the feed data from the url
#
feed = feedparser.parse(url)
#
# figure out which posts to print
#
posts_to_print = []
posts_to_skip = []
for post in feed.entries:
# if post is already in the database, skip it
# TODO check the time
title = post.title
if post_is_in_db_with_old_timestamp(title):
posts_to_skip.append(title)
else:
posts_to_print.append(title)
#
# add all the posts we're going to print to the database with the current timestamp
# (but only if they're not already in there)
#
f = open(db, 'a')
for title in posts_to_print:
if not post_is_in_db(title):
f.write(title + "|" + str(current_timestamp) + "\n")
f.close
#
# output all of the new posts
#
count = 1
blockcount = 1
for title in posts_to_print:
if count % 5 == 1:
print("\n" + time.strftime("%a, %b %d %I:%M %p") + ' ((( ' + feed_name + ' - ' + str(blockcount) + ' )))')
print("-----------------------------------------\n")
blockcount += 1
print(title + "\n")
count += 1

sys.argv is a list in Python, which contains the command-line arguments passed to the script. sys.argv[0] contains the name of the script, sys.argv[1] contains the first argument and so on.
To prevent this error, you need to give command line arguments when starting the script. For example, you can start this script without any errors by
python rssfeedfour.py TRIBUNE http://chicagotribune.feedsportal.com/c/34253/f/622872/index.rss
You can also modify the script so that it works using the default arguments if you don't provide any command line arguments.
try:
feed_name = sys.argv[1]
except IndexError:
feed_name = 'TRIBUNE'
try:
url = sys.argv[2]
except IndexError:
url = 'http://chicagotribune.feedsportal.com/c/34253/f/622872/index.rss'
You can learn more about handling errors here.
Although it is much more convenient to use argparse library.

Related

Python JSON KeyError for non missing key

For some unknown reason, when I run the below script, the following error is returned along with the desired output. For some reason, this was working without any errors last night. The API output does change every minute but I wouldn't expect a KeyError to be returned. I can't simply pinpoint where this error is coming from:
[u'#AAPL 151204C00128000'] <----- What I want to see printed
Traceback (most recent call last):
File "Options_testing.py", line 60, in <module>
main()
File "Options_testing.py", line 56, in main
if quotes[x]['greeks']['impvol'] > 0: #change this for different greek vals
KeyError: 'impvol'
Here is a little snippet of data:
{"results":{"optionchain":{"expire":"all","excode":"oprac","equityinfo":{"longname":"Apple Inc","shortname":"AAPL"},"money":"at","callput":"all","key":{"symbol":["AAPL"],"exLgName":"Nasdaq Global Select","exShName":"NGS","exchange":"NGS"},"symbolstring":"AAPL"},"quote":[{"delaymin":15,"contract":{"strike":108,"openinterest":3516,"contracthigh":6.16,"contractlow":0.02,"callput":"Put","type":"WEEK","expirydate":"2015-11-13"},"root":{"equityinfo":{"longname":"Apple Inc","shortname":"AAPL"},"key":{"symbol":["AAPL"],"exLgName":"Nasdaq Global Select","exShName":"NGS","exchange":"NGS"}},"greeks":{"vega":0,"theta":0,"gamma":0,"delta":0,"impvol":0,"rho":0}
Code:
#Options screener using Quotemedia's API
import json
import requests
#import csv
def main():
url_auth= "https://app.quotemedia.com/user/g/authenticate/v0/102368/XXXXX/XXXXX"
decode_auth = requests.get(url_auth)
#print decode_auth.json()
#print(type(decode_auth))
auth_data = json.dumps(decode_auth.json())
#Parse decode_auth, grab 'sid'
sid_parsed = json.loads(auth_data)["sid"]
#print sid_parsed
#Pass sid into qm_options
#Construct URL
symbol = 'AAPL'
SID = sid_parsed
url_raw = 'http://app.quotemedia.com/data/getOptionQuotes.json?webmasterId=102368'
url_data = url_raw + '&symbol=' + symbol + '&greeks=true' + '&SID=' + SID
#print url_data
response = requests.get(url_data)
#print response
data = json.dumps(response.json())
#print data
#save data to a file
with open('AAPL_20151118.json', 'w') as outfile:
json.dumps (data, outfile)
#Turn into json object
obj = json.loads(data)
#slim the object
quotes = obj['results']['quote']
#find the number of options contracts
range_count = obj['results']['symbolcount']
#print all contracts with an implied vol > 0
for x in range(0,range_count):
if quotes[x]['greeks']['impvol'] > 0: #change this for different greek vals
print quotes[x]['key']['symbol']
if __name__ == '__main__':
main()
I can provide sample data if necessary.
for x in range(0,range_count):
if quotes[x]['greeks']['impvol'] > 0: #change this for different greek vals
print quotes[x]['key']['symbol']
This loops throug multiple quotes, so maybe there is even just one that does not have an impvol property.
You should add some error handling, so you find out when that happens. Something like this:
# no need to iterate over indexes, just iterate over the items
for quote in quotes:
if 'greeks' not in quote:
print('Quote does not contain `greeks`:', quote)
elif 'impvol' not in quote['greeks']:
print('Quote does not contain `impvol`:', quote)
elif quote['greeks']['impvol'] > 0:
print quote['key']['symbol']

i have this code. It is supposed to classify the segment files with the ratings in sys.argv[5].But it keeps having error

I have the following code tha uses FFmpeg . It has 5 argv and it takes in filename,video,segment size, start time, end time, ratings. thats supposed to let me classify segments many times with my ratings "PG G M18..." but there's this error,
"File "C:\c.py",line 92, in <module> os.rename<filename + str(x), filename + str(x) + classification)
WindowsError: [Error2] The system cannot find the file specified.
Ive tried to edit many times but this error still persists. Anyone have any idea what could this error mean and anyway to solve it?
import sys
import subprocess
import os
#change hh:mm:ss to seconds:
def getSeconds(sec):
l = sec.split(':')
return int(l[0])* 3600 + int(l[1])* 60 + float(l[2])
def get_total_time(filename):
proc = subprocess.Popen(["ffmpeg", "-i", filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
lines = proc.communicate()[1]
target = [line for line in lines.split('\n') if 'Duration:' in line][0]
time = target.split('Duration: ')[-1].split(',', 1)[0]
return time
#check command line arguments
if len(sys.argv) < 5:
print "Error: not enough arguments"
sys.exit()
#save filename to file_name
file_name = sys.argv[1]
if not file_name.endswith('mpg'):
print 'Error! File extension not supported'
sys.exit()
# save a size of chunk in chunk
segsize = int(sys.argv[2])
chunk = (segsize * 1024)
# get time of starting censorship in seconds
start_censorship = getSeconds(sys.argv[3])
# get time of ending censorship in seconds
end_censorship = getSeconds(sys.argv[4])
classification = sys.argv[5]
if classification not in ['P','PG','G','NC16','M18','R21']:
print "Error: invalid classification"
sys.exit()
#initialize variable for extension
file_ext = ''
# if extension exists then save it into file_ext
if '.' in file_name:
# split file_name on two parts from right
file_ext = sys.argv[1].split('.')[-1]
# file_name without extension
filename = '.'.join(file_name.split('.')[:-1])
# total_time of file in seconds
total_time = getSeconds(get_total_time(file_name))
print total_time
#open file
in_file = open(file_name,'rb')
#read first chunks
s = in_file.read(chunk)
in_file.seek(0, 2)
file_size = in_file.tell()
chunks = (file_size / chunk) + 1
chunk_time = total_time/ file_size * chunk
#close input file
in_file.close()
#loop for each chunk
for x in range(0, chunks):
# starting time of current chunk
t1 = chunk_time*x
# ending time of current chunk
t2 = chunk_time*(x+1)
if t2 < start_censorship or t1 > end_censorship:
pass
else:
if os.path.exists(filename + str(x) + 'x'):
os.rename(filename + str(x) + 'x', filename + str(x))
os.rename(filename + str(x), filename + str(x) + classification)
#read next bytes
You're not checking whether filename + str(x) exists before you try to rename it. Either check first, or put it in a try block and catch OSError (which WindowsError subclasses). Either that or you're missing an indent on the second rename.
In that last for loop it looks like you are possibly renaming multiple files -- where do the original files come from?
Or, asked another way, for every chunk that is not censored you are renaming a file -- is that really what you want?

NameError: Name is 'feed' not defined

I've got this these two code snippets from a webinar(slide 7 and 8 respectively)
The first one finds a desired URL i have tested it and it works:
def SECdownload(year, month):
import os
from urllib.request import urlopen
root = None
feedFile = None
feedData = None
good_read = False
itemIndex = 0
edgarFilingsFeed = 'http://www.sec.gov/Archives/edgar/monthly/xbrlrss-' + str(year) + '-' + str(month).zfill(2) + '.xml'
print( edgarFilingsFeed )
if not os.path.exists( "sec/" + str(year) ):
os.makedirs( "sec/" + str(year) )
if not os.path.exists( "sec/" + str(year) + '/' + str(month).zfill(2) ):
os.makedirs( "sec/" + str(year) + '/' + str(month).zfill(2) )
target_dir = "sec/" + str(year) + '/' + str(month).zfill(2) + '/'
try:
feedFile = urlopen( edgarFilingsFeed ) # urlopen will not work (python 3) needs from urllib.request import urlopen
try:
feedData = feedFile.read()
good_read = True
finally:
feedFile.close()
except HTTPError as e:
print( "HTTP Error:", e.code )
and the second one is supposed to parse the RSS Feed to find ZIP filenames:
#Downloading the data - parsing the RSS feed to extract the ZIP file enclosure filename
# Process RSS feed and walk through all items contained
for item in feed.entries:
print( item[ "summary" ], item[ "title" ], item[ "published" ] )
try:
# Identify ZIP file enclosure, if available
enclosures = [ l for l in item[ "links" ] if l[ "rel" ] == "enclosure" ]
if ( len( enclosures ) > 0 ):
# ZIP file enclosure exists, so we can just download the ZIP file
enclosure = enclosures[0]
sourceurl = enclosure[ "href" ]
cik = item[ "edgar_ciknumber" ]
targetfname = target_dir+cik +' - ' +sourceurl.split('/')[-1]
retry_counter = 3
while retry_counter > 0:
good_read = downloadfile( sourceurl, targetfname )
if good_read:
break
else:
print( "Retrying:", retry_counter )
retry_counter -= 1
except:
pass
However whenever i try to run the second module i get:
Traceback (most recent call last):
File "E:\Py_env\df2.py", line 3, in <module>
for item in feed.entries:
NameError: name 'feed' is not defined
What am i not understanding in the webinar right? And if i must define feed i have literally no idea how to do it while keeping a logical linkage to the data the first code snippet provides!
(On the sidenote this is a webinar from a reputable software vendor so how it is possible to have mistakes(?) there's something i am doing wrong...)
The problem is like the error message implies: you haven't defined any variable named feed that's in scope when the second snippet executes. Either their code omitted something, or you missed a part that was crucial.
That aside, the formatting on this code is really dodgy and not at all idiomatic Python. You're probably better off looking for a new snippet.
Migrated from a comment.
As your output is showing you and you noticed, feed wasn't defined nor was it shown to you in the slides. It looks as though the slide share is expecting you to make a logical jump, and they do point out in the right column that feedparser is an easy way to parse... feeds (RSS Feeds).
So they are expecting that you can adapt the feedData you found in your first function and can dump it into a method from feedparser.
As you can see in various examples online (such as the docs), this can be done from the string you got:
>>> import feedparser
>>> rawdata = """<rss version="2.0">
<channel>
<title>Sample Feed</title>
</channel>
</rss>"""
>>> d = feedparser.parse(rawdata)
>>> d['feed']['title']
u'Sample Feed'
Using that, I bet you can see where it goes (rather than me telling you).
as #PatrickCollins pointed out, this is kindof crappy examples for python, but that shouldn't get in your way as you're learning it.

Python script that performs line matching over stale files generates inconsistent output

I created a python script to parse mail (exim) logfiles and execute pattern matching in order to get a top 100 list for most send domains on my smtp servers.
However, everytime I execute the script I get a different count.
These are stale logfiles, and I cannot find a functional flaw in my code.
Example output:
1:
70353 gmail.com
68337 hotmail.com
53657 yahoo.com
2:
70020 gmail.com
67741 hotmail.com
54397 yahoo.com
3:
70191 gmail.com
67917 hotmail.com
54438 yahoo.com
Code:
#!/usr/bin/env python
import os
import datetime
import re
from collections import defaultdict
class DomainCounter(object):
def __init__(self):
self.base_path = '/opt/mail_log'
self.tmp = []
self.date = datetime.date.today() - datetime.timedelta(days=14)
self.file_out = '/var/tmp/parsed_exim_files-' + str(self.date.strftime('%Y%m%d')) + '.decompressed'
def parse_log_files(self):
sub_dir = os.listdir(self.base_path)
for directory in sub_dir:
if re.search('smtp\d+', directory):
fileInput = self.base_path + '/' + directory + '/maillog-' + str(self.date.strftime('%Y%m%d')) + '.bz2'
if not os.path.isfile(self.file_out):
os.popen('touch ' + self.file_out)
proccessFiles = os.popen('/bin/bunzip2 -cd ' + fileInput + ' > ' + self.file_out)
accessFileHandle = open(self.file_out, 'r')
readFileHandle = accessFileHandle.readlines()
print "Proccessing %s." % fileInput
for line in readFileHandle:
if '<=' in line and ' for ' in line and '<>' not in line:
distinctLine = line.split(' for ')
recipientAddresses = distinctLine[1].strip()
recipientAddressList = recipientAddresses.strip().split(' ')
if len(recipientAddressList) > 1:
for emailaddress in recipientAddressList:
# Since syslog messages are transmitted over UDP some messages are dropped and needs to be filtered out.
if '#' in emailaddress:
(login, domein) = emailaddress.split("#")
self.tmp.append(domein)
continue
else:
try:
(login, domein) = recipientAddressList[0].split("#")
self.tmp.append(domein)
except Exception as e:
print e, '<<No valid email address found, skipping line>>'
accessFileHandle.close()
os.unlink(self.file_out)
return self.tmp
if __name__ == '__main__':
domainCounter = DomainCounter()
result = domainCounter.parse_log_files()
domainCounts = defaultdict(int)
top = 100
for domain in result:
domainCounts[domain] += 1
sortedDict = dict(sorted(domainCounts.items(), key=lambda x: x[1], reverse=True)[:int(top)])
for w in sorted(sortedDict, key=sortedDict.get, reverse=True):
print '%-3s %s' % (sortedDict[w], w)
proccessFiles = os.popen('/bin/bunzip2 -cd ' + fileInput + ' > ' + self.file_out)
This line is non-blocking. Therefore it will start the command, but the few following lines are already reading the file. This is basically a concurrency issue. Try to wait for the command to complete before reading the file.
Also see:
Python popen command. Wait until the command is finished since os.popen is deprecated since python-2.6 (depending on which version you are using).
Sidenote - The same happens to the line below. The file may, or may not, exist after executing the following line:
os.popen('touch ' + self.file_out)

Python: Creating a file based on an array of strings

I'm trying to write a program that will go to a website and download all of the songs they have posted. Right now I'm having trouble creating new file names for each of the songs I download. I initially get all of the file names and the locations of the songs (html). However, when I try to create new files for the songs to be put in, I get an error saying:
IOError: [Errno 22] invalid mode ('w') or filename
I have tried using different modes like "w+", "a", and, "a+" to see if these would solve the issue but so far I keep getting the error message. I have also tried "% name"-ing the string but that has not worked either. My code is following, any help would be appreciated.
import urllib
import urllib2
def earmilk():
SongList = []
SongStrings = []
SongNames = []
earmilk = urllib.urlopen("http://www.earmilk.com/category/pop")
reader = earmilk.read()
#gets the position of the playlist
PlaylistPos = reader.find("var newPlaylistTracks = ")
#finds the number of songs in the playlist
NumberSongs = reader[reader.find("var newPlaylistIds = " ): PlaylistPos].count(",") + 1
initPos = PlaylistPos
#goes though the playlist and records the html address and name of the song
for song in range(0, NumberSongs):
songPos = reader[initPos:].find("http:") + initPos
namePos = reader[songPos:].find("name") + songPos
namePos += reader[namePos:].find(">")
nameEndPos = reader[namePos:].find("<") + namePos
SongStrings.append(reader[songPos: reader[songPos:].find('"') + songPos])
SongNames.append(reader[namePos + 1: nameEndPos])
#initPos += len(SongStrings[song])
initPos = nameEndPos
for correction in range(0, NumberSongs):
SongStrings[correction] = SongStrings[correction].replace('\\/', "/")
#downloading songs
#for download in range(0, NumberSongs):
#print reader.find("So F*")
#x= SongNames[0]
songDL = open(SongNames[0].formant(name), "w+")
songDL.write(urllib.urlretrieve(SongStrings[0], SongNames[0] + ".mp3"))
songDL.close()
print SongStrings
for name in range(0, NumberSongs):
print SongNames[name] + "\n"
earmilk.close()
You need to use filename = '%s' % (SongNames[0],) to construct the name but you also need to make sure that your file name is a valid one - I don't know of any songs called *.* but I wouldn't like to chance it so something like:
filename = ''.join([a.isalnum() and a or '_' for a in SongNames[0]])

Categories