I'm trying to loop through each day of a period with pendulum and save each day's data as a separate CSV file. Right now I only get the first day of the period. I'm not sure whether I need outfile, but I assume I do, since I want each CSV file to be written and closed before the next one starts.
import csv
import requests
import datetime
import pendulum

start = pendulum.datetime(2018, 1, 1)
end = pendulum.today()
period = pendulum.period(start, end)

for dt in period.range('days'):
    dt.format('YYYY-MM-DD')
    break

the_date = dt.format('YYYY-MM-DD')
outfile = open('TEST_PENDULUM_' + str(the_date) + '.csv', "w", newline='')
writer = csv.writer(outfile)
writer.writerow(["Date"])

req = requests.get('https://www.fantasylabs.com/api/lines/4/' + str(the_date) + '/startinggoalies')
data = req.json()['GoalieMatchups']

for teams in data:
    HomeTeam = teams['Properties']['EventDate']
    print(HomeTeam)
    writer.writerow([HomeTeam])

outfile.close()
You didn't put the per-day logic inside your loop: the loop breaks after the first day, and everything else runs only once afterwards. Move it all into the loop body:
import csv
import requests
import datetime
import pendulum

start = pendulum.datetime(2018, 1, 1)
end = pendulum.today()
period = pendulum.period(start, end)

for dt in period.range('days'):
    the_date = dt.format('YYYY-MM-DD')
    outfile = open('TEST_PENDULUM_' + str(the_date) + '.csv', "w", newline='')
    writer = csv.writer(outfile)
    writer.writerow(["Date"])
    req = requests.get('https://www.fantasylabs.com/api/lines/4/' + str(the_date) + '/startinggoalies')
    data = req.json()['GoalieMatchups']
    for teams in data:
        HomeTeam = teams['Properties']['EventDate']
        print(HomeTeam)
        writer.writerow([HomeTeam])
    outfile.close()
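As a side note, a with statement closes each day's file automatically, even if the request or the JSON parsing raises partway through. A minimal sketch of the same loop body, assuming the same endpoint and response shape as above:

for dt in period.range('days'):
    the_date = dt.format('YYYY-MM-DD')
    # the file is closed automatically when the with block exits
    with open('TEST_PENDULUM_' + the_date + '.csv', "w", newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["Date"])
        req = requests.get('https://www.fantasylabs.com/api/lines/4/' + the_date + '/startinggoalies')
        for teams in req.json()['GoalieMatchups']:
            writer.writerow([teams['Properties']['EventDate']])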
Please see the pandas-based pattern scanner below; I'm using a CSV file as the data source and loading it into data.
Since the data is loaded from a CSV file, I have to rerun the script every 5 minutes to read the updated file, which redraws the plot every 5 minutes.
Is there any way to use df.update to avoid rerunning the script and prevent the plot from being reloaded again and again?
import pandas as pd
import numpy as np
from scipy.signal import argrelextrema
import matplotlib.pyplot as plt
from harmonic_functions import *
import uuid
from csv import DictReader

data = pd.read_csv('temp.csv')
data.time = pd.to_datetime(data.time, format='%d.%m.%Y %H:%M:%S.%f')
data.index = data['time']
# data = data.drop_duplicates(keep=False)
price = data.close.copy()

err_allowed = 10.0 / 100

pnl = []
trade_dates = []
correct_pats = 0
pats = 0

# plt.ion()

for i in range(100, len(price)):
    current_idx, current_pat, start, end = peak_detect(price.values[:i], order=7)
    XA = current_pat[1] - current_pat[0]
    AB = current_pat[2] - current_pat[1]
    BC = current_pat[3] - current_pat[2]
    CD = current_pat[4] - current_pat[3]
    moves = [XA, AB, BC, CD]
    gart = is_gartley(moves, err_allowed)
    butt = is_butterfly(moves, err_allowed)
    bat = is_bat(moves, err_allowed)
    crab = is_crab(moves, err_allowed)
    shark = is_shark(moves, err_allowed)
    trio = is_trio(moves, err_allowed)
    cyph = is_cyph(moves, err_allowed)
    three_dives = is_3dives(moves, err_allowed)
    fivezero = is_50(moves, err_allowed)
    altbat = is_altbat(moves, err_allowed)
    deepcrab = is_deepcrab(moves, err_allowed)
    dragon = is_dragon(moves, err_allowed)
    snorm = is_snorm(moves, err_allowed)
    harmonics = np.array([gart, butt, bat, crab, shark, trio, cyph, three_dives, fivezero, altbat, deepcrab, dragon, snorm])
    labels = ['Garterly', 'Butterfly', 'Bat', 'Crab', 'Shark', 'Trio', 'Cypher', '3Dives', '5Zero', 'AltBat', 'DeepCrab', 'Dragon', 'Snorm']
    if np.any(harmonics == 1) or np.any(harmonics == -1):
        for j in range(0, len(harmonics)):
            if harmonics[j] == 1 or harmonics[j] == -1:
                pats += 1
                sense = 'Bearish ' if harmonics[j] == -1 else 'Bullish '
                label = sense + labels[j] + ' found'
                print(label)
                print(price.values[start])
                plt.title(label)
                plt.plot(np.arange(start, i + 5), price.values[start:i + 5])
                plt.scatter(current_idx, current_pat, c='r')
                filename = str(uuid.uuid1())[:8]
                print(current_pat)
                print(current_idx)
                # with open('temp.csv', mode='r') as csv_file:
                #     file = DictReader(csv_file, delimiter=',')
                #     close = str(current_pat[4])
                #     print(current_pat)
                #     rows = [row for row in file if row['close'] in close]
                #     closetime = rows[-1]['ID']
                #     print(closetime)
                write1 = str(current_idx)
                write2 = str(current_pat)
                write = write1 + ',' + write2
                print(write)
                with open("datadb", "r+") as file:
                    for line in file:
                        if write in line:
                            break
                    else:  # not found, we are at the eof
                        file.write(f"{write}\n")  # append missing data
                print(filename)
                plt.savefig(filename)
                plt.close(filename)
                # plt.show()
                plt.clf()
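One way to avoid restarting the script is to wrap the scan in a loop that re-reads the CSV on a timer and redraws in matplotlib's interactive mode. A minimal sketch, assuming the same temp.csv layout and a 5-minute interval (both assumptions from the question; harmonic_functions isn't shown, so the scan itself is left as a comment):

import pandas as pd
import matplotlib.pyplot as plt

plt.ion()  # interactive mode: figures update without blocking the loop
while True:
    # re-read the file each cycle to pick up rows appended since last time
    data = pd.read_csv('temp.csv')
    data.time = pd.to_datetime(data.time, format='%d.%m.%Y %H:%M:%S.%f')
    data.index = data['time']
    price = data.close.copy()
    # ... run the pattern scan from above on the refreshed price series ...
    plt.pause(300)  # wait 5 minutes while keeping the figure responsive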
The following code appends every iteration's links onto the previous one's. Every file should hold about 50 players, but the file for team 1 has 50 rows, the file for team 2 has 100 (team 1 + team 2), and so on. How can I create each file with only the single iteration of year + team?
from nfl_fun import make_soup
import os
from itertools import islice
import csv
from datetime import datetime

years = [2019, 2018, 2017, 2016, 2015]
year = datetime.now().year
if year not in years:
    years.append(year)

linkname = ""

with open("teamlink.csv") as tl:
    for row in islice(csv.reader(tl), 1, None):
        for season in years:
            rowlink = f"https://www.footballdb.com/{row[0]}/roster/{season}"
            soup = make_soup(rowlink)
            try:
                for boot in soup.findAll('b'):
                    for link in boot.findAll('a'):
                        if link.has_attr('href'):
                            linkname = linkname + "\n" + (link.attrs['href'])[1:]
                userfile = f"{rowlink[37:-12]}-{season}"
                header = "Links"
                file = open(os.path.expanduser(f"{userfile}.csv"), "wb")
                file.write(bytes(header, encoding="ascii", errors='ignore'))
                file.write(bytes(linkname, encoding="ascii", errors='ignore'))
                file.close()
            except:
                continue
You need to reset linkname every time you change team; you can just add

linkname = ""

after file.close() or something similar.
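For instance, the end of the try block would then look like this (a sketch of just the relevant lines):

                file.write(bytes(linkname, encoding="ascii", errors='ignore'))
                file.close()
                linkname = ""  # reset so the next team/season file starts from scratch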
Having an issue appending gamepk to a new list so I can then call set on it and remove duplicates. My first issue is just building the new list; I can worry about the set later.
import csv
import requests
import datetime
from pprint import pprint
import pendulum

start = pendulum.datetime(2016, 4, 3)
end = pendulum.datetime(2016, 10, 2)
period = pendulum.period(start, end)

for dt in period.range('days'):
    day = dt.format('DD')
    month = dt.format('MM')
    year = dt.format('YYYY')
    the_date = str(month) + "/" + str(day) + "/" + str(year)
    try:
        req = requests.get('http://gd.mlb.com/components/game/mlb/year_' + str(year) + '/month_' + str(month) + '/day_' + str(day) + '/miniscoreboard.json')
        get_gameIds = req.json()['data']['games']['game']
        for gameId in get_gameIds:
            gamepk = gameId['game_pk']
            new_gamepk = []
            for pk in gamepk:
                new_gamepk.append(pk)
            print(new_gamepk)
    except Exception:  # skip days with no game data
        pass
Printing gamepk right after the second-to-last for loop produces output like this:
446877
452866
446911
446873
446870
446875
446872
446876
446867
446874
446879
446871
I feel like that's where I should store the gamepk results as a list, but I'm not sure. What I'm trying to do is append all of those gamepks to a new list, which I can then call set on to remove the duplicates. I understand the concept of append but clearly can't seem to get it to work properly. Any help and a brief explanation go a long way!
The idea in general is to:
Initialize an empty collection (list, set) outside of the outermost loop
Once you've found the item you want, add it to the collection.
Using a list
import csv
import requests
import datetime
from pprint import pprint
import pendulum

start = pendulum.datetime(2016, 4, 3)
end = pendulum.datetime(2016, 10, 2)
period = pendulum.period(start, end)

gamepks = []

for dt in period.range('days'):
    day = dt.format('DD')
    month = dt.format('MM')
    year = dt.format('YYYY')
    the_date = str(month) + "/" + str(day) + "/" + str(year)
    try:
        req = requests.get('http://gd.mlb.com/components/game/mlb/year_' + str(year) + '/month_' + str(month) + '/day_' + str(day) + '/miniscoreboard.json')
        get_gameIds = req.json()['data']['games']['game']
        for gameId in get_gameIds:
            gamepk = gameId['game_pk']
            gamepks.append(gamepk)
    except Exception:  # skip days with no game data
        pass
Using a set
import csv
import requests
import datetime
from pprint import pprint
import pendulum

start = pendulum.datetime(2016, 4, 3)
end = pendulum.datetime(2016, 10, 2)
period = pendulum.period(start, end)

gamepks = set()

for dt in period.range('days'):
    day = dt.format('DD')
    month = dt.format('MM')
    year = dt.format('YYYY')
    the_date = str(month) + "/" + str(day) + "/" + str(year)
    try:
        req = requests.get('http://gd.mlb.com/components/game/mlb/year_' + str(year) + '/month_' + str(month) + '/day_' + str(day) + '/miniscoreboard.json')
        get_gameIds = req.json()['data']['games']['game']
        for gameId in get_gameIds:
            gamepk = gameId['game_pk']
            gamepks.add(gamepk)
    except Exception:  # skip days with no game data
        pass
It's also worth seeing why your version failed: you wrote new_gamepk = [] inside the loop, so the list was re-created, and its previous contents discarded, on every iteration. Initialize any variable that should accumulate results across iterations (here, the list/set) once, before the outermost loop, so it persists after the loop ends.
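A stripped-down illustration of the difference, using made-up values just to show the re-initialization pitfall:

pks = []                 # initialized once: accumulates across iterations
for day_pks in [[1, 2], [2, 3]]:
    # pks = []           # if initialized here instead, earlier days are lost
    for pk in day_pks:
        pks.append(pk)
print(pks)               # [1, 2, 2, 3]
print(sorted(set(pks)))  # [1, 2, 3] after deduplication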
I'm getting a list index out of range error and I'm not sure why. My code is a web scraper that collects temperature data from a website. It all worked fine for months, until recently.
I have a number of functions shown below for reference. The important one is getDailyAve(), which is where the exception is thrown.
Any thoughts or advice are appreciated.
import sys
import urllib
from bs4 import BeautifulSoup
from urllib2 import urlopen, URLError
import webbrowser
import time
from collections import Counter
import numpy as np
import re
import csv
import datetime
from datetime import timedelta

DATE_FORMAT = '%Y/%m/%d'

def daterange(start, end):
    def convert(date):
        try:
            date = datetime.datetime.strptime(date, DATE_FORMAT)
            return date.date()
        except TypeError:
            return date

    def get_date(n):
        return datetime.datetime.strftime(convert(start) + timedelta(days=n), DATE_FORMAT)

    days = (convert(end) - convert(start)).days
    if days <= 0:
        raise ValueError('The start date must be before the end date.')
    for n in range(0, days):
        yield get_date(n)

class SiteLocation:
    """class defining mine location parameters to lookup on weather search"""
    def __init__(self, city, state, zip, code):
        self.city = city
        self.state = state
        self.zip = zip
        self.code = code

def getDailyAve(url):
    url = urllib.urlopen(url)
    soup = BeautifulSoup(url.read(), 'lxml')
    form = soup.find("form", {"id": "archivedate"})
    table = form.find_next_sibling("table")
    rows = table.select("tr")[1:]
    time = []
    temp = []
    minutes = []
    # handle no data case
    if soup.find(text="Archive data not available for this date."):
        print("Data not available, URL: '%s'" % url)
        return None
    # capture time and temps
    for row in rows:
        data = [td.text for td in row.find_all("td")]
        match = re.search(r"[+-]?(?<!\.)\b[0-9]+\b(?!\.[0-9])", data[2])
        if match:
            temp.append(match.group())
            time.append(data[0])
            minutes.append(data[0][-4:-2])
    common = Counter(minutes).most_common()[0][0]
    finalTimes = []
    finalTemps = []
    for i in range(0, len(time)):
        if minutes[i] == common:
            finalTimes.append(time[i])
            finalTemps.append(int(temp[i]))
    dailyAve = sum(finalTemps) / float(len(finalTimes))
    return dailyAve

def writeToCsv(list1, list2, list3, list4, list5, list6, list7, list8):
    with open('results.csv', 'wb') as csvfile:
        results = csv.writer(csvfile, delimiter=',')
        results.writerow(['T-SJ', 'T- RB', 'T-DS', 'T-JW', 'T-GB', 'D', 'M', 'Y'])
        for idx in range(0, len(list1)):
            results.writerow([str(list1[idx]), str(list2[idx]), str(list3[idx]), str(list4[idx]), str(list5[idx]), str(list6[idx]), str(list7[idx]), str(list8[idx])])

def buildURL(location, day, month, year):
    if day < 10:
        strDay = '0' + str(day)
    else:
        strDay = str(day)
    baseURL = "http://www.weatherforyou.com/reports/index.php?forecast=pass&pass=archive&zipcode=" + location.zip + "&pands=" + location.city + "%2" + "C" + location.state + "&place=" + location.city + "&state=" + location.state + "&icao=" + location.code + "&country=us&month=" + str(month) + "&day=" + strDay + "&year=" + str(year) + "&dosubmit=Go"
    return baseURL

def main():
    loc1 = SiteLocation('Farmington', 'NM', '87401', 'KFMN')
    loc2 = SiteLocation('Whitesville', 'WV', '25209', 'KBKW')
    loc3 = SiteLocation('Rangely', 'CO', '81648', 'KVEL')
    loc4 = SiteLocation('Brookwood', 'AL', '35444', 'KTCL')
    loc5 = SiteLocation('Princeton', 'IN', '47670', 'KAJG')

    start = '2016/08/31'
    end = datetime.date.today()
    dateRange = list(daterange(start, end))

    listDailyAve1 = []
    listDailyAve2 = []
    listDailyAve3 = []
    listDailyAve4 = []
    listDailyAve5 = []
    listDays = []
    listMonths = []
    listYears = []

    for idx in range(0, len(dateRange)):
        strDate = str(dateRange[idx]).split("/")
        year = strDate[0]
        month = strDate[1]
        day = strDate[2]
        url1 = buildURL(loc1, day, month, year)
        url2 = buildURL(loc2, day, month, year)
        url3 = buildURL(loc3, day, month, year)
        url4 = buildURL(loc4, day, month, year)
        url5 = buildURL(loc5, day, month, year)

        dailyAve1 = getDailyAve(url1)
        dailyAve2 = getDailyAve(url2)
        dailyAve3 = getDailyAve(url3)
        dailyAve4 = getDailyAve(url4)
        dailyAve5 = getDailyAve(url5)

        listDailyAve1.append(dailyAve1)
        listDailyAve2.append(dailyAve2)
        listDailyAve3.append(dailyAve3)
        listDailyAve4.append(dailyAve4)
        listDailyAve5.append(dailyAve5)

        listDays.append(day)
        listMonths.append(month)
        listYears.append(year)

    writeToCsv(listDailyAve1, listDailyAve2, listDailyAve3, listDailyAve4, listDailyAve5, listDays, listMonths, listYears)

if __name__ == '__main__':
    status = main()
    sys.exit(status)
Here is the exception thrown:
Traceback (most recent call last):
  File ".\weatherScrape2.py", line 147, in <module>
    status = main()
  File ".\weatherScrape2.py", line 128, in main
    dailyAve1 = getDailyAve(url1)
  File ".\weatherScrape2.py", line 61, in getDailyAve
    match = re.search(r"[+-]?(?<!\.)\b[0-9]+\b(?!\.[0-9])",data[2])
IndexError: list index out of range
First of all, you need to handle situations when there is no available data. Here is one way:
# handle "no data" case
if soup.find(text="Archive data not available for this date."):
print("Data not available, URL: '%s'." % url)
return None
Also, I think there is a problem in the logic of getting the rows. I'd do it this way:
form = soup.find("form", {"id": "archivedate"})
table = form.find_next_sibling("table")
rows = table.select("tr")[1:]
Here is a complete snippet that I'm executing (for a single URL):
import requests
from bs4 import BeautifulSoup
from collections import Counter
import re

def getDailyAve(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'lxml')
    form = soup.find("form", {"id": "archivedate"})
    table = form.find_next_sibling("table")
    rows = table.select("tr")[1:]
    time = []
    temp = []
    minutes = []
    # handle no data case
    if soup.find(text="Archive data not available for this date."):
        print("Data not available, URL: '%s'" % url)
        return None
    # capture time and temps
    for row in rows:
        data = [td.text for td in row.find_all("td")]
        match = re.search(r"[+-]?(?<!\.)\b[0-9]+\b(?!\.[0-9])", data[2])
        if match:
            temp.append(match.group())
            time.append(data[0])
            minutes.append(data[0][-4:-2])
    common = Counter(minutes).most_common()[0][0]
    finalTimes = []
    finalTemps = []
    for i in range(0, len(time)):
        if minutes[i] == common:
            finalTimes.append(time[i])
            finalTemps.append(int(temp[i]))
    dailyAve = sum(finalTemps) / float(len(finalTimes))
    return dailyAve

print(getDailyAve("https://www.weatherforyou.com/reports/index.php?forecast=pass&pass=archive&zipcode=87401&pands=Farmington%2CNM&place=Farmington&state=NM&icao=KFMN&country=us&month=09&day=03&year=2016&dosubmit=Go"))
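If the IndexError still appears, it is likely because some table rows contain fewer than three cells (separator or header rows, for example), so data[2] doesn't exist. A defensive variant of the row loop in getDailyAve above would skip those rows before indexing (a sketch, assuming the same table structure):

    for row in rows:
        data = [td.text for td in row.find_all("td")]
        if len(data) < 3:
            continue  # skip separator/header rows without a temperature cell
        match = re.search(r"[+-]?(?<!\.)\b[0-9]+\b(?!\.[0-9])", data[2])
        if match:
            temp.append(match.group())
            time.append(data[0])
            minutes.append(data[0][-4:-2])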
I have created a program that is able to pull monthly stock info from Yahoo Finance and print out a list of the information. I know my methodology for getting the info isn't the best yet, but the part I am stuck on is taking the stock info and writing it to a CSV file. What I have so far writes each individual character into the CSV file.
import requests
from urllib2 import urlopen
import csv
import pandas as pd
from pandas import DataFrame
import datetime
import pandas.io.data

YahooUrl = 'http://ichart.yahoo.com/table.csv?s='

start_month = 1 - 1
start_day = 1
start_year = 2010
end_month = 12 - 1
end_day = 31
end_year = 2014

Start_ApiMonth = '&a=%s' % (start_month)
Start_ApiDay = '&b=%s' % (start_day)
Start_ApiYear = '&c=%s' % (start_year)
End_ApiMonth = '&d=%s' % (end_month)
End_ApiDay = '&e=%s' % (end_day)
End_ApiYear = '&f=%s' % (end_year)

interval = 'm'
ApiInterval = '&g=%s' % (interval)
ApiStatic = '&ignore=.csv'

Ticker = raw_input("What is the ticker > ")
Website = urlopen(YahooUrl + Ticker + Start_ApiMonth + Start_ApiDay + Start_ApiYear + End_ApiMonth + End_ApiDay + End_ApiYear + ApiInterval + ApiStatic)
Info = Website.read()

output = open('output.csv', 'wb')
wr = csv.writer(output, dialect='excel')
for item in Info:
    wr.writerow(item)
print Info
read() returns a string, hence Info is a string. Iteration over a string results in iterating over each of the characters that comprise the string:
>>> for c in 'abcd':
...     print c
...
a
b
c
d
Therefore your code will write each character to the csv file on its own line.
The data that you are receiving is actually already in CSV format. Here are the first few lines for stock YHOO:
Date,Open,High,Low,Close,Volume,Adj Close
2014-12-01,51.43,51.68,48.290001,50.509998,16328900,50.509998
2014-11-03,46.049999,52.619999,45.740002,51.740002,25212400,51.740002
So it should simply be a matter of saving the file:
with open('{}.csv'.format(Ticker), 'wb') as output:
    output.write(Website.read())
This will write the data to a file named with the stock name followed by .csv extension, e.g. YHOO.csv
Update
The following code shows how to download and merge data for multiple stocks. The output file now needs an additional field, Stock ID, to identify each stock; it is inserted as the first CSV field in both the header and the detail rows. The header is emitted only once.
import requests

yahoo_url = 'http://ichart.yahoo.com/table.csv'
params = {'a': 0, 'b': 1, 'c': 2010, 'd': 11, 'e': 31, 'f': 2014, 'g': 'm', 'ignore': '.csv'}
stock_ids = ['AAPL', 'GOOG', 'YHOO']

with open('output.csv', 'wb') as output:
    header_written = False
    for stock_id in stock_ids:
        params['s'] = stock_id
        r = requests.get(yahoo_url, params=params)
        lines = r.iter_lines()
        header = next(lines)
        if not header_written:
            output.write('Stock ID,{}\n'.format(header))
            header_written = True
        output.writelines('{},{}\n'.format(stock_id, line) for line in lines)
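Given the YHOO sample rows shown earlier, that stock's section of the merged output.csv would look like this (illustrative; the header appears once, at the top of the file):

Stock ID,Date,Open,High,Low,Close,Volume,Adj Close
YHOO,2014-12-01,51.43,51.68,48.290001,50.509998,16328900,50.509998
YHOO,2014-11-03,46.049999,52.619999,45.740002,51.740002,25212400,51.740002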