Submitting queries to gaia.aip.de seems to no longer work - python

I wrote some code a month ago and have been running and updating it consistently. I uploaded my most recent version to GitHub, and I know it works because I tested it over and over before uploading. Now, with nothing changed, I opened the file and submitting queries NO LONGER WORKS: out of 150 queries, only 2 succeed. From the data of my most recent run I know 104/150 used to work. Does anyone know why this might be? My code is below.
"""
Imports needed for the code.
"""
"""
Script to get and clean data
"""
import numpy as np
import pandas as pd
import math
from itertools import chain
from astroquery.gaia import Gaia
from pynverse import inversefunc
from astropy.io import ascii
import wget
import requests
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from scipy import stats
import sklearn.metrics as sm
defaults = [0] * 3  # pads missing H/M/S values with zeros

def reject_outliers(data):  # outlier rejection: keep values within m standard deviations of the mean
    m = 2
    u = np.mean(data)
    s = np.std(data)
    filtered = [e for e in data if (u - m * s < e < u + m * s)]
    return filtered

def isNaN(num):  # NaN is the only value that is not equal to itself
    return num != num

def HMS2deg(ra='', dec=''):  # convert RA from HH MM SS form to decimal degrees (Gaia form)
    RA, DEC, rs, ds = '', '', 1, 1
    if ra:
        H, M, S, *_ = [float(i) for i in chain(ra.split(), defaults)]
        if str(H)[0] == '-':
            rs, H = -1, abs(H)
        deg = (H * 15) + (M / 4) + (S / 240)  # 15 deg per hour, 1/4 deg per minute, 1/240 deg per second
        RA = '{0}'.format(deg * rs)
    if ra and dec:
        return (RA, DEC)
    else:
        return RA or DEC

def HMS2degDEC(dec='', ra=''):  # convert Dec from DD MM SS form to decimal degrees (Gaia form)
    RA, DEC, rs, ds = '', '', 1, 1
    if dec:
        D, M, S, *_ = [float(i) for i in chain(dec.split(), defaults)]  # S is already a float here
        if str(D)[0] == '-':
            ds, D = -1, abs(D)
        deg = D + (M / 60) + (S / 3600)
        DEC = '{0}'.format(deg * ds)
    if ra and dec:
        return (RA, DEC)
    else:
        return RA or DEC
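As an aside (not related to the query failures), astropy's SkyCoord can do the same sexagesimal-to-degree conversions; a minimal sketch with a hypothetical coordinate pair:
from astropy.coordinates import SkyCoord
import astropy.units as u

c = SkyCoord('05 35 17.3', '-05 23 28', unit=(u.hourangle, u.deg))  # hypothetical RA/Dec strings
print(c.ra.deg, c.dec.deg)  # decimal degrees, the form the Gaia archive expects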
count = 1
csv_file = 'test1.csv'  # input catalogue for the Gaia queries
data = pd.read_csv(csv_file, error_bad_lines=False)  # skip malformed lines
radata = data['R.A.']  # RA column
decdata = data['Dec.']  # Dec column
agedata = data['Age(Myr)']  # age column
diameterdata = data['Diameter']  # diameter column, later converted to a search radius
ra = []  # cleaned RA
dec = []  # cleaned Dec
age = []  # cleaned age
csv_files = ['M42.csv', 'Horsehead.csv', 'M93.csv', 'IrisTrain.csv']  # pre-existing data
ages = [3, 6, 25, 0.055]  # ages of the pre-existing data
diameter = []  # cleaned diameter data
gooddata = []  # overall storage for cleaned data
for i in range(len(radata)):  # clean and convert the RA data
    if isNaN(radata[i]):
        ra.append(0)
    else:
        ra.append(HMS2deg(radata[i]))
print(ra)
for i in range(len(decdata)):  # clean and convert the Dec data
    if isNaN(decdata[i]):
        dec.append(0)
    else:
        dec.append(HMS2degDEC(decdata[i]))
print(dec)
for i in range(len(diameterdata)):  # clean the diameter data and convert to a search radius
    if isNaN(diameterdata[i]):
        diameter.append(0)
    else:
        diameter.append((diameterdata[i] / 3600) * 100)
print(diameter)
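The three cleaning loops above could also be written with pandas directly, since fillna already covers the NaN checks; a sketch assuming the same columns:
ra = data['R.A.'].fillna(0).apply(lambda s: HMS2deg(s) if s != 0 else 0).tolist()
dec = data['Dec.'].fillna(0).apply(lambda s: HMS2degDEC(s) if s != 0 else 0).tolist()
diameter = (data['Diameter'].fillna(0) / 3600 * 100).tolist()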
for i in range(len(ra)):  # build and launch a modified query for each object
    query1 = """ SELECT bp_rp, parallax, pmra, pmdec, phot_g_mean_mag AS gp
    FROM gaiadr2.gaia_source
    WHERE 1 = CONTAINS(POINT('ICRS', ra, dec),
    """
    query1 = query1 + " CIRCLE('ICRS'," + str(ra[i]) + "," + str(dec[i]) + "," + str(diameter[i]) + ")" + ")"
    string2 = """
    AND phot_g_mean_flux_over_error > 50
    AND phot_rp_mean_flux_over_error > 20
    AND phot_bp_mean_flux_over_error > 20
    AND visibility_periods_used > 8
    """
    print(query1)
    query1 = query1 + string2
    try:
        job = Gaia.launch_job(query1)  # submit the query to the Gaia archive
        print(job)
        results = job.get_results()  # fetch the results
        ascii.write(results, 'values' + str(count) + '.csv', format='csv', fast_writer=False)
        csv_files.append('values' + str(count) + '.csv')  # store the results in a CSV
        ages.append(agedata[i])  # keep the matching age
        print(ages)
        count += 1  # use a new CSV name each time to avoid overwriting
    except:  # if the query throws any error (usually "can't query") the object is skipped; this also filters out bad data
        continue
"""
End of Cleaning and Gathering Data
"""
"""
Training and Creating Model with the data
"""
arr2=[]
datasetY=[]
datasetX=[]
Y=[]
av=0
count=[]
count2=[]
MAD=[]
"""
def adjR(x, y, degree):
results = {}
coeffs = np.polyfit(x, y, degree)
p = np.poly1d(coeffs)
yhat = p(x)
ybar = np.sum(y)/len(y)
ssreg = np.sum((yhat-ybar)**2)
sstot = np.sum((y - ybar)**2)
results['r_squared'] = 1- (((1-(ssreg/sstot))*(len(y)-1))/(len(y)-degree-1)
return results
original accuracy calculation
"""
"""
def objective(x, a, b, c):
return a * x + b
needed for scipy modeling, polyfit was more accurate
"""
"""
Line 59-68 checks if CSV data is NAN if it is it will ignore the value and only take the data that can be used
"""
count = 0
for i in range(len(csv_files)):
    data = pd.read_csv(csv_files[i])
    arr = data['gp']
    arr2 = data['bp_rp']
    for j in range(len(arr2)):  # separate inner loop variable so it does not shadow i
        if isNaN(arr2[j]):
            continue
        elif 13 <= arr[j] <= 19:
            datasetX.append(arr2[j])
            datasetY.append(arr[j])
            count += 1
    mad = stats.median_absolute_deviation(datasetY)  # MAD of the magnitudes
    mad2 = stats.median_absolute_deviation(datasetX)  # MAD of the colours
    madav = (mad + mad2) / 2  # combined MAD
    MAD.append(count)  # append to an array for training and plotting
    datasetX.clear()  # clear for the next iteration
    datasetY.clear()
    count = 0
"""
Plotting data and Traning
"""
ages3=[]
MAD2=[]
ages2 = [4000 if math.isnan(i) else i for i in ages]#ignore any age nan values
print(len(ages3))
print(len(MAD))
MAD=[1.5 if math.isnan(i) else i for i in MAD]#ignore any MAD computation error values
for i in range(len(MAD)):
if(-500<=MAD[i]<=1500 and -25<=ages2[i]<170 or (100<=MAD[i]<=1262) and (278<=ages2[i]<=5067) or (-20<=MAD[i]<=20) and (3900<=ages2[i]<=4100) or (2642<=MAD[i]<=4750) and (0<=ages2[i]<=200) or (7800<=MAD[i]<=315800) and (0<=ages2[i]<=20)):
continue
else:
ages3.append(float(ages2[i]))
MAD2.append(float(MAD[i]))
fig = plt.figure()
ax1 = fig.add_subplot('111')
ax1.scatter(ages3, MAD2, color='blue')
plt.ylim(-7800,315800)
polyline = np.linspace(-5, 9000, 20)
mod1 = np.poly1d(np.polyfit(ages3, MAD2, 2))#Train for a function of degree 2
predict = np.poly1d(mod1)
ax1.plot(polyline,mod1(polyline), color='red')
print(np.interp(0.795, mod1(polyline),polyline))
print(mod1)#print model
plt.show()
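One caveat with the np.interp call above: np.interp assumes its x-coordinates (here mod1(polyline)) are monotonically increasing, which a degree-2 polynomial is not over the whole range, so the interpolated inversion can be wrong. A sketch of a more direct inversion through the polynomial's roots:
target = 0.795  # the same value passed to np.interp above
roots = (mod1 - target).roots  # solve mod1(x) = target exactly
real_roots = roots[np.isreal(roots)].real  # keep only the real solutions
print(real_roots)  # candidate ages; pick the one in the physically sensible range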
"""
End of Training and Creating model/End of Script
"""
Please focus on this part, the querying section:
for i in range(len(ra)):  # build and launch a modified query for each object
    query1 = """ SELECT bp_rp, parallax, pmra, pmdec, phot_g_mean_mag AS gp
    FROM gaiadr2.gaia_source
    WHERE 1 = CONTAINS(POINT('ICRS', ra, dec),
    """
    query1 = query1 + " CIRCLE('ICRS'," + str(ra[i]) + "," + str(dec[i]) + "," + str(diameter[i]) + ")" + ")"
    string2 = """
    AND phot_g_mean_flux_over_error > 50
    AND phot_rp_mean_flux_over_error > 20
    AND phot_bp_mean_flux_over_error > 20
    AND visibility_periods_used > 8
    """
    print(query1)
    query1 = query1 + string2
    try:
        job = Gaia.launch_job(query1)  # submit the query to the Gaia archive
        print(job)
        results = job.get_results()  # fetch the results
        ascii.write(results, 'values' + str(count) + '.csv', format='csv', fast_writer=False)
        csv_files.append('values' + str(count) + '.csv')  # store the results in a CSV
        ages.append(agedata[i])  # keep the matching age
        print(ages)
        count += 1  # use a new CSV name each time to avoid overwriting
    except:  # if the query throws any error (usually "can't query") the object is skipped; this also filters out bad data
        continue
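For what it's worth, the whole query could be assembled in a single f-string, which makes the parenthesis nesting much easier to check by eye; a sketch, assuming the same ra, dec and diameter lists built above:
query1 = f"""SELECT bp_rp, parallax, pmra, pmdec, phot_g_mean_mag AS gp
FROM gaiadr2.gaia_source
WHERE 1 = CONTAINS(POINT('ICRS', ra, dec),
                   CIRCLE('ICRS', {ra[i]}, {dec[i]}, {diameter[i]}))
AND phot_g_mean_flux_over_error > 50
AND phot_rp_mean_flux_over_error > 20
AND phot_bp_mean_flux_over_error > 20
AND visibility_periods_used > 8
"""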
Thank you for your time. I know this is really unusual.
After removing the try/except, this is the error:
Traceback (most recent call last):
  File "read.py", line 120, in <module>
    job = Gaia.launch_job(query1)#Launch query to gaia webpage
  File "C:\ProgramData\Anaconda3\lib\site-packages\astroquery\gaia\core.py", line 846, in launch_job
    return TapPlus.launch_job(self, query=query, name=name,
  File "C:\ProgramData\Anaconda3\lib\site-packages\astroquery\utils\tap\core.py", line 344, in launch_job
    results = utils.read_http_response(response, output_format)
  File "C:\ProgramData\Anaconda3\lib\site-packages\astroquery\utils\tap\xmlparser\utils.py", line 42, in read_http_response
    result = APTable.read(data, format=astropyFormat)
  File "C:\ProgramData\Anaconda3\lib\site-packages\astropy\table\connect.py", line 61, in __call__
    out = registry.read(cls, *args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\astropy\io\registry.py", line 520, in read
    data = reader(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\astropy\io\votable\connect.py", line 116, in read_table_votable
    raise ValueError("No table found")
ValueError: No table found

Please note, this has been resolved. The cause is posted on the Gaia news page: https://www.cosmos.esa.int/web/gaia/news (planned maintenance). For future reference, if code that submits queries suddenly stops working, check the service's website first; they have probably posted an announcement.
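For anyone who hits the same thing: a small retry wrapper around Gaia.launch_job lets a script like this ride out short outages instead of silently dropping most of its queries. A minimal sketch, using only the time module on top of what is already imported (the retry count and wait are arbitrary):
import time

def launch_with_retry(query, retries=3, wait=60):
    # try a few times before giving up, in case the archive is briefly down
    for attempt in range(retries):
        try:
            return Gaia.launch_job(query)
        except Exception as exc:
            print('Query failed (attempt {0}/{1}): {2}'.format(attempt + 1, retries, exc))
            time.sleep(wait)  # wait before retrying
    return None  # still failing: check https://www.cosmos.esa.int/web/gaia/news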

Related

Python extract value from multiple substring

I have a dataframe named df with a column named "text"; each row is a string in the MARC data format, like this:
d20s 22 i2as¶001VNINDEA455133910000005¶008180529c 1996 frmmm wz 7b ¶009se z 1 m mm c¶008a ¶008at ¶008ap ¶008a ¶0441 $a2609-2565$c2609-2565¶0410 $afre$aeng$apor ¶0569 $a2758-8965$c4578-7854¶0300 $a789$987$754 ¶051 $atxt$asti$atdi$bc¶110 $317737535$w20..b.....$astock market situation¶3330 $aimport and export agency ABC¶7146 $q1$uwwww.abc.org$ma1¶7146 $q9$uAgency XYZ¶8799 $q1$uAgency ABC$fHTML$
Here I want to extract the information contained in zone ¶7146 after $u, and in zone ¶0441 after $c.
The result table will be like this:
¶7146$u         ¶0441$c
wwww.abc.org    2609-2565
Agency XYZ      2609-2565
Here is the code I made:
import os
import pandas as pd
import numpy as np
import requests

df = pd.read_csv('dataset.csv')

def extract(text, start_pattern, sc):
    ist = text.find(start_pattern)
    if ist < 0:
        return ""
    ist = text.find(sc, ist)
    if ist < 0:
        return ""
    im = text.find("$", ist + len(sc))
    iz = text.find("¶", ist + len(sc))
    if im >= 0:
        if iz >= 0:
            ie = min(im, iz)
        else:
            ie = im
    else:
        ie = iz
    if ie < 0:
        return ""
    return text[ist + len(sc): ie]

def extract_text(row, list_in_zones):
    text = row["text"]
    if pd.isna(text):
        return [""] * len(list_in_zones)
    patterns = [("¶" + p, "$" + c) for p, c in [zone.split("$") for zone in list_in_zones]]
    return [extract(text, pattern, sc) for pattern, sc in patterns]

list_in_zones = ["7146$u", "0441$u", "200$y"]
df[list_in_zones] = df.apply(lambda row: extract_text(row, list_in_zones),
                             axis=1,
                             result_type="expand")
df.to_excel("extract.xlsx", index=False)
For zone ¶7146 after $u, my code only extracted "wwww.abc.org"; it cannot extract the duplicate with value "Agency XYZ". What's wrong here?
Additional logical structure: each zone starts with the character ¶ (like ¶7146, ¶0441, ...), and the fields start with $ (for example $u, $c) and end with either $ or ¶. I want to extract the information in the $ fields.
You could try splitting and then cleaning up strings as follows
import pandas as pd
text = ('d20s 22 i2as¶001VNINDEA455133910000005¶008180529c 1996 frmmm wz 7b ¶009se z 1 m mm c¶008a ¶008at ¶008ap ¶008a ¶0441 $a2609-2565$c2609-2565¶0410 $afre$aeng$apor ¶0569 $a2758-8965$c4578-7854¶0300 $a789$987$754 ¶051 $atxt$asti$atdi$bc¶110 $317737535$w20..b.....$astock market situation¶3330 $aimport and export agency ABC¶7146 $q1$uwwww.abc.org$ma1¶7146 $q9$uAgency XYZ¶8799 $q1$uAgency ABC$fHTML$')
u = text.split('$u')[1:3] # taking just the second and third elements of the array because they match your desired output
c = text.split('$c')[1:3]
pd.DataFrame([u,c]).T
OUTPUT
0 1
0 wwww.abc.org$ma1¶7146 $q9 2609-2565¶0410 $afre$aeng$apor ¶0569 $a2758-8965
1 Agency XYZ¶8799 $q1 4578-7854¶0300 $a789$987$754 ¶051 $atxt$asti$a...
From here you can try to clean up the strings until they match the desired output.
It would be easier to give a more helpful answer if we could understand the logic behind this data structure - when do certain fields start and end?
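Given the structure you describe (a zone starts at ¶, a field starts at $ and ends at the next $ or ¶), a regex sketch that also catches repeated zones, assuming the same text variable as above:
import re

def extract_all(text, zone, field):
    # a field value runs from its marker to the next '$' or '¶'; [^¶]*? keeps the search inside one zone
    pattern = '¶' + zone + r'[^¶]*?\$' + field + r'([^$¶]*)'
    return re.findall(pattern, text)

print(extract_all(text, '7146', 'u'))  # ['wwww.abc.org', 'Agency XYZ']
print(extract_all(text, '0441', 'c'))  # ['2609-2565']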

BitCoin Algo not iterating through historical data correctly

I'm creating a simple trading backtester on Bitcoin, yet I'm having trouble with the for loops in my code. The current code is based on 2 simple moving averages, q and z (currently for learning purposes, no real strategy). info is a dataframe holding Bitcoin historical data from a csv file. There seems to be an out-of-bounds error and I can't figure it out. Any help would be appreciated.
import pandas as pd
import numpy as np

cash = 10000
file = 'BTC-USD.csv'
data = pd.read_csv(file)
y = data['Adj Close'][1000:]
x = data['Date'][1000:]
v = data['Volume'][1000:]
h = data['High'][1000:]
l = data['Low'][1000:]

def movAvg(values, time):
    times = np.repeat(1.0, time) / time
    sma = np.convolve(values, times, 'valid')
    return sma

z = movAvg(y, 12)
q = movAvg(y, 9)
SP = len(x[50-1:])

def AlgoCal(account, info):
    #i = 1050
    bought = False
    test = []
    for x in info.index:
        if q[x] < z[x]:
            if bought == False:
                temp = info[x]
                account = account - info[x]
                test.append(account)
                bought = True
        elif q[x] > z[x]:
            if bought == True:
                temp = info[x]
                account = account + info[x]
                test.append(account)
                bought = False
        else:
            print("Error")
    return test

money = AlgoCal(cash, y)
print(money)
Sample Data from Yahoo Bitcoin csv
Date,Open,High,Low,Close,Adj Close,Volume
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
........
........
2020-05-21,9522.740234,9555.242188,8869.930664,9081.761719,9081.761719,39326160532
2020-05-22,9080.334961,9232.936523,9008.638672,9182.577148,9182.577148,29810773699
2020-05-23,9185.062500,9302.501953,9118.108398,9209.287109,9209.287109,27727866812
2020-05-24,9196.930664,9268.914063,9165.896484,9268.914063,9268.914063,27658280960
Error:
Traceback (most recent call last):
  File "main.py", line 47, in <module>
    money = AlgoCal(cash,y)
  File "main.py", line 31, in AlgoCal
    if q[x]<z[x]:
IndexError: index 1066 is out of bounds for axis 0 with size 1066
Your moving averages have two different lengths: one uses a 12-period window and the other a 9-period window, so the longer window yields the shorter array. When you try to compare them in AlgoCal, the shorter one runs out and gives you the out-of-bounds error.
If you are going to compare moving averages in this way, you need to add a minimum period at the beginning to only start when both averages are available.
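A minimal sketch of one way to keep the averages aligned, assuming the same CSV: pandas' rolling means keep the price series' index and pad the warm-up period with NaN, so the comparison can never run off the end:
import pandas as pd

data = pd.read_csv('BTC-USD.csv')
y = data['Adj Close'][1000:]
z = y.rolling(12).mean()  # stays index-aligned with y, NaN until the window fills
q = y.rolling(9).mean()
valid = q.notna() & z.notna()  # only compare once both windows are full
for i in y[valid].index:
    if q[i] < z[i]:
        pass  # buy logic goes here
    elif q[i] > z[i]:
        pass  # sell logic goes here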

Error while plotting bar graph in python

I have this current code below. The way the code is supposed to work is: ask for a user name, print all the projects and hours associated with that user within the called csv, then plot a vertical bar graph with the project names as x-values and the hours for each project as y-values. I'm running into an error, which is also shown below. How do I pull the right values from my code and plot them accordingly? I'm very novice with Python, so any help you can lend would be great.
Thanks in advance
import csv
import numpy as np
import matplotlib.pyplot as plt; plt.rcdefaults()

with open('Report1.csv') as csvfile:
    hour_summation = {}
    read_csv = csv.reader(csvfile, delimiter=',')
    name_found = False
    while not name_found:
        # take user input for name
        full_name = input('Enter your full name: ').lower()
        # start search at top of file. Have to do this or each loop will
        # start at the end of the file after the first check.
        csvfile.seek(0)
        for row in read_csv:
            # check to see if row matches name
            if (' '.join((row[0], row[1]))).lower() == full_name.strip().lower():
                name_found = True
                hour_summation[row[2]] = hour_summation.get(row[2], 0) + int(float(row[3]))
        # name wasn't found, so repeat process
        if not name_found:
            # name wasn't found, go back to start of while loop
            print('Name is not in system')
        else:
            # break out of while loop. Technically not required since while
            # will exit here anyway, but clarity is nice
            break

print('This is {} full hours report:'.format(full_name))
for k, v in hour_summation.items():
    print(k + ': ' + str(v) + ' hours')

for k, v in hour_summation.items():
    x = np.array(k)
    y = np.array(v)
    y_pos = np.arange(len(y))
    plt.bar(y_pos, y, align='center', alpha=0.5)
    plt.xticks(y_pos, x)
plt.ylabel('hours')
plt.title('Projects for: ', full_name)
plt.show()
Here is the error I'm getting. It is doing everything as it should until it reaches the graph section.
Enter your full name: david ayers
This is david ayers full hours report:
AD-0001 Administrative Time: 4 hours
AD-0002 Training: 24 hours
AD-0003 Personal Time: 8 hours
AD-0004 Sick Time: 0 hours
OPS-0001 General Operational Support: 61 hours
SUP-2507 NAFTA MFTS OS Support: 10 hours
ENG-2001_O Parts Maturity-Overhead: 1 hours
ENG-2006_O CHEC 2 Tool-Overhead: 19 hours
FIN-2005 BLU Lake BOM Analytics: 52 hours
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-17-c9c8a4aa3176> in <module>()
     36     x = np.array(k)
     37     y = np.array(v)
---> 38     y_pos = np.arange(len(y))
     39     plt.bar(y_pos, y, align='center', alpha=0.5)
     40     plt.xticks(y_pos, x)
TypeError: len() of unsized object
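A sketch of one way around both problems, assuming the hour_summation dict built above: collect all projects into lists and call plt.bar once, since np.array(v) on a single number gives a 0-d array that len() cannot handle, and plt.title expects one string rather than two arguments:
import numpy as np
import matplotlib.pyplot as plt

full_name = 'david ayers'  # hypothetical; comes from the input() call above
hour_summation = {'AD-0001 Administrative Time': 4, 'AD-0002 Training': 24}  # hypothetical totals

names = list(hour_summation.keys())
hours = list(hour_summation.values())
y_pos = np.arange(len(names))  # one x position per project
plt.bar(y_pos, hours, align='center', alpha=0.5)
plt.xticks(y_pos, names, rotation=90)  # rotate the long project names
plt.ylabel('hours')
plt.title('Projects for: ' + full_name)  # one string, not two arguments
plt.tight_layout()
plt.show()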

How to improve efficiency in while loop by pandas

I am new to Python. In my job I deal with a lot of data, so I began studying Python to improve my efficiency.
My first small trial is finding the nearest distance between two coordinates.
I have two files, one named "book.csv" and the other named "macro.csv". [file content screenshot]
book.csv has three columns: BookName, Longitude, Latitude; macro.csv has three columns: MacroName, Longitude, Latitude.
The purpose of the trial is to find the nearest macro to each book. I tried to use pandas for this, and I now get the right result, but the efficiency is a little low: with 1500 books and 200 macros it takes about 15 seconds.
Please help me improve the efficiency. Thanks. The following is my trial code:
# import pandas lib
from pandas import Series, DataFrame
import pandas as pd
# import geopy lib, to calculate the distance between two points
import geopy.distance

# calculate the distance between two (Lat, Lon) points, in metres
def dist(coord1, coord2):
    return geopy.distance.vincenty(coord1, coord2).m

# find the nearest result for each book: macro name and distance
def find_nearest_macro(df_macro, df_book):
    # get column content from the dataframes as Series
    # Macro
    s_macro_name = df_macro["MacroName"]
    s_macro_Lat = df_macro["Latitude"]
    s_macro_Lon = df_macro["Longitude"]
    # Book
    s_book_name = df_book["BookName"]
    s_book_Lat = df_book["Latitude"]
    s_book_Lon = df_book["Longitude"]
    # empty lists used to collect the nearest results
    nearest_macro = []
    nearest_dist = []
    # loop through each book
    ibook = 0
    while ibook < len(s_book_name):
        # give an initial value to the result
        nearest_macro_name = s_macro_name[0]
        nearest_macro_dist = dist((s_book_Lat[0], s_book_Lon[0]), (s_macro_Lat[0], s_macro_Lon[0]))
        # get the coordinate of this book
        book_coord = (s_book_Lat[ibook], s_book_Lon[ibook])
        # loop through each macro, resetting the loop variable
        imacro = 1
        while imacro < len(s_macro_name):
            # get the coordinate of this macro
            macro_cood = (s_macro_Lat[imacro], s_macro_Lon[imacro])
            # calculate the distance between book and macro
            tempd = dist(book_coord, macro_cood)
            # if this distance is closer
            if tempd < nearest_macro_dist:
                # update the result
                nearest_macro_dist = tempd
                nearest_macro_name = s_macro_name[imacro]
            # increment the loop variable
            imacro = imacro + 1
        # append the nearest result for this book
        nearest_macro.append(nearest_macro_name)
        nearest_dist.append(nearest_macro_dist)
        # increment the loop variable
        ibook = ibook + 1
    # return both the nearest macro names and distances (a tuple can return 2 results)
    return (nearest_macro, nearest_dist)

# assign the filenames:
file_macro = '.\\TestFile\\Macro.csv'
file_book = '.\\TestFile\\Book.csv'
# read content from csv into dataframes
df_macro = pd.read_csv(file_macro)
df_book = pd.read_csv(file_book)
# find the nearest macro name and distance
t_nearest_result = find_nearest_macro(df_macro, df_book)
# create new Series, converting the lists
s_nearest_marco_name = Series(t_nearest_result[0])
s_nearest_macro_dist = Series(t_nearest_result[1])
# insert the new Series into the dataframe
df_book["NearestMacro"] = s_nearest_marco_name
df_book["NearestDist"] = s_nearest_macro_dist
print(df_book.head())
# write the new df_book to a new csv file
df_book.to_csv('.\\TestFile\\nearest.csv')
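No answer was posted here, but the usual speed-up is to drop the per-pair geopy calls and compute all book-to-macro distances at once with NumPy. A sketch using the haversine formula (a spherical-Earth approximation, so slightly less exact than vincenty), assuming the same column names:
import numpy as np

def find_nearest_macro_vectorized(df_macro, df_book):
    R = 6371000.0  # mean Earth radius in metres
    # radians; shaping the book coordinates as columns broadcasts to a (n_book, n_macro) matrix
    blat = np.radians(df_book['Latitude'].values)[:, None]
    blon = np.radians(df_book['Longitude'].values)[:, None]
    mlat = np.radians(df_macro['Latitude'].values)
    mlon = np.radians(df_macro['Longitude'].values)
    # haversine formula for every book/macro pair at once
    a = np.sin((mlat - blat) / 2) ** 2 + np.cos(blat) * np.cos(mlat) * np.sin((mlon - blon) / 2) ** 2
    d = 2 * R * np.arcsin(np.sqrt(a))  # distances in metres
    idx = d.argmin(axis=1)  # column index of the nearest macro for each book
    return df_macro['MacroName'].values[idx], d[np.arange(len(idx)), idx]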

Resolving TypeError in Python code

I'm basically running the code below. I retrieve pairs of stocks (laid out as Row 1: Stock 1, 2; Row 2: Stock 1, 2 and so on, where Stocks 1 and 2 differ in each row) from a CSV file. I then take in data from Yahoo associated with these "pairs" of stocks. I calculate the returns of the stocks and check whether the distance (difference in returns) between a pair of stocks breaches some threshold; if so, I return 1. However, I'm getting the following error, which I am unable to resolve:
PricePort(tickers)
     27     for ticker in tickers:
     28         #print ticker
---> 29         x = pd.read_csv('http://chart.yahoo.com/table.csv?s=ttt'.replace('ttt',ticker),usecols=[0,6],index_col=0)
     30         x.columns=[ticker]
     31         final=pd.merge(final,x,left_index=True,right_index=True)
TypeError: expected a character buffer object
The code is as follows:
from datetime import datetime
import pytz
import csv
import pandas as pd
import pandas.io.data as web
import numpy as np

# retrieves pairs of stocks (laid out as Row 1: Stock 1,2; Row 2: Stock 1,2 and so on,
# where Stock 1 and 2 are different in each row) from the CSV file
def Dataretriever():
    Pairs = []
    f1 = open('C:\Users\Pythoncode\Pairs.csv')  # enter the location of the file
    csvdata = csv.reader(f1)
    for row in csvdata:  # reading tickers from the csv file
        Pairs.append(row)
    return Pairs

tickers = Dataretriever()  # obtaining the data

# taking in data from Yahoo associated with these "pairs" of stocks
def PricePort(tickers):
    """
    Returns historical adjusted prices of a portfolio of stocks.
    tickers=pairs
    """
    final = pd.read_csv('http://chart.yahoo.com/table.csv?s=^GSPC', usecols=[0,6], index_col=0)
    final.columns = ['^GSPC']
    for ticker in tickers:
        #print ticker
        x = pd.read_csv('http://chart.yahoo.com/table.csv?s=ttt'.replace('ttt', ticker), usecols=[0,6], index_col=0)
        x.columns = [ticker]
        final = pd.merge(final, x, left_index=True, right_index=True)
    return final

# calculating the returns of the stocks
def Returns(tickers):
    l = []
    begdate = (2014, 1, 1)
    enddate = (2014, 6, 1)
    p = PricePort(tickers)
    ret = (p.close[1:] - p.close[:-1]) / p.close[1:]
    l.append(ret)
    return l

# a class to see if the distance (difference in returns) between a
# pair of stocks breaches some threshold
class ThresholdClass():
    # constructor
    def __init__(self, Pairs):
        self.Pairs = Pairs

    # calculating the distance (difference in returns) between a pair of stocks
    def Distancefunc(self, tickers):
        k = 0
        l = Returns(tickers)
        summation = [[0 for x in range(k)] for x in range(k)]  # 2d matrix for the squared distance
        for i in range(k):
            for j in range(i+1, k):  # it will be an upper triangular matrix
                for p in range(len(self.PricePort(tickers)) - 1):
                    summation[i][j] = summation[i][j] + (l[i][p] - l[j][p])**2  # calculating distance
        for i in range(k):  # setting the lower half of the matrix to 1 (if we see 1 in the answer we will set a higher limit, but typically the squared distance is less than 1)
            for j in range(i+1):
                sum[i][j] = 1
        return sum

    # this function is used in determining the threshold distance
    def MeanofPairs(self, tickers):
        sum = self.Distancefunc(tickers)
        mean = np.mean(sum)
        return mean

    # this function is used in determining the threshold distance
    def StandardDeviation(self, tickers):
        sum = self.Distancefunc(tickers)
        standard_dev = np.std(sum)
        return standard_dev

    def ThresholdandnewsChecker(self, tickers):
        threshold = self.MeanofPairs(tickers) + 2 * self.StandardDeviation(tickers)
        if self.Distancefunc(tickers) > threshold:
            return 1

Threshold_Class = ThresholdClass(tickers)
Threshold_Class.ThresholdandnewsChecker(tickers, 1)
The trouble is that Dataretriever() returns a list of lists, not strings. When you iterate over tickers, the name ticker is bound to a list (one row of the CSV).
The str.replace method expects both arguments to be strings. The following code raises the error because the second argument is a list:
'http://chart.yahoo.com/table.csv?s=ttt'.replace('ttt', ticker)
The subsequent line x.columns = [ticker] will cause similar problems. Here, ticker needs to be a hashable object (like a string or an integer), but lists are not hashable.
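A minimal sketch of the fix, assuming each CSV row holds the two symbols of one pair: loop over the individual strings inside each row before building the URL:
tickers = [['AAPL', 'MSFT'], ['KO', 'PEP']]  # hypothetical pairs as read from the CSV

for pair in tickers:
    for ticker in pair:  # ticker is now a string, as str.replace expects
        url = 'http://chart.yahoo.com/table.csv?s=ttt'.replace('ttt', ticker)
        # pd.read_csv(url, usecols=[0, 6], index_col=0) now receives a valid string URL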
