I want to check whether my data is linearly separable or not. For that I am using the equations mentioned at this link. Below is the code that I am using:
try:
    import os
    #import random
    import traceback
    import datetime
    #import numpy as np
    import scipy.io as sio
    import pulp

    os.system('cls')
    dicA = sio.loadmat('A1.mat')
    A = dicA.get('A1')
    var = pulp.LpVariable.dicts("var", range(11), pulp.LpContinuous)
    A = A[:, 0:10]
    model = pulp.LpProblem("Data linearly seaparable", pulp.LpMinimize)
    model += 0
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    for i in range(len(A)):
        expr = pulp.LpAffineExpression()
        for j in range(len(A[i])):
            expr += var[j]*A[i][j]
        expr += var[10] <= -1
        model += expr
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    model.solve()
    print(pulp.LpStatus[model.status])
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
except:
    print('exception')
    tb = traceback.format_exc()
    print(tb)
finally:
    print('reached finally')
And here is the output that I am getting:
C:\Users\puneet\Anaconda3\lib\site-packages\pulp\pulp.py:1348: UserWarning: Overwriting previously set objective.
warnings.warn("Overwriting previously set objective.")
2017-08-29 10:06:21
exception
Traceback (most recent call last):
File "C:/Hackerearth Challenge/Machine Learning #3/LInearlySeaparblePulp.py", line 31, in <module>
model.solve()
File "C:\Users\puneet\Anaconda3\lib\site-packages\pulp\pulp.py", line 1664, in solve
status = solver.actualSolve(self, **kwargs)
File "C:\Users\puneet\Anaconda3\lib\site-packages\pulp\solvers.py", line 1362, in actualSolve
return self.solve_CBC(lp, **kwargs)
File "C:\Users\puneet\Anaconda3\lib\site-packages\pulp\solvers.py", line 1384, in solve_CBC
tmpMps, rename = 1)
File "C:\Users\puneet\Anaconda3\lib\site-packages\pulp\pulp.py", line 1484, in writeMPS
f.write(" LO BND %-8s % .12e\n" % (n, v.lowBound))
TypeError: must be real number, not str
reached finally
I am adding 0 to the model to specify that there is no objective function, as mentioned in the link. Also, since there are about 12000 rows in the A variable, I am trying to create the constraints dynamically. But there seems to be some problem with that. So, what is it that I am doing wrong?
var = pulp.LpVariable.dicts("var",range(11),pulp.LpContinuous)
needs to be
var = pulp.LpVariable.dicts("var",range(11),cat=pulp.LpContinuous)
because the LpVariable.dicts function signature looks like this:
def dicts(self, name, indexs, lowBound = None, upBound = None, cat = LpContinuous, indexStart = []):
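The reason the wrong call only blows up later, inside writeMPS, is that pulp.LpContinuous is just the string 'Continuous'; passed positionally it lands in the lowBound slot, so every variable gets a string lower bound that the MPS writer then tries to format as a float (hence "must be real number, not str"). A minimal sketch of the difference (hypothetical snippet, not from the question's code):

import pulp

# Positional call: pulp.LpContinuous ('Continuous') fills the lowBound parameter.
bad = pulp.LpVariable.dicts("var", range(11), pulp.LpContinuous)
print(bad[0].lowBound)   # 'Continuous' -> TypeError once writeMPS formats it

# Keyword call: lowBound stays None and the category is set as intended.
good = pulp.LpVariable.dicts("var", range(11), cat=pulp.LpContinuous)
print(good[0].lowBound)  # None
print(good[0].cat)       # 'Continuous'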
I created a watchdog in Python. It watches a folder (test_folder). If a certain file is created, I call a function that opens the file and creates a JPEG, which is then saved in the test_folder again.
The first iteration works just fine, but when I upload a different file (different content and even a slightly different name) I get this error:
Event: C:/Users/WEL4HO/Desktop/test_folder\AD1__Sampling2000KHz_AEKi-0.parquet created!
Exception in thread Thread-1:
Traceback (most recent call last):
File "C:\Users\WEL4HO\.conda\envs\anaconda\lib\threading.py", line 980, in _bootstrap_inner
self.run()
File "C:\Users\WEL4HO\.conda\envs\anaconda\lib\site-packages\watchdog\observers\api.py", line 205, in run
self.dispatch_events(self.event_queue)
File "C:\Users\WEL4HO\.conda\envs\anaconda\lib\site-packages\watchdog\observers\api.py", line 381, in dispatch_events
handler.dispatch(event)
File "C:\Users\WEL4HO\.conda\envs\anaconda\lib\site-packages\watchdog\events.py", line 271, in dispatch
self.on_any_event(event)
File "C:\Users\WEL4HO\Desktop\test_folder\watchdog3.py", line 34, in on_any_event
Final.GAF(event.src_path, 'test_watchdog.jpeg')
File "C:\Users\WEL4HO\Desktop\test_folder\Final.py", line 18, in GAF
df = pq.read_table(source=parquet_file).to_pandas()
File "C:\Users\WEL4HO\.conda\envs\anaconda\lib\site-packages\pyarrow\parquet\__init__.py", line 2737, in read_table
dataset = _ParquetDatasetV2(
File "C:\Users\WEL4HO\.conda\envs\anaconda\lib\site-packages\pyarrow\parquet\__init__.py", line 2340, in __init__
[fragment], schema=schema or fragment.physical_schema,
File "pyarrow\_dataset.pyx", line 870, in pyarrow._dataset.Fragment.physical_schema.__get__
File "pyarrow\error.pxi", line 144, in pyarrow.lib.pyarrow_internal_check_status
File "pyarrow\error.pxi", line 111, in pyarrow.lib.check_status
PermissionError: [WinError 32] Failed to open local file 'C:/Users/WEL4HO/Desktop/test_folder/AD1__Sampling2000KHz_AEKi-0.parquet'. Detail: [Windows error 32] The process cannot access the file because it is being used by another process.
This is my watchdog:
import sys
import time
import logging
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import Final
import os

class OnMyWatch:
    watchDirectory = r'C:/Users/WEL4HO/Desktop/test_folder'

    def __init__(self):
        self.observer = Observer()

    def run(self):
        event_handler = Handler()
        self.observer.schedule(event_handler, self.watchDirectory, recursive=True)
        self.observer.start()
        try:
            while True:
                time.sleep(1)
        except:
            self.observer.stop()
        self.observer.join()

class Handler(FileSystemEventHandler):
    @staticmethod
    def on_any_event(event):
        if event.is_directory:
            return None
        elif event.event_type == 'created':
            print(f'Event: {event.src_path} created!')
            # folder event.src_path -> folder raw -> file
            if event.src_path.endswith('Sampling2000KHz_AEKi-0.parquet'):
                Final.GAF(event.src_path, 'GAF_photo.jpeg')

if __name__ == '__main__':
    watch = OnMyWatch()
    watch.run()
This is my file with the function I call:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from pyts.image import GramianAngularField
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
from pathlib import Path
from scipy.signal import savgol_filter
import pandas as pd
import datetime

def GAF(path, outputname):
    Sampling_rate = 2000000  # AE sample rate
    parquet_file = path
    # convert data into numpy array
    df = pq.read_table(source=parquet_file).to_pandas()
    acoustic_emission_1 = df.values  # convert the data into numpy array
    time = len(acoustic_emission_1)/(Sampling_rate)  # process length
    # downsampling
    downsampling_rate = 90
    acoustic_emission_1_clip = acoustic_emission_1[:len(acoustic_emission_1)-(len(acoustic_emission_1)%downsampling_rate)]
    acoustic_emission_1_downsampled = acoustic_emission_1_clip.reshape(-1, downsampling_rate).mean(axis=1)
    samplingrate_downsampled = (Sampling_rate)/(downsampling_rate)
    # keep only the non-negative data in arr
    arr = np.array([], dtype=np.uint16)
    counter = 0
    negative_values = 0
    for x in np.nditer(acoustic_emission_1_downsampled):
        if x < 0:
            negative_values = negative_values+1
            pass
        else:
            arr = np.append(arr, acoustic_emission_1_downsampled[counter])
        counter = counter+1
    # new sampling rate
    u = (negative_values)/time
    samplingrate_new = samplingrate_downsampled-u
    # filter
    window_length = int(round((len(arr)*0.285), 0))
    if (window_length % 2) == 0:
        window_length = window_length+1
    else:
        pass
    print('window_length:', window_length)
    y_filtered = savgol_filter(arr, window_length, 3)
    print('nach filter', len(y_filtered))  # "after filter"
    print('Y filtered:', y_filtered)
    # x-axis
    length = len(y_filtered)  # length of the AE values
    x = np.array([], dtype=int)  # empty array for the x axis (time)
    counter = 0
    i = 0
    while i == 0:
        x = np.append(x, counter)
        if counter == length-1:
            break
        counter = counter+1
    print('x achse:', len(x))  # "x axis"
    X = np.array([y_filtered, x], dtype=object)
    print(X)
    print(length)
    # Transform the time series into Gramian Angular Fields
    gasf = GramianAngularField(image_size=1000, method='summation')
    X_gasf = gasf.transform(X)
    # Show the images for the first time series
    fig = plt.figure(figsize=(20, 10))
    grid = ImageGrid(fig, 111,
                     nrows_ncols=(1, 1),
                     axes_pad=0.15,
                     share_all=True,
                     cbar_location="right",
                     cbar_mode="single",
                     cbar_size="7%",
                     cbar_pad=0.3,
                     )
    images = [X_gasf[0]]
    titles = ['Summation']
    for image, title, ax in zip(images, titles, grid):
        im = ax.imshow(image, cmap='rainbow', origin='lower')
        ax.set_title(title, fontdict={'fontsize': 12})
        ax.cax.colorbar(im)
        ax.cax.toggle_label(True)
    plt.suptitle('Gramian Angular Field', y=0.98, fontsize=16)
    fig.savefig(outputname, dpi=300)
    plt.close(fig)
    del fig
    del df
Never mind, I fixed it myself. Before opening the file, I added:
init_size = -1
while True:
    current_size = os.path.getsize(event.src_path)
    if current_size == init_size:
        break
    else:
        init_size = os.path.getsize(event.src_path)
        time.sleep(2)
print("file copy has now finished")
I am trying to create a separate Python file with the code given below. When calling the method, I pass mydata as a data frame with these columns:
['wage', 'educ', 'exper', 'tenure'].
import pandas as pd
import numpy as np
from prettytable import PrettyTable as pt

def LinearRegressionOLS(mydata, target_column):
    if(not isinstance(mydata, pd.DataFrame)):
        raise TypeError("Data must be of type Data Frame")
    if(not isinstance(target_column, str)):
        raise TypeError("target_column must be String")
    if(target_column not in mydata.columns):
        raise KeyError("target_column doesn't exist in Data Frame")
    data = mydata.copy()
    data["one"] = np.ones(data.count()[target_column])
    column_list = ["one"]
    for i in data.columns:
        column_list.append(i)
    Y = data[target_column].as_matrix()
    data.drop(target_column, inplace=True, axis=1)
    X = data[column_list].as_matrix()
    del data
    beta = np.matmul(np.matmul(np.linalg.inv(np.matmul(X.T, X)), X.T), Y)
    predY = np.matmul(X, beta)
    total = np.matmul((Y-np.mean(Y)).T, (Y-np.mean(Y)))
    residual = np.matmul((Y-predY).T, (Y-predY))
    sigma = np.matmul((Y-predY).T, (Y-predY))/(X.shape[0]-X.shape[1])
    omega = np.square(sigma)*np.linalg.inv(np.matmul(X.T, X))
    SE = np.sqrt(np.diag(omega))
    tstat = beta/SE
    Rsq = 1-(residual/total)
    final = pt()
    final.add_column(" ", column_list)
    final.add_column("Coefficients", beta)
    final.add_column("Standard Error", SE)
    final.add_column("t-stat", tstat)
    print(final)
    print("Residual: ", residual)
    print("Total: ", total)
    print("Standard Error: ", sigma)
    print("R Square: ", Rsq)
After running the above code, I call the function as shown below:
>>> c
['wage', 'educ', 'exper', 'tenure']
>>> import LR_OLS as inf
>>> inf.LinearRegressionOLS(file[c],"wage")
and I get an error like this:
Traceback (most recent call last):
File "<pyshell#182>", line 1, in <module>
inf.LinearRegressionOLS(file[c],"wage")
File "E:\python\LR_OLS.py", line 29, in LinearRegressionOLS
File "C:\Program Files\Python35\lib\site-packages\pandas\core\frame.py", line 2133, in __getitem__
return self._getitem_array(key)
File "C:\Program Files\Python35\lib\site-packages\pandas\core\frame.py", line 2177, in _getitem_array
indexer = self.loc._convert_to_indexer(key, axis=1)
File "C:\Program Files\Python35\lib\site-packages\pandas\core\indexing.py", line 1269, in _convert_to_indexer
.format(mask=objarr[mask]))
KeyError: "['wage'] not in index"
Can anyone help me understand why I am getting this error? How can I resolve it?
The problem is that you still have 'wage' in column_list. To keep it from ever getting in there, adapt your loop as follows:
for i in data.columns:
    if i != 'wage':  # add this line to your code
        column_list.append(i)
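Since the function already receives the target as a parameter, a slightly more general variant (my suggestion, not part of the original answer) avoids hard-coding 'wage' and skips whatever target_column happens to be:

for i in data.columns:
    if i != target_column:  # skip the target column generically
        column_list.append(i)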
I am reading the book Machine Learning in Action.
One example in Chapter 2 converts strings to ints for classification use, e.g. 'student' = 1, 'teacher' = 2, 'engineer' = 3.
See the int() conversion in the code below; this error comes up when I execute it:
invalid literal for int() with base 10: 'largeDose'
Where is my problem?
from numpy import zeros  # import implied by the book's kNN.py

def file2matrix(filename):
    fr = open(filename)
    numberOfLines = len(fr.readlines())  # get the number of lines in the file
    returnMat = zeros((numberOfLines, 3))  # prepare matrix to return
    classLabelVector = []  # prepare labels to return
    fr = open(filename)
    index = 0
    for line in fr.readlines():
        line = line.strip()
        listFromLine = line.split('\t')
        returnMat[index, :] = listFromLine[0:3]
        classLabelVector.append(int(listFromLine[-1]))
        index += 1
    return returnMat, classLabelVector
caller code:
from numpy import *
import kNN
datingDataMat,datingLabels = kNN.file2matrix('datingTestSet.txt')
import matplotlib
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
#ax.scatter(datingDataMat[:,1], datingDataMat[:,2])
ax.scatter(datingDataMat[:,1], datingDataMat[:,2], array(datingLabels), array(datingLabels))
plt.show()
Traceback and error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "C:\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/Users/Zhiming Zhang/Documents/Machine Learning/kNN/execute.py", line 10, in <module>
datingDataMat,datingLabels = kNN.file2matrix('datingTestSet.txt')
File "kNN.py", line 48, in file2matrix
classLabelVector.append(int(listFromLine[-1]))
ValueError: invalid literal for int() with base 10: 'largeDoses'
You try to convert a string like "largeDose" to an int using the conversion function int(). But that's not how this works: int() converts only strings that look like integer numbers (e.g. "123") to integers.
In your case you can use either an if-elif-else cascade or a dictionary.
Cascade:
if listFromLine[-1] == 'largeDose':
    result = 1
elif listFromLine[-1] == 'teacher':
    result = 2
elif …:
    …
else:
    result = 42  # or raise an exception or whatever
Dictionary:
conversion = {
'largeDose': 1,
'teacher': 2,
… }
# ...
# later, in the loop:
classLabelVector.append(conversion[listFromLine[-1]])
# The above will raise a KeyError if an unexpected value is given.
# Ir in case you want to use a default value:
classLabelVector.append(conversion.get(listFromLine[-1], 42))
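Putting the dictionary approach into file2matrix gives something like the sketch below. The three label strings are an assumption based on the dating data set that ships with the book (datingTestSet.txt); verify them against whatever actually appears in your last column:

# Assumed labels from datingTestSet.txt; adjust to your file.
label_to_int = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}

for line in fr.readlines():
    listFromLine = line.strip().split('\t')
    returnMat[index, :] = listFromLine[0:3]
    classLabelVector.append(label_to_int[listFromLine[-1]])
    index += 1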
I'm trying to run pyalgotrade's event profiler. I'm using custom data; it works when I run it with the default strategy/predicate 'BuyOnGap', but when I try to run it with a simple custom strategy it throws this error:
Traceback (most recent call last):
File "C:\Users\David\Desktop\Python\Coursera\Computational Finance\Week2\PyAlgoTrade\Bitfinex\FCT\FCT_single_event_test.py", line 43, in <module>
main(True)
File "C:\Users\David\Desktop\Python\Coursera\Computational Finance\Week2\PyAlgoTrade\Bitfinex\FCT\FCT_single_event_test.py", line 35, in main
eventProfiler.run(feed, True)
File "C:\Python27\lib\site-packages\pyalgotrade\eventprofiler.py", line 215, in run
disp.run()
File "C:\Python27\lib\site-packages\pyalgotrade\dispatcher.py", line 102, in run
eof, eventsDispatched = self.__dispatch()
File "C:\Python27\lib\site-packages\pyalgotrade\dispatcher.py", line 90, in __dispatch
if self.__dispatchSubject(subject, smallestDateTime):
File "C:\Python27\lib\site-packages\pyalgotrade\dispatcher.py", line 68, in __dispatchSubject
ret = subject.dispatch() is True
File "C:\Python27\lib\site-packages\pyalgotrade\feed\__init__.py", line 105, in dispatch
self.__event.emit(dateTime, values)
File "C:\Python27\lib\site-packages\pyalgotrade\observer.py", line 59, in emit
handler(*args, **kwargs)
File "C:\Python27\lib\site-packages\pyalgotrade\eventprofiler.py", line 172, in __onBars
eventOccurred = self.__predicate.eventOccurred(instrument, self.__feed[instrument])
File "C:\Python27\lib\site-packages\pyalgotrade\eventprofiler.py", line 89, in eventOccurred
raise NotImplementedError()
NotImplementedError
My code is:
from pyalgotrade import eventprofiler
from pyalgotrade.technical import stats
from pyalgotrade.technical import roc
from pyalgotrade.technical import ma
from pyalgotrade.barfeed import csvfeed

class single_event_strat(eventprofiler.Predicate):
    def __init__(self, feed):
        self.__returns = {}  # CLASS ATTR
        for inst in feed.getRegisteredInstruments():
            priceDS = feed[inst].getAdjCloseDataSeries()  # STORE: priceDS (a temporary representation)
            self.__returns[inst] = roc.RateOfChange(priceDS, 1)
            # CALC: ATTR <- returns over the adjusted close values, consumes priceDS
            # (could be expressed as self.__returns[inst] = roc.RateOfChange((feed[inst].getAdjCloseDataSeries()), 1),
            # but would be less readable)

    def eventOccoured(self, instrument, aBarDS):
        if (aBarDS[-1].getVolume() > 10 and aBarDS[-1].getClose() > 5):
            return True
        else:
            return False

def main(plot):
    feed = csvfeed.GenericBarFeed(0)
    feed.addBarsFromCSV('FCT', "FCT_daily_converted.csv")
    predicate = single_event_strat(feed)
    eventProfiler = eventprofiler.Profiler(predicate, 5, 5)
    eventProfiler.run(feed, True)
    results = eventProfiler.getResults()
    print "%d events found" % (results.getEventCount())
    if plot:
        eventprofiler.plot(results)

if __name__ == "__main__":
    main(True)
What does this error mean?
Does anyone know what's wrong and how to fix it?
Here is a link to the eventprofiler code:
http://pastebin.com/QD220VQb
As a bonus, does anyone know where I can find examples of the profiler being used, other than the example pyalgotrade gives, seen here?
I think you just made a spelling mistake in the eventOccurred method definition:
def eventOccoured( self, instrument, aBarDS):
should be replaced by
def eventOccurred( self, instrument, aBarDS):
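That also explains the traceback: eventprofiler.Predicate declares eventOccurred as a hook that subclasses must override, and the base implementation simply raises (line 89 of eventprofiler.py in your stack trace). Because your subclass only defines the misspelled eventOccoured, the profiler still dispatches to the base method. Roughly (paraphrased, not the library's exact source):

class Predicate(object):
    def eventOccurred(self, instrument, bards):
        # Base-class stub: subclasses must override this exact name,
        # otherwise every bar lands here.
        raise NotImplementedError()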
I am trying to run a CUDA kernel with numbapro Python, but I keep getting an out of resources error.
I then tried to execute the kernel in a loop and send smaller arrays, but that still gave me the same error.
Here is my error message:
Traceback (most recent call last):
File "./predict.py", line 418, in <module>
predict[griddim, blockdim, stream](callResult_d, catCount, numWords, counts_d, indptr_d, indices_d, probtcArray_d, priorC_d)
File "/home/mhagen/Developer/anaconda/lib/python2.7/site-packages/numba/cuda/compiler.py", line 228, in __call__
sharedmem=self.sharedmem)
File "/home/mhagen/Developer/anaconda/lib/python2.7/site-packages/numba/cuda/compiler.py", line 268, in _kernel_call
cu_func(*args)
File "/home/mhagen/Developer/anaconda/lib/python2.7/site-packages/numba/cuda/cudadrv/driver.py", line 1044, in __call__
self.sharedmem, streamhandle, args)
File "/home/mhagen/Developer/anaconda/lib/python2.7/site-packages/numba/cuda/cudadrv/driver.py", line 1088, in launch_kernel
None)
File "/home/mhagen/Developer/anaconda/lib/python2.7/site-packages/numba/cuda/cudadrv/driver.py", line 215, in safe_cuda_api_call
self._check_error(fname, retcode)
File "/home/mhagen/Developer/anaconda/lib/python2.7/site-packages/numba/cuda/cudadrv/driver.py", line 245, in _check_error
raise CudaAPIError(retcode, msg)
numba.cuda.cudadrv.driver.CudaAPIError: Call to cuLaunchKernel results in CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES
Here is my source code:
from numbapro.cudalib import cusparse
from numba import *
from numbapro import cuda
import math   # math, time and numpy imports implied by the rest of the snippet
import time
import numpy
import numpy as np

@cuda.jit(argtypes=(double[:], int64, int64, double[:], int64[:], int64[:], double[:,:], double[:]))
def predict(callResult, catCount, wordCount, counts, indptr, indices, probtcArray, priorC):
    i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
    correct = 0
    wrong = 0
    lastDocIndex = -1
    maxProb = -1e6
    picked = -1
    for cat in range(catCount):
        probSum = 0.0
        for j in range(indptr[i], indptr[i+1]):
            wordIndex = indices[j]
            probSum += (counts[j]*math.log(probtcArray[cat, wordIndex]))
        probSum += math.log(priorC[cat])
        if probSum > maxProb:
            maxProb = probSum
            picked = cat
    callResult[i] = picked

predictions = []
counter = 1000

for i in range(int(math.ceil(numDocs/(counter*1.0)))):
    docTestSliceList = docTestList[i*counter:(i+1)*counter]
    numDocsSlice = len(docTestSliceList)
    docTestArray = np.zeros((numDocsSlice, numWords))
    for j, doc in enumerate(docTestSliceList):
        for ind in doc:
            docTestArray[j, ind['term']] = ind['count']
    docTestArraySparse = cusparse.ss.csr_matrix(docTestArray)

    start = time.time()
    OPT_N = numDocsSlice
    blockdim = 1024, 1
    griddim = int(math.ceil(float(OPT_N)/blockdim[0])), 1

    catCount = len(music_categories)
    callResult = np.zeros(numDocsSlice)
    stream = cuda.stream()
    with stream.auto_synchronize():
        probtcArray_d = cuda.to_device(numpy.asarray(probtcArray), stream)
        priorC_d = cuda.to_device(numpy.asarray(priorC), stream)
        callResult_d = cuda.to_device(callResult, stream)
        counts_d = cuda.to_device(docTestArraySparse.data, stream)
        indptr_d = cuda.to_device(docTestArraySparse.indptr, stream)
        indices_d = cuda.to_device(docTestArraySparse.indices, stream)
        predict[griddim, blockdim, stream](callResult_d, catCount, numWords, counts_d, indptr_d, indices_d, probtcArray_d, priorC_d)
        callResult_d.to_host(stream)
    #stream.synchronize()
    predictions += list(callResult)
    print "prediction %d: %f" % (i, time.time()-start)
I found out the problem was in the CUDA procedure.
When you call predict the blockdim is set to 1024.
predict[griddim, blockdim, stream](callResult_d, catCount, numWords, counts_d, indptr_d, indices_d, probtcArray_d, priorC_d)
But the procedure is called iteratively with slice sizes of 1000 elements, not 1024.
So the kernel will attempt to write 24 elements that are out of bounds in the return array.
Sending a number-of-elements parameter (n_el) and placing a bounds check in the CUDA procedure solves it:
@cuda.jit(argtypes=(double[:], int64, int64, int64, double[:], int64[:], int64[:], double[:,:], double[:]))
def predict(callResult, n_el, catCount, wordCount, counts, indptr, indices, probtcArray, priorC):
    i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
    if i < n_el:
        ....
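The launch site then passes the slice length through as the extra argument; a sketch using the same names as the loop above (n_el is just numDocsSlice for the current slice):

n_el = numDocsSlice  # real documents in this slice; may be < blockdim[0]
predict[griddim, blockdim, stream](callResult_d, n_el, catCount, numWords,
                                   counts_d, indptr_d, indices_d,
                                   probtcArray_d, priorC_d)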