Machine learning support vector machine (SVM) model for prediction - Python

I'm writing Python code that uses a machine learning support vector machine (SVM) model for prediction; I used a bank dataset from Kaggle.
This is the code, and the error is indicated on its 4th line:
# Encode one raw record, position by position.
# Positions 0-4 and 10 are translated through their own lookup object;
# every other position goes through the scaler `s`.
column_lookups = {0: gen, 1: m, 2: ed, 3: s_emp, 4: d, 10: a}

encoded_data = [0] * len(data_to_predict)
for pos in range(len(data_to_predict)):
    lookup = column_lookups.get(pos)
    if lookup is not None:
        # A KeyError raised here means data_to_predict[pos] is not a key of
        # the lookup used for this position.
        encoded_data[pos] = lookup[data_to_predict[pos]]
    else:
        # NOTE(review): this scales the placeholder already stored in
        # encoded_data[pos] (0), not data_to_predict[pos] -- confirm intent.
        encoded_data[pos] = s.fit_transform(np.array(encoded_data[pos]).reshape(-1, 1))
This is the error. Why does it show 'male'? What am I supposed to change or add?
`KeyError Traceback (most recent call last)
<ipython-input-68-e2388fb9ed49> in <module>
2 for i in range(len(data_to_predict)):
3 if i == 0:
----> 4 encoded_data[i] = gen[data_to_predict[i]]
5 elif i==1:
6 encoded_data[i] = m[data_to_predict[i]]
> KeyError: 'male'`

What does your data_to_predict look like? From the information provided, on the line encoded_data[i] = gen[data_to_predict[i]], data_to_predict[i] evaluates to 'male', and there is no key named 'male' in gen. Print gen.keys() and compare the spelling and capitalisation of its keys with the value you are passing in.

Related

Make a binary tree using lists

I am trying to make a binary tree using lists, but it is showing me this error, can you help me with this?
class BT:
    """One slot of a list-backed binary tree: a value plus left/right links."""

    def __init__(self, lp, data, rp):
        """Store the left link ``lp``, the payload ``data`` and the right link ``rp``."""
        self.LeftPointer, self.data, self.RightPointer = lp, data, rp
def insert(x):
    """Place ``x`` into the global node list ``dsBT``.

    Walks down from slot 0, going right when ``x`` is greater than the value
    in the current slot and left otherwise. When the chosen link equals 0,
    ``x`` is written to the first slot at or after the current one whose data
    is None, and the link is set to that slot's index.
    """
    current = 0
    while True:
        node = dsBT[current]
        if node.data == None:
            # Empty slot (only the root on a fresh tree): store here.
            node.data = x
            return

        branch = "RightPointer" if x > node.data else "LeftPointer"
        child = getattr(node, branch)

        # NOTE(review): 0 is the "no child" marker tested here, but the
        # accompanying script creates nodes with BT(None, None, None). A None
        # link therefore falls through to `current = child`, and the next
        # dsBT[current] raises the TypeError shown in the traceback.
        if child == 0:
            free = current
            while dsBT[free].data != None:
                free += 1
            dsBT[free].data = x
            setattr(node, branch, free)
            return

        current = child
**This part is for printing out the data:**
# Backing store for the tree: a fixed pool of empty nodes.
dsBT = [BT(None, None, None) for _ in range(2)]

# Ask for one video game name per slot and insert each of them.
for _ in range(len(dsBT)):
    vgm = input(str("enter the game name:\n"))
    insert(vgm)

# Dump every slot as: left link, value, right link.
for node in dsBT:
    print(node.LeftPointer, node.data, node.RightPointer)
the error it is showing:
enter the game name:
sarim
enter the game name:
dasr
Traceback (most recent call last):
File "C:\Users\workm\Desktop\Sarim\untitled0.py", line 44, in <module>
insert(vgm)
File "C:\Users\workm\Desktop\Sarim\untitled0.py", line 14, in insert
if dsBT[c].data == None :
TypeError: list indices must be integers or slices, not NoneType

Name Error despite having defined it earlier?

def categorise_sourceIP(df):
    """Count source-IP occurrences and label each count with a size band.

    The band labels are collected in the local list ``counts_cate``. The
    function returns nothing, and the list is not stored on ``df_sIPf``.
    """
    # NOTE(review): `pd.df` is reproduced as posted -- confirm it was not
    # meant to be plain `df`.
    df_sIPf = pd.df.sourceIP.value_counts()
    df_sIPf['counts'] = np.array(df.sourceIP.value_counts())
    df_sIPf['sourceIP'] = df_sIPf.index
    df_sIPf.reset_index(level=0, inplace=True, drop=True)

    # (range of counts, label) pairs, tried in order; anything above 400
    # falls into the open-ended last band.
    bands = (
        (range(0, 21), '<20'),
        (range(21, 201), '21-200'),
        (range(201, 401), '201-400'),
    )
    counts_cate = []
    for num in df_sIPf['counts']:
        for band, label in bands:
            if num in band:
                counts_cate.append(label)
                break
        else:
            if num > 400:
                counts_cate.append('>400')


# NOTE(review): df_sIPf is a local of categorise_sourceIP, so this
# module-level lookup is the line the NameError points at.
counts_cate = df_sIPf['categorised_count']
The error traceback is the following:
NameError Traceback (most recent call last)
<ipython-input-11-9622f76efabe> in <module>
27 elif num > 400:
28 counts_cate.append('>400')
---> 29 counts_cate=df_sIPf['categorised_count']
NameError: name 'df_sIPf' is not defined
How do I fix this? At a key stage in my problem set.
Essentially trying to build a relationship between clusters of two different variables in the dataframe so a similar piece of code will be written for the second set.
You need to return df_sIPf from your function if you want it to be accessible outside that function:
def categorise_sourceIP(df):
    """Count how often each source IP occurs and bucket those counts.

    Args:
        df: DataFrame with a ``sourceIP`` column.

    Returns:
        A new DataFrame with one row per distinct source IP, ordered as
        ``value_counts`` orders them, with the columns ``counts``,
        ``sourceIP`` and ``categorised_count``. The last column holds one of
        '<20', '21-200', '201-400' or '>400'.
    """
    frequencies = df.sourceIP.value_counts()

    # Build a real DataFrame (value_counts returns a Series) with a fresh
    # 0..n-1 index, so the IPs live in a column rather than in the index.
    df_sIPf = pd.DataFrame({
        'counts': frequencies.values,
        'sourceIP': frequencies.index.values,
    })

    # Store the labels on the frame: callers read result['categorised_count'].
    df_sIPf['categorised_count'] = [_count_band(num) for num in df_sIPf['counts']]
    return df_sIPf


def _count_band(num):
    """Return the band label for one occurrence count."""
    # The first band keeps the original boundaries: it includes 20 itself
    # even though its label reads '<20'.
    if num <= 20:
        return '<20'
    if num <= 200:
        return '21-200'
    if num <= 400:
        return '201-400'
    return '>400'
counts_cate = categorise_sourceIP(df)['categorised_count']

Check if graph is Bipartite

so I need to check if a Graph is Bipartite using Numpy arrays and Python 2
This is the code that I've developed to check the graph:
from __future__ import print_function
import numpy as np
class grafo:
    """Adjacency-matrix graph with a two-colouring (bipartite) check."""

    # NOTE(review): this list is a class attribute, so every instance shares
    # it, and __init__ pushes the boolean True onto it. isBipartita later pops
    # that True and uses it as an index, which matches the
    # "adyacencia[True][True]" line printed just before the ValueError.
    visited = []

    def __init__(self, n):
        """Create an ``n`` x ``n`` zero adjacency matrix with uncoloured vertices."""
        self.vertices = []
        self.adjacency = np.zeros((n, n))
        self.visited.append(True)
        self.size = n
        # -1 marks "not coloured yet"; one extra trailing 0 entry follows.
        self.color = [-1] * self.size + [0]

    def isBipartita(self):
        """Return "YES" if the colouring pass finds no conflict, else "NO".

        Starts at vertex 0, uses ``self.visited`` as a stack of vertices still
        to process, and prints every check it performs.
        """
        start = 0
        self.color[start] = 1
        self.visited.append(start)

        while self.visited:
            node = self.visited.pop()
            print("[FIRST CHECK] adyacencia[" + str(node)+"][" + str(node) + "] == 1?")
            # HERE IT CORRUPTS AT SOME POINT (marker left by the author)
            if self.adjacency[node][node] == 1:
                # An edge from a vertex to itself rules out a two-colouring.
                print("True")
                return "NO"

            for other in range(self.size):
                print("[SECOND CHECK] adyacencia[" + str(node)+"][" + str(other) + "] == 1 and color[" + str(other) +"] == -1")
                linked = self.adjacency[node][other] == 1
                if linked and self.color[other] == -1:
                    # Uncoloured neighbour: give it the opposite colour and
                    # queue it for its own pass.
                    print("True")
                    self.color[other] = 1 - self.color[node]
                    self.visited.append(other)
                    continue

                print("[THIRD CHECK] adyacencia[" + str(node)+"][" + str(other) + "] == 1 and color[" + str(other) +"] == color[" + str(node) +"]")
                if linked and self.color[other] == self.color[node]:
                    # Two linked vertices share a colour.
                    print("True")
                    return "NO"

        return "YES"

    def __str__(self):
        """Return the string form of the adjacency matrix."""
        return str(self.adjacency)
#PROGRAM
# First line of input: the side length of the adjacency matrix.
lenV = int(raw_input(""))

# The next lenV lines each hold one row of the matrix as a string of digits.
inputs = []
for _ in range(0, lenV):
    inputs.append(raw_input())
print("\n-> Inputs:")
print(inputs)

print("\n-> List with Inputs Values:")
tempVal = list(inputs)
print(tempVal)

print("\n-> Split and get into array:")
G = grafo(lenV)
for x in range(0, lenV):
    # Split the row string into single characters, one per column.
    tempList = list(tempVal[x])
    print(tempList)
    for y in range(0, lenV):
        G.adjacency[x][y] = tempList[y]

print("\n-> Array to check:")
print(G)
print("\n")
print(G.isBipartita())
The problem is in the class "grafo", in the isBipartita method; at some point while checking, the numpy integer array returns a boolean and gives a ValueError as an output.
These are some examples that I've used to test this:
TEST CASES
3
011
101
110
4
0101
1010
0101
1010
and this is the output that I get:
/** LIKE 20 SUCCESSFUL CHECKS LATER **/
[SECOND CHECK] adyacencia[1][3] == 1 and color[3] == -1
[THIRD CHECK] adyacencia[1][3] == 1 and color[3] == color[1]
[FIRST CHECK] adyacencia[True][True] == 1?
Traceback (most recent call last):
File "bipartitaChecker.py", line 76, in <module>
print(G.isBipartita())
File "bipartitaChecker.py", line 23, in isBipartita
if self.adjacency[i][i] == 1:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
As you can see, just before the traceback the last "[FIRST CHECK]" line reads adyacencia[True][True], which is not how it is intended to be. I have researched the ValueError, but everything I found says it is only raised in some unusual cases, and in my case it shouldn't be caused by any of them.
I don't know what is happening?!
I'll appreciate any help.

name 'Y' is not defined

So I've been trying to get this code to run:
def conference_call_discount(distance, basic_charge):
    """Return the conference-call discount for a call.

    Half of ``basic_charge`` when ``distance`` is 50 or more, 0 when it is
    below 50. If neither comparison holds, nothing is returned.
    """
    if distance >= 50:
        return basic_charge * 0.5
    if distance < 50:
        return 0
# Ask first, then pull in the helper and compute the discount up front.
con_call = str(input("Is it a conference call? "))

from call_charge_functions import conference_call_discount

conference = conference_call_discount(dist, starting_cost)

# NOTE(review): `y` below is a bare name, not the string "y". Nothing in this
# snippet assigns it, which is what the NameError reports.
if con_call == y:
    print("your conference call discount is", conference)
else:
    conference = 0
However I keep getting this error:
NameError: name 'y' is not defined
I don't know what's wrong
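In this if statement:
if con_call == y:
y is a bare variable name, and you never assigned a value to it.
Either assign a value to y before comparing it, or, if you meant to check whether the user typed the letter y, compare against the string literal instead: if con_call == "y":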

python replace word as per condition

At standard input, I am providing the following file:
#123 595739778 "neutral" Won the match #getin
#164 595730008 "neutral" Good girl
data#2 looks like this:
labels 1 0 -1
-1 0.272653 0.139626 0.587721
1 0.0977782 0.0748234 0.827398
For each line of the data#2 file, I want to check the label: if it is -1, replace the sentiment with negative; if it is 1, with positive; and if it is 0, with neutral.
Following are my issues:
I need to start reading the data#2 file from its 2nd line.
I am having trouble with replace. I want to replace as shown below, but it raises an error saying that it expects 1 more argument, even though I already have 2 arguments there.
If I do, something like below (notice the print statement):
if binary == "-1":
senti = str.replace(senti.strip('"'),"negative")
elif binary == "1":
senti = str.replace(senti.strip('"'),"positive")
elif binary == "0":
senti = str.replace(senti.strip('"'),"neutral")
print id, "\t", num, "\t", senti, "\t", sent
but if I do(notice print) then it does not go in the 'if conditions':
if binary == "-1":
senti = str.replace(senti.strip('"'),"negative")
elif binary == "1":
senti = str.replace(senti.strip('"'),"positive")
elif binary == "0":
senti = str.replace(senti.strip('"'),"neutral")
print id, "\t", num, "\t", senti, "\t", sent
How do I print then.
output that I got:
#123 595739778 "neutral" Won the match #getin
#164 595730008 "neutral" Good girl
Expected output (replace should just substitute negative, positive or neutral as per the data#2 file):
#123 595739778 negative Won the match #getin
#164 595730008 positive Good girl
Error:
Traceback (most recent call last):
File "./combine.py", line 17, in <module>
senti = str.replace(senti.strip('"'),"negative")
TypeError: replace() takes at least 2 arguments (1 given)
Here is my code:
for line in sys.stdin:
(id,num,senti,sent) = re.split("\t+",line.strip())
tweet = re.split("\s+", sent.strip().lower())
f = open("data#2.txt","r")
for line1 in f:
(binary,rest,rest1,test2) = re.split("\s", line1.strip())
if binary == "-1":
senti = str.replace(senti.strip('"'),"negative")
elif binary == "1":
senti = str.replace(senti.strip('"'),"positive")
elif binary == "0":
senti = str.replace(senti.strip('"'),"neutral")
print id, "\t", num, "\t", senti, "\t", sent
You are in fact missing an argument for replace; since it is a method of the string itself, you could do either:
In [72]: str.replace('one','o','1')
Out[72]: '1ne'
or
In [73]: 'one'.replace('o','1')
Out[73]: '1ne'
In your code, you likely want, e.g.
if binary == "-1":
senti = senti.strip('"').replace("-1","negative")
To skip the first line of your data#2 file, one option is
f = open("data#2.txt","r")
for line1 in f.readlines()[1:]: # skip the first line
#rest of your code here
Edit: After a chat conversation, what I think you want is more like the following:
f = open("data#2.txt","r")
datalines = f.readlines()[1:]
count = 0
for line in sys.stdin:
if count == len(datalines): break # kill the loop if we've reached the end
(tweetid,num,senti,tweets) = re.split("\t+",line.strip())
tweet = re.split("\s+", tweets.strip().lower())
# grab the right index from our list
(binary,rest,rest1,test2) = re.split("\s", datalines[count].strip())
if binary == "-1":
sentiment = "negative"
elif binary == "1":
sentiment = "positive"
elif binary == "0":
sentiment = "neutral"
print tweetid, "\t", num, "\t", sentiment, "\t", tweets
count += 1 # add to our counter

Categories