I want to get string from list - python

there is a matrix like below
[[a1, b, c],
[d2, e, a1],
[c1, dfd, u],
....
]
If a cell matches a specific word, I get its index in the matrix.
Then I find the row where the match is in a fixed column (column 0) and read column 1 of that row.
Then I convert the result to a list.
The result I get looks like this: ["RESULT DATA"].
I just want the string RESULT DATA on its own, without the surrounding [''].
# Row and column indices of every cell equal to SpecificWord.
r, c = np.where(Matrix == SpecificWord)
# NOTE(review): with a single argument np.where returns a tuple of index
# arrays, and these are positions within `c`, not row numbers of Matrix --
# presumably r[c == 0] was intended; confirm.
find_row = np.where(c == 0)
# NOTE(review): indexing with that tuple appears to add an extra dimension,
# which would explain the nested list in the output -- TODO confirm.
result_data = Matrix[find_row,1].tolist()
print(str(result_data ))
print(result_data[0])

Solved
row, col = np.where(Terms == ElemData)
cList = col .tolist()
rLits = row.tolist()
if 0 not in cList:
return None
else:
num_col = cList.index(0)
num_row = rLits[num_col]
data = Terms[num_row, 1].tolist()
return data

Related

Custom function - add as column to frame

I have a function which creates a dataframe, and a sub-function which does some diff's on that data frame to come up with a result - how do I add this result as a column to my dataframe.
def custom_compare_eq(series, other):
    """Yield one boolean per row telling whether two columns match.

    Each value of ``series.values`` and the value at the same position in
    ``other.values`` is converted with ``eval(str(value))`` and compared:

    * different types          -> ``False``
    * two lists                -> equal when they hold the same set of items
    * any other matching types -> plain ``==``

    Exactly one result is yielded for every value in ``series``, so the
    output can be assigned back as a column of the same length.

    Raises:
        IndexError: if ``other`` has fewer values than ``series``.
        TypeError: if a list contains unhashable items.
    """
    other_values = other.values
    for i, left in enumerate(series.values):
        # NOTE(security): eval executes arbitrary expressions. Only use this
        # on trusted data; ast.literal_eval is the safe choice when every
        # cell is a plain literal.
        r1 = eval(str(left))
        r2 = eval(str(other_values[i]))
        if type(r1) is not type(r2):
            yield False
        elif type(r1) is list:
            # Order and duplicates are deliberately ignored for lists.
            yield set(r1) == set(r2)
        else:
            # Covers int and every other type, so no row is ever skipped.
            yield r1 == r2
print('store the result output')
# Materialise the generator into a list so it can be assigned as a column.
result = list(custom_compare_eq(df.a_series, df.b_series))
print('add new match column to df')
# NOTE(review): presumably this needs one entry in `result` per row of df --
# confirm that the lengths agree.
df['match'] = result
print(df)
the last print(df) I expected to have the new 'match' column but it does not :(

create sublist of indices with each sublist referring to set of unique tuples from a list of tuples

I am trying to create sublists of indices from a list of tuples, grouping together the indices of tuples that have any element in common and keeping the indices of unique tuples separate. A tuple is unique when none of its elements equals the element in the same position of any other tuple in the list.
Example: List which groups same company together,with same company defined as same name or same registration number or same name of CEO.
# Registration numbers are quoted: integer literals with leading zeros
# (0002, 004) are a syntax error in Python 3.
company_list = [("companyA", "0002", "ceoX"),
                ("companyB", "0002", "ceoY"),
                ("companyC", "0003", "ceoX"),
                ("companyD", "004", "ceoZ")]
The desired output would be:
[[0,1,2],[3]]
Does anyone know of a solution for this problem?
The companies form a graph. You want to create clusters from connected companies.
Try this:
def group_companies(companies):
    """Group the indices of companies that are connected to each other.

    Two companies are directly connected when they hold the same value in
    the same field position (name, registration number, CEO, ...). Groups
    are the connected components of that graph, so links are followed
    transitively.

    Args:
        companies: sequence of tuples with hashable fields.

    Returns:
        A list of groups. Each group is a sorted list of indices into
        ``companies``; groups are ordered by their smallest index.
    """
    from collections import defaultdict, deque

    # Prepare indexes: (field position, value) -> indices holding that value
    holders = defaultdict(list)
    for idx, record in enumerate(companies):
        for pos, value in enumerate(record):
            holders[(pos, value)].append(idx)

    # BFS to propagate group to connected companies
    visited = [False] * len(companies)
    clusters = []
    for start in range(len(companies)):
        if visited[start]:
            continue
        visited[start] = True
        members = []
        queue = deque([start])
        while queue:
            current = queue.popleft()
            members.append(current)
            for pos, value in enumerate(companies[current]):
                for neighbour in holders[(pos, value)]:
                    if not visited[neighbour]:
                        visited[neighbour] = True
                        queue.append(neighbour)
        # Members are collected per component, so a group may contain
        # indices that are not adjacent in the input list.
        clusters.append(sorted(members))
    return clusters


company_list = [
    ("companyA", 2, "ceoX"),
    ("companyB", 2, "ceoY"),
    ("companyC", 3, "ceoX"),
    ("companyD", 4, "ceoZ"),
]

# Assemble result
result = group_companies(company_list)
print(result)
Fafl's answer is definitely more performant. If you're not worried about performance, here is a brute-force solution that might be easier to read. Tried to make it clear with some comments.
def find_index(res, target_index):
    """Return the position of the sublist of *res* holding *target_index*.

    Returns ``None`` when no sublist contains it.
    """
    for index, sublist in enumerate(res):
        if target_index in sublist:
            # yes, it's present
            return index
    return None  # not present


def main(company_list=None):
    """Group company indices by shared name, number or CEO (brute force).

    Args:
        company_list: optional list of company tuples; the built-in sample
            data is used when omitted.

    Returns:
        The groups, which are also printed: sorted lists of indices,
        ordered by their smallest index.
    """
    if company_list is None:
        company_list = [
            ('companyA', '0002', 'CEOX'),
            ('companyB', '0002', 'CEOY'),
            ('companyC', '0003', 'CEOX'),
            ('companyD', '0004', 'CEOZ'),
            ('companyE', '0004', 'CEOM'),
        ]
    res = []
    for index, company_detail in enumerate(company_list):
        # if `index` is already present in a sublist in `res`, add to that
        # sublist; otherwise start a new sublist in `res`
        index_to_add_to = find_index(res, index)
        if index_to_add_to is None:
            res.append([index])
            index_to_add_to = len(res) - 1
        for c_index, c_company_detail in enumerate(company_list):
            # inner loop to compare company details with the other loop
            if c_index == index:
                # same, ignore
                continue
            if not any(a == b for a, b in zip(company_detail, c_company_detail)):
                continue
            # something matches
            other = find_index(res, c_index)
            if other is None:
                res[index_to_add_to].append(c_index)
            elif other != index_to_add_to:
                # The match already belongs to another group: merge the two
                # so that no index ever ends up in more than one sublist.
                absorbed = res.pop(other)
                if other < index_to_add_to:
                    index_to_add_to -= 1  # positions shifted after the pop
                res[index_to_add_to].extend(absorbed)
    res = sorted(sorted(group) for group in res)
    print(res)
    return res


if __name__ == '__main__':
    main()
Check this out; I tried hard on it. Maybe I am missing some test cases. Performance-wise I think it's good.
I have used set() and popped the tuples that lie in one group.
company_list = [
    ("companyA", 2, "ceoX"),
    ("companyB", 2, "ceoY"),
    ("companyC", 3, "ceoX"),
    ("companyD", 4, "ceoZ"),
    ("companyD", 3, "ceoW"),
]

# Work on (index, tuple) pairs so that company_list itself is left intact
# and duplicate tuples keep their own indices.
pending = list(enumerate(company_list))
res = []
while pending:
    first_idx, first = pending.pop(0)
    temp = [first_idx]
    # One set per field position with every value seen in this group so far.
    seen = [{value} for value in first]
    grew = True
    while grew:
        # Sweep again whenever the group grew: a tuple popped late in a
        # sweep can link in tuples that were skipped earlier.
        grew = False
        new_idx = 0
        while new_idx < len(pending):
            idx, candidate = pending[new_idx]
            if any(value in field for value, field in zip(candidate, seen)):
                pending.pop(new_idx)
                temp.append(idx)
                for value, field in zip(candidate, seen):
                    field.add(value)
                grew = True
            else:
                new_idx += 1
    res.append(sorted(temp))
print(res)

editing data in a list while the length of data is different

I have a list like this:
a = ['c0001203', 'c0334', 'c0000456', 'c034554', 'c00034506']. I need to remove the 'c' and all the 0s after the 'c', until the data starts with a non-zero digit. The length of each item is variable. The output should be like this:
a = [1203, 334, 456, 34554, 34506]. How can I do it without using regular expressions?
Thank you.
You can drop the c and then .lstrip() the 0's like:
Code:
# Drop the first character of each item, then strip the leading zeros.
b = [x[1:].lstrip('0') for x in a]
Test Code:
# Sample identifiers: a one-letter prefix followed by zero-padded digits.
a = ['c0001203', 'c0334', 'c0000456', 'c034554', 'c00034506']
# Remove the prefix character, then the zero padding, from every entry.
b = list(map(lambda item: item[1:].lstrip('0'), a))
print(b)
Results:
['1203', '334', '456', '34554', '34506']
Start iterating from index 1 (ignoring index 0) until you find a non-zero character, then store the substring from that character to the end:
def process(str_arr):
    """Strip the one-character prefix and the zero padding from each string.

    For every string, the first character is ignored and the remainder is
    scanned for the first character that is not ``'0'``; the substring from
    there to the end is collected.

    Strings with no non-zero character after the prefix (e.g. ``'c000'``)
    contribute nothing to the result.

    Args:
        str_arr: iterable of strings such as ``'c0001203'``.

    Returns:
        List of the stripped strings, in input order.
    """
    res = []
    for cur_str in str_arr:
        # Start at 1 to skip the prefix character.
        for pos in range(1, len(cur_str)):
            if cur_str[pos] != '0':
                res.append(cur_str[pos:])
                break
    return res


ret = process(['c0001203', 'c0334', 'c0000456', 'c034554', 'c00034506'])
print(ret)
Output :
['1203', '334', '456', '34554', '34506']

Efficiently update columns based on one of the columns split value

So here is my code, which updates many column values based on the split values of the column 'location'. The code works fine, but as it's iterating by row it's not efficient enough. Can anyone help me make this code work faster, please?
# Walk every row and distribute the ':'-separated parts of 'location' over
# the detail columns.
for index, row in df.iterrows():
    print(index)
    # These flags track how far through the address we are: the part that
    # follows the county is taken as the province, and parts after the
    # province as the Dublin postal district.
    after_county = False
    after_province = False
    for part in row['location'].split(':'):
        stripped = part.strip()
        # df.loc[row_label, column] writes a single cell; df[index, column]
        # would instead assign a column named by the tuple.
        if stripped.endswith('ED'):
            df.loc[index, 'electoral_district'] = part
        elif stripped.startswith('County'):
            df.loc[index, 'county'] = part
            after_county = True
        elif after_province:
            if stripped != 'Ireland':
                df.loc[index, 'dublin_postal_district'] = part
        elif after_county:
            # Only the province is stored stripped; the other columns keep
            # the part exactly as it appeared between the colons.
            df.loc[index, 'province'] = stripped
            after_province = True
'map' was what I needed :)
def fill_county(column):
    """Return the county part of a ':'-separated location string.

    The first part that, once stripped of surrounding whitespace, starts
    with ``'County'`` is returned in its stripped form.

    Args:
        column: one location value, e.g. ``'Main St : County Cork : Munster'``.

    Returns:
        The stripped county part, or ``''`` when there is none or when the
        value is not a string (for example a missing value).
    """
    try:
        parts = column.split(':')
    except AttributeError:
        # Missing values have no split(); treat them as "no county".
        return ''
    for part in parts:
        part = part.strip()
        if part.startswith('County'):
            return part
    return ''
# Series.map applies fill_county to every value and returns a Series aligned
# with df; the builtin map() would only yield a lazy iterator on Python 3.
df['county'] = df['location'].map(fill_county)

validating list as valid Matrix

def matrixDimensions(m):
    """Return a message describing the dimensions of a matrix.

    ``m`` is a list of rows. It is a valid matrix when it has at least one
    row and every row has the same length as the first one. Only row
    lengths are checked, not the types of the items inside the rows.

    Args:
        m: list of rows (each row a sized sequence).

    Returns:
        ``"This is a RxC matrix."`` for a valid matrix, otherwise
        ``'This is not a valid matrix.'``.
    """
    if not m:
        # No rows at all: there is no first row to take the width from.
        return 'This is not a valid matrix.'
    mRowNum = len(m)       # number of rows
    mColNum = len(m[0])    # number of columns, taken from the first row
    if any(len(row) != mColNum for row in m):
        return 'This is not a valid matrix.'
    return "This is a %ix%i matrix." % (mRowNum, mColNum)
There has got to be simpler logic. Also, how do you check for nested lists? For example, I think this is not a valid matrix, but it would pass this test:
([ [1,4, 3], [4,0,21],[3,4,[5,7]],[1,2,3],[1,2,3]])
You could try something like this instead:
def are_int(iterable):
    """Return True when every item of *iterable* is an ``int`` instance.

    ``bool`` is a subclass of ``int``, so booleans are accepted as well.
    """
    return all(isinstance(i, int) for i in iterable)


def matrix_dimensions(matrix):
    """Return ``(rows, columns)`` of a matrix of ints, or an error message.

    A matrix is valid when it has at least one row, every row has the same
    length as the first one, and every item is an int (so nested lists
    inside a row are rejected).

    Args:
        matrix: list of rows.

    Returns:
        A ``(row_count, column_count)`` tuple for a valid matrix, otherwise
        the string ``'This is not a valid matrix'``.
    """
    if not matrix:
        # No rows: there is no first row to take the width from.
        return 'This is not a valid matrix'
    col = len(matrix[0])
    if all(len(row) == col and are_int(row) for row in matrix):
        return len(matrix), col
    return 'This is not a valid matrix'


m = [[1, 4, 3], [4, 0, 21], [3, 4, [5, 7]], [1, 2, 3], [1, 2, 3]]
l = [[1, 4, 3], [4, 0, 21], [3, 4, 7], [1, 2, 3], [1, 2, 3]]
print(matrix_dimensions(m))
print(matrix_dimensions(l))
Output:
This is not a valid matrix
(5, 3)

Categories