I am running a for loop in order to create a dataframe of 'New' values.
# Question snippet, reproduced as posted. The original indentation was lost,
# so the nesting of the statements below cannot be confirmed from this text.
# `pd` and `combined` are defined outside this snippet.
New = 0
Approved = 0
# DataFrame constructed with no arguments, i.e. no data is supplied here.
df = pd.DataFrame()
# `rowdata.values()` is called on every element, so each element of
# `combined` is presumably a dict -- TODO confirm against the caller.
for row, rowdata in enumerate(combined):
for col, value in enumerate(rowdata.values()):
# Value at position 0 is only printed.
if col == 0:
print(value)
# Value at position 2 is added to `New`, which is never reset in this snippet.
if col == 2:
New += value
print('Original New')
print(value)
# Value at position 4 is stored in `Approved`.
if col == 4:
Approved = value
# A positive `Approved` is subtracted from `New`.
if Approved > 0:
New = New - Approved
print('Updated New')
print(New)
# NOTE(review): this is the statement the question is about. It assigns the
# scalar `New` to the whole 'New' column; on a DataFrame without rows that
# presumably stores no value -- confirm against the pandas documentation.
df['New'] = New
Everything in this code seems to be working except for the last df['New'] = New statement. Any ideas on why that might be happening would be greatly appreciated.
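df['New'] = New assigns a scalar to an entire column; it is the wrong way to insert a single row. On a DataFrame that has no rows yet, the column is created but there is no row to hold the value.
One way to fix it is to collect the values in a list and build the DataFrame once at the end: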
# Collect one 'New' value per row, then build the DataFrame in a single step.
# `combined` and `pd` come from the surrounding program. Each element of
# `combined` must provide `.values()` (presumably a dict -- TODO confirm).
all_rows = []
New = 0  # running total; it is never reset between rows
Approved = 0
for row, rowdata in enumerate(combined):
    for col, value in enumerate(rowdata.values()):
        # The three column positions are mutually exclusive, hence `elif`.
        if col == 0:
            print(value)
        elif col == 2:
            New += value
            print('Original New')
            print(value)
        elif col == 4:
            Approved = value
            if Approved > 0:
                New = New - Approved
                print('Updated New')
                print(New)
    # Accumulate all the rows
    all_rows.append(New)
# Finally create a dataframe
df = pd.DataFrame({'New': all_rows})
I am trying to group the indices of the tuples in a list: tuples that share an element in the same position belong to the same group, while a unique tuple gets a group of its own. A tuple is unique when none of its elements equals the element in the same position of any other tuple in the list.
Example: a list in which entries for the same company are grouped together, where "same company" means the same name, the same registration number, or the same CEO name.
# Each tuple is (company name, registration number, CEO name).
# Registration numbers are written as strings: an integer literal with leading
# zeros such as 0002 is a syntax error in Python 3.
company_list = [
    ("companyA", "0002", "ceoX"),
    ("companyB", "0002", "ceoY"),
    ("companyC", "0003", "ceoX"),
    ("companyD", "004", "ceoZ"),
]
The desired output would be:
[[0,1,2],[3]]
Does anyone know of a solution for this problem?
The companies form a graph. You want to create clusters from connected companies.
Try this:
from collections import defaultdict, deque


def cluster_by_shared_field(records):
    """Group record indices into clusters of transitively connected records.

    Two records are directly connected when they hold equal values at the
    same tuple position. The clusters are the connected components of the
    resulting graph, so records linked only through a third record still end
    up together.

    Args:
        records: sequence of tuples whose values are hashable.

    Returns:
        A list of clusters ordered by their smallest index; every cluster is
        an ascending list of indices into ``records``.
    """
    # Prepare indexes: (position, value) -> indices of the records holding it.
    holders = defaultdict(list)
    for idx, record in enumerate(records):
        for key in enumerate(record):
            holders[key].append(idx)

    seen = set()
    clusters = []
    for start, _ in enumerate(records):
        if start in seen:
            continue
        # BFS to propagate the group to connected companies.
        seen.add(start)
        cluster = []
        queue = deque([start])
        while queue:
            current = queue.popleft()
            cluster.append(current)
            for key in enumerate(records[current]):
                # Popping the key ensures each shared value is expanded once.
                for neighbour in holders.pop(key, ()):
                    if neighbour not in seen:
                        seen.add(neighbour)
                        queue.append(neighbour)
        # Members are collected per cluster, so they need not be adjacent
        # in the input for the result to be assembled correctly.
        clusters.append(sorted(cluster))
    return clusters


company_list = [
    ("companyA", 2, "ceoX"),
    ("companyB", 2, "ceoY"),
    ("companyC", 3, "ceoX"),
    ("companyD", 4, "ceoZ"),
]

result = cluster_by_shared_field(company_list)
print(result)
Fafl's answer is definitely more performant. If you're not worried about performance, here is a brute-force solution that might be easier to read. I tried to make it clear with some comments.
def find_index(res, target_index):
    """Return the position of the sublist of ``res`` holding ``target_index``.

    Returns ``None`` when no sublist contains ``target_index``.
    """
    for index, sublist in enumerate(res):
        if target_index in sublist:
            # yes, it's present
            return index
    return None  # not present


def group_matching_indices(company_list):
    """Group the indices of ``company_list`` by brute-force pairwise comparison.

    Two entries match when they hold equal values at the same tuple position.
    Matching is applied transitively: when an entry links two sublists that
    already exist, those sublists are merged, so every index appears in
    exactly one sublist.

    Args:
        company_list: sequence of tuples describing the companies.

    Returns:
        A list of ascending index lists, one per group.
    """
    res = []
    for index, company_detail in enumerate(company_list):
        # Reuse the sublist that already holds `index`, or start a new one.
        index_to_add_to = find_index(res, index)
        if index_to_add_to is None:
            res.append([index])
            index_to_add_to = len(res) - 1

        for c_index, c_company_detail in enumerate(company_list):
            # inner loop to compare company details with the outer loop
            if c_index == index:
                # same, ignore
                continue
            if not any(a == b for a, b in zip(company_detail, c_company_detail)):
                continue
            # something matches, so put both indices in the same sublist
            c_group = find_index(res, c_index)
            if c_group is None:
                res[index_to_add_to].append(c_index)
            elif c_group != index_to_add_to:
                # The match bridges two existing sublists: fold the later one
                # into the earlier one so no index is listed twice.
                keep, drop = sorted((index_to_add_to, c_group))
                res[keep].extend(res.pop(drop))
                index_to_add_to = keep
    return [sorted(group) for group in res]


def main():
    """Print the grouped indices for the sample company list."""
    company_list = [
        ('companyA', '0002', 'CEOX'),
        ('companyB', '0002', 'CEOY'),
        ('companyC', '0003', 'CEOX'),
        ('companyD', '0004', 'CEOZ'),
        ('companyE', '0004', 'CEOM'),
    ]
    print(group_matching_indices(company_list))


if __name__ == '__main__':
    main()
Check this out; I put a lot of effort into it. Maybe I am missing some test cases. Performance-wise I think it's good.
I have used set() and popped the entries that lie in one group.
company_list = [
    ("companyA", 2, "ceoX"),
    ("companyB", 2, "ceoY"),
    ("companyC", 3, "ceoX"),
    ("companyD", 4, "ceoZ"),
    ("companyD", 3, "ceoW"),
]

# Pair every company with its original position. This leaves `company_list`
# untouched and keeps distinct indices even for duplicate tuples.
pending = list(enumerate(company_list))
res = []
while pending:
    # Start a new group from the first company that is still unassigned.
    seed_idx, seed = pending.pop(0)
    temp = [seed_idx]
    frontier = [seed]
    # Compare against every member added to the group, not only the seed,
    # so companies linked through an intermediate company are grouped too.
    while frontier:
        val = frontier.pop()
        unmatched = []
        for idx, other in pending:
            # Values must agree at the same position; a plain set-size test
            # would also count a value shared across different fields.
            if any(a == b for a, b in zip(val, other)):
                temp.append(idx)
                frontier.append(other)
            else:
                unmatched.append((idx, other))
        pending = unmatched
    res.append(sorted(temp))
print(res)
I am trying to check whether certain elements are in a list and to perform a numerical update, but I keep getting an error (below).
"if h2output[1] not in h1output == True or h2output[2] not in h1output == True:
IndexError: list index out of range"
# Question snippet, reproduced as posted. The original indentation was lost,
# so the nesting of the statements below cannot be confirmed from this text.
# `h1` and `h2` are iterables defined outside this snippet.
doublewin = 0
# One list per line being checked; only h1output and h2output are used below.
h1output = []
h2output = []
h3output = []
v1output = []
v2output = []
v3output = []
d1output = []
d2output = []
# Collect the distinct values of h1, preserving first-seen order.
for i in h1:
if i not in h1output:
h1output.append(i)
# Exactly two distinct values in h1 counts towards `doublewin`.
if len(h1output) == 2:
doublewin += 1
# Same de-duplication for h2.
for i in h2:
if i not in h2output:
h2output.append(i)
if len(h2output) == 2:
# NOTE(review): this is the line that raises the reported IndexError. A list
# of length 2 has valid indexes 0 and 1 only, so h2output[2] is out of range.
# Also, `a not in b == True` is a chained comparison (`a not in b and
# b == True`), which is presumably not what was intended.
if h2output[1] not in h1output == True or h2output[2] not in h1output == True:
doublewin += 1
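Since len(h2output) == 2, the list has only two positions, and Python indexing starts at zero; therefore h2output[2] is out of bounds and the index must be 0 or 1.
You have hardcoded the indexes in h2output[1] and h2output[2], and one of them is causing the issue. Please check the size of the list.
Remove the "== True" comparisons from the if condition; they are unnecessary. Worse, Python chains comparisons, so "x not in y == True" is evaluated as "x not in y and y == True", which is not what you intended.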
So here is my code, which updates many column values based on the parts obtained by splitting the 'location' column. The code works fine, but as it's iterating row by row it's not efficient enough. Can anyone help me make this code run faster, please?
# Question snippet, reproduced as posted (Python 2: `print` is a statement).
# The original indentation was lost, so the nesting of the branches below
# cannot be confirmed from this text. `df` is defined outside this snippet.
for index, row in df.iterrows():
print index
# The 'location' value is split into parts on ':'.
location_split =row['location'].split(':')
# Flags recording that a 'County...' part / a province part was already seen.
after_county=False
after_province=False
for l in location_split:
# A part ending in 'ED' is stored as the electoral district.
if l.strip().endswith('ED'):
# NOTE(review): `df[index, '...'] = l` indexes the frame with a tuple key;
# presumably a single-cell assignment such as `df.at[index, '...']` was
# intended -- confirm.
df[index, 'electoral_district'] = l
# A part starting with 'County' is stored as the county.
elif l.strip().startswith('County'):
df[index, 'county'] = l
after_county = True
# After the province was seen, any part other than 'Ireland' is stored as
# the Dublin postal district.
elif after_province ==True:
if l.strip()!='Ireland':
df[index, 'dublin_postal_district'] = l
# The part following the county is stored as the province.
elif after_county==True:
df[index, 'province'] = l.strip()
after_province = True
'map' was what I needed :)
def fill_county(column):
    """Return the first ':'-separated part of ``column`` starting with 'County'.

    Surrounding whitespace is stripped from the returned part.

    Args:
        column: a location string such as ``'Some ED: County Cork: Munster'``.

    Returns:
        The matching part, or ``''`` when no part matches or when ``column``
        has no ``split`` method (for example a missing value).
    """
    try:
        parts = column.split(':')
    except AttributeError:
        # Missing values (None, NaN) are not strings; treat them as "no county".
        return ''
    for part in parts:
        part = part.strip()
        if part.startswith('County'):
            return part
    return ''
# Series.map applies fill_county to every location and returns a Series aligned
# with df's index; the builtin map() only yields a lazy iterator on Python 3.
df['county'] = df['location'].map(fill_county)
# Question snippet, reproduced as posted (Python 2: `print` is a statement).
# The original indentation was lost, so the nesting of the statements below
# cannot be confirmed from this text.
# NOTE(review): the file is opened here and never closed in this snippet.
f = open('transaction.log','r')
# Maps ClerkKey+AccountKey to a two-element list.
ClerkHash = dict()
# NOTE(review): this single list object is the one assigned into ClerkHash
# below, so every key would share (alias) the same list.
arr = [0,0]
for line in f:
# Fields are taken from fixed character positions of each line.
# NOTE(review): a slice is [start:stop], not [start:length]; line[22:2],
# line[24:10] and line[34:2] have stop < start and are therefore empty.
Tdate = line[0:12]
AccountKey = line[12:50]
TransType = line[22:2]
ClerkKey = line[24:10]
CurrencyCode = line[34:2]
# NOTE(review): Amount stays a string here; it is later added to an int.
Amount = line[36:45]
print line
print '\n'
print AccountKey
print '\n'
# NOTE(review): two print statements on one line is not valid syntax.
print Tdate print '\n'
if TransType=="04":
# NOTE(review): the trailing `// ...` text is the asker's inline question;
# `//` does not start a comment in Python.
ClerkHash[ClerkKey+AccountKey] = arr; // is this line corrent ? i don't want to corrupt the array every time ? how should i do it ?
ClerkHash[ClerkKey+AccountKey][0]+=1
ClerkHash[ClerkKey+AccountKey][1]+= Amount
# Report keys whose first element is >= 3 and whose second element is > 1000.
# NOTE(review): the loop variable is `Key` but the body reads `key`.
for Key in ClerkHash.keys():
if ClerkHash[key][0] >= 3 and ClerkHash[key][1] > 1000:
print Key
I want to have a hash named ClerkHash[ClerkKey+AccountKey]
which consists of an array of 2 ints: the first index is the number of withdrawals, and the second is the amount.
Did I define the array and the hash correctly?
In addition, I want to sum the amount... how can I do it?
Here are a few issues I have seen so far:
Amount = line[36:45]
should be
Amount = int(line[36:45])
and
ClerkHash[ClerkKey+AccountKey] = arr[0,0]
should be
ClerkHash[ClerkKey+AccountKey] = [0,0]
Check your slice intervals! The second argument is another index, NOT the number of steps to take from the first index. I guess
TransType = line[22:2]
should rather be
TransType = line[22:24]
You overwrite values if you set
ClerkHash[ClerkKey+AccountKey] = [0, 0]
each time you encounter TransType == "04". So change
if TransType=="04":
ClerkHash[ClerkKey+AccountKey] = arr[0,0]
ClerkHash[ClerkKey+AccountKey][0]+=1
ClerkHash[ClerkKey+AccountKey][1]+= Amount
to
if TransType == "04":
    # The first withdrawal for this clerk/account pair creates a fresh
    # [count, total] list; later ones update that list in place. `not in`
    # replaces dict.has_key(), which no longer exists in Python 3.
    if ClerkKey + AccountKey not in ClerkHash:
        ClerkHash[ClerkKey + AccountKey] = [1, Amount]
    else:
        ClerkHash[ClerkKey + AccountKey][0] += 1
        ClerkHash[ClerkKey + AccountKey][1] += Amount