I am working with a pandas dataframe (the dataframe is called market_info_df):
And I have the following Python code:
# Build a combined key column by concatenating issue_status and market_phase
# (presumably both are string columns — TODO confirm dtype).
market_info_df['is_and_mp'] = market_info_df['issue_status'] + market_info_df['market_phase']
# Values that is_and_mp is tested against when deciding whether to map a row.
no_collision_issue_status = ['000', '200', '203', '204', '300']
# Lookup table from tuple keys to MARKET_STATES members; MARKET_STATES itself is
# defined outside this snippet.
# NOTE(review): the second key has three elements while every other key has
# two — confirm this is intended.
MARKET_STATES_DICT = {
('000', ' '): MARKET_STATES.CLOSED,
('100', ' ', 'F'): MARKET_STATES.OPENING_AUCTION,
('200', ' '): MARKET_STATES.CONTINUOUS_TRADING,
('203', ' '): MARKET_STATES.UNSCHEDULED_AUCTION,
('204', ' '): MARKET_STATES.UNSCHEDULED_AUCTION,
('100', 'B0'): MARKET_STATES.UNSCHEDULED_AUCTION,
('200', 'B1'): MARKET_STATES.CONTINUOUS_TRADING,
('400', 'C0'): MARKET_STATES.HALTED,
('400', 'C1'): MARKET_STATES.CONTINUOUS_TRADING,
('400', 'D0'): MARKET_STATES.HALTED,
('400', 'D1'): MARKET_STATES.POST_TRADE}
I am trying to write a condition such that if the is_and_mp is in the no_collision_issue_status list, OR the trading_status is not ' ' (a single space), then use the MARKET_STATES_DICT to map a new column called market_state.
Here is what I have written, but I get an error TypeError: unhashable type: 'Series':
# NOTE(review): this is the statement reported to raise
# "TypeError: unhashable type: 'Series'". The right-hand side indexes
# MARKET_STATES_DICT with a tuple of two whole Series instead of per-row values.
# The mask also applies Python's `in` to a Series (not an element-wise test),
# and `~` binds tighter than `==`, so `~series == ' '` inverts the Series
# before comparing it.
# NOTE(review): the column is spelled 'trading_state' here but
# 'trading_status' elsewhere in this file — confirm which is correct.
market_info_df.loc[(market_info_df['is_and_mp'] in no_collision_issue_status) | (~market_info_df['trading_state'] == ' '),
'market_state'] = MARKET_STATES_DICT[(market_info_df['issue_status'], market_info_df['trading_state'])]
I understand what is wrong and why I am getting the error, but I am not sure how to fix it!
Use apply function on dataframe. Check for the desired condition as you have written. If true then return the value from dict else return None:
# Rows that satisfy the condition are mapped through MARKET_STATES_DICT; every
# other row gets None. dict.get is used so that an (is_and_mp, trading_status)
# pair that passes the condition but has no dictionary entry also yields None
# instead of aborting the whole apply() with a KeyError.
market_info_df["market_state"] = market_info_df.apply(
    lambda row: MARKET_STATES_DICT.get((row["is_and_mp"], row["trading_status"]))
    if row["is_and_mp"] in no_collision_issue_status or row["trading_status"] != " "
    else None,
    axis=1,
)
Full example with dummy data:
# Dummy data: three rows of (issue_status, market_phase, trading_status).
market_info_df = pd.DataFrame(
    data=[["10", "0", "B0"], ["20", "0", " "], ["40", "0", "D1"]],
    columns=["issue_status", "market_phase", "trading_status"],
)
# Combined key: issue_status and market_phase concatenated as strings.
market_info_df["is_and_mp"] = market_info_df["issue_status"] + market_info_df["market_phase"]
no_collision_issue_status = ["000", "200", "203", "204", "300"]
# Lookup from (is_and_mp, trading_status) to the market state name.
MARKET_STATES_DICT = {
    ("000", " "): "CLOSED",
    ("100", " ", "F"): "OPENING_AUCTION",
    ("200", " "): "CONTINUOUS_TRADING",
    ("203", " "): "UNSCHEDULED_AUCTION",
    ("204", " "): "UNSCHEDULED_AUCTION",
    ("100", "B0"): "UNSCHEDULED_AUCTION",
    ("200", "B1"): "CONTINUOUS_TRADING",
    ("400", "C0"): "HALTED",
    ("400", "C1"): "CONTINUOUS_TRADING",
    ("400", "D0"): "HALTED",
    ("400", "D1"): "POST_TRADE",
}


def market_state_for_row(row):
    """Return the market state for one row, or None when no mapping applies.

    A row is looked up when its ``is_and_mp`` is listed in
    ``no_collision_issue_status`` or its ``trading_status`` is not a single
    space. The lookup uses ``dict.get`` so that a pair without an entry in
    ``MARKET_STATES_DICT`` (for example ``("300", " ")``) gives None instead
    of raising KeyError.
    """
    if row["is_and_mp"] in no_collision_issue_status or row["trading_status"] != " ":
        return MARKET_STATES_DICT.get((row["is_and_mp"], row["trading_status"]))
    return None


market_info_df["market_state"] = market_info_df.apply(market_state_for_row, axis=1)
[Out]:
issue_status market_phase trading_status is_and_mp market_state
0 10 0 B0 100 UNSCHEDULED_AUCTION
1 20 0 200 CONTINUOUS_TRADING
2 40 0 D1 400 POST_TRADE
I have a nested list:
Table=[['','','','',''],
['','','','',''],
['','','','',''],
['','','','',''],
['','','','',''],
['','','','','']]
I have randomly placed some values in Table and now I want to place other things in the 2D neighbours of those values. E.g.:
Table=[['','','','',''],
['','','','',''],
['','','','',''],
['','','value','',''],
['','','','',''],
['','','','','']]
Then i want to add:
Table=[['','','','',''],
['','','','',''],
['','','1','',''],
['','1','value','1',''],
['','','1','',''],
['','','','','']]
Below is all my code. I don't know why, but it wouldn't accept it in any other format, sorry :/
# Writes "1" into cells near the first 'nuke' of the current row.
# Takes no arguments: it reads the names j, i and board, none of which are
# defined inside this function, so they must exist at module level when it
# is called.
# NOTE(review): the original indentation of this function was lost, so which
# statements belong to which `if` cannot be determined from this text.
def add_nukes():
# Position of the first 'nuke' in row j (list.index returns the first match only).
pos=j.index('nuke')
# j[0] is the first cell of the row, so this is a substring test on that cell.
if "nuke" not in j[0]:j[pos+1]='1'
# Likewise a substring test on the last cell of the row.
if "nuke" not in j[-1]:
# When pos is 0, pos-1 is -1 and addresses the last cell of the row.
j[pos-1] = "1"
# NOTE(review): board is indexed here with the column position as the row
# index and with i (not a row number) in the other subscript — looks
# unintended; confirm.
board[pos][i-1]="1"
board[i+1][pos]="1"
import random
# Total number of cells on the board.
size=150
# Round an odd size up to the next even number (150 is already even).
if size%2==1:
size+=1
# size x size grid of blanks; the name is rebound to a different grid further
# down before this one is ever read.
board = [[" "]*size for i in range(size)]
bombs = 25
# `bombs` 'nuke' cells followed by blanks, `size` cells in total, then shuffled.
all_cells = ["nuke"] * bombs + [" "] * (size - bombs)
random.shuffle(all_cells)
# Cut the shuffled cells into rows of 10; this replaces the grid built above.
board = [all_cells[i:i+10] for i in range(0, size, 10)]
# Incremented once per visited cell; never read in this snippet.
count=0
# j is the current row (a list) and i a column index; add_nukes() reads both
# as globals.
for j in board:
for i in range(len(j)):
count+=1
if "nuke" in j[i]:
add_nukes()
# NOTE(review): same condition as the `if` above — if this `elif` is attached
# to that `if`, it can never run.
elif "nuke" in j[i]:
add_nukes()
# Python 2 print statement.
for item in board:
print item
Any value in Table is identified uniquely by its x and y coordinates, i.e. the element in the 2nd column (x == 1 because 0-indexed) and 3rd row (y == 2) is Table[y][x] == Table[2][1].
The four immediate neighbours of any cell A are the cells with x one away from A OR with y one away from A. If A is Table[y][x], then the neighbours are [Table[y - 1][x], Table[y + 1][x], Table[y][x - 1], Table[y][x + 1]].
Just like @Aurel Bílý mentioned, there are four neighbouring coordinates to which you need to add a value in this specific case: [Table[y - 1][x], Table[y + 1][x], Table[y][x - 1], Table[y][x + 1]].
In order to do that, you must first ensure that these coordinates are valid and do not raise an IndexError exception. Once you have made sure that these coordinates are valid, you can safely write to them in your table.
The code below demonstrates this:
Table=[['','','','',''],
['','','','',''],
['','','','',''],
['','','value','',''],
['','','','',''],
['','','','','']]
def isInBounds(Table,x,y):
    """Return True when ``Table[x][y]`` addresses an existing cell.

    ``x`` indexes the outer list (the row) and ``y`` the inner list (the
    column). Negative indices are rejected, so Python's wrap-around indexing
    never counts as in bounds.

    The column bound is taken from the addressed row ``Table[x]`` rather than
    from ``Table[0]``, so rows of different lengths are handled correctly.
    The row check runs first, which guarantees that ``Table[x]`` exists (and
    that an empty table simply yields False).
    """
    return 0 <= x < len(Table) and 0 <= y < len(Table[x])
def addValue(Table,x,y,value):
    """Store ``value`` in ``Table[x][y]`` if that cell exists.

    Coordinates rejected by ``isInBounds`` are ignored silently; the table is
    modified in place and nothing is returned.
    """
    if not isInBounds(Table, x, y):
        return
    Table[x][y] = value
def addValuesAround(Table,x,y,value):
    """Write ``value`` into the four cells orthogonally adjacent to ``(x, y)``.

    Each write goes through ``addValue``, so neighbours that fall outside the
    table are skipped. The cell ``(x, y)`` itself is left untouched.
    """
    # Offsets listed in the order the neighbours are written:
    # previous row, previous column, next row, next column.
    neighbour_offsets = ((-1, 0), (0, -1), (1, 0), (0, 1))
    for row_step, col_step in neighbour_offsets:
        addValue(Table, x + row_step, y + col_step, value)
addValuesAround(Table,3,2,1)
for elem in Table:
print(elem)
This will return:
['', '', '', '', '']
['', '', '', '', '']
['', '', 1, '', '']
['', 1, 'value', 1, '']
['', '', 1, '', '']
['', '', '', '', '']
EDIT:
I think I got it, using both of our codes. Just be sure to change the syntax of the print function, because you're using Python 2.7 and I use Python 3.6:
import random


def isInBounds(Table,x,y):
    """Return True when ``Table[x][y]`` addresses an existing cell.

    ``x`` is the row index and ``y`` the column index. Negative indices are
    rejected. The column bound comes from the addressed row, so ragged tables
    are handled correctly.
    """
    return 0 <= x < len(Table) and 0 <= y < len(Table[x])


def addValue(Table,x,y,value):
    """Store ``value`` in ``Table[x][y]``; do nothing when out of bounds."""
    if isInBounds(Table,x,y):
        Table[x][y] = value


def addValuesAround(Table,x,y,value):
    """Write ``value`` into the four orthogonal neighbours of ``(x, y)``."""
    addValue(Table,x-1,y,value)
    addValue(Table,x,y-1,value)
    addValue(Table,x+1,y,value)
    addValue(Table,x,y+1,value)


# Total number of cells; an odd size is rounded up to the next even number.
size=150
if size%2==1:
    size+=1
bombs = 25
row_width = 10

# Shuffle a flat list of cells, then cut it into rows of `row_width`.
all_cells = ["nuke"] * bombs + [" "] * (size - bombs)
random.shuffle(all_cells)
board = [all_cells[i:i+row_width] for i in range(0, size, row_width)]

# Record every bomb position BEFORE marking anything: marking while scanning
# would overwrite a bomb that sits next to another bomb, and the overwritten
# bomb would then neither survive nor mark its own neighbours.
nuke_cells = []
for i, row in enumerate(board):
    for j, cell in enumerate(row):
        if cell == 'nuke':
            nuke_cells.append((i, j))

for i, j in nuke_cells:
    addValuesAround(board,i,j,"1")

# Put back any bomb that received a "1" from an adjacent bomb.
for i, j in nuke_cells:
    board[i][j] = 'nuke'

for item in board:
    print(item)
This will give an instance of a board like this:
[' ', ' ', ' ', ' ', '1', ' ', '1', ' ', '1', ' ']
[' ', ' ', ' ', '1', 'nuke', '1', 'nuke', '1', 'nuke', '1']
['1', ' ', ' ', ' ', '1', ' ', '1', ' ', '1', '1']
['nuke', '1', '1', '1', 'nuke', '1', ' ', ' ', '1', 'nuke']
['1', '1', 'nuke', '1', '1', ' ', '1', ' ', ' ', '1']
[' ', ' ', '1', ' ', ' ', '1', 'nuke', '1', ' ', ' ']
[' ', ' ', '1', ' ', ' ', '1', '1', ' ', ' ', ' ']
[' ', '1', 'nuke', '1', '1', 'nuke', '1', ' ', ' ', ' ']
['1', 'nuke', '1', ' ', '1', '1', '1', ' ', '1', ' ']
[' ', '1', 'nuke', '1', 'nuke', '1', 'nuke', '1', 'nuke', '1']
['1', 'nuke', '1', ' ', '1', ' ', '1', ' ', '1', ' ']
[' ', '1', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
[' ', ' ', '1', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
[' ', '1', 'nuke', '1', ' ', '1', ' ', '1', ' ', ' ']
[' ', ' ', '1', ' ', '1', 'nuke', '1', 'nuke', '1', ' ']
Given this:
['2014\\2014-01 Jan\\2014-01-01',
'2014\\2014-01 Jan\\2014-01-02',
'2014\\2014-01 Jan\\2014-01-03',
'2014\\2014-01 Jan\\2014-01-04',
'2014\\2014-01 Jan\\2014-01-05',
'2014\\2014-01 Jan\\2014-01-06',
'2014\\2014-01 Jan\\2014-01-07',
'2014\\2014-01 Jan\\2014-01-08',
'2014\\2014-01 Jan\\2014-01-09',
'2014\\2014-01 Jan\\2014-01-10',
'2014\\2014-01 Jan\\2014-01-11',
'2014\\2014-01 Jan\\2014-01-12',
'2014\\2014-01 Jan\\2014-01-13',
'2014\\2014-01 Jan\\2014-01-14',
'2014\\2014-01 Jan\\2014-01-15',
'2014\\2014-01 Jan\\2014-01-16',
'2014\\2014-01 Jan\\2014-01-17',
'2014\\2014-01 Jan\\2014-01-18',
'2014\\2014-01 Jan\\2014-01-19',
'2014\\2014-01 Jan\\2014-01-20',
'2014\\2014-01 Jan\\2014-01-21',
'2014\\2014-01 Jan\\2014-01-22',
'2014\\2014-01 Jan\\2014-01-23',
'2014\\2014-01 Jan\\2014-01-24',
'2014\\2014-01 Jan\\2014-01-25',
'2014\\2014-01 Jan\\2014-01-26',
'2014\\2014-01 Jan\\2014-01-27',
'2014\\2014-01 Jan\\2014-01-28',
'2014\\2014-01 Jan\\2014-01-29',
'2014\\2014-01 Jan\\2014-01-30',
'2014\\2014-01 Jan\\2014-01-31',
'2014\\2014-02 Feb\\2014-02-01',
'2014\\2014-02 Feb\\2014-02-02',
'2014\\2014-02 Feb\\2014-02-03',
'2014\\2014-02 Feb\\2014-02-04',
'2014\\2014-02 Feb\\2014-02-05',
'2014\\2014-02 Feb\\2014-02-06',
'2014\\2014-02 Feb\\2014-02-07',
'2014\\2014-02 Feb\\2014-02-08',
'2014\\2014-02 Feb\\2014-02-09',
'2014\\2014-02 Feb\\2014-02-10',
'2014\\2014-02 Feb\\2014-02-11',
'2014\\2014-02 Feb\\2014-02-12',
'2014\\2014-02 Feb\\2014-02-13',
'2014\\2014-02 Feb\\2014-02-14',
'2014\\2014-02 Feb\\2014-02-15',
'2014\\2014-02 Feb\\2014-02-16',
'2014\\2014-02 Feb\\2014-02-17',
'2014\\2014-02 Feb\\2014-02-18',
'2014\\2014-02 Feb\\2014-02-19']
How do you get something like this? (Solution 1: delimiter based, with user definable delimiter)
['2014\\2014-01 Jan\\2014-01-01',
' \\2014-01-02',
' \\2014-01-03',
' \\2014-01-04',
' \\2014-01-05',
' \\2014-01-06',
' \\2014-01-07',
' \\2014-01-08',
' \\2014-01-09',
' \\2014-01-10',
' \\2014-01-11',
' \\2014-01-12',
' \\2014-01-13',
' \\2014-01-14',
' \\2014-01-15',
' \\2014-01-16',
' \\2014-01-17',
' \\2014-01-18',
' \\2014-01-19',
' \\2014-01-20',
' \\2014-01-21',
' \\2014-01-22',
' \\2014-01-23',
' \\2014-01-24',
' \\2014-01-25',
' \\2014-01-26',
' \\2014-01-27',
' \\2014-01-28',
' \\2014-01-29',
' \\2014-01-30',
' \\2014-01-31',
' \\2014-02 Feb\\2014-02-01',
' \\2014-02-02',
' \\2014-02-03',
' \\2014-02-04',
' \\2014-02-05',
' \\2014-02-06',
' \\2014-02-07',
' \\2014-02-08',
' \\2014-02-09',
' \\2014-02-10',
' \\2014-02-11',
' \\2014-02-12',
' \\2014-02-13',
' \\2014-02-14',
' \\2014-02-15',
' \\2014-02-16',
' \\2014-02-17',
' \\2014-02-18',
' \\2014-02-19']
I encounter this situation quite often, basically I have a list of strings that I want to make it easier to process visually by removing redundant matching elements at the beginning of the string. Now I know this is what a TREE output is for normal folder traversal, but these are not real folders, but just strings in a list.
Ideally the function would accept a hierarchy delimiter, or just work on a character-by-character basis (seperator=None).
def printheirarchy(data,seperator=","):
The output for a character level hierarchy would be like following: (Solution 2: character by character)
['2014\\2014-01 Jan\\2014-01-01',
' 2',
' 3',
' 4',
' 5',
' 6',
' 7',
' 8',
' 9',
' 10',
' 1',
' 2',
' 3',
' 4',
' 5',
' 6',
' 7',
' 8',
' 9',
' 20',
' 1',
' 2',
' 3',
' 4',
' 5',
' 6',
' 7',
' 8',
' 9',
' 30',
' 1',
' 2 Feb\\2014-02-01',
' 2',
' 3',
' 4',
' 5',
' 6',
' 7',
' 8',
' 9',
' 10',
' 1',
' 2',
' 3',
' 4',
' 5',
' 6',
' 7',
' 8',
' 9']
This seems less useful in this example but is very evident when analyzing urls, logs ...etc. Ideally you would just grey out the similar parts, rather than remove them, but I don't even know how to begin with that. (or conversely, bold the differences). Basically you are comparing each element with the previous element and highlighting differences & suppressing similarities.
I've searched and found many options that are close to this, but not exactly this. os.path.commonprefix is an example. Maybe difflib?
The value is in reducing visual clutter when examining lists of items.
Seems like you want to reinvent a http://en.wikipedia.org/wiki/Radix_tree
Anyhow, here's a simple generator:
def grouped(iterable, prefix_len=16):
    """Yield each item with a repeated fixed-width prefix blanked out.

    Every item is split into its first ``prefix_len`` characters and the rest.
    When the leading part equals that of the previous item it is replaced by
    ``prefix_len`` spaces; otherwise the item is yielded unchanged and its
    leading part becomes the new reference.

    ``prefix_len`` defaults to 16, the width that was previously hard-coded,
    so existing callers see no change.
    """
    prefix = None
    for item in iterable:
        pre, suf = item[:prefix_len], item[prefix_len:]
        if pre != prefix:
            prefix = pre
            yield item
        else:
            yield " " * prefix_len + suf
Nice question. How about this small solution:
def commonPrefix(a, b):
    """Return how many leading elements ``a`` and ``b`` have in common.

    Works for any two sequences (strings, lists of path components, ...);
    the count stops at the first differing pair or at the end of the shorter
    sequence.
    """
    matched = 0
    for left, right in zip(a, b):
        if not left == right:
            break
        matched += 1
    return matched
def eachWithPrefix(v):
    """Yield ``(shared, item)`` for every item of ``v``.

    ``shared`` is the length of the prefix the item has in common with the
    item before it, as computed by ``commonPrefix``. The first item is
    compared against an empty string, so its count is always 0.
    """
    previous = ''
    for current in v:
        shared = commonPrefix(previous, current)
        yield shared, current
        previous = current
Now you can choose what you want:
list(eachWithPrefix(v))
will return a list of your values and each will state how many characters are equal to the former line, so
print '\n'.join(' '*p + x[p:] for p, x in eachWithPrefix(v))
Will print the second solution you proposed.
print '\n'.join('\t' * p + '\\'.join(x[p:]) for p, x in eachWithPrefix(x.split('\\') for x in v))
on the other hand will perform the same action for the delimiter \ and replace the to-be-omitted parts with tab stops. This is not quite the format you proposed in your first output example but I guess you get the point.
Try:
print '\n'.join('\\'.join([ s if i >= p else ' '*len(s) for i, s in enumerate(x) ]) for p, x in eachWithPrefix(x.split('\\') for x in v))
This will replace the equal parts with like-sized just-space strings. The output will still contain the delimiters, though, but maybe that's even nicer:
2014\2014-01 Jan\2014-01-01
\ \2014-01-02
\ \2014-01-03
\ \2014-01-04
\ \2014-01-05
...
\ \2014-01-31
\2014-02 Feb\2014-02-01
\ \2014-02-02
\ \2014-02-03
...
To remove also those you can use this approach:
print '\n'.join(' ' * len('\\'.join(x[:p])) + '\\'.join(x)[len('\\'.join(x[:p])):] for p, x in eachWithPrefix(x.split('\\') for x in v))
But this now contains some code doubling, so maybe an iterative loop would be nicer here:
for p, x in eachWithPrefix(x.split('\\') for x in v):
s = '\\'.join(x)
c = '\\'.join(x[:p])
print ' '*len(c) + s[len(c):]
Or as an easy-to-use generator:
def heirarchy(data, separator=","):
    """Yield each string of ``data`` with the part shared with its predecessor blanked.

    With a non-empty ``separator`` the strings are compared component by
    component (split on ``separator``); with a falsy ``separator`` (``None``
    or ``""``) they are compared character by character. The shared leading
    part is replaced by the same number of spaces, so the remaining text
    keeps its original column.
    """
    # In character mode there is nothing to glue the pieces with. Using ""
    # here lets separator=None work instead of failing on None.join(...).
    joiner = separator or ""
    rows = (x.split(separator) if separator else list(x) for x in data)
    for p, x in eachWithPrefix(rows):
        s = joiner.join(x)
        c = joiner.join(x[:p])
        yield ' '*len(c) + s[len(c):]
So now heirarchy(data, separator='\\') creates exactly your expected output.
from difflib import SequenceMatcher
def remove_redundant_prefixes(it):
    """
    remove_redundant_prefixes(it) -> iterable (generator)

    Iterate through a list of strings in sorted order, replacing the prefix
    each string shares with the previous one by the same number of spaces.

    The shared prefix is measured by comparing the two strings character by
    character from the start. The first block reported by
    difflib.SequenceMatcher is not necessarily anchored at position 0 of both
    strings, so relying on it could blank out text that is not a common
    prefix at all.
    """
    prev_line = ''
    for line in sorted(it):
        # Length of the common prefix of prev_line and line.
        match_size = 0
        for old_char, new_char in zip(prev_line, line):
            if old_char != new_char:
                break
            match_size += 1
        prev_line = line
        # With match_size == 0 this yields the line unchanged.
        yield ' ' * match_size + line[match_size:]
OK, inspired by the commonprefix answers to this question, I played with it for a bit, and inspiration came when I realized I could send a list with just two elements each time!
Here's my code, this handles only the character by character case, and I'm not sure how good this is (i suspect not very much! as a lot of unnecessary copying occurs). But I was able to successfully reproduce the 3rd output from my question. This still leaves the other part unresolved.
def printheirarchy(data,seperator=","):
    """Pretty-print ``data`` with each entry's shared leading characters blanked.

    Every entry after the first is compared with the entry before it; the
    characters they have in common at the start (os.path.commonprefix) are
    replaced by spaces. Inputs with fewer than two entries are printed as
    they are. ``seperator`` is accepted but not used: the comparison is
    always character by character.
    """
    if len(data) < 2:
        pprint(data)
        return
    trimmed = [data[0]]
    for previous, current in zip(data, data[1:]):
        shared = os.path.commonprefix([previous, current])
        trimmed.append(" " * len(shared) + current[len(shared):])
    pprint(trimmed)