I'm using python-chess and I would like to know a good way to track the movement of, let's say, the black king throughout a single game recorded in PGN format, essentially creating a string of every square it reached.
Sample code that records the black king's squares, the moves that produced them, the EPD of each position, etc.
Code
import chess.pgn

# Save position of black king in each game.
data = []
pgnfile = "mygame.pgn"
gcnt = 0

with open(pgnfile) as pgn:
    while True:
        game = chess.pgn.read_game(pgn)
        if game is None:
            break
        gcnt += 1

        sqnames = []
        sqvalues = []
        epds = []
        sanmoves = []
        ucimoves = []

        # first entry of moves is null move
        sanmoves.append(str(chess.Move.null()))
        ucimoves.append(str(chess.Move.null()))

        # Save the first location of king.
        b = game.board()
        sqnames.append(chess.square_name(b.king(chess.BLACK)))
        sqvalues.append(b.king(chess.BLACK))
        epds.append(b.epd())

        for node in game.mainline():  # parse nodes in this game
            board = node.board()
            m = node.move
            tosq = m.to_square
            p = board.piece_at(tosq)
            if p.piece_type == chess.KING and p.color == chess.BLACK:
                sqnames.append(chess.square_name(tosq))
                sqvalues.append(tosq)
                epds.append(board.epd())
                sanmoves.append(node.parent.board().san(m))
                ucimoves.append(node.parent.board().uci(m))

        data.append({'game': gcnt, 'sqnames': sqnames, 'sqvalues': sqvalues,
                     'epd': epds, 'sanmoves': sanmoves, 'ucimoves': ucimoves})

        if gcnt >= 10:  # sample limit
            break

# Print tracks of king per game.
for d in data:
    print(d)
Output
{'game': 1, 'sqnames': ['e8', 'g8'], 'sqvalues': [60, 62], 'epd': ['rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq -', 'r2q1rk1/p2bbppp/2pppn2/6B1/3NP3/2NQ4/PPP2PPP/2KR3R w - -'], 'sanmoves': ['0000', 'O-O'], 'ucimoves': ['0000', 'e8g8']}
{'game': 2, 'sqnames': ['e8', 'd7', 'e8', 'f8'], 'sqvalues': [60, 51, 60, 61], 'epd': ['rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq -', 'rn5r/pp1k1p1p/4bp2/1N2p3/1b6/1N3P2/PPP3PP/R2K1B1R w - -', 'rnr1k3/pp2bp1p/4b3/1N2pp2/8/PN1B1P2/1PP2KPP/R6R w - -', 'r1r2k2/pp1nbp1p/4b3/4pp2/8/PNNB1P2/1PP2KPP/R3R3 w - -'], 'sanmoves': ['0000', 'Kd7', 'Ke8', 'Kf8'], 'ucimoves': ['0000', 'e8d7', 'd7e8', 'e8f8']}
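Since the stated goal was a single string of every square the king visited, the recorded sqnames list can simply be joined per game; a small sketch on top of the data list built above:

for d in data:
    # e.g. 'e8 -> d7 -> e8 -> f8' for game 2 in the sample output
    print(f"game {d['game']}: {' -> '.join(d['sqnames'])}")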
Related
I have an old string and a modified one, plus the values from the old string stored in a dictionary. I am trying to check whether each value in the dictionary is still present as such in the new string. If it is, nothing happens. If the value has changed, the value in the dictionary is replaced by the modified value. If the value is no longer present in the new string at all, the value in the dictionary is updated to None.
Code
import re

db_tag_old = {"art": "art", "organizer": "james", "month": "December", "season": "summer"}
old = 'The art is performed by james. _______ Season is summer _____ time. It is December.'
new = 'The art is performed by ______ Mathew. Season is ______ autmn time. __ __ _________'
db_tag_new = {}
final_db_tag = {}
symbol = '_'
needle = f'{re.escape(symbol)}+'
position = [(match.start(), match.end()) for match in re.finditer(needle, old)]

for key, value in db_tag_old.items():
    position_old = [(match.start(), match.end()) for match in re.finditer(value.lower(), old)]
    position_new = [(match.start(), match.end()) for match in re.finditer(value.lower(), new)]
    if position_old == position_new and [] not in (position_old, position_new):
        db_tag_new.update({key: value})
        continue
    else:
        new_value = new[position[0][0]:position[0][1]]
        db_tag_new.update({key: new_value})

final_db_tag.update({"old": db_tag_old, "new": db_tag_new})
print(final_db_tag)
Output Obtained
{'old': {'art': 'art', 'organizer': 'james', 'month': 'December', 'season': 'summer'}, 'new': {'art': 'art', 'organizer': 'Mathew.', 'month': 'Mathew.', 'season': 'Mathew.'}}
Here, in the dictionary under the key "new", month and season have the wrong values.
Expected Output
{'old': {'art': 'art', 'organizer': 'james', 'month': 'December', 'season': 'summer'}, 'new': {'art': 'art', 'organizer': 'Mathew.', 'month': 'None', 'season': 'autmn'}}
How can this be corrected?
It's not really clear to me what the rule is for replacing old text with new text. The following code produces the wanted result, but I'm not sure whether this approach is as universal as needed:
import re

db_tag_old = {"art": "art", "organizer": "james.", "month": "December", "season": "summer"}
old = 'The art is performed by james. _______ Season is summer _____ time. It is December.'
new = 'The art is performed by ______ Mathew. Season is ______ autmn time. __ __ _________'
db_tag_new = {}

# pre-definition for dict-entries we won't find:
for key, val in db_tag_old.items():
    db_tag_new[key] = "None"

owords = old.split()
nwords = new.split()

for i, nw in enumerate(nwords):
    # the "art"-case:
    for key, ow in db_tag_old.items():
        if nw == ow:
            db_tag_new[key] = ow
    # "organizer" / "season" cases:
    if re.match(r'^_+$', nw):
        for key, ow in db_tag_old.items():
            if ow == owords[i] and re.match(r'^_+$', owords[i+1]):
                db_tag_new[key] = nwords[i+1]

print("old: ", db_tag_old)
print("new: ", db_tag_new)
I am a beginner in programming and I'm working on the projects in Automate the Boring Stuff with Python. In the book there is a project to build a sandwich order and return the total cost. I want to extend my program by printing an itemized receipt. For example, if I put in an order for 1 sandwich with wheat and chicken and 3 sandwiches with white and turkey, the receipt should show something like this (I will format it better when I figure it out):
1 sandwich ---3.5
wheat, chicken
3 sandwich ---10.5.
white, turkey
Total --- 14.00
My challenge is storing the different sandwich orders into different variables and printing them out at the end.
My code below:
import pyinputplus as pyip

menu = {
    'wheat': 1.5, 'white': 1, 'sourdough': 2,
    'chicken': 2, 'turkey': 2.5, 'ham': 2, 'tofu': 3,
    'cheddar': 0.5, 'mozzarella': 0.25, 'american': 0.5,
    'mayo': 0.25, 'mustard': 0.25, 'lettuce': 0.5, 'tomato': 0.5
}
total = 0.0
subtotal = 0.0

while True:
    order = {}
    print('What bread would you like?')
    order['bread'] = pyip.inputChoice(['wheat', 'white', 'sourdough'])
    print('How about for your protein?')
    order['protein'] = pyip.inputChoice(['chicken', 'turkey', 'ham', 'tofu'])
    wantCheese = pyip.inputYesNo('Would you like cheese on the sandwich?')
    if wantCheese == 'yes':
        order['cheese'] = pyip.inputChoice(['cheddar', 'mozzarella', 'american'])
    wantToppings = pyip.inputYesNo('Would you like to add extra toppings?')
    if wantToppings == 'yes':
        while True:
            order['side'] = pyip.inputChoice(['mayo', 'mustard', 'lettuce', 'tomato'])
            anotherTopping = pyip.inputYesNo('Would you like another topping?')
            if anotherTopping == 'no':
                break
    orderNumber = pyip.inputInt('How many of those sandwiches would you like? ', min=1)
    for choice in order:
        if order[choice] in menu.keys():
            subtotal += menu[order[choice]]
    total *= orderNumber
    total += subtotal
    subtotal = 0
    anotherOrder = pyip.inputYesNo('Would you like to order another sandwich?')
    if anotherOrder == 'no':
        break

print(total)
Adjust the following as you see fit. FYI, while coding this up I had "let's get this to work" in mind as opposed to "let's make this as efficient as possible". Moreover, you should format the receipt however you like.
Importantly, I created a list called orders just before the while loop that is used to store the orders. Each element of orders is a 3-tuple whose first element records orderNumber, whose third element records the subtotal, and whose second element is an order dictionary, just as in your original code, except that order["side"] is a list, which allows multiple additional toppings to be added. For the sample output below, orders is
[(2, {'bread': 'wheat', 'protein': 'chicken', 'cheese': 'cheddar', 'side': ['mustard', 'lettuce']}, 9.5), (1, {'bread': 'sourdough', 'protein': 'turkey', 'side': []}, 4.5)]
As you can see, there are 2 orders of 'wheat', 'chicken', 'cheddar', 'mustard', 'lettuce' (subtotal 9.5) and 1 order of 'sourdough', 'turkey' (subtotal 4.5).
I hope this helps. Any questions please let me know.
import pyinputplus as pyip

menu = {'wheat': 1.5, 'white': 1, 'sourdough': 2,
        'chicken': 2, 'turkey': 2.5, 'ham': 2, 'tofu': 3,
        'cheddar': 0.5, 'mozzarella': 0.25, 'american': 0.5,
        'mayo': 0.25, 'mustard': 0.25, 'lettuce': 0.5, 'tomato': 0.5
        }

orders = []

while True:
    order = {}

    # choose bread
    print("What bread would you like?")
    order['bread'] = pyip.inputChoice(['wheat', 'white', 'sourdough'])

    # choose protein
    print("How about for your protein?")
    order['protein'] = pyip.inputChoice(['chicken', 'turkey', 'ham', 'tofu'])

    # choose cheese
    wantCheese = pyip.inputYesNo("Would you like cheese on the sandwich?")
    if wantCheese == 'yes':
        order['cheese'] = pyip.inputChoice(['cheddar', 'mozzarella', 'american'])

    # choose extra toppings
    order["side"] = []
    wantToppings = pyip.inputYesNo("Would you like to add extra toppings?")
    if wantToppings == 'yes':
        while True:
            order["side"].append(pyip.inputChoice(
                ['mayo', 'mustard', 'lettuce', 'tomato']))
            anotherTopping = pyip.inputYesNo("Would you like another topping?")
            if anotherTopping == 'no':
                break

    # order number
    orderNumber = pyip.inputInt(
        "How many of those sandwiches would you like?", min=1)

    # subtotal
    subtotal = sum(menu[order[key]] for key in order if key != 'side')
    subtotal += sum(menu[j] for j in order['side'])
    subtotal *= orderNumber

    # add 3-tuple to orders list
    orders.append((orderNumber, order, subtotal))

    # another order?
    anotherOrder = pyip.inputYesNo("Would you like to order another sandwich?")
    if anotherOrder == 'no':
        break

# add subtotals to form total
total = sum(order[2] for order in orders)

# print orders (for programmer use)
print(f"\nOrders: {orders}")

# print receipt
print(f"\nReceipt\n")
for order in orders:
    print(f"{order[0]} sandwich ---{order[2]}")
    print(" ", end="")
    for key in order[1]:
        if isinstance(order[1][key], list):
            for x in order[1][key]:
                print(x, end=", ")
        else:
            print(order[1][key], end=", ")
    print("\n")

print(f"Total --- {total}")
Sample output:
Receipt
2 sandwich ---9.5
wheat, chicken, cheddar, mustard, lettuce,
1 sandwich ---4.5
sourdough, turkey,
Total --- 14.0
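Since the receipt formatting was left open, here is one possible way to lay it out from the same orders list and total built above (just a sketch; the prices with two decimals and the joined ingredient line are my own formatting choices, not part of the answer above):

# hypothetical tidier receipt layout built on orders/total from above
print("\nReceipt\n")
for qty, items, sub in orders:
    # everything except 'side' holds a single ingredient name
    ingredients = ', '.join(v for k, v in items.items() if k != 'side')
    if items['side']:
        ingredients += ', ' + ', '.join(items['side'])
    print(f"{qty} sandwich --- {sub:.2f}")
    print(f"  {ingredients}")
print(f"Total --- {total:.2f}")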
I have rule-based code that prints out the noun(s) followed by a verb in a sentence:
for text_id, text in enumerate(news_df['news_title'].values):
    # Remove the comma and full stops
    text = text.replace(',', '').replace('.', '').replace('-', '')
    sentence_tags = POSTAG(text.lower())
    print(text)
    # Sentences parts
    for index, part in enumerate(sentence_tags):
        try:
            if 'NN' in part[1] and 'VB' in sentence_tags[index + 1][1]:
                print(">", part[0])
                break
            elif 'NN' in part[1] and 'NN' in sentence_tags[index + 1][1] and 'VB' in sentence_tags[index + 2][1]:
                print(">", part[0], sentence_tags[index + 1][0])
                break
            elif 'NN' in part[1] and 'NN' in sentence_tags[index + 1][1] and 'NN' in sentence_tags[index + 2][1] and 'VB' in sentence_tags[index + 3][1]:
                print(">", part[0], sentence_tags[index + 1][0], sentence_tags[index + 2][0])
                break
        except:
            pass
    print()
The output of a sentence following this rule:
high school football players charged after video surfaces showing hazing
> school football players
trump accuser pushes new york to pass the adult survivors act plans to sue
>trump accuser
Is there a way to also print out the position of the noun that was printed by the rule?
For example:
>trump accuser , [0,5,"NN"] , [6,13,"VB"]
I changed the script and separated out the state-machine segment. The most serious problem with this program, IMO, is that it only returns the first pattern it finds (you can fix that quickly).
import pandas as pd
import nltk

POSTAG = nltk.pos_tag

df = pd.DataFrame({'text': ['high school football players charged after video surfaces showing hazing',
                            'trump accuser pushes new york to pass the adult survivors act plans to sue']})

for text_id, text in enumerate(df['text'].values):
    # Remove the comma and full stops
    text = text.replace(',', '').replace('.', '').replace('-', '')
    tokens = nltk.word_tokenize(text.lower())
    sentence_tags = POSTAG(tokens)

    words = [item[0] for item in sentence_tags]
    start_end = []
    temp = 0
    for word in words:
        start_end.append([temp, temp + len(word)])
        temp += (len(word) + 1)
    tags = [item[1] for item in sentence_tags]

    words_to_print = []
    tags_to_print = []
    start_end_to_print = []

    # the state machine
    verb = False
    first_noun = False
    second_noun = False
    third_noun = False
    for w, t, se in zip(words, tags, start_end):
        if t.startswith('NN'):
            words_to_print.append(w)
            tags_to_print.append(t)
            start_end_to_print.append(se)
            first_noun = True
        elif t.startswith('NN') and first_noun:
            words_to_print.append(w)
            tags_to_print.append(t)
            start_end_to_print.append(se)
            second_noun = True
        elif t.startswith('NN') and second_noun:
            words_to_print.append(w)
            tags_to_print.append(t)
            start_end_to_print.append(se)
            third_noun = True
        elif t.startswith('VB') and (first_noun or second_noun or third_noun):
            break
        elif first_noun or second_noun or third_noun:
            words_to_print = []
            tags_to_print = []
            start_end_to_print = []
            verb = False
            first_noun, second_noun, third_noun = False, False, False

    print('> ', ' '.join(words_to_print),
          ' '.join([str(item[0]) + ' ' + str(item[1]) for item in zip(start_end_to_print, tags_to_print)]))
output:
> school football players [5, 11] NN [12, 20] NN [21, 28] NNS
> trump accuser [0, 5] NN [6, 13] NN
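As noted above, the state machine stops at the first noun run followed by a verb. If you want every such run per sentence, one possible replacement for the state-machine segment inside the per-text loop (shown unindented; it reuses the words, tags and start_end lists built above, and the bracketed output format in the print is my own choice) is:

# collect every noun run that is immediately followed by a verb
matches = []
current = []
for w, t, se in zip(words, tags, start_end):
    if t.startswith('NN'):
        current.append((w, t, se))
    elif t.startswith('VB') and current:
        matches.append(current)
        current = []
    else:
        current = []

for run in matches:
    print('> ', ' '.join(w for w, _, _ in run),
          ', '.join(f'[{se[0]},{se[1]},"{t}"]' for _, t, se in run))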
I want to find stand-alone or successively connected nouns in a text. I put together the code below, but it is neither efficient nor Pythonic. Does anybody have a more Pythonic way of finding these nouns with spaCy?
The code below builds a dict for every token and then runs through them to find stand-alone or connected PROPN or NOUN tokens until the for loop runs out of range. It returns a list of the collected items.
def extract_unnamed_ents(doc):
    """Takes a string and returns a list of all successively connected nouns or pronouns"""
    nlp_doc = nlp(doc)
    token_list = []
    for token in nlp_doc:
        token_dict = {}
        token_dict['lemma'] = token.lemma_
        token_dict['pos'] = token.pos_
        token_dict['tag'] = token.tag_
        token_list.append(token_dict)
    ents = []
    k = 0
    for i in range(len(token_list)):
        try:
            if token_list[k]['pos'] == 'PROPN' or token_list[k]['pos'] == 'NOUN':
                ent = token_list[k]['lemma']
                if token_list[k+1]['pos'] == 'PROPN' or token_list[k+1]['pos'] == 'NOUN':
                    ent = ent + ' ' + token_list[k+1]['lemma']
                    k += 1
                if token_list[k+1]['pos'] == 'PROPN' or token_list[k+1]['pos'] == 'NOUN':
                    ent = ent + ' ' + token_list[k+1]['lemma']
                    k += 1
                if token_list[k+1]['pos'] == 'PROPN' or token_list[k+1]['pos'] == 'NOUN':
                    ent = ent + ' ' + token_list[k+1]['lemma']
                    k += 1
                if token_list[k+1]['pos'] == 'PROPN' or token_list[k+1]['pos'] == 'NOUN':
                    ent = ent + ' ' + token_list[k+1]['lemma']
                    k += 1
                if ent not in ents:
                    ents.append(ent)
        except:
            pass
        k += 1
    return ents
Test:
extract_unnamed_ents('Chancellor Angela Merkel and some of her ministers will discuss at a cabinet '
"retreat next week ways to avert driving bans in major cities after Germany's "
'top administrative court in February allowed local authorities to bar '
'heavily polluting diesel cars.')
Out:
['Chancellor Angela Merkel',
'minister',
'cabinet retreat',
'week way',
'ban',
'city',
'Germany',
'court',
'February',
'authority',
'diesel car']
spaCy has a way of doing this, but I'm not sure it gives you exactly what you are after:
import spacy
text = """Chancellor Angela Merkel and some of her ministers will discuss
at a cabinet retreat next week ways to avert driving bans in
major cities after Germany's top administrative court
in February allowed local authorities to bar heavily
polluting diesel cars.
""".replace('\n', ' ')
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
print([i.text for i in doc.noun_chunks])
gives
['Chancellor Angela Merkel', 'her ministers', 'a cabinet retreat', 'ways', 'driving bans', 'major cities', "Germany's top administrative court", 'February', 'local authorities', 'heavily polluting diesel cars']
Here, however, the i.lemma_ attribute doesn't really give you what you want (I think this might be fixed by this recent PR).
Since it isn't quite what you are after, you could use itertools.groupby like so:
import itertools

out = []
for i, j in itertools.groupby(doc, key=lambda i: i.pos_):
    if i not in ("PROPN", "NOUN"):
        continue
    out.append(' '.join(k.lemma_ for k in j))

print(out)
gives
['Chancellor Angela Merkel', 'minister', 'cabinet retreat', 'week way', 'ban', 'city', 'Germany', 'court', 'February', 'authority', 'diesel car']
This should give you exactly the same output as your function (the output is slightly different here but I believe this is due to different spacy versions).
If you are feeling really adventurous you could use a list comprehension
out = [' '.join(k.lemma_ for k in j)
       for i, j in itertools.groupby(doc, key=lambda i: i.pos_)
       if i in ("PROPN", "NOUN")]
Note that I see slightly different results with different spaCy versions; the output above is from spaCy 2.1.8.
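One behavioural detail of the groupby approach (my observation, not something from the answer above): because the key is the exact pos_ value, a run that mixes PROPN and NOUN tokens is split into two chunks, whereas the original function keeps it together. If that matters, a boolean key puts both tags into one class; a sketch:

import itertools

# group consecutive tokens that are either PROPN or NOUN into one chunk
out = [' '.join(tok.lemma_ for tok in grp)
       for is_noun, grp in itertools.groupby(doc, key=lambda t: t.pos_ in ("PROPN", "NOUN"))
       if is_noun]
print(out)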
for word in list6:
    if word == "TRUMP":
So, I have a list of every word in a debate transcript. When Trump speaks, it starts with "TRUMP". I need to take his words and put them into a separate list. If the word in list6 is "TRUMP", then I need to put everything that follows into a list until another person's name appears. He speaks more than once.
I just need help completing this loop.
list6 = ['TRUMP', 'I', 'am', 'good', 'HILLARY', 'I', 'am', 'good', 'too', 'TRUMP', 'But', 'How?']

person_words = {'TRUMP': [], 'HILLARY': []}
person_names = person_words.keys()

one_person_onetime_words = []
for word in list6:
    if word in person_names:
        if len(one_person_onetime_words):
            person_words[this_person].append(one_person_onetime_words)
            one_person_onetime_words = []
        this_person = word
    else:
        one_person_onetime_words.append(word)

person_words[this_person].append(one_person_onetime_words)
print(person_words)
Gives
{'HILLARY': [['I', 'am', 'good', 'too']], 'TRUMP': [['I', 'am', 'good'], ['But', 'How?']]}
So, in a single pass this gives all the different talks by all the persons.
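If the goal is simply one flat list of everything TRUMP said, rather than a list per turn, the per-person lists can be flattened afterwards; for example:

# flatten the list of turns into a single list of words
trump_words = [w for turn in person_words['TRUMP'] for w in turn]
print(trump_words)   # ['I', 'am', 'good', 'But', 'How?']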
As you mentioned in the comments to your question, if you only want one person's words, you can use the following:
from copy import copy

list6 = ['TRUMP', 'I', 'am', 'good', 'HILLARY', 'I', 'am', 'good', 'too', 'TRUMP', 'But', 'How?']

person_words = []
all_persons = ['TRUMP', 'HILLARY']
person_looking_for = 'TRUMP'

filter_out_persons = copy(all_persons)
filter_out_persons.remove(person_looking_for)

person_onetime_words = []
capture_words = False
for word in list6:
    if word == person_looking_for:
        capture_words = True
        if len(person_onetime_words):
            person_words.append(person_onetime_words)
            person_onetime_words = []
    elif word not in filter_out_persons and capture_words:
        person_onetime_words.append(word)
    else:
        capture_words = False

person_words.append(person_onetime_words)
print("{}'s words".format(person_looking_for))
print(person_words)
That gives
TRUMP's words
[['I', 'am', 'good'], ['But', 'How?']]
And the following will give a dictionary with words as keys, where each value is in turn a dictionary holding how often each person used that word.
import pprint

list6 = ['TRUMP', 'I', 'am', 'good', 'HILLARY', 'I', 'am', 'good', 'too', 'TRUMP', 'But', 'How?']
person_names = ['TRUMP', 'HILLARY']

word_frequency = {}
for word in list6:
    if word in person_names:
        person = word
    else:
        word = word.lower()
        if word in word_frequency:
            if person in word_frequency[word]:
                word_frequency[word][person] += 1
            else:
                word_frequency[word][person] = 1
        else:
            word_frequency[word] = {person: 1}

pprint.pprint(word_frequency)
Gives
{'am': {'HILLARY': 1, 'TRUMP': 1},
'but': {'TRUMP': 1},
'good': {'HILLARY': 1, 'TRUMP': 1},
'how?': {'TRUMP': 1},
'i': {'HILLARY': 1, 'TRUMP': 1},
'too': {'HILLARY': 1}}
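For what it's worth, the same tally can be written a little more compactly with collections.defaultdict (just an alternative sketch built on the list6 and person_names variables above, not part of the original answer):

from collections import defaultdict

word_frequency = defaultdict(lambda: defaultdict(int))
person = None  # guards against words appearing before any speaker name
for word in list6:
    if word in person_names:
        person = word
    else:
        word_frequency[word.lower()][person] += 1

# convert to plain dicts so the printout matches the output above
pprint.pprint({w: dict(c) for w, c in word_frequency.items()})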