How to set print() to a docx python - python

I have code that collects information with Selenium, and I need to write that information to a .docx file following a template. At the moment I output it with print(); part of the output looks like this:
Stuyvesant High School
General Information
School Name:
Stuyvesant High School
Principal:
Mr. Eric Contreras
Principal’s E-mail:
ECONTRE@SCHOOLS.NYC.GOV
Type:
Regular school
Grade Span:
9-12
Address:
345 Chambers Street, New York, NY 10282
I am printing this information to the console, but I need to write it to a .docx file instead.
Here is the part of the code where I print:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import openpyxl
import docx
from docx.shared import Pt
# School names to look up, read from cells A1..A100 of the 'RI' worksheet.
List = []
# Raw strings: the backslashes in these Windows paths are not escape sequences.
wb = openpyxl.load_workbook(r'D:\INSPR\Rating_100_schools\Top-100.xlsx')
sheet = wb['RI']
for rowOfCellObjects in sheet['A1':'A100']:
    for cellObj in rowOfCellObjects:
        List.append(cellObj.value)
# Values scraped for the school currently being processed, in call order.
School_list_result = []
# The worksheet title is appended to the search queries sent further down.
State = sheet.title
driver = webdriver.Chrome(executable_path=r'D:\chromedriver')  # any path
def check_xpath(xpath):
    """Append the text of the element at ``xpath`` to ``School_list_result``.

    Appends the placeholder "No data." when the element cannot be found, so
    every call adds exactly one entry to the result list.
    """
    try:
        value = driver.find_element_by_xpath(xpath).text
    except NoSuchElementException:
        value = "No data."
    School_list_result.append(value)
def check_text(partial_link_text):
    """Append the ``href`` of the link matching ``partial_link_text``.

    The value goes to ``School_list_result``; "No data." is appended instead
    when no such link exists on the current page.
    """
    try:
        link = driver.find_element_by_partial_link_text(partial_link_text)
        href = link.get_attribute("href")
    except NoSuchElementException:
        href = "No data."
    School_list_result.append(href)
def check_click(clicker):
    """Click the link whose text contains ``clicker``.

    Prints "No click." when the link is not found; nothing is appended to
    ``School_list_result`` either way.
    """
    try:
        driver.find_element_by_partial_link_text(clicker).click()
    except NoSuchElementException:
        print("No click.")
def get_url(url, _xpath, send_keys):
    """Open ``url`` and submit a search through the input located by ``_xpath``.

    The text typed is the module-level ``schools`` value (the school currently
    being processed by the calling loop) followed by ``send_keys``; the query
    is then submitted by sending '\\ue007'.
    NOTE(review): '\\ue007' appears to be Selenium's ENTER key code - confirm.
    Prints "No data." when the input element is not found.
    """
    driver.get(url)
    try:
        search_box = driver.find_element_by_xpath(_xpath)
        search_box.clear()
        driver.implicitly_wait(10)
        search_box.send_keys(schools, send_keys)
        search_box.send_keys(u'\ue007')
        driver.implicitly_wait(10)
    except NoSuchElementException:
        print("No data.")
# Scrape four sites for each selected school, print a formatted report, then
# reset the result list. The bracketed numbers are the positions the values
# take in School_list_result (one append per check_xpath/check_text call).
for schools in List[98:100]:
    # ----------------------------------------- GREAT SCHOOLS -----------------------------------------
    get_url("https://www.google.com/", '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input', " " + State + " greatschools")
    # Open the first search hit; click() returns None, so nothing is kept.
    driver.find_element_by_xpath('//*[@id="rso"]/div[1]/div/div[1]/a/h3').click()
    check_xpath('//*[@id="hero"]/div/div[1]/h1')  # [0] School Name
    check_xpath('/html/body/div[6]/div[8]/div/div[1]/div/div/div[2]/div[1]/div[2]/span[1]')  # [1] Principal
    check_text('Principal email')  # [2] Principal's E-mail
    check_xpath('//*[@id="hero"]/div/div[2]/div[2]/div[3]/div[2]')  # [3] Grade Span
    check_xpath('//*[@id="hero"]/div/div[2]/div[1]/div[1]/div[1]/div[1]/a/div/span[2]')  # [4] Address
    check_xpath('/html/body/div[6]/div[8]/div/div[1]/div/div/div[2]/div[2]/span/a')  # [5] Phone
    check_text('Website')  # [6] Website
    check_xpath('//*[@id="hero"]/div/div[2]/div[1]/div[1]/div[1]/div[2]/a')  # [7] Associations/Communities
    check_xpath('//*[@id="hero"]/div/div[2]/div[2]/div[1]/div/a/div[1]/div')  # [8] GreatSchools Rating
    check_xpath('//*[@id="Students"]/div/div[2]/div[1]/div[2]')  # [9] Enrollment by Race/Ethnicity
    # ----------------------------------------- NCES -----------------------------------------
    driver.implicitly_wait(10)
    get_url("https://nces.ed.gov/search/index.asp?q=&btnG=Search#gsc.tab=0", '//*[@id="qt"]', " " + State)
    check_click('Search for Public Schools - ')
    driver.implicitly_wait(10)
    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[4]/td/table/tbody/tr[7]/td[1]/font[2]')  # [10] School type
    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[4]/td/table/tbody/tr[7]/td[3]/font')  # [11] Charter
    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[12]/td/table/tbody/tr[3]/td/table/tbody/tr[2]/td/table/tbody')  # [12] Enrollment by Gender
    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[12]/td/table/tbody/tr[1]/td/table/tbody/tr[2]')  # [13] Enrollment by Grade
    # ----------------------------------------- USNEWS -----------------------------------------
    driver.implicitly_wait(10)
    url = "https://www.usnews.com/education/best-high-schools/new-york/rankings"
    driver.get(url)
    check_click(schools)
    driver.implicitly_wait(10)
    check_xpath('//*[@id="app"]/div/div/div/div[1]/div/div/div[2]/div[1]/div[2]/p[3]')  # [14] U.S.News Rankings
    # ----------------------------------------- PUBLIC REVIEW -----------------------------------------
    driver.implicitly_wait(10)
    get_url("https://www.google.com/", '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input', " " + State + " publicschoolreview")
    driver.find_element_by_partial_link_text('(2020)').click()
    driver.implicitly_wait(10)
    check_xpath('//*[@id="quick_stats"]/div/div[2]/ul/li[2]/strong')  # [15] Total # Students
    check_xpath('//*[@id="total_teachers_data_row"]/td[2]')  # [16] Full-Time Teachers
    check_xpath('//*[@id="quick_stats"]/div/div[2]/ul/li[3]/strong')  # [17] Student/Teacher Ratio
    # ----------------------------------------- PRINT INFORMATION -----------------------------------------
    # \033[1m / \033[0m switch bold on and off in ANSI-capable terminals.
    print(" ---------------------------------------------------------------"+"\n",
          " \033[1m", schools,"\033[0m"+"\n",
          " ---------------------------------------------------------------"+"\n",
          " \033[1mGeneral Information\033[0m "+"\n",
          "\033[1mSchool Name:\n\033[0m",School_list_result[0]+"\n",
          "\033[1mPrincipal:\n\033[0m",School_list_result[1]+"\n",
          "\033[1mPrincipal’s E-mail:\n\033[0m",School_list_result[2]+"\n",
          "\033[1mType:\n\033[0m",School_list_result[10]+"\n",
          "\033[1mGrade Span:\n\033[0m",School_list_result[3]+"\n",
          "\033[1mAddress:\n\033[0m",School_list_result[4]+"\n",
          "\033[1mPhone:\n\033[0m",School_list_result[5]+"\n",
          "\033[1mWebsite:\n\033[0m",School_list_result[6]+"\n",
          "\033[1mAssociations/Communities:\n\033[0m",School_list_result[7]+"\n",
          "\033[1mGreatSchools Summary Rating:\n\033[0m",School_list_result[8]+"\n",
          "\033[1mU.S.News Rankings:\n\033[0m",School_list_result[14]+"\n",
          " \033[1mSchool Details\033[0m"+"\n",
          "\033[1mTotal # Students:\n\033[0m",School_list_result[15]+"\n",
          "\033[1mFull-Time Teachers:\n\033[0m",School_list_result[16]+"\n",
          "\033[1mStudent/Teacher Ratio:\n\033[0m",School_list_result[17]+"\n",
          "\033[1mCharter:\n\033[0m",School_list_result[11]+"\n",
          "\033[1mMagnet: \n\033[0m","No""\n",
          " \033[1mEnrollment Data\033[0m"+"\n",
          "\033[1mEnrollment by Race/Ethnicity: \n\033[0m",School_list_result[9]+"\n",
          "\033[1mEnrollment by Gender: \n\033[0m",School_list_result[12]+"\n",
          "\033[1mEnrollment by Grade: \n\033[0m",School_list_result[13]+"\n",
          )
    print()
    # Start the next school with an empty list so the indices above stay valid.
    School_list_result.clear()
What I need: write this result into a .docx file following the template, instead of printing it to the console.
One more thing: if you know how to avoid indexing (like School_list_result[0]), please tell me.

I assume you are on a Windows operating system, as I am, and that you know how to install Python packages:
Install docx and python-docx modules (they are different, make sure you have installed both)
use the following code:
# One school record; values are stored in the same order as `headers` below.
School_list_result = [
    "Stuyvesant High School",
    "Mr. Eric Contreras",
    "ECONTRE@SCHOOLS.NYC.GOV",
    "Regular school",
    "9-12",
    "345 Chambers Street, New York, NY 10282",
]
# Label printed above each value of a record.
headers = [
    "School Name: ",
    "Principal: ",
    "Principal's Email: ",
    "Type: ",
    "Grade Span: ",
    "Address: ",
]
def print_into_one_doc():
    """Append the record in ``School_list_result`` to a .docx file.

    Prompts for the path of the target document, then writes a centred title
    (the first value of the record), a "General Information" heading and one
    bold header / italic value pair per entry of ``headers``, followed by a
    page break. The document is saved back to the same path.

    Raises:
        ValueError: if no path is entered at the prompt.
    """
    import os
    from docx import Document
    from docx.shared import RGBColor
    from docx.shared import Pt
    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

    p = input('hold shift key right click, copy and paste the file path of docx here: ')
    # "Copy as path" wraps the path in quotes; strip them (and surrounding
    # whitespace) from both ends instead of assuming p[0] exists.
    p = p.strip().strip('"\'')
    if not p:
        raise ValueError('no docx path was entered')
    p = os.path.abspath(p)
    # Document(path) raises PackageNotFoundError for a missing or empty file,
    # so start from python-docx's built-in blank template in that case.
    if os.path.isfile(p) and os.path.getsize(p) > 0:
        doc = Document(p)
    else:
        doc = Document()

    # Title: centred, coloured, large.
    h = doc.add_paragraph()
    h.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    r = h.add_run(School_list_result[0])
    r.font.color.rgb = RGBColor(54, 95, 145)
    r.font.size = Pt(36)
    doc.add_paragraph('\n')

    su = doc.add_paragraph()
    ru = su.add_run('General Information')
    ru.font.size = Pt(30)

    # Headers and values are paired by position.
    for header, value in zip(headers, School_list_result):
        rp = doc.add_paragraph().add_run(header)
        rp.bold = True
        rp.font.size = Pt(23)
        rm = doc.add_paragraph().add_run(value)
        rm.font.size = Pt(22)
        rm.italic = True
    doc.add_page_break()
    doc.save(p)


print_into_one_doc()
If you have a list, which contains School_list_result, iterate it through, here is an example:
# One inner list per school; values are stored in the same order as `headers`.
List_of_school_list_result = [
    [
        "Stuyvesant High School",
        "Mr. Eric Contreras",
        "ECONTRE@SCHOOLS.NYC.GOV",
        "Regular school",
        "9-12",
        "345 Chambers Street, New York, NY 10282",
    ],
    [
        "Great Lake College",
        "Mr. Jason Madunic",
        "MADUNIC@SCHOOLS.VIC.GOV",
        "Public school",
        "6-12",
        "167A High Street, Melbourne, VIC 3228",
    ],
]
# Label printed above each value of a record.
headers = [
    "School Name: ",
    "Principal: ",
    "Principal's Email: ",
    "Type: ",
    "Grade Span: ",
    "Address: ",
]
def print_all_into_one_doc():
    """Append every record of ``List_of_school_list_result`` to a .docx file.

    Prompts for the path of the target document. For each school record it
    writes a centred title (the record's first value), a "General Information"
    heading, one bold header / italic value pair per entry of ``headers``,
    and a page break. The document is saved back to the same path.

    Raises:
        ValueError: if no path is entered at the prompt.
    """
    import os
    from docx import Document
    from docx.shared import RGBColor
    from docx.shared import Pt
    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

    p = input('hold shift key right click, copy and paste the file path of docx here: ')
    # "Copy as path" wraps the path in quotes; strip them (and surrounding
    # whitespace) from both ends instead of assuming p[0] exists.
    p = p.strip().strip('"\'')
    if not p:
        raise ValueError('no docx path was entered')
    p = os.path.abspath(p)
    # Document(path) raises PackageNotFoundError for a missing or empty file,
    # so start from python-docx's built-in blank template in that case.
    if os.path.isfile(p) and os.path.getsize(p) > 0:
        doc = Document(p)
    else:
        doc = Document()

    for j in List_of_school_list_result:
        # Title: centred, coloured (adjust the RGB value to taste), large.
        h = doc.add_paragraph()
        h.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        r = h.add_run(j[0])
        r.font.color.rgb = RGBColor(54, 95, 145)
        r.font.size = Pt(36)
        doc.add_paragraph('\n')

        su = doc.add_paragraph()
        ru = su.add_run('General Information')
        ru.font.size = Pt(30)

        # Headers and values are paired by position.
        for header, value in zip(headers, j):
            rp = doc.add_paragraph().add_run(header)
            rp.bold = True
            rp.font.size = Pt(23)
            rm = doc.add_paragraph().add_run(value)
            rm.font.size = Pt(22)
            rm.italic = True
        # Each school starts on its own page.
        doc.add_page_break()
    doc.save(p)


print_all_into_one_doc()
Let's make it simple, what you need to do is:
create a list named List_of_school_list_result, dump your data in, each of them should be one single record of a certain school.
in any location, create a new docx file, double click to open it, write some stuff, press ctrl + s, delete what you have written, press ctrl + s, close the document.
go to the directory where your docx file is, hold on shift, right click, copy as path.
make sure docx and python-docx are installed, run the code, when you are asked to input the path, paste it in from your clipboard. (Please make sure you use an absolute path, which is a full directory with root c, a relative path may not work).
PS: the reason that you have to open the docx file after create, is that Microsoft Word 2005+ docx file have 3 modes. first, if it's brand new after creation, it's in binary format. second, if we open it to edit, it generates a $cache.docx file as hidden into same level directory to ensure performance and secure data just in case of crash. third, if it's edited and saved, the format will be turned into XML, which is EDITABLE using python-docx module.
PS: the Result class below provides a clear way for creating List_of_school_list_result:
class Result:
    """Group a flat stream of pushed values into fixed-length records.

    Every ``length`` consecutive ``push()`` calls form one record (a list);
    ``publish()`` returns the list of completed records. Values of a record
    that is still incomplete stay in ``col`` and are not published.
    """

    def __init__(self, length):
        # With a length below 1 no record could ever be completed and pushed
        # values would pile up silently, so reject it up front.
        if length < 1:
            raise ValueError("length must be at least 1")
        self.l = length  # number of values per record
        self.res = []    # completed records
        self.col = []    # values of the record currently being filled

    def push(self, string):
        """Add one value; close the current record once it is full."""
        self.col.append(string)
        if len(self.col) == self.l:
            self.res.append(self.col)
            self.col = []

    def publish(self):
        """Return the list of completed records."""
        return self.res
# Pass in the number of headers; after that, simply call `r.push()` once per
# value. Every six consecutive values form one school record. Finally assign
# the published list to `List_of_school_list_result`.
r = Result(6)
sample_values = [
    'school name 1',
    'principal name 1',
    'principal email 1',
    'school type 1',
    'grad span 1',
    'address 1',
    'school name 2',
    'principal name 2',
    'principal email 2',
    'school type 2',
    'grad span 2',
    'address 2',
]
for sample_value in sample_values:
    r.push(sample_value)
List_of_school_list_result = r.publish()
Complete version of code:
# Labels written above each value of a school record; values are matched to
# these labels by position.
headers = [
"School Name: ",
"Principal: ",
"Principal's Email: ",
"Type: ",
"Grade Span: ",
"Address: ",
]
class Result:
    """Group a flat stream of pushed values into fixed-length records.

    Every ``length`` consecutive ``push()`` calls form one record (a list);
    ``publish()`` returns the list of completed records. Values of a record
    that is still incomplete stay in ``col`` and are not published.
    """

    def __init__(self, length):
        # With a length below 1 no record could ever be completed and pushed
        # values would pile up silently, so reject it up front.
        if length < 1:
            raise ValueError("length must be at least 1")
        self.l = length  # number of values per record
        self.res = []    # completed records
        self.col = []    # values of the record currently being filled

    def push(self, string):
        """Add one value; close the current record once it is full."""
        self.col.append(string)
        if len(self.col) == self.l:
            self.res.append(self.col)
            self.col = []

    def publish(self):
        """Return the list of completed records."""
        return self.res
# One record holds as many values as there are headers.
r = Result(len(headers))
# call r.push() over and over again, until all the string data is passed in.
# NOTE: the triple-quoted block below is a bare string literal, not executed
# code, so nothing is pushed here; replace it with real r.push(...) calls.
''' for example
r.push('school name 1')
r.push('principal name 1')
r.push('principal email 1')
r.push('school type 1')
r.push('grad span 1')
r.push('address 1')
r.push('school name 2')
r.push('principal name 2')
r.push('principal email 2')
r.push('school type 2')
r.push('grad span 2')
r.push('address 2')
'''
# With no push() calls made above, publish() returns an empty list.
List_of_school_list_result = r.publish()
def print_all_into_one_doc():
    """Append every record of ``List_of_school_list_result`` to a .docx file.

    Prompts for the path of the target document. For each school record it
    writes a centred title (the record's first value), a "General Information"
    heading, one bold header / italic value pair per entry of ``headers``,
    and a page break. The document is saved back to the same path.

    Raises:
        ValueError: if no path is entered at the prompt.
    """
    import os
    from docx import Document
    from docx.shared import RGBColor
    from docx.shared import Pt
    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

    p = input('hold shift key right click, copy and paste the file path of docx here: ')
    # "Copy as path" wraps the path in quotes; strip them (and surrounding
    # whitespace) from both ends instead of assuming p[0] exists.
    p = p.strip().strip('"\'')
    if not p:
        raise ValueError('no docx path was entered')
    p = os.path.abspath(p)
    # Document(path) raises PackageNotFoundError for a missing or empty file,
    # so start from python-docx's built-in blank template in that case.
    if os.path.isfile(p) and os.path.getsize(p) > 0:
        doc = Document(p)
    else:
        doc = Document()

    for j in List_of_school_list_result:
        # Title: centred, coloured (adjust the RGB value to taste), large.
        # `title_run` avoids reusing the name of the module-level `r`.
        h = doc.add_paragraph()
        h.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        title_run = h.add_run(j[0])
        title_run.font.color.rgb = RGBColor(54, 95, 145)
        title_run.font.size = Pt(36)
        doc.add_paragraph('\n')

        su = doc.add_paragraph()
        ru = su.add_run('General Information')
        ru.font.size = Pt(30)

        # Headers and values are paired by position.
        for header, value in zip(headers, j):
            rp = doc.add_paragraph().add_run(header)
            rp.bold = True
            rp.font.size = Pt(23)
            rm = doc.add_paragraph().add_run(value)
            rm.font.size = Pt(22)
            rm.italic = True
        # Each school starts on its own page.
        doc.add_page_break()
    doc.save(p)


print_all_into_one_doc()

Related

How to add a headshot/picture of the player a user inputs

I'm an intermediate coder, and I made a project where a user can input an NBA player and data of that player shows up from a JSON file. Now, I'm trying to add a picture of the player but it almost seems impossible. I have never been able to add a picture to a python project.
from requests import get
from pprint import PrettyPrinter
from IPython.display import Image
from IPython.core.display import HTML
from difflib import get_close_matches
# Host and path of the JSON index document fetched by the functions below.
BASE_URL = "https://data.nba.net"
ALL_JSON = "/prod/v1/today.json"
# Used by ask_player() to print the summary line.
printer = PrettyPrinter()
# Downloaded once at import time; get_links() fetches the same document again
# instead of reusing this value.
data = get(BASE_URL + ALL_JSON).json()
def get_links():
    """Download the index document and return its 'links' entry."""
    index_document = get(BASE_URL + ALL_JSON).json()
    return index_document['links']
def get_teams():
    """Download the teams document and return its ['league']['standard'] entry."""
    teams_path = get_links()['teams']
    teams_document = get(BASE_URL + teams_path).json()
    return teams_document['league']['standard']
def ask_player():
    """Ask for a player's name and print a one-line summary for that player.

    The roster and team lists are downloaded first. If the entered name does
    not match any "firstName lastName" exactly, close matches are offered:
    a single match is used directly, several matches are shown as a numbered
    menu. Returns None in every case; output goes to the console.
    """
    players = get_links()['leagueRosterPlayers']
    all_players = get(BASE_URL + players).json()['league']['standard']
    all_teams = get_teams()

    def full_name(player):
        return f"{player['firstName']} {player['lastName']}"

    topic = input('Type the player you wish to know more about:')
    # Compare whole, whitespace-normalised names so that one-word or empty
    # input simply fails to match instead of raising IndexError.
    wanted = ' '.join(topic.split())
    playerselected = [p for p in all_players if full_name(p) == wanted]

    if not playerselected:
        # Offer the closest roster names as suggestions.
        player_names = [full_name(p) for p in all_players]
        suggestions = get_close_matches(wanted, player_names)
        if not suggestions:
            print(f"Player {topic} was not found.")
            return
        if len(suggestions) > 1:
            print(f"Player {topic} was not found. Did you mean:")
            for i, suggestion in enumerate(suggestions, start=1):
                print(f"{i}. {suggestion}")
            try:
                choice = int(input("Enter the number of your choice:"))
            except ValueError:
                choice = 0
            # Reject 0, negatives and numbers past the end of the menu;
            # a plain suggestions[choice - 1] would wrap around for 0.
            if not 1 <= choice <= len(suggestions):
                print("That is not one of the listed choices.")
                return
            picked = suggestions[choice - 1]
        else:
            picked = suggestions[0]
        playerselected = [p for p in all_players if full_name(p) == picked]

    for standard in playerselected:
        name = standard['firstName']
        name2 = standard['lastName']
        team_id = standard['teamId']
        # Fall back to a placeholder when the team id is not in the team list
        # rather than letting next() raise StopIteration.
        team = next(
            (t['fullName'] for t in all_teams if t['teamId'] == team_id),
            'an unknown team',
        )
        jersey = standard['jersey']
        college = standard['collegeName']
        dob = standard['dateOfBirthUTC']
        years = standard['yearsPro']
        printer.pprint(f"{name} {name2}, jersey number {jersey}, is playing for {team}, went to {college} college, was born in {dob}, and has been in the nba for {years} full year(s).")


ask_player()
My code is above. I tried using IPython displays, but that didnt work. Please help me!

Scraping data beach volleyball on multiple pages

I am trying to scrape all the possible data from this webpage Gstaad 2017
Here is my code:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from selenium.webdriver.support.ui import Select
#Starts the driver and goes to our starting webpage
driver = webdriver.Chrome( "C:/Users/aldi/Downloads/chromedriver.exe")
driver.get('http://www.bvbinfo.com/Tournament.asp?ID=3294&Process=Matches')
#Imports HTML into python
page = requests.get('http://www.bvbinfo.com/Tournament.asp?ID=3294&Process=Matches')
soup = BeautifulSoup(driver.page_source, 'lxml')
stages = soup.find_all('div')
stages = driver.find_elements_by_class_name('clsTournBracketHeader')[-1].text
#TODO the first row (country quota matches) has no p tag and therefore it is not included in the data
rows = []
paragraphs = []
empty_paragraphs = []
for x in soup.find_all('p'):
if len(x.get_text(strip=True)) != 0:
paragraph = x.extract()
paragraphs.append(paragraph)
if len(x.get_text(strip=True)) == 0:
empty_paragraph = x.extract()
empty_paragraphs.append(empty_paragraph)
# players
home_team_player_1 = ''
home_team_player_2 = ''
away_team_player_1 = ''
away_team_player_2 = ''
for i in range(0, len(paragraphs)):
#round and satege of the competition
round_n= paragraphs[i].find('u').text
paragraph_rows = paragraphs[i].text.split('\n')[1:-1]
counter = 0
for j in range(0,len(paragraph_rows)):
#TODO tournament info, these can vary from tournament to tournament
tournament_info = soup.find('td', class_ = 'clsTournHeader').text.strip().split()
tournament_category = [' '.join(tournament_info[0 : 2])][0]
tournament_prize_money = tournament_info[2]
#TODO tournament city can also have two elements, not just one
tournament_city = tournament_info[3]
tournament_year = tournament_info[-1]
tournament_days = tournament_info[-2][:-1].split("-")
tournament_starting_day = tournament_days[0]
tournament_ending_day = tournament_days[-1]
tournament_month = tournament_info[-3]
tournament_stars = [' '.join(tournament_info[5 : 7])][0]
players = paragraphs[i].find_all('a', {'href':re.compile('.*player.*')})
home_team_player_1 = players[counter+0].text
home_team_player_2 = players[counter+1].text
away_team_player_1 = players[counter+2].text
away_team_player_2 = players[counter+3].text
#matches
match= paragraph_rows[j].split(":")[0].split()[-1].strip()
#nationalities
nationalities = ["United", "States"]
if paragraph_rows[j].split("def.")[0].split("/")[1].split("(")[0].split(" ")[3] in nationalities:
home_team_country = "United States"
else:
home_team_country = paragraph_rows[j].split("def.")[0].split("/")[1].split("(")[0].split(" ")[-2]
if paragraph_rows[j].split("def.")[1].split("/")[1].split(" ")[3] in nationalities:
away_team_country = "United States"
else:
away_team_country = paragraph_rows[j].split("def.")[1].split("/")[1].split("(")[0].split(" ")[-2]
parentheses = re.findall(r'\(.*?\)', paragraph_rows[j])
if "," in parentheses[0]:
home_team_ranking = parentheses[0].split(",")[0]
home_team_ranking = home_team_ranking[1:-1]
home_team_qualification_round = parentheses[0].split(",")[1]
home_team_qualification_round = home_team_qualification_round[1:-1]
else:
home_team_ranking = parentheses[0].split(",")[0]
home_team_ranking = home_team_ranking[1:-1]
home_team_qualification_round = None
if "," in parentheses[1]:
away_team_ranking = parentheses[1].split(",")[0]
away_team_ranking = away_team_ranking[1:-1]
away_team_qualification_round = parentheses[1].split(",")[1]
away_team_qualification_round = away_team_qualification_round[1:-1]
else:
away_team_ranking = parentheses[1].split(",")[0]
away_team_ranking = away_team_ranking[1:-1]
match_duration = parentheses[2]
match_duration = match_duration[1:-1]
away_team_qualification_round = None
# sets
sets = re.findall(r'\).*?\(', paragraph_rows[j])
sets = sets[1][1:-1]
if len(sets.split(",")) == 2:
score_set1 = sets.split(",")[0]
score_set2 = sets.split(",")[1]
score_set3 = None
if len(sets.split(",")) == 3:
score_set1 = sets.split(",")[0]
score_set2 = sets.split(",")[1]
score_set3 = sets.split(",")[2]
# One output row per match; the keys become the column names of the CSV file.
row = {
    "home_team_player_1": home_team_player_1,
    "home_team_player_2": home_team_player_2,
    "away_team_player_1": away_team_player_1,
    # Was filled with away_team_player_1, duplicating the first away player.
    "away_team_player_2": away_team_player_2,
    "match": match,
    "home_team_country": home_team_country,
    "away_team_country": away_team_country,
    "home_team_ranking": home_team_ranking,
    "away_team_ranking": away_team_ranking,
    "match_duration": match_duration,
    "home_team_qualification_round": home_team_qualification_round,
    "away_team_qualification_round": away_team_qualification_round,
    "score_set1": score_set1,
    "score_set2": score_set2,
    "score_set3": score_set3,
    "tournament_category": tournament_category,
    "tournament_prize_money": tournament_prize_money,
    "tournament_city": tournament_city,
    "tournament_year": tournament_year,
    "tournament_starting_day": tournament_starting_day,
    "tournament_ending_day": tournament_ending_day,
    "tournament_month": tournament_month,
    "tournament_stars": tournament_stars,
    "round_n": round_n,
}
counter += 4
rows.append(row)
data = pd.DataFrame(rows)
data.to_csv("beachvb.csv", index = False)
I am not really experienced in web scraping. I have just started as a self-taught and find the HTML source code quite messy and poorly structured.
I want to improve my code in two ways:
Include all the missing matches (country quota matches, semifinals, bronze medal, and gold medal) and the respective category for each match (country quota matches, pool, winner's bracket, semifinals, bronze medal, and gold medal)
iterate the code for more years and tournaments from the dropdown menu at the top of the webpage
I have tried to iterate through different years but my code does not work
tournament_years = {"FIVB 2015", "FIVB 2016"}
dfs = []
for year in tournament_years:
# select desired tournament
box_year = Select(driver.find_element_by_xpath("/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[1]/td[2]/select"))
box_year.select_by_visible_text(year)
box_matches = Select(driver.find_element_by_xpath("/html/body/table[3]/tbody/tr/td/table[1]/tbody/tr[2]/td[2]/select"))
box_matches.select_by_visible_text("Matches")
The main idea was to create a list of dataframes for each year and each tournament by adding a new loop at the beginning of the code.
If someone has a better idea and technique to do so, it is really appreciated!

Python Chess Data (FEN) into Stockfish for Python

I am trying to use Stockfish to evaluate a chess position given in FEN notation, all in Python. I am mainly using two libraries: pgnToFen, which I found on GitHub here: https://github.com/SindreSvendby/pgnToFen, and the MIT-licensed Stockfish wrapper here: https://github.com/zhelyabuzhsky/stockfish. After fixing many bugs I keep running into problem after problem. Stockfish not only can't analyse this FEN position (3b2k1/1p3pp1/8/3pP1P1/pP3P2/P2pB3/6K1/8 b f3 -), it also loops forever! "No worries!", I thought; changing the library's source code seemed doable. I changed _put(), but I am still unable to feed in dummy values, because stdin.flush() won't execute once I give it those values, which means I can't even skip to the next row in my dataframe. :( The code I changed is below.
def _put(self, command: str, tmp_time=0) -> None:
    """Send one command line to the engine process.

    The command is written to the engine's stdin and flushed. If the flush
    fails, the error is ignored for "quit"; for any other command an
    'isready' line is written and the flush is retried after ``tmp_time``
    seconds.

    Args:
        command: the command text, without the trailing newline.
        tmp_time: seconds to wait before the retry flush. Defaults to 0 so
            that callers passing only ``command`` keep working.

    Raises:
        BrokenPipeError: if the process has no stdin.
        ValueError: if the retry flush fails as well.
    """
    if not self.stockfish.stdin:
        raise BrokenPipeError()
    self.stockfish.stdin.write(f"{command}\n")
    try:
        self.stockfish.stdin.flush()
    except (OSError, ValueError):
        # OSError covers a broken pipe, ValueError a closed file object;
        # unlike a bare except, Ctrl-C and SystemExit still propagate.
        if command == "quit":
            return
        self.stockfish.stdin.write('isready\n')
        try:
            time.sleep(tmp_time)
            self.stockfish.stdin.flush()
        except (OSError, ValueError) as retry_error:
            raise ValueError('Imma head out...') from retry_error
def get_evaluation(self) -> dict:
    """Evaluates current position

    Reads engine output until a "bestmove" line arrives and returns the last
    score reported before it. At most 500 lines are read: if no "bestmove"
    has arrived by then, the last score seen is returned, or the fallback
    ``{"type": "cp", "value": 10000}`` when no score was seen at all.

    Returns:
        A dictionary of the current advantage with "type" as "cp" (centipawns) or "mate" (checkmate in)
    """
    max_lines = 500
    evaluation = dict()
    fen_position = self.get_fen_position()
    if "w" in fen_position:  # w can only be in FEN if it is whites move
        compare = 1
    else:  # stockfish shows advantage relative to current player, convention is to do white positive
        compare = -1
    # NOTE(review): the UCI protocol spells this command "position fen <fen>";
    # confirm the engine accepts it without the "fen" keyword.
    self._put(f"position {fen_position}", 5)
    self._go()
    lines_read = 0
    while True:
        lines_read += 1
        fields = self._read_line().split(" ")
        if fields[0] == "info":
            for n, field in enumerate(fields):
                if field == "score":
                    evaluation = {
                        "type": fields[n + 1],
                        "value": int(fields[n + 2]) * compare,
                    }
        elif fields[0] == "bestmove":
            return evaluation
        # Checked on every line: an `elif x == 500` is skipped whenever the
        # 500th line happens to be an "info" line, so the loop never ends.
        if lines_read >= max_lines:
            return evaluation or {"type": 'cp', "value": 10000}
And, last but not least, the change to the __init__ constructor below:
self._stockfish_major_version: float = float(self._read_line().split(" ")[1])
And the code where I am importing this code to is below, this is where errors pop up.
import pandas as pd
import re
import nltk
import numpy as np
from stockfish import Stockfish
import os
import sys
sys.path.insert(0, r'C:\Users\path\to\pgntofen')
import pgntofen
#nltk.download('punkt')
#Changed models.py for major version line 39 in stockfish from int to float
stockfish = Stockfish(r"C:\Users\path\to\Stockfish.exe")
file = r'C:\Users\path\to\selenium-pandas output.csv'
chunksize = 10 ** 6
for chunk in pd.read_csv(file, chunksize=chunksize):
for index, row in chunk.iterrows():
FullMovesStr = str(row['FullMoves'])
FullMovesStr = FullMovesStr.replace('+', '')
if "e.p" in FullMovesStr:
row.to_csv(r'C:\Users\MyName\Logger.csv', header=None, index=False, mode='a')
print('Enpassant')
continue
tokens = nltk.word_tokenize(FullMovesStr)
movelist = []
# A token containing "." is treated as "<move number>.<white move>"; the token
# right after it is taken as black's reply. Other tokens are skipped.
for tokenit, raw_token in enumerate(tokens):
    if "." not in str(raw_token):
        continue
    try:
        reply = tokens[tokenit + 1]
    except IndexError:
        # The last token has nothing after it, so there is no pair to store.
        continue
    tokenstripped = re.sub(r"[0-9]+\.", "", raw_token)
    token = [tokenstripped, reply]
    movelist.append(token)
# A flat list of names gives two ordinary columns. The nested list used
# before builds MultiIndex columns, which changes what row['WhiteMove']
# returns during iterrows() - NOTE(review): confirm on the pandas version in use.
DFMoves = pd.DataFrame(movelist, columns=['WhiteMove', 'BlackMove'])
DFMoves['index'] = row['index']
DFMoves['Date'] = row['Date']
DFMoves['White'] = row['White']
DFMoves['Black'] = row['Black']
DFMoves['W ELO'] = row['W ELO']
DFMoves['B ELO'] = row['B ELO']
DFMoves['Av ELO'] = row['Av ELO']
DFMoves['Event'] = row['Event']
DFMoves['Site'] = row['Site']
DFMoves['ECO'] = row['ECO']
DFMoves['Opening'] = row['Opening']
pd.set_option('display.max_rows', DFMoves.shape[0]+1)
print(DFMoves[['WhiteMove', 'BlackMove']])
seqmoves = []
#seqmovesBlack = []
evalmove = []
pgnConverter = pgntofen.PgnToFen()
#stockfish.set_fen_position("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
#rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
for index, row in DFMoves.iterrows():
try:
stockfish.set_fen_position("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
except:
evalmove.append("?")
continue
#stockfish.set_fen_position("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
pgnConverter.resetBoard()
WhiteMove = str(row['WhiteMove'])
BlackMove = str(row['BlackMove'])
if index == 0:
PGNMoves1 = [WhiteMove]
seqmoves.append(WhiteMove)
#seqmoves.append(BlackMove)
else:
seqmoves.append(WhiteMove)
#seqmoves.append(BlackMove)
PGNMoves1 = seqmoves.copy()
#print(seqmoves)
try:
pgnConverter.pgnToFen(PGNMoves1)
fen = pgnConverter.getFullFen()
except:
break
try:
stockfish.set_fen_position(fen)
print(stockfish.get_board_visual())
evalpos = stockfish.get_evaluation()
evalmove.append(evalpos)
except:
pass
try:
stockfish.set_fen_position("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
except:
evalmove.append("?")
continue
pgnConverter.resetBoard()
if index == 0:
PGNMoves2 = [WhiteMove, BlackMove]
seqmoves.append(BlackMove)
else:
seqmoves.append(BlackMove)
PGNMoves2 = seqmoves.copy()
try:
pgnConverter.pgnToFen(PGNMoves2)
fen = pgnConverter.getFullFen()
except:
break
try:
stockfish.set_fen_position(fen)
print(stockfish.get_board_visual())
evalpos = stockfish.get_evaluation()
print(evalpos)
evalmove.append(evalpos)
except:
pass
#DFMoves['EvalWhite'] = evalwhite
#DFMoves['EvalBlack'] = evalblack
print(evalmove)
So the detailed question is getting stockfish.get_evalution() to just skip, or better yet fix the problem, for this ( 3b2k1/1p3pp1/8/3pP1P1/pP3P2/P2pB3/6K1/8 b f3 - ) FEN position. I have been working on this problem for quite a while so any insight into this would be very much appreciated.
My specs are: Windows 10, Python 3.9, processor Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz, and 64.0 GB of RAM.
Thanks :)
OK. It seems your FEN is invalid (3b2k1/1p3pp1/8/3pP1P1/pP3P2/P2pB3/6K1/8 b f3 -), so check that first. Also, the python-chess library (https://python-chess.readthedocs.io/en/latest/index.html) lets you work with both FEN and chess engines, which is pretty cool. Here is an example that uses these two fantastic tools together:
import chess
import chess.engine
import chess.pgn
# Read the first game from the PGN file; the file is closed right after.
with open("your_pgn_file.pgn") as pgn:
    game = chess.pgn.read_game(pgn)
if game is None:
    # read_game() returns None when the file contains no game.
    raise SystemExit("no game found in your_pgn_file.pgn")

engine = chess.engine.SimpleEngine.popen_uci("your_stockfish_path.exe")
try:
    # Iterate through all moves, play them on a board and analyse them.
    board = game.board()
    for move in game.mainline_moves():
        board.push(move)
        print(engine.analyse(board, chess.engine.Limit(time=0.1))["score"])
finally:
    # Always shut the engine process down, even if analysis fails midway.
    engine.quit()

how to know when a new paragraph in python-docx causes a new page

I have to create word documents dynamically using python-docx. I do it by adding table rows dynamically and there is no way to know how many records fit on a page because it depends on the specific data.
I need to know when a new element added to the document (table row or paragraph) causes a new page, so I can record some data in the database accordingly with the information that each page contains.
This is the code for the word document generation with python-docx:
def get_invoice_word_report(self, request, invoices_controllers):
    """Render all invoices into one .docx file and return the URL it is served from.

    Every controller's ``get_context()`` provides the data for one invoice.
    Each invoice after the first starts on a new page. The file is saved under
    ``settings.MEDIA_DIR`` and named after the first (and, when there are
    several, the last) invoice number.

    Args:
        request: object whose ``get_host()`` supplies the host part of the URL.
        invoices_controllers: sized sequence of objects exposing ``get_context()``.

    Returns:
        ``request.get_host() + settings.MEDIA_URL + file_name``.

    Raises:
        ValueError: if ``invoices_controllers`` is empty.
    """
    import os

    from docx import Document
    from docx.shared import Inches, Pt
    from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_BREAK

    if not invoices_controllers:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError("invoices_controllers must contain at least one invoice")

    document = Document()
    section = document.sections[-1]
    section.left_margin = Inches(0.5)
    section.right_margin = Inches(0.5)
    font = document.styles['Normal'].font
    font.name = 'Arial'
    font.size = Pt(8)

    first = last = None
    run = None  # trailing run of the previous invoice; carries the page break
    for index, invoices_controller in enumerate(invoices_controllers):
        context = invoices_controller.get_context()
        invoice = context['invoices']

        if index > 0:
            run.add_break(WD_BREAK.PAGE)

        # Track the first and last invoice numbers explicitly so the file name
        # is right for one invoice as well as for many.
        if index == 0:
            first = invoice['invoice_number']
        last = invoice['invoice_number']

        # The original called "Invoice".format(...) on a string without a
        # placeholder, so the heading has always been the literal "Invoice".
        document.add_paragraph("Invoice").alignment = WD_ALIGN_PARAGRAPH.RIGHT
        document.add_paragraph(
            "Folio {}".format(invoice['invoice_number'])
        ).alignment = WD_ALIGN_PARAGRAPH.RIGHT
        document.add_paragraph(invoice['agency']['company']['name'])
        document.add_paragraph(invoice['agency']['company']['address'])
        date = invoice['period_end_date'].split('-')
        document.add_paragraph(
            "{} {} {}".format(date[2], date[1], date[0])
        ).alignment = WD_ALIGN_PARAGRAPH.RIGHT
        document.add_paragraph(invoice['line'])
        document.add_paragraph(invoice['text'])

        table = document.add_table(rows=1, cols=4)
        hdr_cells = table.rows[0].cells
        for cell, width in zip(hdr_cells, (0.1, 10, 1, 1)):
            cell.width = Inches(width)

        for entry in invoice['entries']:
            row_cells = table.add_row().cells
            row_cells[0].text = str(entry['amount'])
            row_cells[1].text = entry['line']
            row_cells[2].text = entry['unit_price_label']
            row_cells[2].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT
            row_cells[3].text = entry['subtotal']
            row_cells[3].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT
            if entry['text']:
                # Optional free text goes on its own row under the entry.
                text_cells = table.add_row().cells
                text_cells[1].text = entry['text']

        # Three closing rows with only the right-most cell filled:
        # total, a fixed '$0.00' line, and the total again.
        for amount in (context['total'], '$0.00', context['total']):
            row_cells = table.add_row().cells
            for cell in row_cells[:3]:
                cell.text = ''
            row_cells[3].text = amount
            row_cells[3].paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT

        run = document.add_paragraph(
            "Son {}".format(context['total_text'])
        ).add_run()

    current_directory = settings.MEDIA_DIR
    if len(invoices_controllers) > 1:
        file_name = "Invoices {}-{}.docx".format(first, last)
    else:
        file_name = "Invoice {}.docx".format(first)
    # os.path.join works whether or not MEDIA_DIR ends with a path separator.
    document.save(os.path.join(current_directory, file_name))
    return request.get_host() + settings.MEDIA_URL + file_name
Thanks for your help.
Detecting automatic (renderer-generated) page breaks in python-docx is not possible because those breaks are not reliably recorded in the XML.
You may be able to find some indication of the last rendered page break, depending on where your .docx files came from. Otherwise you probably need to use the Microsoft VBA interface to gain access to a live renderer which may be able to provide you this information. Note the page break location is subject to change based on the machine Word is running on, depending on factors like font metrics and printer drivers.
This has come up in other questions and answers. This one might be a good place to start: Page number python-docx
To see the rest, search on "[python-docx] page break" and you'll see there are quite a few. The square bracketed part limits results to those tagged with "python-docx".

Checking a checkBox in .docx form with Python using docx module

I am attempting to fill out a Word document form with Python 2.7's docx module. I can modify text elements just fine, but I am having difficulty figuring out how to check a yes or no checkbox.
How do I go about checking one of the checkboxes in the form? I have tried a few different ways, but I think it all comes down to me not knowing how the docx XML is structured when it comes to checkboxes.
Am I able to use the Bookmark property to find a specific checkbox and check it as seen in the picture below?
I have uploaded a copy of the test form to Google Drive here.
OK, so after much frustration I finally figured out how to check a checkbox. There is an element within a checkbox element that signifies whether the box is checked. I am essentially able to create that element with the following function.
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
def checkedElement():
    """Return a new ``w:checked`` element whose ``w:val`` attribute is "true"."""
    checked = OxmlElement('w:checked')
    checked.set(qn('w:val'), "true")
    return checked
I can find all checkboxes within a table cell with the following function. Since the yes is always the first checkbox in each cell I can set the index for a yes check to 0 and a no check to index 1 and then I can append the checked element within the checkbox element:
def yesNoCheck(yes_no,tableIdx,coords):
print coords, yes_no
if yes_no == 'y':
index = 0
x = doc.tables[tableIdx].cell(coords[0],coords[1])._element.xpath('.//w:checkBox')
x[index].append(checkedElement())
elif yes_no == 'n':
index = 1
x = doc.tables[tableIdx].cell(coords[0],coords[1])._element.xpath('.//w:checkBox')
x[index].append(checkedElement())
else:
print "value was neither yes or no"
pass
here is my full code that I have written so far. I have a bunch of refactoring to do but it works great as of now. There are two tables in my .docx template and dictionary table1 and table2 contain the cell row and column coordinates. This script is used to fill out a required form using data published from ESRI's Survey123.
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
import arcpy
import datetime
import os
# Maps a form field name to the [row, column] coordinates of its cell.
# Entries marked "#CheckBox" locate cells holding checkboxes; the unmarked
# ones locate text cells.
table1 = {
'BusinessName':[2,3],
'LicenseNumber':[2,14],
'OwnerName':[3,3],
'PhoneNumber':[3,14],
'BusinessAddress':[4,5],
'County':[4,14],
'City':[5,1],
'St':[5,8],
'Zip':[5,15],
'LicenceExpired':[6,1], #CheckBox
'DateExpired':[6,15],
'LicenceRenewal':[7,1], #CheckBox
'NumberDisplayed':[8,1], #CheckBox
'NameAddDisplayed':[10,1], #CheckBox
'VehicleInfoMatches':[12,1], #CheckBox
'DischargeValveCapped':[14,1], #CheckBox
'DischargeValveCapChained':[15,1], #CheckBox
'HoseDisinfectCarried':[16,1], #CheckBox
'VehicleAndTankClean':[17,1], #CheckBox
'FreeOfLeaks':[18,1] #CheckBox
}
# Maps a form field name to the [row, column] coordinates of its cell.
# The trailing comment on each entry says whether the cell holds a yes/no
# checkbox pair or free text.
table2 = {
'LandApplyWaste':[1,1], #Yes/No CheckBox
'LocationDescriptionAccurate':[6,1], #Yes/No CheckBox
'LocationDescriptionAccDesc':[6,5], #text
'Slope':[7,1], #Yes/No CheckBox
'DistanceNearestResidence':[8,1], #Yes/No CheckBox
'DistanceNearestWell':[9,1], #Yes/No CheckBox
'DistanceNearestStreamLakeEtc':[10,1], #Yes/No CheckBox
'SeptageIncorporated':[11,1], #Yes/No CheckBox
'InjectedIncorporated':[12,3], #Yes/No CheckBox, dependent on the septage incorporated being yes
'SeptageStabilized':[13,1], #Yes/No CheckBox
'HowIsLimeMixed':[14,3], #text dependent on if lime was used
'ConfiningLayerOrGroundwater':[15,1], #Yes/No CheckBox
'ConfiningLayerOrGroundwaterDesc':[16,3], #text
'CropGrown':[17,1], #Yes/No CheckBox
'CropGrownHowVerified':[19,3], #text
'LandAppCompliance':[20,1], #Yes/No CheckBox
'AdditionalComments':[22,3],
'SignDate':[22,13]
}
def checkedElement():
    """Create the ``w:checked`` marker (``w:val`` set to "true") for a checkbox."""
    marker = OxmlElement('w:checked')
    marker.set(qn('w:val'), "true")
    return marker
def yesNoCheck(yes_no,tableIdx,coords):
print coords, yes_no
if yes_no == 'y':
index = 0
x = doc.tables[tableIdx].cell(coords[0],coords[1])._element.xpath('.//w:checkBox')
x[index].append(checkedElement())
elif yes_no == 'n':
index = 1
x = doc.tables[tableIdx].cell(coords[0],coords[1])._element.xpath('.//w:checkBox')
x[index].append(checkedElement())
else:
print "value was neither yes or no"
pass
def disposalMethodCheck(method, locationDec):
    """Tick the checkbox for a disposal method and write its location text.

    Does nothing when ``method`` is None. Otherwise the first checkbox in the
    method's cell of ``doc.tables[0]`` is checked and ``locationDec`` is written
    to column 6 of the same row.

    Raises:
        KeyError: if ``method`` is not one of the four known disposal methods.
    """
    if method is None:
        return

    # [row, column] of the checkbox cell for each disposal method.
    vals = {
        'WastewaterTreatmentFacility': [20, 1],
        'LandApplication': [22, 1],
        'SanitaryLandfill': [24, 1],
        'SeptageLagoonOrDryingBed': [26, 1],
    }
    row, col = vals[method]
    checkBoxElm = doc.tables[0].cell(row, col)._element.xpath('.//w:checkBox')[0]
    # Single-argument print in parentheses behaves the same on Python 2 and 3.
    print("{0} Checked!".format(method))
    checkBoxElm.append(checkedElement())
    editTxt(locationDec, 0, [row, 6])
def editTxt(text, tblIdx, coords, alignment = WD_ALIGN_PARAGRAPH.LEFT, bold=True):
print text, coords
field = doc.tables[tblIdx].cell(coords[0],coords[1]).paragraphs[0]
field.text = text
field.alignment = alignment
field.runs[0].font.bold = bold
def addSig(sigJpgPath):
    """Insert the picture at ``sigJpgPath``, centred, into the signature cell.

    The picture goes into the first paragraph of the first cell of row 23 of
    ``doc.tables[1]``, sized 1.34 x 0.35 inches.
    """
    signature_cell = doc.tables[1].row_cells(23)[0]
    paragraph = signature_cell.paragraphs[0]
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    picture_run = paragraph.add_run()
    picture_run.add_picture(sigJpgPath, width=Inches(1.34), height=Inches(.35))
# Path of the feature class that holds the inspection records.
fc = r"E:\PumperTruckInspectionFeatureClass"
# Create a feature layer named "PumperTruckInspections" from that feature class.
arcpy.MakeFeatureLayer_management (fc, "PumperTruckInspections")
attach = r"PumperTruckInspection__ATTACH" #Where signatures are stored
def rows_as_dicts(cursor):
    """Yield each row of ``cursor`` as a dict keyed by ``cursor.fields``."""
    field_names = cursor.fields
    for values in cursor:
        record = dict(zip(field_names, values))
        yield record
def dateString(date):
    """Format ``date`` as MM/DD/YYYY, or return '' when it is None.

    A missing date is reported on stdout with "no date".
    """
    if date is None:
        # Single-argument print in parentheses behaves the same on Python 2 and 3.
        print("no date")
        return ''
    return date.strftime('%m/%d/%Y')
def checkBusName(name):
    """Return ``name`` unchanged, or 'unknown' when it is None.

    Only None is replaced; an empty string is returned as is.
    """
    return name if name is not None else 'unknown'
from pprint import pprint

# Fields whose row key and table1 key are identical, listed in the order the
# form was originally filled in.
_TABLE1_TEXT_FIELDS = (
    'BusinessName', 'LicenseNumber', 'OwnerName', 'PhoneNumber',
    'BusinessAddress', 'County', 'City', 'St', 'Zip',
)
_TABLE1_CHECK_FIELDS = (
    'LicenceExpired', 'LicenceRenewal', 'NumberDisplayed', 'NameAddDisplayed',
    'VehicleInfoMatches', 'DischargeValveCapped', 'DischargeValveCapChained',
    'HoseDisinfectCarried', 'VehicleAndTankClean', 'FreeOfLeaks',
)

# One output document per inspection record. `doc` is deliberately a module
# global: editTxt, yesNoCheck, disposalMethodCheck and addSig all read it.
with arcpy.da.SearchCursor(fc, '*') as sc:
    for row in rows_as_dicts(sc):
        doc = Document(r"path\to\TEMPLATE.docx")
        t = datetime.datetime.now().strftime('%Y-%m-%d')
        newDocName = checkBusName(row['BusinessName']) + t + '.docx'

        for field in _TABLE1_TEXT_FIELDS:
            editTxt(row[field], 0, table1[field])
        editTxt(dateString(row['DateExpired']), 0, table1['DateExpired'])
        for field in _TABLE1_CHECK_FIELDS:
            yesNoCheck(row[field], 0, table1[field])

        disposalMethodCheck(row['DisposalMethod'], row['DisposalLocation'])

        # NOTE(review): the original indentation was lost; this assumes the
        # whole land-application questionnaire is filled in only for that
        # disposal method - confirm against the original script.
        if row['DisposalMethod'] == 'LandApplication':
            yesNoCheck(row['LandApplyWaste'], 1, table2['LandApplyWaste'])
            yesNoCheck(row['LocationDescriptionAccurate'], 1, table2['LocationDescriptionAccurate'])
            editTxt(row['LocationDescriptionAccDesc'], 1, table2['LocationDescriptionAccDesc'])
            yesNoCheck(row['Slope'], 1, table2['Slope'])
            yesNoCheck(row['DistanceNearestResidence'], 1, table2['DistanceNearestResidence'])
            yesNoCheck(row['DistanceNearestWell'], 1, table2['DistanceNearestWell'])
            yesNoCheck(row['DistanceNearestStreamLakeEtc'], 1, table2['DistanceNearestStreamLakeEtc'])
            yesNoCheck(row['SeptageIncorporated'], 1, table2['SeptageIncorporated'])
            yesNoCheck(row['InjectedIncorporated'], 1, table2['InjectedIncorporated'])  # might need a new method since its not yes/no
            yesNoCheck(row['SeptageStabilized'], 1, table2['SeptageStabilized'])
            editTxt(row['HowIsLimeMixed'], 1, table2['HowIsLimeMixed'])
            yesNoCheck(row['ConfiningLayerOrGroundwater'], 1, table2['ConfiningLayerOrGroundwater'])
            # table2 names this cell 'ConfiningLayerOrGroundwaterDesc'; the
            # previous '...Descript' lookup raised KeyError. The row field
            # name is left as it was - verify it against the feature class.
            editTxt(row['ConfiningLayerOrGroundwaterDescript'], 1, table2['ConfiningLayerOrGroundwaterDesc'])
            yesNoCheck(row['CropGrown'], 1, table2['CropGrown'])
            editTxt(row['CropGrownHowVerified'], 1, table2['CropGrownHowVerified'])
            yesNoCheck(row['LandAppCompliance'], 1, table2['LandAppCompliance'])

        editTxt(row['AdditionalComments'], 1, table2['AdditionalComments'], bold=False)

        # Look up the attachments that belong to this record and use the first
        # one whose name starts with "InspectorSignature" as the signature.
        where = "REL_GLOBALID = '{0}'".format(row['GlobalID'])
        with arcpy.da.SearchCursor(attach, ['DATA', 'ATT_NAME', 'ATTACHMENTID'], where_clause=where) as cursor:
            for r in rows_as_dicts(cursor):
                pprint(r)
                name = r['ATT_NAME']
                attachment = r['DATA']
                if name.split('_')[0] == 'InspectorSignature':
                    # Write the image to a scratch file so add_picture can
                    # read it; the handle is closed before the file is used.
                    with open("sig.jpeg", 'wb') as sig_file:
                        sig_file.write(attachment.tobytes())
                    addSig("sig.jpeg")
                    break

        editTxt(dateString(row['SignDate']), 1, table2['SignDate'], alignment=WD_ALIGN_PARAGRAPH.CENTER, bold=False)
        doc.save(newDocName)
        del doc
I created a checked checkbox in Word and then recreated its XML in code. I wrapped the whole thing in a function; you just have to pass the paragraph as an argument.
import docx
from docx import Document
from docx.shared import Inches
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
def addCheckedbox(para, box_id, name):
    """Append a pre-checked legacy form checkbox (FORMCHECKBOX field) to ``para``.

    The markup added to the paragraph is, in order: a bookmark start, a run
    with the field's ``begin`` character carrying the form-field data, a run
    with the ``FORMCHECKBOX`` instruction, a run with the ``end`` character,
    and the matching bookmark end.

    Args:
        para: python-docx paragraph that receives the checkbox.
        box_id: id used for both the bookmark start and end.
        name: name of the bookmark and of the form field.

    Returns:
        None.
    """
    make = docx.oxml.OxmlElement
    qn = docx.oxml.ns.qn
    bookmark_id = str(box_id)

    # Bookmark start and end must share one id, and the bookmark name lives on
    # the start element. Both are placed beside the runs, not inside them.
    start = make('w:bookmarkStart')
    start.set(qn('w:id'), bookmark_id)
    start.set(qn('w:name'), name)
    para._p.append(start)

    # Form-field data: the checkbox definition belongs inside w:ffData, which
    # in turn is a child of the field's "begin" character.
    ff_data = make('w:ffData')
    ff_name = make('w:name')
    ff_name.set(qn('w:val'), name)
    ff_data.append(ff_name)
    ff_data.append(make('w:enabled'))
    calc_on_exit = make('w:calcOnExit')
    calc_on_exit.set(qn('w:val'), '0')
    ff_data.append(calc_on_exit)

    checker = make('w:checkBox')
    checker.append(make('w:sizeAuto'))
    check_value = make('w:default')
    check_value.set(qn('w:val'), '1')  # '1' = checked by default
    checker.append(check_value)
    ff_data.append(checker)

    # The begin character was previously created but never attached to a run.
    fld_begin = make('w:fldChar')
    fld_begin.set(qn('w:fldCharType'), 'begin')
    fld_begin.append(ff_data)
    para.add_run()._r.append(fld_begin)

    instr = make('w:instrText')
    instr.set(qn('xml:space'), 'preserve')  # keep the surrounding spaces
    instr.text = ' FORMCHECKBOX '
    para.add_run()._r.append(instr)

    # NOTE(review): no "separate" field character is emitted between the
    # instruction and the end, as in the original - confirm Word accepts this.
    fld_end = make('w:fldChar')
    fld_end.set(qn('w:fldCharType'), 'end')
    para.add_run()._r.append(fld_end)

    end = make('w:bookmarkEnd')
    end.set(qn('w:id'), bookmark_id)
    para._p.append(end)
    return

Categories