I have code that collects information with Selenium, and I need to write that information to a .docx file following a template. Here is the information as I currently get it with print() (shown in part):
Stuyvesant High School
General Information
School Name:
Stuyvesant High School
Principal:
Mr. Eric Contreras
Principal’s E-mail:
ECONTRE#SCHOOLS.NYC.GOV
Type:
Regular school
Grade Span:
9-12
Address:
345 Chambers Street, New York, NY 10282
I am printing this information to the console, but I need to write it to a .docx file instead.
Here is the part of the code where I print:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import openpyxl
import docx
from docx.shared import Pt
# Raw strings keep the Windows backslashes literal without relying on
# unrecognised escape sequences (which newer Python versions warn about).
wb = openpyxl.load_workbook(r'D:\INSPR\Rating_100_schools\Top-100.xlsx')
sheet = wb['RI']

# Collect the values of cells A1..A100 into one flat list.
List = [
    cellObj.value
    for rowOfCellObjects in sheet['A1':'A100']
    for cellObj in rowOfCellObjects
]

# Filled by the check_* helpers while one school is processed.
School_list_result = []
# The worksheet title is appended to every search query.
State = sheet.title
driver = webdriver.Chrome(executable_path=r'D:\chromedriver')  # any path
def check_xpath(xpath):
    """Append the text of the element located by *xpath* to School_list_result.

    When the element cannot be found, the placeholder "No data." is
    appended instead, so exactly one entry is added per call.
    """
    try:
        found_text = driver.find_element_by_xpath(xpath).text
    except NoSuchElementException:
        found_text = "No data."
    School_list_result.append(found_text)
def check_text(partial_link_text):
    """Append the href of the link whose text contains *partial_link_text*.

    The value goes into School_list_result; "No data." is appended when no
    such link exists on the current page.
    """
    try:
        link = driver.find_element_by_partial_link_text(partial_link_text)
        link_target = link.get_attribute("href")
    except NoSuchElementException:
        link_target = "No data."
    School_list_result.append(link_target)
def check_click(clicker):
    """Click the first link whose text contains *clicker*.

    Prints "No click." when no matching link is present.
    """
    try:
        driver.find_element_by_partial_link_text(clicker).click()
    except NoSuchElementException:
        print("No click.")
def get_url(url, _xpath, send_keys):
    """Open *url*, type a query into the input at *_xpath* and submit it.

    The query is the module-level ``schools`` value followed by
    *send_keys*. Prints "No data." when the input element is not found.

    NOTE(review): this reads the global ``schools`` set by the calling
    loop rather than taking it as a parameter.
    NOTE(review): implicitly_wait presumably configures the driver's
    element-lookup timeout rather than pausing; confirm against the
    Selenium documentation.
    """
    driver.get(url)
    try:
        search_box = driver.find_element_by_xpath(_xpath)
        search_box.clear()
        driver.implicitly_wait(10)
        search_box.send_keys(schools, send_keys)
        # '\ue007' is sent as a separate keystroke to submit the query.
        search_box.send_keys('\ue007')
        driver.implicitly_wait(10)
    except NoSuchElementException:
        print("No data.")
# Scrape four sites for each selected school and print a formatted report.
# XPath attribute tests use '@id'; '#id' is not valid XPath syntax.
# `schools` must keep its name: get_url() reads it as a global.
for schools in List[98:100]:
    # ------------------------- GREAT SCHOOLS -------------------------
    get_url("https://www.google.com/", '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input', " " + State + " greatschools")
    # Open the first search hit.
    driver.find_element_by_xpath('//*[@id="rso"]/div[1]/div/div[1]/a/h3').click()
    check_xpath('//*[@id="hero"]/div/div[1]/h1')  # School Name
    check_xpath('/html/body/div[6]/div[8]/div/div[1]/div/div/div[2]/div[1]/div[2]/span[1]')  # Principal
    check_text('Principal email')  # Principal's E-mail
    check_xpath('//*[@id="hero"]/div/div[2]/div[2]/div[3]/div[2]')  # Grade Span
    check_xpath('//*[@id="hero"]/div/div[2]/div[1]/div[1]/div[1]/div[1]/a/div/span[2]')  # Address
    check_xpath('/html/body/div[6]/div[8]/div/div[1]/div/div/div[2]/div[2]/span/a')  # Phone
    check_text('Website')  # Website
    check_xpath('//*[@id="hero"]/div/div[2]/div[1]/div[1]/div[1]/div[2]/a')  # Associations/Communities
    check_xpath('//*[@id="hero"]/div/div[2]/div[2]/div[1]/div/a/div[1]/div')  # GreatSchools Rating
    check_xpath('//*[@id="Students"]/div/div[2]/div[1]/div[2]')  # Enrollment by Race/Ethnicity

    # ------------------------------ NCES ------------------------------
    driver.implicitly_wait(10)
    get_url("https://nces.ed.gov/search/index.asp?q=&btnG=Search#gsc.tab=0", '//*[@id="qt"]', " " + State)
    check_click('Search for Public Schools - ')
    driver.implicitly_wait(10)
    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[4]/td/table/tbody/tr[7]/td[1]/font[2]')  # School type
    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[4]/td/table/tbody/tr[7]/td[3]/font')  # Charter
    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[12]/td/table/tbody/tr[3]/td/table/tbody/tr[2]/td/table/tbody')  # Enrollment by Gender
    check_xpath('/html/body/div[1]/div[3]/table/tbody/tr[12]/td/table/tbody/tr[1]/td/table/tbody/tr[2]')  # Enrollment by Grade

    # ----------------------------- USNEWS -----------------------------
    driver.implicitly_wait(10)
    url = "https://www.usnews.com/education/best-high-schools/new-york/rankings"
    driver.get(url)
    check_click(schools)
    driver.implicitly_wait(10)
    check_xpath('//*[@id="app"]/div/div/div/div[1]/div/div/div[2]/div[1]/div[2]/p[3]')  # U.S.News Rankings

    # -------------------------- PUBLIC REVIEW --------------------------
    driver.implicitly_wait(10)
    get_url("https://www.google.com/", '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input', " " + State + " publicschoolreview")
    driver.find_element_by_partial_link_text('(2020)').click()
    driver.implicitly_wait(10)
    check_xpath('//*[@id="quick_stats"]/div/div[2]/ul/li[2]/strong')  # Total # Students
    check_xpath('//*[@id="total_teachers_data_row"]/td[2]')  # Full-Time Teachers
    check_xpath('//*[@id="quick_stats"]/div/div[2]/ul/li[3]/strong')  # Student/Teacher Ratio

    # ------------------------ PRINT INFORMATION ------------------------
    # Every check_* call above appends exactly one entry, so the list can be
    # unpacked in call order instead of being addressed by magic indexes.
    (
        school_name, principal, principal_email, grade_span, address, phone,
        website, associations, greatschools_rating, enrollment_by_race,
        school_type, charter, enrollment_by_gender, enrollment_by_grade,
        usnews_ranking,
        total_students, full_time_teachers, student_teacher_ratio,
    ) = School_list_result

    print(" ---------------------------------------------------------------" + "\n",
          " \033[1m", schools, "\033[0m" + "\n",
          " ---------------------------------------------------------------" + "\n",
          " \033[1mGeneral Information\033[0m " + "\n",
          "\033[1mSchool Name:\n\033[0m", school_name + "\n",
          "\033[1mPrincipal:\n\033[0m", principal + "\n",
          "\033[1mPrincipal’s E-mail:\n\033[0m", principal_email + "\n",
          "\033[1mType:\n\033[0m", school_type + "\n",
          "\033[1mGrade Span:\n\033[0m", grade_span + "\n",
          "\033[1mAddress:\n\033[0m", address + "\n",
          "\033[1mPhone:\n\033[0m", phone + "\n",
          "\033[1mWebsite:\n\033[0m", website + "\n",
          "\033[1mAssociations/Communities:\n\033[0m", associations + "\n",
          "\033[1mGreatSchools Summary Rating:\n\033[0m", greatschools_rating + "\n",
          "\033[1mU.S.News Rankings:\n\033[0m", usnews_ranking + "\n",
          " \033[1mSchool Details\033[0m" + "\n",
          "\033[1mTotal # Students:\n\033[0m", total_students + "\n",
          "\033[1mFull-Time Teachers:\n\033[0m", full_time_teachers + "\n",
          "\033[1mStudent/Teacher Ratio:\n\033[0m", student_teacher_ratio + "\n",
          "\033[1mCharter:\n\033[0m", charter + "\n",
          "\033[1mMagnet: \n\033[0m", "No\n",
          " \033[1mEnrollment Data\033[0m" + "\n",
          "\033[1mEnrollment by Race/Ethnicity: \n\033[0m", enrollment_by_race + "\n",
          "\033[1mEnrollment by Gender: \n\033[0m", enrollment_by_gender + "\n",
          "\033[1mEnrollment by Grade: \n\033[0m", enrollment_by_grade + "\n",
          )
    print()
    # Start the next school with an empty result list.
    School_list_result.clear()
What I need: to write this result into a .docx file following the template, instead of printing it to the console.
One more thing: if you know how to avoid indexing (like School_list_result[0]), please tell me.
I assume you are on a Windows operating system, just as I am, and know how to install Python packages:
Install the docx and python-docx modules (they are different; make sure you have installed both).
Use the following code:
# Sample record for one school; entries line up position by position with
# the labels in `headers` below.
School_list_result = [
    "Stuyvesant High School",
    "Mr. Eric Contreras",
    "ECONTRE#SCHOOLS.NYC.GOV",
    "Regular school",
    "9-12",
    "345 Chambers Street, New York, NY 10282",
]

# Field labels, in the same order as the record values.
headers = [
    "School Name: ",
    "Principal: ",
    "Principal's Email: ",
    "Type: ",
    "Grade Span: ",
    "Address: ",
]
def print_into_one_doc():
    """Write the record in ``School_list_result`` into a .docx file.

    Prompts for a file path, opens that document (or starts a blank one when
    the file is missing or empty), then appends a centred title taken from
    ``School_list_result[0]``, a "General Information" heading and one
    bold-label / italic-value paragraph pair per entry of ``headers``.
    A page break is added and the document is saved back to the same path.

    Raises:
        ValueError: if nothing was entered at the prompt.
    """
    import os

    from docx import Document
    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
    from docx.shared import Pt, RGBColor

    entered = input('hold shift key right click, copy and paste the file path of docx here: ')
    # "Copy as path" wraps the path in quotes. strip() removes them and,
    # unlike indexing the first character, does not fail on empty input.
    cleaned = entered.strip().strip('"\'')
    if not cleaned:
        raise ValueError('no docx path was entered')
    p = os.path.abspath(cleaned)

    # A missing or zero-byte file is not a valid package for python-docx, so
    # start from the library's blank default document in that case.
    if os.path.isfile(p) and os.path.getsize(p) > 0:
        doc = Document(p)
    else:
        doc = Document()

    # Centred, coloured title.
    title = doc.add_paragraph()
    title.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
    title_run = title.add_run(School_list_result[0])
    title_run.font.color.rgb = RGBColor(54, 95, 145)
    title_run.font.size = Pt(36)

    doc.add_paragraph('\n')
    section_run = doc.add_paragraph().add_run('General Information')
    section_run.font.size = Pt(30)

    # zip() pairs each label with its value and stops at the shorter list.
    for header, value in zip(headers, School_list_result):
        label_run = doc.add_paragraph().add_run(header)
        label_run.bold = True
        label_run.font.size = Pt(23)

        value_run = doc.add_paragraph().add_run(value)
        value_run.font.size = Pt(22)
        value_run.italic = True

    doc.add_page_break()
    doc.save(p)


print_into_one_doc()
If you have a list that contains several School_list_result records, iterate through it. Here is an example:
# One inner list per school; each inner list follows the order of `headers`.
List_of_school_list_result = [
    [
        "Stuyvesant High School",
        "Mr. Eric Contreras",
        "ECONTRE#SCHOOLS.NYC.GOV",
        "Regular school",
        "9-12",
        "345 Chambers Street, New York, NY 10282",
    ],
    [
        "Great Lake College",
        "Mr. Jason Madunic",
        "MADUNIC#SCHOOLS.VIC.GOV",
        "Public school",
        "6-12",
        "167A High Street, Melbourne, VIC 3228",
    ],
]

# Field labels, in the same order as the values of every record.
headers = [
    "School Name: ",
    "Principal: ",
    "Principal's Email: ",
    "Type: ",
    "Grade Span: ",
    "Address: ",
]
def print_all_into_one_doc():
    """Write every record of ``List_of_school_list_result`` into a .docx file.

    Prompts for a file path, opens that document (or starts a blank one when
    the file is missing or empty) and, for each record, appends a centred
    title taken from the record's first entry, a "General Information"
    heading, one bold-label / italic-value paragraph pair per entry of
    ``headers``, and a page break. The document is saved back to the same
    path after all records have been written.

    Raises:
        ValueError: if nothing was entered at the prompt.
    """
    import os

    from docx import Document
    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
    from docx.shared import Pt, RGBColor

    entered = input('hold shift key right click, copy and paste the file path of docx here: ')
    # "Copy as path" wraps the path in quotes. strip() removes them and,
    # unlike indexing the first character, does not fail on empty input.
    cleaned = entered.strip().strip('"\'')
    if not cleaned:
        raise ValueError('no docx path was entered')
    p = os.path.abspath(cleaned)

    # A missing or zero-byte file is not a valid package for python-docx, so
    # start from the library's blank default document in that case.
    if os.path.isfile(p) and os.path.getsize(p) > 0:
        doc = Document(p)
    else:
        doc = Document()

    for record in List_of_school_list_result:
        # Centred, coloured title; adjust the colour here if desired.
        title = doc.add_paragraph()
        title.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        title_run = title.add_run(record[0])
        title_run.font.color.rgb = RGBColor(54, 95, 145)
        title_run.font.size = Pt(36)

        doc.add_paragraph('\n')
        section_run = doc.add_paragraph().add_run('General Information')
        section_run.font.size = Pt(30)

        # zip() pairs each label with its value and stops at the shorter one.
        for header, value in zip(headers, record):
            label_run = doc.add_paragraph().add_run(header)
            label_run.bold = True
            label_run.font.size = Pt(23)

            value_run = doc.add_paragraph().add_run(value)
            value_run.font.size = Pt(22)
            value_run.italic = True

        # Each school starts on its own page.
        doc.add_page_break()

    doc.save(p)


print_all_into_one_doc()
Let's make it simple, what you need to do is:
create a list named List_of_school_list_result, dump your data in, each of them should be one single record of a certain school.
in any location, create a new docx file, double click to open it, write some stuff, press ctrl + s, delete what you have written, press ctrl + s, close the document.
go to the directory where your docx file is, hold on shift, right click, copy as path.
make sure docx and python-docx are installed, run the code, when you are asked to input the path, paste it in from your clipboard. (Please make sure you use an absolute path, which is a full directory with root c, a relative path may not work).
PS: the reason you have to open the docx file after creating it is that a Microsoft Word 2005+ docx file has 3 modes. First, if it is brand new after creation, it is in binary format. Second, when we open it to edit, Word generates a hidden $cache.docx file in the same directory to ensure performance and to secure the data in case of a crash. Third, once it has been edited and saved, the format is turned into XML, which is EDITABLE using the python-docx module.
PS: the Result class below provides a clear way to build List_of_school_list_result:
class Result:
    """Group a flat stream of pushed values into fixed-length records.

    Every ``length`` consecutive values passed to :meth:`push` form one
    record. Values of a record that is still incomplete stay in ``col``
    and are not part of the published result.
    """

    def __init__(self, length):
        """Create a collector whose records hold *length* values each.

        Raises:
            ValueError: if *length* is smaller than 1; such a collector
                could never complete a record.
        """
        if length < 1:
            raise ValueError("length must be at least 1")
        self.l = length   # number of values per record
        self.res = []     # completed records
        self.col = []     # record currently being filled

    def push(self, string):
        """Add one value; close the current record once it is full."""
        self.col.append(string)
        if len(self.col) == self.l:
            self.res.append(self.col)
            self.col = []

    def publish(self):
        """Return the list of completed records."""
        return self.res
# Pass in the number of headers; then push every value in order and assign
# the published result to `List_of_school_list_result`.
r = Result(6)
for _field in (
    'school name 1',
    'principal name 1',
    'principal email 1',
    'school type 1',
    'grad span 1',
    'address 1',
    'school name 2',
    'principal name 2',
    'principal email 2',
    'school type 2',
    'grad span 2',
    'address 2',
):
    r.push(_field)
List_of_school_list_result = r.publish()
Complete version of code:
# Field labels; every record must list its values in this order.
headers = [
    "School Name: ",
    "Principal: ",
    "Principal's Email: ",
    "Type: ",
    "Grade Span: ",
    "Address: ",
]
class Result:
    """Group a flat stream of pushed values into fixed-length records.

    Every ``length`` consecutive values passed to :meth:`push` form one
    record. Values of a record that is still incomplete stay in ``col``
    and are not part of the published result.
    """

    def __init__(self, length):
        """Create a collector whose records hold *length* values each.

        Raises:
            ValueError: if *length* is smaller than 1; such a collector
                could never complete a record.
        """
        if length < 1:
            raise ValueError("length must be at least 1")
        self.l = length   # number of values per record
        self.res = []     # completed records
        self.col = []     # record currently being filled

    def push(self, string):
        """Add one value; close the current record once it is full."""
        self.col.append(string)
        if len(self.col) == self.l:
            self.res.append(self.col)
            self.col = []

    def publish(self):
        """Return the list of completed records."""
        return self.res
# One record holds as many values as there are headers.
r = Result(len(headers))
# Call r.push() over and over again, until all the string data is passed in.
# For example:
#     r.push('school name 1')
#     r.push('principal name 1')
#     r.push('principal email 1')
#     r.push('school type 1')
#     r.push('grad span 1')
#     r.push('address 1')
#     r.push('school name 2')
#     r.push('principal name 2')
#     r.push('principal email 2')
#     r.push('school type 2')
#     r.push('grad span 2')
#     r.push('address 2')
List_of_school_list_result = r.publish()
def print_all_into_one_doc():
    """Write every record of ``List_of_school_list_result`` into a .docx file.

    Prompts for a file path, opens that document (or starts a blank one when
    the file is missing or empty) and, for each record, appends a centred
    title taken from the record's first entry, a "General Information"
    heading, one bold-label / italic-value paragraph pair per entry of
    ``headers``, and a page break. The document is saved back to the same
    path after all records have been written.

    Raises:
        ValueError: if nothing was entered at the prompt.
    """
    import os

    from docx import Document
    from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
    from docx.shared import Pt, RGBColor

    entered = input('hold shift key right click, copy and paste the file path of docx here: ')
    # "Copy as path" wraps the path in quotes. strip() removes them and,
    # unlike indexing the first character, does not fail on empty input.
    cleaned = entered.strip().strip('"\'')
    if not cleaned:
        raise ValueError('no docx path was entered')
    p = os.path.abspath(cleaned)

    # A missing or zero-byte file is not a valid package for python-docx, so
    # start from the library's blank default document in that case.
    if os.path.isfile(p) and os.path.getsize(p) > 0:
        doc = Document(p)
    else:
        doc = Document()

    for record in List_of_school_list_result:
        # Centred, coloured title; adjust the colour here if desired.
        title = doc.add_paragraph()
        title.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        title_run = title.add_run(record[0])
        title_run.font.color.rgb = RGBColor(54, 95, 145)
        title_run.font.size = Pt(36)

        doc.add_paragraph('\n')
        section_run = doc.add_paragraph().add_run('General Information')
        section_run.font.size = Pt(30)

        # zip() pairs each label with its value and stops at the shorter one.
        for header, value in zip(headers, record):
            label_run = doc.add_paragraph().add_run(header)
            label_run.bold = True
            label_run.font.size = Pt(23)

            value_run = doc.add_paragraph().add_run(value)
            value_run.font.size = Pt(22)
            value_run.italic = True

        # Each school starts on its own page.
        doc.add_page_break()

    doc.save(p)


print_all_into_one_doc()
I'm wondering if there's a way to add a title or text to a folium map in Python.
I have 8 maps to show, and I want the user to know which map they're looking at without having to click on a marker. I tried to attach an image of the map, but couldn't because my reputation score isn't high enough.
My code:
# Build a clustered marker map, one marker per row of `harvey_losses`.
corpus_chris_loc = [27.783889, -97.510556]
harvey_avg_losses_map = folium.Map(location=corpus_chris_loc, zoom_start=5)
marker_cluster = MarkerCluster().add_to(harvey_avg_losses_map)

# Inside the loop, add each marker to the cluster.
for row_index, row_values in harvey_losses.iterrows():
    loc = [row_values['lat'], row_values['lng']]
    # Popup shows the zip code and its average.
    pop = "zip code: {zip_code!s}\nzip_avg: ${zip_avg!s}".format(
        zip_code=row_values["loss_location_zip"],
        zip_avg=row_values['zip_avg'],
    )
    icon = folium.Icon(color='red')
    marker = folium.Marker(
        location=loc,
        popup=pop,
        icon=icon,
        title="Harvey: ${!s}".format(row_values['harvey_avg']),
    )
    marker.add_to(marker_cluster)

# Save an interactive HTML map by calling .save().
harvey_avg_losses_map.save('../data/harveylossclustermap.html')
harvey_avg_losses_map[map of hurricane harvey insurance claims][1]
Of course you can add a title to a Folium map.
For example:
import folium
# The title is injected as a raw HTML element into the map's root document.
loc = 'Corpus Christi'
title_html = f'\n<h3 align="center" style="font-size:16px"><b>{loc}</b></h3>\n'

m = folium.Map(location=[27.783889, -97.510556], zoom_start=12)
title_element = folium.Element(title_html)
m.get_root().html.add_child(title_element)

m.save('map-with-title.html')
m
I'm able to use a paragraph object to select font size, color, bold, etc. within a table cell. But add_paragraph() always seems to insert a leading \n into the cell, and this messes up the formatting of some tables.
If I just assign to cell.text, it doesn't insert this newline, but then I can't control the text attributes.
Is there a way to eliminate this leading newline?
Here is my function:
def add_table_cell(table, row, col, text, fontSize=8, r=0, g=0, b=0, width=-1):
    """Write left-aligned, non-bold *text* into ``table.cell(row, col)``.

    Args:
        table: table object providing ``cell(row, col)``.
        row, col: position of the target cell.
        text: text for the new run.
        fontSize: font size in points.
        r, g, b: RGB components of the font colour.
        width: cell width in inches; -1 leaves the width untouched.

    A fresh cell already holds one empty paragraph. That paragraph is reused
    so the text is not preceded by a blank line; a new paragraph is added
    only when the cell already has content.
    """
    cell = table.cell(row, col)
    if width != -1:
        cell.width = Inches(width)

    existing = cell.paragraphs
    cell_is_blank = (
        len(existing) == 1
        and not existing[0].runs
        and not existing[0].text
    )
    para = existing[0] if cell_is_blank else cell.add_paragraph(style=None)

    para.alignment = WD_ALIGN_PARAGRAPH.LEFT
    run = para.add_run(text)
    run.bold = False
    run.font.size = Pt(fontSize)
    # Assigning an RGBColor sets the colour; no separate type step is needed.
    run.font.color.rgb = RGBColor(r, g, b)
I tried the following and it worked out for me. Not sure if is the best approach:
first_cell = cells[0]
# Write the text to the cell.
first_cell.text = 'Some text'
# Modify the alignment of the cell's first paragraph.
first_paragraph = first_cell.paragraphs[0]
first_paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
The solution I found is to use the text attribute instead of add_paragraph(), and then use add_run():
total_cell = row_cells[0]
# Reset the cell text, then style a run in its first paragraph.
total_cell.text = ''
total_run = total_cell.paragraphs[0].add_run('Total')
total_run.bold = True
total_cell.paragraphs[0].paragraph_format.alignment = WD_ALIGN_PARAGRAPH.RIGHT
I've looked through the documentation of the cell class, and the problem is not add_paragraph() itself. The problem is that a new cell, by default, already has a paragraph inside it.
class docx.table._Cell:
paragraphs: ... By default, a new cell contains a single paragraph. Read-only
Therefore, if you want your added paragraph to be the first line of the cell, you should delete the default paragraph first. Since python-docx doesn't have paragraph.delete(), you can use the function mentioned in this GitHub issue: feature: Paragraph.delete()
def delete_paragraph(paragraph):
    """Detach *paragraph* from its parent and invalidate the proxy object.

    The underlying element is removed from its parent, then the paragraph's
    own ``_p`` and ``_element`` references are cleared so the stale proxy
    can no longer be used by accident. Calling this again on an already
    deleted paragraph does nothing.
    """
    element = paragraph._element
    if element is None:
        # Already deleted.
        return
    parent = element.getparent()
    if parent is not None:
        parent.remove(element)
    # Clear the references held by the paragraph proxy, not by the element.
    paragraph._p = paragraph._element = None
Therefore, you should do something like:
cell = table.cell(0, 0)
# Remove the paragraph every new cell starts with, then add the styled one.
delete_paragraph(cell.paragraphs[0])
paragraph = cell.add_paragraph('text you want to add', style='style you want')
Update at 10/8/2022
Sorry, the above approach is rather unnecessary.
It's much more intuitive to edit the default paragraph than to delete it first and then add a new one.
For the function add_table_cell, just replace the add_paragraph() call with para = cell.paragraphs[0]
and para.style = None; the para.style = None is not strictly necessary, as it should be the default value for a new paragraph.
Here is what worked for me. I don't call add_paragraph(). I just reference the first paragraph with this call -> para = cell.paragraphs[0]. Everything else after that is the usual api calls.
table = doc.add_table(rows=1, cols=3)  # bar codes
for tableRow in table.rows:
    for cell in tableRow.cells:
        # Reuse the paragraph every new cell already contains.
        para = cell.paragraphs[0]

        # First run: the ID wrapped in asterisks, set in the barcode font.
        run = para.add_run("".join(["*", specIDStr, "*"]))
        font = run.font
        font.name, font.size = 'Free 3 of 9', Pt(20)

        # Second run: human-readable ID, name and date of birth.
        caption = "".join([
            "\n", specIDStr,
            "\n", firstName, " ", lastName,
            "\tDOB: ", dob,
        ])
        run = para.add_run(caption)
        font = run.font
        font.name, font.size = 'Arial', Pt(8)