How to read Excel file, create a QR code? - python

I was trying with this code
from openpyxl import load_workbook
import qrcode
wb = load_workbook("D:\QR\qrcodes.xlsx")
ws = wb.['Sheet1']
column = ws['A'] # Column
data = [column[x].value for x in range(len(column))]
print(data)
qr = qrcode.QRCode(version = 1, error_correction = qrcode.constants.ERROR_CORRECT_H,box_size = 10, border = 4)
ext=".png"
for images in data:
qr.add_data(i)
qr.make(fit=True)
img=qr.make_image()
img.save("{}{}".format(i,ext))
But after every loop the image created contains the value of the previous image also, how to solve that?

You are creating the QR object outside the loop.
You're better off initializing the object <class 'qrcode.main.QRCode'> inside your for loop and also using a function to create your QR image as variables inside a function have a local scope.
ext=".png"
def createQr(data):
qr = qrcode.QRCode(version = 1, error_correction = qrcode.constants.ERROR_CORRECT_H,box_size = 10, border = 4)
qr.add_data(data)
qr.make(fit=True)
img=qr.make_image()
return img
for i in data:
img = createQr(i)
img.save("{}{}".format(i,ext))
Also as mentioned by #martineau you have to change your loop variable from images to i

Firstly please convert to csv. Then you should add enumerate so you will also have an index number for your file names instead of having multiple file.jpg.
import csv
import qrcode
with open('D:\QR\qrcodes.csv') as csvfile:
fieldnames= ["Your_Column"]
reader= csv.reader(csvfile)
qr = qrcode.QRCode(
version=1,
error_correction=qrcode.constants.ERROR_CORRECT_L,
box_size=10,
border=4,
)
for i, row in enumerate(reader):
labeldata = row[0]
qr.add_data(labeldata)
qr.make(fit=True)
img = qr.make_image()
img.save("test{}.jpg".format(i))

From an Excel file, it reads data in "A" columns starting from the second row and produces their QR codes, and creates a new excel file named "qrcode_produced" that has QR codes produced in the B column.
# modules needed
import qrcode
from tkinter import filedialog
from tkinter import *
import openpyxl
from openpyxl import Workbook
from openpyxl.styles import Alignment
from openpyxl import load_workbook
#select the excel file to be read
# the texts must be in the "A" column starting with "2" row. In the B column, qrcodes will be seen.
print('select xlsx file:')
root = Tk()
root.filename = filedialog.askopenfilename(initialdir = "/",title = "Select file",filetypes = (("xlsx files","*.xlsx"),("all files","*.*")))
print (root.filename)
# select the folder to save qrcodes as png format images and excel file with qrcodes
print('where to save excel file and qrcodes:')
root2 = Tk()
root2.withdraw()
folder_selected = filedialog.askdirectory()
# read the excel file
workbook = load_workbook(str(root.filename))
sheet = workbook.active
# settings for qrcode to be produced
qr = qrcode.QRCode(
version=1,
error_correction=qrcode.constants.ERROR_CORRECT_L,
box_size=4,
border=2,)
# excel file cell size settings that will be produced
sheet.column_dimensions['B'].width = 25
for i in range(1,len(sheet['A'])+1):
sheet.row_dimensions[i+1].height=150
# Title of B column
sheet["B1"]="Qr_Codes"
# production of qrcodes for each row in the A column except first row. Skips the empty rows.
for i in range(2,len(sheet['A'])+1):
if sheet.cell(row=i, column=1).value is None:
continue
else:
qr.add_data(str(sheet.cell(row=i, column=1).value))
qr.make(fit=True)
img = qr.make_image()
img.save(folder_selected + "/" + "row_"+str(i)+"_qrcode.png")
img=openpyxl.drawing.image.Image(folder_selected + "/" + "row_"+str(i)+"_qrcode.png")
img.anchor = "B" + str(i)
sheet.add_image(img)
sheet["B" + str(i)].alignment = Alignment(horizontal='center', vertical='center')
sheet["A" + str(i)].alignment = Alignment(horizontal='center', vertical='center')
# saving the excel file
workbook.save(folder_selected+ "/qrcode_produced.xlsx")

Related

Python openpyxl add image to excel file

if i separate the code only for add image and only for copy the format all can work normally.But when i combine it and it's always come out : UserWarning: wmf image format is not supported so the image is being dropped.How to solve it ?
import openpyxl
import copy
from openpyxl.utils import get_column_letter
# It is not required for one to create a workbook on
# filesystem, therefore creating a virtual workbook
wrkb = openpyxl.Workbook()
# Number of sheets in the workbook (1 sheet in our case)
ws = wrkb.worksheets[0]
# Adding a row of data to the worksheet (used to
# distinguish previous excel data from the image)
path = "/Users/LEON/OneDrive/Desktop/copy.xlsx"
save_path = "/Users/LEON/OneDrive/Desktop/paste.xlsx"
wb = openpyxl.load_workbook(path)
wb2 = openpyxl.Workbook()
sheetnames = wb.sheetnames
for sheetname in sheetnames:
print(sheetname)
sheet = wb[sheetname]
sheet2 = wb2.create_sheet(sheetname)
# tab颜色
sheet2.sheet_properties.tabColor = sheet.sheet_properties.tabColor
# 开始处理合并单元格形式为“(<CellRange A1:A4>,),替换掉(<CellRange 和 >,)' 找到合并单元格
wm = list(sheet.merged_cells)
if len(wm) > 0:
for i in range(0, len(wm)):
cell2 = str(wm[i]).replace('(<CellRange ', '').replace('>,)', '')
sheet2.merge_cells(cell2)
for i, row in enumerate(sheet.iter_rows()):
sheet2.row_dimensions[i+1].height = sheet.row_dimensions[i+1].height
for j, cell in enumerate(row):
sheet2.column_dimensions[get_column_letter(j+1)].width = sheet.column_dimensions[get_column_letter(j+1)].width
sheet2.cell(row=i + 1, column=j + 1, value=cell.value)
# 设置单元格格式
source_cell = sheet.cell(i+1, j+1)
target_cell = sheet2.cell(i+1, j+1)
target_cell.fill = copy.copy(source_cell.fill)
if source_cell.has_style:
target_cell._style = copy.copy(source_cell._style)
target_cell.font = copy.copy(source_cell.font)
target_cell.border = copy.copy(source_cell.border)
target_cell.fill = copy.copy(source_cell.fill)
target_cell.number_format = copy.copy(source_cell.number_format)
target_cell.protection = copy.copy(source_cell.protection)
target_cell.alignment = copy.copy(source_cell.alignment)
# A wrapper over PIL.Image, used to provide image
# inclusion properties to openpyxl library
img = openpyxl.drawing.image.Image('hlrb_image.png')
# The Coordinates where the image would be pasted
# (an image could span several rows and columns
# depending on it's size)
img.anchor = 'A2'
# Adding the image to the worksheet
# (with attributes like position)
ws.add_image(img)
# Saving the workbook created under the name of out.xlsx
wrkb.save('out.xlsx')
i need a logo top there and copy format detail down there.If i separate run the code,it will delete the sheet.Like only the logo there or only the copy format there dont hv logo.

How can I iterate through excel files sheets and insert formula in Python?

I get this error
TypeError: 'Workbook' object is not subscriptable
when i run this code
import xlsxwriter
from openpyxl import load_workbook
in_folder = r'xxx' #Input folder
out_folder = r'xxx' #Output folder
if not os.path.exists(out_folder):
os.makedirs(out_folder)
file_exist = False
dir_list = os.listdir(in_folder)
for xlfile in dir_list:
if xlfile.endswith('.xlsx') or xlfile.endswith('.xls'):
file_exist = True
str_file = os.path.join(in_folder, xlfile)
work_book = xlsxwriter.Workbook(filename=str_file)
work_sheet = work_book['test1'] #error above is thrown here
work_sheet.write_formula('C2', '=A2+B2') #Add formular but not sure of how to apply it to the entire column.
out_Path = os.path.join(out_folder,work_book)
Edit:
I managed to figure out the above and using this code:-
work_book = openpyxl.load_workbook(os.path.join(in_folder,xlfile))
work_sheet = work_book['test1']
However, the issue formulas still exists in the new code below:-
from openpyxl import load_workbook
in_folder = r'xxx' #Input folder
out_folder = r'xxx' #Output folder
if not os.path.exists(out_folder):
os.makedirs(out_folder)
file_exist = False
dir_list = os.listdir(in_folder)
for xlfile in dir_list:
if xlfile.endswith('.xlsx') or xlfile.endswith('.xls'):
str_file = xlfile
work_book = openpyxl.load_workbook(os.path.join(in_folder,str_file))
work_sheet = work_book['Sheet1']
row_count = work_sheet.max_row
for row in work_sheet.iter_rows(min_row=1, min_col=1, max_row=work_sheet.max_row):
print(row_count)
for i, cellObj in enumerate(work_sheet['U'], 2):
cellObj.value = f'=Q{row_count}-T{row_count}'
work_book.save(os.path.join(out_folder, xlfile))
Ideally, I would like to loop through a folder with .xlsx files, add a formular and apply it to the entire column (U). In this case, I would like to save the files(with the formula effected) in another folder(out_folder).
Documentation for xlsxwriter.Workbook shows
work_book.get_worksheet_by_name('test1')
Maybe openpyxl or other module could use ['test1']

Taking Same Worksheet from a Folder of xlsm Files with Python

I'm new to pandas/python and Ive come up with the following code to extract data from a specific part of a worksheet.
import openpyxl as xl
import pandas as pd
rows_with_data = [34,37,38,39,44,45,46,47,48,49, 50,54,55,57,58,59,60,62,63,64,65,66,70,71,72,76,77, 78,79,80,81,82,83,84,88,89,90,91,92]
path = r'XXX'
xpath = input('XXX')
file = r'**.xlsm'
xfile = input('Change file name, current is ' + file + ' :')
sheetname = r'Summary'
wb = xl.load_workbook(filename = xpath + '\\' +file, data_only = True)
sheet = wb.get_sheet_by_name(sheetname)
rows = len(rows_with_data)
line_items = []
for i in range(rows) :
line_items.append(sheet.cell(row = rows_with_data[i], column = 13).value)
period = []
for col in range(17,35):
period.append(sheet.cell(row = 20, column = col).value)
print(line_items)
vals = []
x = []
for i in range(rows):
if i != 0:
vals.append(x)
x = []
for col in range(17,35):
x.append(sheet.cell(row = rows_with_data[i], column = col).value)
vals.append(x)
all_values = {}
all_values['Period'] = period
for i in range(rows):
print(line_items[i])
all_values[line_items[i]] = vals[i]
print(all_values)
period_review = input('Enter a period (i.e. 2002): ')
item = input('Enter a period (i.e. XXX): ')
time = period.index(period_review)
display_item = str(all_values[item][time])
print(item + ' for ' + period_review + " is " + display_item)
Summary_Dataframe = pd.DataFrame(all_values)
writer = pd.ExcelWriter(xpath + '\\' + 'values.xlsx')
Summary_Dataframe.to_excel(writer,'Sheet1')
writer.save()
writer.close()
I have the same worksheet (summary results) across a library of 60 xlsm files and I'm having a hard time figuring out how to iterate this across the entire folder of files. I also want change this from extracting specific rows to taking the entire "Summary" worksheet, pasting it to the new file and naming the worksheet by its filename ("Experiment_A") when pasted to the new excel file. Any advice?
I was having hard time to read your code to understand that what you want to do finally. So it is just an advice not a solution. You can iterate through all files in the folder using os then read the files in to one dataframe then save the single big data frame in to csv. I usually avoid excel but I guess you need the excel conversion. In the example below I have read all txt file from a directory put them in to dataframe list then store the big data frame as json. You can also store it as excel/csv.
import os
import pandas as pd
def process_data():
# input file path in 2 part in case it is very long
input_path_1 = r'\\path\to\the\folder'
input_path_2 = r'\second\part\of\the\path'
# adding the all file path
file_path = input_path_1 + input_path_2
# listing all file in the file folder
file_list = os.listdir(os.path.join(file_path))
# selecting only the .txt files in to a list object
file_list = [file_name for file_name in file_list if '.txt' in file_name]
# selecting the fields we need
field_names = ['country', 'ticket_id']
# defining a list to put all the datafremes in one list
pd_list = []
inserted_files = []
# looping over txt files and storing in to database
for file_name in file_list:
# creating the file path to read the file
file_path_ = file_path + '\\' + file_name
df_ = pd.read_csv(os.path.join(file_path_), sep='\t', usecols=field_names)
# converting the datetime to date
# few internal data transformation example before writting
df_['sent_date'] = pd.to_datetime(df_['sent_date'])
df_['sent_date'] = df_['sent_date'].values.astype('datetime64[M]')
# adding each dataframe to the list
pd_list.append(df_)
# adding file name to the inserted list to print later
inserted_files.append(file_name)
print(inserted_files)
# sql like union all dataframes and create a single data source
df_ = pd.concat(pd_list)
output_path_1 = r'\\path\to\output'
output_path_2 = r'\path\to\output'
output_path = output_path_1 + output_path_2
# put the file name
file_name = 'xyz.json'
# adding the day the file processed
df_['etl_run_time'] = pd.to_datetime('today').strftime('%Y-%m-%d')
# write file to json
df_.to_json(os.path.join(output_path, file_name), orient='records')
return print('Data Stored as json successfully')
process_data()

Odoo image in excel

I'm creating an excel file with xlsxwriter and need to place my company logo into these excel file.. I've been trying with insert_image but not success.
I suppose that is something like parse partner.image into a buffer... but im stuck.. Pleace your help.
this is my code.
#api.multi
def report_print(self):
output=io.BytesIO()
book=xlsxwriter.Workbook(output)
sheet1=book.add_worksheet("PCA")
sheet1.write('A1','PCA')
#=======================================================================
# Looking for partner data
#=======================================================================
user=self.env['res.users'].browse(self.env.uid)
partner = self.env['res.partner'].browse(user.company_id.id)
#copy partner name in B1
partner_name = partner.name
sheet1.write("B1",partner_name)
#put partner logo in B3
buf_image=io.BytesIO(partner.image)
sheet1.insert_image('B3',base64.b64encode(buf_image.getvalue()),{'image_data': buf_image})
book.close()
self.write({
'file':base64.b64encode(output.getvalue())})
In Odoo v11 I use :
buf_image=io.BytesIO(base64.b64decode(partner.image))
sheet1.insert_image('B3', "any_name.png", {'image_data': buf_image})
this is the format for adding images in worksheet
import xlsxwriter
# Create an new Excel file and add a worksheet.
workbook = xlsxwriter.Workbook('images.xlsx')
worksheet = workbook.add_worksheet()
# Widen the first column to make the text clearer.
worksheet.set_column('A:A', 30)
# Insert an image.
worksheet.write('A2', 'Insert an image in a cell:')
worksheet.insert_image('B2', 'python.png')
# Insert an image offset in the cell.
worksheet.write('A12', 'Insert an image with an offset:')
worksheet.insert_image('B12', 'python.png', {'x_offset': 15, 'y_offset': 10})
# Insert an image with scaling.
worksheet.write('A23', 'Insert a scaled image:')
worksheet.insert_image('B23', 'python.png', {'x_scale': 0.5, 'y_scale': 0.5})
workbook.close()
In case of a stored image in Odoo look here an example using openpyxl, use the same format.
from openpyxl import Workbook
from openpyxl.writer.excel import ExcelWriter
from openpyxl.drawing import Image
from PIL import Image as PILImage
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
wb = Workbook()
ws = wb.get_active_sheet()
#extra has the data of the image from the database
im = PILImage.open(StringIO(extra))
img = Image(im)
img.anchor(ws.cell('F1'))
ws.add_image(img)
handler = StringIO()
writer = ExcelWriter(wb)
writer.save(handler)
xls = handler.getvalue()
handler.close()
Finaly did it with openpyxl
#api.multi
def report_print(self):
user=self.env['res.users'].browse(self.env.uid)
partner = self.env['res.partner'].browse(user.company_id.id)
partner_name = partner.name
wb = Workbook()
ws = wb.get_active_sheet()
binaryData=partner.image_medium
data=base64.b64decode(binaryData)
im = PILImage.open(BytesIO(data))
img = OPYImage(im)
ws.add_image(img, "A3")
width, height = im.size
#=======================================================================
# more code
#=======================================================================
output=BytesIO()
wb.save(output)
self.write({
'file':base64.b64encode(output.getvalue()),
'file_name':'my_file_name_.xlsx'
})
wb.close()
output.close()
It works in Odoo 11 and Python 3

From password-protected Excel file to pandas DataFrame

I can open a password-protected Excel file with this:
import sys
import win32com.client
xlApp = win32com.client.Dispatch("Excel.Application")
print "Excel library version:", xlApp.Version
filename, password = sys.argv[1:3]
xlwb = xlApp.Workbooks.Open(filename, Password=password)
# xlwb = xlApp.Workbooks.Open(filename)
xlws = xlwb.Sheets(1) # counts from 1, not from 0
print xlws.Name
print xlws.Cells(1, 1) # that's A1
I'm not sure though how to transfer the information to a pandas dataframe. Do I need to read cells one by one and all, or is there a convenient method for this to happen?
Simple solution
import io
import pandas as pd
import msoffcrypto
passwd = 'xyz'
decrypted_workbook = io.BytesIO()
with open(i, 'rb') as file:
office_file = msoffcrypto.OfficeFile(file)
office_file.load_key(password=passwd)
office_file.decrypt(decrypted_workbook)
df = pd.read_excel(decrypted_workbook, sheet_name='abc')
pip install --user msoffcrypto-tool
Exporting all sheets of each excel from directories and sub-directories to seperate csv files
from glob import glob
PATH = "Active Cons data"
# Scaning all the excel files from directories and sub-directories
excel_files = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.xlsx'))]
for i in excel_files:
print(str(i))
decrypted_workbook = io.BytesIO()
with open(i, 'rb') as file:
office_file = msoffcrypto.OfficeFile(file)
office_file.load_key(password=passwd)
office_file.decrypt(decrypted_workbook)
df = pd.read_excel(decrypted_workbook, sheet_name=None)
sheets_count = len(df.keys())
sheet_l = list(df.keys()) # list of sheet names
print(sheet_l)
for i in range(sheets_count):
sheet = sheet_l[i]
df = pd.read_excel(decrypted_workbook, sheet_name=sheet)
new_file = f"D:\\all_csv\\{sheet}.csv"
df.to_csv(new_file, index=False)
Assuming the starting cell is given as (StartRow, StartCol) and the ending cell is given as (EndRow, EndCol), I found the following worked for me:
# Get the content in the rectangular selection region
# content is a tuple of tuples
content = xlws.Range(xlws.Cells(StartRow, StartCol), xlws.Cells(EndRow, EndCol)).Value
# Transfer content to pandas dataframe
dataframe = pandas.DataFrame(list(content))
Note: Excel Cell B5 is given as row 5, col 2 in win32com. Also, we need list(...) to convert from tuple of tuples to list of tuples, since there is no pandas.DataFrame constructor for a tuple of tuples.
from David Hamann's site (all credits go to him)
https://davidhamann.de/2018/02/21/read-password-protected-excel-files-into-pandas-dataframe/
Use xlwings, opening the file will first launch the Excel application so you can enter the password.
import pandas as pd
import xlwings as xw
PATH = '/Users/me/Desktop/xlwings_sample.xlsx'
wb = xw.Book(PATH)
sheet = wb.sheets['sample']
df = sheet['A1:C4'].options(pd.DataFrame, index=False, header=True).value
df
Assuming that you can save the encrypted file back to disk using the win32com API (which I realize might defeat the purpose) you could then immediately call the top-level pandas function read_excel. You'll need to install some combination of xlrd (for Excel 2003), xlwt (also for 2003), and openpyxl (for Excel 2007) first though. Here is the documentation for reading in Excel files. Currently pandas does not provide support for using the win32com API to read Excel files. You're welcome to open up a GitHub issue if you'd like.
Based on the suggestion provided by #ikeoddy, this should put the pieces together:
How to open a password protected excel file using python?
# Import modules
import pandas as pd
import win32com.client
import os
import getpass
# Name file variables
file_path = r'your_file_path'
file_name = r'your_file_name.extension'
full_name = os.path.join(file_path, file_name)
# print(full_name)
Getting command-line password input in Python
# You are prompted to provide the password to open the file
xl_app = win32com.client.Dispatch('Excel.Application')
pwd = getpass.getpass('Enter file password: ')
Workbooks.Open Method (Excel)
xl_wb = xl_app.Workbooks.Open(full_name, False, True, None, pwd)
xl_app.Visible = False
xl_sh = xl_wb.Worksheets('your_sheet_name')
# Get last_row
row_num = 0
cell_val = ''
while cell_val != None:
row_num += 1
cell_val = xl_sh.Cells(row_num, 1).Value
# print(row_num, '|', cell_val, type(cell_val))
last_row = row_num - 1
# print(last_row)
# Get last_column
col_num = 0
cell_val = ''
while cell_val != None:
col_num += 1
cell_val = xl_sh.Cells(1, col_num).Value
# print(col_num, '|', cell_val, type(cell_val))
last_col = col_num - 1
# print(last_col)
ikeoddy's answer:
content = xl_sh.Range(xl_sh.Cells(1, 1), xl_sh.Cells(last_row, last_col)).Value
# list(content)
df = pd.DataFrame(list(content[1:]), columns=content[0])
df.head()
python win32 COM closing excel workbook
xl_wb.Close(False)
Adding to #Maurice answer to get all the cells in the sheet without having to specify the range
wb = xw.Book(PATH, password='somestring')
sheet = wb.sheets[0] #get first sheet
#sheet.used_range.address returns string of used range
df = sheet[sheet.used_range.address].options(pd.DataFrame, index=False, header=True).value

Categories