import pandas as pd
# read by default 1st sheet of an excel file
dataframe1 = pd.read_excel(r'E:\Images\New Folder\afec9b91-5c2f-4cab-aca8-abd7bde854e0\P_SA_C0002_DcW_R1_01_FMV_000000000000.xlsx')
print(dataframe1)
Output
Sensor Longitude Sensor Latitude Survey ID
72.69362 32.090865 P_SA_C0002_DcW_R1_01
Now I want that output to written on specific image.
from PIL import Image,ImageDraw,ImageFont
import glob
import os
images=glob.glob("E:\Images/*.jpg")
for img in images:
images=Image.open(img)
draw=ImageDraw.Draw(images)
font=ImageFont.load_default()
import pandas as pd
# read by default 1st sheet of an excel file
dataframe1 = pd.read_excel(r'E:\Images\New Folder\afec9b91-5c2f-4cab-aca8-abd7bde854e0\P_SA_C0002_DcW_R1_01_FMV_000000000000.xlsx')
#print(dataframe1)
# write ="dataframe1"
text="print"
draw.text((0,240),text,(250,250,250),font=font)
images.save(img)
I am trying to write the output on image by using above code by its not working.Please help.
Related
I am trying to build a function that iterates over a bunch of names in a CSV I give then extracts the last serial number written from JSON file then adding one for each name and putting serial number beside every name in the csv, but what i get is that the function generates the first serial number successfully and saves it in Json file but fails to add it in the csv via pandas and fails to update the number in the JSON file.
this is the code of the function:
from docx import Document
import pandas as pd
from datetime import datetime
import time
import os
from docx2pdf import convert
import json
date=datetime.date(datetime.now())
strdate=date.strftime("%d-%m-%Y")
year=date.strftime("%Y")
month=date.strftime("%m")
def genrateserial(a):
jsonFile1 = open("data_file.json", "r")
lastserial = jsonFile1.read()
jsonFile1.close()
for d in range(len(lastserial)):
if lastserial[d]=="\"":
lastserial[d].replace("\"","")
jsonFile1.close()
if strdate=="01" or (month[1]!=lastserial[8]):
num=1
last=f"JO/{year}{month}{num}"
data=f"{last}"
jsonstring=json.dumps(data)
jsonfile2=open("data_file.json", "w")
jsonfile2.write(jsonstring)
jsonfile2.close()
database = pd.read_csv(a)
df = pd.DataFrame(database)
df = df.dropna(axis=0)
for z in range(len(df.Name)):
newentry=f"JO/{year}{month}{num+1}"
jsonstring1=json.dumps(newentry)
jsonfile3=open("data_file.json","w")
jsonfile3.write(jsonstring1)
jsonfile3.close()
df.iloc[[z],3]=newentry
genrateserial('database.csv')
I have a pdf file and extracting the data from pdf file using pdfquery and pandas
Code is as follows:
import pdfquery
import pandas as pd
pdf = pdfquery.PDFQuery('data/BUSTA_PAGA - 2.pdf')
pdf.load()
pdf.tree.write('pdfXML.txt', pretty_print = True)
Name = pdf.pq('LTTextLineHorizontal:overlaps_bbox("25.509, 188.273, 188.558,
748.621")').text()
s=pd.DataFrame({
Name
})
s.to_csv('file_name.csv')
When I run this, It gives the data of the full text box which I wanted but there is specific data that I want to extract. How would I do that?
i was confused with my project use firebase as db to collect sensor data and then do lstm, im newbie in python and code. How to convert json file to array and then associated the value and the last one is save file as .csv. Thanks for help. Here is my code:
Data from db
Data from db
`
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from google.colab import files
uploaded = files.upload()
data = next(iter(uploaded.values()))
type(data)
json.loads(data.decode("utf-8"))
df=json.dumps(data.decode("utf-8"))
df2 = json.loads(df)
`
I have am writing a script that reads a folder of .pdfs and extracts their fillable fields to a pandas df. I had success extracting one .pdf with the following code:
import numpy as np
import pandas as pd
import PyPDF2
import glob, os
pwd = os.getcwd()
pdfFileObj = open('pdf_filename', 'rb')
pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
fields_dict = pdfReader.getFormTextFields()
series = pd.Series(fields_dict).to_frame()
df = pd.DataFrame(pd.Series(fields_dict)).T
I want to build a function that runs this script for all pdfs in the directory. My first idea was to use a function in glob that collects all pdfs. Here is what I have so far:
import numpy as np
import pandas as pd
import PyPDF2
import glob, os
pwd = os.getcwd()
def readfiles():
os.chdir(pwd)
pdfs = []
for file in glob.glob("*.pdf"):
print(file)
pdfs.append(file)
pdfFileObj = open(readfiles, 'rb')
pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
fields_dict = pdfReader.getFormTextFields()
series = pd.Series(fields_dict).to_frame()
df = pd.DataFrame(pd.Series(fields_dict)).T
Unfortunately, this doesn't work because I cannot put a function in the pdfFileReader. Does anyone have suggestions on a better way to do this? Thanks!
I can't comment, new account. But you could try making your readFiles function return the array pdfs.
Then in code execution below just:
listofPDF=readfiles()
arrayofDF=list()
for file in listofPDF:
pdfFileObj = open(file , 'rb')
pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
##execute your code to obtain a single dataframe from a pdf here
fields_dict = pdfReader.getFormTextFields()
series = pd.Series(fields_dict).to_frame()
df = pd.DataFrame(pd.Series(fields_dict)).T
arrayofDF.append(df)
You would end up having a list of dataframes, each one corresponding to one of the pdf files, if the first part of the code ( in which you get the dataframe from the singular pdf file) works.
Additionally, you could make a dictionary like {filename:file , dataframe: df} and then append that to your list, so you can later recover the dataframe based of the name of the file. It all depends on what you plan to do with the dataframes later.
Using the one of the answers provided here: [https://stackoverflow.com/questions/46107348/how-to-display-image-stored-in-pandas-dataframe][1]
I am able to call image_formatter function in IPython.display HTML function to display thumbnail pngs in the dataframe.
I now want to export this HTML formatted table to a pdf file but haven't really found any helpful links to this effect. Any help will be appreciated!
import glob
import random
import base64
import pandas as pd
import os
from PIL import Image
from io import BytesIO
from IPython.display import HTML
#display inline images in pandas DataFrame
#source: https://www.kaggle.com/stassl/displaying-inline-images-in-pandas-dataframe
pd.set_option('display.max_colwidth', None)
def get_thumbnail(path):
i = Image.open(path)
return i
def image_base64(im):
if isinstance(im, str):
im = get_thumbnail(im)
with BytesIO() as buffer:
im.save(buffer, 'png')
return base64.b64encode(buffer.getvalue()).decode()
def image_formatter(im):
return f'<img src="data:image/jpeg;base64,{image_base64(im)}">'
To implement the above functions:
data = *path to csv containing a series of png paths & other details*
df=pd.DataFrame(data)
cols_2_keep= ['patient_id', "exam_id", "file_path"]
df = df[cols_2_keep]
images= HTML(df[['patient_id', "exam_id","file"]].to_html(formatters={'file': image_formatter}, escape=False))
#call images to look at HTML display
images
This, as expected, displays the images within the dataframe. What I'm now trying to do is export this dataframe (with images in place) into a pdf file.