I have a class and in that class I have a method that calls multiple methods in it.
But the problem I am facing now is that the method which calls the other methods ends up using the file_name parameter twice: once itself, and once more in one of the methods it calls.
And so when I call the method that combines the other methods, it returns an empty list: [].
So this is the method with the multiple methods in it:
def show_extracted_data_from_file(self, file_name):
    """Extract the text of ``file_name`` and return it as tab-separated rows.

    Each row holds the fruit amount, the fruit name and the total cost; rows
    are joined with newlines. ``zip`` stops at the shortest of the three
    columns.
    """
    self.extractingText.extract_text_from_image(file_name)
    columns = zip(
        self.filter_verdi_total_number_fruit(),
        self.filter_verdi_fruit_name(),
        self.filter_verdi_total_fruit_cost(file_name),
    )
    row_template = "{} \t {} \t {}"
    return "\n".join(row_template.format(*row) for row in columns)
and this is the method: filter_verdi_total_fruit_cost:
def filter_verdi_total_fruit_cost(self, file_name):
    """Return the last field of every matching line, parsed as a Dutch-locale float.

    A line matches when it has more than two whitespace-separated fields and
    its second field is one of ``self.extractingText.list_fruit``.
    """
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    self.extractingText.extract_text_from_image(file_name)
    known_fruit = self.extractingText.list_fruit
    costs = []
    # NOTE(review): the lines are produced by splitting the ``file_name``
    # string itself, not the text that was just extracted from the file.
    for line in file_name.split('\n'):
        fields = line.split()
        if len(fields) > 2 and fields[1] in known_fruit:
            costs.append(locale.atof(fields[-1]))
    return costs
this method returns the following data:
[123.2, 2772.0, 46.2, 577.5, 69.3, 3488.16, 137.5, 500.0, 1000.0, 2000.0, 1000.0, 381.25]
You can see that file_name is passed in twice.
So when I call the method show_extracted_data_from_file in views.py:
if uploadfile.image.path.endswith('.pdf'):
content = filter_text.show_extracted_data_from_file(uploadfile.image.path)
print(content)
it produces an empty list: []
Question: how can I avoid passing the file_name parameter twice, so that it returns the correct results?
These are the two other methods that I am calling in the combined method:
def filter_verdi_total_number_fruit(self):
    """Return the number found in front of every known fruit name.

    Only the first extracted page (``text_factuur_verdi[0]``) is searched.
    The result is the list of captured number strings, as ``re.findall``
    returns them.
    """
    fruit_alternatives = '|'.join(map(re.escape, self.extractingText.list_fruit))
    pattern = r"(\d*(?:\.\d+)*)\s*\W+(?:" + fruit_alternatives + ')'
    first_page = self.extractingText.text_factuur_verdi[0]
    return re.findall(pattern, first_page)
def filter_verdi_fruit_name(self):
    """Return every known fruit name that follows a number on the first page.

    Uses the same pattern as the amount filter, but captures the fruit name
    instead of the number in front of it.
    """
    extractor = self.extractingText
    escaped_names = [re.escape(word) for word in extractor.list_fruit]
    pattern = r"(?:\d*(?:\.\d+)*)\s*\W+(" + '|'.join(escaped_names) + ')'
    return re.findall(pattern, extractor.text_factuur_verdi[0])
So this is the other class:
class ExtractingTextFromFile:
    """OCR helper that turns every page of a file into text.

    Attributes:
        text_factuur_verdi: list with one OCR text string per page, refilled
            on every call to ``extract_text_from_image``.
        list_fruit: product names the filter methods look for.
    """

    def __init__(self):
        # Fixed: this used to be spelled ``tex_factuur_verdi``, which left
        # ``text_factuur_verdi`` undefined until the first extraction.
        self.text_factuur_verdi = []
        self.list_fruit = ['Appels', 'Ananas', 'Peen Waspeen',
                           'Tomaten Cherry', 'Sinaasappels',
                           'Watermeloenen', 'Rettich', 'Peren', 'Peen',
                           'Mandarijnen', 'Meloenen', 'Grapefruit', 'Rettich']

    def extract_text_from_image(self, filename):
        """Run OCR on every page of ``filename`` and return the page texts.

        The previous result is discarded first. ``wi``, ``Image`` and
        ``pytesseract`` are expected to be imported by the module
        (presumably wand's Image, PIL's Image and pytesseract - confirm).
        """
        self.text_factuur_verdi = []
        pdf_file = wi(filename=filename, resolution=300)
        all_images = pdf_file.convert('jpeg')
        for page in all_images.sequence:
            # Render the page to JPEG bytes so it can be opened for OCR.
            jpeg_bytes = wi(image=page).make_blob('jpeg')
            page_image = Image.open(io.BytesIO(jpeg_bytes))
            text = pytesseract.image_to_string(page_image, lang='eng')
            self.text_factuur_verdi.append(text)
        return self.text_factuur_verdi
#AndrewRyan has the right idea.
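I presume calling extract_text_from_image just stores the extracted text on self.extractingText (the text_factuur_verdi attribute), next to the list_fruit attribute that is already there.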
Two routes you can go, from what you are commenting you'll probably just go with #1.. but I gave #2 as another option in case you'd ever want to call filter_verdi_total_fruit_cost by itself.
Path 1, Just remove it.
Note: filter_verdi_total_fruit_cost is only called from show_extracted_data_from_file.
def show_extracted_data_from_file(self, file_name):
    """Extract ``file_name`` once and return amount, name and cost as rows.

    The extracted text is kept on ``self.extractingText`` so the three
    filter methods can read it without extracting again.
    """
    self.extractingText.extract_text_from_image(file_name)
    amounts = self.filter_verdi_total_number_fruit()
    names = self.filter_verdi_fruit_name()
    costs = self.filter_verdi_total_fruit_cost()
    rows = []
    for amount, name, cost in zip(amounts, names, costs):
        rows.append("{} \t {} \t {}".format(amount, name, cost))
    return "\n".join(rows)
def filter_verdi_total_fruit_cost(self):
    """Return the total cost of every recognised fruit line on the first page.

    The text must already have been extracted, i.e.
    ``self.extractingText.text_factuur_verdi`` must be filled. A line counts
    when it has more than two fields and its second field is a known fruit;
    the last field is parsed as a Dutch-locale number.
    """
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    known_fruit = self.extractingText.list_fruit
    # Fixed: ``file_name`` is no longer a parameter here, so the lines are
    # taken from the extracted text instead of an undefined name.
    first_page = self.extractingText.text_factuur_verdi[0]
    return [
        locale.atof(items[-1])
        for items in (line.split() for line in first_page.split('\n'))
        if len(items) > 2 and items[1] in known_fruit
    ]
Path 2, Check if it's already extracted- if not, extract; if so, continue
Note: if you wanted to just call filter_verdi_total_fruit_cost
def show_extracted_data_from_file(self, file_name):
    """Extract ``file_name`` and format amount, name and cost per fruit.

    Returns one line per fruit with the three values separated by tabs.
    """
    def format_row(row):
        return "{} \t {} \t {}".format(*row)

    self.extractingText.extract_text_from_image(file_name)
    total_fruit = self.filter_verdi_total_number_fruit()
    fruit_name = self.filter_verdi_fruit_name()
    fruit_total_cost = self.filter_verdi_total_fruit_cost(file_name)
    return "\n".join(map(format_row, zip(total_fruit, fruit_name, fruit_total_cost)))
def filter_verdi_total_fruit_cost(self, file_name):
    """Return the total cost of every recognised fruit line on the first page.

    Can be called on its own: when no text has been extracted yet,
    ``file_name`` is extracted first. Text that is already present is reused
    as-is, so it is the caller's job to re-extract when the file changes.
    """
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    extractor = self.extractingText
    # Fixed: the old check looked for ``list_fruit`` on ``self``, which says
    # nothing about whether the text has been extracted.
    if not getattr(extractor, 'text_factuur_verdi', None):
        # file hasn't been extracted yet.. extract it
        extractor.extract_text_from_image(file_name)
    # Fixed: parse the extracted text rather than the path string.
    first_page = extractor.text_factuur_verdi[0]
    return [
        locale.atof(items[-1])
        for items in (line.split() for line in first_page.split('\n'))
        if len(items) > 2 and items[1] in extractor.list_fruit
    ]
Related
I'm trying to parse tables in a PDF using Camelot. The cells have multiple lines of texts in them, and some have an empty line separating portions of the text:
First line
Second line
Third line
I would expect this to be parsed as First line\nSecond line\n\nThird line (notice the double line breaks), but I get this instead: T\nFirst line\nSecond line\nhird line. The first character after a double-line-break moves to the beginning of the text, and I only get a single line-break instead.
I also tried using tabula, but that one messes up the entire table (the data frame, actually) when there is an empty row in the table, and for some words it also puts a space between the characters.
EDIT:
My main issue is the removal of multiple line-breaks. The other one I could fix from code if I knew where the empty lines were.
my friend, can you check the example here
https://camelot-py.readthedocs.io/en/master/user/advanced.html#improve-guessed-table-rows
tables = camelot.read_pdf('group_rows.pdf', flavor='stream', row_tol=10)
tables[0].df
I solved the same problem with the code below
tables = camelot.read_pdf(file, flavor = 'stream', table_areas=['24,618,579,93'], columns=['67,315,369,483,571'], row_tol=10,strip_text='t\r\n\v')
I also encountered the same problem with double line breaks: it was switching characters around, just as it does in your case. I spent some time looking at the code, made some changes, and fixed the issue. You can use the code below.
After adding the code below, use the custom function I wrote, read_pdf_custom(), instead of camelot.read_pdf.
For the best experience, I suggest using camelot v0.8.2.
import os
import sys
import warnings

import camelot
import pandas as pd
from camelot import handlers
from camelot import read_pdf
from camelot.core import Table
from camelot.core import TableList
from camelot.image_processing import (
adaptive_threshold,
find_lines,
find_contours,
find_joints,
)
from camelot.parsers import Lattice
from camelot.parsers import Stream
from camelot.parsers.base import BaseParser
from camelot.utils import validate_input, remove_extra,TemporaryDirectory,get_page_layout,get_text_objects,get_rotation,is_url,download_url,scale_image,scale_pdf,segments_in_bbox,text_in_bbox,merge_close_lines,get_table_index,compute_accuracy,compute_whitespace
# Lattice parser variant that overrides two steps of the base class: how text
# lines are collected for a table area and how cell text is written into the
# table.
# NOTE(review): ``self.find_between`` is called below but is not defined in
# this class - confirm it exists on the base class or add it.
class custom_lattice(Lattice):
# Collect the text and segments inside the table area ``tk`` and return
# ``(cols, rows, v_s, h_s)``. Before filtering, vertical text lines are moved
# into the horizontal list so they are treated as horizontal text.
def _generate_columns_and_rows(self, table_idx, tk):
# select elements which lie within table_bbox
t_bbox = {}
v_s, h_s = segments_in_bbox(
tk, self.vertical_segments, self.horizontal_segments
)
custom_horizontal_indexes=[]
custom_vertical_indexes=[]
# For every horizontal text line, parse its string form and remember the
# second comma-separated field that follows the class name (presumably a
# bbox coordinate from the layout library's repr - confirm).
for zzz in self.horizontal_text:
try:
h_extracted_text=self.find_between(str(zzz),"'","'").strip()
h_text_index=self.find_between(str(zzz),"LTTextLineHorizontal","'").strip().split(",")
custom_horizontal_indexes.append(h_text_index[1])
# NOTE(review): bare except - any failure while parsing an entry is ignored.
except:
pass
inserted=0
# For every vertical text line, look up a horizontal line with the same
# second field and insert the vertical line at that position.
for xxx in self.vertical_text:
v_extracted_text=self.find_between(str(xxx),"'","'").strip()
v_text_index=self.find_between(str(xxx),"LTTextLineVertical","'").strip().split(",")
custom_vertical_indexes.append(v_text_index[1])
vertical_second_index=v_text_index[1]
try:
horizontal_index=custom_horizontal_indexes.index(vertical_second_index)
self.horizontal_text.insert(horizontal_index,xxx)
# No horizontal line with a matching field: the vertical line is not inserted.
except Exception as exxx:
pass
# The vertical list is emptied, so the "vertical" filter below sees no text.
self.vertical_text=[]
t_bbox["horizontal"] = text_in_bbox(tk, self.horizontal_text)
t_bbox["vertical"] = text_in_bbox(tk, self.vertical_text)
# Horizontal text is ordered by descending y0 then x0; vertical text by x0
# then descending y0.
t_bbox["horizontal"].sort(key=lambda x: (-x.y0, x.x0))
t_bbox["vertical"].sort(key=lambda x: (x.x0, -x.y0))
self.t_bbox = t_bbox
# Split the stored points for this table area into column and row
# coordinates, add the area's own edges, merge lines closer than line_tol
# and turn neighbouring coordinates into (start, end) pairs.
cols, rows = zip(*self.table_bbox[tk])
cols, rows = list(cols), list(rows)
cols.extend([tk[0], tk[2]])
rows.extend([tk[1], tk[3]])
cols = merge_close_lines(sorted(cols), line_tol=self.line_tol)
rows = merge_close_lines(sorted(rows, reverse=True), line_tol=self.line_tol)
cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
return cols, rows, v_s, h_s
# Build the Table for one table area from its columns, rows and the segments
# passed as ``v_s`` / ``h_s`` keyword arguments, and fill in the cell text.
# Raises ValueError when either segment list is missing.
def _generate_table(self, table_idx, cols, rows, **kwargs):
# NOTE(review): this prints a blank line on every call; looks like leftover
# debug output.
print("\n")
v_s = kwargs.get("v_s")
h_s = kwargs.get("h_s")
if v_s is None or h_s is None:
raise ValueError("No segments found on {}".format(self.rootname))
table = Table(cols, rows)
table = table.set_edges(v_s, h_s, joint_tol=self.joint_tol)
table = table.set_border()
table = table.set_span()
pos_errors = []
for direction in ["vertical", "horizontal"]:
for t in self.t_bbox[direction]:
indices, error = get_table_index(
table,
t,
direction,
split_text=self.split_text,
flag_size=self.flag_size,
strip_text=self.strip_text,
)
# (-1, -1) as the first two items means the text was not assigned to a cell.
if indices[:2] != (-1, -1):
pos_errors.append(error)
indices = Lattice._reduce_index(
table, indices, shift_text=self.shift_text
)
for r_idx, c_idx, text in indices:
# When the text is a single character once surrounding whitespace and
# newlines are removed, store that bare character.
temp_text=text.strip().replace("\n","")
if len(temp_text)==1:
text=temp_text
table.cells[r_idx][c_idx].text = text
accuracy = compute_accuracy([[100, pos_errors]])
if self.copy_text is not None:
table = Lattice._copy_spanning_text(table, copy_text=self.copy_text)
data = table.data
table.df = pd.DataFrame(data)
table.shape = table.df.shape
whitespace = compute_whitespace(data)
table.flavor = "lattice"
table.accuracy = accuracy
table.whitespace = whitespace
table.order = table_idx + 1
# The page number is read from the root name, which is expected to look
# like "page-<n>".
table.page = int(os.path.basename(self.rootname).replace("page-", ""))
# for plotting
_text = []
_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
table._text = _text
table._image = (self.image, self.table_bbox_unscaled)
table._segments = (self.vertical_segments, self.horizontal_segments)
table._textedges = None
return table
class PDFHandler(handlers.PDFHandler):
    """PDF handler that uses ``custom_lattice`` for the lattice flavor."""

    def parse(
        self, flavor="lattice", suppress_stdout=False, layout_kwargs=None, **kwargs
    ):
        """Extract the tables from every selected page.

        Args:
            flavor: "lattice" selects ``custom_lattice``; anything else
                selects ``Stream``.
            suppress_stdout: passed through to the parser.
            layout_kwargs: dict passed through to the parser; ``None`` means
                an empty dict.
            **kwargs: parser constructor arguments.

        Returns:
            A ``TableList`` with the sorted tables of all pages.
        """
        # A dict literal as default would be shared between calls.
        layout_kwargs = {} if layout_kwargs is None else layout_kwargs
        tables = []
        with TemporaryDirectory() as tempdir:
            # Every page is saved as its own single-page PDF first.
            for p in self.pages:
                self._save_page(self.filepath, p, tempdir)
            pages = [
                os.path.join(tempdir, f"page-{p}.pdf") for p in self.pages
            ]
            parser = custom_lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
            for p in pages:
                t = parser.extract_tables(
                    p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs
                )
                tables.extend(t)
        return TableList(sorted(tables))
def read_pdf_custom(
    filepath,
    pages="1",
    password=None,
    flavor="lattice",
    suppress_stdout=False,
    layout_kwargs=None,
    **kwargs
):
    """Read tables from a PDF using the customised ``PDFHandler``.

    Args:
        filepath: passed to ``PDFHandler``.
        pages: page selection passed to ``PDFHandler``.
        password: passed to ``PDFHandler``.
        flavor: "lattice" or "stream".
        suppress_stdout: when true, warnings are ignored while parsing.
        layout_kwargs: dict passed on to the parser; ``None`` means an empty
            dict.
        **kwargs: flavor-specific parser options.

    Returns:
        The tables returned by ``PDFHandler.parse``.

    Raises:
        NotImplementedError: if ``flavor`` is not "lattice" or "stream".
    """
    if flavor not in ["lattice", "stream"]:
        raise NotImplementedError(
            "Unknown flavor specified." " Use either 'lattice' or 'stream'"
        )
    # A dict literal as default would be shared between calls.
    if layout_kwargs is None:
        layout_kwargs = {}
    with warnings.catch_warnings():
        if suppress_stdout:
            warnings.simplefilter("ignore")
        validate_input(kwargs, flavor=flavor)
        p = PDFHandler(filepath, pages=pages, password=password)
        kwargs = remove_extra(kwargs, flavor=flavor)
        tables = p.parse(
            flavor=flavor,
            suppress_stdout=suppress_stdout,
            layout_kwargs=layout_kwargs,
            **kwargs
        )
        return tables
I'm trying to parse generated files into a list of objects.
Unfortunately the structure of the generated files is not always the same, but they contain the same fields (and lots of other garbage).
For example:
function foo(); # Don't Care
function maybeanotherfoo(); # Don't Care
int maybemoregarbage; # Don't Care
product_serial = "CDE1102"; # I want this <---------------------
unnecessary_info1 = 10; # Don't Care
unnecessary_info2 = "red" # Don't Care
product_id = 1134412; # I want this <---------------------
unnecessary_info3 = "88" # Don't Care
product_serial = "DD1232"; # I want this <---------------------
product_id = 3345111; # I want this <---------------------
unnecessary_info1 = "22" # Don't Care
unnecessary_info2 = "panda" # Don't Care
product_serial = "CDE1102"; # I want this <---------------------
unnecessary_info1 = 10; # Don't Care
unnecessary_info2 = "red" # Don't Care
unnecessary_info3 = "bear" # Don't Care
unnecessary_info4 = 119 # Don't Care
product_id = 1112331; # I want this <---------------------
unnecessary_info5 = "jj" # Don't Care
I want a list of objects (each object has: serial and id).
I have tried the following:
import re
class Product:
    """A product described by its id and its serial."""

    def __init__(self, id, serial):
        # Stored under the same names as the fields in the generated files.
        self.product_serial = serial
        self.product_id = id
# Scan products.txt line by line and pull the quoted serial out of every
# line that mentions "product_serial".
first_string = "product_serial"
second_string = "product_id"
linenum = 0
with open('products.txt', "r") as products_file:
    for linenum, line in enumerate(products_file, 1):
        if first_string in line:
            # Everything after the first double quote up to the next one.
            product_serial = re.search('\"([^"]+)', line).group(1)
            #How do I proceed?
Any advice would be greatly appreciated!
Thanks!
I've inlined the data here using an io.StringIO(), but you can substitute data for your products_file.
The idea is that we gather key/values into current_object, and as soon as we have all the data we know we need for a single object (the two keys), we push it onto a list of objects and prime a new current_object.
You could use something like if line.startswith('product_serial') instead of the admittedly complex regexp.
import io
import re
data = io.StringIO("""
function foo();
function maybeanotherfoo();
int maybemoregarbage;
product_serial = "CDE1102";
unnecessary_info1 = 10;
unnecessary_info2 = "red"
product_id = 1134412;
unnecessary_info3 = "88"
product_serial = "DD1232";
product_id = 3345111;
unnecessary_info1 = "22"
unnecessary_info2 = "panda"
product_serial = "CDE1102";
unnecessary_info1 = 10;
unnecessary_info2 = "red"
unnecessary_info3 = "bear"
unnecessary_info4 = 119
product_id = 1112331;
unnecessary_info5 = "jj"
""")

# Matches only the two wanted assignments; the value is either a number or a
# double-quoted string, optionally followed by a semicolon.
field_pattern = re.compile(r'^(product_id|product_serial)\s*=\s*(\d+|"(?:.+?)");?$')
objects = []
current_object = {}
for line in data:
    m = field_pattern.match(line.strip())
    if m is None:
        continue
    key, value = m.groups()
    current_object[key] = value.strip('"')
    # Both keys collected: ship the object and start a fresh one.
    if len(current_object) == 2:
        objects.append(current_object)
        current_object = {}
print(objects)
I am trying to rephrase the implementation found here. This is what I have so far:
import csv
import math
import random
training_set_ratio = 0.67
training_set = []
test_set = []
class IrisFlower:
    """One iris sample: four measurements plus the flower type.

    Equality and hashing use only the four measurements, so two flowers with
    the same measurements are equal even when their types differ.
    """

    def __init__(self, petal_length, petal_width, sepal_length, sepal_width, flower_type):
        self.petal_length = petal_length
        self.petal_width = petal_width
        self.sepal_length = sepal_length
        self.sepal_width = sepal_width
        self.flower_type = flower_type

    def _measurements(self):
        """Return the four measurements as a tuple (the identity of a flower)."""
        return (self.petal_length, self.petal_width, self.sepal_length, self.sepal_width)

    def __hash__(self) -> int:
        return hash(self._measurements())

    def __eq__(self, other):
        # Comparing with an unrelated object used to raise AttributeError;
        # NotImplemented lets Python fall back to "not equal".
        if not isinstance(other, IrisFlower):
            return NotImplemented
        return self._measurements() == other._measurements()
def load_data():
    """Read dataset.csv and split its rows over the training and test sets.

    Each non-empty row is turned into an ``IrisFlower`` (four floats plus the
    type) and appended to ``training_set`` with probability
    ``training_set_ratio``, otherwise to ``test_set``.
    """
    # newline='' is what the csv module asks for when opening files.
    with open('dataset.csv', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            # A blank line (for example at the end of the file) yields an
            # empty row; indexing it would raise IndexError.
            if not row:
                continue
            iris_flower = IrisFlower(float(row[0]), float(row[1]), float(row[2]), float(row[3]), row[4])
            if random.random() < training_set_ratio:
                training_set.append(iris_flower)
            else:
                test_set.append(iris_flower)
def euclidean_distance(flower_one: IrisFlower, flower_two: IrisFlower):
    """Return the sum of the squared differences of the four measurements.

    NOTE(review): no square root is taken, so this is the squared Euclidean
    distance; the ordering of neighbours is the same either way.
    """
    distance = 0.0
    for feature in ('petal_length', 'petal_width', 'sepal_length', 'sepal_width'):
        difference = getattr(flower_one, feature) - getattr(flower_two, feature)
        distance += math.pow(difference, 2)
    return distance
# Print, for every flower in training_set, a one-entry dict that maps the
# training flower to its distance from test_flower. Returns None.
def get_neighbors(test_flower: IrisFlower):
# NOTE(review): `distances` is never filled or returned.
distances = []
for training_flower in training_set:
dist = euclidean_distance(test_flower, training_flower)
# The flower object itself is the key, which works because IrisFlower
# defines __hash__ and __eq__.
d = dict()
d[training_flower] = dist
print(d)
return
load_data()
get_neighbors(test_set[0])
Currently, print statements in the following code block:
# Print, for every flower in training_set, a one-entry dict that maps the
# training flower to its distance from test_flower. Returns None.
def get_neighbors(test_flower: IrisFlower):
# NOTE(review): `distances` is never filled or returned.
distances = []
for training_flower in training_set:
dist = euclidean_distance(test_flower, training_flower)
# The flower object itself is the key of the dict.
d = dict()
d[training_flower] = dist
print(d)
return
will have outputs similar to
{<__main__.IrisFlower object at 0x107774fd0>: 0.25999999999999945}
which is ok. But I do not want to create the dictionary first, and then append the key value, as in:
d = dict()
d[training_flower] = dist
So this is what I am trying:
d = dict(training_flower = dist)
However, the dict constructor does not seem to use the instance as the key, but rather a string, because what I see printed is as follows:
{'training_flower': 23.409999999999997}
{'training_flower': 16.689999999999998}
How do I create the dictionary by using the object as key in one statement?
In your snippet, where you write d = dict(training_flower=dist), "training_flower" is a keyword argument for dict function and not an object. It is equivalent to writing d = {'training_flower': dist}. The only way to create a dictionary with an object as a key is to use the latter syntax:
d = {training_flower: dist}
To directly create a dict with a key which is not a valid keyword, use the {} syntax like:
Code:
d = {training_flower: 'a_value'}
Test Code:
training_flower = 'a key'
d = {training_flower: 'a_value'}
print(d)
Results:
{'a key': 'a_value'}
to initialize a dictionary with an object as a key, (edit: and the string in Stephen's example is an object anyway)
class Flower:
    """Minimal class used to show that an instance can be a dict key."""

    def __repr__(self):
        description = 'i am flower'
        return description
flower1 = Flower()
d = {flower1: 4}
print(d)
outputs
{i am flower: 4}
this is my first post here, and I know I'm late, sorry if it's a duplicate solution. just to show it works with an object.
would upvote Stephen's answer but I can't yet.
I'm failing at dynamically changing the dropdown options for one field after setting another field in a Dexterity form.Schema.
The vocabularies are based on a SQL database.
Specifically, I want to update the vocabulary options for township after selecting the county.
Right now I am just pulling the full list of townships regardless of the county selected, as all attempts to change things dynamically led to errors.
Any help on this would be appreciated. Thanks!
from site.py:
from selectionProvider import SelectionProvider
vocabProvider = SelectionProvider()
#grok.provider(IContextSourceBinder)
def countySource(context):
    """Return the cached 'county' vocabulary; ``context`` is not used."""
    # NOTE(review): the comment line above the def looks like a decorator
    # that lost its "@" - confirm against the real site.py.
    county_vocabulary = vocabProvider.getVocabulary('county')
    return county_vocabulary
#grok.provider(IContextSourceBinder)
def townshipSource(context):
    """Return the cached 'township' vocabulary; ``context`` is not used."""
    # NOTE(review): the comment line above the def looks like a decorator
    # that lost its "@" - confirm against the real site.py.
    township_vocabulary = vocabProvider.getVocabulary('township')
    return township_vocabulary
# Form schema with two required choice fields. Each field takes its options
# from its own source function; the township options do not depend on the
# selected county.
class IESurrenderSite(form.Schema):
# County dropdown, filled by countySource.
county = schema.Choice(
title=_(u"County"),
source=countySource,
required=True,
)
# Township dropdown, filled by townshipSource.
township = schema.Choice(
title=_(u"Township"),
source=townshipSource,
required=True,
)
...
from selectiorProvider.py:
class SelectionProvider(object):
    """Builds dropdown vocabularies from the database and caches them.

    The connection (``DB``) and the cache (``vocabularies``) are class
    attributes, so they are shared by all instances.
    """
    DB = maap_db.maap_db()
    vocabularies = {}
    Counties = {}
    Townships = {}

    def _runQuery(self, sql):
        """Open the connection, run ``sql`` and always close the connection."""
        self.DB.open()
        try:
            return self.DB.RunQuery(sql)
        finally:
            # Without this the connection stayed open when the query raised.
            self.DB.close()

    def getCountiesDropdownRecordSet(self, object):
        """Return all counties and refresh ``CountiesByName``.

        input: object is a string (not used by the query)
        """
        print('in getCountiesDropdownRecordSet')
        rs = self._runQuery("Select *, CountyID as [value], name as [title] From County Order By name;")
        # Despite its name the mapping is keyed by CountyID.
        SelectionProvider.CountiesByName = {}
        for rec in rs:
            SelectionProvider.CountiesByName[rec['CountyID']] = rec['title']
        return rs

    def getTownshipDropdownRecordSet(self, object):
        """Return all townships and refresh ``TownshipsByName``.

        input: object is a string (not used by the query)
        """
        print('in getTownshipDropdownRecordSet')
        rs = self._runQuery("Select *, TownshipID as [value], name as [title] From Township Order By name;")
        # Despite its name the mapping is keyed by TownshipID.
        SelectionProvider.TownshipsByName = {}
        for rec in rs:
            SelectionProvider.TownshipsByName[rec['TownshipID']] = rec['title']
        return rs

    def getDropdownRecordSet(self, object):
        """Return the DropdownSelections rows for ``object``, ordered by seqNo.

        input: object is a string
        """
        print('in getDropdownRecordSet')
        # NOTE(review): ``object`` is interpolated into the SQL text. Only
        # pass trusted, code-defined names, or switch to a parameterized
        # query if the database wrapper supports one.
        return self._runQuery("Select * From DropdownSelections Where object = '%s' Order By seqNo;" % (object))

    def buildVocabulary(self, rs, valueField='value', titleField='title'):
        """DO NOT USE directly outside this class, see getVocabulary() or rebuildVocabulary() instead

        Turns every record of ``rs`` into a SimpleTerm and returns the
        resulting SimpleVocabulary.
        """
        data = [
            SimpleTerm(value=rec[valueField], title=_(rec[titleField]))
            for rec in rs
        ]
        return SimpleVocabulary(data)

    def rebuildVocabulary(self, object):
        """Force a fetch from the database and rebuild the vocabulary.

        input object: a string, matches the DropdownSelections field
        The rebuilt vocabulary replaces the cached one and is returned.
        """
        print('initializing %s' % (object,))
        if object == "county":
            print('going to CountiesDropdowns')
            vocab = self.buildVocabulary(self.getCountiesDropdownRecordSet(object), "CountyID", "title")
        elif object == "township":
            print('going to TownshipDropdowns')
            vocab = self.buildVocabulary(self.getTownshipDropdownRecordSet(object), "TownshipID", "title")
        else:
            vocab = self.buildVocabulary(self.getDropdownRecordSet(object))
        SelectionProvider.vocabularies[object] = vocab
        return vocab

    def getVocabulary(self, object):
        """Retrieve cached vocabulary

        input object: a string, matches the DropdownSelections field
        A missing or empty cache entry is rebuilt from the database.
        """
        vocab = SelectionProvider.vocabularies.get(object)
        if vocab is None or len(vocab) == 0:
            vocab = self.rebuildVocabulary(object)
        return vocab
You can do this with plone.formwidget.masterselect
Like this (untested, but it gives you an idea of how it works):
from zope import schema
from plone.supermodel import model
from plone.formwidget.masterselect import _
from plone.formwidget.masterselect import MasterSelectBoolField
from plone.formwidget.masterselect import MasterSelectField
# Sketch of the callback that should return the township options for the
# value selected in the master (county) field.
# NOTE(review): this is pseudo-code. The parameter is named `master` but the
# body reads `master_value`, and `townshipDynamicVocab` is never defined;
# both raise NameError as written. Check which parameter name the widget
# passes before fixing.
def getTownshipDynVocab(master):
CountryID = master_value
# search for your township entries by CountryID
# and return it as a DisplayList
return townshipDynamicVocab
# Schema in which the county field is the master and the township field is
# its slave.
class IESurrenderSite(model.Schema):
# Master field: the slave_fields entry names getTownshipDynVocab as the
# vocab_method for the 'township' field.
county = MasterSelectField(
title=_(u"County"),
source=countySource,
slave_fields=(
# Controls the vocab of township
{'name': 'township',
'action': 'vocabulary',
'vocab_method': getTownshipDynVocab,
},
),
required=True,
)
# Slave field: a set of choices with no static vocabulary.
township = schema.Set(
title=_(u"Township"),
value_type=schema.Choice(),
required=False,
)
How do I render glyphs in pyqt using the svggraphicsItem?
Recently I found that svg files generated by Cairo do not plot properly in pyqt. The error comes from the use of glyphs which seem not to be shown in pyqt (this might be wrong but I couldn't find any way of getting glyphs to render).
I ended up writing a set of functions that will convert the glyphs to svg paths so the file will render normally.
These could still use some improvements for rendering color and other style elements (which are locked in the functions that I wrote).
These functions will need to be embedded in a class or have self removed to be used elsewhere.
I just wanted people to have these so they wouldn't have to search high and low like I did to find a way to render glyphs in pyqt.
Hope for the best,
Kyle
def convertSVG(self, file):
    """Rewrite the SVG at ``file`` in place, replacing glyph uses by paths."""
    document = self._getsvgdom(file)
    print(document)
    self._switchGlyphsForPaths(document)
    self._commitSVG(file, document)
def _commitSVG(self, file, dom):
f = open(file, 'w')
dom.writexml(f)
f.close()
def _getsvgdom(self, file):
print 'getting DOM model'
import xml.dom
import xml.dom.minidom as mini
f = open(file, 'r')
svg = f.read()
f.close()
dom = mini.parseString(svg)
return dom
def _getGlyphPaths(self, dom):
symbols = dom.getElementsByTagName('symbol')
glyphPaths = {}
for s in symbols:
pathNode = [p for p in s.childNodes if 'tagName' in dir(p) and p.tagName == 'path']
glyphPaths[s.getAttribute('id')] = pathNode[0].getAttribute('d')
return glyphPaths
def _switchGlyphsForPaths(self, dom):
glyphs = self._getGlyphPaths(dom)
use = self._getUseTags(dom)
for glyph in glyphs.keys():
print glyph
nl = self.makeNewList(glyphs[glyph].split(' '))
u = self._matchUseGlyphs(use, glyph)
for u2 in u:
print u2, 'brefore'
self._convertUseToPath(u2, nl)
print u2, 'after'
def _getUseTags(self, dom):
return dom.getElementsByTagName('use')
def _matchUseGlyphs(self, use, glyph):
matches = []
for i in use:
print i.getAttribute('xlink:href')
if i.getAttribute('xlink:href') == '#'+glyph:
matches.append(i)
print matches
return matches
def _convertUseToPath(self, use, strokeD):
## strokeD is a list of lists of strokes to make the glyph
newD = self.nltostring(self.resetStrokeD(strokeD, use.getAttribute('x'), use.getAttribute('y')))
use.tagName = 'path'
use.removeAttribute('xlink:href')
use.removeAttribute('x')
use.removeAttribute('y')
use.setAttribute('style', 'fill: rgb(0%,0%,0%); stroke-width: 0.5; stroke-linecap: round; stroke-linejoin: round; stroke: rgb(0%,0%,0%); stroke-opacity: 1;stroke-miterlimit: 10; ')
use.setAttribute('d', newD)
def makeNewList(self, inList):
    """Group the tokens of a path into one list per drawing command.

    A group starts at the next 'M', 'L', 'C' or 'Z' token and runs up to
    (not including) the next command token, empty string or single space.
    """
    commands = ['M', 'L', 'C', 'Z']
    terminators = ['M', 'L', 'C', 'Z', '', ' ']
    groups = []
    cursor = 0
    while cursor < len(inList):
        first = cursor + self.listFind(inList[cursor:], commands)
        last = first + self.listFind(inList[first + 1:], terminators)
        groups.append(inList[first:last + 1])
        cursor = last + 1
    return groups
def listFind(self, x, query):
    """Return the index of the first item of ``x`` that is in ``query``.

    When no item matches, ``len(x)`` is returned.
    """
    hits = (position for position, item in enumerate(x) if item in query)
    return next(hits, len(x))
def resetStrokeD(self, strokeD, x, y):
    """Apply ``resetXY`` with the same ``x`` and ``y`` to every stroke."""
    return [self.resetXY(stroke, x, y) for stroke in strokeD]
def resetXY(self, nl, x, y):  # convert a list of strokes to xy coords
    """Shift the coordinates of one stroke by ``x`` and ``y``.

    ``nl`` is a command token followed by alternating x and y values. The
    command is kept as-is; values at odd positions get ``x`` added and
    values at even positions get ``y`` added. An empty list stays empty.
    """
    if not nl:
        return []
    shifted = [nl[0]]
    # The old "else" branch could never run, because a position is always
    # either odd or even; it has been removed.
    for position, value in enumerate(nl[1:], 1):
        offset = x if position % 2 else y
        shifted.append(float(value) + float(offset))
    return shifted
def nltostring(self, nl):  # convert a colection of nl's to a string
    """Join every inner list with spaces, then join the results with spaces."""
    return ' '.join(' '.join(str(item) for item in stroke) for stroke in nl)