Store results of nested for loop as single concatenated string - python

I'm trying to store the values of the function below to a single string that I can input into a query leveraging an F-string. The output looks correct but is really just a few separated print statements.
How can I store the output of the below to a single string?
import pandas as pd
view_dict = [{'id':'168058','viewtime_min':'2023-01-26 21:00:59.435 -0600','viewtime_max':'2023-01-26 21:59:59.435 -0600'},
{'id':'167268','viewtime_min':'2023-01-26 21:59:59.435 -0600','viewtime_max':'2023-01-26 21:59:59.435 -0600'},
{'id':'167268','viewtime_min':'2023-01-26 21:59:59.435 -0600','viewtime_max':'2023-01-26 21:59:59.435 -0600'}]
# Prints one SQL predicate per entry of view_dictionary; every entry except
# the last one is followed by a trailing "or".
# NOTE: there is no return statement, so the caller receives None -- print()
# only writes to stdout.
# NOTE(review): the parameter is annotated as dict but is indexed by integer
# position below; the sample input above is a list of dicts.
def get_where_clause(view_dictionary: dict):
# Assigned here but never read or returned.
where_clause = " "
for index in range(len(view_dictionary)):
# max(range(len(...))) is the index of the last entry.
if index != max(range(len(view_dictionary))):
print(f'''(b.id = {view_dictionary[index]['id']}
and b.viewed_at between coalesce({view_dictionary[index]['viewtime_min']},published_at) and {view_dictionary[index]['viewtime_max']})
or''')
else:
print(f'''(b.id = {view_dictionary[index]['id']}
and b.viewed_at between coalesce({view_dictionary[index]['viewtime_min']},published_at) and {view_dictionary[index]['viewtime_max']})''')
x = get_where_clause(view_dict)
x
I'm expecting this to store to a value but when accessing the value nothing is stored.

You aren't actually returning or storing anything, print simply writes to the console. Ideally, you'd collect these into something like a list to be returned, which you can then use str.join to concatenate:
view_dict = [{'id':'168058','viewtime_min':'2023-01-26 21:00:59.435 -0600','viewtime_max':'2023-01-26 21:59:59.435 -0600'},
{'id':'167268','viewtime_min':'2023-01-26 21:59:59.435 -0600','viewtime_max':'2023-01-26 21:59:59.435 -0600'},
{'id':'167268','viewtime_min':'2023-01-26 21:59:59.435 -0600','viewtime_max':'2023-01-26 21:59:59.435 -0600'}]
def get_where_clause(view_dictionary: list):
    """Return the predicates for all views as one string.

    Args:
        view_dictionary: A list of dicts, each providing the keys ``'id'``,
            ``'viewtime_min'`` and ``'viewtime_max'``.

    Returns:
        One predicate per view; every predicate except the last is followed
        by ``or``, and the pieces are joined with a single space. An empty
        input yields ``''``.

    Note:
        The values are interpolated into the text verbatim (no quoting or
        escaping). Prefer bound query parameters when the values are not
        fully trusted.
    """
    # I've changed this to a list
    where_clause = []
    last_index = len(view_dictionary) - 1
    # The f-string continuation lines stay at column 0 on purpose: any
    # indentation there would become part of the generated text.
    for index, view in enumerate(view_dictionary):
        if index != last_index:
            where_clause.append(f'''(b.id = {view['id']}
and b.viewed_at between coalesce({view['viewtime_min']},published_at) and {view['viewtime_max']})
or''')
        else:
            where_clause.append(f'''(b.id = {view['id']}
and b.viewed_at between coalesce({view['viewtime_min']},published_at) and {view['viewtime_max']})''')
    # join together here
    return ' '.join(where_clause)
x = get_where_clause(view_dict)
print(x)
I know it isn't asked for, but this could be cleaned up a little more with some basic iteration techniques:
def get_where_clause(view_dictionary: list):
    """Return the predicates for all views as one string.

    Args:
        view_dictionary: A list of dicts, each providing the keys ``'id'``,
            ``'viewtime_min'`` and ``'viewtime_max'``.

    Returns:
        One predicate per view; every predicate except the last is followed
        by ``or``, and the pieces are joined with a single space. An empty
        input yields ``''``.
    """
    # Nothing to build; indexing [-1] below would raise IndexError.
    if not view_dictionary:
        return ''
    # I've changed this to a list
    where_clause = []
    # grab the last element and iterate over a slice
    # rather than checking an index
    last = view_dictionary[-1]
    # iterate over the views directly, don't use an index
    # (f-string continuation lines stay at column 0 so no indentation
    # leaks into the generated text)
    for item in view_dictionary[:-1]:
        where_clause.append(f'''(b.id = {item['id']}
and b.viewed_at between coalesce({item['viewtime_min']},published_at) and {item['viewtime_max']})
or''')
    where_clause.append(f'''(b.id = {last['id']}
and b.viewed_at between coalesce({last['viewtime_min']},published_at) and {last['viewtime_max']})''')
    # join together here
    return ' '.join(where_clause)
And to simplify formatting, you can indent a single string by using parentheses:
for item in view_dictionary:
where_clause.append(
f"(b.id = {item['id']} "
"and b.viewed_at between "
f"coalesce({item['viewtime_min']},published_at) "
f"and {item['viewtime_max']})"
)
# rather than checking for first/last, you can join on
# 'or'
return ' or '.join(where_clause)

The print command just writes the text to your screen, it does not return a value. Your function needs to return a value for it to be "storable" in a variable.
I rewrote, formatted and commented your code, it now returns a string containing the where clause:
view_dict = [
{
"id": "168058",
"viewtime_min": "2023-01-26 21:00:59.435 -0600",
"viewtime_max": "2023-01-26 21:59:59.435 -0600",
},
{
"id": "167268",
"viewtime_min": "2023-01-26 21:59:59.435 -0600",
"viewtime_max": "2023-01-26 21:59:59.435 -0600",
},
{
"id": "167268",
"viewtime_min": "2023-01-26 21:59:59.435 -0600",
"viewtime_max": "2023-01-26 21:59:59.435 -0600",
},
]
def get_where_clause(view_dictionary: list[dict]) -> str:
    """Build the WHERE-clause predicates for every view, joined by ``or``.

    Args:
        view_dictionary: Despite the name, a list of dicts. Each dict
            provides the keys ``'id'``, ``'viewtime_min'`` and
            ``'viewtime_max'``.

    Returns:
        The predicates joined with ``" or "``; ``""`` for an empty list.

    Note:
        The id is interpolated as-is and the timestamps are wrapped in
        single quotes without escaping. Prefer bound query parameters when
        the values are not fully trusted.
    """
    # Joining on " or " puts the keyword only *between* predicates, so there
    # is no trailing "or" to slice off afterwards.
    return " or ".join(
        f"(b.id = {vd['id']} and b.viewed_at between "
        f"coalesce('{vd['viewtime_min']}',published_at) and '{vd['viewtime_max']}')"
        for vd in view_dictionary
    )
x = get_where_clause(view_dict)

Related

How to reduce the parameters passed between methods?

I have a class and in that class I have a method that calls multiple methods in it.
But the problem I am facing now is that the method that calls the other methods passes the same parameter (file_name) more than once.
And so when I call that combined method, it returns an empty list: [].
So this is the method with the multiple methods in it:
def show_extracted_data_from_file(self, file_name):
self.extractingText.extract_text_from_image(file_name)
total_fruit = self.filter_verdi_total_number_fruit()
fruit_name = self.filter_verdi_fruit_name()
fruit_total_cost = self.filter_verdi_total_fruit_cost(file_name)
return "\n".join("{} \t {} \t {}".format(a, b, c) for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost))
and this is the method: filter_verdi_total_fruit_cost:
def filter_verdi_total_fruit_cost(self, file_name):
locale.setlocale(locale.LC_ALL, locale='Dutch')
self.extractingText.extract_text_from_image(file_name)
return [
locale.atof(items[-1]) for items in (
token.split() for token in file_name.split('\n')
) if len(items) > 2 and items[1] in self.extractingText.list_fruit
]
this method returns the following data:
[123.2, 2772.0, 46.2, 577.5, 69.3, 3488.16, 137.5, 500.0, 1000.0, 2000.0, 1000.0, 381.25]
You see that I am calling two times file_name.
and so when I calling the method show_extracted_data_from_file in the views.py:
if uploadfile.image.path.endswith('.pdf'):
content = filter_text.show_extracted_data_from_file(uploadfile.image.path)
print(content)
it produces a empty list: []
Question: how can I reduce the parameter file_name so that it will return the correct results?
this are my two other methods that I am calling in the combined method:
def filter_verdi_total_number_fruit(self):
regex = r"(\d*(?:\.\d+)*)\s*\W+(?:" + '|'.join(re.escape(word)
for word in self.extractingText.list_fruit) + ')'
return re.findall(regex, self.extractingText.text_factuur_verdi[0])
def filter_verdi_fruit_name(self):
regex = r"(?:\d*(?:\.\d+)*)\s*\W+(" + '|'.join(re.escape(word)
for word in self.extractingText.list_fruit) + ')'
return re.findall(regex, self.extractingText.text_factuur_verdi[0])
So this is the other class:
class ExtractingTextFromFile:
    """Runs OCR over the pages of a file and keeps the recognised text.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # instance attributes:
        # OCR text, one entry per page; refilled by extract_text_from_image().
        self.text_factuur_verdi = []
        # Product names the filter methods look for in the OCR text.
        # 'Rettich' is listed twice; that is harmless for membership tests.
        self.list_fruit = ['Appels', 'Ananas', 'Peen Waspeen',
                           'Tomaten Cherry', 'Sinaasappels',
                           'Watermeloenen', 'Rettich', 'Peren', 'Peen',
                           'Mandarijnen', 'Meloenen', 'Grapefruit', 'Rettich']

    def extract_text_from_image(self, filename):
        """OCR every page of *filename* and return the list of page texts.

        The result is also stored in ``self.text_factuur_verdi``, replacing
        whatever an earlier call left there.

        NOTE(review): ``wi`` is presumably wand's ``Image`` and ``Image`` is
        presumably PIL's -- confirm against the file's imports.
        """
        self.text_factuur_verdi = []
        pdf_file = wi(filename=filename, resolution=300)
        all_images = pdf_file.convert('jpeg')
        for page in all_images.sequence:
            page_image = wi(image=page)
            jpeg_bytes = page_image.make_blob('jpeg')
            # Re-open the JPEG bytes as the image object pytesseract is given.
            picture = Image.open(io.BytesIO(jpeg_bytes))
            text = pytesseract.image_to_string(picture, lang='eng')
            self.text_factuur_verdi.append(text)
        return self.text_factuur_verdi
#AndrewRyan has the right idea.
I presume calling extract_text_from_image just populates the attribute text_factuur_verdi (list_fruit is already set in __init__).
Two routes you can go, from what you are commenting you'll probably just go with #1.. but I gave #2 as another option in case you'd ever want to call filter_verdi_total_fruit_cost by itself.
Path 1, Just remove it.
Note: filter_verdi_total_fruit_cost is only called from show_extracted_data_from_file.
def show_extracted_data_from_file(self, file_name):
    """Extract the text of *file_name* once and format the filtered columns.

    Returns one line per row, with amount, fruit name and total cost
    separated by tabs. ``zip`` stops at the shortest of the three lists.
    """
    # extract text
    # Note: stores the OCR text in `self.extractingText.text_factuur_verdi`
    self.extractingText.extract_text_from_image(file_name)
    total_fruit = self.filter_verdi_total_number_fruit()
    fruit_name = self.filter_verdi_fruit_name()
    fruit_total_cost = self.filter_verdi_total_fruit_cost()
    return "\n".join("{} \t {} \t {}".format(a, b, c) for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost))


def filter_verdi_total_fruit_cost(self):
    """Return the last column of every fruit line as a float.

    Expects ``self.extractingText.text_factuur_verdi`` to be filled already
    (see ``show_extracted_data_from_file``). Only the first page is read,
    as in the sibling ``filter_verdi_*`` methods.

    Side effect: switches the process-wide locale to 'Dutch' so that
    ``locale.atof`` parses that locale's number format.
    """
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    # Split the extracted *text* into lines. There is no file_name in scope
    # here any more, and splitting the path would never match a fruit anyway.
    text = self.extractingText.text_factuur_verdi[0]
    return [
        locale.atof(items[-1]) for items in (
            token.split() for token in text.split('\n')
        ) if len(items) > 2 and items[1] in self.extractingText.list_fruit
    ]
Path 2, Check if it's already extracted- if not, extract; if so, continue
Note: if you wanted to just call filter_verdi_total_fruit_cost
def show_extracted_data_from_file(self, file_name):
    """Extract the text of *file_name* and format the filtered columns.

    Returns one line per row, with amount, fruit name and total cost
    separated by tabs. ``zip`` stops at the shortest of the three lists.
    """
    # extract text
    # Note: stores the OCR text in `self.extractingText.text_factuur_verdi`
    self.extractingText.extract_text_from_image(file_name)
    total_fruit = self.filter_verdi_total_number_fruit()
    fruit_name = self.filter_verdi_fruit_name()
    fruit_total_cost = self.filter_verdi_total_fruit_cost(file_name)
    return "\n".join("{} \t {} \t {}".format(a, b, c) for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost))


def filter_verdi_total_fruit_cost(self, file_name):
    """Return the last column of every fruit line as a float.

    Can be called on its own: if no text has been extracted yet, the file
    is extracted first. Only the first page is read, as in the sibling
    ``filter_verdi_*`` methods.

    Side effect: switches the process-wide locale to 'Dutch' so that
    ``locale.atof`` parses that locale's number format.
    """
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    # The extracted text lives on self.extractingText, not on self.
    # list_fruit is set in __init__, so it cannot tell whether extraction
    # has happened -- check for the text itself instead.
    # NOTE: text left over from a *different* file is reused as-is.
    if not getattr(self.extractingText, 'text_factuur_verdi', None):
        # file hasn't been extracted yet.. extract it
        # Note: stores the OCR text in `self.extractingText.text_factuur_verdi`
        self.extractingText.extract_text_from_image(file_name)
    # Split the extracted text into lines, not the path string.
    text = self.extractingText.text_factuur_verdi[0]
    return [
        locale.atof(items[-1]) for items in (
            token.split() for token in text.split('\n')
        ) if len(items) > 2 and items[1] in self.extractingText.list_fruit
    ]

How to remove bracket from JSON in Flask

I want to remove one of the brackets from the JSON in Flask. I want to get the result from the database and convert it to JSON.
i=0
a = []
for food in sorted_similar_food:
if i==0:
i = i+1
else:
name = get_title_from_index(food[0])
name = str(name)
db_cursor.execute("SELECT * FROM recipe where name = " + "'" + name + "'")
r = [dict((db_cursor.description[i][0], value)
for i, value in enumerate(row)) for row in db_cursor.fetchall()]
a.append( r)
return jsonify({'cursor': a})
AND my result JSON
{ "cursor": [
[
{
"id": 3,
"image": "https://firebasestorage.googleapis.com/v0/b/cricket-17449.appspot.com/o/manti.jpg?alt=media&token=d11a65ec-6486-4b24-a54a-2840ce4fdc",
"ind": "kiyma yumurta sogan un",
"name": "manti",
"recip": "Ge"
}
],
....
]}
There are two levels of brackets. I want only one of them, so I need to remove the other.
You start with an empty list a=[]. Inside your loop, you append a list of DB results (r is another list). What you want to do is a.extend(r). That way the elements inside r are appended to a and you end up with just the a list.

create dataframe by Iterating upto nth level of values in nested dictionary

I have a JSON file downloaded from this link/website (human disease ICD-11 classification). The data has up to 8 levels of nesting, e.g.:
"name":"br08403",
"children":[
{
"name":"01 Certain infectious or parasitic diseases",
"children":[
{
"name":"Gastroenteritis or colitis of infectious origin",
"children":[
{
"name":"Bacterial intestinal infections",
"children":[
{
"name":"1A00 Cholera",
"children":[
{
"name":"H00110 Cholera"
}
I tried with this code:
def flatten_json(nested_json):
    """
    Flatten a json object with nested keys into a single level.

    Dict keys and list positions along the path to each leaf are joined
    with '_' to form the flat key, e.g. ``children_0_name``.

    Args:
        nested_json: A nested json object (dicts, lists and scalar leaves).
    Returns:
        A flat dict mapping each joined path to its leaf value. Empty dicts
        and empty lists contribute no entry; a scalar passed at the top
        level is stored under the key ''.
    """
    out = {}

    def flatten(x, name=''):
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, f"{name}{key}_")
        elif isinstance(x, list):
            for position, item in enumerate(x):
                flatten(item, f"{name}{position}_")
        else:
            # Drop the trailing '_' that the last path component added.
            out[name[:-1]] = x

    flatten(nested_json)
    return out
df2 = pd.Series(flatten_json(dictionary)).to_frame()
output i'm getting is:
name br08403
children_0_name 01 Certain infectious or parasitic diseases
children_0_children_0_name Gastroenteritis or colitis of infectious origin
children_0_children_0_children_0_name Bacterial intestinal infections
children_0_children_0_children_0_children_0_name 1A00 Cholera
... ...
children_21_children_17_children_10_name NF0A Certain early complications of trauma, n...
children_21_children_17_children_11_name NF0Y Other specified effects of external causes
children_21_children_17_children_12_name NF0Z Unspecified effects of external causes
children_21_children_18_name NF2Y Other specified injury, poisoning or cer...
children_21_children_19_name NF2Z Unspecified injury, poisoning or certain..
but the desired output is a dataframe with 8 columns which can accommodate the last depth of the nested name key e.g. something like this:
I would really appreciate any help
code tried for extracting the 'name' property by created a dataframe as follows:
with open('br08403.json') as f:
d = json.load(f)
df2 = pd.DataFrame(d)
data = []
for a in range(len(df2)):
# print(df2['children'][a]['name'])
data.append(df2['children'][a]['name'])
for b in range(len(df2['children'][a]['children'])):
# print(df2['children'][a]['children'][b]['name'])
data.append(df2['children'][a]['children'][b]['name'])
if len(df2['children'][a]['children'][b]) < 2:
print(df2['children'][a]['children'][b]['name'])
else:
for c in range(len(df2['children'][a]['children'][b]['children'])):
# print(df2['children'][a]['children'][b]['children'][c]['name'])
data.append(df2['children'][a]['children'][b]['children'][c]['name'])
if len(df2['children'][a]['children'][b]['children'][c]) < 2:
print(df2['children'][a]['children'][b]['children'][c]['name'])
else:
for d in range(len(df2['children'][a]['children'][b]['children'][c]['children'])):
# print(df2['children'][a]['children'][b]['children'][c]['children'][d]['name'])
data.append(df2['children'][a]['children'][b]['children'][c]['children'][d]['name'])
but i'm getting a plain list as follows:
['01 Certain infectious or parasitic diseases',
'Gastroenteritis or colitis of infectious origin',
'Bacterial intestinal infections',
'1A00 Cholera',
'1A01 Intestinal infection due to other Vibrio',
'1A02 Intestinal infections due to Shigella',
'1A03 Intestinal infections due to Escherichia coli',
'1A04 Enterocolitis due to Clostridium difficile',
'1A05 Intestinal infections due to Yersinia enterocolitica',
'1A06 Gastroenteritis due to Campylobacter',
'1A07 Typhoid fever',
'1A08 Paratyphoid Fever',
'1A09 Infections due to other Salmonella',....
A simple pandas only iterative approach.
res = requests.get("https://www.genome.jp/kegg-bin/download_htext?htext=br08403.keg&format=json&filedir=")
js = res.json()
df = pd.json_normalize(js)
for i in range(20):
df = pd.json_normalize(df.explode("children").to_dict(orient="records"))
if "children" in df.columns: df.drop(columns="children", inplace=True)
df = df.rename(columns={"children.name":f"level{i}","children.children":"children"})
if df[f"level{i}"].isna().all() or "children" not in df.columns: break

Python code produces incorrect results without using the .COPY() function

I have this code.
List1 = [{'pg_id': 100, "group_name": "test1", "product_price": 100}, {'pg_id': 200, "group_name": "test2", "product_price": 200}]
List2 = [{'lowest_price': 20}]
FINAL_DICT = {}
for latest_lowest in List1:
    for coupon_related__product in List2:
        if coupon_related__product['lowest_price'] < latest_lowest['product_price']:
            # NOTE: this stores a reference to the dict that lives in List2,
            # not a copy. Every key of FINAL_DICT therefore points at the
            # same object, and the group_name written on the next line
            # overwrites the one written for the previous key. This is the
            # behaviour the question asks about, so it is left in place.
            FINAL_DICT[latest_lowest['pg_id']] = coupon_related__product
            FINAL_DICT[latest_lowest['pg_id']]['group_name'] = latest_lowest['group_name']
            print("Group name is %s"%(latest_lowest['group_name']))
# dict.iteritems() only exists on Python 2; items() works on both.
for PG_ID, LOWEST_PRICED_PRODUCT_THIS_PG in FINAL_DICT.items():
    print(LOWEST_PRICED_PRODUCT_THIS_PG)
My desired output in terminal is
Group name is test1
Group name is test2
{'lowest_price': 20, 'group_name': 'test2'}
{'lowest_price': 20, 'group_name': 'test1'}
But it outputs
Group name is test1
Group name is test2
{'lowest_price': 20, 'group_name': 'test2'}
{'lowest_price': 20, 'group_name': 'test2'}
If I change
FINAL_DICT[ latest_lowest['pg_id'] ] = coupon_related__product
to
FINAL_DICT[ latest_lowest['pg_id'] ] = coupon_related__product.copy()
Then it produces correct output which I want.
My question is, why all of the dictionaries in FINAL_DICT has group_name=test2 when I don't use .copy()
this line:
FINAL_DICT[ latest_lowest['pg_id'] ] = coupon_related__product
assigns a dictionary to your entry. The next line:
FINAL_DICT[ latest_lowest['pg_id'] ]['group_name'] = latest_lowest['group_name']
changes the group name, but every entry of FINAL_DICT refers to the same dictionary object (the single element of List2), so the change is visible through all of them: the reference is shared.
that's why you have to make a copy of the dict so the references are independent:
FINAL_DICT[ latest_lowest['pg_id'] ] = coupon_related__product.copy()
now modifying FINAL_DICT[ latest_lowest['pg_id'] ]['group_name'] doesn't change the values of others FINAL_DICT[ latest_lowest['pg_id'] ]
In Python dictionaries are mutable objects and are therefore not copied by default. You can read more about this here: python dictionary passed as an input to a function acts like a global in that function rather than a local
If the whole pass by name vs pass by value vs pass by reference thing is too confusing then all you need to know is that Python does not normally make a copy of a list or dict when assigning values using =. When you want to make a copy of a list or dict you need to explicitly tell Python you would like a copy. So in summary
x = {"hello": "world"}
y = x
y["hello"] = "not world"
print("x is " + str(x))
print("y is " + str(y))
> x is {'hello': 'not world'}
> y is {'hello': 'not world'}
vs
x = {"hello": "world"}
y = x.copy()
y["hello"] = "not world"
print("x is " + str(x))
print("y is " + str(y))
> x is {'hello': 'world'}
> y is {'hello': 'not world'}

Get correct brace grouping from string

I have files with incorrect JSON that I want to start fixing by getting it into properly grouped chunks.
The brace grouping {{ {} {} } } {{}} {{{}}} should already be correct
How can I grab all the top-level braces, correctly grouped, as separate strings?
If you don't want to install any extra modules, a simple function will do:
def top_level(s):
    """Yield every balanced top-level ``{...}`` group of *s*, in order.

    Characters outside any group are ignored. A ``}`` seen while no group
    is open is skipped, and a group that is still open when the string ends
    is never yielded. Braces inside string literals are counted like any
    other brace.
    """
    depth = 0
    start = -1
    for i, c in enumerate(s):
        if c == '{':
            if depth == 0:
                # Opening brace of a new top-level group.
                start = i
            depth += 1
        elif c == '}' and depth:
            depth -= 1
            if depth == 0:
                # The group that began at `start` is now closed.
                yield s[start:i+1]
print(list(top_level('{{ {} {} } } {{}} {{{}}}')))
Output:
['{{ {} {} } }', '{{}}', '{{{}}}']
It will skip invalid braces but could be easily modified to report an error when they are spotted.
Using the regex module:
In [1]: import regex
In [2]: braces = regex.compile(r"\{(?:[^{}]++|(?R))*\}")
In [3]: braces.findall("{{ {} {} } } {{}} {{{}}}")
Out[3]: ['{{ {} {} } }', '{{}}', '{{{}}}']
pyparsing can be really helpful here. It will handle pathological cases where you have braces inside strings, etc. It might be a little tricky to do all of this work yourself, but fortunately, somebody (the author of the library) has already done the hard stuff for us.... I'll reproduce the code here to prevent link-rot:
# jsonParser.py
#
# Implementation of a simple JSON parser, returning a hierarchical
# ParseResults object support both list- and dict-style data access.
#
# Copyright 2006, by Paul McGuire
#
# Updated 8 Jan 2007 - fixed dict grouping bug, and made elements and
# members optional in array and object collections
#
json_bnf = """
object
{ members }
{}
members
string : value
members , string : value
array
[ elements ]
[]
elements
value
elements , value
value
string
number
object
array
true
false
null
"""
from pyparsing import *
TRUE = Keyword("true").setParseAction( replaceWith(True) )
FALSE = Keyword("false").setParseAction( replaceWith(False) )
NULL = Keyword("null").setParseAction( replaceWith(None) )
jsonString = dblQuotedString.setParseAction( removeQuotes )
jsonNumber = Combine( Optional('-') + ( '0' | Word('123456789',nums) ) +
Optional( '.' + Word(nums) ) +
Optional( Word('eE',exact=1) + Word(nums+'+-',nums) ) )
jsonObject = Forward()
jsonValue = Forward()
jsonElements = delimitedList( jsonValue )
jsonArray = Group(Suppress('[') + Optional(jsonElements) + Suppress(']') )
jsonValue << ( jsonString | jsonNumber | Group(jsonObject) | jsonArray | TRUE | FALSE | NULL )
memberDef = Group( jsonString + Suppress(':') + jsonValue )
jsonMembers = delimitedList( memberDef )
jsonObject << Dict( Suppress('{') + Optional(jsonMembers) + Suppress('}') )
jsonComment = cppStyleComment
jsonObject.ignore( jsonComment )
def convertNumbers(s, l, toks):
    """Parse action: convert the matched number text to ``int`` or ``float``.

    Args:
        s: Unused here.
        l: Unused here.
        toks: The matched tokens; only ``toks[0]`` is read.

    Returns:
        ``int(toks[0])`` when the text is a valid integer literal,
        otherwise ``float(toks[0])``.
    """
    n = toks[0]
    try:
        return int(n)
    except ValueError:
        # Text with a fraction or exponent is not an int literal.
        return float(n)
jsonNumber.setParseAction( convertNumbers )
Phew! That's a lot ... Now how do we use it? The general strategy here will be to scan the string for matches and then slice those matches out of the original string. Each scan result is a tuple of the form (lex-tokens, start_index, stop_index). For our use, we don't care about the lex-tokens, just the start and stop. We could do: string[result[1]:result[2]] and it would work. We can also do string[slice(*result[1:])] -- Take your pick.
# Each scan result is a (tokens, start_index, stop_index) tuple; only the
# two indices are needed to slice the matched text out of testdata.
results = jsonObject.scanString(testdata)
for _tokens, start, stop in results:
    print('*' * 80)
    print(testdata[start:stop])

Categories