Extracting and matching Keys/Values/Words etc from OCR JSON - python

I'm trying to match KEY_VALUE_SET info to it's real names. This is from Textract already (JSON).
KEY:
KEY_VALUE_SET "Key" VALUE ID > KEY_VALUE_SET "Child" multiple CHILD ID > WORD ID > WORD TEXT
VALUE:
KEY_VALUE_SET "Key" VALUE ID > KEY_VALUE_SET "Value" CHILD ID > WORD ID > WORD TEXT
I'm just starting on this with very basic Python knowledge.
element = ""
childIds = ""
valueIds = ""
keyResult = ""
for item in response["Blocks"]:
if item["BlockType"] == "KEY_VALUE_SET":
if item["Confidence"]:
keyValueSetId = "KeyValueSetID: " + item['Id']
for val in item["EntityTypes"]:
if val == "KEY":
for i in item["Relationships"]:
if i["Type"] == "VALUE":
for i in i["Ids"]:
count = 0
valueId = "ValueId" + str(count) + ": " + i
valueIds += valueId + " "
count +1
elif i["Type"] == "CHILD":
for i in i["Ids"]:
count = 0
childId = "ChildId" + str(count) + ": " + i
childIds += childId + " "
count +1

Related

Python reverse each word in a sentence without inbuilt function python while preserve order

Not allowed to use "Split(),Reverse(),Join() or regexes" or any other
helping inbuilt python function
input something like this:
" my name is scheven "
output like this:
"ym eman si nevehcs"
you need to consider removing the starting,inbetween,ending spaces aswell in the input
I have tried 2 tries, both failed i will share my try to solve this and maby an idea to improve it
First try:
def reverseString(someString):
#lenOfString = len(someString)-1
emptyList = []
for i in range(len(someString)):
emptyList.append(someString[i])
lenOfString = len(emptyList)-1
counter = 0
while counter < lenOfString:
if emptyList[counter] == " ":
counter+=1
if emptyList[lenOfString] == " ":
lenOfString-=1
else:
swappedChar = emptyList[counter]
emptyList[counter] = emptyList[lenOfString]
emptyList[lenOfString] = swappedChar
counter+=1
lenOfString-=1
str_contactantion = ""
#emptyList = emptyList[::-1]
#count_spaces_after_letter=0
for letter in emptyList:
if letter != " ":
str_contactantion+=letter
#str_contactantion+=" "
str_contactantion+=" "
return str_contactantion
second try:
def reverse(array, i, j):
emptyList = []
if (j == i ):
return ""
for k in range(i,j):
emptyList.append(array[k])
start = 0
end = len(emptyList) -1
if start > end: # ensure i <= j
start, end =end, start
while start < end:
emptyList[start], emptyList[end] = emptyList[end], emptyList[start]
start += 1
end -= 1
strconcat=""
for selement in emptyList:
strconcat+=selement
return strconcat
def reverseStr(someStr):
start=0
end=0
help=0
strconcat = ""
empty_list = []
for i in range(len(someStr)):
if(someStr[i] == " "):
continue
else:
start = i
j = start
while someStr[j] != " ":
j+=1
end = j
#if(reverse(someStr,start,end) != ""):
empty_list.append(reverse(someStr,start,end))
empty_list.append(" ")
for selement in empty_list:
strconcat += selement
i = end + 1
return strconcat
print(reverseStr(" my name is scheven "))
The following works without managing indices:
def reverseString(someString):
result = crnt = ""
for c in someString:
if c != " ":
crnt = c + crnt # build the reversed current token
elif crnt: # you only want to do anything for the first space of many
if result:
result += " " # append a space first
result += crnt # append the current token
crnt = "" # and reset it
if crnt:
result += " " + crnt
return result
reverseString(" my name is scheven ")
# 'ym eman si nevehcs'
Try this:
def reverseString(someString):
result = ""
word = ""
for i in (someString + " "):
if i == " ":
if word:
result = result + (result and " ") + word
word = ""
else:
word = i + word
return result
You can then call it like this:
reverseString(" my name is scheven ")
# Output: 'ym eman si nevehcs'
Try this:
string = " my name is scheven "
def reverseString(someString):
result = ''
curr_word = ''
for i in someString:
if i == ' ':
if curr_word:
if result:
result = f'{result} {curr_word}'
else:
result = f'{result}{curr_word}'
curr_word = ''
else:
curr_word = f'{i}{curr_word}'
return result
print(repr(reverseString(string)))
Output:
'ym eman si nevehcs'
Note: if you're allowed to use list.append method, I'd suggest using a collections.deque as it's more performant than appending to a list. But of course, in the end you'll need to join the list together, and you mentioned that you're not allowed to use str.join, so that certainly poses an issue.

Rosalind Consensus and Profile Problem code doesn't work

This is the problem: http://rosalind.info/problems/cons/
def file_read(fname):
with open(fname, "r") as myfile:
global data
data = myfile.readlines()
print(data)
i = 0
while i < len(data):
data[i] = data[i].replace("\n", "")
if ">" in data[i]:
data.remove(data[i])
else:
i += 1
file_read('rosalind_cons.txt')
res = ["".join(el) for el in zip(*data)]
print(res)
a_str = ""
c_str = ""
g_str = ""
t_str = ""
for x in range(0, len(res)):
a_str += (str(res[x].count("A"))) + " "
for x in range(0, len(res)):
c_str += (str(res[x].count("C"))) + " "
for x in range(0, len(res)):
g_str += (str(res[x].count("G"))) + " "
for x in range(0, len(res)):
t_str += (str(res[x].count("T"))) + " "
a_str_nospace = a_str.replace(" ", "")
c_str_nospace = c_str.replace(" ", "")
g_str_nospace = g_str.replace(" ", "")
t_str_nospace = t_str.replace(" ", "")
consensus_string = ""
for x in range(0, len(a_str_nospace)):
if max(a_str_nospace[x], c_str_nospace[x], g_str_nospace[x], t_str_nospace[x]) in a_str_nospace[x]:
consensus_string += "A"
elif max(a_str_nospace[x], c_str_nospace[x], g_str_nospace[x], t_str_nospace[x]) in c_str_nospace[x]:
consensus_string += "C"
elif max(a_str_nospace[x], c_str_nospace[x], g_str_nospace[x], t_str_nospace[x]) in g_str_nospace[x]:
consensus_string += "G"
elif max(a_str_nospace[x], c_str_nospace[x], g_str_nospace[x], t_str_nospace[x]) in t_str_nospace[x]:
consensus_string += "T"
print(consensus_string)
print("A: " + a_str)
print("C: " + c_str)
print("G: " + g_str)
print("T: " + t_str)
What's wrong with my code?
For the sample output it works but for the larger datasets it doesn't.
I don't know what is wrong, I think it's the file reading part that's not correct (maybe?)
EDIT: There are some print functions in there but I don't copy them in the answer box so they don't matter in the result
nice to see a fellow Rosalind user. I discovered that page when I studied Bioinformatics and just stumbled upon it again last month.
To answer your question:
You're creating a string of numbers, so that works fine if the numbers are all below 10.
Try building a list of integers first and only convert them to a string in the final step.

How to reverse particular string in an input string and return the whole string, with reversed part?

I am having problems with what was said in the title. Basically I am given sentences which contain addresses - I am to reverse only the address in the sentence and return the string. I can reverse the address fine but I am having troubles returning the whole string. My code: ( edit :now corrected):
def problem3(searchstring):
"""
Garble Street name.
:param searchstring: string
:return: string
"""
flag = 0
output = ""
#each word is considered in loop
for i in searchstring.split():
if i.endswith('.'): #if the word ends with .
flag = 0
stype = i
output += " " + stype
elif flag == 1: #if the flag is 1
#street =
output += " " + i[::-1]
elif i.isdigit(): #if the word is digit
flag =1
#num = i
output += i
else:
output += i + " "
#address = num + " " + street + " " + stype
return output
Try this
def problem3(searchstring):
"""
Garble Street name.
:param searchstring: string
:return: string
"""
flag = 0
street = ""
stri=""
#each word is considered in loop
for i in searchstring.split():
if i.endswith('.'): #if the word ends with .
flag = 0
stype = i
continue
if flag == 1: #if the flag is 1
street = street + " " + i[::-1]
continue
if i.isdigit(): #if the word is digit
flag =1
num = i
continue
stri=stri+' '+i
address =stri+" "+ num + " " + street + " " + stype
return address
Then if you call the function:
print(problem3('The EE building is at 465 Northwestern Ave.'))
print(problem3('Meet me at 201 South First St. at noon'))
output will be
The EE building is at 465 nretsewhtroN Ave.
Meet me at at noon 201 htuoS tsriF St.
Instead of using 'if' several times you can use the below code or you can use continue also:
def problem3(searchstring):
"""
Garble Street name.
:param searchstring: string
:return: string
"""
flag = 0
address = ""
#each word is considered in loop
for i in searchstring.split():
if i.endswith('.'): #if the word ends with .
flag = 0
stype = i
address += " " + stype
elif flag == 1: #if the flag is 1
#street =
address += " " + i[::-1]
elif i.isdigit(): #if the word is digit
flag =1
#num = i
address += i
else:
address += i + " "
#address = num + " " + street + " " + stype
return address
print(problem3('The EE building is at 465 Northwestern Ave.'))

Reading YAML files and accessing lists

I am currently reading data from a .yml file. Inside the file is the following part for every main entry:
- !
name: Martial Focus
prerequisites:
tier1:
any:
Attribute:
- Attribute1:§ 1
- Attribute2:§ 1
Feat:
- Feat1
Other:
- Other Prerequisites
cost:
- 3
description: |
[...]
effect: |
[...]
I've been able to read all the data, including 'prerequisites', but here I have a special problem:
Where with the other data, I was able to access sublists it seems to be different for this:
The "any:" part is optional, so it could also say something like
prerequisites:
tier1:
Attribute:
- Attribute1:§ 1
- Attribute2:§ 1
Feat:
- Feat1
Other:
- Other Prerequisites
Reading the .yml file converts the part above to
'prerequisites': {
'tier1': {
'any': {
'Attribute': ['Attribute1:§ 1', 'Attribute2:§ 1'],
'Feat': ['Feat1'],
'Other': ['Other Prerequisites']
}
}
}
So in my code, for every "tierX", I check if it contains a key "any:" via
if 'any' in tier:
# do the stuff to be done if 'any' exists
else:
# do the stuff to be done if it doesn't
But it never seems to be true. Since "Attribute:", "Feat:" and "Other:" are also optional, I do the same for those inside the if-else-statement and it's the same problem with them though for those there's no else-statement.
Below you can find the code I'm using. It won't be the prettiest since I litterally started with python today but I hope that you'll help me anyway:
prerequisites = ""
tierNum = 0
for tier in data['prerequisites']:
tierNum += 1
thisTier = ""
if 'any' in tier:
print("'any' found!")
content = tier['any']
if 'Other' in content:
other = ""
for s2 in content['Other'][:-1]:
other += s2 + ", "
thisTier += "**" + other
if len(content['Other'][:-1]) == 0:
thisTier += str(content['Other'][-1:])
else:
thisTier += "or " + str(content['Other'][-1:])
if 'Attribute' in content:
attributes = ""
for s2 in content['Attribute'][:-1]:
attributes += s2 + ", "
if thisTier.length() == 0:
thisTier += "**" + attributes
else:
thisTier += ", or " + attributes
if len(content['Attribute'][:-1]) == 0:
thisTier += str(content['Attribute'][-1:])
else:
thisTier += "or " + str(content['Attribute'][-1:])
if 'Feat' in content:
feats = ""
for s2 in content['Feat'][:-1]:
feats += s2 + ", "
if thisTier.length() == 0:
thisTier += "**" + feats
else:
thisTier += ", or " + feats
if len(content['Feat'][:-1]) == 0:
thisTier += str(content['Feat'][-1:])
else:
thisTier += "or " + str(content['Feat'][-1:])
else:
content = tier
if 'Other' in content:
other = ""
for s2 in content['Other'][:-1]:
other += s2 + ", "
thisTier += "**" + other
if len(content['Other'][:-1]) == 0:
thisTier += str(content['Other'][-1:])
else:
thisTier += "or " + str(content['Other'][-1:])
if 'Attribute' in content:
attributes = ""
for s2 in content['Attribute'][:-1]:
attributes += s2 + ", "
thisTier += "**" + attributes
if len(content['Attribute'][:-1]) == 0:
thisTier += str(content['Attribute'][-1:])
else:
thisTier += "or " + str(content['Attribute'][-1:])
if 'Feat' in content:
feats = ""
for s2 in content['Feat'][:-1]:
feats += s2 + ", "
thisTier += "**" + feats
if len(content['Feat'][:-1]) == 0:
thisTier += str(content['Feat'][-1:])
else:
thisTier += "or " + str(content['Feat'][-1:])
prerequisites += "*Tier {0}:\n{1}\n".format(tierNum, thisTier)
prerequisites = prerequisites[:-1]
I'm doing stuff like the content['Feat'][:-1] in order to get every element except the last so I can add a ", or " in front of the last element, should there be more than one.
EDIT:
My desired Output would be something like:
Prerequisites:
*Tier 1:
**Attribute1 1, or Attribute2 1
**Feat1
**Other Prerequisites
If no any exists and
Prerequisites:
*Tier 1:
**Attribute1 1, or Attribute2 1, or Feat1, or Other Prerequisites
if it doesn't
Your problem is that for tier in data["predicates"] iterates over the keys of the predicate dictionary, thus the subsequent if "any" in tier actually evaluates "any" in "tier1" which is of cause always false.
What you want to test here is "any" in data["predicates"]["tier1"]. When working with dictionaries (i.e. mappings) you have to differentiate between a key and its corresponding value.
Interestingly you have gotten it right for the next level down:
# ...
content = tier['any']
if 'Other' in content:
other = ""
for s2 in content['Other']:
# ...
Ways to iterate over a dictionary
d = {"key1":"value1", "key2":"value2", "key3":"value3"}
for key in d:
print(key)
# prints key1, key2, key3
for key in d.keys():
print(key)
# prints key1, key2, key3
for value in d.values():
print(value)
# prints value1, value2, value3
for item in d.items():
print(item)
# prints (key1,value1), (key2,value2), (key3,value3)
for key, value in d.items():
print(key)
print(value)
# prints key1, value1, key2, value2, key3, value3
see python documentation here and here
As you are new to Python and do not know what is possible, allow me to present you a much more elegant solution with out all the repetitive string operations:
import yaml
yamldata1 = r"""
- !
name: Martial Focus
prerequisites:
tier1:
any:
Attribute:
- Attribute1:§ 1
- Attribute2:§ 1
Feat:
- Feat1
Other:
- Other Prerequisites
cost:
- 3
description: |
[...]
effect: |
[...]
"""
yamldata2 = r"""
- !
name: Martial Focus
prerequisites:
tier1:
Attribute:
- Attribute1:§ 1
- Attribute2:§ 1
Feat:
- Feat1
Other:
- Other Prerequisites
cost:
- 3
description: |
[...]
effect: |
[...]
"""
def process(data):
output = ""
for tier_name, tier in data['prerequisites'].items():
output += f"* {tier_name}"
if 'any' in tier:
content = tier['any']
prerequisites = content.get('Other', []) + content.get('Attribute', []) + content.get('Feat', [])
if prerequisites:
output += "\n** " + " or ".join(prerequisites)
else:
content = tier
prerequisites = [content.get('Other', []), content.get('Attribute', []), content.get('Feat', [])]
for subset in prerequisites:
if subset:
output += "\n** " + " or ".join(subset)
return output
data = yaml.load(yamldata1)[0]
print(process(data))
print('#'*10)
data = yaml.load(yamldata2)[0]
print(process(data))

Index error- Works in one part of my code, not another

I keep getting an index error:
User_input is 0
Key is 0
Traceback (most recent call last):
File "C:\Users\Theo_2\Google Drive\Computer science\Encryption and decryption work\Cipher 2\Cipher 2.5.0 alpha1.py", line 62, in <module>
Test_algorithm(5)
File "C:\Users\Theo_2\Google Drive\Computer science\Encryption and decryption work\Cipher 2\Cipher 2.5.0 alpha1.py", line 49, in Test_algorithm
Encrypt(User_input)
File "C:\Users\Theo_2\Google Drive\Computer science\Encryption and decryption work\Cipher 2\Cipher 2.5.0 alpha1.py", line 21, in Encrypt
ref_for_output = Master_Key.index(User_input[Count]) + Master_Key.index(Key[Count])
IndexError: string index out of range
The User_input is 0 part is me testing
Basically the following code is a rewrite of something I wrote to encrypt and decrypt using a keyword. This version isn't finished, all my functions are defined, including one to test every possible value, but I haven't started any interface yet.
Anyway my code is as follows:
import time
Master_Key = "0123456789 abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#£$%&'()*+,-./:;?#[\\]^_`{|}~\t\n\r\x0b\x0c"
global Output
Output = ""
global Key
Key = ""
def Compatibility_check(Key):
while Key == "":
print("Your key cannot be blank")
Key = input("Please input a new key: ")
while len(Key) > len(User_input):
Key = Key[:-1]
while len(Key) < len(User_input):
Key += (Key[Temp])
Temp += 1
def Encrypt(User_input):
Count = 0
Output = ""
while Count <= len(User_input):\
ref_for_output = Master_Key.index(User_input[Count]) + Master_Key.index(Key[Count])
if ref_for_output > len(Master_Key):
ref_for_output -= len(Master_Key)
Output += Master_Key[ref_for_output]
Count += 1
print(Output)
def Decrypt(User_input):
Count = 0
Output = ""
while Count <= len(User_input):
ref_for_output = Master_Key.index(User_input[Count]) - Master_Key.index(Key[Count])
if ref_for_output < 0:
ref_for_output += len(Master_Key)
Count += 1
Output += Master_Key[ref_for_output]
def Test_algorithm(Null):
fail, Input_Counter = False, 0
while Input_Counter <= len(Master_Key):
User_input = Master_Key[Input_Counter]
print("User_input is " + User_input)
Key_Counter = 0
Input_Counter += 1
while fail == False:
while Key_Counter < len(Master_Key):
Key = Master_Key[Key_Counter]
print("Key is " + Key)
Encrypt(User_input)
print("The encrypted value is " + Output)
Decrypt(Output)
print("The decrypted value is " + Output)
if Output == User_input:
print("The encryption and decryption of " + str(User_input) + " with the key " + str(Key_Counter) + " results in " + Output)
print("pass")
else:
print("fail")
print("The encryption and decryption of " + str(User_input) + " with the key " + str(Key_Counter) + " results in " + Output)
fail = True
Key_Counter += 1
Test_algorithm(Nothing)
##Key = "abcdefghijklmnop"
##User_input = "12345"
##Compatibility_check(Key)
##Encrypt(User_input)
##print(Output)
This is a refinement of an old script, which worked but could really do with improvement. The reason I am rewriting it is that I want to output encrypted values to a file and then re-read and decrypt them upon user request.
I hope you can help- Thanks.
Looks like the variable User_input is the string "0" and therefore this fails when Count is 1 since the index starts at 0 and User_input[1] is the second letter of the string.
You probable want to start by changing this:
while Count <= len(User_input):
to this:
while Count < len(User_input):

Categories