Python equivalent of Ruby's .find - python

I'm trying to port the following Ruby method to Python:
CF = {:metre=>{:kilometre=>0.001, :metre=>1.0, :centimetre=>100.0}, :litre=>{:litre=>1.0, :millilitre=>1000.0, :imperial_pint=>1.75975}}
def common_dimension(from, to)
CF.keys.find do |canonical_unit|
CF[canonical_unit].keys.include?(from) &&
CF[canonical_unit].keys.include?(to)
end
end
Which behaves like:
>> common_dimension(:metre, :centimetre)
=> :metre
>> common_dimension(:litre, :centimetre)
=> nil
>> common_dimension(:millilitre, :imperial_pint)
=> :litre
What is the "Pythonic" way to implement this?

Below is Python code equivalent to your Ruby logic.
# Conversion factors, keyed by canonical unit; each inner dict lists the
# units that can be expressed in terms of that canonical unit.
CF = {
    "metre": {"kilometre": 0.001, "metre": 1.0, "centimetre": 100.0},
    "litre": {"litre": 1.0, "millilitre": 1000.0, "imperial_pint": 1.75975},
}


def common(fr, to):
    """Return the first canonical unit in CF whose table lists both units.

    Mirrors Ruby's ``Enumerable#find``: the first matching key is returned,
    or ``None`` when no canonical unit covers both ``fr`` and ``to``.
    """
    # next() with a default stops at the first match and yields None otherwise.
    return next(
        (key for key, value in CF.items() if fr in value and to in value),
        None,
    )
print(common('metre','centimetre'))
metre
print(common('metre','centimdetre'))
None
******************
single line function
com = lambda x,y:[key for key,value in CF.items() if (x in value) and (y in value)]
print(com('metre','centimetre'))
['metre']

Other option both for Ruby and Python.
For Ruby:
cf = {:metre=>{:kilometre=>0.001, :metre=>1.0, :centimetre=>100.0}, :litre=>{:litre=>1.0, :millilitre=>1000.0, :imperial_pint=>1.75975}}
from = :litre
to = :millilitre
cf.select { |k, v| ([from, to] - v.keys).empty? }.keys
#=> [:litre]
For Python:
CF = {'metre': {'kilometre': 0.001, 'metre': 1.0, 'centimetre': 100.0}, 'litre': {'litre': 1.0, 'millilitre': 1000.0, 'imperial_pint': 1.75975}}
_from = 'millilitre'
_to = 'imperial_pint'
res = [ k for k, v in CF.items() if not bool(set([_from, _to]) - set(v.keys())) ]
#=> ['litre']

Related

How would I extract all names of a similar type in a file using python and regex?

I have a file that is filled with cultures, and each culture has a unique set of surnames for the people of each culture. The problem I have is that there are numerous files, each of them with hundreds, if not thousands of names, so instead of gathering all of these files by hand, I would like to automate this task in a sense using python and regex.
Here is an example of the file's contents:
###Myrman###
360 = { # DUPLICATE §§§§§§
name="of Myr"
culture = myrman
}
300507 = {
name = "of Myr"
culture = myrman
}
300525 = {
name = "Trellos"
culture = myrman
}
300534 = {
name = "Uteuran"
culture = myrman
}
##Lysene##
1386 = {
name="Ormollen"
culture = lysene
coat_of_arms = {
template = 0
layer = {
texture = 14
texture_internal = 9
emblem = 0
color = 0
color = 0
color = 0
}
}
}
300505 = {
name = "of Lys"
culture = lysene
}
300523 = {
name = "Lohar"
culture = lysene
}
300532 = {
name = "Assadyrn"
culture = lysene
}
So as you can see, there are two types of cultures here, each with different surnames for people of the respective cultures. I want to take all of these different names, and sort them into different groups, that are also separated by commas and quotes. Here is an example of what I want to happen:
Myrman: ["of Myr", "of Myr", "Trellos", "Uteuran"]
Lysene: ["Ormollen", "of Lys", "Lohar", "Assadyrn"]
How would I go about doing this with Python and its regex library?
Ooh, a parser problem! Let's use the lark parser generator to figure this out.
First, let's create a syntax for our file - this is cobbled together based on the JSON parser example:
import lark
parser = lark.Lark(r"""
start: (term)*
term: key "=" value "\n"
key: CNAME | SIGNED_NUMBER
value: CNAME | SIGNED_NUMBER | ESCAPED_STRING | map
map: "{" (term)* "}"
%import common.CNAME
%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS
%ignore WS
%ignore /#.*/
""")
Pretty straightforward; the file is a list of terms, which are key-values, where the key may be a name or a number, and the value can be a name, number, string, or a map, which is a brace-enclosed list of terms.
Then, let's write a transformer to transform the Lark parse tree to a dict:
class TreeTransformer(lark.Transformer):
def start(self, items):
return dict(items)
def term(self, items):
return (items[0], items[1])
def CNAME(self, item):
return item.value
def SIGNED_NUMBER(self, item):
return int(item.value)
def ESCAPED_STRING(self, item):
return item.value[1:-1]
def map(self, items):
return dict(items)
def key(self, item):
return item[0]
def value(self, item):
return item[0]
Could probably be more terse, but this works.
Let's run it against the data:
from pathlib import Path
from pprint import pprint
data = Path("./so75472097-data.txt").read_text()
tree = parser.parse(data)
res = TreeTransformer().transform(tree)
pprint(res)
The output is
{360: {'culture': 'myrman', 'name': 'of Myr'},
1386: {'coat_of_arms': {'layer': {'color': 0,
'emblem': 0,
'texture': 14,
'texture_internal': 9},
'template': 0},
'culture': 'lysene',
'name': 'Ormollen'},
300505: {'culture': 'lysene', 'name': 'of Lys'},
300507: {'culture': 'myrman', 'name': 'of Myr'},
300523: {'culture': 'lysene', 'name': 'Lohar'},
300525: {'culture': 'myrman', 'name': 'Trellos'},
300532: {'culture': 'lysene', 'name': 'Assadyrn'},
300534: {'culture': 'myrman', 'name': 'Uteuran'}}
-- that looks promising!
Then, it's just a matter of dict traversal:
from collections import defaultdict
names_by_culture = defaultdict(list)
for info in res.values():
names_by_culture[info["culture"]].append(info["name"])
pprint(dict(names_by_culture))
... and hey voilà!
{'lysene': ['Ormollen', 'of Lys', 'Lohar', 'Assadyrn'],
'myrman': ['of Myr', 'of Myr', 'Trellos', 'Uteuran']}
Now, all you have to do is wrap that bad boy into a function and call it on all of your files.
(EDIT, now that I read the latest comment and know what to google: you could just use the ClauseWizard library instead of writing the parser yourself, but this was more fun!)
EDIT 2
As discussed in the comments, a grammar and transformer that's also fine with "basically anything" for unquoted values:
parser = lark.Lark(r"""
start: (term)*
term: key "=" value "\n"
key: KEYNAME | SIGNED_NUMBER
value: VALUENAME | SIGNED_NUMBER | ESCAPED_STRING | map
map: "{" (term)* "}"
VALUENAME: /[a-zA-Z][^\s=]*/
KEYNAME: /[a-zA-Z][-a-zA-Z0-9_]*/
%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS
%ignore WS
%ignore /#.*/
""")
from operator import itemgetter, attrgetter
class TreeTransformer(lark.Transformer):
start = dict
map = dict
key = itemgetter(0)
value = itemgetter(0)
VALUENAME = attrgetter("value")
KEYNAME = attrgetter("value")
term = tuple
def SIGNED_NUMBER(self, item):
return int(item.value)
def ESCAPED_STRING(self, item):
return item.value[1:-1]
Since the file is well structured, just use regex with the appropriate query and treat the tuples that are outputted accordingly.
# Capture (name, culture) pairs: a quoted name followed, on the next line, by
# its culture. The original character class `A-z` also spans the punctuation
# between 'Z' and 'a' ([ \ ] ^ _ `), so the intended sets are spelled out:
# any text up to the closing quote for the name, an identifier for the culture.
result = re.findall(r'name[ ]*=[ ]*"([^"\n]+)"\n[ ]+culture[ ]*=[ ]*(\w+)', a)
names_by_culture = {}
for name, culture in result:
    # setdefault creates the list on first sight of a culture; no blanket
    # try/except that could hide unrelated errors.
    names_by_culture.setdefault(culture, []).append(name)
print(names_by_culture)
Output:
{'myrman': ['of Myr', 'of Myr', 'Trellos', 'Uteuran'],
'lysene': ['Ormollen', 'of Lys', 'Lohar', 'Assadyrn']}

Converting unicode to gsm encoding in golang

I am working on migrating my project in python to golang and I have a use case for converting utf-8 encoding to corresponding gsm ones if possible. I am very new to go, it will be really helpful to get some documentation or examples around it.
For example: Python snippet
ằ as unicode -> a after gsm encoding
for character in text:
if is_gsm(character):
transliterated_text += character.encode('utf-8')
continue
if is_nonascii_utf8(character):
transliterated_char = unidecode.unidecode(character)
if transliterated_char == '?' or transliterated_char == '':
gsm = False
break
if transliterated_char != rc:
character = transliterated_char
transliterated_text += character
else:
transliterated_text += character.encode('utf-8')
if gsm and is_gsm(transliterated_text.decode('utf-8')):
text = transliterated_text.decode('utf-8')
Thanks
You can do it in this way:
package main
import (
"fmt"
"regexp"
"strings"
)
// utf8GsmChars maps each character whose GSM 03.38 encoding differs from its
// ASCII/UTF-8 byte to the GSM byte sequence. Characters that share the same
// 7-bit value in both alphabets (letters, digits, '#', ...) are not listed.
// Two-byte values starting with 0x1B belong to the extension table.
var utf8GsmChars = map[string]string{
	`@`: "\x00", `£`: "\x01", `$`: "\x02",
	`¥`: "\x03", `è`: "\x04", `é`: "\x05",
	`ù`: "\x06", `ì`: "\x07", `ò`: "\x08",
	`Ç`: "\x09", `Ø`: "\x0B", `ø`: "\x0C",
	`Å`: "\x0E", `å`: "\x0F", `Δ`: "\x10",
	`_`: "\x11", `Φ`: "\x12", `Γ`: "\x13",
	`Λ`: "\x14", `Ω`: "\x15", `Π`: "\x16",
	`Ψ`: "\x17", `Σ`: "\x18", `Θ`: "\x19",
	`Ξ`: "\x1A", `Æ`: "\x1C", `æ`: "\x1D",
	`ß`: "\x1E", `É`: "\x1F", `¤`: "\x24",
	`¡`: "\x40", `Ä`: "\x5B", `Ö`: "\x5C",
	`Ñ`: "\x5D", `Ü`: "\x5E", `§`: "\x5F",
	`¿`: "\x60", `ä`: "\x7B", `ö`: "\x7C",
	`ñ`: "\x7D", `ü`: "\x7E", `à`: "\x7F",
	`^`: "\x1B\x14", `{`: "\x1B\x28",
	`}`: "\x1B\x29", `\`: "\x1B\x2F",
	`[`: "\x1B\x3C", `~`: "\x1B\x3D",
	`]`: "\x1B\x3E", `|`: "\x1B\x40",
	`€`: "\x1B\x65",
}
// gsmUtf8Chars maps GSM 03.38 basic-table bytes to the UTF-8 encoding of the
// character they represent. Bytes that mean the same thing in ASCII are not
// listed and pass through unchanged.
var gsmUtf8Chars = map[string]string{
	"\x00": "\x40",     // @
	"\x01": "\xC2\xA3", // £
	"\x02": "\x24",     // $
	"\x03": "\xC2\xA5", // ¥
	"\x04": "\xC3\xA8", // è
	"\x05": "\xC3\xA9", // é
	"\x06": "\xC3\xB9", // ù
	"\x07": "\xC3\xAC", // ì
	"\x08": "\xC3\xB2", // ò
	"\x09": "\xC3\x87", // Ç
	"\x0B": "\xC3\x98", // Ø
	"\x0C": "\xC3\xB8", // ø
	"\x0E": "\xC3\x85", // Å (was ø, a duplicate of 0x0C)
	"\x0F": "\xC3\xA5", // å
	"\x10": "\xCE\x94", // Δ
	"\x11": "\x5F",     // _
	"\x12": "\xCE\xA6", // Φ
	"\x13": "\xCE\x93", // Γ
	"\x14": "\xCE\x9B", // Λ (was Π, a duplicate of 0x16)
	"\x15": "\xCE\xA9", // Ω
	"\x16": "\xCE\xA0", // Π
	"\x17": "\xCE\xA8", // Ψ
	"\x18": "\xCE\xA3", // Σ
	"\x19": "\xCE\x98", // Θ
	"\x1A": "\xCE\x9E", // Ξ
	"\x1C": "\xC3\x86", // Æ
	"\x1D": "\xC3\xA6", // æ
	"\x1E": "\xC3\x9F", // ß
	"\x1F": "\xC3\x89", // É
	"\x20": "\x20",     // space
	"\x24": "\xC2\xA4", // ¤
	"\x40": "\xC2\xA1", // ¡
	"\x5B": "\xC3\x84", // Ä
	"\x5C": "\xC3\x96", // Ö
	"\x5D": "\xC3\x91", // Ñ
	"\x5E": "\xC3\x9C", // Ü
	"\x5F": "\xC2\xA7", // §
	"\x60": "\xC2\xBF", // ¿
	"\x7B": "\xC3\xA4", // ä (was è, a duplicate of 0x04)
	"\x7C": "\xC3\xB6", // ö
	"\x7D": "\xC3\xB1", // ñ
	"\x7E": "\xC3\xBC", // ü
	"\x7F": "\xC3\xA0", // à
}
// nonGSMRune matches every code point above ASCII. It is applied after the
// table substitution, whose output is purely 7-bit, so anything it still
// finds has no GSM equivalent in utf8GsmChars.
var nonGSMRune = regexp.MustCompile("[\\x{0080}-\\x{10FFFF}]")

// UTF8ToGsm0338 converts UTF-8 text to GSM 03.38 bytes using utf8GsmChars and
// replaces every remaining non-ASCII character with "?".
//
// The substitution is done in a single pass with strings.NewReplacer. Calling
// strings.Replace once per map entry rescans bytes that an earlier entry has
// already produced (e.g. "Ä" -> 0x5B, which is then treated as "[" and turned
// into ESC 0x3C), and because map iteration order is random the result would
// differ from run to run.
func UTF8ToGsm0338(text string) string {
	pairs := make([]string, 0, 2*len(utf8GsmChars))
	for k, v := range utf8GsmChars {
		pairs = append(pairs, k, v)
	}
	s := strings.NewReplacer(pairs...).Replace(text)
	return nonGSMRune.ReplaceAllString(s, "?")
}
// GSM0338ToUTF8 converts GSM 03.38 bytes to UTF-8 text using gsmUtf8Chars.
// Bytes without an entry in the table are copied through unchanged.
//
// Every input byte is translated exactly once via strings.NewReplacer. With
// one strings.Replace per map entry, output of an earlier entry could be
// translated again (0x02 -> "$" -> "¤"), and the random map iteration order
// made that nondeterministic.
func GSM0338ToUTF8(text string) string {
	pairs := make([]string, 0, 2*len(gsmUtf8Chars))
	for k, v := range gsmUtf8Chars {
		pairs = append(pairs, k, v)
	}
	return strings.NewReplacer(pairs...).Replace(text)
}
func main() {
s := "Hello World"
gsm := UTF8ToGsm0338(s)
utf8 := GSM0338ToUTF8(gsm)
fmt.Printf("word before: %s\nword after gsm: %s\nword after utf8: %s\n", s, gsm, utf8)
}

How to map nested list to flat values

I'm trying to parse a spreadsheet with a header that looks something like this:
My problem is those nested keys below "Контрагент". I decided to parse it like this:
['Дата',
'Номер документа',
'Дебет',
'Кредит',
['Контрагент',
['Наименование', 'ИНН', 'КПП', 'Счет', 'БИК', 'Наименование банка']],
'Назначение платежа',
'Код дебитора',
'Тип документа']
But now I don't really know how to map it to a flat list of values:
['21.05.2021',
'591324565436',
'0.00',
'526345428.99',
'asdasd',
'234525460140679',
'77130100123412341',
'302328105423534200000000280',
'0445252345234974',
'asdfsadfsd',
'sdfghsfgdhfdghdfgh',
'',
'dfghfgdhfdgh']
Given these variables, I want a function to return following dict:
{
"Дата": "21.05.2021",
"Номер документа": "591324565436",
"Дебет": "0.00",
"Кредит": "526345428.99",
"Контрагент": {
"Наименование": "asdasd",
"ИНН": "234525460140679",
"КПП": "77130100123412341",
"Счет": "302328105423534200000000280",
"БИК": "0445252345234974",
"Наименование банка": "asdfsadfsd"
},
"Назначение платежа": "sdfghsfgdhfdghdfgh",
"Код дебитора": "",
"Тип документа": "dfghfgdhfdgh"
}
I've gone this far without realizing it'd be raising IndexError on the 3rd line:
def map_to_schema(schema, data):
for i, elem in enumerate(data):
key = schema[i]
if isinstance(key, list):
if key[0] not in result:
result[key[0]] = {}
result[key[0]] |= {
key[1][i-len(key)]: elem
}
else:
result[key] = elem
What should I do? Maybe the structure for the schema isn't good enough? I really have no idea...
You could use a dictionary comprehension and an iterator:
# Header schema: a plain string is a flat column; a two-item list is
# [group name, [sub-column names]] and consumes one value per sub-column.
headers = [
    'Дата', 'Номер документа', 'Дебет', 'Кредит',
    ['Контрагент', ['Наименование', 'ИНН', 'КПП', 'Счет', 'БИК', 'Наименование банка']],
    'Назначение платежа', 'Код дебитора', 'Тип документа',
]
values = [
    '21.05.2021', '591324565436', '0.00', '526345428.99', 'asdasd',
    '234525460140679', '77130100123412341', '302328105423534200000000280',
    '0445252345234974', 'asdfsadfsd', 'sdfghsfgdhfdghdfgh', '', 'dfghfgdhfdgh',
]
# A single shared iterator hands out the flat values in order, so nested
# groups simply pull as many values as they have sub-columns.
it = iter(values)
out = {
    (k[0] if isinstance(k, list) else k): (
        {k2: next(it) for k2 in k[1]} if isinstance(k, list) else next(it)
    )
    for k in headers
}
output:
{'Дата': '21.05.2021',
'Номер документа': '591324565436',
'Дебет': '0.00',
'Кредит': '526345428.99',
'Контрагент': {'Наименование': 'asdasd',
'ИНН': '234525460140679',
'КПП': '77130100123412341',
'Счет': '302328105423534200000000280',
'БИК': '0445252345234974',
'Наименование банка': 'asdfsadfsd'},
'Назначение платежа': 'sdfghsfgdhfdghdfgh',
'Код дебитора': '',
'Тип документа': 'dfghfgdhfdgh'}
Thanks @mozway for this solution! This is essentially the same algorithm, using a for loop.
def map(schema, s_length, row: list):
    """Map a flat row of values onto a (possibly nested) header schema.

    Each entry of ``schema`` is either a plain key, which takes the next
    value, or a two-item list ``[group_key, [sub_key, ...]]``, which becomes
    a nested dict taking one value per sub-key. Values are consumed from
    ``row`` in order.

    ``s_length`` is the number of values the schema consumes. A shorter
    ``row`` is padded with empty strings up to that length; the padding is
    applied to a copy, so the caller's list is left untouched.

    Raises ``StopIteration`` if the schema needs more values than are
    available even after padding to ``s_length``.
    """
    # NOTE: the name shadows the builtin ``map``; kept for existing callers.
    # If len(row) were less than the *true* schema length, next() below would
    # raise StopIteration, hence the padding by delta elements.
    padded = list(row)
    if (delta := s_length - len(padded)) > 0:
        padded.extend([""] * delta)
    iter_row = iter(padded)
    result = {}
    for key in schema:
        if isinstance(key, list):
            result[key[0]] = {}
            for sub_key in key[1]:
                result[key[0]][sub_key] = next(iter_row)
        else:
            result[key] = next(iter_row)
    return result

Call values of keys in dictionary through function

I have the following dictionary:
cosmetics = {"Hair": ["Shampoo", "Conditioner", "Repair"], "Skin": ["Cream", "Lotion", "Wipes"], "MakeUp": ["Lipstick", "Foundation", "Liner"]}
print(cosmetics.get("Hair"))
print(cosmetics.get("Skin"))
print(cosmetics.get("MakeUp"))
1. def care(cosmetics):
x = cosmetics.keys()
for x in cosmetics:
print(cosmetics.get(x))
print(care("Skin"))
2. def care(key, value):
print(key, value)
[care(key, value) for key,value in cosmetics.items()]
I get an error here:
def care(key):
for key,value in cosmetics.items():
print(key, value)
So, my problem is I get the whole dictionary with these functions. I want to create a function care so that when I call care with a key like "skin", I get the values of that key.
Try this:
>>> dct = cosmetics = {"Hair": ["Shampoo", "Conditioner", "Repair"], "Skin": ["Cream", "Lotion", "Wipes"], "MakeUp": ["Lipstick", "Foundation", "Liner"]}
>>> def get_val_dct(dct, key):
... return dct.get(key)
>>> get_val_dct(dct,'Skin')
['Cream', 'Lotion', 'Wipes']

pretty-printing OrderedDicts using pprint

I'm using pprint to nicely print a dict and it's working fine. Now I have switched to using an OrderedDict from the collections module. Unfortunately, the pprint routine does not seem to recognize that such objects are more or less dicts as well, and falls back to printing them as one long line.
>>> d = { i:'*'*i for i in range(8) }
>>> pprint.pprint(d)
{0: '',
1: '*',
2: '**',
3: '***',
4: '****',
5: '*****',
6: '******',
7: '*******'}
>>> pprint.pprint(collections.OrderedDict(d))
OrderedDict([(0, ''), (1, '*'), (2, '**'), (3, '***'), (4, '****'), (5, '*****'), (6, '******'), (7, '*******')])
Any way to get a nicer representation of OrderedDicts as well? Maybe even if they are nested inside a normal dict or list?
I found a relatively simple solution for this, but it includes the risk of making the output for your ordered dictionary appear exactly as if it were a regular dict object.
The original solution for using a context manager to prevent pprint from sorting dictionary keys comes from this answer.
@contextlib.contextmanager
def pprint_OrderedDict():
    """Temporarily make pprint render OrderedDicts like plain, unsorted dicts.

    While the context is active, ``pprint._sorted`` is replaced by an
    identity function (so keys keep their existing order) and
    ``OrderedDict.__repr__`` is swapped for ``dict.__repr__``. Both originals
    are restored on exit, even if the body raises.

    NOTE(review): both patches are process-wide, so every pprint call and
    every OrderedDict repr made while the context is open is affected.
    NOTE(review): ``pprint._sorted`` is a private helper that presumably
    exists only in Python 2's pprint - confirm the target interpreter. On
    Python 3.8+ ``pprint.pprint(d, sort_dicts=False)`` is the supported way
    to keep insertion order.
    """
    pp_orig = pprint._sorted
    od_orig = OrderedDict.__repr__
    try:
        pprint._sorted = lambda x: x
        OrderedDict.__repr__ = dict.__repr__
        yield
    finally:
        pprint._sorted = pp_orig
        OrderedDict.__repr__ = od_orig
(You could also just patch the OrderedDict.__repr__ method with dict.__repr__, but please don't.)
Example:
>>> foo = [('Roger', 'Owner'), ('Diane', 'Manager'), ('Bob', 'Manager'),
... ('Ian', 'Associate'), ('Bill', 'Associate'), ('Melinda', 'Associate')]
>>> d = OrderedDict(foo)
>>> pprint.pprint(d)
OrderedDict([('Roger', 'Owner'), ('Diane', 'Manager'), ('Bob', 'Manager'), ('Ian', 'Associate'), ('Bill', 'Associate'), ('Melinda', 'Associate')])
>>> pprint.pprint(dict(d))
{'Bill': 'Associate',
'Bob': 'Manager',
'Diane': 'Manager',
'Ian': 'Associate',
'Melinda': 'Associate',
'Roger': 'Owner'}
>>> with pprint_OrderedDict():
... pprint.pprint(d)
...
{'Roger': 'Owner',
'Diane': 'Manager',
'Bob': 'Manager',
'Ian': 'Associate',
'Bill': 'Associate',
'Melinda': 'Associate'}
Try this on:
d = collections.OrderedDict({ i:'*'*i for i in range(8) })
EDIT
pprint.pprint(list(d.items()))
If you are specifically targeting CPython* 3.6 or later, then you can just use regular dictionaries instead of OrderedDict. You'll miss out on a few methods exclusive to OrderedDict, and in 3.6 this was an implementation detail that was not (yet) guaranteed to be portable to other Python implementations,** but it is probably the simplest way to accomplish what you are trying to do. (Since Python 3.7, insertion order of regular dicts is guaranteed by the language itself.)
* CPython is the reference implementation of Python which may be downloaded from python.org.
** CPython stole this idea from PyPy, so you can probably depend on it working there too.
I realize this is sort of necroposting, but I thought I'd post what I use. Its main virtue is that its output can be read back into Python, thus allowing one, for instance, to shuttle between representations (which I use, for instance, on JSON files). Of course, it breaks pprint's encapsulation by ripping some code out of its inner _format function.
#!/bin/env python
from __future__ import print_function
import pprint;
from collections import OrderedDict
import json
import sys
class MyPP(pprint.PrettyPrinter):
    """PrettyPrinter that writes OrderedDicts as ``OrderedDict([...])`` literals.

    Every other object is delegated to the stock ``PrettyPrinter``.
    """

    def _format(self, object, stream, indent, allowance, context, level):
        """Write ``object`` to ``stream``, special-casing OrderedDict.

        An OrderedDict is emitted as ``OrderedDict([`` followed by one
        ``( key, value )`` pair per entry and a closing ``])``. Pairs are put
        on separate lines when the one-line repr would not fit in the
        configured width. Overrides the private ``PrettyPrinter._format``
        hook, so the parameter names (including ``object``, which shadows the
        builtin) follow the base-class signature.
        """
        if not isinstance(object, OrderedDict):
            return pprint.PrettyPrinter._format(
                self, object, stream, indent, allowance, context, level)
        level = level + 1
        objid = id(object)
        if objid in context:
            # Already being printed further up: emit the recursion marker.
            # The helper lives in the pprint module, not in this namespace.
            stream.write(pprint._recursion(object))
            self._recursive = True
            self._readable = False
            return
        write = stream.write
        rep = self._repr(object, context, level - 1)
        # One pair per line only when the flat repr would overflow the width.
        sepLines = len(rep) > (self._width - 1 - indent - allowance)
        if self._depth and level > self._depth:
            write(rep)
            return
        write('OrderedDict([\n%s' % (' ' * (indent + 1),))
        if self._indent_per_level > 1:
            write((self._indent_per_level - 1) * ' ')
        length = len(object)
        if length:
            context[objid] = 1
            indent = indent + self._indent_per_level
            # items() is a non-indexable view on Python 3; take a list so the
            # first pair and the remainder can be addressed by position.
            items = list(object.items())
            key, ent = items[0]
            rep = self._repr(key, context, level)
            write('( ')
            write(rep)
            write(', ')
            self._format(ent, stream, indent + len(rep) + 2,
                         allowance + 1, context, level)
            write(' )')
            if length > 1:
                for key, ent in items[1:]:
                    rep = self._repr(key, context, level)
                    if sepLines:
                        write(',\n%s( %s , ' % (' ' * indent, rep))
                    else:
                        write(', ( %s , ' % rep)
                    self._format(ent, stream, indent + len(rep) + 2,
                                 allowance + 1, context, level)
                    write(' )')
            indent = indent - self._indent_per_level
            del context[objid]
        write('])')
        return
pp = MyPP(indent=1)
# Read the JSON file named on the command line. object_pairs_hook makes every
# JSON object an OrderedDict, so key order from the file is preserved. The
# with-block closes the file even if parsing fails.
with open(sys.argv[1], "r") as handle:
    values = json.load(handle, object_pairs_hook=OrderedDict)
pp.pprint(values)

Categories