"Invalid configuration" error when using python-arabic-reshaper package with PyInstaller - python

I am trying to use PyInstaller to put an exe wrapper around a Python 2.7 script that imports the Python package python-arabic-reshaper. The script works fine when run by itself, but ends in the error below when run from within a PyInstaller exe.
The config file is default-config.ini, and the configparser package is installed.
Code:
import sys
from arabic_reshaper import ArabicReshaper
config1={
'delete_harakat':False,
'support_ligatures':True,
'RIAL SIGN':True,
}
reshaper=ArabicReshaper(configuration=config1)
text=u"????"
reshaped_text=reshaper.reshape(text)
print(sys.stdout.encoding)
print(reshaped_text.encode('utf-8'))
Error:
F:\HisHands\Yang>F:\HisHands\Yang\dist\bob_test_1.exe
WARNING: file already exists but should not: C:\Users\Ownerc\AppData\Local\Temp\
_MEI153402\include\pyconfig.h
Traceback (most recent call last):
File "", line 13, in
File "C:\Users\Ownerc\Downloads\PyInstaller-2.1\PyInstaller-2.1\PyInstaller\lo
ader\pyi_importers.py", line 270, in load_module
File "F:\HisHands\Yang\build\bob_test_1\out00-PYZ.pyz\arabic_reshaper", line 1
, in
File "C:\Users\Ownerc\Downloads\PyInstaller-2.1\PyInstaller-2.1\PyInstaller\lo
ader\pyi_importers.py", line 270, in load_module
File "F:\HisHands\Yang\build\bob_test_1\out00-PYZ.pyz\arabic_reshaper.arabic_r
eshaper", line 1377, in
File "F:\HisHands\Yang\build\bob_test_1\out00-PYZ.pyz\arabic_reshaper.arabic_r
eshaper", line 1248, in __init__
ValueError: Invalid configuration: A section with the name ArabicReshaper was no
t found
I also used the archive viewer tool to view the contents of the generated executable, shown below:
pos, length, uncompressed, iscompressed, type, name
[(0, 170, 235, 1, 'm', u'struct'),
(170, 1153, 2704, 1, 'm', u'pyimod01_os_path'),
(1323, 4222, 11804, 1, 'm', u'pyimod02_archive'),
(5545, 6034, 18956, 1, 'm', u'pyimod03_importers'),
(11579, 1589, 4450, 1, 's', u'pyiboot01_bootstrap'),
(13168, 347, 504, 1, 's', u'bob_test_1'),
(13515, 48403, 89416, 1, 'b', u'VCRUNTIME140.dll'),
(61918, 39529, 87552, 1, 'b', u'_bz2.pyd'),
(101447, 624405, 1443840, 1, 'b', u'_hashlib.pyd'),
(725852, 76667, 146432, 1, 'b', u'_lzma.pyd'),
(802519, 28814, 66048, 1, 'b', u'_socket.pyd'),
(831333, 888894, 2045440, 1, 'b', u'_ssl.pyd'),
(1720227, 10439, 19136, 1, 'b', u'api-ms-win-core-console-l1-1-0.dll'),
(1730666, 10253, 18624, 1, 'b', u'api-ms-win-core-datetime-l1-1-0.dll'),
(1740919, 10265, 18624, 1, 'b', u'api-ms-win-core-debug-l1-1-0.dll'),
(1751184, 10322, 18624, 1, 'b', u'api-ms-win-core-errorhandling-l1-1-0.dll'),
(1761506, 11406, 22208, 1, 'b', u'api-ms-win-core-file-l1-1-0.dll'),
(1772912, 10289, 18624, 1, 'b', u'api-ms-win-core-file-l1-2-0.dll'),
(1783201, 10419, 18624, 1, 'b', u'api-ms-win-core-file-l2-1-0.dll'),
(1793620, 10290, 18624, 1, 'b', u'api-ms-win-core-handle-l1-1-0.dll'),
(1803910, 10469, 19136, 1, 'b', u'api-ms-win-core-heap-l1-1-0.dll'),
(1814379, 10302, 18624, 1, 'b', u'api-ms-win-core-interlocked-l1-1-0.dll'),
(1824681, 10532, 19136, 1, 'b', u'api-ms-win-core-libraryloader-l1-1-0.dll'),
(1835213, 11178, 21184, 1, 'b', u'api-ms-win-core-localization-l1-2-0.dll'),
(1846391, 10461, 19136, 1, 'b', u'api-ms-win-core-memory-l1-1-0.dll'),
(1856852, 10395, 18624, 1, 'b', u'api-ms-win-core-namedpipe-l1-1-0.dll'),
(1867247,
10555,
19648,
1,
'b',
u'api-ms-win-core-processenvironment-l1-1-0.dll'),
(1877802, 11078, 20672, 1, 'b', u'api-ms-win-core-processthreads-l1-1-0.dll'),
(1888880, 10498, 19136, 1, 'b', u'api-ms-win-core-processthreads-l1-1-1.dll'),
(1899378, 10215, 18112, 1, 'b', u'api-ms-win-core-profile-l1-1-0.dll'),
(1909593, 10486, 19136, 1, 'b', u'api-ms-win-core-rtlsupport-l1-1-0.dll'),
(1920079, 10347, 18624, 1, 'b', u'api-ms-win-core-string-l1-1-0.dll'),
(1930426, 10870, 20672, 1, 'b', u'api-ms-win-core-synch-l1-1-0.dll'),
(1941296, 10524, 19136, 1, 'b', u'api-ms-win-core-synch-l1-2-0.dll'),
(1951820, 10598, 19648, 1, 'b', u'api-ms-win-core-sysinfo-l1-1-0.dll'),
(1962418, 10376, 18624, 1, 'b', u'api-ms-win-core-timezone-l1-1-0.dll'),
(1972794, 10274, 18624, 1, 'b', u'api-ms-win-core-util-l1-1-0.dll'),
(1983068, 10607, 19648, 1, 'b', u'api-ms-win-crt-conio-l1-1-0.dll'),
(1993675, 11729, 22720, 1, 'b', u'api-ms-win-crt-convert-l1-1-0.dll'),
(2005404, 10429, 19136, 1, 'b', u'api-ms-win-crt-environment-l1-1-0.dll'),
(2015833, 11063, 20672, 1, 'b', u'api-ms-win-crt-filesystem-l1-1-0.dll'),
(2026896, 10584, 19648, 1, 'b', u'api-ms-win-crt-heap-l1-1-0.dll'),
(2037480, 10540, 19136, 1, 'b', u'api-ms-win-crt-locale-l1-1-0.dll'),
(2048020, 13628, 27840, 1, 'b', u'api-ms-win-crt-math-l1-1-0.dll'),
(2061648, 10654, 19648, 1, 'b', u'api-ms-win-crt-process-l1-1-0.dll'),
(2072302, 11901, 23232, 1, 'b', u'api-ms-win-crt-runtime-l1-1-0.dll'),
(2084203, 12357, 24768, 1, 'b', u'api-ms-win-crt-stdio-l1-1-0.dll'),
(2096560, 12530, 24768, 1, 'b', u'api-ms-win-crt-string-l1-1-0.dll'),
(2109090, 11174, 21184, 1, 'b', u'api-ms-win-crt-time-l1-1-0.dll'),
(2120264, 10601, 19136, 1, 'b', u'api-ms-win-crt-utility-l1-1-0.dll'),
(2130865, 485, 1035, 1, 'b', u'bob_test_1.exe.manifest'),
(2131350, 74629, 189952, 1, 'b', u'pyexpat.pyd'),
(2205979, 1637554, 3938304, 1, 'b', u'python35.dll'),
(3843533, 9127, 19968, 1, 'b', u'select.pyd'),
(3852660, 446584, 982720, 1, 'b', u'ucrtbase.dll'),
(4299244, 341035, 865792, 1, 'b', u'unicodedata.pyd'),
(4640279,
0,
0,
0,
'o',
u'pyi-windows-manifest-filename bob_test_1.exe.manifest'),
(4640279, 197523, 761033, 1, 'x', u'base_library.zip'),
(4837802, 1198430, 1198430, 0, 'z', u'out00-PYZ.pyz')]
?
Thanks

Related

Save dataframe as CSV in Python

I am trying to save the result of this code as a CSV file:
import pandas as pd
df = pd.DataFrame({'ID': ['a01', 'a01', 'a01', 'a01', 'a01', 'a01', 'a01', 'a01', 'a01', 'b02', 'b02','b02', 'b02', 'b02', 'b02', 'b02'],
'Row': [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 2, 2, 3, 3, 3],
'Col': [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 3, 1, 3, 1, 2, 3],
'Result': ['p', 'f', 'p', 'p', 'p', 'f', 'p', 'p', 'p', 'p', 'p', 'p', 'f', 'p', 'p', 'p']})
dfs = {}
for n, g in df.groupby('ID'):
dfs[n] = g.pivot('Row', 'Col', 'Result').fillna('')
print(f'ID: {n}')
print(dfs[n])
print('\n')
print(dfs[n].stack().value_counts().to_dict())
print('\n')
I found several methods and tried to save the output (which is in dictionary form) to a CSV file, but without success. Any thoughts?
P.S. This is one of the methods I found, but I don't know how to name the columns based on my output:
with open("Output.csv", "w", newline="") as csv_file:
cols = ["???????????"]
writer = csv.DictWriter(csv_file, fieldnames=cols)
writer.writeheader()
writer.writerows(data)
df.to_csv('Output.csv', index = False)
For more details goto:
https://datatofish.com/export-dataframe-to-csv/
https://www.geeksforgeeks.org/saving-a-pandas-dataframe-as-a-csv/
Use the method provided by the pandas DataFrame object:
df.to_csv()
You can use df.to_csv() to convert your data to csv.

How to count frequency of such list using basic libraries?

The list looks like this: each element pairs an ASCII character with a number. I want to count the occurrences of each ASCII character for the values 0, 1 and 2.
So for A the result would be {0=10, 1=2, 2=12}, and likewise for the other characters.
[('P', 0),
('S', 2),
('R', 1),
('O', 1),
('J', 1),
('E', 1),
('C', 1),
('T', 1),
('G', 1),
('U', 1),
('T', 1),
('E', 1),
('N', 1)]
I have tried
char_freq = {c:[0,0,0] for c in string.ascii_uppercase}
also
for i in range(3):
for x,i in a:
print(x,i)
I want to count X for i where X is [A-Z]
It should give me result like
Character | 0 | 1 | 2
A 10 5 4
Although you don't supply enough example data to actually achieve your desired output, I think this is what you're looking for:
from collections import Counter
import pandas as pd
l = [('P', 0),
('S', 2),
('R', 1),
('O', 1),
('J', 1),
('E', 1),
('C', 1),
('T', 1),
('G', 1),
('U', 1),
('T', 1),
('E', 1),
('N', 1)]
df = pd.DataFrame(l)
counts = df.groupby(0)[1].agg(Counter)
returns:
C {1: 1}
E {1: 2}
G {1: 1}
J {1: 1}
N {1: 1}
O {1: 1}
P {0: 1}
R {1: 1}
S {2: 1}
T {1: 2}
U {1: 1}
This will give you each ASCII character, along with each unique number and how many occurrences of each number there are.
from collections import Counter
l = [('A', 1),
('A', 1),
('A', 2),
('A', 2),
('B', 1),
('B', 2),
('B', 3),
('B', 4)]
data = {}
for k,v in l:
data[k] = [v] if k not in data else data[k] + [v]
char_freq = {k: dict(Counter(v)) for k, v in data.items()}
print(char_freq)
Outputs:
{'A': {1: 2, 2: 2}, 'B': {1: 1, 2: 1, 3: 1, 4: 1}}
Your code looks fine; you just have to make a small change to the char_freq variable to get the expected result:
char_freq = {c: {0: 0, 1: 0, 2: 0} for c in string.ascii_uppercase}
for x, i in a:
char_freq[x][i] += 1
To avoid having the whole alphabet in your char_freq, you could use only the necessary characters:
char_freq = {c: {0: 0, 1: 0, 2: 0} for c in {t[0] for t in a}}
for x, i in a:
char_freq[x][i] += 1
output:
{'O': {0: 0, 1: 1, 2: 0},
'T': {0: 0, 1: 2, 2: 0},
'N': {0: 0, 1: 1, 2: 0},
'G': {0: 0, 1: 1, 2: 0},
'U': {0: 0, 1: 1, 2: 0},
'E': {0: 0, 1: 2, 2: 0},
'J': {0: 0, 1: 1, 2: 0},
'R': {0: 0, 1: 1, 2: 0},
'C': {0: 0, 1: 1, 2: 0},
'S': {0: 0, 1: 0, 2: 1},
'P': {0: 1, 1: 0, 2: 0}}

Pandas - Add a column level to multi index

I would like to add a sublevel (L4) in my dataframe, based on a list of values:
x = [0.01, 0.01, 0.01, 0.02, 0.02, 0.02]
The df.columns returns me this:
MultiIndex(levels=[['Foo', 'Bar'], ['A', 'B', 'C'], ['a']],
labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], [0, 0, 0, 0, 0, 0]],
names=['L1', 'L2', 'L3'])
So far I have tried that:
df = pd.concat([df], keys=x, names=['L4'], axis=1).swaplevel(i='L4', j='L1', axis=1).swaplevel(i='L4', j='L2', axis=1).swaplevel(i='L4', j='L3', axis=1)
but it doesn't give the right values: it repeats list_levels[0] (0.01).
Do you have any idea how I can do it?
Thanks
Here's a way:
cols = pd.MultiIndex(levels=[['Foo', 'Bar'], ['A', 'B', 'C'], ['a']],
labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], [0, 0, 0, 0, 0, 0]],
names=['L1', 'L2', 'L3'])
pd.DataFrame(columns = cols).T\
.assign(x = [0.01, 0.01, 0.01, 0.02, 0.02, 0.02])\
.set_index('x', append=True).T
Output:
You can create a DataFrame with the column index as the Index, and the data being the level you want to add, as set_index(append=True) is only defined for the row Index. Then assign it with df.columns = ...
import pandas as pd
idx = pd.MultiIndex(levels=[['Foo', 'Bar'], ['A', 'B', 'C'], ['a']],
codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], [0, 0, 0, 0, 0, 0]],
names=['L1', 'L2', 'L3'])
x = [0.01, 0.01, 0.01, 0.02, 0.02, 0.02]
pd.DataFrame(x, index=idx, columns=['L4']).set_index('L4', append=True).index
#MultiIndex([('Foo', 'A', 'a', 0.01),
# ('Foo', 'B', 'a', 0.01),
# ('Foo', 'C', 'a', 0.01),
# ('Bar', 'A', 'a', 0.02),
# ('Bar', 'B', 'a', 0.02),
# ('Bar', 'C', 'a', 0.02)],
# names=['L1', 'L2', 'L3', 'L4'])
Under the hood set_index just recreates the entire MultiIndex when appending, so a more hands-on approach is
arrays = []
for i in range(idx.nlevels):
arrays.append(idx.get_level_values(i))
arrays.append(pd.Index(x, name='L4')) # Add the new level
new_idx = pd.MultiIndex.from_arrays(arrays)
#MultiIndex([('Foo', 'A', 'a', 0.01),
# ('Foo', 'B', 'a', 0.01),
# ('Foo', 'C', 'a', 0.01),
# ('Bar', 'A', 'a', 0.02),
# ('Bar', 'B', 'a', 0.02),
# ('Bar', 'C', 'a', 0.02)],
# names=['L1', 'L2', 'L3', 'L4'])

Counting common elements in lists that occur on the same day in a pandas DataFrame

I have a DataFrame which looks like:
Users Date
['A', 'B'] 2017-10-21
['B', 'C'] 2017-10-21
['A', 'D'] 2017-10-21
['D', 'E'] 2017-10-22
['A', 'E'] 2017-10-22
['A', 'E', 'D'] 2017-10-22
['C', 'B', 'E'] 2017-10-23
['D', 'C', 'F'] 2017-11-23
I need to make a new DataFrame from this DataFrame which would count the number of times the items show up in the lists on each day. The count, therefore, would be across different rows on the same date.
For example, the new DataFrame would look like:
Users Date
[A=2, B=2, C=1, D=1] 2017-10-21
[E=3, D=2, A=2] 2017-10-22
[B=1, C=2, D=1, E=1, F=1] 2017-10-23
Some things to note: all the items in the first dataset are lists whose individual elements are strings. The Date column is of DateTime type.
I understand there would be a groupby on the Date column, but I can't figure out how to write the function that I would apply to each group.
Using groupby and apply with collections.Counter:
df.groupby('Date').Users.sum().apply(collections.Counter, 1)
Date
2017-10-21 {'A': 2, 'B': 2, 'C': 1, 'D': 1}
2017-10-22 {'D': 2, 'E': 3, 'A': 2}
2017-10-23 {'C': 1, 'B': 1, 'E': 1}
2017-11-23 {'D': 1, 'C': 1, 'F': 1}
Name: Users, dtype: object
If you have multiple columns that you want to count per group:
Setup
s = 'ABCDE'
df = pd.DataFrame({
'Users': [random.sample(s, random.randint(1, 5)) for _ in range(10)],
'Tools': [random.sample(s, random.randint(1, 5)) for _ in range(10)],
'Hours': [random.sample(s, random.randint(1, 5)) for _ in range(10)],
'Date': ['2017-10-21', '2017-10-21', '2017-10-21', '2017-10-22',
'2017-10-22', '2017-10-22', '2017-10-23', '2017-10-23', '2017-10-23', '2017-11-23']
})
Using agg:
df.groupby('Date').sum().agg({
'Users': collections.Counter,
'Tools': collections.Counter,
'Hours': collections.Counter
})
Users Tools Hours
Date
2017-10-21 {'C': 2, 'E': 2, 'A': 2, 'B': 2, 'D': 1} {'E': 3, 'A': 2, 'B': 3, 'D': 2, 'C': 2} {'B': 2, 'C': 2, 'E': 1, 'A': 1, 'D': 1}
2017-10-22 {'D': 2, 'A': 2, 'E': 1, 'C': 1, 'B': 2} {'E': 2, 'B': 3, 'A': 3, 'D': 1, 'C': 1} {'B': 1, 'C': 2, 'E': 2, 'A': 2, 'D': 2}
2017-10-23 {'B': 2, 'A': 2, 'D': 1, 'E': 1, 'C': 2} {'D': 3, 'E': 2, 'B': 2, 'C': 3, 'A': 2} {'C': 3, 'E': 2, 'D': 2, 'B': 1, 'A': 2}
2017-11-23 {'D': 1, 'B': 1, 'C': 1} {'B': 1} {'C': 1, 'E': 1}

couchdb-python specify own _id failed

How can I define my own _id in couchdb-python (0.9)? When I tried '_id': i[5], I got the following error message:
$ python test3.py
828288
Traceback (most recent call last):
File "test3.py", line 42, in <module>
db.save(doc)
File "/home/mictadlo/.virtualenvs/unisnp/lib/python2.7/site-packages/couchdb/client.py", line 415, in save
func = _doc_resource(self.resource, doc['_id']).put_json
File "/home/mictadlo/.virtualenvs/unisnp/lib/python2.7/site-packages/couchdb/client.py", line 954, in _doc_resource
if doc_id[:1] == '_':
TypeError: 'int' object has no attribute '__getitem__'
Below is the script which is causing the above error:
from couchdb.mapping import Document, TextField, IntegerField, Mapping
from couchdb.mapping import DictField, ViewField, BooleanField, ListField
from couchdb import Server
# $ sudo systemctl start couchdb
# http://localhost:5984/_utils/
server = Server()
db = server.create("test")
r = [["Test", "A", "B01", 828288, 1, 7, 'C', 5],
["Test", "A", "B01", 828288, 1, 7, 'T', 6],
["Test", "A", "B01", 171878, 3, 8, 'C', 5],
["Test", "A", "B01", 171878, 3, 8, 'T', 6],
["Test", "A", "B01", 871963, 3, 9, 'A', 5],
["Test", "A", "B01", 871963, 3, 9, 'G', 6],
["Test", "A", "B01", 1932523, 1, 10, 'T', 4],
["Test", "A", "B01", 1932523, 1, 10, 'A', 5],
["Test", "A", "B01", 1932523, 1, 10, 'X', 6],
["Test", "A", "B01", 667214, 1, 14, 'T', 4],
["Test", "A", "B01", 667214, 1, 14, 'G', 5],
["Test", "A", "B01", 667214, 1, 14, 'G', 6]]
for i in r:
print i[3]
doc = {
'type': i[0],
'name': i[1],
'sub_name': i[2],
'pos': i[3],
's_type': i[4],
'_id': i[5],
'chr':[]
}
doc['chr'].append({
"letter":i[6],
"no":i[7]
})
db.save(doc)
It expects _id to be a string, but you are passing an int. The error is caused by this line:
if doc_id[:1] == '_':
This fails because the script is trying to slice an int object.
So convert it to a string:
...
...
'_id': str(i[5]),
...

Categories