Why dictionary generator doesn't work correctly - python

I need to read CSV file and fill dict by data from file. So I wrote one method
def read_data(self):
with open('storage/data/heart.csv') as f:
self.raw_data = {
len(self.raw_data): {
'age':line[0],
'sex':line[1],
'cp':line[2],
'trtbps':line[3],
'chol':line[4],
'fbs':line[5],
'restecg':line[6],
'thalachh':line[7]
} for line in csv.reader(f)}
But print(raw_data) returns this:
{0: {'age': '57', 'sex': '0', 'cp': '1', 'trtbps': '130', 'chol': '236', 'fbs': '0', 'restecg': '0', 'thalachh': '174'}}
As u can see my method saves only 1 line to dict and this line is the last line from the file. Pls help me

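len(self.raw_data) gives the same value on every iteration, because self.raw_data is not (re)assigned until the whole dict comprehension has finished; every row is therefore stored under the same key and only the last one survives. Just use a normal loop or enumerate like: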
def read_data(self):
    """Load 'storage/data/heart.csv' into ``self.raw_data``.

    ``self.raw_data`` becomes a dict that maps the zero-based row index to a
    dict holding the first eight columns of that row. Values are kept as the
    strings produced by ``csv.reader``.
    """
    # newline='' lets the csv module handle line endings itself.
    with open('storage/data/heart.csv', newline='') as f:
        self.raw_data = {
            i: {
                'age': line[0],
                'sex': line[1],
                'cp': line[2],
                'trtbps': line[3],
                'chol': line[4],
                'fbs': line[5],
                'restecg': line[6],
                'thalachh': line[7],
            }
            # enumerate supplies a fresh key per row; unpack it as (index, row).
            for i, line in enumerate(csv.reader(f))
        }

Related

renaming key in every dictionary in list, python

I can't figure out how to rename "Number" to "ProjectId" in every dictionary in the list of dictionaries. Can anyone help? I tried it in the renaming() function but it doesn't work.
pojects =
[{'Number': '5',
'Name': 'CFO'},
{'Number': '7',
'Name': 'Head of Product'},
{'Number': '6',
'Name': 'CEO'}]
def renaming(projects):
for i in projects:
i['ProjectId'] = i.pop('Number')
return projects
you can add a new key named 'ProjectId' that takes the value of 'Number' and then delete number
for item in projects:
item['ProjectId'] = item['Number']
del item['Number']

Python Dict append list with given vars

I am trying to restructure the data into a dict of lists. I tried using defaultdict, but it gives an error.
data =
"""
[{'transit01_net': '192.168.1.0',
'transit01_subnet': '26',
'transit02_net': '192.168.2.0',
'transit02_subnet': '26',
'transit03_net': '192.168.3.0',
'transit03_subnet': '26',
}]
"""
output = {
'transit01': [],
'transit02': [],
'transit03': []
}
I would like to get:
{
'transit01': ['192.168.1.0', '26', 'Transit01'],
'transit02': ['192.168.2.0', '26', 'Transit02'],
'transit03': ['192.168.3.0', '26', 'Transit03'],
}
I have tried following, but only able to print the first
for item in data:
# Iterating the elements in list
output['transit01'].append(item['transit01_net'])
output['transit01'].append(item['transit01_subnet'])
output['transit01'].append('Transit01')
output['transit02'].append(item['transit02_net'])
output['transit02'].append(item['transit02_subnet'])
output['transit02'].append('Transit02')
output['transit03'].append(item['transit03_net'])
output['transit03'].append(item['transit03_subnet'])
output['transit03'].append('Transit03')
Step through this. You want to get from this:
data =
"""
[{'transit01_net': '192.168.1.0',
'transit01_subnet': '26',
'transit02_net': '192.168.2.0',
'transit02_subnet': '26',
'transit03_net': '192.168.3.0',
'transit03_subnet': '26',
}]
"""
To this
{
'transit01': ['192.168.1.0', '26', 'Transit01'],
'transit02': ['192.168.2.0', '26', 'Transit02'],
'transit03': ['192.168.3.0', '26', 'Transit03'],
}
The former is a string that describes a literal data structure. Python gives you access to ast to lex and tokenize that into a python object for you.
import ast
evald_data = ast.literal_eval(data)
From there you need to do the more difficult work of actually parsing the structure. Looks like you can split each key, though, and get what you need. Let's save off name of each field for now.
# Group the flat "<name>_<field>" keys into one nested dict per name.
result = {}
for d in evald_data:  # for each dictionary in the (single-item) list
    for k, v in d.items():
        name, key = k.split("_")
        result.setdefault(name, {})[key] = v
# this should give you
expected = {
    'transit01': {'net': '192.168.1.0', 'subnet': '26'},
    'transit02': {'net': '192.168.2.0', 'subnet': '26'},
    'transit03': {'net': '192.168.3.0', 'subnet': '26'},
}
assert result == expected
From there it's pretty simple stuff. I'd posit that you probably want a tuple instead of a list, since these values' order seem to matter (sorting them isn't just bad, it's incorrect).
# Flatten each nested dict into a (net, subnet, label) tuple keyed by name.
final_result = {k: (v['net'], v['subnet'], k.title()) for k, v in result.items()}
# The expected values are tuples too: a tuple never compares equal to a list.
expected = {
    'transit01': ('192.168.1.0', '26', 'Transit01'),
    'transit02': ('192.168.2.0', '26', 'Transit02'),
    'transit03': ('192.168.3.0', '26', 'Transit03'),
}
assert final_result == expected
Use collections.defaultdict
Ex.
from collections import defaultdict
data = [{'transit01_net': '192.168.1.0',
         'transit01_subnet': '26',
         'transit02_net': '192.168.2.0',
         'transit02_subnet': '26',
         'transit03_net': '192.168.3.0',
         'transit03_subnet': '26',
         }]
output = defaultdict(list)
# The dict keeps insertion order, so each "<name>_net" value is appended
# before the matching "<name>_subnet" value.
for field, value in data[0].items():
    name, _, kind = field.partition("_")
    output[name].append(value)
    # Detect the subnet entry by its suffix instead of a running counter, so
    # the names need not be numbered sequentially or stay below ten.
    if kind == "subnet":
        output[name].append(name.capitalize())
print(dict(output))
O/P
{'transit01': ['192.168.1.0', '26', 'Transit01'], 'transit02': ['192.168.2.0', '26',
'Transit02'], 'transit03': ['192.168.3.0', '26', 'Transit03']}

Create dict from string in Python

In my Python program, I have a string of format:
'name': 'Salman','age': '25', 'access': 'R', 'id': '00125'
I want to convert it to type dict so that I can query like dict["name"] to get "Salman" printed.
Use ast.literal_eval:
import ast
mystr = "'name': 'Salman','age': '25', 'access': 'R', 'id': '00125'"
d = ast.literal_eval('{'+mystr+'}')
# {'access': 'R', 'age': '25', 'id': '00125', 'name': 'Salman'}
d['access'] # 'R'
I think this is a neat solution using comprehensions
s = "'name': 'Salman','age': '25', 'access': 'R', 'id': '00125'"
# Split into "key: value" chunks, then strip whitespace and quotes from both halves.
d = {
    raw_key.strip().replace("'", ""): raw_value.strip().replace("'", "")
    for raw_key, raw_value in (chunk.split(':') for chunk in s.split(","))
}
# d == {'access': 'R', 'age': '25', 'id': '00125', 'name': 'Salman'}
First split the string on ":" and "," and store the pieces in a list.
Then iterate over the even indices i = 0, 2, ..., len(list)-2 and set mydict[list[i]] = list[i+1].

convert csv to dictionary

I have the following csv file(total 20000 lines)
ozone,paricullate_matter,carbon_monoxide,sulfure_dioxide,nitrogen_dioxide,longitude,latitude,timestamp,avg_measured_time,avg_speed,median_measured_time,timestamp:1,vehicle_count,lat1,long1,lat2,long2,distance_between_2_points,duration_of_measurement,ndt_in_kmh
99,99,98,116,118,10.09351660921,56.1671665604395,1407575099.99998,0,0,0,1407575099.99998,0,56.1089513576227,10.1823955595246,56.1048021343541,10.1988040846558,1124,65,62
99,99,98,116,118,10.09351660921,56.1671665604395,1407575099.99998,0,0,0,1407575099.99998,0,56.10986429895,10.1627288048935,56.1089513576227,10.1823955595246,1254,71,64
99,99,98,116,118,10.09351660921,56.1671665604395,1407575099.99998,0,0,0,1407575099.99998,0,56.1425188527673,10.1868802625656,56.1417522836526,10.1927236478157,521,62,30
99,99,98,116,118,10.09351660921,56.1671665604395,1407575099.99998,18,84,18,1407575099.99998,1,56.1395320665735,10.1772034087371,56.1384485157567,10.1791506011887,422,50,30
I want to convert this into a dictionary like
{'ozone': [99,99,99,99], 'paricullate_matter': [99,99,99,99],'carbon_monoxide': [98,98,98,98],etc....}
What i have tried
import csv
reader = csv.DictReader(open('resulttable.csv'))
output = open("finalprojdata.py","w")
result = {}
for row in reader:
for column, value in row.iteritems():
result.setdefault(column, []).append(float(value))
output.write(str(result))
The output am getting is consisting of only few dictionaries. Like from
{'vehicle_count': [0,0,0,1], 'lat1': etc}
The whole csv file is not getting converted to dictionary.
If you have pandas this is super easy:
import pandas as pd
data = pd.read_csv("data.csv")
data_dict = {col: list(data[col]) for col in data.columns}
this should do what you want:
import csv
def int_or_float(strg):
    """Convert a numeric string to ``int`` when it is whole, else to ``float``.

    Plain integer strings are parsed with ``int`` directly, so values above
    2**53 are not rounded by a detour through ``float``. Anything else is
    parsed as a float and narrowed to ``int`` when it has no fractional part
    (e.g. "18.0" -> 18).

    Raises ValueError if ``strg`` is not a valid number.
    """
    try:
        return int(strg)
    except ValueError:
        val = float(strg)
        return int(val) if val.is_integer() else val
with open('test.csv') as in_file:
it = zip(*csv.reader(in_file))
dct = {el[0]: [int_or_float(val) for val in el[1:]] for el in it}
zip(*csv.reader(in_file)) will just transpose the data you have and rearrange it in the way you want; the dictionary comprehension then builds your new dictionary.
dct now contains the dictionary you want.
Awk version
awk -F',' '
# Header row: remember the column count and open one "name" : [ list per column.
NR==1 { n=NF; for (i=1; i<=n; i++) D[i] = sprintf("\"%s\" : [", $i); next }
# Data rows: append each field to its column, comma-separated after the first data row.
{ for (i=1; i<=n; i++) D[i] = D[i] sprintf("%s %s", (NR>2 ? "," : ""), $i) }
END {
  printf("{ ")
  # Walk the columns by index so they come out in file order.
  for (i=1; i<=n; i++) printf("%s%s ]", (i>1 ? ", " : ""), D[i])
  printf(" }\n")
}
' YourFile > final.py
Quick and dirty, not memory-optimized (20000 lines is not so huge for modern memory space):
from collections import defaultdict
import csv
columns = defaultdict(list)
with open('test.csv') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
for (k,v) in row.items():
columns[k].append(v)
print columns
#Output
defaultdict(<type 'list'>, {'vehicle_count': ['0', '0', '0', '1'], 'lat1': ['56.1089513576227', '56.10986429895', '56.1425188527673', '56.1395320665735'], 'lat2': ['56.1048021343541', '56.1089513576227', '56.1417522836526', '56.1384485157567'], 'paricullate_matter': ['99', '99', '99', '99'], 'timestamp': ['1407575099.99998', '1407575099.99998', '1407575099.99998', '1407575099.99998'], 'long1': ['10.1823955595246', '10.1627288048935', '10.1868802625656', '10.1772034087371'], 'longitude': ['10.09351660921', '10.09351660921', '10.09351660921', '10.09351660921'], 'nitrogen_dioxide': ['118', '118', '118', '118'], 'ozone': ['99', '99', '99', '99'], 'latitude': ['56.1671665604395', '56.1671665604395', '56.1671665604395', '56.1671665604395'], 'timestamp:1': ['1407575099.99998', '1407575099.99998', '1407575099.99998', '1407575099.99998'], 'distance_between_2_points': ['1124', '1254', '521', '422'], 'long2': ['10.1988040846558', '10.1823955595246', '10.1927236478157', '10.1791506011887'], 'avg_measured_time': ['0', '0', '0', '18'], 'carbon_monoxide': ['98', '98', '98', '98'], 'ndt_in_kmh': ['62', '64', '30', '30'], 'avg_speed': ['0', '0', '0', '84'], 'sulfure_dioxide': ['116', '116', '116', '116'], 'duration_of_measurement': ['65', '71', '62', '50'], 'median_measured_time': ['0', '0', '0', '18']})
pyexcel version:
import pyexcel as p
p.get_dict(file_name='test.csv')
$ cat tst.awk
BEGIN { FS=OFS=","; ORS="}\n" }
NR==1 {split($0,hdr); next }
{
for (i=1; i<=NF; i++) {
vals[i] = (i in vals ? vals[i] "," : "") $i
}
}
END {
printf "{"
for (i=1; i<=NF; i++) {
printf "\047%s\047: [%s]%s", hdr[i], vals[i], (i<NF?OFS:ORS)
}
}
$ awk -f tst.awk file
{'ozone': [99,99,99,99],'paricullate_matter': [99,99,99,99],'carbon_monoxide': [98,98,98,98],'sulfure_dioxide': [116,116,116,116],'nitrogen_dioxide': [118,118,118,118],'longitude': [10.09351660921,10.09351660921,10.09351660921,10.09351660921],'latitude': [56.1671665604395,56.1671665604395,56.1671665604395,56.1671665604395],'timestamp': [1407575099.99998,1407575099.99998,1407575099.99998,1407575099.99998],'avg_measured_time': [0,0,0,18],'avg_speed': [0,0,0,84],'median_measured_time': [0,0,0,18],'timestamp:1': [1407575099.99998,1407575099.99998,1407575099.99998,1407575099.99998],'vehicle_count': [0,0,0,1],'lat1': [56.1089513576227,56.10986429895,56.1425188527673,56.1395320665735],'long1': [10.1823955595246,10.1627288048935,10.1868802625656,10.1772034087371],'lat2': [56.1048021343541,56.1089513576227,56.1417522836526,56.1384485157567],'long2': [10.1988040846558,10.1823955595246,10.1927236478157,10.1791506011887],'distance_between_2_points': [1124,1254,521,422],'duration_of_measurement': [65,71,62,50],'ndt_in_kmh': [62,64,30,30]}

Parsing XML into a dictionary of lists Python/Django

I'm having a little issue with parsing an xml with python. I'm trying to get my dictionary to look like the following
listDict = [{'name':'Sales','id':'1','position':'1','order_by_type':'True','order_by_asc':'True;}, {'name':'Information','id':'2','position':'1','order_by_type':'True','order_by_asc':'True;}]
I'm thinking my loop after pulling data from the xml string is wrong.
xml_data = ElementTree.fromstring(self.data)
# Lets grab all the base cats info and add them to a dict containing a list
base_cats = xml_data.findall('./BaseCategory/Name')
base_cats_id = xml_data.findall('./BaseCategory/base_id')
base_postion = xml_data.findall('./BaseCategory/position')
base_order_by_type = xml_data.findall('./BaseCategory/order_by_type')
base_order_by_asc = xml_data.findall('./BaseCategory/order_by_asc')
# store all information into lists
base_cat = [t.text for t in base_cats]
base_id = [t.text for t in base_cats_id]
base_p = [t.text for t in base_postion]
base_obt = [t.text for t in base_order_by_type]
base_asc = [t.text for t in base_order_by_asc]
base_dict = defaultdict(list)
# lets put everything in the list into a dictionary
for base in range(len(base_cat)): # for each base in base_cat loop
base_dict[base].append(base_cat[base])
base_dict[base].append(base_id[base])
base_dict[base].append(base_p[base])
base_dict[base].append(base_obt[base])
base_dict[base].append(base_asc[base])
This produces the following.
instance = {0: ['Sales 2', '1', '10', 'True', 'True'], 1: ['Information 2', '2', '20', 'True', 'True'], 2: ['Listing 2', '3', '30', 'True', 'True'], 3: ['Information', '4', '40', 'True', 'True'], 4: ['Land', '5', '50', 'True', 'True'], 5: ['&', '6', '60', 'True', 'True'], 6: ['Tax', '7', '70', 'True', 'True'], 7: ['Construction', '9', '90', 'True', 'True'], 8: ['Interior/Utilites', '10', '100', 'True', 'True'], 9: ['HOA/Community', '11', '110', 'True', 'True'], 10: ['Remarks', '12', '120', 'True', 'True'], 11: ['Exterior', '8', '80', 'True', 'True']})
My end goal is to be able to do the following on my django template
{%for item in instance%}
{{ item.name }}
{% endfor %}
Any help on how I may have something wrong would help a lot. Thanks in advance for the help.
EDIT:
As asked here is the xml I have.
<?xml version="1.0" ?>
<FormInstance>
<BaseCategory>
<Name>Sales</Name>
<base_id>1</base_id>
<position>10</position>
<order_by_type>True</order_by_type>
<order_by_asc>True</order_by_asc>
</BaseCategory>
<BaseCategory>
<Name>Information</Name>
<base_id>2</base_id>
<position>20</position>
<order_by_type>True</order_by_type>
<order_by_asc>True</order_by_asc>
<MainCategory>
<main_id>1</main_id>
<Name>Address 3</Name>
<is_visible>True</is_visible>
<position>10</position>
<order_by_type>True</order_by_type>
<order_by_asc>True</order_by_asc>
<SubCategory>
<sub_id>1</sub_id>
<Name>Street Number 2</Name>
<sub_library_id>StreetNumber</sub_library_id>
<field_display_type>[u'input']</field_display_type>
<field_type>[u'varchar']</field_type>
<is_active>True</is_active>
<is_required>True</is_required>
<help_text>Street Number</help_text>
<main_category>1</main_category>
<is_visible>True</is_visible>
<position>10</position>
<order_by_type>True</order_by_type>
<order_by_asc>True</order_by_asc>
<show_seller>True</show_seller>
<Enumerations>
<enum_id>4</enum_id>
<Name>Test Enum</Name>
<library_id>test enum</library_id>
<is_active>True</is_active>
<sub_category>1</sub_category>
<is_visible>True</is_visible>
<position>10</position>
<order_by_type>True</order_by_type>
<order_by_asc>True</order_by_asc>
</Enumerations>
</SubCategory>
</MainCategory>
</BaseCategory>
</FormInstance>
So, for what I gather in the expected results, it looks like you just want to get the information about nodes that are strictly BaseCategory, right? In the XML that was provided in the edit, you have two of those.
You should see the XML as a tree of nodes. In the example, you have something like:
FormInstance # this is the root
/ \
/ \
BaseCategory BaseCategory
(name:Sales) (name:Information)
\
\
MainCategory
(name:Address 3)
\
\
Subcategory
(name:Street Number 2)
But you only need the information in the BaseCategory elements, right?
You could just position yourself in the root (which... well... is what xml.fromstring does anyway) iterate over its BaseCategory nodes, get the items you need from those BaseCategory nodes and put them in your list of dictionaries.
Something like:
import pprint
from xml.etree import ElementTree
with open("sample_xml.xml", 'r') as f:
data = f.read()
xml_data = ElementTree.fromstring(data)
base_categories = xml_data.findall("./BaseCategory")
print("Found %s base_categories." % len(base_categories))
list_dict = []
for base_category in base_categories:
list_dict.append({
"name": base_category.find("Name").text,
"id": int(base_category.find("base_id").text),
"position": int(base_category.find("position").text),
"order_by_type": (True if base_category.find("order_by_type").text.lower() == "true"
else False),
"order_by_asc": (True if base_category.find("order_by_asc").text.lower() == "true"
else False),
})
print("list_dict=%s" % (pprint.pformat(list_dict)))
Which outputs:
Found 2 base_categories.
list_dict=[{'id': 1,
'name': 'Sales',
'order_by_asc': True,
'order_by_type': True,
'position': 10},
{'id': 2,
'name': 'Information',
'order_by_asc': True,
'order_by_type': True,
'position': 20}]
The idea is that a BaseCategory item is something that can be seen as a self-contained record (like a dict, if it helps you see it) that can contain (in it) the following attributes:
A string with the name in Name
A numeric id in base_id
A numeric position
A boolean order_by_type
A boolean order_by_asc
Another object MainCategory with its own fields...
So every time you position yourself in one of these BaseCategory nodes, you just gather the interesting fields that it has and put them in dictionaries.
When you do:
base_cats = xml_data.findall('./BaseCategory/Name')
base_cats_id = xml_data.findall('./BaseCategory/base_id')
base_postion = xml_data.findall('./BaseCategory/position')
base_order_by_type = xml_data.findall('./BaseCategory/order_by_type')
base_order_by_asc = xml_data.findall('./BaseCategory/order_by_asc')
You are treating those element (base_id, position...) almost as independent elements, which is not exactly what you have in your XML.
However, if you are absolutely certain that all those lists (base_cats, base_cats_id, base_position...) do contain the same number of items, you can still re-build your dictionary, using the length of one of them (in the example below len(base_cats), but it could've been len(base_cats_id), len(base_position)... since all those lists have the same length) to iterate through all the lists in the same step:
base_cats = xml_data.findall('./BaseCategory/Name')
base_cats_id = xml_data.findall('./BaseCategory/base_id')
base_postion = xml_data.findall('./BaseCategory/position')
base_order_by_type = xml_data.findall('./BaseCategory/order_by_type')
base_order_by_asc = xml_data.findall('./BaseCategory/order_by_asc')
list_dict = []
for i in range(len(base_cats)):
list_dict.append({
"name": base_cats[i].text,
"id": int(base_cats_id[i].text),
"position": int(base_postion[i].text),
"order_by_type": True if base_order_by_type[i].text.lower() == "true" else False,
"order_by_asc": True if base_order_by_asc[i].text.lower() == "true" else False,
})
print("list_dict=%s" % (pprint.pformat(list_dict)))

Categories