Plotly Takes to long to Render an output - python

I need a help please :)
Hi! I have a dataframe CSV file in which I have longitude column, latitude column, and sales.
I would love to visualize my data geographically.
First I have tried to do a scatters directly CSV and wrote the next.
px.set_mapbox_access_token("my token on mapbox")
fig = px.scatter_mapbox(df, lat="latitude", lon="longitude",color="sales",
color_continuous_scale=px.colors.cyclical.IceFire, size_max=20,zoom=12)
fig.show()
fig.write_html("example_map.html")
It has never opened.
Then I have tried to try without mapbox and converted CSV into Json file.
import csv
import json
from collections import OrderedDict
li = []
with open("Path to my file") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
d = OrderedDict()
d['type'] = 'zipCode'
d['geometry'] = {
'type': 'Point',
'coordinates': [float(row['latitude']), float(row['longitude'])]
}
li.append(d)
d = OrderedDict()
d['type'] = 'FeatureCollection'
d['features'] = li
with open('output.json','w') as f:
json.dump(d,f,indent=2)
and next I have tried to plot it:
import csv
import json
from collections import OrderedDict
li = []
with open("C:\\Users\\Dell\\Desktop\\Intern 2021\\McGill\\full_dataset_csv.csv", newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
d = OrderedDict()
d['type'] = 'zipCode'
d['geometry'] = {
'type': 'Point',
'coordinates': [float(row['latitude']), float(row['longitude'])]
}
li.append(d)
d = OrderedDict()
d['type'] = 'FeatureCollection'
d['features'] = li
with open('output.json','w') as f:
json.dump(d,f,indent=2)
Again it took forever to show an output. I think I might have done something wrong in codding? Or I should think about an alternative to Plotly?

Related

write data from dictionary to csv by python

I have a dictionary as below which has repeated item name, the difference is the value of each part name. i want to write those info to csv with expected result is :
import csv
dict={
'test':['part_name','test1','test2','test3','part_name','test1','test2','test3'],
'value':['partA','12','55','109','partB','14','54','106'],
'lcl':['lcl','10','50','100','lcl','10','50','100'],
'ucl':['ucl','18','60','115','ucl','18','60','115'],
}
tmp={}
for k,v1,v2,v3 in zip(dict["test"],dict["value"],dict["lcl"],dict["ucl"]):
tmp.setdefault(k, []).append([v1,v2,v3])
print(tmp)
with open('table.csv','w') as f:
writer_inline = csv.writer(f, delimiter=',', lineterminator=',')
writer = csv.writer(f, delimiter=',', lineterminator='\n')
writer.writerow(tmp.keys())
writer.writerows(zip(*tmp.values()))
Try the below code to get your desired csv. I would recommend not to use dict as name for your dictionary. I have changed it to d:
import csv
d = {
'test':['part_name','test1','test2','test3','part_name','test1','test2','test3'],
'value':['partA','12','55','109','partB','14','54','106'],
'lcl':['lcl','10','50','100','lcl','10','50','100'],
'ucl':['ucl','18','60','115','ucl','18','60','115'],
}
headers = d['test'][:len(set(d['test']))]
size = len(headers)
d.pop('test', None)
parts = []
for i in d:
parts += [[d[i][j:(j+size)] for j in range(0, len(d['value']), size)]]
rows = []
for part in list(zip(*parts)):
rows += part
with open('table.csv','w') as f:
writer = csv.writer(f, delimiter=',', lineterminator='\n')
writer.writerow(headers)
writer.writerows(rows)

Adding a counter as index of the Dataframe

The code below takes a folder with xml files and parses them into a single csv file.
It does the job really good.
from xml.etree import ElementTree as ET
from collections import defaultdict
import csv
from pathlib import Path
directory = 'C:/Users/docs/FolderwithXMLs'
with open('output.csv', 'w', newline='') as f:
writer = csv.writer(f)
headers = ['id', 'service_code', 'rational', 'qualify', 'description_num', 'description_txt', 'set_data_xin', 'set_data_xax', 'set_data_value', 'set_data_x']
writer.writerow(headers)
xml_files_list = list(map(str,Path(directory).glob('**/*.xml')))
for xml_file in xml_files_list:
tree = ET.parse(xml_file)
root = tree.getroot()
start_nodes = root.findall('.//START')
for sn in start_nodes:
row = defaultdict(str)
for k,v in sn.attrib.items():
row[k] = v
for rn in sn.findall('.//Rational'):
row['rational'] = rn.text
for qu in sn.findall('.//Qualify'):
row['qualify'] = qu.text
for ds in sn.findall('.//Description'):
row['description_txt'] = ds.text
row['description_num'] = ds.attrib['num']
for st in sn.findall('.//SetData'):
for k,v in st.attrib.items():
row['set_data_'+ str(k)] = v
row_data = [row[i] for i in headers]
writer.writerow(row_data)
row = defaultdict(str)
The output looks like this
I have been trying to add a counter for the numbers of how many rows of set_data_value for that specific ID there are.
The output should look like this
If necessary I can provide the xml data also. I am sorry someone has to edit the question to show the pictures instead of just hypelink
I have checked other posts here but I wasn't able to implement into this code
Without seeing the XML it will be a bit if a guess, but if you add "Counter" to headers and then add enumerate on the last for loop it may work
for counter, st in enumerate( sn.findall('.//SetData') ):
for k,v in st.attrib.items():
row['set_data_'+ str(k)] = v
row["Counter"] = counter
row_data = [row[i] for i in headers]
writer.writerow(row_data)
row = defaultdict(str)

Why my code not working merge multiple list into one

I have a .csv file. I need to find the first column into a list.
IP
192.168.1.1,a
192.168.1.1,a
192.168.1.2,b
192.168.1.3,c
Code:
import csv
with open(r'C:\Users\windows\Desktop\file.csv', "r", encoding='utf-8') as f:
reader = csv.reader(f)
your_list = [e[0].strip().split(",") for e in reader if e]
your_list[1:]
My output:
[['192.168.1.1'], ['192.168.1.1'], ['192.168.1.2'], ['192.168.1.3']]
My expected output:
['192.168.1.1', '192.168.1.1', '192.168.1.2', '192.168.1.3']
import csv
your_list = []
with open(r'score.csv', "r", encoding='utf-8') as f:
reader = csv.reader(f)
your_list = [ip[0] for ip in reader]
print(your_list[1:])
output:
['192.168.1.1', '192.168.1.1', '192.168.1.2', '192.168.1.3']

Save Scraped Data as CSV file?

I am trying to scrape data from a link that contains JSON data and this is the code:
import requests
import json
parameters = ['a:1','a:2','a:3','a:4','a:3','a:4','a:5','a:6','a:7','a:8','a:9','a:10',]
for item in parameters:
key, value = item.split(':')[0], item.split(':')[1]
url = "https://xxxx.000webhostapp.com/getNamesEnc02Motasel2.php?keyword=%s&type=2&limit=%s" %(key, value)
r = requests.get(url)
cont = json.loads(r.content)
print(cont)
And the output be like
[{'name': 'Absz', 'phone': '66343212'}, {'name': 'ddd ', 'phone': '545432211'}, {'name': 'ezd' 'phone':'54856886'}]
I want to store all the data in a CSV file.
How can I do this?
Also, As you can see I am using parameters list to do multi requests but I think there is a way that I can loop the limit parameter from 1 to 200 without typing every single keyword and number in parameters.
Thanks in advance.
Try the below code it will create csv row wise:
import csv
import json
header = ["name","phone"]
for item in range(1,200):
key, value = 'a', item # Generating key and value from range 1 --> 200
url = "https://xxxx.000webhostapp.com/getNamesEnc02Motasel2.php?keyword=%s&type=2&limit=%s" %(key, value)
r = requests.get(url)
cont = json.loads(r.content)
print(cont)
with open('people.csv', 'a') as writeFile:
writer = csv.writer(writeFile)
writer.writerow(header)
for a_row in cont:
writer.writerow([a_row["name"],a_row["phone"]]) # To write name and phone
Hope this answers your question!!
import requests
import json
import pandas as pd
parameters = ['a:1','a:2','a:3','a:4','a:3','a:4','a:5','a:6','a:7','a:8','a:9','a:10']
results = pd.DataFrame()
for item in parameters:
key, value = item.split(':')
url = "https://xxxx.000webhostapp.com/getNamesEnc02Motasel2.php?keyword=%s&type=2&limit=%s" %(key, value)
r = requests.get(url)
cont = json.loads(r.content)
temp_df = pd.DataFrame(cont)
results = results.append(temp_df)
results.to_csv('path/to/filename.csv', index=False)

Merging two CSV files by a common column python

I am trying to merge two csv files with a common id column and write the merge to a new file. I have tried the following but it is giving me an error -
import csv
from collections import OrderedDict
filenames = "stops.csv", "stops2.csv"
data = OrderedDict()
fieldnames = []
for filename in filenames:
with open(filename, "rb") as fp: # python 2
reader = csv.DictReader(fp)
fieldnames.extend(reader.fieldnames)
for row in reader:
data.setdefault(row["stop_id"], {}).update(row)
fieldnames = list(OrderedDict.fromkeys(fieldnames))
with open("merged.csv", "wb") as fp:
writer = csv.writer(fp)
writer.writerow(fieldnames)
for row in data.itervalues():
writer.writerow([row.get(field, '') for field in fieldnames])
Both files have the "stop_id" column but I'm getting this error back -
KeyError: 'stop_id'
Any help would be much appreciated.
Thanks
Here is an example using pandas
import sys
from StringIO import StringIO
import pandas as pd
TESTDATA=StringIO("""DOB;First;Last
2016-07-26;John;smith
2016-07-27;Mathew;George
2016-07-28;Aryan;Singh
2016-07-29;Ella;Gayau
""")
list1 = pd.read_csv(TESTDATA, sep=";")
TESTDATA=StringIO("""Date of Birth;Patient First Name;Patient Last Name
2016-07-26;John;smith
2016-07-27;Mathew;XXX
2016-07-28;Aryan;Singh
2016-07-20;Ella;Gayau
""")
list2 = pd.read_csv(TESTDATA, sep=";")
print list2
print list1
common = pd.merge(list1, list2, how='left', left_on=['Last', 'First', 'DOB'], right_on=['Patient Last Name', 'Patient First Name', 'Date of Birth']).dropna()
print common
Thanks Shijo.
This is what worked for me after - merged by the first column in each csv.
import csv
from collections import OrderedDict
with open('stops.csv', 'rb') as f:
r = csv.reader(f)
dict2 = {row[0]: row[1:] for row in r}
with open('stops2.csv', 'rb') as f:
r = csv.reader(f)
dict1 = OrderedDict((row[0], row[1:]) for row in r)
result = OrderedDict()
for d in (dict1, dict2):
for key, value in d.iteritems():
result.setdefault(key, []).extend(value)
with open('ab_combined.csv', 'wb') as f:
w = csv.writer(f)
for key, value in result.iteritems():
w.writerow([key] + value)

Categories