I have many CSV files, each of which has three columns:
latitude
longitude
distance
for example:
car1.csv
lat long total_dist
23.33 73.32 0
23.45 73.34 10
23.64 73.53 16 ---> #cumulative sum as car1 travels
#similarly there it for car2,car3,car4 etc
So I concatenated these CSV files into one data frame to plot them on a map:
import pandas as pd
import folium
from pathlib import Path
import glob
path = r'C:/Users/Desktop/Sample/car_new'  # use your path
all_files = glob.glob(path + "/*.csv")

# Read every CSV and stack them into a single frame holding only the coordinates.
li = [pd.read_csv(csv_file, index_col=None, header=0) for csv_file in all_files]
car_locations = pd.concat(li, axis=0, ignore_index=True)[["Latitude", "Longitude"]]

# Centre the map on the mean position of all points.
centre = [car_locations.Latitude.mean(), car_locations.Longitude.mean()]
map = folium.Map(location=centre, zoom_start=14, control_scale=True, tiles='Stamen Terrain')

# One red line through every point of every file, in concatenation order.
folium.PolyLine(car_locations, color="red", weight=2.5, opacity=0.5).add_to(map)
map
I want to set a condition like this:
if(total_dist > 120):
#plot green line on map
else:
#plot red line
This is how it should look i have posted a link to the image.
https://imgur.com/a/07rxQ9l
# This script will plot multiple lines
# One line per file, each line in a colour specified in the list.
from itertools import count
import pandas as pd
import folium
from pathlib import Path
import glob
#Imported OS module to set path dynamically.
# as seen below.
import os
# Resolve the current working directory so the script can be run from any
# folder without typing the path by hand.
project_dir = os.path.realpath('') + '/'
# This is not necessary, but since this was in your script
# I left it in here.
path = project_dir
# Sorted so that each file gets the same colour on every run.
all_files = sorted(glob.glob(path + "/*.csv"))

# Colours are reused in a cycle, so any number of files can be plotted
# (indexing the list directly fails as soon as there are more files than colours).
colors = ['red', 'blue']

# Create the map only once. If this were inside the loop, a new map would
# overwrite the previous one every time another car is plotted.
map = folium.Map(zoom_start=14, control_scale=True, tiles='Stamen Terrain')

# Every plotted point, collected so the view can be fitted to the data.
all_points = []

for cl_count, filename in enumerate(all_files):
    df = pd.read_csv(filename, index_col=None, header=0)
    # Each file is plotted on its own, so there is nothing to concatenate.
    car_locations = df[["Latitude", "Longitude"]].dropna()
    points = car_locations.values.tolist()
    if not points:
        continue
    folium.PolyLine(
        points,
        color=colors[cl_count % len(colors)],
        weight=3.0,
        opacity=1,
    ).add_to(map)
    all_points.extend(points)

# The map was created without a location, so zoom it onto the plotted lines.
if all_points:
    lats = [lat for lat, _ in all_points]
    lons = [lon for _, lon in all_points]
    map.fit_bounds([[min(lats), min(lons)], [max(lats), max(lons)]])

map
Related
I have the following code in which I am loading a single DICOM file and checking if there are sagittal and coronal view present or not.
I want to modify this to read all DICOM files from the folder.
It should also print that there is no sagittal or coronal view when the sag_aspect or cor_aspect value is zero.
How do I do this?
import pydicom
import numpy as np
import matplotlib.pyplot as plt
import sys
import glob
# Load the DICOM files matched by the (hard-coded) glob pattern.
print('glob: {}'.format(sys.argv[1]))
files = []
for dicom_path in glob.glob('dicom/3.dcm', recursive=False):
    print("loading: {}".format(dicom_path))
    files.append(pydicom.dcmread(dicom_path))
print("file count: {}".format(len(files)))

# Keep only datasets that carry a SliceLocation (scout views, for example, do not).
slices = [ds for ds in files if hasattr(ds, 'SliceLocation')]
skipcount = len(files) - len(slices)
print("skipped, no SliceLocation: {}".format(skipcount))

# Put the slices in spatial order.
slices.sort(key=lambda ds: ds.SliceLocation)

# Pixel aspect ratios, taken from the first slice on the assumption that
# every slice shares the same spacing and thickness.
first_slice = slices[0]
ps = first_slice.PixelSpacing
ss = first_slice.SliceThickness
ax_aspect = ps[1] / ps[0]
sag_aspect = ps[1] / ss
cor_aspect = ss / ps[0]

# Stack the 2D images along a third axis: (rows, columns, slice index).
img_shape = list(first_slice.pixel_array.shape) + [len(slices)]
img3d = np.zeros(img_shape)
for index, ds in enumerate(slices):
    img3d[:, :, index] = ds.pixel_array

# Plot the three orthogonal planes through the middle of the volume.
print(img3d.shape)
print(img_shape)
mid_row = img_shape[0] // 2
mid_col = img_shape[1] // 2
mid_slice = img_shape[2] // 2

a1 = plt.subplot(2, 2, 1)
plt.imshow(img3d[:, :, mid_slice])
a1.set_title("transverse view")
a1.set_aspect(ax_aspect)

a2 = plt.subplot(2, 2, 2)
#print(img3d[:, img_shape[1]//2, :].shape)
plt.imshow(img3d[:, mid_col, :])
a2.set_title("sagital view")
a2.set_aspect(sag_aspect)

a3 = plt.subplot(2, 2, 3)
plt.imshow(img3d[mid_row, :, :].T)
a3.set_title("coronal view")
a3.set_aspect(cor_aspect)

plt.show()
For reading multiple dicom files from a folder you can use the code below.
import os
from pathlib import Path
import pydicom
dir_path = r"path\to\dicom\files"

# Every dataset that could be read, in the order os.walk visits the files.
dicom_set = []

for root, _, filenames in os.walk(dir_path):
    for filename in filenames:
        dcm_path = Path(root, filename)
        # Compare the extension case-insensitively so ".DCM" files are not skipped.
        if dcm_path.suffix.lower() != ".dcm":
            continue
        try:
            dicom = pydicom.dcmread(dcm_path, force=True)
        except OSError as e:
            # Report which file failed and why, then carry on with the rest.
            print(f"Can't import {dcm_path}: {e}")
        else:
            dicom_set.append(dicom)
I have leveraged the pathlib library which I strongly suggest to use whenever dealing with folder/file paths. I have also added an exception, but you can modify it to meet your needs.
I am trying to visualize air quality data as time-series charts using the pycaret and plotly dash Python libraries, but I am getting very weird graphs. Below is my code:
import pandas as pd
import plotly.express as px
data = pd.read_csv('E:/Self Learning/Djang_Dash/2019-2020_5.csv')
data['Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')
#data.set_index('Date', inplace=True)

# Turn each OBJECTID into a "Location_<id>" label and use it as the series key.
data['OBJECTID'] = data['OBJECTID'].map(lambda object_id: 'Location_' + str(object_id))
#data['AQI_Bins_AI'] = ['Bin_' + str(i) for i in data['AQI_Bins_AI']]
data['time_series'] = data[['OBJECTID']].apply(lambda x: '_'.join(x), axis=1)
data.drop(['OBJECTID'], axis=1, inplace=True)

# Calendar features derived from the date.
data['month'] = [stamp.month for stamp in data['Date']]
data['year'] = [stamp.year for stamp in data['Date']]
data['day_of_week'] = [stamp.dayofweek for stamp in data['Date']]
data['day_of_year'] = [stamp.dayofyear for stamp in data['Date']]

data.head(4000)
data['time_series'].nunique()

# One figure per location: raw CO plus its 30-row moving average.
for series_name in data['time_series'].unique():
    subset = data[data['time_series'] == series_name]
    subset['moving_average'] = subset['CO'].rolling(window = 30).mean()
    fig = px.line(subset, x="Date", y=["CO","moving_average"], title = series_name, template = 'plotly_dark')
    fig.show()
require needful help in this regard,
here is my sample data Google Drive Link
The data has not been provided in a usable way, so I looked for similar publicly available data and found: https://www.kaggle.com/rohanrao/air-quality-data-in-india?select=station_hour.csv
Using this data, and with a couple of cleanups to your code, there are no issues with the plots. I suspect your data has one of these issues:
the date column is not datetime64[ns] in your data frame
the dates are not sorted, which leads to lines being drawn back and forth in the way you have noted
By refactoring the way the moving average is calculated, you can also use an animation instead of lots of separate figures.
get some data
import kaggle.cli
import sys, math
import pandas as pd
from pathlib import Path
from zipfile import ZipFile
import plotly.express as px
# Download the data set by driving the kaggle command-line entry point:
# https://www.kaggle.com/rohanrao/air-quality-data-in-india?select=station_hour.csv
# The CLI reads its arguments from sys.argv, so they are substituted here.
cli_args = "datasets download rohanrao/air-quality-data-in-india".split(" ")
sys.argv = [sys.argv[0], *cli_args]
kaggle.cli.main()

# Open the downloaded archive and list the files it contains.
zfile = ZipFile("air-quality-data-in-india.zip")
print([member.filename for member in zfile.infolist()])
plot using code from question
import pandas as pd
import plotly.express as px
from pathlib import Path
from distutils.version import StrictVersion
# data = pd.read_csv('E:/Self Learning/Djang_Dash/2019-2020_5.csv')
# use kaggle data
# dfs = {f.filename:pd.read_csv(zfile.open(f)) for f in zfile.infolist() if f.filename in ['station_day.csv',"stations.csv"]}
# data = pd.merge(dfs['station_day.csv'],dfs["stations.csv"], on="StationId")
# data['Date'] = pd.to_datetime(data['Date'])
# # kaggle data is different from question, make it compatible with questions data
# data = data.assign(OBJECTID=lambda d: d["StationId"])

# sample data from google drive link
data2 = pd.read_csv(Path.home().joinpath("Downloads").joinpath("AQI.csv"))
data2["Date"] = pd.to_datetime(data2["Date"])
data = data2

# As noted in the first point of the answer: the rows must be in date order,
# otherwise the line is drawn back and forth across the chart.
data = data.sort_values(["Date", "OBJECTID"])
data['time_series'] = "Location_" + data["OBJECTID"].astype(str)

# Clean up: drop rows that have no CO value.
data = data.dropna(subset=["CO"])

# 30-row moving average computed per location in one step. transform() returns
# a result aligned to the original index, so it can be assigned directly and
# needs no pandas-version-specific handling.
data["moving_average"] = data.groupby("time_series")["CO"].transform(
    lambda co: co.rolling(window=30).mean()
)

# Only the first three locations, for the purpose of demonstration.
for i in data['time_series'].unique()[0:3]:
    subset = data.loc[data['time_series'] == i]
    fig = px.line(subset, x="Date", y=["CO","moving_average"], title = i, template = 'plotly_dark')
    fig.show()
can use animation
# Single animated figure instead of one figure per location: each value of
# "time_series" becomes one animation frame showing CO and its moving average.
# The y-axis range is fixed from the minimum CO value to its 97th percentile.
px.line(
    data,
    x="Date",
    y=["CO", "moving_average"],
    animation_frame="time_series",
    template="plotly_dark",
).update_layout(yaxis={"range":[data["CO"].min(), data["CO"].quantile(.97)]})
I have 2 data frames:
df1 contains columns: “time”, “bid_price”
df2 contains columns: “time”, “flag”
I want to plot a time series of df1 as a line graph and i want to put markers on that trace at points where df2 “flag” column value = True at those points in time
How can i do this?
You can do so in three steps:
set up a figure using go.Figure(),
add a trace for your bid prices using fig.add_traces(go.Scatter(...)),
and do the same thing for your flags.
The snippet below does exactly what you're describing in your question. I've set up two dataframes df1 and df2, and then I've merged them together to make things a bit easier to reference later on.
I'm also showing flags for an accumulated series where each increment in the series > 0.9 is flagged in flags = [True if elem > 0.9 else False for elem in bid_price] . You should be able to easily adjust this to whatever your real world dataset looks like.
Plot:
Complete code with random data:
# imports
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import random
# settings
observations = 100
np.random.seed(5)

# Random daily increments; an increment above 0.9 is flagged.
bid_price = np.random.uniform(low=-1, high=1, size=observations).tolist()
flags = [True if elem > 0.9 else False for elem in bid_price]
# Dates as 'YYYY-MM-DD' strings, one per observation.
time = pd.date_range('2020', freq='D', periods=observations).strftime('%Y-%m-%d').tolist()

# bid price
df1 = pd.DataFrame({'time': time,
                    'bid_price': bid_price})
df1.set_index('time', inplace=True)
# Start the accumulated series at zero.
df1.iloc[0] = 0

# flags
df2 = pd.DataFrame({'time': time,
                    'flags': flags})
df2.set_index('time', inplace=True)

# Merge on the shared time index so price and flag sit in the same row.
df = df1.merge(df2, left_index=True, right_index=True)
df.bid_price = df.bid_price.cumsum()
# Price where flagged, NaN elsewhere, so markers appear only on flagged points.
df['flagged'] = np.where(df['flags'], df['bid_price'], np.nan)

# plotly setup
fig = go.Figure()

# trace for bid_prices
fig.add_traces(go.Scatter(x=df.index, y=df['bid_price'], mode='lines',
                          name='bid_price'))

# trace for flags
fig.add_traces(go.Scatter(x=df.index, y=df['flagged'], mode='markers',
                          marker=dict(symbol='triangle-down', size=16),
                          name='Flag'))

fig.update_layout(template='plotly_dark')
fig.show()
I'm using linear regression to predict the closing price of a stock on the current day. This works fine.
I'm using Django.
I need to add graphs (a time series and a candlestick chart). After searching, I found that Bokeh looks best for what I want to achieve.
Question:
I want to add time-series and candlestick graph in my Django project.
Code
This is how I m predicting stocks closing price on the current day.
stockprediction.py
def get_stock_data(name):
    """Predict today's closing price for the stock *name*.

    If ``model_check(name)`` reports no saved model, a linear regression is
    fitted on columns 1-3 (features) and column 4 (target) of
    ``<name>_data.csv`` and saved as ``<name>.pkl``; otherwise the saved model
    is loaded. The model is then applied to the quote returned by
    ``get_nse_data(name)``.

    Returns:
        The predicted value as a scalar, or the string ``"Error"`` if any
        step fails.
    """
    try:
        # os.path.join keeps the paths valid on every platform, not only Windows.
        data_root = os.path.join(os.getcwd(), "StockPrediction", "data")
        model_file = os.path.join(data_root, "saved_data", name + ".pkl")

        if not model_check(name):
            df = pd.read_csv(
                os.path.join(data_root, "HISTORICAL_DATA", name + "_data.csv")
            )
            # Fill gaps with column means; numeric_only avoids failing on
            # non-numeric columns.
            df.fillna(df.mean(numeric_only=True), inplace=True)
            X = df.iloc[:, [1, 2, 3]]
            Y = df.iloc[:, [4]]
            model = linear_model.LinearRegression()
            model.fit(X, Y)
            joblib.dump(model, model_file)
        else:
            model = joblib.load(model_file)

        y_today = model.predict([get_nse_data(name)])
        # Same scalar result whether the model was just trained or loaded.
        return y_today[0][0]
    except Exception:
        # Callers rely on this sentinel string rather than an exception.
        return ("Error")
def get_nse_data(name):
    """Return ``[open, dayHigh, dayLow]`` from the ``nse`` quote for *name*."""
    quote = nse.get_quote(name)
    return [quote[field] for field in ('open', 'dayHigh', 'dayLow')]
Bonus Question:
I need graphs which are best for showing stocks price like candlestick and time-series(can you suggest more.)
Help!
If you want to implement Bokeh, you can set up everything using the steps in the documentation (https://docs.bokeh.org/en/latest/docs/user_guide/quickstart.html#userguide-quickstart) and that will generate your .html file which you can include in your templates folder.
However, I find libraries like chart.js much more handy and customizable. They can be implemented in django fairly easily. Here's a link to a very good tutorial that helped me a lot:
https://youtu.be/B4Vmm3yZPgc
I've found holoviews to be really nice for this kind of stuff - in your case, you want to work with RangeToolLink in conjunction with hv.Curve and a pandas dataframe to make a typical stock plot with a range tool on the bottom.
Here's a simple example, stolen from the holoviews website:
import bokeh
bokeh.sampledata.download() # only needs to run once
import pandas as pd
import holoviews as hv
from bokeh.sampledata.stocks import AAPL
from holoviews.plotting.links import RangeToolLink
from holoviews import opts
hv.extension('bokeh')
# Make dataframe from stock data: one 'close' column indexed by the parsed dates
aapl_df = pd.DataFrame(AAPL['close'], columns=['close'], index=pd.to_datetime(AAPL['date']))
aapl_df.index.name = 'Date'
# Create stock curve with 'Date' as the key dimension and 'close' (labelled 'Price ($)') as the value
aapl_curve = hv.Curve(aapl_df, 'Date', ('close', 'Price ($)'))
# Labels and layout: tgt is the main plot, src is the short strip placed below it
tgt = aapl_curve.relabel('AAPL close price').opts(width=800, labelled=['y'], toolbar='disable')
src = aapl_curve.opts(width=800, height=100, yaxis=None, default_tools=[])
# Link the strip (source) to the main plot (target) with a range tool
RangeToolLink(src, tgt)
# Merge rangetool: stack both plots in a single column, each with its own axes and tools
layout = (tgt + src).cols(1)
layout.opts(opts.Layout(shared_axes=False, merge_tools=False))
Here's what you should see:
An even simpler example here uses candlesticks in bokeh:
from math import pi
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.stocks import MSFT
# First 50 rows of the MSFT sample data (the data set this snippet imports
# and names in the plot title).
df = pd.DataFrame(MSFT)[:50]
df["date"] = pd.to_datetime(df["date"])

# Days that closed higher / lower than they opened.
inc = df.close > df.open
dec = df.open > df.close
w = 12*60*60*1000 # half day in ms

TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
# NOTE(review): newer bokeh releases may expect `width` instead of
# `plot_width` - confirm against the installed version.
p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=1000, title = "MSFT Candlestick")
p.xaxis.major_label_orientation = pi/4
p.grid.grid_line_alpha=0.3

# Wick: high-low range. Bodies: open-close range, coloured by direction.
p.segment(df.date, df.high, df.date, df.low, color="black")
p.vbar(df.date[inc], w, df.open[inc], df.close[inc], fill_color="#D5E1DD", line_color="black")
p.vbar(df.date[dec], w, df.open[dec], df.close[dec], fill_color="#F2583E", line_color="black")
show(p)
Result:
This all works seamlessly in a Jupyter notebook, so it should be easy enough for you - you just need to get your predictions into a Pandas dataframe!
I was hoping somebody would be able to help me. I am trying to store a list of saved images from MatPlotLib as a dataframe (or a list) and then add it to an existing dataframe (effectively creating small barcharts for each entry in the dataframe e.g. databars).
I have managed to save the images successfully with a loop. There are 242 images. How can I show these images in a column of a dataframe? I want it to be easy to append to my existing dataframe, to show visually the number of zero values in this dataset. My code gives the error: 'NoneType' object is not iterable.
This is my code. (Top half just here for clarification as to what q1 and q2 are.)
Thanks.
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
# pandas is imported as `pd`; the raw string keeps the backslash in the path literal.
q1 = pd.read_csv(r"data\q1.csv") #dataframe
# apply() returns a new frame, so the stripped result has to be assigned back.
q1 = q1.apply(lambda x: x.str.strip() if x.dtype == "object" else x) #strip whitespace
q1 = q1.dropna()
code = q1.loc[:,"Code"]
region = q1.loc[:,"Region"]
name = q1.loc[:,"Name"]
heads = list(q1.columns.values) #creates list of header values
nz = (q1 == 0).sum(axis=1) #count number of zero values in rows
q2 = q1[['Code','Region','Name']]
q2 = q2.assign(nz=nz.values)

samples=[]
y=1
# The largest zero-count is the same for every row, so compute it once.
nz_max = q2['nz'].max()
for val in q2['nz']:
    # Scale to a 0-100 bar length.
    val = val/nz_max * 100
    plt.barh(val, width = val, color="blue")
    plt.xlim((0,100))
    plt.yticks([0])
    plt.axis('off')
    # NOTE(review): savefig() writes the file and returns None, so `samples`
    # collects only None values - this is the failure the question is about.
    x = plt.savefig("value" + str(y) + ".png", bbox_inches='tight')
    samples.append(x)
    plt.close()
    y = y + 1

imgdf = pd.DataFrame.from_records(samples)
q3 = q2.append(imgdf)
If you are working in a jupyter notebook, then you can use the HTML display to show the images.
# Some imports
import base64
import pandas as pd
from PIL import Image
from io import BytesIO
from IPython.display import HTML
# No column-width limit, so the long base64 <img> strings are not truncated.
# None is the supported "unlimited" value; -1 is deprecated/rejected by pandas.
pd.set_option('display.max_colwidth', None)
def get_thumbnail(path):
    """
    Open the image at *path* and shrink it to a 150x150 thumbnail (PIL Image)
    """
    thumb = Image.open(path)
    thumb.thumbnail((150, 150), Image.LANCZOS)
    return thumb
def image_base64(im):
    """
    Convert to base64 to be given as the src field of img in HTML

    *im* is either a PIL Image or a path string; a path is first loaded as a
    thumbnail. The image is encoded as JPEG and returned as a base64 string.
    """
    if isinstance(im, str):
        im = get_thumbnail(im)
    # JPEG has no alpha channel or palette: saving an RGBA/LA/P image (typical
    # for PNG files) raises OSError, so convert those to RGB first.
    if im.mode in ("RGBA", "LA", "P"):
        im = im.convert("RGB")
    with BytesIO() as buffer:
        im.save(buffer, 'jpeg')
        return base64.b64encode(buffer.getvalue()).decode()
def image_formatter(im):
    """Render *im* as an HTML <img> tag with the JPEG data embedded as base64."""
    encoded = image_base64(im)
    return f'<img src="data:image/jpeg;base64,{encoded}">'
# Skipping some of your code
image_paths = []
# Number the files from 1, one per row of q2.
for y, val in enumerate(q2['nz'], start=1):
    #... Do somethings here
    image_path = "value" + str(y) + ".png"
    plt.savefig(image_path, bbox_inches='tight')
    plt.close()
    # Keep the file path: savefig() itself returns nothing useful.
    image_paths.append(image_path)

# The column name must match the attribute read on the next line.
q2["image_paths"] = image_paths
q2["image"] = q2.image_paths.map(get_thumbnail)

# Display PIL Images embedded in the dataframe
HTML(q2.to_html(formatters={"image": image_formatter}, escape=False))