I am trying to add a timestamp column to the table shown in this Python Dash Example:
https://github.com/plotly/dash-sample-apps/blob/main/apps/dash-image-annotation/app.py
The aim is to have a timestamp for each of the created objects.
So far, I have managed to:
Create a new column in the output table (line 48 of the GitHub code).
Append a timestamp by adding this line to the "modify_table_entries" function (line 466 of the GitHub code):
annotations_table_data[0]["timestamp"] = time_passed(annotations_store_data["starttime"])
This gives me a timestamp only for the first entry in the output table.
I have been trying to fix this for several days now. I believe I have to somehow append the timestamp to each created object in this function of the code:
def modify_table_entries(
    previous_n_clicks,
    next_n_clicks,
    graph_relayoutData,
    annotations_table_data,
    image_files_data,
    annotations_store_data,
    annotation_type,
):
    cbcontext = [p["prop_id"] for p in dash.callback_context.triggered][0]
    if cbcontext == "graph.relayoutData":
        #debug_print("graph_relayoutData:", graph_relayoutData)
        #debug_print("annotations_table_data before:", annotations_table_data)
        if "shapes" in graph_relayoutData.keys():
            # this means all the shapes have been passed to this function via
            # graph_relayoutData, so we store them
            annotations_table_data = [
                shape_to_table_row(sh) for sh in graph_relayoutData["shapes"]
            ]
        elif re.match("shapes\[[0-9]+\].x0", list(graph_relayoutData.keys())[0]):
            # this means a shape was updated (e.g., by clicking and dragging its
            # vertices), so we just update the specific shape
            annotations_table_data = annotations_table_shape_resize(
                annotations_table_data, graph_relayoutData
            )
        if annotations_table_data is None:
            return dash.no_update
        else:
            debug_print("annotations_table_data after:", annotations_table_data)
            annotations_table_data[0]["timestamp"] = time_passed(annotations_store_data["starttime"])
            return (annotations_table_data, image_files_data)
    image_index_change = 0
    if cbcontext == "previous.n_clicks":
        image_index_change = -1
    if cbcontext == "next.n_clicks":
        image_index_change = 1
    image_files_data["current"] += image_index_change
    image_files_data["current"] %= len(image_files_data["files"])
    if image_index_change != 0:
        # image changed, update annotations_table_data with new data
        annotations_table_data = []
        filename = image_files_data["files"][image_files_data["current"]]
        #debug_print(annotations_store_data[filename])
        for sh in annotations_store_data[filename]["shapes"]:
            annotations_table_data.append(shape_to_table_row(sh))
        return (annotations_table_data, image_files_data)
    else:
        return dash.no_update
Any help is much appreciated!
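In case it helps frame the question: a minimal sketch of what I think the per-row stamping would need to look like (assuming time_passed() and the "starttime" key in annotations_store_data behave as in the linked app) is:

# sketch only: stamp every row whenever the table data is rebuilt,
# instead of writing to annotations_table_data[0] alone
timestamp = time_passed(annotations_store_data["starttime"])
for row in annotations_table_data:
    row["timestamp"] = timestamp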
Related
Hello everyone. I just want to ask if anyone has an idea how I can check whether a row from one sheet exists in the other one and, if not, highlight that row. I ran into an issue with the line-by-line verification; I tried this code:
old = old.set_index('id')
new = new.set_index('id')
resultTest = pd.concat([old,new],sort=False)
result = resultTest.stack().groupby(level=[0,1]).unique().unstack(1).copy()
result.loc[~result.index.isin(new.index),'status'] = 'deleted' # in old but not in new
result.loc[~result.index.isin(old.index),'status'] = 'added' # in new but not in old
idx = resultTest.stack().groupby(level=[0,1]).nunique() # cell changed
result.loc[idx.mask(idx <= 1).dropna().index.get_level_values(0),'status'] = 'modified'
result['status'] = result['status'].fillna('same')
result[result["status"] == 'deleted'].style.apply(highlight_max)
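The highlight_max styling function is not shown above; a minimal sketch of a row-level highlighter (the name and colour here are assumptions) could be:

def highlight_max(row):
    # colour every cell of the row; Styler.apply with axis=1 passes one row at a time
    return ['background-color: yellow'] * len(row)

# applied row-wise to the rows flagged as deleted
result[result["status"] == 'deleted'].style.apply(highlight_max, axis=1)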
I'm trying to understand why this pipeline writes no output to BigQuery.
What I'm trying to achieve is to calculate the USD index for the last 10 years, starting from observations of different currency pairs.
All the data is in BigQuery and I need to organize it and sort it in a chronological way (if there is a better way to achieve this, I'm glad to read it, because I think this might not be the optimal approach).
The idea behind the class Currencies() is to start grouping (and keep) the last observation of a currency pair (e.g., EURUSD), update all currency pair values as they "arrive", sort them chronologically and finally get the open, high, low and close values of the USD index for that day.
This code works in my Jupyter notebook and in Cloud Shell using DirectRunner, but when I use DataflowRunner it does not write any output. To see if I could figure it out, I tried to just create the data using beam.Create() and write it to BigQuery (which worked), and also to just read something from BQ and write it to another table (which also worked), so my best guess is that the problem is in the beam.CombineGlobally part, but I don't know what it is.
The code is as follows:
import logging
import collections
import apache_beam as beam
from datetime import datetime
SYMBOLS = ['usdjpy', 'usdcad', 'usdchf', 'eurusd', 'audusd', 'nzdusd', 'gbpusd']
TABLE_SCHEMA = "date:DATETIME,index:STRING,open:FLOAT,high:FLOAT,low:FLOAT,close:FLOAT"
class Currencies(beam.CombineFn):
    def create_accumulator(self):
        return {}

    def add_input(self,accumulator,inputs):
        logging.info(inputs)
        date,currency,bid = inputs.values()
        if '.' not in date:
            date = date+'.0'
        date = datetime.strptime(date,'%Y-%m-%dT%H:%M:%S.%f')
        data = currency+':'+str(bid)
        accumulator[date] = [data]
        return accumulator

    def merge_accumulators(self,accumulators):
        merged = {}
        for accum in accumulators:
            ordered_data = collections.OrderedDict(sorted(accum.items()))
            prev_date = None
            for date,date_data in ordered_data.items():
                if date not in merged:
                    merged[date] = {}
                if prev_date is None:
                    prev_date = date
                else:
                    prev_data = merged[prev_date]
                    merged[date].update(prev_data)
                    prev_date = date
                for data in date_data:
                    currency,bid = data.split(':')
                    bid = float(bid)
                    currency = currency.lower()
                    merged[date].update({
                        currency:bid
                    })
        return merged

    def calculate_index_value(self,data):
        return data['usdjpy']*data['usdcad']*data['usdchf']/(data['eurusd']*data['audusd']*data['nzdusd']*data['gbpusd'])

    def extract_output(self,accumulator):
        ordered = collections.OrderedDict(sorted(accumulator.items()))
        index = {}
        for dt,currencies in ordered.items():
            if not all([symbol in currencies.keys() for symbol in SYMBOLS]):
                continue
            date = str(dt.date())
            index_value = self.calculate_index_value(currencies)
            if date not in index:
                index[date] = {
                    'date':date,
                    'index':'usd',
                    'open':index_value,
                    'high':index_value,
                    'low':index_value,
                    'close':index_value
                }
            else:
                max_value = max(index_value,index[date]['high'])
                min_value = min(index_value,index[date]['low'])
                close_value = index_value
                index[date].update({
                    'high':max_value,
                    'low':min_value,
                    'close':close_value
                })
        return index
def main():
    query = """
    select date,currency,bid from data_table
    where date(date) between '2022-01-13' and '2022-01-16'
    and currency like ('%USD%')
    """
    options = beam.options.pipeline_options.PipelineOptions(
        temp_location = 'gs://PROJECT/temp',
        project = 'PROJECT',
        runner = 'DataflowRunner',
        region = 'REGION',
        num_workers = 1,
        max_num_workers = 1,
        machine_type = 'n1-standard-1',
        save_main_session = True,
        staging_location = 'gs://PROJECT/stag'
    )
    with beam.Pipeline(options = options) as pipeline:
        inputs = (pipeline
            | 'Read From BQ' >> beam.io.ReadFromBigQuery(query=query,use_standard_sql=True)
            | 'Accumulate' >> beam.CombineGlobally(Currencies())
            | 'Flat' >> beam.ParDo(lambda x: x.values())
            | beam.io.Write(beam.io.WriteToBigQuery(
                table = 'TABLE',
                dataset = 'DATASET',
                project = 'PROJECT',
                schema = TABLE_SCHEMA))
        )

if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    main()
The way I execute this is from the shell, using python3 -m first_script (is this the way I should run these batch jobs?).
What am I missing or doing wrong? This is my first attempt to use Dataflow, so I'm probably making every mistake in the book.
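For context on the launch question: a common pattern (a sketch only, not necessarily what is intended here; the flag values are placeholders) is to let PipelineOptions parse the command line, so the same module can run under DirectRunner or DataflowRunner:

# sketch: argv-driven pipeline options (all flag values are placeholders)
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

def main(argv=None):
    # when argv is None, PipelineOptions falls back to sys.argv,
    # picking up flags such as --runner, --project, --region, --temp_location
    options = PipelineOptions(argv)
    with beam.Pipeline(options=options) as pipeline:
        ...  # transforms go here

if __name__ == '__main__':
    main()

# launched as, for example:
# python3 -m first_script --runner=DataflowRunner --project=PROJECT --region=REGION --temp_location=gs://PROJECT/temp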
For whoever it may help: I faced a similar problem, but I had already used the same code for a different flow that had Pub/Sub as input, where it worked flawlessly, whereas with a file-based input it simply did not. After a lot of experimenting I found that in the options I changed the flag
options = PipelineOptions(streaming=True, ..
to
options = PipelineOptions(streaming=False,
as of course it is not a streaming source; it's a bounded source, a batch. After this change I found my rows in the BigQuery table. Once it had finished, it even stopped the pipeline, as you would expect for a batch operation. Hope this helps.
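For reference, the same setting can also be expressed through StandardOptions (a small sketch; nothing here is project-specific):

from apache_beam.options.pipeline_options import PipelineOptions, StandardOptions

options = PipelineOptions()
# bounded inputs (files, BigQuery query results) should run as a batch job,
# so the streaming flag must not be left set to True
options.view_as(StandardOptions).streaming = False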
I am currently having an issue while trying to store data in a list (using dataclasses). When I print the data inside the list from within the function (PullIncursionData()), it responds with a set of numbers (never the same, which is expected given their nature). When I print it after calling the function and storing its return value in a variable, it somehow prints only the same number.
I cannot share the numbers, as they update with EVE Online's API, so the only way to see them is to run it locally and read the first list yourself.
The repository is Here: https://github.com/AtherActive/EVEAPI-Demo
Heads up! Inside main.py (the file with the issue; a snippet is down below) there are more functions. All functions from line 90 onward are important; the rest can be ignored for this question, as they do not interact with the other functions.
def PullIncursionData():
    #Pulls data from URL and converts it into JSON
    url = 'https://esi.evetech.net/latest/incursions/?datasource=tranquility'
    data = rq.get(url)
    jsData = data.json()
    #Init var to store incursions
    incursions = []
    #Set lenght for loop. yay
    length = len(jsData)
    # Every loop incursion data will be read by __parseIncursionData(). It then gets added to var Incursions.
    for i in range(length):
        # Add data to var Incursion.
        incursions.append(__parseIncursionData(jsData, i))
        # If Dev mode, print some debug. Can be toggled in settings.py
        if settings.developerMode == 1:
            print(incursions[i].constellation_id)
    return incursions

# Basically parses the input data in a decent manner. No comments needed really.
def __parseIncursionData(jsData, i):
    icstruct = stru.Incursion
    icstruct.constellation_id = jsData[i]['constellation_id']
    icstruct.constellation_name = 'none'
    icstruct.staging = jsData[i]['staging_solar_system_id']
    icstruct.region_name = ResolveSystemNames(icstruct.constellation_id, 'con-reg')
    icstruct.status = jsData[i]['state']
    icstruct.systems_id = jsData[i]['infested_solar_systems']
    icstruct.systems_names = ResolveSystemNames(jsData[i]['infested_solar_systems'], 'system')
    return icstruct

# Resolves names for systems, regions and constellations. Still WIP.
def ResolveSystemNames(id, mode='constellation'):
    #init value
    output_name = 'none'
    # If constellation, pull data and find region name.
    if mode == 'con-reg':
        url = 'https://www.fuzzwork.co.uk/api/mapdata.php?constellationid={}&format=json'.format(id)
        data = rq.get(url)
        jsData = data.json()
        output_name = jsData[0]['regionname']
    # Pulls system name form Fuzzwork.co.uk.
    elif mode == 'system':
        #Convert output to a list.
        output_name = []
        lenght = len(id)
        # Pulls system name from Fuzzwork. Not that hard.
        for i in range(lenght):
            url = 'https://www.fuzzwork.co.uk/api/mapdata.php?solarsystemid={}&format=json'.format(id[i])
            data = rq.get(url)
            jsData = data.json()
            output_name.append(jsData[i]['solarsystemname'])
    return output_name

icdata = PullIncursionData()
print('external data check:')
length = len(icdata)
for i in range(length):
    print(icdata[i].constellation_id)
structures.py (custom file)
#dataclass
class Incursion:
    constellation_id = int
    constellation_name = str
    staging = int
    staging_name = str
    systems_id = list
    systems_names = list
    region_name = str
    status = str

    def ___init___(self):
        self.constellation_id = -1
        self.constellation_name = 'undefined'
        self.staging = -1
        self.staging_name = 'undefined'
        self.systems_id = []
        self.systems_names = []
        self.region_name = 'undefined'
        self.status = 'unknown'
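For comparison, the behaviour described above (every entry printing the same number) is typical when class attributes are shared instead of per-instance values. A sketch of how the same structure could be written as an actual dataclass and instantiated once per incursion is below (field defaults are illustrative):

from dataclasses import dataclass, field

@dataclass  # note the leading @, not a # comment
class Incursion:
    constellation_id: int = -1
    constellation_name: str = 'undefined'
    staging: int = -1
    staging_name: str = 'undefined'
    systems_id: list = field(default_factory=list)
    systems_names: list = field(default_factory=list)
    region_name: str = 'undefined'
    status: str = 'unknown'

def __parseIncursionData(jsData, i):
    incursion = Incursion()  # a new instance per call, so entries no longer share values
    incursion.constellation_id = jsData[i]['constellation_id']
    incursion.staging = jsData[i]['staging_solar_system_id']
    incursion.status = jsData[i]['state']
    incursion.systems_id = jsData[i]['infested_solar_systems']
    return incursion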
I have written a function that manually creates separate dataframes for each participant in the main dataframe. However, I'm trying to write it so that it's more automated as participants will be added to the dataframe in the future.
My original function:
def separate_participants(main_df):
    S001 = main_df[main_df['participant'] == 'S001']
    S001.name = "S001"
    S002 = main_df[main_df['participant'] == 'S002']
    S002.name = "S002"
    S003 = main_df[main_df['participant'] == 'S003']
    S003.name = "S003"
    S004 = main_df[main_df['participant'] == 'S004']
    S004.name = "S004"
    S005 = main_df[main_df['participant'] == 'S005']
    S005.name = "S005"
    S006 = main_df[main_df['participant'] == 'S006']
    S006.name = "S006"
    S007 = main_df[main_df['participant'] == 'S007']
    S007.name = "S007"
    participants = (S001, S002, S003, S004, S005, S006, S007)
    participant_names = (S001.name, S002.name, S003.name, S004.name, S005.name, S006.name, S007.name)
    return participants, participant_names
However, when I try and change this I get a KeyError for the name of the participant in the main_df. The code is as follows:
def separate_participants(main_df):
    participant_list = list(main_df.participant.unique())
    participants = []
    for participant in participant_list:
        name = participant
        temp_df = main_df[main_df[participant] == participant]
        name = temp_df
        participants.append(name)
    return participants
The error I get: KeyError: 'S001'
I can't seem to figure out what I'm doing wrong, that means it works in the old function but not the new one. The length of the objects in the dataframe and the list are the same (4) so there are no extra characters.
Any help/pointers would be greatly appreciated!
Thanks @Iguananaut for the answer:
Your DataFrame has a column named 'participant' but you're indexing it with the value of the variable participant which is presumably not a column in your DataFrame. You probably wanted main_df['participant']. Most likely the KeyError came with a "traceback" leading back to the line temp_df = main_df[main_df[participant] == participant] which suggests you should examine it closely.
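Putting that fix into the loop version, a sketch that also keeps the two return values of the original function might look like:

def separate_participants(main_df):
    # unique participant codes, e.g. 'S001', 'S002', ...
    participant_list = list(main_df['participant'].unique())
    # index the 'participant' column (the fix), then compare against each code
    participants = [main_df[main_df['participant'] == p] for p in participant_list]
    return participants, participant_list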
I've looked through as many answers on this subject as I could find, and all suggested that it's a global-local conflict. I can't see how this would apply in my case, but please do explain. Here's the error:
"local variable 'CompletelyUniqueName' referenced before assignment"
and here is the code, a function I call from another script:
def geopixsum(filename):
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case Geotiff)
    if filename.endswith('.tif'):
        # open the image
        try:
            inDs = gdal.Open(filename)
        except:
            print 'Could not open ',file,'\n'
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0,0,cols,rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print 'sum = ',CompletelyUniqueName
        inDs = None
    return CompletelyUniqueName
This code worked when it was not a function but just a script on its own. Again, I know this makes it seem like a global-local issue, but given the name I've assigned the variable I think I've gone to great enough lengths to avoid a conflict.
You should either define a default value for CompletelyUniqueName (for the case where filename.endswith('.tif') is False):
def geopixsum(filename):
    CompletelyUniqueName = 0
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
    return CompletelyUniqueName
Or return inside the if statement:
def geopixsum(filename):
    if filename.endswith('.tif'):
        ...
        CompletelyUniqueName = sumvals.sum()
        return CompletelyUniqueName
The simplest fix:
def geopixsum(filename):
    CompletelyUniqueName = 0  # or None, or anything you want to return
                              # if the file is not a tif
    # register all of the GDAL drivers
    gdal.AllRegister()
    # Check file type (in this case Geotiff)
    if filename.endswith('.tif'):
        # open the image
        try:
            inDs = gdal.Open(filename)
        except:
            print 'Could not open ',file,'\n'
        # get image size
        rows = inDs.RasterYSize
        cols = inDs.RasterXSize
        # read band 1 into data
        band1 = inDs.GetRasterBand(1)
        data = band1.ReadAsArray(0,0,cols,rows)
        # get nodata value
        nandat = band1.GetNoDataValue()
        sumvals = data[np.where(np.logical_not(data == nandat))]
        CompletelyUniqueName = sumvals.sum()
        print 'sum = ',CompletelyUniqueName
        inDs = None
    return CompletelyUniqueName