Delete features with PyQGIS in a loop - python

I have been trying unsuccessfully for months to write a function that compares a feature with the other features in a shapefile (shp) layer and, if certain conditions are fulfilled, deletes the feature in the loop. I need it for my final project. Two conditions must hold for a feature to be deleted: the feature overlaps the other one, and it is older than the feature that is in the loop.
def superpuestos2(capa, fecha, geom, id):
    """Delete the features of ``capa`` that overlap ``geom`` and are not newer.

    Every feature whose ``id`` attribute differs from ``id`` is compared
    with the reference geometry. It is deleted when its intersection with
    the reference covers more than 30 % of the reference area and the
    reference date is on or after the feature's ``fecha`` attribute.

    Args:
        capa: Layer object; ``getFeatures()`` and
            ``dataProvider().deleteFeatures()`` are called on it.
        fecha: Date of the reference feature as a ``"dd/mm/YYYY"`` string.
        geom: Geometry of the reference feature; ``area()`` is called on it
            and it is passed to each feature geometry's ``intersection()``.
        id: Value of the ``id`` attribute of the reference feature. The
            name shadows the builtin but is kept for existing callers.

    Raises:
        ValueError: If ``fecha`` or a feature's ``fecha`` attribute does
            not match ``"%d/%m/%Y"``.
    """
    fecha_ref = datetime.strptime(fecha, "%d/%m/%Y")
    # The reference area does not change between features: compute it once.
    area_ref = geom.area()

    listaborrar = []
    # A reference geometry without area cannot be overlapped by a fraction
    # of itself; skipping the scan avoids a ZeroDivisionError.
    if area_ref > 0:
        for f in capa.getFeatures():
            if f.attribute('id') == id:
                continue  # never compare the reference feature with itself
            fecha_f = datetime.strptime(f.attribute('fecha'), "%d/%m/%Y")
            area_comun = f.geometry().intersection(geom).area()
            fraccion = area_comun / area_ref
            if fraccion > 0.3 and fecha_ref >= fecha_f:
                print("intersecta")
                listaborrar.append(f.id())

    print(listaborrar)
    # Only touch the data provider when there is something to delete.
    if listaborrar:
        capa.dataProvider().deleteFeatures(listaborrar)

Related

Make a filter using a column from df in another df in python

I'm using this dataframe:
# Load the house-sales dataset into a DataFrame.
url = 'https://raw.githubusercontent.com/ipauchner/DNC/main/kc_house_data.csv'
df = pd.read_csv(filepath_or_buffer=url)
After some analysis I managed to isolate 25 rows from this df. The analysis was as follows:
# Find the repeated ids, i.e. the houses that were sold more than once.
id_repetido = pd.concat(g for x, g in df.groupby('id') if len(g) > 1)
id_repetido

# Split the repeated sales into the first sale of each house (venda1) and
# the later sale(s) (venda2).
# NOTE(review): id 795000620 is excluded by hand from both frames -
# presumably because it has more than two sales; confirm.
venda2 = id_repetido.duplicated(subset=['id'], keep='first')
venda1 = id_repetido[~venda2]
venda1 = venda1[venda1.id != 795000620]
venda1
venda2 = id_repetido[venda2]
venda2 = venda2[venda2.id != 795000620]
venda2
venda1['id'].value_counts().sort_values()
venda2['id'].value_counts().sort_values()

# Join both sales of each house by id. A plain merge matches ids exactly;
# merge_asof would silently pair an id with the nearest lower id whenever
# the exact id were missing on the right-hand side.
lucro_prej = pd.merge(venda1, venda2, on='id')
lucro_prej = lucro_prej.loc[:, ['id', 'price_x', 'price_y']]
lucro_prej = lucro_prej.rename(
    columns={'price_x': 'primeira_venda', 'price_y': 'segunda_venda'}
)
lucro_prej

# Absolute profit or loss between the two sales.
lucro_prej['lucro/prejuízo'] = (
    lucro_prej['segunda_venda'] - lucro_prej['primeira_venda']
)
# Profit or loss as a percentage of the first sale price.
lucro_prej['variação'] = (
    (lucro_prej['segunda_venda'] - lucro_prej['primeira_venda'])
    / lucro_prej['primeira_venda'] * 100
).round(decimals=2)
# Sort in descending order so the largest gains come first.
lucro_prej.sort_values(by=['variação'], ascending=False, inplace=True)
lucro_prej

# Keep the 25 rows with the highest percentage gain.
maiores_lucros = lucro_prej.head(25)
maiores_lucros
This generated another df (maiores_lucros) with 25 rows.
What I did next was apply multiple filters to the original df (df), for example a range of bathrooms and a range of bedrooms. I got this part working with the following code:
# Houses with more than 2 and fewer than 6 bedrooms ...
quartos_ok = (df.bedrooms > 2) & (df.bedrooms < 6)
# ... and between 1 and 3 bathrooms (both inclusive).
banheiros_ok = (df.bathrooms >= 1) & (df.bathrooms <= 3)
lista_casas = df[quartos_ok & banheiros_ok]
But what I need is a filter so that the ids in the maiores_lucros df do not appear in lista_casas. I tried the following:
# Ids of the most profitable houses, to be excluded from the selection.
id_filtrar = maiores_lucros['id'].tolist()
id_filtrar
# `df.id != <list>` compares the column with the list element by element and
# raises "Lengths must match to compare" when the sizes differ. A membership
# test needs `isin`; `~` negates it to keep the rows whose id is NOT listed.
# NOTE(review): the text asks to filter lista_casas, but the attempt filters
# df; df is kept here - swap in lista_casas if that is the intent.
lista_casas2 = df[~df.id.isin(id_filtrar)]
lista_casas2
But it returns the following error:
ValueError: ('Lengths must match to compare', (21528,), (25,))
Is there any way to make this filter?

Include precedence in SPT algorithm scheduling

How can I include a job precedence vector within this code?
In the Prec list, I specify by index which job must go before another, but how can I evaluate this condition so that I can rearrange the allocation of jobs?
import numpy as np
# Number of machines.
M = 5
# Processing time of each job; the job id is its index in this array.
Pj = np.array([99, 184, 80, 180, 51, 69, 129, 152, 168, 171])
# Precedence pairs given as job indices.
# NOTE(review): Prec is declared but the heuristic below never reads it.
Prec = [[9, 2], [2, 8], [4, 2]]
J = np.arange(len(Pj))

# Order the job indices by processing time (shortest first).
Pj_SI = np.argsort(Pj)
Pj_SA = Pj[Pj_SI]
J_Order = J[Pj_SI]
print(np.argsort(J_Order))

# SPT heuristic: deal the sorted jobs to the machines in round-robin order.
M_j = {m: [] for m in range(M)}    # job ids assigned to each machine
M_pj = {m: [] for m in range(M)}   # their processing times
M_Cj = {m: [] for m in range(M)}   # their completion times
# The loop variable is deliberately not called Pj: reusing that name would
# overwrite the processing-time array with a single scalar.
for i, p in enumerate(Pj_SA):
    m = i % M
    previous_completion = M_Cj[m][-1] if M_Cj[m] else 0
    M_pj[m].append(p)
    M_j[m].append(J_Order[i])
    M_Cj[m].append(previous_completion + p)
print("Processing Time in Machines", M_pj)
print("Assignment Order", M_j)

# The load of a machine is the completion time of its last job; a machine
# that received no job has load 0.
Cmax = [M_Cj[m][-1] if M_Cj[m] else 0 for m in range(M)]
Makespan = max(Cmax)
print('Machines Load: ', Cmax)
print('Makespan SPT:', Makespan)

How I can set a specific cell from excel in python?

I'm writing a Python function that has to create a matrix in Excel. For that I need to know how to write values read from the keyboard into specific Excel cell positions, so that the matrix is built from those values.
The code that I have right now it is here:
import sys
import openpyxl as opxl
def crear_menu():
    """Show the main menu and run the option the user picks.

    Options 1-3 call ``min_nodos``, ``dijkstra`` and ``kruskal``; option 4
    exits the program. Any other answer, including a non-numeric one,
    prints an error and the menu is shown again.
    """
    # Restored as one literal: the original was split across two source
    # lines, which is a syntax error for a single-quoted string.
    prompt = ("Elija una opción \n 1.Crear parámetros \n "
              "2.Aplicar Dijkstra \n 3.Aplicar Kruskal \n 4.Salir")
    # Loop instead of recursing so repeated mistakes do not grow the stack.
    while True:
        try:
            menu = int(input(prompt))
        except ValueError:
            menu = None  # falls through to the invalid-option branch
        if menu == 1:
            min_nodos()
        elif menu == 2:
            dijkstra()
        elif menu == 3:
            kruskal()
        elif menu == 4:
            sys.exit()
        else:
            print("\n ERROR: Elija una opción válida.")
            continue
        return
def crear_matriz_adyacente2(cant_nodos):
    """Ask for ``cant_nodos`` node names and write them as matrix headers.

    Each accepted name is written twice in the sheet
    ``matriz_de_adyacencia``: as a row header in column 1 and as a column
    header in row 1. openpyxl rows and columns are 1-based, so the first
    node lands in cells A2 and B1. The workbook is saved as
    ``matriz_de_adyacencia.xlsx``.

    Args:
        cant_nodos: Number of distinct node names to read.
    """
    lista_nodos = []
    libro = opxl.Workbook()
    pagina = libro.active
    pagina.title = "matriz_de_adyacencia"
    i = 0
    while i < cant_nodos:
        num = str(i + 1)
        nodo = str(input("Ingresar nombre del nodo " + num + ":"))
        # The original tested `nodo < 0`, which compares a str with an int
        # and raises TypeError as soon as a duplicate name is entered. A
        # blank name is treated as the invalid case instead.
        if not nodo.strip():
            print("ERROR: Nodo no valido")
        elif nodo in lista_nodos:
            print("Error: Nodo existente. \n Ingrese otro nombre: ")
        else:
            lista_nodos.append(nodo)
            # Row header (column A) and column header (row 1); offset by 2
            # so that cell A1 stays empty.
            pagina.cell(row=i + 2, column=1, value=nodo)
            pagina.cell(row=1, column=i + 2, value=nodo)
            i += 1
    libro.save("matriz_de_adyacencia.xlsx")
def min_nodos():
    """Ask for the number of nodes (at least 6) and build the matrix.

    The question is repeated until the answer is an integer greater than
    or equal to 6; a non-numeric answer is reported with the same error
    message instead of raising ``ValueError``.
    """
    # Restored as one literal: the original first prompt was split across
    # two source lines, which is a syntax error.
    prompt = "Elija la cantidad de nodos a utilizar (mínimo 6):"
    while True:
        try:
            cant_nodos = int(input(prompt))
        except ValueError:
            cant_nodos = None
        if cant_nodos is not None and cant_nodos >= 6:
            break
        print("ERROR: Elija mínimo 6 nodos y que sea entero positivo.")
    # NOTE(review): the only matrix builder visible in this file is named
    # crear_matriz_adyacente2; confirm that crear_matriz_adyacente exists,
    # otherwise this call raises NameError.
    crear_matriz_adyacente(cant_nodos)
In the part marked with the red box I'm trying to build the matrix, but I don't know the best way to address a specific Excel cell. That is, I don't know whether this code actually refers to cell A2.
Thank you for your help.

WARN: Tried to pass invalid video frame, marking as broken: Your frame has data type int64, but we require uint8

I am doing some Udemy AI courses and came across one that "teaches" a two-dimensional cheetah how to walk. I was doing the exercises on my computer, but it takes too much time, so I decided to use Google Cloud to run the code and check the results some hours later. However, when I run the code I get the following error: "WARN: Tried to pass invalid video frame, marking as broken: Your frame has data type int64, but we require uint8 (i.e. RGB values from 0-255)".
After the code is executed, I see into the folder and I don't see any videos (just the meta info).
Some more info (if it helps):
I have a 1 CPU (4g), SSD Ubuntu 16.04 LTS
I have not tried anything yet to solve it because I don't know what to try. I'm looking for solutions on the web, but I have found nothing I could try.
This is the code
import os
import numpy as np
import gym
from gym import wrappers
import pybullet_envs
class Hp:
    """Hyperparameters of the random-search training run."""

    def __init__(self):
        # Environment to train on and seed for NumPy's random generator.
        self.env_name = 'HalfCheetahBulletEnv-v0'
        self.seed = 1

        # Number of training iterations and step cap of one episode.
        # (The attribute name "episode_lenght" is spelled as the rest of
        # the file expects it.)
        self.nb_steps = 1000
        self.episode_lenght = 1000

        # Step size of the policy update and scale of the perturbations.
        self.learning_rate = 0.02
        self.noise = 0.03

        # Perturbations sampled per iteration and how many of them are
        # kept for the update; the second cannot exceed the first.
        self.nb_directions = 32
        self.nb_best_directions = 32
        assert self.nb_best_directions <= self.nb_directions
class Normalizer:
    """Running per-input mean and variance used to standardise states."""

    def __init__(self, nb_inputs):
        # One independent zero vector per running statistic.
        self.n, self.mean, self.mean_diff, self.var = (
            np.zeros(nb_inputs) for _ in range(4)
        )

    def observe(self, x):
        """Fold the observation ``x`` into the running statistics."""
        self.n += 1.
        previous_mean = self.mean.copy()
        self.mean += (x - self.mean) / self.n
        # Online update of the numerator of the variance.
        self.mean_diff += (x - previous_mean) * (x - self.mean)
        # Online variance, floored at 1e-2.
        self.var = np.clip(self.mean_diff / self.n, 1e-2, None)

    def normalize(self, inputs):
        """Return ``inputs`` centred on the mean and scaled by the std."""
        return (inputs - self.mean) / np.sqrt(self.var)
class Policy:
    """Linear policy: the action is ``theta`` times the state.

    ``evaluate`` (with a direction), ``sample_deltas`` and ``update`` read
    the module-level ``hp`` object.
    """

    def __init__(self, input_size, output_size):
        # Weight matrix with one row per output and one column per input.
        self.theta = np.zeros((output_size, input_size))

    def evaluate(self, input, delta=None, direction=None):
        """Return the action for ``input``.

        With ``direction`` None the unperturbed weights are used; with
        ``'positive'`` the weights are shifted by ``+hp.noise * delta``;
        with any other value they are shifted by ``-hp.noise * delta``.
        """
        if direction is None:
            weights = self.theta
        elif direction == 'positive':
            weights = self.theta + hp.noise * delta
        else:
            weights = self.theta - hp.noise * delta
        return weights.dot(input)

    def sample_deltas(self):
        """Return ``hp.nb_directions`` random matrices shaped like theta."""
        shape = self.theta.shape
        deltas = []
        for _ in range(hp.nb_directions):
            deltas.append(np.random.randn(*shape))
        return deltas

    def update(self, rollouts, sigma_r):
        """Move theta along the reward-weighted sum of the perturbations.

        ``rollouts`` yields ``(r_pos, r_neg, delta)`` triples; ``sigma_r``
        divides the step together with ``hp.nb_best_directions``.
        """
        step = sum(
            ((r_pos - r_neg) * d for r_pos, r_neg, d in rollouts),
            np.zeros(self.theta.shape),
        )
        scale = hp.learning_rate / (hp.nb_best_directions * sigma_r)
        self.theta += scale * step
def explore(env, normalizer, policy, direction=None, delta=None):
    """Run one episode and return the sum of its clipped rewards.

    Each state is fed to ``normalizer.observe`` and normalised before the
    policy is evaluated. Every reward is clipped to [-1, 1]. The episode
    ends when the environment reports ``done`` or after
    ``hp.episode_lenght`` steps (``hp`` is the module-level object).
    """
    observation = env.reset()
    finished = False
    steps_taken = 0.
    # Running total of the clipped rewards (could also be an average).
    total_reward = 0
    while not (finished or steps_taken >= hp.episode_lenght):
        normalizer.observe(observation)
        observation = normalizer.normalize(observation)
        action = policy.evaluate(observation, delta, direction)
        observation, reward, finished, _ = env.step(action)
        # Clip from above first, then from below.
        reward = min(reward, 1)
        reward = max(reward, -1)
        total_reward += reward
        steps_taken += 1
    return total_reward
def train(env, policy, normalizer, hp):
    """Train ``policy`` by random search for ``hp.nb_steps`` iterations.

    Each iteration samples perturbations, evaluates every one in the
    positive and the negative direction, keeps the
    ``hp.nb_best_directions`` perturbations with the highest
    ``max(r_pos, r_neg)``, updates the policy with them and prints the
    reward of the updated, unperturbed policy.

    Args:
        env: Environment passed through to ``explore``.
        policy: Object providing ``sample_deltas()`` and ``update()``.
        normalizer: State normaliser passed through to ``explore``.
        hp: Hyperparameters; ``nb_steps``, ``nb_directions`` and
            ``nb_best_directions`` are read here.
    """
    for step in range(hp.nb_steps):
        # Sample the perturbations and evaluate them in both directions.
        deltas = policy.sample_deltas()
        positive_rewards = [
            explore(env, normalizer, policy, direction='positive', delta=deltas[k])
            for k in range(hp.nb_directions)
        ]
        negative_rewards = [
            explore(env, normalizer, policy, direction='negative', delta=deltas[k])
            for k in range(hp.nb_directions)
        ]

        # Standard deviation over all rewards of this iteration.
        all_rewards = np.array(positive_rewards + negative_rewards)
        sigma_r = all_rewards.std()

        # Rank the directions by max(r_pos, r_neg), best first. Without
        # reverse=True the slice would keep the WORST directions whenever
        # nb_best_directions < nb_directions.
        scores = {
            k: max(r_pos, r_neg)
            for k, (r_pos, r_neg) in enumerate(zip(positive_rewards, negative_rewards))
        }
        order = sorted(scores, key=scores.get, reverse=True)[:hp.nb_best_directions]
        rollouts = [(positive_rewards[k], negative_rewards[k], deltas[k]) for k in order]

        # Update the policy with the selected rollouts.
        policy.update(rollouts, sigma_r)

        # Report the reward of the policy after the update.
        reward_evaluation = explore(env, normalizer, policy)
        print('Paso: ', step, 'Lejania: ', reward_evaluation)
def mkdir(base, name):
    """Return ``base/name``, creating the directory tree if it is missing."""
    target = os.path.join(base, name)
    if os.path.exists(target):
        return target
    os.makedirs(target)
    return target
# Output folders: exp/brs and, inside it, the folder handed to the monitor.
work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
# Module-level hyperparameters; Policy and explore read this global `hp`.
hp = Hp()
# Seed NumPy before any perturbation is sampled.
np.random.seed(hp.seed)
env = gym.make(hp.env_name)
# Wrap the environment in gym's Monitor, pointed at monitor_dir.
# NOTE(review): this wrapper is presumably what records the videos and emits
# the "invalid video frame" warning - confirm against the installed gym.
env = wrappers.Monitor(env, monitor_dir, force = True)
# Policy and normaliser sizes come from the first dimension of the
# observation and action spaces.
nb_inputs = env.observation_space.shape[0]
nb_outputs = env.action_space.shape[0]
policy = Policy(nb_inputs, nb_outputs)
normalizer = Normalizer(nb_inputs)
train(env, policy, normalizer, hp)
In the end, I think it was either an old version of ffmpeg or another compatibility issue (this was my first try with Linux and I could not update ffmpeg properly). I changed my virtual environment from Ubuntu 16.04 to Debian and it worked perfectly.

How to calculate time since last event on a temporal serie?

Working with Python 3.5 and Pandas 0.19.2
My problem is the following: I have a data frame with several "IdActivo" values, sorted by date and time in ascending order. It has a field called Result whose values are NaN or 1. For each row, I need to calculate how long ago the result field was last 1 (and, more generally, the last N times it was 1) for that particular "IdActivo".
This is my dataframe:
import pandas as pd
import numpy as np
from datetime import datetime
# Sample data: two assets (IdActivo 1 and 2) with ten dated records each.
# Lines inside brackets continue implicitly, so no backslashes are needed.
# The stray trailing space in '1990-01-23 ' was removed: it is not part of
# the date and may be rejected when pd.to_datetime parses the column with a
# single inferred format.
df = pd.DataFrame({
    'IdActivo': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
    'Fecha': ['1990-01-02', '1990-01-03', '1990-01-04', '1990-01-05', '1990-01-08',
              '1990-01-09', '1990-01-10', '1990-01-11', '1990-01-12', '1990-01-15',
              '1990-01-16', '1990-01-17', '1990-01-18', '1990-01-19', '1990-01-22',
              '1990-01-23', '1990-01-24', '1990-01-25', '1990-01-26', '1990-01-29'],
    'Hora': ['10:10:00', '10:11:00', '10:12:00', '10:13:00', '10:10:00',
             '10:10:00', '10:17:00', '10:14:00', '11:14:00', '12:14:00',
             '10:10:00', '10:20:00', '14:22:00', '15:22:00', '16:22:00',
             '10:10:00', '00:00:00', '00:00:00', '00:00:00', '00:00:00'],
})
def Inicio():
    """Flag the rows of the module-level ``df`` and print the result.

    Rows that fall on weekday 2 at 10:10:00 are flagged by
    ``Fecha_Dia_Hora``; the returned frame is printed.
    """
    # Here 2 means Tuesday: Monday is 0 by default and 1 is added to it.
    numDia = 2
    numHoraDia = '10:10:00'
    df_final = Fecha_Dia_Hora(
        df,
        inpfield="Fecha",
        oupfield="Dia_Semana",
        numHoraDia=numHoraDia,
        numDia=numDia,
        nomDiasSemanaHora=" Resultado",
    )
    print(df_final)
def Fecha_Dia_Hora(df, inpfield, oupfield, numHoraDia, numDia, nomDiasSemanaHora):
    """Add a weekday column and flag the rows matching a weekday and time.

    The input frame is not modified; a sorted copy is annotated and
    returned in the original index order.

    Args:
        df: Frame with at least the columns ``IdActivo``, ``Fecha``,
            ``Hora`` and ``inpfield``.
        inpfield: Name of the date column; it is converted to datetime.
        oupfield: Name of the new weekday column (Monday = 1 ... Sunday = 7).
        numHoraDia: Value of ``Hora`` a row must have to be flagged.
        numDia: Weekday number (same numbering as ``oupfield``) to flag.
        nomDiasSemanaHora: Name of the new flag column; it holds the
            string ``'1'`` on matching rows and NaN elsewhere.

    Returns:
        The annotated copy of ``df``, sorted by index.
    """
    ord_df = df.sort_values(by=['IdActivo', 'Fecha'])
    ord_df[inpfield] = pd.to_datetime(ord_df[inpfield])
    # dayofweek is 0 for Monday; shift it so that Monday is 1.
    ord_df[oupfield] = ord_df[inpfield].dt.dayofweek + 1
    # Object dtype so the string flag can be stored without an implicit
    # upcast from float; np.nan replaces the alias np.NaN removed in NumPy 2.
    ord_df[nomDiasSemanaHora] = pd.Series(np.nan, index=ord_df.index, dtype=object)
    coincide = (ord_df[oupfield] == numDia) & (ord_df.Hora == numHoraDia)
    # .loc replaces the .ix indexer, which was removed in pandas 1.0.
    ord_df.loc[coincide, nomDiasSemanaHora] = '1'
    return ord_df.sort_index()
def Fin():
    """Print the end-of-run marker."""
    mensaje = "FIN"
    print(mensaje)
def _main():
    """Run the example and then print the end marker."""
    Inicio()
    Fin()


# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    _main()
Here is an example derived from the dataframe shown in the code:
imagen
What functions must I investigate to get it?
Thanks
Angel

Categories