Understanding neural network architecture visually - Python

I am following this book and I am trying to visualize the network.
This part seems tricky to me and I am trying to get my head around it by visualizing it:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()

class Layer_Dense:
    # Layer initialization
    def __init__(self, n_inputs, n_neurons):
        # Initialize weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

    # Forward pass
    def forward(self, inputs):
        # Calculate output values from inputs, weights and biases
        self.output = np.dot(inputs, self.weights) + self.biases

# ReLU activation
class Activation_Relu:
    # Forward pass
    def forward(self, inputs):
        # Calculate output values from inputs
        self.output = np.maximum(0, inputs)

# Create dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)
# Create ReLU activation (to be used with the dense layer)
activation1 = Activation_Relu()
# Create second dense layer with 3 input features (from the previous layer) and 3 output values
dense2 = Layer_Dense(3, 3)

# Create dataset
X, y = spiral_data(samples=100, classes=3)

dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
My input data X is an array of 300 rows and 2 columns, meaning each of my 300 inputs is described by 2 values.
The Layer_Dense class is initialized with the parameters (2, 3), meaning there are 2 input features and 3 neurons.
At the moment my variables look like this:
X.shape # (300, 2)
X[:5]
# [[ 0. , 0. ],
# [ 0.00279763, 0.00970586],
# [-0.01122664, 0.01679536],
# [ 0.02998079, 0.0044075 ],
# [-0.01222386, 0.03851056]]
dense1.weights.shape
# (2, 3)
dense1.weights
# [[0.00862166, 0.00508044, 0.00461094],
# [0.00965116, 0.00796512, 0.00558731]]
dense1.biases
# [[0., 0., 0.]]
dense1.output.shape
# (300, 3)
print(dense1.output[:5])
# [[0.0000000e+00 0.0000000e+00 0.0000000e+00]
# [8.0659374e-05 4.3710388e-05 6.5012209e-05]
# [1.5923499e-04 6.9124777e-05 1.0470775e-04]
# [2.3033096e-04 1.9152602e-04 2.7749798e-04]
# [1.9318146e-04 3.1980115e-04 4.5189835e-04]]
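As a sanity check (a minimal sketch using the objects above), the dense layer's forward pass is just a matrix product plus the broadcast biases, so the shapes work out as (300, 2) @ (2, 3) -> (300, 3):
check = np.dot(X, dense1.weights) + dense1.biases  # (300, 2) @ (2, 3) -> (300, 3); the (1, 3) biases broadcast over rows
print(check.shape)                        # (300, 3)
print(np.allclose(check, dense1.output))  # True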
Does this configuration make my network look like so:
Where each of the 300 inputs has 2 features
Or like so:
Do I understand this correctly?
There are 300 inputs with 2 features each.
Each input is connected to 3 neurons in the first layer; since it is connected to 3 neurons, there are 3 weights.
Why is the shape of the weights (2, 3) instead of (300, 3), given that there are 300 inputs with 2 features each and each feature is connected to 3 neurons?
I have used this to draw networks.

Related


PyTorch: Sizes of tensors must match on 2 input neural network

I am attempting to recreate a 2 input neural network from this article: https://towardsdatascience.com/moving-from-keras-to-pytorch-f0d4fff4ce79
I have copied the network described in the post and adjusted it so that it fits my data. The first input comes from GloVe word embeddings, while the other is a set of numerical features about the text data.
class Net(nn.Module):
    def __init__(self, hidden_size, lin_size, embedding_matrix=embedding_weights):
        super(Net, self).__init__()

        # Initialize some parameters for your model
        self.hidden_size = hidden_size
        drp = 0.1

        # Layer 1: Embeddings.
        self.embedding = nn.Embedding(size_of_vocabulary, pretrained_embedding_dim)
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = False

        # Layer 2: Dropout1D(0.1)
        self.embedding_dropout = nn.Dropout2d(0.1)

        # Layer 3: Bidirectional CuDNNLSTM
        self.lstm = nn.LSTM(pretrained_embedding_dim, hidden_size, bidirectional=True, batch_first=True)

        # Layer 4: Bidirectional CuDNNGRU
        self.gru = nn.GRU(hidden_size * 2, hidden_size, bidirectional=True, batch_first=True)

        # Layer 7: A dense layer
        self.linear = nn.Linear(hidden_size * 6 + X2_train.shape[1], lin_size)
        self.relu = nn.ReLU()

        # Layer 8: A dropout layer
        self.dropout = nn.Dropout(drp)

        # Layer 9: Output dense layer with one output for our binary classification problem.
        self.out = nn.Linear(lin_size, 1)

    def forward(self, x):
        '''
        Here x[0] represents the first element of the input that is going to be passed.
        We are going to pass a tuple where the first element contains the sequences (x[0])
        and the second one is an additional feature vector (x[1]).
        '''
        h_embedding = self.embedding(x[0].long())
        h_embedding = torch.squeeze(self.embedding_dropout(torch.unsqueeze(h_embedding, 0)))
        # print("emb", h_embedding.size())

        h_lstm, _ = self.lstm(h_embedding)
        # print("lst", h_lstm.size())

        h_gru, hh_gru = self.gru(h_lstm)
        hh_gru = hh_gru.view(-1, 2 * self.hidden_size)
        print("gru", h_gru.size())
        print("h_gru", hh_gru.size())

        # Layer 5: is defined dynamically as an operation on tensors.
        avg_pool = torch.mean(h_gru, 1)
        max_pool, _ = torch.max(h_gru, 1)
        print("avg_pool", avg_pool.size())
        print("max_pool", max_pool.size())

        # The extra features you want to give to the model
        f = torch.tensor(x[1], dtype=torch.float).cuda()
        print("f", f.size())

        # Layer 6: A concatenation of the last state, maximum pool, average pool and
        # additional features
        conc = torch.cat((hh_gru, avg_pool, max_pool, f), 1)
        # print("conc", conc.size())

        # Passing conc through linear and relu ops
        conc = self.relu(self.linear(conc))
        conc = self.dropout(conc)
        out = self.out(conc)

        # Return the final output
        return out
And during runtime I get an error on the concatenation line:
RuntimeError: Sizes of tensors must match except in dimension 0. Got 33164 and 20 (The offending index is 0)
From the dimensions of the outputs, I can see where the problem lies, but I am not sure how I can fix it.
The data inputs to the network are:
torch.Size([20, 150])
torch.Size([33164, 40])
The sizes of each layer's output are:
gru torch.Size([20, 150, 80])
h_gru torch.Size([20, 80])
avg_pool torch.Size([20, 80])
max_pool torch.Size([20, 80])
f torch.Size([33164, 40])
For the example above, the batch size is 20, hidden_size is 40, the number of rows in the numerical feature data is 33164, and its feature size is 40.
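For reference, here is a minimal repro of the mismatch with dummy tensors of the sizes printed above (illustration only, not my actual data):
import torch
hh_gru   = torch.zeros(20, 80)
avg_pool = torch.zeros(20, 80)
max_pool = torch.zeros(20, 80)
f        = torch.zeros(33164, 40)
# torch.cat along dim=1 requires every other dimension to match,
# so dim 0 must be 20 for all four tensors; 33164 != 20 raises the RuntimeError
conc = torch.cat((hh_gru, avg_pool, max_pool, f), 1)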
Thanks for any help in advance

R: Error while fitting deep learning model for Anomaly Detection in Time Series

I found an example for DeepAnT (A Deep Learning Approach for Unsupervised Anomaly Detection in Time Series) and wanted to try it out.
So I tried to convert the Python script (https://github.com/swlee23/deep-learning-time-series-anomaly-detection/blob/master/deep-ant-main.ipynb) to R and got stuck because I get an error when fitting the model.
Code:
library(ggplot2)
library(rlist)
library(keras)
library(tensorflow)
library(readr)

### Anomaly Detection ###
anomaly_detector <- function(prediction_seq, ground_truth_seq){
  # calculate Euclidean distance between actual seq and predicted seq
  dist <- norm(ground_truth_seq - prediction_seq, type = "F")
  if (dist > anm_det_thr){
    return (TRUE)   # anomaly
  }
  else{
    return (FALSE)  # normal
  }
}
### Hyperparameters ###
w = 2000         # History window (number of time stamps taken into account), i.e., filter (kernel) size
p_w = 300        # Prediction window (number of time stamps required to be predicted)
n_features = 1 # Univariate time series
kernel_size = 2 # Size of filter in conv layers
num_filt_1 = 32 # Number of filters in first conv layer
num_filt_2 = 32 # Number of filters in second conv layer
num_nrn_dl = 40 # Number of neurons in dense layer
num_nrn_ol = p_w # Number of neurons in output layer
conv_strides = 1
pool_size_1 = 2 # Length of window of pooling layer 1
pool_size_2 = 2 # Length of window of pooling layer 2
pool_strides_1 = 2 # Stride of window of pooling layer 1
pool_strides_2 = 2 # Stride of window of pooling layer 2
epochs = 30
dropout_rate = 0.5 # Dropout rate in the fully connected layer
learning_rate = 2e-5
anm_det_thr = 0.8 # Threshold for classifying anomaly (0.5~0.8)
# Loading data
df_sine <- read_csv('https://raw.githubusercontent.com/swlee23/Deep-Learning-Time-Series-Anomaly-Detection/master/data/sinewave.csv')
### Data preprocessing ###
# split a univariate sequence into samples
split_sequence <- function(sequence){
  X <- list()
  y <- list()
  for(i in 1:length(sequence)){
    # find the end of this pattern
    end_ix <- i + w
    out_end_ix <- end_ix + p_w
    # check if we are beyond the sequence
    if (out_end_ix > length(sequence)){
      # print("if-break")
      break
    }
    # gather input and output parts of the pattern
    seq_x <- list(sequence[i:end_ix])
    seq_y <- list(sequence[end_ix:out_end_ix])
    X[length(X)+1] <- seq_x
    y[length(y)+1] <- seq_y
  }
  result <- list("x" = X, "y" = y)
  return(result)
}
# define input sequence
raw_seq = df_sine['sinewave'][[1]]
# split into samples
batch_ <- split_sequence(raw_seq)
batch_sample <- batch_$x
batch_label <- batch_$y
# tried to convert to matrix (did not change anything)
batch_sample_2 <- matrix(unlist(batch_sample), ncol = 2001, byrow = TRUE)
batch_label_2 <- matrix(unlist(batch_label), ncol = 301, byrow = TRUE)
### Generate model for predictor ###
model <- keras_model_sequential() %>%
  layer_conv_1d(filters = num_filt_1,
                kernel_size = kernel_size,
                strides = conv_strides,
                padding = 'valid',
                activation = 'relu',
                input_shape = c(w, n_features)) %>%
  layer_max_pooling_1d(pool_size = pool_size_1) %>%
  layer_conv_1d(filters = num_filt_2,
                kernel_size = kernel_size,
                strides = conv_strides,
                padding = 'valid',
                activation = 'relu') %>%
  layer_max_pooling_1d(pool_size = pool_size_2)
# Flatten tensor into a batch of vectors
# Input Tensor Shape: [batch_size, 0.25 * w, num_filt_1 * num_filt_2]
# Output Tensor Shape: [batch_size, 0.25 * w * num_filt_1 * num_filt_2]
model <- model %>% layer_flatten()
# Dense Layer (Output layer)
# Densely connected layer with 1024 neurons
# Input Tensor Shape: [batch_size, 0.25 * w * num_filt_1 * num_filt_2]
# Output Tensor Shape: [batch_size, 1024]
model <- model %>% layer_dense(units = num_nrn_dl, activation = 'relu')
# Dropout
# Prevents overfitting in deep neural networks
model <- model %>% layer_dropout(rate = dropout_rate)
# Output layer
# Input Tensor Shape: [batch_size, 1024]
# Output Tensor Shape: [batch_size, p_w]
model <- model %>% layer_dense(units = num_nrn_ol)
# Summarize model structure
summary(model)
### Configure model ###
model <- model %>% compile(optimizer = 'adam',
                           loss = 'mean_absolute_error')

### Training ###
model_fit <- model %>% fit(batch_sample_2,
                           batch_label_2,
                           epochs = epochs,
                           verbose = 1)
The error I get:
Error in py_call_impl(callable, dots$args, dots$keywords) :
ValueError: Error when checking input: expected conv1d_6_input to have 3 dimensions, but got array with shape (2701, 2001)
In Python the batch_sample looks like this:
[[0.8737364 ]
[0.90255357]
[0.92780878]
...
[0.7671179 ]
[0.80588467]
[0.84147098]]
The batch_label looks like this:
[[0.84147098 0.8737364 0.90255357 ... 0.72532366 0.7671179 0.80588467]
[0.8737364 0.90255357 0.92780878 ... 0.7671179 0.80588467 0.84147098]
[0.90255357 0.92780878 0.94940235 ... 0.80588467 0.84147098 0.8737364 ]
...
[0.80588467 0.84147098 0.8737364 ... 0.68066691 0.72532366 0.7671179 ]
[0.84147098 0.8737364 0.90255357 ... 0.72532366 0.7671179 0.80588467]
[0.8737364 0.90255357 0.92780878 ... 0.7671179 0.80588467 0.84147098]]
So I need to reshape the data for the fitting process, but everything I have tried does not work. I tried lists of lists (like in Python), matrices and vectors, but every time I get the dimension error.
How do I need to shape the data so that it matches the requirements of the fitting function?
Your input shape is incorrect: the model expects a 3-dimensional input of shape (n_samples, window_size, n_features).
In your code n_features equals 1, so all you need to do is reshape your current input into the shape the model expects (based on the input configuration):
(2701, 2001) -> (2701, 2001, 1)
To do that you can use a function like expand_dims from the listarrays package, e.g.
batch_sample_2 <- expand_dims(batch_sample_2, -1)

How deep a neural network is required for 12 inputs ranging from -5000 to 5000 in A3C reinforcement learning

I am trying to use A3C with LSTM for an environment where the state has 12 inputs, each ranging from -5000 to 5000.
I am using an LSTM layer of size 12, then 2 fully connected hidden layers of size 256, then one FC layer for the 3-dimensional action output and one FC layer for the value function.
The reward is in the range (-1, 1).
However, during initial training I am unable to get good results.
My question is: is this neural network good enough for this kind of environment?
Below is the code for the actor-critic model:
class ActorCritic(torch.nn.Module):
    def __init__(self, params):
        super(ActorCritic, self).__init__()
        self.state_dim = params.state_dim
        self.action_space = params.action_dim
        self.hidden_size = params.hidden_size
        state_dim = params.state_dim

        self.lstm = nn.LSTMCell(state_dim, state_dim)
        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        lst = [state_dim]
        for i in range(params.layers):
            lst.append(params.hidden_size)

        self.hidden = nn.ModuleList()
        for k in range(len(lst)-1):
            self.hidden.append(nn.Linear(lst[k], lst[k+1]))
        for layer in self.hidden:
            layer.apply(init_weights)

        self.critic_linear = nn.Linear(params.hidden_size, 1)
        self.critic_linear.apply(init_weights)
        self.actor_linear = nn.Linear(params.hidden_size, self.action_space)
        self.actor_linear.apply(init_weights)
        self.train()

    def forward(self, inputs):
        inputs, (hx, cx) = inputs
        inputs = inputs.reshape(1, -1)
        hx, cx = self.lstm(inputs, (hx, cx))
        x = hx
        for layer in self.hidden:
            x = torch.tanh(layer(x))
        return self.critic_linear(x), self.actor_linear(x), (hx, cx)

class Params():
    def __init__(self):
        self.lr = 0.0001
        self.gamma = 0.99
        self.tau = 1.
        self.num_processes = os.cpu_count()
        self.state_dim = 12
        self.action_dim = 3
        self.hidden_size = 256
        self.layers = 2
        self.epochs = 10
        self.lstm_layers = 1
        self.lstm_size = self.state_dim
        self.num_steps = 20
        self.window = 50
Since you have only 12 inputs, make sure you don't use too many parameters, and also try changing the activation function.
I don't use Torch, so I cannot comment on the model architecture in detail.
Why is your first layer an LSTM? Is your data a time series?
Try using only dense layers, for example:
1 dense layer with 12 neurons plus the output layer, or
2 dense layers with 12 neurons each plus the output layer.
As for the activation function, use leaky ReLU, since your data goes down to -5000; alternatively, you can make your data positive by adding 5000 to all data samples.
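For illustration only, an untested sketch of such a dense-only variant in PyTorch (since that is what the question uses), with 12 state inputs, two hidden layers of 12 neurons, leaky ReLU activations, and separate actor and critic heads:
import torch
from torch import nn

class SimpleActorCritic(nn.Module):
    # Hypothetical dense-only variant: 12 state inputs -> two hidden layers of 12 units
    # -> actor head (3 actions) and critic head (1 value), with leaky ReLU activations
    def __init__(self, state_dim=12, action_dim=3, hidden=12):
        super().__init__()
        self.body = nn.Sequential(
            nn.Linear(state_dim, hidden),
            nn.LeakyReLU(),
            nn.Linear(hidden, hidden),
            nn.LeakyReLU(),
        )
        self.critic_linear = nn.Linear(hidden, 1)
        self.actor_linear = nn.Linear(hidden, action_dim)

    def forward(self, state):
        x = self.body(state)
        return self.critic_linear(x), self.actor_linear(x)

# usage: value, logits = SimpleActorCritic()(torch.rand(1, 12))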

How to construct a network with two inputs in PyTorch

Suppose I want to have the general neural network architecture:
Input1 --> CNNLayer
                    \
                     ---> FCLayer ---> Output
                    /
Input2 --> FCLayer
Input1 is image data, input2 is non-image data. I have implemented this architecture in Tensorflow.
All the PyTorch examples I have found pass a single input through each layer. How can I define the forward function to process 2 inputs separately and then combine them in a middle layer?
By "combine them" I assume you mean to concatenate the two inputs.
Assuming you concat along the second dimension:
import torch
from torch import nn

class TwoInputsNet(nn.Module):
    def __init__(self):
        super(TwoInputsNet, self).__init__()
        self.conv = nn.Conv2d( ... )  # set up your layer here
        self.fc1 = nn.Linear( ... )   # set up first FC layer
        self.fc2 = nn.Linear( ... )   # set up the other FC layer

    def forward(self, input1, input2):
        c = self.conv(input1)
        f = self.fc1(input2)
        # now we can reshape `c` and `f` to 2D and concat them
        combined = torch.cat((c.view(c.size(0), -1),
                              f.view(f.size(0), -1)), dim=1)
        out = self.fc2(combined)
        return out
Note that when you define the number of inputs to self.fc2 you need to take into account both the out_channels of self.conv and the output spatial dimensions of c.
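For example, with hypothetical layer sizes (the numbers below are assumptions for illustration, not taken from the question): if self.conv were nn.Conv2d(3, 16, kernel_size=3) and input1 were a batch of 3x32x32 images, c would have shape (batch, 16, 30, 30), so the flattened c contributes 16 * 30 * 30 = 14400 features and fc2's in_features must be 14400 plus fc1's out_features:
import torch
from torch import nn

class TwoInputsNetExample(nn.Module):
    def __init__(self):
        super(TwoInputsNetExample, self).__init__()
        self.conv = nn.Conv2d(3, 16, kernel_size=3)  # assumed: 3x32x32 images -> (16, 30, 30)
        self.fc1 = nn.Linear(10, 32)                 # assumed: 10 non-image features -> 32
        self.fc2 = nn.Linear(16 * 30 * 30 + 32, 1)   # 14400 flattened conv features + 32 from fc1

    def forward(self, input1, input2):
        c = self.conv(input1)
        f = self.fc1(input2)
        combined = torch.cat((c.view(c.size(0), -1), f.view(f.size(0), -1)), dim=1)
        return self.fc2(combined)

# e.g. TwoInputsNetExample()(torch.randn(4, 3, 32, 32), torch.randn(4, 10)) has shape (4, 1)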
