I am trying to scatter an array of strings when I am scattering one char. it is working fine , however when I change to strings it showing an error like that scattering is not able to scatter strings
data = np.array(["mmm","bbbb","css","ddd","e","f","g","h"],dtype=np.str_)
import numpy as np
comm = MPI.COMM_WORLD
size = comm.Get_size() # new: gives number of ranks in comm
rank = comm.Get_rank()
numDataPerRank = 4
data = None
if rank == 0:
data = np.array(["m","b","c","d","e","f","g","h"],dtype=np.str_)
# when size=4 (using -n 4), data = [1.0:40.0]
recvbuf = np.empty(numDataPerRank, dtype=np.str_) # allocate space for recvbuf
comm.Scatter(data, recvbuf, root=0)
print('Rank: ',rank, ', recvbuf received: ',recvbuf)
data = np.array(["m","b","c","d","e","f","g","h"],dtype=np.str_) working with scatter
data = np.array(["mmm","bbbb","css","ddd","e","f","g","h"],dtype=np.str_) is not working
Related
so i have this code :
class distKmeans(beam.DoFn):
#i will do an init function to add the kmeans parameters
def __init__(self, n_clusters,rseed=2):
self.n_clusters = n_clusters
self.rseed = rseed
self.centers = None
#The function "process" implements the main functionality of the K-means algorithm
def process(self,element):
if self.centers is None:
rng = np.random.RandomState(self.rseed)
#we use len instead of shape because element is a PCOLLECTION
i = rng.permutation(element.shape[0])[:self.n_clusters]
self.centers = element[i]
# b1. Calculate the closest center μ to xi
labels = pairwise_distances_argmin(element, self.centers)
# b2. Update the center
new_centers = np.array([element[labels == i].mean(0)
for i in range(self.n_clusters)])
# c.
if np.all(self.centers == new_centers):
return
self.centers = new_centers
yield self.centers, labels
with beam.Pipeline() as pipeline:
mydata = pipeline | beam.Create(X)
mydata = mydata |beam.ParDo(distKmeans(3))
mydata |"write" >> beam.io.WriteToText("sample_data/output.txt")
as i'm trying to create a distributed kmeans with apache beam, my data was generated using this code :
n_samples=200
n_features=2
X, y = make_blobs(n_samples=n_samples,centers=3, n_features=n_features)
data = np.c_[X,y]
plt.scatter(data[:, 0], data[:, 1], s=50);
and then X is :
X = data[['X1','X2']].to_numpy()
X = X[1:]
it shape is (200, 2 )
The code seems correct but i always get the fellowing error even tho my data is a 2d array:
Expected 2D array, got 1D array instead:
array=[-6.03120913 11.30181549].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample. [while running '[54]: ParDo(distKmeans)']
and this error comes in this line :
labels = pairwise_distances_argmin(element, self.centers)
I have the following code below:
import time,board,busio
import numpy as np
import adafruit_mlx90640
import matplotlib.pyplot as plt
print("Initializing MLX90640")
i2c = busio.I2C(board.SCL, board.SDA, frequency=800000) # setup I2C
mlx = adafruit_mlx90640.MLX90640(i2c) # begin MLX90640 with I2C comm
mlx.refresh_rate = adafruit_mlx90640.RefreshRate.REFRESH_2_HZ # set refresh rate
mlx_shape = (24,32)
print("Initialized")
# setup the figure for plotting
plt.ion() # enables interactive plotting
fig,ax = plt.subplots(figsize=(12,7))
therm1 = ax.imshow(np.zeros(mlx_shape),vmin=0,vmax=60) #start plot with zeros
cbar = fig.colorbar(therm1) # setup colorbar for temps
cbar.set_label('Temperature [$^{\circ}$C]',fontsize=14) # colorbar label
frame = np.zeros((24*32,)) # setup array for storing all 768 temperatures
t_array = []
print("Starting loop")
while True:
t1 = time.monotonic()
try:
mlx.getFrame(frame) # read MLX temperatures into frame var
data_array = (np.reshape(frame,mlx_shape)) # reshape to 24x32
therm1.set_data(np.fliplr(data_array)) # flip left to right
therm1.set_clim(vmin=np.min(data_array),vmax=np.max(data_array)) # set bounds
cbar.update_normal(therm1) # update colorbar range
plt.title(f"Max Temp: {np.max(data_array):.1f}C")
plt.pause(0.001) # required
#fig.savefig('mlx90640_test_fliplr.png',dpi=300,facecolor='#FCFCFC', bbox_inches='tight') # comment out to speed up
t_array.append(time.monotonic()-t1)
print('Sample Rate: {0:2.1f}fps'.format(len(t_array)/np.sum(t_array)))
except ValueError:
continue # if error, just read again
And my output is shown below:
Image of output
What I'm trying to do is output the x and y coordinates of the max heat that is detected and store them as variables. They are stored in data_array. I know it should be simple code, but I'm confused. Could someone help me?
I want to update python data which was originally created for a 1d array to process data. I tried different ways but still got errors. if I flatten my 2d data the data loses meaning sing it is voice data. Below is a made-up data and the function to reproduce the error.
x = np.random.normal(0,1,(40,2))
print(cpp_function(x=signal, fs=44100, pitch_range=[75, 300], trendline_quefrency_range=[0.001, 0.05]))
def cpp_function(x, fs, pitch_range, trendline_quefrency_range, smooth=False, time_smooth_len=None, quefrency_smooth_len=None):
"""
Computes cepstral peak prominence for a given signal
Parameters
-----------
x: ndarray
The audio signal
fs: integer
The sampling frequency
pitch_range: list of 2 elements
The pitch range where a peak is searched for
trendline_quefrency_range: list of 2 elements
The quefrency range for which the amplitudes will be modelled by a straight line
Returns
-----------
integer
The cepstral peak prominence of the audio signal
"""
# Cepstrum
x = np.hamming(len(x))*x
spectrum = np.fft.rfft(x)
spectrum = 20*np.log10(np.abs(spectrum))
ceps = np.fft.rfft(spectrum)
ceps = 20*np.log10(np.abs(ceps))
# Smoothing
if smooth == True:
def smooth(y, box_pts):
box = np.ones(box_pts)/box_pts
y_smooth = np.convolve(y, box, mode='same')
return y_smooth
ceps = smooth(ceps.T, time_smooth_len).T
ceps = smooth(ceps, quefrency_smooth_len)
# Quefrency
dt = 1/fs
freq_vector = np.fft.rfftfreq(len(x), d=dt)
df = freq_vector[1] - freq_vector[0]
quefrency_vector = np.fft.rfftfreq(2*ceps.size-2, df)
# Selecting part of cepstrum
quefrency_range = [1/pitch_range[1], 1/pitch_range[0]]
index_range = np.where((quefrency_vector >= quefrency_range[0]) & (quefrency_vector <=quefrency_range[1]))
# For trend line
index_range_tl = np.where((quefrency_vector >= trendline_quefrency_range[0]) & (quefrency_vector <=trendline_quefrency_range[1]))
# Linear regression
linear_regressor = LinearRegression()
linear_regressor.fit(quefrency_vector[index_range_tl].reshape(-1, 1), ceps[index_range_tl].reshape(-1, 1))
Y_pred = linear_regressor.predict(quefrency_vector.reshape(-1, 1))
# CPP
peak_value = np.max(ceps[index_range])
peak_index = np.argmax(ceps[index_range])
cpp = peak_value - Y_pred[index_range][peak_index][0]
return cpp
I want to calculate the single channel data (in order to calculate the audio cross correlation between the channel 1 and channel 4) of this code:
import time
import numpy as np
import pyaudio
import scipy
from scipy import signal, fftpack
pyaud = pyaudio.PyAudio()
#open the stream
stream = pyaud.open(
format = pyaudio.paInt16,
channels = 4,
rate = 16000,
input_device_index = 4,
output = False,
input = True,
frames_per_buffer=2048,)
while True:
rawsamps = stream.read(2048)
samps = np.fromstring(rawsamps, dtype=np.int16)
frames_per_buffer_length = len(samps) / 4 #(channels)
assert frames_per_buffer_length == int(frames_per_buffer_length)
samps = np.reshape(samps, (frames_per_buffer_length, 4)) #4 channels
Assuming that the raw data is interleaved.
This is the function i need to use :
signal.correlate(n1, n2, mode='full')
how can I create an array of data for each channel in order to use the correlate function? are the last lines of the code correct?
Thank you
I found the answer, using print loudness(samps[:,0]), loudness(samps[:,3]). It print in the shell " mic 1 loudness , mic 4 loudness"
I am tring to write a MPI-based code to do some calculation using python and MPI4py. However, following the example, I CANNOT scatter a numpy vector into cores. Here is the code and errors, is there anyone can help me? Thanks.
import numpy as np
from mpi4py import MPI
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
n = 6
if rank == 0:
d1 = np.arange(1, n+1)
split = np.array_split(d1, size)
split_size = [len(split[i]) for i in range(len(split))]
split_disp = np.insert(np.cumsum(split_size), 0, 0)[0:-1]
else:
#Create variables on other cores
d1 = None
split = None
split_size = None
split_disp = None
split_size = comm.bcast(split_size, root = 0)
split_disp = comm.bcast(split_disp, root = 0)
d1_local = np.zeros(split_size[rank])
comm.Scatterv([d1, split_size, split_disp, MPI.DOUBLE], d1_local, root=0)
print('rank ', rank, ': ', d1_local)
And the error result is:
rank 2 : [ 2.47032823e-323]
rank 3 : [ 2.96439388e-323]
rank 0 : [ 4.94065646e-324 9.88131292e-324]
rank 1 : [ 1.48219694e-323 1.97626258e-323]
Thanks.
The data type is not correct. I should specify the type of the array:
d1 = np.arange(1, n+1, dtype='float64')