How to compute inverse of a wave sound file in Python?

I am trying to compute the inverse of a 16-bit mono wave file (8000 Hz) in Python without using almost any external module except wave, which is used to read samples from the wave file. I've tried reading all the 16-bit samples, computing 0xFFFF - sample for each one, and writing the result to another wave file. But when I mix the two audio files together, they don't silence each other. I've read some material on the internet about DFT, FFT and the like, but I don't understand much of it even though I have a strong mathematics background from high school. Any hint is appreciated. My code:
import wave
from dataclasses import dataclass
from typing import List

@dataclass
class SoundClip16b:
    data: List["int"]

    def __len__(self):
        return len(self.data)

    def inverted(self):
        inv = list()
        for d in self.data:
            inv.append(0xFFFF - d)
        return SoundClip16b(data=inv)

    def to_bytes(self):
        databyte = b""
        for i in self.data:
            b1 = i >> 8
            b2 = i & 0xFF
            databyte += bytes([b1, b2])
        return databyte

    @staticmethod
    def from_bytes(bytedata):
        data = list()
        for index, byte in enumerate(bytedata[:-1:2]):
            next_byte = bytedata[index+1]
            data.append((byte << 8) + next_byte)
        return SoundClip16b(data=data)
w_noise = wave.open("noise.wav")
w_antinoise = wave.open("antinoise.wav", "w")
clip = SoundClip16b.from_bytes(w_noise.readframes(-1))
inverted_clip = clip.inverted()
w_antinoise.setnchannels(1)
w_antinoise.setsampwidth(2)
w_antinoise.setframerate(8000)
w_antinoise.writeframes(inverted_clip.to_bytes())
w_antinoise.close()
Edit: I have also tried assuming that the 16-bit samples are signed integers rather than unsigned ones.

Thanks to Jan, the problem turned out to be endianness: the wave file was little-endian while my code assumed big-endian. This code worked for me:
import wave
import struct
from dataclasses import dataclass
from typing import List

@dataclass
class SoundClip16b:
    data: List["int"]

    def __len__(self):
        return len(self.data)

    def inverted(self):
        inv = list()
        for d in self.data:
            inv.append(~d)
        return SoundClip16b(data=inv)

    def to_bytes(self):
        databyte = b""
        for d in self.data:
            databyte += struct.pack("<h", d)
        return databyte

    @staticmethod
    def from_bytes(bytedata):
        return SoundClip16b(
            data=[x[0] for x in struct.iter_unpack("<h", bytedata)]
        )
w_noise = wave.open("noise.wav")
w_antinoise = wave.open("antinoise.wav", "w")
clip = SoundClip16b.from_bytes(w_noise.readframes(-1))
inverted_clip = clip.inverted()
w_antinoise.setnchannels(1)
w_antinoise.setsampwidth(2)
w_antinoise.setframerate(8000)
w_antinoise.writeframes(inverted_clip.to_bytes())
w_antinoise.close()
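For what it's worth, on signed 16-bit samples inversion is plain negation (~d is -d - 1, i.e. off by one least-significant bit, which is inaudible here). If another standard-library module is acceptable, audioop (deprecated since Python 3.11 and removed in 3.13) can invert the whole buffer without unpacking samples by hand; a minimal sketch:

import wave
import audioop  # stdlib, but removed in Python 3.13

w_noise = wave.open("noise.wav")
frames = w_noise.readframes(-1)
inverted = audioop.mul(frames, 2, -1)  # width=2 bytes per signed sample, multiply by -1

w_antinoise = wave.open("antinoise.wav", "w")
w_antinoise.setnchannels(1)
w_antinoise.setsampwidth(2)
w_antinoise.setframerate(8000)
w_antinoise.writeframes(inverted)
w_antinoise.close()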

Related

Getting TypeError: object of type 'int' has no len() when using struct.pack(), but only in a class

I'm getting a TypeError: object of type 'int' has no len() from a data-conversion function that I am trying to integrate into a class. Outside the class it works with no errors; put it into the class, or try to access it from inside the class, and I get the same error.
The program I'm writing sends int/float data between two RF/LoRa nodes (this is just a portion of a larger telemetry program). I am converting the data to byte arrays using struct.pack() in order to send it. The code is written in CircuitPython using Adafruit hardware (Feather M4 and LoRa breakout board) and libraries.
Any suggestion on how to fix the issue, or what's causing it, would be greatly appreciated!
Code Below:
class TransmitState(State):
    def __init__(self):
        super().__init__()
        self.entered = time.monotonic()
        self.rf = adafruit_rfm9x.RFM9x(spi, cs, reset, radio_freq)
        self.rf.tx_power = 23
        self.rf.enable_crc = True
        self.rf.node = 1
        self.rf.destination = 2
        led.value = True

    # converts data to byte array - line error on ba = ...
    def pack_data(self, data_to_pack):
        # First encode the number of data items, then the actual items
        ba = struct.pack("!I" + "d" * len(data_to_pack),  # <--- Line that's causing the error
                         len(data_to_pack), *data_to_pack)
        return ba

    # sendData sends the converted data to our 2nd node
    def sendData(self, data):
        # checks to see if data is null
        if data:
            ts = self.pack_data(data)  # <--- Call to convert fcn
            self.rf.send(ts)

    @property
    def name(self):
        return 'transmit'

    def enter(self, machine):
        State.enter(self, machine)
        self.entered = time.monotonic()
        led.value = True
        print("Entering Transmit")

    def exit(self, machine):
        print("Exiting Transmit")
        State.exit(self, machine)

    def update(self, machine):
        if State.update(self, machine):
            now = time.monotonic()
            if now - self.entered >= 1.0:
                led.value = False
                print("Transmitting...")
                self.sendData(3)  # <-- FCN Call, 3 is arbitrary
                machine.go_to_state('idle')
Note: Import and pin assignments not shown
That error occurs because you are trying to get the length of an int.
len() only works on sized/iterable types such as dict, list, str, set and tuple.
Try:
ba = struct.pack("!I" + "d" * len(str(data_to_pack)), len(str(data_to_pack)), *data_to_pack)
This converts the data to a string first and then takes its length.
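As a side note, the format string in the question assumes data_to_pack is a sequence of floats (a count packed as an unsigned int, then that many doubles). A minimal round-trip sketch of that layout, using CPython's struct (CircuitPython's struct is more limited, so treat this as illustrative):

import struct

def pack_data(data_to_pack):
    # Prefix the payload with the number of doubles that follow.
    return struct.pack("!I" + "d" * len(data_to_pack), len(data_to_pack), *data_to_pack)

def unpack_data(packet):
    (count,) = struct.unpack_from("!I", packet)
    return struct.unpack_from("!" + "d" * count, packet, offset=4)

payload = pack_data([3.0, 4.5])        # works: the argument is a list, not a bare int
assert unpack_data(payload) == (3.0, 4.5)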

Streaming upload of a slice of a (large) file using python-requests

I need to do a streaming upload (i.e., not loading the full file slice in memory) of a slice of a large (multi-GB) file, using python-requests.
I've looked around in the docs and on Stack Overflow and haven't found a working way to do it (again, without loading the full slice in memory).
Here's the code I have:
class FileSlice(AbstractContextManager):
    """
    File-like object that only reads a slice of a file
    Inspired by stackoverflow.com/a/29838711/593036, but actually works.
    """
    def __init__(self, filepath: str, seek_from: int, read_limit: int):
        self.filepath = filepath
        self.seek_from = seek_from
        self.read_limit = read_limit
        self.n_seen = 0

    def __enter__(self):
        self.f = open(self.filepath, "rb")
        self.f.seek(self.seek_from)
        return self

    def __len__(self):
        total_length = os.fstat(self.f.fileno()).st_size
        return min(self.read_limit, total_length - self.seek_from)

    def read(self, n=-1):
        if self.n_seen >= self.read_limit:
            return b""
        remaining_amount = self.read_limit - self.n_seen
        n_to_read = remaining_amount if n < 0 else min(n, remaining_amount)
        self.n_seen += n_to_read
        return self.f.read(n_to_read)

    def __iter__(self):
        yield self.read(n=io.DEFAULT_BUFFER_SIZE)

    def __exit__(self, *args):
        self.f.close()
Then the actual request:
with FileSlice(filepath, seek_from=i * chunk_size, read_limit=chunk_size) as data:
    r = requests.put(presigned_url, data=data)
    r.raise_for_status()
This seems pretty complex, so I'm wondering:
if I'm missing a simpler way
if my approach is correct.
Thank you!
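One simpler-looking alternative is a plain generator of byte chunks; requests will stream it, but it only sends a Content-Length when it can determine the body's length (e.g. via __len__), and otherwise falls back to chunked transfer encoding, which some servers behind presigned URLs reject. A hedged sketch, reusing the question's filepath, presigned_url, i and chunk_size:

import requests

def iter_file_slice(filepath, seek_from, read_limit, chunk=1024 * 1024):
    # Yield the slice in fixed-size pieces without holding it all in memory.
    with open(filepath, "rb") as f:
        f.seek(seek_from)
        remaining = read_limit
        while remaining > 0:
            data = f.read(min(chunk, remaining))
            if not data:
                break
            remaining -= len(data)
            yield data

r = requests.put(presigned_url, data=iter_file_slice(filepath, i * chunk_size, chunk_size))
r.raise_for_status()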

Using a custom object in pandas.read_csv()

I am interested in streaming a custom object into a pandas dataframe. According to the documentation, any object with a read() method can be used. However, even after implementing this function I am still getting this error:
ValueError: Invalid file path or buffer object type: <class '__main__.DataFile'>
Here is a simple version of the object, and how I am calling it:
class DataFile(object):
    def __init__(self, files):
        self.files = files

    def read(self):
        for file_name in self.files:
            with open(file_name, 'r') as file:
                for line in file:
                    yield line

import pandas as pd

hours = ['file1.csv', 'file2.csv', 'file3.csv']
data = DataFile(hours)
df = pd.read_csv(data)
Am I missing something, or is it just not possible to use a custom generator in Pandas? When I call the read() method it works just fine.
EDIT:
The reason I want to use a custom object rather than concatenating the dataframes together is to see if it is possible to reduce memory usage. I have used the gensim library in the past, and it makes it really easy to use custom data objects, so I was hoping to find some similar approach.
One way to make a file-like object in Python 3 is by subclassing io.RawIOBase.
Using Mechanical snail's iterstream, you can convert any iterable of bytes into a file-like object:
import tempfile
import io
import pandas as pd

def iterstream(iterable, buffer_size=io.DEFAULT_BUFFER_SIZE):
    """
    http://stackoverflow.com/a/20260030/190597 (Mechanical snail)
    Lets you use an iterable (e.g. a generator) that yields bytestrings as a
    read-only input stream.
    The stream implements Python 3's newer I/O API (available in Python 2's io
    module).
    For efficiency, the stream is buffered.
    """
    class IterStream(io.RawIOBase):
        def __init__(self):
            self.leftover = None
        def readable(self):
            return True
        def readinto(self, b):
            try:
                l = len(b)  # We're supposed to return at most this much
                chunk = self.leftover or next(iterable)
                output, self.leftover = chunk[:l], chunk[l:]
                b[:len(output)] = output
                return len(output)
            except StopIteration:
                return 0    # indicate EOF
    return io.BufferedReader(IterStream(), buffer_size=buffer_size)

class DataFile(object):
    def __init__(self, files):
        self.files = files
    def read(self):
        for file_name in self.files:
            with open(file_name, 'rb') as f:
                for line in f:
                    yield line

def make_files(num):
    filenames = []
    for i in range(num):
        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as f:
            f.write(b'''1,2,3\n4,5,6\n''')
            filenames.append(f.name)
    return filenames

# hours = ['file1.csv', 'file2.csv', 'file3.csv']
hours = make_files(3)
print(hours)

data = DataFile(hours)
df = pd.read_csv(iterstream(data.read()), header=None)
print(df)
prints
   0  1  2
0  1  2  3
1  4  5  6
2  1  2  3
3  4  5  6
4  1  2  3
5  4  5  6
The documentation mentions the read method, but pandas is actually checking whether the argument is_file_like (that's where the exception is thrown). That function is very simple:
def is_file_like(obj):
    if not (hasattr(obj, 'read') or hasattr(obj, 'write')):
        return False
    if not hasattr(obj, "__iter__"):
        return False
    return True
So it also needs an __iter__ method.
But that's not the only problem. Pandas requires that it actually behaves file-like. So the read method should accept an additional argument for the number of bytes (so you can't make read a generator - because it has to be callable with 2 arguments and should return a string).
So for example:
class DataFile(object):
    def __init__(self, files):
        self.data = """a b
1 2
2 3
"""
        self.pos = 0

    def read(self, x):
        nxt = self.pos + x
        ret = self.data[self.pos:nxt]
        self.pos = nxt
        return ret

    def __iter__(self):
        yield from self.data.split('\n')
will be recognized as valid input.
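A quick sanity check of that claim (sep=' ' because the sample data above is space-separated; exact behaviour may vary by pandas version, so treat this as a sketch):

import pandas as pd

df = pd.read_csv(DataFile([]), sep=' ')  # the files argument is unused in this toy class
print(df)                                # expected: two columns, a and b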
However, it's harder for multiple files. I hoped that fileinput might have some appropriate routines, but it doesn't seem to:
import fileinput
pd.read_csv(fileinput.input([...]))
# ValueError: Invalid file path or buffer object type: <class 'fileinput.FileInput'>
How about this alternative approach:
def get_merged_csv(flist, **kwargs):
    return pd.concat([pd.read_csv(f, **kwargs) for f in flist], ignore_index=True)
df = get_merged_csv(hours)

Stacking filters around a file object

I would like to stack filters around an open() function. These filters are supposed, for example, to change every a character encountered into b in the stream read from the file.
For example, here is a code sample:
def filter(stream):
    for line in stream:
        yield line.replace('a', 'b')

def add_filter(filter, file):
    return io.TextIOWrapper(filter(file))

def processing_file(f):
    import sys
    for line in f:
        sys.stdout.write("aa: " + line)

f = open('./example.txt', 'r')
f = add_filter(filter, f)
processing_file(f)
I guess that the filter() function should return a TextIOWrapper to mimic the result of an open() call. But I keep getting the following error message:
AttributeError: 'generator' object has no attribute 'readable'
In fact, I understand the error, but I do not know how to work around it and make this work properly.
You can iterate directly over the filter generator:
with open('./example.txt', 'r') as f:
    for line in filter(f):
        sys.stdout.write("aa: " + line)
I came up with a solution to my own question. First, I have to admit that my question was not totally well formed and may have lacked precision, so I do not blame anybody for having disregarded it.
My original intention was to come up with a stackable framework of filters over a stream (open()), while trying to keep it easy to use.
I mainly found inspiration in this answer on Stack Overflow, which solved about 90% of my problem.
So, imagine we have two filters (which are coded as generators):
def tab_filter(stream):
    for line in stream:
        yield line.replace('\t', ' ' * 8)

def a_filter(stream):
    for line in stream:
        yield line.replace('a', 'z')
Then, we have this class allowing to wrap a generator inside a stream:
class IterStream(object):
    "File-like streaming iterator."

    def __init__(self, generator):
        self.generator = generator
        self.iterator = iter(generator)
        self.leftover = ''

    def __len__(self):
        return self.generator.__len__()

    def __iter__(self):
        return self.iterator

    def next(self):
        return self.iterator.next()

    def read(self, size):
        data = self.leftover
        count = len(self.leftover)
        try:
            while count < size:
                chunk = self.next()
                data += chunk
                count += len(chunk)
        except StopIteration:
            self.leftover = ''
            return data
        if count > size:
            self.leftover = data[size:]
        return data[:size]
Using it in code is as follows:
import sys

f = IterStream(a_filter(IterStream(tab_filter(open('Example.txt', 'r')))))
for line in f:
    sys.stdout.write("aa: " + line)
But, this is not yet totally satisfactory because we need a lot of useless function stacking. So, I decided to wrap it inside a decorator:
def streamfilter(filter):
    def stream(iostream):
        return IterStream(filter(iostream))
    return stream

@streamfilter
def tab_filter(stream):
    for line in stream:
        yield line.replace('\t', ' ' * 8)

@streamfilter
def a_filter(stream):
    for line in stream:
        yield line.replace('a', 'z')
Then, using the code is much easier now:
import sys

f = a_filter(tab_filter(open('Example.txt', 'r')))
for line in f:
    sys.stdout.write("aa: " + line)
I hope some of you will find these few lines useful.

Python: Creating a streaming gzip'd file-like?

I'm trying to figure out the best way to compress a stream with Python's zlib.
I've got a file-like input stream (input, below) and an output function which accepts a file-like (output_function, below):
with open("file") as input:
output_function(input)
And I'd like to gzip-compress input chunks before sending them to output_function:
with open("file") as input:
output_function(gzip_stream(input))
It looks like the gzip module assumes that either the input or the output will be a gzip'd file-on-disk… So I assume that the zlib module is what I want.
However, it doesn't natively offer a simple way to create a stream file-like… And the stream-compression it does support comes by way of manually adding data to a compression buffer, then flushing that buffer.
Of course, I could write a wrapper around zlib.Compress.compress and zlib.Compress.flush (Compress is returned by zlib.compressobj()), but I'd be worried about getting buffer sizes wrong, or something similar.
So, what's the simplest way to create a streaming, gzip-compressing file-like with Python?
Edit: To clarify, the input stream and the compressed output stream are both too large to fit in memory, so something like output_function(StringIO(zlib.compress(input.read()))) doesn't really solve the problem.
It's quite kludgy (self-referencing, etc.; I just spent a few minutes writing it, nothing really elegant), but it does what you want if you're still interested in using gzip instead of zlib directly.
Basically, GzipWrap is a (very limited) file-like object that produces a gzipped file out of a given iterable (e.g., a file-like object, a list of strings, any generator...).
Of course, it produces binary output, so there was no sense in implementing "readline".
You should be able to expand it to cover other cases or to use it as an iterable object itself.
from gzip import GzipFile

class GzipWrap(object):
    # input is a filelike object that feeds the input
    def __init__(self, input, filename=None):
        self.input = input
        self.buffer = ''
        self.zipper = GzipFile(filename, mode='wb', fileobj=self)

    def read(self, size=-1):
        if (size < 0) or len(self.buffer) < size:
            for s in self.input:
                self.zipper.write(s)
                if size > 0 and len(self.buffer) >= size:
                    self.zipper.flush()
                    break
            else:
                self.zipper.close()
        if size < 0:
            ret = self.buffer
            self.buffer = ''
        else:
            ret, self.buffer = self.buffer[:size], self.buffer[size:]
        return ret

    def flush(self):
        pass

    def write(self, data):
        self.buffer += data

    def close(self):
        self.input.close()
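Usage in the shape of the question would then be something like the following sketch (note the answer is Python 2 flavored, with str buffers):

with open("file") as input:
    output_function(GzipWrap(input))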
Here is a cleaner, non-self-referencing version based on Ricardo Cárdenes' very helpful answer.
from gzip import GzipFile
from collections import deque

CHUNK = 16 * 1024

class Buffer(object):
    def __init__(self):
        self.__buf = deque()
        self.__size = 0

    def __len__(self):
        return self.__size

    def write(self, data):
        self.__buf.append(data)
        self.__size += len(data)

    def read(self, size=-1):
        if size < 0: size = self.__size
        ret_list = []
        while size > 0 and len(self.__buf):
            s = self.__buf.popleft()
            size -= len(s)
            ret_list.append(s)
        if size < 0:
            ret_list[-1], remainder = ret_list[-1][:size], ret_list[-1][size:]
            self.__buf.appendleft(remainder)
        ret = ''.join(ret_list)
        self.__size -= len(ret)
        return ret

    def flush(self):
        pass

    def close(self):
        pass

class GzipCompressReadStream(object):
    def __init__(self, fileobj):
        self.__input = fileobj
        self.__buf = Buffer()
        self.__gzip = GzipFile(None, mode='wb', fileobj=self.__buf)

    def read(self, size=-1):
        while size < 0 or len(self.__buf) < size:
            s = self.__input.read(CHUNK)
            if not s:
                self.__gzip.close()
                break
            self.__gzip.write(s)
        return self.__buf.read(size)
Advantages:
Avoids repeated string concatenation, which would cause the entire string to be copied repeatedly.
Reads a fixed CHUNK size from the input stream, instead of reading whole lines at a time (which can be arbitrarily long).
Avoids circular references.
Avoids misleading public "write" method of GzipCompressStream(), which is really only used internally.
Takes advantage of name mangling for internal member variables.
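A usage sketch in the shape of the question (like the code above, this is Python 2 flavored; output_function and the input file are the question's placeholders):

with open("file") as input:
    output_function(GzipCompressReadStream(input))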
The gzip module supports compressing to a file-like object: pass a fileobj parameter to GzipFile, as well as a filename. The filename you pass in doesn't need to exist, but the gzip header has a filename field which needs to be filled out.
Update
This answer does not work. Example:
# tmp/try-gzip.py
import sys
import gzip
fd=gzip.GzipFile(fileobj=sys.stdin)
sys.stdout.write(fd.read())
output:
===> cat .bash_history | python tmp/try-gzip.py > tmp/history.gzip
Traceback (most recent call last):
  File "tmp/try-gzip.py", line 7, in <module>
    sys.stdout.write(fd.read())
  File "/usr/lib/python2.7/gzip.py", line 254, in read
    self._read(readsize)
  File "/usr/lib/python2.7/gzip.py", line 288, in _read
    pos = self.fileobj.tell() # Save current position
IOError: [Errno 29] Illegal seek
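For what it's worth, the traceback above comes from the read/decompress path, where GzipFile calls tell() on its fileobj; the compress-and-write direction that the original answer describes generally does work, because GzipFile only writes sequentially there. A minimal sketch, assuming input is opened in binary mode and out_fileobj is any writable binary file-like object:

import gzip

def gzip_stream_to(input, out_fileobj, chunk=16 * 1024):
    # Compress input chunk by chunk into a writable binary file-like object.
    with gzip.GzipFile(filename="", mode="wb", fileobj=out_fileobj) as gz:
        for piece in iter(lambda: input.read(chunk), b""):
            gz.write(piece)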
Use the cStringIO (or StringIO) module in conjunction with zlib:
>>> import zlib
>>> from cStringIO import StringIO
>>> s = StringIO()
>>> s.write(zlib.compress("I'm a lumberjack"))
>>> s.seek(0)
>>> zlib.decompress(s.read())
"I'm a lumberjack"
This works (at least in python 3):
with s3.open(path, 'wb') as f:
    gz = gzip.GzipFile(filename, 'wb', 9, f)
    gz.write(b'hello')
    gz.flush()
    gz.close()
Here it writes to s3fs's file object with gzip compression applied.
The magic is the f parameter, which is GzipFile's fileobj. You have to provide a filename for gzip's header.
An even cleaner & more generalized version made of reusable components:
gzipped_iter = igzip(io_iter(input_file_obj))
gzipped_file_obj = iter_io(prefetch(gzipped_iter))
The functions above are from my gist:
iter_io and io_iter provide transparent conversion to/from Iterable[AnyStr] <-> SupportsRead[AnyStr]
igzip does streaming gzip compression
(optional) prefetch concurrently pulls from an underlying iterable via a thread, yielding to consumer as normal, for concurrent read/write
import io
import zlib
from queue import Queue
from threading import Thread
from typing import Any, AnyStr, Iterable, Iterator
# SupportsRead is a typing-only protocol (e.g. from _typeshed); at runtime any
# object with a .read() method works.

def as_bytes(s: str | bytes):
    if type(s) not in [str, bytes]:
        raise TypeError
    return s.encode() if isinstance(s, str) else s

def iter_io(iterable: Iterable[AnyStr], buffer_size: int = io.DEFAULT_BUFFER_SIZE):
    """
    Returns a buffered file obj that reads bytes from an iterable of str/bytes.
    Example:
    iter_io(['abc', 'def', 'g']).read() == b'abcdefg'
    iter_io([b'abcd', b'efg']).read(5) == b'abcde'
    """
    class IterIO(io.RawIOBase):
        def __init__(self, iterable: Iterable[AnyStr]):
            self._leftover = b''
            self._iterable = (as_bytes(s) for s in iterable if s)
        def readable(self):
            return True
        def readinto(self, buf):
            try:
                chunk = self._leftover or next(self._iterable)
            except StopIteration:
                return 0  # indicate EOF
            output, self._leftover = chunk[:len(buf)], chunk[len(buf):]
            buf[:len(output)] = output
            return len(output)
    return io.BufferedReader(IterIO(iterable), buffer_size=buffer_size)

def io_iter(fo: SupportsRead[AnyStr], size: int = io.DEFAULT_BUFFER_SIZE):
    """
    Returns an iterator that reads from a file obj in sized chunks.
    Example:
    list(io_iter(io.StringIO('abcdefg'), 3)) == ['abc', 'def', 'g']
    list(io_iter(io.BytesIO(b'abcdefg'), 4)) == [b'abcd', b'efg']
    Usage notes/TODO:
    * file obj isn't closed, fix /w keep_open=False and an internal contextmanager
    """
    return iter(lambda: fo.read(size), fo.read(0))

def igzip(chunks: Iterable[AnyStr], level=zlib.Z_DEFAULT_COMPRESSION):
    """
    Streaming gzip: lazily compresses an iterable of bytes or str (utf8)
    Example:
    gzipped_bytes_iter = igzip(['hello ', 'world!'])
    gzip.decompress(b''.join(gzipped_bytes_iter)).decode() == 'hello world!'
    """
    def gen():
        gzip_format = 0b10000
        c = zlib.compressobj(level=level, wbits=zlib.MAX_WBITS + gzip_format)
        yield from (c.compress(as_bytes(chunk)) for chunk in chunks)
        yield c.flush()
    return filter(None, gen())

def prefetch(iterable: Iterable[Any], n: int = 1) -> Iterator[Any]:
    """
    Prefetch an iterable via thread, yielding original contents as normal.
    Example:
    def slow_produce(*args):
        for x in args:
            time.sleep(1)
            yield x
    def slow_consume(iterable):
        for _ in iterable:
            time.sleep(1)
    slow_consume(prefetch(slow_produce('a', 'b')))  # takes 3 sec, not 4
    # Prefetch
    # produce: | 'a' | 'b' |
    # consume:       | 'a' | 'b' |
    # seconds: 0 --- 1 --- 2 --- 3
    # No prefetch
    # produce: | 'a' |       | 'b' |
    # consume:       | 'a' |       | 'b' |
    # seconds: 0 --- 1 --- 2 --- 3 --- 4
    Usage notes/TODO:
    * mem leak: Thread is GC'd only after iterable is fully consumed, fix /w __del__
    """
    queue = Queue(n)
    finished = object()

    def produce():
        for x in iterable:
            queue.put(x)
        queue.put(finished)

    t = Thread(target=produce, daemon=True)
    t.start()
    while True:
        item = queue.get()
        if item is finished:
            break
        else:
            yield item
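Putting these together in the shape of the original question (a sketch; output_function and the input file are the question's placeholders, and the file must be opened in binary mode):

with open("file", "rb") as input:
    output_function(iter_io(igzip(io_iter(input))))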
