For loop crashing on speech_recognition - python

Python Newbie trying to teach myself how to use Python to run speech_recognition, and Im not having great luck.
The code below runs once and correctly converts a wav file to text, but then it crashes before running the remaining 2 wav files in my S3 bucket. The files are absolutely there:
OSR_us_000_0010_8k.wav
OSR_us_000_0011_8k.wav
OSR_us_000_0012_8k.wav
I could use some help fixing it.
Thanks in Advance.
import boto3
import speech_recognition as sr
r = sr.Recognizer()
session = boto3.client('s3',
aws_access_key_id= XXXX,
aws_secret_access_key=XXXX,
region_name='XXXX')
my_bucket = s3.Bucket(mys3bucket)
for my_bucket_object in my_bucket.objects.all():
with sr.AudioFile(my_bucket_object.key) as source:
print(my_bucket_object.key)
audio_data = r.record(source)
text = r.recognize_google(audio_data)
print(text)
OSR_us_000_0010_8k.wav<br>
Birch canoe slid on the smooth plank glue the sea to a dark blue background it is easy to tell the depth of a well these day the chicken leg of a variegated rice is often served in roundels the juice of lemons mix find the boxes on the side the pump truck the ha grimstead topcon and garbage for hours of Citi workspace a large-sized and stockings in the hearts of cell
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-4-385959f26678> in <module>
14
15 for my_bucket_object in my_bucket.objects.all():
---> 16 with sr.AudioFile(my_bucket_object.key) as source:
17 print(my_bucket_object.key)
18 audio_data = r.record(source)
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/site-packages/speech_recognition/__init__.py in __enter__(self)
201 try:
202 # attempt to read the file as WAV
--> 203 self.audio_reader = wave.open(self.filename_or_fileobject, "rb")
204 self.little_endian = True # RIFF WAV is a little-endian format (most ``audioop`` operations assume that the frames are stored in little-endian form)
205 except (wave.Error, EOFError):
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/wave.py in open(f, mode)
508 mode = 'rb'
509 if mode in ('r', 'rb'):
--> 510 return Wave_read(f)
511 elif mode in ('w', 'wb'):
512 return Wave_write(f)
~/anaconda3/envs/mxnet_latest_p37/lib/python3.7/wave.py in __init__(self, f)
158 self._i_opened_the_file = None
159 if isinstance(f, str):
--> 160 f = builtins.open(f, 'rb')
161 self._i_opened_the_file = f
162 # else, assume it is an open file object already
FileNotFoundError: [Errno 2] No such file or directory: 'OSR_us_000_0011_8k.wav'

Related

Upload large file (>100 MB) directly to github with pygithub

I am using pyGitHub to upload files to my repo, however some of the files are so large that the server connection times out. My code to upload/overwrite a file from a folder is:
def commit(folder):
foldername = folder.split("/")[-1]
onlyfiles = [f for f in listdir(folder) if isfile(join(folder, f))]
repo = g.get_repo(user.login+"/My-repo")
all_files = []
contents = repo.get_contents("")
while contents:
file_content = contents.pop(0)
if file_content.type == "dir":
contents.extend(repo.get_contents(file_content.path))
else:
file = file_content
all_files.append(str(file).replace('ContentFile(path="','').replace('")',''))
body = '''
Line 1: Message
Line 2: Sample Text
Line 3: yet another line
'''
for i in onlyfiles:
print(i)
input_file = open(folder + "/" + i, "rb")
data = input_file.read()
input_file.close()
if not(f"{foldername}/{i}" in all_files):
repo.create_file(f"{foldername}/{i}", "Created building data", data)
else:
file = repo.get_contents(f"{foldername}/{i}")
repo.update_file(file.path, "Updated information", data, file.sha)
This code works for files <25mb, but for larger ones I get the error:
---------------------------------------------------------------------------
GithubException Traceback (most recent call last)
<ipython-input-9-7d41473c81a0> in <module>()
79
80
---> 81 commit(str("/content/"+dirname))
3 frames
<ipython-input-9-7d41473c81a0> in commit(folder)
72 input_file.close()
73 if not(f"{foldername}/{i}" in all_files):
---> 74 repo.create_file(f"{foldername}/{i}", "Created building data", data)
75 else:
76 file = repo.get_contents(f"{foldername}/{i}")
/usr/local/lib/python3.7/dist-packages/github/Repository.py in create_file(self, path, message, content, branch, committer, author)
2091 "PUT",
2092 f"{self.url}/contents/{urllib.parse.quote(path)}",
-> 2093 input=put_parameters,
2094 )
2095
/usr/local/lib/python3.7/dist-packages/github/Requester.py in requestJsonAndCheck(self, verb, url, parameters, headers, input)
353 return self.__check(
354 *self.requestJson(
--> 355 verb, url, parameters, headers, input, self.__customConnection(url)
356 )
357 )
/usr/local/lib/python3.7/dist-packages/github/Requester.py in __check(self, status, responseHeaders, output)
376 output = self.__structuredFromJson(output)
377 if status >= 400:
--> 378 raise self.__createException(status, responseHeaders, output)
379 return responseHeaders, output
380
GithubException: 502 {"message": "Server Error"}
I am aware that the file upload limit for github is 25MB, but apparently files up to 100MB can be uploaded via the command line. How would I upload files larger than this to GitHub using pyGitHub? The file is zipped, so it really is as small as it can be, but is still ~150MB. Is this doable? If not, is there a way to reference a larger file in github which I can upload elsewhere? I am using Google Colab in case anyone is wondering.

Error trying to use AudioSegment for .wav files

I'm trying to iterate through all the .wav files in a folder "audios", but I receive the following error. I found similar questions that were solved by installing ffmpeg, but that didn't help.
FileNotFoundError Traceback (most recent call last)
<ipython-input-24-29ba732186ac> in <module>
1 for audio_file in os.listdir(base_path+"audios"):
2 # read wav audio file
----> 3 audio = AudioSegment.from_wav(audio_file)
4
5 # pass audio file, start time, end time & chunk path to create chunk
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\pydub\audio_segment.py in from_wav(cls, file, parameters)
806 #classmethod
807 def from_wav(cls, file, parameters=None):
--> 808 return cls.from_file(file, 'wav', parameters=parameters)
809
810 #classmethod
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\pydub\audio_segment.py in from_file(cls, file, format, codec, parameters, start_second, duration, **kwargs)
649 except TypeError:
650 filename = None
--> 651 file, close_file = _fd_or_path_or_tempfile(file, 'rb', tempfile=False)
652
653 if format:
~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\pydub\utils.py in _fd_or_path_or_tempfile(fd, mode, tempfile)
58
59 if isinstance(fd, basestring):
---> 60 fd = open(fd, mode=mode)
61 close_fd = True
62
FileNotFoundError: [Errno 2] No such file or directory: 'name_of_file.wav'
os.listdir doesn't return the full paths of files in the directory you give it, just the names they have within that directory. You will need to prepend this directory name to the filename you pass to AudioSegment.from_wav.
Try replacing the line
audio = AudioSegment.from_wav(audio_file)
with
audio = AudioSegment.from_wav(os.path.join(base_path+"audios", audio_file))

Why there is a error message Exception: This file is already closed

1.I was trying to write a python code to get all contents of files in each subfolder and create a index for each content (file contents). All the contents for each file can be get successfully. However, when I run the code, it always shows an error message Exception: This file is already closed.
2.Here is the code for building an index for each content, could someone explain to me why this thing could happened?
The traceback:
python-input-49-38a47b2f8c0c> in <module>
39 print(searcher)
40
---> 41 writers.commit(optimize=True)
42
43 # from whoosh.query import *
~/.local/lib/python3.8/site-packages/whoosh/writing.py in commit(self, mergetype, optimize, merge)
928 else:
929 # Close segment files
--> 930 self._close_segment()
931 # Write TOC
932 self._commit_toc(finalsegments)
~/.local/lib/python3.8/site-packages/whoosh/writing.py in _close_segment(self)
841 def _close_segment(self):
842 if not self.perdocwriter.is_closed:
--> 843 self.perdocwriter.close()
844 if not self.fieldwriter.is_closed:
845 self.fieldwriter.close()
~/.local/lib/python3.8/site-packages/whoosh/codec/whoosh3.py in close(self)
265 for writer in self._colwriters.values():
266 writer.finish(self._doccount)
--> 267 self._cols.save_as_files(self._storage, self._column_filename)
268
269 # If vectors were written, close the vector writers
~/.local/lib/python3.8/site-packages/whoosh/filedb/compound.py in save_as_files(self, storage, name_fn)
295
296 def save_as_files(self, storage, name_fn):
--> 297 for name, blocks in self._readback():
298 f = storage.create_file(name_fn(name))
299 for block in blocks():
~/.local/lib/python3.8/site-packages/whoosh/filedb/compound.py in _readback(self)
276
277 yield (name, gen)
--> 278 temp.close()
279 self._tempstorage.delete_file(self._tempname)
280
~/.local/lib/python3.8/site-packages/whoosh/filedb/structfile.py in close(self)
121
122 if self.is_closed:
--> 123 raise Exception("This file is already closed")
124 if self.onclose:
125 self.onclose(self)
Exception: This file is already closed
import os
import codecs
import whoosh
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT,textdata=TEXT(stored=True))
ix = create_in("folder", schema)
filelist = []
for root, dirs, files in os.walk("./test_result"):
for file in files:
#append the file name to the list
filelist.append(os.path.join(root,file))
#print all the file names
writer = ix.writer()
i = 0
for name in filelist:
i = i +1
with codecs.open (name, "r",encoding='utf-8',
errors='ignore') as myfile:
text=myfile.read()
# print ("adding document "+name)
writer.add_document(title="document "+name, path="folder",content=text,textdata=text)
myfile.close()
print(text)
searcher = ix.searcher()
print(searcher)
writers.commit(optimize=True)
with statement handles resources management, including file closing. You could read more about it here.
This code:
f = open(file)
f.write("blablabla")
f.close
is equivalent to this:
with open(file) as f
f.write("blablabla")
This exception is a result of you trying to close a file that is already closed implicitly by with statement.
You only need to delete this line:
myfile.close()
EDIT:
I just explained the error in the code, but didn't notice the update in the comments. Please update the question itself with the mentioned line deleted.
On a side note, I see you used writers.commit() instead of writer.commit(), please make sure it's not a typo and update your question if your code still doesn't work.

Image won't open from response object using PIL in google Colaboratory

I am going through the "PyTorch for Deep Learning and Computer Vision" course on Udemy and inputting the code into Google Colaboratory as instructed.
However, there a part of the code where PIL is meant to read an image from a response object, but I have an error "AttributeError: can't set attribute"
I am using pillow 4.0.0 on python 3.6
I have tried changing resonse.raw to response.content, response.text, and just response. I have tried removing the stream = True attribute and I have tried inputting the url directly into the Image.open method
!pip3 install pillow==4.0.0
import PIL.ImageOps
import requests
from PIL import Image
url = 'https://c8.alamy.com/comp/DYC06A/hornless-reindeer-at-zoo-DYC06A.jpg'
response = requests.get(url, stream = True)
img = Image.open(response.raw)
plt.imshow(img)
I expect a plot with an image of a deer with the url in the url variable.
Instead I receive this error message:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-85-059526041234> in <module>()
4 url = 'https://c8.alamy.com/comp/DYC06A/hornless-reindeer-at-zoo-DYC06A.jpg'
5 response = requests.get(url, stream = True)
----> 6 img = Image.open(response.raw)
7 plt.imshow(img)
5 frames
/usr/local/lib/python3.6/dist-packages/PIL/Image.py in open(fp, mode)
/usr/local/lib/python3.6/dist-packages/PIL/Image.py in _open_core(fp, filename, prefix)
/usr/local/lib/python3.6/dist-packages/PIL/JpegImagePlugin.py in jpeg_factory(fp, filename)
750 # Factory for making JPEG and MPO instances
751 def jpeg_factory(fp=None, filename=None):
--> 752 im = JpegImageFile(fp, filename)
753 try:
754 mpheader = im._getmp()
/usr/local/lib/python3.6/dist-packages/PIL/ImageFile.py in __init__(self, fp, filename)
95
96 try:
---> 97 self._open()
98 except (IndexError, # end of data
99 TypeError, # end of data (ord)
/usr/local/lib/python3.6/dist-packages/PIL/JpegImagePlugin.py in _open(self)
321 # print(hex(i), name, description)
322 if handler is not None:
--> 323 handler(self, i)
324 if i == 0xFFDA: # start of scan
325 rawmode = self.mode
/usr/local/lib/python3.6/dist-packages/PIL/JpegImagePlugin.py in SOF(self, marker)
144 n = i16(self.fp.read(2))-2
145 s = ImageFile._safe_read(self.fp, n)
--> 146 self.size = i16(s[3:]), i16(s[1:])
147
148 self.bits = i8(s[0])
AttributeError: can't set attribute
Upgrading pillow solved this for me
pip install pillow --upgrade

Import audio files in python for analysis e.g Signal Analysis

I am trying to import a .wav file to perform Signal analysis on it. I have used all the Ipython,wave libraries that i am meant to import but its still showing me error.Some of the libraries were gotten from a book downloaded from git hub repository(https://github.com/AllenDowney/ThinkDSP). Can anyone one show me what is wrong with the code?
(This is after importing all necessary libraries in the book and in python)
Error Traceback (most recent call last)
in ()
----> 1 wave= thinkdsp.read_wave('365515__noedell__noedell-shady-scheme-01.wav')
C:\Users\Ademola\Desktop\500 Level\DSP\DSP_Python\ThinkDSP-master\ThinkDSP-master\code\thinkdsp.py in read_wave(filename)
99 returns: Wave
100 """
--> 101 fp = open_wave(filename, 'r')
102
103 nchannels = fp.getnchannels()
C:\Users\Ademola\Anaconda3\lib\wave.py in open(f, mode)
497 mode = 'rb'
498 if mode in ('r', 'rb'):
--> 499 return Wave_read(f)
500 elif mode in ('w', 'wb'):
501 return Wave_write(f)
C:\Users\Ademola\Anaconda3\lib\wave.py in init(self, f)
161 # else, assume it is an open file object already
162 try:
--> 163 self.initfp(f)
164 except:
165 if self._i_opened_the_file:
C:\Users\Ademola\Anaconda3\lib\wave.py in initfp(self, file)
141 chunkname = chunk.getname()
142 if chunkname == b'fmt ':
--> 143 self._read_fmt_chunk(chunk)
144 self._fmt_chunk_read = 1
145 elif chunkname == b'data':
C:\Users\Ademola\Anaconda3\lib\wave.py in _read_fmt_chunk(self, chunk)
258 self._sampwidth = (sampwidth + 7) // 8
259 else:
--> 260 raise Error('unknown format: %r' % (wFormatTag,))
261 self._framesize = self._nchannels * self._sampwidth
262 self._comptype = 'NONE'
Error: unknown format: 3
Without seeing your code its hard to answer your question...you can read wav files with the wav module that comes standard in python. Basic syntax below:
import wave
wav = wave.open('wavFile.wav', 'r')
here is the documentation:
https://docs.python.org/2/library/wave.html
Let me know if this helps!

Categories