wave write function not working, what am I doing wrong? - python

I am trying to halve the existing sampling rate of a folder full of .wav files. This is the only way I have found to do it, but it is not working. The read part works just fine up until f.close(); then the wave write part causes the error.
import wave
import contextlib
import os

for file_name in os.listdir(os.getcwd()):
    if file_name.endswith(".wav"):
        with contextlib.closing(wave.open(file_name, 'rb')) as f:
            rate = f.getframerate()
            new_rate = rate/2
            f.close()
        with contextlib.closing(wave.open(file_name, 'wb')) as f:
            rate = f.setframerate(new_rate)
This is the output when I run it.
Traceback (most recent call last):
  File "C:\Users\hsash\OneDrive\Desktop\used AR1-20210513T223533Z-001 - Copy (2)\sounds\python code.py", line 36, in <module>
    rate = f.setframerate(new_rate)
  File "C:\Users\hsash\AppData\Local\Programs\Python\Python39\lib\contextlib.py", line 303, in __exit__
    self.thing.close()
  File "C:\Users\hsash\AppData\Local\Programs\Python\Python39\lib\wave.py", line 444, in close
    self._ensure_header_written(0)
  File "C:\Users\hsash\AppData\Local\Programs\Python\Python39\lib\wave.py", line 462, in _ensure_header_written
    raise Error('# channels not specified')
wave.Error: # channels not specified

It says right there: # channels not specified. When you open a WAV file for writing, Python sets all of the header fields to zero, regardless of the current state of the file.
In order to make sure that the other fields are preserved, you need to copy them over from the old file when you read it the first time.
In the snippet below I'm using getparams and setparams to copy the header fields over, and readframes and writeframes to copy the wave data.
import wave
import contextlib
import os

for file_name in os.listdir(os.getcwd()):
    if file_name.endswith(".wav"):
        with contextlib.closing(wave.open(file_name, 'rb')) as f:
            rate = f.getframerate()
            params = f.getparams()
            frames = f.getnframes()
            data = f.readframes(frames)
            new_rate = rate/2
        with contextlib.closing(wave.open(file_name, 'wb')) as f:
            f.setparams(params)
            f.setframerate(new_rate)
            f.writeframes(data)
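Note that this overwrites each file in place, and that halving only the frame rate in the header makes players render the same samples more slowly; it does not resample the audio. If you would rather keep the originals untouched, here is a minimal variation of the same idea, as a sketch; the "_half" output suffix is my own naming choice, not part of the original question:

import wave
import contextlib
import os

for file_name in os.listdir(os.getcwd()):
    if file_name.endswith(".wav") and not file_name.endswith("_half.wav"):
        with contextlib.closing(wave.open(file_name, 'rb')) as f:
            params = f.getparams()
            data = f.readframes(f.getnframes())
        out_name = os.path.splitext(file_name)[0] + "_half.wav"
        with contextlib.closing(wave.open(out_name, 'wb')) as f:
            f.setparams(params)
            f.setframerate(params.framerate // 2)  # halve the sampling rate recorded in the header
            f.writeframes(data)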

Related

No text parsed from document

I have written an HTML text parser. When I use it on a large batch of files, i.e. 5,000 or more, it randomly produces this error, and when I re-run it, it produces the same error in exactly the same files. So I removed those files and parsed them individually, and the parser read them.
So I created a new folder with the "problematic" files and tried parsing them separately; it produced no error for most, then it reproduced the same error again.
This is the code:
import pandas as pd
import shutil
import os
import glob

source_file = r'C:/Users/Ahmed_Abdelmuniem/Desktop/Mar/Problematic/'
file_names = glob.glob(os.path.join(source_file, "*.html"))

for file_name in file_names:
    table = pd.read_html(file_name)
    print(table)
This is the error:
Traceback (most recent call last):
  File "C:\Users\Ahmed_Abdelmuniem\PycharmProjects\No Text Parsed Troubleshooting\main.py", line 11, in <module>
    table = pd.read_html(file_name)
  File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\util\_decorators.py", line 299, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\html.py", line 1085, in read_html
    return _parse(
  File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\html.py", line 893, in _parse
    tables = p.parse_tables()
  File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\html.py", line 213, in parse_tables
    tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
  File "C:\Users\Ahmed_Abdelmuniem\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\io\html.py", line 735, in _build_doc
    raise XMLSyntaxError("no text parsed from document", 0, 0, 0)
  File "<string>", line 0
lxml.etree.XMLSyntaxError: no text parsed from document

Process finished with exit code 1
I took the "unreadable" files outside of the folder and parsed them individually and the code read them, I can't seem to identify what is wrong.
I hope my explanation is clear and sufficient.
There is a hidden .DS_STORE file in the folder. This is my code:
from lxml import etree
import pandas as pd
import os
from time import sleep

locations_folder = '/Users/jerryhu/Documents/Documents/Zenly/locations'
failed_files = []

def parse(file):
    tables = pd.read_html(file)
    dataframe = pd.DataFrame(tables[0])
    path, name = os.path.split(file)
    with open(f'/Users/jerryhu/Documents/Documents/Zenly/locations_csv/{name}'.replace('.html', '.csv'), 'w') as writeCSV:
        dataframe.to_csv(writeCSV)
    print(f"Writing {name.replace('.html', '.csv')} to disk")
    try:
        failed_files.remove(file)
    except:
        pass

for filename in os.listdir(locations_folder):
    file = os.path.join(locations_folder, filename)
    if os.path.exists(file):
        try:
            parse(file)
        except:
            failed_files.append(file)

print("\nFinished. These files failed to parse:")
for i in failed_files:
    print(i)

print("Retrying in 3 seconds.")
sleep(3)

for i in failed_files:
    try:
        parse(i)
    except:
        print(f'{i} couldn\'t be parsed.')
This is the error returned:
Writing 2022-10-09.csv to disk
Writing 2022-09-13.csv to disk
Writing 2022-09-05.csv to disk
Writing 2022-08-28.csv to disk
Writing 2022-12-22.csv to disk
Writing 2022-08-08.csv to disk
Writing 2023-01-01.csv to disk
Writing 2022-09-25.csv to disk
Writing 2022-12-02.csv to disk
Writing 2022-11-12.csv to disk
Writing 2022-12-14.csv to disk
Writing 2022-10-29.csv to disk
Writing 2022-11-04.csv to disk
Writing 2022-10-05.csv to disk
Writing 2022-11-28.csv to disk
Writing 2022-08-24.csv to disk
Writing 2022-07-17.csv to disk
Writing 2022-09-09.csv to disk
Writing 2022-10-13.csv to disk
Finished. These files failed to parse:
/Users/jerryhu/Documents/Documents/Zenly/locations/.DS_Store
Retrying in 3 seconds.
/Users/jerryhu/Documents/Documents/Zenly/locations/.DS_Store couldn't be parsed.
Just put a try/except block around the parse call to skip the .DS_Store file.
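Alternatively, a small sketch of the same idea (assuming everything you actually want to parse ends in .html) is to filter the directory listing before calling parse, so hidden files such as .DS_Store never reach pd.read_html:

for filename in os.listdir(locations_folder):
    if not filename.endswith('.html'):
        continue  # skip .DS_Store and any other non-HTML files
    file = os.path.join(locations_folder, filename)
    try:
        parse(file)
    except Exception:
        failed_files.append(file)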

urlopen trouble while trying to download a gzip file

I am going to use the Wiktionary dump for the purpose of POS tagging. Somehow it gets stuck while downloading. Here is my code:
import nltk
from urllib import urlopen
from collections import Counter
import gzip
url = 'http://dumps.wikimedia.org/enwiktionary/latest/enwiktionary-latest-all-titles-in-ns0.gz'
fStream = gzip.open(urlopen(url).read(), 'rb')
dictFile = fStream.read()
fStream.close()
text = nltk.Text(word.lower() for word in dictFile())
tokens = nltk.word_tokenize(text)
Here is the error I get:
Traceback (most recent call last):
  File "~/dir1/dir1/wikt.py", line 15, in <module>
    fStream = gzip.open(urlopen(url).read(), 'rb')
  File "/usr/lib/python2.7/gzip.py", line 34, in open
    return GzipFile(filename, mode, compresslevel)
  File "/usr/lib/python2.7/gzip.py", line 89, in __init__
    fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
TypeError: file() argument 1 must be encoded string without NULL bytes, not str

Process finished with exit code 1
You are passing the downloaded data to gzip.open(), which expects to be passed a filename instead.
The code then tries to open a file whose name is the gzipped data, and fails.
Either save the URL data to a file and then use gzip.open() on that, or decompress the gzipped data using the zlib module instead. 'Saving' the data can be as easy as using a StringIO.StringIO() in-memory file object:
from StringIO import StringIO
from urllib import urlopen
import gzip
url = 'http://dumps.wikimedia.org/enwiktionary/latest/enwiktionary-latest-all-titles-in-ns0.gz'
inmemory = StringIO(urlopen(url).read())
fStream = gzip.GzipFile(fileobj=inmemory, mode='rb')
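On Python 3 the same idea would look roughly like this; this is a sketch only, using io.BytesIO and urllib.request in place of their Python 2 counterparts:

import gzip
import io
from urllib.request import urlopen

url = 'http://dumps.wikimedia.org/enwiktionary/latest/enwiktionary-latest-all-titles-in-ns0.gz'
inmemory = io.BytesIO(urlopen(url).read())  # keep the compressed download in memory
with gzip.GzipFile(fileobj=inmemory, mode='rb') as fStream:
    titles = fStream.read().decode('utf-8').splitlines()  # one page title per line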

Use codecs to read file with correct encoding: TypeError

I need to read from a file line by line, and I also need to make sure the encoding is handled correctly.
I wrote the following code:
#!/bin/bash
import codecs

filename = "something.x10"
f = open(filename, 'r')
fEncoded = codecs.getreader("ISO-8859-15")(f)

totalLength = 0
for line in fEncoded:
    totalLength += len(line)
print("Total Length is "+totalLength)
This code does not work on all files; on some files I get:
Traceback (most recent call last):
  File "test.py", line 11, in <module>
    for line in fEncoded:
  File "/usr/lib/python3.2/codecs.py", line 623, in __next__
    line = self.readline()
  File "/usr/lib/python3.2/codecs.py", line 536, in readline
    data = self.read(readsize, firstline=True)
  File "/usr/lib/python3.2/codecs.py", line 480, in read
    data = self.bytebuffer + newdata
TypeError: can't concat bytes to str
I'm using Python 3.3 and the script must work with this Python version.
What am I doing wrong? I was not able to find out which files work and which don't; even some plain ASCII files fail.
You are opening the file in non-binary mode. If you read from it, you get a string decoded according to your default encoding (http://docs.python.org/3/library/functions.html?highlight=open%20builtin#open).
The codecs StreamReader needs a byte stream (http://docs.python.org/3/library/codecs#codecs.StreamReader).
So this should work:
import codecs

filename = "something.x10"
f = open(filename, 'rb')
f_decoded = codecs.getreader("ISO-8859-15")(f)

total_length = 0
for line in f_decoded:
    total_length += len(line)
print("Total Length is " + str(total_length))
or you can use the encoding parameter on open:
f_decoded = open(filename, mode='r', encoding='ISO-8859-15')
The reader returns decoded data, so I fixed your variable name. Also, consider PEP 8 as a guide for formatting and coding style.
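With the encoding parameter, the whole script becomes roughly the following (a sketch, reusing the same file name and counter as above):

filename = "something.x10"

total_length = 0
with open(filename, mode='r', encoding='ISO-8859-15') as f_decoded:
    for line in f_decoded:
        total_length += len(line)
print("Total Length is " + str(total_length))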

unable to read some .wav files using scipy.io.wavread.read()

I am trying to read a .wav file using scipy.io.wavfile.read. It reads some files properly.
For some files it gives the following error:
Warning (from warnings module):
  File "D:\project\cardiocare-1.0\src\scipy\io\wavfile.py", line 121
    warnings.warn("chunk not understood", WavFileWarning)
WavFileWarning: chunk not understood

Traceback (most recent call last):
  File "D:\project\cardiocare-1.0\src\ccare\plot.py", line 37, in plot
    input_data = read(p.bitfile)
  File "D:\project\cardiocare-1.0\src\scipy\io\wavfile.py", line 119, in read
    data = _read_data_chunk(fid, noc, bits)
  File "D:\project\cardiocare-1.0\src\scipy\io\wavfile.py", line 56, in _read_data_chunk
    data = data.reshape(-1,noc)
ValueError: total size of new array must be unchanged
Can anyone suggest a solution?
I use the code below to read wav files. I know it does not solve your problem directly, but maybe you could read your wav file with this code and figure out what is wrong.
My experience is that wav files sometimes contain "strange" things that must be handled or removed.
Hope it helps you out.
Rgds,
Cyrex
import wave
import struct
import numpy as np

def wavRead(fileN):
    waveFile = wave.open(fileN, 'r')
    NbChanels = waveFile.getnchannels()
    data = [[] for _ in range(NbChanels)]
    for i in range(waveFile.getnframes()):
        waveData = waveFile.readframes(1)
        # each frame holds one sample per channel, interleaved; "<h" assumes 16-bit samples
        samples = struct.unpack("<%dh" % NbChanels, waveData)
        for ch in range(NbChanels):
            data[ch].append(int(samples[ch]))
    RetAR = []
    BitDebth = waveFile.getsampwidth() * 8  # sample width in bits
    for x in range(NbChanels):
        # scale the integer samples to the range [-1.0, 1.0)
        RetAR.append(np.array(data[x]) / float(pow(2, BitDebth - 1)))
    fs = waveFile.getframerate()
    return RetAR, fs
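A quick usage sketch (the file name here is just a placeholder):

channels, fs = wavRead("some_recording.wav")
print(len(channels), "channel(s) at", fs, "Hz,", len(channels[0]), "samples each")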

How to copy files stored in variables with shutil

I want to change the extension of a file which is taken as user-generated input (raw_input), but I need to keep the original file the same, so this program copies the content from the original file to the output file.
This is the part of my program that aims to do that:
var = raw_input("Enterfile ")
fid = open(var)
import os
name, extension = os.path.splitext(var)
path = os.path.abspath(var)
outputfile = os.path.splitext(var)[0]+ '.asd'
print path
print var
print outputfile
fo = open(outputfile, 'w')
import shutil
shutil.copyfile(var, fo)
data = fo.read()
print data
The error returned -
File "p.py", line 18, in <module>
shutil.copyfile(var, fo)
File "/usr/lib/python2.7/shutil.py", line 68, in copyfile
if _samefile(src, dst):
File "/usr/lib/python2.7/shutil.py", line 58, in _samefile
return os.path.samefile(src, dst)
File "/usr/lib/python2.7/posixpath.py", line 154, in samefile
s2 = os.stat(f2)
I am not sure what is wrong with my program; please help. Also, is there a more efficient way of doing this?
Thanks in advance.
The method shutil.copyfile works with file names, not open file objects. You don't need to open the files, and you definitely don't want to pass file objects to the copyfile method.
The code should look something more like this (and be sure to check http://docs.python.org/2/library/shutil.html if you are unsure about the parameter types):
import os.path
import shutil
input_filename = raw_input("Enterfile ")
base_name, extension = os.path.splitext(input_filename)
output_filename = base_name + '.asd'
shutil.copyfile(input_filename, output_filename)
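If you also want to print the copied contents, as the original script attempted with fo.read(), a short follow-up sketch (Python 2, matching the raw_input call above):

with open(output_filename) as fo:
    print fo.read()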
