Transitioning to speech recognition option for web application - python

I understand how it is possible to use speech to record data into a backend database. However, I am unsure about how I may allow this to be done from a web application. As I am not dealing with text, how do I offload the speech activation options and consequently allow text to be pushed from the web application?
import speech_recognition as sr
import gtts
from playsound import playsound
import os
from datetime import datetime
from notion import NotionClient
r = sr.Recognizer()
client = NotionClient(token, database_id)
def get_audio():
    """Record one utterance from the default microphone and return it.

    Uses the module-level recognizer ``r``; blocks until ``r.listen``
    returns.
    """
    with sr.Microphone() as source:
        print("Say something")
        audio = r.listen(source)
    return audio
def audio_to_text(audio):
    """Transcribe recorded audio with the module-level recognizer ``r``.

    Returns the recognized text, or an empty string when the audio could
    not be understood or the recognition request failed (a message is
    printed in either case).
    """
    text = ""
    try:
        text = r.recognize_google(audio)
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
    except sr.RequestError:
        print("could not request results from API")
    return text
def play_sound(text):
    """Speak ``text`` aloud by synthesizing it to a temporary MP3 file.

    Prints a message instead of raising when gTTS rejects the input with
    an AssertionError.
    """
    try:
        tts = gtts.gTTS(text)
        tempfile = "./temp.mp3"
        # NOTE(review): the save/play/remove steps were missing from the
        # snippet and are reconstructed from the imports (playsound, os)
        # -- confirm against the original script.
        tts.save(tempfile)
        playsound(tempfile)
        os.remove(tempfile)
    except AssertionError:
        print("could not play sound")
if __name__ == "__main__":
while True:
a = get_audio()
command = audio_to_text(a)
if ACTIVATION_COMMAND in command.lower():
play_sound("What can I do for you?")
note = get_audio()
note = audio_to_text(note)
if note:
now =
res = client.create_page(note, now, status="Active")
if res.status_code == 200:
play_sound("Stored new item")
import json
import requests
class NotionClient:
def __init__(self, token, database_id) -> None:
self.database_id = database_id
self.headers = {
"Authorization": "Bearer " + token,
"Content-Type": "application/json",
"Notion-Version": "2021-08-16"
# read, update
def create_page(self, description, date, status):
create_url = ''
data = {
"parent": { "database_id": self.database_id },
"properties": {
"Description": {
"title": [
"text": {
"content": description
"Date": {
"date": {
"start": date,
"end": None
"Status": {
"rich_text": [
"text": {
"content": status
data = json.dumps(data)
res =, headers=self.headers, data=data)
return res


Discord py How to filter messages coming in live from discord servers and dms

I'm trying to grab information from server A in a few channels (where I have no permissions, only view and read) to then take that information and send it to my server B (which I own).
Below is what I've done so far. All it does at the moment is forward every message from every server and DM to me.
I'm unable to filter for just Server A and the few channels in it, and then send those messages to my server.
import websocket
import json
import threading
import time
def send_json_request(ws, request):
    """Serialize ``request`` to JSON and send it over the websocket ``ws``."""
    # NOTE(review): the body was missing from the snippet; reconstructed
    # as the counterpart of recieve_json_response -- confirm.
    ws.send(json.dumps(request))
def recieve_json_response(ws):
    """Read one message from ``ws`` and return it decoded from JSON.

    Returns None when the received message is empty.
    """
    response = ws.recv()
    if response:
        return json.loads(response)
    return None
def heartbeat(interval, ws):
    """Send a gateway heartbeat (op 1) over ``ws`` every ``interval`` seconds.

    Loops forever; intended to run in its own thread.
    """
    print('Search activated')
    while True:
        # NOTE(review): the sleep was missing from the snippet; restored
        # because ``interval`` is otherwise unused -- confirm.
        time.sleep(interval)
        heartbeatJSON = {
            "op": 1,
            # None serializes to JSON null; the string "null" would be
            # sent as a literal string value.
            "d": None
        }
        send_json_request(ws, heartbeatJSON)
        print("Looking for information.")
ws = websocket.WebSocket()
event = recieve_json_response(ws)
heartbeat_interval = event['d']['heartbeat_interval'] / 1000
threading._start_new_thread(heartbeat, (heartbeat_interval, ws))
payload = {
"op": 2,
"d": {
"token": token,
"properties": {
"$os": 'windows',
'$browser': 'chrome',
'$device': 'pc'
send_json_request(ws, payload)
while True:
event = recieve_json_response(ws)
print(f"{event['d']['author']['username']}: {event['d']['content']}")
op_code = event('op')
if op_code == 11:
print('heartbeat received')

How to write an if statement using == for data loaded as json loads

I wrote this code and modified it many times, but I don't know why it doesn't work.
while True:
headers = {'client-id': twitch_Client_ID, 'Authorization': authorization}
response_channel = requests.get('' + twitchID, headers=headers)
if loads(response_channel.text)['data'][0]['type'] == 'live' and check == False:
title = loads(response_channel.text)['data'][0]['title']
game_name = loads(response_channel.text)['data'][0]['game_name']
username = loads(response_channel.text)['data'][0]['user_name']
thumbnail = ''
twitch_embed = discord.Embed(title=title , description="\n 방송보러가기 :")
twitch_embed.add_field(name="스트리머", value=username, inline=True)
twitch_embed.add_field(name="게임 이름", value=game_name, inline=True)
await channel.send("<#everyone> 오뱅온~", embeds=twitch_embed)
check = True
check = False
await asyncio.sleep(5)
"data": [{
"id": "39807966251",
"user_id": "119609287",
"user_login": "bboyul",
"user_name": "뽀율",
"game_id": "19620578",
"game_name": "Super People",
"type": "live",
"title": "입 딜 은 세 계 최 강 !(︶^︶)ノ",
"viewer_count": 14,
"started_at": "2021-08-13T13:02:23Z",
"language": "ko",
"thumbnail_url": "{width}x{height}.jpg",
"tag_ids": ["ab2975e3-b9ca-4b1a-a93e-fb61a5d5c3a4"],
"is_mature": false
"pagination": {}
I'm making a Twitch live-notification bot for Discord. However, it is difficult because there are not many resources about this in Korea. Please help.
The JSON response arrives, but print("offline") runs even though the stream type is live, and the notification is not sent.
Don't call a bare `loads` repeatedly; use `json.loads` once and save the result.
response_json = json.loads(response_channel.text)
if response_json['data'][0]['type'] == 'live' and check == False:

How can the Amazon Echo catch errors?

I am building an application for the Amazon Echo in python. When I speak a bad utterance that the Amazon Echo does not recognize, my skill quits and returns me to the home screen. I am looking to prevent this and repeat what was just uttered by the Amazon Echo.
To try to achieve this to some extent I try calling a function to say something when the session ends or bad input is detected.
def on_session_ended(session_ended_request, session):
    """ Called when the user ends the session.

    Is not called when the skill returns should_end_session=true
    """
    print("on_session_ended requestId=" + session_ended_request['requestId'] +
          ", sessionId=" + session['sessionId'])
    return get_session_end_response()
However, I just get an error from the Echo -- this function, on_session_ended is never entered.
So how do I conduct error catching and handling on the Amazon Echo?
UPDATE 1: I reduced the number of utterances and the number of intents with custom slots to one. Now a user should only speak A, B, C, or D. If they speak anything outside of this, then the intent is still triggered but with no slot value. Thus, I can do some error checking based on whether the slot value is there or not. However, this seems like not the best way to do it. When I try to add in intents with no slots and a corresponding utterance, anything that doesn't match either of my intents defaults to this new intent. How can I resolve these issues?
UPDATE 2: Here are some relevant sections of my code.
Intent handlers:
def lambda_handler(event, context):
    """ Entry point: routes the incoming request by its type.

    Returns the handler's response for LaunchRequest, IntentRequest and
    SessionEndedRequest; any other request type yields None.
    """
    print("Python START -------------------------------")
    # NOTE(review): the two continuation lines below were truncated in the
    # snippet and are reconstructed from the visible prefix -- confirm.
    print("event.session.application.applicationId=" +
          event['session']['application']['applicationId'])
    if event['session']['new']:
        on_session_started({'requestId': event['request']['requestId']},
                           event['session'])
    request_type = event['request']['type']
    if request_type == "LaunchRequest":
        return on_launch(event['request'], event['session'])
    elif request_type == "IntentRequest":
        return on_intent(event['request'], event['session'])
    elif request_type == "SessionEndedRequest":
        return on_session_ended(event['request'], event['session'])
    return None
def on_session_started(session_started_request, session):
    """ Logs the request id and session id of a newly started session """
    print("on_session_started requestId=" + session_started_request['requestId']
          + ", sessionId=" + session['sessionId'])
def on_launch(launch_request, session):
    """ Called when the user launches the skill without specifying what they want """
    print("on_launch requestId=" + launch_request['requestId'] +
          ", sessionId=" + session['sessionId'])
    # Dispatch to your skill's launch
    return create_new_user()
def on_intent(intent_request, session):
    """ Called when the user specifies an intent for this skill

    Dispatches on the intent name; any unrecognized intent ends the session.
    """
    print("on_intent requestId=" + intent_request['requestId'] +
          ", sessionId=" + session['sessionId'])
    intent = intent_request['intent']
    intent_name = intent['name']
    # Both values are None when the key is absent.
    attributes = session.get("attributes")
    intent_slots = intent.get("slots")
    # Dispatch to skill's intent handlers
    # TODO : Authenticate users
    # TODO : Start session in a different spot depending on where user left off
    if intent_name == "StartQuizIntent":
        return create_new_user()
    if intent_name == "AnswerIntent":
        return get_answer_response(intent_slots, attributes)
    if intent_name == "TestAudioIntent":
        return get_audio_response()
    if intent_name == "AMAZON.HelpIntent":
        return get_help_response()
    # AMAZON.CancelIntent, AMAZON.StopIntent and anything unrecognized
    # all end the session.
    return get_session_end_response()
def on_session_ended(session_ended_request, session):
    """ Called when the user ends the session.

    Is not called when the skill returns should_end_session=true
    """
    print("on_session_ended requestId=" + session_ended_request['requestId'] +
          ", sessionId=" + session['sessionId'])
    return get_session_end_response()
Then we have the functions that actually get called and the response builders. I have edited some of the code for privacy. I haven't built up all the display response text fields and have some uids hard coded so I don't have to worry about authentication yet.
# --------------- Functions that control the skill's behavior ------------------
####### GLOBAL SETTINGS ########
utility_background_image = ""
def get_welcome_response():
    """ Returns the welcome message if a user invokes the skill without specifying an intent """
    session_attributes = {}
    card_title = ""
    speech_output = ("Hello and welcome ... quiz .... blah blah ...")
    reprompt_text = "Ask me to start and we will begin the test!"
    should_end_session = False
    # visual responses
    primary_text = ''  # TODO
    secondary_text = ''  # TODO
    # NOTE(review): the tail of this call was truncated in the snippet; the
    # display directive is reconstructed to mirror get_help_response --
    # confirm.
    return build_response(
        session_attributes,
        build_speechlet_response(
            card_title, speech_output, reprompt_text, should_end_session,
            build_display_response(utility_background_image,
                                   card_title, primary_text,
                                   secondary_text)))
def get_session_end_response():
    """ Returns the ending message if a user errs or exits the skill """
    session_attributes = {}
    card_title = ""
    speech_output = "Thank you for your time!"
    reprompt_text = ''
    should_end_session = True
    # visual responses
    primary_text = ''  # TODO
    secondary_text = ''  # TODO
    # NOTE(review): the tail of this call was truncated in the snippet; the
    # display directive is reconstructed to mirror get_help_response --
    # confirm.
    return build_response(
        session_attributes,
        build_speechlet_response(
            card_title, speech_output, reprompt_text, should_end_session,
            build_display_response(utility_background_image,
                                   card_title, primary_text,
                                   secondary_text)))
def get_audio_response(audio_url=None):
    """ Tests the audio capabilities of the echo

    Args:
        audio_url: URL of the audio clip to play. When omitted, no audio
            directive is attached to the response.
    """
    session_attributes = {}
    card_title = ""  # TODO : keep no 'welcome'?
    speech_output = ""
    reprompt_text = ""
    should_end_session = False
    # build_audio_response requires a url; calling it with no argument
    # raised TypeError, so the skill returned no valid response for
    # TestAudioIntent.
    directive = build_audio_response(audio_url) if audio_url else None
    return build_response(
        session_attributes,
        build_speechlet_response(card_title, speech_output, reprompt_text,
                                 should_end_session, directive))
def create_new_user():
    """ Creates a new user that the server will recognize and whose action will be stored in db

    Asks the server for a new uuid, then returns the first question
    response for that user.
    """
    url = "http://XXXXXX:XXXX/create_user"
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read().decode('utf8'))
    uuid = data["uuid"]
    return ask_question(uuid)
def query_server(uuid):
    """ Requests the next question for the given user from the server

    Returns:
        None when the server reports next_quid as the string "None";
        {"status": True, "data": {...}} with the question fields when the
        server reports success; {"status": False} otherwise.
    """
    url = "http://XXXXXXXX:XXXX/get_question_json?uuid=%s" % (uuid)  # TODO : change needs to to be uuid
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read().decode('utf8'))
    if not data["status"]:
        return {"status": False}
    payload = data["data"]
    # Compare by value: `is "None"` tested object identity, which is not
    # a reliable way to compare a decoded JSON string.
    if payload["next_quid"] == "None":
        return None
    return {
        "status": True,
        "data": {
            "question": payload["question"],
            "quid": payload["quid"],
            # list of answers stored in order they should be spoken
            "answers": payload["answer"],
            # either 'IMAGE', 'AUDIO', or 'VIDEO'
            "media_type": payload["media_type"],
            # list of images that correspond to order of answers list
            "images": payload["image"],
            "audio": payload["audio"],
            "video": payload["video"],
        },
    }
def ask_question(uuid):
    """ Returns a quiz question to the user since they specified a QuizIntent

    Ends the session when query_server returns None; speaks an apology
    when the server reports a failure status.
    """
    question_data = query_server(uuid)
    if question_data is None:
        return get_session_end_response()
    card_title = "Ask Question"
    speech_output = ""
    session_attributes = {}
    should_end_session = False
    reprompt_text = ""
    # visual responses
    # `question` must be bound on the failure path too: it is used as the
    # display title in the return statement below.
    question = ""
    images = []
    answers = []
    if question_data["status"]:
        session_attributes = {
            "quid": question_data["data"]["quid"],
            # NOTE(review): hard-coded rather than the `uuid` argument;
            # the surrounding text says uids are hard coded for now.
            "uuid": "df876c9d-cd41-4b9f-a3b9-3ccd1b441f24",
            "question_start_time": time.time()
        }
        question = question_data["data"]["question"]
        answers = question_data["data"]["answers"]  # answers are shuffled when pulled from server
        images = question_data["data"]["images"]
        # TODO : consider different media types
        speech_output += question
        reprompt_text += ("Please choose an answer using the official NATO alphabet. For example," +
                          " A is alpha, B is bravo, and C is charlie.")
    else:
        speech_output += "Oops! This is embarrassing. There seems to be a problem with the server."
        reprompt_text += "I don't exactly know where to go from here. I suggest restarting this skill."
    return build_response(
        session_attributes,
        build_speechlet_response(
            card_title, speech_output, reprompt_text, should_end_session,
            build_display_response_list_template2(
                title=question, image_urls=images, answers=answers)))
def send_quiz_responses_to_server(uuid, quid, time_used_for_question, answer_given):
    """ Sends the users responses back to the server to be stored in the database

    Returns the "status" field of the server's JSON reply.
    """
    import urllib.parse

    # answer_given comes from the user's speech, so the query string is
    # escaped rather than interpolated raw into the URL.
    query = urllib.parse.urlencode({
        "uuid": uuid,
        "quid": quid,
        "time": time_used_for_question,
        "answer_given": answer_given,
    })
    url = "http://XXXXXXXX:XXXX/send_answers?" + query
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read().decode('utf8'))
    return data["status"]
def get_answer_response(slots, attributes):
    """ Returns a correct/incorrect message to the user depending on their AnswerIntent

    Ends the session when the session attributes or the Answer slot
    value are missing.
    """
    try:
        # get time, quid, and uuid from attributes
        question_start_time = attributes["question_start_time"]
        quid = attributes["quid"]
        uuid = attributes["uuid"]
        # get answer from slots
        answer_given = slots["Answer"]["value"].lower()
    except (KeyError, TypeError):
        # KeyError: a key or the slot value is absent.
        # TypeError: slots or attributes is None (on_intent passes None
        # when the request carries no slots / session attributes).
        return get_session_end_response()
    # calculate a rough estimate of the time it took to answer question
    time_used_for_question = str(int(time.time() - question_start_time))
    # record response data by sending it to the server
    send_quiz_responses_to_server(uuid, quid, time_used_for_question, answer_given)
    return ask_question(uuid)
def get_help_response():
    """ Returns a help message to the user since they called AMAZON.HelpIntent """
    session_attributes = {}
    card_title = ""
    speech_output = ""  # TODO
    reprompt_text = ""  # TODO
    should_end_session = False
    return build_response(
        session_attributes,
        build_speechlet_response(
            card_title, speech_output, reprompt_text, should_end_session,
            build_display_response(utility_background_image, card_title)))
# --------------- Helpers that build all of the responses ----------------------
def build_hint_response(hint):
    """ Builds the hint response for a display.

    For example, Try "Alexa, play number 1" where "play number 1" is the hint.
    """
    return {
        "type": "Hint",
        "hint": {
            "type": "RichText",
            "text": hint
        }
    }
def build_display_response(url='', title='', primary_text='', secondary_text='', tertiary_text=''):
    """ Builds the display template for the echo show to display.

    Echo show screen is 1024px x 600px
    For additional image size requirements, see the display interface reference.

    Returns a one-element list holding a Display.RenderTemplate directive
    (BodyTemplate1).
    """
    return [{
        "type": "Display.RenderTemplate",
        "template": {
            "type": "BodyTemplate1",
            "token": "question",
            "title": title,
            "backgroundImage": {
                "contentDescription": "Question",
                "sources": [
                    {"url": url}
                ]
            },
            "textContent": {
                "primaryText": {
                    "type": "RichText",
                    "text": primary_text
                },
                "secondaryText": {
                    "type": "RichText",
                    "text": secondary_text
                },
                "tertiaryText": {
                    "type": "RichText",
                    "text": tertiary_text
                }
            }
        }
    }]
def build_list_item(url='', primary_text='', secondary_text='', tertiary_text=''):
    """ Builds one list item (image plus text) for a list display template """
    return {
        "token": "question_item",
        "image": {
            "sources": [
                {"url": url}
            ],
            "contentDescription": "Question Image"
        },
        "textContent": {
            "primaryText": {
                "type": "RichText",
                "text": primary_text
            },
            "secondaryText": {
                "text": secondary_text,
                "type": "PlainText"
            },
            "tertiaryText": {
                "text": tertiary_text,
                "type": "PlainText"
            }
        }
    }
def build_display_response_list_template2(title='', image_urls=(), answers=()):
    """ Builds a ListTemplate2 display directive pairing each image with its answer.

    Images and answers are paired positionally; extra entries in the longer
    sequence are ignored. Defaults are immutable tuples so no list object
    is shared between calls.
    """
    list_items = [
        build_list_item(url=image, primary_text=answer)
        for image, answer in zip(image_urls, answers)
    ]
    return [{
        "type": "Display.RenderTemplate",
        "template": {
            "type": "ListTemplate2",
            "token": "question",
            "title": title,
            "backgroundImage": {
                "contentDescription": "Question Background",
                "sources": [
                    {"url": ""}
                ]
            },
            "listItems": list_items
        }
    }]
def build_audio_response(url):  # TODO add a display repsonse here as well
    """ Builds audio response. I.e. plays back an audio file with zero offset """
    return [{
        "type": "AudioPlayer.Play",
        "playBehavior": "REPLACE_ALL",
        "audioItem": {
            "stream": {
                "token": "audio_clip",
                "url": url,
                "offsetInMilliseconds": 0
            }
        }
    }]
def build_speechlet_response(title, output, reprompt_text, should_end_session, directive=None):
    """ Builds speechlet response and puts display response inside

    `output` is used both as the spoken text and as the card content;
    `directive` is stored under 'directives' as given.
    """
    return {
        'outputSpeech': {
            'type': 'PlainText',
            'text': output
        },
        'card': {
            'type': 'Simple',
            'title': title,
            'content': output
        },
        'reprompt': {
            'outputSpeech': {
                'type': 'PlainText',
                'text': reprompt_text
            }
        },
        'shouldEndSession': should_end_session,
        'directives': directive
    }
def build_response(session_attributes, speechlet_response):
    """ Builds the complete response to send back to Alexa """
    return {
        'version': '1.0',
        'sessionAttributes': session_attributes,
        'response': speechlet_response
    }
UPDATE 3: I updated the intents so there is now one custom intent that takes a custom slot, and another custom intent that takes no slots. These custom intents also have their own sample utterances. Both the intents and their utterances are listed below. When I start the skill, it works fine. Then when I say/type "zoo zoo zoo" to test bad input, I get an error. Both the request for "zoo zoo zoo" and the response are listed below. I am looking for a good way to catch this bad input error and resume/revert the skill back to its previous state.
"intent": "TestAudioIntent"
"slots": [
"name": "Answer",
"type": "LETTER"
"intent": "AnswerIntent"
Sample Utterances:
AnswerIntent {Answer}
AnswerIntent I think it is {Answer}
TestAudioIntent test the audio
Example JSON request:
"session": {
"new": false,
"sessionId": "SessionId.574f0b74-be17-4f79-bbd6-ce926a1bf856",
"application": {
"applicationId": "XXXXXXXX"
"attributes": {
"quid": "7fa9fcbf-35db-4bbd-ac73-37977bcef563",
"question_start_time": 1515691612.7381804,
"uuid": "df876c9d-cd41-4b9f-a3b9-3ccd1b441f24"
"user": {
"userId": "XXXXXXXX"
"request": {
"type": "IntentRequest",
"requestId": "EdwRequestId.23765cb0-f327-4f52-a9a3-b9f92a375a5f",
"intent": {
"name": "TestAudioIntent",
"slots": {}
"locale": "en-US",
"timestamp": "2018-01-11T17:26:57Z"
"context": {
"AudioPlayer": {
"playerActivity": "IDLE"
"System": {
"application": {
"applicationId": "XXXXXXXX"
"user": {
"userId": "XXXXXXXX"
"device": {
"supportedInterfaces": {
"Display": {
"templateVersion": "1",
"markupVersion": "1"
"version": "1.0"
And I get the following testing error as a response:
The remote endpoint could not be called, or the response it returned was invalid.
What I ended up doing is using something similar to Amazon's dialogue management system. If a user says something that doesn't fill a slot, I re-prompt them with that question. My goal is to record a user's statements/answers each time they speak, so I didn't use the built-in dialogue management. Additionally, I used Amazon's slot synonyms for all my slots to make my model more robust.
I still don't know that this is the best way, but it is a starting point and seems to work O.K....

Properly formatting http.client.HTTPSConnection in Python

Overall, I'm trying to invoke the MS Cognitive key phrases API from Python 3.5.1 :: Anaconda 4.0.0 (32-bit). I looked everywhere and tried to incorporate this stackoverflow response.
To call the API your account key below marked as ## needs to be added from here,
however to format the body correctly you probably don't need the account key. A good portion of the code below is from sample code.
Request body should look like
body = {
"documents": [
"language": "en",
"id": "1",
"text": "One line of text."
"language": "en",
"id": "2",
"text": "another line of text."
my code <it now works!!>
import sys
import os.path
import http.client
import urllib.request
import urllib.parse
import urllib.error
import base64
import json
subscription_key = '##'
headers = {
'Content-Type': 'application/json',
'Ocp-Apim-Subscription-Key': subscription_key
#input text is: ID | text to analyze. How my input file is formatted.
input_text = ["100|One line of text.", "101|another line of text."]
# Inputs holds the params to call the web service in bulk.
body = []
indx = 1
for line in input_text:
input_text = line.split("|")
print ('-----\n')
print ("Input text is:", input_text)
input_text_analyze = input_text[1]
print ('\nInput text to be analyzed:', input_text_analyze)
body.append({ "language" : "en", "id" : str(indx), "text" : input_text_analyze })
indx = indx + 1
print ('-----\n')
print ('\nBody has', body)
print ("Calling API to get keywords...")
body_documents = { 'documents': body }
print ("\nParams:", body_documents)
params = urllib.parse.urlencode({ })
conn = http.client.HTTPSConnection('')
conn.request("POST", "/text/analytics/v2.0/keyPhrases?%s" % params, str(body_documents), headers)
response = conn.getresponse()
keyword_obj =
print("Returned keyword_obj is: ", keyword_obj)
except Exception as e:
print("[Errno {0}] {1}".format(e.errno, e.strerror))
I made 2 changes to the code above that allowed it to work. 1) I was getting my params and body mixed up. 2) I needed to add str(body_documents) in my POST. Both beginner mistakes.

YouTube API video upload error: parseError /w python

The example code for Google's YouTube Data API is a piece of junk. It's so complicated and tied to the oauth redirect flow that I can't use it. Trying to go raw with requests pip and not getting too far.
I've followed the instructions exactly (as far as I can tell), with the following code:
import json
import os
import sys
import urllib
import requests
payload_file = None
payload = None
print 'Loading Config'
# Get the directory path of this file. When using any relative file paths make
# sure they are relative to current_dir so that the script can be run from any CWD.
current_dir = os.path.dirname(os.path.abspath(__file__))
# Reads in the config.json file then parses it
config = json.loads(open(os.path.join(current_dir, '..', 'config.json')).read())
print 'Parsing Payload'
for i in range(len(sys.argv)):
if sys.argv[i] == "--json" and (i + 1) < len(sys.argv):
payload = json.loads(sys.argv[i + 1])
elif sys.argv[i] == "-payload" and (i + 1) < len(sys.argv):
payload_file = sys.argv[i + 1]
with open(payload_file,'r') as f:
payload = json.loads(
print 'Configuring youtube with token {0}'.format(payload['token'])
print 'Downloading video...'
# See how big it is
f = urllib.urlopen(payload['url'])
content_length = int(f.headers["Content-Length"])
# Download it
# urllib.urlretrieve(payload['url'], "video.mp4")
metadata = {
'snippet' : {
'title': payload['title'],
"categoryId": 22
'status' : {
"privacyStatus": "public",
"embeddable": True,
"license": "youtube"
if 'tags' in payload:
metadata['snippet']['tags'] = payload['tags']
if 'description' in payload:
metadata['snippet']['description'] = payload['description']
headers = {
'Authorization' : 'Bearer {0}'.format(payload['token']),
'Content-Type' : 'application/json; charset=UTF-8',
'Content-Length' : json.dumps(metadata).__len__(),
'X-Upload-Content-Length' : content_length,
'X-Upload-Content-Type' : 'video/*',
print 'Attempting to upload video'
print headers
# upload video file
r =',status', data=metadata, headers=headers);
print "RESPONSE!"
print r.text
# files = {
# 'file': video_file,
# }
# r ='', data={ "video" : video }, headers=headers);
Obviously it's not finished, but it's dying on the metadata upload request with the following output:
Loading Config
Parsing Payload
Configuring youtube with token <access-token>
Downloading video...
Attempting to upload video
{'X-Upload-Content-Length': 51998563, 'Content-Length': 578, 'Content-Type': 'application/json; charset=UTF-8', 'X-Upload-Content-Type': 'video/*', 'Authorization': 'Bearer <access-token>'}
"error": {
"errors": [
"domain": "global",
"reason": "parseError",
"message": "Parse Error"
"code": 400,
"message": "Parse Error"
This error is not even listed in their "Errors" docs.
What is wrong with my code?
Here is an example in python that works. It assumes you've already done the oauth part though.
import requests
from os import fstat
import json
fi = open('myvideo.mp4')
base_headers = {
'Authorization': '%s %s' % (auth_data['token_type'],
'content-type': 'application/json'
initial_headers = base_headers.copy()
'x-upload-content-length': fstat(fi.fileno()).st_size,
'x-upload-content-type': 'video/mp4'
initial_resp =
'snippet': {
'title': 'my title',
'status': {
'privacyStatus': 'unlisted',
'embeddable': True
upload_url = initial_resp.headers['location']
resp = requests.put(
The above is great; just adding: you can also get the YouTube id from the response (for future use):
cont = json.loads(resp.content)
youtube_id = cont['id']
