Cant access function in OOP python - python

For some reason, in my fruit scraper, i cannot access anything from listify function.
I'am getting an error, for exmaple: NameError: name 'family' is not defined.
And i cant figure out what is wrong with my code - is my function is bad, or i'am doing something wrong with class ?
import requests
import json
import random
import pickle
class FruitScraper():
def __init__(self):
self.name = []
self.id = []
self.family = []
self.genus = []
self.order = []
self.carbohydrates = []
self.protein = []
self.fat = []
self.calories = []
self.sugar = []
def scrape_all_fruits(self):
data_list = []
try:
for ID in range(1, 10):
url = f'https://www.fruityvice.com/api/fruit/{ID}'
response = requests.get(url)
data = response.json()
data_list.append(data)
except:
pass
return data_list
def listify(self, stats):
alist = json.dumps(self.scrape_all_fruits())
jsonSTr = json.loads(alist)
for i in jsonSTr:
try:
self.name.append(i['name'])
self.id.append(i['id'])
self.family.append(i['family'])
self.genus.append(i['genus'])
self.order.append(i['order'])
self.carbohydrates.append(i['nutritions']['carbohydrates'])
self.protein.append(i['nutritions']['protein'])
self.fat.append(i['nutritions']['fat'])
self.calories.append(i['nutritions']['calories'])
self.sugar.append(i['nutritions']['sugar'])
except:
pass
return stats
def get_summary(self):
for i in self.listify(zip(self.fat, self.protein, self.calories, self.sugar, self.carbohydrates, self.name)):
nutr_stats = f'\nNutrients maximum statistics:\nFat: {max(self.fat)}\nProtein: {max(self.protein)}\nCarbohydrates: {max(self.carbohydrates)}\nCalories: {max(self.calories)}\nSugar: {max(self.sugar)}' \
f'\nNutrients minimum statistics:\nFat: {min(self.fat)}\nProtein: {min(self.protein)}\nCarbohydrates: {min(self.carbohydrates)}\nCalories: {min(self.calories)}\nSugar: {min(self.sugar)}' \
f'\nTotal fruits scraped: {len(self.name)}'
return nutr_stats
Scraped_info = FruitScraper().scrape_all_fruits()
Listified_info = FruitScraper().listify(family)
Fruits_statistics = FruitScraper().get_summary()
It's my first time doing OOP.

Please consider changing this
Scraped_info = FruitScraper().scrape_all_fruits()
Listified_info = FruitScraper().listify(family)
Fruits_statistics = FruitScraper().get_summary()
to
myScraper = FruitScraper()
Scraped_info = myScraper.scrape_all_fruits()
myScraper.listify()
Fruits_statistics = myScraper.get_summary()
Otherwise you create three different objects of this class and discard them with all their attributes after running the individual method once.
This might also be critical to define family in this line of the code:
Listified_info = myScraper.listify(family)
But I can't see how you intended to use the parameter stats in your method listify(). It is just received and returned. I suggest that you change:
def listify(self, stats):
to
def listify(self):
and remove
return stats
If you want to get those lists inside the object of this class returned by listify(), you may do the following (but this is not OOP way of doing things):
import requests
import json
import copy
class FruitScraper():
def __init__(self):
self.name = []
self.id = []
self.family = []
self.genus = []
self.order = []
self.carbohydrates = []
self.protein = []
self.fat = []
self.calories = []
self.sugar = []
def collect_all_lists(self):
self.allLists = dict('name': self.name, 'id': self.id, 'family': self.family, 'genus': self.genus, 'order': self.order, 'carbohydrates': self.carbohydrates, 'protein': self.protein, 'fat': self.fat, 'calories': self.calories, 'sugar': self.sugar)
def scrape_all_fruits(self):
data_list = []
try:
for ID in range(1, 10):
url = f'https://www.fruityvice.com/api/fruit/{ID}'
response = requests.get(url)
data = response.json()
data_list.append(data)
except:
pass
return data_list
def listify(self):
alist = json.dumps(self.scrape_all_fruits())
jsonSTr = json.loads(alist)
for i in jsonSTr:
try:
self.name.append(i['name'])
self.id.append(i['id'])
self.family.append(i['family'])
self.genus.append(i['genus'])
self.order.append(i['order'])
self.carbohydrates.append(i['nutritions']['carbohydrates'])
self.protein.append(i['nutritions']['protein'])
self.fat.append(i['nutritions']['fat'])
self.calories.append(i['nutritions']['calories'])
self.sugar.append(i['nutritions']['sugar'])
except:
pass
self.collect_all_lists()
return copy.deepcopy(self.allLists)
def get_summary(self):
for i in self.listify(zip(self.fat, self.protein, self.calories, self.sugar, self.carbohydrates, self.name)):
nutr_stats = f'\nNutrients maximum statistics:\nFat: {max(self.fat)}\nProtein: {max(self.protein)}\nCarbohydrates: {max(self.carbohydrates)}\nCalories: {max(self.calories)}\nSugar: {max(self.sugar)}' \
f'\nNutrients minimum statistics:\nFat: {min(self.fat)}\nProtein: {min(self.protein)}\nCarbohydrates: {min(self.carbohydrates)}\nCalories: {min(self.calories)}\nSugar: {min(self.sugar)}' \
f'\nTotal fruits scraped: {len(self.name)}'
return nutr_stats
myScraper = FruitScraper()
Scraped_info = myScraper.scrape_all_fruits()
Listified_info = myScraper.listify()
Fruits_statistics = myScraper.get_summary()

Related

Scraped youtube comments amount and real amount are different

Im new to Python and Im trying to code a commentscraper for youtube with the most important informations, which I put in a JSON-file. But the my amount of comments and replys is not the same as on Youtube. I don't know, where my error is. I recognized, that it doesn't write any data in the files, if there are less than 20 comments, but I don't know, where I have to change something...
Example:
https://youtu.be/Re1m9O7q-9U here I get 102, but it should be 107
https://youtu.be/Q9Y5m1fQ7Fk here I get 423, but it should be 486
https://youtu.be/cMhE5BfmFkM here I get 1315, but it should be 2052
Here is the code:
class YT_Comments:
def __init__(self, api_key):
self.api_key = api_key
self.comment_int = 0
def get_video_comments(self, video_id, limit):
url = f"https://youtube.googleapis.com/youtube/v3/commentThreads?part=replies%2C%20snippet&order=relevance&videoId={video_id}&key={self.api_key}"
vid_comments = []
pc, npt = self._get_comments_per_page(url)
if limit is not None and isinstance(limit, int):
url += f"&maxResults={str(limit)}"
while (npt is not None):
nexturl = url + "&pageToken=" + npt
pc, npt = self._get_comments_per_page(nexturl)
vid_comments.append(pc)
print(self.comment_int)
print(len(vid_comments))
return vid_comments
def _get_comments_per_page(self, url):
json_url = requests.get(url)
data = json.loads(json_url.text)
page_comments = []
if "items" not in data:
return page_comments, None
item_data = data["items"]
nextPageToken = data.get("nextPageToken", None)
for item in tqdm.tqdm(item_data):
try:
kind = item["kind"]
if kind == "youtube#comment" or "youtube#commentThread":
comment_text = item["snippet"]["topLevelComment"]["snippet"]["textOriginal"]
comment_author = item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"]
author_id = item["snippet"]["topLevelComment"]["snippet"]["authorChannelId"]["value"]
comment_like_count = item["snippet"]["topLevelComment"]["snippet"]["likeCount"]
comment_date = item["snippet"]["topLevelComment"]["snippet"]["publishedAt"]
comment = {"comment_text" : comment_text,
"comment_author" : comment_author,
"comment_author_id" : author_id,
"comment_like_count" : comment_like_count,
"comment_date" : comment_date}
replies_l = []
self.comment_int += 1
try:
replies = item["replies"]["comments"]
for reply in replies:
reply_txt = reply["snippet"]["textOriginal"]
reply_author = reply["snippet"]["authorDisplayName"]
reply_author_id = reply["snippet"]["authorChannelId"]["value"]
reply_like_count = reply["snippet"]["likeCount"]
reply_date = reply["snippet"]["publishedAt"]
reply_dict = {"text" : reply_txt,
"author" : reply_author,
"author_id" : reply_author_id,
"likes" : reply_like_count,
"date" : reply_date}
replies_l.append(reply_dict)
self.comment_int +=1
except KeyError:
replies_l.append(None)
comment_dict = {
"comment": comment,
"replies": replies_l,
}
page_comments.append(comment_dict)
except KeyError:
print("No Comments")
return page_comments, nextPageToken

can't adapt type 'data'

I have data class:
class data:
def __init__(self, ReadTime, Concentration_PM10, Concentration_SO2, Concentration_O3, Concentration_NO2, Concentration_CO, AQI_PM10,
AQI_SO2,AQI_O3, AQI_NO2, AQI_CO, AQI_AQIIndex, AQI_ContaminantParameter, AQI_State, AQI_Color ):
self.ReadTime = ReadTime
self.Concentration_PM10 = Concentration_PM10
self.Concentration_SO2 = Concentration_SO2
self.Concentration_O3 = Concentration_O3
self.Concentration_NO2 = Concentration_NO2
self.Concentration_CO = Concentration_CO
self.AQI_PM10 = AQI_PM10
self.AQI_SO2 = AQI_SO2
self.AQI_O3 = AQI_O3
self.AQI_NO2 = AQI_NO2
self.AQI_CO = AQI_CO
self.AQI_AQIIndex = AQI_AQIIndex
self.AQI_ContaminantParameter = AQI_ContaminantParameter
self.AQI_State = AQI_State
self.AQI_Color = AQI_Color
I'm sending a request to an api and populating the variables into a list.:
list = []
for i in result:
list.append( data(i['ReadTime'], i['Concentration']['PM10'], i['Concentration']['SO2'],i['Concentration']['O3'],
i['Concentration']['NO2'],i['Concentration']['CO'],i['AQI']['PM10'],
i['AQI']['SO2'],i['AQI']['O3'],i['AQI']['NO2'],i['AQI']['CO'],i['AQI']['AQIIndex'],i['AQI']['ContaminantParameter'],
i['AQI']['State'],i['AQI']['Color']))
then I want to insert this list into a table in PostgreSQL but I get error "can't adapt type 'data'"
list_record = ", ".join(["%s"] * len(list))
query_insert= (f"INSERT INTO hava_kalitesi (ReadTime, Concentration_PM10, Concentration_SO2, Concentration_O3, Concentration_NO2, Concentration_CO, AQI_PM10, AQI_SO2, AQI_O3, AQI_NO2, AQI_CO, AQI_AQIIndex, AQI_ContaminantParameter,AQI_State,AQI_Color) VALUES {list_record}"
)
cursor.execute(query_insert,list)

`FAISSDocumentStore` in `haystack` always returns empty results

I am new to haystack and I am using FAISSDocumentStore and EmbeddingRetriever to implement a QA system. This is my code:
from haystack.document_stores import InMemoryDocumentStore, FAISSDocumentStore
from haystack.nodes import TfidfRetriever, DensePassageRetriever, EmbeddingRetriever
from haystack.nodes import FARMReader, TransformersReader
from haystack.nodes import FARMReader
from haystack.pipelines import ExtractiveQAPipeline
import requests
import pandas as pd
class NeuralSearch:
def __init__(self):
self.HIDDEN_DIMS = 384
self.FAISS_INDEX = "Flat"
self.path = "https://raw.githubusercontent.com/deepset-ai/COVID-QA/master/data/faqs/faq_covidbert.csv"
#property
def document_store(self):
doc_store = FAISSDocumentStore(
embedding_dim=self.HIDDEN_DIMS, faiss_index_factory_str=self.FAISS_INDEX
)
return doc_store
#property
def retriever(self):
retriever = EmbeddingRetriever(
document_store=self.document_store,
embedding_model="sentence-transformers/all-MiniLM-L6-v2",
use_gpu=False,
)
return retriever
def load_data(self):
data = requests.get(self.path)
open("small_faq_covid.csv", "wb").write(data.content)
df = pd.read_csv("small_faq_covid.csv")
df.fillna(value="", inplace=True)
df["question"] = df["question"].apply(lambda x: x.strip())
questions = list(df["question"].values)
df["question_emb"] = self.retriever.embed_queries(texts=questions)
df = df.rename(columns={"question": "content"})
# # Convert Dataframe to list of dicts and index them in our DocumentStore
docs_to_index = df.to_dict(orient="records")
self.document_store.write_documents(docs_to_index)
self.document_store.update_embeddings(self.retriever)
self.document_store.save("testfile_path")
ss = FAISSDocumentStore.load(index_path="testfile_path")
# print(docs_to_index)
return docs_to_index, ss
#property
def reader(self):
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
return reader
#property
def pipeline(self):
pipe = ExtractiveQAPipeline(self.reader, self.retriever)
return pipe
def predict(self, query):
prediction = self.pipeline.run(
query=query,
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
)
return prediction
if __name__ == "__main__":
n = NeuralSearch()
q = "What is a novel coronavirus?"
print(n.predict(q))
my problem is that the FAISSDocumentStore always returns empty predictions as follows:
{'answers': [], 'documents': [], 'root_node': 'Query', 'params': {'Retriever': {'top_k': 10}, 'Reader': {'top_k': 5}}, 'query': 'What is a novel coronavirus?', 'node_id': 'Reader'}
How to fix it? and are there any tutorials on how to use FAISSDocumentStore?
Perhaps the problem is that you do not load the base itself here:
ss = FAISSDocumentStore.load(index_path="testfile_path")
Try adding a path to the configuration, it has a path to the base:
document_store = FAISSDocumentStore.load(index_path="file.faiss", config_path="file.json")

Shows a TypeError: unhashable type: 'list'

This is a K-Knights problem solved with CSP, and the issue seems to be the constraints.
Error coming from here
File "F:/Lectures/AI Lab/Codes/nonattacking/aicodes/csp.py", line 23, in init
self.setUpVariableDomains()
File "F:/Lectures/AI Lab/Codes/nonattacking/aicodes/csp.py", line 28, in setUpVariableDomains
self.addVariableDomain(var, self._domain)
File "F:/Lectures/AI Lab/Codes/nonattacking/aicodes/csp.py", line 35, in addVariableDomain
self._domainOfVariable[var] = copy.deepcopy(domain)
TypeError: unhashable type: 'list'
import variable
import copy
import notEqualConstraint
import simpleInference
import time
import backtrackingSearch
import consoleListener
import nonAttackingConstraint
class CSP():
def __init__(self, variables = [], domains = [], constraints = []):
self._variables = variables
self._domain = domains
self._constraints = constraints
self._domainOfVariable = {}
self._contraintsOfVariable = {}
self.setUpVariableDomains()
self.setUpConstraints()
def setUpVariableDomains(self):
for var in self._variables:
self.addVariableDomain(var, self._domain)
def setUpConstraints(self):
for constraint in self._constraints:
self.addConstraint(constraint)
def addVariableDomain(self,var,domain):
self._domainOfVariable[var] = copy.deepcopy(domain)
def addConstraint(self,constraint):
for var in constraint.getScope():
if var not in self._contraintsOfVariable:
self._contraintsOfVariable[var] = []
self._contraintsOfVariable[var].append(constraint)
def addSingleConstraint(self,constraint):
self._constraints.append(constraint)
for var in constraint.getScope():
if var not in self._contraintsOfVariable:
self._contraintsOfVariable[var] = []
self._contraintsOfVariable[var].append(constraint)
def addVariable(self,variable):
self._variables.append(variable)
self.addVariableDomain(variable,self._domain)
def getVariables(self):
return self._variables
def getDomainValues(self,var):
return self._domainOfVariable[var]
def getConstraints(self,var):
if var not in self._contraintsOfVariable:
return []
return self._contraintsOfVariable[var]
def getVariableDomains(self):
return self._domainOfVariable
def setVariableDomains(self,domainOfVariable):
self._domainOfVariable = domainOfVariable
def copy(self):
variables = copy.deepcopy(self._variables)
domains = copy.deepcopy(self._variables)
constraints = copy.deepcopy(self._variables)
csp = CSP(variables, domains, constraints)
return csp
def getNeighbour(self,variable,constraint):
neigh = []
for va in constraint.getScope():
if va != variable and (va not in neigh):
neigh.append(va)
return neigh
def removeValueFromDomain(self,variable,value):
values = []
for val in self.getDomainValues(variable):
if val != value:
values.append(val)
self._domainOfVariable[variable] = values
if __name__ == '__main__':
domains = ["K","A"]
variables = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
X=[[2,1],[2,-1],[-2,1],[-2,-1],[1,2],[1,-2],[-1,2],[-1,-2]]
constraints = [
nonAttackingConstraint.NotAttackingConstraint(X)
]
Csp = CSP(variables,domains,constraints)
inPro = simpleInference.SimpleInference()
bts = backtrackingSearch.BactrackingSearch(inPro,[consoleListener.ConsoleListener()],variableOrdering = True)
start = time.time()
result = bts.solve(Csp)
end = time.time()
print("%.2f ‐ %.2f" % (start,end))
Specifically stops at addVariableDomain in the CSP Class above, and gives me the unhashable type: 'list' error due to my constraint being a list. Is the issue identifiable? Is there a solution?
Lists can not be used as dictionary keys as they are mutable and hence not suitable for hashing. Try converting the list to tuple.
def addVariableDomain(self,var,domain):
self._domainOfVariable[tuple(var)] = copy.deepcopy(domain)
You may have to do the same wherever var is used as a dictionary key.

Instantiating a class for text analytics

I’ve found this code, a Python Class which takes a WhatsApp conversation text file processes and generates a Chat class which I can interact with. Things like generate charts, the response matrix etc.:
import re
import time
import pandas as pd
import dateutil
import matplotlib.pyplot as plt
class WppAnalyser:
def open_file(self):
x = open(self.filename,'r')
y = x.read()
content = y.splitlines()
return content
def ismessage(self,str):
patterns = {
"hor1":r'w{3}s{1}[0-9]{1,2},s{1}d{4},s{1}d{2}:d{2}',
"hor2":r'w{3}s{1}[0-9]{1,2},s{1}d{2}:d{2}',
"imp2":r'd{1,2}sw{3}sd{2}:d{2}',
"imp1":r'd{1,2}sw{3}sd{4}sd{2}:d{2}'
}
for key in patterns:
result = re.search(patterns[key], str)
if result and str.count(':') >=2:
name_start = str.find("-")+2
first_colon = str.find(":")
name_end = str.find(":", first_colon+1)
name=str[name_start:name_end]
message=str[name_end+1:]
return [name, message, result.group()]
return ["","",str]
def process(self,content):
j = 1
df = pd.DataFrame(index = range(1, len(content)+1), columns=[ 'Name', 'Message', 'date_string'])
for i in content:
results = self.ismessage(i)
if results[0] != "":
df.ix[j]=results
else:
df.ix[j]['Name']=df.ix[j-1]['Name']
df.ix[j]['date_string']=df.ix[j-1]['date_string']
df.ix[j]['Message']=results[2]
j = j+1
df['Time'] = df['date_string'].map(lambda x: dateutil.parser.parse(x))
df['Day'] = df['date_string'].map(lambda x: dateutil.parser.parse(x).strftime("%a"))
df['Date'] = df['date_string'].map(lambda x:dateutil.parser.parse(x).strftime("%x"))
df['Hour'] = df['date_string'].map(lambda x:dateutil.parser.parse(x).strftime("%H"))
How would I run these functions together, passing self in each function is confusing me. What would a main function looks like here?
I have to instantiate WppAnalyser class, right? So far, I tried this for the first method:
class Chat:
def __init__(self, x, y):
self.x = open("chatPPL.txt", "r")
self.y = y

Categories