Related
I'm looking to visit each URL and return every player image found within the HREF tags, meaning - visit URL, click each player, store profile image link.
I had the right result printing with the code below, but it was pushing the data one by one & ultimately hitting a 429 G Spread quota issue.
My full code is here:
import requests
from bs4 import BeautifulSoup
import gspread
# Authorize with a Google service account and open the target spreadsheet by key.
gc = gspread.service_account(filename='creds.json')
sh = gc.open_by_key('1TD4YmhfAsnSL_Fwo1lckEbnUVBQB6VyKC05ieJ7PKCw')
# Work against the first (index 0) worksheet tab.
worksheet = sh.get_worksheet(0)
# def get_links(url):
# data = []
# req_url = requests.get(url)
# soup = BeautifulSoup(req_url.content, "html.parser")
# for td in soup.select('td:has(>a[href^="/player"])'):
# a_tag = td.a
# name = a_tag.text
# player_url = a_tag['href']
# print(f"Getting {name}")
# req_player_url = requests.get(
# f"https://basketball.realgm.com{player_url}")
# soup_player = BeautifulSoup(req_player_url.content, "html.parser")
# print(f"soup_player for {name}: {soup_player}")
# div_profile_box = soup_player.find('div', {'class': 'profile-box'})
# img_tags = div_profile_box.find_all('img')
# for i, img_tag in enumerate(img_tags):
# image_url = img_tag['src']
# row = {"Name": name, "URL": player_url,
# f"Image URL {i}": image_url}
# data.append(row)
# return data
def get_links2(url):
    """Scrape a RealGM stats page and return one dict per player.

    Each dict holds the player's name, profile URL, position, every
    "key: value" pair found in the profile box, and the src of every
    image in the profile box (img_1, img_2, ...).
    """
    data = []
    listing_resp = requests.get(url)
    soup = BeautifulSoup(listing_resp.content, "html.parser")
    for td in soup.select('td.nowrap'):
        a_tag = td.a
        if not a_tag:
            continue
        name = a_tag.text
        player_url = a_tag['href']
        # Guard: the position lives in the next cell, which may be absent.
        pos_td = td.find_next_sibling('td')
        pos = pos_td.text if pos_td else None
        print(f"Getting {name}")
        player_resp = requests.get(
            f"https://basketball.realgm.com{player_url}")
        soup_player = BeautifulSoup(player_resp.content, "html.parser")
        div_profile_box = soup_player.find("div", class_="profile-box")
        row = {"Name": name, "URL": player_url, "pos_option1": pos}
        if div_profile_box is None:
            # Page without a profile box: keep the basic info, skip details.
            data.append(row)
            continue
        h2 = div_profile_box.h2
        row['pos_option2'] = h2.span.text if h2 and h2.span else None
        for p in div_profile_box.find_all("p"):
            try:
                key, value = p.get_text(strip=True).split(':', 1)
            except ValueError:  # not all entries have a "key: value" form
                continue
            row[key.strip()] = value.strip()
        # Add img tags (headshot, team logo, ...) to the row dictionary.
        for i, img in enumerate(div_profile_box.find_all('img')):
            row[f'img_{i+1}'] = img['src']
        data.append(row)
    return data
urls = ["https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/player/All/desc",
"https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/2", "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/3",
"https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/4"]
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/5",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/6",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/7",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/8",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/9",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/10",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/11",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/12",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/13",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/14",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/15",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/16",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/17",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/18",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/19",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/20",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/21",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/22",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/23",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/24",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/25",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/26",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/27",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/28",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/29",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/30",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/31",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/32",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/33",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/34",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/35",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/36",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/37",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/38",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/39",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/40",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/41",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/42",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/43",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/44",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/45",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/46",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/47",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/48",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/49",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/50",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/51",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/52",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/53",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/54",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/55",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/56",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/57",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/58",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/59",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/60",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/61",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/62",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/63",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/64",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/65",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/66",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/67",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/68",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/69",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/70",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/71",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/72",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/73",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/74",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/75",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/76",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/77",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/78",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/79",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/80",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/81",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/82",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/83",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/84",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/85",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/86",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/87",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/88",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/89",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/90",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/91",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/92",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/93",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/94",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/95",
# "https://basketball.realgm.com/international/stats/2023/Averages/Qualified/All/minutes/All/desc/96"]
# Collect every row first, then write them with a SINGLE Sheets API call.
# Calling insert_row() once per player makes one write request per row and
# quickly trips the per-minute write quota (HTTP 429).
all_rows = []
for url in urls:
    all_rows.extend(get_links2(url))
if all_rows:
    # Rows may have different keys; use the first row's keys as the header
    # and fill missing columns with an empty string.
    header = list(all_rows[0].keys())
    values = [header] + [[row.get(k, "") for k in header] for row in all_rows]
    worksheet.append_rows(values, value_input_option="USER_ENTERED")
I tried to switch to "append_rows" instead of "insert_row" in my final statement. This created a very confusing error:
Traceback (most recent call last):
File "c:\Users\AMadle\GLeague Tracking\(A) INTLimgScrape.py", line 175, in <module>
worksheet.append_rows(list(row.values()))
File "C:\Python\python3.10.5\lib\site-packages\gspread\worksheet.py", line 1338, in append_rows
return self.spreadsheet.values_append(range_label, params, body)
File "C:\Python\python3.10.5\lib\site-packages\gspread\spreadsheet.py", line 149, in values_append
r = self.client.request("post", url, params=params, json=body)
File "C:\Python\python3.10.5\lib\site-packages\gspread\client.py", line 86, in request
raise APIError(response)
gspread.exceptions.APIError: {'code': 400, 'message': 'Invalid value at \'data.values[0]\' (type.googleapis.com/google.protobuf.ListValue), "Jaroslaw Zyskowski"\nInvalid value at \'data.values[1]\' (type.googleapis.com/google.protobuf.ListValue), "/player/Jaroslaw-Zyskowski/Summary/32427"\nInvalid value at \'data.values[2]\' (type.googleapis.com/google.protobuf.ListValue), "TRE"\nInvalid value at \'data.values[3]\' (type.googleapis.com/google.protobuf.ListValue), "SF"\nInvalid value at \'data.values[4]\' (type.googleapis.com/google.protobuf.ListValue), "Trefl Sopot"\nInvalid value at \'data.values[5]\' (type.googleapis.com/google.protobuf.ListValue), "Jul 16, 1992(30 years old)"\nInvalid value at \'data.values[6]\' (type.googleapis.com/google.protobuf.ListValue), "Wroclaw, Poland"\nInvalid value at \'data.values[7]\' (type.googleapis.com/google.protobuf.ListValue), "Poland"\nInvalid value at \'data.values[8]\' (type.googleapis.com/google.protobuf.ListValue), "6-7 (201cm)Weight:220 (100kg)"\nInvalid value at \'data.values[9]\' (type.googleapis.com/google.protobuf.ListValue), "Unrestricted Free Agent"\nInvalid value at \'data.values[10]\' (type.googleapis.com/google.protobuf.ListValue), "Manuel Capicchioni"\nInvalid value at \'data.values[11]\' (type.googleapis.com/google.protobuf.ListValue), "2014 NBA Draft"\nInvalid value at \'data.values[12]\' (type.googleapis.com/google.protobuf.ListValue), "Undrafted"\nInvalid value at \'data.values[13]\' (type.googleapis.com/google.protobuf.ListValue), "Kotwica Kolobrzeg (Poland)"\nInvalid value at \'data.values[14]\' (type.googleapis.com/google.protobuf.ListValue), "/images/nba/4.2/profiles/photos/2006/player_photo.jpg"\nInvalid value at \'data.values[15]\' (type.googleapis.com/google.protobuf.ListValue), "/images/basketball/5.0/team_logos/international/polish/trefl.png"', 'status': 'INVALID_ARGUMENT', 'details': [{'#type': 'type.googleapis.com/google.rpc.BadRequest', 'fieldViolations': [{'field': 'data.values[0]', 
'description': 'Invalid value at \'data.values[0]\' (type.googleapis.com/google.protobuf.ListValue), "Jaroslaw Zyskowski"'}, {'field': 'data.values[1]', 'description': 'Invalid value at \'data.values[1]\' (type.googleapis.com/google.protobuf.ListValue), "/player/Jaroslaw-Zyskowski/Summary/32427"'}, {'field': 'data.values[2]', 'description': 'Invalid value at \'data.values[2]\' (type.googleapis.com/google.protobuf.ListValue), "TRE"'}, {'field': 'data.values[3]', 'description': 'Invalid value at \'data.values[3]\' (type.googleapis.com/google.protobuf.ListValue), "SF"'}, {'field': 'data.values[4]', 'description': 'Invalid value at \'data.values[4]\' (type.googleapis.com/google.protobuf.ListValue), "Trefl Sopot"'}, {'field': 'data.values[5]', 'description': 'Invalid value at \'data.values[5]\' (type.googleapis.com/google.protobuf.ListValue), "Jul 16, 1992(30 years old)"'}, {'field': 'data.values[6]', 'description': 'Invalid value at \'data.values[6]\' (type.googleapis.com/google.protobuf.ListValue), "Wroclaw, Poland"'}, {'field': 'data.values[7]', 'description': 'Invalid value at \'data.values[7]\' (type.googleapis.com/google.protobuf.ListValue), "Poland"'}, {'field': 'data.values[8]', 'description': 'Invalid value at \'data.values[8]\' (type.googleapis.com/google.protobuf.ListValue), "6-7 (201cm)Weight:220 (100kg)"'}, {'field': 'data.values[9]', 'description': 'Invalid value at \'data.values[9]\' (type.googleapis.com/google.protobuf.ListValue), "Unrestricted Free Agent"'}, {'field': 'data.values[10]', 'description': 'Invalid value at \'data.values[10]\' (type.googleapis.com/google.protobuf.ListValue), "Manuel Capicchioni"'}, {'field': 'data.values[11]', 'description': 'Invalid value at \'data.values[11]\' (type.googleapis.com/google.protobuf.ListValue), "2014 NBA Draft"'}, {'field': 'data.values[12]', 'description': 'Invalid value at \'data.values[12]\' (type.googleapis.com/google.protobuf.ListValue), "Undrafted"'}, {'field': 'data.values[13]', 'description': 'Invalid 
value at \'data.values[13]\' (type.googleapis.com/google.protobuf.ListValue), "Kotwica Kolobrzeg (Poland)"'}, {'field': 'data.values[14]', 'description': 'Invalid value at \'data.values[14]\' (type.googleapis.com/google.protobuf.ListValue), "/images/nba/4.2/profiles/photos/2006/player_photo.jpg"'}, {'field': 'data.values[15]', 'description': 'Invalid value at \'data.values[15]\'
(type.googleapis.com/google.protobuf.ListValue), "/images/basketball/5.0/team_logos/international/polish/trefl.png"'}]}]}
PS C:\Users\AMadle\GLeague Tracking>
Any thoughts as to how I could push this output to my Google Sheet in one move, rather than inserting rows each time?
In the script you showed, worksheet.insert_row(list(row.values())) is called inside a loop, so each row costs one separate Sheets API write request — that is most likely the reason for your 429 quota error. In this case, how about the following modification?
From:
for url in urls:
data = get_links2(url)
for row in data:
worksheet.insert_row(list(row.values()))
To:
values = []
for url in urls:
    # Accumulate the dicts scraped from every page before touching the API.
    values = [*values, *get_links2(url)]
if values != []:
    # Use the first row's keys as the header row; rows missing a key (or
    # holding a falsy value) get an empty string in that column.
    header = list(values[0].keys())
    values = [header, *[[e[k] if e.get(k) else "" for k in header] for e in values]]
    # One append_rows call pushes everything in a single API request.
    worksheet.append_rows(values, value_input_option="USER_ENTERED")
By this modification, after all values were retrieved in for url in urls:, the values are put into the Spreadsheet. This flow can be achieved by one API call. I thought that by this, your current issue might be able to be removed.
If you don't want to include the header row, please modify [header, *[[e[k] if e.get(k) else "" for k in header] for e in values]] to [[e[k] if e.get(k) else "" for k in header] for e in values].
If you want to put the value of every URL, how about the following modification? But, in this case, the Sheets API for the number of URLs is used.
From
for url in urls:
data = get_links2(url)
for row in data:
worksheet.insert_row(list(row.values()))
To
header = None
for url in urls:
    values = get_links2(url)
    if values != []:
        # Build the header from the first non-empty page only, so every
        # subsequent append uses the same column order.
        if not header:
            header = list(values[0].keys())
        # Falsy or missing values become empty strings.
        values = [[e[k] if e.get(k) else "" for k in header] for e in values]
        # NOTE: one API write per URL -- more requests than the single-call
        # variant, but each page is persisted as soon as it is scraped.
        worksheet.append_rows(values, value_input_option="USER_ENTERED")
So I have a data set with user, date, and post columns. I'm trying to generate a column of the calories that foods contain in the post column for each user. This dataset has a length of 21, and the code below finds the food words, get their calorie value, append it to that user's respective calorie list, and append that list to the new column. The new generated column, however, somehow has a length of 25:
Current data: 21
New column: 25
Does anybody know why this occurs? Here is the code below and samples of what the original dataset and the new column look like:
# NOTE(review): the outer `while` restarts the zip from the FIRST post on
# every pass, so posts that were already processed can get a second calorie
# list appended -- this is the likely reason `col` (25) ends up longer than
# data['post'] (21).  The exact overshoot depends on where the length check
# sits relative to the inner loop in the real file; iterating the posts
# exactly once and cycling the API keys separately (itertools.cycle) would
# avoid the problem entirely.
while len(col) < len(data['post']):
    for post, api_id, api_key in zip(data['post'], ids_keys.keys(), ids_keys.values()): # cycles through text data & api keys
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'x-app-id': api_id,
            'x-app-key': api_key,
            'x-remote-user-id': '0'
        }
        calories = []
        print('Current data:', len(data['post']), '\n New column: ', len(col)) # prints length of post vs new cal column
        # NOTE(review): eval() on stored text is dangerous; the posts appear
        # to be stringified lists, so ast.literal_eval would be safer.
        for word in eval(post):
            if word not in food:
                continue
            else:
                print('Detected Word: ', word)
                query = {'query': '{}'.format(word)}
                try:
                    response = requests.request("POST", url, headers=headers, data=query)
                except KeyError as ke:
                    print(ke, 'Out of calls, next key...')
                    # NOTE(review): popping from ids_keys while zip() iterates its
                    # views can raise RuntimeError (dict changed size during iteration).
                    ids_keys.pop(api_id) # drop current api id & key from dict if out of calls
                    print('API keys left:', len(ids_keys))
                finally:
                    # NOTE(review): if the request itself failed, `response` may be
                    # unbound (or stale from a previous word) at this point.
                    stats = response.json()
                    print('Food Stats: \n', stats)
                    print('Calories in food: ', stats['foods'][0]['nf_calories'])
                    calories.append(stats['foods'][0]['nf_calories'])
        print('Current Key', api_id, ':', api_key)
        col.append(calories)
        # Stop once the new column matches the dataset length.
        if len(col) == len(data['post']):
            break
I attempted to use the while loop to only append up to the length of the dataset, but to no avail.
Original Data Set:
pd.DataFrame({'user':['avskk', 'janejellyn', 'firlena227','...'],
'date': ['October 22', 'October 22', 'October 22','...'],
'post': [['autumn', 'fully', 'arrived', 'cooking', 'breakfast', 'toaster','...'],
['breakfast', 'chinese', 'sticky', 'rice', 'tempeh', 'sausage', 'cucumber', 'salad', 'lunch', 'going', 'lunch', 'coworkers', 'probably', 'black', 'bean', 'burger'],
['potato', 'bean', 'inspiring', 'food', 'day', 'today', '...']]
})
New Column:
pd.DataFrame({'Calories': [[22,33,45,32,2,5,7,9,76],
[43,78,54,97,32,56,97],
[23,55,32,22,7,99,66,98,54,35,33]]
})
I want to connect separated messages of a chat, so I created a list for all the dictionaries
messages = ["Hello", "How you doing","fine","how can I help you", "how to do this?", "like this", "thanks","man","no problem"]
Person1= [True,True,False,False,True,False,True,True,False]
data =[]
chat_messages = messages
people = Person1
k = 0
# NOTE(review): two separate IndexErrors lurk below:
#  1. `data[k] = chatData` assigns by index into an EMPTY list -- Python lists
#     cannot be grown this way; data.append(chatData) is needed.
#  2. `people[i+1]` runs past the end of the list on the final iteration
#     (when i == len(messages) - 1).
for i in range(len(messages)):
    if people[i] == people[i+1]:
        # Same speaker: merge this message into the next slot.
        chat_messages[i+1] = chat_messages[i] +" " +chat_messages[i+1]
        chatData = {'text': chat_messages[i+1], 'person1': people[i]}
        data[k] = chatData
    else:
        k +=1
        chatData = {'text': chat_messages[i+1], 'person1': people[i+1]}
        print(chatData )
        data[k] = chatData
print(data)
I'm getting errors in here
File "main.py", line 20, in <module>
data[k] = chatData
IndexError: list assignment index out of range
How can I fix it please?
I want the output of data to look like this:
data = [{'text': 'Hello How you doing', 'person1': True} , {'text': 'fine how can I help you', 'person1': False}, {'text': 'how to do this?', 'person1': True}]
You can't add elements to a Python list this way; you have to use the list method append().
data.append(chatData)
This method will add elements at the end of the list.
You can learn more python list methods using this link
https://www.geeksforgeeks.org/list-methods-python/
The problem is that the index i + 1 goes out of range on the last iteration: when i reaches 8 (the final valid index), people[i+1] refers to element 9, which does not exist. Here is my solution:
messages = ["Hello", "How you doing","fine","how can I help you", "how to do this?", "like this", "thanks","man","no problem"]
Person1= [True,True,False,False,True,False,True,True,False]
data = []
chat_messages = messages
people = Person1

# Iterate only up to the second-to-last index so people[i+1] is always
# valid -- no bare except is needed to swallow the IndexError, and real
# errors are no longer silently hidden.
for i in range(len(messages) - 1):
    if people[i] == people[i+1]:
        # Same speaker sent consecutive messages: merge them forward.
        chat_messages[i+1] = chat_messages[i] + " " + chat_messages[i+1]
        data.append({'text': chat_messages[i+1], 'person1': people[i]})
print(data)
messages = ["Hello", "How you doing","fine","how can I help you", "how to do this?", "like this", "thanks","man","no problem"]
Person1= [True,True,False,False,True,False,True,True,False]
data =[]
chat_messages = messages
people = Person1
for i in range(len(messages)):
    # Skip the final index: people[i+1] would be out of range there.
    # (The original used `len(messages)-1 is i`, which compares integers by
    # IDENTITY -- it only works thanks to CPython's small-int caching and
    # raises a SyntaxWarning on Python 3.8+; `==` is the correct operator.)
    if i == len(messages) - 1:
        continue
    if people[i] == people[i+1]:
        # Same speaker: merge this message into the next slot.
        chat_messages[i+1] = chat_messages[i] + " " + chat_messages[i+1]
        chatData = {'text': chat_messages[i+1], 'person1': people[i]}
        data.append(chatData)
    else:
        chatData = {'text': chat_messages[i+1], 'person1': people[i+1]}
        print(chatData )
        data.append(chatData)
print(data)
I have a function named "search_suggestion" that takes a search parameter, passes it to MySQL, and appends each result to an initially empty list "suggestions" inside the function below:
def search_suggestion(self, search, limit=25):
    """Return employee name suggestions ("first last") matching *search*.

    The search term is matched against first name, last name, phone number
    and email.  Returns a (possibly empty) list of strings; on a database
    error the exception is printed and an empty list is returned.
    """
    cursor = None
    suggestions = []
    try:
        cursor = kasaa()
        # NOTE: the OR group must be parenthesized -- SQL's AND binds tighter
        # than OR, so without parentheses `deleted = 0` would only apply to
        # the first LIKE condition and deleted employees could leak through.
        cursor.execute(
            '''
            SELECT ospos_people.first_name, ospos_people.last_name
            FROM ospos_employees
            INNER JOIN ospos_people ON ospos_employees.person_id = ospos_people.person_id
            WHERE ospos_employees.deleted = 0
              AND (ospos_people.first_name LIKE %s OR ospos_people.last_name LIKE %s
                   OR ospos_people.phone_number LIKE %s OR ospos_people.email LIKE %s)
            ORDER BY ospos_people.first_name ASC LIMIT %s
            ''', (search, search, search, search, limit)
        )
        rows = cursor.fetchall()
        suggestions = [ro["first_name"] + " " + ro["last_name"] for ro in rows]
        # Print once, AFTER building the list -- printing inside the loop is
        # what made the list appear multiple times, once per partial state.
        print(suggestions)
    except Exception as e:
        print(e)
    finally:
        # kasaa() may have raised before cursor was assigned; guard the close.
        if cursor is not None:
            cursor.close()
    return suggestions
What I am expecting is a single list like ['alkhadil Issa', 'john Magufuli'];
instead I am getting two lists:
[alkhadil Issa']
['alkhadil Issa' 'john Magufuli']
I have tried checking if len(suggestions) < 1: before appending ro["first_name"], but I am not getting what I want. What is the most efficient way of doing this? Any patience you can afford me on my learning journey would be appreciated.
I replicated your problem by manually creating an output similar to what cursor.fetchall() returns according to you.
>>> dic1 = {'first_name': 'Abdallah', 'last_name': 'Abdillah'}
>>> dic2 = {'first_name': 'Joseph', 'last_name': 'Magufuli'}
>>> row = [dic1, dic2]
>>> row
[{'first_name': 'Abdallah', 'last_name': 'Abdillah'}, {'first_name': 'Joseph', 'last_name': 'Magufuli'}]
Assuming cursor.fetchall() returns something similar to the list above your code should work fine:
>>> suggestions = []
>>> for r in row:
... suggestions.append(r['first_name'] + " " + r['last_name'])
... print(suggestions)
...
['Abdallah Abdillah']
['Abdallah Abdillah', 'Joseph Magufuli']
If that is not the case, then your problem is your cursor.fetchall() result.
Edit:
I just realized your problem is getting 2 lists. Please be aware that your print statement is inside the for loop, so each time a value is added to the list, the resulting list is printed. If you only want to print the list in the end, just add the print statement after the loop ends:
So, instead of:
>>> for dic in row:
... suggestions.append(dic['first_name'] + " " + dic['last_name'])
... print(suggestions)
...
['Abdallah Abdillah']
['Abdallah Abdillah', 'Joseph Magufuli']
Place the print outside of the loop:
>>> for r in row:
... suggestions.append(r['first_name'] + " " + r['last_name'])
...
>>> print(suggestions)
['Abdallah Abdillah', 'Joseph Magufuli']
I have below List output from the a code which i'm working in python where i'm specifically looking for memberUid string and want every names after that to be printed into new line:
like:
anshulm
jiafan
and while printing these names, the printing should stop as soon as it reaches 'cn'.
[[('cn=oracle,ou=Group,ou=corp,ou=services,o=kk.com', {'description': ['oracle group'], 'businessCategory': ['Private'], 'objectClass': ['top', 'groupOfUniqueNames', 'posixGroup'], 'memberUid': ['anshulm', 'jiafan', 'manasij', 'asbisht', 'karnika', 'junle', 'amitsh', 'fuwei', 'dewansh', 'gouravr', 'harshitb', 'tandel', 'matte', 'izamir', 'elie', 'emiliano', 'mateuszw', 'theo', 'mahdi', 'hassan', 'gshruti', 'makhiles', 'prabhaka', 'shgarg', 'ritolia', 'wadhwani', 'steev', 'rtlsbld', 'nikhilb', 'fwang', 'ankitb', 'rtls', 'amitb', 'agautam', 'pratyush', 'hywang', 'dsouder', 'foutz', 'parimi', 'pradeepn', 'patrickg', 'pkunwar', 'tejinder', 'ramteke', 'jangra', 'kush', 'kundan', 'mohang', 'xiang', 'xinjia', 'anantv', 'christos', 'achugh', 'kbhatt', 'jroy', 'kusantos', 'kamleshm', 'iraa', 'indrajit'], 'gidNumber': ['9393'], 'owner': ['varshney'], 'cn': ['oracle']})]]
Below is my code which is yielding the above output:
import ldap
## first you must open a connection to the server
# NOTE(review): this is Python 2 code ("except ldap.LDAPError, e", print
# statements, raw_input); it will not parse under Python 3.
try:
    l = ldap.initialize("ldap://ldapserver:389")
    l.protocol_version = ldap.VERSION3
except ldap.LDAPError, e:
    print e
baseDN = "ou=group,ou=corp,ou=services,o=kk.com"
searchScope = ldap.SCOPE_SUBTREE
# None means "return every attribute" of each matching entry.
retrieveAttributes = None
searchFilter = raw_input("Enter the Group Name: ")
try:
    # Asynchronous search: search() returns an id that result() polls below.
    ldap_result_id = l.search(baseDN, searchScope, searchFilter, retrieveAttributes)
    result_set = []
    while 1:
        # result(..., 0): non-blocking fetch of the next result message.
        result_type, result_data = l.result(ldap_result_id, 0)
        if (result_data == []):
            break
        else:
            if result_type == ldap.RES_SEARCH_ENTRY:
                result_set.append(result_data)
    # Prints the raw (dn, attrs) tuples; extract attrs['memberUid'] from
    # result_set to print only the member names.
    print result_set
except ldap.LDAPError, e:
    print e
You should extract desired part from result_set, for example:
result_set[0][0][1]['memberUid']
and print it with any manner you like:
from pprint import pprint
pprint(result_set[0][0][1]['memberUid'])
or
print('\n'.join(name for name in result_set[0][0][1]['memberUid']))