Currently I am using the following code to scrape https://www.nike.com/w/mens-shoes-nik1zy7ok for all shoes on the page:
import requests
import json

# I used a placeholder for the anchor parameter
uri = 'https://api.nike.com/cic/browse/v1?queryid=products&country=us&endpoint=product_feed/rollup_threads/v2?filter=marketplace(US)%26filter=language(en)%26filter=employeePrice(true)%26filter=attributeIds(0f64ecc7-d624-4e91-b171-b83a03dd8550%2C16633190-45e5-4830-a068-232ac7aea82c)%26anchor={}%26consumerChannelId=d9a5bc42-4b9c-4976-858a-f159cf99c647%26count=60'

# collect all products
store = []
with requests.Session() as session:
    found_all_products = False
    anchor = 0
    while not found_all_products:
        result = session.get(uri.format(anchor)).json()
        products = result['data']['products']['products']
        store += products
        if len(products) < 60:
            found_all_products = True
        else:
            anchor += 24

# filter by cloudProductId to get a dictionary with unique products
cloudProductIds = set()
unique_products = []
for product in store:
    if product['cloudProductId'] not in cloudProductIds:
        cloudProductIds.add(product['cloudProductId'])
        unique_products.append(product)
How do I write this same API request to retrieve either the men's shoes from this site or the women's shoes from the women's shoes page: https://www.nike.com/w/womens-shoes-5e1x6zy7ok ? Which parameter do I need to change?
@Greg I ran your provided API link in Postman and I am getting different results for men and women. The only thing I changed in the query string parameters is the uuids value, which is different in the two cases: for men it is uuids: 0f64ecc7-d624-4e91-b171-b83a03dd8550,16633190-45e5-4830-a068-232ac7aea82c and for women it is uuids: 16633190-45e5-4830-a068-232ac7aea82c,193af413-39b0-4d7e-ae34-558821381d3f,7baf216c-acc6-4452-9e07-39c2ca77ba32.
If you pass these two sets of uuids in the query string, you will get the men's and women's results separately; there is no other parameter that distinguishes them.
Below is the code:
import json
import requests

# common query parameters
queryid = 'filteredProductsWithContext'
anonymousId = '25AFE5BE9BB9BC03DE89DBE170D80669'
language = 'en-GB'
country = 'IN'
channel = 'NIKE'
localizedRangeStr = '%7BlowestPrice%7D%E2%80%94%7BhighestPrice%7D'

# UUIDs
uuids_men = '0f64ecc7-d624-4e91-b171-b83a03dd8550,16633190-45e5-4830-a068-232ac7aea82c'
uuids_women = '16633190-45e5-4830-a068-232ac7aea82c,193af413-39b0-4d7e-ae34-558821381d3f,7baf216c-acc6-4452-9e07-39c2ca77ba32'


def get_men_result():
    url = 'https://api.nike.com/cic/browse/v1?queryid=' + queryid + '&anonymousId=' + anonymousId + '&uuids=' + uuids_men + '&language=' + language + '&country=' + country + '&channel=' + channel + '&localizedRangeStr=' + localizedRangeStr
    data = requests.get(url, verify=False).json()
    print(data)


def get_women_result():
    url = 'https://api.nike.com/cic/browse/v1?queryid=' + queryid + '&anonymousId=' + anonymousId + '&uuids=' + uuids_women + '&language=' + language + '&country=' + country + '&channel=' + channel + '&localizedRangeStr=' + localizedRangeStr
    data = requests.get(url, verify=False).json()
    print(data)


get_men_result()
print('-' * 100)
get_women_result()
If you look at the query strings I created for men and women, you will notice that six parameters are common and only uuids differs. You can also change country, language, etc. to fetch other data. Please refer to the screenshots as well.
Screenshot: Men
Screenshot: Women
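As a side note, the same request can also be built by passing the query parameters as a dict and letting requests do the URL encoding, which makes swapping country, language or the uuids set easier. This is only an untested sketch reusing the parameter values from the code above (uuids_men / uuids_women as defined there):

import requests

BROWSE_URL = 'https://api.nike.com/cic/browse/v1'


def get_results(uuids, country='IN', language='en-GB'):
    # requests percent-encodes these values for us, so changing country,
    # language or uuids is just a matter of editing this dict.
    params = {
        'queryid': 'filteredProductsWithContext',
        'anonymousId': '25AFE5BE9BB9BC03DE89DBE170D80669',
        'uuids': uuids,
        'language': language,
        'country': country,
        'channel': 'NIKE',
        # \u2014 is the em dash that %E2%80%94 in the hand-built URL decodes to.
        'localizedRangeStr': u'{lowestPrice}\u2014{highestPrice}',
    }
    # add verify=False here only if your network setup requires it, as above
    return requests.get(BROWSE_URL, params=params).json()


print(get_results(uuids_men))
print('-' * 100)
print(get_results(uuids_women))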
I expected the code below to run without issues. My solution for scraping Reddit data:
import requests
import re
import praw
from datetime import date
import csv
import pandas as pd
import time
import sys


class Crawler(object):
    '''
    basic_url is the reddit site.
    headers is for requests.get method
    REX is to find submission ids.
    '''

    def __init__(self, subreddit="apple"):
        '''
        Initialize a Crawler object.
        subreddit is the topic you want to parse. default is r"apple"
        basic_url is the reddit site.
        headers is for requests.get method
        REX is to find submission ids.
        submission_ids save all the ids of submission you will parse.
        reddit is an object created using praw API. Please check it before you use.
        '''
        self.basic_url = "https://www.reddit.com"
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}
        self.REX = re.compile(r"<div class=\" thing id-t3_[\w]+")
        self.subreddit = subreddit
        self.submission_ids = []
        self.reddit = praw.Reddit(client_id="your_id", client_secret="your_secret", user_agent="subreddit_comments_crawler")

    def get_submission_ids(self, pages=2):
        '''
        Collect all ids of submissions.
        One page has 25 submissions.
        page url: https://www.reddit.com/r/subreddit/?count25&after=t3_id
        id(after) is the last submission from last page.
        '''
        # This is page url.
        url = self.basic_url + "/r/" + self.subreddit

        if pages <= 0:
            return []

        text = requests.get(url, headers=self.headers).text
        ids = self.REX.findall(text)
        ids = list(map(lambda x: x[-6:], ids))
        if pages == 1:
            self.submission_ids = ids
            return ids

        count = 0
        after = ids[-1]
        for i in range(1, pages):
            count += 25
            temp_url = self.basic_url + "/r/" + self.subreddit + "?count=" + str(count) + "&after=t3_" + ids[-1]
            text = requests.get(temp_url, headers=self.headers).text
            temp_list = self.REX.findall(text)
            temp_list = list(map(lambda x: x[-6:], temp_list))
            ids += temp_list
            if count % 100 == 0:
                time.sleep(60)
        self.submission_ids = ids
        return ids

    def get_comments(self, submission):
        '''
        Submission is an object created using praw API.
        '''
        # Remove all "more comments".
        submission.comments.replace_more(limit=None)
        comments = []
        for each in submission.comments.list():
            try:
                comments.append((each.id, each.link_id[3:], each.author.name, date.fromtimestamp(each.created_utc).isoformat(), each.score, each.body))
            except AttributeError as e:  # Some comments are deleted, we cannot access them.
                # print(each.link_id, e)
                continue
        return comments

    def save_comments_submissions(self, pages):
        '''
        1. Save all the ids of submissions.
        2. For each submission, save information of this submission. (submission_id, #comments, score, subreddit, date, title, body_text)
        3. Save comments in this submission. (comment_id, submission_id, author, date, score, body_text)
        4. Separately, save them to two csv file.
        Note: You can link them with submission_id.
        Warning: According to the rule of Reddit API, the get action should not be too frequent. Safely, use the default time span in this crawler.
        '''
        print("Start to collect all submission ids...")
        self.get_submission_ids(pages)
        print("Start to collect comments...This may cost a long time depending on # of pages.")
        submission_url = self.basic_url + "/r/" + self.subreddit + "/comments/"
        comments = []
        submissions = []
        count = 0
        for idx in self.submission_ids:
            temp_url = submission_url + idx
            submission = self.reddit.submission(url=temp_url)
            submissions.append((submission.name[3:], submission.num_comments, submission.score, submission.subreddit_name_prefixed, date.fromtimestamp(submission.created_utc).isoformat(), submission.title, submission.selftext))
            temp_comments = self.get_comments(submission)
            comments += temp_comments
            count += 1
            print(str(count) + " submissions have got...")
            if count % 50 == 0:
                time.sleep(60)

        comments_fieldnames = ["comment_id", "submission_id", "author_name", "post_time", "comment_score", "text"]
        df_comments = pd.DataFrame(comments, columns=comments_fieldnames)
        df_comments.to_csv("comments.csv")

        submissions_fieldnames = ["submission_id", "num_of_comments", "submission_score", "submission_subreddit", "post_date", "submission_title", "text"]
        df_submission = pd.DataFrame(submissions, columns=submissions_fieldnames)
        df_submission.to_csv("submissions.csv")

        return df_comments


if __name__ == "__main__":
    args = sys.argv[1:]
    if len(args) != 2:
        print("Wrong number of args...")
        exit()

    subreddit, pages = args
    c = Crawler(subreddit)
    c.save_comments_submissions(int(pages))
but I got:
(base) UserAir:scrape_reddit user$ python reddit_crawler.py apple 2
Start to collect all submission ids...
Traceback (most recent call last):
  File "reddit_crawler.py", line 127, in <module>
    c.save_comments_submissions(int(pages))
  File "reddit_crawler.py", line 94, in save_comments_submissions
    self.get_submission_ids(pages)
  File "reddit_crawler.py", line 54, in get_submission_ids
    after = ids[-1]
IndexError: list index out of range
Erik's answer diagnoses the specific cause of this error, but more broadly I think this is caused by you not using PRAW to its fullest potential. Your script imports requests and performs a lot of manual requests that PRAW has methods for already. The whole point of PRAW is to prevent you from having to write these requests that do things such as paginate a listing, so I recommend you take advantage of that.
As an example, your get_submission_ids function (which scrapes the web version of Reddit and handles paginating) could be replaced by just
def get_submission_ids(self, pages=2):
    return [
        submission.id
        for submission in self.reddit.subreddit(self.subreddit).hot(
            limit=25 * pages
        )
    ]
because the .hot() function does everything you tried to do by hand.
I'm going to go one step further here and have the function just return a list of Submission objects, because the rest of your code ends up doing things that would be better done by interacting with the PRAW Submission object. Here's that code (I renamed the function to reflect its updated purpose):
def get_submissions(self, pages=2):
    return list(self.reddit.subreddit(self.subreddit).hot(limit=25 * pages))
(I've updated this function to just return its result, as your version both returns the value and sets it as self.submission_ids, unless pages is 0. That felt quite inconsistent, so I made it just return the value.)
Your get_comments function looks good.
The save_comments_submissions function, like get_submission_ids, does a lot of manual work that PRAW can handle. You construct a temp_url that has the full URL of a post, and then use that to make a PRAW Submission object, but we can replace that with directly using the one returned by get_submissions. You also have some calls to time.sleep() which I removed because PRAW will automatically sleep the appropriate amount for you. Lastly, I removed the return value of this function because the point of the function is to save data to disk, not to return it to anywhere else, and the rest of your script doesn't use the return value. Here's the updated version of that function:
def save_comments_submissions(self, pages):
    """
    1. Save all the ids of submissions.
    2. For each submission, save information of this submission. (submission_id, #comments, score, subreddit, date, title, body_text)
    3. Save comments in this submission. (comment_id, submission_id, author, date, score, body_text)
    4. Separately, save them to two csv file.
    Note: You can link them with submission_id.
    Warning: According to the rule of Reddit API, the get action should not be too frequent. Safely, use the default time span in this crawler.
    """
    print("Start to collect all submission ids...")
    submissions = self.get_submissions(pages)
    print(
        "Start to collect comments...This may cost a long time depending on # of pages."
    )
    comments = []
    pandas_submissions = []
    for count, submission in enumerate(submissions):
        pandas_submissions.append(
            (
                submission.name[3:],
                submission.num_comments,
                submission.score,
                submission.subreddit_name_prefixed,
                date.fromtimestamp(submission.created_utc).isoformat(),
                submission.title,
                submission.selftext,
            )
        )
        temp_comments = self.get_comments(submission)
        comments += temp_comments
        print(str(count) + " submissions have got...")

    comments_fieldnames = [
        "comment_id",
        "submission_id",
        "author_name",
        "post_time",
        "comment_score",
        "text",
    ]
    df_comments = pd.DataFrame(comments, columns=comments_fieldnames)
    df_comments.to_csv("comments.csv")

    submissions_fieldnames = [
        "submission_id",
        "num_of_comments",
        "submission_score",
        "submission_subreddit",
        "post_date",
        "submission_title",
        "text",
    ]
    df_submission = pd.DataFrame(pandas_submissions, columns=submissions_fieldnames)
    df_submission.to_csv("submissions.csv")
Here's an updated version of the whole script that uses PRAW fully:
from datetime import date
import sys

import pandas as pd
import praw


class Crawler:
    """
    basic_url is the reddit site.
    headers is for requests.get method
    REX is to find submission ids.
    """

    def __init__(self, subreddit="apple"):
        """
        Initialize a Crawler object.
        subreddit is the topic you want to parse. default is r"apple"
        basic_url is the reddit site.
        headers is for requests.get method
        REX is to find submission ids.
        submission_ids save all the ids of submission you will parse.
        reddit is an object created using praw API. Please check it before you use.
        """
        self.subreddit = subreddit
        self.submission_ids = []
        self.reddit = praw.Reddit(
            client_id="your_id",
            client_secret="your_secret",
            user_agent="subreddit_comments_crawler",
        )

    def get_submissions(self, pages=2):
        """
        Collect all submissions.
        One page has 25 submissions.
        page url: https://www.reddit.com/r/subreddit/?count25&after=t3_id
        id(after) is the last submission from last page.
        """
        return list(self.reddit.subreddit(self.subreddit).hot(limit=25 * pages))

    def get_comments(self, submission):
        """
        Submission is an object created using praw API.
        """
        # Remove all "more comments".
        submission.comments.replace_more(limit=None)
        comments = []
        for each in submission.comments.list():
            try:
                comments.append(
                    (
                        each.id,
                        each.link_id[3:],
                        each.author.name,
                        date.fromtimestamp(each.created_utc).isoformat(),
                        each.score,
                        each.body,
                    )
                )
            except AttributeError as e:  # Some comments are deleted, we cannot access them.
                # print(each.link_id, e)
                continue
        return comments

    def save_comments_submissions(self, pages):
        """
        1. Save all the ids of submissions.
        2. For each submission, save information of this submission. (submission_id, #comments, score, subreddit, date, title, body_text)
        3. Save comments in this submission. (comment_id, submission_id, author, date, score, body_text)
        4. Separately, save them to two csv file.
        Note: You can link them with submission_id.
        Warning: According to the rule of Reddit API, the get action should not be too frequent. Safely, use the default time span in this crawler.
        """
        print("Start to collect all submission ids...")
        submissions = self.get_submissions(pages)
        print(
            "Start to collect comments...This may cost a long time depending on # of pages."
        )
        comments = []
        pandas_submissions = []
        for count, submission in enumerate(submissions):
            pandas_submissions.append(
                (
                    submission.name[3:],
                    submission.num_comments,
                    submission.score,
                    submission.subreddit_name_prefixed,
                    date.fromtimestamp(submission.created_utc).isoformat(),
                    submission.title,
                    submission.selftext,
                )
            )
            temp_comments = self.get_comments(submission)
            comments += temp_comments
            print(str(count) + " submissions have got...")

        comments_fieldnames = [
            "comment_id",
            "submission_id",
            "author_name",
            "post_time",
            "comment_score",
            "text",
        ]
        df_comments = pd.DataFrame(comments, columns=comments_fieldnames)
        df_comments.to_csv("comments.csv")

        submissions_fieldnames = [
            "submission_id",
            "num_of_comments",
            "submission_score",
            "submission_subreddit",
            "post_date",
            "submission_title",
            "text",
        ]
        df_submission = pd.DataFrame(pandas_submissions, columns=submissions_fieldnames)
        df_submission.to_csv("submissions.csv")


if __name__ == "__main__":
    args = sys.argv[1:]
    if len(args) != 2:
        print("Wrong number of args...")
        exit()

    subreddit, pages = args
    c = Crawler(subreddit)
    c.save_comments_submissions(int(pages))
I realize that my answer here gets into Code Review territory, but I hope that this answer is helpful for understanding some of the things PRAW can do. Your "list index out of range" error would have been avoided by using the pre-existing library code, so I do consider this to be a solution to your problem.
When my_list[-1] throws an IndexError, it means that my_list is empty:
>>> ids = []
>>> ids[-1]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
IndexError: list index out of range
>>> ids = ['1']
>>> ids[-1]
'1'
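In this crawler, that means self.REX.findall(text) matched nothing in the downloaded page (for example because Reddit served different markup than the regex expects, or blocked the request), so ids is empty by the time after = ids[-1] runs. A minimal guard might look like the sketch below; it only avoids the crash, and the PRAW-based rewrite above is the real fix:

ids = []  # what the regex produced in the failing run
if not ids:
    # Bail out before indexing an empty list.
    print("No submission ids found; the page markup may have changed or the request was blocked.")
else:
    after = ids[-1]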
I wrote some code that grabs the numbers I need from this website, but I don't know what to do next.
It grabs the numbers from the table at the bottom. The ones under calving ease, birth weight, weaning weight, yearling weight, milk and total maternal.
#!/usr/bin/python
import urllib2
from bs4 import BeautifulSoup
import pyperclip


def getPageData(url):
    if not ('abri.une.edu.au' in url):
        return -1
    webpage = urllib2.urlopen(url).read()
    soup = BeautifulSoup(webpage, "html.parser")
    # This finds the epd tree and saves it as a searchable list
    pedTreeTable = soup.find('table', {'class': 'TablesEBVBox'})
    # This puts all of the epds into a list.
    # It looks for anything in pedTreeTable with a td tag.
    pageData = pedTreeTable.findAll('td')
    pageData.pop(7)
    return pageData


def createPedigree(animalPageData):
    '''make animalPageData much more useful. Strip the text out and put it in a dict.'''
    animals = []
    for animal in animalPageData:
        animals.append(animal.text)
    prettyPedigree = {
        'calving_ease': animals[18],
        'birth_weight': animals[19],
        'wean_weight': animals[20],
        'year_weight': animals[21],
        'milk': animals[22],
        'total_mat': animals[23]
    }
    for animalKey in prettyPedigree:
        if animalKey != 'year_weight' and animalKey != 'dam':
            prettyPedigree[animalKey] = stripRegNumber(prettyPedigree[animalKey])
    return prettyPedigree


def stripRegNumber(animal):
    '''returns the animal with its registration number stripped'''
    lAnimal = animal.split()
    strippedAnimal = ""
    for word in lAnimal:
        if not word.isdigit():
            strippedAnimal += word + " "
    return strippedAnimal


def prettify(pedigree):
    '''Takes the pedigree and prints it out in a usable format'''
    s = ''
    pedString = ""
    # this is also ugly, but it was the only way I found to format with a variable
    cFormat = '{{:^{}}}'
    rFormat = '{{:>{}}}'

    # row 1 of string
    s += rFormat.format(len(pedigree['calving_ease'])).format(
        pedigree['calving_ease']) + '\n'
    # row 2 of string
    s += rFormat.format(len(pedigree['birth_weight'])).format(
        pedigree['birth_weight']) + '\n'
    # row 3 of string
    s += rFormat.format(len(pedigree['wean_weight'])).format(
        pedigree['wean_weight']) + '\n'
    # row 4 of string
    s += rFormat.format(len(pedigree['year_weight'])).format(
        pedigree['year_weight']) + '\n'
    # row 5 of string
    s += rFormat.format(len(pedigree['milk'])).format(
        pedigree['milk']) + '\n'
    # row 6 of string
    s += rFormat.format(len(pedigree['total_mat'])).format(
        pedigree['total_mat']) + '\n'
    return s


if __name__ == '__main__':
    while True:
        url = raw_input('Input a url you want to use to make life easier: \n')
        pageData = getPageData(url)
        s = prettify(createPedigree(pageData))
        pyperclip.copy(s)
        if len(s) > 0:
            print 'the easy string has been copied to your clipboard'
I've just been using this code for easy copying and pasting. All I have to do is insert the URL, and it saves the numbers to my clipboard.
Now I want to use this code on my website; I want to be able to enter a URL on my page and have it display these numbers in a table.
My questions are as follows:
How do I use the python code on the website?
How do I insert collected data into a table with HTML?
It sounds like you would want to use something like Django. Although the learning curve is a bit steep, it is worth it, and it (of course) supports Python.
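For the second part of your question (getting the collected data into an HTML table), the dict returned by createPedigree can be rendered into a small table string which a Django view (or any other framework) can then drop into the page it returns. This is only a rough sketch; the column labels are my own guesses at what you want to show:

def pedigreeToHtmlTable(pedigree):
    '''Build a two-row HTML table (headers + values) from the createPedigree() dict.'''
    columns = [
        ('calving_ease', 'Calving Ease'),
        ('birth_weight', 'Birth Weight'),
        ('wean_weight', 'Weaning Weight'),
        ('year_weight', 'Yearling Weight'),
        ('milk', 'Milk'),
        ('total_mat', 'Total Maternal'),
    ]
    header = ''.join('<th>%s</th>' % label for key, label in columns)
    values = ''.join('<td>%s</td>' % pedigree.get(key, '') for key, label in columns)
    return '<table><tr>%s</tr><tr>%s</tr></table>' % (header, values)

A Django view would then call getPageData and createPedigree on the submitted URL and pass this string (or, better, the raw dict) to its template; the helper itself works the same under any framework.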
Sorry about the vague and confusing title, but there is really no better way for me to summarize my problem in one sentence.
I was trying to get the student and grade information from a French website. The link is this (http://www.bankexam.fr/resultat/2014/BACCALAUREAT/AMIENS?filiere=BACS).
My code is as follows:
import time
import urllib2
from bs4 import BeautifulSoup

regions = {'R\xc3\xa9sultats Bac Amiens 2014': '/resultat/2014/BACCALAUREAT/AMIENS'}
base_url = 'http://www.bankexam.fr'
tests = {'es': '?filiere=BACES', 's': '?filiere=BACS', 'l': '?filiere=BACL'}

for i in regions:
    for x in tests:
        # create the output file
        output_file = open('/Users/student project/' + i + '_' + x + '.txt', 'a')

        time.sleep(2)  # compassionate scraping

        section_url = base_url + regions[i] + tests[x]  # now goes to the x test page of region i
        request = urllib2.Request(section_url)
        response = urllib2.urlopen(request)
        soup = BeautifulSoup(response, 'html.parser')

        content = soup.find('div', id='zone_res')
        for row in content.find_all('tr'):
            if row.td:
                student = row.find_all('td')
                name = student[0].strong.string.encode('utf8').strip()
                try:
                    school = student[1].strong.string.encode('utf8')
                except AttributeError:
                    school = 'NA'
                result = student[2].span.string.encode('utf8')
                output_file.write('%s|%s|%s\n' % (name, school, result))

        # Find the maximum pages to go through
        if soup.find('div', 'pagination'):
            import re
            page_info = soup.find('div', 'pagination')
            pages = []
            for i in page_info.find_all('a', re.compile('elt')):
                try:
                    pages.append(int(i.string.encode('utf8')))
                except ValueError:
                    continue
            max_page = max(pages)

        # Now goes through page 2 to max page
        for i in range(1, max_page):
            page_url = '&p=' + str(i) + '#anchor'
            section2_url = section_url + page_url
            request = urllib2.Request(section2_url)
            response = urllib2.urlopen(request)
            soup = BeautifulSoup(response, 'html.parser')

            content = soup.find('div', id='zone_res')
            for row in content.find_all('tr'):
                if row.td:
                    student = row.find_all('td')
                    name = student[0].strong.string.encode('utf8').strip()
                    try:
                        school = student[1].strong.string.encode('utf8')
                    except AttributeError:
                        school = 'NA'
                    result = student[2].span.string.encode('utf8')
                    output_file.write('%s|%s|%s\n' % (name, school, result))
A little more description about the code:
I created a 'regions' dictionary and a 'tests' dictionary because there are 30 other regions I need to collect, and I include just one here as a showcase. I'm only interested in the results of three tests (ES, S, L), hence the 'tests' dictionary.
Two errors keep showing up,
one is
KeyError: 2
and the error is linked to line 12,
section_url = base_url + regions[i] + tests[x]
The other is
TypeError: cannot concatenate 'str' and 'int' objects
and this is linked to line 10.
I know there is a lot of information here and I'm probably not listing the most important details for you to help me. But let me know what I can do to fix this!
Thanks
The issue is that you're using the variable i in more than one place.
Near the top of the file, you do:
for i in regions:
So, in some places i is expected to be a key into the regions dictionary.
The trouble comes when you use it again later. You do so in two places:
for i in page_info.find_all('a',re.compile('elt')):
And:
for i in range(1,max_page):
The second of these is what is causing your exceptions, as the integer values that get assigned to i don't appear in the regions dict (nor can an integer be added to a string).
I suggest renaming some or all of those variables. Give them meaningful names, if possible (i is perhaps acceptable for an "index" variable, but I'd avoid using it for anything else unless you're code golfing).
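For example, the two later loops could use their own names so the region key is never overwritten. This is only a sketch of those two loops from your script, with everything else unchanged (the names are just suggestions):

# Pagination links: use a dedicated variable instead of reusing i.
for page_link in page_info.find_all('a', re.compile('elt')):
    try:
        pages.append(int(page_link.string.encode('utf8')))
    except ValueError:
        continue
max_page = max(pages)

# Pages 2 to max_page: again a distinct name, so regions[i] keeps working afterwards.
for page_number in range(1, max_page):
    page_url = '&p=' + str(page_number) + '#anchor'
    section2_url = section_url + page_url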
I am working with Quality Center via OTA COM library. I figured out how to connect to server, but I am lost in OTA documentation on how to work with it. What I need is to create a function which takes a test name as an input and returns number of steps in this test from QC.
This is as far as I have gotten on this question so far.
import win32com
from win32com.client import Dispatch
# import codecs  # to store info in additional codecs
import re
import json
import getpass  # for password

qcServer = "***"
qcUser = "***"
qcPassword = getpass.getpass('Password: ')
qcDomain = "***"
qcProject = "***"

td = win32com.client.Dispatch("TDApiOle80.TDConnection.1")

# Starting to connect
td.InitConnectionEx(qcServer)
td.Login(qcUser, qcPassword)
td.Connect(qcDomain, qcProject)

if td.Connected == True:
    print "Connected to " + qcProject
else:
    print "Connection failed"

# Path = "Subject\Regression\C.001_Band_tones"
mg = td.TreeManager
npath = "Subject\Regression"
tsFolder = td.TestSetTreeManager.NodeByPath(npath)
print tsFolder

td.Disconnect
td.Logout
print "Disconnected from " + qcProject
Any help with decent Python examples or tutorials would be highly appreciated. For now I found this and this, but they don't help.
Using the OTA API to get data from Quality Center normally means getting some element by path, creating a factory, and then using the factory to search for the object. In your case you need the TreeManager to get a folder in the Test Plan, then you need a TestFactory to get the test, and finally you need the DesignStepFactory to get the steps. I'm no Python programmer, but I hope you can get something out of this:
mg = td.TreeManager
npath = "Subject\Test"
tsFolder = mg.NodeByPath(npath)
testFactory = tsFolder.TestFactory
testFilter = testFactory.Filter
testFilter["TS_NAME"] = "Some Test"
testList = testFactory.NewList(testFilter.Text)
test = testList.Item(1)  # There should be only 1 item
print test.Name

stepFactory = test.DesignStepFactory
stepList = stepFactory.NewList("")
for step in stepList:
    print step.StepName
It takes some time to get used to the QC OTA API documentation, but I find it very helpful. Nearly all of my knowledge comes from the examples in the API documentation; for your problem there are examples like "Finding a unique test" or "Get a test object with name and path". Both are examples for the Test object. Even though the examples are in VB, it should not be hard to adapt them to Python.
I figured out the solution, if there is a better way to do this you are welcome to post it.
import win32com
from win32com.client import Dispatch
import getpass


def number_of_steps(name):
    qcServer = "***"
    qcUser = "***"
    qcPassword = getpass.getpass('Password: ')
    qcDomain = "***"
    qcProject = "***"
    td = win32com.client.Dispatch("TDApiOle80.TDConnection.1")

    # Starting to connect
    td.InitConnectionEx(qcServer)
    td.Login(qcUser, qcPassword)
    td.Connect(qcDomain, qcProject)
    if td.Connected is True:
        print "Connected to " + qcProject
    else:
        print "Connection failed"

    mg = td.TreeManager  # Tree manager
    folder = mg.NodeByPath("Subject\Regression")
    testList = folder.FindTests(name)  # Make a list of tests matching name (partial match is accepted)

    if testList is not None:
        if len(testList) > 1:
            print "There are multiple tests matching this name, please check input parameter\nTests matching"
            for test in testList:
                print test.Name
            td.Disconnect
            td.Logout
            return False
        if len(testList) == 1:
            print "In test %s there are %d steps" % (testList[0].Name, testList[0].DesStepsNum)
    else:
        print "There is no test with this name in Quality Center"
        td.Disconnect
        td.Logout
        return False

    td.Disconnect
    td.Logout
    print "Disconnected from " + qcProject
    return testList[0].DesStepsNum  # Return number of steps for the given test
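For completeness, the function can then be called like this (the test name is just the example from the commented path in my first snippet):

# Prompts for the password, connects, and prints/returns the step count.
steps = number_of_steps("C.001_Band_tones")
print("Number of steps: %s" % steps)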