I am using pygsheets and would like to batch validate cells instead of looping through each cell and validating it iteratively. I have gone through the pygsheets documentation and have not found an example of this. Would this be possible, and if so, how would one do it? I did see an example of batching in the documentation (through unlinking and then linking again), but that did not work for me; instead, no update happened.
Below I have a working example of the code that I am trying to optimise by batching the update.
The resulting header row should look like: A | B | C
import pygsheets
spread_sheet_id = "...insert...spreadsheet...id"
spreadsheet_name = "...spreadsheet_name..."
wks_name_or_pos = "...worksheet_name..."
spreadsheet = pygsheets.Spreadsheet(client=service,id=spread_sheet_id)
wksheet = spreadsheet.worksheet('title',wks_name_or_pos)
header_list = ["A","B","C"]
for index, element in enumerate(header_list):
    cell_string = str(chr(65 + index) + "1")
    wksheet.cell(cell_string).set_text_format('bold', True).value = element
    header_cell = wksheet.cell(cell_string)
    header_cell.color = (0.9529412, 0.9529412, 0.9529412, 0) # set background color of this cell as a tuple (red, green, blue, alpha)
    header_cell.update()
    wksheet.set_data_validation(
        start=cell_string, end=cell_string,
        condition_type='TEXT_CONTAINS',
        condition_values=[element], inputMessage=f"Value must be {element}", strict=True)
I have realised I can change the values in the cells by passing them in as a list of lists, but I am not sure how to batch the validation and batch format the cells.
header_list = ["A","B","C"]
list_of_lists = [[col] for col in header_list]
# update values with list of lists (working)
wksheet.update_cells('A1:C1',list_of_lists)
# batch update to bold, change the colour to grey and make sure values fit in cell (increase cell size) ?
# wksheet.add_conditional_formatting(start='A1', end='C1',
# condition_type='CUSTOM_FORMULA',
# format={'backgroundColor':{'red':0.5,'green':0.5, 'blue':0.5, 'alpha':0}},
# condition_values=['=NOT(ISBLANK(A1))'])
# batch validate multiple cells so that the value is strictly the value provided ?
I also tried just unlinking, running the pygsheets commands and then linking again, as follows:
wksheet.unlink()
header_list = ["A","B","C"]
for index, element in enumerate(header_list):
    cell_string = str(chr(65+index)+"1")
    wksheet.cell(cell_string).set_text_format('bold', True).value = element
    header_cell = wksheet.cell(cell_string)
    header_cell.color = (0.9529412, 0.9529412, 0.9529412, 0) # set background color of this cell as a tuple (red, green, blue, alpha)
    header_cell.update()
    wksheet.set_data_validation(
        start=cell_string, end=cell_string,
        condition_type='TEXT_CONTAINS', condition_values=[element], inputMessage=f"Value must be {element}", strict=True)
wksheet.link()
I believe your goal is as follows.
The first script you showed works fine.
You want to reduce the process cost of your script and achieve your multiple requests with one API call.
You want to achieve this using pygsheets for Python.
In this case, how about using batch_update of the Sheet API Wrapper as follows?
Modified script:
header_list = ["A", "B", "C"] # This is from your script.
# I modified the below script.
values = [
    {
        "userEnteredValue": {"stringValue": e},
        "userEnteredFormat": {"textFormat": {"bold": True}},
        "dataValidation": {
            "condition": {"type": "TEXT_CONTAINS", "values": [{"userEnteredValue": e}]},
            "inputMessage": "Value must be " + e,
            "strict": True,
        },
    }
    for e in header_list
]
requests = [
    {
        "updateCells": {
            "range": {
                "sheetId": wksheet.id,
                "startRowIndex": 0,
                "startColumnIndex": 0,
                "endRowIndex": 1,
                "endColumnIndex": 3,
            },
            "rows": [{"values": values}],
            "fields": "userEnteredValue,userEnteredFormat,dataValidation",
        }
    }
]
service.sheet.batch_update(spread_sheet_id, requests)
Here, service is your client for pygsheets.
When this script is run, the same result as your first script is obtained with one API call.
References:
Sheet API Wrapper
UpdateCellsRequest
Added:
From your following reply,
I was looking for a solution with the bolding of the cells in the first row, and grey coloring.
I was also hoping to be able to pass the formatting in individual methods without writing dictionaries with strings (if possible, I understand this may be the only way).
How about the following sample script?
Sample script:
class Sample:
    startRange = {}
    values = []
    userEnteredFormat = {"textFormat": {}, "backgroundColor": {}}
    dataValidation = {}

    def setStartCell(self, sheetId, row, col):
        self.startRange = {"sheetId": sheetId, "rowIndex": row, "columnIndex": col}

    def setValues(self, v):
        self.values = v

    def setTextFormat(self, v1, v2):
        self.userEnteredFormat["textFormat"][v1] = v2

    def setBackgroundColor(self, v1):
        self.userEnteredFormat["backgroundColor"] = {
            "red": v1[0],
            "green": v1[1],
            "blue": v1[2],
            "alpha": v1[3],
        }

    def setDataValidation(self, v1, v2):
        self.dataValidation = [v1, v2]

    def create(self):
        values = [
            {
                "userEnteredValue": {"stringValue": e},
                "userEnteredFormat": self.userEnteredFormat,
                "dataValidation": {
                    "condition": {
                        "type": self.dataValidation[0],
                        "values": [{"userEnteredValue": e}],
                    },
                    "inputMessage": self.dataValidation[1].replace("{element}", e),
                    "strict": True,
                },
            }
            for e in self.values
        ]
        return [
            {
                "updateCells": {
                    "start": self.startRange,
                    "rows": [{"values": values}],
                    "fields": "userEnteredValue,userEnteredFormat,dataValidation",
                }
            }
        ]
spread_sheet_id = "...insert...spreadsheet...id"
wks_name_or_pos = "...worksheet_name..."
spreadsheet = pygsheets.Spreadsheet(client=service, id=spread_sheet_id)
wksheet = spreadsheet.worksheet("title", wks_name_or_pos)
header_list = ["A", "B", "C"] # This is from your question.
s = Sample()
s.setStartCell(wksheet.id, 0, 0) # cell "A1" (0, 0) of wksheet.
s.setValues(header_list)
s.setTextFormat("bold", True)
s.setBackgroundColor([0.9529412, 0.9529412, 0.9529412, 0]) # R, G, B, Alpha
s.setDataValidation("TEXT_CONTAINS", "Value must be {element}") # type, inputMessage
service.sheet.batch_update(spread_sheet_id, s.create())
In this sample script, a request body for the batchUpdate method is created by Sample, and the created request body is used with service.sheet.batch_update of pygsheets.
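For reference, with the values set above, s.create() builds a request body along these lines (abridged here; the entries for "B" and "C" follow the same pattern as "A"):

[
    {
        "updateCells": {
            "start": {"sheetId": wksheet.id, "rowIndex": 0, "columnIndex": 0},
            "rows": [{"values": [
                {
                    "userEnteredValue": {"stringValue": "A"},
                    "userEnteredFormat": {
                        "textFormat": {"bold": True},
                        "backgroundColor": {"red": 0.9529412, "green": 0.9529412, "blue": 0.9529412, "alpha": 0},
                    },
                    "dataValidation": {
                        "condition": {"type": "TEXT_CONTAINS", "values": [{"userEnteredValue": "A"}]},
                        "inputMessage": "Value must be A",
                        "strict": True,
                    },
                },
                # ...same structure for "B" and "C"...
            ]}],
            "fields": "userEnteredValue,userEnteredFormat,dataValidation",
        }
    }
]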
Related
I am trying to update values based off an API call on a server. I have a list of IDs that I have pulled from a previous call saved in a list. I am iterating through the 4 values in the list and doing a new API call to grab some alerts in JSON. If the part of the JSON I'm looking for is blank, I want the loop to continue; but if there is a value, then I want it to find and replace text so I can use it in the next step to do a PUT API call.
I can't figure out why the loop continues to give me ALL of the values.
My code:
site_ids = []
for ids in parsed['resources']:
    site_ids.append(ids['id'])
This gives me a list of [6, 5, 7, 1] which I then use in my next API call to get the alerts
for sid in site_ids:
    smtp_url = "my url" + str(sid) + "API endpoint"
    smtp_payload = {}
    smtp_headers = {
        'Accept': 'application/json;charset=UTF-8',
        'Authorization': 'my stuff'
    }
    smtp_response = requests.request("GET", smtp_url, headers=smtp_headers, data=smtp_payload, verify=False)
    smtp_text = smtp_response.text
    smtp_json = json.loads(smtp_text)
    print(json.dumps(smtp_json["resources"], indent=4, sort_keys=True))
This gives me the results for each JSON response:
[
{
"name": "Test1",
"notification": "SMTP",
"recipients": [
"abc#abc.com"
],
"relayServer": "1.2.3.4",
"senderEmailAddress": "test#abc.com"
},
{
"name": "Test2",
"notification": "SMTP",
"recipients": [
"abc#abc.com"
],
"relayServer": "1.2.3.4",
"senderEmailAddress": "test#abc.com"
}
]
[
{
"name": "Test3",
"notification": "SMTP",
"recipients": [
"abc#abc.com"
],
"relayServer": "1.2.3.4",
"senderEmailAddress": "test#abc.com"
},
{
"name": "Test4",
"notification": "SMTP",
"recipients": [
"abc#abc.com"
],
"relayServer": "1.2.3.4",
"senderEmailAddress": "test#abc.com"
}
]
[]
[]
At the end you can see that the last two sites it iterated through are blank, showing only [].
Everything up to this point is working as I expected. This is where I'm running into issues, though. I'm trying to take that response into a further if statement that essentially ignores the results where the "resources" block is an empty [] but adds the sid used in the calls where there actually is data. My problem is that I'm still getting all 4 sids no matter how I do it.
When I use this:
site_ids_with_alerts = []
if smtp_json['resources'] != None:
    site_ids_with_alerts.append(sid)
print(site_ids_with_alerts)
I still get a full list of [6, 5, 7, 1]
I was EXPECTING to get [6, 5]
I have also tried the variations below, but every time I get the same results:
site_ids_with_alerts = []
site_ids_with_alerts = [sid if smtp_json['resources'] != "[]" else None]

if smtp_json['resources'] == None:
    None
else:
    site_ids_with_alerts.append(sid)

if smtp_json['resources'] == '[]':
    None
else:
    site_ids_with_alerts.append(sid)
The issue was how I was working with the empty value in the JSON. I found the answer here.
So I changed the code to look like this:
if not len(smtp_json['resources']) == 0:
    site_ids_with_alerts.append(sid)
This gives me the list I wanted: [6, 5].
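For what it's worth, since an empty list is already falsy in Python, the same check can be written as a plain truthiness test. A minimal sketch, assuming the same placeholder URL and headers as in the question:

import json
import requests

site_ids = [6, 5, 7, 1]
site_ids_with_alerts = []
for sid in site_ids:
    smtp_url = "my url" + str(sid) + "API endpoint"  # placeholder, as in the question
    smtp_response = requests.request("GET", smtp_url,
                                     headers={'Accept': 'application/json;charset=UTF-8'},
                                     verify=False)
    smtp_json = json.loads(smtp_response.text)
    if smtp_json['resources']:  # an empty list is falsy, so blank sites are skipped
        site_ids_with_alerts.append(sid)

print(site_ids_with_alerts)  # expected: [6, 5]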
I'm working with a large number of large JSON files. I've written the (extremely inelegant) code below in order to generate two dictionaries with which I can create a dataframe to work with. However, in instances where the JSON has values with empty arrays, my code is propagating the last 'valid' values into the subsequent objects with empty arrays. I've tried replacing empty arrays with blanks, but that doesn't seem to work either. (I know my code is very bad; I'm still learning, so please keep that in mind.)
dicts = []
fid = []
x=0
while x < 1:
for i in files:
n=[]
k = []
t = []
op = open(i)
data = op.read()
js = json.loads(data)
items = js['metadata']['items']
#items = json.dumps(items).replace('[]','""')
#items = json.loads(items)
fileid = js['id']
fid.append(fileid)
##Everything after this point is what's throwing me off##
for a in items:
b = json.loads(json.dumps(a, sort_keys =True))
key = b['name']
k.append(key)
val = b['values']
values = []
for c in val:
j=json.dumps(c['value'])
if isinstance(c, list) == False:
continue
values.append(j)
j = ';'.join(values) #<-- For objects with more than one value
t.append(j)
output_dict = dict(zip([key], [j]))
n.append(output_dict)
dicts.append(n)
x = x+1
Here is an example section of the json where I'm observing this behavior:
x = {"metadata": {
"items": [
{
"values": [
{ "attribute1": "attribute", #<-- NOT IMPORTANT
"value": "VALUE 1" #<----VALUE I'M AFTER
},
{"attribute2": "attribute",#<-- NOT IMPORTANT
"value2": "VALUE 2"#<----VALUE I'M AFTER
}
],
"name": "NAME 1" #<--NAME I'M AFTER
},
{
"values": [
{
"value": []#<-- EMPTY ARRAY
}
],
"name": "NAME 2"}
]
}
}
In the above snippet, my ideal output is a list of dictionary pairings that looks like:
[{"NAME 1": "VALUE 1; VALUE 2", "NAME 2": " "...}]
But what I'm getting is:
[{"NAME 1": "VALUE 1; VALUE 2"}, {"NAME 2": "VALUE 1; VALUE 2"}...}]
I've tried deconstructing my work and can't figure out why. I've re-indented and walked through it a couple of times, and I don't understand why it would behave like this. What about the way my loop is constructed is causing this?
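As an aside that may help while picking this apart: in Python, a name assigned inside a loop keeps its last value after that loop ends, so if an inner loop body never runs (for example when "values" is an empty array), anything it would normally rebind still holds whatever the previous item left there. A minimal, hypothetical illustration of that effect (not the actual files from the question):

items = [
    {"name": "NAME 1", "values": [{"value": "VALUE 1"}, {"value": "VALUE 2"}]},
    {"name": "NAME 2", "values": []},  # empty array: the inner loop body never runs
]

result = []
for item in items:
    values = []
    for entry in item["values"]:
        values.append(entry["value"])
        j = ";".join(values)            # j is only (re)assigned inside this inner loop
    result.append({item["name"]: j})    # NAME 2 reuses the j left over from NAME 1

print(result)
# [{'NAME 1': 'VALUE 1;VALUE 2'}, {'NAME 2': 'VALUE 1;VALUE 2'}]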
I am working with some Elasticsearch data and I would like to generate tables from the aggregations, like in Kibana. A sample output of the aggregation is below, based on the following code:
s.aggs.bucket("name1", "terms", field="field1").bucket(
"name2", "terms", field="innerField1"
).bucket("name3", "terms", field="InnerAgg1")
response = s.execute()
resp_dict = response.aggregations.name.buckets
{
"key": "Locationx",
"doc_count": 12,
"name2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Sub-Loc1",
"doc_count": 1,
"name3": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "super-Loc1",
"doc_count": 1
}]
}
}, {
"key": "Sub-Loc2",
"doc_count": 1,
"name3": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "super-Loc1",
"doc_count": 1
}]
}
}]
}
}
In this case, the expected output would be a flat, Kibana-style table along the lines of:

Locationx | Sub-Loc1 | super-Loc1 | 1
Locationx | Sub-Loc2 | super-Loc1 | 1
Now, I have tried a variety of methods, with a short description of what went wrong:
Pandasticsearch = completely failed even with just 1 dictionary. The dictionary was not created, as it was struggling with keys, even with each dictionary being dealt with separately:
for d in resp_dict:
    x = d.to_dict()
    pandas_df = Select.from_dict(x).to_pandas()
    print(pandas_df)
In particular, the error that was received related to the fact that the dictionary was not made and thus ['took'] was not a key.
Pandas (pd.DataFrame.from_records()) = only gave me the first aggregation, with a column containing the inner dictionary, and using pd.apply(pd.Series) on it gave another table of resulting dictionaries.
StackOverflow posts with a recursive function = the dictionary looks completely different from the example used, and tinkering led me nowhere unless I drastically change the input.
Struggling with the same problem, I've come to believe the reason for this is that resp_dict is not a list of normal dicts, but rather an elasticsearch_dsl.utils.AttrList of elasticsearch_dsl.utils.AttrDict objects.
If you have an AttrList of AttrDicts, it's possible to do:
resp_dict = response.aggregations.name.buckets
new_response = [i._d_ for i in resp_dict]
To get a list of normal dicts instead. This will probably play nicer with other libraries.
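From there, if the goal is a flat table, one option is pandas.json_normalize (top-level in pandas 1.0+). This is only a sketch, assuming the two-level bucket structure shown in the question; the innermost name3 level stays nested in its own column:

import pandas as pd

# new_response is the list of plain dicts built above
df = pd.json_normalize(
    new_response,
    record_path=["name2", "buckets"],  # one row per name2 sub-bucket
    meta=["key", "doc_count"],         # keep the outer bucket's key/doc_count
    meta_prefix="name1.",              # avoid clashing with the inner key/doc_count
)
print(df)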
Edit:
I wrote a recursive function which at least handles some cases; it is not extensively tested yet, though, and not wrapped in a nice module or anything. It's just a script. The one_lvl function keeps track of all the siblings and siblings of parents in the tree in a dictionary called tmp, and recurses when it finds a new named aggregation. It assumes a lot about the structure of the data, which I'm not sure is warranted in the general case.
I think the lvl stuff is necessary because you might have duplicate names, so key could exist at several aggregation levels, for instance.
#!/usr/bin/env python3
from elasticsearch_dsl.query import QueryString
from elasticsearch_dsl import Search, A
from elasticsearch import Elasticsearch
import pandas as pd
PORT = 9250
TIMEOUT = 10000
USR = "someusr"
PW = "somepw"
HOST = "test.com"
INDEX = "my_index"
QUERY = "foobar"
client = Elasticsearch([HOST], port = PORT, http_auth=(USR, PW), timeout = TIMEOUT)
qs = QueryString(query = QUERY)
s = Search(using=client, index=INDEX).query(qs)
s = s.params(size = 0)
agg= {
"dates" : A("date_histogram", field="date", interval="1M", time_zone="Europe/Berlin"),
"region" : A("terms", field="region", size=10),
"county" : A("terms", field="county", size = 10)
}
s.aggs.bucket("dates", agg["dates"]). \
bucket("region", agg["region"]). \
bucket("county", agg["county"])
resp = s.execute()
data = {"buckets" : [i._d_ for i in resp.aggregations.dates]}
rec_list = ["buckets"] + [*agg.keys()]
def get_fields(i, lvl):
    return {(k + f"{lvl}"): v for k, v in i.items() if k not in rec_list}

def one_lvl(data, tmp, lvl, rows, maxlvl):
    tmp = {**tmp, **get_fields(data, lvl)}
    if "buckets" not in data:
        rows.append(tmp)
    for d in data:
        if d in ["buckets"]:
            for v, b in enumerate(data[d]):
                tmp = {**tmp, **get_fields(data[d][v], lvl)}
                for k in b:
                    if k in agg.keys():
                        one_lvl(data[d][v][k], tmp, lvl + 1, rows, maxlvl)
                    else:
                        if lvl == maxlvl:
                            tmp = {**tmp, (k + f"{lvl}"): data[d][v][k]}
                            rows.append(tmp)
    return rows
rows = one_lvl(data, {}, 1, [], len(agg))
df = pd.DataFrame(rows)
What I'm trying to do is get each column in the Source worksheet and paste it into Target_sheet in another spreadsheet; each pasting action should start from the third row, though (e.g. A3:A, B3:B, ...).
However, I get errors such as:
ata.values[1731]","description": "Invalid value at 'data.values[1731]' (type.googleapis.com/google.protobuf.ListValue), \"x23232x2x2x442x42x42x42\""
},
{
"field": "data.values[1732]",
"description": "Invalid value at 'data.values[1732]' (type.googleapis.com/google.protobuf.ListValue), \"x242x42x42x42x42x442x427\""
},
{
"field": "data.values[1733]",
"description": "Invalid value at 'data.values[1733]' (type.googleapis.com/google.protobuf.ListValue), \"x42x424242x42454555x56666\""
}
.
.
.
My code:
sh = client.open('Target')
sh.values_clear("Target_sheet!A3:J10000")
source = client.open('Source')
source_col_numbers = source.sheet1.col_count
i = 1
# creating a holder for the values in Source.sheet1
columns = {}
#getting the values in each column at the Source.sheet1
while i <= source_col_numbers:
    columns[i] = list(filter(None, source.sheet1.col_values(i)))
    i += 1

# will use this variable to iterate between columns in the Target.Target_sheet
charn = ord("A")

# updating the columns in the Target with values from Source
b = 1
while b <= source_col_numbers:
    sh.values_update(
        "Target_sheet!" + chr(charn) + "3:" + chr(charn),
        params={
            'valueInputOption': 'USER_ENTERED'
        },
        body={
            'values': columns[b]
        }
    )
    charn += 1
    b += 1
@carlesgg97 I tried with values_get but I'm still getting the error I mentioned under your comment:
target_worksheet.values_clear("Target!A3:J10000")
source = client.open('Source')
source_col_numbers = source.sheet1.col_count
source_values=source.values_get('Sheet1!A:J')
last_column=chr(source_col_numbers)
target_worksheet.values_update(
"Target!A3:"+last_column ,
params={ 'valueInputOption': 'USER_ENTERED' },
body={ 'values': source_values }
)
Using col_values you obtain the values of the column, in a list. However, when using the values_update method, the body requires the "values" property to be a list of lists (see: col_values).
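As a minimal sketch of that fix against the loop in the question (everything else unchanged), each value can be wrapped in its own inner list so that every inner list represents one row of the column:

sh.values_update(
    "Target_sheet!" + chr(charn) + "3:" + chr(charn),
    params={'valueInputOption': 'USER_ENTERED'},
    body={'values': [[v] for v in columns[b]]}  # a list of rows, one cell per row
)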
Further to that, I believe the task you are attempting to accomplish can be done in a much simpler way, using values_get. An example that retrieves the range A1:J9997 and pastes it into Target_sheet starting at A3 (so the data begins on the third row) would look as follows:
sh = client.open('Target')
sh.values_clear("Target_sheet!A3:J10000")
response = sh.values_get("A1:J9997")
sh.values_update(
'Target_sheet!A3:J10000',
params = {
'valueInputOption': 'USER_ENTERED'
},
body = {
'values': response['values']
}
)
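One small caveat worth guarding against (this is general Sheets API behaviour rather than anything specific to this library): if the source range is completely empty, the values.get response contains no "values" key at all, so a slightly safer lookup in the snippet above would be:

body={
    'values': response.get('values', [])
}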
Python version 2.7.10
I have this script (which grabs AWS EBS volume metadata) that currently generates key=value pair data as CSV (comma-separated value) output, one line per volume.
Python script:
#!/usr/bin/python
# Do `sudo pip install boto3` first
import boto3
import json

def generate(key, value):
    """
    Creates a nicely formatted Key(Value) item for output
    """
    return '{}={}'.format(key, value)
    #if isinstance(value,int):
    #    return '\"{}\": {}'.format(key, value)
    #else:
    #    return '\"{}\": \"{}\"'.format(key, value)

def main():
    ec2 = boto3.resource('ec2', region_name="us-west-2")
    volumes = ec2.volumes.all()
    for vol in volumes:
        if vol.state == "available":
            vol_state_num_value = 1
        else:
            vol_state_num_value = 0
        if vol.snapshot_id == "":
            vol_snapshot_id = "None"
        else:
            vol_snapshot_id = vol.snapshot_id
        output_parts = [
            # Volume level details
            generate('vol_id', vol.volume_id),
            generate('az', vol.availability_zone),
            generate('vol_type', vol.volume_type),
            generate('size', vol.size),
            generate('iops', vol.iops),
            generate('snapshot_id', vol_snapshot_id),
            generate('vol_state', vol_state_num_value),
        ]
        # only process when there are tags to process
        if vol.tags:
            for _ in vol.tags:
                # Get all of the tags
                output_parts.extend([
                    generate(_.get('Key'), _.get('Value')),
                ])
        # At last put volume state numeric value
        # i.e. 0 (in-use) and 1 (available/unattached) volume
        output_parts.extend([
            generate('state', vol_state_num_value),
        ])
        # output everything at once.
        print ','.join(output_parts)
        #print '{}{}{}'.format('{',output_parts,'}')

if __name__ == '__main__':
    main()
Currently the output it generates looks like this:
vol_id=vol-0abcdab1b68111f8b,az=us-west-2b,vol_type=gp2,size=5,iops=100,snapshot_id=snap-0abcdab1b68111f8b,vol_state=0,mirror=primary,autoscale=true,cluster=customer,Name=[customer-2b-app41] primary,role=app,hostname=customer-2b-app41-i-0abcdab1b68111f8b,state=0
vol_id=vol-0abcdab1b68111f8c,az=us-west-2b,vol_type=gp2,size=12,iops=100,snapshot_id=snap-0abcdab1b68111f9c,vol_state=0,state=0
I'm trying to convert the script so that instead of generating a key=value pair CSV row per line, it'll generate a JSON object.
I tried to tweak the script by using an if statement, as shown in the commented-out code in the script, i.e. isinstance() for the value part (to wrap the value in double quotes, or leave it alone if it's a number), and by using the following lines:
#if isinstance(value,int):
# return '\"{}\": {}'.format(key, value)
#else:
# return '\"{}\": \"{}\"'.format(key, value)
and
#print '{}{}{}'.format('{',output_parts,'}')
But, that's not giving me the desired result.
I'm expecting the desired result to be something like:
{
{
"vol_id": "vol-0abcdab1b68111f8b",
"az": "us-west-2b",
"vol_type": "gp2",
"size": 5,
"iops": 100,
"snapshot_id":"snap-0abcdab1b68111f8b",
...,
.....,
},
{
"vol_id": "vol-0abcdab1b68111f8c",
"az": "us-west-2b",
"vol_type": "gp2",
"size": 12,
"iops": 100,
"snapshot_id": "snap-0abcdab1b68111f9c",
...,
.....
}
}
I tried to use json.dumps(output_parts) but that didn't help to get the desired output. I don't need the CSV output.
Converting your csv lines into a dict can be done with:
Code:
def my_csv_to_dict(csv_line):
    return dict(csv.split('=') for csv in csv_line.split(','))
To convert those dictionaries into json you can use the json lib.
Test Code:
test_data = [x.strip() for x in """
vol_id=vol-0abcdab1b68111f8b,az=us-west-2b,vol_type=gp2,size=5,iops=100,snapshot_id=snap-0abcdab1b68111f8b,vol_state=0,mirror=primary,autoscale=true,cluster=customer,Name=[customer-2b-app41] primary,role=app,hostname=customer-2b-app41-i-0abcdab1b68111f8b,state=0
vol_id=vol-0abcdab1b68111f8c,az=us-west-2b,vol_type=gp2,size=12,iops=100,snapshot_id=snap-0abcdab1b68111f9c,vol_state=0,state=0
""".split('\n')[1:-1]]
import json
print(json.dumps([my_csv_to_dict(x) for x in test_data], indent=2))
Results:
[
{
"az": "us-west-2b",
"autoscale": "true",
"Name": "[customer-2b-app41] primary",
"mirror": "primary",
"cluster": "customer",
"state": "0",
"iops": "100",
"role": "app",
"vol_type": "gp2",
"snapshot_id": "snap-0abcdab1b68111f8b",
"vol_id": "vol-0abcdab1b68111f8b",
"vol_state": "0",
"hostname": "customer-2b-app41-i-0abcdab1b68111f8b",
"size": "5"
},
{
"az": "us-west-2b",
"state": "0",
"iops": "100",
"vol_type": "gp2",
"snapshot_id": "snap-0abcdab1b68111f9c",
"vol_id": "vol-0abcdab1b68111f8c",
"vol_state": "0",
"size": "12"
}
]
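If you would rather not round-trip through the key=value strings at all, the per-volume data can be collected as plain dicts and dumped once at the end. This is only a sketch of that idea (untested against a live account, and it emits a JSON array of objects rather than the nested braces shown in the question, since JSON objects need keys):

#!/usr/bin/python
import json

import boto3

def volume_to_dict(vol):
    """Collect one EBS volume's details as a plain dict (numbers stay numbers)."""
    details = {
        'vol_id': vol.volume_id,
        'az': vol.availability_zone,
        'vol_type': vol.volume_type,
        'size': vol.size,
        'iops': vol.iops,
        'snapshot_id': vol.snapshot_id or "None",
        'vol_state': 1 if vol.state == "available" else 0,
    }
    # fold the tags in, same as the original output_parts loop
    for tag in vol.tags or []:
        details[tag.get('Key')] = tag.get('Value')
    details['state'] = details['vol_state']
    return details

def main():
    ec2 = boto3.resource('ec2', region_name="us-west-2")
    all_details = [volume_to_dict(vol) for vol in ec2.volumes.all()]
    print(json.dumps(all_details, indent=2))

if __name__ == '__main__':
    main()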