Different Scrapy feeds export destination for each request

Different Scrapy feeds export destination for each request - python

I'm trying to save image urls for individual properties in their respective csv files via feeds export, in order for this to work, the FEEDS csv_path in custom_settings will have to be changed every time a scrapy.Request is yielded in start_requests. Every time a scrapy.Request is yielded, the self.get_csv_path in __init__ is assigned a new csv file path correspondent to the property id, it is then fetched to FEEDS by def get_feeds_csv_path as in the code below. The self.feeds_csv_path in custom_settings doesn't seem to be able to access def get_feeds_csv_path, where is the error here?
import asyncio
from configparser import ConfigParser
import os
import pandas as pd
import scrapy
import requests
import json
class GetpropertyimgurlsSpider(scrapy.Spider):
name = 'GetPropertyImgUrls'
custom_settings = {
"FEEDS": {
self.feeds_csv_path: {
"format": "csv",
"overwrite": True
}
}
}
def __init__(self, *args, **kwargs):
self.feeds_csv_path = None
super(GetpropertyimgurlsSpider, self).__init__(*args, **kwargs)
def start_requests(self):
files = self.get_html_files() # List of html file full paths
for file in files[:2]:
self.feeds_csv_path = self.get_feeds_csv_path(file)
yield scrapy.Request(file, callback=self.parse)
def parse(self, response):
texts = response.xpath("//text()").getall()
text = texts[1]
json_text = json.loads(text)
#print(text)
photos = json_text["#graph"][3]["photo"]
for photo in photos:
yield photo["contentUrl"]
def get_feeds_csv_path(self, html_file_path):
property_id = html_file_path.split("/")[-2].split("_")[1]
feeds_csv_path = f"{html_file_path}/images/Property_{property_id}_ImgSrcs.csv"
return feeds_csv_path
def get_path(self):
config = ConfigParser()
config.read("config.ini") # Location relative to main.py
path = config["scrapezoopla"]["path"]
return path
#Returns a list of html file dirs
def get_html_files(self):
path = self.get_path()
dir = f"{path}/data/properties/"
dir_list = os.listdir(dir)
folders = []
for ins in dir_list:
if os.path.isdir(f"{dir}{ins}") == True:
append_ins = folders.append(ins)
html_files = []
for folder in folders:
html_file = f"{dir}{folder}/{folder}.html"
if os.path.isfile(html_file) == True:
append_html_file = html_files.append(f"file:///{html_file}")
return html_files

The first problem I see is that you are using the self keyword in the namespace scope of your spider class. The self keyword is only available inside of instance methods where you pass the keyword in as the first argument. e.g. def __init__(self...).
Even if self was available it still wouldn't work though, because once you create the custom_settings dictionary, the self.feeds_csv_path is immediately converted to it's string value at runtime, so updating the instance variable would have no effect on the custom_settings propert.
Another issue is that scrapy collects all of the custom settings and stores them internally before the crawl is actually started, and updating the custom_settings dictionary mid crawl might not actually have an effect. I am not certain about that though.
All of that being said, your goal is still achievable. One means that I can think of is by creating the FEEDS dictionary runtime but prior to initiating the crawl and filtering using custom scrapy.Item classes to filter which item belongs to which output.
I have no way of testing it so it might be buggy but here is an example of what I am referring to:
from configparser import ConfigParser
import json
import os
import scrapy
def get_path():
config = ConfigParser()
config.read("config.ini") # Location relative to main.py
path = config["scrapezoopla"]["path"]
return path
#Returns a list of html file dirs
def get_html_files():
path = get_path()
folder = f"{path}/data/properties/"
dir_list = os.listdir(folder)
html_files = []
for ins in dir_list:
if os.path.isdir(f"{folder}{ins}"):
if os.path.isfile(f"{folder}{ins}/{ins}.html"):
html_files.append(f"file:///{folder}{ins}/{ins}.html")
return html_files
def get_feeds_csv_path(self, html_file_path):
property_id = html_file_path.split("/")[-2].split("_")[1]
feeds_csv_path = f"{html_file_path}/images/Property_{property_id}_ImgSrcs.csv"
return feeds_csv_path
def create_custom_item():
class Item(scrapy.Item):
contentUrl = scrapy.Field()
return Item
def customize_settings():
feeds = {}
files = get_html_files()
start_urls = {}
for path in files:
custom_class = create_custom_item()
output_path = get_feeds_csv_path(path)
start_urls[path] = custom_class
feeds[output_path] = {
"format": "csv",
"item_classes": [custom_class],
}
custom_settings = {"FEEDS": feeds}
return custom_settings, start_urls
class GetpropertyimgurlsSpider(scrapy.Spider):
name = 'GetPropertyImgUrls'
custom_settings, start_urls = customize_settings()
def start_requests(self):
for uri, itemclass in self.start_urls.items():
yield scrapy.Request(uri, callback=self.parse, cb_kwargs={'itemclass': itemclass})
def parse(self, response, itemclass):
texts = response.xpath("//text()").getall()
text = texts[1]
json_text = json.loads(text)
photos = json_text["#graph"][3]["photo"]
for photo in photos:
item = itemclass()
item['contentUrl'] = photo["contentUrl"]
yield item

Related

How do I get the orignal name of the file that is uploaded in the filefield in django

I want to access the name of the file uploaded to models.FileField() in my class.
Here is my class
class extract(models.Model):
doc = models.FileField(upload_to='files/')
Is there a function or some method, to access the original name of the uploaded file, without modifying the name during upload.
like something like this
name = doc.filename()
Edit - 2:
I'm very new to django and programming in general. So this might seem stupid but I want something like this:
class extract(models.Model):
def get_name(instance,filename):
path = 'files'
name = filename
return name,path
doc = models.FileField(fname,upload_to==get_name)
def ex(fname):
path0 = "E:/Projects/PDF Converter/pdf/files/"
fullpath = path0 + fname
document = open(fullpath)
reader = PyPDF2.PdfFileReader(document)
page1 = reader.getPage(0)
data = page1.extractText()
return data
text = models.TextField(default=ex(fname),editable=False)

Per the documentation, this can be accomplished by passing a callback function as the upload_to argument, and the callback function will be called with the given filename as the second argument:
def get_filename(_, filename):
print(filename) # or do whatever you want with the original filename here
return 'files/'
class extract(models.Model):
doc = models.FileField(upload_to=get_filename)

I want to read from a file and scrape each URL

What I want to do is read every URL from a file and scrape this URL. After that, I will move the scraping data to the class WebRealTor and then serialize data in json and finally save all the data in a json file.
This is the content of the file:
https://www.seloger.com/annonces/achat/appartement/paris-14eme-75/montsouris-dareau/143580615.htm?ci=750114&idtt=2,5&idtypebien=2,1&LISTING-LISTpg=8&naturebien=1,2,4&tri=initial&bd=ListToDetail
https://www.seloger.com/annonces/achat/appartement/montpellier-34/gambetta/137987697.htm?ci=340172&idtt=2,5&idtypebien=1,2&naturebien=1,2,4&tri=initial&bd=ListToDetail
https://www.seloger.com/annonces/achat/appartement/montpellier-34/celleneuve/142626025.htm?ci=340172&idtt=2,5&idtypebien=1,2&naturebien=1,2,4&tri=initial&bd=ListToDetail
https://www.seloger.com/annonces/achat/appartement/versailles-78/domaine-national-du-chateau/138291887.htm
And my script is:
import scrapy
import json
class selogerSpider(scrapy.Spider):
name = "realtor"
custom_settings = {
'DOWNLOADER_MIDDLEWARES': {
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
'scrapy_fake_useragent.middleware.RandomUserAgentMiddleware': 400,
}
}
def start_requests(self):
with open("annonces.txt", "r") as file:
for line in file.readlines():
yield scrapy.Request(line)
def parse(self, response):
name = response.css(".agence-link::text").extract_first()
address = response.css(".agence-adresse::text").extract_first()
XPATH_siren = ".//div[#class='legalNoticeAgency']//p/text()"
siren = response.xpath(XPATH_siren).extract_first()
XPATH_website = ".//div[#class='agence-links']//a/#href"
site = response.xpath(XPATH_website).extract()
XPATH_phone = ".//div[#class='contact g-row-50']//div[#class='g-col g-50 u-pad-0']//button[#class='btn-phone b-btn b-second fi fi-phone tagClick']/#data-phone"
phone = response.xpath(XPATH_phone).extract_first()
yield {
'Agency_Name =': name,
'Agency_Address =': address,
'Agency_profile_website =': site,
'Agency_number =': phone,
'Agency_siren =': siren
}
file.close()
class WebRealTor:
def __name__(self):
self.nom = selogerSpider.name
def __address__(self):
self.adress = selogerSpider.address
def __sirenn__(self):
self.sire = selogerSpider.siren
def __numero__(self):
self.numero = selogerSpider.phone
with open('data.txt', 'w') as outfile:
json.dump(data, outfile)

Try to move everything to start_requests in you class. Like this:
def start_requests(self):
with open("annonces.txt", "r") as file:
for line in file.readlines():
yield scrapy.Request(line) # self.parse is by default
def parse(self, response):
# each link parsing as you already did

Scrapy CSV column export

I´d like to export data to several columns in csv but I always obtain this kind of file:
csv
I´d like to obtain two columns one "articulo" and another one "price"
My pipelines:
import scrapy
from scrapy import signals
from scrapy.contrib.exporter import CsvItemExporter
import csv
class MercadoPipeline(object):
def __init__(self):
self.files = {}
#classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
return pipeline
def spider_opened(self, spider):
file = open('%s_items.csv' % spider.name, 'w+b')
self.files[spider] = file
self.exporter = CsvItemexporter(file)
self.exporter.fields_to_export = ['articulo','precio']
self.exporter.start_exporting()
def spider_closed(self, spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.closed()
def process_item(self, item, spider):
self.exporter.export_item(item)
return item
Can you help me please?

Here you are:
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.exceptions import CloseSpider
from mercado.items import MercadoItem
class MercadoSpider(CrawlSpider):
name = 'mercado'
item_count = 0
allowed_domain = ['www.autodoc.es']
start_urls = ['https://www.autodoc.es/search?brandNo%5B0%5D=101']
rules = {
Rule(LinkExtractor(allow =(), restrict_xpaths = ('//span[#class="next"]/a'))),
Rule(LinkExtractor(allow =(), restrict_xpaths = ('//a[#class="ga-click"]')),
callback = 'parse_item', follow = False)
}
def parse_item(self, response):
ml_item = MercadoItem()
#info de producto
ml_item['articulo'] = response.xpath('normalize-space(//*[#id="content"]/div[4]/div[2]/div[1]/div[1]/div/span[1]/span/text())').extract()
ml_item['precio'] = response.xpath('normalize-space(//*[#id="content"]/div[4]/div[3]/div[2]/p[2]/text())').extract()
self.item_count += 1
if self.item_count > 20:
raise CloseSpider('item_exceeded')
yield ml_item

There is nothing wrong with the output of your code.
You are getting the two csv columns you want, but the program you are using to view the data is not interpreting it correctly.
By default, CsvItemExporter uses , as the delimiter, and the program seems to expect something else (and possibly even different quoting).
There are two possibilities to solve your problem:
Change the program's settings so it reads the file correctly
Change the way CsvItemExporter exports data (it will pass any additional keyword arguments to the underlying csv.writer object)

Best practice for keeping a config file in python

I have a python script in which I have a config file which looks like this:
PROD = 'production'
DEV = 'dev'
ENVIRONMENT = None
and I have a function which gets the wanted environment from a command argument and sets it like:
if sys.argv[1] in [config.PROD, config.DEV]:
config.ENVIRONMENT = sys.argv[1]
I understood it's not good practice when I started importing the config file in multiple files and ENV kept resetting back to None.
So, my question is: what is the best practice is this case

I'm not sure exactly what the best practice is but I like using JSON files. I use the following class as a layer of abstraction for interfacing with the config (properties) file. You can create one JSONPropertiesFile and pass it around your application.
import json
from collections import OrderedDict
import os
from stat import * # ST_SIZE etc
from datetime import datetime
from copy import deepcopy
class JSONPropertiesFileError(Exception):
pass
class JSONPropertiesFile(object):
def __init__(self, file_path, default={}):
self.file_path = file_path
self._default_properties = default
self._validate_file_path(file_path)
def _validate_file_path(self, file_path):
if not file_path.endswith(".json"):
raise JSONPropertiesFileError(f"Must be a JSON file: {file_path}")
if not os.path.exists(file_path):
self.set(self._default_properties)
def set(self, properties):
new_properties = deepcopy(self._default_properties)
new_properties.update(properties)
with open(self.file_path, 'w') as file:
json.dump(new_properties, file, indent=4)
def get(self):
properties = deepcopy(self._default_properties)
with open(self.file_path) as file:
properties.update(json.load(file, object_pairs_hook=OrderedDict))
return properties
def get_file_info(self):
st = os.stat(self.file_path)
res = {
'size':st[ST_SIZE],
'size_str':str(round(st[ST_SIZE]/1000,2)) + ' KB',
'last_mod': datetime.fromtimestamp(st[ST_MTIME]).strftime("%Y-%m-%d")
}
return res
In your case you might use it like this:
file_path = "path/to/your/config/file"
default_properties = {
'PROD': 'production',
'DEV': 'dev',
'ENVIRONMENT': ""
}
config_file = JSONPropertiesFile(file_path, default_properties)
config = config_file.get()
print(config["PROD"])
config["PROD"] = "something else"
config_file.set(config) # save new config

Python: How to loop through several ini files with ConfigParser?

I somewhat understand how to do looping in Python, seems easy enough to say "For each file in this directory...do something". I'm now having a hard time figuring out how to loop through a series of .ini files in a directory, read lines from them, and use the text in the ini files as variables in the same Python script. For example, in this script, a single .ini file provides the values for 12 variables in the script. Currently, to run the script multiple times, one has to replace the single ini file with another one, that contains a different 12 variables. The script performs routine maintenance of an on-line mapping service provider..thing is...I have dozen's of services I'd like to manage with the script. From the script, it appears that the name of the .ini file is fixed, not sure it's even possible to loop through multiple ini file? The good news is, that the script is using ConfigParser.....I hope this makes sense!
[FS_INFO]
SERVICENAME = MyMapService
FOLDERNAME = None
MXD = D:\nightly_updates\maps\MyMap.mxd
TAGS = points, dots, places
DESCRIPTION = This is the description text
MAXRECORDS = 1000
[FS_SHARE]
SHARE = True
EVERYONE = true
ORG = true
GROUPS = None
[AGOL]
USER = user_name
PASS = pass_word1
The script below is reading from the ini file above.
# Import system modules
import urllib, urllib2, json
import sys, os
import requests
import arcpy
import ConfigParser
from xml.etree import ElementTree as ET
class AGOLHandler(object):
def __init__(self, username, password, serviceName, folderName):
self.username = username
self.password = password
self.serviceName = serviceName
self.token, self.http = self.getToken(username, password)
self.itemID = self.findItem("Feature Service")
self.SDitemID = self.findItem("Service Definition")
self.folderName = folderName
self.folderID = self.findFolder()
def getToken(self, username, password, exp=60):
referer = "http://www.arcgis.com/"
query_dict = {'username': username,
'password': password,
'expiration': str(exp),
'client': 'referer',
'referer': referer,
'f': 'json'}
query_string = urllib.urlencode(query_dict)
url = "https://www.arcgis.com/sharing/rest/generateToken"
token = json.loads(urllib.urlopen(url + "?f=json", query_string).read())
if "token" not in token:
print token['error']
sys.exit()
else:
httpPrefix = "http://www.arcgis.com/sharing/rest"
if token['ssl'] == True:
httpPrefix = "https://www.arcgis.com/sharing/rest"
return token['token'], httpPrefix
def findItem(self, findType):
#
# Find the itemID of whats being updated
#
searchURL = self.http + "/search"
query_dict = {'f': 'json',
'token': self.token,
'q': "title:\""+ self.serviceName + "\"AND owner:\"" + self.username + "\" AND type:\"" + findType + "\""}
jsonResponse = sendAGOLReq(searchURL, query_dict)
if jsonResponse['total'] == 0:
print "\nCould not find a service to update. Check the service name in the settings.ini"
sys.exit()
else:
print("found {} : {}").format(findType, jsonResponse['results'][0]["id"])
return jsonResponse['results'][0]["id"]
def findFolder(self):
#
# Find the ID of the folder containing the service
#
if self.folderName == "None":
return ""
findURL = self.http + "/content/users/{}".format(self.username)
query_dict = {'f': 'json',
'num': 1,
'token': self.token}
jsonResponse = sendAGOLReq(findURL, query_dict)
for folder in jsonResponse['folders']:
if folder['title'] == self.folderName:
return folder['id']
print "\nCould not find the specified folder name provided in the settings.ini"
print "-- If your content is in the root folder, change the folder name to 'None'"
sys.exit()
def urlopen(url, data=None):
# monkey-patch URLOPEN
referer = "http://www.arcgis.com/"
req = urllib2.Request(url)
req.add_header('Referer', referer)
if data:
response = urllib2.urlopen(req, data)
else:
response = urllib2.urlopen(req)
return response
def makeSD(MXD, serviceName, tempDir, outputSD, maxRecords):
#
# create a draft SD and modify the properties to overwrite an existing FS
#
arcpy.env.overwriteOutput = True
# All paths are built by joining names to the tempPath
SDdraft = os.path.join(tempDir, "tempdraft.sddraft")
newSDdraft = os.path.join(tempDir, "updatedDraft.sddraft")
arcpy.mapping.CreateMapSDDraft(MXD, SDdraft, serviceName, "MY_HOSTED_SERVICES")
# Read the contents of the original SDDraft into an xml parser
doc = ET.parse(SDdraft)
root_elem = doc.getroot()
if root_elem.tag != "SVCManifest":
raise ValueError("Root tag is incorrect. Is {} a .sddraft file?".format(SDDraft))
# The following 6 code pieces modify the SDDraft from a new MapService
# with caching capabilities to a FeatureService with Query,Create,
# Update,Delete,Uploads,Editing capabilities as well as the ability to set the max
# records on the service.
# The first two lines (commented out) are no longer necessary as the FS
# is now being deleted and re-published, not truly overwritten as is the
# case when publishing from Desktop.
# The last three pieces change Map to Feature Service, disable caching
# and set appropriate capabilities. You can customize the capabilities by
# removing items.
# Note you cannot disable Query from a Feature Service.
#doc.find("./Type").text = "esriServiceDefinitionType_Replacement"
#doc.find("./State").text = "esriSDState_Published"
# Change service type from map service to feature service
for config in doc.findall("./Configurations/SVCConfiguration/TypeName"):
if config.text == "MapServer":
config.text = "FeatureServer"
#Turn off caching
for prop in doc.findall("./Configurations/SVCConfiguration/Definition/" +
"ConfigurationProperties/PropertyArray/" +
"PropertySetProperty"):
if prop.find("Key").text == 'isCached':
prop.find("Value").text = "false"
if prop.find("Key").text == 'maxRecordCount':
prop.find("Value").text = maxRecords
# Turn on feature access capabilities
for prop in doc.findall("./Configurations/SVCConfiguration/Definition/Info/PropertyArray/PropertySetProperty"):
if prop.find("Key").text == 'WebCapabilities':
prop.find("Value").text = "Query,Create,Update,Delete,Uploads,Editing"
# Add the namespaces which get stripped, back into the .SD
root_elem.attrib["xmlns:typens"] = 'http://www.esri.com/schemas/ArcGIS/10.1'
root_elem.attrib["xmlns:xs"] ='http://www.w3.org/2001/XMLSchema'
# Write the new draft to disk
with open(newSDdraft, 'w') as f:
doc.write(f, 'utf-8')
# Analyze the service
analysis = arcpy.mapping.AnalyzeForSD(newSDdraft)
if analysis['errors'] == {}:
# Stage the service
arcpy.StageService_server(newSDdraft, outputSD)
print "Created {}".format(outputSD)
else:
# If the sddraft analysis contained errors, display them and quit.
print analysis['errors']
sys.exit()
def upload(fileName, tags, description):
#
# Overwrite the SD on AGOL with the new SD.
# This method uses 3rd party module: requests
#
updateURL = agol.http+'/content/users/{}/{}/items/{}/update'.format(agol.username, agol.folderID, agol.SDitemID)
filesUp = {"file": open(fileName, 'rb')}
url = updateURL + "?f=json&token="+agol.token+ \
"&filename="+fileName+ \
"&type=Service Definition"\
"&title="+agol.serviceName+ \
"&tags="+tags+\
"&description="+description
response = requests.post(url, files=filesUp);
itemPartJSON = json.loads(response.text)
if "success" in itemPartJSON:
itemPartID = itemPartJSON['id']
print("updated SD: {}").format(itemPartID)
return True
else:
print "\n.sd file not uploaded. Check the errors and try again.\n"
print itemPartJSON
sys.exit()
def publish():
#
# Publish the existing SD on AGOL (it will be turned into a Feature Service)
#
publishURL = agol.http+'/content/users/{}/publish'.format(agol.username)
query_dict = {'itemID': agol.SDitemID,
'filetype': 'serviceDefinition',
'overwrite': 'true',
'f': 'json',
'token': agol.token}
jsonResponse = sendAGOLReq(publishURL, query_dict)
print("successfully updated...{}...").format(jsonResponse['services'])
return jsonResponse['services'][0]['serviceItemId']
def enableSharing(newItemID, everyone, orgs, groups):
#
# Share an item with everyone, the organization and/or groups
#
shareURL = agol.http+'/content/users/{}/{}/items/{}/share'.format(agol.username, agol.folderID, newItemID)
if groups == None:
groups = ''
query_dict = {'f': 'json',
'everyone' : everyone,
'org' : orgs,
'groups' : groups,
'token': agol.token}
jsonResponse = sendAGOLReq(shareURL, query_dict)
print("successfully shared...{}...").format(jsonResponse['itemId'])
def sendAGOLReq(URL, query_dict):
#
# Helper function which takes a URL and a dictionary and sends the request
#
query_string = urllib.urlencode(query_dict)
jsonResponse = urllib.urlopen(URL, urllib.urlencode(query_dict))
jsonOuput = json.loads(jsonResponse.read())
wordTest = ["success", "results", "services", "notSharedWith", "folders"]
if any(word in jsonOuput for word in wordTest):
return jsonOuput
else:
print "\nfailed:"
print jsonOuput
sys.exit()
if __name__ == "__main__":
#
# start
#
print "Starting Feature Service publish process"
# Find and gather settings from the ini file
localPath = sys.path[0]
settingsFile = os.path.join(localPath, "settings.ini")
if os.path.isfile(settingsFile):
config = ConfigParser.ConfigParser()
config.read(settingsFile)
else:
print "INI file not found. \nMake sure a valid 'settings.ini' file exists in the same directory as this script."
sys.exit()
# AGOL Credentials
inputUsername = config.get( 'AGOL', 'USER')
inputPswd = config.get('AGOL', 'PASS')
# FS values
MXD = config.get('FS_INFO', 'MXD')
serviceName = config.get('FS_INFO', 'SERVICENAME')
folderName = config.get('FS_INFO', 'FOLDERNAME')
tags = config.get('FS_INFO', 'TAGS')
description = config.get('FS_INFO', 'DESCRIPTION')
maxRecords = config.get('FS_INFO', 'MAXRECORDS')
# Share FS to: everyone, org, groups
shared = config.get('FS_SHARE', 'SHARE')
everyone = config.get('FS_SHARE', 'EVERYONE')
orgs = config.get('FS_SHARE', 'ORG')
groups = config.get('FS_SHARE', 'GROUPS') #Groups are by ID. Multiple groups comma separated
# create a temp directory under the script
tempDir = os.path.join(localPath, "tempDir")
if not os.path.isdir(tempDir):
os.mkdir(tempDir)
finalSD = os.path.join(tempDir, serviceName + ".sd")
#initialize AGOLHandler class
agol = AGOLHandler(inputUsername, inputPswd, serviceName, folderName)
# Turn map document into .SD file for uploading
makeSD(MXD, serviceName, tempDir, finalSD, maxRecords)
# overwrite the existing .SD on arcgis.com
if upload(finalSD, tags, description):
# publish the sd which was just uploaded
newItemID = publish()
# share the item
if shared:
enableSharing(newItemID, everyone, orgs, groups)
print "\nfinished."

If I understand your question correctly, you would just want to add another loop in your main and then place most of what you have in your main into a new function (in my example, the new function is called 'process_ini'.
So, try replacing everything from your name == main line through the end with:
def process_ini(fileName):
settingsFile = os.path.join(localPath, fileName)
if os.path.isfile(settingsFile):
config = ConfigParser.ConfigParser()
config.read(settingsFile)
else:
print "INI file not found. \nMake sure a valid 'settings.ini' file exists in the same directory as this script."
sys.exit()
# AGOL Credentials
inputUsername = config.get( 'AGOL', 'USER')
inputPswd = config.get('AGOL', 'PASS')
# FS values
MXD = config.get('FS_INFO', 'MXD')
serviceName = config.get('FS_INFO', 'SERVICENAME')
folderName = config.get('FS_INFO', 'FOLDERNAME')
tags = config.get('FS_INFO', 'TAGS')
description = config.get('FS_INFO', 'DESCRIPTION')
maxRecords = config.get('FS_INFO', 'MAXRECORDS')
# Share FS to: everyone, org, groups
shared = config.get('FS_SHARE', 'SHARE')
everyone = config.get('FS_SHARE', 'EVERYONE')
orgs = config.get('FS_SHARE', 'ORG')
groups = config.get('FS_SHARE', 'GROUPS') #Groups are by ID. Multiple groups comma separated
# create a temp directory under the script
tempDir = os.path.join(localPath, "tempDir")
if not os.path.isdir(tempDir):
os.mkdir(tempDir)
finalSD = os.path.join(tempDir, serviceName + ".sd")
#initialize AGOLHandler class
agol = AGOLHandler(inputUsername, inputPswd, serviceName, folderName)
# Turn map document into .SD file for uploading
makeSD(MXD, serviceName, tempDir, finalSD, maxRecords)
# overwrite the existing .SD on arcgis.com
if upload(finalSD, tags, description):
# publish the sd which was just uploaded
newItemID = publish()
# share the item
if shared:
enableSharing(newItemID, everyone, orgs, groups)
print "\nfinished."
if __name__ == "__main__":
print "Starting Feature Service publish process"
# Find and gather settings from the ini file
localPath = sys.path[0]
for fileName in ['settings.ini', 'flurb.ini', 'durf.ini']:
process_ini(fileName)
You'd have to write all the ini filenames in the list found in the penultimate line of my example.
Alternatively, you could identify all the .ini files in the directory via code:
if __name__ == "__main__":
print "Starting Feature Service publish process"
# Find and gather settings from the ini file
localPath = sys.path[0]
fileNames = [os.path.join(localPath, i) for i in os.listdir(localPath) if i.endswith('.ini')]
for fileName in fileNames:
process_ini(fileName)
It also might help to set the working directory (e.g., os.chdir(localPath)), but I'm going off of what you already had.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Different Scrapy feeds export destination for each request - python

Related

How do I get the orignal name of the file that is uploaded in the filefield in django

I want to read from a file and scrape each URL

Scrapy CSV column export

Best practice for keeping a config file in python

Python: How to loop through several ini files with ConfigParser?

Categories

Resources