Rewrite file content with new content - python

In a program - the program doesn't matter -, only the first lines, I open an empty file (named empty.txt).
Then I define functions, but never use them on main ... so, I do not actually write anything.
This the nearly complete code:
from os import chdir
chdir('C:\\Users\\Julien\\Desktop\\PS BOT')
fic=open('empty.txt','r+')
def addtodic(txt):
"""Messages de la forme !add id,message ; txt='id,message' """
fic.write(txt+'\n')
fic.seek(0)
def checkdic(txt):
"""Messages de la forme !lien id ; txt='id' """
for i in fic.readlines().split('\n'):
ind=i.index(',')
if i[:ind]==txt:
fic.seek(0)
return i[ind+1:]
fic.seek(0)
return 'Not found'
Then I launch it, and using the console, I simply ask "fic.write( 'tadam' )", like, to check if the writing works well before moving on.
%run "C:/Users/Julien/Desktop/PS BOT/dic.py"
fic
Out[8]: <open file 'empty.txt', mode 'r+' at 0x0000000008D9ED20>
fic.write('tadam')
fic.readline()
Out[10]: 'os import chdir\n'
fic.readline()
Out[11]: "chdir('C:\\\\Users\\\\Julien\\\\Desktop\\\\PS BOT')\n"
fic.readline()
Out[12]: '\n'
fic.readline()
Out[13]: "fic=open('empty.txt','r+')\n"
fic.readlines()
Out[14]:
['\n',
'def addtodic(txt):\n',
' """Messages de la forme !add id,message ; txt=\'id,message\' """\n',
' fic.seek(0)\n',
" fic.write(txt)+'\\n'\n",
'\n',
'def checkdic(txt):\n',
' """Messages de la forme !lien id ; txt=\'id\' """\n',
" for i in fic.readline().split('\\n'):\n",
" ind=i.index(',')\n",
' if i[:ind]==txt:\n',
' fic.seek(0)\n',
' return i[ind+1:]\n',
' fic.seek(0)\n',
" return 'Not found'\n",
' \n',
'def removedic(txt):\n',
' """Messages de la forme !remove id ; txt=\'id\' """\n',
' check=True\n',
' while check:\n',
' i=fic.readline()\n',
' if i[:len(txt)]==txt: \n',
' fic.seek(0)\n',
' return check\n',
'#removedic fauxeturn check\r\n',
"#removedic faux tmp_file = open(filename,'w')\n",
' tmp_file.write(data)\n',
' tmp_file.close()\n',
' return filename\n',
'\n',
' # TODO: This should be removed when Term is refactored.\n',
' def write(self,data):\n',
' """Write a string to the default output"""\n',
' io.stdout.write(data)\n',
'\n',
' # TODO: This should be removed when Term is refactored.\n',
' def write_err(self,data):\n',
' """Write a string to the default error output"""\n',
' io.stderr.write(data)\n',
'\n',
' def ask_yes_no(self, prompt, default=None):\n',
' if self.quiet:\n',
' return True\n',
' return ask_yes_no(prompt,default)\n',
'\n',
' def show_usage(self):\n',
' """Show a usage message"""\n',
' page.page(IPython.core.usage.interactive_usage)\n',
'\n',
' def extract_input_lines(self, range_str, raw=False):\n',
' """Return as a string a set of input history slices.\n',
'\n',
' Parameters\n',
' ----------\n',
' range_str : string\n',
' The set of slices is given as a string, like "~5/6-~4/2 4:8 9",\n',
' since this function is for use by magic functions which get their\n',
' arguments as strings. The number before the / is the session\n',
' number: ~n goes n back from the current session.\n',
'\n',
' Optional Parameters:\n',
' - raw(False): by default, the processed input is used. If this is\n',
' true, the raw input history is used instead.\n',
'\n',
' Note that slices can be called with two notations:\n',
'\n',
' N:M -> standard python form, means including items N...(M-1).\n',
'\n',
' N-M -> include items N..M (closed endpoint)."""\n',
' lines = self.history_manager.get_range_by_str(range_str, raw=raw)\n',
' return "\\n".join(x for _, _, x in lines)\n',
'\n',
' def find_user_code(self, target, raw=True, py_only=False, skip_encoding_cookie=True):\n',
' """Get a code string from history, file, url, or a string or macro.\n',
'\n',
' This is mainly used by magic functions.\n',
'\n',
' Parameters\n',
' ----------\n',
'\n',
' target : str\n',
'\n',
' A string specifying code to retrieve. This will be tried respectively\n',
' as: ranges of input history (see %history for syntax), url,\n',
' correspnding .py file, filename, or an expression evaluating to a\n',
' string or Macro in the user namespace.\n',
'\n',
' raw : bool\n',
' If true (default), retrieve raw history. Has no effect on the other\n',
' retrieval mechanisms.\n',
'\n',
' py_only : bool (default False)\n',
' Only try to fetch python code, do not try alternative methods to decode file\n',
' if unicode fails.\n',
'\n',
' Returns\n',
' -------\n',
' A string of code.\n',
'\n',
' ValueError is raised if nothing is found, and TypeError if it evaluates\n',
' to an object of another type. In each case, .args[0] is a printable\n',
' message.\n',
' """\n',
' code = self.extract_input_lines(target, raw=raw) # Grab history\n',
' if code:\n',
' return code\n',
' utarget = unquote_filename(target)\n',
' try:\n',
" if utarget.startswith(('http://', 'https://')):\n",
' return openpy.read_py_url(utarget, skip_encoding_cookie=skip_encoding_cookie)\n',
' except UnicodeDecodeError:\n',
' if not py_only :\n',
' from urllib import urlopen # Deferred import\n',
' response = urlopen(target)\n',
" return response.read().decode('latin1')\n",
' raise ValueError(("\'%s\' seem to be un']
KABOOM ! Has anybody an explanation ? By the way, I use Python 2.7 with Enthought Canopy.

When you open a file with 'r+', it doesn't get truncated, it still retains its old contents. To truncate it to 0 bytes, call fic.truncate(0) right after opening it.
You must seek between read and write operations on the same file object (otherwise the results are undefined because of buffering), e.g. add a fic.seek(0, 0) (or any other seek) after the write call.

Related

Why doesn't my "800m" value stay strip()ed and the "/n" comes back

for i in individuals:
dictionary.update({i[0]:{"200m":i[1], "400m": i[2], "800m": i[3].strip('/n')}})
return dictionary
This print out:
{'Louise HAZEL': {'200m': ' 24.48', '400m': ' 13.48', '800m': ' 138.78\n'}, 'Jessica ENNIS-HILL': {'200m': ' 22.83', '400m': ' 12.54', '800m': ' 128.65\n'}, 'Ivona DADIC': {'200m': ' 24.29', '400m': ' 14.58', '800m': ' 135.9\n'}}
The "/n" is printed when I clearly stripped it earlier. Why?

Clean Html Tags using Item Pipelines and get output in Scrapy

I am scraping company data using scrapy.
Some outputs are strings and some are lists.
I could process the string outputs using extract.strip() in the spider file itself but the list outputs are tricky.
I want a way to process all items together using item pipelines and print the result.
E.g:
this is my parse function:
def parse(self,response):
item = CompanycrawlerItem()
item['address'] = response.xpath('//*[#class="text data"]/text()').extract()[0]
item['status'] = response.xpath('//*[#class="text data"]/text()').extract()[1]
item['type'] = response.xpath('//*[#class="text data"]/text()').extract()[2]
item['accounts'] = response.xpath('//*[#class="column-half"]/p').extract()[:2]
item['confirmation_status'] = response.xpath('//*[#class="column-half"]/p').extract()[2:4]
item['incorporate'] = response.xpath('//*[#id="company-creation-date"]/text()').extract()
yield item
this is the output I am getting:
{'accounts': ['<p>\n'
'Next accounts made up to <strong>31 '
'December 2020</strong>\n'
' <br>\n'
' due by\n'
' <strong>30 September 2021</strong>\n'
' </p>',
'<p>\n'
' Last accounts made up to\n'
' <strong>31 December 2019</strong>\n'
' </p>'],
'address': '\n'
'Wellington House, 69/71 Upper Ground, London, SE1 9PQ ',
'confirmation_status': ['<p>\n'
'Next statement date <strong>7 June '
'2021</strong> <br>\n'
' due by <strong>21 June 2021</strong>\n'
' </p>',
'<p>\n'
' Last statement dated <strong>7 June '
'2020</strong>\n'
' </p>'],
'incorporate': ['31 December 1987'],
'status': '\n Active\n ',
'type': '\n Private limited Company\n '}
I want to use the Item Pipelines to get the tags and space out

Convert a list of tab prefixed strings to a dictionary

Text mining attempts here, I would like to turn the below:
a=['Colors.of.the universe:\n',
' Black: 111\n',
' Grey: 222\n',
' White: 11\n'
'Movies of the week:\n',
' Mission Impossible: 121\n',
' Die_Hard: 123\n',
' Jurassic Park: 33\n',
'Lands.categories.said:\n',
' Desert: 33212\n',
' forest: 4532\n',
' grassland : 431\n',
' tundra : 243451\n']
to this:
{'Colors.of.the universe':{Black:111,Grey:222,White:11},
'Movies of the week':{Mission Impossible:121,Die_Hard:123,Jurassic Park:33},
'Lands.categories.said': {Desert:33212,forest:4532,grassland:431,tundra:243451}}
Tried this code below but it was not good:
{words[1]:words[1:] for words in a}
which gives
{'o': 'olors.of.the universe:\n',
' ': ' tundra : 243451\n',
'a': 'ands.categories.said:\n'}
It only takes the first word as the key which is not what's needed.
A dict comprehension is an interesting approach.
a = ['Colors.of.the universe:\n',
' Black: 111\n',
' Grey: 222\n',
' White: 11\n',
'Movies of the week:\n',
' Mission Impossible: 121\n',
' Die_Hard: 123\n',
' Jurassic Park: 33\n',
'Lands.categories.said:\n',
' Desert: 33212\n',
' forest: 4532\n',
' grassland : 431\n',
' tundra : 243451\n']
result = dict()
current_key = None
for w in a:
# If starts with tab - its an item (under category)
if w.startswith(' '):
# Splitting item (i.e. ' Desert: 33212\n' -> [' Desert', ' 33212\n']
splitted = w.split(':')
# Setting the key and the value of the item
# Removing redundant spaces and '\n'
# Converting value to number
k, v = splitted[0].strip(), int(splitted[1].replace('\n', ''))
result[current_key][k] = v
# Else, it's a category
else:
# Removing ':' and '\n' form category name
current_key = w.replace(':', '').replace('\n', '')
# If category not exist - create a dictionary for it
if not current_key in result.keys():
result[current_key] = {}
# {'Colors.of.the universe': {'Black': 111, 'Grey': 222, 'White': 11}, 'Movies of the week': {'Mission Impossible': 121, 'Die_Hard': 123, 'Jurassic Park': 33}, 'Lands.categories.said': {'Desert': 33212, 'forest': 4532, 'grassland': 431, 'tundra': 243451}}
print(result)
That's really close to valid YAML already. You could just quote the property labels and parse. And parsing a known format is MUCH superior to dealing with and/or inventing your own. Even if you're just exploring base python, exploring good practices is just as (probably more) important.
import re
import yaml
raw = ['Colors.of.the universe:\n',
' Black: 111\n',
' Grey: 222\n',
' White: 11\n',
'Movies of the week:\n',
' Mission Impossible: 121\n',
' Die_Hard: 123\n',
' Jurassic Park: 33\n',
'Lands.categories.said:\n',
' Desert: 33212\n',
' forest: 4532\n',
' grassland : 431\n',
' tundra : 243451\n']
# Fix spaces in property names
fixed = []
for line in raw:
match = re.match(r'^( *)(\S.*?): ?(\S*)\s*', line)
if match:
fixed.append('{indent}{safe_label}:{value}'.format(
indent = match.group(1),
safe_label = "'{}'".format(match.group(2)),
value = ' ' + match.group(3) if match.group(3) else ''
))
else:
raise Exception("regex failed")
parsed = yaml.load('\n'.join(fixed), Loader=yaml.FullLoader)
print(parsed)

Set property with whitespace with py2neo

I'm trying to set a property with a value which could have with space.I'm using py2neo and I wrote this, but it doesn't work:
name = mention['name']
screen_name = mention['screen_name']
id = mention['id']
graph.run(
'MATCH (f) '
'WHERE EXISTS(f.account_id) AND f.account_id=\'' + id + '\' '
'SET f.name=\'' + re.escape(name) + '\', f.screen_name=\'#' + screen_name + '\' '
'RETURN null'
)
How can I solve ? Thanks
You should pass the values (including the problematic value for name) as parameters. That avoids messy issues with the Python and Cypher parsers (and escaping the string value should also not be necessary).
This may work for you:
graph.run(
'MATCH (f) '
'WHERE f.account_id=$id '
'SET f.name=$name, f.screen_name=$screen_name',
{"id": mention['id'], "name": mention['name'], "screen_name": '#'+mention['screen_name']}
)
P.S: I assume you are trying to set the property screen_name with
space.
You placed space after \' (single quote), It should be before it:
graph.run(
'MATCH (f) '
'WHERE EXISTS(f.account_id) AND f.account_id=\'' + id + '\''
'SET f.name=\'' + re.escape(name) + '\', f.screen_name=\'#' + screen_name + ' \''
'RETURN null'
)

prevent pandas from removing spaces in numbers in text columns

I'm trying to load CSV file into pandas dataframe. CSV is semicolon delimited. Values in text columns are in double quotation marks.
File in question: https://www.dropbox.com/s/1xv391gebjzmmco/file_01.csv?dl=0
In one of the text columns ('TYTUL') i have following value:
"00 307 1457 212"
I specify the column as str but when i print or export results to excel I get
003071457212
instead of
00 307 1457 212
How do I prevent pandas from removing spaces?
Here is my code:
import pandas
df = pandas.read_csv(r'file_01.csv'
,sep = ';'
,quotechar = '"'
,names = ['DATA_OPERACJI'
,'DATA_KSIEGOWANIA'
,'OPIS_OPERACJI'
,'TYTUL'
,'NADAWCA_ODBIORCA'
,'NUMER_KONTA'
,'KWOTA'
,'SALDO_PO_OPERACJI'
,'KOLUMNA_9']
,usecols = [0,1,2,3,4,5,6,7]
,skiprows = 38
,skipfooter = 3
,encoding = 'cp1250'
,thousands = ' '
,decimal = ','
,parse_dates = [0,1]
,converters = {'OPIS_OPERACJI': str
,'TYTUL': str
,'NADAWCA_ODBIORCA': str
,'NUMER_KONTA': str}
,engine = 'python'
)
df.TYTUL.replace([' +', '^ +', ' +$'], [' ', '', ''],regex=True,inplace=True) #this only removes excessive spaces
print(df.TYTUL)
I also came up with a workaround (comment #workaround) but I would like to ask if there is a better way.
import pandas
df = pandas.read_csv(r'file_01.csv'
,sep = ';'
,quotechar = '?' #workaround
,names = ['DATA_OPERACJI'
,'DATA_KSIEGOWANIA'
,'OPIS_OPERACJI'
,'TYTUL'
,'NADAWCA_ODBIORCA'
,'NUMER_KONTA'
,'KWOTA'
,'SALDO_PO_OPERACJI'
,'KOLUMNA_9']
,usecols = [0,1,2,3,4,5,6,7]
,skiprows = 38
,skipfooter = 3
,encoding = 'cp1250'
,thousands = ' '
,decimal = ','
,parse_dates = [0,1]
,converters = {'OPIS_OPERACJI': str
,'TYTUL': str
,'NADAWCA_ODBIORCA': str
,'NUMER_KONTA': str}
,engine = 'python'
)
df.TYTUL.replace([' +', '^ +', ' +$'], [' ', '', ''],regex=True,inplace=True) #this only removes excessive spaces
df.TYTUL.replace(['^"', '"$'], ['', ''],regex=True,inplace=True) #workaround
print(df.TYTUL)
remove this line from your read_csv code
,thousands = ' '
I tested it, the output is correct without this option
'00 307 1457 212'

Categories