Knowing how to assign from file - python

I am trying to figure out how to read a text file line by line in Python and assign the fields on each line to variables.
Text file:
0 Longoria Julia Manager
1 Valdivia Corina Surgeon
In the C++ version, the code to assign each variable per line is coded like this:
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
int ID[2];
string lName[2];
string fName[2];
string jobTitle[2];
// Reads whitespace-separated employee records (ID, last name, first name,
// job title) from "Employee List.txt" into the global parallel arrays.
// Returns 0 on success and 1 if the file could not be opened.
int main()
{
    fstream file;
    file.open("Employee List.txt");
    if (!file.is_open())
    {
        return 1;
    }

    // Never write past the end of the fixed-size global arrays.
    const int capacity = sizeof(ID) / sizeof(ID[0]);
    int index = 0;  // the original used this counter without declaring it

    // Loop on the extraction itself: eof() only becomes true after a read has
    // already failed, so testing it first processes one bogus extra record.
    while (index < capacity &&
           file >> ID[index] >> lName[index] >> fName[index] >> jobTitle[index])
    {
        index++;
    }

    file.close();
    return 0;
}
In the Java version, the code to assign each variable per line is coded like this:
import java.io.*;
import java.util.*;
/**
 * Reads up to two employee records (ID, last name, first name, job title)
 * from "Employee List.txt" into parallel arrays.
 */
public class fileToVariable
{
    public static void main(String[] args)
    {
        // Java arrays take their length from the initializer, not from the declarator.
        int[] ID = {0, 0};
        String[] lName = {"", ""};
        String[] fName = {"", ""};
        String[] jobTitle = {"", ""};
        int index = 0;

        // try-with-resources closes the scanner (and the underlying stream) on every path.
        try (Scanner ifstream = new Scanner(new FileInputStream("Employee List.txt")))
        {
            // hasNextInt() stops cleanly on a trailing blank line, where
            // hasNextLine() would still be true and nextInt() would throw.
            while (index < ID.length && ifstream.hasNextInt())
            {
                ID[index] = ifstream.nextInt();
                lName[index] = ifstream.next();
                fName[index] = ifstream.next();
                jobTitle[index] = ifstream.next();
                index++;
            }
        }
        catch (FileNotFoundException e)
        {
            // Report the problem instead of silently continuing without a scanner.
            System.err.println("Could not open Employee List.txt: " + e.getMessage());
        }
    }
}
Does anyone know how to write the Python equivalent of assigning each of these variables from the file?

Python doesn't have any built-in methods for reading words directly into variables. Instead, you read the whole line into a string, then use string operations to parse it.
And rather than having separate lists for each attribute, we would normally collect them all into a dictionary, and then make a list of these.
# Collect one dictionary per employee instead of keeping four parallel lists.
employees = []
# The with-statement closes the file even if a line fails to parse.
with open("Employee List.txt") as f:
    for line in f:
        fields = line.split()
        if len(fields) != 4:
            # Skip blank or malformed lines rather than failing on the unpacking below.
            continue
        emp_id, lname, fname, jobtitle = fields
        employees.append({"id": emp_id, "fname": fname, "lname": lname, "title": jobtitle})

You can use this:
# Parallel lists: position i in each list belongs to the same employee.
ids = []
lnames = []
fnames = []
jobtitles = []
# The with-statement guarantees the file is closed; the original never closed it.
with open("text.txt", "r") as file:
    for line in file:
        fields = line.split()
        if len(fields) != 4:
            # A blank (e.g. trailing) line would otherwise raise ValueError on unpacking.
            continue
        id1, lname, fname, jobtitle = fields
        ids.append(id1)
        lnames.append(lname)
        fnames.append(fname)
        jobtitles.append(jobtitle)

Related

Why do I have different line counts?

I made these different programs in different programming languages to count the number of lines of a file, and it turns out that the output differs according to the program, but the strange thing is that some programs have the same results, I was testing them with a 6gb utf-8 xml file with about 146 million lines.
# Python
# Output -> 146114085 lines
import time
# Count the lines of the file by iterating the text-mode file object and
# report how long that took.
# Note: open() is called with its default newline handling, under which
# '\n', '\r' and '\r\n' all terminate a line.
start = time.perf_counter()
lines = 0
with open('file_path') as handle:
    for _ in handle:
        lines = lines + 1
print("{} lines".format(lines))
end = time.perf_counter()
print(f'Elapsed time: {end - start:.3f} seconds')
// Java
// Output -> 146114085 lines (just as with python)
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
public class Main {
    /**
     * Counts the lines of the file at "file_path" with BufferedReader.lines()
     * and prints the count followed by the elapsed time in whole seconds.
     *
     * @throws RuntimeException wrapping the IOException if the file cannot be
     *         opened or closed
     */
    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();
        final int BUFFER_SIZE = 1024*1024;
        String filePath = "file_path";
        // try-with-resources closes the reader (and the FileReader it wraps)
        // even when counting throws; the original leaked it in that case.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath), BUFFER_SIZE)) {
            long lines = reader.lines().count();
            System.out.println("The number of lines is " + lines);
        } catch (IOException e) {
            // FileNotFoundException is a subclass of IOException, so this single
            // handler covers both of the original (identical) catch arms.
            throw new RuntimeException(e);
        }
        long elapsedTime = System.currentTimeMillis() - startTime;
        System.out.println("Duration in seconds: " + elapsedTime/1000);
    }
}
// Rust
// Output -> 146113746 lines
use std::fs::File;
use std::io::{BufRead, BufReader, Error, Read};
use std::time::Instant;
/// Runs `read_file` on a hard-coded path with a 1 MiB buffer, prints any
/// error it returns, and reports the elapsed time in whole seconds.
fn main() {
    let buffer_size = 1024*1024;
    let file_path = "file_path";
    let start = Instant::now();
    match read_file(buffer_size, file_path) {
        Ok(()) => {}
        Err(err) => println!("{}", err),
    }
    println!("The function took {} seconds to execute", start.elapsed().as_secs());
}
/// Opens `file_path` through a `BufReader` of `buffer_size` bytes, counts the
/// items yielded by `lines()` and prints the total.
///
/// Returns the error from `File::open` if the file cannot be opened.
fn read_file(buffer_size: usize, file_path: &str) -> Result<(), Error> {
    let reader = BufReader::with_capacity(buffer_size, File::open(file_path)?);
    // Every yielded item is counted, whether it is Ok or Err.
    let mut lines = 0;
    for _ in reader.lines() {
        lines += 1;
    }
    println!("Number of lines {}", lines);
    Ok(())
}
// C
// Output -> 146113745 lines (one line less than rust output)
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Counts the '\n' bytes in the file at "file_path" by reading it in 1 MiB
 * chunks, then prints the count and the CPU time used.
 *
 * Returns 0 on success, EXIT_FAILURE if the file cannot be opened, the
 * buffer cannot be allocated, or a read error occurs.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;
    // start time
    clock_t start = clock();
    // File path
    const char* file_path = "file_path";
    // Open the file for reading
    FILE *fp = fopen(file_path, "r");
    if (fp == NULL) {
        perror(file_path);
        return EXIT_FAILURE;
    }
    // Allocate a buffer to hold the data
    const size_t BUFFER_SIZE = 1024*1024;
    char *buffer = malloc(BUFFER_SIZE);
    if (buffer == NULL) {
        perror("malloc");
        fclose(fp);
        return EXIT_FAILURE;
    }
    // Declare the number of lines variable
    unsigned int lines = 0;
    // Read the data in chunks. Looping on fread's return value (instead of
    // !feof) also terminates on a read error, where feof never becomes true.
    size_t bytes_read;
    while ((bytes_read = fread(buffer, 1, BUFFER_SIZE, fp)) > 0) {
        // size_t index: bytes_read is unsigned, so avoid a signed/unsigned comparison
        for (size_t i = 0; i < bytes_read; i++) {
            if (buffer[i] == '\n') {
                lines++;
            }
        }
    }
    if (ferror(fp)) {
        perror(file_path);
        free(buffer);
        fclose(fp);
        return EXIT_FAILURE;
    }
    printf("The number of lines %u\n", lines);
    // Clean up
    free(buffer);
    fclose(fp);
    // End
    clock_t end = clock();
    // Calculate the elapsed time in seconds. Cast before dividing: the
    // original divided two integers first and lost the fractional part.
    double elapsed = (double)(end - start) / CLOCKS_PER_SEC;
    printf("Elapsed time: %f seconds", elapsed);
    return 0;
}
Finally, the command wc
Output -> 146113745 lines (just as with C)
wc -l file_path
I think the correct answer is Rust's, because it has one more than wc/C, and it is the last line that has no line change as it reaches the end of the file. The cases that cause me confusion are java and python.
My Regex expression for a line is .*?\\n|.+. This works in https://regexr.com/.
For some reason in the file reading implementation I'm using in Python and Java the character '\r' is interpreted as a line feed, but this doesn't happen with the Rust implementation, nor the wc one and obviously neither with the one I made in C (even when it is explicit).
But if I change the conditional (buffer[i] == '\n') to ((buffer[i] == '\n') || (buffer[i] == '\r')), I get the same value as in Python and Java minus 1.

Extract zip file inline in Oracle OCI - Object Storage without downloading to save time

Is it possible to extract a zip file 'inline' which is in cloud say Oracle cloud, Object storage. Meaning, without downloading it and extracting it in the o/s and re-uploading it to object storage, because the file size is big and we need to save time on upload/download.. Any sample code, with Oracle Functions, or python, java etc. ? Is it possible ? I tried with S3 browser/explorer or other similar tools, but that basically at the background, downloads and extract on local computer.
If I understand the question correctly, your use case would be that you have a compressed value on the server and want to extract it on the server and keep it there.
This is possible and mostly depends on how the value has been compressed.
If you use the Lempel-Ziv-Welch algorithm used in the UTL_COMPRESS package, you can extract it directly in PL/SQL.
For other formats like zip, you will need to use some custom Java code like the following example:
CREATE OR REPLACE
JAVA SOURCE NAMED ZIP_Java
AS
import java.io.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import java.util.zip.ZipInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.BufferedOutputStream;
import java.sql.Blob;
/**
 * Static helpers that zip and unzip BLOB content inside the database.
 * Both methods return 1 on success and 0 when any exception occurred; the
 * exception is printed to System.out.
 */
public class Java_Class {

    /** Closes a stream on an error path without masking the original exception. */
    private static void closeQuietly(Closeable resource) {
        if (resource != null) {
            try {
                resource.close();
            } catch (IOException ignored) {
                // nothing useful can be done here; the caller already reports a failure
            }
        }
    }

    /**
     * Writes the content of inLob as a single zip entry into outLob[0].
     *
     * @param inLob    BLOB holding the uncompressed data
     * @param outLob   one-element array whose BLOB receives the zip archive
     * @param filename name given to the zip entry
     * @return 1 on success, 0 if an exception occurred
     */
    public static int ZipBlob(Blob inLob, Blob[] outLob, String filename) {
        ZipOutputStream zos = null;
        try {
            // The stream is opened at position 1, i.e. it writes from the
            // first byte of the output BLOB.
            Blob zipLob = outLob[0];
            OutputStream os = zipLob.setBinaryStream(1);
            zos = new ZipOutputStream(os);
            // add one zip entry
            zos.putNextEntry(new ZipEntry(filename));
            // write data to the zip lob, one chunk at a time
            final int chunksize = 32768;
            long len = inLob.length();
            long offset = 0;
            while (offset < len) {
                // Blob positions are 1-based, hence offset + 1
                byte[] buffer = inLob.getBytes(offset + 1, chunksize);
                if (buffer == null || buffer.length == 0) {
                    // an empty chunk would never advance offset and loop forever
                    break;
                }
                zos.write(buffer, 0, buffer.length);
                offset += buffer.length;
            }
            zos.closeEntry();
            zos.close();
            zos = null; // closed successfully; nothing left for the finally block
            outLob[0] = zipLob;
        } catch (Exception e) {
            System.out.println("Exception: " + e.toString());
            e.printStackTrace(System.out);
            return 0;
        } finally {
            // only non-null when an exception interrupted the normal close above
            closeQuietly(zos);
        }
        return 1;
    }

    /**
     * Extracts the zip entry whose name equals filename (ignoring case) from
     * inLob into outLob[0].
     *
     * @param inLob    BLOB holding the zip archive
     * @param outLob   one-element array whose BLOB receives the extracted data
     * @param filename name of the entry to extract
     * @return 1 if no exception occurred (also when no entry matched), 0 otherwise
     */
    public static int UnzipBlob(Blob inLob, Blob[] outLob, String filename) {
        final int kBUFFER = 2048;
        ZipInputStream zis = null;
        try {
            zis = new ZipInputStream(inLob.getBinaryStream());
            ZipEntry entry;
            while ((entry = zis.getNextEntry()) != null) {
                if (!entry.getName().equalsIgnoreCase(filename)) {
                    continue;
                }
                // Open the output stream only once a matching entry is found,
                // so no stream is left unclosed when nothing matches.
                OutputStream os = outLob[0].setBinaryStream(1);
                BufferedOutputStream dest = new BufferedOutputStream(os, kBUFFER);
                try {
                    byte[] data = new byte[kBUFFER];
                    int count;
                    while ((count = zis.read(data, 0, kBUFFER)) != -1) {
                        dest.write(data, 0, count);
                    }
                    dest.flush();
                } finally {
                    dest.close();
                }
                // The output stream is closed now; stop instead of scanning on
                // and writing a second match into a closed stream.
                break;
            }
            zis.close();
            zis = null; // closed successfully; nothing left for the finally block
            return 1;
        } catch (Exception e) {
            System.out.println("Exception: " + e.toString());
            e.printStackTrace(System.out);
            return 0;
        } finally {
            closeQuietly(zis);
        }
    }
}
/
CREATE OR REPLACE
FUNCTION ZipBlobJava(theSource IN BLOB, theDestination IN OUT NOCOPY BLOB, theFilename IN VARCHAR2) RETURN NUMBER
AS LANGUAGE JAVA NAME 'Java_Class.ZipBlob(java.sql.Blob, java.sql.Blob[], java.lang.String) return int';
/
CREATE OR REPLACE
FUNCTION UnzipBlobJava(theSource IN BLOB, theDestination IN OUT NOCOPY BLOB, theFilename IN VARCHAR2) RETURN NUMBER
AS LANGUAGE JAVA NAME 'Java_Class.UnzipBlob(java.sql.Blob, java.sql.Blob[], java.lang.String) return int';
/

C char array from python string

I have a list of strings in python which I'm trying to pass down to a C extension for character analysis. I've gotten so far as to have the list broken up into their individual string PyObjects. Next, I'm hoping to split these strings into their individual characters so that every string PyObject is now a corresponding C-type character array. I can't seem to figure out how to do this though.
Here's what I have so far: Currently after building the .pyd file it will return a list of 1's as a filler to Python (so everything else works), I just don't know how to split a string PyObject into the C-type character array.
--- cExt.c ---
#include <Python.h>
#include <stdio.h>
/*
 * Analyses one command string and returns the result as an int.
 * The analysis itself is not implemented yet, so the placeholder value 1 is
 * always returned.
 *
 * The return type is int rather than int *: the function returns the value 1
 * and its caller stores the result in an int.
 */
static int CitemCheck(PyObject *commandString, int commandStringLength) {
    /* Unused until the real analysis is written. */
    (void)commandString;
    (void)commandStringLength;
    // HAALP
    //char* commandChars = (char*) malloc(commandStringLength*sizeof(char*));
    // char c[] = PyString_AsString("c", commandString);
    // printf("%c" , c);
    // printf("%s", PyString_AsString(commandString));
    // for (int i=0; i<sizeof(commandChars)/sizeof(*commandChars); i++) {
    // printf("%s", PyString_AsString(commandString));
    // printf("%c", commandChars[i]);
    // }
    return 1; // TODO: RETURN PROPER RESULTANT
}
/*
 * Runs CitemCheck on every item of commandList and collects the results.
 *
 * commandList        list object whose items are checked
 * commandListLength  number of items in commandList
 *
 * Returns a new list holding one Python int per item, or NULL with a Python
 * exception set if any C-API call fails.
 */
static PyObject *ClistCheck(PyObject *commandList, int commandListLength) {
    PyObject *results = PyList_New(commandListLength);
    if (results == NULL) {
        return NULL;
    }
    for (int index = 0; index < commandListLength; index++) {
        /* Borrowed reference: must not be released here. */
        PyObject *commandString = PyList_GetItem(commandList, index);
        if (commandString == NULL) {
            Py_DECREF(results);
            return NULL;
        }
        /* PyObject_Length returns -1 with an exception set on failure. */
        Py_ssize_t rawLength = PyObject_Length(commandString);
        if (rawLength < 0) {
            Py_DECREF(results);
            return NULL;
        }
        int commandStringLength = (int)rawLength;
        // CitemCheck should take string PyObject and its length as int
        int x = CitemCheck(commandString, commandStringLength);
        PyObject *pyItem = Py_BuildValue("i", x);
        if (pyItem == NULL) {
            Py_DECREF(results);
            return NULL;
        }
        /* PyList_SetItem takes over the reference to pyItem. */
        PyList_SetItem(results, index, pyItem);
    }
    return results;
}
/*
 * Python entry point: listCheck(commandList).
 * Parses the single argument, verifies that it is a list and returns the
 * list of results produced by ClistCheck.
 *
 * Returns NULL with a Python exception set on bad arguments or on failure.
 */
static PyObject *parseListCheck(PyObject *self, PyObject *args) {
    PyObject *commandList;
    (void)self;
    if (!PyArg_ParseTuple(args, "O", &commandList)){
        return NULL;
    }
    /* ClistCheck uses PyList_GetItem, which only works on real lists. */
    if (!PyList_Check(commandList)) {
        PyErr_SetString(PyExc_TypeError, "listCheck() expects a list");
        return NULL;
    }
    Py_ssize_t commandListLength = PyList_Size(commandList);
    /*
     * Return the new list directly. Wrapping it in Py_BuildValue("O", ...)
     * adds a second reference that is never released, leaking the list.
     */
    return ClistCheck(commandList, (int)commandListLength);
}
/* Docstring attached to the listCheck function; currently empty. */
static char listCheckDocs[] =
""; // TODO: ADD DOCSTRING
/* Method table: exposes parseListCheck to Python under the name "listCheck". */
static PyMethodDef listCheck[] = {
{"listCheck", (PyCFunction) parseListCheck, METH_VARARGS, listCheckDocs},
/* Sentinel entry marking the end of the table. */
{NULL,NULL,0,NULL}
};
/*
 * Module definition handed to PyModule_Create. The positional fields are,
 * in order: header, module name, module docstring (none), per-module state
 * size (-1) and the method table.
 * NOTE(review): the module name here is "listCheck" while the init function
 * below is PyInit_cExt -- confirm whether the name should be "cExt".
 */
static struct PyModuleDef DCE = {
PyModuleDef_HEAD_INIT,
"listCheck",
NULL,
-1,
listCheck
};
/* Module initialisation function: builds the module object from DCE. */
PyMODINIT_FUNC PyInit_cExt(void){
return PyModule_Create(&DCE);
}
for reference, my temporary extension build file:
--- _c_setup.py ---
(located in same folder as cExt.c)
"""
to build C files, pass:
python _c_setup.py build_ext --inplace clean --all
in command prompt which is cd'd to the file's directory
"""
import glob
from setuptools import setup, Extension, find_packages
from os import path
# Absolute path of the directory that contains this setup script.
here = path.abspath(path.dirname(__file__))

# Bare file names of the C sources matched by the '**.c' pattern in that directory.
files = []
for found in glob.glob(path.join(here, '**.c')):
    files.append(path.split(found)[1])

# One Extension per source file, named after the file without its suffix.
extensions = []
for source in files:
    module_name = path.splitext(source)[0]
    extensions.append(Extension(module_name, [source]))

setup(
    ext_modules = extensions,
)
You can use PyUnicode_AsEncodedString, which
Encode a Unicode object and return the result as Python bytes object. encoding and errors have the same meaning as the parameters of the same name in the Unicode encode() method. The codec to be used is looked up using the Python codec registry. Return NULL if an exception was raised by the codec.
see https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_AsEncodedString
Then with PyBytes_AsString you get a pointer to internal buffer with a terminating NUL byte. This buffer must neither be deallocated nor modified. If you need a copy you could use e.g. strdup.
see https://docs.python.org/3/c-api/bytes.html#c.PyBytes_AsString
Slightly modifying your code it could look like this:
/* Encode the string object as UTF-8 bytes; NULL means the codec raised an exception. */
PyObject *encodedString = PyUnicode_AsEncodedString(commandString, "UTF-8", "strict");
if (encodedString != NULL) {
    /* Points into encodedString's internal buffer: do not free or modify it. */
    char *commandChars = PyBytes_AsString(encodedString);
    if (commandChars != NULL) {
        printf("the string '%s' consists of the following chars:\n", commandChars);
        /* Walk the buffer up to its terminating NUL byte. */
        for (const char *cursor = commandChars; *cursor != '\0'; ++cursor) {
            printf("%c ", *cursor);
        }
        printf("\n");
    }
    /* Release the bytes object; commandChars is invalid afterwards. */
    Py_DECREF(encodedString);
}
If one would test with:
import cExt
fruits = ["apple", "pears", "cherry", "pear", "blueberry", "strawberry"]
res = cExt.listCheck(fruits)
print(res)
The output would be:
the string 'apple' consists of the following chars:
a p p l e
the string 'pears' consists of the following chars:
p e a r s
the string 'cherry' consists of the following chars:
c h e r r y
the string 'pear' consists of the following chars:
p e a r
the string 'blueberry' consists of the following chars:
b l u e b e r r y
the string 'strawberry' consists of the following chars:
s t r a w b e r r y
[1, 1, 1, 1, 1, 1]
Side note not directly related to the question:
Your CitemCheck function returns a pointer to int, but if looking at how it is called, it seems that you want to return an int value. The function signature should look more like this:
static int CitemCheck(PyObject *commandString, int commandStringLength)
(note the removed * after int).

parsing of javascript objects using python

friends!
I'm starting to learn python. I have a problem with obtaining the required value from javascript text. Here is the code, which I managed to download from website:
[<script src="//maps.google.com/maps?file=api&v=2&sensor=false&key=ABQIAAAAOjFUxXImJbfYejRUbw0-uBSoJppdodHXaiZe2O5Byw3T7kzYihSys_Exmi235-oDCy6xEhVelBMhBQ" type="text/javascript"></script>, <script type="text/javascript">
var map_shop = null;
var marker_shop = null;
function google_maps_shop_initialize()
{
if (GBrowserIsCompatible())
{
map_shop = new GMap2(document.getElementById("map_canvas_shop"));
point_center = new GLatLng(51.6663267, 39.1898874);
marker_shop = new GMarker(point_center);
map_shop.addOverlay(marker_shop);
map_shop.setCenter(point_center, 13);
//Create new Tile Layer
var gTileUrlTemplate = '//mt1.google.com/vt/lyrs=m#121,transit|vm:1&hl=ru&opts=r&x={X}&y={Y}&z={Z}';
var tileLayerOverlay = new GTileLayerOverlay(
new GTileLayer(null, null, null, {
tileUrlTemplate: gTileUrlTemplate,
isPng:true,
opacity:1
})
);
map_shop.addOverlay(tileLayerOverlay);
}
}
google_maps_shop_initialize();
</script>]
I want to print only one line from text, which contains coordinates point_center = new GLatLng(51.6663267, 39.1898874);
I'm trying to solve it using the re module, but the problem is that the line number may vary, and I get empty output with this code:
# re.match only matches at the very start of the string, so any text before
# "point_center" on the line (such as indentation) prevents a match.
# re.search scans the whole line; the group captures just the two
# coordinates between the parentheses.
found = re.search(r"point_center = new GLatLng\(([^)]*)\)", line)
if found:
    print(found.group(1))
Desirable output looks like this:
51.6663267, 39.1898874
If the Javascript is .txt format then you can simply do this:
import re
from ast import literal_eval as make_tuple

# Captures the parenthesised argument list of the GLatLng constructor call.
# A regex is used instead of a fixed slice so that leading whitespace and the
# trailing ';' on the line do not end up in the text handed to literal_eval.
point_center_call = re.compile(r"point_center\s*=\s*new\s+GLatLng\s*(\([^)]*\))")

linestring = None  # stays None when the file has no point_center assignment
with open("filename.txt") as f:
    for line in f:
        found = point_center_call.search(line)
        if found:
            # "(51.6663267, 39.1898874)" -> (51.6663267, 39.1898874)
            linestring = make_tuple(found.group(1))
            break
Your output should be a tuple.

How to parse serialized C structs from binary file in python?

I have a handful of different type of C-structs that are all compressed into a binary file.
struct-id serialized-struct struct-id serialized-struct ...
If it were the same struct over and over, it would make sense to use the struct package, but I want to switch between previously defined structs all the time.
import struct

# One-byte tags identifying which struct follows. They are bytes (not str)
# because the file is opened in binary mode: f.write() needs bytes and
# f.read(1) returns bytes.
STRUCT1_ID = b'\xAA'
STRUCT2_ID = b'\xBB'
STRUCT_IDS = frozenset([STRUCT1_ID, STRUCT2_ID])

# Parsed records; parse_test_file() appends one tuple of fields per struct.
struct1s = []
struct2s = []

# Tag -> (struct format, destination list).
# '<' = little-endian without padding, 'H' = unsigned 16-bit integer, so
# '<3H' is three uint16 fields and '<2H' is two.
_STRUCT_LAYOUTS = {
    STRUCT1_ID: ('<3H', struct1s),
    STRUCT2_ID: ('<2H', struct2s),
}


def create_test_file(filepath):
    """Write one example struct1 record and one example struct2 record to `filepath`."""
    with open(filepath, 'wb') as f:
        # Write an example struct1 id followed by struct
        f.write(STRUCT1_ID)
        f.write(b'\x01\x02\x03\x04\x05\x06')
        # Write an example struct2 id followed by struct
        f.write(STRUCT2_ID)
        f.write(b'\x07\x08\x09\x0A')


def parse_test_file(filepath):
    """Parse `filepath` as a sequence of (id byte, serialized struct) records.

    Each decoded struct is appended, as a tuple of its fields, to struct1s or
    struct2s depending on its id byte. Parsing stops with a printed message
    at the first unrecognized id or truncated record.
    """
    with open(filepath, 'rb') as f:
        msg_type = f.read(1)
        while msg_type:
            print(msg_type)
            if msg_type not in STRUCT_IDS:
                # Without a known id the record length is unknown, so the
                # rest of the file cannot be interpreted.
                print('Corrupted file. Unrecognized id')
                return
            fmt, records = _STRUCT_LAYOUTS[msg_type]
            size = struct.calcsize(fmt)
            payload = f.read(size)
            if len(payload) != size:
                print('Corrupted file. Truncated struct')
                return
            records.append(struct.unpack(fmt, payload))
            msg_type = f.read(1)
In C, the structs would be:
// First record type: three unsigned 16-bit fields.
typedef struct struct1_s {
uint16_t a;
uint16_t b;
uint16_t c;
} struct1_t;
// Second record type: two unsigned 16-bit fields.
typedef struct struct2_s {
uint16_t d;
uint16_t e;
} struct2_t;
// Declare and initialize the structs
// The values correspond to the example payload bytes 01 02 03 04 05 06
// when each pair of bytes is read least-significant byte first.
struct1_t s1 = {
.a = 0x0201,
.b = 0x0403,
.c = 0x0605
};
// Likewise for the example payload bytes 07 08 09 0A.
struct2_t s2 = {
.d = 0x0807,
.e = 0x0A09
};
I'm less fluent in Python than I am in C right now. I also seem unable to get construct working on Python 3.4.3.
Map the ID to the struct pattern, and use the appropriate one.
# Dispatch table: record id byte -> (struct format string, list collecting
# the parsed records of that type).
structmap = {
    b'\xaa': ('3H', struct1s),
    b'\xbb': ('2H', struct2s)
}
...
# Look the record type up once, read exactly as many bytes as its format
# needs, and store the unpacked fields in the matching list.
fmt, bucket = structmap[msg_type]
raw = f.read(struct.calcsize(fmt))
bucket.append(struct.unpack(fmt, raw))

Categories