Skip to content

MAINT: Cleanup pandas/src/parser #14740

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 6, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions ci/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,18 @@ if [ "$LINT" ]; then
done
echo "Linting *.pxi.in DONE"

# readability/casting: Warnings about C casting instead of C++ casting
# runtime/int: Warnings about using C number types instead of C++ ones
# build/include_subdir: Warnings that included header files should be prefixed with their directory
pip install cpplint

echo "Linting *.c and *.h"
cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/parser
if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting *.c and *.h DONE"

echo "Check for invalid testing"
grep -r -E --include '*.py' --exclude nosetester.py --exclude testing.py '(numpy|np)\.testing' pandas
if [ $? = "0" ]; then
Expand Down
118 changes: 42 additions & 76 deletions pandas/src/parser/io.c
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
#include "io.h"
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.

Distributed under the terms of the BSD Simplified License.

The full license is in the LICENSE file, distributed with this software.
*/

/*
On-disk FILE, uncompressed
*/
#include "io.h"

/*
On-disk FILE, uncompressed
*/

void *new_file_source(char *fname, size_t buffer_size) {
file_source *fs = (file_source *) malloc(sizeof(file_source));
file_source *fs = (file_source *)malloc(sizeof(file_source));
fs->fp = fopen(fname, "rb");

if (fs->fp == NULL) {
Expand All @@ -18,7 +26,7 @@ void *new_file_source(char *fname, size_t buffer_size) {
fs->initial_file_pos = ftell(fs->fp);

// Only allocate this heap memory if we are not memory-mapping the file
fs->buffer = (char*) malloc((buffer_size + 1) * sizeof(char));
fs->buffer = (char *)malloc((buffer_size + 1) * sizeof(char));

if (fs->buffer == NULL) {
return NULL;
Expand All @@ -27,33 +35,19 @@ void *new_file_source(char *fname, size_t buffer_size) {
memset(fs->buffer, 0, buffer_size + 1);
fs->buffer[buffer_size] = '\0';

return (void *) fs;
return (void *)fs;
}


// XXX handle on systems without the capability


/*
* void *new_file_buffer(FILE *f, int buffer_size)
*
* Allocate a new file_buffer.
* Returns NULL if the memory allocation fails or if the call to mmap fails.
*
* buffer_size is ignored.
*/


/*
 * Allocate an rd_source that wraps the Python object `obj`.
 *
 * A new reference to `obj` is taken (Py_INCREF) and stored in rds->obj;
 * rds->buffer starts as NULL and rds->position as 0. The new struct is
 * returned as a void *.
 *
 * NOTE(review): this span is a rendered diff -- the signature, the malloc
 * line and the return line each appear twice (pre-change form followed by
 * the reformatted form).
 * NOTE(review): the malloc() result is dereferenced without a NULL check.
 */
void* new_rd_source(PyObject *obj) {
rd_source *rds = (rd_source *) malloc(sizeof(rd_source));
void *new_rd_source(PyObject *obj) {
rd_source *rds = (rd_source *)malloc(sizeof(rd_source));

/* hold on to this object */
Py_INCREF(obj);
rds->obj = obj;
rds->buffer = NULL;
rds->position = 0;

return (void*) rds;
return (void *)rds;
}

/*
Expand All @@ -63,9 +57,7 @@ void* new_rd_source(PyObject *obj) {
*/

int del_file_source(void *fs) {
// fseek(FS(fs)->fp, FS(fs)->initial_file_pos, SEEK_SET);
if (fs == NULL)
return 0;
if (fs == NULL) return 0;

/* allocated on the heap */
free(FS(fs)->buffer);
Expand All @@ -89,27 +81,23 @@ int del_rd_source(void *rds) {

*/


/*
 * Read up to `nbytes` bytes from the file_source's FILE into its buffer.
 *
 * source:     a file_source, passed as void * and cast through FS().
 * nbytes:     maximum number of bytes requested from fread().
 * bytes_read: out; receives the item count returned by fread().
 * status:     out; REACHED_EOF when fread() returned 0, otherwise 0.
 *
 * Returns src->buffer.
 *
 * NOTE(review): this span is a rendered diff -- the signature, the fread
 * call and the return line each appear in both pre- and post-change form.
 * NOTE(review): a zero-byte read is always reported as REACHED_EOF; ferror()
 * is not consulted, so a read error is indistinguishable from end of file.
 * NOTE(review): nbytes is not checked against the buffer's capacity here --
 * presumably the caller passes at most the size used when the buffer was
 * allocated; confirm against the caller.
 */
void* buffer_file_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status) {
void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status) {
file_source *src = FS(source);

*bytes_read = fread((void*) src->buffer, sizeof(char), nbytes,
src->fp);
*bytes_read = fread((void *)src->buffer, sizeof(char), nbytes, src->fp);

if (*bytes_read == 0) {
*status = REACHED_EOF;
} else {
*status = 0;
}

return (void*) src->buffer;

return (void *)src->buffer;
}


void* buffer_rd_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status) {
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status) {
PyGILState_STATE state;
PyObject *result, *func, *args, *tmp;

Expand All @@ -125,21 +113,18 @@ void* buffer_rd_bytes(void *source, size_t nbytes,
args = Py_BuildValue("(i)", nbytes);

func = PyObject_GetAttrString(src->obj, "read");
/* printf("%s\n", PyBytes_AsString(PyObject_Repr(func))); */

/* TODO: does this release the GIL? */
result = PyObject_CallObject(func, args);
Py_XDECREF(args);
Py_XDECREF(func);

/* PyObject_Print(PyObject_Type(result), stdout, 0); */
if (result == NULL) {
PyGILState_Release(state);
*bytes_read = 0;
*status = CALLING_READ_FAILED;
return NULL;
}
else if (!PyBytes_Check(result)) {
} else if (!PyBytes_Check(result)) {
tmp = PyUnicode_AsUTF8String(result);
Py_XDECREF(result);
result = tmp;
Expand All @@ -154,8 +139,7 @@ void* buffer_rd_bytes(void *source, size_t nbytes,

/* hang on to the Python object */
src->buffer = result;
retval = (void*) PyBytes_AsString(result);

retval = (void *)PyBytes_AsString(result);

PyGILState_Release(state);

Expand All @@ -165,42 +149,38 @@ void* buffer_rd_bytes(void *source, size_t nbytes,
return retval;
}


#ifdef HAVE_MMAP

#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/stat.h>

void *new_mmap(char *fname)
{
void *new_mmap(char *fname) {
struct stat buf;
int fd;
memory_map *mm;
/* off_t position; */
off_t filesize;

mm = (memory_map *) malloc(sizeof(memory_map));
mm = (memory_map *)malloc(sizeof(memory_map));
mm->fp = fopen(fname, "rb");

fd = fileno(mm->fp);
if (fstat(fd, &buf) == -1) {
fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", errno);
return NULL;
}
filesize = buf.st_size; /* XXX This might be 32 bits. */

filesize = buf.st_size; /* XXX This might be 32 bits. */

if (mm == NULL) {
/* XXX Eventually remove this print statement. */
fprintf(stderr, "new_file_buffer: malloc() failed.\n");
return NULL;
}
mm->size = (off_t) filesize;
mm->size = (off_t)filesize;
mm->line_number = 0;

mm->fileno = fd;
mm->position = ftell(mm->fp);
mm->last_pos = (off_t) filesize;
mm->last_pos = (off_t)filesize;

mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
if (mm->memmap == NULL) {
Expand All @@ -210,30 +190,20 @@ void *new_mmap(char *fname)
mm = NULL;
}

return (void*) mm;
return (void *)mm;
}


/*
 * Tear down a memory_map: unmap the mapped region (memmap, size), close the
 * FILE, and free the struct itself. Always returns 0.
 *
 * NOTE(review): this span is a rendered diff -- the function header appears
 * in both its pre-change (brace on its own line) and post-change form, and
 * the commented-out "restore" block below belongs to the pre-change side.
 * NOTE(review): `src` is dereferenced without a NULL check, and the return
 * values of munmap() and fclose() are ignored.
 */
int del_mmap(void *src)
{
int del_mmap(void *src) {
munmap(MM(src)->memmap, MM(src)->size);

fclose(MM(src)->fp);

/*
* With a memory mapped file, there is no need to do
* anything if restore == RESTORE_INITIAL.
*/
/* if (restore == RESTORE_FINAL) { */
/* fseek(FB(fb)->file, FB(fb)->current_pos, SEEK_SET); */
/* } */
free(src);

return 0;
}

void* buffer_mmap_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status) {
void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status) {
void *retval;
memory_map *src = MM(source);

Expand Down Expand Up @@ -264,19 +234,15 @@ void* buffer_mmap_bytes(void *source, size_t nbytes,

/* kludgy */

/*
 * Stub new_mmap: ignores `fname` and returns NULL.
 * NOTE(review): presumably the fallback used when HAVE_MMAP is not defined;
 * the #else line is not visible in this diff view -- confirm.
 * NOTE(review): rendered diff -- the multi-line pre-change definition is
 * followed by the one-line post-change definition.
 */
void *new_mmap(char *fname) {
return NULL;
}
void *new_mmap(char *fname) { return NULL; }

/*
 * Stub del_mmap: ignores `src`, releases nothing, and returns 0.
 * NOTE(review): presumably the fallback used when HAVE_MMAP is not defined;
 * the #else line is not visible in this diff view -- confirm.
 * NOTE(review): rendered diff -- the multi-line pre-change definition is
 * followed by the one-line post-change definition.
 */
int del_mmap(void *src) {
return 0;
}
int del_mmap(void *src) { return 0; }

/* don't use this! */

/*
 * Stub buffer_mmap_bytes: ignores all arguments and returns NULL.
 * Neither *bytes_read nor *status is written, so callers must not read them
 * after calling this version.
 * NOTE(review): presumably the fallback used when HAVE_MMAP is not defined;
 * the #else line is not visible in this diff view -- confirm.
 * NOTE(review): rendered diff -- the pre-change and post-change signatures
 * and return lines appear back to back and share one closing brace.
 */
void* buffer_mmap_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status) {
return NULL;
void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status) {
return NULL;
}

#endif
47 changes: 24 additions & 23 deletions pandas/src/parser/io.h
Original file line number Diff line number Diff line change
@@ -1,30 +1,33 @@
/*
Copyright (c) 2016, PyData Development Team
All rights reserved.

Distributed under the terms of the BSD Simplified License.

The full license is in the LICENSE file, distributed with this software.
*/

#ifndef PANDAS_SRC_PARSER_IO_H_
#define PANDAS_SRC_PARSER_IO_H_

#include "Python.h"
#include "tokenizer.h"


/*
 * State for reading an on-disk file through a stdio FILE and a separately
 * held char buffer.
 *
 * NOTE(review): this span is a rendered diff -- the "Actual number of bytes"
 * comment appears in both its one-line and its wrapped form, and the
 * commented-out size / buffer_size / buffer members are leftovers shown on
 * the pre-change side.
 */
typedef struct _file_source {
/* The file being read. */
FILE *fp;

/* Buffer that reads from fp are placed into. */
char *buffer;
/* Size of the file, in bytes. */
/* off_t size; */

/* file position when the file_buffer was created. */
off_t initial_file_pos;

/* Offset in the file of the data currently in the buffer. */
off_t buffer_file_pos;

/* Actual number of bytes in the current buffer. (Can be less than buffer_size.) */
/* Actual number of bytes in the current buffer. (Can be less than
* buffer_size.) */
off_t last_pos;

/* Size (in bytes) of the buffer. */
// off_t buffer_size;

/* Pointer to the buffer. */
// char *buffer;

} file_source;

#define FS(source) ((file_source *)source)
Expand All @@ -34,7 +37,6 @@ typedef struct _file_source {
#endif

typedef struct _memory_map {

FILE *fp;

/* Size of the file, in bytes. */
Expand All @@ -49,22 +51,20 @@ typedef struct _memory_map {
off_t position;
off_t last_pos;
char *memmap;

} memory_map;

#define MM(src) ((memory_map*) src)
#define MM(src) ((memory_map *)src)

void *new_mmap(char *fname);

int del_mmap(void *src);

void* buffer_mmap_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status);

void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status);

/*
 * State for reading from a Python object.
 *
 * NOTE(review): this span is a rendered diff -- the obj and buffer members
 * appear twice (pre-change `PyObject* x` form, then post-change
 * `PyObject *x` form); the real struct has one of each.
 */
typedef struct _rd_source {
PyObject* obj;
PyObject* buffer;
/* The wrapped Python object. */
PyObject *obj;
/* Python object kept alongside obj; presumably the most recent chunk read
 * from it -- confirm against buffer_rd_bytes. */
PyObject *buffer;
/* NOTE(review): presumably a read offset -- confirm against the users of
 * this struct. */
size_t position;
} rd_source;

Expand All @@ -77,9 +77,10 @@ void *new_rd_source(PyObject *obj);
int del_file_source(void *src);
int del_rd_source(void *src);

void* buffer_file_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status);
void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status);

void* buffer_rd_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status);
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status);

#endif // PANDAS_SRC_PARSER_IO_H_
Loading