Skip to content

Commit 50b6217

Browse files
Merge pull request #62 from karimbahgat/speedup
Significant speedups with minimal changes through batch unpacking of bytes
2 parents c593c15 + 75d28ae commit 50b6217

File tree

2 files changed

+16
-14
lines changed

2 files changed

+16
-14
lines changed

shapefile.py

+16-14
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
__version__ = "1.2.3"
1212

13-
from struct import pack, unpack, calcsize, error
13+
from struct import pack, unpack, calcsize, error, Struct
1414
import os
1515
import sys
1616
import time
@@ -346,7 +346,8 @@ def __shape(self):
346346
record.partTypes = _Array('i', unpack("<%si" % nParts, f.read(nParts * 4)))
347347
# Read points - produces a list of [x,y] values
348348
if nPoints:
349-
record.points = [_Array('d', unpack("<2d", f.read(16))) for p in range(nPoints)]
349+
flat = unpack("<%sd" % (2 * nPoints), f.read(16*nPoints))
350+
record.points = list(izip(*(iter(flat),) * 2))
350351
# Read z extremes and values
351352
if shapeType in (13,15,18,31):
352353
(zmin, zmax) = unpack("<2d", f.read(16))
@@ -389,10 +390,12 @@ def __shapeIndex(self, i=None):
389390
numRecords = shxRecordLength // 8
390391
# Jump to the first record.
391392
shx.seek(100)
392-
for r in range(numRecords):
393-
# Offsets are 16-bit words just like the file length
394-
self._offsets.append(unpack(">i", shx.read(4))[0] * 2)
395-
shx.seek(shx.tell() + 4)
393+
shxRecords = _Array('i')
394+
# Each index record consists of two numbers; only the first one (the offset) matters
395+
shxRecords.fromfile(shx, 2 * numRecords)
396+
if sys.byteorder != 'big':
397+
shxRecords.byteswap()
398+
self._offsets = [2 * el for el in shxRecords[::2]]
396399
if not i == None:
397400
return self._offsets[i]
398401

@@ -469,6 +472,8 @@ def __dbfHeader(self):
469472
if terminator != b("\r"):
470473
raise ShapefileException("Shapefile dbf header lacks expected terminator. (likely corrupt?)")
471474
self.fields.insert(0, ('DeletionFlag', 'C', 1, 0))
475+
fmt,fmtSize = self.__recordFmt()
476+
self.__recStruct = Struct(fmt)
472477

473478
def __recordFmt(self):
474479
"""Calculates the size of a .shp geometry record."""
@@ -481,8 +486,7 @@ def __recordFmt(self):
481486
def __record(self):
482487
"""Reads and returns a dbf record row as a list of values."""
483488
f = self.__getFileObj(self.dbf)
484-
recFmt = self.__recordFmt()
485-
recordContents = unpack(recFmt[0], f.read(recFmt[1]))
489+
recordContents = self.__recStruct.unpack(f.read(self.__recStruct.size))
486490
if recordContents[0] != b(' '):
487491
# deleted record
488492
return None
@@ -535,7 +539,7 @@ def record(self, i=0):
535539
if not self.numRecords:
536540
self.__dbfHeader()
537541
i = self.__restrictIndex(i)
538-
recSize = self.__recordFmt()[1]
542+
recSize = self.__recStruct.size
539543
f.seek(0)
540544
f.seek(self.__dbfHeaderLength() + (i * recSize))
541545
return self.__record()
@@ -544,13 +548,11 @@ def records(self):
544548
"""Returns all records in a dbf file."""
545549
if not self.numRecords:
546550
self.__dbfHeader()
547-
records = []
548551
f = self.__getFileObj(self.dbf)
549552
f.seek(self.__dbfHeaderLength())
550-
for i in range(self.numRecords):
551-
r = self.__record()
552-
if r:
553-
records.append(r)
553+
flat = unpack(self.__recStruct.format * self.numRecords, f.read(self.__recStruct.size * self.numRecords))
554+
rowlen = len(self.fields) - 1
555+
records = list(izip(*(iter(flat),) * rowlen))
554556
return records
555557

556558
def iterRecords(self):

shapefile.pyc

40.6 KB
Binary file not shown.

0 commit comments

Comments
 (0)