# File: timeIO.py
"""
test various read and write file I/O modes for speed
in the version of Python that is running this script;
runs most common and valid read/write coding patterns;
tests ascii and binary, but not wide-char unicode files;
printed results can be parsed later for comparisons
"""
######################################################################
# generic timer
######################################################################
import time
def timeOnce(func, *args):
    """
    Time one call of func(*args) and return the elapsed seconds as a float.

    Any value returned by func is discarded; exceptions raised by func
    propagate to the caller.  time.perf_counter is used where it exists,
    because time.clock was removed in Python 3.8; older Pythons that lack
    perf_counter fall back to time.clock.
    """
    # "or" short-circuits, so time.clock is only looked up when needed
    clock = getattr(time, 'perf_counter', None) or time.clock
    start = clock()
    func(*args)                                  # ignore any return value
    return clock() - start
def timerAvg(func, *args):
    """
    Return the mean elapsed time of three timed calls of func(*args).

    func is called once beforehand as a warm-up; that call's time is not
    part of the average.
    """
    timeOnce(func, *args)                        # warm-up run, time discarded
    reps = 3
    total = 0
    done = 0
    while done < reps:                           # accumulate run by run
        total += timeOnce(func, *args)
        done += 1
    return total / reps
# The following may be a bit better, but isn't directly comparable
def timerBest(func, *args):
    """
    Return the smallest elapsed time of three timed calls of func(*args).

    func is called once beforehand as a warm-up (intended to get disk
    caches active); that call's time is not considered.
    """
    timeOnce(func, *args)                        # warm-up run, time discarded
    reps = 3
    return min([timeOnce(func, *args) for _ in range(reps)])
# timing strategy called by timePython for every test: best of N runs
timer = timerBest # CHANGED
######################################################################
# file read tests
######################################################################
#=====================================================================
# all of the following are probably valid use cases for 2.6 and 3.0,
# though lines/text and blocks/binary combos seem more typical in 3.0
# (programs will pick str xor bytes for text or binary data in 3.0);
# truly binary files can only be read in binary mode in 3.0, because
# they cannot be decoded into characters in text mode, and it makes
# no sense to read truly binary files by lines: they have no lineends;
# the allAtOnce modes may fail for pathologically large files;
#
# 3.0 has str + bytes; 2.6 has just str, plus binary files
# open mode default = 'r' = 'rt' in 3.0, and 'r' in 2.6
# (both mean text mode input when the mode argument is omitted)
#=====================================================================
# default size argument (32K) passed to read() by the read_byBlocks tests
blocksize = 32 * 1024
def read_byLines_textMode(filename):
    """
    Read filename line by line in text mode, discarding every line.

    2.6 text mode returns str and does not decode (use codecs.open);
    3.0 text mode returns str, after decoding content.  The file is
    closed on exit, even on error, rather than left for the garbage
    collector.
    """
    with open(filename) as infile:
        for line in infile:
            pass
def read_byLines_binaryMode(filename):           # less common in 3.0?
    """
    Read filename line by line in binary mode, discarding every line.

    2.6 binary mode returns str and does not decode;
    3.0 binary mode returns bytes and does not decode.  The file is
    closed on exit, even on error, rather than left for the garbage
    collector.
    """
    with open(filename, 'rb') as infile:
        for line in infile:
            pass
def read_byBlocks_textMode(filename, size=blocksize):   # less common in 3.0?
    """
    Read filename in text mode, size units per read() call, until read()
    returns an empty result; every block is discarded.

    The file is closed on exit, even on error, rather than left for the
    garbage collector.
    """
    with open(filename) as infile:
        while True:
            block = infile.read(size)
            if not block:                        # empty result: end of file
                break
def read_byBlocks_binaryMode(filename, size=blocksize):
    """
    Read filename in binary mode, size bytes per read() call, until
    read() returns an empty result; every block is discarded.

    The file is closed on exit, even on error, rather than left for the
    garbage collector.
    """
    with open(filename, 'rb') as infile:
        while True:
            block = infile.read(size)
            if not block:                        # empty result: end of file
                break
def read_allAtOnce_textMode(filename):           # not for very large files
    """
    Load the whole of filename with a single text-mode read() call and
    discard the result.

    The file is closed on exit, even on error, rather than left for the
    garbage collector.
    """
    with open(filename) as infile:
        infile.read()
def read_allAtOnce_binaryMode(filename):         # not for very large files
    """
    Load the whole of filename with a single binary-mode read() call and
    discard the result.

    The file is closed on exit, even on error, rather than left for the
    garbage collector.
    """
    with open(filename, 'rb') as infile:
        infile.read()
######################################################################
# file write tests
######################################################################
#=====================================================================
# all the following work, but tests "write_byLines_binaryMode" and
# "write_byBlocks_textMode" are probably invalid use cases for 3.0,
# where programs are more likely to pick str xor bytes for text
# or binary data, and not convert to str or bytes just to write in
# text or binary mode; portability issues: 3.0's encoding arg required
# by 3.0's bytes() converter is not allowed in 2.6's bytes(), and
# 2.6's str.decode() creates a unicode object which adds some cost;
#
# hoist set-up ops out to avoid charging to test funcs
# 'xx' / b'xx' are str / bytes in 3.0, both are str in 2.6
# 'xx' == b'xx' and bytes(x) == str(x) in 2.6
# 2.6: str is a seq of bytes, unicode a distinct type
# 3.0: str is seq of Unicode chars, bytes is seq of ints
#=====================================================================
# output data for the write tests, built once here at import time so that
# set-up work is not charged to the timed functions
oneMeg = 1024 ** 2
halfMeg = oneMeg // 2                     # floor division: same int result in 2.6 and 3.0
repsList = list(range(0, halfMeg))        # a real list in both 2.6 and 3.0
aLine = '%s\n' % ('*' * 49)               # 50 chars; 25M in file if written halfMeg times (+\r?)
aBlock = b'1\x0234\x05' * 10              # 5 * 10 = 50 bytes; 25M in file if written halfMeg times
aFileStr = aLine * halfMeg                # 25M characters, written in a single call
aFileBin = aBlock * halfMeg               # 25M bytes, written in a single call
print('\nOutput data sizes: %s %s %s %s %s' % (
    len(repsList), len(aLine), len(aBlock), len(aFileStr), len(aFileBin)))
def write_byLines_textMode(filename):            # writing by blocks in text mode is similar
    """
    Write aLine to filename once per item of repsList, in text mode.

    3.0 text mode takes str, encodes content and translates newlines
    (an open() flag controls line ends); 2.6 text mode takes str and
    translates newlines.  The file is closed even if a write fails.
    """
    with open(filename, 'w') as outfile:
        for i in repsList:
            outfile.write(aLine)
def write_byLines_binaryMode(filename):          # less common in 3.0?
    """
    Write aLine to filename once per item of repsList, in binary mode.

    3.0 binary mode takes bytes and does not decode or translate;
    2.6 binary mode takes str and does not translate newlines.
    The file is closed even if a write fails.
    """
    with open(filename, 'wb') as outfile:
        for i in repsList:
            # conversion deliberately stays in the loop: its cost is part
            # of this test; encode() makes bytes in 3.0, same str in 2.6
            outfile.write(aLine.encode())
def write_byBlocks_textMode(filename):           # less common in 3.0?
    """
    Write aBlock to filename once per item of repsList, in text mode.

    The file is closed even if a write fails.
    """
    with open(filename, 'w') as outfile:
        for i in repsList:
            # conversion deliberately stays in the loop: its cost is part
            # of this test; decode() makes str in 3.0, unicode in 2.6
            outfile.write(aBlock.decode())
def write_byBlocks_binaryMode(filename):         # writing by lines in binary mode is similar
    """
    Write aBlock to filename once per item of repsList, in binary mode.

    The file is closed even if a write fails.
    """
    with open(filename, 'wb') as outfile:
        for i in repsList:
            outfile.write(aBlock)
def write_allAtOnce_textMode(filename):          # not for very large files
    """
    Write all of aFileStr to filename with one text-mode write() call.

    The file is closed (and so flushed) before returning, rather than
    whenever the garbage collector reclaims the file object.
    """
    with open(filename, 'w') as outfile:
        outfile.write(aFileStr)
def write_allAtOnce_binaryMode(filename):        # not for very large files
    """
    Write all of aFileBin to filename with one binary-mode write() call.

    The file is closed (and so flushed) before returning, rather than
    whenever the garbage collector reclaims the file object.
    """
    with open(filename, 'wb') as outfile:
        outfile.write(aFileBin)
######################################################################
# run, collect test data for Python running me
######################################################################
def timePython():
    """
    Run every read and write test under the Python running this script
    and print one result line per test.

    The text and binary input file names are taken from the first two
    command-line arguments; the write tests create 'timeIO.out'.  If
    fewer than two arguments are given, the script exits with a usage
    message.  A test that raises an Exception is reported on a '*fail*'
    line and the remaining tests still run; KeyboardInterrupt and
    SystemExit are not caught, so the run can be aborted.
    """
    import sys
    import os

    def sizeOf(path):
        # size in bytes, or 0 if the file does not exist (yet)
        return os.path.getsize(path) if os.path.exists(path) else 0

    if len(sys.argv) < 3:
        sys.exit('usage: %s textfile binaryfile' % sys.argv[0])

    outputfile = 'timeIO.out'                    # hard-code: I create this
    textfile, binaryfile = sys.argv[1:3]         # input files vary, command line

    # an ordered list of pairs rather than a dict keyed by file name, so
    # that no group of tests is lost if two of the names are the same
    tests = [
        (textfile,   (read_byLines_textMode,
                      read_byLines_binaryMode,       # less common in 3.0?
                      read_byBlocks_textMode,        # less common in 3.0?
                      read_byBlocks_binaryMode,
                      read_allAtOnce_textMode,       # not for very large files
                      read_allAtOnce_binaryMode)),   # not for very large files
        (binaryfile, (read_byBlocks_binaryMode,      # other read modes not valid,
                      read_allAtOnce_binaryMode)),   # for truly binary data files
        (outputfile, (write_byLines_textMode,
                      write_byLines_binaryMode,      # less common in 3.0?
                      write_byBlocks_textMode,       # less common in 3.0?
                      write_byBlocks_binaryMode,
                      write_allAtOnce_textMode,      # not for very large files
                      write_allAtOnce_binaryMode)),  # not for very large files
    ]

    version = sys.version.split()[0]             # same for every file: compute once
    for (filename, funcs) in tests:
        filesize = sizeOf(filename)              # output file may not exist yet
        print('\n[Python {0}: {1}, {2} bytes]'.format(version, filename, filesize))
        for func in funcs:
            try:
                testtime = timer(func, filename)
            except Exception:
                print('%-26s => %s, %s' % (func.__name__, '*fail*', sys.exc_info()[0]))
            else:
                # measure after the run, so a write test reports what it
                # wrote instead of a zero or stale size from before the run;
                # float() forces true division in 2.6 as well as 3.0
                filemegs = float(sizeOf(filename)) / oneMeg
                testid = '%-26s (%s=%.2fM)' % (func.__name__, filename, filemegs)
                print('%-46s => %f' % (testid, testtime))
# run the tests only when this file is executed as a script, not when imported
if __name__ == '__main__':
    timePython() # the version running me