大文字と小文字を区別しないキャッシュを使って、正しい表記のパスを取得する拡張版です:
import os,re
def corrected_paths(start, pathlist):
    """Resolve paths case-insensitively to the names that exist on disk.

    Each entry of *pathlist* is interpreted relative to *start* and walked
    one component at a time.  A component that does not exist as spelled is
    looked up case-insensitively in its parent directory and replaced by
    the real entry name.  Corrections are remembered in a cache keyed by
    the lower-cased path, so later paths sharing an already-corrected
    prefix skip those directory listings.

    Args:
        start: Base directory every path is resolved against.
        pathlist: Iterable of relative paths.  Components may be separated
            by runs of ``/`` or ``\\``; leading and trailing separators
            are ignored, and an empty path resolves to *start* itself.

    Returns:
        A tuple ``(corrected_path_list, corrections_count)`` where
        ``corrections_count`` is the number of components that had to be
        corrected through a directory listing, or ``False`` as soon as a
        component cannot be found under any casing (an error line is
        printed in that case).

    Raises:
        OSError: If a directory that has to be listed cannot be read.

    NOTE: ``os.path.exists`` is used as the fast path.  On a
    case-insensitive filesystem it succeeds for any casing, so no
    correction is performed there and the path is returned as given.
    """
    try:
        text_type = unicode  # Python 2: keep os.listdir() results as unicode
    except NameError:
        text_type = str  # Python 3: str is already a unicode string

    start = text_type(start)
    # Non-capturing split on runs of "/" or "\".  A capturing group would
    # return the separators as parts, and joining a "/" part resets the
    # path to the filesystem root.
    separators = re.compile(r'[/\\]+')

    # Lower-cased uncorrected path -> corrected path, shared by all entries.
    cache = {}
    corrected_path_list = []
    corrections_count = 0

    for path in pathlist:
        # Dropping empty pieces also strips leading/trailing separators.
        parts = [part for part in separators.split(text_type(path)) if part]
        cd = start

        # Try the longest prefix first so the first hit skips the most work.
        for prefix_len in range(len(parts), 0, -1):
            key = os.path.join(start, *parts[:prefix_len]).lower()
            if key in cache:
                # The cached value is already the full corrected path.
                cd = cache[key]
                parts = parts[prefix_len:]
                break

        # Walk whatever the cache could not resolve.
        for part in parts:
            candidate = os.path.join(cd, part)
            if os.path.exists(candidate):
                cd = candidate
                continue

            listing = os.listdir(cd)
            wanted = part.lower()
            lowered = [entry.lower() for entry in listing]
            if wanted not in lowered:
                print("Error %s not in folder %s" % (wanted, lowered))
                return False  # Kept for callers that test for False

            # First case-insensitive match is taken as the real name.
            cd = os.path.join(cd, listing[lowered.index(wanted)])
            cache[candidate.lower()] = cd
            corrections_count += 1

        corrected_path_list.append(cd)

    return corrected_path_list, corrections_count
一連のパスに対して実行した例では、これにより os.listdir の呼び出し回数が大幅に削減されます (削減の度合いは、パス同士がどの程度共通の接頭部分を持つかによって当然変わります)。以下の呼び出しに対する 2 つの結果のうち、1 つ目 (修正回数 14) がキャッシュなし、2 つ目 (修正回数 5) がキャッシュありの場合です。
corrected_paths('/Users/', ['mxF793/ScRiPtS/meTApaTH','mxF793/ScRiPtS/meTApaTH/metapAth/html','mxF793/ScRiPtS/meTApaTH/metapAth/html/css','mxF793/ScRiPts/PuBfig'])
([u'/Users/mxf793/Scripts/metapath', u'/Users/mxf793/Scripts/metapath/metapath/html', u'/Users/mxf793/Scripts/metapath/metapath/html/css', u'/Users/mxf793/Scripts/pubfig'], 14)
([u'/Users/mxf793/Scripts/metapath', u'/Users/mxf793/Scripts/metapath/metapath/html', u'/Users/mxf793/Scripts/metapath/metapath/html/css', u'/Users/mxf793/Scripts/pubfig'], 5)
なお、この実装を進める過程で、Mac OS X 上の Python では os.path.exists が大文字と小文字を区別せずにパスを照合するため、存在チェックが常に成功してしまう (つまり表記の修正が行われない) ことに気付きました。その場合は、os.path.exists による存在チェックの代わりに、os.listdir(cd) の結果にその要素名が含まれているかどうかで判定するように置き換えてください。