Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/filecmp.py: 19%

164 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:05 +0000

1"""Utilities for comparing files and directories. 

2 

3Classes: 

4 dircmp 

5 

6Functions: 

7 cmp(f1, f2, shallow=True) -> int 

8 cmpfiles(a, b, common) -> ([], [], []) 

9 clear_cache() 

10 

11""" 

12 

13import os 

14import stat 

15from itertools import filterfalse 

16from types import GenericAlias 

17 

# Public names exported by a star-import of this module.
__all__ = [
    'clear_cache',
    'cmp',
    'dircmp',
    'cmpfiles',
    'DEFAULT_IGNORES',
]

19 

# Memo of content comparisons performed by cmp(); emptied by clear_cache().
_cache = {}

# Number of bytes read from each file per iteration when comparing contents.
BUFSIZE = 8 * 1024

# Directory entry names that dircmp skips when no ``ignore`` list is given.
DEFAULT_IGNORES = [
    'RCS',
    'CVS',
    'tags',
    '.git',
    '.hg',
    '.bzr',
    '_darcs',
    '__pycache__',
]

25 

def clear_cache():
    """Empty the module-level cache of past file comparisons.

    The cache dictionary is cleared in place, so existing references to it
    observe the change.
    """
    _cache.clear()

29 

def cmp(f1, f2, shallow=True):
    """Compare two files and report whether they look the same.

    Arguments:

    f1 -- name of the first file

    f2 -- name of the second file

    shallow -- when true (the default), files whose stat signatures
               (type, size, modification time) are identical are taken
               to be equal without reading them.

    Return value:

    True if the files are the same, False otherwise.

    Results of content comparisons are remembered in a cache keyed by both
    file names and both stat signatures, so an entry is no longer hit once
    either file's signature changes.  The cache may be cleared by calling
    clear_cache().

    """
    left_sig = _sig(os.stat(f1))
    right_sig = _sig(os.stat(f2))

    # Anything other than a pair of regular files never compares equal.
    both_regular = left_sig[0] == stat.S_IFREG and right_sig[0] == stat.S_IFREG
    if not both_regular:
        return False
    # Shallow mode: identical signatures are good enough.
    if shallow and left_sig == right_sig:
        return True
    # Different sizes cannot have identical contents.
    if left_sig[1] != right_sig[1]:
        return False

    key = (f1, f2, left_sig, right_sig)
    cached = _cache.get(key)
    if cached is not None:
        return cached

    result = _do_cmp(f1, f2)
    if len(_cache) > 100:      # limit the maximum size of the cache
        clear_cache()
    _cache[key] = result
    return result

68 

def _sig(st):
    """Return the (file type, size, mtime) signature of a stat result."""
    file_type = stat.S_IFMT(st.st_mode)
    return file_type, st.st_size, st.st_mtime

73 

def _do_cmp(f1, f2):
    """Return True if files f1 and f2 have byte-for-byte identical contents.

    Both files are opened in binary mode and read in BUFSIZE-sized chunks;
    the comparison stops at the first differing chunk.
    """
    chunk_size = BUFSIZE
    with open(f1, 'rb') as left, open(f2, 'rb') as right:
        while True:
            left_chunk = left.read(chunk_size)
            right_chunk = right.read(chunk_size)
            if left_chunk != right_chunk:
                return False
            # Equal and empty: both files reached EOF together.
            if not left_chunk:
                return True

84 

# Directory comparison class.
#
class dircmp:
    """A class that manages the comparison of 2 directories.

    dircmp(a, b, ignore=None, hide=None)
      A and B are directories.
      IGNORE is a list of names to ignore,
        defaults to DEFAULT_IGNORES.
      HIDE is a list of names to hide,
        defaults to [os.curdir, os.pardir].

    High level usage:
      x = dircmp(dir1, dir2)
      x.report() -> prints a report on the differences between dir1 and dir2
       or
      x.report_partial_closure() -> prints report on differences between dir1
            and dir2, and reports on common immediate subdirectories.
      x.report_full_closure() -> like report_partial_closure,
            but fully recursive.

    Attributes:
     left_list, right_list: The files in dir1 and dir2,
        filtered by hide and ignore.
     common: a list of names in both dir1 and dir2.
     left_only, right_only: names only in dir1, dir2.
     common_dirs: subdirectories in both dir1 and dir2.
     common_files: files in both dir1 and dir2.
     common_funny: names in both dir1 and dir2 where the type differs between
        dir1 and dir2, or the name is not stat-able.
     same_files: list of identical files.
     diff_files: list of filenames which differ.
     funny_files: list of files which could not be compared.
     subdirs: a dictionary of comparison objects, keyed by names in
        common_dirs.  Each value is an instance of the same class as the
        object that created it, so subclasses recurse as themselves.

    The attributes above are computed lazily: the first access to one of
    them runs the phase method listed for it in ``methodmap``.
    """

    def __init__(self, a, b, ignore=None, hide=None): # Initialize
        self.left = a
        self.right = b
        if hide is None:
            self.hide = [os.curdir, os.pardir] # Names never to be shown
        else:
            self.hide = hide
        if ignore is None:
            self.ignore = DEFAULT_IGNORES
        else:
            self.ignore = ignore

    def phase0(self): # Compare everything except common subdirectories
        self.left_list = _filter(os.listdir(self.left),
                                 self.hide+self.ignore)
        self.right_list = _filter(os.listdir(self.right),
                                  self.hide+self.ignore)
        self.left_list.sort()
        self.right_list.sort()

    def phase1(self): # Compute common names
        # Key each listing by its case-normalised name so that matching
        # follows os.path.normcase, while the reported names keep the
        # spelling found on disk.
        a = dict(zip(map(os.path.normcase, self.left_list), self.left_list))
        b = dict(zip(map(os.path.normcase, self.right_list), self.right_list))
        self.common = list(map(a.__getitem__, filter(b.__contains__, a)))
        self.left_only = list(map(a.__getitem__, filterfalse(b.__contains__, a)))
        self.right_only = list(map(b.__getitem__, filterfalse(a.__contains__, b)))

    def phase2(self): # Distinguish files, directories, funnies
        self.common_dirs = []
        self.common_files = []
        self.common_funny = []

        for x in self.common:
            a_path = os.path.join(self.left, x)
            b_path = os.path.join(self.right, x)

            # os.stat() raises OSError when the entry cannot be examined and
            # ValueError when the name itself is unusable as a path (for
            # example it contains an embedded NUL).  Either way the entry
            # is reported as "funny" instead of aborting the comparison.
            ok = True
            try:
                a_stat = os.stat(a_path)
            except (OSError, ValueError):
                ok = False
            try:
                b_stat = os.stat(b_path)
            except (OSError, ValueError):
                ok = False

            if ok:
                a_type = stat.S_IFMT(a_stat.st_mode)
                b_type = stat.S_IFMT(b_stat.st_mode)
                if a_type != b_type:
                    self.common_funny.append(x)
                elif stat.S_ISDIR(a_type):
                    self.common_dirs.append(x)
                elif stat.S_ISREG(a_type):
                    self.common_files.append(x)
                else:
                    self.common_funny.append(x)
            else:
                self.common_funny.append(x)

    def phase3(self): # Find out differences between common files
        xx = cmpfiles(self.left, self.right, self.common_files)
        self.same_files, self.diff_files, self.funny_files = xx

    def phase4(self): # Find out differences between common subdirectories
        # A new comparison object is created for each common subdirectory,
        # these are stored in a dictionary indexed by filename.
        # The hide and ignore properties are inherited from the parent.
        # self.__class__ (rather than dircmp) is used so that a subclass
        # keeps its own type, and overridden behaviour, when recursing.
        self.subdirs = {}
        for x in self.common_dirs:
            a_x = os.path.join(self.left, x)
            b_x = os.path.join(self.right, x)
            self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide)

    def phase4_closure(self): # Recursively call phase4() on subdirectories
        self.phase4()
        for sd in self.subdirs.values():
            sd.phase4_closure()

    def report(self): # Print a report on the differences between a and b
        # Output format is purposely lousy
        # Note: each non-empty result list is sorted in place before printing.
        print('diff', self.left, self.right)
        if self.left_only:
            self.left_only.sort()
            print('Only in', self.left, ':', self.left_only)
        if self.right_only:
            self.right_only.sort()
            print('Only in', self.right, ':', self.right_only)
        if self.same_files:
            self.same_files.sort()
            print('Identical files :', self.same_files)
        if self.diff_files:
            self.diff_files.sort()
            print('Differing files :', self.diff_files)
        if self.funny_files:
            self.funny_files.sort()
            print('Trouble with common files :', self.funny_files)
        if self.common_dirs:
            self.common_dirs.sort()
            print('Common subdirectories :', self.common_dirs)
        if self.common_funny:
            self.common_funny.sort()
            print('Common funny cases :', self.common_funny)

    def report_partial_closure(self): # Print reports on self and on subdirs
        self.report()
        for sd in self.subdirs.values():
            print()
            sd.report()

    def report_full_closure(self): # Report on self and subdirs recursively
        self.report()
        for sd in self.subdirs.values():
            print()
            sd.report_full_closure()

    # Maps each lazily computed attribute to the phase method that sets it.
    methodmap = dict(subdirs=phase4,
                     same_files=phase3, diff_files=phase3, funny_files=phase3,
                     common_dirs=phase2, common_files=phase2, common_funny=phase2,
                     common=phase1, left_only=phase1, right_only=phase1,
                     left_list=phase0, right_list=phase0)

    def __getattr__(self, attr):
        # Only reached when normal lookup fails, i.e. the attribute has not
        # been computed yet: run the phase that produces it, then retry.
        if attr not in self.methodmap:
            raise AttributeError(attr)
        self.methodmap[attr](self)
        return getattr(self, attr)

    __class_getitem__ = classmethod(GenericAlias)

252 

253 

def cmpfiles(a, b, common, shallow=True):
    """Compare the files named in *common* across two directories.

    a, b -- directory names
    common -- list of file names found in both directories
    shallow -- if true, do comparison based solely on stat() information

    Returns a tuple of three lists:
      files that compare equal
      files that are different
      filenames that aren't regular files.

    """
    same, different, funny = [], [], []
    # _cmp() yields 0, 1 or 2, which selects the matching list by position.
    buckets = (same, different, funny)
    for name in common:
        left_path = os.path.join(a, name)
        right_path = os.path.join(b, name)
        verdict = _cmp(left_path, right_path, shallow)
        buckets[verdict].append(name)
    return buckets

273 

274 

# Compare two files.
# Return:
#       0 for equal
#       1 for different
#       2 for funny cases (can't stat, unusable file name, etc.)
#
def _cmp(a, b, sh, abs=abs, cmp=cmp):
    # cmp() returns True for equal files, so "not abs(...)" turns that into
    # False (index 0, "equal") and a False result into True (index 1,
    # "different").
    try:
        return not abs(cmp(a, b, sh))
    except (OSError, ValueError):
        # OSError: a file could not be stat'ed or read.
        # ValueError: os.stat() rejected the path itself (for example a name
        # with an embedded NUL); report it as funny rather than crashing.
        return 2

286 

287 

# Build a new list from flist, leaving out every item found in skip.
#
def _filter(flist, skip):
    is_skipped = skip.__contains__
    return [name for name in flist if not is_skipped(name)]

292 

293 

294# Demonstration and testing. 

295# 

def demo():
    """Command-line demo: compare the two directories named in sys.argv.

    With the -r option the report covers common subdirectories recursively;
    otherwise only the two top-level directories are reported.  Raises
    getopt.GetoptError unless exactly two positional arguments are given.
    """
    import sys
    import getopt

    options, args = getopt.getopt(sys.argv[1:], 'r')
    if len(args) != 2:
        raise getopt.GetoptError('need exactly two args', None)

    left, right = args
    comparison = dircmp(left, right)
    recursive = ('-r', '') in options
    if recursive:
        comparison.report_full_closure()
    else:
        comparison.report()


if __name__ == '__main__':
    demo()