Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/image.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

177 statements  

1import os 

2import os.path 

3import struct 

4from io import BytesIO 

5from typing import BinaryIO, Tuple 

6 

7try: 

8 from typing import Literal 

9except ImportError: 

10 # Literal was introduced in Python 3.8 

11 from typing_extensions import Literal # type: ignore[assignment] 

12 

13from pdfminer.jbig2 import JBIG2StreamReader, JBIG2StreamWriter 

14from pdfminer.layout import LTImage 

15from pdfminer.pdfcolor import ( 

16 LITERAL_DEVICE_CMYK, 

17 LITERAL_DEVICE_GRAY, 

18 LITERAL_DEVICE_RGB, 

19 LITERAL_INLINE_DEVICE_GRAY, 

20 LITERAL_INLINE_DEVICE_RGB, 

21) 

22from pdfminer.pdfexceptions import PDFValueError 

23from pdfminer.pdftypes import ( 

24 LITERALS_DCT_DECODE, 

25 LITERALS_FLATE_DECODE, 

26 LITERALS_JBIG2_DECODE, 

27 LITERALS_JPX_DECODE, 

28) 

29 

# Message of the ImportError raised when Pillow is needed to save an image
# but cannot be imported.
PIL_ERROR_MESSAGE = (
    "Could not import Pillow. This dependency of pdfminer.six is not "
    "installed by default. You need it to save jpg images to a file. Install it "
    "with `pip install 'pdfminer.six[image]'`"
)

35 

36 

def align32(x: int) -> int:
    """Round *x* up to the nearest multiple of 4.

    With *x* counted in bytes this aligns a length to a 32-bit boundary.
    """
    quads, leftover = divmod(x, 4)
    if leftover:
        # A partial group of four still occupies a whole group.
        quads += 1
    return quads * 4

39 

40 

class BMPWriter:
    """Write an uncompressed BMP file, one scan line at a time.

    The constructor writes the 14-byte file header, the 40-byte info header
    and, for 1-bit and 8-bit images, a grayscale colour table. The pixel rows
    are added afterwards with :meth:`write_line`.
    """

    def __init__(self, fp: BinaryIO, bits: int, width: int, height: int) -> None:
        """Write the BMP headers to *fp* and prepare for the pixel rows.

        :param fp: binary file to write to; it must support ``seek`` and
            ``tell`` because rows are written at computed offsets.
        :param bits: bits per pixel; must be 1, 8 or 24.
        :param width: image width in pixels.
        :param height: image height in pixels.
        :raises PDFValueError: if *bits* is not 1, 8 or 24.
        """
        self.fp = fp
        self.bits = bits
        self.width = width
        self.height = height
        # Number of entries in the colour table (none for 24-bit images).
        if bits == 1:
            ncols = 2
        elif bits == 8:
            ncols = 256
        elif bits == 24:
            ncols = 0
        else:
            raise PDFValueError(bits)
        # Every row occupies a whole number of 32-bit words in the file.
        self.linesize = align32((self.width * self.bits + 7) // 8)
        self.datasize = self.linesize * self.height
        # File header + info header + colour table (4 bytes per entry).
        headersize = 14 + 40 + ncols * 4
        info = struct.pack(
            "<IiiHHIIIIII",
            40,  # size of this info header
            self.width,
            self.height,
            1,  # colour planes
            self.bits,
            0,  # compression: none
            self.datasize,
            0,  # horizontal resolution (unspecified)
            0,  # vertical resolution (unspecified)
            ncols,
            0,  # important colours: all
        )
        assert len(info) == 40, str(len(info))
        header = struct.pack(
            "<ccIHHI",
            b"B",
            b"M",
            headersize + self.datasize,  # total file size
            0,
            0,
            headersize,  # offset of the pixel data
        )
        assert len(header) == 14, str(len(header))
        self.fp.write(header)
        self.fp.write(info)
        if ncols == 2:
            # B&W color table
            for i in (0, 255):
                self.fp.write(struct.pack("BBBx", i, i, i))
        elif ncols == 256:
            # grayscale color table
            for i in range(256):
                self.fp.write(struct.pack("BBBx", i, i, i))
        # pos0 / pos1: file offsets of the start / end of the pixel data.
        self.pos0 = self.fp.tell()
        self.pos1 = self.pos0 + self.datasize

    def write_line(self, y: int, data: bytes) -> None:
        """Write the pixel bytes of row *y*.

        Rows are placed counting back from the end of the pixel data, so
        row 0 ends up last in the file.

        *data* is padded with zero bytes (or cut) to exactly one aligned
        row. Without the padding the file would end short of the size
        announced in the header whenever the row length is not a multiple
        of four bytes, and an over-long row would spill into its neighbour.

        :param y: row index, starting at 0.
        :param data: raw bytes of the row.
        """
        row = bytes(data[: self.linesize]).ljust(self.linesize, b"\x00")
        self.fp.seek(self.pos1 - (y + 1) * self.linesize)
        self.fp.write(row)

99 

100 

class ImageWriter:
    """Write images to files inside an output directory.

    Supports various image types: JPEG, JPEG 2000, JBIG2 and bitmaps.
    Images in any other encoding are dumped as raw bytes.
    """

    def __init__(self, outdir: str) -> None:
        """Remember *outdir* and create it when it does not exist yet.

        :param outdir: directory that receives the image files.
        """
        self.outdir = outdir
        if not os.path.exists(self.outdir):
            # exist_ok: tolerate the directory being created by someone else
            # between the check above and this call.
            os.makedirs(self.outdir, exist_ok=True)

    def export_image(self, image: LTImage) -> str:
        """Save an LTImage to disk.

        The writer is chosen from the image's stream filters first, then
        from its bit depth and colour space; anything unrecognised is
        stored as raw bytes.

        :param image: the image to save.
        :return: name of the file that was written inside ``outdir``.
        """
        (width, height) = image.srcsize

        filters = image.stream.get_filters()
        # Guard the look at the last filter: indexing an empty filter list
        # would raise IndexError.
        has_filters = bool(filters)

        if has_filters and filters[-1][0] in LITERALS_DCT_DECODE:
            name = self._save_jpeg(image)

        elif has_filters and filters[-1][0] in LITERALS_JPX_DECODE:
            name = self._save_jpeg2000(image)

        elif self._is_jbig2_iamge(image):
            name = self._save_jbig2(image)

        elif image.bits == 1:
            name = self._save_bmp(image, width, height, (width + 7) // 8, image.bits)

        elif image.bits == 8 and (
            LITERAL_DEVICE_RGB in image.colorspace
            or LITERAL_INLINE_DEVICE_RGB in image.colorspace
        ):
            name = self._save_bmp(image, width, height, width * 3, image.bits * 3)

        elif image.bits == 8 and (
            LITERAL_DEVICE_GRAY in image.colorspace
            or LITERAL_INLINE_DEVICE_GRAY in image.colorspace
        ):
            name = self._save_bmp(image, width, height, width, image.bits)

        elif len(filters) == 1 and filters[0][0] in LITERALS_FLATE_DECODE:
            name = self._save_bytes(image)

        else:
            name = self._save_raw(image)

        return name

    def _save_jpeg(self, image: LTImage) -> str:
        """Save a JPEG encoded image.

        CMYK images are inverted and converted to RGB with Pillow; all
        other images are written unchanged.

        :raises ImportError: if a CMYK image needs Pillow and it is missing.
        """
        data = image.stream.get_data()

        is_cmyk = LITERAL_DEVICE_CMYK in image.colorspace
        if is_cmyk:
            # Import before creating the file so that a missing Pillow does
            # not leave an empty file behind.
            try:
                from PIL import Image, ImageChops  # type: ignore[import]
            except ImportError as err:
                raise ImportError(PIL_ERROR_MESSAGE) from err

        name, path = self._create_unique_image_name(image, ".jpg")
        with open(path, "wb") as fp:
            if is_cmyk:
                picture = Image.open(BytesIO(data))
                picture = ImageChops.invert(picture)
                picture = picture.convert("RGB")
                picture.save(fp, "JPEG")
            else:
                fp.write(data)

        return name

    def _save_jpeg2000(self, image: LTImage) -> str:
        """Save a JPEG 2000 encoded image.

        :raises ImportError: if Pillow is not installed.
        """
        # Import before creating the file so that a missing Pillow does not
        # leave an empty file behind.
        try:
            from PIL import Image  # type: ignore[import]
        except ImportError as err:
            raise ImportError(PIL_ERROR_MESSAGE) from err

        data = image.stream.get_data()

        name, path = self._create_unique_image_name(image, ".jp2")
        with open(path, "wb") as fp:
            # if we just write the raw data, most image programs
            # that I have tried cannot open the file. However,
            # open and saving with PIL produces a file that
            # seems to be easily opened by other programs
            picture = Image.open(BytesIO(data))
            picture.save(fp, "JPEG2000")
        return name

    def _save_jbig2(self, image: LTImage) -> str:
        """Save a JBIG2 encoded image.

        The global segments (if any) and the image data are concatenated,
        parsed into segments and written out as a JBIG2 file.

        :raises PDFValueError: if more than one JBIG2Globals stream is found.
        """
        global_streams = []
        for filter_name, params in image.stream.get_filters():
            if filter_name not in LITERALS_JBIG2_DECODE:
                continue
            # Skip filters without a JBIG2Globals entry instead of failing
            # on the lookup.
            if params and "JBIG2Globals" in params:
                global_streams.append(params["JBIG2Globals"].resolve())

        if len(global_streams) > 1:
            msg = (
                "There should never be more than one JBIG2Globals "
                "associated with a JBIG2 embedded image"
            )
            raise PDFValueError(msg)

        input_stream = BytesIO()
        if len(global_streams) == 1:
            input_stream.write(global_streams[0].get_data().rstrip(b"\n"))
        input_stream.write(image.stream.get_data())
        input_stream.seek(0)
        reader = JBIG2StreamReader(input_stream)
        segments = reader.get_segments()

        # The file is created only after the input has been validated.
        name, path = self._create_unique_image_name(image, ".jb2")
        with open(path, "wb") as fp:
            writer = JBIG2StreamWriter(fp)
            writer.write_file(segments)
        return name

    def _save_bmp(
        self,
        image: LTImage,
        width: int,
        height: int,
        bytes_per_line: int,
        bits: int,
    ) -> str:
        """Save a BMP encoded image.

        :param image: the image whose stream data is written.
        :param width: width in pixels.
        :param height: height in pixels; one row is written per line.
        :param bytes_per_line: number of stream bytes that make up one row.
        :param bits: bits per pixel of the BMP file.
        """
        name, path = self._create_unique_image_name(image, ".bmp")
        with open(path, "wb") as fp:
            bmp = BMPWriter(fp, bits, width, height)
            data = image.stream.get_data()
            # NOTE(review): the pixel bytes are written exactly as stored in
            # the stream. BMP keeps 24-bit pixels in blue-green-red order, so
            # RGB data may come out with red and blue swapped - confirm.
            offset = 0
            for y in range(height):
                bmp.write_line(y, data[offset : offset + bytes_per_line])
                offset += bytes_per_line
        return name

    def _save_bytes(self, image: LTImage) -> str:
        """Save an image without encoding, just bytes.

        The Pillow mode is derived from the bit depth and from the number
        of channels implied by the amount of data.

        :raises ImportError: if Pillow is not installed.
        :raises PDFValueError: if the size, bit depth or channel count does
            not map to a supported Pillow mode.
        """
        # Import before creating the file so that a missing Pillow does not
        # leave an empty file behind.
        try:
            from PIL import (
                Image,  # type: ignore[import]
                ImageOps,
            )
        except ImportError as err:
            raise ImportError(PIL_ERROR_MESSAGE) from err

        data = image.stream.get_data()
        width, height = image.srcsize
        bits = image.bits
        if width <= 0 or height <= 0 or bits <= 0:
            # Also keeps the channel computation below from dividing by zero.
            raise PDFValueError(
                "Cannot save image of size %rx%r with %r bits per component"
                % (width, height, bits)
            )
        channels = len(data) / width / height / (bits / 8)

        mode: Literal["1", "L", "RGB", "CMYK"]
        if bits == 1:
            mode = "1"
        elif bits == 8 and channels == 1:
            mode = "L"
        elif bits == 8 and channels == 3:
            mode = "RGB"
        elif bits == 8 and channels == 4:
            mode = "CMYK"
        else:
            # Previously this fell through with ``mode`` unbound.
            raise PDFValueError(
                "Cannot save image with %r bits per component and %r channels"
                % (bits, channels)
            )

        name, path = self._create_unique_image_name(image, ".jpg")
        with open(path, "wb") as fp:
            img = Image.frombytes(mode, image.srcsize, data, "raw")
            if mode == "L":
                img = ImageOps.invert(img)

            img.save(fp)

        return name

    def _save_raw(self, image: LTImage) -> str:
        """Save an image with unknown encoding.

        The bit depth and the size are encoded in the file extension.
        """
        ext = ".%d.%dx%d.img" % (image.bits, image.srcsize[0], image.srcsize[1])
        name, path = self._create_unique_image_name(image, ext)

        with open(path, "wb") as fp:
            fp.write(image.stream.get_data())
        return name

    @staticmethod
    def _is_jbig2_iamge(image: LTImage) -> bool:
        """Return True if any filter of the image stream is a JBIG2 filter.

        The misspelt name is kept so that existing references keep working.
        """
        return any(
            filter_name in LITERALS_JBIG2_DECODE
            for filter_name, _params in image.stream.get_filters()
        )

    def _create_unique_image_name(self, image: LTImage, ext: str) -> Tuple[str, str]:
        """Return a ``(name, path)`` pair that does not exist in ``outdir`` yet.

        If ``<image.name><ext>`` is already taken, a counter is inserted
        before the extension until a free name is found.

        :param image: image whose ``name`` is the basis of the file name.
        :param ext: file extension, including the leading dot.
        :return: the file name and its path inside ``outdir``.
        """
        # Presumably the image name comes from the document being processed,
        # so it is not trusted: path separators (and NUL) are replaced to
        # keep the file inside ``outdir``.
        stem = image.name
        for unsafe in ("/", "\\", "\x00"):
            stem = stem.replace(unsafe, "_")

        name = stem + ext
        path = os.path.join(self.outdir, name)
        img_index = 0
        while os.path.exists(path):
            name = "%s.%d%s" % (stem, img_index, ext)
            path = os.path.join(self.outdir, name)
            img_index += 1
        return name, path