Coverage for /pythoncovmergedfiles/medio/medio/src/pdfminer.six/pdfminer/image.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

175 statements  

1import os 

2import os.path 

3import struct 

4from io import BytesIO 

5from typing import BinaryIO, Literal, Tuple 

6 

7from pdfminer.jbig2 import JBIG2StreamReader, JBIG2StreamWriter 

8from pdfminer.layout import LTImage 

9from pdfminer.pdfcolor import ( 

10 LITERAL_DEVICE_CMYK, 

11 LITERAL_DEVICE_GRAY, 

12 LITERAL_DEVICE_RGB, 

13 LITERAL_INLINE_DEVICE_GRAY, 

14 LITERAL_INLINE_DEVICE_RGB, 

15) 

16from pdfminer.pdfexceptions import PDFValueError 

17from pdfminer.pdftypes import ( 

18 LITERALS_DCT_DECODE, 

19 LITERALS_FLATE_DECODE, 

20 LITERALS_JBIG2_DECODE, 

21 LITERALS_JPX_DECODE, 

22) 

23 

# Message raised (as ImportError) when an image needs Pillow to be converted
# but the optional dependency is not installed.
PIL_ERROR_MESSAGE = (
    "Could not import Pillow. This dependency of pdfminer.six is not "
    "installed by default. You need it to save jpg images to a file. Install it "
    "with `pip install 'pdfminer.six[image]'`"
)

29 

30 

def align32(x: int) -> int:
    """Round *x* up to the nearest multiple of 4.

    Used to size BMP rows, which are stored in multiples of 4 bytes
    (32 bits).
    """
    # Ceiling division written with floor division on the negated value.
    quads = -(-x // 4)
    return quads * 4

33 

34 

class BMPWriter:
    """Write an uncompressed BMP image to a seekable binary file, row by row.

    The constructor writes the 14-byte file header, the 40-byte info header
    and, for 1-bit and 8-bit images, a grayscale color table. Pixel rows are
    then written individually with :meth:`write_line`.
    """

    def __init__(self, fp: BinaryIO, bits: int, width: int, height: int) -> None:
        """Write the BMP headers (and color table) to *fp*.

        :param fp: binary file object; must support ``tell`` and ``seek``.
        :param bits: bits per pixel; one of 1, 8 or 24.
        :param width: image width in pixels.
        :param height: image height in pixels.
        :raises PDFValueError: if *bits* is not 1, 8 or 24.
        """
        self.fp = fp
        self.bits = bits
        self.width = width
        self.height = height
        # Number of color table entries; 24-bit images carry no table.
        if bits == 1:
            ncols = 2
        elif bits == 8:
            ncols = 256
        elif bits == 24:
            ncols = 0
        else:
            raise PDFValueError(bits)
        # Every stored row occupies a multiple of 4 bytes.
        self.linesize = align32((self.width * self.bits + 7) // 8)
        self.datasize = self.linesize * self.height
        headersize = 14 + 40 + ncols * 4
        info = struct.pack(
            "<IiiHHIIIIII",
            40,  # size of this info header
            self.width,
            self.height,
            1,  # color planes
            self.bits,
            0,  # compression: none
            self.datasize,
            0,  # horizontal resolution (unspecified)
            0,  # vertical resolution (unspecified)
            ncols,
            0,  # important colors: all
        )
        assert len(info) == 40, str(len(info))
        header = struct.pack(
            "<ccIHHI",
            b"B",
            b"M",
            headersize + self.datasize,  # total file size
            0,
            0,
            headersize,  # offset of the pixel data
        )
        assert len(header) == 14, str(len(header))
        self.fp.write(header)
        self.fp.write(info)
        if ncols == 2:
            # B&W color table
            levels = (0, 255)
        elif ncols == 256:
            # grayscale color table
            levels = range(256)
        else:
            levels = ()
        palette = b"".join(struct.pack("BBBx", i, i, i) for i in levels)
        if palette:
            self.fp.write(palette)
        # pos0/pos1 delimit the pixel data region inside the file.
        self.pos0 = self.fp.tell()
        self.pos1 = self.pos0 + self.datasize

    def write_line(self, y: int, data: bytes) -> None:
        """Write the pixel bytes of row *y* (0 = top row of the image).

        Rows are stored from the end of the pixel data region backwards, so
        row 0 is the last row in the file. *data* is zero-padded (or
        truncated) to exactly ``linesize`` bytes so that the file always
        reaches the size declared in the header and a row can never
        overwrite its neighbour.
        """
        row = data[: self.linesize].ljust(self.linesize, b"\x00")
        self.fp.seek(self.pos1 - (y + 1) * self.linesize)
        self.fp.write(row)

93 

94 

class ImageWriter:
    """Write images to files inside an output directory.

    Supports various image types: JPEG, JPEG 2000, JBIG2 and bitmaps.
    Anything that is not recognised is dumped as raw bytes.
    """

    def __init__(self, outdir: str) -> None:
        """Remember *outdir* and create it if it does not exist yet."""
        self.outdir = outdir
        # exist_ok avoids the race between a separate existence check and
        # the directory creation.
        os.makedirs(self.outdir, exist_ok=True)

    def export_image(self, image: LTImage) -> str:
        """Save an LTImage to disk and return the file name that was used.

        The writer is chosen from the stream filters, the bit depth and the
        color space of the image, in that order of precedence.
        """
        (width, height) = image.srcsize

        filters = image.stream.get_filters()

        if not filters:
            name = self._save_bytes(image)

        elif filters[-1][0] in LITERALS_DCT_DECODE:
            name = self._save_jpeg(image)

        elif filters[-1][0] in LITERALS_JPX_DECODE:
            name = self._save_jpeg2000(image)

        elif self._is_jbig2_iamge(image):
            name = self._save_jbig2(image)

        elif image.bits == 1:
            name = self._save_bmp(image, width, height, (width + 7) // 8, image.bits)

        elif image.bits == 8 and (
            LITERAL_DEVICE_RGB in image.colorspace
            or LITERAL_INLINE_DEVICE_RGB in image.colorspace
        ):
            name = self._save_bmp(image, width, height, width * 3, image.bits * 3)

        elif image.bits == 8 and (
            LITERAL_DEVICE_GRAY in image.colorspace
            or LITERAL_INLINE_DEVICE_GRAY in image.colorspace
        ):
            name = self._save_bmp(image, width, height, width, image.bits)

        elif len(filters) == 1 and filters[0][0] in LITERALS_FLATE_DECODE:
            name = self._save_bytes(image)

        else:
            name = self._save_raw(image)

        return name

    def _save_jpeg(self, image: LTImage) -> str:
        """Save a JPEG encoded image.

        CMYK images are inverted and converted to RGB with Pillow; all other
        images are written unchanged.

        :raises ImportError: if a CMYK image is given and Pillow is missing.
        """
        data = image.stream.get_data()

        if LITERAL_DEVICE_CMYK not in image.colorspace:
            name, path = self._create_unique_image_name(image, ".jpg")
            with open(path, "wb") as fp:
                fp.write(data)
            return name

        # Import before creating the file so that a missing Pillow does not
        # leave an empty file behind.
        try:
            from PIL import Image, ImageChops  # type: ignore[import]
        except ImportError as err:
            raise ImportError(PIL_ERROR_MESSAGE) from err

        name, path = self._create_unique_image_name(image, ".jpg")
        with open(path, "wb") as fp:
            ifp = BytesIO(data)
            i = Image.open(ifp)
            i = ImageChops.invert(i)
            i = i.convert("RGB")
            i.save(fp, "JPEG")

        return name

    def _save_jpeg2000(self, image: LTImage) -> str:
        """Save a JPEG 2000 encoded image.

        :raises ImportError: if Pillow is not installed.
        """
        # Import before creating the file so that a missing Pillow does not
        # leave an empty file behind.
        try:
            from PIL import Image  # type: ignore[import]
        except ImportError as err:
            raise ImportError(PIL_ERROR_MESSAGE) from err

        data = image.stream.get_data()

        name, path = self._create_unique_image_name(image, ".jp2")
        with open(path, "wb") as fp:
            # if we just write the raw data, most image programs
            # that I have tried cannot open the file. However,
            # open and saving with PIL produces a file that
            # seems to be easily opened by other programs
            ifp = BytesIO(data)
            i = Image.open(ifp)
            i.save(fp, "JPEG2000")
        return name

    def _save_jbig2(self, image: LTImage) -> str:
        """Save a JBIG2 encoded image.

        The global segments stream (if any) is prepended to the image data
        before the segments are re-written as a standalone file.

        :raises PDFValueError: if more than one JBIG2Globals stream is found.
        """
        global_streams = []
        for filter_name, params in image.stream.get_filters():
            if filter_name not in LITERALS_JBIG2_DECODE:
                continue
            # JBIG2Globals is an optional decode parameter; images without
            # global segments must not fail with a KeyError.
            if params and "JBIG2Globals" in params:
                global_streams.append(params["JBIG2Globals"].resolve())

        if len(global_streams) > 1:
            msg = (
                "There should never be more than one JBIG2Globals "
                "associated with a JBIG2 embedded image"
            )
            raise PDFValueError(msg)

        input_stream = BytesIO()
        if len(global_streams) == 1:
            input_stream.write(global_streams[0].get_data().rstrip(b"\n"))
        input_stream.write(image.stream.get_data())
        input_stream.seek(0)
        reader = JBIG2StreamReader(input_stream)
        segments = reader.get_segments()

        # The output file is only created once the input has been validated.
        name, path = self._create_unique_image_name(image, ".jb2")
        with open(path, "wb") as fp:
            writer = JBIG2StreamWriter(fp)
            writer.write_file(segments)
        return name

    def _save_bmp(
        self,
        image: LTImage,
        width: int,
        height: int,
        bytes_per_line: int,
        bits: int,
    ) -> str:
        """Save a BMP encoded image.

        :param bytes_per_line: number of stream bytes that make up one row.
        :param bits: bits per pixel passed on to the BMP writer.
        """
        name, path = self._create_unique_image_name(image, ".bmp")
        with open(path, "wb") as fp:
            bmp = BMPWriter(fp, bits, width, height)
            data = image.stream.get_data()
            for y in range(height):
                start = y * bytes_per_line
                bmp.write_line(y, data[start : start + bytes_per_line])
        return name

    def _save_bytes(self, image: LTImage) -> str:
        """Save an image without encoding, just bytes.

        The Pillow mode is derived from the bit depth and, for 8-bit images,
        from the number of bytes per pixel found in the stream.

        :raises ImportError: if Pillow is not installed.
        :raises PDFValueError: if the bit depth / channel combination is not
            one of 1-bit, 8-bit gray, 8-bit RGB or 8-bit CMYK.
        """
        # Import before creating the file so that a missing Pillow does not
        # leave an empty file behind.
        try:
            from PIL import (
                Image,  # type: ignore[import]
                ImageOps,
            )
        except ImportError as err:
            raise ImportError(PIL_ERROR_MESSAGE) from err

        data = image.stream.get_data()
        width, height = image.srcsize

        mode = None
        if image.bits == 1:
            mode = "1"
        elif image.bits == 8:
            npixels = width * height
            if npixels > 0:
                channels, leftover = divmod(len(data), npixels)
                if leftover == 0:
                    mode = {1: "L", 3: "RGB", 4: "CMYK"}.get(channels)
        if mode is None:
            raise PDFValueError(
                "Unsupported raw image data: %r bits per component, "
                "%rx%r pixels, %d bytes" % (image.bits, width, height, len(data))
            )

        name, path = self._create_unique_image_name(image, ".jpg")
        with open(path, "wb") as fp:
            img = Image.frombytes(mode, image.srcsize, data, "raw")
            if mode == "L":
                img = ImageOps.invert(img)

            img.save(fp)

        return name

    def _save_raw(self, image: LTImage) -> str:
        """Save an image with unknown encoding.

        The bit depth and the dimensions are encoded in the file extension.
        """
        ext = ".%d.%dx%d.img" % (image.bits, image.srcsize[0], image.srcsize[1])
        name, path = self._create_unique_image_name(image, ext)

        with open(path, "wb") as fp:
            fp.write(image.stream.get_data())
        return name

    @staticmethod
    def _is_jbig2_iamge(image: LTImage) -> bool:
        """Return True if any filter of the image stream is a JBIG2 filter.

        NOTE: the misspelled name is kept so existing callers keep working.
        """
        return any(
            filter_name in LITERALS_JBIG2_DECODE
            for filter_name, _params in image.stream.get_filters()
        )

    def _create_unique_image_name(self, image: LTImage, ext: str) -> Tuple[str, str]:
        """Return a ``(name, path)`` pair that does not exist in ``outdir`` yet.

        If ``<image.name><ext>`` is taken, ``<image.name>.<n><ext>`` is tried
        for n = 0, 1, 2, ... until an unused path is found.
        """
        # image.name may originate from the document being processed, so path
        # separators are neutralised to keep the file inside outdir.
        stem = image.name
        for sep in (os.sep, os.altsep):
            if sep:
                stem = stem.replace(sep, "_")

        name = stem + ext
        path = os.path.join(self.outdir, name)
        img_index = 0
        while os.path.exists(path):
            name = "%s.%d%s" % (stem, img_index, ext)
            path = os.path.join(self.outdir, name)
            img_index += 1
        return name, path