Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/openpyxl/reader/workbook.py: 24%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

86 statements  

1# Copyright (c) 2010-2024 openpyxl 

2 

3from warnings import warn 

4 

5from openpyxl.xml.functions import fromstring 

6 

7from openpyxl.packaging.relationship import ( 

8 get_dependents, 

9 get_rels_path, 

10 get_rel, 

11) 

12from openpyxl.packaging.workbook import WorkbookPackage 

13from openpyxl.workbook import Workbook 

14from openpyxl.workbook.defined_name import DefinedNameList 

15from openpyxl.workbook.external_link.external import read_external_link 

16from openpyxl.pivot.cache import CacheDefinition 

17from openpyxl.pivot.record import RecordList 

18from openpyxl.worksheet.print_settings import PrintTitles, PrintArea 

19 

20from openpyxl.utils.datetime import CALENDAR_MAC_1904 

21 

22 

23class WorkbookParser: 

24 

25 _rels = None 

26 

27 def __init__(self, archive, workbook_part_name, keep_links=True): 

28 self.archive = archive 

29 self.workbook_part_name = workbook_part_name 

30 self.defined_names = DefinedNameList() 

31 self.wb = Workbook() 

32 self.keep_links = keep_links 

33 self.sheets = [] 

34 

35 

36 @property 

37 def rels(self): 

38 if self._rels is None: 

39 self._rels = get_dependents(self.archive, get_rels_path(self.workbook_part_name)).to_dict() 

40 return self._rels 

41 

42 

43 def parse(self): 

44 src = self.archive.read(self.workbook_part_name) 

45 node = fromstring(src) 

46 package = WorkbookPackage.from_tree(node) 

47 if package.properties.date1904: 

48 self.wb.epoch = CALENDAR_MAC_1904 

49 

50 self.wb.code_name = package.properties.codeName 

51 self.wb.active = package.active 

52 self.wb.views = package.bookViews 

53 self.sheets = package.sheets 

54 self.wb.calculation = package.calcPr 

55 self.caches = package.pivotCaches 

56 

57 # external links contain cached worksheets and can be very big 

58 if not self.keep_links: 

59 package.externalReferences = [] 

60 

61 for ext_ref in package.externalReferences: 

62 rel = self.rels.get(ext_ref.id) 

63 self.wb._external_links.append( 

64 read_external_link(self.archive, rel.Target) 

65 ) 

66 

67 if package.definedNames: 

68 self.defined_names = package.definedNames 

69 

70 self.wb.security = package.workbookProtection 

71 

72 

73 def find_sheets(self): 

74 """ 

75 Find all sheets in the workbook and return the link to the source file. 

76 

77 Older XLSM files sometimes contain invalid sheet elements. 

78 Warn user when these are removed. 

79 """ 

80 

81 for sheet in self.sheets: 

82 if not sheet.id: 

83 msg = f"File contains an invalid specification for {0}. This will be removed".format(sheet.name) 

84 warn(msg) 

85 continue 

86 yield sheet, self.rels[sheet.id] 

87 

88 

89 def assign_names(self): 

90 """ 

91 Bind defined names and other definitions to worksheets or the workbook 

92 """ 

93 

94 for idx, names in self.defined_names.by_sheet().items(): 

95 if idx == "global": 

96 self.wb.defined_names = names 

97 continue 

98 

99 try: 

100 sheet = self.wb._sheets[idx] 

101 except IndexError: 

102 warn(f"Defined names for sheet index {idx} cannot be located") 

103 continue 

104 

105 for name, defn in names.items(): 

106 reserved = defn.is_reserved 

107 if reserved is None: 

108 sheet.defined_names[name] = defn 

109 

110 elif reserved == "Print_Titles": 

111 titles = PrintTitles.from_string(defn.value) 

112 sheet._print_rows = titles.rows 

113 sheet._print_cols = titles.cols 

114 elif reserved == "Print_Area": 

115 try: 

116 sheet._print_area = PrintArea.from_string(defn.value) 

117 except TypeError: 

118 warn(f"Print area cannot be set to Defined name: {defn.value}.") 

119 continue 

120 

121 @property 

122 def pivot_caches(self): 

123 """ 

124 Get PivotCache objects 

125 """ 

126 d = {} 

127 for c in self.caches: 

128 cache = get_rel(self.archive, self.rels, id=c.id, cls=CacheDefinition) 

129 if cache.deps: 

130 records = get_rel(self.archive, cache.deps, cache.id, RecordList) 

131 cache.records = records 

132 d[c.cacheId] = cache 

133 return d