/src/tidy-html5/src/mappedio.c
Line | Count | Source |
1 | | /* Interface to mmap style I/O |
2 | | |
3 | | (c) 2006-2008 (W3C) MIT, ERCIM, Keio University |
4 | | See tidy.h for the copyright notice. |
5 | | |
6 | | Originally contributed by Cory Nelson and Nuno Lopes |
7 | | |
8 | | */ |
9 | | |
10 | | /* keep these here to keep file non-empty */ |
11 | | #include "forward.h" |
12 | | #include "mappedio.h" |
13 | | |
14 | | #if SUPPORT_POSIX_MAPPED_FILES |
15 | | |
16 | | #include "fileio.h" |
17 | | |
18 | | #include <sys/types.h> |
19 | | #include <sys/stat.h> |
20 | | #include <unistd.h> |
21 | | #include <stdio.h> |
22 | | |
23 | | #include <sys/mman.h> |
24 | | |
25 | | |
26 | | typedef struct |
27 | | { |
28 | | TidyAllocator *allocator; |
29 | | const byte *base; |
30 | | size_t pos, size; |
31 | | } MappedFileSource; |
32 | | |
33 | | static int TIDY_CALL mapped_getByte( void* sourceData ) |
34 | 0 | { |
35 | 0 | MappedFileSource* fin = (MappedFileSource*) sourceData; |
36 | 0 | return fin->base[fin->pos++]; |
37 | 0 | } |
38 | | |
39 | | static Bool TIDY_CALL mapped_eof( void* sourceData ) |
40 | 0 | { |
41 | 0 | MappedFileSource* fin = (MappedFileSource*) sourceData; |
42 | 0 | return (fin->pos >= fin->size); |
43 | 0 | } |
44 | | |
45 | | static void TIDY_CALL mapped_ungetByte( void* sourceData, byte ARG_UNUSED(bv) ) |
46 | 0 | { |
47 | 0 | MappedFileSource* fin = (MappedFileSource*) sourceData; |
48 | 0 | fin->pos--; |
49 | 0 | } |
50 | | |
51 | | int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp ) |
52 | 0 | { |
53 | 0 | MappedFileSource* fin; |
54 | 0 | struct stat sbuf; |
55 | 0 | int fd; |
56 | |
|
57 | 0 | fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) ); |
58 | 0 | if ( !fin ) |
59 | 0 | return -1; |
60 | | |
61 | 0 | fd = fileno(fp); |
62 | 0 | if ( fstat(fd, &sbuf) == -1 |
63 | 0 | || sbuf.st_size == 0 |
64 | 0 | || (fin->base = mmap(0, fin->size = sbuf.st_size, PROT_READ, |
65 | 0 | MAP_SHARED, fd, 0)) == MAP_FAILED) |
66 | 0 | { |
67 | 0 | TidyFree( allocator, fin ); |
68 | | /* Fallback on standard I/O */ |
69 | 0 | return TY_(initStdIOFileSource)( allocator, inp, fp ); |
70 | 0 | } |
71 | | |
72 | 0 | fin->pos = 0; |
73 | 0 | fin->allocator = allocator; |
74 | 0 | fclose(fp); |
75 | |
|
76 | 0 | inp->getByte = mapped_getByte; |
77 | 0 | inp->eof = mapped_eof; |
78 | 0 | inp->ungetByte = mapped_ungetByte; |
79 | 0 | inp->sourceData = fin; |
80 | |
|
81 | 0 | return 0; |
82 | 0 | } |
83 | | |
84 | | void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt ) |
85 | 0 | { |
86 | 0 | if ( inp->getByte == mapped_getByte ) |
87 | 0 | { |
88 | 0 | MappedFileSource* fin = (MappedFileSource*) inp->sourceData; |
89 | 0 | munmap( (void*)fin->base, fin->size ); |
90 | 0 | TidyFree( fin->allocator, fin ); |
91 | 0 | } |
92 | 0 | else |
93 | 0 | TY_(freeStdIOFileSource)( inp, closeIt ); |
94 | 0 | } |
95 | | |
96 | | #endif /* SUPPORT_POSIX_MAPPED_FILES */ |
97 | | |
98 | | |
99 | | #if defined(_WIN32) |
100 | | # if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */ |
101 | | # pragma warning(disable:4115) /* named type definition in parentheses in windows headers */ |
102 | | # endif |
103 | | # include <windows.h> |
104 | | # include <errno.h> |
105 | | # include "streamio.h" |
106 | | # include "tidy-int.h" |
107 | | # include "message.h" |
108 | | |
109 | | typedef struct _fp_input_mapped_source |
110 | | { |
111 | | TidyAllocator *allocator; |
112 | | LONGLONG size, pos; |
113 | | HANDLE file, map; |
114 | | byte *view, *iter, *end; |
115 | | unsigned int gran; |
116 | | } MappedFileSource; |
117 | | |
118 | | static int mapped_openView( MappedFileSource *data ) |
119 | | { |
120 | | DWORD numb = ( ( data->size - data->pos ) > data->gran ) ? |
121 | | data->gran : (DWORD)( data->size - data->pos ); |
122 | | |
123 | | if ( data->view ) |
124 | | { |
125 | | UnmapViewOfFile( data->view ); |
126 | | data->view = NULL; |
127 | | } |
128 | | |
129 | | data->view = MapViewOfFile( data->map, FILE_MAP_READ, |
130 | | (DWORD)( data->pos >> 32 ), |
131 | | (DWORD)data->pos, numb ); |
132 | | |
133 | | if ( !data->view ) return -1; |
134 | | |
135 | | data->iter = data->view; |
136 | | data->end = data->iter + numb; |
137 | | |
138 | | return 0; |
139 | | } |
140 | | |
141 | | static int TIDY_CALL mapped_getByte( void *sourceData ) |
142 | | { |
143 | | MappedFileSource *data = sourceData; |
144 | | |
145 | | if ( !data->view || data->iter >= data->end ) |
146 | | { |
147 | | data->pos += data->gran; |
148 | | |
149 | | if ( data->pos >= data->size || mapped_openView(data) != 0 ) |
150 | | return EndOfStream; |
151 | | } |
152 | | |
153 | | return *( data->iter++ ); |
154 | | } |
155 | | |
156 | | static Bool TIDY_CALL mapped_eof( void *sourceData ) |
157 | | { |
158 | | MappedFileSource *data = sourceData; |
159 | | return ( data->pos >= data->size ); |
160 | | } |
161 | | |
162 | | static void TIDY_CALL mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) ) |
163 | | { |
164 | | MappedFileSource *data = sourceData; |
165 | | |
166 | | if ( data->iter >= data->view ) |
167 | | { |
168 | | --data->iter; |
169 | | return; |
170 | | } |
171 | | |
172 | | if ( data->pos < data->gran ) |
173 | | { |
174 | | assert(0); |
175 | | return; |
176 | | } |
177 | | |
178 | | data->pos -= data->gran; |
179 | | mapped_openView( data ); |
180 | | } |
181 | | |
182 | | static int initMappedFileSource( TidyAllocator *allocator, TidyInputSource* inp, HANDLE fp ) |
183 | | { |
184 | | MappedFileSource* fin = NULL; |
185 | | |
186 | | inp->getByte = mapped_getByte; |
187 | | inp->eof = mapped_eof; |
188 | | inp->ungetByte = mapped_ungetByte; |
189 | | |
190 | | fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) ); |
191 | | if ( !fin ) |
192 | | return -1; |
193 | | |
194 | | # if defined(__MINGW32__) |
195 | | { |
196 | | DWORD lowVal, highVal; |
197 | | lowVal = GetFileSize(fp, &highVal); |
198 | | if ((lowVal == INVALID_FILE_SIZE) && (GetLastError() != NO_ERROR)) |
199 | | { |
200 | | TidyFree(allocator, fin); |
201 | | return -1; |
202 | | } |
203 | | fin->size = highVal; |
204 | | fin->size = (fin->size << 32); |
205 | | fin->size += lowVal; |
206 | | } |
207 | | # else /* NOT a MinGW build */ |
208 | | # if defined(_MSC_VER) && (_MSC_VER < 1300) /* less than msvc++ 7.0 */ |
209 | | { |
210 | | LARGE_INTEGER* pli = (LARGE_INTEGER *)&fin->size; |
211 | | (DWORD)pli->LowPart = GetFileSize( fp, (DWORD *)&pli->HighPart ); |
212 | | if ( GetLastError() != NO_ERROR || fin->size <= 0 ) |
213 | | { |
214 | | TidyFree(allocator, fin); |
215 | | return -1; |
216 | | } |
217 | | } |
218 | | # else |
219 | | if ( !GetFileSizeEx( fp, (LARGE_INTEGER*)&fin->size ) |
220 | | || fin->size <= 0 ) |
221 | | { |
222 | | TidyFree(allocator, fin); |
223 | | return -1; |
224 | | } |
225 | | # endif |
226 | | # endif /* MinGW y/n */ |
227 | | |
228 | | fin->map = CreateFileMapping( fp, NULL, PAGE_READONLY, 0, 0, NULL ); |
229 | | |
230 | | if ( !fin->map ) |
231 | | { |
232 | | TidyFree(allocator, fin); |
233 | | return -1; |
234 | | } |
235 | | |
236 | | { |
237 | | SYSTEM_INFO info; |
238 | | GetSystemInfo( &info ); |
239 | | fin->gran = info.dwAllocationGranularity; |
240 | | } |
241 | | |
242 | | fin->allocator = allocator; |
243 | | fin->pos = 0; |
244 | | fin->view = NULL; |
245 | | fin->iter = NULL; |
246 | | fin->end = NULL; |
247 | | |
248 | | if ( mapped_openView( fin ) != 0 ) |
249 | | { |
250 | | CloseHandle( fin->map ); |
251 | | TidyFree( allocator, fin ); |
252 | | return -1; |
253 | | } |
254 | | |
255 | | fin->file = fp; |
256 | | inp->sourceData = fin; |
257 | | |
258 | | return 0; |
259 | | } |
260 | | |
261 | | static void freeMappedFileSource( TidyInputSource* inp, Bool closeIt ) |
262 | | { |
263 | | MappedFileSource* fin = (MappedFileSource*) inp->sourceData; |
264 | | if ( closeIt && fin && fin->file != INVALID_HANDLE_VALUE ) |
265 | | { |
266 | | if ( fin->view ) |
267 | | UnmapViewOfFile( fin->view ); |
268 | | |
269 | | CloseHandle( fin->map ); |
270 | | CloseHandle( fin->file ); |
271 | | } |
272 | | TidyFree( fin->allocator, fin ); |
273 | | } |
274 | | |
275 | | StreamIn* MappedFileInput ( TidyDocImpl* doc, HANDLE fp, int encoding ) |
276 | | { |
277 | | StreamIn *in = TY_(initStreamIn)( doc, encoding ); |
278 | | if ( initMappedFileSource( doc->allocator, &in->source, fp ) != 0 ) |
279 | | { |
280 | | TY_(freeStreamIn)( in ); |
281 | | return NULL; |
282 | | } |
283 | | in->iotype = FileIO; |
284 | | return in; |
285 | | } |
286 | | |
287 | | |
288 | | int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ) { |
289 | | int status = -ENOENT; |
290 | | HANDLE fin = CreateFileA( filnam, GENERIC_READ, FILE_SHARE_READ, NULL, |
291 | | OPEN_EXISTING, 0, NULL ); |
292 | | |
293 | | # if PRESERVE_FILE_TIMES |
294 | | LONGLONG actime, modtime; |
295 | | TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) ); |
296 | | |
297 | | if ( fin != INVALID_HANDLE_VALUE && cfgBool(doc,TidyKeepFileTimes) && |
298 | | GetFileTime(fin, NULL, (FILETIME*)&actime, (FILETIME*)&modtime) ) |
299 | | { |
300 | | # define TY_I64(str) TYDYAPPEND(str,LL) |
301 | | # if _MSC_VER < 1300 && !defined(__GNUC__) /* less than msvc++ 7.0 */ |
302 | | # undef TY_I64 |
303 | | # define TY_I64(str) TYDYAPPEND(str,i64) |
304 | | # endif |
305 | | doc->filetimes.actime = |
306 | | (time_t)( ( actime - TY_I64(116444736000000000)) / 10000000 ); |
307 | | |
308 | | doc->filetimes.modtime = |
309 | | (time_t)( ( modtime - TY_I64(116444736000000000)) / 10000000 ); |
310 | | } |
311 | | # endif /* PRESERVE_FILE_TIMES */ |
312 | | |
313 | | if ( fin != INVALID_HANDLE_VALUE ) |
314 | | { |
315 | | StreamIn* in = MappedFileInput( doc, fin, |
316 | | cfg( doc, TidyInCharEncoding ) ); |
317 | | if ( !in ) |
318 | | { |
319 | | CloseHandle( fin ); |
320 | | return -ENOMEM; |
321 | | } |
322 | | |
323 | | status = TY_(DocParseStream)( doc, in ); |
324 | | freeMappedFileSource( &in->source, yes ); |
325 | | TY_(freeStreamIn)( in ); |
326 | | } |
327 | | else /* Error message! */ |
328 | | TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN ); |
329 | | return status; |
330 | | } |
331 | | |
332 | | #endif /* defined(_WIN32) */ |
333 | | |