Face3d
stb_image.h
Go to the documentation of this file.
1 /* stb_image - v2.02 - public domain image loader - http://nothings.org/stb_image.h
2 no warranty implied; use at your own risk
3 
4 Do this:
5 #define STB_IMAGE_IMPLEMENTATION
6 before you include this file in *one* C or C++ file to create the implementation.
7 
8 // i.e. it should look like this:
9 #include ...
10 #include ...
11 #include ...
12 #define STB_IMAGE_IMPLEMENTATION
13 #include "stb_image.h"
14 
15 You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16 And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19 QUICK NOTES:
20 Primarily of interest to game developers and other people who can
21 avoid problematic images and only need the trivial interface
22 
23 JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24 PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
25 
26 TGA (not sure what subset, if a subset)
27 BMP non-1bpp, non-RLE
28 PSD (composited view only, no extra channels)
29 
30 GIF (*comp always reports as 4-channel)
31 HDR (radiance rgbE format)
32 PIC (Softimage PIC)
33 PNM (PPM and PGM binary only)
34 
35 - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
36 - decode from arbitrary I/O callbacks
37 - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
38 
39 Full documentation under "DOCUMENTATION" below.
40 
41 
42 Revision 2.00 release notes:
43 
44 - Progressive JPEG is now supported.
45 
46 - PPM and PGM binary formats are now supported, thanks to Ken Miller.
47 
48 - x86 platforms now make use of SSE2 SIMD instructions for
49 JPEG decoding, and ARM platforms can use NEON SIMD if requested.
50 This work was done by Fabian "ryg" Giesen. SSE2 is used by
51 default, but NEON must be enabled explicitly; see docs.
52 
53 With other JPEG optimizations included in this version, we see
54 2x speedup on a JPEG on an x86 machine, and a 1.5x speedup
55 on a JPEG on an ARM machine, relative to previous versions of this
56 library. The same results will not obtain for all JPGs and for all
57 x86/ARM machines. (Note that progressive JPEGs are significantly
58 slower to decode than regular JPEGs.) This doesn't mean that this
59 is the fastest JPEG decoder in the land; rather, it brings it
60 closer to parity with standard libraries. If you want the fastest
61 decode, look elsewhere. (See "Philosophy" section of docs below.)
62 
63 See final bullet items below for more info on SIMD.
64 
65 - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing
66 the memory allocator. Unlike other STBI libraries, these macros don't
67 support a context parameter, so if you need to pass a context in to
68 the allocator, you'll have to store it in a global or a thread-local
69 variable.
70 
71 - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and
72 STBI_NO_LINEAR.
73 STBI_NO_HDR: suppress implementation of .hdr reader format
74 STBI_NO_LINEAR: suppress high-dynamic-range light-linear float API
75 
76 - You can suppress implementation of any of the decoders to reduce
77 your code footprint by #defining one or more of the following
78 symbols before creating the implementation.
79 
80 STBI_NO_JPEG
81 STBI_NO_PNG
82 STBI_NO_BMP
83 STBI_NO_PSD
84 STBI_NO_TGA
85 STBI_NO_GIF
86 STBI_NO_HDR
87 STBI_NO_PIC
88 STBI_NO_PNM (.ppm and .pgm)
89 
90 - You can request *only* certain decoders and suppress all other ones
91 (this will be more forward-compatible, as addition of new decoders
92 doesn't require you to disable them explicitly):
93 
94 STBI_ONLY_JPEG
95 STBI_ONLY_PNG
96 STBI_ONLY_BMP
97 STBI_ONLY_PSD
98 STBI_ONLY_TGA
99 STBI_ONLY_GIF
100 STBI_ONLY_HDR
101 STBI_ONLY_PIC
102 STBI_ONLY_PNM (.ppm and .pgm)
103 
104 Note that you can define multiples of these, and you will get all
105 of them ("only x" and "only y" is interpreted to mean "only x&y").
106 
107 - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
108 want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
109 
110 - Compilation of all SIMD code can be suppressed with
111 #define STBI_NO_SIMD
112 It should not be necessary to disable SIMD unless you have issues
113 compiling (e.g. using an x86 compiler which doesn't support SSE
114 intrinsics or that doesn't support the method used to detect
115 SSE2 support at run-time), and even those can be reported as
116 bugs so I can refine the built-in compile-time checking to be
117 smarter.
118 
119 - The old STBI_SIMD system which allowed installing a user-defined
120 IDCT etc. has been removed. If you need this, don't upgrade. My
121 assumption is that almost nobody was doing this, and those who
122 were will find the built-in SIMD more satisfactory anyway.
123 
124 - RGB values computed for JPEG images are slightly different from
125 previous versions of stb_image. (This is due to using less
126 integer precision in SIMD.) The C code has been adjusted so
127 that the same RGB values will be computed regardless of whether
128 SIMD support is available, so your app should always produce
129 consistent results. But these results are slightly different from
130 previous versions. (Specifically, about 3% of available YCbCr values
131 will compute different RGB results from pre-1.49 versions by +-1;
132 most of the deviating values are one smaller in the G channel.)
133 
134 - If you must produce consistent results with previous versions of
135 stb_image, #define STBI_JPEG_OLD and you will get the same results
136 you used to; however, you will not get the SIMD speedups for
137 the YCbCr-to-RGB conversion step (although you should still see
138 significant JPEG speedup from the other changes).
139 
140 Please note that STBI_JPEG_OLD is a temporary feature; it will be
141 removed in future versions of the library. It is only intended for
142 near-term back-compatibility use.
143 
144 
145 Latest revision history:
146 2.02 (2015-01-19) fix incorrect assert, fix warning
147 2.01 (2015-01-17) fix various warnings
148 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
149 2.00 (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
150 progressive JPEG
151 PGM/PPM support
152 STBI_MALLOC,STBI_REALLOC,STBI_FREE
153 STBI_NO_*, STBI_ONLY_*
154 GIF bugfix
155 1.48 (2014-12-14) fix incorrectly-named assert()
156 1.47 (2014-12-14) 1/2/4-bit PNG support (both grayscale and paletted)
157 optimize PNG
158 fix bug in interlaced PNG with user-specified channel count
159 1.46 (2014-08-26) fix broken tRNS chunk in non-paletted PNG
160 1.45 (2014-08-16) workaround MSVC-ARM internal compiler error by wrapping malloc
161 
162 See end of file for full revision history.
163 
164 
165 ============================ Contributors =========================
166 
167 Image formats Bug fixes & warning fixes
168 Sean Barrett (jpeg, png, bmp) Marc LeBlanc
169 Nicolas Schulz (hdr, psd) Christpher Lloyd
170 Jonathan Dummer (tga) Dave Moore
171 Jean-Marc Lienher (gif) Won Chun
172 Tom Seddon (pic) the Horde3D community
173 Thatcher Ulrich (psd) Janez Zemva
174 Ken Miller (pgm, ppm) Jonathan Blow
175 Laurent Gomila
176 Aruelien Pocheville
177 Extensions, features Ryamond Barbiero
178 Jetro Lauha (stbi_info) David Woo
179 Martin "SpartanJ" Golini (stbi_info) Martin Golini
180 James "moose2000" Brown (iPhone PNG) Roy Eltham
181 Ben "Disch" Wenger (io callbacks) Luke Graham
182 Omar Cornut (1/2/4-bit PNG) Thomas Ruf
183 John Bartholomew
184 Ken Hamada
185 Optimizations & bugfixes Cort Stratton
186 Fabian "ryg" Giesen Blazej Dariusz Roszkowski
187 Arseny Kapoulkine Thibault Reuille
188 Paul Du Bois
189 Guillaume George
190 If your name should be here but Jerry Jansson
191 isn't, let Sean know. Hayaki Saito
192 Johan Duparc
193 Ronny Chevalier
194 Michal Cichon
195 Tero Hanninen
196 Sergio Gonzalez
197 Cass Everitt
198 Engin Manap
199 
200 License:
201 This software is in the public domain. Where that dedication is not
202 recognized, you are granted a perpetual, irrevocable license to copy
203 and modify this file however you want.
204 
205 */
206 
207 #ifndef STBI_INCLUDE_STB_IMAGE_H
208 #define STBI_INCLUDE_STB_IMAGE_H
209 
210 // DOCUMENTATION
211 //
212 // Limitations:
213 // - no 16-bit-per-channel PNG
214 // - no 12-bit-per-channel JPEG
215 // - no JPEGs with arithmetic coding
216 // - no 1-bit BMP
217 // - GIF always returns *comp=4
218 //
219 // Basic usage (see HDR discussion below for HDR usage):
220 // int x,y,n;
221 // unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
222 // // ... process data if not NULL ...
223 // // ... x = width, y = height, n = # 8-bit components per pixel ...
224 // // ... replace '0' with '1'..'4' to force that many components per pixel
225 // // ... but 'n' will always be the number that it would have been if you said 0
226 // stbi_image_free(data)
227 //
228 // Standard parameters:
229 // int *x -- outputs image width in pixels
230 // int *y -- outputs image height in pixels
231 // int *comp -- outputs # of image components in image file
232 // int req_comp -- if non-zero, # of image components requested in result
233 //
234 // The return value from an image loader is an 'unsigned char *' which points
235 // to the pixel data, or NULL on an allocation failure or if the image is
236 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
237 // with each pixel consisting of N interleaved 8-bit components; the first
238 // pixel pointed to is top-left-most in the image. There is no padding between
239 // image scanlines or between pixels, regardless of format. The number of
240 // components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
241 // If req_comp is non-zero, *comp has the number of components that _would_
242 // have been output otherwise. E.g. if you set req_comp to 4, you will always
243 // get RGBA output, but you can check *comp to see if it's trivially opaque
244 // because e.g. there were only 3 channels in the source image.
245 //
246 // An output image with N components has the following components interleaved
247 // in this order in each pixel:
248 //
249 // N=#comp components
250 // 1 grey
251 // 2 grey, alpha
252 // 3 red, green, blue
253 // 4 red, green, blue, alpha
254 //
255 // If image loading fails for any reason, the return value will be NULL,
256 // and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
257 // can be queried for an extremely brief, end-user unfriendly explanation
258 // of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
259 // compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
260 // more user-friendly ones.
261 //
262 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
263 //
264 // ===========================================================================
265 //
266 // Philosophy
267 //
268 // stb libraries are designed with the following priorities:
269 //
270 // 1. easy to use
271 // 2. easy to maintain
272 // 3. good performance
273 //
274 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
275 // and for best performance I may provide less-easy-to-use APIs that give higher
276 // performance, in addition to the easy to use ones. Nevertheless, it's important
277 // to keep in mind that from the standpoint of you, a client of this library,
278 // all you care about is #1 and #3, and stb libraries do not emphasize #3 above all.
279 //
280 // Some secondary priorities arise directly from the first two, some of which
281 // make more explicit reasons why performance can't be emphasized.
282 //
283 // - Portable ("ease of use")
284 // - Small footprint ("easy to maintain")
285 // - No dependencies ("ease of use")
286 //
287 // ===========================================================================
288 //
289 // I/O callbacks
290 //
291 // I/O callbacks allow you to read from arbitrary sources, like packaged
292 // files or some other source. Data read from callbacks are processed
293 // through a small internal buffer (currently 128 bytes) to try to reduce
294 // overhead.
295 //
296 // The three functions you must define are "read" (reads some bytes of data),
297 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
298 //
299 // ===========================================================================
300 //
301 // SIMD support
302 //
303 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
304 // supported by the compiler. For ARM Neon support, you must explicitly
305 // request it.
306 //
307 // (The old do-it-yourself SIMD API is no longer supported in the current
308 // code.)
309 //
310 // On x86, SSE2 will automatically be used when available based on a run-time
311 // test; if not, the generic C versions are used as a fall-back. On ARM targets,
312 // the typical path is to have separate builds for NEON and non-NEON devices
313 // (at least this is true for iOS and Android). Therefore, the NEON support is
314 // toggled by a build flag: define STBI_NEON to get NEON loops.
315 //
316 // The output of the JPEG decoder is slightly different from versions that
317 // predate SIMD support (that is, versions before 1.49). The
318 // difference is only +-1 in the 8-bit RGB channels, and only on a small
319 // fraction of pixels. You can force the pre-1.49 behavior by defining
320 // STBI_JPEG_OLD, but this will disable some of the SIMD decoding path
321 // and hence cost some performance.
322 //
323 // If for some reason you do not want to use any of SIMD code, or if
324 // you have issues compiling it, you can disable it entirely by
325 // defining STBI_NO_SIMD.
326 //
327 // ===========================================================================
328 //
329 // HDR image support (disable by defining STBI_NO_HDR)
330 //
331 // stb_image now supports loading HDR images in general, and currently
332 // the Radiance .HDR file format, although the support is provided
333 // generically. You can still load any file through the existing interface;
334 // if you attempt to load an HDR file, it will be automatically remapped to
335 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
336 // both of these constants can be reconfigured through this interface:
337 //
338 // stbi_hdr_to_ldr_gamma(2.2f);
339 // stbi_hdr_to_ldr_scale(1.0f);
340 //
341 // (note, do not use _inverse_ constants; stb_image will invert them
342 // appropriately).
343 //
344 // Additionally, there is a new, parallel interface for loading files as
345 // (linear) floats to preserve the full dynamic range:
346 //
347 // float *data = stbi_loadf(filename, &x, &y, &n, 0);
348 //
349 // If you load LDR images through this interface, those images will
350 // be promoted to floating point values, run through the inverse of
351 // constants corresponding to the above:
352 //
353 // stbi_ldr_to_hdr_scale(1.0f);
354 // stbi_ldr_to_hdr_gamma(2.2f);
355 //
356 // Finally, given a filename (or an open file or memory block--see header
357 // file for details) containing image data, you can query for the "most
358 // appropriate" interface to use (that is, whether the image is HDR or
359 // not), using:
360 //
361 // stbi_is_hdr(char *filename);
362 //
363 // ===========================================================================
364 //
365 // iPhone PNG support:
366 //
367 // By default we convert iphone-formatted PNGs back to RGB, even though
368 // they are internally encoded differently. You can disable this conversion
369 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
370 // you will always just get the native iphone "format" through (which
371 // is BGR stored in RGB).
372 //
373 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
374 // pixel to remove any premultiplied alpha *only* if the image file explicitly
375 // says there's premultiplied data (currently only happens in iPhone images,
376 // and only if iPhone convert-to-rgb processing is on).
377 //
378 
379 
380 #ifndef STBI_NO_STDIO
381 #include <stdio.h>
382 #endif // STBI_NO_STDIO
383 
#define STBI_VERSION 1

// Named values for the 'req_comp' argument of the loaders. Apart from
// STBI_default, each value equals the number of interleaved 8-bit components
// per output pixel (1 = grey, 2 = grey+alpha, 3 = RGB, 4 = RGBA).
enum
{
   STBI_default = 0, // only used for req_comp

   STBI_grey       = 1,
   STBI_grey_alpha = 2,
   STBI_rgb        = 3,
   STBI_rgb_alpha  = 4
};

// A single 8-bit image component.
typedef unsigned char stbi_uc;
397 
// Give the public declarations C linkage when this header is compiled as C++.
// The brace opened here is closed after the last public declaration.
#if defined(__cplusplus)
extern "C" {
#endif

// STBIDEF is the storage class placed in front of every public function:
// 'static' when STB_IMAGE_STATIC is defined, 'extern' otherwise.
#if defined(STB_IMAGE_STATIC)
#  define STBIDEF static
#else
#  define STBIDEF extern
#endif
407 
409  //
410  // PRIMARY API - works on images of any type
411  //
412 
413  //
414  // load image by filename, open file, or memory buffer
415  //
416 
// User-supplied I/O hooks consumed by the *_from_callbacks() entry points.
// Every hook receives the opaque 'user' pointer handed to that entry point.
typedef struct
{
   int  (*read)(void *user, char *data, int size); // fill 'data' with 'size' bytes. return number of bytes actually read
   void (*skip)(void *user, int n);                // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   int  (*eof) (void *user);                       // returns nonzero if we are at end of file/data
} stbi_io_callbacks;
423 
424  STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp);
425  STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
426  STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
427 
428 #ifndef STBI_NO_STDIO
429  STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp);
430  // for stbi_load_from_file, file pointer is left pointing immediately after image
431 #endif
432 
433 #ifndef STBI_NO_LINEAR
434  STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp);
435  STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
436  STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
437 
438 #ifndef STBI_NO_STDIO
439  STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp);
440 #endif
441 #endif
442 
443 #ifndef STBI_NO_HDR
444  STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
445  STBIDEF void stbi_hdr_to_ldr_scale(float scale);
446 #endif
447 
448 #ifndef STBI_NO_LINEAR
449  STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
450  STBIDEF void stbi_ldr_to_hdr_scale(float scale);
451 #endif // STBI_NO_HDR
452 
453  // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
454  STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
455  STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
456 #ifndef STBI_NO_STDIO
457  STBIDEF int stbi_is_hdr(char const *filename);
458  STBIDEF int stbi_is_hdr_from_file(FILE *f);
459 #endif // STBI_NO_STDIO
460 
461 
462  // get a VERY brief reason for failure
463  // NOT THREADSAFE
464  STBIDEF const char *stbi_failure_reason(void);
465 
466  // free the loaded image -- this is just free()
467  STBIDEF void stbi_image_free(void *retval_from_stbi_load);
468 
469  // get image dimensions & components without fully decoding
470  STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
471  STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
472 
473 #ifndef STBI_NO_STDIO
474  STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp);
475  STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp);
476 
477 #endif
478 
479 
480 
481  // for image formats that explicitly notate that they have premultiplied alpha,
482  // we just return the colors as stored in the file. set this flag to force
483  // unpremultiplication. results are undefined if the unpremultiply overflow.
484  STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
485 
486  // indicate whether we should process iphone images back to canonical format,
487  // or just pass them through "as-is"
488  STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
489 
490 
491  // ZLIB client - used by PNG, available for other purposes
492 
493  STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
494  STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
495  STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
496  STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
497 
498  STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
499  STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
500 
501 
502 #ifdef __cplusplus
503 }
504 #endif
505 
506 //
507 //
509 #endif // STBI_INCLUDE_STB_IMAGE_H
510 
511 #ifdef STB_IMAGE_IMPLEMENTATION
512 
// If the client selected decoders with any STBI_ONLY_* symbol, translate that
// whitelist into the STBI_NO_* symbols the rest of the file tests: every
// decoder that was not explicitly requested gets disabled.
#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
 || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
 || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
 || defined(STBI_ONLY_ZLIB)
#  if !defined(STBI_ONLY_JPEG)
#    define STBI_NO_JPEG
#  endif
#  if !defined(STBI_ONLY_PNG)
#    define STBI_NO_PNG
#  endif
#  if !defined(STBI_ONLY_BMP)
#    define STBI_NO_BMP
#  endif
#  if !defined(STBI_ONLY_PSD)
#    define STBI_NO_PSD
#  endif
#  if !defined(STBI_ONLY_TGA)
#    define STBI_NO_TGA
#  endif
#  if !defined(STBI_ONLY_GIF)
#    define STBI_NO_GIF
#  endif
#  if !defined(STBI_ONLY_HDR)
#    define STBI_NO_HDR
#  endif
#  if !defined(STBI_ONLY_PIC)
#    define STBI_NO_PIC
#  endif
#  if !defined(STBI_ONLY_PNM)
#    define STBI_NO_PNM
#  endif
#endif
545 
// zlib is used by the PNG decoder, so it is dropped along with PNG unless the
// client asked to keep it: either with STBI_SUPPORT_ZLIB, or by selecting it
// with STBI_ONLY_ZLIB (which itself causes STBI_NO_PNG to be defined above and
// would otherwise disable the very decoder that was requested).
#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) \
 && !defined(STBI_ONLY_ZLIB) && !defined(STBI_NO_ZLIB)
#define STBI_NO_ZLIB
#endif
549 
550 
551 #include <stdarg.h>
552 #include <stddef.h> // ptrdiff_t on osx
553 #include <stdlib.h>
554 #include <string.h>
555 
556 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
557 #include <math.h> // ldexp
558 #endif
559 
560 #ifndef STBI_NO_STDIO
561 #include <stdio.h>
562 #endif
563 
// Clients may pre-define STBI_ASSERT(x); otherwise fall back to <assert.h>.
#if !defined(STBI_ASSERT)
#  include <assert.h>
#  define STBI_ASSERT(x) assert(x)
#endif
568 
569 
// stbi_inline: inlining hint for small hot helpers.
//   MSVC          -> __forceinline
//   other C++     -> inline
//   other C       -> nothing
#if defined(_MSC_VER)
#  define stbi_inline __forceinline
#elif defined(__cplusplus)
#  define stbi_inline inline
#else
#  define stbi_inline
#endif
579 
580 
// Fixed-width integer aliases used throughout the decoders. <stdint.h> is
// used everywhere except under MSVC, where built-in types are used instead.
#if !defined(_MSC_VER)
#  include <stdint.h>
typedef uint16_t stbi__uint16;
typedef int16_t  stbi__int16;
typedef uint32_t stbi__uint32;
typedef int32_t  stbi__int32;
#else
typedef unsigned short stbi__uint16;
typedef signed   short stbi__int16;
typedef unsigned int   stbi__uint32;
typedef signed   int   stbi__int32;
#endif

// should produce compiler error if size is wrong
// (the array length becomes -1 when stbi__uint32 is not exactly 4 bytes)
typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
596 
// STBI_NOTUSED(v): mark 'v' as deliberately unused.
// The non-MSVC form wraps the operand in sizeof, so it is not evaluated.
#if defined(_MSC_VER)
#  define STBI_NOTUSED(v)  (void)(v)
#else
#  define STBI_NOTUSED(v)  (void)sizeof(v)
#endif
602 
#ifdef _MSC_VER
#define STBI_HAS_LROTL
#endif

// stbi_lrot(x,y): rotate x left by y bits. The portable fallback assumes x is
// a 32-bit unsigned value.
#ifdef STBI_HAS_LROTL
#define stbi_lrot(x,y) _lrotl(x,y)
#else
// Both shift counts are reduced modulo 32 so that y == 0 (or 32) yields x
// unchanged instead of shifting by the full width of the type, which is
// undefined behaviour in C. For y in 1..31 the result is the same as the
// plain (x << y) | (x >> (32 - y)) form.
#define stbi_lrot(x,y) (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
612 
// The three allocator hooks must be overridden together or not at all:
// reject any configuration that defines only some of them.
#if defined(STBI_MALLOC) || defined(STBI_FREE) || defined(STBI_REALLOC)
#  if !(defined(STBI_MALLOC) && defined(STBI_FREE) && defined(STBI_REALLOC))
#    error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC."
#  endif
#endif

// Default to the C runtime allocator when the client supplied nothing.
#if !defined(STBI_MALLOC)
#  define STBI_MALLOC(sz)     malloc(sz)
#  define STBI_REALLOC(p,sz)  realloc(p,sz)
#  define STBI_FREE(p)        free(p)
#endif
626 
// gcc only accepts the SSE2 intrinsics when compiling with -msse2 (which in
// turn lets the compiler emit SSE2 anywhere in the translation unit), so when
// __SSE2__ is not defined the SIMD code paths are switched off altogether.
// Background: http://www.virtualdub.org/blog/pivot/entry.php?id=363
#if !defined(STBI_NO_SIMD) && defined(__GNUC__) && !defined(__SSE2__)
#  define STBI_NO_SIMD
#endif
634 
// x86 / x64 SSE2 support: defines STBI_SSE2, the STBI_SIMD_ALIGN(type, name)
// declaration helper, and stbi__sse2_available(), which reports whether the
// SSE2 code paths may be used.
#if !defined(STBI_NO_SIMD) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))
#define STBI_SSE2
#include <emmintrin.h>

#ifdef _MSC_VER

#if _MSC_VER >= 1400  // not VC6
#include <intrin.h> // __cpuid
// Returns the fourth register (EDX) reported by CPUID function 1.
static int stbi__cpuid3(void)
{
   int info[4];
   __cpuid(info, 1);
   return info[3];
}
#else
// Same query for compilers without the __cpuid intrinsic, via inline asm.
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax, 1
      cpuid
      mov  res, edx
   }
   return res;
}
#endif

#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name

// Run-time check: nonzero when bit 26 of the CPUID feature word is set.
static int stbi__sse2_available(void)
{
   int info3 = stbi__cpuid3();
   return ((info3 >> 26) & 1) != 0;
}
#else // assume GCC-style if not VC++
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))

// Returns nonzero when SSE2 may be used.
static int stbi__sse2_available(void)
{
#if defined(__SSE2__)
   // This translation unit is being compiled with SSE2 enabled, so the
   // compiler is already free to emit SSE2 instructions anywhere in it;
   // a run-time probe cannot make the binary any safer. This also covers
   // compilers that lack (or report a version too old for)
   // __builtin_cpu_supports.
   return 1;
#elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later
   // GCC 4.8+ has a nice way to do this
   return __builtin_cpu_supports("sse2");
#else
   // portable way to do this, preferably without using GCC inline ASM?
   // just bail for now.
   return 0;
#endif
}
#endif
#endif
685 
// ARM NEON
// STBI_NO_SIMD wins over an explicit STBI_NEON request.
#if defined(STBI_NEON) && defined(STBI_NO_SIMD)
#  undef STBI_NEON
#endif

#if defined(STBI_NEON)
#  include <arm_neon.h>
// assume GCC or Clang on ARM targets
#  define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
#endif

// If nothing above defined an aligned-declaration helper, fall back to a
// plain declaration.
#if !defined(STBI_SIMD_ALIGN)
#  define STBI_SIMD_ALIGN(type, name) type name
#endif
700 
702 //
703 // stbi__context struct and start_xxx functions
704 
705 // stbi__context structure is our basic context used by all images, so it
706 // contains all the IO context, plus some basic image information
707 typedef struct
708 {
709  stbi__uint32 img_x, img_y;
710  int img_n, img_out_n;
711 
713  void *io_user_data;
714 
715  int read_from_callbacks;
716  int buflen;
717  stbi_uc buffer_start[128];
718 
719  stbi_uc *img_buffer, *img_buffer_end;
720  stbi_uc *img_buffer_original;
721 } stbi__context;
722 
723 
724 static void stbi__refill_buffer(stbi__context *s);
725 
726 // initialize a memory-decode context
727 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
728 {
729  s->io.read = NULL;
730  s->read_from_callbacks = 0;
731  s->img_buffer = s->img_buffer_original = (stbi_uc *)buffer;
732  s->img_buffer_end = (stbi_uc *)buffer + len;
733 }
734 
735 // initialize a callback-based context
736 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
737 {
738  s->io = *c;
739  s->io_user_data = user;
740  s->buflen = sizeof(s->buffer_start);
741  s->read_from_callbacks = 1;
742  s->img_buffer_original = s->buffer_start;
743  stbi__refill_buffer(s);
744 }
745 
746 #ifndef STBI_NO_STDIO
747 
// 'read' callback for FILE-backed contexts: 'user' is the FILE*.
// Reads up to 'size' bytes into 'data' and returns the number actually read.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
752 
// 'skip' callback for FILE-backed contexts: 'user' is the FILE*.
// Moves the file position by 'n' bytes relative to the current position
// (negative n moves backwards). The fseek result is not checked.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   fseek(stream, n, SEEK_CUR);
}
757 
// 'eof' callback for FILE-backed contexts: 'user' is the FILE*.
// Returns feof()'s result, i.e. nonzero once the end-of-file indicator is set.
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   return feof(stream);
}
762 
763 static stbi_io_callbacks stbi__stdio_callbacks =
764 {
765  stbi__stdio_read,
766  stbi__stdio_skip,
767  stbi__stdio_eof,
768 };
769 
770 static void stbi__start_file(stbi__context *s, FILE *f)
771 {
772  stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *)f);
773 }
774 
775 //static void stop_file(stbi__context *s) { }
776 
777 #endif // !STBI_NO_STDIO
778 
779 static void stbi__rewind(stbi__context *s)
780 {
781  // conceptually rewind SHOULD rewind to the beginning of the stream,
782  // but we just rewind to the beginning of the initial buffer, because
783  // we only use it after doing 'test', which only ever looks at at most 92 bytes
784  s->img_buffer = s->img_buffer_original;
785 }
786 
787 #ifndef STBI_NO_JPEG
788 static int stbi__jpeg_test(stbi__context *s);
789 static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
790 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
791 #endif
792 
793 #ifndef STBI_NO_PNG
794 static int stbi__png_test(stbi__context *s);
795 static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
796 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
797 #endif
798 
799 #ifndef STBI_NO_BMP
800 static int stbi__bmp_test(stbi__context *s);
801 static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
802 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
803 #endif
804 
805 #ifndef STBI_NO_TGA
806 static int stbi__tga_test(stbi__context *s);
807 static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
808 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
809 #endif
810 
811 #ifndef STBI_NO_PSD
812 static int stbi__psd_test(stbi__context *s);
813 static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
814 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
815 #endif
816 
817 #ifndef STBI_NO_HDR
818 static int stbi__hdr_test(stbi__context *s);
819 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
820 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
821 #endif
822 
823 #ifndef STBI_NO_PIC
824 static int stbi__pic_test(stbi__context *s);
825 static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
826 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
827 #endif
828 
829 #ifndef STBI_NO_GIF
830 static int stbi__gif_test(stbi__context *s);
831 static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
832 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
833 #endif
834 
835 #ifndef STBI_NO_PNM
836 static int stbi__pnm_test(stbi__context *s);
837 static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
838 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
839 #endif
840 
841 // this is not threadsafe
842 static const char *stbi__g_failure_reason;
843 
844 STBIDEF const char *stbi_failure_reason(void)
845 {
846  return stbi__g_failure_reason;
847 }
848 
849 static int stbi__err(const char *str)
850 {
851  stbi__g_failure_reason = str;
852  return 0;
853 }
854 
// Allocate 'size' bytes through the configurable STBI_MALLOC hook.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
859 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char
//
// From here on stbi__err takes two strings, (x, y), and the build
// configuration decides which one, if any, reaches the stbi__err() function:
//   STBI_NO_FAILURE_STRINGS -> neither; the expression is just 0
//   STBI_FAILURE_USERMSG    -> y
//   otherwise               -> x

#if defined(STBI_NO_FAILURE_STRINGS)
#  define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
#  define stbi__err(x,y)  stbi__err(y)
#else
#  define stbi__err(x,y)  stbi__err(x)
#endif

// Both arms of the conditional are NULL: stbi__err is evaluated only for its
// side effect and the expression always yields a null pointer of the wanted type.
#define stbi__errpf(x,y)   ((float *) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *) (stbi__err(x,y)?NULL:NULL))
874 
875 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
876 {
877  STBI_FREE(retval_from_stbi_load);
878 }
879 
880 #ifndef STBI_NO_LINEAR
881 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
882 #endif
883 
884 #ifndef STBI_NO_HDR
885 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
886 #endif
887 
888 static unsigned char *stbi_load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
889 {
890 #ifndef STBI_NO_JPEG
891  if (stbi__jpeg_test(s)) return stbi__jpeg_load(s, x, y, comp, req_comp);
892 #endif
893 #ifndef STBI_NO_PNG
894  if (stbi__png_test(s)) return stbi__png_load(s, x, y, comp, req_comp);
895 #endif
896 #ifndef STBI_NO_BMP
897  if (stbi__bmp_test(s)) return stbi__bmp_load(s, x, y, comp, req_comp);
898 #endif
899 #ifndef STBI_NO_GIF
900  if (stbi__gif_test(s)) return stbi__gif_load(s, x, y, comp, req_comp);
901 #endif
902 #ifndef STBI_NO_PSD
903  if (stbi__psd_test(s)) return stbi__psd_load(s, x, y, comp, req_comp);
904 #endif
905 #ifndef STBI_NO_PIC
906  if (stbi__pic_test(s)) return stbi__pic_load(s, x, y, comp, req_comp);
907 #endif
908 #ifndef STBI_NO_PNM
909  if (stbi__pnm_test(s)) return stbi__pnm_load(s, x, y, comp, req_comp);
910 #endif
911 
912 #ifndef STBI_NO_HDR
913  if (stbi__hdr_test(s)) {
914  float *hdr = stbi__hdr_load(s, x, y, comp, req_comp);
915  return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
916  }
917 #endif
918 
919 #ifndef STBI_NO_TGA
920  // test tga last because it's a crappy test!
921  if (stbi__tga_test(s))
922  return stbi__tga_load(s, x, y, comp, req_comp);
923 #endif
924 
925  return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
926 }
927 
928 #ifndef STBI_NO_STDIO
929 
// Opens 'filename' with 'mode'. MSVC 2005 and later go through fopen_s; every
// other compiler uses plain fopen. Yields a null FILE pointer on failure.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *handle;
   if (fopen_s(&handle, filename, mode) != 0)
      return 0;
   return handle;
#else
   return fopen(filename, mode);
#endif
}
941 
942 
943 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
944 {
945  FILE *f = stbi__fopen(filename, "rb");
946  unsigned char *result;
947  if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
948  result = stbi_load_from_file(f, x, y, comp, req_comp);
949  fclose(f);
950  return result;
951 }
952 
953 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
954 {
955  unsigned char *result;
956  stbi__context s;
957  stbi__start_file(&s, f);
958  result = stbi_load_main(&s, x, y, comp, req_comp);
959  if (result) {
960  // need to 'unget' all the characters in the IO buffer
961  fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
962  }
963  return result;
964 }
965 #endif
966 
967 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
968 {
969  stbi__context s;
970  stbi__start_mem(&s, buffer, len);
971  return stbi_load_main(&s, x, y, comp, req_comp);
972 }
973 
974 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
975 {
976  stbi__context s;
977  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
978  return stbi_load_main(&s, x, y, comp, req_comp);
979 }
980 
981 #ifndef STBI_NO_LINEAR
982 static float *stbi_loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
983 {
984  unsigned char *data;
985 #ifndef STBI_NO_HDR
986  if (stbi__hdr_test(s))
987  return stbi__hdr_load(s, x, y, comp, req_comp);
988 #endif
989  data = stbi_load_main(s, x, y, comp, req_comp);
990  if (data)
991  return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
992  return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
993 }
994 
995 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
996 {
997  stbi__context s;
998  stbi__start_mem(&s, buffer, len);
999  return stbi_loadf_main(&s, x, y, comp, req_comp);
1000 }
1001 
1002 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1003 {
1004  stbi__context s;
1005  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1006  return stbi_loadf_main(&s, x, y, comp, req_comp);
1007 }
1008 
1009 #ifndef STBI_NO_STDIO
1010 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1011 {
1012  float *result;
1013  FILE *f = stbi__fopen(filename, "rb");
1014  if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1015  result = stbi_loadf_from_file(f, x, y, comp, req_comp);
1016  fclose(f);
1017  return result;
1018 }
1019 
1020 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1021 {
1022  stbi__context s;
1023  stbi__start_file(&s, f);
1024  return stbi_loadf_main(&s, x, y, comp, req_comp);
1025 }
1026 #endif // !STBI_NO_STDIO
1027 
1028 #endif // !STBI_NO_LINEAR
1029 
1030 // these is-hdr-or-not functions are defined regardless of whether STBI_NO_HDR
1031 // is defined, for API simplicity; if STBI_NO_HDR is defined, they always
1032 // report false!
1033 
1034 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1035 {
1036 #ifndef STBI_NO_HDR
1037  stbi__context s;
1038  stbi__start_mem(&s, buffer, len);
1039  return stbi__hdr_test(&s);
1040 #else
1041  STBI_NOTUSED(buffer);
1042  STBI_NOTUSED(len);
1043  return 0;
1044 #endif
1045 }
1046 
1047 #ifndef STBI_NO_STDIO
1048 STBIDEF int stbi_is_hdr(char const *filename)
1049 {
1050  FILE *f = stbi__fopen(filename, "rb");
1051  int result = 0;
1052  if (f) {
1053  result = stbi_is_hdr_from_file(f);
1054  fclose(f);
1055  }
1056  return result;
1057 }
1058 
1059 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1060 {
1061 #ifndef STBI_NO_HDR
1062  stbi__context s;
1063  stbi__start_file(&s, f);
1064  return stbi__hdr_test(&s);
1065 #else
1066  return 0;
1067 #endif
1068 }
1069 #endif // !STBI_NO_STDIO
1070 
1071 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1072 {
1073 #ifndef STBI_NO_HDR
1074  stbi__context s;
1075  stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1076  return stbi__hdr_test(&s);
1077 #else
1078  return 0;
1079 #endif
1080 }
1081 
1082 static float stbi__h2l_gamma_i = 1.0f / 2.2f, stbi__h2l_scale_i = 1.0f;
1083 static float stbi__l2h_gamma = 2.2f, stbi__l2h_scale = 1.0f;
1084 
1085 #ifndef STBI_NO_LINEAR
1086 STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1087 STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1088 #endif
1089 
1090 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1 / gamma; }
1091 STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1 / scale; }
1092 
1093 
1095 //
1096 // Common code used by all image loaders
1097 //
1098 
1099 enum
1100 {
1101  STBI__SCAN_load = 0,
1102  STBI__SCAN_type,
1103  STBI__SCAN_header
1104 };
1105 
1106 static void stbi__refill_buffer(stbi__context *s)
1107 {
1108  int n = (s->io.read)(s->io_user_data, (char*)s->buffer_start, s->buflen);
1109  if (n == 0) {
1110  // at end of file, treat same as if from memory, but need to handle case
1111  // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1112  s->read_from_callbacks = 0;
1113  s->img_buffer = s->buffer_start;
1114  s->img_buffer_end = s->buffer_start + 1;
1115  *s->img_buffer = 0;
1116  }
1117  else {
1118  s->img_buffer = s->buffer_start;
1119  s->img_buffer_end = s->buffer_start + n;
1120  }
1121 }
1122 
1123 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1124 {
1125  if (s->img_buffer < s->img_buffer_end)
1126  return *s->img_buffer++;
1127  if (s->read_from_callbacks) {
1128  stbi__refill_buffer(s);
1129  return *s->img_buffer++;
1130  }
1131  return 0;
1132 }
1133 
1134 stbi_inline static int stbi__at_eof(stbi__context *s)
1135 {
1136  if (s->io.read) {
1137  if (!(s->io.eof)(s->io_user_data)) return 0;
1138  // if feof() is true, check if buffer = end
1139  // special case: we've only got the special 0 character at the end
1140  if (s->read_from_callbacks == 0) return 1;
1141  }
1142 
1143  return s->img_buffer >= s->img_buffer_end;
1144 }
1145 
1146 static void stbi__skip(stbi__context *s, int n)
1147 {
1148  if (s->io.read) {
1149  int blen = (int)(s->img_buffer_end - s->img_buffer);
1150  if (blen < n) {
1151  s->img_buffer = s->img_buffer_end;
1152  (s->io.skip)(s->io_user_data, n - blen);
1153  return;
1154  }
1155  }
1156  s->img_buffer += n;
1157 }
1158 
1159 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1160 {
1161  if (s->io.read) {
1162  int blen = (int)(s->img_buffer_end - s->img_buffer);
1163  if (blen < n) {
1164  int res, count;
1165 
1166  memcpy(buffer, s->img_buffer, blen);
1167 
1168  count = (s->io.read)(s->io_user_data, (char*)buffer + blen, n - blen);
1169  res = (count == (n - blen));
1170  s->img_buffer = s->img_buffer_end;
1171  return res;
1172  }
1173  }
1174 
1175  if (s->img_buffer + n <= s->img_buffer_end) {
1176  memcpy(buffer, s->img_buffer, n);
1177  s->img_buffer += n;
1178  return 1;
1179  }
1180  else
1181  return 0;
1182 }
1183 
1184 static int stbi__get16be(stbi__context *s)
1185 {
1186  int z = stbi__get8(s);
1187  return (z << 8) + stbi__get8(s);
1188 }
1189 
1190 static stbi__uint32 stbi__get32be(stbi__context *s)
1191 {
1192  stbi__uint32 z = stbi__get16be(s);
1193  return (z << 16) + stbi__get16be(s);
1194 }
1195 
1196 static int stbi__get16le(stbi__context *s)
1197 {
1198  int z = stbi__get8(s);
1199  return z + (stbi__get8(s) << 8);
1200 }
1201 
1202 static stbi__uint32 stbi__get32le(stbi__context *s)
1203 {
1204  stbi__uint32 z = stbi__get16le(s);
1205  return z + (stbi__get16le(s) << 16);
1206 }
1207 
1208 #define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings
1209 
1210 
1212 //
1213 // generic converter from built-in img_n to req_comp
1214 // individual types do this automatically as much as possible (e.g. jpeg
1215 // does all cases internally since it needs to colorspace convert anyway,
1216 // and it never has alpha, so very few cases ). png can automatically
1217 // interleave an alpha=255 channel, but falls back to this for other cases
1218 //
1219 // assume data buffer is malloced, so malloc a new one and free that one
1220 // only failure mode is malloc failing
1221 
1222 static stbi_uc stbi__compute_y(int r, int g, int b)
1223 {
1224  return (stbi_uc)(((r * 77) + (g * 150) + (29 * b)) >> 8);
1225 }
1226 
// Converts an x-by-y image from img_n to req_comp interleaved channels.
//   data     - malloc'd source pixels; ownership passes to this function
//   img_n    - channels per pixel in 'data'
//   req_comp - channels per pixel wanted (1..4)
// Returns 'data' itself when no conversion is needed, otherwise a newly
// allocated buffer (and 'data' is freed). On failure 'data' is freed and NULL
// is returned with the failure reason set.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i, j;
   unsigned char *good;
   unsigned int widest;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // The allocation size and every row offset below are computed in unsigned
   // int. Reject dimensions for which the larger of the two layouts would wrap,
   // since a wrapped size gives an undersized buffer that the loops overrun.
   widest = (unsigned int)(req_comp > img_n ? req_comp : img_n);
   if (x != 0 && y != 0 && widest > (~0u / x) / y) {
      STBI_FREE(data);
      return stbi__errpuc("too large", "Image too large to convert");
   }

   good = (unsigned char *)stbi__malloc(req_comp * x * y);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j = 0; j < (int)y; ++j) {
      unsigned char *src = data + j * x * img_n;
      unsigned char *dest = good + j * x * req_comp;

#define COMBO(a,b) ((a)*8+(b))
#define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (COMBO(img_n, req_comp)) {
         CASE(1, 2) dest[0] = src[0], dest[1] = 255; break;
         CASE(1, 3) dest[0] = dest[1] = dest[2] = src[0]; break;
         CASE(1, 4) dest[0] = dest[1] = dest[2] = src[0], dest[3] = 255; break;
         CASE(2, 1) dest[0] = src[0]; break;
         CASE(2, 3) dest[0] = dest[1] = dest[2] = src[0]; break;
         CASE(2, 4) dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1]; break;
         CASE(3, 4) dest[0] = src[0], dest[1] = src[1], dest[2] = src[2], dest[3] = 255; break;
         CASE(3, 1) dest[0] = stbi__compute_y(src[0], src[1], src[2]); break;
         CASE(3, 2) dest[0] = stbi__compute_y(src[0], src[1], src[2]), dest[1] = 255; break;
         CASE(4, 1) dest[0] = stbi__compute_y(src[0], src[1], src[2]); break;
         CASE(4, 2) dest[0] = stbi__compute_y(src[0], src[1], src[2]), dest[1] = src[3]; break;
         CASE(4, 3) dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; break;
      default: STBI_ASSERT(0);
      }
#undef CASE
   }

   STBI_FREE(data);
   return good;
}
1270 
1271 #ifndef STBI_NO_LINEAR
1272 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1273 {
1274  int i, k, n;
1275  float *output = (float *)stbi__malloc(x * y * comp * sizeof(float));
1276  if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1277  // compute number of non-alpha components
1278  if (comp & 1) n = comp; else n = comp - 1;
1279  for (i = 0; i < x*y; ++i) {
1280  for (k = 0; k < n; ++k) {
1281  output[i*comp + k] = (float)(pow(data[i*comp + k] / 255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1282  }
1283  if (k < comp) output[i*comp + k] = data[i*comp + k] / 255.0f;
1284  }
1285  STBI_FREE(data);
1286  return output;
1287 }
1288 #endif
1289 
1290 #ifndef STBI_NO_HDR
1291 #define stbi__float2int(x) ((int) (x))
1292 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1293 {
1294  int i, k, n;
1295  stbi_uc *output = (stbi_uc *)stbi__malloc(x * y * comp);
1296  if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1297  // compute number of non-alpha components
1298  if (comp & 1) n = comp; else n = comp - 1;
1299  for (i = 0; i < x*y; ++i) {
1300  for (k = 0; k < n; ++k) {
1301  float z = (float)pow(data[i*comp + k] * stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1302  if (z < 0) z = 0;
1303  if (z > 255) z = 255;
1304  output[i*comp + k] = (stbi_uc)stbi__float2int(z);
1305  }
1306  if (k < comp) {
1307  float z = data[i*comp + k] * 255 + 0.5f;
1308  if (z < 0) z = 0;
1309  if (z > 255) z = 255;
1310  output[i*comp + k] = (stbi_uc)stbi__float2int(z);
1311  }
1312  }
1313  STBI_FREE(data);
1314  return output;
1315 }
1316 #endif
1317 
1319 //
1320 // "baseline" JPEG/JFIF decoder
1321 //
1322 // simple implementation
1323 // - doesn't support delayed output of y-dimension
1324 // - simple interface (only one output format: 8-bit interleaved RGB)
1325 // - doesn't try to recover corrupt jpegs
1326 // - doesn't allow partial loading, loading multiple at once
1327 // - still fast on x86 (copying globals into locals doesn't help x86)
1328 // - allocates lots of intermediate memory (full size of all components)
1329 // - non-interleaved case requires this anyway
1330 // - allows good upsampling (see next)
1331 // high-quality
1332 // - upsampled channels are bilinearly interpolated, even across blocks
1333 // - quality integer IDCT derived from IJG's 'slow'
1334 // performance
1335 // - fast huffman; reasonable integer IDCT
1336 // - some SIMD kernels for common paths on targets with SSE2/NEON
1337 // - uses a lot of intermediate memory, could cache poorly
1338 
1339 #ifndef STBI_NO_JPEG
1340 
1341 // huffman decoding acceleration
1342 #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
1343 
// One JPEG Huffman table in the form the decoder consumes.
// "Symbol index" below means the position of a code in the list built by
// stbi__build_huffman (codes ordered by increasing length).
1344 typedef struct
1345 {
// lookup keyed by the top FAST_BITS bits of the bit buffer: the symbol index
// for codes of at most FAST_BITS bits, or 255 when the code is longer
1346  stbi_uc fast[1 << FAST_BITS];
1347  // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
// code bit pattern assigned to each symbol index
1348  stbi__uint16 code[256];
// value returned by the decoder for each symbol index
1349  stbi_uc values[256];
// code length in bits for each symbol index; the list is terminated by a 0
1350  stbi_uc size[257];
// per code length: largest code + 1, preshifted to 16 bits; entry 17 is an
// all-ones sentinel that stops the length search
1351  unsigned int maxcode[18];
// per code length: value added to a code to obtain its symbol index
1352  int delta[17]; // old 'firstsymbol' - old 'firstcode'
1353 } stbi__huffman;
1354 
// Complete state of one JPEG decode.
1355 typedef struct
1356 {
// input stream the entropy decoder pulls bytes from
1357  stbi__context *s;
// Huffman tables for DC and AC coefficients
1358  stbi__huffman huff_dc[4];
1359  stbi__huffman huff_ac[4];
// dequantization multipliers, 64 per table
1360  stbi_uc dequant[4][64];
// combined run/length/value lookup built by stbi__build_fast_ac, one per AC table
1361  stbi__int16 fast_ac[4][1 << FAST_BITS];
1362 
1363  // sizes for components, interleaved MCUs
1364  int img_h_max, img_v_max;
1365  int img_mcu_x, img_mcu_y;
1366  int img_mcu_w, img_mcu_h;
1367 
1368  // definition of jpeg image component
1369  struct
1370  {
1371  int id;
1372  int h, v;
1373  int tq;
1374  int hd, ha;
// running DC predictor: each decoded DC difference is added to it
1375  int dc_pred;
1376 
1377  int x, y, w2, h2;
1378  stbi_uc *data;
1379  void *raw_data, *raw_coeff;
1380  stbi_uc *linebuf;
1381  short *coeff; // progressive only
1382  int coeff_w, coeff_h; // number of 8x8 coefficient blocks
1383  } img_comp[4];
1384 
// bits are consumed from the most-significant end of code_buffer
1385  stbi__uint32 code_buffer; // jpeg entropy-coded buffer
1386  int code_bits; // number of valid bits
1387  unsigned char marker; // marker seen while filling entropy buffer
1388  int nomore; // flag if we saw a marker so must stop
1389 
// progressive-scan parameters read by the *_prog_dc / *_prog_ac block decoders
1390  int progressive;
// first and last zigzag index of the coefficient band covered by the scan
1391  int spec_start;
1392  int spec_end;
// succ_high == 0 selects a first pass, nonzero a refinement pass;
// succ_low is the bit position decoded values are shifted to
1393  int succ_high;
1394  int succ_low;
// count of upcoming blocks that are entirely end-of-band
1395  int eob_run;
1396 
1397  int scan_n, order[4];
1398  int restart_interval, todo;
1399 
1400  // kernels
1401  void(*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1402  void(*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1403  stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1404 } stbi__jpeg;
1405 
1406 static int stbi__build_huffman(stbi__huffman *h, int *count)
1407 {
1408  int i, j, k = 0, code;
1409  // build size list for each symbol (from JPEG spec)
1410  for (i = 0; i < 16; ++i)
1411  for (j = 0; j < count[i]; ++j)
1412  h->size[k++] = (stbi_uc)(i + 1);
1413  h->size[k] = 0;
1414 
1415  // compute actual symbols (from jpeg spec)
1416  code = 0;
1417  k = 0;
1418  for (j = 1; j <= 16; ++j) {
1419  // compute delta to add to code to compute symbol id
1420  h->delta[j] = k - code;
1421  if (h->size[k] == j) {
1422  while (h->size[k] == j)
1423  h->code[k++] = (stbi__uint16)(code++);
1424  if (code - 1 >= (1 << j)) return stbi__err("bad code lengths", "Corrupt JPEG");
1425  }
1426  // compute largest code + 1 for this size, preshifted as needed later
1427  h->maxcode[j] = code << (16 - j);
1428  code <<= 1;
1429  }
1430  h->maxcode[j] = 0xffffffff;
1431 
1432  // build non-spec acceleration table; 255 is flag for not-accelerated
1433  memset(h->fast, 255, 1 << FAST_BITS);
1434  for (i = 0; i < k; ++i) {
1435  int s = h->size[i];
1436  if (s <= FAST_BITS) {
1437  int c = h->code[i] << (FAST_BITS - s);
1438  int m = 1 << (FAST_BITS - s);
1439  for (j = 0; j < m; ++j) {
1440  h->fast[c + j] = (stbi_uc)i;
1441  }
1442  }
1443  }
1444  return 1;
1445 }
1446 
1447 // build a table that decodes both magnitude and value of small ACs in
1448 // one go.
1449 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1450 {
1451  int i;
1452  for (i = 0; i < (1 << FAST_BITS); ++i) {
1453  stbi_uc fast = h->fast[i];
1454  fast_ac[i] = 0;
1455  if (fast < 255) {
1456  int rs = h->values[fast];
1457  int run = (rs >> 4) & 15;
1458  int magbits = rs & 15;
1459  int len = h->size[fast];
1460 
1461  if (magbits && len + magbits <= FAST_BITS) {
1462  // magnitude code followed by receive_extend code
1463  int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1464  int m = 1 << (magbits - 1);
1465  if (k < m) k += (-1 << magbits) + 1;
1466  // if the result is small enough, we can fit it in fast_ac table
1467  if (k >= -128 && k <= 127)
1468  fast_ac[i] = (stbi__int16)((k << 8) + (run << 4) + (len + magbits));
1469  }
1470  }
1471  }
1472 }
1473 
1474 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
1475 {
1476  do {
1477  int b = j->nomore ? 0 : stbi__get8(j->s);
1478  if (b == 0xff) {
1479  int c = stbi__get8(j->s);
1480  if (c != 0) {
1481  j->marker = (unsigned char)c;
1482  j->nomore = 1;
1483  return;
1484  }
1485  }
1486  j->code_buffer |= b << (24 - j->code_bits);
1487  j->code_bits += 8;
1488  } while (j->code_bits <= 24);
1489 }
1490 
1491 // (1 << n) - 1
1492 static stbi__uint32 stbi__bmask[17] = { 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535 };
1493 
1494 // decode a jpeg huffman value from the bitstream
1495 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1496 {
1497  unsigned int temp;
1498  int c, k;
1499 
1500  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1501 
1502  // look at the top FAST_BITS and determine what symbol ID it is,
1503  // if the code is <= FAST_BITS
1504  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
1505  k = h->fast[c];
1506  if (k < 255) {
1507  int s = h->size[k];
1508  if (s > j->code_bits)
1509  return -1;
1510  j->code_buffer <<= s;
1511  j->code_bits -= s;
1512  return h->values[k];
1513  }
1514 
1515  // naive test is to shift the code_buffer down so k bits are
1516  // valid, then test against maxcode. To speed this up, we've
1517  // preshifted maxcode left so that it has (16-k) 0s at the
1518  // end; in other words, regardless of the number of bits, it
1519  // wants to be compared against something shifted to have 16;
1520  // that way we don't need to shift inside the loop.
1521  temp = j->code_buffer >> 16;
1522  for (k = FAST_BITS + 1;; ++k)
1523  if (temp < h->maxcode[k])
1524  break;
1525  if (k == 17) {
1526  // error! code not found
1527  j->code_bits -= 16;
1528  return -1;
1529  }
1530 
1531  if (k > j->code_bits)
1532  return -1;
1533 
1534  // convert the huffman code to the symbol id
1535  c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1536  STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1537 
1538  // convert the id to a symbol
1539  j->code_bits -= k;
1540  j->code_buffer <<= k;
1541  return h->values[c];
1542 }
1543 
1544 // bias[n] = (-1<<n) + 1
1545 static int const stbi__jbias[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 };
1546 
1547 // combined JPEG 'receive' and JPEG 'extend', since baseline
1548 // always extends everything it receives.
1549 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1550 {
1551  unsigned int k;
1552  int sgn;
1553  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1554 
1555  sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1556  k = stbi_lrot(j->code_buffer, n);
1557  j->code_buffer = k & ~stbi__bmask[n];
1558  k &= stbi__bmask[n];
1559  j->code_bits -= n;
1560  return k + (stbi__jbias[n] & ~sgn);
1561 }
1562 
1563 // get some unsigned bits
1564 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1565 {
1566  unsigned int k;
1567  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1568  k = stbi_lrot(j->code_buffer, n);
1569  j->code_buffer = k & ~stbi__bmask[n];
1570  k &= stbi__bmask[n];
1571  j->code_bits -= n;
1572  return k;
1573 }
1574 
1575 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1576 {
1577  unsigned int k;
1578  if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1579  k = j->code_buffer;
1580  j->code_buffer <<= 1;
1581  --j->code_bits;
1582  return k & 0x80000000;
1583 }
1584 
1585 // given a value that's at position X in the zigzag stream,
1586 // where does it appear in the 8x8 matrix coded as row-major?
1587 static stbi_uc stbi__jpeg_dezigzag[64 + 15] =
1588 {
1589  0, 1, 8, 16, 9, 2, 3, 10,
1590  17, 24, 32, 25, 18, 11, 4, 5,
1591  12, 19, 26, 33, 40, 48, 41, 34,
1592  27, 20, 13, 6, 7, 14, 21, 28,
1593  35, 42, 49, 56, 57, 50, 43, 36,
1594  29, 22, 15, 23, 30, 37, 44, 51,
1595  58, 59, 52, 45, 38, 31, 39, 46,
1596  53, 60, 61, 54, 47, 55, 62, 63,
1597  // let corrupt input sample past end
1598  63, 63, 63, 63, 63, 63, 63, 63,
1599  63, 63, 63, 63, 63, 63, 63
1600 };
1601 
1602 // decode one 64-entry block--
1603 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
1604 {
1605  int diff, dc, k;
1606  int t;
1607 
1608  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1609  t = stbi__jpeg_huff_decode(j, hdc);
1610  if (t < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
1611 
1612  // 0 all the ac values now so we can do it 32-bits at a time
1613  memset(data, 0, 64 * sizeof(data[0]));
1614 
1615  diff = t ? stbi__extend_receive(j, t) : 0;
1616  dc = j->img_comp[b].dc_pred + diff;
1617  j->img_comp[b].dc_pred = dc;
1618  data[0] = (short)(dc * dequant[0]);
1619 
1620  // decode AC components, see JPEG spec
1621  k = 1;
1622  do {
1623  unsigned int zig;
1624  int c, r, s;
1625  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1626  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
1627  r = fac[c];
1628  if (r) { // fast-AC path
1629  k += (r >> 4) & 15; // run
1630  s = r & 15; // combined length
1631  j->code_buffer <<= s;
1632  j->code_bits -= s;
1633  // decode into unzigzag'd location
1634  zig = stbi__jpeg_dezigzag[k++];
1635  data[zig] = (short)((r >> 8) * dequant[zig]);
1636  }
1637  else {
1638  int rs = stbi__jpeg_huff_decode(j, hac);
1639  if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
1640  s = rs & 15;
1641  r = rs >> 4;
1642  if (s == 0) {
1643  if (rs != 0xf0) break; // end block
1644  k += 16;
1645  }
1646  else {
1647  k += r;
1648  // decode into unzigzag'd location
1649  zig = stbi__jpeg_dezigzag[k++];
1650  data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
1651  }
1652  }
1653  } while (k < 64);
1654  return 1;
1655 }
1656 
1657 static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
1658 {
1659  int diff, dc;
1660  int t;
1661  if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1662 
1663  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1664 
1665  if (j->succ_high == 0) {
1666  // first scan for DC coefficient, must be first
1667  memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
1668  t = stbi__jpeg_huff_decode(j, hdc);
1669  diff = t ? stbi__extend_receive(j, t) : 0;
1670 
1671  dc = j->img_comp[b].dc_pred + diff;
1672  j->img_comp[b].dc_pred = dc;
1673  data[0] = (short)(dc << j->succ_low);
1674  }
1675  else {
1676  // refinement scan for DC coefficient
1677  if (stbi__jpeg_get_bit(j))
1678  data[0] += (short)(1 << j->succ_low);
1679  }
1680  return 1;
1681 }
1682 
1683 // @OPTIMIZE: store non-zigzagged during the decode passes,
1684 // and only de-zigzag when dequantizing
// Decodes the AC coefficients of one block for a progressive scan covering
// zigzag positions spec_start..spec_end.
//   data - the block's 64 coefficients, updated in place (row-major order)
//   hac  - AC Huffman table
//   fac  - fast AC lookup matching 'hac' (used by the first pass only)
// succ_high == 0 selects a first pass that stores new coefficients shifted
// left by succ_low; otherwise this is a refinement pass that adds one bit at
// position succ_low to existing coefficients and may insert new ones.
// Returns 1 on success, 0 (failure reason set) on corrupt input.
1685 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1686 {
1687  int k;
// a scan starting at position 0 would include the DC coefficient
1688  if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1689 
1690  if (j->succ_high == 0) {
1691  int shift = j->succ_low;
1692 
// inside an end-of-band run: this block adds nothing, just count it off
1693  if (j->eob_run) {
1694  --j->eob_run;
1695  return 1;
1696  }
1697 
1698  k = j->spec_start;
1699  do {
1700  unsigned int zig;
1701  int c, r, s;
1702  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1703  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
1704  r = fac[c];
1705  if (r) { // fast-AC path
1706  k += (r >> 4) & 15; // run
1707  s = r & 15; // combined length
1708  j->code_buffer <<= s;
1709  j->code_bits -= s;
1710  zig = stbi__jpeg_dezigzag[k++];
1711  data[zig] = (short)((r >> 8) << shift);
1712  }
1713  else {
1714  int rs = stbi__jpeg_huff_decode(j, hac);
1715  if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
1716  s = rs & 15;
1717  r = rs >> 4;
1718  if (s == 0) {
// size 0 with run < 15 starts an end-of-band run of (1 << r) blocks plus r
// extra bits; the current block is counted off immediately
1719  if (r < 15) {
1720  j->eob_run = (1 << r);
1721  if (r)
1722  j->eob_run += stbi__jpeg_get_bits(j, r);
1723  --j->eob_run;
1724  break;
1725  }
// size 0 with run == 15: skip sixteen positions
1726  k += 16;
1727  }
1728  else {
1729  k += r;
1730  zig = stbi__jpeg_dezigzag[k++];
1731  data[zig] = (short)(stbi__extend_receive(j, s) << shift);
1732  }
1733  }
1734  } while (k <= j->spec_end);
1735  }
1736  else {
1737  // refinement scan for these AC coefficients
1738 
// the single bit this pass contributes to each coefficient
1739  short bit = (short)(1 << j->succ_low);
1740 
1741  if (j->eob_run) {
// inside an end-of-band run: no new coefficients, but every already-nonzero
// coefficient in the band still reads one bit and may be refined
1742  --j->eob_run;
1743  for (k = j->spec_start; k <= j->spec_end; ++k) {
1744  short *p = &data[stbi__jpeg_dezigzag[k]];
1745  if (*p != 0)
1746  if (stbi__jpeg_get_bit(j))
1747  if ((*p & bit) == 0) {
// grow the magnitude: away from zero in either direction
1748  if (*p > 0)
1749  *p += bit;
1750  else
1751  *p -= bit;
1752  }
1753  }
1754  }
1755  else {
1756  k = j->spec_start;
1757  do {
1758  int r, s;
1759  int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
1760  if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
1761  s = rs & 15;
1762  r = rs >> 4;
1763  if (s == 0) {
1764  if (r < 15) {
// end-of-band run; the current block is handled by the loop below, so it is
// already excluded from the stored count
1765  j->eob_run = (1 << r) - 1;
1766  if (r)
1767  j->eob_run += stbi__jpeg_get_bits(j, r);
1768  r = 64; // force end of block
1769  }
1770  else
1771  r = 16; // r=15 is the code for 16 0s
1772  }
1773  else {
// a new coefficient in a refinement pass always has magnitude one bit
1774  if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
1775  // sign bit
1776  if (stbi__jpeg_get_bit(j))
1777  s = bit;
1778  else
1779  s = -bit;
1780  }
1781 
// r counts only zero-valued positions; nonzero ones passed on the way each
// consume a refinement bit but do not use up the run
1782  // advance by r
1783  while (k <= j->spec_end) {
1784  short *p = &data[stbi__jpeg_dezigzag[k]];
1785  if (*p != 0) {
1786  if (stbi__jpeg_get_bit(j))
1787  if ((*p & bit) == 0) {
1788  if (*p > 0)
1789  *p += bit;
1790  else
1791  *p -= bit;
1792  }
1793  ++k;
1794  }
1795  else {
1796  if (r == 0) {
// run exhausted: this zero position receives the new coefficient, if any
1797  if (s)
1798  data[stbi__jpeg_dezigzag[k++]] = (short)s;
1799  break;
1800  }
1801  --r;
1802  ++k;
1803  }
1804  }
1805  } while (k <= j->spec_end);
1806  }
1807  }
1808  return 1;
1809 }
1810 
1811 // take a -128..127 value and stbi__clamp it and convert to 0..255
1812 stbi_inline static stbi_uc stbi__clamp(int x)
1813 {
1814  // trick to use a single test to catch both cases
1815  if ((unsigned int)x > 255) {
1816  if (x < 0) return 0;
1817  if (x > 255) return 255;
1818  }
1819  return (stbi_uc)x;
1820 }
1821 
1822 #define stbi__f2f(x) ((int) (((x) * 4096 + 0.5)))
1823 #define stbi__fsh(x) ((x) << 12)
1824 
1825 // derived from jidctint -- DCT_ISLOW
1826 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
1827  int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
1828  p2 = s2; \
1829  p3 = s6; \
1830  p1 = (p2+p3) * stbi__f2f(0.5411961f); \
1831  t2 = p1 + p3*stbi__f2f(-1.847759065f); \
1832  t3 = p1 + p2*stbi__f2f( 0.765366865f); \
1833  p2 = s0; \
1834  p3 = s4; \
1835  t0 = stbi__fsh(p2+p3); \
1836  t1 = stbi__fsh(p2-p3); \
1837  x0 = t0+t3; \
1838  x3 = t0-t3; \
1839  x1 = t1+t2; \
1840  x2 = t1-t2; \
1841  t0 = s7; \
1842  t1 = s5; \
1843  t2 = s3; \
1844  t3 = s1; \
1845  p3 = t0+t2; \
1846  p4 = t1+t3; \
1847  p1 = t0+t3; \
1848  p2 = t1+t2; \
1849  p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
1850  t0 = t0*stbi__f2f( 0.298631336f); \
1851  t1 = t1*stbi__f2f( 2.053119869f); \
1852  t2 = t2*stbi__f2f( 3.072711026f); \
1853  t3 = t3*stbi__f2f( 1.501321110f); \
1854  p1 = p5 + p1*stbi__f2f(-0.899976223f); \
1855  p2 = p5 + p2*stbi__f2f(-2.562915447f); \
1856  p3 = p3*stbi__f2f(-1.961570560f); \
1857  p4 = p4*stbi__f2f(-0.390180644f); \
1858  t3 += p1+p4; \
1859  t2 += p2+p3; \
1860  t1 += p2+p4; \
1861  t0 += p1+p3;
1862 
1863 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
1864 {
1865  int i, val[64], *v = val;
1866  stbi_uc *o;
1867  short *d = data;
1868 
1869  // columns
1870  for (i = 0; i < 8; ++i, ++d, ++v) {
1871  // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1872  if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0
1873  && d[40] == 0 && d[48] == 0 && d[56] == 0) {
1874  // no shortcut 0 seconds
1875  // (1|2|3|4|5|6|7)==0 0 seconds
1876  // all separate -0.047 seconds
1877  // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
1878  int dcterm = d[0] << 2;
1879  v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1880  }
1881  else {
1882  STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
1883  // constants scaled things up by 1<<12; let's bring them back
1884  // down, but keep 2 extra bits of precision
1885  x0 += 512; x1 += 512; x2 += 512; x3 += 512;
1886  v[0] = (x0 + t3) >> 10;
1887  v[56] = (x0 - t3) >> 10;
1888  v[8] = (x1 + t2) >> 10;
1889  v[48] = (x1 - t2) >> 10;
1890  v[16] = (x2 + t1) >> 10;
1891  v[40] = (x2 - t1) >> 10;
1892  v[24] = (x3 + t0) >> 10;
1893  v[32] = (x3 - t0) >> 10;
1894  }
1895  }
1896 
1897  for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {
1898  // no fast case since the first 1D IDCT spread components out
1899  STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
1900  // constants scaled things up by 1<<12, plus we had 1<<2 from first
1901  // loop, plus horizontal and vertical each scale by sqrt(8) so together
1902  // we've got an extra 1<<3, so 1<<17 total we need to remove.
1903  // so we want to round that, which means adding 0.5 * 1<<17,
1904  // aka 65536. Also, we'll end up with -128 to 127 that we want
1905  // to encode as 0..255 by adding 128, so we'll add that before the shift
1906  x0 += 65536 + (128 << 17);
1907  x1 += 65536 + (128 << 17);
1908  x2 += 65536 + (128 << 17);
1909  x3 += 65536 + (128 << 17);
1910  // tried computing the shifts into temps, or'ing the temps to see
1911  // if any were out of range, but that was slower
1912  o[0] = stbi__clamp((x0 + t3) >> 17);
1913  o[7] = stbi__clamp((x0 - t3) >> 17);
1914  o[1] = stbi__clamp((x1 + t2) >> 17);
1915  o[6] = stbi__clamp((x1 - t2) >> 17);
1916  o[2] = stbi__clamp((x2 + t1) >> 17);
1917  o[5] = stbi__clamp((x2 - t1) >> 17);
1918  o[3] = stbi__clamp((x3 + t0) >> 17);
1919  o[4] = stbi__clamp((x3 - t0) >> 17);
1920  }
1921 }
1922 
1923 #ifdef STBI_SSE2
1924 // sse2 integer IDCT. not the fastest possible implementation but it
1925 // produces bit-identical results to the generic C version so it's
1926 // fully "transparent".
//
// Parameters mirror stbi__idct_block: `data` holds the 64 coefficients of one
// block (row*8 + column), `out` receives 8 rows of 8 bytes, `out_stride` bytes
// apart. `data` is read with _mm_load_si128, an aligned load, so it must be
// 16-byte aligned. Each output row is written with one 8-byte store.
//
// Shape of the computation: column pass -> 16-bit transpose -> row pass ->
// pack to bytes -> 8-bit transpose -> store.
1927 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
1928 {
1929  // This is constructed to match our regular (generic) integer IDCT exactly.
1930  __m128i row0, row1, row2, row3, row4, row5, row6, row7;
1931  __m128i tmp;
1932
1933  // dot product constant: even elems=x, odd elems=y
1934 #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
1935
1936  // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
1937  // out(1) = c1[even]*x + c1[odd]*y
 // (x and y are interleaved so that each _mm_madd_epi16 lane sees one (x,y)
 // pair next to one (even,odd) constant pair from dct_const)
1938 #define dct_rot(out0,out1, x,y,c0,c1) \
1939  __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
1940  __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
1941  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
1942  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
1943  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
1944  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
1945
1946  // out = in << 12 (in 16-bit, out 32-bit)
 // (unpacking `in` above a zero word puts it in the top half of each 32-bit
 // lane, i.e. in << 16; the arithmetic shift right by 4 then leaves in << 12
 // with the sign preserved)
1947 #define dct_widen(out, in) \
1948  __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
1949  __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
1950
1951  // wide add
1952 #define dct_wadd(out, a, b) \
1953  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
1954  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
1955
1956  // wide sub
1957 #define dct_wsub(out, a, b) \
1958  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
1959  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
1960
1961  // butterfly a/b, add bias, then shift by "s" and pack
 // (out0 = (a + bias + b) >> s, out1 = (a + bias - b) >> s; _mm_packs_epi32
 // narrows the 32-bit results back to 16 bits with signed saturation)
1962 #define dct_bfly32o(out0, out1, a,b,bias,s) \
1963  { \
1964  __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
1965  __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
1966  dct_wadd(sum, abiased, b); \
1967  dct_wsub(dif, abiased, b); \
1968  out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
1969  out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
1970  }
1971
1972  // 8-bit interleave step (for transposes)
1973 #define dct_interleave8(a, b) \
1974  tmp = a; \
1975  a = _mm_unpacklo_epi8(a, b); \
1976  b = _mm_unpackhi_epi8(tmp, b)
1977
1978  // 16-bit interleave step (for transposes)
1979 #define dct_interleave16(a, b) \
1980  tmp = a; \
1981  a = _mm_unpacklo_epi16(a, b); \
1982  b = _mm_unpackhi_epi16(tmp, b)
1983
 // one 1D IDCT over all eight registers at once: reads row0..row7 and
 // overwrites them with the results. `bias` is added before the final
 // arithmetic shift right by `shift`.
1984 #define dct_pass(bias,shift) \
1985  { \
1986  /* even part */ \
1987  dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
1988  __m128i sum04 = _mm_add_epi16(row0, row4); \
1989  __m128i dif04 = _mm_sub_epi16(row0, row4); \
1990  dct_widen(t0e, sum04); \
1991  dct_widen(t1e, dif04); \
1992  dct_wadd(x0, t0e, t3e); \
1993  dct_wsub(x3, t0e, t3e); \
1994  dct_wadd(x1, t1e, t2e); \
1995  dct_wsub(x2, t1e, t2e); \
1996  /* odd part */ \
1997  dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
1998  dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
1999  __m128i sum17 = _mm_add_epi16(row1, row7); \
2000  __m128i sum35 = _mm_add_epi16(row3, row5); \
2001  dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2002  dct_wadd(x4, y0o, y4o); \
2003  dct_wadd(x5, y1o, y5o); \
2004  dct_wadd(x6, y2o, y5o); \
2005  dct_wadd(x7, y3o, y4o); \
2006  dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2007  dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2008  dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2009  dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2010  }
2011
 // coefficient pairs for dct_rot, built from the same stbi__f2f constants the
 // generic version uses; each pair is (even, odd) as consumed by dct_const
2012  __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2013  __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), stbi__f2f(0.5411961f));
2014  __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2015  __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2016  __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), stbi__f2f(-1.961570560f));
2017  __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));
2018  __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), stbi__f2f(-0.390180644f));
2019  __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));
2020
2021  // rounding biases in column/row passes, see stbi__idct_block for explanation.
2022  __m128i bias_0 = _mm_set1_epi32(512);
2023  __m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));
2024
2025  // load
 // (aligned loads: data must sit on a 16-byte boundary)
2026  row0 = _mm_load_si128((const __m128i *) (data + 0 * 8));
2027  row1 = _mm_load_si128((const __m128i *) (data + 1 * 8));
2028  row2 = _mm_load_si128((const __m128i *) (data + 2 * 8));
2029  row3 = _mm_load_si128((const __m128i *) (data + 3 * 8));
2030  row4 = _mm_load_si128((const __m128i *) (data + 4 * 8));
2031  row5 = _mm_load_si128((const __m128i *) (data + 5 * 8));
2032  row6 = _mm_load_si128((const __m128i *) (data + 6 * 8));
2033  row7 = _mm_load_si128((const __m128i *) (data + 7 * 8));
2034
2035  // column pass
2036  dct_pass(bias_0, 10);
2037
2038  {
2039  // 16bit 8x8 transpose pass 1
2040  dct_interleave16(row0, row4);
2041  dct_interleave16(row1, row5);
2042  dct_interleave16(row2, row6);
2043  dct_interleave16(row3, row7);
2044
2045  // transpose pass 2
2046  dct_interleave16(row0, row2);
2047  dct_interleave16(row1, row3);
2048  dct_interleave16(row4, row6);
2049  dct_interleave16(row5, row7);
2050
2051  // transpose pass 3
2052  dct_interleave16(row0, row1);
2053  dct_interleave16(row2, row3);
2054  dct_interleave16(row4, row5);
2055  dct_interleave16(row6, row7);
2056  }
2057
2058  // row pass
2059  dct_pass(bias_1, 17);
2060
2061  {
2062  // pack
 // (_mm_packus_epi16 saturates each 16-bit value to 0..255; this is the
 // step that stbi__clamp performs in the generic version)
2063  __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2064  __m128i p1 = _mm_packus_epi16(row2, row3);
2065  __m128i p2 = _mm_packus_epi16(row4, row5);
2066  __m128i p3 = _mm_packus_epi16(row6, row7);
2067
2068  // 8bit 8x8 transpose pass 1
2069  dct_interleave8(p0, p2); // a0e0a1e1...
2070  dct_interleave8(p1, p3); // c0g0c1g1...
2071
2072  // transpose pass 2
2073  dct_interleave8(p0, p1); // a0c0e0g0...
2074  dct_interleave8(p2, p3); // b0d0f0h0...
2075
2076  // transpose pass 3
2077  dct_interleave8(p0, p2); // a0b0c0d0...
2078  dct_interleave8(p1, p3); // a4b4c4d4...
2079
2080  // store
 // each register now carries two output rows: the low 8 bytes are stored
 // directly, then shuffle 0x4e swaps the two 64-bit halves so the other
 // row can be stored the same way. Registers are consumed in the order
 // p0, p2, p1, p3, which is how the last interleave left them.
2081  _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2082  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2083  _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2084  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2085  _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2086  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2087  _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2088  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2089  }
2090
2091 #undef dct_const
2092 #undef dct_rot
2093 #undef dct_widen
2094 #undef dct_wadd
2095 #undef dct_wsub
2096 #undef dct_bfly32o
2097 #undef dct_interleave8
2098 #undef dct_interleave16
2099 #undef dct_pass
2100 }
2101
2102 #endif // STBI_SSE2
2103 
2104 #ifdef STBI_NEON
2105
2106 // NEON integer IDCT. should produce bit-identical
2107 // results to the generic C version.
//
// Parameters mirror stbi__idct_block: `data` holds the 64 coefficients of one
// block (row*8 + column), `out` receives 8 rows of 8 bytes, `out_stride` bytes
// apart.
//
// Shape of the computation: load -> add DC bias -> column pass -> 16-bit
// transpose -> row pass -> round/saturate to bytes -> 8-bit transpose -> store.
2108 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2109 {
2110  int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2111
 // the same stbi__f2f constants the generic version uses, each broadcast to
 // every lane so it can be fed to the widening multiplies below
2112  int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2113  int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2114  int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));
2115  int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));
2116  int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2117  int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2118  int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2119  int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2120  int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));
2121  int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));
2122  int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));
2123  int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));
2124
 // out = inq * coeff (16-bit inputs, 32-bit results, as a _l/_h pair)
2125 #define dct_long_mul(out, inq, coeff) \
2126  int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2127  int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2128
 // out = acc + inq * coeff (acc and out 32-bit _l/_h pairs)
2129 #define dct_long_mac(out, acc, inq, coeff) \
2130  int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2131  int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2132
 // out = inq << 12 (16-bit input, 32-bit result)
2133 #define dct_widen(out, inq) \
2134  int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2135  int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2136
2137  // wide add
2138 #define dct_wadd(out, a, b) \
2139  int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2140  int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2141
2142  // wide sub
2143 #define dct_wsub(out, a, b) \
2144  int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2145  int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2146
2147  // butterfly a/b, then shift using "shiftop" by "s" and pack
 // (unlike the SSE2 version there is no bias argument: rounding comes from
 // the choice of shiftop, and the +128 level shift from the DC bias below)
2148 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2149  { \
2150  dct_wadd(sum, a, b); \
2151  dct_wsub(dif, a, b); \
2152  out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2153  out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2154  }
2155
 // one 1D IDCT over all eight registers at once: reads row0..row7 and
 // overwrites them with the results, narrowed by shiftop(.., shift)
2156 #define dct_pass(shiftop, shift) \
2157  { \
2158  /* even part */ \
2159  int16x8_t sum26 = vaddq_s16(row2, row6); \
2160  dct_long_mul(p1e, sum26, rot0_0); \
2161  dct_long_mac(t2e, p1e, row6, rot0_1); \
2162  dct_long_mac(t3e, p1e, row2, rot0_2); \
2163  int16x8_t sum04 = vaddq_s16(row0, row4); \
2164  int16x8_t dif04 = vsubq_s16(row0, row4); \
2165  dct_widen(t0e, sum04); \
2166  dct_widen(t1e, dif04); \
2167  dct_wadd(x0, t0e, t3e); \
2168  dct_wsub(x3, t0e, t3e); \
2169  dct_wadd(x1, t1e, t2e); \
2170  dct_wsub(x2, t1e, t2e); \
2171  /* odd part */ \
2172  int16x8_t sum15 = vaddq_s16(row1, row5); \
2173  int16x8_t sum17 = vaddq_s16(row1, row7); \
2174  int16x8_t sum35 = vaddq_s16(row3, row5); \
2175  int16x8_t sum37 = vaddq_s16(row3, row7); \
2176  int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2177  dct_long_mul(p5o, sumodd, rot1_0); \
2178  dct_long_mac(p1o, p5o, sum17, rot1_1); \
2179  dct_long_mac(p2o, p5o, sum35, rot1_2); \
2180  dct_long_mul(p3o, sum37, rot2_0); \
2181  dct_long_mul(p4o, sum15, rot2_1); \
2182  dct_wadd(sump13o, p1o, p3o); \
2183  dct_wadd(sump24o, p2o, p4o); \
2184  dct_wadd(sump23o, p2o, p3o); \
2185  dct_wadd(sump14o, p1o, p4o); \
2186  dct_long_mac(x4, sump13o, row7, rot3_0); \
2187  dct_long_mac(x5, sump24o, row5, rot3_1); \
2188  dct_long_mac(x6, sump23o, row3, rot3_2); \
2189  dct_long_mac(x7, sump14o, row1, rot3_3); \
2190  dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2191  dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2192  dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2193  dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2194  }
2195
2196  // load
2197  row0 = vld1q_s16(data + 0 * 8);
2198  row1 = vld1q_s16(data + 1 * 8);
2199  row2 = vld1q_s16(data + 2 * 8);
2200  row3 = vld1q_s16(data + 3 * 8);
2201  row4 = vld1q_s16(data + 4 * 8);
2202  row5 = vld1q_s16(data + 5 * 8);
2203  row6 = vld1q_s16(data + 6 * 8);
2204  row7 = vld1q_s16(data + 7 * 8);
2205
2206  // add DC bias
 // (only lane 0 of row0, i.e. the DC coefficient, is changed. The two passes
 // scale the DC term by <<12 >>10 and then <<12 >>17, a net factor of 1/8,
 // so 1024 here comes out as +128 on every output sample -- the same level
 // shift stbi__idct_block applies with 128 << 17.)
2207  row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2208
2209  // column pass
 // (the rounding shift stands in for the +512 bias of the generic version)
2210  dct_pass(vrshrn_n_s32, 10);
2211
2212  // 16bit 8x8 transpose
2213  {
2214  // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2215  // whether compilers actually get this is another story, sadly.
2216 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2217 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2218 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2219
2220  // pass 1
2221  dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2222  dct_trn16(row2, row3);
2223  dct_trn16(row4, row5);
2224  dct_trn16(row6, row7);
2225
2226  // pass 2
2227  dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2228  dct_trn32(row1, row3);
2229  dct_trn32(row4, row6);
2230  dct_trn32(row5, row7);
2231
2232  // pass 3
2233  dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2234  dct_trn64(row1, row5);
2235  dct_trn64(row2, row6);
2236  dct_trn64(row3, row7);
2237
2238 #undef dct_trn16
2239 #undef dct_trn32
2240 #undef dct_trn64
2241  }
2242
2243  // row pass
2244  // vrshrn_n_s32 only supports shifts up to 16, we need
2245  // 17. so do a non-rounding shift of 16 first then follow
2246  // up with a rounding shift by 1.
2247  dct_pass(vshrn_n_s32, 16);
2248
2249  {
2250  // pack and round
 // (the remaining shift by 1, with rounding, narrowing to unsigned bytes
 // with saturation; this takes the place of stbi__clamp)
2251  uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2252  uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2253  uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2254  uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2255  uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2256  uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2257  uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2258  uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2259
2260  // again, these can translate into one instruction, but often don't.
2261 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2262 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2263 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2264
2265  // sadly can't use interleaved stores here since we only write
2266  // 8 bytes to each scan line!
2267
2268  // 8x8 8-bit transpose pass 1
2269  dct_trn8_8(p0, p1);
2270  dct_trn8_8(p2, p3);
2271  dct_trn8_8(p4, p5);
2272  dct_trn8_8(p6, p7);
2273
2274  // pass 2
2275  dct_trn8_16(p0, p2);
2276  dct_trn8_16(p1, p3);
2277  dct_trn8_16(p4, p6);
2278  dct_trn8_16(p5, p7);
2279
2280  // pass 3
2281  dct_trn8_32(p0, p4);
2282  dct_trn8_32(p1, p5);
2283  dct_trn8_32(p2, p6);
2284  dct_trn8_32(p3, p7);
2285
2286  // store
 // (one 8-byte row per register, in order p0..p7)
2287  vst1_u8(out, p0); out += out_stride;
2288  vst1_u8(out, p1); out += out_stride;
2289  vst1_u8(out, p2); out += out_stride;
2290  vst1_u8(out, p3); out += out_stride;
2291  vst1_u8(out, p4); out += out_stride;
2292  vst1_u8(out, p5); out += out_stride;
2293  vst1_u8(out, p6); out += out_stride;
2294  vst1_u8(out, p7);
2295
2296 #undef dct_trn8_8
2297 #undef dct_trn8_16
2298 #undef dct_trn8_32
2299  }
2300
2301 #undef dct_long_mul
2302 #undef dct_long_mac
2303 #undef dct_widen
2304 #undef dct_wadd
2305 #undef dct_wsub
2306 #undef dct_bfly32o
2307 #undef dct_pass
2308 }
2309
2310 #endif // STBI_NEON
2311 
2312 #define STBI__MARKER_none 0xff
2313 // if there's a pending marker from the entropy stream, return that
2314 // otherwise, fetch from the stream and get a marker. if there's no
2315 // marker, return 0xff, which is never a valid marker value
2316 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2317 {
2318  stbi_uc x;
2319  if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2320  x = stbi__get8(j->s);
2321  if (x != 0xff) return STBI__MARKER_none;
2322  while (x == 0xff)
2323  x = stbi__get8(j->s);
2324  return x;
2325 }
2326 
2327 // in each scan, we'll have scan_n components, and the order
2328 // of the components is specified by order[]
// true for the restart markers RST0..RST7 (0xd0..0xd7). Those eight values are
// exactly the ones that equal 0xd0 >> 3 once the low three bits are shifted
// out, so a single shift-and-compare covers the range while expanding the
// argument only once (the two-sided range test expanded it twice).
#define STBI__RESTART(x)     (((x) >> 3) == (0xd0 >> 3))
2330 
2331 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2332 // the dc prediction
2333 static void stbi__jpeg_reset(stbi__jpeg *j)
2334 {
2335  j->code_bits = 0;
2336  j->code_buffer = 0;
2337  j->nomore = 0;
2338  j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
2339  j->marker = STBI__MARKER_none;
2340  j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2341  j->eob_run = 0;
2342  // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2343  // since we don't even allow 1<<30 pixels
2344 }
2345 
2346 static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2347 {
2348  stbi__jpeg_reset(z);
2349  if (!z->progressive) {
2350  if (z->scan_n == 1) {
2351  int i, j;
2352  STBI_SIMD_ALIGN(short, data[64]);
2353  int n = z->order[0];
2354  // non-interleaved data, we just need to process one block at a time,
2355  // in trivial scanline order
2356  // number of blocks to do just depends on how many actual "pixels" this
2357  // component has, independent of interleaved MCU blocking and such
2358  int w = (z->img_comp[n].x + 7) >> 3;
2359  int h = (z->img_comp[n].y + 7) >> 3;
2360  for (j = 0; j < h; ++j) {
2361  for (i = 0; i < w; ++i) {
2362  int ha = z->img_comp[n].ha;
2363  if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2364  z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2*j * 8 + i * 8, z->img_comp[n].w2, data);
2365  // every data block is an MCU, so countdown the restart interval
2366  if (--z->todo <= 0) {
2367  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2368  // if it's NOT a restart, then just bail, so we get corrupt data
2369  // rather than no data
2370  if (!STBI__RESTART(z->marker)) return 1;
2371  stbi__jpeg_reset(z);
2372  }
2373  }
2374  }
2375  return 1;
2376  }
2377  else { // interleaved
2378  int i, j, k, x, y;
2379  STBI_SIMD_ALIGN(short, data[64]);
2380  for (j = 0; j < z->img_mcu_y; ++j) {
2381  for (i = 0; i < z->img_mcu_x; ++i) {
2382  // scan an interleaved mcu... process scan_n components in order
2383  for (k = 0; k < z->scan_n; ++k) {
2384  int n = z->order[k];
2385  // scan out an mcu's worth of this component; that's just determined
2386  // by the basic H and V specified for the component
2387  for (y = 0; y < z->img_comp[n].v; ++y) {
2388  for (x = 0; x < z->img_comp[n].h; ++x) {
2389  int x2 = (i*z->img_comp[n].h + x) * 8;
2390  int y2 = (j*z->img_comp[n].v + y) * 8;
2391  int ha = z->img_comp[n].ha;
2392  if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2393  z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2*y2 + x2, z->img_comp[n].w2, data);
2394  }
2395  }
2396  }
2397  // after all interleaved components, that's an interleaved MCU,
2398  // so now count down the restart interval
2399  if (--z->todo <= 0) {
2400  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2401  if (!STBI__RESTART(z->marker)) return 1;
2402  stbi__jpeg_reset(z);
2403  }
2404  }
2405  }
2406  return 1;
2407  }
2408  }
2409  else {
2410  if (z->scan_n == 1) {
2411  int i, j;
2412  int n = z->order[0];
2413  // non-interleaved data, we just need to process one block at a time,
2414  // in trivial scanline order
2415  // number of blocks to do just depends on how many actual "pixels" this
2416  // component has, independent of interleaved MCU blocking and such
2417  int w = (z->img_comp[n].x + 7) >> 3;
2418  int h = (z->img_comp[n].y + 7) >> 3;
2419  for (j = 0; j < h; ++j) {
2420  for (i = 0; i < w; ++i) {
2421  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2422  if (z->spec_start == 0) {
2423  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2424  return 0;
2425  }
2426  else {
2427  int ha = z->img_comp[n].ha;
2428  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
2429  return 0;
2430  }
2431  // every data block is an MCU, so countdown the restart interval
2432  if (--z->todo <= 0) {
2433  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2434  if (!STBI__RESTART(z->marker)) return 1;
2435  stbi__jpeg_reset(z);
2436  }
2437  }
2438  }
2439  return 1;
2440  }
2441  else { // interleaved
2442  int i, j, k, x, y;
2443  for (j = 0; j < z->img_mcu_y; ++j) {
2444  for (i = 0; i < z->img_mcu_x; ++i) {
2445  // scan an interleaved mcu... process scan_n components in order
2446  for (k = 0; k < z->scan_n; ++k) {
2447  int n = z->order[k];
2448  // scan out an mcu's worth of this component; that's just determined
2449  // by the basic H and V specified for the component
2450  for (y = 0; y < z->img_comp[n].v; ++y) {
2451  for (x = 0; x < z->img_comp[n].h; ++x) {
2452  int x2 = (i*z->img_comp[n].h + x);
2453  int y2 = (j*z->img_comp[n].v + y);
2454  short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
2455  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2456  return 0;
2457  }
2458  }
2459  }
2460  // after all interleaved components, that's an interleaved MCU,
2461  // so now count down the restart interval
2462  if (--z->todo <= 0) {
2463  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2464  if (!STBI__RESTART(z->marker)) return 1;
2465  stbi__jpeg_reset(z);
2466  }
2467  }
2468  }
2469  return 1;
2470  }
2471  }
2472 }
2473 
2474 static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
2475 {
2476  int i;
2477  for (i = 0; i < 64; ++i)
2478  data[i] *= dequant[i];
2479 }
2480 
2481 static void stbi__jpeg_finish(stbi__jpeg *z)
2482 {
2483  if (z->progressive) {
2484  // dequantize and idct the data
2485  int i, j, n;
2486  for (n = 0; n < z->s->img_n; ++n) {
2487  int w = (z->img_comp[n].x + 7) >> 3;
2488  int h = (z->img_comp[n].y + 7) >> 3;
2489  for (j = 0; j < h; ++j) {
2490  for (i = 0; i < w; ++i) {
2491  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2492  stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
2493  z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2*j * 8 + i * 8, z->img_comp[n].w2, data);
2494  }
2495  }
2496  }
2497  }
2498 }
2499 
2500 static int stbi__process_marker(stbi__jpeg *z, int m)
2501 {
2502  int L;
2503  switch (m) {
2504  case STBI__MARKER_none: // no marker found
2505  return stbi__err("expected marker", "Corrupt JPEG");
2506 
2507  case 0xDD: // DRI - specify restart interval
2508  if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len", "Corrupt JPEG");
2509  z->restart_interval = stbi__get16be(z->s);
2510  return 1;
2511 
2512  case 0xDB: // DQT - define quantization table
2513  L = stbi__get16be(z->s) - 2;
2514  while (L > 0) {
2515  int q = stbi__get8(z->s);
2516  int p = q >> 4;
2517  int t = q & 15, i;
2518  if (p != 0) return stbi__err("bad DQT type", "Corrupt JPEG");
2519  if (t > 3) return stbi__err("bad DQT table", "Corrupt JPEG");
2520  for (i = 0; i < 64; ++i)
2521  z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s);
2522  L -= 65;
2523  }
2524  return L == 0;
2525 
2526  case 0xC4: // DHT - define huffman table
2527  L = stbi__get16be(z->s) - 2;
2528  while (L > 0) {
2529  stbi_uc *v;
2530  int sizes[16], i, n = 0;
2531  int q = stbi__get8(z->s);
2532  int tc = q >> 4;
2533  int th = q & 15;
2534  if (tc > 1 || th > 3) return stbi__err("bad DHT header", "Corrupt JPEG");
2535  for (i = 0; i < 16; ++i) {
2536  sizes[i] = stbi__get8(z->s);
2537  n += sizes[i];
2538  }
2539  L -= 17;
2540  if (tc == 0) {
2541  if (!stbi__build_huffman(z->huff_dc + th, sizes)) return 0;
2542  v = z->huff_dc[th].values;
2543  }
2544  else {
2545  if (!stbi__build_huffman(z->huff_ac + th, sizes)) return 0;
2546  v = z->huff_ac[th].values;
2547  }
2548  for (i = 0; i < n; ++i)
2549  v[i] = stbi__get8(z->s);
2550  if (tc != 0)
2551  stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
2552  L -= n;
2553  }
2554  return L == 0;
2555  }
2556  // check for comment block or APP blocks
2557  if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
2558  stbi__skip(z->s, stbi__get16be(z->s) - 2);
2559  return 1;
2560  }
2561  return 0;
2562 }
2563 
2564 // after we see SOS
2565 static int stbi__process_scan_header(stbi__jpeg *z)
2566 {
2567  int i;
2568  int Ls = stbi__get16be(z->s);
2569  z->scan_n = stbi__get8(z->s);
2570  if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n) return stbi__err("bad SOS component count", "Corrupt JPEG");
2571  if (Ls != 6 + 2 * z->scan_n) return stbi__err("bad SOS len", "Corrupt JPEG");
2572  for (i = 0; i < z->scan_n; ++i) {
2573  int id = stbi__get8(z->s), which;
2574  int q = stbi__get8(z->s);
2575  for (which = 0; which < z->s->img_n; ++which)
2576  if (z->img_comp[which].id == id)
2577  break;
2578  if (which == z->s->img_n) return 0; // no match
2579  z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff", "Corrupt JPEG");
2580  z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff", "Corrupt JPEG");
2581  z->order[i] = which;
2582  }
2583 
2584  {
2585  int aa;
2586  z->spec_start = stbi__get8(z->s);
2587  z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
2588  aa = stbi__get8(z->s);
2589  z->succ_high = (aa >> 4);
2590  z->succ_low = (aa & 15);
2591  if (z->progressive) {
2592  if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2593  return stbi__err("bad SOS", "Corrupt JPEG");
2594  }
2595  else {
2596  if (z->spec_start != 0) return stbi__err("bad SOS", "Corrupt JPEG");
2597  if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS", "Corrupt JPEG");
2598  z->spec_end = 63;
2599  }
2600  }
2601 
2602  return 1;
2603 }
2604 
2605 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
2606 {
2607  stbi__context *s = z->s;
2608  int Lf, p, i, q, h_max = 1, v_max = 1, c;
2609  Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG
2610  p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit", "JPEG format not supported: 8-bit only"); // JPEG baseline
2611  s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
2612  s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires
2613  c = stbi__get8(s);
2614  if (c != 3 && c != 1) return stbi__err("bad component count", "Corrupt JPEG"); // JFIF requires
2615  s->img_n = c;
2616  for (i = 0; i < c; ++i) {
2617  z->img_comp[i].data = NULL;
2618  z->img_comp[i].linebuf = NULL;
2619  }
2620 
2621  if (Lf != 8 + 3 * s->img_n) return stbi__err("bad SOF len", "Corrupt JPEG");
2622 
2623  for (i = 0; i < s->img_n; ++i) {
2624  z->img_comp[i].id = stbi__get8(s);
2625  if (z->img_comp[i].id != i + 1) // JFIF requires
2626  if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files!
2627  return stbi__err("bad component ID", "Corrupt JPEG");
2628  q = stbi__get8(s);
2629  z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H", "Corrupt JPEG");
2630  z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V", "Corrupt JPEG");
2631  z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ", "Corrupt JPEG");
2632  }
2633 
2634  if (scan != STBI__SCAN_load) return 1;
2635 
2636  if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
2637 
2638  for (i = 0; i < s->img_n; ++i) {
2639  if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
2640  if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
2641  }
2642 
2643  // compute interleaved mcu info
2644  z->img_h_max = h_max;
2645  z->img_v_max = v_max;
2646  z->img_mcu_w = h_max * 8;
2647  z->img_mcu_h = v_max * 8;
2648  z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
2649  z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
2650 
2651  for (i = 0; i < s->img_n; ++i) {
2652  // number of effective pixels (e.g. for non-interleaved MCU)
2653  z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
2654  z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
2655  // to simplify generation, we'll allocate enough memory to decode
2656  // the bogus oversized data from using interleaved MCUs and their
2657  // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
2658  // discard the extra data until colorspace conversion
2659  z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
2660  z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
2661  z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2 + 15);
2662 
2663  if (z->img_comp[i].raw_data == NULL) {
2664  for (--i; i >= 0; --i) {
2665  STBI_FREE(z->img_comp[i].raw_data);
2666  z->img_comp[i].data = NULL;
2667  }
2668  return stbi__err("outofmem", "Out of memory");
2669  }
2670  // align blocks for idct using mmx/sse
2671  z->img_comp[i].data = (stbi_uc*)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
2672  z->img_comp[i].linebuf = NULL;
2673  if (z->progressive) {
2674  z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
2675  z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
2676  z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
2677  z->img_comp[i].coeff = (short*)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);
2678  }
2679  else {
2680  z->img_comp[i].coeff = 0;
2681  z->img_comp[i].raw_coeff = 0;
2682  }
2683  }
2684 
2685  return 1;
2686 }
2687 
// marker classification. these are predicates rather than plain constants
// because a class can cover more than one marker value (SOF does).
#define stbi__DNL(x)         (0xdc == (x))
#define stbi__SOI(x)         (0xd8 == (x))
#define stbi__EOI(x)         (0xd9 == (x))
#define stbi__SOF(x)         ((x) >= 0xc0 && (x) <= 0xc2)
#define stbi__SOS(x)         (0xda == (x))

// the one SOF value that selects progressive decoding
#define stbi__SOF_progressive(x)   (0xc2 == (x))
2696 
2697 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
2698 {
2699  int m;
2700  z->marker = STBI__MARKER_none; // initialize cached marker to empty
2701  m = stbi__get_marker(z);
2702  if (!stbi__SOI(m)) return stbi__err("no SOI", "Corrupt JPEG");
2703  if (scan == STBI__SCAN_type) return 1;
2704  m = stbi__get_marker(z);
2705  while (!stbi__SOF(m)) {
2706  if (!stbi__process_marker(z, m)) return 0;
2707  m = stbi__get_marker(z);
2708  while (m == STBI__MARKER_none) {
2709  // some files have extra padding after their blocks, so ok, we'll scan
2710  if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
2711  m = stbi__get_marker(z);
2712  }
2713  }
2714  z->progressive = stbi__SOF_progressive(m);
2715  if (!stbi__process_frame_header(z, scan)) return 0;
2716  return 1;
2717 }
2718 
2719 // decode image to YCbCr format
2720 static int stbi__decode_jpeg_image(stbi__jpeg *j)
2721 {
2722  int m;
2723  j->restart_interval = 0;
2724  if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
2725  m = stbi__get_marker(j);
2726  while (!stbi__EOI(m)) {
2727  if (stbi__SOS(m)) {
2728  if (!stbi__process_scan_header(j)) return 0;
2729  if (!stbi__parse_entropy_coded_data(j)) return 0;
2730  if (j->marker == STBI__MARKER_none) {
2731  // handle 0s at the end of image data from IP Kamera 9060
2732  while (!stbi__at_eof(j->s)) {
2733  int x = stbi__get8(j->s);
2734  if (x == 255) {
2735  j->marker = stbi__get8(j->s);
2736  break;
2737  }
2738  else if (x != 0) {
2739  return stbi__err("junk before marker", "Corrupt JPEG");
2740  }
2741  }
2742  // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
2743  }
2744  }
2745  else {
2746  if (!stbi__process_marker(j, m)) return 0;
2747  }
2748  m = stbi__get_marker(j);
2749  }
2750  if (j->progressive)
2751  stbi__jpeg_finish(j);
2752  return 1;
2753 }
2754 
2755 // static jfif-centered resampling (across block boundaries)
2756 
// Signature shared by all row resamplers below.
//   out : scratch row the resampler may write into
//   in0 : input row passed as in_near by the implementations
//   in1 : input row passed as in_far by the implementations
//   w   : number of input (pre-expansion) samples
//   hs  : horizontal expansion factor (used only by the generic resampler)
// The return value is the row holding the result; it is either out or,
// for the pass-through resampler, the in_near pointer itself.
2757 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
2758  int w, int hs);
2759 
// divide a weighted sum by 4 and narrow to a byte; callers add 2 first to round
2760 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
2761 
2762 static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2763 {
2764  STBI_NOTUSED(out);
2765  STBI_NOTUSED(in_far);
2766  STBI_NOTUSED(w);
2767  STBI_NOTUSED(hs);
2768  return in_near;
2769 }
2770 
2771 static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2772 {
2773  // need to generate two samples vertically for every one in input
2774  int i;
2775  STBI_NOTUSED(hs);
2776  for (i = 0; i < w; ++i)
2777  out[i] = stbi__div4(3 * in_near[i] + in_far[i] + 2);
2778  return out;
2779 }
2780 
2781 static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2782 {
2783  // need to generate two samples horizontally for every one in input
2784  int i;
2785  stbi_uc *input = in_near;
2786 
2787  if (w == 1) {
2788  // if only one sample, can't do any interpolation
2789  out[0] = out[1] = input[0];
2790  return out;
2791  }
2792 
2793  out[0] = input[0];
2794  out[1] = stbi__div4(input[0] * 3 + input[1] + 2);
2795  for (i = 1; i < w - 1; ++i) {
2796  int n = 3 * input[i] + 2;
2797  out[i * 2 + 0] = stbi__div4(n + input[i - 1]);
2798  out[i * 2 + 1] = stbi__div4(n + input[i + 1]);
2799  }
2800  out[i * 2 + 0] = stbi__div4(input[w - 2] * 3 + input[w - 1] + 2);
2801  out[i * 2 + 1] = input[w - 1];
2802 
2803  STBI_NOTUSED(in_far);
2804  STBI_NOTUSED(hs);
2805 
2806  return out;
2807 }
2808 
2809 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
2810 
2811 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2812 {
2813  // need to generate 2x2 samples for every one in input
2814  int i, t0, t1;
2815  if (w == 1) {
2816  out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
2817  return out;
2818  }
2819 
2820  t1 = 3 * in_near[0] + in_far[0];
2821  out[0] = stbi__div4(t1 + 2);
2822  for (i = 1; i < w; ++i) {
2823  t0 = t1;
2824  t1 = 3 * in_near[i] + in_far[i];
2825  out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
2826  out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
2827  }
2828  out[w * 2 - 1] = stbi__div4(t1 + 2);
2829 
2830  STBI_NOTUSED(hs);
2831 
2832  return out;
2833 }
2834 
2835 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of the 2x horizontal + 2x vertical resampler.
// Writes 2*w samples to out and returns out; hs is unused.
// Groups of 8 input pixels are processed with SSE2 or NEON while at least one
// more input pixel follows the group; the remaining pixels and the right edge
// are handled by the scalar code after the vector loop.
2836 static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2837 {
2838  // need to generate 2x2 samples for every one in input
2839  int i = 0, t0, t1;
2840 
2841  if (w == 1) {
// single column: vertical blend only, duplicated into both outputs
2842  out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
2843  return out;
2844  }
2845 
// t1 always holds the vertically filtered value of the most recent column
2846  t1 = 3 * in_near[0] + in_far[0];
2847  // process groups of 8 pixels for as long as we can.
2848  // note we can't handle the last pixel in a row in this loop
2849  // because we need to handle the filter boundary conditions.
2850  for (; i < ((w - 1) & ~7); i += 8) {
2851 #if defined(STBI_SSE2)
2852  // load and perform the vertical filtering pass
2853  // this uses 3*x + y = 4*x + (y - x)
2854  __m128i zero = _mm_setzero_si128();
2855  __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
2856  __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
2857  __m128i farw = _mm_unpacklo_epi8(farb, zero);
2858  __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
2859  __m128i diff = _mm_sub_epi16(farw, nearw);
2860  __m128i nears = _mm_slli_epi16(nearw, 2);
2861  __m128i curr = _mm_add_epi16(nears, diff); // current row
2862 
2863  // horizontal filter works the same based on shifted vers of current
2864  // row. "prev" is current row shifted right by 1 pixel; we need to
2865  // insert the previous pixel value (from t1).
2866  // "next" is current row shifted left by 1 pixel, with first pixel
2867  // of next block of 8 pixels added in.
2868  __m128i prv0 = _mm_slli_si128(curr, 2);
2869  __m128i nxt0 = _mm_srli_si128(curr, 2);
2870  __m128i prev = _mm_insert_epi16(prv0, t1, 0);
2871  __m128i next = _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);
2872 
2873  // horizontal filter, polyphase implementation since it's convenient:
2874  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
2875  // odd pixels = 3*cur + next = cur*4 + (next - cur)
2876  // note the shared term.
2877  __m128i bias = _mm_set1_epi16(8);
2878  __m128i curs = _mm_slli_epi16(curr, 2);
2879  __m128i prvd = _mm_sub_epi16(prev, curr);
2880  __m128i nxtd = _mm_sub_epi16(next, curr);
2881  __m128i curb = _mm_add_epi16(curs, bias);
2882  __m128i even = _mm_add_epi16(prvd, curb);
2883  __m128i odd = _mm_add_epi16(nxtd, curb);
2884 
2885  // interleave even and odd pixels, then undo scaling.
2886  __m128i int0 = _mm_unpacklo_epi16(even, odd);
2887  __m128i int1 = _mm_unpackhi_epi16(even, odd);
2888  __m128i de0 = _mm_srli_epi16(int0, 4);
2889  __m128i de1 = _mm_srli_epi16(int1, 4);
2890 
2891  // pack and write output
2892  __m128i outv = _mm_packus_epi16(de0, de1);
2893  _mm_storeu_si128((__m128i *) (out + i * 2), outv);
2894 #elif defined(STBI_NEON)
2895  // load and perform the vertical filtering pass
2896  // this uses 3*x + y = 4*x + (y - x)
2897  uint8x8_t farb = vld1_u8(in_far + i);
2898  uint8x8_t nearb = vld1_u8(in_near + i);
2899  int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
2900  int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
2901  int16x8_t curr = vaddq_s16(nears, diff); // current row
2902 
2903  // horizontal filter works the same based on shifted vers of current
2904  // row. "prev" is current row shifted right by 1 pixel; we need to
2905  // insert the previous pixel value (from t1).
2906  // "next" is current row shifted left by 1 pixel, with first pixel
2907  // of next block of 8 pixels added in.
2908  int16x8_t prv0 = vextq_s16(curr, curr, 7);
2909  int16x8_t nxt0 = vextq_s16(curr, curr, 1);
2910  int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
2911  int16x8_t next = vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);
2912 
2913  // horizontal filter, polyphase implementation since it's convenient:
2914  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
2915  // odd pixels = 3*cur + next = cur*4 + (next - cur)
2916  // note the shared term.
2917  int16x8_t curs = vshlq_n_s16(curr, 2);
2918  int16x8_t prvd = vsubq_s16(prev, curr);
2919  int16x8_t nxtd = vsubq_s16(next, curr);
2920  int16x8_t even = vaddq_s16(curs, prvd);
2921  int16x8_t odd = vaddq_s16(curs, nxtd);
2922 
2923  // undo scaling and round, then store with even/odd phases interleaved
2924  uint8x8x2_t o;
2925  o.val[0] = vqrshrun_n_s16(even, 4);
2926  o.val[1] = vqrshrun_n_s16(odd, 4);
2927  vst2_u8(out + i * 2, o);
2928 #endif
2929 
2930  // "previous" value for next iter
2931  t1 = 3 * in_near[i + 7] + in_far[i + 7];
2932  }
2933 
// scalar continuation: first emit the even-phase output of column i, whose
// left neighbour is the value carried in t1 (out[0] is also written here
// when the vector loop did not run, since then t0 == t1)
2934  t0 = t1;
2935  t1 = 3 * in_near[i] + in_far[i];
2936  out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
2937 
// remaining columns, same arithmetic as the scalar stbi__resample_row_hv_2 loop
2938  for (++i; i < w; ++i) {
2939  t0 = t1;
2940  t1 = 3 * in_near[i] + in_far[i];
2941  out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
2942  out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
2943  }
// right edge: no neighbour to blend with horizontally
2944  out[w * 2 - 1] = stbi__div4(t1 + 2);
2945 
2946  STBI_NOTUSED(hs);
2947 
2948  return out;
2949 }
2950 #endif
2951 
2952 static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2953 {
2954  // resample with nearest-neighbor
2955  int i, j;
2956  STBI_NOTUSED(in_far);
2957  for (i = 0; i < w; ++i)
2958  for (j = 0; j < hs; ++j)
2959  out[i*hs + j] = in_near[i];
2960  return out;
2961 }
2962 
2963 #ifdef STBI_JPEG_OLD
2964 // this is the same YCbCr-to-RGB calculation that stb_image has used
2965 // historically before the algorithm changes in 1.49
2966 #define float2fixed(x) ((int) ((x) * 65536 + 0.5))
2967 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
2968 {
2969  int i;
2970  for (i = 0; i < count; ++i) {
2971  int y_fixed = (y[i] << 16) + 32768; // rounding
2972  int r, g, b;
2973  int cr = pcr[i] - 128;
2974  int cb = pcb[i] - 128;
2975  r = y_fixed + cr*float2fixed(1.40200f);
2976  g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
2977  b = y_fixed + cb*float2fixed(1.77200f);
2978  r >>= 16;
2979  g >>= 16;
2980  b >>= 16;
2981  if ((unsigned)r > 255) { if (r < 0) r = 0; else r = 255; }
2982  if ((unsigned)g > 255) { if (g < 0) g = 0; else g = 255; }
2983  if ((unsigned)b > 255) { if (b < 0) b = 0; else b = 255; }
2984  out[0] = (stbi_uc)r;
2985  out[1] = (stbi_uc)g;
2986  out[2] = (stbi_uc)b;
2987  out[3] = 255;
2988  out += step;
2989  }
2990 }
2991 #else
2992 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
2993 // to make sure the code produces the same results in both SIMD and scalar
2994 #define float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
2995 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
2996 {
2997  int i;
2998  for (i = 0; i < count; ++i) {
2999  int y_fixed = (y[i] << 20) + (1 << 19); // rounding
3000  int r, g, b;
3001  int cr = pcr[i] - 128;
3002  int cb = pcb[i] - 128;
3003  r = y_fixed + cr* float2fixed(1.40200f);
3004  g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
3005  b = y_fixed + cb* float2fixed(1.77200f);
3006  r >>= 20;
3007  g >>= 20;
3008  b >>= 20;
3009  if ((unsigned)r > 255) { if (r < 0) r = 0; else r = 255; }
3010  if ((unsigned)g > 255) { if (g < 0) g = 0; else g = 255; }
3011  if ((unsigned)b > 255) { if (b < 0) b = 0; else b = 255; }
3012  out[0] = (stbi_uc)r;
3013  out[1] = (stbi_uc)g;
3014  out[2] = (stbi_uc)b;
3015  out[3] = 255;
3016  out += step;
3017  }
3018 }
3019 #endif
3020 
3021 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD YCbCr-to-RGB conversion with the same parameters as
// stbi__YCbCr_to_RGB_row. Only step == 4 is vectorized, 8 pixels at a time;
// any remaining pixels (and every pixel when step != 4) go through the
// scalar loop at the end of the function.
// NOTE(review): the scalar tail uses float2fixed with 20-bit shifts, which
// matches the non-STBI_JPEG_OLD definition of that macro; stbi__setup_jpeg
// only installs this kernel when STBI_JPEG_OLD is not defined.
3022 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
3023 {
3024  int i = 0;
3025 
3026 #ifdef STBI_SSE2
3027  // step == 3 is pretty ugly on the final interleave, and i'm not convinced
3028  // it's useful in practice (you wouldn't use it for textures, for example).
3029  // so just accelerate step == 4 case.
3030  if (step == 4) {
3031  // this is a fairly straightforward implementation and not super-optimized.
3032  __m128i signflip = _mm_set1_epi8(-0x80);
3033  __m128i cr_const0 = _mm_set1_epi16((short)(1.40200f*4096.0f + 0.5f));
3034  __m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f*4096.0f + 0.5f));
3035  __m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f*4096.0f + 0.5f));
3036  __m128i cb_const1 = _mm_set1_epi16((short)(1.77200f*4096.0f + 0.5f));
3037  __m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);
3038  __m128i xw = _mm_set1_epi16(255); // alpha channel
3039 
3040  for (; i + 7 < count; i += 8) {
3041  // load
3042  __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y + i));
3043  __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr + i));
3044  __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb + i));
3045  __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
3046  __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
3047 
3048  // unpack to short (and left-shift cr, cb by 8)
3049  __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
3050  __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
3051  __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
3052 
3053  // color transform
3054  __m128i yws = _mm_srli_epi16(yw, 4);
3055  __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
3056  __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
3057  __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
3058  __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
3059  __m128i rws = _mm_add_epi16(cr0, yws);
3060  __m128i gwt = _mm_add_epi16(cb0, yws);
3061  __m128i bws = _mm_add_epi16(yws, cb1);
3062  __m128i gws = _mm_add_epi16(gwt, cr1);
3063 
3064  // descale
3065  __m128i rw = _mm_srai_epi16(rws, 4);
3066  __m128i bw = _mm_srai_epi16(bws, 4);
3067  __m128i gw = _mm_srai_epi16(gws, 4);
3068 
3069  // back to byte, set up for transpose
3070  __m128i brb = _mm_packus_epi16(rw, bw);
3071  __m128i gxb = _mm_packus_epi16(gw, xw);
3072 
3073  // transpose to interleave channels
3074  __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
3075  __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
3076  __m128i o0 = _mm_unpacklo_epi16(t0, t1);
3077  __m128i o1 = _mm_unpackhi_epi16(t0, t1);
3078 
3079  // store
3080  _mm_storeu_si128((__m128i *) (out + 0), o0);
3081  _mm_storeu_si128((__m128i *) (out + 16), o1);
// 8 pixels * 4 bytes were written
3082  out += 32;
3083  }
3084  }
3085 #endif
3086 
3087 #ifdef STBI_NEON
3088  // in this version, step=3 support would be easy to add. but is there demand?
3089  if (step == 4) {
3090  // this is a fairly straightforward implementation and not super-optimized.
3091  uint8x8_t signflip = vdup_n_u8(0x80);
3092  int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f*4096.0f + 0.5f));
3093  int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f*4096.0f + 0.5f));
3094  int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f*4096.0f + 0.5f));
3095  int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f*4096.0f + 0.5f));
3096 
3097  for (; i + 7 < count; i += 8) {
3098  // load
3099  uint8x8_t y_bytes = vld1_u8(y + i);
3100  uint8x8_t cr_bytes = vld1_u8(pcr + i);
3101  uint8x8_t cb_bytes = vld1_u8(pcb + i);
3102  int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
3103  int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
3104 
3105  // expand to s16
3106  int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
3107  int16x8_t crw = vshll_n_s8(cr_biased, 7);
3108  int16x8_t cbw = vshll_n_s8(cb_biased, 7);
3109 
3110  // color transform
3111  int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
3112  int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
3113  int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
3114  int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
3115  int16x8_t rws = vaddq_s16(yws, cr0);
3116  int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
3117  int16x8_t bws = vaddq_s16(yws, cb1);
3118 
3119  // undo scaling, round, convert to byte
3120  uint8x8x4_t o;
3121  o.val[0] = vqrshrun_n_s16(rws, 4);
3122  o.val[1] = vqrshrun_n_s16(gws, 4);
3123  o.val[2] = vqrshrun_n_s16(bws, 4);
3124  o.val[3] = vdup_n_u8(255);
3125 
3126  // store, interleaving r/g/b/a
3127  vst4_u8(out, o);
3128  out += 8 * 4;
3129  }
3130  }
3131 #endif
3132 
// scalar tail: handles whatever the vector loops above did not consume
3133  for (; i < count; ++i) {
3134  int y_fixed = (y[i] << 20) + (1 << 19); // rounding
3135  int r, g, b;
3136  int cr = pcr[i] - 128;
3137  int cb = pcb[i] - 128;
3138  r = y_fixed + cr* float2fixed(1.40200f);
3139  g = y_fixed + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
3140  b = y_fixed + cb* float2fixed(1.77200f);
3141  r >>= 20;
3142  g >>= 20;
3143  b >>= 20;
// clamp each channel to 0..255
3144  if ((unsigned)r > 255) { if (r < 0) r = 0; else r = 255; }
3145  if ((unsigned)g > 255) { if (g < 0) g = 0; else g = 255; }
3146  if ((unsigned)b > 255) { if (b < 0) b = 0; else b = 255; }
3147  out[0] = (stbi_uc)r;
3148  out[1] = (stbi_uc)g;
3149  out[2] = (stbi_uc)b;
3150  out[3] = 255;
3151  out += step;
3152  }
3153 }
3154 #endif
3155 
3156 // set up the kernels
3157 static void stbi__setup_jpeg(stbi__jpeg *j)
3158 {
3159  j->idct_block_kernel = stbi__idct_block;
3160  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3161  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3162 
3163 #ifdef STBI_SSE2
3164  if (stbi__sse2_available()) {
3165  j->idct_block_kernel = stbi__idct_simd;
3166 #ifndef STBI_JPEG_OLD
3167  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3168 #endif
3169  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3170  }
3171 #endif
3172 
3173 #ifdef STBI_NEON
3174  j->idct_block_kernel = stbi__idct_simd;
3175 #ifndef STBI_JPEG_OLD
3176  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3177 #endif
3178  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3179 #endif
3180 }
3181 
3182 // clean up the temporary component buffers
3183 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3184 {
3185  int i;
3186  for (i = 0; i < j->s->img_n; ++i) {
3187  if (j->img_comp[i].raw_data) {
3188  STBI_FREE(j->img_comp[i].raw_data);
3189  j->img_comp[i].raw_data = NULL;
3190  j->img_comp[i].data = NULL;
3191  }
3192  if (j->img_comp[i].raw_coeff) {
3193  STBI_FREE(j->img_comp[i].raw_coeff);
3194  j->img_comp[i].raw_coeff = 0;
3195  j->img_comp[i].coeff = 0;
3196  }
3197  if (j->img_comp[i].linebuf) {
3198  STBI_FREE(j->img_comp[i].linebuf);
3199  j->img_comp[i].linebuf = NULL;
3200  }
3201  }
3202 }
3203 
// Per-component upsampling state used by load_jpeg_image while it walks the
// output rows.
3204 typedef struct
3205 {
3206  resample_row_func resample; // row resampler chosen from the hs/vs pair
3207  stbi_uc *line0, *line1; // the two neighbouring pre-expansion rows currently in use
3208  int hs, vs; // expansion factor in each axis
3209  int w_lores; // horizontal pixels pre-expansion
3210  int ystep; // how far through vertical expansion we are
3211  int ypos; // which pre-expansion row we're on
3212 } stbi__resample;
3213 
3214 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3215 {
3216  int n, decode_n;
3217  z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3218 
3219  // validate req_comp
3220  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3221 
3222  // load a jpeg image from whichever source, but leave in YCbCr format
3223  if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3224 
3225  // determine actual number of components to generate
3226  n = req_comp ? req_comp : z->s->img_n;
3227 
3228  if (z->s->img_n == 3 && n < 3)
3229  decode_n = 1;
3230  else
3231  decode_n = z->s->img_n;
3232 
3233  // resample and color-convert
3234  {
3235  int k;
3236  unsigned int i, j;
3237  stbi_uc *output;
3238  stbi_uc *coutput[4];
3239 
3240  stbi__resample res_comp[4];
3241 
3242  for (k = 0; k < decode_n; ++k) {
3243  stbi__resample *r = &res_comp[k];
3244 
3245  // allocate line buffer big enough for upsampling off the edges
3246  // with upsample factor of 4
3247  z->img_comp[k].linebuf = (stbi_uc *)stbi__malloc(z->s->img_x + 3);
3248  if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3249 
3250  r->hs = z->img_h_max / z->img_comp[k].h;
3251  r->vs = z->img_v_max / z->img_comp[k].v;
3252  r->ystep = r->vs >> 1;
3253  r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
3254  r->ypos = 0;
3255  r->line0 = r->line1 = z->img_comp[k].data;
3256 
3257  if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3258  else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3259  else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3260  else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3261  else r->resample = stbi__resample_row_generic;
3262  }
3263 
3264  // can't error after this so, this is safe
3265  output = (stbi_uc *)stbi__malloc(n * z->s->img_x * z->s->img_y + 1);
3266  if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3267 
3268  // now go ahead and resample
3269  for (j = 0; j < z->s->img_y; ++j) {
3270  stbi_uc *out = output + n * z->s->img_x * j;
3271  for (k = 0; k < decode_n; ++k) {
3272  stbi__resample *r = &res_comp[k];
3273  int y_bot = r->ystep >= (r->vs >> 1);
3274  coutput[k] = r->resample(z->img_comp[k].linebuf,
3275  y_bot ? r->line1 : r->line0,
3276  y_bot ? r->line0 : r->line1,
3277  r->w_lores, r->hs);
3278  if (++r->ystep >= r->vs) {
3279  r->ystep = 0;
3280  r->line0 = r->line1;
3281  if (++r->ypos < z->img_comp[k].y)
3282  r->line1 += z->img_comp[k].w2;
3283  }
3284  }
3285  if (n >= 3) {
3286  stbi_uc *y = coutput[0];
3287  if (z->s->img_n == 3) {
3288  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3289  }
3290  else
3291  for (i = 0; i < z->s->img_x; ++i) {
3292  out[0] = out[1] = out[2] = y[i];
3293  out[3] = 255; // not used if n==3
3294  out += n;
3295  }
3296  }
3297  else {
3298  stbi_uc *y = coutput[0];
3299  if (n == 1)
3300  for (i = 0; i < z->s->img_x; ++i) out[i] = y[i];
3301  else
3302  for (i = 0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
3303  }
3304  }
3305  stbi__cleanup_jpeg(z);
3306  *out_x = z->s->img_x;
3307  *out_y = z->s->img_y;
3308  if (comp) *comp = z->s->img_n; // report original components, not output
3309  return output;
3310  }
3311 }
3312 
3313 static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
3314 {
3315  stbi__jpeg j;
3316  j.s = s;
3317  stbi__setup_jpeg(&j);
3318  return load_jpeg_image(&j, x, y, comp, req_comp);
3319 }
3320 
3321 static int stbi__jpeg_test(stbi__context *s)
3322 {
3323  int r;
3324  stbi__jpeg j;
3325  j.s = s;
3326  stbi__setup_jpeg(&j);
3327  r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
3328  stbi__rewind(s);
3329  return r;
3330 }
3331 
3332 static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
3333 {
3334  if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
3335  stbi__rewind(j->s);
3336  return 0;
3337  }
3338  if (x) *x = j->s->img_x;
3339  if (y) *y = j->s->img_y;
3340  if (comp) *comp = j->s->img_n;
3341  return 1;
3342 }
3343 
3344 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
3345 {
3346  stbi__jpeg j;
3347  j.s = s;
3348  return stbi__jpeg_info_raw(&j, x, y, comp);
3349 }
3350 #endif
3351 
3352 // public domain zlib decode v0.2 Sean Barrett 2006-11-18
3353 // simple implementation
3354 // - all input must be provided in an upfront buffer
3355 // - all output is written to a single output buffer (can malloc/realloc)
3356 // performance
3357 // - fast huffman
3358 
3359 #ifndef STBI_NO_ZLIB
3360 
3361 // fast-way is faster to check than jpeg huffman, but slow way is slower
3362 #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
// mask selecting the low STBI__ZFAST_BITS bits of the bit buffer
3363 #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
3364 
3365 // zlib-style huffman encoding
3366 // (jpeg packs from left, zlib from right, so can't share code)
// Field contents as filled in by stbi__zbuild_huffman:
//   fast        : lookup indexed by the next STBI__ZFAST_BITS input bits;
//                 an entry is (code length << 9) | symbol, or 0 when the code
//                 is longer than STBI__ZFAST_BITS
//   firstcode   : first code value of each code length
//   maxcode     : end of each length's code range, shifted up to 16 bits;
//                 entry 16 is the sentinel 0x10000
//   firstsymbol : index into size[]/value[] of the first symbol of each length
//   size, value : code length and symbol number, ordered by code
3367 typedef struct
3368 {
3369  stbi__uint16 fast[1 << STBI__ZFAST_BITS];
3370  stbi__uint16 firstcode[16];
3371  int maxcode[17];
3372  stbi__uint16 firstsymbol[16];
3373  stbi_uc size[288];
3374  stbi__uint16 value[288];
3375 } stbi__zhuffman;
3376 
3377 stbi_inline static int stbi__bitreverse16(int n)
3378 {
3379  n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
3380  n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
3381  n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
3382  n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
3383  return n;
3384 }
3385 
3386 stbi_inline static int stbi__bit_reverse(int v, int bits)
3387 {
3388  STBI_ASSERT(bits <= 16);
3389  // to bit reverse n bits, reverse 16 and shift
3390  // e.g. 11 bits, bit reverse and shift away 5
3391  return stbi__bitreverse16(v) >> (16 - bits);
3392 }
3393 
3394 static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
3395 {
3396  int i, k = 0;
3397  int code, next_code[16], sizes[17];
3398 
3399  // DEFLATE spec for generating codes
3400  memset(sizes, 0, sizeof(sizes));
3401  memset(z->fast, 0, sizeof(z->fast));
3402  for (i = 0; i < num; ++i)
3403  ++sizes[sizelist[i]];
3404  sizes[0] = 0;
3405  for (i = 1; i < 16; ++i)
3406  STBI_ASSERT(sizes[i] <= (1 << i));
3407  code = 0;
3408  for (i = 1; i < 16; ++i) {
3409  next_code[i] = code;
3410  z->firstcode[i] = (stbi__uint16)code;
3411  z->firstsymbol[i] = (stbi__uint16)k;
3412  code = (code + sizes[i]);
3413  if (sizes[i])
3414  if (code - 1 >= (1 << i)) return stbi__err("bad codelengths", "Corrupt JPEG");
3415  z->maxcode[i] = code << (16 - i); // preshift for inner loop
3416  code <<= 1;
3417  k += sizes[i];
3418  }
3419  z->maxcode[16] = 0x10000; // sentinel
3420  for (i = 0; i < num; ++i) {
3421  int s = sizelist[i];
3422  if (s) {
3423  int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
3424  stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);
3425  z->size[c] = (stbi_uc)s;
3426  z->value[c] = (stbi__uint16)i;
3427  if (s <= STBI__ZFAST_BITS) {
3428  int k = stbi__bit_reverse(next_code[s], s);
3429  while (k < (1 << STBI__ZFAST_BITS)) {
3430  z->fast[k] = fastv;
3431  k += (1 << s);
3432  }
3433  }
3434  ++next_code[s];
3435  }
3436  }
3437  return 1;
3438 }
3439 
3440 // zlib-from-memory implementation for PNG reading
3441 // because PNG allows splitting the zlib stream arbitrarily,
3442 // and it's annoying structurally to have PNG call ZLIB call PNG,
3443 // we require PNG read all the IDATs and combine them into a single
3444 // memory buffer
3445 
// State of one zlib/DEFLATE decode: input cursor, bit buffer, growable
// output buffer, and the two Huffman tables of the current block.
3446 typedef struct
3447 {
// next unread compressed byte and one-past-the-end of the input
3448  stbi_uc *zbuffer, *zbuffer_end;
// number of valid bits currently held in code_buffer
3449  int num_bits;
// bit reservoir; new bytes are OR-ed in above the valid bits and bits are
// consumed from the low end
3450  stbi__uint32 code_buffer;
3451 
// output write position, start of the output buffer, and its end
3452  char *zout;
3453  char *zout_start;
3454  char *zout_end;
// nonzero if stbi__zexpand may grow the output buffer with STBI_REALLOC
3455  int z_expandable;
3456 
// tables for literal/length symbols and for distance symbols
3457  stbi__zhuffman z_length, z_distance;
3458 } stbi__zbuf;
3459 
3460 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3461 {
3462  if (z->zbuffer >= z->zbuffer_end) return 0;
3463  return *z->zbuffer++;
3464 }
3465 
3466 static void stbi__fill_bits(stbi__zbuf *z)
3467 {
3468  do {
3469  STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
3470  z->code_buffer |= stbi__zget8(z) << z->num_bits;
3471  z->num_bits += 8;
3472  } while (z->num_bits <= 24);
3473 }
3474 
3475 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3476 {
3477  unsigned int k;
3478  if (z->num_bits < n) stbi__fill_bits(z);
3479  k = z->code_buffer & ((1 << n) - 1);
3480  z->code_buffer >>= n;
3481  z->num_bits -= n;
3482  return k;
3483 }
3484 
3485 static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
3486 {
3487  int b, s, k;
3488  // not resolved by fast table, so compute it the slow way
3489  // use jpeg approach, which requires MSbits at top
3490  k = stbi__bit_reverse(a->code_buffer, 16);
3491  for (s = STBI__ZFAST_BITS + 1;; ++s)
3492  if (k < z->maxcode[s])
3493  break;
3494  if (s == 16) return -1; // invalid code!
3495  // code size is s, so:
3496  b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
3497  STBI_ASSERT(z->size[b] == s);
3498  a->code_buffer >>= s;
3499  a->num_bits -= s;
3500  return z->value[b];
3501 }
3502 
3503 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3504 {
3505  int b, s;
3506  if (a->num_bits < 16) stbi__fill_bits(a);
3507  b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3508  if (b) {
3509  s = b >> 9;
3510  a->code_buffer >>= s;
3511  a->num_bits -= s;
3512  return b & 511;
3513  }
3514  return stbi__zhuffman_decode_slowpath(a, z);
3515 }
3516 
3517 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
3518 {
3519  char *q;
3520  int cur, limit;
3521  z->zout = zout;
3522  if (!z->z_expandable) return stbi__err("output buffer limit", "Corrupt PNG");
3523  cur = (int)(z->zout - z->zout_start);
3524  limit = (int)(z->zout_end - z->zout_start);
3525  while (cur + n > limit)
3526  limit *= 2;
3527  q = (char *)STBI_REALLOC(z->zout_start, limit);
3528  if (q == NULL) return stbi__err("outofmem", "Out of memory");
3529  z->zout_start = q;
3530  z->zout = q + cur;
3531  z->zout_end = q + limit;
3532  return 1;
3533 }
3534 
// Lookup tables used by stbi__parse_huffman_block. The length tables are
// indexed by (literal/length symbol - 257), the distance tables by the
// distance symbol. "base" is the starting value; "extra" is how many extra
// bits are read from the stream and added to it.
// The trailing 0 entries of the base tables are padding for symbols that
// have no defined length/distance.
3535 static int stbi__zlength_base[31] = {
3536  3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
3537  15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
3538  67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 };
3539 
3540 static int stbi__zlength_extra[31] =
3541 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0 };
3542 
3543 static int stbi__zdist_base[32] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
3544 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0 };
3545 
// only 30 initializers are given for 32 entries; the last two are implicitly 0
3546 static int stbi__zdist_extra[32] =
3547 { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 };
3548 
3549 static int stbi__parse_huffman_block(stbi__zbuf *a)
3550 {
3551  char *zout = a->zout;
3552  for (;;) {
3553  int z = stbi__zhuffman_decode(a, &a->z_length);
3554  if (z < 256) {
3555  if (z < 0) return stbi__err("bad huffman code", "Corrupt PNG"); // error in huffman codes
3556  if (zout >= a->zout_end) {
3557  if (!stbi__zexpand(a, zout, 1)) return 0;
3558  zout = a->zout;
3559  }
3560  *zout++ = (char)z;
3561  }
3562  else {
3563  stbi_uc *p;
3564  int len, dist;
3565  if (z == 256) {
3566  a->zout = zout;
3567  return 1;
3568  }
3569  z -= 257;
3570  len = stbi__zlength_base[z];
3571  if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
3572  z = stbi__zhuffman_decode(a, &a->z_distance);
3573  if (z < 0) return stbi__err("bad huffman code", "Corrupt PNG");
3574  dist = stbi__zdist_base[z];
3575  if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
3576  if (zout - a->zout_start < dist) return stbi__err("bad dist", "Corrupt PNG");
3577  if (zout + len > a->zout_end) {
3578  if (!stbi__zexpand(a, zout, len)) return 0;
3579  zout = a->zout;
3580  }
3581  p = (stbi_uc *)(zout - dist);
3582  if (dist == 1) { // run of one byte; common in images.
3583  stbi_uc v = *p;
3584  do *zout++ = v; while (--len);
3585  }
3586  else {
3587  do *zout++ = *p++; while (--len);
3588  }
3589  }
3590  }
3591 }
3592 
3593 static int stbi__compute_huffman_codes(stbi__zbuf *a)
3594 {
3595  static stbi_uc length_dezigzag[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
3596  stbi__zhuffman z_codelength;
3597  stbi_uc lencodes[286 + 32 + 137];//padding for maximum single op
3598  stbi_uc codelength_sizes[19];
3599  int i, n;
3600 
3601  int hlit = stbi__zreceive(a, 5) + 257;
3602  int hdist = stbi__zreceive(a, 5) + 1;
3603  int hclen = stbi__zreceive(a, 4) + 4;
3604 
3605  memset(codelength_sizes, 0, sizeof(codelength_sizes));
3606  for (i = 0; i < hclen; ++i) {
3607  int s = stbi__zreceive(a, 3);
3608  codelength_sizes[length_dezigzag[i]] = (stbi_uc)s;
3609  }
3610  if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
3611 
3612  n = 0;
3613  while (n < hlit + hdist) {
3614  int c = stbi__zhuffman_decode(a, &z_codelength);
3615  STBI_ASSERT(c >= 0 && c < 19);
3616  if (c < 16)
3617  lencodes[n++] = (stbi_uc)c;
3618  else if (c == 16) {
3619  c = stbi__zreceive(a, 2) + 3;
3620  memset(lencodes + n, lencodes[n - 1], c);
3621  n += c;
3622  }
3623  else if (c == 17) {
3624  c = stbi__zreceive(a, 3) + 3;
3625  memset(lencodes + n, 0, c);
3626  n += c;
3627  }
3628  else {
3629  STBI_ASSERT(c == 18);
3630  c = stbi__zreceive(a, 7) + 11;
3631  memset(lencodes + n, 0, c);
3632  n += c;
3633  }
3634  }
3635  if (n != hlit + hdist) return stbi__err("bad codelengths", "Corrupt PNG");
3636  if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
3637  if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist)) return 0;
3638  return 1;
3639 }
3640 
3641 static int stbi__parse_uncomperssed_block(stbi__zbuf *a)
3642 {
3643  stbi_uc header[4];
3644  int len, nlen, k;
3645  if (a->num_bits & 7)
3646  stbi__zreceive(a, a->num_bits & 7); // discard
3647  // drain the bit-packed data into header
3648  k = 0;
3649  while (a->num_bits > 0) {
3650  header[k++] = (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check
3651  a->code_buffer >>= 8;
3652  a->num_bits -= 8;
3653  }
3654  STBI_ASSERT(a->num_bits == 0);
3655  // now fill header the normal way
3656  while (k < 4)
3657  header[k++] = stbi__zget8(a);
3658  len = header[1] * 256 + header[0];
3659  nlen = header[3] * 256 + header[2];
3660  if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt", "Corrupt PNG");
3661  if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer", "Corrupt PNG");
3662  if (a->zout + len > a->zout_end)
3663  if (!stbi__zexpand(a, a->zout, len)) return 0;
3664  memcpy(a->zout, a->zbuffer, len);
3665  a->zbuffer += len;
3666  a->zout += len;
3667  return 1;
3668 }
3669 
3670 static int stbi__parse_zlib_header(stbi__zbuf *a)
3671 {
3672  int cmf = stbi__zget8(a);
3673  int cm = cmf & 15;
3674  /* int cinfo = cmf >> 4; */
3675  int flg = stbi__zget8(a);
3676  if ((cmf * 256 + flg) % 31 != 0) return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec
3677  if (flg & 32) return stbi__err("no preset dict", "Corrupt PNG"); // preset dictionary not allowed in png
3678  if (cm != 8) return stbi__err("bad compression", "Corrupt PNG"); // DEFLATE required for png
3679  // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
3680  return 1;
3681 }
3682 
3683 // @TODO: should statically initialize these for optimal thread safety
3684 static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32];
3685 static void stbi__init_zdefaults(void)
3686 {
3687  int i; // use <= to match clearly with spec
3688  for (i = 0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
3689  for (; i <= 255; ++i) stbi__zdefault_length[i] = 9;
3690  for (; i <= 279; ++i) stbi__zdefault_length[i] = 7;
3691  for (; i <= 287; ++i) stbi__zdefault_length[i] = 8;
3692 
3693  for (i = 0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
3694 }
3695 
3696 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
3697 {
3698  int final, type;
3699  if (parse_header)
3700  if (!stbi__parse_zlib_header(a)) return 0;
3701  a->num_bits = 0;
3702  a->code_buffer = 0;
3703  do {
3704  final = stbi__zreceive(a, 1);
3705  type = stbi__zreceive(a, 2);
3706  if (type == 0) {
3707  if (!stbi__parse_uncomperssed_block(a)) return 0;
3708  }
3709  else if (type == 3) {
3710  return 0;
3711  }
3712  else {
3713  if (type == 1) {
3714  // use fixed code lengths
3715  if (!stbi__zdefault_distance[31]) stbi__init_zdefaults();
3716  if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, 288)) return 0;
3717  if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
3718  }
3719  else {
3720  if (!stbi__compute_huffman_codes(a)) return 0;
3721  }
3722  if (!stbi__parse_huffman_block(a)) return 0;
3723  }
3724  } while (!final);
3725  return 1;
3726 }
3727 
3728 static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
3729 {
3730  a->zout_start = obuf;
3731  a->zout = obuf;
3732  a->zout_end = obuf + olen;
3733  a->z_expandable = exp;
3734 
3735  return stbi__parse_zlib(a, parse_header);
3736 }
3737 
3738 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
3739 {
3740  stbi__zbuf a;
3741  char *p = (char *)stbi__malloc(initial_size);
3742  if (p == NULL) return NULL;
3743  a.zbuffer = (stbi_uc *)buffer;
3744  a.zbuffer_end = (stbi_uc *)buffer + len;
3745  if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
3746  if (outlen) *outlen = (int)(a.zout - a.zout_start);
3747  return a.zout_start;
3748  }
3749  else {
3750  STBI_FREE(a.zout_start);
3751  return NULL;
3752  }
3753 }
3754 
3755 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
3756 {
3757  return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
3758 }
3759 
3760 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
3761 {
3762  stbi__zbuf a;
3763  char *p = (char *)stbi__malloc(initial_size);
3764  if (p == NULL) return NULL;
3765  a.zbuffer = (stbi_uc *)buffer;
3766  a.zbuffer_end = (stbi_uc *)buffer + len;
3767  if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
3768  if (outlen) *outlen = (int)(a.zout - a.zout_start);
3769  return a.zout_start;
3770  }
3771  else {
3772  STBI_FREE(a.zout_start);
3773  return NULL;
3774  }
3775 }
3776 
3777 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
3778 {
3779  stbi__zbuf a;
3780  a.zbuffer = (stbi_uc *)ibuffer;
3781  a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
3782  if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
3783  return (int)(a.zout - a.zout_start);
3784  else
3785  return -1;
3786 }
3787 
3788 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
3789 {
3790  stbi__zbuf a;
3791  char *p = (char *)stbi__malloc(16384);
3792  if (p == NULL) return NULL;
3793  a.zbuffer = (stbi_uc *)buffer;
3794  a.zbuffer_end = (stbi_uc *)buffer + len;
3795  if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
3796  if (outlen) *outlen = (int)(a.zout - a.zout_start);
3797  return a.zout_start;
3798  }
3799  else {
3800  STBI_FREE(a.zout_start);
3801  return NULL;
3802  }
3803 }
3804 
3805 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
3806 {
3807  stbi__zbuf a;
3808  a.zbuffer = (stbi_uc *)ibuffer;
3809  a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
3810  if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
3811  return (int)(a.zout - a.zout_start);
3812  else
3813  return -1;
3814 }
3815 #endif
3816 
3817 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
3818 // simple implementation
3819 // - only 8-bit samples
3820 // - no CRC checking
3821 // - allocates lots of intermediate memory
3822 // - avoids problem of streaming data between subsystems
3823 // - avoids explicit window management
3824 // performance
3825 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding
3826 
3827 #ifndef STBI_NO_PNG
3828 typedef struct
3829 {
3830  stbi__uint32 length;
3831  stbi__uint32 type;
3832 } stbi__pngchunk;
3833 
3834 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
3835 {
3836  stbi__pngchunk c;
3837  c.length = stbi__get32be(s);
3838  c.type = stbi__get32be(s);
3839  return c;
3840 }
3841 
3842 static int stbi__check_png_header(stbi__context *s)
3843 {
3844  static stbi_uc png_sig[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };
3845  int i;
3846  for (i = 0; i < 8; ++i)
3847  if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig", "Not a PNG");
3848  return 1;
3849 }
3850 
3851 typedef struct
3852 {
3853  stbi__context *s;
3854  stbi_uc *idata, *expanded, *out;
3855 } stbi__png;
3856 
3857 
// Scanline filter identifiers. Values 0..4 are the filter bytes that appear in
// the decoded stream; the last two are internal.
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
3868 
3869 static stbi_uc first_row_filter[5] =
3870 {
3871  STBI__F_none,
3872  STBI__F_sub,
3873  STBI__F_none,
3874  STBI__F_avg_first,
3875  STBI__F_paeth_first
3876 };
3877 
// Paeth predictor: returns whichever of a, b, c is closest to a + b - c,
// preferring a, then b, then c when distances tie.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   if (dist_a <= dist_b && dist_a <= dist_c)
      return a;
   return (dist_b <= dist_c) ? b : c;
}
3888 
3889 static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0, 0, 0, 0x01 };
3890 
3891 // create the png data from post-deflated data
3892 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
3893 {
3894  stbi__context *s = a->s;
3895  stbi__uint32 i, j, stride = x*out_n;
3896  stbi__uint32 img_len, img_width_bytes;
3897  int k;
3898  int img_n = s->img_n; // copy it into a local for later
3899 
3900  STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
3901  a->out = (stbi_uc *)stbi__malloc(x * y * out_n); // extra bytes to write off the end into
3902  if (!a->out) return stbi__err("outofmem", "Out of memory");
3903 
3904  img_width_bytes = (((img_n * x * depth) + 7) >> 3);
3905  img_len = (img_width_bytes + 1) * y;
3906  if (s->img_x == x && s->img_y == y) {
3907  if (raw_len != img_len) return stbi__err("not enough pixels", "Corrupt PNG");
3908  }
3909  else { // interlaced:
3910  if (raw_len < img_len) return stbi__err("not enough pixels", "Corrupt PNG");
3911  }
3912 
3913  for (j = 0; j < y; ++j) {
3914  stbi_uc *cur = a->out + stride*j;
3915  stbi_uc *prior = cur - stride;
3916  int filter = *raw++;
3917  int filter_bytes = img_n;
3918  int width = x;
3919  if (filter > 4)
3920  return stbi__err("invalid filter", "Corrupt PNG");
3921 
3922  if (depth < 8) {
3923  STBI_ASSERT(img_width_bytes <= x);
3924  cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
3925  filter_bytes = 1;
3926  width = img_width_bytes;
3927  }
3928 
3929  // if first row, use special filter that doesn't sample previous row
3930  if (j == 0) filter = first_row_filter[filter];
3931 
3932  // handle first byte explicitly
3933  for (k = 0; k < filter_bytes; ++k) {
3934  switch (filter) {
3935  case STBI__F_none: cur[k] = raw[k]; break;
3936  case STBI__F_sub: cur[k] = raw[k]; break;
3937  case STBI__F_up: cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
3938  case STBI__F_avg: cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1)); break;
3939  case STBI__F_paeth: cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0)); break;
3940  case STBI__F_avg_first: cur[k] = raw[k]; break;
3941  case STBI__F_paeth_first: cur[k] = raw[k]; break;
3942  }
3943  }
3944 
3945  if (depth == 8) {
3946  if (img_n != out_n)
3947  cur[img_n] = 255; // first pixel
3948  raw += img_n;
3949  cur += out_n;
3950  prior += out_n;
3951  }
3952  else {
3953  raw += 1;
3954  cur += 1;
3955  prior += 1;
3956  }
3957 
3958  // this is a little gross, so that we don't switch per-pixel or per-component
3959  if (depth < 8 || img_n == out_n) {
3960  int nk = (width - 1)*img_n;
3961 #define CASE(f) \
3962  case f: \
3963  for (k=0; k < nk; ++k)
3964  switch (filter) {
3965  // "none" filter turns into a memcpy here; make that explicit.
3966  case STBI__F_none: memcpy(cur, raw, nk); break;
3967  CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); break;
3968  CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
3969  CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); break;
3970  CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes])); break;
3971  CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); break;
3972  CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], 0, 0)); break;
3973  }
3974 #undef CASE
3975  raw += nk;
3976  }
3977  else {
3978  STBI_ASSERT(img_n + 1 == out_n);
3979 #define CASE(f) \
3980  case f: \
3981  for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
3982  for (k=0; k < img_n; ++k)
3983  switch (filter) {
3984  CASE(STBI__F_none) cur[k] = raw[k]; break;
3985  CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k - out_n]); break;
3986  CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
3987  CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - out_n]) >> 1)); break;
3988  CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - out_n], prior[k], prior[k - out_n])); break;
3989  CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k - out_n] >> 1)); break;
3990  CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - out_n], 0, 0)); break;
3991  }
3992 #undef CASE
3993  }
3994  }
3995 
3996  // we make a separate pass to expand bits to pixels; for performance,
3997  // this could run two scanlines behind the above code, so it won't
3998  // intefere with filtering but will still be in the cache.
3999  if (depth < 8) {
4000  for (j = 0; j < y; ++j) {
4001  stbi_uc *cur = a->out + stride*j;
4002  stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
4003  // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4004  // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4005  stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4006 
4007  // note that the final byte might overshoot and write more data than desired.
4008  // we can allocate enough data that this never writes out of memory, but it
4009  // could also overwrite the next scanline. can it overwrite non-empty data
4010  // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4011  // so we need to explicitly clamp the final ones
4012 
4013  if (depth == 4) {
4014  for (k = x*img_n; k >= 2; k -= 2, ++in) {
4015  *cur++ = scale * ((*in >> 4));
4016  *cur++ = scale * ((*in) & 0x0f);
4017  }
4018  if (k > 0) *cur++ = scale * ((*in >> 4));
4019  }
4020  else if (depth == 2) {
4021  for (k = x*img_n; k >= 4; k -= 4, ++in) {
4022  *cur++ = scale * ((*in >> 6));
4023  *cur++ = scale * ((*in >> 4) & 0x03);
4024  *cur++ = scale * ((*in >> 2) & 0x03);
4025  *cur++ = scale * ((*in) & 0x03);
4026  }
4027  if (k > 0) *cur++ = scale * ((*in >> 6));
4028  if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4029  if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4030  }
4031  else if (depth == 1) {
4032  for (k = x*img_n; k >= 8; k -= 8, ++in) {
4033  *cur++ = scale * ((*in >> 7));
4034  *cur++ = scale * ((*in >> 6) & 0x01);
4035  *cur++ = scale * ((*in >> 5) & 0x01);
4036  *cur++ = scale * ((*in >> 4) & 0x01);
4037  *cur++ = scale * ((*in >> 3) & 0x01);
4038  *cur++ = scale * ((*in >> 2) & 0x01);
4039  *cur++ = scale * ((*in >> 1) & 0x01);
4040  *cur++ = scale * ((*in) & 0x01);
4041  }
4042  if (k > 0) *cur++ = scale * ((*in >> 7));
4043  if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4044  if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4045  if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4046  if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4047  if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4048  if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4049  }
4050  if (img_n != out_n) {
4051  // insert alpha = 255
4052  stbi_uc *cur = a->out + stride*j;
4053  int i;
4054  if (img_n == 1) {
4055  for (i = x - 1; i >= 0; --i) {
4056  cur[i * 2 + 1] = 255;
4057  cur[i * 2 + 0] = cur[i];
4058  }
4059  }
4060  else {
4061  STBI_ASSERT(img_n == 3);
4062  for (i = x - 1; i >= 0; --i) {
4063  cur[i * 4 + 3] = 255;
4064  cur[i * 4 + 2] = cur[i * 3 + 2];
4065  cur[i * 4 + 1] = cur[i * 3 + 1];
4066  cur[i * 4 + 0] = cur[i * 3 + 0];
4067  }
4068  }
4069  }
4070  }
4071  }
4072 
4073  return 1;
4074 }
4075 
4076 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4077 {
4078  stbi_uc *final;
4079  int p;
4080  if (!interlaced)
4081  return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4082 
4083  // de-interlacing
4084  final = (stbi_uc *)stbi__malloc(a->s->img_x * a->s->img_y * out_n);
4085  for (p = 0; p < 7; ++p) {
4086  int xorig[] = { 0, 4, 0, 2, 0, 1, 0 };
4087  int yorig[] = { 0, 0, 4, 0, 2, 0, 1 };
4088  int xspc[] = { 8, 8, 4, 4, 2, 2, 1 };
4089  int yspc[] = { 8, 8, 8, 4, 4, 2, 2 };
4090  int i, j, x, y;
4091  // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4092  x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
4093  y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
4094  if (x && y) {
4095  stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4096  if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4097  STBI_FREE(final);
4098  return 0;
4099  }
4100  for (j = 0; j < y; ++j) {
4101  for (i = 0; i < x; ++i) {
4102  int out_y = j*yspc[p] + yorig[p];
4103  int out_x = i*xspc[p] + xorig[p];
4104  memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
4105  a->out + (j*x + i)*out_n, out_n);
4106  }
4107  }
4108  STBI_FREE(a->out);
4109  image_data += img_len;
4110  image_data_len -= img_len;
4111  }
4112  }
4113  a->out = final;
4114 
4115  return 1;
4116 }
4117 
4118 static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4119 {
4120  stbi__context *s = z->s;
4121  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4122  stbi_uc *p = z->out;
4123 
4124  // compute color-based transparency, assuming we've
4125  // already got 255 as the alpha value in the output
4126  STBI_ASSERT(out_n == 2 || out_n == 4);
4127 
4128  if (out_n == 2) {
4129  for (i = 0; i < pixel_count; ++i) {
4130  p[1] = (p[0] == tc[0] ? 0 : 255);
4131  p += 2;
4132  }
4133  }
4134  else {
4135  for (i = 0; i < pixel_count; ++i) {
4136  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4137  p[3] = 0;
4138  p += 4;
4139  }
4140  }
4141  return 1;
4142 }
4143 
4144 static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4145 {
4146  stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4147  stbi_uc *p, *temp_out, *orig = a->out;
4148 
4149  p = (stbi_uc *)stbi__malloc(pixel_count * pal_img_n);
4150  if (p == NULL) return stbi__err("outofmem", "Out of memory");
4151 
4152  // between here and free(out) below, exitting would leak
4153  temp_out = p;
4154 
4155  if (pal_img_n == 3) {
4156  for (i = 0; i < pixel_count; ++i) {
4157  int n = orig[i] * 4;
4158  p[0] = palette[n];
4159  p[1] = palette[n + 1];
4160  p[2] = palette[n + 2];
4161  p += 3;
4162  }
4163  }
4164  else {
4165  for (i = 0; i < pixel_count; ++i) {
4166  int n = orig[i] * 4;
4167  p[0] = palette[n];
4168  p[1] = palette[n + 1];
4169  p[2] = palette[n + 2];
4170  p[3] = palette[n + 3];
4171  p += 4;
4172  }
4173  }
4174  STBI_FREE(a->out);
4175  a->out = temp_out;
4176 
4177  STBI_NOTUSED(len);
4178 
4179  return 1;
4180 }
4181 
4182 static int stbi__unpremultiply_on_load = 0;
4183 static int stbi__de_iphone_flag = 0;
4184 
4185 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4186 {
4187  stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4188 }
4189 
4190 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4191 {
4192  stbi__de_iphone_flag = flag_true_if_should_convert;
4193 }
4194 
4195 static void stbi__de_iphone(stbi__png *z)
4196 {
4197  stbi__context *s = z->s;
4198  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4199  stbi_uc *p = z->out;
4200 
4201  if (s->img_out_n == 3) { // convert bgr to rgb
4202  for (i = 0; i < pixel_count; ++i) {
4203  stbi_uc t = p[0];
4204  p[0] = p[2];
4205  p[2] = t;
4206  p += 3;
4207  }
4208  }
4209  else {
4210  STBI_ASSERT(s->img_out_n == 4);
4211  if (stbi__unpremultiply_on_load) {
4212  // convert bgr to rgb and unpremultiply
4213  for (i = 0; i < pixel_count; ++i) {
4214  stbi_uc a = p[3];
4215  stbi_uc t = p[0];
4216  if (a) {
4217  p[0] = p[2] * 255 / a;
4218  p[1] = p[1] * 255 / a;
4219  p[2] = t * 255 / a;
4220  }
4221  else {
4222  p[0] = p[2];
4223  p[2] = t;
4224  }
4225  p += 4;
4226  }
4227  }
4228  else {
4229  // convert bgr to rgb
4230  for (i = 0; i < pixel_count; ++i) {
4231  stbi_uc t = p[0];
4232  p[0] = p[2];
4233  p[2] = t;
4234  p += 4;
4235  }
4236  }
4237  }
4238 }
4239 
// Packs four chunk-name characters into one integer, first character in the
// most significant byte, for comparison against a chunk's 32-bit type field.
#define STBI__PNG_TYPE(a,b,c,d) \
   (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
4241 
4242 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
4243 {
4244  stbi_uc palette[1024], pal_img_n = 0;
4245  stbi_uc has_trans = 0, tc[3];
4246  stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
4247  int first = 1, k, interlace = 0, color = 0, depth = 0, is_iphone = 0;
4248  stbi__context *s = z->s;
4249 
4250  z->expanded = NULL;
4251  z->idata = NULL;
4252  z->out = NULL;
4253 
4254  if (!stbi__check_png_header(s)) return 0;
4255 
4256  if (scan == STBI__SCAN_type) return 1;
4257 
4258  for (;;) {
4259  stbi__pngchunk c = stbi__get_chunk_header(s);
4260  switch (c.type) {
4261  case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
4262  is_iphone = 1;
4263  stbi__skip(s, c.length);
4264  break;
4265  case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
4266  int comp, filter;
4267  if (!first) return stbi__err("multiple IHDR", "Corrupt PNG");
4268  first = 0;
4269  if (c.length != 13) return stbi__err("bad IHDR len", "Corrupt PNG");
4270  s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large", "Very large image (corrupt?)");
4271  s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large", "Very large image (corrupt?)");
4272  depth = stbi__get8(s); if (depth != 1 && depth != 2 && depth != 4 && depth != 8) return stbi__err("1/2/4/8-bit only", "PNG not supported: 1/2/4/8-bit only");
4273  color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype", "Corrupt PNG");
4274  if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype", "Corrupt PNG");
4275  comp = stbi__get8(s); if (comp) return stbi__err("bad comp method", "Corrupt PNG");
4276  filter = stbi__get8(s); if (filter) return stbi__err("bad filter method", "Corrupt PNG");
4277  interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method", "Corrupt PNG");
4278  if (!s->img_x || !s->img_y) return stbi__err("0-pixel image", "Corrupt PNG");
4279  if (!pal_img_n) {
4280  s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
4281  if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
4282  if (scan == STBI__SCAN_header) return 1;
4283  }
4284  else {
4285  // if paletted, then pal_n is our final components, and
4286  // img_n is # components to decompress/filter.
4287  s->img_n = 1;
4288  if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large", "Corrupt PNG");
4289  // if SCAN_header, have to scan to see if we have a tRNS
4290  }
4291  break;
4292  }
4293 
4294  case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
4295  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4296  if (c.length > 256 * 3) return stbi__err("invalid PLTE", "Corrupt PNG");
4297  pal_len = c.length / 3;
4298  if (pal_len * 3 != c.length) return stbi__err("invalid PLTE", "Corrupt PNG");
4299  for (i = 0; i < pal_len; ++i) {
4300  palette[i * 4 + 0] = stbi__get8(s);
4301  palette[i * 4 + 1] = stbi__get8(s);
4302  palette[i * 4 + 2] = stbi__get8(s);
4303  palette[i * 4 + 3] = 255;
4304  }
4305  break;
4306  }
4307 
4308  case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
4309  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4310  if (z->idata) return stbi__err("tRNS after IDAT", "Corrupt PNG");
4311  if (pal_img_n) {
4312  if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
4313  if (pal_len == 0) return stbi__err("tRNS before PLTE", "Corrupt PNG");
4314  if (c.length > pal_len) return stbi__err("bad tRNS len", "Corrupt PNG");
4315  pal_img_n = 4;
4316  for (i = 0; i < c.length; ++i)
4317  palette[i * 4 + 3] = stbi__get8(s);
4318  }
4319  else {
4320  if (!(s->img_n & 1)) return stbi__err("tRNS with alpha", "Corrupt PNG");
4321  if (c.length != (stbi__uint32)s->img_n * 2) return stbi__err("bad tRNS len", "Corrupt PNG");
4322  has_trans = 1;
4323  for (k = 0; k < s->img_n; ++k)
4324  tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[depth]; // non 8-bit images will be larger
4325  }
4326  break;
4327  }
4328 
4329  case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
4330  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4331  if (pal_img_n && !pal_len) return stbi__err("no PLTE", "Corrupt PNG");
4332  if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
4333  if (ioff + c.length > idata_limit) {
4334  stbi_uc *p;
4335  if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
4336  while (ioff + c.length > idata_limit)
4337  idata_limit *= 2;
4338  p = (stbi_uc *)STBI_REALLOC(z->idata, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
4339  z->idata = p;
4340  }
4341  if (!stbi__getn(s, z->idata + ioff, c.length)) return stbi__err("outofdata", "Corrupt PNG");
4342  ioff += c.length;
4343  break;
4344  }
4345 
4346  case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
4347  stbi__uint32 raw_len, bpl;
4348  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4349  if (scan != STBI__SCAN_load) return 1;
4350  if (z->idata == NULL) return stbi__err("no IDAT", "Corrupt PNG");
4351  // initial guess for decoded data size to avoid unnecessary reallocs
4352  bpl = (s->img_x * depth + 7) / 8; // bytes per line, per component
4353  raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
4354  z->expanded = (stbi_uc *)stbi_zlib_decode_malloc_guesssize_headerflag((char *)z->idata, ioff, raw_len, (int *)&raw_len, !is_iphone);
4355  if (z->expanded == NULL) return 0; // zlib should set error
4356  STBI_FREE(z->idata); z->idata = NULL;
4357  if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
4358  s->img_out_n = s->img_n + 1;
4359  else
4360  s->img_out_n = s->img_n;
4361  if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, depth, color, interlace)) return 0;
4362  if (has_trans)
4363  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
4364  if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
4365  stbi__de_iphone(z);
4366  if (pal_img_n) {
4367  // pal_img_n == 3 or 4
4368  s->img_n = pal_img_n; // record the actual colors we had
4369  s->img_out_n = pal_img_n;
4370  if (req_comp >= 3) s->img_out_n = req_comp;
4371  if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
4372  return 0;
4373  }
4374  STBI_FREE(z->expanded); z->expanded = NULL;
4375  return 1;
4376  }
4377 
4378  default:
4379  // if critical, fail
4380  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4381  if ((c.type & (1 << 29)) == 0) {
4382 #ifndef STBI_NO_FAILURE_STRINGS
4383  // not threadsafe
4384  static char invalid_chunk[] = "XXXX PNG chunk not known";
4385  invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
4386  invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
4387  invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
4388  invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
4389 #endif
4390  return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
4391  }
4392  stbi__skip(s, c.length);
4393  break;
4394  }
4395  // end of PNG chunk, read and skip CRC
4396  stbi__get32be(s);
4397  }
4398 }
4399 
4400 static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp)
4401 {
4402  unsigned char *result = NULL;
4403  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
4404  if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
4405  result = p->out;
4406  p->out = NULL;
4407  if (req_comp && req_comp != p->s->img_out_n) {
4408  result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4409  p->s->img_out_n = req_comp;
4410  if (result == NULL) return result;
4411  }
4412  *x = p->s->img_x;
4413  *y = p->s->img_y;
4414  if (n) *n = p->s->img_out_n;
4415  }
4416  STBI_FREE(p->out); p->out = NULL;
4417  STBI_FREE(p->expanded); p->expanded = NULL;
4418  STBI_FREE(p->idata); p->idata = NULL;
4419 
4420  return result;
4421 }
4422 
4423 static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4424 {
4425  stbi__png p;
4426  p.s = s;
4427  return stbi__do_png(&p, x, y, comp, req_comp);
4428 }
4429 
4430 static int stbi__png_test(stbi__context *s)
4431 {
4432  int r;
4433  r = stbi__check_png_header(s);
4434  stbi__rewind(s);
4435  return r;
4436 }
4437 
4438 static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
4439 {
4440  if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
4441  stbi__rewind(p->s);
4442  return 0;
4443  }
4444  if (x) *x = p->s->img_x;
4445  if (y) *y = p->s->img_y;
4446  if (comp) *comp = p->s->img_n;
4447  return 1;
4448 }
4449 
4450 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
4451 {
4452  stbi__png p;
4453  p.s = s;
4454  return stbi__png_info_raw(&p, x, y, comp);
4455 }
4456 #endif
4457 
4458 // Microsoft/Windows BMP image
4459 
4460 #ifndef STBI_NO_BMP
4461 static int stbi__bmp_test_raw(stbi__context *s)
4462 {
4463  int r;
4464  int sz;
4465  if (stbi__get8(s) != 'B') return 0;
4466  if (stbi__get8(s) != 'M') return 0;
4467  stbi__get32le(s); // discard filesize
4468  stbi__get16le(s); // discard reserved
4469  stbi__get16le(s); // discard reserved
4470  stbi__get32le(s); // discard data offset
4471  sz = stbi__get32le(s);
4472  r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
4473  return r;
4474 }
4475 
4476 static int stbi__bmp_test(stbi__context *s)
4477 {
4478  int r = stbi__bmp_test_raw(s);
4479  stbi__rewind(s);
4480  return r;
4481 }
4482 
4483 
// returns 0..31 for the highest set bit, or -1 when z is 0
static int stbi__high_bit(unsigned int z)
{
   int n = 0;
   int shift;

   if (z == 0) return -1;

   // binary search: test the upper half of the remaining range each step
   for (shift = 16; shift > 0; shift >>= 1) {
      if (z >> shift) {
         n += shift;
         z >>= shift;
      }
   }
   return n;
}
4496 
// Returns the number of set bits in the low 32 bits of a, computed by summing
// adjacent bit fields of doubling width.
static int stbi__bitcount(unsigned int a)
{
   unsigned int pairs, nibbles, bytes;

   pairs   = (a & 0x55555555) + ((a >> 1) & 0x55555555);         // max 2
   nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333); // max 4
   bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;            // max 8 per 4, now 8 bits
   bytes  += bytes >> 8;                                         // max 16 per 8 bits
   bytes  += bytes >> 16;                                        // max 32 per 8 bits
   return bytes & 0xff;
}
4506 
// Shifts v right by 'shift' (left by -shift when shift is negative), then
// adds copies of the shifted value moved right by bits, 2*bits, ... while that
// offset is below 8. Returns the sum.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   int aligned = (shift < 0) ? (v << -shift) : (v >> shift);
   int result = aligned;
   int offset;

   for (offset = bits; offset < 8; offset += bits)
      result += aligned >> offset;

   return result;
}
4523 
4524 static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4525 {
4526  stbi_uc *out;
4527  unsigned int mr = 0, mg = 0, mb = 0, ma = 0, fake_a = 0;
4528  stbi_uc pal[256][4];
4529  int psize = 0, i, j, compress = 0, width;
4530  int bpp, flip_vertically, pad, target, offset, hsz;
4531  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
4532  stbi__get32le(s); // discard filesize
4533  stbi__get16le(s); // discard reserved
4534  stbi__get16le(s); // discard reserved
4535  offset = stbi__get32le(s);
4536  hsz = stbi__get32le(s);
4537  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
4538  if (hsz == 12) {
4539  s->img_x = stbi__get16le(s);
4540  s->img_y = stbi__get16le(s);
4541  }
4542  else {
4543  s->img_x = stbi__get32le(s);
4544  s->img_y = stbi__get32le(s);
4545  }
4546  if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
4547  bpp = stbi__get16le(s);
4548  if (bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
4549  flip_vertically = ((int)s->img_y) > 0;
4550  s->img_y = abs((int)s->img_y);
4551  if (hsz == 12) {
4552  if (bpp < 24)
4553  psize = (offset - 14 - 24) / 3;
4554  }
4555  else {
4556  compress = stbi__get32le(s);
4557  if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
4558  stbi__get32le(s); // discard sizeof
4559  stbi__get32le(s); // discard hres
4560  stbi__get32le(s); // discard vres
4561  stbi__get32le(s); // discard colorsused
4562  stbi__get32le(s); // discard max important
4563  if (hsz == 40 || hsz == 56) {
4564  if (hsz == 56) {
4565  stbi__get32le(s);
4566  stbi__get32le(s);
4567  stbi__get32le(s);
4568  stbi__get32le(s);
4569  }
4570  if (bpp == 16 || bpp == 32) {
4571  mr = mg = mb = 0;
4572  if (compress == 0) {
4573  if (bpp == 32) {
4574  mr = 0xffu << 16;
4575  mg = 0xffu << 8;
4576  mb = 0xffu << 0;
4577  ma = 0xffu << 24;
4578  fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255
4579  STBI_NOTUSED(fake_a);
4580  }
4581  else {
4582  mr = 31u << 10;
4583  mg = 31u << 5;
4584  mb = 31u << 0;
4585  }
4586  }
4587  else if (compress == 3) {
4588  mr = stbi__get32le(s);
4589  mg = stbi__get32le(s);
4590  mb = stbi__get32le(s);
4591  // not documented, but generated by photoshop and handled by mspaint
4592  if (mr == mg && mg == mb) {
4593  // ?!?!?
4594  return stbi__errpuc("bad BMP", "bad BMP");
4595  }
4596  }
4597  else
4598  return stbi__errpuc("bad BMP", "bad BMP");
4599  }
4600  }
4601  else {
4602  STBI_ASSERT(hsz == 108 || hsz == 124);
4603  mr = stbi__get32le(s);
4604  mg = stbi__get32le(s);
4605  mb = stbi__get32le(s);
4606  ma = stbi__get32le(s);
4607  stbi__get32le(s); // discard color space
4608  for (i = 0; i < 12; ++i)
4609  stbi__get32le(s); // discard color space parameters
4610  if (hsz == 124) {
4611  stbi__get32le(s); // discard rendering intent
4612  stbi__get32le(s); // discard offset of profile data
4613  stbi__get32le(s); // discard size of profile data
4614  stbi__get32le(s); // discard reserved
4615  }
4616  }
4617  if (bpp < 16)
4618  psize = (offset - 14 - hsz) >> 2;
4619  }
4620  s->img_n = ma ? 4 : 3;
4621  if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
4622  target = req_comp;
4623  else
4624  target = s->img_n; // if they want monochrome, we'll post-convert
4625  out = (stbi_uc *)stbi__malloc(target * s->img_x * s->img_y);
4626  if (!out) return stbi__errpuc("outofmem", "Out of memory");
4627  if (bpp < 16) {
4628  int z = 0;
4629  if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
4630  for (i = 0; i < psize; ++i) {
4631  pal[i][2] = stbi__get8(s);
4632  pal[i][1] = stbi__get8(s);
4633  pal[i][0] = stbi__get8(s);
4634  if (hsz != 12) stbi__get8(s);
4635  pal[i][3] = 255;
4636  }
4637  stbi__skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
4638  if (bpp == 4) width = (s->img_x + 1) >> 1;
4639  else if (bpp == 8) width = s->img_x;
4640  else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
4641  pad = (-width) & 3;
4642  for (j = 0; j < (int)s->img_y; ++j) {
4643  for (i = 0; i < (int)s->img_x; i += 2) {
4644  int v = stbi__get8(s), v2 = 0;
4645  if (bpp == 4) {
4646  v2 = v & 15;
4647  v >>= 4;
4648  }
4649  out[z++] = pal[v][0];
4650  out[z++] = pal[v][1];
4651  out[z++] = pal[v][2];
4652  if (target == 4) out[z++] = 255;
4653  if (i + 1 == (int)s->img_x) break;
4654  v = (bpp == 8) ? stbi__get8(s) : v2;
4655  out[z++] = pal[v][0];
4656  out[z++] = pal[v][1];
4657  out[z++] = pal[v][2];
4658  if (target == 4) out[z++] = 255;
4659  }
4660  stbi__skip(s, pad);
4661  }
4662  }
4663  else {
4664  int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
4665  int z = 0;
4666  int easy = 0;
4667  stbi__skip(s, offset - 14 - hsz);
4668  if (bpp == 24) width = 3 * s->img_x;
4669  else if (bpp == 16) width = 2 * s->img_x;
4670  else /* bpp = 32 and pad = 0 */ width = 0;
4671  pad = (-width) & 3;
4672  if (bpp == 24) {
4673  easy = 1;
4674  }
4675  else if (bpp == 32) {
4676  if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
4677  easy = 2;
4678  }
4679  if (!easy) {
4680  if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
4681  // right shift amt to put high bit in position #7
4682  rshift = stbi__high_bit(mr) - 7; rcount = stbi__bitcount(mr);
4683  gshift = stbi__high_bit(mg) - 7; gcount = stbi__bitcount(mg);
4684  bshift = stbi__high_bit(mb) - 7; bcount = stbi__bitcount(mb);
4685  ashift = stbi__high_bit(ma) - 7; acount = stbi__bitcount(ma);
4686  }
4687  for (j = 0; j < (int)s->img_y; ++j) {
4688  if (easy) {
4689  for (i = 0; i < (int)s->img_x; ++i) {
4690  unsigned char a;
4691  out[z + 2] = stbi__get8(s);
4692  out[z + 1] = stbi__get8(s);
4693  out[z + 0] = stbi__get8(s);
4694  z += 3;
4695  a = (easy == 2 ? stbi__get8(s) : 255);
4696  if (target == 4) out[z++] = a;
4697  }
4698  }
4699  else {
4700  for (i = 0; i < (int)s->img_x; ++i) {
4701  stbi__uint32 v = (stbi__uint32)(bpp == 16 ? stbi__get16le(s) : stbi__get32le(s));
4702  int a;
4703  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
4704  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
4705  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
4706  a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
4707  if (target == 4) out[z++] = STBI__BYTECAST(a);
4708  }
4709  }
4710  stbi__skip(s, pad);
4711  }
4712  }
4713  if (flip_vertically) {
4714  stbi_uc t;
4715  for (j = 0; j < (int)s->img_y >> 1; ++j) {
4716  stbi_uc *p1 = out + j *s->img_x*target;
4717  stbi_uc *p2 = out + (s->img_y - 1 - j)*s->img_x*target;
4718  for (i = 0; i < (int)s->img_x*target; ++i) {
4719  t = p1[i], p1[i] = p2[i], p2[i] = t;
4720  }
4721  }
4722  }
4723 
4724  if (req_comp && req_comp != target) {
4725  out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
4726  if (out == NULL) return out; // stbi__convert_format frees input on failure
4727  }
4728 
4729  *x = s->img_x;
4730  *y = s->img_y;
4731  if (comp) *comp = s->img_n;
4732  return out;
4733 }
4734 #endif
4735 
4736 // Targa Truevision - TGA
4737 // by Jonathan Dummer
4738 #ifndef STBI_NO_TGA
4739 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
4740 {
4741  int tga_w, tga_h, tga_comp;
4742  int sz;
4743  stbi__get8(s); // discard Offset
4744  sz = stbi__get8(s); // color type
4745  if (sz > 1) {
4746  stbi__rewind(s);
4747  return 0; // only RGB or indexed allowed
4748  }
4749  sz = stbi__get8(s); // image type
4750  // only RGB or grey allowed, +/- RLE
4751  if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11)) return 0;
4752  stbi__skip(s, 9);
4753  tga_w = stbi__get16le(s);
4754  if (tga_w < 1) {
4755  stbi__rewind(s);
4756  return 0; // test width
4757  }
4758  tga_h = stbi__get16le(s);
4759  if (tga_h < 1) {
4760  stbi__rewind(s);
4761  return 0; // test height
4762  }
4763  sz = stbi__get8(s); // bits per pixel
4764  // only RGB or RGBA or grey allowed
4765  if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32)) {
4766  stbi__rewind(s);
4767  return 0;
4768  }
4769  tga_comp = sz;
4770  if (x) *x = tga_w;
4771  if (y) *y = tga_h;
4772  if (comp) *comp = tga_comp / 8;
4773  return 1; // seems to have passed everything
4774 }
4775 
4776 static int stbi__tga_test(stbi__context *s)
4777 {
4778  int res;
4779  int sz;
4780  stbi__get8(s); // discard Offset
4781  sz = stbi__get8(s); // color type
4782  if (sz > 1) return 0; // only RGB or indexed allowed
4783  sz = stbi__get8(s); // image type
4784  if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11)) return 0; // only RGB or grey allowed, +/- RLE
4785  stbi__get16be(s); // discard palette start
4786  stbi__get16be(s); // discard palette length
4787  stbi__get8(s); // discard bits per palette color entry
4788  stbi__get16be(s); // discard x origin
4789  stbi__get16be(s); // discard y origin
4790  if (stbi__get16be(s) < 1) return 0; // test width
4791  if (stbi__get16be(s) < 1) return 0; // test height
4792  sz = stbi__get8(s); // bits per pixel
4793  if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32))
4794  res = 0;
4795  else
4796  res = 1;
4797  stbi__rewind(s);
4798  return res;
4799 }
4800 
4801 static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4802 {
4803  // read in the TGA header stuff
4804  int tga_offset = stbi__get8(s);
4805  int tga_indexed = stbi__get8(s);
4806  int tga_image_type = stbi__get8(s);
4807  int tga_is_RLE = 0;
4808  int tga_palette_start = stbi__get16le(s);
4809  int tga_palette_len = stbi__get16le(s);
4810  int tga_palette_bits = stbi__get8(s);
4811  int tga_x_origin = stbi__get16le(s);
4812  int tga_y_origin = stbi__get16le(s);
4813  int tga_width = stbi__get16le(s);
4814  int tga_height = stbi__get16le(s);
4815  int tga_bits_per_pixel = stbi__get8(s);
4816  int tga_comp = tga_bits_per_pixel / 8;
4817  int tga_inverted = stbi__get8(s);
4818  // image data
4819  unsigned char *tga_data;
4820  unsigned char *tga_palette = NULL;
4821  int i, j;
4822  unsigned char raw_data[4];
4823  int RLE_count = 0;
4824  int RLE_repeating = 0;
4825  int read_next_pixel = 1;
4826 
4827  // do a tiny bit of precessing
4828  if (tga_image_type >= 8)
4829  {
4830  tga_image_type -= 8;
4831  tga_is_RLE = 1;
4832  }
4833  /* int tga_alpha_bits = tga_inverted & 15; */
4834  tga_inverted = 1 - ((tga_inverted >> 5) & 1);
4835 
4836  // error check
4837  if ( //(tga_indexed) ||
4838  (tga_width < 1) || (tga_height < 1) ||
4839  (tga_image_type < 1) || (tga_image_type > 3) ||
4840  ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
4841  (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
4842  )
4843  {
4844  return NULL; // we don't report this as a bad TGA because we don't even know if it's TGA
4845  }
4846 
4847  // If I'm paletted, then I'll use the number of bits from the palette
4848  if (tga_indexed)
4849  {
4850  tga_comp = tga_palette_bits / 8;
4851  }
4852 
4853  // tga info
4854  *x = tga_width;
4855  *y = tga_height;
4856  if (comp) *comp = tga_comp;
4857 
4858  tga_data = (unsigned char*)stbi__malloc(tga_width * tga_height * tga_comp);
4859  if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
4860 
4861  // skip to the data's starting position (offset usually = 0)
4862  stbi__skip(s, tga_offset);
4863 
4864  if (!tga_indexed && !tga_is_RLE) {
4865  for (i = 0; i < tga_height; ++i) {
4866  int y = tga_inverted ? tga_height - i - 1 : i;
4867  stbi_uc *tga_row = tga_data + y*tga_width*tga_comp;
4868  stbi__getn(s, tga_row, tga_width * tga_comp);
4869  }
4870  }
4871  else {
4872  // do I need to load a palette?
4873  if (tga_indexed)
4874  {
4875  // any data to skip? (offset usually = 0)
4876  stbi__skip(s, tga_palette_start);
4877  // load the palette
4878  tga_palette = (unsigned char*)stbi__malloc(tga_palette_len * tga_palette_bits / 8);
4879  if (!tga_palette) {
4880  STBI_FREE(tga_data);
4881  return stbi__errpuc("outofmem", "Out of memory");
4882  }
4883  if (!stbi__getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8)) {
4884  STBI_FREE(tga_data);
4885  STBI_FREE(tga_palette);
4886  return stbi__errpuc("bad palette", "Corrupt TGA");
4887  }
4888  }
4889  // load the data
4890  for (i = 0; i < tga_width * tga_height; ++i)
4891  {
4892  // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
4893  if (tga_is_RLE)
4894  {
4895  if (RLE_count == 0)
4896  {
4897  // yep, get the next byte as a RLE command
4898  int RLE_cmd = stbi__get8(s);
4899  RLE_count = 1 + (RLE_cmd & 127);
4900  RLE_repeating = RLE_cmd >> 7;
4901  read_next_pixel = 1;
4902  }
4903  else if (!RLE_repeating)
4904  {
4905  read_next_pixel = 1;
4906  }
4907  }
4908  else
4909  {
4910  read_next_pixel = 1;
4911  }
4912  // OK, if I need to read a pixel, do it now
4913  if (read_next_pixel)
4914  {
4915  // load however much data we did have
4916  if (tga_indexed)
4917  {
4918  // read in 1 byte, then perform the lookup
4919  int pal_idx = stbi__get8(s);
4920  if (pal_idx >= tga_palette_len)
4921  {
4922  // invalid index
4923  pal_idx = 0;
4924  }
4925  pal_idx *= tga_bits_per_pixel / 8;
4926  for (j = 0; j * 8 < tga_bits_per_pixel; ++j)
4927  {
4928  raw_data[j] = tga_palette[pal_idx + j];
4929  }
4930  }
4931  else
4932  {
4933  // read in the data raw
4934  for (j = 0; j * 8 < tga_bits_per_pixel; ++j)
4935  {
4936  raw_data[j] = stbi__get8(s);
4937  }
4938  }
4939  // clear the reading flag for the next pixel
4940  read_next_pixel = 0;
4941  } // end of reading a pixel
4942 
4943  // copy data
4944  for (j = 0; j < tga_comp; ++j)
4945  tga_data[i*tga_comp + j] = raw_data[j];
4946 
4947  // in case we're in RLE mode, keep counting down
4948  --RLE_count;
4949  }
4950  // do I need to invert the image?
4951  if (tga_inverted)
4952  {
4953  for (j = 0; j * 2 < tga_height; ++j)
4954  {
4955  int index1 = j * tga_width * tga_comp;
4956  int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
4957  for (i = tga_width * tga_comp; i > 0; --i)
4958  {
4959  unsigned char temp = tga_data[index1];
4960  tga_data[index1] = tga_data[index2];
4961  tga_data[index2] = temp;
4962  ++index1;
4963  ++index2;
4964  }
4965  }
4966  }
4967  // clear my palette, if I had one
4968  if (tga_palette != NULL)
4969  {
4970  STBI_FREE(tga_palette);
4971  }
4972  }
4973 
4974  // swap RGB
4975  if (tga_comp >= 3)
4976  {
4977  unsigned char* tga_pixel = tga_data;
4978  for (i = 0; i < tga_width * tga_height; ++i)
4979  {
4980  unsigned char temp = tga_pixel[0];
4981  tga_pixel[0] = tga_pixel[2];
4982  tga_pixel[2] = temp;
4983  tga_pixel += tga_comp;
4984  }
4985  }
4986 
4987  // convert to target component count
4988  if (req_comp && req_comp != tga_comp)
4989  tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
4990 
4991  // the things I do to get rid of an error message, and yet keep
4992  // Microsoft's C compilers happy... [8^(
4993  tga_palette_start = tga_palette_len = tga_palette_bits =
4994  tga_x_origin = tga_y_origin = 0;
4995  // OK, done
4996  return tga_data;
4997 }
4998 #endif
4999 
5000 // *************************************************************************************************
5001 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5002 
5003 #ifndef STBI_NO_PSD
5004 static int stbi__psd_test(stbi__context *s)
5005 {
5006  int r = (stbi__get32be(s) == 0x38425053);
5007  stbi__rewind(s);
5008  return r;
5009 }
5010 
5011 static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
5012 {
5013  int pixelCount;
5014  int channelCount, compression;
5015  int channel, i, count, len;
5016  int w, h;
5017  stbi_uc *out;
5018 
5019  // Check identifier
5020  if (stbi__get32be(s) != 0x38425053) // "8BPS"
5021  return stbi__errpuc("not PSD", "Corrupt PSD image");
5022 
5023  // Check file type version.
5024  if (stbi__get16be(s) != 1)
5025  return stbi__errpuc("wrong version", "Unsupported version of PSD image");
5026 
5027  // Skip 6 reserved bytes.
5028  stbi__skip(s, 6);
5029 
5030  // Read the number of channels (R, G, B, A, etc).
5031  channelCount = stbi__get16be(s);
5032  if (channelCount < 0 || channelCount > 16)
5033  return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
5034 
5035  // Read the rows and columns of the image.
5036  h = stbi__get32be(s);
5037  w = stbi__get32be(s);
5038 
5039  // Make sure the depth is 8 bits.
5040  if (stbi__get16be(s) != 8)
5041  return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 bit");
5042 
5043  // Make sure the color mode is RGB.
5044  // Valid options are:
5045  // 0: Bitmap
5046  // 1: Grayscale
5047  // 2: Indexed color
5048  // 3: RGB color
5049  // 4: CMYK color
5050  // 7: Multichannel
5051  // 8: Duotone
5052  // 9: Lab color
5053  if (stbi__get16be(s) != 3)
5054  return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
5055 
5056  // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
5057  stbi__skip(s, stbi__get32be(s));
5058 
5059  // Skip the image resources. (resolution, pen tool paths, etc)
5060  stbi__skip(s, stbi__get32be(s));
5061 
5062  // Skip the reserved data.
5063  stbi__skip(s, stbi__get32be(s));
5064 
5065  // Find out if the data is compressed.
5066  // Known values:
5067  // 0: no compression
5068  // 1: RLE compressed
5069  compression = stbi__get16be(s);
5070  if (compression > 1)
5071  return stbi__errpuc("bad compression", "PSD has an unknown compression format");
5072 
5073  // Create the destination image.
5074  out = (stbi_uc *)stbi__malloc(4 * w*h);
5075  if (!out) return stbi__errpuc("outofmem", "Out of memory");
5076  pixelCount = w*h;
5077 
5078  // Initialize the data to zero.
5079  //memset( out, 0, pixelCount * 4 );
5080 
5081  // Finally, the image data.
5082  if (compression) {
5083  // RLE as used by .PSD and .TIFF
5084  // Loop until you get the number of unpacked bytes you are expecting:
5085  // Read the next source byte into n.
5086  // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
5087  // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
5088  // Else if n is 128, noop.
5089  // Endloop
5090 
5091  // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
5092  // which we're going to just skip.
5093  stbi__skip(s, h * channelCount * 2);
5094 
5095  // Read the RLE data by channel.
5096  for (channel = 0; channel < 4; channel++) {
5097  stbi_uc *p;
5098 
5099  p = out + channel;
5100  if (channel >= channelCount) {
5101  // Fill this channel with default data.
5102  for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
5103  }
5104  else {
5105  // Read the RLE data.
5106  count = 0;
5107  while (count < pixelCount) {
5108  len = stbi__get8(s);
5109  if (len == 128) {
5110  // No-op.
5111  }
5112  else if (len < 128) {
5113  // Copy next len+1 bytes literally.
5114  len++;
5115  count += len;
5116  while (len) {
5117  *p = stbi__get8(s);
5118  p += 4;
5119  len--;
5120  }
5121  }
5122  else if (len > 128) {
5123  stbi_uc val;
5124  // Next -len+1 bytes in the dest are replicated from next source byte.
5125  // (Interpret len as a negative 8-bit int.)
5126  len ^= 0x0FF;
5127  len += 2;
5128  val = stbi__get8(s);
5129  count += len;
5130  while (len) {
5131  *p = val;
5132  p += 4;
5133  len--;
5134  }
5135  }
5136  }
5137  }
5138  }
5139 
5140  }
5141  else {
5142  // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
5143  // where each channel consists of an 8-bit value for each pixel in the image.
5144 
5145  // Read the data by channel.
5146  for (channel = 0; channel < 4; channel++) {
5147  stbi_uc *p;
5148 
5149  p = out + channel;
5150  if (channel > channelCount) {
5151  // Fill this channel with default data.
5152  for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
5153  }
5154  else {
5155  // Read the data.
5156  for (i = 0; i < pixelCount; i++)
5157  *p = stbi__get8(s), p += 4;
5158  }
5159  }
5160  }
5161 
5162  if (req_comp && req_comp != 4) {
5163  out = stbi__convert_format(out, 4, req_comp, w, h);
5164  if (out == NULL) return out; // stbi__convert_format frees input on failure
5165  }
5166 
5167  if (comp) *comp = channelCount;
5168  *y = h;
5169  *x = w;
5170 
5171  return out;
5172 }
5173 #endif
5174 
5175 // *************************************************************************************************
5176 // Softimage PIC loader
5177 // by Tom Seddon
5178 //
5179 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5180 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5181 
5182 #ifndef STBI_NO_PIC
5183 static int stbi__pic_is4(stbi__context *s, const char *str)
5184 {
5185  int i;
5186  for (i = 0; i<4; ++i)
5187  if (stbi__get8(s) != (stbi_uc)str[i])
5188  return 0;
5189 
5190  return 1;
5191 }
5192 
5193 static int stbi__pic_test_core(stbi__context *s)
5194 {
5195  int i;
5196 
5197  if (!stbi__pic_is4(s, "\x53\x80\xF6\x34"))
5198  return 0;
5199 
5200  for (i = 0; i<84; ++i)
5201  stbi__get8(s);
5202 
5203  if (!stbi__pic_is4(s, "PICT"))
5204  return 0;
5205 
5206  return 1;
5207 }
5208 
5209 typedef struct
5210 {
5211  stbi_uc size, type, channel;
5212 } stbi__pic_packet;
5213 
5214 static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
5215 {
5216  int mask = 0x80, i;
5217 
5218  for (i = 0; i<4; ++i, mask >>= 1) {
5219  if (channel & mask) {
5220  if (stbi__at_eof(s)) return stbi__errpuc("bad file", "PIC file too short");
5221  dest[i] = stbi__get8(s);
5222  }
5223  }
5224 
5225  return dest;
5226 }
5227 
5228 static void stbi__copyval(int channel, stbi_uc *dest, const stbi_uc *src)
5229 {
5230  int mask = 0x80, i;
5231 
5232  for (i = 0; i<4; ++i, mask >>= 1)
5233  if (channel&mask)
5234  dest[i] = src[i];
5235 }
5236 
5237 static stbi_uc *stbi__pic_load_core(stbi__context *s, int width, int height, int *comp, stbi_uc *result)
5238 {
5239  int act_comp = 0, num_packets = 0, y, chained;
5240  stbi__pic_packet packets[10];
5241 
5242  // this will (should...) cater for even some bizarre stuff like having data
5243  // for the same channel in multiple packets.
5244  do {
5245  stbi__pic_packet *packet;
5246 
5247  if (num_packets == sizeof(packets) / sizeof(packets[0]))
5248  return stbi__errpuc("bad format", "too many packets");
5249 
5250  packet = &packets[num_packets++];
5251 
5252  chained = stbi__get8(s);
5253  packet->size = stbi__get8(s);
5254  packet->type = stbi__get8(s);
5255  packet->channel = stbi__get8(s);
5256 
5257  act_comp |= packet->channel;
5258 
5259  if (stbi__at_eof(s)) return stbi__errpuc("bad file", "file too short (reading packets)");
5260  if (packet->size != 8) return stbi__errpuc("bad format", "packet isn't 8bpp");
5261  } while (chained);
5262 
5263  *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
5264 
5265  for (y = 0; y<height; ++y) {
5266  int packet_idx;
5267 
5268  for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {
5269  stbi__pic_packet *packet = &packets[packet_idx];
5270  stbi_uc *dest = result + y*width * 4;
5271 
5272  switch (packet->type) {
5273  default:
5274  return stbi__errpuc("bad format", "packet has bad compression type");
5275 
5276  case 0: {//uncompressed
5277  int x;
5278 
5279  for (x = 0; x<width; ++x, dest += 4)
5280  if (!stbi__readval(s, packet->channel, dest))
5281  return 0;
5282  break;
5283  }
5284 
5285  case 1://Pure RLE
5286  {
5287  int left = width, i;
5288 
5289  while (left>0) {
5290  stbi_uc count, value[4];
5291 
5292  count = stbi__get8(s);
5293  if (stbi__at_eof(s)) return stbi__errpuc("bad file", "file too short (pure read count)");
5294 
5295  if (count > left)
5296  count = (stbi_uc)left;
5297 
5298  if (!stbi__readval(s, packet->channel, value)) return 0;
5299 
5300  for (i = 0; i<count; ++i, dest += 4)
5301  stbi__copyval(packet->channel, dest, value);
5302  left -= count;
5303  }
5304  }
5305  break;
5306 
5307  case 2: {//Mixed RLE
5308  int left = width;
5309  while (left>0) {
5310  int count = stbi__get8(s), i;
5311  if (stbi__at_eof(s)) return stbi__errpuc("bad file", "file too short (mixed read count)");
5312 
5313  if (count >= 128) { // Repeated
5314  stbi_uc value[4];
5315  int i;
5316 
5317  if (count == 128)
5318  count = stbi__get16be(s);
5319  else
5320  count -= 127;
5321  if (count > left)
5322  return stbi__errpuc("bad file", "scanline overrun");
5323 
5324  if (!stbi__readval(s, packet->channel, value))
5325  return 0;
5326 
5327  for (i = 0; i<count; ++i, dest += 4)
5328  stbi__copyval(packet->channel, dest, value);
5329  }
5330  else { // Raw
5331  ++count;
5332  if (count>left) return stbi__errpuc("bad file", "scanline overrun");
5333 
5334  for (i = 0; i<count; ++i, dest += 4)
5335  if (!stbi__readval(s, packet->channel, dest))
5336  return 0;
5337  }
5338  left -= count;
5339  }
5340  break;
5341  }
5342  }
5343  }
5344  }
5345 
5346  return result;
5347 }
5348 
5349 static stbi_uc *stbi__pic_load(stbi__context *s, int *px, int *py, int *comp, int req_comp)
5350 {
5351  stbi_uc *result;
5352  int i, x, y;
5353 
5354  for (i = 0; i<92; ++i)
5355  stbi__get8(s);
5356 
5357  x = stbi__get16be(s);
5358  y = stbi__get16be(s);
5359  if (stbi__at_eof(s)) return stbi__errpuc("bad file", "file too short (pic header)");
5360  if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode");
5361 
5362  stbi__get32be(s); //skip `ratio'
5363  stbi__get16be(s); //skip `fields'
5364  stbi__get16be(s); //skip `pad'
5365 
5366  // intermediate buffer is RGBA
5367  result = (stbi_uc *)stbi__malloc(x*y * 4);
5368  memset(result, 0xff, x*y * 4);
5369 
5370  if (!stbi__pic_load_core(s, x, y, comp, result)) {
5371  STBI_FREE(result);
5372  result = 0;
5373  }
5374  *px = x;
5375  *py = y;
5376  if (req_comp == 0) req_comp = *comp;
5377  result = stbi__convert_format(result, 4, req_comp, x, y);
5378 
5379  return result;
5380 }
5381 
5382 static int stbi__pic_test(stbi__context *s)
5383 {
5384  int r = stbi__pic_test_core(s);
5385  stbi__rewind(s);
5386  return r;
5387 }
5388 #endif
5389 
5390 // *************************************************************************************************
5391 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
5392 
5393 #ifndef STBI_NO_GIF
5394 typedef struct
5395 {
5396  stbi__int16 prefix;
5397  stbi_uc first;
5398  stbi_uc suffix;
5399 } stbi__gif_lzw;
5400 
5401 typedef struct
5402 {
5403  int w, h;
5404  stbi_uc *out; // output buffer (always 4 components)
5405  int flags, bgindex, ratio, transparent, eflags;
5406  stbi_uc pal[256][4];
5407  stbi_uc lpal[256][4];
5408  stbi__gif_lzw codes[4096];
5409  stbi_uc *color_table;
5410  int parse, step;
5411  int lflags;
5412  int start_x, start_y;
5413  int max_x, max_y;
5414  int cur_x, cur_y;
5415  int line_size;
5416 } stbi__gif;
5417 
5418 static int stbi__gif_test_raw(stbi__context *s)
5419 {
5420  int sz;
5421  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
5422  sz = stbi__get8(s);
5423  if (sz != '9' && sz != '7') return 0;
5424  if (stbi__get8(s) != 'a') return 0;
5425  return 1;
5426 }
5427 
5428 static int stbi__gif_test(stbi__context *s)
5429 {
5430  int r = stbi__gif_test_raw(s);
5431  stbi__rewind(s);
5432  return r;
5433 }
5434 
5435 static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
5436 {
5437  int i;
5438  for (i = 0; i < num_entries; ++i) {
5439  pal[i][2] = stbi__get8(s);
5440  pal[i][1] = stbi__get8(s);
5441  pal[i][0] = stbi__get8(s);
5442  pal[i][3] = transp == i ? 0 : 255;
5443  }
5444 }
5445 
5446 static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
5447 {
5448  stbi_uc version;
5449  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
5450  return stbi__err("not GIF", "Corrupt GIF");
5451 
5452  version = stbi__get8(s);
5453  if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
5454  if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
5455 
5456  stbi__g_failure_reason = "";
5457  g->w = stbi__get16le(s);
5458  g->h = stbi__get16le(s);
5459  g->flags = stbi__get8(s);
5460  g->bgindex = stbi__get8(s);
5461  g->ratio = stbi__get8(s);
5462  g->transparent = -1;
5463 
5464  if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
5465 
5466  if (is_info) return 1;
5467 
5468  if (g->flags & 0x80)
5469  stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
5470 
5471  return 1;
5472 }
5473 
5474 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
5475 {
5476  stbi__gif g;
5477  if (!stbi__gif_header(s, &g, comp, 1)) {
5478  stbi__rewind(s);
5479  return 0;
5480  }
5481  if (x) *x = g.w;
5482  if (y) *y = g.h;
5483  return 1;
5484 }
5485 
5486 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
5487 {
5488  stbi_uc *p, *c;
5489 
5490  // recurse to decode the prefixes, since the linked-list is backwards,
5491  // and working backwards through an interleaved image would be nasty
5492  if (g->codes[code].prefix >= 0)
5493  stbi__out_gif_code(g, g->codes[code].prefix);
5494 
5495  if (g->cur_y >= g->max_y) return;
5496 
5497  p = &g->out[g->cur_x + g->cur_y];
5498  c = &g->color_table[g->codes[code].suffix * 4];
5499 
5500  if (c[3] >= 128) {
5501  p[0] = c[2];
5502  p[1] = c[1];
5503  p[2] = c[0];
5504  p[3] = c[3];
5505  }
5506  g->cur_x += 4;
5507 
5508  if (g->cur_x >= g->max_x) {
5509  g->cur_x = g->start_x;
5510  g->cur_y += g->step;
5511 
5512  while (g->cur_y >= g->max_y && g->parse > 0) {
5513  g->step = (1 << g->parse) * g->line_size;
5514  g->cur_y = g->start_y + (g->step >> 1);
5515  --g->parse;
5516  }
5517  }
5518 }
5519 
5520 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
5521 {
5522  stbi_uc lzw_cs;
5523  stbi__int32 len, code;
5524  stbi__uint32 first;
5525  stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
5526  stbi__gif_lzw *p;
5527 
5528  lzw_cs = stbi__get8(s);
5529  clear = 1 << lzw_cs;
5530  first = 1;
5531  codesize = lzw_cs + 1;
5532  codemask = (1 << codesize) - 1;
5533  bits = 0;
5534  valid_bits = 0;
5535  for (code = 0; code < clear; code++) {
5536  g->codes[code].prefix = -1;
5537  g->codes[code].first = (stbi_uc)code;
5538  g->codes[code].suffix = (stbi_uc)code;
5539  }
5540 
5541  // support no starting clear code
5542  avail = clear + 2;
5543  oldcode = -1;
5544 
5545  len = 0;
5546  for (;;) {
5547  if (valid_bits < codesize) {
5548  if (len == 0) {
5549  len = stbi__get8(s); // start new block
5550  if (len == 0)
5551  return g->out;
5552  }
5553  --len;
5554  bits |= (stbi__int32)stbi__get8(s) << valid_bits;
5555  valid_bits += 8;
5556  }
5557  else {
5558  stbi__int32 code = bits & codemask;
5559  bits >>= codesize;
5560  valid_bits -= codesize;
5561  // @OPTIMIZE: is there some way we can accelerate the non-clear path?
5562  if (code == clear) { // clear code
5563  codesize = lzw_cs + 1;
5564  codemask = (1 << codesize) - 1;
5565  avail = clear + 2;
5566  oldcode = -1;
5567  first = 0;
5568  }
5569  else if (code == clear + 1) { // end of stream code
5570  stbi__skip(s, len);
5571  while ((len = stbi__get8(s)) > 0)
5572  stbi__skip(s, len);
5573  return g->out;
5574  }
5575  else if (code <= avail) {
5576  if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
5577 
5578  if (oldcode >= 0) {
5579  p = &g->codes[avail++];
5580  if (avail > 4096) return stbi__errpuc("too many codes", "Corrupt GIF");
5581  p->prefix = (stbi__int16)oldcode;
5582  p->first = g->codes[oldcode].first;
5583  p->suffix = (code == avail) ? p->first : g->codes[code].first;
5584  }
5585  else if (code == avail)
5586  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
5587 
5588  stbi__out_gif_code(g, (stbi__uint16)code);
5589 
5590  if ((avail & codemask) == 0 && avail <= 0x0FFF) {
5591  codesize++;
5592  codemask = (1 << codesize) - 1;
5593  }
5594 
5595  oldcode = code;
5596  }
5597  else {
5598  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
5599  }
5600  }
5601  }
5602 }
5603 
5604 static void stbi__fill_gif_background(stbi__gif *g)
5605 {
5606  int i;
5607  stbi_uc *c = g->pal[g->bgindex];
5608  // @OPTIMIZE: write a dword at a time
5609  for (i = 0; i < g->w * g->h * 4; i += 4) {
5610  stbi_uc *p = &g->out[i];
5611  p[0] = c[2];
5612  p[1] = c[1];
5613  p[2] = c[0];
5614  p[3] = c[3];
5615  }
5616 }
5617 
5618 // this function is designed to support animated gifs, although stb_image doesn't support it
5619 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
5620 {
5621  int i;
5622  stbi_uc *old_out = 0;
5623 
5624  if (g->out == 0) {
5625  if (!stbi__gif_header(s, g, comp, 0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
5626  g->out = (stbi_uc *)stbi__malloc(4 * g->w * g->h);
5627  if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
5628  stbi__fill_gif_background(g);
5629  }
5630  else {
5631  // animated-gif-only path
5632  if (((g->eflags & 0x1C) >> 2) == 3) {
5633  old_out = g->out;
5634  g->out = (stbi_uc *)stbi__malloc(4 * g->w * g->h);
5635  if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
5636  memcpy(g->out, old_out, g->w*g->h * 4);
5637  }
5638  }
5639 
5640  for (;;) {
5641  switch (stbi__get8(s)) {
5642  case 0x2C: /* Image Descriptor */
5643  {
5644  stbi__int32 x, y, w, h;
5645  stbi_uc *o;
5646 
5647  x = stbi__get16le(s);
5648  y = stbi__get16le(s);
5649  w = stbi__get16le(s);
5650  h = stbi__get16le(s);
5651  if (((x + w) > (g->w)) || ((y + h) > (g->h)))
5652  return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
5653 
5654  g->line_size = g->w * 4;
5655  g->start_x = x * 4;
5656  g->start_y = y * g->line_size;
5657  g->max_x = g->start_x + w * 4;
5658  g->max_y = g->start_y + h * g->line_size;
5659  g->cur_x = g->start_x;
5660  g->cur_y = g->start_y;
5661 
5662  g->lflags = stbi__get8(s);
5663 
5664  if (g->lflags & 0x40) {
5665  g->step = 8 * g->line_size; // first interlaced spacing
5666  g->parse = 3;
5667  }
5668  else {
5669  g->step = g->line_size;
5670  g->parse = 0;
5671  }
5672 
5673  if (g->lflags & 0x80) {
5674  stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
5675  g->color_table = (stbi_uc *)g->lpal;
5676  }
5677  else if (g->flags & 0x80) {
5678  for (i = 0; i < 256; ++i) // @OPTIMIZE: stbi__jpeg_reset only the previous transparent
5679  g->pal[i][3] = 255;
5680  if (g->transparent >= 0 && (g->eflags & 0x01))
5681  g->pal[g->transparent][3] = 0;
5682  g->color_table = (stbi_uc *)g->pal;
5683  }
5684  else
5685  return stbi__errpuc("missing color table", "Corrupt GIF");
5686 
5687  o = stbi__process_gif_raster(s, g);
5688  if (o == NULL) return NULL;
5689 
5690  if (req_comp && req_comp != 4)
5691  o = stbi__convert_format(o, 4, req_comp, g->w, g->h);
5692  return o;
5693  }
5694 
5695  case 0x21: // Comment Extension.
5696  {
5697  int len;
5698  if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
5699  len = stbi__get8(s);
5700  if (len == 4) {
5701  g->eflags = stbi__get8(s);
5702  stbi__get16le(s); // delay
5703  g->transparent = stbi__get8(s);
5704  }
5705  else {
5706  stbi__skip(s, len);
5707  break;
5708  }
5709  }
5710  while ((len = stbi__get8(s)) != 0)
5711  stbi__skip(s, len);
5712  break;
5713  }
5714 
5715  case 0x3B: // gif stream termination code
5716  return (stbi_uc *)s; // using '1' causes warning on some compilers
5717 
5718  default:
5719  return stbi__errpuc("unknown code", "Corrupt GIF");
5720  }
5721  }
5722 }
5723 
5724 static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
5725 {
5726  stbi_uc *u = 0;
5727  stbi__gif g;
5728  memset(&g, 0, sizeof(g));
5729 
5730  u = stbi__gif_load_next(s, &g, comp, req_comp);
5731  if (u == (stbi_uc *)s) u = 0; // end of animated gif marker
5732  if (u) {
5733  *x = g.w;
5734  *y = g.h;
5735  }
5736 
5737  return u;
5738 }
5739 
5740 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
5741 {
5742  return stbi__gif_info_raw(s, x, y, comp);
5743 }
5744 #endif
5745 
5746 // *************************************************************************************************
5747 // Radiance RGBE HDR loader
5748 // originally by Nicolas Schulz
5749 #ifndef STBI_NO_HDR
5750 static int stbi__hdr_test_core(stbi__context *s)
5751 {
5752  const char *signature = "#?RADIANCE\n";
5753  int i;
5754  for (i = 0; signature[i]; ++i)
5755  if (stbi__get8(s) != signature[i])
5756  return 0;
5757  return 1;
5758 }
5759 
5760 static int stbi__hdr_test(stbi__context* s)
5761 {
5762  int r = stbi__hdr_test_core(s);
5763  stbi__rewind(s);
5764  return r;
5765 }
5766 
5767 #define STBI__HDR_BUFLEN 1024
5768 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
5769 {
5770  int len = 0;
5771  char c = '\0';
5772 
5773  c = (char)stbi__get8(z);
5774 
5775  while (!stbi__at_eof(z) && c != '\n') {
5776  buffer[len++] = c;
5777  if (len == STBI__HDR_BUFLEN - 1) {
5778  // flush to end of line
5779  while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
5780  ;
5781  break;
5782  }
5783  c = (char)stbi__get8(z);
5784  }
5785 
5786  buffer[len] = 0;
5787  return buffer;
5788 }
5789 
5790 static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
5791 {
5792  if (input[3] != 0) {
5793  float f1;
5794  // Exponent
5795  f1 = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
5796  if (req_comp <= 2)
5797  output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
5798  else {
5799  output[0] = input[0] * f1;
5800  output[1] = input[1] * f1;
5801  output[2] = input[2] * f1;
5802  }
5803  if (req_comp == 2) output[1] = 1;
5804  if (req_comp == 4) output[3] = 1;
5805  }
5806  else {
5807  switch (req_comp) {
5808  case 4: output[3] = 1; /* fallthrough */
5809  case 3: output[0] = output[1] = output[2] = 0;
5810  break;
5811  case 2: output[1] = 1; /* fallthrough */
5812  case 1: output[0] = 0;
5813  break;
5814  }
5815  }
5816 }
5817 
5818 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
5819 {
5820  char buffer[STBI__HDR_BUFLEN];
5821  char *token;
5822  int valid = 0;
5823  int width, height;
5824  stbi_uc *scanline;
5825  float *hdr_data;
5826  int len;
5827  unsigned char count, value;
5828  int i, j, k, c1, c2, z;
5829 
5830 
5831  // Check identifier
5832  if (strcmp(stbi__hdr_gettoken(s, buffer), "#?RADIANCE") != 0)
5833  return stbi__errpf("not HDR", "Corrupt HDR image");
5834 
5835  // Parse header
5836  for (;;) {
5837  token = stbi__hdr_gettoken(s, buffer);
5838  if (token[0] == 0) break;
5839  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
5840  }
5841 
5842  if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
5843 
5844  // Parse width and height
5845  // can't use sscanf() if we're not using stdio!
5846  token = stbi__hdr_gettoken(s, buffer);
5847  if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
5848  token += 3;
5849  height = (int)strtol(token, &token, 10);
5850  while (*token == ' ') ++token;
5851  if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
5852  token += 3;
5853  width = (int)strtol(token, NULL, 10);
5854 
5855  *x = width;
5856  *y = height;
5857 
5858  if (comp) *comp = 3;
5859  if (req_comp == 0) req_comp = 3;
5860 
5861  // Read data
5862  hdr_data = (float *)stbi__malloc(height * width * req_comp * sizeof(float));
5863 
5864  // Load image data
5865  // image data is stored as some number of sca
5866  if (width < 8 || width >= 32768) {
5867  // Read flat data
5868  for (j = 0; j < height; ++j) {
5869  for (i = 0; i < width; ++i) {
5870  stbi_uc rgbe[4];
5871  main_decode_loop:
5872  stbi__getn(s, rgbe, 4);
5873  stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
5874  }
5875  }
5876  }
5877  else {
5878  // Read RLE-encoded data
5879  scanline = NULL;
5880 
5881  for (j = 0; j < height; ++j) {
5882  c1 = stbi__get8(s);
5883  c2 = stbi__get8(s);
5884  len = stbi__get8(s);
5885  if (c1 != 2 || c2 != 2 || (len & 0x80)) {
5886  // not run-length encoded, so we have to actually use THIS data as a decoded
5887  // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
5888  stbi_uc rgbe[4];
5889  rgbe[0] = (stbi_uc)c1;
5890  rgbe[1] = (stbi_uc)c2;
5891  rgbe[2] = (stbi_uc)len;
5892  rgbe[3] = (stbi_uc)stbi__get8(s);
5893  stbi__hdr_convert(hdr_data, rgbe, req_comp);
5894  i = 1;
5895  j = 0;
5896  STBI_FREE(scanline);
5897  goto main_decode_loop; // yes, this makes no sense
5898  }
5899  len <<= 8;
5900  len |= stbi__get8(s);
5901  if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
5902  if (scanline == NULL) scanline = (stbi_uc *)stbi__malloc(width * 4);
5903 
5904  for (k = 0; k < 4; ++k) {
5905  i = 0;
5906  while (i < width) {
5907  count = stbi__get8(s);
5908  if (count > 128) {
5909  // Run
5910  value = stbi__get8(s);
5911  count -= 128;
5912  for (z = 0; z < count; ++z)
5913  scanline[i++ * 4 + k] = value;
5914  }
5915  else {
5916  // Dump
5917  for (z = 0; z < count; ++z)
5918  scanline[i++ * 4 + k] = stbi__get8(s);
5919  }
5920  }
5921  }
5922  for (i = 0; i < width; ++i)
5923  stbi__hdr_convert(hdr_data + (j*width + i)*req_comp, scanline + i * 4, req_comp);
5924  }
5925  STBI_FREE(scanline);
5926  }
5927 
5928  return hdr_data;
5929 }
5930 
5931 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
5932 {
5933  char buffer[STBI__HDR_BUFLEN];
5934  char *token;
5935  int valid = 0;
5936 
5937  if (strcmp(stbi__hdr_gettoken(s, buffer), "#?RADIANCE") != 0) {
5938  stbi__rewind(s);
5939  return 0;
5940  }
5941 
5942  for (;;) {
5943  token = stbi__hdr_gettoken(s, buffer);
5944  if (token[0] == 0) break;
5945  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
5946  }
5947 
5948  if (!valid) {
5949  stbi__rewind(s);
5950  return 0;
5951  }
5952  token = stbi__hdr_gettoken(s, buffer);
5953  if (strncmp(token, "-Y ", 3)) {
5954  stbi__rewind(s);
5955  return 0;
5956  }
5957  token += 3;
5958  *y = (int)strtol(token, &token, 10);
5959  while (*token == ' ') ++token;
5960  if (strncmp(token, "+X ", 3)) {
5961  stbi__rewind(s);
5962  return 0;
5963  }
5964  token += 3;
5965  *x = (int)strtol(token, NULL, 10);
5966  *comp = 3;
5967  return 1;
5968 }
5969 #endif // STBI_NO_HDR
5970 
5971 #ifndef STBI_NO_BMP
5972 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
5973 {
5974  int hsz;
5975  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') {
5976  stbi__rewind(s);
5977  return 0;
5978  }
5979  stbi__skip(s, 12);
5980  hsz = stbi__get32le(s);
5981  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) {
5982  stbi__rewind(s);
5983  return 0;
5984  }
5985  if (hsz == 12) {
5986  *x = stbi__get16le(s);
5987  *y = stbi__get16le(s);
5988  }
5989  else {
5990  *x = stbi__get32le(s);
5991  *y = stbi__get32le(s);
5992  }
5993  if (stbi__get16le(s) != 1) {
5994  stbi__rewind(s);
5995  return 0;
5996  }
5997  *comp = stbi__get16le(s) / 8;
5998  return 1;
5999 }
6000 #endif
6001 
6002 #ifndef STBI_NO_PSD
6003 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
6004 {
6005  int channelCount;
6006  if (stbi__get32be(s) != 0x38425053) {
6007  stbi__rewind(s);
6008  return 0;
6009  }
6010  if (stbi__get16be(s) != 1) {
6011  stbi__rewind(s);
6012  return 0;
6013  }
6014  stbi__skip(s, 6);
6015  channelCount = stbi__get16be(s);
6016  if (channelCount < 0 || channelCount > 16) {
6017  stbi__rewind(s);
6018  return 0;
6019  }
6020  *y = stbi__get32be(s);
6021  *x = stbi__get32be(s);
6022  if (stbi__get16be(s) != 8) {
6023  stbi__rewind(s);
6024  return 0;
6025  }
6026  if (stbi__get16be(s) != 3) {
6027  stbi__rewind(s);
6028  return 0;
6029  }
6030  *comp = 4;
6031  return 1;
6032 }
6033 #endif
6034 
6035 #ifndef STBI_NO_PIC
6036 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
6037 {
6038  int act_comp = 0, num_packets = 0, chained;
6039  stbi__pic_packet packets[10];
6040 
6041  stbi__skip(s, 92);
6042 
6043  *x = stbi__get16be(s);
6044  *y = stbi__get16be(s);
6045  if (stbi__at_eof(s)) return 0;
6046  if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {
6047  stbi__rewind(s);
6048  return 0;
6049  }
6050 
6051  stbi__skip(s, 8);
6052 
6053  do {
6054  stbi__pic_packet *packet;
6055 
6056  if (num_packets == sizeof(packets) / sizeof(packets[0]))
6057  return 0;
6058 
6059  packet = &packets[num_packets++];
6060  chained = stbi__get8(s);
6061  packet->size = stbi__get8(s);
6062  packet->type = stbi__get8(s);
6063  packet->channel = stbi__get8(s);
6064  act_comp |= packet->channel;
6065 
6066  if (stbi__at_eof(s)) {
6067  stbi__rewind(s);
6068  return 0;
6069  }
6070  if (packet->size != 8) {
6071  stbi__rewind(s);
6072  return 0;
6073  }
6074  } while (chained);
6075 
6076  *comp = (act_comp & 0x10 ? 4 : 3);
6077 
6078  return 1;
6079 }
6080 #endif
6081 
6082 // *************************************************************************************************
6083 // Portable Gray Map and Portable Pixel Map loader
6084 // by Ken Miller
6085 //
6086 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6087 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6088 //
6089 // Known limitations:
6090 // Does not support comments in the header section
6091 // Does not support ASCII image data (formats P2 and P3)
6092 // Does not support 16-bit-per-channel
6093 
6094 #ifndef STBI_NO_PNM
6095 
6096 static int stbi__pnm_test(stbi__context *s)
6097 {
6098  char p, t;
6099  p = (char)stbi__get8(s);
6100  t = (char)stbi__get8(s);
6101  if (p != 'P' || (t != '5' && t != '6')) {
6102  stbi__rewind(s);
6103  return 0;
6104  }
6105  return 1;
6106 }
6107 
6108 static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
6109 {
6110  stbi_uc *out;
6111  if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
6112  return 0;
6113  *x = s->img_x;
6114  *y = s->img_y;
6115  *comp = s->img_n;
6116 
6117  out = (stbi_uc *)stbi__malloc(s->img_n * s->img_x * s->img_y);
6118  if (!out) return stbi__errpuc("outofmem", "Out of memory");
6119  stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
6120 
6121  if (req_comp && req_comp != s->img_n) {
6122  out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
6123  if (out == NULL) return out; // stbi__convert_format frees input on failure
6124  }
6125  return out;
6126 }
6127 
// Returns 1 for space, tab, newline, vertical tab, form feed and carriage
// return; 0 for every other character.
static int stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
6132 
6133 static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
6134 {
6135  while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
6136  *c = (char)stbi__get8(s);
6137 }
6138 
// Returns 1 if c is one of '0'..'9', otherwise 0.
static int stbi__pnm_isdigit(char c)
{
   if (c < '0')
      return 0;
   if (c > '9')
      return 0;
   return 1;
}
6143 
6144 static int stbi__pnm_getinteger(stbi__context *s, char *c)
6145 {
6146  int value = 0;
6147 
6148  while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
6149  value = value * 10 + (*c - '0');
6150  *c = (char)stbi__get8(s);
6151  }
6152 
6153  return value;
6154 }
6155 
6156 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
6157 {
6158  int maxv;
6159  char c, p, t;
6160 
6161  stbi__rewind(s);
6162 
6163  // Get identifier
6164  p = (char)stbi__get8(s);
6165  t = (char)stbi__get8(s);
6166  if (p != 'P' || (t != '5' && t != '6')) {
6167  stbi__rewind(s);
6168  return 0;
6169  }
6170 
6171  *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
6172 
6173  c = (char)stbi__get8(s);
6174  stbi__pnm_skip_whitespace(s, &c);
6175 
6176  *x = stbi__pnm_getinteger(s, &c); // read width
6177  stbi__pnm_skip_whitespace(s, &c);
6178 
6179  *y = stbi__pnm_getinteger(s, &c); // read height
6180  stbi__pnm_skip_whitespace(s, &c);
6181 
6182  maxv = stbi__pnm_getinteger(s, &c); // read max value
6183 
6184  if (maxv > 255)
6185  return stbi__err("max value > 255", "PPM image not 8-bit");
6186  else
6187  return 1;
6188 }
6189 #endif
6190 
6191 static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
6192 {
6193 #ifndef STBI_NO_JPEG
6194  if (stbi__jpeg_info(s, x, y, comp)) return 1;
6195 #endif
6196 
6197 #ifndef STBI_NO_PNG
6198  if (stbi__png_info(s, x, y, comp)) return 1;
6199 #endif
6200 
6201 #ifndef STBI_NO_GIF
6202  if (stbi__gif_info(s, x, y, comp)) return 1;
6203 #endif
6204 
6205 #ifndef STBI_NO_BMP
6206  if (stbi__bmp_info(s, x, y, comp)) return 1;
6207 #endif
6208 
6209 #ifndef STBI_NO_PSD
6210  if (stbi__psd_info(s, x, y, comp)) return 1;
6211 #endif
6212 
6213 #ifndef STBI_NO_PIC
6214  if (stbi__pic_info(s, x, y, comp)) return 1;
6215 #endif
6216 
6217 #ifndef STBI_NO_PNM
6218  if (stbi__pnm_info(s, x, y, comp)) return 1;
6219 #endif
6220 
6221 #ifndef STBI_NO_HDR
6222  if (stbi__hdr_info(s, x, y, comp)) return 1;
6223 #endif
6224 
6225  // test tga last because it's a crappy test!
6226 #ifndef STBI_NO_TGA
6227  if (stbi__tga_info(s, x, y, comp))
6228  return 1;
6229 #endif
6230  return stbi__err("unknown image type", "Image not of any known type, or corrupt");
6231 }
6232 
6233 #ifndef STBI_NO_STDIO
6234 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
6235 {
6236  FILE *f = stbi__fopen(filename, "rb");
6237  int result;
6238  if (!f) return stbi__err("can't fopen", "Unable to open file");
6239  result = stbi_info_from_file(f, x, y, comp);
6240  fclose(f);
6241  return result;
6242 }
6243 
6244 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
6245 {
6246  int r;
6247  stbi__context s;
6248  long pos = ftell(f);
6249  stbi__start_file(&s, f);
6250  r = stbi__info_main(&s, x, y, comp);
6251  fseek(f, pos, SEEK_SET);
6252  return r;
6253 }
6254 #endif // !STBI_NO_STDIO
6255 
6256 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
6257 {
6258  stbi__context s;
6259  stbi__start_mem(&s, buffer, len);
6260  return stbi__info_main(&s, x, y, comp);
6261 }
6262 
6263 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
6264 {
6265  stbi__context s;
6266  stbi__start_callbacks(&s, (stbi_io_callbacks *)c, user);
6267  return stbi__info_main(&s, x, y, comp);
6268 }
6269 
6270 #endif // STB_IMAGE_IMPLEMENTATION
6271 
6272 /*
6273 revision history:
6274 2.02 (2015-01-19) fix incorrect assert, fix warning
6275 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
6276 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
6277 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
6278 progressive JPEG (stb)
6279 PGM/PPM support (Ken Miller)
6280 STBI_MALLOC,STBI_REALLOC,STBI_FREE
6281 GIF bugfix -- seemingly never worked
6282 STBI_NO_*, STBI_ONLY_*
6283 1.48 (2014-12-14) fix incorrectly-named assert()
6284 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
6285 optimize PNG (ryg)
6286 fix bug in interlaced PNG with user-specified channel count (stb)
6287 1.46 (2014-08-26)
6288 fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
6289 1.45 (2014-08-16)
6290 fix MSVC-ARM internal compiler error by wrapping malloc
6291 1.44 (2014-08-07)
6292 various warning fixes from Ronny Chevalier
6293 1.43 (2014-07-15)
6294 fix MSVC-only compiler problem in code changed in 1.42
6295 1.42 (2014-07-09)
6296 don't define _CRT_SECURE_NO_WARNINGS (affects user code)
6297 fixes to stbi__cleanup_jpeg path
6298 added STBI_ASSERT to avoid requiring assert.h
6299 1.41 (2014-06-25)
6300 fix search&replace from 1.36 that messed up comments/error messages
6301 1.40 (2014-06-22)
6302 fix gcc struct-initialization warning
6303 1.39 (2014-06-15)
6304 fix to TGA optimization when req_comp != number of components in TGA;
6305 fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
6306 add support for BMP version 5 (more ignored fields)
6307 1.38 (2014-06-06)
6308 suppress MSVC warnings on integer casts truncating values
6309 fix accidental rename of 'skip' field of I/O
6310 1.37 (2014-06-04)
6311 remove duplicate typedef
6312 1.36 (2014-06-03)
6313 convert to header file single-file library
6314 if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
6315 1.35 (2014-05-27)
6316 various warnings
6317 fix broken STBI_SIMD path
6318 fix bug where stbi_load_from_file no longer left file pointer in correct place
6319 fix broken non-easy path for 32-bit BMP (possibly never used)
6320 TGA optimization by Arseny Kapoulkine
6321 1.34 (unknown)
6322 use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
6323 1.33 (2011-07-14)
6324 make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
6325 1.32 (2011-07-13)
6326 support for "info" function for all supported filetypes (SpartanJ)
6327 1.31 (2011-06-20)
6328 a few more leak fixes, bug in PNG handling (SpartanJ)
6329 1.30 (2011-06-11)
6330 added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
6331 removed deprecated format-specific test/load functions
6332 removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
6333 error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
6334 fix inefficiency in decoding 32-bit BMP (David Woo)
6335 1.29 (2010-08-16)
6336 various warning fixes from Aurelien Pocheville
6337 1.28 (2010-08-01)
6338 fix bug in GIF palette transparency (SpartanJ)
6339 1.27 (2010-08-01)
6340 cast-to-stbi_uc to fix warnings
6341 1.26 (2010-07-24)
6342 fix bug in file buffering for PNG reported by SpartanJ
6343 1.25 (2010-07-17)
6344 refix trans_data warning (Won Chun)
6345 1.24 (2010-07-12)
6346 perf improvements reading from files on platforms with lock-heavy fgetc()
6347 minor perf improvements for jpeg
6348 deprecated type-specific functions so we'll get feedback if they're needed
6349 attempt to fix trans_data warning (Won Chun)
6350 1.23 fixed bug in iPhone support
6351 1.22 (2010-07-10)
6352 removed image *writing* support
6353 stbi_info support from Jetro Lauha
6354 GIF support from Jean-Marc Lienher
6355 iPhone PNG-extensions from James Brown
6356 warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
6357 1.21 fix use of 'stbi_uc' in header (reported by jon blow)
6358 1.20 added support for Softimage PIC, by Tom Seddon
6359 1.19 bug in interlaced PNG corruption check (found by ryg)
6360 1.18 2008-08-02
6361 fix a threading bug (local mutable static)
6362 1.17 support interlaced PNG
6363 1.16 major bugfix - stbi__convert_format converted one too many pixels
6364 1.15 initialize some fields for thread safety
6365 1.14 fix threadsafe conversion bug
6366 header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
6367 1.13 threadsafe
6368 1.12 const qualifiers in the API
6369 1.11 Support installable IDCT, colorspace conversion routines
6370 1.10 Fixes for 64-bit (don't use "unsigned long")
6371 optimized upsampling by Fabian "ryg" Giesen
6372 1.09 Fix format-conversion for PSD code (bad global variables!)
6373 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
6374 1.07 attempt to fix C++ warning/errors again
6375 1.06 attempt to fix C++ warning/errors again
6376 1.05 fix TGA loading to return correct *comp and use good luminance calc
6377 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
6378 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
6379 1.02 support for (subset of) HDR files, float interface for preferred access to them
6380 1.01 fix bug: possible bug in handling right-side up bmps... not sure
6381 fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
6382 1.00 interface to zlib that skips zlib header
6383 0.99 correct handling of alpha in palette
6384 0.98 TGA loader by lonesock; dynamically add loaders (untested)
6385 0.97 jpeg errors on too large a file; also catch another malloc failure
6386 0.96 fix detection of invalid v value - particleman@mollyrocket forum
6387 0.95 during header scan, seek to markers in case of padding
6388 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
6389 0.93 handle jpegtran output; verbose errors
6390 0.92 read 4,8,16,24,32-bit BMP files of several formats
6391 0.91 output 24-bit Windows 3.0 BMP files
6392 0.90 fix a few more warnings; bump version number to approach 1.0
6393 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
6394 0.60 fix compiling as c++
6395 0.59 fix warnings: merge Dave Moore's -Wall fixes
6396 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
6397 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
6398 0.56 fix bug: zlib uncompressed mode len vs. nlen
6399 0.55 fix bug: restart_interval not initialized to 0
6400 0.54 allow NULL for 'int *comp'
6401 0.53 fix bug in png 3->4; speedup png decoding
6402 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
6403 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
6404 on 'test' only check type, not whether we support this variant
6405 0.50 first released version
6406 */
#define STBIDEF
Definition: stb_image.h:405
STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
Definition: stb_image.h:391
STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
STBIDEF const char * stbi_failure_reason(void)
STBIDEF void stbi_hdr_to_ldr_scale(float scale)
STBIDEF float * stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
STBIDEF stbi_uc * stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
STBIDEF float * stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
STBIDEF float * stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
STBIDEF void stbi_ldr_to_hdr_gamma(float gamma)
Definition: stb_image.h:392
Definition: stb_image.h:417
STBIDEF stbi_uc * stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
STBIDEF void stbi_hdr_to_ldr_gamma(float gamma)
STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
STBIDEF void stbi_ldr_to_hdr_scale(float scale)
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
STBIDEF int stbi_is_hdr_from_file(FILE *f)
STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
Definition: stb_image.h:393
STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp)
Definition: stb_image.h:390
STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
STBIDEF int stbi_is_hdr(char const *filename)
unsigned char stbi_uc
Definition: stb_image.h:396
STBIDEF stbi_uc * stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
STBIDEF float * stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
STBIDEF char * stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen)
STBIDEF char * stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
STBIDEF char * stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen)
Definition: stb_image.h:388
STBIDEF stbi_uc * stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
STBIDEF char * stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)