https://github.com/libsdl-org/libtiff/commit/ef0edff7f7b3023804e24276b942eb39fc25ea40
From 189d65779275132c86abd1e06cdab8a080645b32 Mon Sep 17 00:00:00 2001
From: Even Rouault <[EMAIL REDACTED]>
Date: Thu, 10 Mar 2022 12:14:31 +0100
Subject: [PATCH 1/3] tif_lzw.c: make LZW_CHECKEOS non-optional
---
libtiff/tif_lzw.c | 45 +++++++++++++++------------------------------
1 file changed, 15 insertions(+), 30 deletions(-)
diff --git a/libtiff/tif_lzw.c b/libtiff/tif_lzw.c
index 85c895fb..ec976e1f 100644
--- a/libtiff/tif_lzw.c
+++ b/libtiff/tif_lzw.c
@@ -52,13 +52,6 @@
* Future revisions to the TIFF spec are expected to "clarify this issue".
*/
#define LZW_COMPAT /* include backwards compatibility code */
-/*
- * Each strip of data is supposed to be terminated by a CODE_EOI.
- * If the following #define is included, the decoder will also
- * check for end-of-strip w/o seeing this code. This makes the
- * library more robust, but also slower.
- */
-#define LZW_CHECKEOS /* include checks for strips w/o EOI code */
#define MAXCODE(n) ((1L<<(n))-1)
/*
@@ -131,10 +124,8 @@ typedef struct {
/* Decoding specific data */
long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */
tmsize_t dec_restart; /* restart count */
-#ifdef LZW_CHECKEOS
uint64_t dec_bitsleft; /* available bits in raw data */
tmsize_t old_tif_rawcc; /* value of tif_rawcc at the end of the previous TIFLZWDecode() call */
-#endif
decodeFunc dec_decode; /* regular or backwards compatible */
code_t* dec_codep; /* current recognized code */
code_t* dec_oldcodep; /* previously recognized code */
@@ -167,25 +158,21 @@ static void cl_hash(LZWCodecState*);
* LZW Decoder.
*/
-#ifdef LZW_CHECKEOS
/*
* This check shouldn't be necessary because each
* strip is suppose to be terminated with CODE_EOI.
*/
-#define NextCode(_tif, _sp, _bp, _code, _get) { \
- if ((_sp)->dec_bitsleft < (uint64_t)nbits) { \
+#define NextCode(_tif, _sp, _bp, _code, _get, dec_bitsleft) { \
+ if (dec_bitsleft < (uint64_t)nbits) { \
TIFFWarningExt(_tif->tif_clientdata, module, \
"LZWDecode: Strip %"PRIu32" not terminated with EOI code", \
_tif->tif_curstrip); \
_code = CODE_EOI; \
} else { \
_get(_sp,_bp,_code); \
- (_sp)->dec_bitsleft -= nbits; \
+ dec_bitsleft -= nbits; \
} \
}
-#else
-#define NextCode(tif, sp, bp, code, get) get(sp, bp, code)
-#endif
static int
LZWFixupTags(TIFF* tif)
@@ -316,10 +303,8 @@ LZWPreDecode(TIFF* tif, uint16_t s)
sp->dec_restart = 0;
sp->dec_nbitsmask = MAXCODE(BITS_MIN);
-#ifdef LZW_CHECKEOS
sp->dec_bitsleft = 0;
sp->old_tif_rawcc = 0;
-#endif
sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
/*
* Zero entries that are not yet filled in. We do
@@ -417,9 +402,8 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
}
bp = (uint8_t*)tif->tif_rawcp;
-#ifdef LZW_CHECKEOS
sp->dec_bitsleft += (((uint64_t)tif->tif_rawcc - sp->old_tif_rawcc) << 3);
-#endif
+ uint64_t dec_bitsleft = sp->dec_bitsleft;
nbits = sp->lzw_nbits;
nextdata = sp->lzw_nextdata;
nextbits = sp->lzw_nextbits;
@@ -429,7 +413,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
maxcodep = sp->dec_maxcodep;
while (occ > 0) {
- NextCode(tif, sp, bp, code, GetNextCode);
+ NextCode(tif, sp, bp, code, GetNextCode, dec_bitsleft);
if (code == CODE_EOI)
break;
if (code == CODE_CLEAR) {
@@ -440,7 +424,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
nbits = BITS_MIN;
nbitsmask = MAXCODE(BITS_MIN);
maxcodep = sp->dec_codetab + nbitsmask-1;
- NextCode(tif, sp, bp, code, GetNextCode);
+ NextCode(tif, sp, bp, code, GetNextCode, dec_bitsleft);
} while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */
if (code == CODE_EOI)
break;
@@ -543,9 +527,8 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
tif->tif_rawcc -= (tmsize_t)((uint8_t*) bp - tif->tif_rawcp );
tif->tif_rawcp = (uint8_t*) bp;
-#ifdef LZW_CHECKEOS
sp->old_tif_rawcc = tif->tif_rawcc;
-#endif
+ sp->dec_bitsleft = dec_bitsleft;
sp->lzw_nbits = (unsigned short) nbits;
sp->lzw_nextdata = nextdata;
sp->lzw_nextbits = nextbits;
@@ -636,9 +619,10 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
}
bp = (uint8_t*)tif->tif_rawcp;
-#ifdef LZW_CHECKEOS
+
sp->dec_bitsleft += (((uint64_t)tif->tif_rawcc - sp->old_tif_rawcc) << 3);
-#endif
+ uint64_t dec_bitsleft = sp->dec_bitsleft;
+
nbits = sp->lzw_nbits;
nextdata = sp->lzw_nextdata;
nextbits = sp->lzw_nextbits;
@@ -648,7 +632,7 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
maxcodep = sp->dec_maxcodep;
while (occ > 0) {
- NextCode(tif, sp, bp, code, GetNextCodeCompat);
+ NextCode(tif, sp, bp, code, GetNextCodeCompat, dec_bitsleft);
if (code == CODE_EOI)
break;
if (code == CODE_CLEAR) {
@@ -659,7 +643,7 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
nbits = BITS_MIN;
nbitsmask = MAXCODE(BITS_MIN);
maxcodep = sp->dec_codetab + nbitsmask;
- NextCode(tif, sp, bp, code, GetNextCodeCompat);
+ NextCode(tif, sp, bp, code, GetNextCodeCompat, dec_bitsleft);
} while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */
if (code == CODE_EOI)
break;
@@ -752,9 +736,10 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
tif->tif_rawcc -= (tmsize_t)((uint8_t*) bp - tif->tif_rawcp );
tif->tif_rawcp = (uint8_t*) bp;
-#ifdef LZW_CHECKEOS
+
sp->old_tif_rawcc = tif->tif_rawcc;
-#endif
+ sp->dec_bitsleft = dec_bitsleft;
+
sp->lzw_nbits = (unsigned short)nbits;
sp->lzw_nextdata = nextdata;
sp->lzw_nextbits = nextbits;
From a2aa4074e90b63adc5d8d06679dc567de36f74cd Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Thu, 10 Mar 2022 12:19:01 +0100
Subject: [PATCH 2/3] LZWDecode(): modest speed improvement: fetch input data
by chunks of the largest natural integer of the architecture
---
libtiff/tif_lzw.c | 141 +++++++++++++++++++++++++++++++++++-----------
1 file changed, 107 insertions(+), 34 deletions(-)
diff --git a/libtiff/tif_lzw.c b/libtiff/tif_lzw.c
index ec976e1f..0124df9f 100644
--- a/libtiff/tif_lzw.c
+++ b/libtiff/tif_lzw.c
@@ -38,6 +38,10 @@
#include <stdio.h>
+/* Select the plausible largest natural integer type for the architecture */
+#define SIZEOF_WORDTYPE SIZEOF_SIZE_T
+typedef size_t WordType;
+
/*
* NB: The 5.0 spec describes a different algorithm than Aldus
* implements. Specifically, Aldus does code length transitions
@@ -85,7 +89,7 @@ typedef struct {
unsigned short nbits; /* # of bits/code */
unsigned short maxcode; /* maximum code for lzw_nbits */
unsigned short free_ent; /* next free entry in hash table */
- unsigned long nextdata; /* next bits of i/o */
+ WordType nextdata; /* next bits of i/o */
long nextbits; /* # of valid bits in lzw_nextdata */
int rw_mode; /* preserve rw_mode from init */
@@ -158,22 +162,6 @@ static void cl_hash(LZWCodecState*);
* LZW Decoder.
*/
-/*
- * This check shouldn't be necessary because each
- * strip is suppose to be terminated with CODE_EOI.
- */
-#define NextCode(_tif, _sp, _bp, _code, _get, dec_bitsleft) { \
- if (dec_bitsleft < (uint64_t)nbits) { \
- TIFFWarningExt(_tif->tif_clientdata, module, \
- "LZWDecode: Strip %"PRIu32" not terminated with EOI code", \
- _tif->tif_curstrip); \
- _code = CODE_EOI; \
- } else { \
- _get(_sp,_bp,_code); \
- dec_bitsleft -= nbits; \
- } \
-}
-
static int
LZWFixupTags(TIFF* tif)
{
@@ -322,16 +310,77 @@ LZWPreDecode(TIFF* tif, uint16_t s)
/*
* Decode a "hunk of data".
*/
-#define GetNextCode(sp, bp, code) { \
- nextdata = (nextdata<<8) | *(bp)++; \
- nextbits += 8; \
- if (nextbits < nbits) { \
- nextdata = (nextdata<<8) | *(bp)++; \
- nextbits += 8; \
- } \
- code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \
- nextbits -= nbits; \
-}
+
+/* Get the next 32 or 64-bit from the input data */
+#ifdef WORDS_BIGENDIAN
+# define GetNextData(nextdata, bp) memcpy(&nextdata, bp, sizeof(nextdata))
+#elif SIZEOF_WORDTYPE == 8
+# if defined(__GNUC__) && defined(__x86_64__)
+# define GetNextData(nextdata, bp) nextdata = __builtin_bswap64(*(uint64_t*)(bp))
+# elif defined(_M_X64)
+# define GetNextData(nextdata, bp) nextdata = _byteswap_uint64(*(uint64_t*)(bp))
+# elif defined(__GNUC__)
+# define GetNextData(nextdata, bp) memcpy(&nextdata, bp, sizeof(nextdata)); \
+ nextdata = __builtin_bswap64(nextdata)
+# else
+# define GetNextData(nextdata, bp) nextdata = (((uint64_t)bp[0]) << 56) | \
+ (((uint64_t)bp[1]) << 48) | \
+ (((uint64_t)bp[2]) << 40) | \
+ (((uint64_t)bp[3]) << 32) | \
+ (((uint64_t)bp[4]) << 24) | \
+ (((uint64_t)bp[5]) << 16) | \
+ (((uint64_t)bp[6]) << 8) | \
+ (((uint64_t)bp[7]))
+# endif
+#elif SIZEOF_WORDTYPE == 4
+# if defined(__GNUC__) && defined(__i386__)
+# define GetNextData(nextdata, bp) nextdata = __builtin_bswap32(*(uint32_t*)(bp))
+# elif defined(_M_X86)
+# define GetNextData(nextdata, bp) nextdata = _byteswap_ulong(*(unsigned long*)(bp))
+# elif defined(__GNUC__)
+# define GetNextData(nextdata, bp) memcpy(&nextdata, bp, sizeof(nextdata)); \
+ nextdata = __builtin_bswap32(nextdata)
+# else
+# define GetNextData(nextdata, bp) nextdata = (((uint32_t)bp[0]) << 24) | \
+ (((uint32_t)bp[1]) << 16) | \
+ (((uint32_t)bp[2]) << 8) | \
+ (((uint32_t)bp[3]))
+# endif
+#else
+# error "Unhandled SIZEOF_WORDTYPE"
+#endif
+
+#define GetNextCodeLZW() do { \
+ nextbits -= nbits; \
+ if (nextbits < 0) { \
+ if (dec_bitsleft >= 8 * SIZEOF_WORDTYPE) { \
+ unsigned codetmp = (unsigned)(nextdata << (-nextbits)); \
+ GetNextData(nextdata, bp); \
+ bp += SIZEOF_WORDTYPE; \
+ nextbits += 8 * SIZEOF_WORDTYPE; \
+ dec_bitsleft -= 8 * SIZEOF_WORDTYPE; \
+ code = (WordType)((codetmp | (nextdata >> nextbits)) & nbitsmask); \
+ break; \
+ } \
+ else {\
+ if( dec_bitsleft < 8) { \
+ goto no_eoi; \
+ }\
+ nextdata = (nextdata<<8) | *(bp)++; \
+ nextbits += 8; \
+ dec_bitsleft -= 8; \
+ if( nextbits < 0 ) { \
+ if( dec_bitsleft < 8) { \
+ goto no_eoi; \
+ }\
+ nextdata = (nextdata<<8) | *(bp)++; \
+ nextbits += 8; \
+ dec_bitsleft -= 8; \
+ } \
+ } \
+ } \
+ code = (WordType)((nextdata >> nextbits) & nbitsmask); \
+} while(0)
static void
codeLoop(TIFF* tif, const char* module)
@@ -350,10 +399,10 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
tmsize_t occ = occ0;
uint8_t *tp;
uint8_t *bp;
- hcode_t code;
+ WordType code;
int len;
long nbits, nextbits, nbitsmask;
- unsigned long nextdata;
+ WordType nextdata;
code_t *codep, *free_entp, *maxcodep, *oldcodep;
(void) s;
@@ -413,7 +462,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
maxcodep = sp->dec_maxcodep;
while (occ > 0) {
- NextCode(tif, sp, bp, code, GetNextCode, dec_bitsleft);
+ GetNextCodeLZW();
if (code == CODE_EOI)
break;
if (code == CODE_CLEAR) {
@@ -424,7 +473,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
nbits = BITS_MIN;
nbitsmask = MAXCODE(BITS_MIN);
maxcodep = sp->dec_codetab + nbitsmask-1;
- NextCode(tif, sp, bp, code, GetNextCode, dec_bitsleft);
+ GetNextCodeLZW();
} while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */
if (code == CODE_EOI)
break;
@@ -544,9 +593,32 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
return (0);
}
return (1);
+
+no_eoi:
+ TIFFErrorExt(tif->tif_clientdata, module,
+ "LZWDecode: Strip %"PRIu32" not terminated with EOI code",
+ tif->tif_curstrip);
+ return 0;
}
#ifdef LZW_COMPAT
+
+/*
+ * This check shouldn't be necessary because each
+ * strip is suppose to be terminated with CODE_EOI.
+ */
+#define NextCode(_tif, _sp, _bp, _code, _get, dec_bitsleft) { \
+ if (dec_bitsleft < (uint64_t)nbits) { \
+ TIFFWarningExt(_tif->tif_clientdata, module, \
+ "LZWDecode: Strip %"PRIu32" not terminated with EOI code", \
+ _tif->tif_curstrip); \
+ _code = CODE_EOI; \
+ } else { \
+ _get(_sp,_bp,_code); \
+ dec_bitsleft -= nbits; \
+ } \
+}
+
/*
* Decode a "hunk of data" for old images.
*/
@@ -573,7 +645,8 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
uint8_t *bp;
int code, nbits;
int len;
- long nextbits, nextdata, nbitsmask;
+ long nextbits, nbitsmask;
+ WordType nextdata;
code_t *codep, *free_entp, *maxcodep, *oldcodep;
(void) s;
@@ -858,7 +931,7 @@ LZWEncode(TIFF* tif, uint8_t* bp, tmsize_t cc, uint16_t s)
hcode_t ent;
long disp;
tmsize_t incount, outcount, checkpoint;
- unsigned long nextdata;
+ WordType nextdata;
long nextbits;
int free_ent, maxcode, nbits;
uint8_t* op;
@@ -1022,7 +1095,7 @@ LZWPostEncode(TIFF* tif)
register LZWCodecState *sp = EncoderState(tif);
uint8_t* op = tif->tif_rawcp;
long nextbits = sp->lzw_nextbits;
- unsigned long nextdata = sp->lzw_nextdata;
+ WordType nextdata = sp->lzw_nextdata;
tmsize_t outcount = sp->enc_outcount;
int nbits = sp->lzw_nbits;
From 3079627ea0dee150e6a208cec8381de611bb842b Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Thu, 10 Mar 2022 12:32:32 +0100
Subject: [PATCH 3/3] LZWDecode(): major speed improvements
This mostly comes from dealing specifically with codes that expand to
2, 3 and 4 bytes or more to avoid branches, and dealing with longer
repeated sequences (e.g. lots of bytes to 0).
With the following bench.c, execution time is 32% faster on a 8000x8000
4 bands uint16 predictor=2 image that has a 1.6x compression ratio. with
gcc 9.4.0, on x86_64
bench.c:
#include “tiffio.h”
#include <stdlib.h>
#include <stdint.h>
int main(int argc, char* argv[])
{
if( argc != 2 )
{
fprintf(stderr, “Usage: ./bench my.tif\n”);
exit(1);
}
TIFF* tif = TIFFOpen(argv[1], “r”);
if( tif == NULL )
{
fprintf(stderr, “Cannot open %s\n”, argv[1]);
exit(1);
}
if( !TIFFIsTiled(tif) )
{
fprintf(stderr, “Only tiled image supported\n”);
exit(1);
}
int tilesize = (int)TIFFTileSize(tif);
char* c = malloc(tilesize);
if( c == NULL )
{
fprintf(stderr, “Out of memory\n”);
exit(1);
}
const uint32_t numtiles = TIFFNumberOfTiles(tif);
//int numloops = 4 * (int)(1e9 / ((double)tilesize * numtiles));
//printf(“Number of loops: %d\n”, numloops);
int numloops = 1;
for(int i =0; i< numloops; i++)
{
for(uint32_t tileindex = 0; tileindex < numtiles; tileindex++ )
{
TIFFReadEncodedTile(tif, tileindex, c, tilesize);
}
}
free(c);
TIFFClose(tif);
return 0;
}
---
libtiff/tif_lzw.c | 381 +++++++++++++++++++++++++++++-----------------
1 file changed, 244 insertions(+), 137 deletions(-)
diff --git a/libtiff/tif_lzw.c b/libtiff/tif_lzw.c
index 0124df9f..19e0e4aa 100644
--- a/libtiff/tif_lzw.c
+++ b/libtiff/tif_lzw.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 1988-1997 Sam Leffler
* Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ * Copyright (c) 2022 Even Rouault
*
* Permission to use, copy, modify, distribute, and sell this software and
* its documentation for any purpose is hereby granted without fee, provided
@@ -36,6 +37,7 @@
*/
#include "tif_predict.h"
+#include <stdbool.h>
#include <stdio.h>
/* Select the plausible largest natural integer type for the architecture */
@@ -116,8 +118,10 @@ typedef struct {
typedef struct code_ent {
struct code_ent *next;
unsigned short length; /* string len, including this token */
- unsigned char value; /* data value */
+ /* firstchar should be placed immediately before value in this structure */
unsigned char firstchar; /* first token of string */
+ unsigned char value; /* data value */
+ bool repeated;
} code_t;
typedef int (*decodeFunc)(TIFF*, uint8_t*, tmsize_t, uint16_t);
@@ -211,17 +215,17 @@ LZWSetupDecode(TIFF* tif)
*/
code = 255;
do {
- sp->dec_codetab[code].value = (unsigned char)code;
sp->dec_codetab[code].firstchar = (unsigned char)code;
+ sp->dec_codetab[code].value = (unsigned char)code;
+ sp->dec_codetab[code].repeated = true;
sp->dec_codetab[code].length = 1;
sp->dec_codetab[code].next = NULL;
} while (code--);
/*
- * Zero-out the unused entries
- */
- /* Silence false positive */
- /* coverity[overrun-buffer-arg] */
- _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0,
+ * Zero-out the unused entries */
+ /* Silence false positive */
+ /* coverity[overrun-buffer-arg] */
+ memset(&sp->dec_codetab[CODE_CLEAR], 0,
(CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
}
return (1);
@@ -292,8 +296,8 @@ LZWPreDecode(TIFF* tif, uint16_t s)
sp->dec_restart = 0;
sp->dec_nbitsmask = MAXCODE(BITS_MIN);
sp->dec_bitsleft = 0;
- sp->old_tif_rawcc = 0;
- sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
+ sp->old_tif_rawcc = 0;
+ sp->dec_free_entp = sp->dec_codetab - 1 ; // + CODE_FIRST;
/*
* Zero entries that are not yet filled in. We do
* this to guard against bogus input data that causes
@@ -301,8 +305,7 @@ LZWPreDecode(TIFF* tif, uint16_t s)
* come up with a way to safely bounds-check input codes
* while decoding then you can remove this operation.
*/
- _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
- sp->dec_oldcodep = &sp->dec_codetab[-1];
+ sp->dec_oldcodep = &sp->dec_codetab[0];
sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
return (1);
}
@@ -382,14 +385,6 @@ LZWPreDecode(TIFF* tif, uint16_t s)
code = (WordType)((nextdata >> nextbits) & nbitsmask); \
} while(0)
-static void
-codeLoop(TIFF* tif, const char* module)
-{
- TIFFErrorExt(tif->tif_clientdata, module,
- "Bogus encoding, loop in the code table; scanline %"PRIu32,
- tif->tif_row);
-}
-
static int
LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
{
@@ -397,13 +392,10 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
LZWCodecState *sp = DecoderState(tif);
uint8_t *op = (uint8_t*) op0;
tmsize_t occ = occ0;
- uint8_t *tp;
uint8_t *bp;
- WordType code;
- int len;
long nbits, nextbits, nbitsmask;
- WordType nextdata;
- code_t *codep, *free_entp, *maxcodep, *oldcodep;
+ WordType nextdata;
+ code_t *free_entp, *maxcodep, *oldcodep;
(void) s;
assert(sp != NULL);
@@ -415,7 +407,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
if (sp->dec_restart) {
tmsize_t residue;
- codep = sp->dec_codep;
+ code_t* codep = sp->dec_codep;
residue = codep->length - sp->dec_restart;
if (residue > occ) {
/*
@@ -429,7 +421,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
codep = codep->next;
} while (--residue > occ && codep);
if (codep) {
- tp = op + occ;
+ uint8_t* tp = op + occ;
do {
*--tp = codep->value;
codep = codep->next;
@@ -442,7 +434,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
*/
op += residue;
occ -= residue;
- tp = op;
+ uint8_t* tp = op;
do {
*--tp = codep->value;
codep = codep->next;
@@ -460,120 +452,232 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
oldcodep = sp->dec_oldcodep;
free_entp = sp->dec_free_entp;
maxcodep = sp->dec_maxcodep;
+ code_t* const dec_codetab = sp->dec_codetab;
+ code_t* codep;
+
+ if (occ == 0) {
+ goto after_loop;
+ }
+
+begin:
+ {
+ WordType code;
+ GetNextCodeLZW();
+ codep = dec_codetab + code;
+ if (code >= CODE_FIRST)
+ goto code_above_or_equal_to_258;
+ if (code < 256)
+ goto code_below_256;
+ if (code == CODE_EOI)
+ goto after_loop;
+ goto code_clear;
+
+code_below_256:
+ {
+ if (codep > free_entp)
+ goto error_code;
+ free_entp->next = oldcodep;
+ free_entp->firstchar = oldcodep->firstchar;
+ free_entp->length = oldcodep->length+1;
+ free_entp->value = (uint8_t)code;
+ free_entp->repeated = (bool)(oldcodep->repeated & !(oldcodep->value - code));
+ if (++free_entp > maxcodep) {
+ if (++nbits > BITS_MAX) /* should not happen for a conformant encoder */
+ nbits = BITS_MAX;
+ nbitsmask = MAXCODE(nbits);
+ maxcodep = dec_codetab + nbitsmask-1;
+ if( free_entp >= &dec_codetab[CSIZE] )
+ {
+ /* At that point, the next valid states are either EOI or a */
+ /* CODE_CLEAR. If a regular code is read, at the next */
+ /* attempt at registering a new entry, we will error out */
+ /* due to setting free_entp before any valid code */
+ free_entp = dec_codetab - 1;
+ }
+ }
+ oldcodep = codep;
+ *op++ = (uint8_t)code;
+ occ--;
+ if (occ == 0)
+ goto after_loop;
+ goto begin;
+ }
- while (occ > 0) {
- GetNextCodeLZW();
- if (code == CODE_EOI)
- break;
- if (code == CODE_CLEAR) {
- do {
- free_entp = sp->dec_codetab + CODE_FIRST;
- _TIFFmemset(free_entp, 0,
- (CSIZE - CODE_FIRST) * sizeof (code_t));
- nbits = BITS_MIN;
- nbitsmask = MAXCODE(BITS_MIN);
- maxcodep = sp->dec_codetab + nbitsmask-1;
- GetNextCodeLZW();
- } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */
- if (code == CODE_EOI)
- break;
- if (code > CODE_CLEAR) {
- TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
- "LZWDecode: Corrupted LZW table at scanline %"PRIu32,
- tif->tif_row);
- return (0);
- }
- *op++ = (uint8_t)code;
- occ--;
- oldcodep = sp->dec_codetab + code;
- continue;
- }
- codep = sp->dec_codetab + code;
+code_above_or_equal_to_258:
+ {
+ /*
+ * Add the new entry to the code table.
+ */
+
+ if (codep >= free_entp)
+ {
+ if (codep != free_entp)
+ goto error_code;
+ free_entp->value = oldcodep->firstchar;
+ }
+ else
+ {
+ free_entp->value = codep->firstchar;
+ }
+ free_entp->repeated = (bool)(oldcodep->repeated & !(oldcodep->value - free_entp->value));
+ free_entp->next = oldcodep;
+
+ free_entp->firstchar = oldcodep->firstchar;
+ free_entp->length = oldcodep->length+1;
+ if (++free_entp > maxcodep) {
+ if (++nbits > BITS_MAX) /* should not happen for a conformant encoder */
+ nbits = BITS_MAX;
+ nbitsmask = MAXCODE(nbits);
+ maxcodep = dec_codetab + nbitsmask-1;
+ if (free_entp >= &dec_codetab[CSIZE])
+ {
+ /* At that point, the next valid states are either EOI or a */
+ /* CODE_CLEAR. If a regular code is read, at the next */
+ /* attempt at registering a new entry, we will error out */
+ /* due to setting free_entp before any valid code */
+ free_entp = dec_codetab - 1;
+ }
+ }
+ oldcodep = codep;
+
+ /*
+ * Code maps to a string, copy string
+ * value to output (written in reverse).
+ */
+ /* tiny bit faster on x86_64 to store in unsigned short than int */
+ unsigned short len = codep->length;
+
+ if (len < 3) /* equivalent to len == 2 given all other conditions */
+ {
+ if (occ <= 2)
+ {
+ if (occ == 2)
+ {
+ memcpy(op, &(codep->firstchar), 2);
+ op += 2;
+ occ -= 2;
+ goto after_loop;
+ }
+ goto too_short_buffer;
+ }
- /*
- * Add the new entry to the code table.
- */
- if (free_entp < &sp->dec_codetab[0] ||
- free_entp >= &sp->dec_codetab[CSIZE]) {
- TIFFErrorExt(tif->tif_clientdata, module,
- "Corrupted LZW table at scanline %"PRIu32,
- tif->tif_row);
- return (0);
- }
+ memcpy(op, &(codep->firstchar), 2);
+ op += 2;
+ occ -= 2;
+ goto begin; /* we can save the comparison occ > 0 */
+ }
+
+ if (len == 3)
+ {
+ if (occ <= 3)
+ {
+ if (occ == 3)
+ {
+ op[0] = codep->firstchar;
+ op[1] = codep->next->value;
+ op[2] = codep->value;
+ op += 3;
+ occ -= 3;
+ goto after_loop;
+ }
+ goto too_short_buffer;
+ }
- free_entp->next = oldcodep;
- if (free_entp->next < &sp->dec_codetab[0] ||
- free_entp->next >= &sp->dec_codetab[CSIZE]) {
- TIFFErrorExt(tif->tif_clientdata, module,
- "Corrupted LZW table at scanline %"PRIu32,
- tif->tif_row);
- return (0);
- }
- free_entp->firstchar = free_entp->next->firstchar;
- free_entp->length = free_entp->next->length+1;
- free_entp->value = (codep < free_entp) ?
- codep->firstchar : free_entp->firstchar;
- if (++free_entp > maxcodep) {
- if (++nbits > BITS_MAX) /* should not happen */
- nbits = BITS_MAX;
- nbitsmask = MAXCODE(nbits);
- maxcodep = sp->dec_codetab + nbitsmask-1;
- }
- oldcodep = codep;
- if (code >= 256) {
- /*
- * Code maps to a string, copy string
- * value to output (written in reverse).
- */
- if(codep->length == 0) {
- TIFFErrorExt(tif->tif_clientdata, module,
- "Wrong length of decoded string: "
- "data probably corrupted at scanline %"PRIu32,
- tif->tif_row);
- return (0);
- }
- if (codep->length > occ) {
- /*
- * String is too long for decode buffer,
- * locate portion that will fit, copy to
- * the decode buffer, and setup restart
- * logic for the next decoding call.
- */
- sp->dec_codep = codep;
- do {
- codep = codep->next;
- } while (codep && codep->length > occ);
- if (codep) {
- sp->dec_restart = occ;
- tp = op + occ;
- do {
- *--tp = codep->value;
- codep = codep->next;
- } while (--occ && codep);
- if (codep)
- codeLoop(tif, module);
- }
- break;
- }
- len = codep->length;
- tp = op + len;
- do {
- *--tp = codep->value;
- codep = codep->next;
- } while (codep && tp > op);
- if (codep) {
- codeLoop(tif, module);
- break;
- }
- assert(occ >= len);
- op += len;
- occ -= len;
- } else {
- *op++ = (uint8_t)code;
- occ--;
- }
- }
+ op[0] = codep->firstchar;
+ op[1] = codep->next->value;
+ op[2] = codep->value;
+ op += 3;
+ occ -= 3;
+ goto begin; /* we can save the comparison occ > 0 */
+ }
+
+ if (len > occ)
+ {
+ goto too_short_buffer;
+ }
+
+ if (codep->repeated)
+ {
+ memset(op, codep->value, len);
+ op += len;
+ occ -= len;
+ if (occ == 0)
+ goto after_loop;
+ goto begin;
+ }
+
+ uint8_t* tp = op + len;
+
+ assert(len >= 4);
+
+ *--tp = codep->value;
+ codep = codep->next;
+ *--tp = codep->value;
+ codep = codep->next;
+ *--tp = codep->value;
+ codep = codep->next;
+ *--tp = codep->value;
+ if (tp > op)
+ {
+ do {
+ codep = codep->next;
+ *--tp = codep->value;
+ } while (tp > op);
+ }
+
+ assert(occ >= len);
+ op += len;
+ occ -= len;
+ if (occ == 0)
+ goto after_loop;
+ goto begin;
+ }
+code_clear:
+ {
+ free_entp = dec_codetab + CODE_FIRST;
+ nbits = BITS_MIN;
+ nbitsmask = MAXCODE(BITS_MIN);
+ maxcodep = dec_codetab + nbitsmask-1;
+ do {
+ GetNextCodeLZW();
+ } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */
+ if (code == CODE_EOI)
+ goto after_loop;
+ if (code > CODE_EOI) {
+ goto error_code;
+ }
+ *op++ = (uint8_t)code;
+ occ--;
+ oldcodep = dec_codetab + code;
+ if (occ == 0)
+ goto after_loop;
+ goto begin;
+ }
+ }
+
+too_short_buffer:
+ {
+ /*
+ * String is too long for decode buffer,
+ * locate portion that will fit, copy to
+ * the decode buffer, and setup restart
+ * logic for the next decoding call.
+ */
+ sp->dec_codep = codep;
+ do {
+ codep = codep->next;
+ } while (codep->length > occ);
+
+ sp->dec_restart = occ;
+ uint8_t* tp = op + occ;
+ do {
+ *--tp = codep->value;
+ codep = codep->next;
+ } while (--occ);
+ }
+
+after_loop:
tif->tif_rawcc -= (tmsize_t)((uint8_t*) bp - tif->tif_rawcp );
tif->tif_rawcp = (uint8_t*) bp;
sp->old_tif_rawcc = tif->tif_rawcc;
@@ -599,6 +703,9 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
"LZWDecode: Strip %"PRIu32" not terminated with EOI code",
tif->tif_curstrip);
return 0;
+error_code:
+ TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Using code not yet in table");
+ return 0;
}
#ifdef LZW_COMPAT
(Patch may be truncated, please check the link at the top of this post.)