libtiff: Merge branch 'lzw_decode_improvements' into 'master'

https://github.com/libsdl-org/libtiff/commit/ef0edff7f7b3023804e24276b942eb39fc25ea40

From 189d65779275132c86abd1e06cdab8a080645b32 Mon Sep 17 00:00:00 2001
From: Even Rouault <[EMAIL REDACTED]>
Date: Thu, 10 Mar 2022 12:14:31 +0100
Subject: [PATCH 1/3] tif_lzw.c: make LZW_CHECKEOS non-optional

---
 libtiff/tif_lzw.c | 45 +++++++++++++++------------------------------
 1 file changed, 15 insertions(+), 30 deletions(-)

diff --git a/libtiff/tif_lzw.c b/libtiff/tif_lzw.c
index 85c895fb..ec976e1f 100644
--- a/libtiff/tif_lzw.c
+++ b/libtiff/tif_lzw.c
@@ -52,13 +52,6 @@
  * Future revisions to the TIFF spec are expected to "clarify this issue".
  */
 #define LZW_COMPAT              /* include backwards compatibility code */
-/*
- * Each strip of data is supposed to be terminated by a CODE_EOI.
- * If the following #define is included, the decoder will also
- * check for end-of-strip w/o seeing this code.  This makes the
- * library more robust, but also slower.
- */
-#define LZW_CHECKEOS            /* include checks for strips w/o EOI code */
 
 #define MAXCODE(n)	((1L<<(n))-1)
 /*
@@ -131,10 +124,8 @@ typedef struct {
 	/* Decoding specific data */
 	long    dec_nbitsmask;		/* lzw_nbits 1 bits, right adjusted */
 	tmsize_t dec_restart;		/* restart count */
-#ifdef LZW_CHECKEOS
 	uint64_t  dec_bitsleft;		/* available bits in raw data */
 	tmsize_t old_tif_rawcc;         /* value of tif_rawcc at the end of the previous TIFLZWDecode() call */
-#endif
 	decodeFunc dec_decode;		/* regular or backwards compatible */
 	code_t* dec_codep;		/* current recognized code */
 	code_t* dec_oldcodep;		/* previously recognized code */
@@ -167,25 +158,21 @@ static void cl_hash(LZWCodecState*);
  * LZW Decoder.
  */
 
-#ifdef LZW_CHECKEOS
 /*
  * This check shouldn't be necessary because each
  * strip is suppose to be terminated with CODE_EOI.
  */
-#define	NextCode(_tif, _sp, _bp, _code, _get) {				\
-	if ((_sp)->dec_bitsleft < (uint64_t)nbits) {			\
+#define	NextCode(_tif, _sp, _bp, _code, _get, dec_bitsleft) {				\
+	if (dec_bitsleft < (uint64_t)nbits) {			\
 		TIFFWarningExt(_tif->tif_clientdata, module,		\
 		    "LZWDecode: Strip %"PRIu32" not terminated with EOI code", \
 		    _tif->tif_curstrip);				\
 		_code = CODE_EOI;					\
 	} else {							\
 		_get(_sp,_bp,_code);					\
-		(_sp)->dec_bitsleft -= nbits;				\
+		dec_bitsleft -= nbits;				\
 	}								\
 }
-#else
-#define	NextCode(tif, sp, bp, code, get) get(sp, bp, code)
-#endif
 
 static int
 LZWFixupTags(TIFF* tif)
@@ -316,10 +303,8 @@ LZWPreDecode(TIFF* tif, uint16_t s)
 
 	sp->dec_restart = 0;
 	sp->dec_nbitsmask = MAXCODE(BITS_MIN);
-#ifdef LZW_CHECKEOS
 	sp->dec_bitsleft = 0;
         sp->old_tif_rawcc = 0;
-#endif
 	sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
 	/*
 	 * Zero entries that are not yet filled in.  We do
@@ -417,9 +402,8 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	}
 
 	bp = (uint8_t*)tif->tif_rawcp;
-#ifdef LZW_CHECKEOS
 	sp->dec_bitsleft += (((uint64_t)tif->tif_rawcc - sp->old_tif_rawcc) << 3);
-#endif
+	uint64_t dec_bitsleft = sp->dec_bitsleft;
 	nbits = sp->lzw_nbits;
 	nextdata = sp->lzw_nextdata;
 	nextbits = sp->lzw_nextbits;
@@ -429,7 +413,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	maxcodep = sp->dec_maxcodep;
 
 	while (occ > 0) {
-		NextCode(tif, sp, bp, code, GetNextCode);
+		NextCode(tif, sp, bp, code, GetNextCode, dec_bitsleft);
 		if (code == CODE_EOI)
 			break;
 		if (code == CODE_CLEAR) {
@@ -440,7 +424,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 				nbits = BITS_MIN;
 				nbitsmask = MAXCODE(BITS_MIN);
 				maxcodep = sp->dec_codetab + nbitsmask-1;
-				NextCode(tif, sp, bp, code, GetNextCode);
+				NextCode(tif, sp, bp, code, GetNextCode, dec_bitsleft);
 			} while (code == CODE_CLEAR);	/* consecutive CODE_CLEAR codes */
 			if (code == CODE_EOI)
 				break;
@@ -543,9 +527,8 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 
 	tif->tif_rawcc -= (tmsize_t)((uint8_t*) bp - tif->tif_rawcp );
 	tif->tif_rawcp = (uint8_t*) bp;
-#ifdef LZW_CHECKEOS
 	sp->old_tif_rawcc = tif->tif_rawcc;
-#endif
+	sp->dec_bitsleft = dec_bitsleft;
 	sp->lzw_nbits = (unsigned short) nbits;
 	sp->lzw_nextdata = nextdata;
 	sp->lzw_nextbits = nextbits;
@@ -636,9 +619,10 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	}
 
 	bp = (uint8_t*)tif->tif_rawcp;
-#ifdef LZW_CHECKEOS
+
 	sp->dec_bitsleft += (((uint64_t)tif->tif_rawcc - sp->old_tif_rawcc) << 3);
-#endif
+	uint64_t dec_bitsleft = sp->dec_bitsleft;
+
 	nbits = sp->lzw_nbits;
 	nextdata = sp->lzw_nextdata;
 	nextbits = sp->lzw_nextbits;
@@ -648,7 +632,7 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	maxcodep = sp->dec_maxcodep;
 
 	while (occ > 0) {
-		NextCode(tif, sp, bp, code, GetNextCodeCompat);
+		NextCode(tif, sp, bp, code, GetNextCodeCompat, dec_bitsleft);
 		if (code == CODE_EOI)
 			break;
 		if (code == CODE_CLEAR) {
@@ -659,7 +643,7 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 				nbits = BITS_MIN;
 				nbitsmask = MAXCODE(BITS_MIN);
 				maxcodep = sp->dec_codetab + nbitsmask;
-				NextCode(tif, sp, bp, code, GetNextCodeCompat);
+				NextCode(tif, sp, bp, code, GetNextCodeCompat, dec_bitsleft);
 			} while (code == CODE_CLEAR);	/* consecutive CODE_CLEAR codes */
 			if (code == CODE_EOI)
 				break;
@@ -752,9 +736,10 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 
 	tif->tif_rawcc -= (tmsize_t)((uint8_t*) bp - tif->tif_rawcp );
 	tif->tif_rawcp = (uint8_t*) bp;
-#ifdef LZW_CHECKEOS
+
 	sp->old_tif_rawcc = tif->tif_rawcc;
-#endif
+	sp->dec_bitsleft = dec_bitsleft;
+
 	sp->lzw_nbits = (unsigned short)nbits;
 	sp->lzw_nextdata = nextdata;
 	sp->lzw_nextbits = nextbits;

From a2aa4074e90b63adc5d8d06679dc567de36f74cd Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Thu, 10 Mar 2022 12:19:01 +0100
Subject: [PATCH 2/3] LZWDecode(): modest speed improvement: fetch input data
 by chunks of the largest natural integer of the architecture

---
 libtiff/tif_lzw.c | 141 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 107 insertions(+), 34 deletions(-)

diff --git a/libtiff/tif_lzw.c b/libtiff/tif_lzw.c
index ec976e1f..0124df9f 100644
--- a/libtiff/tif_lzw.c
+++ b/libtiff/tif_lzw.c
@@ -38,6 +38,10 @@
 
 #include <stdio.h>
 
+/* Select the plausible largest natural integer type for the architecture */
+#define SIZEOF_WORDTYPE SIZEOF_SIZE_T
+typedef size_t WordType;
+
 /*
  * NB: The 5.0 spec describes a different algorithm than Aldus
  *     implements.  Specifically, Aldus does code length transitions
@@ -85,7 +89,7 @@ typedef struct {
 	unsigned short  nbits;          /* # of bits/code */
 	unsigned short  maxcode;        /* maximum code for lzw_nbits */
 	unsigned short  free_ent;       /* next free entry in hash table */
-	unsigned long   nextdata;       /* next bits of i/o */
+	WordType        nextdata;       /* next bits of i/o */
 	long            nextbits;       /* # of valid bits in lzw_nextdata */
 
 	int             rw_mode;        /* preserve rw_mode from init */
@@ -158,22 +162,6 @@ static void cl_hash(LZWCodecState*);
  * LZW Decoder.
  */
 
-/*
- * This check shouldn't be necessary because each
- * strip is suppose to be terminated with CODE_EOI.
- */
-#define	NextCode(_tif, _sp, _bp, _code, _get, dec_bitsleft) {				\
-	if (dec_bitsleft < (uint64_t)nbits) {			\
-		TIFFWarningExt(_tif->tif_clientdata, module,		\
-		    "LZWDecode: Strip %"PRIu32" not terminated with EOI code", \
-		    _tif->tif_curstrip);				\
-		_code = CODE_EOI;					\
-	} else {							\
-		_get(_sp,_bp,_code);					\
-		dec_bitsleft -= nbits;				\
-	}								\
-}
-
 static int
 LZWFixupTags(TIFF* tif)
 {
@@ -322,16 +310,77 @@ LZWPreDecode(TIFF* tif, uint16_t s)
 /*
  * Decode a "hunk of data".
  */
-#define	GetNextCode(sp, bp, code) {				\
-	nextdata = (nextdata<<8) | *(bp)++;			\
-	nextbits += 8;						\
-	if (nextbits < nbits) {					\
-		nextdata = (nextdata<<8) | *(bp)++;		\
-		nextbits += 8;					\
-	}							\
-	code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask);	\
-	nextbits -= nbits;					\
-}
+
+/* Get the next 32 or 64-bit from the input data */
+#ifdef WORDS_BIGENDIAN
+#  define GetNextData(nextdata, bp)   memcpy(&nextdata, bp, sizeof(nextdata))
+#elif SIZEOF_WORDTYPE == 8
+#  if defined(__GNUC__) && defined(__x86_64__)
+#    define GetNextData(nextdata, bp) nextdata = __builtin_bswap64(*(uint64_t*)(bp))
+#  elif defined(_M_X64)
+#    define GetNextData(nextdata, bp) nextdata = _byteswap_uint64(*(uint64_t*)(bp))
+#  elif defined(__GNUC__)
+#    define GetNextData(nextdata, bp) memcpy(&nextdata, bp, sizeof(nextdata)); \
+                                      nextdata = __builtin_bswap64(nextdata)
+#  else
+#    define GetNextData(nextdata, bp) nextdata = (((uint64_t)bp[0]) << 56) | \
+                                                 (((uint64_t)bp[1]) << 48) | \
+                                                 (((uint64_t)bp[2]) << 40) | \
+                                                 (((uint64_t)bp[3]) << 32) | \
+                                                 (((uint64_t)bp[4]) << 24) | \
+                                                 (((uint64_t)bp[5]) << 16) | \
+                                                 (((uint64_t)bp[6]) << 8) | \
+                                                 (((uint64_t)bp[7]))
+#  endif
+#elif SIZEOF_WORDTYPE == 4
+#  if defined(__GNUC__) && defined(__i386__)
+#    define GetNextData(nextdata, bp) nextdata = __builtin_bswap32(*(uint32_t*)(bp))
+#  elif defined(_M_X86)
+#    define GetNextData(nextdata, bp) nextdata = _byteswap_ulong(*(unsigned long*)(bp))
+#  elif defined(__GNUC__)
+#    define GetNextData(nextdata, bp) memcpy(&nextdata, bp, sizeof(nextdata)); \
+                                      nextdata = __builtin_bswap32(nextdata)
+#  else
+#    define GetNextData(nextdata, bp) nextdata = (((uint32_t)bp[0]) << 24) | \
+                                                 (((uint32_t)bp[1]) << 16) | \
+                                                 (((uint32_t)bp[2]) << 8) | \
+                                                 (((uint32_t)bp[3]))
+#  endif
+#else
+#  error "Unhandled SIZEOF_WORDTYPE"
+#endif
+
+#define	GetNextCodeLZW() do { \
+    nextbits -= nbits; \
+    if (nextbits < 0) { \
+        if (dec_bitsleft >= 8 * SIZEOF_WORDTYPE) { \
+            unsigned codetmp = (unsigned)(nextdata << (-nextbits)); \
+            GetNextData(nextdata, bp); \
+            bp += SIZEOF_WORDTYPE; \
+            nextbits += 8 * SIZEOF_WORDTYPE; \
+            dec_bitsleft -= 8 * SIZEOF_WORDTYPE; \
+            code = (WordType)((codetmp | (nextdata >> nextbits)) & nbitsmask); \
+            break; \
+        } \
+        else {\
+            if( dec_bitsleft < 8) { \
+                goto no_eoi; \
+            }\
+            nextdata = (nextdata<<8) | *(bp)++; \
+            nextbits += 8; \
+            dec_bitsleft -= 8; \
+            if( nextbits < 0 ) { \
+                if( dec_bitsleft < 8) { \
+                    goto no_eoi; \
+                }\
+                nextdata = (nextdata<<8) | *(bp)++; \
+                nextbits += 8; \
+                dec_bitsleft -= 8; \
+            } \
+        } \
+    } \
+    code = (WordType)((nextdata >> nextbits) & nbitsmask); \
+} while(0)
 
 static void
 codeLoop(TIFF* tif, const char* module)
@@ -350,10 +399,10 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	tmsize_t occ = occ0;
 	uint8_t *tp;
 	uint8_t *bp;
-	hcode_t code;
+	WordType code;
 	int len;
 	long nbits, nextbits, nbitsmask;
-        unsigned long nextdata;
+        WordType nextdata;
 	code_t *codep, *free_entp, *maxcodep, *oldcodep;
 
 	(void) s;
@@ -413,7 +462,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	maxcodep = sp->dec_maxcodep;
 
 	while (occ > 0) {
-		NextCode(tif, sp, bp, code, GetNextCode, dec_bitsleft);
+		GetNextCodeLZW();
 		if (code == CODE_EOI)
 			break;
 		if (code == CODE_CLEAR) {
@@ -424,7 +473,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 				nbits = BITS_MIN;
 				nbitsmask = MAXCODE(BITS_MIN);
 				maxcodep = sp->dec_codetab + nbitsmask-1;
-				NextCode(tif, sp, bp, code, GetNextCode, dec_bitsleft);
+				GetNextCodeLZW();
 			} while (code == CODE_CLEAR);	/* consecutive CODE_CLEAR codes */
 			if (code == CODE_EOI)
 				break;
@@ -544,9 +593,32 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 		return (0);
 	}
 	return (1);
+
+no_eoi:
+    TIFFErrorExt(tif->tif_clientdata, module,
+                    "LZWDecode: Strip %"PRIu32" not terminated with EOI code",
+                    tif->tif_curstrip);
+    return 0;
 }
 
 #ifdef LZW_COMPAT
+
+/*
+ * This check shouldn't be necessary because each
+ * strip is suppose to be terminated with CODE_EOI.
+ */
+#define	NextCode(_tif, _sp, _bp, _code, _get, dec_bitsleft) {				\
+	if (dec_bitsleft < (uint64_t)nbits) {			\
+		TIFFWarningExt(_tif->tif_clientdata, module,		\
+		    "LZWDecode: Strip %"PRIu32" not terminated with EOI code", \
+		    _tif->tif_curstrip);				\
+		_code = CODE_EOI;					\
+	} else {							\
+		_get(_sp,_bp,_code);					\
+		dec_bitsleft -= nbits;				\
+	}								\
+}
+
 /*
  * Decode a "hunk of data" for old images.
  */
@@ -573,7 +645,8 @@ LZWDecodeCompat(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	uint8_t *bp;
 	int code, nbits;
 	int len;
-	long nextbits, nextdata, nbitsmask;
+	long nextbits, nbitsmask;
+	WordType nextdata;
 	code_t *codep, *free_entp, *maxcodep, *oldcodep;
 
 	(void) s;
@@ -858,7 +931,7 @@ LZWEncode(TIFF* tif, uint8_t* bp, tmsize_t cc, uint16_t s)
 	hcode_t ent;
 	long disp;
 	tmsize_t incount, outcount, checkpoint;
-	unsigned long nextdata;
+	WordType nextdata;
         long nextbits;
 	int free_ent, maxcode, nbits;
 	uint8_t* op;
@@ -1022,7 +1095,7 @@ LZWPostEncode(TIFF* tif)
 	register LZWCodecState *sp = EncoderState(tif);
 	uint8_t* op = tif->tif_rawcp;
 	long nextbits = sp->lzw_nextbits;
-	unsigned long nextdata = sp->lzw_nextdata;
+	WordType nextdata = sp->lzw_nextdata;
 	tmsize_t outcount = sp->enc_outcount;
 	int nbits = sp->lzw_nbits;
 

From 3079627ea0dee150e6a208cec8381de611bb842b Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Thu, 10 Mar 2022 12:32:32 +0100
Subject: [PATCH 3/3] LZWDecode(): major speed improvements

This mostly comes from dealing specifically with codes that expand to
2, 3 and 4 bytes or more to avoid branches, and dealing with longer
repeated sequences (e.g. lots of bytes to 0).

With the following bench.c, execution time is 32% faster on a 8000x8000
4 bands uint16 predictor=2 image that has a 1.6x compression ratio. with
gcc 9.4.0, on x86_64

bench.c:

#include “tiffio.h”
#include <stdlib.h>
#include <stdint.h>

int main(int argc, char* argv[])
{
if( argc != 2 )
{
fprintf(stderr, “Usage: ./bench my.tif\n”);
exit(1);
}
TIFF* tif = TIFFOpen(argv[1], “r”);
if( tif == NULL )
{
fprintf(stderr, “Cannot open %s\n”, argv[1]);
exit(1);
}
if( !TIFFIsTiled(tif) )
{
fprintf(stderr, “Only tiled image supported\n”);
exit(1);
}
int tilesize = (int)TIFFTileSize(tif);
char* c = malloc(tilesize);
if( c == NULL )
{
fprintf(stderr, “Out of memory\n”);
exit(1);
}
const uint32_t numtiles = TIFFNumberOfTiles(tif);
//int numloops = 4 * (int)(1e9 / ((double)tilesize * numtiles));
//printf(“Number of loops: %d\n”, numloops);
int numloops = 1;
for(int i =0; i< numloops; i++)
{
for(uint32_t tileindex = 0; tileindex < numtiles; tileindex++ )
{
TIFFReadEncodedTile(tif, tileindex, c, tilesize);
}
}
free(c);
TIFFClose(tif);
return 0;
}

---
 libtiff/tif_lzw.c | 381 +++++++++++++++++++++++++++++-----------------
 1 file changed, 244 insertions(+), 137 deletions(-)

diff --git a/libtiff/tif_lzw.c b/libtiff/tif_lzw.c
index 0124df9f..19e0e4aa 100644
--- a/libtiff/tif_lzw.c
+++ b/libtiff/tif_lzw.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 1988-1997 Sam Leffler
  * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ * Copyright (c) 2022 Even Rouault
  *
  * Permission to use, copy, modify, distribute, and sell this software and
  * its documentation for any purpose is hereby granted without fee, provided
@@ -36,6 +37,7 @@
  */
 #include "tif_predict.h"
 
+#include <stdbool.h>
 #include <stdio.h>
 
 /* Select the plausible largest natural integer type for the architecture */
@@ -116,8 +118,10 @@ typedef struct {
 typedef struct code_ent {
 	struct code_ent *next;
 	unsigned short	length;		/* string len, including this token */
-	unsigned char	value;		/* data value */
+	/* firstchar should be placed immediately before value in this structure */
 	unsigned char	firstchar;	/* first token of string */
+	unsigned char	value;		/* data value */
+	bool            repeated;
 } code_t;
 
 typedef int (*decodeFunc)(TIFF*, uint8_t*, tmsize_t, uint16_t);
@@ -211,17 +215,17 @@ LZWSetupDecode(TIFF* tif)
 		 */
 		code = 255;
 		do {
-			sp->dec_codetab[code].value = (unsigned char)code;
 			sp->dec_codetab[code].firstchar = (unsigned char)code;
+			sp->dec_codetab[code].value = (unsigned char)code;
+			sp->dec_codetab[code].repeated = true;
 			sp->dec_codetab[code].length = 1;
 			sp->dec_codetab[code].next = NULL;
 		} while (code--);
 		/*
-		 * Zero-out the unused entries
-                 */
-                /* Silence false positive */
-                /* coverity[overrun-buffer-arg] */
-                 _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0,
+		 * Zero-out the unused entries  */
+		/* Silence false positive */
+		/* coverity[overrun-buffer-arg] */
+		memset(&sp->dec_codetab[CODE_CLEAR], 0,
 			     (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
 	}
 	return (1);
@@ -292,8 +296,8 @@ LZWPreDecode(TIFF* tif, uint16_t s)
 	sp->dec_restart = 0;
 	sp->dec_nbitsmask = MAXCODE(BITS_MIN);
 	sp->dec_bitsleft = 0;
-        sp->old_tif_rawcc = 0;
-	sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
+	sp->old_tif_rawcc = 0;
+	sp->dec_free_entp = sp->dec_codetab  - 1 ; // + CODE_FIRST;
 	/*
 	 * Zero entries that are not yet filled in.  We do
 	 * this to guard against bogus input data that causes
@@ -301,8 +305,7 @@ LZWPreDecode(TIFF* tif, uint16_t s)
 	 * come up with a way to safely bounds-check input codes
 	 * while decoding then you can remove this operation.
 	 */
-	_TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
-	sp->dec_oldcodep = &sp->dec_codetab[-1];
+	sp->dec_oldcodep = &sp->dec_codetab[0];
 	sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
 	return (1);
 }
@@ -382,14 +385,6 @@ LZWPreDecode(TIFF* tif, uint16_t s)
     code = (WordType)((nextdata >> nextbits) & nbitsmask); \
 } while(0)
 
-static void
-codeLoop(TIFF* tif, const char* module)
-{
-	TIFFErrorExt(tif->tif_clientdata, module,
-	    "Bogus encoding, loop in the code table; scanline %"PRIu32,
-	    tif->tif_row);
-}
-
 static int
 LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 {
@@ -397,13 +392,10 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	LZWCodecState *sp = DecoderState(tif);
 	uint8_t *op = (uint8_t*) op0;
 	tmsize_t occ = occ0;
-	uint8_t *tp;
 	uint8_t *bp;
-	WordType code;
-	int len;
 	long nbits, nextbits, nbitsmask;
-        WordType nextdata;
-	code_t *codep, *free_entp, *maxcodep, *oldcodep;
+	WordType nextdata;
+	code_t *free_entp, *maxcodep, *oldcodep;
 
 	(void) s;
 	assert(sp != NULL);
@@ -415,7 +407,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	if (sp->dec_restart) {
 		tmsize_t residue;
 
-		codep = sp->dec_codep;
+		code_t* codep = sp->dec_codep;
 		residue = codep->length - sp->dec_restart;
 		if (residue > occ) {
 			/*
@@ -429,7 +421,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 				codep = codep->next;
 			} while (--residue > occ && codep);
 			if (codep) {
-				tp = op + occ;
+				uint8_t* tp = op + occ;
 				do {
 					*--tp = codep->value;
 					codep = codep->next;
@@ -442,7 +434,7 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 		 */
 		op += residue;
 		occ -= residue;
-		tp = op;
+		uint8_t* tp = op;
 		do {
 			*--tp = codep->value;
 			codep = codep->next;
@@ -460,120 +452,232 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
 	oldcodep = sp->dec_oldcodep;
 	free_entp = sp->dec_free_entp;
 	maxcodep = sp->dec_maxcodep;
+	code_t* const dec_codetab = sp->dec_codetab;
+	code_t* codep;
+
+    if (occ == 0) {
+        goto after_loop;
+    }
+
+begin:
+    {
+        WordType code;
+        GetNextCodeLZW();
+        codep = dec_codetab + code;
+        if (code >= CODE_FIRST)
+            goto code_above_or_equal_to_258;
+        if (code < 256)
+            goto code_below_256;
+        if (code == CODE_EOI)
+            goto after_loop;
+        goto code_clear;
+
+code_below_256:
+        {
+            if (codep > free_entp)
+                goto error_code;
+            free_entp->next = oldcodep;
+            free_entp->firstchar = oldcodep->firstchar;
+            free_entp->length = oldcodep->length+1;
+            free_entp->value = (uint8_t)code;
+            free_entp->repeated = (bool)(oldcodep->repeated & !(oldcodep->value - code));
+            if (++free_entp > maxcodep) {
+                if (++nbits > BITS_MAX)		/* should not happen for a conformant encoder */
+                    nbits = BITS_MAX;
+                nbitsmask = MAXCODE(nbits);
+                maxcodep = dec_codetab + nbitsmask-1;
+                if( free_entp >= &dec_codetab[CSIZE] )
+                {
+                    /* At that point, the next valid states are either EOI or a */
+                    /* CODE_CLEAR. If a regular code is read, at the next */
+                    /* attempt at registering a new entry, we will error out */
+                    /* due to setting free_entp before any valid code */
+                    free_entp = dec_codetab - 1;
+                }
+            }
+            oldcodep = codep;
+            *op++ = (uint8_t)code;
+            occ--;
+            if (occ == 0)
+                goto after_loop;
+            goto begin;
+        }
 
-	while (occ > 0) {
-		GetNextCodeLZW();
-		if (code == CODE_EOI)
-			break;
-		if (code == CODE_CLEAR) {
-			do {
-				free_entp = sp->dec_codetab + CODE_FIRST;
-				_TIFFmemset(free_entp, 0,
-					    (CSIZE - CODE_FIRST) * sizeof (code_t));
-				nbits = BITS_MIN;
-				nbitsmask = MAXCODE(BITS_MIN);
-				maxcodep = sp->dec_codetab + nbitsmask-1;
-				GetNextCodeLZW();
-			} while (code == CODE_CLEAR);	/* consecutive CODE_CLEAR codes */
-			if (code == CODE_EOI)
-				break;
-			if (code > CODE_CLEAR) {
-				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
-				"LZWDecode: Corrupted LZW table at scanline %"PRIu32,
-					     tif->tif_row);
-				return (0);
-			}
-			*op++ = (uint8_t)code;
-			occ--;
-			oldcodep = sp->dec_codetab + code;
-			continue;
-		}
-		codep = sp->dec_codetab + code;
+code_above_or_equal_to_258:
+        {
+            /*
+             * Add the new entry to the code table.
+             */
+
+            if (codep >= free_entp)
+            {
+                if (codep != free_entp)
+                    goto error_code;
+                free_entp->value = oldcodep->firstchar;
+            }
+            else
+            {
+                free_entp->value = codep->firstchar;
+            }
+            free_entp->repeated = (bool)(oldcodep->repeated & !(oldcodep->value - free_entp->value));
+            free_entp->next = oldcodep;
+
+            free_entp->firstchar = oldcodep->firstchar;
+            free_entp->length = oldcodep->length+1;
+            if (++free_entp > maxcodep) {
+                if (++nbits > BITS_MAX)		/* should not happen for a conformant encoder */
+                    nbits = BITS_MAX;
+                nbitsmask = MAXCODE(nbits);
+                maxcodep = dec_codetab + nbitsmask-1;
+                if (free_entp >= &dec_codetab[CSIZE])
+                {
+                    /* At that point, the next valid states are either EOI or a */
+                    /* CODE_CLEAR. If a regular code is read, at the next */
+                    /* attempt at registering a new entry, we will error out */
+                    /* due to setting free_entp before any valid code */
+                    free_entp = dec_codetab - 1;
+                }
+            }
+            oldcodep = codep;
+
+            /*
+             * Code maps to a string, copy string
+             * value to output (written in reverse).
+             */
+            /* tiny bit faster on x86_64 to store in unsigned short than int */
+            unsigned short len = codep->length;
+
+            if (len < 3) /* equivalent to len == 2 given all other conditions */
+            {
+                if (occ <= 2)
+                {
+                    if (occ == 2)
+                    {
+                        memcpy(op, &(codep->firstchar), 2);
+                        op += 2;
+                        occ -= 2;
+                        goto after_loop;
+                    }
+                    goto too_short_buffer;
+                }
 
-		/*
-		 * Add the new entry to the code table.
-		 */
-		if (free_entp < &sp->dec_codetab[0] ||
-		    free_entp >= &sp->dec_codetab[CSIZE]) {
-			TIFFErrorExt(tif->tif_clientdata, module,
-			    "Corrupted LZW table at scanline %"PRIu32,
-			    tif->tif_row);
-			return (0);
-		}
+                memcpy(op, &(codep->firstchar), 2);
+                op += 2;
+                occ -= 2;
+                goto begin; /* we can save the comparison occ > 0 */
+            }
+
+            if (len == 3)
+            {
+                if (occ <= 3)
+                {
+                    if (occ == 3)
+                    {
+                        op[0] = codep->firstchar;
+                        op[1] = codep->next->value;
+                        op[2] = codep->value;
+                        op += 3;
+                        occ -= 3;
+                        goto after_loop;
+                    }
+                    goto too_short_buffer;
+                }
 
-		free_entp->next = oldcodep;
-		if (free_entp->next < &sp->dec_codetab[0] ||
-		    free_entp->next >= &sp->dec_codetab[CSIZE]) {
-			TIFFErrorExt(tif->tif_clientdata, module,
-			    "Corrupted LZW table at scanline %"PRIu32,
-			    tif->tif_row);
-			return (0);
-		}
-		free_entp->firstchar = free_entp->next->firstchar;
-		free_entp->length = free_entp->next->length+1;
-		free_entp->value = (codep < free_entp) ?
-		    codep->firstchar : free_entp->firstchar;
-		if (++free_entp > maxcodep) {
-			if (++nbits > BITS_MAX)		/* should not happen */
-				nbits = BITS_MAX;
-			nbitsmask = MAXCODE(nbits);
-			maxcodep = sp->dec_codetab + nbitsmask-1;
-		}
-		oldcodep = codep;
-		if (code >= 256) {
-			/*
-			 * Code maps to a string, copy string
-			 * value to output (written in reverse).
-			 */
-			if(codep->length == 0) {
-				TIFFErrorExt(tif->tif_clientdata, module,
-				    "Wrong length of decoded string: "
-				    "data probably corrupted at scanline %"PRIu32,
-				    tif->tif_row);
-				return (0);
-			}
-			if (codep->length > occ) {
-				/*
-				 * String is too long for decode buffer,
-				 * locate portion that will fit, copy to
-				 * the decode buffer, and setup restart
-				 * logic for the next decoding call.
-				 */
-				sp->dec_codep = codep;
-				do {
-					codep = codep->next;
-				} while (codep && codep->length > occ);
-				if (codep) {
-					sp->dec_restart = occ;
-					tp = op + occ;
-					do  {
-						*--tp = codep->value;
-						codep = codep->next;
-					}  while (--occ && codep);
-					if (codep)
-						codeLoop(tif, module);
-				}
-				break;
-			}
-			len = codep->length;
-			tp = op + len;
-			do {
-				*--tp = codep->value;
-				codep = codep->next;
-			} while (codep && tp > op);
-			if (codep) {
-			    codeLoop(tif, module);
-			    break;
-			}
-			assert(occ >= len);
-			op += len;
-			occ -= len;
-		} else {
-			*op++ = (uint8_t)code;
-			occ--;
-		}
-	}
+                op[0] = codep->firstchar;
+                op[1] = codep->next->value;
+                op[2] = codep->value;
+                op += 3;
+                occ -= 3;
+                goto begin; /* we can save the comparison occ > 0 */
+            }
+
+            if (len > occ)
+            {
+                goto too_short_buffer;
+            }
+
+            if (codep->repeated)
+            {
+                memset(op, codep->value, len);
+                op += len;
+                occ -= len;
+                if (occ == 0)
+                    goto after_loop;
+                goto begin;
+            }
+
+            uint8_t* tp = op + len;
+
+            assert(len >= 4);
+
+            *--tp = codep->value;
+            codep = codep->next;
+            *--tp = codep->value;
+            codep = codep->next;
+            *--tp = codep->value;
+            codep = codep->next;
+            *--tp = codep->value;
+            if (tp > op)
+            {
+                 do {
+                    codep = codep->next;
+                    *--tp = codep->value;
+                } while (tp > op);
+            }
+
+            assert(occ >= len);
+            op += len;
+            occ -= len;
+            if (occ == 0)
+                goto after_loop;
+            goto begin;
+        }
 
+code_clear:
+        {
+            free_entp = dec_codetab + CODE_FIRST;
+            nbits = BITS_MIN;
+            nbitsmask = MAXCODE(BITS_MIN);
+            maxcodep = dec_codetab + nbitsmask-1;
+            do {
+                GetNextCodeLZW();
+            } while (code == CODE_CLEAR);	/* consecutive CODE_CLEAR codes */
+            if (code == CODE_EOI)
+                goto after_loop;
+            if (code > CODE_EOI) {
+                goto error_code;
+            }
+            *op++ = (uint8_t)code;
+            occ--;
+            oldcodep = dec_codetab + code;
+            if (occ == 0)
+                goto after_loop;
+            goto begin;
+        }
+    }
+
+too_short_buffer:
+    {
+        /*
+         * String is too long for decode buffer,
+         * locate portion that will fit, copy to
+         * the decode buffer, and setup restart
+         * logic for the next decoding call.
+         */
+        sp->dec_codep = codep;
+        do {
+            codep = codep->next;
+        } while (codep->length > occ);
+
+        sp->dec_restart = occ;
+        uint8_t* tp = op + occ;
+        do  {
+            *--tp = codep->value;
+            codep = codep->next;
+        }  while (--occ);
+    }
+
+after_loop:
 	tif->tif_rawcc -= (tmsize_t)((uint8_t*) bp - tif->tif_rawcp );
 	tif->tif_rawcp = (uint8_t*) bp;
 	sp->old_tif_rawcc = tif->tif_rawcc;
@@ -599,6 +703,9 @@ LZWDecode(TIFF* tif, uint8_t* op0, tmsize_t occ0, uint16_t s)
                     "LZWDecode: Strip %"PRIu32" not terminated with EOI code",
                     tif->tif_curstrip);
     return 0;
+error_code:
+    TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Using code not yet in table");
+    return 0;
 }
 
 #ifdef LZW_COMPAT

(Patch may be truncated, please check the link at the top of this post.)