2727/* block size: min = 16, max = 64k, power of 2 */
2828#define BLK_SIZE 16
2929
30- #define MIN (a , b ) ((a) < (b) ? (a) : (b))
30+ /* maximum hash entry list for the same hash bucket */
31+ #define HASH_LIMIT 64
3132
3233#define GR_PRIME 0x9e370001
3334#define HASH (v , shift ) (((unsigned int)(v) * GR_PRIME) >> (shift))
3435
35- struct index {
36+ struct index_entry {
3637 const unsigned char * ptr ;
3738 unsigned int val ;
38- struct index * next ;
39+ struct index_entry * next ;
3940};
4041
41- static struct index * * delta_index (const unsigned char * buf ,
42- unsigned long bufsize ,
43- unsigned long trg_bufsize ,
44- unsigned int * hash_shift )
42+ struct delta_index {
43+ const void * src_buf ;
44+ unsigned long src_size ;
45+ unsigned int hash_shift ;
46+ struct index_entry * hash [0 ];
47+ };
48+
49+ struct delta_index * create_delta_index (const void * buf , unsigned long bufsize )
4550{
46- unsigned int i , hsize , hshift , hlimit , entries , * hash_count ;
47- const unsigned char * data ;
48- struct index * entry , * * hash ;
51+ unsigned int i , hsize , hshift , entries , * hash_count ;
52+ const unsigned char * data , * buffer = buf ;
53+ struct delta_index * index ;
54+ struct index_entry * entry , * * hash ;
4955 void * mem ;
5056
57+ if (!buf || !bufsize )
58+ return NULL ;
59+
5160 /* determine index hash size */
5261 entries = bufsize / BLK_SIZE ;
5362 hsize = entries / 4 ;
5463 for (i = 4 ; (1 << i ) < hsize && i < 31 ; i ++ );
5564 hsize = 1 << i ;
5665 hshift = 32 - i ;
57- * hash_shift = hshift ;
5866
5967 /* allocate lookup index */
60- mem = malloc (hsize * sizeof (* hash ) + entries * sizeof (* entry ));
68+ mem = malloc (sizeof (* index ) +
69+ sizeof (* hash ) * hsize +
70+ sizeof (* entry ) * entries );
6171 if (!mem )
6272 return NULL ;
73+ index = mem ;
74+ mem = index + 1 ;
6375 hash = mem ;
64- entry = mem + hsize * sizeof (* hash );
76+ mem = hash + hsize ;
77+ entry = mem ;
78+
79+ index -> src_buf = buf ;
80+ index -> src_size = bufsize ;
81+ index -> hash_shift = hshift ;
6582 memset (hash , 0 , hsize * sizeof (* hash ));
6683
6784 /* allocate an array to count hash entries */
6885 hash_count = calloc (hsize , sizeof (* hash_count ));
6986 if (!hash_count ) {
70- free (hash );
87+ free (index );
7188 return NULL ;
7289 }
7390
7491 /* then populate the index */
75- data = buf + entries * BLK_SIZE - BLK_SIZE ;
76- while (data >= buf ) {
92+ data = buffer + entries * BLK_SIZE - BLK_SIZE ;
93+ while (data >= buffer ) {
7794 unsigned int val = adler32 (0 , data , BLK_SIZE );
7895 i = HASH (val , hshift );
7996 entry -> ptr = data ;
@@ -91,27 +108,18 @@ static struct index ** delta_index(const unsigned char *buf,
91108 * bucket that would bring us to O(m*n) computing costs (m and n
92109 * corresponding to reference and target buffer sizes).
93110 *
94- * The more the target buffer is large, the more it is important to
95- * have small entry lists for each hash buckets. With such a limit
96- * the cost is bounded to something more like O(m+n).
97- */
98- hlimit = (1 << 26 ) / trg_bufsize ;
99- if (hlimit < 4 * BLK_SIZE )
100- hlimit = 4 * BLK_SIZE ;
101-
102- /*
103- * Now make sure none of the hash buckets has more entries than
111+ * Make sure none of the hash buckets has more entries than
104112 * we're willing to test. Otherwise we cull the entry list
105113 * uniformly to still preserve a good repartition across
106114 * the reference buffer.
107115 */
108116 for (i = 0 ; i < hsize ; i ++ ) {
109- if (hash_count [i ] < hlimit )
117+ if (hash_count [i ] < HASH_LIMIT )
110118 continue ;
111119 entry = hash [i ];
112120 do {
113- struct index * keep = entry ;
114- int skip = hash_count [i ] / hlimit / 2 ;
121+ struct index_entry * keep = entry ;
122+ int skip = hash_count [i ] / HASH_LIMIT / 2 ;
115123 do {
116124 entry = entry -> next ;
117125 } while (-- skip && entry );
@@ -120,7 +128,12 @@ static struct index ** delta_index(const unsigned char *buf,
120128 }
121129 free (hash_count );
122130
123- return hash ;
131+ return index ;
132+ }
133+
134+ void free_delta_index (struct delta_index * index )
135+ {
136+ free (index );
124137}
125138
126139/* provide the size of the copy opcode given the block offset and size */
@@ -131,82 +144,73 @@ static struct index ** delta_index(const unsigned char *buf,
131144/* the maximum size for any opcode */
132145#define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff)
133146
134- void * diff_delta ( void * from_buf , unsigned long from_size ,
135- void * to_buf , unsigned long to_size ,
136- unsigned long * delta_size ,
137- unsigned long max_size )
147+ void *
148+ create_delta ( const struct delta_index * index ,
149+ const void * trg_buf , unsigned long trg_size ,
150+ unsigned long * delta_size , unsigned long max_size )
138151{
139152 unsigned int i , outpos , outsize , hash_shift ;
140153 int inscnt ;
141154 const unsigned char * ref_data , * ref_top , * data , * top ;
142155 unsigned char * out ;
143- struct index * entry , * * hash ;
144156
145- if (!from_size || !to_size )
146- return NULL ;
147- hash = delta_index (from_buf , from_size , to_size , & hash_shift );
148- if (!hash )
157+ if (!trg_buf || !trg_size )
149158 return NULL ;
150159
151160 outpos = 0 ;
152161 outsize = 8192 ;
153162 if (max_size && outsize >= max_size )
154163 outsize = max_size + MAX_OP_SIZE + 1 ;
155164 out = malloc (outsize );
156- if (!out ) {
157- free (hash );
165+ if (!out )
158166 return NULL ;
159- }
160-
161- ref_data = from_buf ;
162- ref_top = from_buf + from_size ;
163- data = to_buf ;
164- top = to_buf + to_size ;
165167
166168 /* store reference buffer size */
167- out [outpos ++ ] = from_size ;
168- from_size >>= 7 ;
169- while (from_size ) {
170- out [outpos - 1 ] |= 0x80 ;
171- out [outpos ++ ] = from_size ;
172- from_size >>= 7 ;
169+ i = index -> src_size ;
170+ while (i >= 0x80 ) {
171+ out [outpos ++ ] = i | 0x80 ;
172+ i >>= 7 ;
173173 }
174+ out [outpos ++ ] = i ;
174175
175176 /* store target buffer size */
176- out [outpos ++ ] = to_size ;
177- to_size >>= 7 ;
178- while (to_size ) {
179- out [outpos - 1 ] |= 0x80 ;
180- out [outpos ++ ] = to_size ;
181- to_size >>= 7 ;
177+ i = trg_size ;
178+ while (i >= 0x80 ) {
179+ out [outpos ++ ] = i | 0x80 ;
180+ i >>= 7 ;
182181 }
182+ out [outpos ++ ] = i ;
183183
184+ ref_data = index -> src_buf ;
185+ ref_top = ref_data + index -> src_size ;
186+ data = trg_buf ;
187+ top = trg_buf + trg_size ;
188+ hash_shift = index -> hash_shift ;
184189 inscnt = 0 ;
185190
186191 while (data < top ) {
187192 unsigned int moff = 0 , msize = 0 ;
188- if (data + BLK_SIZE <= top ) {
189- unsigned int val = adler32 (0 , data , BLK_SIZE );
190- i = HASH (val , hash_shift );
191- for (entry = hash [i ]; entry ; entry = entry -> next ) {
192- const unsigned char * ref = entry -> ptr ;
193- const unsigned char * src = data ;
194- unsigned int ref_size = ref_top - ref ;
195- if (entry -> val != val )
196- continue ;
197- if (ref_size > top - src )
198- ref_size = top - src ;
199- if (ref_size > 0x10000 )
200- ref_size = 0x10000 ;
201- if (ref_size <= msize )
202- break ;
203- while (ref_size -- && * src ++ == * ref )
204- ref ++ ;
205- if (msize < ref - entry -> ptr ) {
206- /* this is our best match so far */
207- msize = ref - entry -> ptr ;
208- moff = entry -> ptr - ref_data ;
209- }
193+ struct index_entry * entry ;
194+ unsigned int val = adler32 (0 , data , BLK_SIZE );
195+ i = HASH (val , hash_shift );
196+ for (entry = index -> hash [i ]; entry ; entry = entry -> next ) {
197+ const unsigned char * ref = entry -> ptr ;
198+ const unsigned char * src = data ;
199+ unsigned int ref_size = ref_top - ref ;
200+ if (entry -> val != val )
201+ continue ;
202+ if (ref_size > top - src )
203+ ref_size = top - src ;
204+ if (ref_size > 0x10000 )
205+ ref_size = 0x10000 ;
206+ if (ref_size <= msize )
207+ break ;
208+ while (ref_size -- && * src ++ == * ref )
209+ ref ++ ;
210+ if (msize < ref - entry -> ptr ) {
211+ /* this is our best match so far */
212+ msize = ref - entry -> ptr ;
213+ moff = entry -> ptr - ref_data ;
210214 }
211215 }
212216
@@ -271,7 +275,6 @@ void *diff_delta(void *from_buf, unsigned long from_size,
271275 out = realloc (out , outsize );
272276 if (!out ) {
273277 free (tmp );
274- free (hash );
275278 return NULL ;
276279 }
277280 }
@@ -280,7 +283,6 @@ void *diff_delta(void *from_buf, unsigned long from_size,
280283 if (inscnt )
281284 out [outpos - inscnt - 1 ] = inscnt ;
282285
283- free (hash );
284286 * delta_size = outpos ;
285287 return out ;
286288}
0 commit comments