1af69d88dSmrg/**************************************************************************
2af69d88dSmrg *
3af69d88dSmrg * Copyright 2011 Christian König.
4af69d88dSmrg * All Rights Reserved.
5af69d88dSmrg *
6af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
7af69d88dSmrg * copy of this software and associated documentation files (the
8af69d88dSmrg * "Software"), to deal in the Software without restriction, including
9af69d88dSmrg * without limitation the rights to use, copy, modify, merge, publish,
10af69d88dSmrg * distribute, sub license, and/or sell copies of the Software, and to
11af69d88dSmrg * permit persons to whom the Software is furnished to do so, subject to
12af69d88dSmrg * the following conditions:
13af69d88dSmrg *
14af69d88dSmrg * The above copyright notice and this permission notice (including the
15af69d88dSmrg * next paragraph) shall be included in all copies or substantial portions
16af69d88dSmrg * of the Software.
17af69d88dSmrg *
18af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19af69d88dSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21af69d88dSmrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22af69d88dSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23af69d88dSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24af69d88dSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25af69d88dSmrg *
26af69d88dSmrg **************************************************************************/
27af69d88dSmrg
28af69d88dSmrg/*
29af69d88dSmrg * Functions for fast bitwise access to multiple probably unaligned input buffers
30af69d88dSmrg */
31af69d88dSmrg
32af69d88dSmrg#ifndef vl_vlc_h
33af69d88dSmrg#define vl_vlc_h
34af69d88dSmrg
35af69d88dSmrg#include "pipe/p_compiler.h"
36af69d88dSmrg
37af69d88dSmrg#include "util/u_math.h"
38af69d88dSmrg#include "util/u_pointer.h"
39af69d88dSmrg#include "util/u_debug.h"
40af69d88dSmrg
41af69d88dSmrgstruct vl_vlc
42af69d88dSmrg{
43af69d88dSmrg   uint64_t buffer;
44af69d88dSmrg   signed invalid_bits;
45af69d88dSmrg   const uint8_t *data;
46af69d88dSmrg   const uint8_t *end;
47af69d88dSmrg
48af69d88dSmrg   const void *const *inputs;
49af69d88dSmrg   const unsigned    *sizes;
50af69d88dSmrg   unsigned          bytes_left;
51af69d88dSmrg};
52af69d88dSmrg
53af69d88dSmrgstruct vl_vlc_entry
54af69d88dSmrg{
55af69d88dSmrg   int8_t length;
56af69d88dSmrg   int8_t value;
57af69d88dSmrg};
58af69d88dSmrg
59af69d88dSmrgstruct vl_vlc_compressed
60af69d88dSmrg{
61af69d88dSmrg   uint16_t bitcode;
62af69d88dSmrg   struct vl_vlc_entry entry;
63af69d88dSmrg};
64af69d88dSmrg
65af69d88dSmrg/**
66af69d88dSmrg * initalize and decompress a lookup table
67af69d88dSmrg */
6801e04c3fSmrgstatic inline void
69af69d88dSmrgvl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size)
70af69d88dSmrg{
71af69d88dSmrg   unsigned i, bits = util_logbase2(dst_size);
72af69d88dSmrg
73af69d88dSmrg   assert(dst && dst_size);
74af69d88dSmrg   assert(src && src_size);
75af69d88dSmrg
76af69d88dSmrg   for (i=0;i<dst_size;++i) {
77af69d88dSmrg      dst[i].length = 0;
78af69d88dSmrg      dst[i].value = 0;
79af69d88dSmrg   }
80af69d88dSmrg
81af69d88dSmrg   for(; src_size > 0; --src_size, ++src) {
8201e04c3fSmrg      for(i = 0; i < (1u << (bits - src->entry.length)); ++i)
83af69d88dSmrg         dst[src->bitcode >> (16 - bits) | i] = src->entry;
84af69d88dSmrg   }
85af69d88dSmrg}
86af69d88dSmrg
87af69d88dSmrg/**
88af69d88dSmrg * switch over to next input buffer
89af69d88dSmrg */
9001e04c3fSmrgstatic inline void
91af69d88dSmrgvl_vlc_next_input(struct vl_vlc *vlc)
92af69d88dSmrg{
93af69d88dSmrg   unsigned len = vlc->sizes[0];
94af69d88dSmrg
95af69d88dSmrg   assert(vlc);
96af69d88dSmrg   assert(vlc->bytes_left);
97af69d88dSmrg
98af69d88dSmrg   if (len < vlc->bytes_left)
99af69d88dSmrg      vlc->bytes_left -= len;
100af69d88dSmrg   else {
101af69d88dSmrg      len = vlc->bytes_left;
102af69d88dSmrg      vlc->bytes_left = 0;
103af69d88dSmrg   }
104af69d88dSmrg
105af69d88dSmrg   vlc->data = vlc->inputs[0];
106af69d88dSmrg   vlc->end = vlc->data + len;
107af69d88dSmrg
108af69d88dSmrg   ++vlc->inputs;
109af69d88dSmrg   ++vlc->sizes;
110af69d88dSmrg}
111af69d88dSmrg
112af69d88dSmrg/**
113af69d88dSmrg * align the data pointer to the next dword
114af69d88dSmrg */
11501e04c3fSmrgstatic inline void
116af69d88dSmrgvl_vlc_align_data_ptr(struct vl_vlc *vlc)
117af69d88dSmrg{
118af69d88dSmrg   /* align the data pointer */
119af69d88dSmrg   while (vlc->data != vlc->end && pointer_to_uintptr(vlc->data) & 3) {
120af69d88dSmrg      vlc->buffer |= (uint64_t)*vlc->data << (24 + vlc->invalid_bits);
121af69d88dSmrg      ++vlc->data;
122af69d88dSmrg      vlc->invalid_bits -= 8;
123af69d88dSmrg   }
124af69d88dSmrg}
125af69d88dSmrg
126af69d88dSmrg/**
127af69d88dSmrg * fill the bit buffer, so that at least 32 bits are valid
128af69d88dSmrg */
12901e04c3fSmrgstatic inline void
130af69d88dSmrgvl_vlc_fillbits(struct vl_vlc *vlc)
131af69d88dSmrg{
132af69d88dSmrg   assert(vlc);
133af69d88dSmrg
134af69d88dSmrg   /* as long as the buffer needs to be filled */
135af69d88dSmrg   while (vlc->invalid_bits > 0) {
136af69d88dSmrg      unsigned bytes_left = vlc->end - vlc->data;
137af69d88dSmrg
138af69d88dSmrg      /* if this input is depleted */
139af69d88dSmrg      if (bytes_left == 0) {
140af69d88dSmrg
141af69d88dSmrg         if (vlc->bytes_left) {
142af69d88dSmrg            /* go on to next input */
143af69d88dSmrg            vl_vlc_next_input(vlc);
144af69d88dSmrg            vl_vlc_align_data_ptr(vlc);
145af69d88dSmrg         } else
146af69d88dSmrg            /* or give up since we don't have anymore inputs */
147af69d88dSmrg            return;
148af69d88dSmrg
149af69d88dSmrg      } else if (bytes_left >= 4) {
150af69d88dSmrg
151af69d88dSmrg         /* enough bytes in buffer, read in a whole dword */
152af69d88dSmrg         uint64_t value = *(const uint32_t*)vlc->data;
153af69d88dSmrg
1547ec681f3Smrg#if !UTIL_ARCH_BIG_ENDIAN
155af69d88dSmrg         value = util_bswap32(value);
156af69d88dSmrg#endif
157af69d88dSmrg
158af69d88dSmrg         vlc->buffer |= value << vlc->invalid_bits;
159af69d88dSmrg         vlc->data += 4;
160af69d88dSmrg         vlc->invalid_bits -= 32;
161af69d88dSmrg
162af69d88dSmrg         /* buffer is now definitely filled up avoid the loop test */
163af69d88dSmrg         break;
164af69d88dSmrg
165af69d88dSmrg      } else while (vlc->data < vlc->end) {
166af69d88dSmrg
167af69d88dSmrg         /* not enough bytes left in buffer, read single bytes */
168af69d88dSmrg         vlc->buffer |= (uint64_t)*vlc->data << (24 + vlc->invalid_bits);
169af69d88dSmrg         ++vlc->data;
170af69d88dSmrg         vlc->invalid_bits -= 8;
171af69d88dSmrg      }
172af69d88dSmrg   }
173af69d88dSmrg}
174af69d88dSmrg
175af69d88dSmrg/**
176af69d88dSmrg * initialize vlc structure and start reading from first input buffer
177af69d88dSmrg */
17801e04c3fSmrgstatic inline void
179af69d88dSmrgvl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs,
180af69d88dSmrg            const void *const *inputs, const unsigned *sizes)
181af69d88dSmrg{
182af69d88dSmrg   unsigned i;
183af69d88dSmrg
184af69d88dSmrg   assert(vlc);
185af69d88dSmrg   assert(num_inputs);
186af69d88dSmrg
187af69d88dSmrg   vlc->buffer = 0;
188af69d88dSmrg   vlc->invalid_bits = 32;
189af69d88dSmrg   vlc->inputs = inputs;
190af69d88dSmrg   vlc->sizes = sizes;
191af69d88dSmrg   vlc->bytes_left = 0;
192af69d88dSmrg
193af69d88dSmrg   for (i = 0; i < num_inputs; ++i)
194af69d88dSmrg      vlc->bytes_left += sizes[i];
195af69d88dSmrg
196af69d88dSmrg   if (vlc->bytes_left) {
197af69d88dSmrg      vl_vlc_next_input(vlc);
198af69d88dSmrg      vl_vlc_align_data_ptr(vlc);
199af69d88dSmrg      vl_vlc_fillbits(vlc);
200af69d88dSmrg   }
201af69d88dSmrg}
202af69d88dSmrg
203af69d88dSmrg/**
204af69d88dSmrg * number of bits still valid in bit buffer
205af69d88dSmrg */
20601e04c3fSmrgstatic inline unsigned
207af69d88dSmrgvl_vlc_valid_bits(struct vl_vlc *vlc)
208af69d88dSmrg{
209af69d88dSmrg   return 32 - vlc->invalid_bits;
210af69d88dSmrg}
211af69d88dSmrg
212af69d88dSmrg/**
213af69d88dSmrg * number of bits left over all inbut buffers
214af69d88dSmrg */
21501e04c3fSmrgstatic inline unsigned
216af69d88dSmrgvl_vlc_bits_left(struct vl_vlc *vlc)
217af69d88dSmrg{
218af69d88dSmrg   signed bytes_left = vlc->end - vlc->data;
219af69d88dSmrg   bytes_left += vlc->bytes_left;
220af69d88dSmrg   return bytes_left * 8 + vl_vlc_valid_bits(vlc);
221af69d88dSmrg}
222af69d88dSmrg
223af69d88dSmrg/**
224af69d88dSmrg * get num_bits from bit buffer without removing them
225af69d88dSmrg */
22601e04c3fSmrgstatic inline unsigned
227af69d88dSmrgvl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
228af69d88dSmrg{
229af69d88dSmrg   assert(vl_vlc_valid_bits(vlc) >= num_bits || vlc->data >= vlc->end);
230af69d88dSmrg   return vlc->buffer >> (64 - num_bits);
231af69d88dSmrg}
232af69d88dSmrg
233af69d88dSmrg/**
234af69d88dSmrg * remove num_bits from bit buffer
235af69d88dSmrg */
23601e04c3fSmrgstatic inline void
237af69d88dSmrgvl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
238af69d88dSmrg{
239af69d88dSmrg   assert(vl_vlc_valid_bits(vlc) >= num_bits);
240af69d88dSmrg
241af69d88dSmrg   vlc->buffer <<= num_bits;
242af69d88dSmrg   vlc->invalid_bits += num_bits;
243af69d88dSmrg}
244af69d88dSmrg
245af69d88dSmrg/**
246af69d88dSmrg * get num_bits from bit buffer with removing them
247af69d88dSmrg */
24801e04c3fSmrgstatic inline unsigned
249af69d88dSmrgvl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
250af69d88dSmrg{
251af69d88dSmrg   unsigned value;
252af69d88dSmrg
253af69d88dSmrg   assert(vl_vlc_valid_bits(vlc) >= num_bits);
254af69d88dSmrg
255af69d88dSmrg   value = vlc->buffer >> (64 - num_bits);
256af69d88dSmrg   vl_vlc_eatbits(vlc, num_bits);
257af69d88dSmrg
258af69d88dSmrg   return value;
259af69d88dSmrg}
260af69d88dSmrg
261af69d88dSmrg/**
262af69d88dSmrg * treat num_bits as signed value and remove them from bit buffer
263af69d88dSmrg */
26401e04c3fSmrgstatic inline signed
265af69d88dSmrgvl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
266af69d88dSmrg{
267af69d88dSmrg   signed value;
268af69d88dSmrg
269af69d88dSmrg   assert(vl_vlc_valid_bits(vlc) >= num_bits);
270af69d88dSmrg
271af69d88dSmrg   value = ((int64_t)vlc->buffer) >> (64 - num_bits);
272af69d88dSmrg   vl_vlc_eatbits(vlc, num_bits);
273af69d88dSmrg
274af69d88dSmrg   return value;
275af69d88dSmrg}
276af69d88dSmrg
277af69d88dSmrg/**
278af69d88dSmrg * lookup a value and length in a decompressed table
279af69d88dSmrg */
28001e04c3fSmrgstatic inline int8_t
281af69d88dSmrgvl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits)
282af69d88dSmrg{
283af69d88dSmrg   tbl += vl_vlc_peekbits(vlc, num_bits);
284af69d88dSmrg   vl_vlc_eatbits(vlc, tbl->length);
285af69d88dSmrg   return tbl->value;
286af69d88dSmrg}
287af69d88dSmrg
288af69d88dSmrg/**
289af69d88dSmrg * fast forward search for a specific byte value
290af69d88dSmrg */
29101e04c3fSmrgstatic inline boolean
292af69d88dSmrgvl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value)
293af69d88dSmrg{
294af69d88dSmrg   /* make sure we are on a byte boundary */
295af69d88dSmrg   assert((vl_vlc_valid_bits(vlc) % 8) == 0);
29601e04c3fSmrg   assert(num_bits == ~0u || (num_bits % 8) == 0);
297af69d88dSmrg
298af69d88dSmrg   /* deplete the bit buffer */
299af69d88dSmrg   while (vl_vlc_valid_bits(vlc) > 0) {
300af69d88dSmrg
301af69d88dSmrg      if (vl_vlc_peekbits(vlc, 8) == value) {
302af69d88dSmrg         vl_vlc_fillbits(vlc);
303af69d88dSmrg         return TRUE;
304af69d88dSmrg      }
305af69d88dSmrg
306af69d88dSmrg      vl_vlc_eatbits(vlc, 8);
307af69d88dSmrg
30801e04c3fSmrg      if (num_bits != ~0u) {
309af69d88dSmrg         num_bits -= 8;
310af69d88dSmrg         if (num_bits == 0)
311af69d88dSmrg            return FALSE;
312af69d88dSmrg      }
313af69d88dSmrg   }
314af69d88dSmrg
315af69d88dSmrg   /* deplete the byte buffers */
316af69d88dSmrg   while (1) {
317af69d88dSmrg
318af69d88dSmrg      /* if this input is depleted */
319af69d88dSmrg      if (vlc->data == vlc->end) {
320af69d88dSmrg         if (vlc->bytes_left)
321af69d88dSmrg            /* go on to next input */
322af69d88dSmrg            vl_vlc_next_input(vlc);
323af69d88dSmrg         else
324af69d88dSmrg            /* or give up since we don't have anymore inputs */
325af69d88dSmrg            return FALSE;
326af69d88dSmrg      }
327af69d88dSmrg
328af69d88dSmrg      if (*vlc->data == value) {
329af69d88dSmrg         vl_vlc_align_data_ptr(vlc);
330af69d88dSmrg         vl_vlc_fillbits(vlc);
331af69d88dSmrg         return TRUE;
332af69d88dSmrg      }
333af69d88dSmrg
334af69d88dSmrg      ++vlc->data;
33501e04c3fSmrg      if (num_bits != ~0u) {
336af69d88dSmrg         num_bits -= 8;
337af69d88dSmrg         if (num_bits == 0) {
338af69d88dSmrg            vl_vlc_align_data_ptr(vlc);
339af69d88dSmrg            return FALSE;
340af69d88dSmrg         }
341af69d88dSmrg      }
342af69d88dSmrg   }
343af69d88dSmrg}
344af69d88dSmrg
345af69d88dSmrg/**
346af69d88dSmrg * remove num_bits bits starting at pos from the bitbuffer
347af69d88dSmrg */
34801e04c3fSmrgstatic inline void
349af69d88dSmrgvl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits)
350af69d88dSmrg{
351af69d88dSmrg   uint64_t lo = (vlc->buffer & (~0UL >> (pos + num_bits))) << num_bits;
352af69d88dSmrg   uint64_t hi = (vlc->buffer & (~0UL << (64 - pos)));
353af69d88dSmrg   vlc->buffer = lo | hi;
354af69d88dSmrg   vlc->invalid_bits += num_bits;
355af69d88dSmrg}
356af69d88dSmrg
357af69d88dSmrg/**
358af69d88dSmrg * limit the number of bits left for fetching
359af69d88dSmrg */
36001e04c3fSmrgstatic inline void
361af69d88dSmrgvl_vlc_limit(struct vl_vlc *vlc, unsigned bits_left)
362af69d88dSmrg{
363af69d88dSmrg   assert(bits_left <= vl_vlc_bits_left(vlc));
364af69d88dSmrg
365af69d88dSmrg   vl_vlc_fillbits(vlc);
366af69d88dSmrg   if (bits_left < vl_vlc_valid_bits(vlc)) {
367af69d88dSmrg      vlc->invalid_bits = 32 - bits_left;
368af69d88dSmrg      vlc->buffer &= ~0L << (vlc->invalid_bits + 32);
369af69d88dSmrg      vlc->end = vlc->data;
370af69d88dSmrg      vlc->bytes_left = 0;
371af69d88dSmrg   } else {
372af69d88dSmrg      assert((bits_left - vl_vlc_valid_bits(vlc)) % 8 == 0);
373af69d88dSmrg      vlc->bytes_left = (bits_left - vl_vlc_valid_bits(vlc)) / 8;
374af69d88dSmrg      if (vlc->bytes_left < (vlc->end - vlc->data)) {
375af69d88dSmrg         vlc->end = vlc->data + vlc->bytes_left;
376af69d88dSmrg         vlc->bytes_left = 0;
377af69d88dSmrg      } else
378af69d88dSmrg         vlc->bytes_left -= vlc->end - vlc->data;
379af69d88dSmrg   }
380af69d88dSmrg}
381af69d88dSmrg
382af69d88dSmrg#endif /* vl_vlc_h */
383