Dirac - A Video Codec

Created by the British Broadcasting Corporation.


arith_codec.h

Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002 *
00003 * $Id: arith_codec.h,v 1.29 2006/06/13 09:07:25 timborer Exp $ $Name: Dirac_0_6_0 $
00004 *
00005 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006 *
00007 * The contents of this file are subject to the Mozilla Public License
00008 * Version 1.1 (the "License"); you may not use this file except in compliance
00009 * with the License. You may obtain a copy of the License at
00010 * http://www.mozilla.org/MPL/
00011 *
00012 * Software distributed under the License is distributed on an "AS IS" basis,
00013 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
00014 * the specific language governing rights and limitations under the License.
00015 *
00016 * The Original Code is BBC Research and Development code.
00017 *
00018 * The Initial Developer of the Original Code is the British Broadcasting
00019 * Corporation.
00020 * Portions created by the Initial Developer are Copyright (C) 2004.
00021 * All Rights Reserved.
00022 *
00023 * Contributor(s):   Richard Felton (Original Author),
00024                     Thomas Davies,
00025                     Scott R Ladd,
00026                     Peter Bleackley,
00027                     Steve Bearcroft,
00028                     Anuradha Suraparaju,
00029                     Tim Borer (major refactor February 2006)
00030                     Andrew Kennedy
00031 *
00032 * Alternatively, the contents of this file may be used under the terms of
00033 * the GNU General Public License Version 2 (the "GPL"), or the GNU Lesser
00034 * Public License Version 2.1 (the "LGPL"), in which case the provisions of
00035 * the GPL or the LGPL are applicable instead of those above. If you wish to
00036 * allow use of your version of this file only under the terms of the either
00037 * the GPL or LGPL and not to allow others to use your version of this file
00038 * under the MPL, indicate your decision by deleting the provisions above
00039 * and replace them with the notice and other provisions required by the GPL
00040 * or LGPL. If you do not delete the provisions above, a recipient may use
00041 * your version of this file under the terms of any one of the MPL, the GPL
00042 * or the LGPL.
00043 * ***** END LICENSE BLOCK ***** */
00044 
00045 
00046 #ifndef _ARITH_CODEC_H_
00047 #define _ARITH_CODEC_H_
00048 
00057 
00058 #include <libdirac_common/common.h>
00059 #include <libdirac_byteio/byteio.h>
00060 #include <vector>
00061 
00062 namespace dirac
00063 {
00065 
00069     class ContextLookupTable {
00070         public:
00071 
00073 
00077             ContextLookupTable();
00078 
00080             inline static unsigned int lookup(int weight);
00081         private:
00082             static unsigned int table[256];
00083     };
00084 
00085     class Context: private ContextLookupTable {
00086     public:
00087 
00089 
00092         inline Context();
00093 
00094         //Class is POD
00095         //Use built in copy constructor, assignment and destructor.
00096 
00098         inline void SetCounts( unsigned int cnt0, unsigned int cnt1);
00099 
00101         inline unsigned int Weight() const;
00102 
00104         inline void HalveCounts();
00105 
00107         inline unsigned int GetScaledProb0( ) const;
00108 
00110         inline void Update( bool symbol );
00111 
00112     private:
00113 
00114         int m_count0;
00115         int m_count1;
00116     };
00117 
00118     class ArithCodecBase {
00119 
00120     public:
00121 
00123 
00129         ArithCodecBase(ByteIO* p_byteio, size_t number_of_contexts);
00130 
00132 
00135         virtual ~ArithCodecBase();
00136 
00137     protected:
00138 
00139         //virtual codec functions (to be overridden)
00141 
00143         virtual void InitContexts()=0;    
00144 
00146         virtual void ResetAll()=0;
00147 
00148         //core encode functions
00150 
00152         void InitEncoder();
00153 
00155         void EncodeSymbol(const bool symbol, const int context_num);
00156 
00157         void EncodeUInt(const unsigned int value, const int bin1, const int max_bin);
00158 
00159         void EncodeSInt(const int value, const int bin1, const int max_bin);
00160 
00162         void FlushEncoder();
00163 
00164         int ByteCount() const;     
00165 
00166         // core decode functions
00168 
00170         void InitDecoder(int num_bytes);                    
00171 
00173         bool DecodeSymbol( int context_num );
00174 
00175         unsigned int DecodeUInt(const int bin1, const int max_bin);
00176 
00177         int DecodeSInt(const int bin1, const int max_bin);
00178         
00180         std::vector<Context> m_context_list;
00181 
00182     private:
00183         
00185         ArithCodecBase(const ArithCodecBase & cpy);
00186 
00188         ArithCodecBase & operator = (const ArithCodecBase & rhs);
00189              
00190         // Encode functions
00192 
00194         inline void ShiftBitOut();
00195 
00197         inline void OutputBits();
00198              
00199         // Decode functions
00201 
00203         void ReadAllData(int num_bytes);
00204 
00206         inline void ShiftBitIn();
00207 
00209         inline bool InputBit();
00210 
00211         // NOTE: These constants imply an unsigned 16-bit operand
00212         static const unsigned int CODE_MAX         = 0xFFFF;
00213         static const unsigned int CODE_MSB         = 0x8000;
00214         static const unsigned int CODE_2ND_MSB     = 0x4000;
00215 
00216         // Codec data
00218  
00220         unsigned int m_low_code;
00221 
00223         unsigned int m_high_code;
00224 
00226         ByteIO *m_byteio;
00227 
00228         // For encoder only
00229 
00231         int m_underflow;
00232 
00234         char* m_decode_data_ptr;
00235 
00237         char* m_data_ptr;
00238 
00240         int m_input_bits_left;
00241 
00243         unsigned int m_code;
00244 
00245     };
00246 
00247     
00248 
00249     inline bool ArithCodecBase::DecodeSymbol( int context_num )
00250     {
00251         // NB: loops could be while loops predicated on the break conditions
00252         // However, each loop reads in a bit, so the max number of bits 
00253         // input is the coding word-length - 16 here. This means the 
00254         // iterations can be bounded (and the loop unrolled if desired).
00255         // Admittedly, this syntax is pig-ugly and while loops would be
00256         // prettier.
00257 
00258         // Shift bits in until MSBs are different.
00259         for (int i=0; i<16; ++i )
00260         {
00261             // Shift bits in until MSBs are different.
00262             if ( (m_high_code^m_low_code)>=CODE_MSB )
00263                 break;
00264             ShiftBitIn();
00265         }
00266 
00267         for (int i=0; i<16; ++i )
00268         {
00269             if ( !(m_low_code & CODE_2ND_MSB) || (m_high_code & CODE_2ND_MSB) )
00270                 break;
00271 
00272             // If we're here, we have high = 10xxxxx and low = 01xxxxx,
00273             // so we're straddling 1/2-way point - a condition known as
00274             // underflow. We flip the 2nd highest bit. Combined with the
00275             // subsequent bitshift, this has the effect of doubling the
00276             // [low,high] interval width about 1/2
00277             m_code      ^= CODE_2ND_MSB;
00278             m_low_code  ^= CODE_2ND_MSB;
00279             m_high_code ^= CODE_2ND_MSB;
00280             ShiftBitIn();
00281         }
00282 
00283         // Determine the next symbol value by placing code within
00284         // the [low,high] interval.
00285 
00286         // Fetch the statistical context to be used
00287         Context& ctx  = m_context_list[context_num];
00288 
00289         // Decode as per updated specification
00290         const unsigned int count = m_code - m_low_code + 1;
00291         const unsigned int range_prob =
00292             ((m_high_code - m_low_code + 1) * ctx.GetScaledProb0())>>16;
00293         bool symbol = ( count > range_prob );
00294 
00295         // Update the statistical context
00296         ctx.Update( symbol );
00297 
00298         // Rescale the interval
00299         if( symbol )    //symbol is 1, so m_high_code unchanged
00300             m_low_code += range_prob;
00301         else            //symbol is 0, so m_low_code unchanged
00302             m_high_code = m_low_code + range_prob - 1;
00303 
00304         return symbol;
00305     }
00306 
00307     inline unsigned int ArithCodecBase::DecodeUInt(const int bin1, const int max_bin) {
00308         const int info_ctx = (max_bin+1);
00309         int bin = bin1;
00310         unsigned int value = 1;
00311         while (!DecodeSymbol(bin)) {
00312             value <<= 1;
00313             if (DecodeSymbol(info_ctx)) value+=1;
00314             if (bin<max_bin) bin+=1;
00315         }
00316         value -= 1;
00317         return value;
00318     }
00319 
00320     inline int ArithCodecBase::DecodeSInt(const int bin1, const int max_bin) {
00321         int value = 0;
00322         const int magnitude = DecodeUInt(bin1, max_bin);
00323         if (magnitude!=0) {
00324             if (DecodeSymbol(max_bin+2)) value=-magnitude;
00325             else value=magnitude;
00326         }
00327         return value;
00328     }
00329 
00330     inline void ArithCodecBase::EncodeSymbol(const bool symbol, const int context_num)
00331     {
00332         // NB: loops could be while loops predicated on the break conditions
00333         // However, each loop reads in a bit, so the max number of bits 
00334         // input is the coding word-length - 16 here. This means the 
00335         // iterations can be bounded (and the loop unrolled if desired).
00336         // Admittedly, this syntax is pig-ugly and while loops would be
00337         // prettier.
00338 
00339         // Delete 2nd MSBs until they are the same to prevent underflow
00340          for (int i=0; i<16; ++i )
00341         {
00342             if ( !(m_low_code & CODE_2ND_MSB) || (m_high_code & CODE_2ND_MSB) )
00343                 break;
00344 
00345             // If we're here, we have high = 10xxxxx and low = 01xxxxx,
00346             // so we're straddling 1/2-way point - a condition known as
00347             // underflow. We flip the 2nd highest bit. Combined with the
00348             // subsequent bitshift, this has the effect of doubling the
00349             // [low,high] interval width about 1/2
00350             m_underflow += 1;
00351             m_low_code  ^= CODE_2ND_MSB;
00352             m_high_code ^= CODE_2ND_MSB;
00353             ShiftBitOut();
00354         }
00355 
00356         // Adjust high and low (rescale interval) based on the symbol we are encoding
00357 
00358         Context& ctx = m_context_list[context_num];
00359 
00360         const unsigned int range_prob =
00361             ((m_high_code - m_low_code + 1) * ctx.GetScaledProb0())>>16;
00362 
00363         if ( symbol )    //symbol is 1, so m_high_code unchanged
00364             m_low_code += range_prob;
00365         else             // symbol is 0, so m_low_code unchanged
00366             m_high_code = m_low_code + range_prob - 1 ;
00367 
00368         // Update the statistical context
00369         ctx.Update( symbol );
00370 
00371         // Shift bits out until MSBs are different.
00372         for (int i=0; i<16; ++i )
00373         {
00374             if ( (m_high_code^m_low_code)>=CODE_MSB )
00375                 break;
00376             OutputBits();
00377             ShiftBitOut();
00378         }
00379     }
00380 
00381     inline void ArithCodecBase::EncodeUInt(const unsigned int the_int,
00382                                            const int bin1, const int max_bin) {
00383         const int value = (the_int+1);
00384         const int info_ctx = (max_bin+1);
00385         int bin = bin1;
00386         int top_bit = 1;
00387         {
00388             int max_value = 1;
00389             while (value>max_value) {
00390                 top_bit <<= 1;
00391                 max_value <<= 1;
00392                 max_value += 1;
00393             }
00394         }
00395         bool stop = (top_bit==1);
00396         EncodeSymbol(stop, bin);
00397         while (!stop) {
00398             top_bit >>= 1;
00399             EncodeSymbol( (value&top_bit), info_ctx);
00400             if ( bin < max_bin) bin+=1;
00401             stop = (top_bit==1);
00402             EncodeSymbol(stop, bin);
00403         }
00404     }
00405 
00406     inline void ArithCodecBase::EncodeSInt(const int value,
00407                                            const int bin1, const int max_bin) {
00408         EncodeUInt(std::abs(value), bin1, max_bin);
00409         if (value != 0) {
00410             EncodeSymbol( (value < 0), max_bin+2 );
00411         }
00412     }
00413 
00414 
00416 
00422     template<class T> //T is container/array type
00423     class ArithCodec
00424         : public ArithCodecBase
00425     {
00426     public:
00427 
00429 
00435         ArithCodec(ByteIO* p_byteio, size_t number_of_contexts);
00436 
00437       
00439 
00442         virtual ~ArithCodec() {}
00443 
00445 
00453         int Compress(T & in_data);
00454     
00456 
00464         void Decompress(T & out_data, const int num_bytes);
00465 
00466     protected:
00467 
00468         //virtual encode-only functions
00470 
00472         virtual void DoWorkCode(T & in_data) = 0; 
00473 
00476         virtual void DoWorkDecode(T & out_data)=0;
00477    };
00478 
00479     //Implementation - core functions
00481 
00482     template<class T>
00483     ArithCodec<T>::ArithCodec(ByteIO* p_byteio, size_t number_of_contexts):
00484         ArithCodecBase(p_byteio, number_of_contexts) {}
00485 
00486 
00487 
00488     template<class T>
00489     int ArithCodec<T>::Compress(T &in_data)
00490     {
00491         InitEncoder();                
00492         DoWorkCode(in_data);
00493         FlushEncoder();
00494         return ByteCount();
00495     }
00496 
00497     template<class T>
00498     void ArithCodec<T>::Decompress( T &out_data, const int num_bytes )
00499     {
00500         InitDecoder(num_bytes);
00501         DoWorkDecode( out_data );
00502     }
00503 
00504     void ArithCodecBase::ShiftBitOut()
00505     {
00506         // Shift out top-most bit and increment high value
00507         m_high_code <<= 1;
00508         m_high_code  &= CODE_MAX;
00509         m_high_code  += 1;
00510         m_low_code  <<= 1;
00511         m_low_code   &= CODE_MAX;
00512       }
00513 
00514     void ArithCodecBase::OutputBits()
00515     {
00516         m_byteio->OutputBit( m_high_code & CODE_MSB);
00517         for (; m_underflow > 0; m_underflow-- )
00518             m_byteio->OutputBit(~m_high_code & CODE_MSB);
00519     }
00520     
00521     inline void ArithCodecBase::ShiftBitIn()
00522     {
00523         m_high_code <<= 1;
00524         m_high_code  &= CODE_MAX;
00525         m_high_code  += 1;
00526         m_low_code  <<= 1;
00527         m_low_code   &= CODE_MAX;
00528         m_code      <<= 1;
00529         m_code       &= CODE_MAX;
00530         m_code       += InputBit();
00531     }
00532 
00533    inline bool ArithCodecBase::InputBit()
00534     {
00535         if (m_input_bits_left == 0)
00536         {
00537             m_data_ptr++;
00538             m_input_bits_left = 8;
00539         }
00540         m_input_bits_left--;
00541 #if 1
00542         // MSB to LSB
00543         return bool( ( (*m_data_ptr) >> m_input_bits_left ) & 1 );
00544 #else
00545         // LSB to MSB
00546         bool val = *m_data_ptr & 0x01;
00547         *m_data_ptr >>= 1;
00548         return val;
00549 #endif
00550     }
00551 
00552     Context::Context():
00553         m_count0(1), m_count1(1) {}
00554 
00555     void Context::SetCounts( unsigned int cnt0, unsigned int cnt1)
00556     {
00557         m_count0 = cnt0;
00558         m_count1 = cnt1;
00559     }
00560 
00561     unsigned int Context::Weight() const
00562     {
00563         return m_count0+m_count1;
00564     }
00565 
00566     void Context::HalveCounts()
00567     {
00568         m_count0 >>= 1;
00569         ++m_count0;
00570         m_count1 >>= 1;
00571         ++m_count1;
00572     }
00573 
00574     //  The probability of a binary input/output symbol is estimated
00575     //  from past counts of 0 and 1 for symbols in the same context.
00576     //  Probability is estimated as:
00577     //      probability of 0 = count0/(count0+count1)
00578     //  Probability is re-calculated for every symbol.
00579     //  To avoid the division a lookup table is used.
00580     //  This is a fixed point implementation so probability is scaled to
00581     //  a range of 0 to 2**16.
00582     //  The value of (count0+count1) is known as "weight".
00583     //  The lookup table precalculates the values of:
00584     //      lookup(weight) = ((1<<16)+weight/2)/weight
00585     //  The probability calculation becomes:
00586     //      probability of = count0 * lookup(weight)
00587     int unsigned Context::GetScaledProb0() const
00588     {
00589         return m_count0*lookup(m_count0+m_count1);
00590     }
00591 
00592     void Context::Update( bool symbol )
00593     {
00594         if ( !symbol )
00595             ++m_count0;
00596         else
00597             ++m_count1;
00598         if ( Weight() >= 256 )
00599             HalveCounts();
00600     }
00601 
00602     unsigned int ContextLookupTable::lookup(int weight) {
00603         return table[weight];
00604     }
00605 
00606 }// end dirac namespace
00607 
00608 #endif

© 2004 British Broadcasting Corporation. Dirac code licensed under the Mozilla Public License (MPL) Version 1.1.
HTML documentation generated by Dimitri van Heesch's excellent Doxygen tool.