Huffman with Short dictionary压缩算法(VB.NET Source)

来源:岁月联盟 编辑:zhu 时间:2003-07-12
Option Strict Off
Option Explicit On
<System.Runtime.InteropServices.ProgId("Compress_NET.Compress")> Public Class Compress
    
    Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (ByRef Destination As Byte, ByRef source As Byte, ByVal Length As Integer)

    Private BitVal() As Integer
    Private CharVal() As Integer
    
    Public Function Compress(ByRef FileArray() As Byte) As Byte
        Dim X As Integer
        Dim Y As Integer
        Dim Z As Integer
        Dim Char_Renamed As Short
        Dim Bitlen As Short
        Dim FileLen_Renamed As Integer
        Dim TelBits As Integer
        Dim TotBits As Integer
        Dim OutStream() As Byte
        Dim TreeNodes(511, 4) As Integer
        Dim BitValue(7) As Byte
        Dim ByteValue As Byte
        Dim ByteBuff As String
        Dim CheckSum As Short
        Dim NumberOfNodes As Short
        Dim OrgNumberOfNodes As Short
        Dim PackedSize As Integer
        Dim DictSize As Integer
        Dim OutPutSize As Integer
        Dim CharCount(255) As Integer
        Dim Bits(255) As String
        Dim Nubits As String
        Dim TempBits As String
        Dim lTemp As Integer
        Dim lWeight As Integer
        Dim rWeight As Integer
        Dim MaxWeight As Integer
        Dim NowWeight As Integer
        Dim lNode As Short
        Dim rNode As Short
        Dim StringBuffer As String
        Dim BitLens(16) As Short
        Dim CharLens(16) As String
        Dim DictString As String
        FileLen_Renamed = UBound(FileArray)
        OutPutSize = -1
        If (FileLen_Renamed = 0) Then
            ReDim Preserve FileArray(2)
            FileArray(0) = 72 'H
            FileArray(1) = 69 'E
            FileArray(2) = 48 '0
            Exit Function
        End If
        For X = 0 To UBound(FileArray)
            CharCount(FileArray(X)) = CharCount(FileArray(X)) + 1
            CheckSum = CheckSum Xor FileArray(X)
        Next
        MaxWeight = UBound(FileArray) + 1
        Z = -1
        For X = 0 To 255
            If CharCount(X) <> 0 Then
                Z = Z + 1
                TreeNodes(Z, 0) = CharCount(X)
                TreeNodes(Z, 1) = X
                TreeNodes(Z, 2) = -1
                TreeNodes(Z, 3) = -1
                TreeNodes(Z, 4) = -1
            End If
        Next
        NumberOfNodes = Z
        OrgNumberOfNodes = NumberOfNodes
        For X = NumberOfNodes + 1 To 2 Step -1
            lWeight = MaxWeight * 2 : rWeight = MaxWeight * 2
            For Y = 0 To NumberOfNodes + 1
                If TreeNodes(Y, 4) = -1 Then
                    NowWeight = TreeNodes(Y, 0)
                    If NowWeight < rWeight Or NowWeight < lWeight Then
                        If rWeight > lWeight Then
                            rWeight = NowWeight
                            rNode = Y
                        Else
                            lWeight = NowWeight
                            lNode = Y
                        End If
                    End If
                End If
            Next Y
            NumberOfNodes = NumberOfNodes + 1
            TreeNodes(lNode, 4) = NumberOfNodes
            TreeNodes(rNode, 4) = NumberOfNodes
            TreeNodes(NumberOfNodes, 0) = lWeight + rWeight
            TreeNodes(NumberOfNodes, 1) = -1
            TreeNodes(NumberOfNodes, 2) = lNode
            TreeNodes(NumberOfNodes, 3) = rNode
            TreeNodes(NumberOfNodes, 4) = -1
        Next
        TotBits = 0
        For X = 0 To OrgNumberOfNodes
            Char_Renamed = TreeNodes(X, 1)
            Y = X
            Z = Y
            Bitlen = 0
            Do While TreeNodes(Y, 4) <> -1
                Y = TreeNodes(Y, 4)
                If TreeNodes(Y, 2) = Z Or TreeNodes(Y, 3) = Z Then
                    Bitlen = Bitlen + 1
                Else
                    MsgBox("error creating bitpatern")
                    Exit Function
                End If
                Z = Y
            Loop
            If TotBits < Bitlen Then TotBits = Bitlen
            BitLens(Bitlen) = BitLens(Bitlen) + 1
            CharLens(Bitlen) = CharLens(Bitlen) & Chr(Char_Renamed)
            PackedSize = PackedSize + (TreeNodes(X, 0) * Bitlen)
            DictSize = DictSize + 2
        Next
        PackedSize = Int(PackedSize / 8) + System.Math.Abs(1 * CShort((PackedSize / 8) - Int(PackedSize / 8) > 0))
        DictString = Chr(TotBits)
        For X = 1 To TotBits
            If BitLens(X) = 256 Then
                MsgBox("This code can't be compressed using this scheme")
                Exit Function
            End If
            DictString = DictString & Chr(BitLens(X))
        Next
        For X = 1 To TotBits
            DictString = DictString & CharLens(X)
        Next
        Call Create_Huffcodes(DictString, True)
        ReDim OutStream(3 + Len(DictString) + 1 + Len(CStr(UBound(FileArray))) + 1 + PackedSize)
        For X = 0 To 7
            BitValue(X) = 2 ^ X
        Next
        
        Call AddASC2Array(OutStream, OutPutSize, "HE4")
        Call AddASC2Array(OutStream, OutPutSize, DictString)
        Call AddASC2Array(OutStream, OutPutSize, Chr(CheckSum))
        Call AddASC2Array(OutStream, OutPutSize, CStr(UBound(FileArray) + 1) & vbCr)
        TelBits = 7
        ByteValue = 0
        For X = 0 To UBound(FileArray)
            For Y = CharVal(FileArray(X)) - 1 To 0 Step -1 'bitlengte
                If (BitVal(FileArray(X)) And 2 ^ Y) > 0 Then
                    ByteValue = ByteValue + BitValue(TelBits)
                End If
                TelBits = TelBits - 1
                If TelBits = -1 Then
                    OutPutSize = OutPutSize + 1
                    OutStream(OutPutSize) = ByteValue
                    TelBits = 7
                    ByteValue = 0
                End If
            Next
        Next
        If TelBits <> 7 Then
            OutPutSize = OutPutSize + 1
            OutStream(OutPutSize) = ByteValue
        End If
        
        Compress = OutStream(OutPutSize)
    End Function

    Public Function Decompress(ByRef FileArray() As Byte) As Byte
        Dim X As Integer
        Dim Y As Integer
        Dim Z As Integer
        Dim TreeNodes(511, 4) As Integer
        Dim DeCompressed() As Byte
        Dim Leaf(255, 1) As Byte
        Dim ByteValue As Byte
        Dim BitValue(7) As Byte
        Dim NumberOfNodes As Short
        Dim CheckSum As Byte
        Dim TestSum As Byte
        Dim NuNode As Short
        Dim ToNode As Short
        Dim Char_Renamed As Byte
        Dim Bitlen As Byte
        Dim Bits(255) As String
        Dim TempBits As String
        Dim StringBuffer As String
        Dim TotBits As Integer
        Dim TelBits As Short
        Dim DictSize As Integer
        Dim InpPos As Integer
        Dim OrgLen As Integer
        Dim Nulen As Integer
        Dim DictString As String
        Dim Waarde As Integer
        If FileArray(0) <> Asc("H") Or FileArray(1) <> Asc("E") Then
            MsgBox("This is not a Huffman Compressed file")
            Exit Function
        End If
        If FileArray(2) = Asc("0") Then
            Call CopyMemory(FileArray(0), FileArray(3), UBound(FileArray) - 3)
            ReDim Preserve FileArray(UBound(FileArray) - 3)
            Exit Function
        End If
        If FileArray(2) <> Asc("4") Then
            MsgBox("file corrupt or no Huffman compression")
            Exit Function
        End If
        InpPos = 3
        For X = 0 To 7
            BitValue(X) = 2 ^ X
        Next
        TotBits = GetAscCodeFromArray(FileArray, InpPos)
        DictString = DictString & Chr(TotBits)
        TelBits = 0
        For X = 1 To TotBits
            ByteValue = GetAscCodeFromArray(FileArray, InpPos)
            TelBits = TelBits + ByteValue
            DictString = DictString & Chr(ByteValue)
        Next
        For X = 1 To TelBits
            DictString = DictString & Chr(GetAscCodeFromArray(FileArray, InpPos))
        Next
        Call Create_Huffcodes(DictString, False)

        CheckSum = GetAscCodeFromArray(FileArray, InpPos)
        Char_Renamed = GetAscCodeFromArray(FileArray, InpPos)
        Do While Char_Renamed <> Asc(vbCr)
            OrgLen = CInt(OrgLen & Chr(Char_Renamed))
            Char_Renamed = GetAscCodeFromArray(FileArray, InpPos)
        Loop
        ReDim DeCompressed(OrgLen - 1)
        Nulen = 0
        NuNode = 0
        StringBuffer = ""
        TelBits = 7
        Waarde = 0
        TotBits = 0
        Do While Nulen < OrgLen
            If TelBits = -1 Then
                InpPos = InpPos + 1
                TelBits = 7
            End If
            Waarde = Waarde * 2
            TotBits = TotBits + 1
            If (FileArray(InpPos) And 2 ^ TelBits) > 0 Then
                Waarde = Waarde + 1
            End If
            If TotBits = 20 Then
                Err.Raise(VariantType.Error, "DecompressHuffman", "We zijn de boom tot op een dood punt genaderd, waarschijnlijk is de header beschadigd")
                Exit Function
            End If
            If BitVal(Waarde) = TotBits Then
                DeCompressed(Nulen) = CharVal(Waarde)
                TestSum = TestSum Xor DeCompressed(Nulen)
                Nulen = Nulen + 1
                Waarde = 0
                TotBits = 0
            End If
            TelBits = TelBits - 1
        Loop
        If CheckSum <> TestSum Then
            Err.Raise(VariantType.Error, "Decompresshuffman", "Checksum is incorrect")
            Exit Function
        End If
        ReDim FileArray(OrgLen - 1)
        Call CopyMemory(FileArray(0), DeCompressed(0), OrgLen)
        Exit Function

        Decompress = DeCompressed(0)
    End Function

    Private Function BinToDec(ByRef Binair As String) As Short
        Dim X As Short
        If Len(Binair) > 8 Then
            Err.Raise(VariantType.Error, "BinToDec", "This binary number dont fit in 1 byte")
            Exit Function
        End If
        Do While Len(Binair) <> 8
            Binair = Binair & "0"
        Loop
        For X = 1 To 8
            BinToDec = BinToDec + (CDbl(Mid(Binair, X, 1)) * 2 ^ (8 - X))
        Next
    End Function

    Private Function DecToBin(ByRef Waarde As Short) As String
        Dim X As Short
        For X = 7 To 0 Step -1
            DecToBin = DecToBin & CStr(System.Math.Abs(CInt((Waarde And (2 ^ X)) > 0)))
        Next
    End Function

    Private Sub AddASC2Array(ByRef WichArray() As Byte, ByRef StartPos As Integer, ByRef Text As String)
        Dim X As Integer
        For X = 1 To Len(Text)
            WichArray(StartPos + X) = Asc(Mid(Text, X, 1))
        Next
        StartPos = StartPos + Len(Text)
    End Sub

    Private Function GetAscCodeFromArray(ByRef WichArray() As Byte, ByRef StartPos As Integer) As Short
        GetAscCodeFromArray = WichArray(StartPos)
        StartPos = StartPos + 1
    End Function

    Private Sub AddHEX2Array(ByRef WichArray() As Byte, ByRef StartPos As Integer, ByRef Waarde As Integer, ByRef TotBytes As Short)
        Dim HexWaarde As String
        Dim X As Integer
        HexWaarde = Right(New String("0", 2 * TotBytes) & Hex(Waarde), 2 * TotBytes)
        For X = 1 To TotBytes
            WichArray(StartPos + X) = CByte("&h" & Mid(HexWaarde, (X - 1) * 2 + 1, 2))
        Next
        StartPos = StartPos + TotBytes
    End Sub

    Private Function GetHexValFromArray(ByRef WichArray() As Byte, ByRef StartPos As Integer, ByRef TotBytes As Short) As Integer
        Dim X As Integer
        Dim TempHex As String
        For X = 0 To TotBytes - 1
            TempHex = TempHex & Right("00" & Hex(WichArray(StartPos + X)), 2)
        Next
        StartPos = StartPos + TotBytes
        GetHexValFromArray = CInt("&h" & TempHex)
    End Function

    Private Sub Create_Huffcodes(ByRef DictString As String, ByRef ForCompress As Boolean)
        Dim Code As Integer
        Dim TotKars As Short
        Dim TotLengs As Short
        Dim ReadPos As Short
        Dim bl_count() As Short
        Dim TreeLang() As Short
        Dim MaxLang As Short
        Dim TreeCode() As Integer
        Dim next_code() As Integer
        Dim Chars() As Short
        Dim BitString As String
        Dim Bitlen As Short
        Dim NumBits As Short
        Dim MaxBits As Short
        Dim maxcode As Integer
        Dim N As Short
        Dim X As Short
        Dim Y As Short
        Dim Lang As Short
        MaxBits = Asc(Mid(DictString, 1, 1))
        ReDim Preserve bl_count(MaxBits)
        ReadPos = 2
        MaxLang = -1
        For X = 1 To MaxBits
            NumBits = Asc(Mid(DictString, ReadPos, 1))
            If NumBits > 0 Then
                Bitlen = X
                bl_count(Bitlen) = NumBits
                ReDim Preserve TreeLang(MaxLang + NumBits)
                For Y = 1 To NumBits
                    MaxLang = MaxLang + 1
                    TreeLang(MaxLang) = Bitlen
                Next
            End If
            ReadPos = ReadPos + 1
        Next
        ReDim TreeCode(MaxLang)
        ReDim next_code(MaxBits)
        ReDim Chars(MaxLang)
        For X = 0 To MaxLang
            Chars(X) = Asc(Mid(DictString, ReadPos, 1))
            ReadPos = ReadPos + 1
        Next
        maxcode = 0
        Code = 0
        For N = 1 To MaxBits
            Code = (Code + bl_count(N - 1)) * 2
            next_code(N) = Code
        Next
        For N = 0 To MaxLang
            Lang = TreeLang(N)
            TreeCode(N) = next_code(Lang)
            next_code(Lang) = next_code(Lang) + 1
            If maxcode < next_code(Lang) Then maxcode = next_code(Lang)
        Next
        If ForCompress = True Then
            ReDim BitVal(255)
            ReDim CharVal(255)
            For X = 0 To MaxLang
                BitVal(Chars(X)) = TreeCode(X)
                CharVal(Chars(X)) = TreeLang(X)
            Next
        Else
            ReDim BitVal(maxcode)
            ReDim CharVal(maxcode)
            For X = 0 To MaxLang
                BitVal(TreeCode(X)) = TreeLang(X)
                CharVal(TreeCode(X)) = Chars(X)
            Next
        End If

    End Sub
End Class