Huffman with Short dictionary压缩算法(VB.NET Source)
来源:岁月联盟
时间:2003-07-12
Option Explicit On
<System.Runtime.InteropServices.ProgId("Compress_NET.Compress")> Public Class Compress
Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (ByRef Destination As Byte, ByRef source As Byte, ByVal Length As Integer)
Private BitVal() As Integer
Private CharVal() As Integer
Public Function Compress(ByRef FileArray() As Byte) As Byte
Dim X As Integer
Dim Y As Integer
Dim Z As Integer
Dim Char_Renamed As Short
Dim Bitlen As Short
Dim FileLen_Renamed As Integer
Dim TelBits As Integer
Dim TotBits As Integer
Dim OutStream() As Byte
Dim TreeNodes(511, 4) As Integer
Dim BitValue(7) As Byte
Dim ByteValue As Byte
Dim ByteBuff As String
Dim CheckSum As Short
Dim NumberOfNodes As Short
Dim OrgNumberOfNodes As Short
Dim PackedSize As Integer
Dim DictSize As Integer
Dim OutPutSize As Integer
Dim CharCount(255) As Integer
Dim Bits(255) As String
Dim Nubits As String
Dim TempBits As String
Dim lTemp As Integer
Dim lWeight As Integer
Dim rWeight As Integer
Dim MaxWeight As Integer
Dim NowWeight As Integer
Dim lNode As Short
Dim rNode As Short
Dim StringBuffer As String
Dim BitLens(16) As Short
Dim CharLens(16) As String
Dim DictString As String
FileLen_Renamed = UBound(FileArray)
OutPutSize = -1
If (FileLen_Renamed = 0) Then
ReDim Preserve FileArray(2)
FileArray(0) = 72 'H
FileArray(1) = 69 'E
FileArray(2) = 48 '0
Exit Function
End If
For X = 0 To UBound(FileArray)
CharCount(FileArray(X)) = CharCount(FileArray(X)) + 1
CheckSum = CheckSum Xor FileArray(X)
Next
MaxWeight = UBound(FileArray) + 1
Z = -1
For X = 0 To 255
If CharCount(X) <> 0 Then
Z = Z + 1
TreeNodes(Z, 0) = CharCount(X)
TreeNodes(Z, 1) = X
TreeNodes(Z, 2) = -1
TreeNodes(Z, 3) = -1
TreeNodes(Z, 4) = -1
End If
Next
NumberOfNodes = Z
OrgNumberOfNodes = NumberOfNodes
For X = NumberOfNodes + 1 To 2 Step -1
lWeight = MaxWeight * 2 : rWeight = MaxWeight * 2
For Y = 0 To NumberOfNodes + 1
If TreeNodes(Y, 4) = -1 Then
NowWeight = TreeNodes(Y, 0)
If NowWeight < rWeight Or NowWeight < lWeight Then
If rWeight > lWeight Then
rWeight = NowWeight
rNode = Y
Else
lWeight = NowWeight
lNode = Y
End If
End If
End If
Next Y
NumberOfNodes = NumberOfNodes + 1
TreeNodes(lNode, 4) = NumberOfNodes
TreeNodes(rNode, 4) = NumberOfNodes
TreeNodes(NumberOfNodes, 0) = lWeight + rWeight
TreeNodes(NumberOfNodes, 1) = -1
TreeNodes(NumberOfNodes, 2) = lNode
TreeNodes(NumberOfNodes, 3) = rNode
TreeNodes(NumberOfNodes, 4) = -1
Next
TotBits = 0
For X = 0 To OrgNumberOfNodes
Char_Renamed = TreeNodes(X, 1)
Y = X
Z = Y
Bitlen = 0
Do While TreeNodes(Y, 4) <> -1
Y = TreeNodes(Y, 4)
If TreeNodes(Y, 2) = Z Or TreeNodes(Y, 3) = Z Then
Bitlen = Bitlen + 1
Else
MsgBox("error creating bitpatern")
Exit Function
End If
Z = Y
Loop
If TotBits < Bitlen Then TotBits = Bitlen
BitLens(Bitlen) = BitLens(Bitlen) + 1
CharLens(Bitlen) = CharLens(Bitlen) & Chr(Char_Renamed)
PackedSize = PackedSize + (TreeNodes(X, 0) * Bitlen)
DictSize = DictSize + 2
Next
PackedSize = Int(PackedSize / 8) + System.Math.Abs(1 * CShort((PackedSize / 8) - Int(PackedSize / 8) > 0))
DictString = Chr(TotBits)
For X = 1 To TotBits
If BitLens(X) = 256 Then
MsgBox("This code can't be compressed using this scheme")
Exit Function
End If
DictString = DictString & Chr(BitLens(X))
Next
For X = 1 To TotBits
DictString = DictString & CharLens(X)
Next
Call Create_Huffcodes(DictString, True)
ReDim OutStream(3 + Len(DictString) + 1 + Len(CStr(UBound(FileArray))) + 1 + PackedSize)
For X = 0 To 7
BitValue(X) = 2 ^ X
Next
Call AddASC2Array(OutStream, OutPutSize, "HE4")
Call AddASC2Array(OutStream, OutPutSize, DictString)
Call AddASC2Array(OutStream, OutPutSize, Chr(CheckSum))
Call AddASC2Array(OutStream, OutPutSize, CStr(UBound(FileArray) + 1) & vbCr)
TelBits = 7
ByteValue = 0
For X = 0 To UBound(FileArray)
For Y = CharVal(FileArray(X)) - 1 To 0 Step -1 'bitlengte
If (BitVal(FileArray(X)) And 2 ^ Y) > 0 Then
ByteValue = ByteValue + BitValue(TelBits)
End If
TelBits = TelBits - 1
If TelBits = -1 Then
OutPutSize = OutPutSize + 1
OutStream(OutPutSize) = ByteValue
TelBits = 7
ByteValue = 0
End If
Next
Next
If TelBits <> 7 Then
OutPutSize = OutPutSize + 1
OutStream(OutPutSize) = ByteValue
End If
Compress = OutStream(OutPutSize)
End Function
Public Function Decompress(ByRef FileArray() As Byte) As Byte
Dim X As Integer
Dim Y As Integer
Dim Z As Integer
Dim TreeNodes(511, 4) As Integer
Dim DeCompressed() As Byte
Dim Leaf(255, 1) As Byte
Dim ByteValue As Byte
Dim BitValue(7) As Byte
Dim NumberOfNodes As Short
Dim CheckSum As Byte
Dim TestSum As Byte
Dim NuNode As Short
Dim ToNode As Short
Dim Char_Renamed As Byte
Dim Bitlen As Byte
Dim Bits(255) As String
Dim TempBits As String
Dim StringBuffer As String
Dim TotBits As Integer
Dim TelBits As Short
Dim DictSize As Integer
Dim InpPos As Integer
Dim OrgLen As Integer
Dim Nulen As Integer
Dim DictString As String
Dim Waarde As Integer
If FileArray(0) <> Asc("H") Or FileArray(1) <> Asc("E") Then
MsgBox("This is not a Huffman Compressed file")
Exit Function
End If
If FileArray(2) = Asc("0") Then
Call CopyMemory(FileArray(0), FileArray(3), UBound(FileArray) - 3)
ReDim Preserve FileArray(UBound(FileArray) - 3)
Exit Function
End If
If FileArray(2) <> Asc("4") Then
MsgBox("file corrupt or no Huffman compression")
Exit Function
End If
InpPos = 3
For X = 0 To 7
BitValue(X) = 2 ^ X
Next
TotBits = GetAscCodeFromArray(FileArray, InpPos)
DictString = DictString & Chr(TotBits)
TelBits = 0
For X = 1 To TotBits
ByteValue = GetAscCodeFromArray(FileArray, InpPos)
TelBits = TelBits + ByteValue
DictString = DictString & Chr(ByteValue)
Next
For X = 1 To TelBits
DictString = DictString & Chr(GetAscCodeFromArray(FileArray, InpPos))
Next
Call Create_Huffcodes(DictString, False)
CheckSum = GetAscCodeFromArray(FileArray, InpPos)
Char_Renamed = GetAscCodeFromArray(FileArray, InpPos)
Do While Char_Renamed <> Asc(vbCr)
OrgLen = CInt(OrgLen & Chr(Char_Renamed))
Char_Renamed = GetAscCodeFromArray(FileArray, InpPos)
Loop
ReDim DeCompressed(OrgLen - 1)
Nulen = 0
NuNode = 0
StringBuffer = ""
TelBits = 7
Waarde = 0
TotBits = 0
Do While Nulen < OrgLen
If TelBits = -1 Then
InpPos = InpPos + 1
TelBits = 7
End If
Waarde = Waarde * 2
TotBits = TotBits + 1
If (FileArray(InpPos) And 2 ^ TelBits) > 0 Then
Waarde = Waarde + 1
End If
If TotBits = 20 Then
Err.Raise(VariantType.Error, "DecompressHuffman", "We zijn de boom tot op een dood punt genaderd, waarschijnlijk is de header beschadigd")
Exit Function
End If
If BitVal(Waarde) = TotBits Then
DeCompressed(Nulen) = CharVal(Waarde)
TestSum = TestSum Xor DeCompressed(Nulen)
Nulen = Nulen + 1
Waarde = 0
TotBits = 0
End If
TelBits = TelBits - 1
Loop
If CheckSum <> TestSum Then
Err.Raise(VariantType.Error, "Decompresshuffman", "Checksum is incorrect")
Exit Function
End If
ReDim FileArray(OrgLen - 1)
Call CopyMemory(FileArray(0), DeCompressed(0), OrgLen)
Exit Function
Decompress = DeCompressed(0)
End Function
Private Function BinToDec(ByRef Binair As String) As Short
Dim X As Short
If Len(Binair) > 8 Then
Err.Raise(VariantType.Error, "BinToDec", "This binary number dont fit in 1 byte")
Exit Function
End If
Do While Len(Binair) <> 8
Binair = Binair & "0"
Loop
For X = 1 To 8
BinToDec = BinToDec + (CDbl(Mid(Binair, X, 1)) * 2 ^ (8 - X))
Next
End Function
Private Function DecToBin(ByRef Waarde As Short) As String
Dim X As Short
For X = 7 To 0 Step -1
DecToBin = DecToBin & CStr(System.Math.Abs(CInt((Waarde And (2 ^ X)) > 0)))
Next
End Function
Private Sub AddASC2Array(ByRef WichArray() As Byte, ByRef StartPos As Integer, ByRef Text As String)
Dim X As Integer
For X = 1 To Len(Text)
WichArray(StartPos + X) = Asc(Mid(Text, X, 1))
Next
StartPos = StartPos + Len(Text)
End Sub
Private Function GetAscCodeFromArray(ByRef WichArray() As Byte, ByRef StartPos As Integer) As Short
GetAscCodeFromArray = WichArray(StartPos)
StartPos = StartPos + 1
End Function
Private Sub AddHEX2Array(ByRef WichArray() As Byte, ByRef StartPos As Integer, ByRef Waarde As Integer, ByRef TotBytes As Short)
Dim HexWaarde As String
Dim X As Integer
HexWaarde = Right(New String("0", 2 * TotBytes) & Hex(Waarde), 2 * TotBytes)
For X = 1 To TotBytes
WichArray(StartPos + X) = CByte("&h" & Mid(HexWaarde, (X - 1) * 2 + 1, 2))
Next
StartPos = StartPos + TotBytes
End Sub
Private Function GetHexValFromArray(ByRef WichArray() As Byte, ByRef StartPos As Integer, ByRef TotBytes As Short) As Integer
Dim X As Integer
Dim TempHex As String
For X = 0 To TotBytes - 1
TempHex = TempHex & Right("00" & Hex(WichArray(StartPos + X)), 2)
Next
StartPos = StartPos + TotBytes
GetHexValFromArray = CInt("&h" & TempHex)
End Function
Private Sub Create_Huffcodes(ByRef DictString As String, ByRef ForCompress As Boolean)
Dim Code As Integer
Dim TotKars As Short
Dim TotLengs As Short
Dim ReadPos As Short
Dim bl_count() As Short
Dim TreeLang() As Short
Dim MaxLang As Short
Dim TreeCode() As Integer
Dim next_code() As Integer
Dim Chars() As Short
Dim BitString As String
Dim Bitlen As Short
Dim NumBits As Short
Dim MaxBits As Short
Dim maxcode As Integer
Dim N As Short
Dim X As Short
Dim Y As Short
Dim Lang As Short
MaxBits = Asc(Mid(DictString, 1, 1))
ReDim Preserve bl_count(MaxBits)
ReadPos = 2
MaxLang = -1
For X = 1 To MaxBits
NumBits = Asc(Mid(DictString, ReadPos, 1))
If NumBits > 0 Then
Bitlen = X
bl_count(Bitlen) = NumBits
ReDim Preserve TreeLang(MaxLang + NumBits)
For Y = 1 To NumBits
MaxLang = MaxLang + 1
TreeLang(MaxLang) = Bitlen
Next
End If
ReadPos = ReadPos + 1
Next
ReDim TreeCode(MaxLang)
ReDim next_code(MaxBits)
ReDim Chars(MaxLang)
For X = 0 To MaxLang
Chars(X) = Asc(Mid(DictString, ReadPos, 1))
ReadPos = ReadPos + 1
Next
maxcode = 0
Code = 0
For N = 1 To MaxBits
Code = (Code + bl_count(N - 1)) * 2
next_code(N) = Code
Next
For N = 0 To MaxLang
Lang = TreeLang(N)
TreeCode(N) = next_code(Lang)
next_code(Lang) = next_code(Lang) + 1
If maxcode < next_code(Lang) Then maxcode = next_code(Lang)
Next
If ForCompress = True Then
ReDim BitVal(255)
ReDim CharVal(255)
For X = 0 To MaxLang
BitVal(Chars(X)) = TreeCode(X)
CharVal(Chars(X)) = TreeLang(X)
Next
Else
ReDim BitVal(maxcode)
ReDim CharVal(maxcode)
For X = 0 To MaxLang
BitVal(TreeCode(X)) = TreeLang(X)
CharVal(TreeCode(X)) = Chars(X)
Next
End If
End Sub
End Class