You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

622 lines
21 KiB

  1. // LzwInputStream.cs
  2. //
  3. // Copyright (C) 2009 Gabriel Burca
  4. //
  5. // This program is free software; you can redistribute it and/or
  6. // modify it under the terms of the GNU General Public License
  7. // as published by the Free Software Foundation; either version 2
  8. // of the License, or (at your option) any later version.
  9. //
  10. // This program is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU General Public License
  16. // along with this program; if not, write to the Free Software
  17. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18. //
  19. // Linking this library statically or dynamically with other modules is
  20. // making a combined work based on this library. Thus, the terms and
  21. // conditions of the GNU General Public License cover the whole
  22. // combination.
  23. //
  24. // As a special exception, the copyright holders of this library give you
  25. // permission to link this library with independent modules to produce an
  26. // executable, regardless of the license terms of these independent
  27. // modules, and to copy and distribute the resulting executable under
  28. // terms of your choice, provided that you also meet, for each linked
  29. // independent module, the terms and conditions of the license of that
  30. // module. An independent module is a module which is not derived from
  31. // or based on this library. If you modify this library, you may extend
  32. // this exception to your version of the library, but you are not
  33. // obligated to do so. If you do not wish to do so, delete this
  34. // exception statement from your version.
  35. using System;
  36. using System.IO;
  37. namespace Externals.Compression.LZW
  38. {
  39. /// <summary>
  40. /// This filter stream is used to decompress an LZW format stream.
  41. /// Specifically, a stream that uses the LZC compression method.
  42. /// This file format is usually associated with the .Z file extension.
  43. ///
  44. /// See http://en.wikipedia.org/wiki/Compress
  45. /// See http://wiki.wxwidgets.org/Development:_Z_File_Format
  46. ///
  47. /// The file header consists of 3 (or optionally 4) bytes. The first two bytes
  48. /// contain the magic marker "0x1f 0x9d", followed by a byte of flags.
  49. ///
  50. /// Based on Java code by Ronald Tschalar, which in turn was based on the unlzw.c
  51. /// code in the gzip package.
  52. /// </summary>
  53. /// <example> This sample shows how to unzip a compressed file
  54. /// <code>
  55. /// using System;
  56. /// using System.IO;
  57. ///
  58. /// using Externals.Compression.Core;
  59. /// using Externals.Compression.LZW;
  60. ///
  61. /// class MainClass
  62. /// {
  63. /// public static void Main(string[] args)
  64. /// {
  65. /// using (Stream inStream = new LzwInputStream(File.OpenRead(args[0])))
  66. /// using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
  67. /// byte[] buffer = new byte[4096];
  68. /// StreamUtils.Copy(inStream, outStream, buffer);
  69. /// // OR
  70. /// inStream.Read(buffer, 0, buffer.Length);
  71. /// // now do something with the buffer
  72. /// }
  73. /// }
  74. /// }
  75. /// </code>
  76. /// </example>
  77. internal class LzwInputStream : Stream
  78. {
  /// <summary>
  /// Gets or sets whether this stream owns the underlying stream.
  /// When true, <see cref="Close"/> also closes the underlying stream.
  /// </summary>
  /// <remarks>
  /// Defaults to true.
  /// </remarks>
  public bool IsStreamOwner
  {
      get
      {
          return isStreamOwner;
      }
      set
      {
          isStreamOwner = value;
      }
  }
  /// <summary>
  /// Creates an LzwInputStream that decompresses data read from the given stream.
  /// </summary>
  /// <param name="baseInputStream">
  /// The stream to read compressed (LZW format) data from. Must not be null.
  /// </param>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="baseInputStream"/> is null.
  /// </exception>
  public LzwInputStream(Stream baseInputStream)
  {
      // Fail fast here instead of with a NullReferenceException on the first read.
      if (baseInputStream == null)
      {
          throw new ArgumentNullException("baseInputStream");
      }
      this.baseInputStream = baseInputStream;
  }
  /// <summary>
  /// Reads a single decompressed byte by delegating to
  /// <see cref="Read(byte[], int, int)"/> with a one-byte scratch buffer.
  /// See <see cref="System.IO.Stream.ReadByte"/>.
  /// </summary>
  /// <returns>The byte read, as an int, or -1 when no byte could be read.</returns>
  public override int ReadByte()
  {
      int bytesRead = Read(one, 0, 1);
      return (bytesRead == 1) ? (one[0] & 0xff) : -1;
  }
  /// <summary>
  /// Reads decompressed data into the provided buffer byte array.
  /// The header is parsed lazily on the first call.
  /// </summary>
  /// <param name ="buffer">
  /// The array to read and decompress data into
  /// </param>
  /// <param name ="offset">
  /// The offset in <paramref name="buffer"/> at which the data should be placed
  /// </param>
  /// <param name ="count">
  /// The maximum number of bytes to decompress
  /// </param>
  /// <returns>The number of bytes read. Zero signals the end of stream</returns>
  /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="offset"/> or <paramref name="count"/> is negative.
  /// </exception>
  /// <exception cref="ArgumentException">
  /// <paramref name="offset"/> plus <paramref name="count"/> exceeds the buffer length.
  /// </exception>
  /// <exception cref="LzwException">The compressed input is corrupt.</exception>
  public override int Read(byte[] buffer, int offset, int count)
  {
      // Validate up front: failing in the middle of decoding would leave the
      // decoder state half-updated.
      if (buffer == null)
      {
          throw new ArgumentNullException("buffer");
      }
      if (offset < 0)
      {
          throw new ArgumentOutOfRangeException("offset", "Cannot be negative");
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException("count", "Cannot be negative");
      }
      if (buffer.Length - offset < count)
      {
          throw new ArgumentException("offset and count exceed the length of buffer");
      }

      if (!headerParsed) ParseHeader();

      // Stream.Read signals end of stream with 0, never with a negative value.
      if (eof) return 0;

      int start = offset;

      /* Using local copies of various variables speeds things up by as
       * much as 30% in Java! Performance not tested in C#.
       */
      int[] lTabPrefix = tabPrefix;
      byte[] lTabSuffix = tabSuffix;
      byte[] lStack = stack;
      int lNBits = nBits;
      int lMaxCode = maxCode;
      int lMaxMaxCode = maxMaxCode;
      int lBitMask = bitMask;
      int lOldCode = oldCode;
      byte lFinChar = finChar;
      int lStackP = stackP;
      int lFreeEnt = freeEnt;
      byte[] lData = data;
      int lBitPos = bitPos;

      // empty stack if stuff still left (bytes decoded by a previous call
      // that did not fit into that call's buffer)
      int sSize = lStack.Length - lStackP;
      if (sSize > 0)
      {
          int num = (sSize >= count) ? count : sSize;
          Array.Copy(lStack, lStackP, buffer, offset, num);
          offset += num;
          count -= num;
          lStackP += num;
      }

      if (count == 0)
      {
          // Only the stack pointer can have changed so far.
          stackP = lStackP;
          return offset - start;
      }

      // loop, filling local buffer until enough data has been decompressed
      MainLoop: do
      {
          if (end < EXTRA)
          {
              Fill();
          }

          // Number of buffered bits that may be consumed on this pass.
          // While the last Fill returned data: the buffered byte count rounded
          // down to a multiple of lNBits, in bits. Otherwise: all buffered
          // bits minus (lNBits - 1), so a code is read only while a full
          // lNBits-wide code remains.
          int bitIn = (got > 0) ? (end - end % lNBits) << 3 :
                                  (end << 3) - (lNBits - 1);

          while (lBitPos < bitIn)
          {
              #region A
              // handle 1-byte reads correctly
              if (count == 0)
              {
                  nBits = lNBits;
                  maxCode = lMaxCode;
                  maxMaxCode = lMaxMaxCode;
                  bitMask = lBitMask;
                  oldCode = lOldCode;
                  finChar = lFinChar;
                  stackP = lStackP;
                  freeEnt = lFreeEnt;
                  bitPos = lBitPos;
                  return offset - start;
              }

              // check for code-width expansion
              if (lFreeEnt > lMaxCode)
              {
                  // Round lBitPos up to the next multiple of (lNBits * 8) bits
                  // before switching to the wider code size.
                  int nBytes = lNBits << 3;
                  lBitPos = (lBitPos - 1) +
                            nBytes - (lBitPos - 1 + nBytes) % nBytes;

                  lNBits++;
                  lMaxCode = (lNBits == maxBits) ? lMaxMaxCode :
                                                   (1 << lNBits) - 1;
                  lBitMask = (1 << lNBits) - 1;
                  lBitPos = ResetBuf(lBitPos);
                  goto MainLoop;
              }
              #endregion

              #region B
              // read next code: assemble 24 bits little-endian starting at the
              // current byte, shift off the bits already used, mask to lNBits
              int pos = lBitPos >> 3;
              int code = (((lData[pos] & 0xFF) |
                           ((lData[pos + 1] & 0xFF) << 8) |
                           ((lData[pos + 2] & 0xFF) << 16)) >>
                          (lBitPos & 0x7)) & lBitMask;
              lBitPos += lNBits;

              // handle first iteration: the first code must be a literal byte
              if (lOldCode == -1)
              {
                  if (code >= 256) throw new LzwException("corrupt input: " + code + " > 255");
                  lFinChar = (byte)(lOldCode = code);
                  buffer[offset++] = lFinChar;
                  count--;
                  continue;
              }

              // handle CLEAR code
              if (code == TBL_CLEAR && blockMode)
              {
                  Array.Copy(zeros, 0, lTabPrefix, 0, zeros.Length);
                  lFreeEnt = TBL_FIRST - 1;

                  // Same realignment as for code-width expansion above.
                  int nBytes = lNBits << 3;
                  lBitPos = (lBitPos - 1) + nBytes - (lBitPos - 1 + nBytes) % nBytes;
                  lNBits = LzwConstants.INIT_BITS;
                  lMaxCode = (1 << lNBits) - 1;
                  lBitMask = lMaxCode;

                  // Code tables reset
                  lBitPos = ResetBuf(lBitPos);
                  goto MainLoop;
              }
              #endregion

              #region C
              // setup
              int inCode = code;
              lStackP = lStack.Length;

              // Handle KwK case
              if (code >= lFreeEnt)
              {
                  if (code > lFreeEnt)
                  {
                      throw new LzwException("corrupt input: code=" + code +
                                             ", freeEnt=" + lFreeEnt);
                  }

                  lStack[--lStackP] = lFinChar;
                  code = lOldCode;
              }

              // Generate output characters in reverse order
              while (code >= 256)
              {
                  lStack[--lStackP] = lTabSuffix[code];
                  code = lTabPrefix[code];
              }

              lFinChar = lTabSuffix[code];
              buffer[offset++] = lFinChar;
              count--;

              // And put them out in forward order; whatever does not fit stays
              // on the stack for the next call
              sSize = lStack.Length - lStackP;
              int num = (sSize >= count) ? count : sSize;
              Array.Copy(lStack, lStackP, buffer, offset, num);
              offset += num;
              count -= num;
              lStackP += num;
              #endregion

              #region D
              // generate new entry in table
              if (lFreeEnt < lMaxMaxCode)
              {
                  lTabPrefix[lFreeEnt] = lOldCode;
                  lTabSuffix[lFreeEnt] = lFinChar;
                  lFreeEnt++;
              }

              // Remember previous code
              lOldCode = inCode;

              // if output buffer full, then return
              if (count == 0)
              {
                  nBits = lNBits;
                  maxCode = lMaxCode;
                  bitMask = lBitMask;
                  oldCode = lOldCode;
                  finChar = lFinChar;
                  stackP = lStackP;
                  freeEnt = lFreeEnt;
                  bitPos = lBitPos;
                  return offset - start;
              }
              #endregion
          } // while

          lBitPos = ResetBuf(lBitPos);
      } while (got > 0); // do..while

      // Input exhausted: persist the state and remember that we are done.
      nBits = lNBits;
      maxCode = lMaxCode;
      bitMask = lBitMask;
      oldCode = lOldCode;
      finChar = lFinChar;
      stackP = lStackP;
      freeEnt = lFreeEnt;
      bitPos = lBitPos;
      eof = true;
      return offset - start;
  }
  /// <summary>
  /// Drops the already-consumed bytes of the input buffer by shifting the
  /// remaining bytes to the front, and reduces <c>end</c> by the same amount.
  /// </summary>
  /// <param name="bitPosition">Current read position in the input buffer, in bits.</param>
  /// <returns>Always 0.</returns>
  private int ResetBuf(int bitPosition)
  {
      int consumedBytes = bitPosition >> 3;
      int remainingBytes = end - consumedBytes;
      Array.Copy(data, consumedBytes, data, 0, remainingBytes);
      end = remainingBytes;
      return 0;
  }
  /// <summary>
  /// Reads more compressed bytes from the underlying stream into the input
  /// buffer starting at <c>end</c> (requesting one byte less than the free
  /// space), stores the result in <c>got</c>, and advances <c>end</c> when
  /// bytes were received.
  /// </summary>
  private void Fill()
  {
      int requested = data.Length - 1 - end;
      got = baseInputStream.Read(data, end, requested);
      if (got <= 0)
      {
          return;
      }
      end += got;
  }
  /// <summary>
  /// Reads and validates the stream header (magic marker plus flags byte)
  /// and initialises the decoder tables and state from it.
  /// </summary>
  /// <exception cref="LzwException">
  /// The header could not be read completely, the magic bytes do not match,
  /// the bit count is out of range, or reserved flag bits are set.
  /// </exception>
  private void ParseHeader()
  {
      headerParsed = true;
      byte[] hdr = new byte[LzwConstants.HDR_SIZE];

      // Stream.Read may return fewer bytes than requested and never returns a
      // negative value, so keep reading until the header is complete or the
      // underlying stream ends.
      int filled = 0;
      while (filled < hdr.Length)
      {
          int result = baseInputStream.Read(hdr, filled, hdr.Length - filled);
          if (result <= 0)
          {
              throw new LzwException("Failed to read LZW header");
          }
          filled += result;
      }

      // Check the magic marker
      if (hdr[0] != (LzwConstants.MAGIC >> 8) || hdr[1] != (LzwConstants.MAGIC & 0xff))
      {
          throw new LzwException(String.Format(
              "Wrong LZW header. Magic bytes don't match. 0x{0:x2} 0x{1:x2}",
              hdr[0], hdr[1]));
      }

      // Check the 3rd header byte
      blockMode = (hdr[2] & LzwConstants.BLOCK_MODE_MASK) > 0;
      maxBits = hdr[2] & LzwConstants.BIT_MASK;
      if (maxBits > LzwConstants.MAX_BITS)
      {
          throw new LzwException("Stream compressed with " + maxBits +
                                 " bits, but decompression can only handle " +
                                 LzwConstants.MAX_BITS + " bits.");
      }

      if ((hdr[2] & LzwConstants.RESERVED_MASK) > 0)
      {
          throw new LzwException("Unsupported bits set in the header.");
      }

      // The suffix table is seeded with 256 literal entries below, so it needs
      // at least 1 << 8 slots; smaller values would overrun the table.
      const int minTableBits = 8;
      if (maxBits < minTableBits)
      {
          throw new LzwException("Stream compressed with " + maxBits +
                                 " bits, but decompression requires at least " +
                                 minTableBits + " bits.");
      }

      // Initialize variables
      maxMaxCode = 1 << maxBits;
      nBits = LzwConstants.INIT_BITS;
      maxCode = (1 << nBits) - 1;
      bitMask = maxCode;
      oldCode = -1; // no previous code yet; Read treats the next code as the first
      finChar = 0;
      freeEnt = blockMode ? TBL_FIRST : 256;
      tabPrefix = new int[1 << maxBits];
      tabSuffix = new byte[1 << maxBits];
      stack = new byte[1 << maxBits];
      stackP = stack.Length; // stack starts empty (pointer at the end)

      // Entries 0..255 stand for the literal byte values themselves.
      for (int idx = 255; idx >= 0; idx--)
          tabSuffix[idx] = (byte)idx;
  }
  366. #region Stream Overrides
  /// <summary>
  /// Gets whether this stream can be read, which is whatever the
  /// underlying stream reports for its own CanRead.
  /// </summary>
  public override bool CanRead
  {
      get { return baseInputStream.CanRead; }
  }
  /// <summary>
  /// Always false: this stream does not support seeking.
  /// </summary>
  public override bool CanSeek
  {
      get { return false; }
  }
  /// <summary>
  /// Always false: this stream is read-only.
  /// </summary>
  public override bool CanWrite
  {
      get { return false; }
  }
  /// <summary>
  /// Returns the value of <c>got</c>, i.e. the result of the most recent
  /// read from the underlying stream performed while filling the input buffer.
  /// </summary>
  /// <remarks>
  /// NOTE(review): this is not the decompressed length of the stream.
  /// Confirm whether any caller relies on this value before changing it.
  /// </remarks>
  public override long Length
  {
      get { return got; }
  }
  /// <summary>
  /// Gets the current position of the underlying stream.
  /// Setting the position is not supported.
  /// </summary>
  /// <exception cref="NotSupportedException">Attempting to set the position</exception>
  public override long Position
  {
      get
      {
          return baseInputStream.Position;
      }
      set
      {
          // The message previously named InflaterInputStream (copy/paste leftover).
          throw new NotSupportedException("LzwInputStream Position not supported");
      }
  }
  /// <summary>
  /// Forwards the flush request to the underlying stream.
  /// </summary>
  public override void Flush()
  {
      this.baseInputStream.Flush();
  }
  /// <summary>
  /// Seeking is not supported by this stream; every call throws.
  /// </summary>
  /// <param name="offset">The relative offset to seek to (unused).</param>
  /// <param name="origin">The <see cref="SeekOrigin"/> to seek from (unused).</param>
  /// <returns>Never returns normally.</returns>
  /// <exception cref="NotSupportedException">Always thrown.</exception>
  public override long Seek(long offset, SeekOrigin origin)
  {
      throw new NotSupportedException("Seek not supported");
  }
  /// <summary>
  /// Set the length of the current stream.
  /// Always throws a NotSupportedException.
  /// </summary>
  /// <param name="value">The new length value for the stream.</param>
  /// <exception cref="NotSupportedException">Any access</exception>
  public override void SetLength(long value)
  {
      // The message previously named InflaterInputStream (copy/paste leftover).
      throw new NotSupportedException("LzwInputStream SetLength not supported");
  }
  /// <summary>
  /// Writes a sequence of bytes to stream and advances the current position.
  /// This method always throws a NotSupportedException.
  /// </summary>
  /// <param name="buffer">The buffer containing data to write.</param>
  /// <param name="offset">The offset of the first byte to write.</param>
  /// <param name="count">The number of bytes to write.</param>
  /// <exception cref="NotSupportedException">Any access</exception>
  public override void Write(byte[] buffer, int offset, int count)
  {
      // The message previously named InflaterInputStream (copy/paste leftover).
      throw new NotSupportedException("LzwInputStream Write not supported");
  }
  /// <summary>
  /// Writes one byte to the current stream and advances the current position.
  /// Always throws a NotSupportedException.
  /// </summary>
  /// <param name="value">The byte to write.</param>
  /// <exception cref="NotSupportedException">Any access</exception>
  public override void WriteByte(byte value)
  {
      // The message previously named InflaterInputStream (copy/paste leftover).
      throw new NotSupportedException("LzwInputStream WriteByte not supported");
  }
  /// <summary>
  /// Entry point to begin an asynchronous write. Always throws a NotSupportedException.
  /// </summary>
  /// <param name="buffer">The buffer to write data from</param>
  /// <param name="offset">Offset of first byte to write</param>
  /// <param name="count">The maximum number of bytes to write</param>
  /// <param name="callback">The method to be called when the asynchronous write operation is completed</param>
  /// <param name="state">A user-provided object that distinguishes this particular asynchronous write request from other requests</param>
  /// <returns>An <see cref="System.IAsyncResult">IAsyncResult</see> that references the asynchronous write</returns>
  /// <exception cref="NotSupportedException">Any access</exception>
  public override IAsyncResult BeginWrite(byte[] buffer, int offset, int count, AsyncCallback callback, object state)
  {
      // The message previously named InflaterInputStream (copy/paste leftover).
      throw new NotSupportedException("LzwInputStream BeginWrite not supported");
  }
  /// <summary>
  /// Marks this stream as closed. If <see cref="IsStreamOwner"></see> is
  /// true the underlying stream is closed too. Calls after the first one
  /// do nothing.
  /// </summary>
  public override void Close()
  {
      if (isClosed)
      {
          return;
      }

      isClosed = true;

      if (!isStreamOwner)
      {
          return;
      }

      baseInputStream.Close();
  }
  503. #endregion
  504. #region Instance Fields
  /// <summary>
  /// The underlying stream that compressed data is read from.
  /// </summary>
  private Stream baseInputStream;

  /// <summary>
  /// Flag indicating whether this instance is designated the stream owner.
  /// When closing, if this flag is true the underlying stream is closed.
  /// </summary>
  private bool isStreamOwner = true;

  /// <summary>
  /// Flag indicating whether this instance has been closed or not.
  /// </summary>
  private bool isClosed;

  // One-byte scratch buffer used by ReadByte.
  private readonly byte[] one = new byte[1];

  // Set once ParseHeader has run.
  private bool headerParsed;

  // string table stuff
  // Code that resets the string table when blockMode is on.
  private const int TBL_CLEAR = 0x100;
  // Initial value of freeEnt when blockMode is on.
  private const int TBL_FIRST = TBL_CLEAR + 1;

  // Per table entry: the prefix code and the suffix byte.
  private int[] tabPrefix;
  private byte[] tabSuffix;
  // Source of zeros copied over the first 256 prefix entries on a CLEAR code.
  private readonly int[] zeros = new int[256];
  // Holds decoded bytes that have not been handed to the caller yet;
  // filled from the end towards the start.
  private byte[] stack;

  // various state
  private bool blockMode;   // block-mode flag from the header
  private int nBits;        // current code width in bits
  private int maxBits;      // maximum code width from the header
  private int maxMaxCode;   // 1 << maxBits
  private int maxCode;      // code limit that triggers widening of nBits
  private int bitMask;      // mask selecting the low nBits bits
  private int oldCode;      // previously read code; -1 before the first one
  private byte finChar;     // first byte of the most recently decoded string
  private int stackP;       // index of the first pending byte in stack; stack.Length when empty
  private int freeEnt;      // next free index in the string table

  // input buffer
  private readonly byte[] data = new byte[1024 * 8];
  private int bitPos;       // read position within data, in bits
  private int end;          // number of valid bytes in data
  private int got;          // result of the last read from the underlying stream
  private bool eof;         // set once Read has exhausted the input
  // Fill is called whenever fewer than this many bytes are buffered.
  private const int EXTRA = 64;
  542. #endregion
  543. }
  544. }