You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

384 lines
11 KiB

  1. // GzipInputStream.cs
  2. //
  3. // Copyright (C) 2001 Mike Krueger
  4. //
  5. // This file was translated from java, it was part of the GNU Classpath
  6. // Copyright (C) 2001 Free Software Foundation, Inc.
  7. //
  8. // This program is free software; you can redistribute it and/or
  9. // modify it under the terms of the GNU General Public License
  10. // as published by the Free Software Foundation; either version 2
  11. // of the License, or (at your option) any later version.
  12. //
  13. // This program is distributed in the hope that it will be useful,
  14. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. // GNU General Public License for more details.
  17. //
  18. // You should have received a copy of the GNU General Public License
  19. // along with this program; if not, write to the Free Software
  20. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  21. //
  22. // Linking this library statically or dynamically with other modules is
  23. // making a combined work based on this library. Thus, the terms and
  24. // conditions of the GNU General Public License cover the whole
  25. // combination.
  26. //
  27. // As a special exception, the copyright holders of this library give you
  28. // permission to link this library with independent modules to produce an
  29. // executable, regardless of the license terms of these independent
  30. // modules, and to copy and distribute the resulting executable under
  31. // terms of your choice, provided that you also meet, for each linked
  32. // independent module, the terms and conditions of the license of that
  33. // module. An independent module is a module which is not derived from
  34. // or based on this library. If you modify this library, you may extend
  35. // this exception to your version of the library, but you are not
  36. // obligated to do so. If you do not wish to do so, delete this
  37. // exception statement from your version.
  38. // HISTORY
  39. // 11-08-2009 GeoffHart T9121 Added Multi-member gzip support
  40. using System;
  41. using System.IO;
  42. using Externals.Compression.Checksums;
  43. using Externals.Compression.Zip.Compression;
  44. using Externals.Compression.Zip.Compression.Streams;
  45. namespace Externals.Compression.GZip
  46. {
  /// <summary>
  /// This filter stream is used to decompress a "GZIP" format stream.
  /// The "GZIP" format is described in RFC 1952.
  ///
  /// A stream may contain several GZIP members one after another; they are
  /// decompressed back to back and presented as one continuous stream.
  ///
  /// author of the original java version : John Leuner
  /// </summary>
  /// <example> This sample shows how to unzip a gzipped file
  /// <code>
  /// using System;
  /// using System.IO;
  ///
  /// using Externals.Compression.Core;
  /// using Externals.Compression.GZip;
  ///
  /// class MainClass
  /// {
  ///     public static void Main(string[] args)
  ///     {
  ///         using (Stream inStream = new GZipInputStream(File.OpenRead(args[0])))
  ///         using (FileStream outStream = File.Create(Path.GetFileNameWithoutExtension(args[0]))) {
  ///             byte[] buffer = new byte[4096];
  ///             StreamUtils.Copy(inStream, outStream, buffer);
  ///         }
  ///     }
  /// }
  /// </code>
  /// </example>
  internal class GZipInputStream : InflaterInputStream
  {
      #region Constants
      /// <summary>
      /// Message used whenever the input ends in the middle of a member header.
      /// </summary>
      private const string HeaderEosMessage = "EOS reading GZIP header";

      /// <summary>
      /// The only compression method accepted in a header (deflate).
      /// </summary>
      private const int DeflateCompressionMethod = 8;

      /// <summary>
      /// Bits 5-7 of the flag byte are reserved and must be zero.
      /// </summary>
      private const int ReservedFlagsMask = 0xE0;

      /// <summary>
      /// Size in bytes of the member trailer: CRC-32 followed by the uncompressed size.
      /// </summary>
      private const int FooterSize = 8;
      #endregion

      #region Instance Fields
      /// <summary>
      /// CRC-32 value for uncompressed data of the member currently being read.
      /// A fresh instance is created every time a member header is parsed.
      /// </summary>
      protected Crc32 crc;

      /// <summary>
      /// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
      /// This is tracked per-block as the file is parsed.
      /// </summary>
      private bool readGZIPHeader;
      #endregion

      #region Constructors
      /// <summary>
      /// Creates a GZipInputStream with the default buffer size (4096 bytes)
      /// </summary>
      /// <param name="baseInputStream">
      /// The stream to read compressed data from (in GZIP format)
      /// </param>
      public GZipInputStream(Stream baseInputStream)
          : this(baseInputStream, 4096)
      {
      }

      /// <summary>
      /// Creates a GZipInputStream with the specified buffer size
      /// </summary>
      /// <param name="baseInputStream">
      /// The stream to read compressed data from (in GZIP format)
      /// </param>
      /// <param name="size">
      /// Size of the buffer to use
      /// </param>
      public GZipInputStream(Stream baseInputStream, int size)
          : base(baseInputStream, new Inflater(true), size)
      {
      }
      #endregion

      #region Stream overrides
      /// <summary>
      /// Reads uncompressed data into an array of bytes
      /// </summary>
      /// <param name="buffer">
      /// The buffer to read uncompressed data into
      /// </param>
      /// <param name="offset">
      /// The offset indicating where the data should be placed
      /// </param>
      /// <param name="count">
      /// The number of uncompressed bytes to be read
      /// </param>
      /// <returns>
      /// Returns the number of bytes actually read, or 0 when no further
      /// member header is available (end of data) or when
      /// <paramref name="count"/> is 0.
      /// </returns>
      /// <exception cref="GZipException">
      /// A member header is malformed, or a member trailer does not match
      /// the CRC / length of the data that was decompressed.
      /// </exception>
      /// <exception cref="EndOfStreamException">
      /// The input ends inside a member header or trailer.
      /// </exception>
      public override int Read(byte[] buffer, int offset, int count)
      {
          // The loop below only exits once it has produced at least one byte or hit
          // end of data, so a zero-length request could never complete normally.
          if (count == 0) {
              return 0;
          }

          // A GZIP file can contain multiple blocks of compressed data, although this is quite rare.
          // A compressed block could potentially be empty, so we need to loop until we reach EOF or
          // we find data.
          while (true) {
              // Parse the header of the next member if we are not inside one already.
              // No header at all (0 bytes available) means EOF; a truncated header throws.
              if (!readGZIPHeader && !ReadHeader()) {
                  return 0;
              }

              // Try to read compressed data
              int bytesRead = base.Read(buffer, offset, count);
              if (bytesRead > 0) {
                  crc.Update(buffer, offset, bytesRead);
              }

              // If this is the end of the member, validate its trailer. This also clears
              // readGZIPHeader so that a following member is picked up on the next pass.
              if (inf.IsFinished) {
                  ReadFooter();
              }

              if (bytesRead > 0) {
                  return bytesRead;
              }
          }
      }
      #endregion

      #region Support routines
      /// <summary>
      /// Parses the header of the next GZIP member (RFC 1952, section 2.3) and
      /// resets the data CRC for that member.
      /// </summary>
      /// <returns>
      /// true if a header was read; false if no input was available at all,
      /// which is treated as a regular end of data.
      /// </returns>
      bool ReadHeader()
      {
          // Initialize CRC for this block
          crc = new Crc32();

          // Make sure there is data in file. We can't rely on ReadLeByte() to fill the buffer, as this could be EOF,
          // which is fine, but ReadLeByte() throws an exception if it doesn't find data, so we do this part ourselves.
          if (inputBuffer.Available <= 0) {
              inputBuffer.Fill();
              if (inputBuffer.Available <= 0) {
                  // No header, EOF.
                  return false;
              }
          }

          // CRC over the header bytes, needed only if the member carries an FHCRC field.
          Crc32 headCRC = new Crc32();

          // 1. Check the two magic bytes
          if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC >> 8)) {
              throw new GZipException("Error GZIP header, first magic byte doesn't match");
          }
          if (ReadHeaderByte(headCRC) != (GZipConstants.GZIP_MAGIC & 0xFF)) {
              throw new GZipException("Error GZIP header, second magic byte doesn't match");
          }

          // 2. Check the compression type (must be 8)
          if (ReadHeaderByte(headCRC) != DeflateCompressionMethod) {
              throw new GZipException("Error GZIP header, data not in deflate format");
          }

          // 3. Check the flags
          int flags = ReadHeaderByte(headCRC);

          /* This flag byte is divided into individual bits as follows:
             bit 0 FTEXT
             bit 1 FHCRC
             bit 2 FEXTRA
             bit 3 FNAME
             bit 4 FCOMMENT
             bit 5 reserved
             bit 6 reserved
             bit 7 reserved
          */

          // 3.1 Check the reserved bits are zero
          if ((flags & ReservedFlagsMask) != 0) {
              throw new GZipException("Reserved flag bits in GZIP header != 0");
          }

          // 4.-6. Skip the modification time (4 bytes), extra flags (1) and OS type (1)
          for (int i = 0; i < 6; i++) {
              ReadHeaderByte(headCRC);
          }

          // 7. Skip the extra field. RFC 1952 stores a two byte little-endian length
          //    (XLEN) immediately after the fixed header, followed by exactly XLEN
          //    bytes; the subfield ids and lengths live inside those XLEN bytes.
          if ((flags & GZipConstants.FEXTRA) != 0) {
              int lenLow = ReadHeaderByte(headCRC);
              int lenHigh = ReadHeaderByte(headCRC);
              int extraLen = lenLow | (lenHigh << 8);
              for (int i = 0; i < extraLen; i++) {
                  ReadHeaderByte(headCRC);
              }
          }

          // 8. Skip file name
          if ((flags & GZipConstants.FNAME) != 0) {
              SkipZeroTerminatedField(headCRC);
          }

          // 9. Skip comment
          if ((flags & GZipConstants.FCOMMENT) != 0) {
              SkipZeroTerminatedField(headCRC);
          }

          // 10. Check header CRC. The two CRC bytes are not part of the checksummed
          //     data, so they are read directly instead of through ReadHeaderByte().
          if ((flags & GZipConstants.FHCRC) != 0) {
              int crcLow = inputBuffer.ReadLeByte();
              if (crcLow < 0) {
                  throw new EndOfStreamException(HeaderEosMessage);
              }

              int crcHigh = inputBuffer.ReadLeByte();
              if (crcHigh < 0) {
                  throw new EndOfStreamException(HeaderEosMessage);
              }

              // Stored least-significant byte first, like every multi-byte value in GZIP.
              int storedCrc = crcLow | (crcHigh << 8);
              if (storedCrc != ((int) headCRC.Value & 0xffff)) {
                  throw new GZipException("Header CRC value mismatch");
              }
          }

          readGZIPHeader = true;
          return true;
      }

      /// <summary>
      /// Reads one header byte from the input buffer and folds it into the header CRC.
      /// </summary>
      /// <param name="headCRC">Running CRC over the header bytes read so far.</param>
      /// <returns>The byte that was read.</returns>
      /// <exception cref="EndOfStreamException">
      /// The input buffer reported a negative value instead of a byte.
      /// </exception>
      int ReadHeaderByte(Crc32 headCRC)
      {
          int value = inputBuffer.ReadLeByte();
          if (value < 0) {
              throw new EndOfStreamException(HeaderEosMessage);
          }
          headCRC.Update(value);
          return value;
      }

      /// <summary>
      /// Consumes a zero-terminated header field (file name or comment), including
      /// its terminator, folding every byte into the header CRC.
      /// </summary>
      /// <param name="headCRC">Running CRC over the header bytes read so far.</param>
      void SkipZeroTerminatedField(Crc32 headCRC)
      {
          while (ReadHeaderByte(headCRC) != 0) {
              // Content is not needed; keep reading until the terminator.
          }
      }

      /// <summary>
      /// Reads and validates the 8 byte trailer of the current member (CRC-32 and
      /// uncompressed length, both little-endian), then prepares for a possible
      /// following member.
      /// </summary>
      /// <exception cref="EndOfStreamException">
      /// The input ends before the complete trailer was read.
      /// </exception>
      /// <exception cref="GZipException">
      /// The stored CRC or length differs from the decompressed data.
      /// </exception>
      void ReadFooter()
      {
          byte[] footer = new byte[FooterSize];

          // End of stream; reclaim all bytes from inf, read the final byte count, and reset the inflator
          long bytesRead = inf.TotalOut & 0xffffffff;
          inputBuffer.Available += inf.RemainingInput;
          inf.Reset();

          // Read footer from inputBuffer; a single call may deliver fewer bytes than requested.
          int filled = 0;
          while (filled < FooterSize) {
              int count = inputBuffer.ReadClearTextBuffer(footer, filled, FooterSize - filled);
              if (count <= 0) {
                  throw new EndOfStreamException("EOS reading GZIP footer");
              }
              filled += count;
          }

          // Compare the stored CRC with the one accumulated while reading
          int crcval = ReadLeInt32(footer, 0);
          if (crcval != (int) crc.Value) {
              throw new GZipException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + (int) crc.Value + "\"");
          }

          // NOTE The total here is the original total modulo 2 ^ 32.
          uint total = (uint) ReadLeInt32(footer, 4);
          if (bytesRead != total) {
              throw new GZipException("Number of bytes mismatch in footer");
          }

          // Mark header read as false so if another header exists, we'll continue reading through the file
          readGZIPHeader = false;
      }

      /// <summary>
      /// Assembles four consecutive bytes, least-significant byte first, into a 32 bit value.
      /// </summary>
      /// <param name="data">Buffer holding the bytes.</param>
      /// <param name="index">Position of the least-significant byte.</param>
      /// <returns>The assembled value as a signed 32 bit integer.</returns>
      static int ReadLeInt32(byte[] data, int index)
      {
          return data[index]
              | (data[index + 1] << 8)
              | (data[index + 2] << 16)
              | (data[index + 3] << 24);
      }
      #endregion
  }
  332. }