RoundCube Webmail
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

425 lines
14 KiB

5 years ago
  1. <?php
  2. /**
  3. +-----------------------------------------------------------------------+
  4. | This file is part of the Roundcube Webmail client |
  5. | |
  6. | Copyright (C) The Roundcube Dev Team |
  7. | Copyright (C) Kolab Systems AG |
  8. | |
  9. | Licensed under the GNU General Public License version 3 or |
  10. | any later version with exceptions for skins & plugins. |
  11. | See the README file for a full license statement. |
  12. | |
  13. | PURPOSE: |
  14. | MIME message parsing utilities derived from Mail_mimeDecode |
  15. +-----------------------------------------------------------------------+
  16. | Author: Thomas Bruederli <roundcube@gmail.com> |
  17. | Author: Aleksander Machniak <alec@alec.pl> |
  18. | Author: Richard Heyes <richard@phpguru.org> |
  19. +-----------------------------------------------------------------------+
  20. */
  /**
   * Class for parsing MIME messages
   *
   * Splits a raw message into header and body sections, parses the headers
   * and recursively decodes multipart and message/rfc822 bodies into
   * a tree of rcube_message_part objects.
   *
   * @package    Framework
   * @subpackage Storage
   */
  class rcube_mime_decode
  {
      /**
       * Class configuration parameters.
       *
       * @var array
       */
      protected $params = [
          'include_bodies'  => true,
          'decode_bodies'   => true,
          'decode_headers'  => true,
          'crlf'            => "\r\n",
          'default_charset' => RCUBE_CHARSET,
      ];

      /**
       * Constructor.
       *
       * Merges the given parameters over the class defaults.
       *
       * @param array $params An array of various parameters that determine
       *                      various things:
       *                      include_bodies  - Whether to include the body in the returned
       *                                        object.
       *                      decode_bodies   - Whether to decode the bodies
       *                                        of the parts. (Transfer encoding)
       *                      decode_headers  - Whether to decode headers
       *                      crlf            - CRLF type to use (CRLF/LF/CR)
       *                      default_charset - Charset passed to the header decoder
       */
      public function __construct($params = [])
      {
          if (!empty($params)) {
              $this->params = array_merge($this->params, (array) $params);
          }
      }

      /**
       * Performs the decoding process.
       *
       * @param string $input   The input to decode
       * @param bool   $convert Convert result to rcube_message_part structure
       *
       * @return object|bool Decoded results or False on failure
       */
      public function decode($input, $convert = true)
      {
          $split = $this->splitBodyHeader($input);

          // No blank line in the input: treat it as a header section with
          // an empty body instead of destructuring `false` into nulls.
          if ($split === false) {
              $split = [(string) $input, ''];
          }

          list($header, $body) = $split;

          $struct = $this->do_decode($header, $body);

          if ($struct && $convert) {
              $struct = $this->structure_part($struct);
          }

          if ($struct) {
              $struct->size = strlen($input);
          }

          return $struct;
      }

      /**
       * Performs the decoding. Decodes the body string passed to it
       * If it finds certain content-types it will call itself in a
       * recursive fashion
       *
       * @param string $headers       Header section
       * @param string $body          Body section
       * @param string $default_ctype Default content type
       *
       * @return object|bool Decoded results or False on error
       *                     (a multipart type without a boundary parameter)
       */
      protected function do_decode($headers, $body, $default_ctype = 'text/plain')
      {
          $return                    = new rcube_message_part;
          $content_type              = null;
          $content_transfer_encoding = null;

          foreach ($this->parseHeaders($headers) as $value) {
              $header_name = strtolower($value['name']);

              if (!isset($return->headers[$header_name])) {
                  $return->headers[$header_name] = $value['value'];
              }
              else {
                  // Repeated header: collect all values in a list
                  if (!is_array($return->headers[$header_name])) {
                      $return->headers[$header_name] = [$return->headers[$header_name]];
                  }

                  $return->headers[$header_name][] = $value['value'];
              }

              switch ($header_name) {
                  case 'content-type':
                      $content_type = $this->parseHeaderValue($value['value']);

                      if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
                          $return->ctype_primary   = $regs[1];
                          $return->ctype_secondary = $regs[2];
                      }

                      if (!empty($content_type['other'])) {
                          $return->ctype_parameters = array_merge(
                              (array) $return->ctype_parameters,
                              (array) $content_type['other']
                          );
                      }
                      break;

                  case 'content-disposition':
                      $content_disposition = $this->parseHeaderValue($value['value']);
                      $return->disposition = $content_disposition['value'];

                      if (!empty($content_disposition['other'])) {
                          $return->d_parameters = array_merge(
                              (array) $return->d_parameters,
                              (array) $content_disposition['other']
                          );
                      }
                      break;

                  case 'content-transfer-encoding':
                      $content_transfer_encoding = $this->parseHeaderValue($value['value']);
                      break;
              }
          }

          // No Content-Type header: fall back to the default type
          if ($content_type === null) {
              $ctype = explode('/', $default_ctype);

              $return->ctype_primary   = $ctype[0];
              $return->ctype_secondary = $ctype[1] ?? null;

              if ($this->params['include_bodies']) {
                  $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body) : $body;
              }

              return $return;
          }

          $ctype = strtolower($content_type['value']);

          switch ($ctype) {
              case 'text/plain':
              case 'text/html':
                  $encoding = $content_transfer_encoding['value'] ?? '7bit';

                  if ($this->params['include_bodies']) {
                      $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body;
                  }
                  break;

              case 'multipart/digest':
              case 'multipart/alternative':
              case 'multipart/related':
              case 'multipart/mixed':
              case 'multipart/signed':
              case 'multipart/encrypted':
                  if (!isset($content_type['other']['boundary'])) {
                      return false;
                  }

                  // Parts of a digest default to message/rfc822
                  $default_ctype = $ctype === 'multipart/digest' ? 'message/rfc822' : 'text/plain';

                  foreach ($this->boundarySplit($body, $content_type['other']['boundary']) as $raw_part) {
                      $split = $this->splitBodyHeader($raw_part);

                      // A part without a blank line has headers only
                      if ($split === false) {
                          $split = [(string) $raw_part, ''];
                      }

                      list($part_header, $part_body) = $split;

                      $sub_part = $this->do_decode($part_header, $part_body, $default_ctype);

                      // Skip parts that failed to decode, so that no `false`
                      // entries end up in the parts list
                      if ($sub_part) {
                          $return->parts[] = $sub_part;
                      }
                  }
                  break;

              case 'message/rfc822':
                  // The embedded message is decoded by a fresh decoder instance
                  // and kept in its raw (non-converted) structure
                  $obj      = new self($this->params);
                  $sub_part = $obj->decode($body, false);
                  unset($obj);

                  if ($sub_part) {
                      $return->parts[] = $sub_part;
                  }

                  if ($this->params['include_bodies']) {
                      $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body) : $body;
                  }
                  break;

              default:
                  if ($this->params['include_bodies']) {
                      $encoding = !empty($content_transfer_encoding['value']) ? $content_transfer_encoding['value'] : '7bit';
                      $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body;
                  }
                  break;
          }

          return $return;
      }

      /**
       * Given a string containing a header and body
       * section, this function will split them (at the first
       * blank line) and return them.
       *
       * A single trailing line break is removed from the body.
       *
       * @param string $input Input to split apart
       *
       * @return array|bool Contains header and body section,
       *                    False if the input contains no blank line
       */
      protected function splitBodyHeader($input)
      {
          $crlf = $this->params['crlf'];
          $pos  = strpos($input, $crlf . $crlf);

          if ($pos === false) {
              return false;
          }

          $crlf_len = strlen($crlf);
          $header   = substr($input, 0, $pos);
          // Cast: substr() returns false (not '') on PHP < 8 when nothing follows the separator
          $body     = (string) substr($input, $pos + 2 * $crlf_len);

          if ($body !== '' && substr_compare($body, $crlf, -$crlf_len) === 0) {
              $body = substr($body, 0, -$crlf_len);
          }

          return [$header, $body];
      }

      /**
       * Parse headers given in $input and return as assoc array.
       *
       * Lines that do not contain a colon are not valid header fields
       * and are ignored.
       *
       * @param string $input Headers to parse
       *
       * @return array List of ['name' => ..., 'value' => ...] entries
       */
      protected function parseHeaders($input)
      {
          $return = [];
          $crlf   = $this->params['crlf'];

          // Unfold the input
          $input = preg_replace('/' . preg_quote($crlf, '/') . "(\t| )/", ' ', (string) $input);
          $input = trim((string) $input);

          if ($input === '') {
              return $return;
          }

          foreach (explode($crlf, $input) as $line) {
              $pos = strpos($line, ':');

              if ($pos === false) {
                  continue;
              }

              $hdr_name  = substr($line, 0, $pos);
              $hdr_value = (string) substr($line, $pos + 1);

              // Drop the single space that conventionally follows the colon
              if (isset($hdr_value[0]) && $hdr_value[0] == ' ') {
                  $hdr_value = (string) substr($hdr_value, 1);
              }

              $return[] = [
                  'name'  => $hdr_name,
                  'value' => $this->params['decode_headers'] ? $this->decodeHeader($hdr_value) : $hdr_value,
              ];
          }

          return $return;
      }

      /**
       * Function to parse a header value, extract first part, and any secondary
       * parts (after ;) This function is not as robust as it could be.
       * Eg. header comments in the wrong place will probably break it.
       *
       * @param string $input Header value to parse
       *
       * @return array Contains parsed result: 'value' holds the first part,
       *               'other' (if any) maps lower-cased parameter names to values
       */
      protected function parseHeaderValue($input)
      {
          $parts = preg_split('/;\s*/', $input);

          if (empty($parts)) {
              return ['value' => trim($input)];
          }

          $return = ['value' => trim($parts[0])];

          foreach (array_slice($parts, 1) as $param) {
              if (preg_match('/^([[:alnum:]]+)="?([^"]*)"?+/', $param, $matches)) {
                  $return['other'][strtolower($matches[1])] = $matches[2];
              }
              // Support RFC2231 encoding
              else if (preg_match('/^([[:alnum:]]+)\*([0-9]*)\*?="*([^"]+)"*/', $param, $matches)) {
                  $key = strtolower($matches[1]);
                  $val = $matches[3];

                  // Strip the charset'language' prefix and decode percent-escapes
                  if (preg_match("/^(([^']*)'[^']*')/", $val, $m)) {
                      $val = rawurldecode(substr($val, strlen($m[0])));
                  }

                  // Continuations (name*0, name*1, ...) are concatenated in order of appearance
                  if (isset($return['other'][$key])) {
                      $return['other'][$key] .= $val;
                  }
                  else {
                      $return['other'][$key] = $val;
                  }
              }
          }

          return $return;
      }

      /**
       * This function splits the input based on the given boundary
       *
       * The text before the first boundary and the text after the closing
       * boundary are dropped. If the closing boundary is missing (e.g.
       * a truncated message) the last part is still returned.
       *
       * @param string $input    Input to parse
       * @param string $boundary Boundary
       *
       * @return array Contains array of resulting mime parts
       */
      protected function boundarySplit($input, $boundary)
      {
          $tmp   = explode('--' . $boundary, $input);
          $last  = count($tmp) - 1;
          $parts = [];

          for ($i = 1; $i < $last; $i++) {
              $parts[] = $tmp[$i];
          }

          // The segment after the closing delimiter ("--boundary--") starts
          // with "--". Anything else is an unterminated last part.
          if ($last > 0 && strncmp($tmp[$last], '--', 2) !== 0 && trim($tmp[$last]) !== '') {
              $parts[] = $tmp[$last];
          }

          return $parts;
      }

      /**
       * Given a header, this function will decode it according to RFC2047.
       * Probably not *exactly* conformant, but it does pass all the given
       * examples (in RFC2047).
       *
       * @param string $input Input header value to decode
       *
       * @return string Decoded header value
       */
      protected function decodeHeader($input)
      {
          return rcube_mime::decode_mime_string($input, $this->params['default_charset']);
      }

      /**
       * Recursive method to convert a rcube_mime_decode structure
       * into a rcube_message_part object.
       *
       * @param object $part   A message part struct
       * @param int    $count  Part count
       * @param string $parent Parent MIME ID
       *
       * @return object rcube_message_part
       * @see self::decode()
       */
      protected function structure_part($part, $count = 0, $parent = '')
      {
          $struct = new rcube_message_part;

          // The top-level part gets ID "0". As "0" is empty() in PHP, its
          // children are numbered "1", "2", ... and their children "1.1", ...
          $struct->mime_id = $part->mime_id ?: (empty($parent) ? (string) $count : "$parent.$count");

          $struct->headers          = $part->headers;
          $struct->mimetype         = $part->ctype_primary . '/' . $part->ctype_secondary;
          $struct->ctype_primary    = $part->ctype_primary;
          $struct->ctype_secondary  = $part->ctype_secondary;
          $struct->ctype_parameters = $part->ctype_parameters;

          if (!empty($part->headers['content-transfer-encoding'])) {
              $struct->encoding = $part->headers['content-transfer-encoding'];
          }

          if (!empty($part->ctype_parameters['charset'])) {
              $struct->charset = $part->ctype_parameters['charset'];
          }

          // determine filename
          if (!empty($part->d_parameters['filename'])) {
              $filename = $part->d_parameters['filename'];
          }
          else if (!empty($part->ctype_parameters['name'])) {
              $filename = $part->ctype_parameters['name'];
          }

          if (!empty($filename)) {
              // Only decode here if the headers were not decoded while parsing
              if (empty($this->params['decode_headers'])) {
                  $filename = $this->decodeHeader($filename);
              }

              $struct->filename = $filename;
          }

          $struct->body        = $part->body;
          $struct->size        = is_string($part->body) ? strlen($part->body) : 0;
          $struct->disposition = $part->disposition;

          $count = 0;
          foreach ((array) $part->parts as $child_part) {
              // Ignore anything that is not a decoded part
              if (!is_object($child_part)) {
                  continue;
              }

              $struct->parts[] = $this->structure_part($child_part, ++$count, $struct->mime_id);
          }

          return $struct;
      }
  }