source: dassfooter/trunk/html_footer.py@ 1127

Last change on this file since 1127 was 980, checked in by hmueller, on Mar 16, 2012 at 11:16:51 AM

dassfooter (make HTML out of plain mails)

  • Property svn:executable set to *
File size: 16.8 KB
RevLine 
[980]1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3'''
4html_footer.py
5
6This script looks for specially formatted plain-text messages and converts
7them into HTML. It is intended to be used as a Postfix message filter.
8
9It can be used as a pipe or as a standalone SMTP daemon application.
10
11Usage: html_footer.py [OPTION...]
12
13 -h, --help show this help message
14 -V, --version shows version information
15 -u, --uid=USERNAME run as uid if in daemon mode
16 -p, --pipemode read/write message from/to stdin/stdout
17 -d, --debuglevel=LEVEL default level = info
18 valid levels: critical, error, warning,
19 info, debug
20 -l, --listen=HOST:PORT address to listen on (default: 127.0.0.1:10025)
21 -r, --remote=HOST:PORT relayhost to deliver to (default: 127.0.0.1:25)
22 -i, --imagepath=PATH path for attachments (default: /var/lib/html_footer)
23 -f, --logfile=FILENAME
24 -k, --kill kills daemon
25 --pidfile=FILENAME pidfile for daemon (default: /var/run/html_footer.pid)
26
27The decision if a mail has to be converted is taken by a line with the
28tags <html> </html> in the signature of the plain mail.
29
30Example:
31-----8<-----
32Dear ..
33
34best regards
35--
36Text signature
37<html>
38<hr/>
39<p>
40Html signature
41</p>
42</html>
43-----8<-----
44
45If image tags are referenced in the HTML signature text, the image files
46must be placed in the image path directory (see --imagepath) on the machine
47the script is running on. The use of inline encoded data is also possible.
48The img tag is only recognized if it doesn't span a line break.
49The content of the src attribute should be prefixed with file: or carry
50no protocol directive at all, e.g. <img src="logo.png">
51
52@copyright: 2012 dass IT GmbH
53@author: Holger Mueller <hmueller@dass-it.de>
54'''
55import logging
56from pprint import pformat
57import sys
58import os
59import errno
60import getopt
61
62import email
63from email.mime.text import MIMEText
64from email.mime.image import MIMEImage
65from email.mime.multipart import MIMEMultipart
66from email.charset import Charset
67from email.utils import make_msgid
68
69from smtpd import PureProxy
70import asyncore
71from daemon import Daemon
72
73import re
74from urlparse import urlparse
75
# When true, every altered message gets an additional "X-Modified-By"
# header naming this script and its version.
X_HEADER = True

#
# Nothing to configure below!
#
# Release identifier; printed by --version and used in the X-Modified-By header.
__version__ = "20120227"
83
class HyperTextFormatter(object):
    """Assemble an HTML document from plain-text and raw HTML fragments.

    Fragments are appended to ``self.txt``. ``<img>`` tags that point to a
    local file (``file:`` scheme or no scheme at all) can be converted into
    MIME image attachments that are referenced through ``cid:`` URLs.
    """

    HTML_HEADER = u'''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
 "http://www.w3.org/TR/html4/loose.dtd">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<style type="text/css">
#plaintext {
 font-family:Fixedsys,Courier,monospace;
 padding:10px;
 white-space:pre-wrap;
}
</style>
</head>
<body>
'''
    HTML_FOOTER = u'</body>\n</html>'
    # Regex for image references: group 1 is everything up to the opening
    # quote of src, group 2 the src value, group 3 the rest of the tag.
    # The pattern is pure ASCII, so a plain raw literal is sufficient.
    RXP_IMG_TAG = re.compile(r'(<img\s[^>]*src=")([^"]+)("[^>]*>)',
                             re.UNICODE)

    def __init__(self, header=u''):
        """Start a new document.

        header -- custom HTML header to use instead of HTML_HEADER; an
                  empty value selects the default header.
        """
        # The original appended to self.txt before it existed, so passing
        # a custom header always failed with AttributeError.
        self.txt = header if header else self.HTML_HEADER
        self.attachments = []
        self.parts = 1

    @staticmethod
    def _is_local_image(src):
        """Return True if src names a local file (file: or no scheme)."""
        url = urlparse(src)
        return bool(url.path) and url.scheme in ('', 'file')

    def add_txt(self, txt=u''):
        """Append plain text, wrapped and escaped as HTML."""
        self.txt += self.txt2html(txt)

    def add_html(self, html=u''):
        """Append HTML text without modification."""
        self.txt += html

    def add_footer(self):
        """Append the default closing tags to the current HTML text."""
        self.txt += self.HTML_FOOTER

    def create_mime_attachments(self):
        """Turn local image references into MIME attachments.

        Scans the current HTML text, creates a MIMEImage for every <img>
        whose src is a local file and replaces that src with a ``cid:``
        reference to the generated part. Image tags with any other scheme
        (http:, data:, ...) are left untouched.

        The image is read from the directory ``options.imagepath`` (module
        level ``options`` object); only the base name of the src path is
        used. Errors from opening or parsing the file propagate.

        Returns the list of all MIME objects generated so far.
        """
        def replacer(m):
            """Callback for re.sub: attach one image, rewrite its tag."""
            src = m.group(2)
            if not self._is_local_image(src):
                # Remote or inline image: keep the tag exactly as it is.
                return m.group(0)
            url = urlparse(src)
            filename = os.path.join(options.imagepath,
                                    os.path.basename(url.path))
            with open(filename, 'rb') as fp:
                img = MIMEImage(fp.read())
            img_id = make_msgid("part%i" % self.parts)
            img.add_header('Content-ID', img_id)
            img.add_header('Content-Disposition',
                           'attachment',
                           filename=url.path)
            self.attachments.append(img)
            self.parts += 1
            return "%scid:%s%s" % (m.group(1), img_id.strip('<>'), m.group(3))

        self.txt = self.RXP_IMG_TAG.sub(replacer, self.txt)
        return self.attachments

    def get(self, add_footer=True):
        """Return the HTML text.

        With add_footer true the default footer is appended to the stored
        text first, so calling this repeatedly appends it repeatedly.
        """
        if add_footer:
            self.add_footer()
        return self.txt

    def has_attachments(self):
        """Return True if any img tag in the current HTML text refers to
        a local file (file: scheme or no scheme).
        """
        return any(self._is_local_image(m.group(2))
                   for m in self.RXP_IMG_TAG.finditer(self.txt))

    def txt2html(self, txt=u""):
        """Wrap plain text in a preformatted block.

        The markup characters &, < and > are escaped so the text is shown
        literally and cannot inject tags into the document.
        """
        escaped = (txt.replace(u'&', u'&amp;')
                      .replace(u'<', u'&lt;')
                      .replace(u'>', u'&gt;'))
        return u'<pre id="plaintext">\n' + escaped + u'</pre>\n'
178
class MIMEChanger(object):
    """Convert text/plain mails with an HTML signature block into
    multipart/alternative mails carrying a plain and an HTML version.
    """

    # Regex to split message from signature: group 1 is the text before
    # the last line starting with "--" plus whitespace, group 2 the rest.
    RXP_SIGNATURE = re.compile(r'(.*)^--\s+(.*)',
                               re.MULTILINE | re.DOTALL | re.UNICODE)
    # A line consisting of "<html>" marks the start of the HTML signature.
    RXP_SIG_HTML = re.compile(r'^<html>\n', re.MULTILINE | re.UNICODE)
    # Charset assumed for parts that do not declare one. This must be a
    # codec *name*: a Charset instance cannot be passed to decode().
    DEFAULT_CHARSET = 'us-ascii'

    def __init__(self):
        pass

    def _copy_mime_root(self,
                        msg,
                        strip_content=True):
        """Make a copy of the non-payload root MIME part of msg and set its
        content type to multipart/alternative. By default all Content-*
        headers of the old root are dropped.
        """
        msg_new = MIMEMultipart()
        # drop default keys
        for k in msg_new.keys():
            del msg_new[k]

        # Copy the old headers. Header names are case-insensitive, so
        # "Content-type" or "content-transfer-encoding" are stripped too.
        for k, v in msg.items():
            if strip_content and k.lower().startswith('content-'):
                continue
            msg_new[k] = v

        if msg.get_unixfrom():
            msg_new.set_unixfrom(msg.get_unixfrom())
        if msg.preamble:
            msg_new.preamble = msg.preamble
        else:
            msg_new.preamble = "This is a multi-part message in MIME format...\n"
        if msg.epilogue:
            msg_new.epilogue = msg.epilogue

        # set msg_new type
        msg_new.set_type('multipart/alternative')
        return msg_new

    def _first_text(self, msg):
        """Return the first text/plain part of a message as unicode string.

        Only msg itself (if not multipart) or its direct sub-parts are
        inspected; an empty string is returned if none is text/plain.
        """
        if not msg.is_multipart():
            if msg.get_content_type() != 'text/plain':
                return u''
            return self._payload2unicode(msg)
        for m in msg.get_payload():
            if m.get_content_type() == 'text/plain':
                return self._payload2unicode(m)
        return u''

    def _payload2unicode(self, mimeobj):
        """Decode the payload of a MIME text object to a unicode string,
        using the declared charset or DEFAULT_CHARSET if none is given.
        """
        chrset = mimeobj.get_content_charset(self.DEFAULT_CHARSET)
        return mimeobj.get_payload(decode=True).decode(chrset)

    def _process_multi(self, msg):
        """Replace the first text/plain sub-part of a multipart message
        in place with the new payload and return msg.

        If there is no text/plain sub-part the message is returned
        unchanged instead of failing with an IndexError.
        """
        pl = msg.get_payload()
        for i, m in enumerate(pl):
            if m.get_content_type() == 'text/plain':
                pl[i] = self.new_payload(m)
                break
        return msg

    def _process_plain(self, msg):
        """Build a multipart container for a non-multipart message."""
        msg_new = self._copy_mime_root(msg)
        new_pl = self.new_payload(msg)
        for m in new_pl.get_payload():
            msg_new.attach(m)

        return msg_new

    def _split_content(self, txt=u''):
        """Cut the content from the signature of a mail text.

        Returns a (content, signature) pair; signature is empty if no
        signature separator was found.
        """
        m = self.RXP_SIGNATURE.search(txt)
        if m:
            return m.groups()
        return [txt, u'']

    def _split_signature(self, txt=u''):
        """Cut the text part from the HTML part of a signature.

        Returns a list with one element (no <html> line found) or two
        elements (text before and everything after the first <html> line).
        """
        return self.RXP_SIG_HTML.split(txt, 1)

    def alter_message(self, msg):
        """Return the converted version of msg, optionally tagged with an
        X-Modified-By header (see X_HEADER).
        """
        if not msg.is_multipart():
            log.debug('plain message')
            new_msg = self._process_plain(msg)
        else:
            log.debug('multipart message')
            new_msg = self._process_multi(msg)

        if X_HEADER:
            log.debug('add X-Modified-By header')
            new_msg.add_header('X-Modified-By', 'Html Footer %s' % __version__)
        return new_msg

    def html_creator(self):
        """Return a HyperTextFormatter instance; can be overridden in a
        derived class for a different layout.
        """
        return HyperTextFormatter()

    def msg_is_to_alter(self, msg):
        """Return True if the message should be altered.

        That is the case if the signature of the first text/plain part
        contains a line consisting of the tag <html>.
        """
        txt = self._first_text(msg)
        signature = self._split_content(txt)[1]
        return bool(self.RXP_SIG_HTML.search(signature))

    def new_payload(self, mime_plain):
        """Create a new MIME structure from a text/plain part.

        Examples:
            multipart/alternative
                text/plain
                text/html

            multipart/alternative
                text/plain
                multipart/related
                    text/html
                    image/jpg
                    image/png

        The plain alternative keeps the message text, the separator and the
        signature with all <html>...</html> sections removed. The HTML
        alternative contains the message text followed by the HTML sections
        of the signature; signature text between or after HTML sections is
        added as preformatted text.
        """
        html = self.html_creator()

        body, signature = self._split_content(self._payload2unicode(mime_plain))
        html.add_txt(body)

        # sig_parts[0] is the plain signature, sig_parts[1] (if present)
        # everything after the first "<html>" line.
        sig_parts = self._split_signature(signature)
        text = body + u'-- \n' + sig_parts[0]
        footer = sig_parts[1] if len(sig_parts) > 1 else u''

        # The first "<html>" line was consumed by the split above, so the
        # footer starts in HTML state.
        state_html = True
        txtbuffer = u''
        for line in footer.split(u'\n'):
            if line == u'<html>':
                state_html = True
                # Whitespace-only buffers are dropped; otherwise they
                # would show up as an empty box in the HTML version.
                if txtbuffer.strip():
                    html.add_txt(txtbuffer)
                txtbuffer = u''
            elif line == u'</html>':
                state_html = False
            elif state_html:
                html.add_html(line + u'\n')
            else:
                txtbuffer += line + u'\n'
                text += line + u'\n'
        if txtbuffer.strip():
            html.add_txt(txtbuffer)

        if html.has_attachments():
            attachments = html.create_mime_attachments()
            msg_html = MIMEMultipart('related')
            msg_html.attach(MIMEText(html.get().encode('utf-8'), 'html', 'utf-8'))
            for a in attachments:
                msg_html.attach(a)
        else:
            msg_html = MIMEText(html.get().encode('utf-8'), 'html', 'utf-8')

        msg_plain = MIMEText(text.encode('utf-8'), 'plain', 'utf-8')

        pl = MIMEMultipart('alternative')
        pl.attach(msg_plain)
        pl.attach(msg_html)

        return pl
373
class SMTPHTMLFooterServer(PureProxy):
    """SMTP proxy that passes every received message through modify_data
    before handing it to the relay host.
    """

    def process_message(self, peer, mailfrom, rcpttos, data):
        """Convert and relay one message.

        Returns None on success, otherwise an SMTP error reply string.
        """
        # TODO return error status (as SMTP answer string) if something goes wrong!
        try:
            refused = self._deliver(mailfrom, rcpttos, modify_data(data))
        except Exception as err:
            log.exception('Error on delivery: %s', err)
            return '550 content rejected: %s' % err
        # TBD: what to do with refused addresses?
        if not refused:
            return None
        log.error('content refused: %s', pformat(refused))
        return '550 content rejected:'
388
class FooterDaemon(Daemon):
    """Daemon wrapper whose run() drives the asyncore event loop."""

    def run(self):
        """Enter the asyncore loop that serves the registered channels."""
        asyncore.loop()
392
class Options:
    """Run-time settings with their defaults; parseargs() overrides them
    from the command line.
    """
    # user name to switch to in daemon mode ('' = do not switch)
    uid = ''
    # (host, port) the SMTP proxy listens on
    listen = ('127.0.0.1', 10025)
    # (host, port) of the relay host messages are delivered to
    remote = ('127.0.0.1', 25)
    debuglevel = logging.INFO
    # 'start' or 'stop' (set by --kill)
    cmd = 'start'
    # True: read the message from stdin and write it to stdout
    pipemode = False
    # Fixed: the default was misspelled 'hmtl_footer.pid', which did not
    # match the documented default /var/run/html_footer.pid.
    pidfile = '/var/run/html_footer.pid'
    # directory the images referenced by img tags are read from
    imagepath = '/var/lib/html_footer'
    # log file name ('' = logging default, i.e. no file)
    logfile = ''
    # maps --debuglevel names to logging levels
    _txt2loglvl = {
        'critical': logging.CRITICAL,
        'error': logging.ERROR,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG,
    }
410
def usage(code, msg=''):
    """Write the module help text, and msg if given, to stderr and exit
    the program with the status `code`.
    """
    lines = [__doc__ % globals()]
    if msg:
        lines.append(msg)
    for line in lines:
        sys.stderr.write('%s\n' % (line,))
    sys.exit(code)
416
def _parse_hostport(arg, what, side):
    """Split a "HOST:PORT" argument at the first colon.

    what and side only select the wording of the error message
    ('listen'/'local' or 'remote'/'remote'). Exits via usage() on a
    missing colon or a non-numeric port.
    """
    host, sep, port = arg.partition(':')
    if not sep:
        usage(1, 'Bad %s address: %s' % (what, arg))
    try:
        return (host, int(port))
    except ValueError:
        usage(1, 'Bad %s port: %s' % (side, arg))


def parseargs():
    """Parse sys.argv and return a populated Options instance.

    Invalid options or values end the program through usage(). The
    short option -p always means --pipemode; the pid file can only be
    set with the long option --pidfile (the former second "-p" for it
    was shadowed by --pipemode and never reachable).
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'u:Vhpd:l:r:i:f:k',
            ['uid=', 'version', 'help', 'pipemode', 'debuglevel=',
             'listen=', 'remote=', 'imagepath=', 'logfile=',
             'kill', 'pidfile='])
    except getopt.error as e:
        usage(1, e)

    options = Options()
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            sys.stderr.write('%s\n' % __version__)
            sys.exit(0)
        elif opt in ('-u', '--uid'):
            options.uid = arg
        elif opt in ('-p', '--pipemode'):
            options.pipemode = True
        elif opt in ('-d', '--debuglevel'):
            if arg in options._txt2loglvl:
                options.debuglevel = options._txt2loglvl[arg]
            else:
                # The message has to be formatted here: usage() accepts
                # only (code, msg).
                usage(1, 'Unknown debuglevel %s' % arg)
        elif opt in ('-l', '--listen'):
            options.listen = _parse_hostport(arg, 'listen', 'local')
        elif opt in ('-r', '--remote'):
            options.remote = _parse_hostport(arg, 'remote', 'remote')
        elif opt in ('-i', '--imagepath'):
            options.imagepath = arg
        elif opt in ('-f', '--logfile'):
            options.logfile = arg
        elif opt in ('-k', '--kill'):
            options.cmd = 'stop'
        elif opt == '--pidfile':
            options.pidfile = arg
    if len(args) > 0:
        usage(1, 'unknown arguments %s' % ', '.join(args))

    return options
471
def modify_data(msg_in):
    """Convert a raw message if it qualifies.

    msg_in -- complete message as string

    Returns the converted message as string (including the unix From
    line, if any) when mymime decides it has to be altered, otherwise
    msg_in itself, unchanged.
    """
    msg = email.message_from_string(msg_in)
    msg_id = msg.get('Message-ID', '')
    if not mymime.msg_is_to_alter(msg):
        log.info('Msg(%s): nothing to alter', msg_id)
        return msg_in

    log.info('Msg(%s): altered', msg_id)
    msg_out = mymime.alter_message(msg).as_string(unixfrom=True)
    # This debug output used to sit behind the return statement and was
    # therefore never written.
    log.debug('Msg out:\n%s', msg_out)
    return msg_out
482
483#
484# Main program
485#
if __name__ == '__main__':
    options = parseargs()
    logging.basicConfig(level=options.debuglevel,
                        filename=options.logfile)
    log = logging.getLogger('html_footer')
    # modify_data() looks this instance up as a module global.
    mymime = MIMEChanger()

    if options.pipemode:
        # Use as a simple pipe filter: stdin -> stdout.
        msg_in = sys.stdin.read()
        log.debug('Msg in:\n%s' % msg_in)
        try:
            msg = modify_data(msg_in)
            log.debug('Msg out:\n%s' % msg)
            sys.stdout.write(msg)
        except Exception as err:
            # Best effort: never lose a mail, pass the original through.
            log.exception(err)
            sys.stdout.write(msg_in)
    else:
        # Run as smtpd.
        daemon = FooterDaemon(options.pidfile)
        if options.cmd == 'stop':
            log.info('stopping daemon')
            daemon.stop()
            sys.exit(0)

        log.info('starting daemon')
        if options.uid:
            try:
                import pwd
            except ImportError:
                log.exception('Cannot import module "pwd";\n'
                              'try running as pipe filter (-p).')
                sys.exit(1)
            try:
                runas = pwd.getpwnam(options.uid)[2]
            except KeyError:
                # getpwnam raises KeyError for an unknown user name;
                # report it instead of dying with a traceback.
                log.error('Unknown user "%s"', options.uid)
                sys.exit(1)
            try:
                os.setuid(runas)
            except OSError as e:
                if e.errno != errno.EPERM:
                    raise
                log.exception('Cannot setuid "%s";\n'
                              'try running as pipe filter (-p).' % options.uid)
                sys.exit(1)
        log.debug('Creating server instance')
        server = SMTPHTMLFooterServer(options.listen, options.remote)
        # Daemonize only if a uid is given, otherwise serve in the
        # foreground.
        if options.uid:
            daemon.start()
        else:
            asyncore.loop()
Note: See TracBrowser for help on using the repository browser.