Main Page   Class Hierarchy   Alphabetical List   Compound List   Examples  
qp.h
1 /***************************************************************************
2  copyright : (C) 2002-2008 by Stefano Barbato
3  email : stefano@codesink.org
4 
5  $Id: qp.h,v 1.20 2008-10-07 11:06:26 tat Exp $
6  ***************************************************************************/
7 #ifndef _MIMETIC_CODEC_QP_H_
8 #define _MIMETIC_CODEC_QP_H_
9 #include <iostream>
10 #include <string>
11 #include <sstream>
12 #include <cassert>
13 #include <mimetic/libconfig.h>
14 #include <mimetic/utils.h>
15 #include <mimetic/circular_buffer.h>
16 #include <mimetic/codec/codec_base.h>
17 #include <mimetic/codec/codec_chain.h>
18 
19 namespace mimetic
20 {
21 
22 class QP
23 {
24  friend class test_qp;
// Character codes the codec treats specially (NL is an alias of LF).
enum {
    LF  = 10,
    CR  = 13,
    NL  = LF,
    TAB = 9,
    SP  = 32
};

// Default maximum length of an encoded line.
enum { default_maxlen = 76 };

// Character classes; sTb holds one of these values per character code.
enum {
    printable = 0,  /* print as-is */
    tab       = 1,  /* print if !isBinary */
    sp        = 2,  /* ' ' */
    newline   = 3,  /* cr or lf; encode if isBinary */
    binary    = 4,  /* rest of the ascii map */
    unsafe    = 5   /* "!\"#$@[]\\^`{}|~" */
};

// Per-character class table, indexed by character code.
static char sTb[256];
36 
37 public:
38 
39 /// quoted-printable encoder
40 /*!
41 
42  \sa encode decode
43  */
44 class Encoder: public buffered_codec, public chainable_codec<Encoder>
45 {
46  enum { laBufSz = 5 }; // look-ahead buffer
47  size_t m_pos, m_maxlen;
48  bool m_binary;
49  circular_buffer<char_type> m_cbuf;
50 
51  template<typename OutIt>
52  void hardLineBrk(OutIt& out)
53  {
54  *out = NL; ++out;
55  m_pos = 1;
56  }
57  template<typename OutIt>
58  void softLineBrk(OutIt& out)
59  {
60  *out = '='; ++out;
61  hardLineBrk(out);
62  }
63  template<typename OutIt>
64  void write(char_type ch, OutIt& out)
65  {
66  bool is_last_ch = m_cbuf.empty();
67  if(!is_last_ch && m_pos == m_maxlen)
68  softLineBrk(out);
69  *out = ch; ++out;
70  m_pos++;
71  }
72  template<typename OutIt>
73  void writeHex(char_type ch, OutIt& out)
74  {
75  static char_type hexc[] =
76  {
77  '0', '1', '2', '3', '4', '5' ,'6', '7', '8', '9',
78  'A', 'B', 'C', 'D', 'E', 'F'
79  };
80  bool is_last_ch = m_cbuf.empty();
81  if(m_pos + (is_last_ch ? 1 : 2) >= m_maxlen)
82  softLineBrk(out);
83  // write out =HH
84  *out = '='; ++out;
85  *out = hexc[ch >> 4]; ++out;
86  *out = hexc[ch & 0xf]; ++out;
87  m_pos += 3;
88  }
89  template<typename OutIt>
90  void encodeChar(char_type c, OutIt& out)
91  {
92  int cnt = m_cbuf.count();
93  switch(sTb[c])
94  {
95  case printable:
96  if(m_pos == 1)
97  {
98  switch(c)
99  {
100  case 'F': // hex enc on "^From .*"
101  if(cnt>=4 && m_cbuf.compare(0,4,"rom "))
102  {
103  writeHex(c,out);
104  return;
105  }
106  break;
107  case '.': // hex encode if "^.[\r\n]" or on eof
108  if(!cnt || sTb[ m_cbuf[0] ] == newline)
109  {
110  writeHex(c,out);
111  return;
112  }
113  break;
114  }
115  }
116  write(c,out);
117  break;
118  case tab:
119  case sp:
120  // on binary encoding, or last input ch or newline
121  if(m_binary || !cnt || sTb[ m_cbuf[0] ] == newline)
122  writeHex(c,out);
123  else
124  write(c,out);
125  break;
126  case newline:
127  if(m_binary)
128  writeHex(c, out);
129  else {
130  if(cnt && m_cbuf[0] == (c == CR ? LF : CR))
131  m_cbuf.pop_front(); // eat it
132  hardLineBrk(out);
133  }
134  break;
135  case binary:
136  if(!m_binary) m_binary = 1; // switch to binary mode
137  writeHex(c, out);
138  break;
139  case unsafe:
140  writeHex(c, out);
141  break;
142  }
143  }
144 public:
145  /*! return the multiplier of the required (max) size of the output buffer
146  * when encoding */
147  double codeSizeMultiplier() const
148  {
149  // worse case is *3 but we'll use the (euristic) average value of 1.5.
150  // this may decrease performance when encoding messages with many
151  // non-ASCII (> 127) characters
152  return 1.5;
153  }
154  /*!
155  Constructor
156  \param isBinary if true all space and newline characters will be
157  treated like binary chars and will be hex encoded (useful if you
158  want to encode a binary file).
159  */
160  Encoder(bool isBinary = false)
161  : m_pos(1), m_maxlen(default_maxlen),
162  m_binary(isBinary), m_cbuf(laBufSz)
163  {
164  }
165  /*! Returns the name of the codec ("Quoted-Printable") */
166  const char* name() const { return "Quoted-Printable"; }
167  /*! Returns the max line length */
168  size_t maxlen()
169  {
170  return m_maxlen;
171  }
172  /*!
173  Set the max line length. No more then \p i chars will be
174  printed on one line.
175  */
176  void maxlen(size_t i)
177  {
178  m_maxlen = i;
179  }
180  /*!
181  Encodes [\p bit,\p eit) and write any encoded char to \p out.
182  */
183  template<typename InIt, typename OutIt>
184  void process(InIt bit, InIt eit, OutIt out)
185  {
186  for(; bit != eit; ++bit)
187  process(*bit, out);
188  flush(out);
189  }
190  /*!
191  Encodes \p ic and write any encoded output char to \p out.
192  \warning You must call flush() when all chars have been
193  processed by the encode funcion.
194  \n
195  \code
196  while( (c = getchar()) != EOF )
197  qp.process(c, out);
198  qp.flush();
199  \endcode
200  \n
201  \sa flush()
202  */
203  template<typename OutIt>
204  void process(char_type ic, OutIt& out)
205  {
206  m_cbuf.push_back(ic);
207  if(m_cbuf.count() < laBufSz)
208  return;
209  char_type c = m_cbuf.front();
210  m_cbuf.pop_front();
211  encodeChar(c, out);
212  }
213  /*!
214  Write to \p out any buffered encoded char.
215  */
216  template<typename OutIt>
217  void flush(OutIt& out)
218  {
219  char_type c;
220  while(!m_cbuf.empty())
221  {
222  c = m_cbuf.front();
223  m_cbuf.pop_front();
224  encodeChar(c, out);
225  }
226  }
227 };
228 
229 /// quoted-printable decoder
230 /*!
231 
232  \sa encode decode
233  */
234 class Decoder: public buffered_codec, public chainable_codec<Encoder>
235 {
236  enum { laBufSz = 80 }; // look-ahead buffer
237  enum {
238  sWaitingChar,
239  sAfterEq,
240  sWaitingFirstHex,
241  sWaitingSecondHex,
242  sBlank,
243  sNewline,
244  sOtherChar
245  };
246  size_t m_pos, m_maxlen;
247 
248 
249  int m_state, m_nl;
250  std::string m_prev;
251 
252  template<typename OutIt>
253  void hardLineBrk(OutIt& out) const
254  {
255  *out = NL; ++out;
256  }
257  template<typename OutIt>
258  void write(char_type ch, OutIt& out) const
259  {
260  *out = ch; ++out;
261  }
262  bool isnl(char_type c) const
263  {
264  return (c == CR || c == LF);
265  }
266  template<typename OutIt>
267  void flushPrev(OutIt& out)
268  {
269  copy(m_prev.begin(), m_prev.end(), out);
270  m_prev.clear();
271  }
272  int hex_to_int(char_type c) const
273  {
274  if( c >= '0' && c <='9') return c - '0';
275  else if( c >= 'A' && c <='F') return c - 'A' + 10;
276  else if( c >= 'a' && c <='f') return c - 'a' + 10;
277  else return 0;
278  }
279  bool ishex(char_type c) const
280  {
281  return (c >= '0' && c <= '9') ||
282  (c >= 'A' && c <= 'F') ||
283  (c >= 'a' && c <= 'f');
284  }
285  template<typename OutIt>
286  void decodeChar(char_type c, OutIt& out)
287  {
288  for(;;)
289  {
290  switch(m_state)
291  {
292  case sBlank:
293  if(isblank(c))
294  m_prev.append(1,c);
295  else if(isnl(c)) {
296  // soft linebrk & ignore trailing blanks
297  m_prev.clear();
298  m_state = sWaitingChar;
299  } else {
300  flushPrev(out);
301  m_state = sWaitingChar;
302  continue;
303  }
304  return;
305  case sAfterEq:
306  if(isblank(c))
307  m_prev.append(1,c);
308  else if(isnl(c)) {
309  // soft linebrk
310  m_state = sNewline;
311  continue;
312  } else {
313  if(m_prev.length() > 1)
314  {
315  // there're blanks after =
316  flushPrev(out);
317  m_state = sWaitingChar;
318  } else
319  m_state = sWaitingFirstHex;
320  continue;
321  }
322  return;
323  case sWaitingFirstHex:
324  if(!ishex(c))
325  {
326  // malformed: =[not-hexch]
327  flushPrev(out);
328  write(c, out);
329  m_state = sWaitingChar;
330  return;
331  } else {
332  m_prev.append(1,c);
333  m_state = sWaitingSecondHex;
334  }
335  return;
336  case sWaitingSecondHex:
337  if(!ishex(c))
338  { // malformed (=[hexch][not-hexch])
339  flushPrev(out);
340  write(c, out);
341  } else {
342  char_type oc, last;
343  assert(m_prev.length());
344  last = m_prev[m_prev.length()-1];
345  oc = hex_to_int(last) << 4 |
346  hex_to_int(c) ;
347  write(oc,out);
348  m_prev.clear();
349  }
350  m_state = sWaitingChar;
351  return;
352  case sNewline:
353  if(m_nl == 0)
354  {
355  m_nl = c;
356  return;
357  } else {
358  int len = m_prev.length();
359  if(!len || m_prev[0] != '=')
360  hardLineBrk(out);
361  m_prev.clear();
362  m_state = sWaitingChar;
363  bool is2Ch;
364  is2Ch = (c == (m_nl == CR ? LF : CR));
365  m_nl = 0;
366  if(is2Ch)
367  return;
368  continue;
369  }
370  case sWaitingChar:
371  if(isblank(c))
372  {
373  m_state = sBlank;
374  continue;
375  } else if(isnl(c)) {
376  m_state = sNewline;
377  continue;
378  } else if(c == '=') {
379  m_state = sAfterEq;
380  m_prev.append(1, c);
381  return;
382  } else {
383  // WARNING: NOT ignoring chars > 126
384  // as suggested in rfc2045 6.7 note 4
385  if(c < 32 && c != TAB)
386  {
387  // malformed, CTRL ch found
388  // ignore (rfc2045 6.7 note 4)
389  return;
390  }
391  write(c,out);
392  }
393  return;
394  }
395  }
396  }
397 public:
398  /*! Constructor */
400  : m_state(sWaitingChar), m_nl(0)
401  {
402  }
403  /*! Returns the name of the codec ("Quoted-Printable") */
404  const char* name() const { return "Quoted-Printable"; }
405  /*! Returns the max line length */
406  size_t maxlen()
407  {
408  return m_maxlen;
409  }
410  /*!
411  Set the max line length. No more then \p i chars will be
412  printed on one line.
413  */
414  void maxlen(size_t i)
415  {
416  m_maxlen = i;
417  }
418  /*!
419  Decodes [\p bit,\p eit) and write any decoded char to \p out.
420  */
421  template<typename InIt, typename OutIt>
422  void process(InIt bit, InIt eit, OutIt out)
423  {
424  for(;bit != eit; ++bit)
425  decodeChar(*bit, out);
426  flush(out);
427  }
428  /*!
429  Decodes \p ic and write any decoded output char to \p out.
430 
431  \warning You must call flush() when all chars have been
432  processed by the code(...) funcion.
433  \n
434  \code
435  while( (c = getchar()) != EOF )
436  qp.process(c, out);
437  qp.flush();
438  \endcode
439  \n
440  \sa flush()
441  */
442  template<typename OutIt>
443  void process(char_type ic, OutIt& out)
444  {
445  decodeChar(ic, out);
446  }
447  /*!
448  Write to \p out any buffered decoded char.
449  */
450  template<typename OutIt>
451  void flush(OutIt& out)
452  {
453  /* m_prev can be (regex):
454  empty:
455  ok
456  '=' :
457  malformed, '=' is last stream char, print as is
458  (rfc2045 6.7 note 3)
459  '=[a-zA-Z]'
460  malformed, print as is
461  (rfc2045 6.7 note 2)
462  '= +'
463  malformed, just print '=' and ignore trailing
464  blanks (rfc2045 6.7 (3) )
465  */
466  int len = m_prev.length();
467  if(len)
468  {
469  if(len == 1)
470  {
471  /* malformed if m_prev[0] == '=' */
472  write('=', out);
473  } else {
474  write('=', out);
475  if(m_prev[1] != ' ')
476  write(m_prev[1], out);
477  }
478  } else if(m_nl != 0) // stream ends with newline
479  hardLineBrk(out);
480 
481  }
482 };
483 
484 };
485 
486 
487 } // namespace
488 
489 #endif
490 
void process(char_type ic, OutIt &out)
Definition: qp.h:204
void process(char_type ic, OutIt &out)
Definition: qp.h:443
size_t maxlen()
Definition: qp.h:406
size_t maxlen()
Definition: qp.h:168
Encoder(bool isBinary=false)
Definition: qp.h:160
quoted-printable decoder
Definition: qp.h:234
Base class for buffered codecs.
Definition: codec_base.h:47
const char * name() const
Definition: qp.h:404
double codeSizeMultiplier() const
Definition: qp.h:147
const char * name() const
Definition: qp.h:166
void maxlen(size_t i)
Definition: qp.h:414
void maxlen(size_t i)
Definition: qp.h:176
void process(InIt bit, InIt eit, OutIt out)
Definition: qp.h:422
quoted-printable encoder
Definition: qp.h:44
void process(InIt bit, InIt eit, OutIt out)
Definition: qp.h:184
void flush(OutIt &out)
Definition: qp.h:217
void flush(OutIt &out)
Definition: qp.h:451
Decoder()
Definition: qp.h:399