001/*
002 * JPPF.
003 * Copyright (C) 2005-2016 JPPF Team.
004 * http://www.jppf.org
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 *   http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.jppf.utils.base64;
020
021
022/**
023 * <p>Encodes and decodes to and from Base64 notation.</p>
024 * <p>Homepage: <a href="http://iharder.net/base64">http://iharder.net/base64</a>.</p>
025 * 
026 * <p>Example:</p>
027 * 
028 * <code>String encoded = Base64.encode( myByteArray );</code>
029 * <br />
030 * <code>byte[] myByteArray = Base64.decode( encoded );</code>
031 *
032 * <p>The <tt>options</tt> parameter, which appears in a few places, is used to pass
033 * several pieces of information to the encoder. In the "higher level" methods such as
034 * encodeBytes( bytes, options ) the options parameter can be used to indicate such
035 * things as first gzipping the bytes before encoding them, not inserting linefeeds,
036 * and encoding using the URL-safe and Ordered dialects.</p>
037 *
038 * <p>Note, according to <a href="http://www.faqs.org/rfcs/rfc3548.html">RFC3548</a>,
039 * Section 2.1, implementations should not add line feeds unless explicitly told
040 * to do so. I've got Base64 set to this behavior now, although earlier versions
041 * broke lines by default.</p>
042 *
043 * <p>The constants defined in Base64 can be OR-ed together to combine options, so you
044 * might make a call like this:</p>
045 *
046 * <code>String encoded = Base64.encodeBytes( mybytes, Base64.GZIP | Base64.DO_BREAK_LINES );</code>
047 * <p>to compress the data before encoding it and then making the output have newline characters.</p>
048 * <p>Also...</p>
049 * <code>String encoded = Base64.encodeBytes( crazyString.getBytes() );</code>
050 *
051 * <p>I am placing this code in the Public Domain. Do with it as you will.
052 * This software comes with no guarantees or warranties but with
053 * plenty of well-wishing instead!
054 * Please visit <a href="http://iharder.net/base64">http://iharder.net/base64</a>
055 * periodically to check for updates or to contribute improvements.
056 *
057 * @author Robert Harder
058 * @author rob@iharder.net
059 * @version 2.3.7
060 */
public final class Base64
{
/* ********  P U B L I C   F I E L D S  ******** */

  /** No options specified. Value is zero. */
  public static final int NO_OPTIONS = 0;
  /** Specify encoding in first bit. Value is one. */
  public static final int ENCODE = 1;
  /** Specify decoding in first bit. Value is zero. */
  public static final int DECODE = 0;
  /** Specify that data should be gzip-compressed in second bit. Value is two. */
  public static final int GZIP = 2;
  /** Specify that gzipped data should <em>not</em> be automatically gunzipped. Value is four. */
  public static final int DONT_GUNZIP = 4;
  /** Do break lines when encoding. Value is 8. */
  public static final int DO_BREAK_LINES = 8;
  /**
   * Encode using Base64-like encoding that is URL- and Filename-safe as described in Section 4 of RFC3548:
   * <a href="http://www.faqs.org/rfcs/rfc3548.html">http://www.faqs.org/rfcs/rfc3548.html</a>.
   * It is important to note that data encoded this way is <em>not</em> officially valid Base64,
   * or at the very least should not be called Base64 without also specifying that it
   * was encoded using the URL- and Filename-safe dialect. Value is 16.
   */
  public static final int URL_SAFE = 16;
  /**
   * Encode using the special "ordered" dialect of Base64 described here:
   * <a href="http://www.faqs.org/qa/rfcc-1940.html">http://www.faqs.org/qa/rfcc-1940.html</a>.
   * Value is 32.
   */
  public static final int ORDERED = 32;

/* ********  I N T E R N A L   ( P A C K A G E - P R I V A T E )   F I E L D S  ******** */

  /** Maximum line length (76) of Base64 output. */
  static final int MAX_LINE_LENGTH = 76;
  /** The equals sign (=) as a byte. */
  static final byte EQUALS_SIGN = (byte)'=';
  /** The new line character (\n) as a byte. */
  static final byte NEW_LINE = (byte)'\n';
  /** Preferred encoding. */
  static final String PREFERRED_ENCODING = "US-ASCII";
  /** Marker stored in the decodabets at the positions of white space characters. */
  static final byte WHITE_SPACE_ENC = -5;
  /** Marker stored in the decodabets at the position of the equals sign. */
  static final byte EQUALS_SIGN_ENC = -1;

/* ********  S T A N D A R D   B A S E 6 4   A L P H A B E T  ******** */

  /** The 64 valid Base64 values. Host platform may be something funny like EBCDIC, so we hardcode these values. */
  static final byte[] STANDARD_ALPHABET = {
    (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F', (byte)'G',
    (byte)'H', (byte)'I', (byte)'J', (byte)'K', (byte)'L', (byte)'M', (byte)'N',
    (byte)'O', (byte)'P', (byte)'Q', (byte)'R', (byte)'S', (byte)'T', (byte)'U',
    (byte)'V', (byte)'W', (byte)'X', (byte)'Y', (byte)'Z',
    (byte)'a', (byte)'b', (byte)'c', (byte)'d', (byte)'e', (byte)'f', (byte)'g',
    (byte)'h', (byte)'i', (byte)'j', (byte)'k', (byte)'l', (byte)'m', (byte)'n',
    (byte)'o', (byte)'p', (byte)'q', (byte)'r', (byte)'s', (byte)'t', (byte)'u',
    (byte)'v', (byte)'w', (byte)'x', (byte)'y', (byte)'z',
    (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5',
    (byte)'6', (byte)'7', (byte)'8', (byte)'9', (byte)'+', (byte)'/'
  };

  /**
   * Translates a Base64 value to either its 6-bit reconstruction value
   * or a negative number indicating some other meaning.
   * The table has 256 entries, one per decimal value 0 - 255: alphabet characters map to 0 - 63,
   * white space maps to {@link #WHITE_SPACE_ENC}, the equals sign maps to {@link #EQUALS_SIGN_ENC}
   * and every other position holds -9.
   */
  static final byte[] STANDARD_DECODABET = {
    -9,-9,-9,-9,-9,-9,-9,-9,-9,                 // Decimal  0 -  8
    -5,-5,                                      // Whitespace: Tab and Linefeed
    -9,-9,                                      // Decimal 11 - 12
    -5,                                         // Whitespace: Carriage Return
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 14 - 26
    -9,-9,-9,-9,-9,                             // Decimal 27 - 31
    -5,                                         // Whitespace: Space
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,              // Decimal 33 - 42
    62,                                         // Plus sign at decimal 43
    -9,-9,-9,                                   // Decimal 44 - 46
    63,                                         // Slash at decimal 47
    52,53,54,55,56,57,58,59,60,61,              // Numbers zero through nine
    -9,-9,-9,                                   // Decimal 58 - 60
    -1,                                         // Equals sign at decimal 61
    -9,-9,-9,                                   // Decimal 62 - 64
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,            // Letters 'A' through 'N'
    14,15,16,17,18,19,20,21,22,23,24,25,        // Letters 'O' through 'Z'
    -9,-9,-9,-9,-9,-9,                          // Decimal 91 - 96
    26,27,28,29,30,31,32,33,34,35,36,37,38,     // Letters 'a' through 'm'
    39,40,41,42,43,44,45,46,47,48,49,50,51,     // Letters 'n' through 'z'
    -9,-9,-9,-9,-9,                             // Decimal 123 - 127
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,        // Decimal 128 - 139
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 140 - 152
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 153 - 165
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 166 - 178
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 179 - 191
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 192 - 204
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 205 - 217
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 218 - 230
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 231 - 243
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9         // Decimal 244 - 255
  };

/* ********  U R L   S A F E   B A S E 6 4   A L P H A B E T  ******** */

  /**
   * Used in the URL- and Filename-safe dialect described in Section 4 of RFC3548:
   * <a href="http://www.faqs.org/rfcs/rfc3548.html">http://www.faqs.org/rfcs/rfc3548.html</a>.
   * Notice that the last two bytes become "hyphen" and "underscore" instead of "plus" and "slash."
   */
  static final byte[] URL_SAFE_ALPHABET = {
    (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F', (byte)'G',
    (byte)'H', (byte)'I', (byte)'J', (byte)'K', (byte)'L', (byte)'M', (byte)'N',
    (byte)'O', (byte)'P', (byte)'Q', (byte)'R', (byte)'S', (byte)'T', (byte)'U',
    (byte)'V', (byte)'W', (byte)'X', (byte)'Y', (byte)'Z',
    (byte)'a', (byte)'b', (byte)'c', (byte)'d', (byte)'e', (byte)'f', (byte)'g',
    (byte)'h', (byte)'i', (byte)'j', (byte)'k', (byte)'l', (byte)'m', (byte)'n',
    (byte)'o', (byte)'p', (byte)'q', (byte)'r', (byte)'s', (byte)'t', (byte)'u',
    (byte)'v', (byte)'w', (byte)'x', (byte)'y', (byte)'z',
    (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5',
    (byte)'6', (byte)'7', (byte)'8', (byte)'9', (byte)'-', (byte)'_'
  };

  /**
   * Used in decoding URL- and Filename-safe dialects of Base64.
   * Same layout as {@link #STANDARD_DECODABET} (256 entries), except that the hyphen and the
   * underscore take the values 62 and 63 while the plus sign and the slash hold -9.
   */
  static final byte[] URL_SAFE_DECODABET = {
    -9,-9,-9,-9,-9,-9,-9,-9,-9,                 // Decimal  0 -  8
    -5,-5,                                      // Whitespace: Tab and Linefeed
    -9,-9,                                      // Decimal 11 - 12
    -5,                                         // Whitespace: Carriage Return
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 14 - 26
    -9,-9,-9,-9,-9,                             // Decimal 27 - 31
    -5,                                         // Whitespace: Space
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,              // Decimal 33 - 42
    -9,                                         // Plus sign at decimal 43
    -9,                                         // Decimal 44
    62,                                         // Minus sign at decimal 45
    -9,                                         // Decimal 46
    -9,                                         // Slash at decimal 47
    52,53,54,55,56,57,58,59,60,61,              // Numbers zero through nine
    -9,-9,-9,                                   // Decimal 58 - 60
    -1,                                         // Equals sign at decimal 61
    -9,-9,-9,                                   // Decimal 62 - 64
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,            // Letters 'A' through 'N'
    14,15,16,17,18,19,20,21,22,23,24,25,        // Letters 'O' through 'Z'
    -9,-9,-9,-9,                                // Decimal 91 - 94
    63,                                         // Underscore at decimal 95
    -9,                                         // Decimal 96
    26,27,28,29,30,31,32,33,34,35,36,37,38,     // Letters 'a' through 'm'
    39,40,41,42,43,44,45,46,47,48,49,50,51,     // Letters 'n' through 'z'
    -9,-9,-9,-9,-9,                             // Decimal 123 - 127
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,        // Decimal 128 - 139
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 140 - 152
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 153 - 165
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 166 - 178
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 179 - 191
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 192 - 204
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 205 - 217
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 218 - 230
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 231 - 243
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9         // Decimal 244 - 255
  };

/* ********  O R D E R E D   B A S E 6 4   A L P H A B E T  ******** */

  /**
   * I don't get the point of this technique, but someone requested it,
   * and it is described here:
   * <a href="http://www.faqs.org/qa/rfcc-1940.html">http://www.faqs.org/qa/rfcc-1940.html</a>.
   */
  static final byte[] ORDERED_ALPHABET = {
    (byte)'-',
    (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4',
    (byte)'5', (byte)'6', (byte)'7', (byte)'8', (byte)'9',
    (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F', (byte)'G',
    (byte)'H', (byte)'I', (byte)'J', (byte)'K', (byte)'L', (byte)'M', (byte)'N',
    (byte)'O', (byte)'P', (byte)'Q', (byte)'R', (byte)'S', (byte)'T', (byte)'U',
    (byte)'V', (byte)'W', (byte)'X', (byte)'Y', (byte)'Z',
    (byte)'_',
    (byte)'a', (byte)'b', (byte)'c', (byte)'d', (byte)'e', (byte)'f', (byte)'g',
    (byte)'h', (byte)'i', (byte)'j', (byte)'k', (byte)'l', (byte)'m', (byte)'n',
    (byte)'o', (byte)'p', (byte)'q', (byte)'r', (byte)'s', (byte)'t', (byte)'u',
    (byte)'v', (byte)'w', (byte)'x', (byte)'y', (byte)'z'
  };

  /**
   * Used in decoding the "ordered" dialect of Base64.
   * Holds exactly 256 entries, one per decimal value 0 - 255, like the other two decodabets.
   */
  static final byte[] ORDERED_DECODABET = {
    -9,-9,-9,-9,-9,-9,-9,-9,-9,                 // Decimal  0 -  8
    -5,-5,                                      // Whitespace: Tab and Linefeed
    -9,-9,                                      // Decimal 11 - 12
    -5,                                         // Whitespace: Carriage Return
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 14 - 26
    -9,-9,-9,-9,-9,                             // Decimal 27 - 31
    -5,                                         // Whitespace: Space
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,              // Decimal 33 - 42
    -9,                                         // Plus sign at decimal 43
    -9,                                         // Decimal 44
    0,                                          // Minus sign at decimal 45
    -9,                                         // Decimal 46
    -9,                                         // Slash at decimal 47
    1,2,3,4,5,6,7,8,9,10,                       // Numbers zero through nine
    -9,-9,-9,                                   // Decimal 58 - 60
    -1,                                         // Equals sign at decimal 61
    -9,-9,-9,                                   // Decimal 62 - 64
    11,12,13,14,15,16,17,18,19,20,21,22,23,     // Letters 'A' through 'M'
    24,25,26,27,28,29,30,31,32,33,34,35,36,     // Letters 'N' through 'Z'
    -9,-9,-9,-9,                                // Decimal 91 - 94
    37,                                         // Underscore at decimal 95
    -9,                                         // Decimal 96
    38,39,40,41,42,43,44,45,46,47,48,49,50,     // Letters 'a' through 'm'
    51,52,53,54,55,56,57,58,59,60,61,62,63,     // Letters 'n' through 'z'
    -9,-9,-9,-9,-9,                             // Decimal 123 - 127
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,        // Decimal 128 - 139
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 140 - 152
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 153 - 165
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 166 - 178
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 179 - 191
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 192 - 204
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 205 - 217
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 218 - 230
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,     // Decimal 231 - 243
    -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9         // Decimal 244 - 255
  };


/* ********  D E T E R M I N E   W H I C H   A L P H A B E T  ******** */


  /**
   * Returns one of the SOMETHING_ALPHABET byte arrays depending on the options specified.
   * It's possible, though silly, to specify ORDERED <b>and</b> URL_SAFE in which case one of them will be picked.
   * This implementation tests URL_SAFE first, but callers should not rely on which one is picked.
   * @param options the options.
   * @return the byte array specified via the options; the standard alphabet when neither dialect bit is set.
   */
  static byte[] getAlphabet(final int options) {
    if ((options & URL_SAFE) == URL_SAFE) {
      return URL_SAFE_ALPHABET;
    }
    if ((options & ORDERED) == ORDERED) {
      return ORDERED_ALPHABET;
    }
    return STANDARD_ALPHABET;
  }     // end getAlphabet

  /**
   * Returns one of the SOMETHING_DECODABET byte arrays depending on the options specified.
   * It's possible, though silly, to specify ORDERED and URL_SAFE in which case one of them will be picked.
   * This implementation tests URL_SAFE first, but callers should not rely on which one is picked.
   * @param options the options.
   * @return the byte array specified via the options; the standard decodabet when neither dialect bit is set.
   */
  static byte[] getDecodabet(final int options) {
    if ((options & URL_SAFE) == URL_SAFE) {
      return URL_SAFE_DECODABET;
    }
    if ((options & ORDERED) == ORDERED) {
      return ORDERED_DECODABET;
    }
    return STANDARD_DECODABET;
  }     // end getDecodabet

  /** Defeats instantiation. */
  private Base64() {
  }
}   // end class Base64