用C来实现base64编码与解码
2007-07-31 14:44:37
base64编码是邮件传输中比较常用的一种简单编码,它把8bit的编码,转换成6bit有效的编码,头两个bit置零(和邮件传输网关有关,高位为1会被过滤),因为只有6bit有效,所以有64个码,另外,base64希望编码能在ascii范围内,以便打印出来,所以会设置一个码表,编码映射到码表来。
也就是说,base64把每3个8bit,转换成4个8bit,其中每个8bit里面的高2bit是恒0。
这个是码表:
编码程序:
也就是说,base64把每3个8bit,转换成4个8bit,其中每个8bit里面的高2bit是恒0。
这个是码表:
CODE:
/* Base64 alphabet: the character at index v represents the 6-bit value v (0..63). */
static const char *codes =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";我写了一个编码和解码的程序,根据rfc来写的,也许因为我理解的漏洞,或者有逻辑错误,程序不一定是正确的(但是我测试多多个图片和email文本的编码,是正确的), 请用者自己检查正确性。编码程序:
CODE:
/*
 * Encode in_len bytes from `in` as base64 text into `out`.
 *
 * Parameters: input buffer, input length, output buffer, output capacity.
 *
 * The output is 4 * ceil(in_len / 3) characters followed by a terminating
 * NUL, so out_len must be at least that length plus one.  Input that is not
 * a multiple of 3 bytes is padded with '=' (the "pad" of the RFC).
 */
void base64_encode(char *in, const int in_len, char *out, int out_len)
{
    int base64_len = 4 * ((in_len + 2) / 3);
    /* Strictly greater: one extra byte is needed for the NUL written below. */
    assert(out_len > base64_len);

    /*
     * Read the input as unsigned bytes.  With a signed plain char, bytes
     * >= 0x80 would shift to negative values and index outside `codes`.
     */
    const unsigned char *src = (const unsigned char *)in;
    char *p = out;
    int left = in_len;

    /* Full 3-byte groups: 24 bits -> four 6-bit indices. */
    for (; left >= 3; left -= 3, src += 3) {
        *p++ = codes[src[0] >> 2];
        *p++ = codes[((src[0] & 0x3) << 4) | (src[1] >> 4)];
        *p++ = codes[((src[1] & 0xf) << 2) | (src[2] >> 6)];
        *p++ = codes[src[2] & 0x3f];
    }

    /* Trailing 1 or 2 bytes: missing bits are zero, missing sextets are '='. */
    if (left == 1) {
        *p++ = codes[src[0] >> 2];
        *p++ = codes[(src[0] & 0x3) << 4];
        *p++ = '=';
        *p++ = '=';
    } else if (left == 2) {
        *p++ = codes[src[0] >> 2];
        *p++ = codes[((src[0] & 0x3) << 4) | (src[1] >> 4)];
        *p++ = codes[(src[1] & 0xf) << 2];
        *p++ = '=';
    }
    *p = 0;
}
解码过程需要弄一个编码反查表:
CODE:
/* Marker stored in ords[] for the '=' padding character. */
#define PAD (-1)

/*
 * Reverse lookup table indexed by ASCII code; init_ords() fills in the
 * entries for the base64 alphabet and for '='.
 * NOTE(review): PAD is negative but the element type is plain char, whose
 * signedness is implementation-defined - confirm on unsigned-char targets.
 */
char ords[128];
/*
 * Fill the reverse lookup table: for each character of the base64 alphabet,
 * ords[character] is set to its 6-bit value (0..63), and ords['='] is set to
 * PAD.  All other entries are left untouched.
 */
void init_ords()
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    /* The position of a character in the alphabet is the value it encodes. */
    for (int value = 0; alphabet[value] != '\0'; ++value) {
        ords[(unsigned char)alphabet[value]] = value;
    }
    ords['='] = PAD;
}
这样可以根据明文,把编码位置查出来,然后把多余的00都去掉,最后的一个反pad过程要注意一下下
CODE:
/*
 * Decode in_len characters of base64 text from `in` into `out`.
 *
 * On entry *out_len is the capacity of `out`; it must be greater than
 * in_len * 3 / 4 so the decoded bytes plus a terminating NUL fit.  On return
 * *out_len is the number of decoded bytes (the NUL is not counted).
 *
 * init_ords() must have been called first.  Input whose length is not a
 * positive multiple of 4 yields an empty result.  Characters are translated
 * through ords[] without validation, so characters outside the alphabet
 * decode as whatever ords[] holds for them; bytes >= 128 are treated as 0.
 */
void base64_decode(char *in, const int in_len, char *out, int *out_len)
{
    /* Same value as in_len * 3 / 4, computed without overflowing int. */
    int decode_len = (in_len / 4) * 3 + (in_len % 4) * 3 / 4;
    assert(*out_len > decode_len);

    unsigned char *q = (unsigned char *)out;
    *out_len = 0;

    /* Base64 text is a whole number of 4-character groups. */
    if (in_len <= 0 || in_len % 4 != 0) {
        *q = 0;
        return;
    }

    for (int i = 0; i < in_len; i += 4) {
        int v[4];
        for (int k = 0; k < 4; ++k) {
            unsigned char c = (unsigned char)in[i + k];
            /*
             * Index only inside the 128-entry table.  Reading through
             * signed char keeps PAD (-1) recognizable even where plain
             * char is unsigned.
             */
            v[k] = (c < 128) ? (signed char)ords[c] : 0;
        }

        /* Only the last group may carry '=' padding. */
        int produced = 3;
        if (i + 4 == in_len && v[3] == PAD) {
            if (v[2] != PAD) {
                produced = 2;       /* one pad: "xxx=" -> 2 bytes */
            } else if (v[1] != PAD) {
                produced = 1;       /* two pads: "xx==" -> 1 byte */
            } else {
                produced = 0;       /* malformed tail: nothing to emit */
            }
        }

        /* Keep each value to 6 bits so the shifts never see a negative. */
        int a = v[0] & 0x3f;
        int b = v[1] & 0x3f;
        int c2 = v[2] & 0x3f;
        int d = v[3] & 0x3f;

        if (produced >= 1) {
            *q++ = (unsigned char)((a << 2) | (b >> 4));
        }
        if (produced >= 2) {
            *q++ = (unsigned char)(((b & 0xf) << 4) | (c2 >> 2));
        }
        if (produced >= 3) {
            *q++ = (unsigned char)(((c2 & 0x3) << 6) | d);
        }
        *out_len += produced;
    }
    *q = 0;
}
我写的文件,仅仅考虑了自己测试的需要,没有考虑通用性,把主要实现发上来供讨论。
base64.rar
(2007-07-31 14:43:54, Size: 8.71 kB, Downloads: 0)
论坛模式 推荐 收藏 等级(0) 编辑 管理 查看(2065) 评论(2)
TAG:
-
secondch发布于2008-06-26 09:52:03
-
正好需要,谢谢楼上两位!
-
kmajian
发布于2007-08-28 10:45:33
-
这个是我的可对任意数据进行编码的Base64实现
http://blog.csdn.net/kmajian/archive/2006/06/23/825122.aspx