JavaScript crc32 that matches PHP crc32 and works well with Unicode

I have already written an article about hash functions that return integer values and provided JavaScript examples. That article included JavaScript source code, but it did not include one of the most widely used hash algorithms - CRC32. Recently I had to use it in one of my web projects, both server-side and client-side. Naturally, the results for the same string should be identical in the server-side and client-side code. How surprised I was when I found that most of the JavaScript CRC32 implementations available on the Internet return values different from the PHP crc32 function. Some of them returned completely different values from PHP crc32 for any given string; some returned correct values for English strings, but wrong values for other Unicode strings (in my case, Russian strings). I am not ready to say which algorithms are correct and which are not, but for my needs I can take PHP as the standard. So I looked around, found some code that worked, mixed it, slightly updated it for Unicode, and here is what I have now.

There are several reasons why I am publishing this code. First of all, I am sure somebody is looking for the same thing, so I hope this can save a little time for somebody. Another reason is that other smart people can suggest improvements so that it works faster (however, it is good enough for my needs).

function utf8_encode (argString) {
    // Encodes a JavaScript (UTF-16) string as UTF-8 and returns it as a
    // "byte string": every character of the result has a code in 0..255 and
    // stands for one UTF-8 byte.
    //
    // version: 1109.2015
    // discuss at: http://phpjs.org/functions/utf8_encode
    // +   original by: Webtoolkit.info (http://www.webtoolkit.info/)
    // +   improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
    // +   improved by: sowberry
    // +    tweaked by: Jack
    // +   bugfixed by: Onno Marsman
    // +   improved by: Yves Sucaet
    // +   bugfixed by: Onno Marsman
    // +   bugfixed by: Ulrich
    // +   bugfixed by: Rafal Kukawski
    // *     example 1: utf8_encode('Kevin van Zonneveld');
    // *     returns 1: 'Kevin van Zonneveld'
    //
    // argString: value to encode; it is converted to a string first.
    //            null and undefined yield the empty string.
    // returns:   the UTF-8 byte string.
    //
    // A valid surrogate pair (a code point above U+FFFF) is encoded as one
    // 4-byte sequence. An unpaired surrogate is still encoded as a 3-byte
    // sequence, exactly as every code unit >= 2048 was encoded before pairs
    // were recognised.
    if (argString === null || typeof argString === "undefined") {
        return "";
    }
    var string = (argString + '');
    var stringl = string.length;
    var utftext = "";
    var start = 0; // index where the not-yet-copied run of ASCII characters begins
    var n, c1, c2, cp, enc;

    for (n = 0; n < stringl; n++) {
        c1 = string.charCodeAt(n);
        if (c1 < 128) {
            // ASCII is copied verbatim, in bulk, when the run ends.
            continue;
        }

        // Flush the ASCII run that precedes this character.
        if (n > start) {
            utftext += string.slice(start, n);
        }

        if (c1 < 2048) {
            enc = String.fromCharCode((c1 >> 6) | 192) + String.fromCharCode((c1 & 63) | 128);
        } else {
            c2 = (c1 >= 0xD800 && c1 <= 0xDBFF && n + 1 < stringl) ? string.charCodeAt(n + 1) : 0;
            if (c2 >= 0xDC00 && c2 <= 0xDFFF) {
                // High + low surrogate: rebuild the code point, emit 4 bytes.
                cp = 0x10000 + ((c1 - 0xD800) << 10) + (c2 - 0xDC00);
                enc = String.fromCharCode((cp >> 18) | 240) + String.fromCharCode(((cp >> 12) & 63) | 128) + String.fromCharCode(((cp >> 6) & 63) | 128) + String.fromCharCode((cp & 63) | 128);
                n++; // the low surrogate has been consumed as well
            } else {
                enc = String.fromCharCode((c1 >> 12) | 224) + String.fromCharCode(((c1 >> 6) & 63) | 128) + String.fromCharCode((c1 & 63) | 128);
            }
        }

        utftext += enc;
        start = n + 1;
    }

    // Copy the trailing ASCII run, if any.
    if (stringl > start) {
        utftext += string.slice(start, stringl);
    }
    return utftext;
}


function crc32(s) {
  // Computes a reflected (LSB-first) 32-bit CRC of a byte string; with the
  // default parameters this is the common CRC-32 (polynomial 0x04C11DB7).
  //
  // s:            value to checksum; converted with String(). Every character
  //               must have a code in 0..255 and is treated as one byte, so
  //               text containing other characters has to be UTF-8 encoded
  //               into a byte string before it is passed in.
  // arguments[1]: optional generator polynomial in normal (MSB-first)
  //               notation, default 0x04C11DB7.
  // arguments[2]: optional initial register value, default 0xFFFFFFFF.
  // arguments[3]: optional value XORed into the result, default 0xFFFFFFFF.
  // returns:      the checksum as an unsigned 32-bit integer.
  // throws:       RangeError if s contains a character code above 255.
  s = String(s);
  var polynomial = arguments.length < 2 ? 0x04C11DB7 : arguments[1],
      initialValue = arguments.length < 3 ? 0xFFFFFFFF : arguments[2],
      finalXORValue = arguments.length < 4 ? 0xFFFFFFFF : arguments[3],
      reflectedPoly = 0,
      table = [],
      crc, i, j, c;

  // Bit-reverse the polynomial once so the table can be generated directly
  // in the reflected domain (0x04C11DB7 becomes 0xEDB88320).
  polynomial = polynomial >>> 0;
  for (i = 0; i < 32; i++) {
    reflectedPoly = ((reflectedPoly << 1) | ((polynomial >>> i) & 1)) >>> 0;
  }

  // One table entry per possible byte value: the register contents after
  // shifting that byte through eight reduction steps.
  for (i = 0; i < 256; i++) {
    c = i;
    for (j = 0; j < 8; j++) {
      c = (c & 1) ? ((c >>> 1) ^ reflectedPoly) >>> 0 : c >>> 1;
    }
    table[i] = c;
  }

  crc = initialValue >>> 0;
  for (i = 0; i < s.length; i++) {
    c = s.charCodeAt(i);
    if (c > 255) {
      // A code above 255 is not a byte; folding it in would give a value
      // that is not the CRC of any byte sequence.
      throw new RangeError('crc32: character code ' + c + ' at index ' + i +
          ' is not a byte (0-255); UTF-8 encode the string first');
    }
    crc = ((crc >>> 8) ^ table[(crc ^ c) & 0xFF]) >>> 0;
  }

  return (crc ^ finalXORValue) >>> 0;
}

Let's try our code:

<script>
// Write each sample string followed by the CRC32 of its UTF-8 encoded form.
var samples = ['Привет мир', 'Hello world'];
for (var k = 0; k < samples.length; k++) {
  var str = samples[k];
  var checksum = crc32(utf8_encode(str));
  document.write(str + " - " + checksum + "<br>");
}
</script>

<?php /* Print the string and its CRC32. "%u" formats the checksum as unsigned, because crc32() may return a negative integer on 32-bit PHP builds. */ $str = "Привет мир"; echo $str." - ".sprintf("%u", crc32($str)); ?><br/>
<?php /* Same check for an ASCII-only string. */ $str = "Hello world"; echo $str." - ".sprintf("%u", crc32($str)); ?><br/>

This generates the following:

Привет мир - 1277649724
Hello world - 2346098258
Привет мир - 1277649724
Hello world - 2346098258

As I mentioned above, most of the code was found on the Internet; I have left all the authorship notices in the code.

Please follow my blog for new articles and code, and of course for updates and improvements to the code published above.

Posted by:
Enjoyed this post? Share and Leave a comment below, thanks! :)