/// <summary>
/// Recursively walks a directory tree and, for every file whose extension is accepted by
/// <c>CheckFileType</c> and which <c>IsUTF8</c> reports as not UTF-8, reads the whole file
/// twice: once decoded as UTF-8 and once decoded with the system default encoding
/// (with byte-order-mark detection enabled).
/// </summary>
/// <param name="Path">Directory to scan. Sub-directories are processed before the directory's own files.</param>
/// <remarks>
/// NOTE(review): the decoded text is collected into local buffers but is not written back or
/// returned anywhere in this method; the reads are kept as they were - confirm whether a
/// conversion/save step is missing.
/// </remarks>
private void FindNoUTFFile(string Path)
{
    DirectoryInfo Folder = new System.IO.DirectoryInfo(Path);

    // Depth-first: descend into every sub-directory before looking at this directory's files.
    foreach (DirectoryInfo subFolder in Folder.GetDirectories())
    {
        FindNoUTFFile(subFolder.FullName);
    }

    foreach (FileInfo file in Folder.GetFiles())
    {
        // Invariant lower-casing keeps the extension comparison independent of the current
        // culture (e.g. the Turkish dotted/dotless 'I' mapping).
        if (!CheckFileType(file.Extension.ToLowerInvariant()))
        {
            continue;
        }

        bool bUtf8;
        // 'using' guarantees the handle is released even when IsUTF8 throws.
        using (FileStream fs = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
        {
            bUtf8 = IsUTF8(fs);
        }

        if (bUtf8)
        {
            continue;
        }

        StringBuilder sb = new StringBuilder();
        StringBuilder sb2 = new StringBuilder();

        // File content decoded as UTF-8.
        using (StreamReader reader = new System.IO.StreamReader(file.FullName, System.Text.Encoding.UTF8))
        {
            sb2.Append(reader.ReadToEnd());
        }

        // File content decoded with the system default encoding; 'true' lets a byte-order mark override it.
        using (StreamReader reader = new System.IO.StreamReader(file.FullName, System.Text.Encoding.Default, true))
        {
            sb.Append(reader.ReadToEnd());
        }
    }
}

//0000 0000-0000 007F - 0xxxxxxx (ascii converts to 1 octet!)
//0000 0080-0000 07FF - 110xxxxx 10xxxxxx ( 2 octet format)
//0000 0800-0000 FFFF - 1110xxxx 10xxxxxx 10xxxxxx (3 octet format)
/// <summary>
/// Heuristically decides whether the bytes of a stream, read from its current position to
/// the end, are structurally valid UTF-8 that contains at least one non-ASCII character.
/// </summary>
/// <param name="sbInputStream">Open, readable stream to inspect. It is consumed to its end and is not closed here.</param>
/// <returns>
/// <c>true</c> when every multi-octet character is a lead octet followed by the right number of
/// continuation octets and at least one such character exists; <c>false</c> for malformed
/// sequences, for a sequence truncated at end of stream, and for empty or pure-ASCII content.
/// </returns>
/// <remarks>
/// Octet layouts accepted:
///   0xxxxxxx                             (1 octet, ASCII)
///   110xxxxx 10xxxxxx                    (2 octets)
///   1110xxxx 10xxxxxx 10xxxxxx           (3 octets)
///   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx  (4 octets)
/// Only the bit structure is checked; overlong forms and surrogate code points are not rejected.
/// </remarks>
private static bool IsUTF8(FileStream sbInputStream)
{
    int cOctets = 0;        // continuation octets still expected for the current character
    bool bAllAscii = true;
    int chr;

    // Read until end of stream instead of counting up to Length with an int index:
    // this cannot overflow on very large files and never turns the end-of-stream
    // marker (-1) into a bogus 0xFF octet.
    while ((chr = sbInputStream.ReadByte()) != -1)
    {
        if ((chr & 0x80) != 0)
        {
            bAllAscii = false;
        }

        if (cOctets > 0)
        {
            // Inside a multi-octet character: only 10xxxxxx is allowed here.
            if ((chr & 0xC0) != 0x80)
            {
                return false;
            }
            cOctets--;
        }
        else if ((chr & 0x80) != 0)
        {
            // Start of a multi-octet character: the lead octet announces its length.
            if ((chr & 0xE0) == 0xC0)
            {
                cOctets = 1;
            }
            else if ((chr & 0xF0) == 0xE0)
            {
                cOctets = 2;
            }
            else if ((chr & 0xF8) == 0xF0)
            {
                cOctets = 3;
            }
            else
            {
                // Either a stray continuation octet (10xxxxxx) or a lead octet announcing
                // more than four octets (0xF8-0xFF), which UTF-8 does not define.
                return false;
            }
        }
    }

    // The stream ended in the middle of a multi-octet character.
    if (cOctets > 0)
    {
        return false;
    }

    // Pure ASCII (or empty) content is deliberately reported as "not UTF-8".
    return !bAllAscii;
}
// The two braces below close the enclosing scopes that were opened before this method.
}
}
|