/**
* <p>Title: Word Cleaner</p>
* <p>Description: Strips out all of the rubbish that Word tends to generate (open, close quotes, etc)</p>
* @author Tim Yates
* @version 1.0
*
* Based on John Walker's "Demoroniser" Perl script : http://www.fourmilab.ch/webtools/demoroniser/
*/
public class WordCleaner
{
    /** Not instantiable: the class only exposes the static {@link #runWordCleaner} helper. */
    private WordCleaner() {}

    /**
     * Returns a copy of <code>input</code> in which each character listed in
     * {@link #replacementFor} is replaced by its plain-ASCII stand-in
     * (for example curly quotes become straight quotes and an ellipsis
     * becomes three dots). All other characters are copied through unchanged.
     *
     * @param input the text to clean; may be <code>null</code>
     * @return the cleaned text, or <code>null</code> when <code>input</code> is <code>null</code>
     */
    public static String runWordCleaner( String input )
    {
        // Null in, null out: previously this failed with a NullPointerException.
        if( input == null )
        {
            return null ;
        }

        final int length = input.length() ;
        // The buffer never leaves this method, so the unsynchronised
        // StringBuilder is sufficient; the input length is a reasonable
        // starting capacity because most characters map one-to-one.
        StringBuilder cleaned = new StringBuilder( length ) ;

        for( int i = 0 ; i < length ; i++ )
        {
            char current = input.charAt( i ) ;
            String replacement = replacementFor( current ) ;
            if( replacement == null )
            {
                cleaned.append( current ) ;
            }
            else
            {
                cleaned.append( replacement ) ;
            }
        }
        return cleaned.toString() ;
    }

    /**
     * Looks up the ASCII replacement for a single character.
     *
     * @param c the character to look up
     * @return the replacement text, or <code>null</code> when <code>c</code>
     *         has no replacement and should be kept as it is
     */
    private static String replacementFor( char c )
    {
        switch( c )
        {
            // --- 0x82-0x9C: presumably Windows-1252 punctuation bytes that
            // --- were decoded one-to-one into chars (TODO confirm with callers).
            case 0x82 : return "," ;
            case 0x83 : return "f" ;
            case 0x84 : return ",," ;
            case 0x85 : return "..." ;
            case 0x88 : return "^" ;
            case 0x89 : return "ppt" ;
            case 0x8B : return "<" ;
            case 0x8C : return "Oe" ;
            case 0x91 :
            case 0x92 : return "'" ;
            case 0x93 :
            case 0x94 : return "\"" ;
            case 0x95 : return "*" ;
            case 0x96 : return "-" ;
            case 0x97 : return "--" ;
            case 0x98 : return "~" ;
            case 0x99 : return "TM" ;
            case 0x9B : return ">" ;
            case 0x9C : return "oe" ;

            // --- Latin-1 symbols: copyright, registered, vulgar fractions.
            case 0xA9 : return "(c)" ;
            case 0xAE : return "(r)" ;
            case 0xBC : return "1/4" ;
            case 0xBD : return "1/2" ;
            case 0xBE : return "3/4" ;

            // --- U+2010-U+2027 (decimal 8208-8231): dashes, quotes, bullets, dots.
            case 0x2010 :
            case 0x2011 : return "-" ;
            // NOTE(review): U+2013 yields "--" although 0x96 above yields "-";
            // kept exactly as the original table had it.
            case 0x2013 :
            case 0x2014 :
            case 0x2015 : return "--" ;
            case 0x2016 : return "||" ;
            case 0x2017 : return "_" ;
            case 0x2018 :
            case 0x2019 : return "'" ;
            case 0x201A : return "," ;
            case 0x201B : return "'" ;
            case 0x201C :
            case 0x201D : return "\"" ;
            case 0x201E : return ",," ;
            case 0x201F : return "\"" ;
            case 0x2022 : return "*" ;
            case 0x2023 : return ">" ;
            case 0x2024 : return "*" ;
            case 0x2025 : return ".." ;
            case 0x2026 : return "..." ;
            case 0x2027 : return "-" ;

            // --- 0xF04A-0xF04C (decimal 61514-61516): private-use code points
            // --- rendered here as text emoticons.
            case 0xF04A : return ":-)" ;
            case 0xF04B : return ":-|" ;
            case 0xF04C : return ":-(" ;

            default : return null ;
        }
    }
}