7627 |
08 Mar 19 |
nicklas |
1 |
package net.sf.basedb.util.charset; |
7627 |
08 Mar 19 |
nicklas |
2 |
|
7627 |
08 Mar 19 |
nicklas |
3 |
import java.nio.charset.Charset; |
7627 |
08 Mar 19 |
nicklas |
4 |
import java.nio.charset.spi.CharsetProvider; |
7627 |
08 Mar 19 |
nicklas |
5 |
import java.util.ArrayList; |
7627 |
08 Mar 19 |
nicklas |
6 |
import java.util.Collection; |
7627 |
08 Mar 19 |
nicklas |
7 |
import java.util.Iterator; |
7627 |
08 Mar 19 |
nicklas |
8 |
import java.util.List; |
7627 |
08 Mar 19 |
nicklas |
9 |
|
7627 |
08 Mar 19 |
nicklas |
10 |
import net.sf.basedb.core.Config; |
7627 |
08 Mar 19 |
nicklas |
11 |
import net.sf.basedb.util.Enumeration; |
7627 |
08 Mar 19 |
nicklas |
12 |
import se.raek.charset.Utf8WithFallbackCharsetProvider; |
7627 |
08 Mar 19 |
nicklas |
13 |
|
7627 |
08 Mar 19 |
nicklas |
14 |
/** |
7629 |
11 Mar 19 |
nicklas |
Helper class for working with charsets and enabling the UTF-8 with fallback charsets: |
7627 |
08 Mar 19 |
nicklas |
16 |
|
7629 |
11 Mar 19 |
nicklas |
* X-UTF-8_with_ISO-8859-1_fallback |
7629 |
11 Mar 19 |
nicklas |
* X-UTF-8_with_windows-1252_fallback |
7629 |
11 Mar 19 |
nicklas |
19 |
|
7627 |
08 Mar 19 |
nicklas |
@author nicklas |
7627 |
08 Mar 19 |
nicklas |
@since 3.15 |
7627 |
08 Mar 19 |
nicklas |
22 |
*/ |
7627 |
08 Mar 19 |
nicklas |
23 |
public class CharsetUtil |
7627 |
08 Mar 19 |
nicklas |
24 |
{ |
7627 |
08 Mar 19 |
nicklas |
25 |
|
7627 |
08 Mar 19 |
nicklas |
26 |
/** |
7627 |
08 Mar 19 |
nicklas |
The provider that is providing the UTF-8 with fallback characeter sets. |
7627 |
08 Mar 19 |
nicklas |
28 |
*/ |
7627 |
08 Mar 19 |
nicklas |
29 |
public static final CharsetProvider UTF8_WITH_FALLBACK = new Utf8WithFallbackCharsetProvider(); |
7627 |
08 Mar 19 |
nicklas |
30 |
|
7627 |
08 Mar 19 |
nicklas |
31 |
/** |
7627 |
08 Mar 19 |
nicklas |
Get a character set by name. This will first look if it is a UTF-8 with fallback charset |
7627 |
08 Mar 19 |
nicklas |
and if not check the system {@link Charset#forName(String)}. |
7627 |
08 Mar 19 |
nicklas |
34 |
*/ |
7627 |
08 Mar 19 |
nicklas |
35 |
public static Charset getCharset(String name) |
7627 |
08 Mar 19 |
nicklas |
36 |
{ |
7627 |
08 Mar 19 |
nicklas |
37 |
if (name == null) name = Config.getCharset(); |
7627 |
08 Mar 19 |
nicklas |
38 |
Charset cs = UTF8_WITH_FALLBACK.charsetForName(name); |
7627 |
08 Mar 19 |
nicklas |
39 |
return cs == null ? Charset.forName(name) : cs; |
7627 |
08 Mar 19 |
nicklas |
40 |
} |
7627 |
08 Mar 19 |
nicklas |
41 |
|
7627 |
08 Mar 19 |
nicklas |
42 |
/** |
7627 |
08 Mar 19 |
nicklas |
Get all charsets that {@link Config#getAllCharsets()} returns and optionally include |
7627 |
08 Mar 19 |
nicklas |
the fallback charsets. Note that the fallback charsets are automatically sorted just |
7627 |
08 Mar 19 |
nicklas |
below the UTF-8 character set. |
7627 |
08 Mar 19 |
nicklas |
46 |
*/ |
7627 |
08 Mar 19 |
nicklas |
47 |
public static List<String> getAllCharsets(boolean includeFallbacks) |
7627 |
08 Mar 19 |
nicklas |
48 |
{ |
7627 |
08 Mar 19 |
nicklas |
49 |
List<String> charsets = new ArrayList<>(Config.getAllCharsets()); |
7627 |
08 Mar 19 |
nicklas |
50 |
|
7627 |
08 Mar 19 |
nicklas |
51 |
if (includeFallbacks) |
7627 |
08 Mar 19 |
nicklas |
52 |
{ |
7627 |
08 Mar 19 |
nicklas |
53 |
int utf8 = charsets.indexOf("UTF-8")+1; |
7627 |
08 Mar 19 |
nicklas |
54 |
if (utf8 > 0) |
7627 |
08 Mar 19 |
nicklas |
55 |
{ |
7627 |
08 Mar 19 |
nicklas |
56 |
Iterator<Charset> it = UTF8_WITH_FALLBACK.charsets(); |
7627 |
08 Mar 19 |
nicklas |
57 |
while (it.hasNext()) |
7627 |
08 Mar 19 |
nicklas |
58 |
{ |
7627 |
08 Mar 19 |
nicklas |
59 |
charsets.add(utf8++, it.next().name()); |
7627 |
08 Mar 19 |
nicklas |
60 |
} |
7627 |
08 Mar 19 |
nicklas |
61 |
} |
7627 |
08 Mar 19 |
nicklas |
62 |
} |
7627 |
08 Mar 19 |
nicklas |
63 |
return charsets; |
7627 |
08 Mar 19 |
nicklas |
64 |
} |
7627 |
08 Mar 19 |
nicklas |
65 |
|
7627 |
08 Mar 19 |
nicklas |
66 |
/** |
7627 |
08 Mar 19 |
nicklas |
Convert charset names to an enumeration with display-friendly key/title |
7627 |
08 Mar 19 |
nicklas |
pairs. |
7627 |
08 Mar 19 |
nicklas |
69 |
*/ |
7627 |
08 Mar 19 |
nicklas |
70 |
public static Enumeration<String, String> asEnumeration(Collection<String> charsets) |
7627 |
08 Mar 19 |
nicklas |
71 |
{ |
7627 |
08 Mar 19 |
nicklas |
72 |
Enumeration<String, String> e = new Enumeration<String, String>(); |
7627 |
08 Mar 19 |
nicklas |
73 |
for (String name : charsets) |
7627 |
08 Mar 19 |
nicklas |
74 |
{ |
7627 |
08 Mar 19 |
nicklas |
75 |
Charset cs = getCharset(name); |
7627 |
08 Mar 19 |
nicklas |
76 |
e.add(cs.name(), cs.displayName()); |
7627 |
08 Mar 19 |
nicklas |
77 |
} |
7627 |
08 Mar 19 |
nicklas |
78 |
e.lock(); |
7627 |
08 Mar 19 |
nicklas |
79 |
return e; |
7627 |
08 Mar 19 |
nicklas |
80 |
} |
7627 |
08 Mar 19 |
nicklas |
81 |
|
7627 |
08 Mar 19 |
nicklas |
82 |
} |