> /opt/ruby-1.9.1/bin/irb irb(main):001:0> RUBY_VERSION => "1.9.1" irb(main):002:0> Encoding.constants.find_all{|const|Encoding.const_get(const).is_a?(Encoding)} => [:ASCII_8BIT, :Big5, :BIG5, :CP949, :Emacs_Mule, :EMACS_MULE, :EUC_JP, :EUC_KR, :EUC_TW, :GB18030, :GBK, :ISO_8859_1, :ISO_8859_2, :ISO_8859_3, :ISO_8859_4,:ISO_8859_5, :ISO_8859_6, :ISO_8859_7, :ISO_8859_8, :ISO_8859_9, :ISO_8859_10, :ISO_8859_11, :ISO_8859_13, :ISO_8859_14, :ISO_8859_15, :ISO_8859_16, :KOI8_R, :KOI8_U, :Shift_JIS, :SHIFT_JIS, :US_ASCII, :UTF_8, :UTF_16BE, :UTF_16LE, :UTF_32BE, :UTF_32LE, :Windows_1251, :WINDOWS_1251, :BINARY, :IBM437, :CP437, :IBM737, :CP737, :IBM775, :CP775, :CP850, :IBM850, :IBM852, :CP852, :IBM855, :CP855, :IBM857, :CP857, :IBM860, :CP860, :IBM861, :CP861, :IBM862, :CP862, :IBM863, :CP863,:IBM864, :CP864, :IBM865, :CP865, :IBM866, :CP866, :IBM869, :CP869, :Windows_1258, :WINDOWS_1258, :CP1258, :GB1988, :MacCentEuro, :MACCENTEURO, :MacCroatian, :MACCROATIAN, :MacCyrillic, :MACCYRILLIC, :MacGreek, :MACGREEK, :MacIceland, :MACICELAND, :MacRoman, :MACROMAN, :MacRomania, :MACROMANIA, :MacThai, :MACTHAI, :MacTurkish, :MACTURKISH, :MacUkraine, :MACUKRAINE, :CP950, :Stateless_ISO_2022_JP,:STATELESS_ISO_2022_JP, :EucJP, :EUCJP, :EucJP_ms, :EUCJP_MS, :EUC_JP_MS, :CP51932, :EucKR, :EUCKR, :EucTW, :EUCTW, :EUC_CN, :EucCN, :EUCCN, :GB12345, :CP936, :ISO_2022_JP, :ISO2022_JP, :ISO_2022_JP_2, :ISO2022_JP2, :ISO8859_1, :Windows_1252, :WINDOWS_1252, :CP1252, :ISO8859_2, :Windows_1250, :WINDOWS_1250, :CP1250, :ISO8859_3, :ISO8859_4, :ISO8859_5, :ISO8859_6, :Windows_1256, :WINDOWS_1256, :CP1256, :ISO8859_7, :Windows_1253, :WINDOWS_1253, :CP1253, :ISO8859_8, :Windows_1255, :WINDOWS_1255, :CP1255, :ISO8859_9, :Windows_1254, :WINDOWS_1254, :CP1254, :ISO8859_10, :ISO8859_11, :TIS_620, :Windows_874, :WINDOWS_874, :CP874, :ISO8859_13, :Windows_1257, :WINDOWS_1257, :CP1257, :ISO8859_14, :ISO8859_15, :ISO8859_16, :CP878, :SJIS, :Windows_31J, :WINDOWS_31J, :CP932, :CsWindows31J, 
:CSWINDOWS31J, :MacJapanese, :MACJAPANESE, :MacJapan, :MACJAPAN, :ASCII, :ANSI_X3_4_1968, :UTF_7, :CP65000, :CP65001, :UTF8_MAC, :UTF_8_MAC, :UCS_2BE, :UCS_4BE, :UCS_4LE, :CP1251]
Second, the String class has three methods with similar names and similar behaviors: force_encoding, encode! and encode. The following session shows how they differ from one another.
irb(main):003:0> a0, b0, c0 = 'ü', 'ü', 'ü' => ["ü", "ü", "ü"] irb(main):004:0> [a0, b0, c0].collect(&:encoding) => [#<Encoding:UTF-8>, #<Encoding:UTF-8>, #<Encoding:UTF-8>] irb(main):005:0> a1 = a0.force_encoding(Encoding::EUC_JP) => "ü" irb(main):006:0> b1 = b0.encode!(Encoding::EUC_JP) => "���" irb(main):007:0> c1 = c0.encode(Encoding::EUC_JP) => "���" irb(main):008:0> [a1, b1, c1].collect(&:encoding) => [#<Encoding:EUC-JP>, #<Encoding:EUC-JP>, #<Encoding:EUC-JP>] irb(main):009:0> a0 => "ü" irb(main):010:0> b0 => "���" irb(main):011:0> c0 => "ü" irb(main):012:0> [a0, b0, c0].collect(&:encoding) => [#<Encoding:EUC-JP>, #<Encoding:EUC-JP>, #<Encoding:UTF-8>] irb(main):013:0> a2, b2, c2 = [a1, b1, c1].collect{|str| str.encode(Encoding::UTF_8)} => ["端", "ü", "ü"] irb(main):014:0> [a2, b2, c2].collect(&:encoding) => [#<Encoding:UTF-8>, #<Encoding:UTF-8>, #<Encoding:UTF-8>]
force_encoding and encode! are destructive methods: they modify the receiver in place (a0 and b0 now report EUC-JP), whereas encode returns a new string and leaves the original unchanged (c0 is still UTF-8). Note also that force_encoding only relabels the existing bytes without converting them, which is why a2 comes back as "端" instead of "ü", while encode! and encode actually transcode the bytes.
No comments:
Post a Comment