// macintosh.hh // This file is part of libpbe; see http://anyterm.org/ // (C) 2009 Philip Endecott // Distributed under the Boost Software License, Version 1.0: // // Permission is hereby granted, free of charge, to any person or organization // obtaining a copy of the software and accompanying documentation covered by // this license (the "Software") to use, reproduce, display, distribute, // execute, and transmit the Software, and to prepare derivative works of the // Software, and to permit third-parties to whom the Software is furnished to // do so, all subject to the following: // // The copyright notices in the Software and this entire statement, including // the above license grant, this restriction and the following disclaimer, // must be included in all copies of the Software, in whole or in part, and // all derivative works of the Software, unless such copies or derivative // works are solely in the form of machine-executable object code generated by // a source language processor. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. #ifndef libpbe_charset_conv_macintosh_hh #define libpbe_charset_conv_macintosh_hh #include "charset/charset_t.hh" #include "charset/char_t.hh" #include "charset/charset_traits.hh" #include "charset/char_conv.hh" #include #include namespace pbe { // This file implements character conversions to and from the macintosh (aka // macroman) character set. // Conversions to Unicode characters // --------------------------------- // All macintosh characters correspond to "basic multilingual plane" unicode // characters, i.e. they fit in a 16-bit ucs2 character. // This is an ASCII superset: characters 0 to 127 inclusive map directly to // Unicode. Tables are used to map characters 128 to 255: typedef char16_t macintosh_table_t[128]; // character n in [n-128]. extern macintosh_table_t macintosh_table; // This table is automatically generated from data from unicode.org. static inline int macintosh_to_ucs ( char8_t c ) { int i = static_cast(static_cast(c)); return (i<128) ? i : macintosh_table[i-128]; } template struct char_conv { char16_t operator() ( char8_t c, charset_traits::state_t&, charset_traits::state_t& ) { return macintosh_to_ucs(c); } }; template struct char_conv { char32_t operator() ( char8_t c, charset_traits::state_t&, charset_traits::state_t& ) { return macintosh_to_ucs(c); } }; // Conversion from Unicode characters // ---------------------------------- // We break the unicode space into pages and have one table // for each ucs page. // These tables are generated dynamically only as needed by invoking // the reverse functions above. template static inline const char8_t* mk_ucs_to_macintosh_page_table() { char8_t* table = new char8_t[256]; // never deleted std::fill(table,table+256,0); // 0 = no equivalent for (int c=128; c<256; ++c) { int unichar = macintosh_to_ucs(c); if ((unichar>>8) == page) { table[unichar&0xff] = c; } } return table; } template static inline char8_t char_conv_ucs_to_macintosh_lookup ( uint8_t point ) { /*FIXME THREAD SAFE*/ static const char8_t* table_p = mk_ucs_to_macintosh_page_table(); char8_t c = table_p[point]; if (c==0) { return error_policy::no_equivalent(page<<8 | point); } return c; } template static inline char8_t ucs_to_macintosh ( int c ) { if (c<=127) { return c; } else { int page = c>>8; uint8_t point = c&0xff; switch (page) { // These are the only pages that have any characters in any macintosh character sets. // FIXME we could use specialisation to consider only those pages that apply to // a partiuclar character set. case 0x00: return char_conv_ucs_to_macintosh_lookup<0x00,error_policy>(point); case 0x01: return char_conv_ucs_to_macintosh_lookup<0x01,error_policy>(point); case 0x02: return char_conv_ucs_to_macintosh_lookup<0x02,error_policy>(point); case 0x03: return char_conv_ucs_to_macintosh_lookup<0x03,error_policy>(point); case 0x20: return char_conv_ucs_to_macintosh_lookup<0x20,error_policy>(point); case 0x21: return char_conv_ucs_to_macintosh_lookup<0x21,error_policy>(point); case 0x22: return char_conv_ucs_to_macintosh_lookup<0x22,error_policy>(point); case 0x25: return char_conv_ucs_to_macintosh_lookup<0x25,error_policy>(point); case 0xf8: return char_conv_ucs_to_macintosh_lookup<0xf8,error_policy>(point); case 0xfb: return char_conv_ucs_to_macintosh_lookup<0xfb,error_policy>(point); default: return error_policy::no_equivalent(c); } } } template struct char_conv { char8_t operator() ( char16_t c, charset_traits::state_t&, charset_traits::state_t& ) { return ucs_to_macintosh(c); } }; template struct char_conv { char8_t operator() ( char32_t c, charset_traits::state_t&, charset_traits::state_t& ) { return ucs_to_macintosh(c); } }; }; #endif