finished implementing `...send_unicode_sequence()`

(for now, as always - :) )
2013-05-28 01:38:59 -07:00 · 2013-05-28 01:38:59 -07:00 · 29446dfb24
parent 115ef0ff4f
commit 29446dfb24
5 changed files with 180 additions and 95 deletions
--- a/doc/references.md
+++ b/doc/references.md
@ -299,6 +299,17 @@
  (http://www.usb.org/developers/devclass_docs/Hut1_12v2.pdf)
  : pdf (from <http://www.usb.org/developers/hidpage>)

+### UTF-8
+
+* [Reading Unicode (UTF-8) in C]
+  (http://zaemis.blogspot.com/2011/06/reading-unicode-utf-8-by-hand-in-c.html)  
+  A short discussion on the subject.
+
+* [wikipedia: Hexadecimal Code Input (for the 3 major OSs)]
+  (http://en.wikipedia.org/wiki/Unicode_input#Hexadecimal_code_input)  
+  Note that both OS X and Windows require a little OS side setup for this to
+  work.
+  

 ## Other People's Code

--- a/firmware/keyboard/ergodox/layout/common/keys.c.h
+++ b/firmware/keyboard/ergodox/layout/common/keys.c.h
@ -23,6 +23,7 @@
 //   - macros
 //   - chorded keys
 //   - timed keys
+//   - automatic repetition of utf-8 sequence keys
 //   - layers
 //   - making layouts
 //   - changing the meaning of the LEDs
--- a/firmware/keyboard/ergodox/layout/qwerty-kinesis-mod.c
+++ b/firmware/keyboard/ergodox/layout/qwerty-kinesis-mod.c
@ -57,13 +57,6 @@ void kb__led__logical_off(char led) {
 KEYS__LAYER__NUM_PUSH(10, 3);
 KEYS__LAYER__NUM_POP(10);

-// TODO
-void P(u_yinyng)(void) {
-//     key_functions__send_unicode_sequence( PSTR("☯ hello world :)") );
-    key_functions__send_unicode_sequence( PSTR("☯") );
-}
-void R(u_yinyng)(void) {}
-

 // ----------------------------------------------------------------------------
 // layout
@ -81,7 +74,7 @@ static _layout_t _layout = {
       K,    nop,
 // left hand ...... ......... ......... ......... ......... ......... .........
   equal,        1,        2,        3,        4,        5,      esc,
-     tab,        q,        w,        e,        r,        t, u_yinyng,
+     tab,        q,        w,        e,        r,        t,   lpu1l1,
 bkslash,        a,        s,        d,        f,        g,
 shL2kcap,        z,        x,        c,        v,        b, lpupo1l1,
    guiL,    grave,  bkslash,   arrowL,   arrowR,
--- a/firmware/lib/layout/key-functions.h
+++ b/firmware/lib/layout/key-functions.h
@ -30,18 +30,7 @@ void key_functions__jump_to_bootloader (void);

 // special
 void key_functions__toggle_capslock       (void);
-// --- TODO ---
-void key_functions__send_unicode_sequence (const uint8_t * string);
-// TODO
-/*
- * Implementation notes:
- * - We use `uint8_t *` instead of `char *` because the signedness of `char` is
- *   implementation defined (and, actually, signed by default with avr-gcc,
- *   which is not what we want if we're going to be doing bitwise operations
- *   and comparisons).  It appears that one can give `char *` arguments to
- *   functions requiring `uint8_t *` ones without the compiler even giving a
- *   warning, so this works out.
- */
+void key_functions__send_unicode_sequence (const char * string);


 // ----------------------------------------------------------------------------
@ -107,3 +96,73 @@ void key_functions__send_unicode_sequence (const uint8_t * string);
 *   the state of both shift keys.
 */

+// === key_functions__send_unicode_sequence() ===
+/**                  functions/key_functions__send_unicode_sequence/description
+ * Send the "unicode sequence" for each character in `string`
+ *
+ * This function is, relative to the rest of life, extremely unportable. Sorry
+ * about that: I looked, but I couldn't find a better way to do it.  I'm
+ * including it in the hope that it will be useful anyway.
+ *
+ * Arguments:
+ * - `string`: A pointer to a valid UTF-8 string in PROGMEM
+ *
+ *
+ * Operating system considerations:
+ *
+ * - This function should work on OS X and Windows (after enabling "hexadecimal
+ *   code input" in the OS), but probably will not work on Linux (2 out of 3 of
+ *   the major OSs is better than nothing...).  If you're using Linux and you
+ *   want to flip that around :) please modify the function to send Linux
+ *   friendly start and end sequences.  See [this Wikipedia article]
+ *   (http://en.wikipedia.org/wiki/Unicode_input#Hexadecimal_code_input) for
+ *   more information.
+ *
+ * - On Windows (per the Wikipedia article above): Make sure the registry key
+ *   `HKEY_CURRENT_USER\Control Panel\Input Method\EnableHexNumpad` has a
+ *   string value of `1`.  If it does not, fix it, then reboot (or log off/on,
+ *   in Windows 7+)
+ *
+ * - On OS X: open "System Preferences", navigate to "Language & Text", select
+ *   the "Input Sources" tab (if it isn't selected already), and check the box
+ *   next to "Unicode Hex Input".  Make sure this input method is active
+ *   whenever you use this function.
+ *     - I recommend disabling the default (in the US at least) "U.S." input
+ *       method, and just leaving this one active all the time.  Note though
+ *       that this will render all the normal "Alt" special characters
+ *       unavailable.
+ *
+ *
+ * Usage notes:
+ *
+ * - This function disables all modifier keys on entry, and restores their
+ *   state on exit.
+ *
+ * - A "unicode sequence" is holding down "alt", typing "+", typing the
+ *   4-digit hexadecimal Unicode code point of the specified character, then
+ *   releasing "alt".  This is done for every character in `string`, even the
+ *   ones with a dedicated USB keycode.
+ *
+ * - Characters (and strings) sent with this function do not automatically
+ *   repeat (as normal keys do).
+ *
+ * - If you're holding down any of `[0-9A-F]` when this function is called, it
+ *   may not do what you want.
+ *
+ * - An easy way to pass a PROGMEM string to this function is to use the
+ *   `PSTR()` macro in `<avr/pgmspace.h>`, as in
+ *
+ *       key_functions__send_unicode_sequence( PSTR ( "❄" ) );
+ *
+ *   or
+ *
+ *       key_functions__send_unicode_sequence( PSTR (
+ *               "こんにちは世界 γειά σου κόσμε hello world ^_^" ) );
+ *
+ * - It's probably better to define a proper macro key than to use this
+ *   function for sending sequences of characters, despite the relative
+ *   inconvenience.  But... if you're not concerned about portability, or other
+ *   factors that might arise because of what this function is *actually*
+ *   typing... it's possible to do it this way too... :) lol
+ */
+
--- a/firmware/lib/layout/key-functions/special.c
+++ b/firmware/lib/layout/key-functions/special.c
@ -6,6 +6,13 @@

 /**                                                                 description
 * Implements the "special" section of "../key-functions.h"
+ *
+ * Notes:
+ * - If the USB keyboard modifier state functions turn out to be something that
+ *   would be generally useful, the functionality should be reimplemented in
+ *   ".../firmware/lib/usb" (and removed from here).  For now I'm leaving them
+ *   here because it seems better not to encourage messing with modifiers as a
+ *   special group of keys, except for special purposes.
 */


@ -17,7 +24,9 @@

 // ----------------------------------------------------------------------------

-// TODO: documentation?
+/**                                         types/_modifier_state_t/description
+ * A struct representing the state of the keyboard modifier keys
+ */
 struct _modifier_state_t {
    bool left_control  : 1;
    bool left_shift    : 1;
@ -31,7 +40,12 @@ struct _modifier_state_t {

 // ----------------------------------------------------------------------------

-// TODO: documentation?
+/**                                  functions/_read_modifier_state/description
+ * Return the state of the modifier keys
+ *
+ * Returns:
+ * - success: A `_modifier_state_t`
+ */
 static struct _modifier_state_t _read_modifier_state(void) {
    return (struct _modifier_state_t) {
        .left_control  = usb__kb__read_key( KEYBOARD__LeftControl  ),
@ -45,7 +59,12 @@ static struct _modifier_state_t _read_modifier_state(void) {
    };
 }

-// TODO: documentation?
+/**                                   functions/_set_modifier_state/description
+ * Set the state of the modifier keys to `state`
+ *
+ * Arguments:
+ * - `state`: A `_modifier_state_t`
+ */
 static void _set_modifier_state(struct _modifier_state_t state) {
    usb__kb__set_key( state.left_control  , KEYBOARD__LeftControl  );
    usb__kb__set_key( state.left_shift    , KEYBOARD__LeftShift    );
@ -59,7 +78,16 @@ static void _set_modifier_state(struct _modifier_state_t state) {
    usb__kb__send_report();
 }

-// TODO: documentation?
+/**                                       functions/_send_hex_digit/description
+ * Press then release the keycode corresponding to the character (0-9 A-F)
+ * representing the low 4 bits of `digit` in base 16
+ *
+ * Warnings:
+ * - Drops the high 4 bits, but *does not do bounds checking on the value*
+ *
+ * Arguments:
+ * - `digit`: A `uint8_t` whose low 4 bits represent the character to send
+ */
 static void _send_hex_digit(uint8_t digit) {
    digit &= 0x0F;

@ -77,7 +105,12 @@ static void _send_hex_digit(uint8_t digit) {

 void key_functions__toggle_capslock (uint16_t ignore) {
    struct _modifier_state_t state = _read_modifier_state();
-    _set_modifier_state( (struct _modifier_state_t){} );
+    // -------
+    struct _modifier_state_t temp_state = state;
+    temp_state.left_shift = false;
+    temp_state.right_shift = false;
+    // -------
+    _set_modifier_state(temp_state);

    // toggle capslock
    usb__kb__set_key(true,  KEYBOARD__CapsLock);
@ -88,95 +121,83 @@ void key_functions__toggle_capslock (uint16_t ignore) {
    _set_modifier_state(state);
 }

-/** TODO
- *     --------------------------------------------------------
- *      UTF-8
- *     --------------------------------------------------------
- *      available bits  byte 1    byte 2    byte 3    byte 4
- *      --------------  --------  --------  --------  --------
- *                  7   0xxxxxxx
- *                 11   110xxxxx  10xxxxxx
- *                 16   1110xxxx  10xxxxxx  10xxxxxx
- *                 21   11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
- *     --------------------------------------------------------
+/**                  functions/key_functions__send_unicode_sequence/description
+ * Implementation notes:
+ *
+ * - We use `uint8_t` instead of `char` when iterating over `string` because
+ *   the signedness of `char` is implementation defined (and, actually, signed
+ *   by default with avr-gcc, which is not what we want if we're going to be
+ *   doing bitwise operations and comparisons).
+ *
+ * - We assume, for the most part, that the string is valid modified (i.e.
+ *   null-terminated) UTF-8.  This should be a fairly safe assumption, since
+ *   all PROGMEM strings should be generated by the compiler :)
+ *
+ * - UTF-8 character format
+ *
+ *     ----------------------------------------------------------------------
+ *      code points      avail. bits  byte 1    byte 2    byte 3    byte 4
+ *      ---------------  -----------  --------  --------  --------  --------
+ *      0x0000 - 0x007F           7   0xxxxxxx
+ *      0x0080 - 0x07FF          11   110xxxxx  10xxxxxx
+ *      0x0800 - 0xFFFF          16   1110xxxx  10xxxxxx  10xxxxxx
+ *      0x010000 - 0x10FFFF      21   11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
+ *     ----------------------------------------------------------------------
 */
-void key_functions__send_unicode_sequence (const uint8_t * string) {
+void key_functions__send_unicode_sequence (const char * string) {
    struct _modifier_state_t state = _read_modifier_state();
    _set_modifier_state( (struct _modifier_state_t){} );

+    uint8_t  c;       // for storing the current byte of the character
+    uint16_t c_full;  // for storing the full character
+
    // send string
-    for (char c = pgm_read_byte(string); c; c = pgm_read_byte(++string)) {
+    for (c = pgm_read_byte(string); c; c = pgm_read_byte(++string)) {

-        // send start sequence
-//         usb__kb__set_key(true,  KEYBOARD__LeftAlt   ); usb__kb__send_report();
-//         usb__kb__set_key(true,  KEYBOARD__Equal_Plus); usb__kb__send_report();
-//         usb__kb__set_key(false, KEYBOARD__Equal_Plus); usb__kb__send_report();
-
-        // --------------------------------------------------------------------
-//         uint8_t d = (c >> 4);
-// 
-//         _send_hex_digit( c >> 4 );  // e
-//         _send_hex_digit( d >> 4 );  // f
-//         _send_hex_digit( d & 0xF ); // e
-// 
-//         _send_hex_digit( d == 0xFE );               // 1
-//         _send_hex_digit( (c >> 4) == 0xFE );        // 0
-//         _send_hex_digit( ((c >> 4) & 0xF) == 0xE ); // 1
-        // --------------------------------------------------------------------
-//         uint8_t d = (c << 4);
-//         _send_hex_digit( d >> 4 );  // 2
-//         _send_hex_digit( d & 0xF ); // 0
-        // --------------------------------------------------------------------
-        // so, '>>' fills with 'f's ? or with junk?
-        //     '<<' fills with '0's ?
-        // and '==' only works if you haven't bit shifted the variable?
-        //
-        // i think i need to use '&' for testing anyway, now that i think about
-        // it... it's a much more common way to go about things
-        // --------------------------------------------------------------------
-        // 'char' is probably signed; in any case, it doesn't right shift the
-        // way we want it to
-        //
-        // 'uint8_t' on the other hand works :)
-        //
-        // but we should probably still use masking instead of shifting; it's
-        // not more operations, i think; even if we use 'uint8_t' for the type
-        // of 'c' (which i think we should do also.. it just feels cleaner)
-        // --------------------------------------------------------------------
-        // also, the logic of how the bits get put into 'c_full' is wrong.. lol
-        // --------------------------------------------------------------------
-
-        // send character
-        uint16_t c_full = 0;
-        if ((c >> 7) == 0b0) {
-            _send_hex_digit(0xA);
+        // get character
+        if (c >> 7 == 0b0) {
+            // a 1-byte utf-8 character
            c_full = c & 0x7F;
-        } else if ((c >> 5) == 0b110) {
-            _send_hex_digit(0xB);
-            c_full  = (uint16_t)(c <<  6) & 0x1F; c = pgm_read_byte(++string);
-            c_full |= (uint16_t)(c <<  0) & 0x3F;
-        } else if ((c >> 4) == 0b1110) {
-            _send_hex_digit(0xC);
-            c_full  = (uint16_t)(c << 12) & 0x0F; c = pgm_read_byte(++string);
-            c_full |= (uint16_t)(c <<  6) & 0x3F; c = pgm_read_byte(++string);
-            c_full |= (uint16_t)(c <<  0) & 0x3F;
+
+        } else if (c >> 5 == 0b110) {
+            // beginning of a 2-byte utf-8 character
+            // assume the string is valid
+            c_full  = (c & 0x1F) <<  6; c = pgm_read_byte(++string);
+            c_full |= (c & 0x3F) <<  0;
+
+        } else if (c >> 4 == 0b1110) {
+            // beginning of a 3-byte utf-8 character
+            // assume the string is valid
+            c_full  = (c & 0x0F) << 12; c = pgm_read_byte(++string);
+            c_full |= (c & 0x3F) <<  6; c = pgm_read_byte(++string);
+            c_full |= (c & 0x3F) <<  0;
+
        } else if ((c >> 3) == 0b11110) {
-            _send_hex_digit(0xD);
+            // beginning of a 4-byte utf-8 character
            // this character is too long, we can't send it
            // skip this byte, and the next 3
            string += 3;
            continue;
+
        } else {
-            // invalid utf-8
+            // ran across some invalid utf-8
+            // ignore it, try again next time
            continue;
        }
-        _send_hex_digit(  c_full >> 12        );
+
+        // send start sequence
+        usb__kb__set_key(true,  KEYBOARD__LeftAlt   ); usb__kb__send_report();
+        usb__kb__set_key(true,  KEYBOARD__Equal_Plus); usb__kb__send_report();
+        usb__kb__set_key(false, KEYBOARD__Equal_Plus); usb__kb__send_report();
+
+        // send character
+        _send_hex_digit( (c_full >> 12)       );
        _send_hex_digit( (c_full >>  8) & 0xF );
        _send_hex_digit( (c_full >>  4) & 0xF );
-        _send_hex_digit( (c_full >>  0) & 0xF );
+        _send_hex_digit( (c_full      ) & 0xF );

        // send end sequence
-//         usb__kb__set_key(false, KEYBOARD__LeftAlt); usb__kb__send_report();
+        usb__kb__set_key(false, KEYBOARD__LeftAlt); usb__kb__send_report();
    }

    _set_modifier_state(state);