finished implementing `...send_unicode_sequence()`

(for now, as always - :) )
partial-rewrite
Ben Blazak 2013-05-28 01:38:59 -07:00
parent 115ef0ff4f
commit 29446dfb24
5 changed files with 180 additions and 95 deletions

View File

@ -299,6 +299,17 @@
(http://www.usb.org/developers/devclass_docs/Hut1_12v2.pdf)
: pdf (from <http://www.usb.org/developers/hidpage>)
### UTF-8
* [Reading Unicode (UTF-8) in C]
(http://zaemis.blogspot.com/2011/06/reading-unicode-utf-8-by-hand-in-c.html)
A short discussion on the subject.
* [wikipedia: Hexadecimal Code Input (for the 3 major OSs)]
(http://en.wikipedia.org/wiki/Unicode_input#Hexadecimal_code_input)
Note that both OS X and Windows require a little OS side setup for this to
work.
## Other People's Code

View File

@ -23,6 +23,7 @@
// - macros
// - chorded keys
// - timed keys
// - automatic repetition of utf-8 sequence keys
// - layers
// - making layouts
// - changing the meaning of the LEDs

View File

@ -57,13 +57,6 @@ void kb__led__logical_off(char led) {
KEYS__LAYER__NUM_PUSH(10, 3);
KEYS__LAYER__NUM_POP(10);
// TODO
void P(u_yinyng)(void) {
// key_functions__send_unicode_sequence( PSTR("☯ hello world :)") );
key_functions__send_unicode_sequence( PSTR("") );
}
void R(u_yinyng)(void) {}
// ----------------------------------------------------------------------------
// layout
@ -81,7 +74,7 @@ static _layout_t _layout = {
K, nop,
// left hand ...... ......... ......... ......... ......... ......... .........
equal, 1, 2, 3, 4, 5, esc,
tab, q, w, e, r, t, u_yinyng,
tab, q, w, e, r, t, lpu1l1,
bkslash, a, s, d, f, g,
shL2kcap, z, x, c, v, b, lpupo1l1,
guiL, grave, bkslash, arrowL, arrowR,

View File

@ -30,18 +30,7 @@ void key_functions__jump_to_bootloader (void);
// special
void key_functions__toggle_capslock (void);
// --- TODO ---
void key_functions__send_unicode_sequence (const uint8_t * string);
// TODO
/*
* Implementation notes:
* - We use `uint8_t *` instead of `char *` because the signedness of `char` is
* implementation defined (and, actually, signed by default with avr-gcc,
* which is not what we want if we're going to be doing bitwise operations
* and comparisons). It appears that one can give `char *` arguments to
* functions requiring `uint8_t *` ones without the compiler even giving a
* warning, so this works out.
*/
void key_functions__send_unicode_sequence (const char * string);
// ----------------------------------------------------------------------------
@ -107,3 +96,73 @@ void key_functions__send_unicode_sequence (const uint8_t * string);
* the state of both shift keys.
*/
// === key_functions__send_unicode_sequence() ===
/** functions/key_functions__send_unicode_sequence/description
* Send the "unicode sequence" for each character in `string`
*
* This function is, relative to the rest of life, extremely unportable. Sorry
* about that: I looked, but I couldn't find a better way to do it. I'm
* including it in the hope that it will be useful anyway.
*
* Arguments:
* - `string`: A pointer to a valid UTF-8 string in PROGMEM
*
*
* Operating system considerations:
*
* - This function should work on OS X and Windows (after enabling "hexadecimal
* code input" in the OS), but probably will not work on Linux (2 out of 3 of
* the major OSs is better than nothing...). If you're using Linux and you
* want to flip that around :) please modify the function to send Linux
* friendly start and end sequences. See [this Wikipedia article]
* (http://en.wikipedia.org/wiki/Unicode_input#Hexadecimal_code_input) for
* more information.
*
* - On Windows (per the Wikipedia article above): Make sure the registry key
* `HKEY_CURRENT_USER\Control Panel\Input Method\EnableHexNumpad` has a
* string value of `1`. If it does not, fix it, then reboot (or log off/on,
* in Windows 7+)
*
* - On OS X: open "System Preferences", navigate to "Language & Text", select
* the "Input Sources" tab (if it isn't selected already), and check the box
* next to "Unicode Hex Input". Make sure this input method is active
* whenever you use this function.
* - I recommend disabling the default (in the US at least) "U.S." input
* method, and just leaving this one active all the time. Note though
* that this will render all the normal "Alt" special characters
* unavailable.
*
*
* Usage notes:
*
* - This function disables all modifier keys on entry, and restores their
* state on exit.
*
* - A "unicode sequence" is holding down "alt", typing "+", typing the 4
* character unicode code for the specified character, then releasing "alt".
* This is done for every character in `string`, even the ones with a
* dedicated USB keycode.
*
* - Characters (and strings) sent with this function do not automatically
* repeat (as normal keys do).
*
* - If you're holding down any of `[0-9A-F]` when this function is called, it
* may not do what you want.
*
* - An easy way to pass a PROGMEM string to this function is to use the
* `PSTR()` macro in `<avr/pgmspace.h>`, as in
*
* key_functions__send_unicode_sequence( PSTR ( "" ) );
*
* or
*
* key_functions__send_unicode_sequence( PSTR (
* "こんにちは世界 γειά σου κόσμε hello world ^_^" ) );
*
* - It's probably better to define a proper macro key than to use this
* function for sending sequences of characters, despite the relative
* inconvenience. But... if you're not concerned about portability, or other
* factors that might arise because of what this function is *actually*
* typing... it's possible to do it this way too... :) lol
*/

View File

@ -6,6 +6,13 @@
/** description
* Implements the "special" section of "../key-functions.h"
*
* Notes:
* - If the USB keyboard modifier state functions turn out to be something that
* would be generally useful, the functionality should be reimplemented in
* ".../firmware/lib/usb" (and removed from here). For now I'm leaving them
* here because it seems better not to encourage messing with modifiers as a
* special group of keys, except for special purposes.
*/
@ -17,7 +24,9 @@
// ----------------------------------------------------------------------------
// TODO: documentation?
/** types/_modifier_state_t/description
* A struct representing the state of the keyboard modifier keys
*/
struct _modifier_state_t {
bool left_control : 1;
bool left_shift : 1;
@ -31,7 +40,12 @@ struct _modifier_state_t {
// ----------------------------------------------------------------------------
// TODO: documentation?
/** functions/_read_modifier_state/description
* Return the state of the modifier keys
*
* Returns:
* - success: A `_modifier_state_t`
*/
static struct _modifier_state_t _read_modifier_state(void) {
return (struct _modifier_state_t) {
.left_control = usb__kb__read_key( KEYBOARD__LeftControl ),
@ -45,7 +59,12 @@ static struct _modifier_state_t _read_modifier_state(void) {
};
}
// TODO: documentation?
/** functions/_set_modifier_state/description
* Set the state of the modifier keys to `state`
*
* Arguments:
* - `state`: A `_modifier_state_t`
*/
static void _set_modifier_state(struct _modifier_state_t state) {
usb__kb__set_key( state.left_control , KEYBOARD__LeftControl );
usb__kb__set_key( state.left_shift , KEYBOARD__LeftShift );
@ -59,7 +78,16 @@ static void _set_modifier_state(struct _modifier_state_t state) {
usb__kb__send_report();
}
// TODO: documentation?
/** functions/_send_hex_digit/description
* Press then release the keycode corresponding to the character (0-9 A-F)
* representing the low 4 bits of `digit` in base 16
*
* Warnings:
* - Drops the high 4 bits, but *does not do bounds checking on the value*
*
* Arguments:
* - `digit`: A `uint8_t` whose low 4 bits represent the character to send
*/
static void _send_hex_digit(uint8_t digit) {
digit &= 0x0F;
@ -77,7 +105,12 @@ static void _send_hex_digit(uint8_t digit) {
void key_functions__toggle_capslock (uint16_t ignore) {
struct _modifier_state_t state = _read_modifier_state();
_set_modifier_state( (struct _modifier_state_t){} );
// -------
struct _modifier_state_t temp_state = state;
temp_state.left_shift = false;
temp_state.right_shift = false;
// -------
_set_modifier_state(temp_state);
// toggle capslock
usb__kb__set_key(true, KEYBOARD__CapsLock);
@ -88,95 +121,83 @@ void key_functions__toggle_capslock (uint16_t ignore) {
_set_modifier_state(state);
}
/** TODO
* --------------------------------------------------------
* UTF-8
* --------------------------------------------------------
* available bits byte 1 byte 2 byte 3 byte 4
* -------------- -------- -------- -------- --------
* 7 0xxxxxxx
* 11 110xxxxx 10xxxxxx
* 16 1110xxxx 10xxxxxx 10xxxxxx
* 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* --------------------------------------------------------
/** functions/key_functions__send_unicode_sequence/description
* Implementation notes:
*
* - We use `uint8_t` instead of `char` when iterating over `string` because
* the signedness of `char` is implementation defined (and, actually, signed
* by default with avr-gcc, which is not what we want if we're going to be
* doing bitwise operations and comparisons).
*
* - We assume, for the most part, that the string is valid modified (i.e.
* null-terminated) UTF-8. This should be a fairly safe assumption, since
* all PROGMEM strings should be generated by the compiler :)
*
* - UTF-8 character format
*
* ----------------------------------------------------------------------
* code points avail. bits byte 1 byte 2 byte 3 byte 4
* --------------- ----------- -------- -------- -------- --------
* 0x0000 - 0x007F 7 0xxxxxxx
* 0x0080 - 0x07FF 11 110xxxxx 10xxxxxx
* 0x0800 - 0xFFFF 16 1110xxxx 10xxxxxx 10xxxxxx
* 0x010000 - 0x10FFFF 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* ----------------------------------------------------------------------
*/
void key_functions__send_unicode_sequence (const uint8_t * string) {
void key_functions__send_unicode_sequence (const char * string) {
struct _modifier_state_t state = _read_modifier_state();
_set_modifier_state( (struct _modifier_state_t){} );
uint8_t c; // for storing the current byte of the character
uint16_t c_full; // for storing the full character
// send string
for (char c = pgm_read_byte(string); c; c = pgm_read_byte(++string)) {
for (c = pgm_read_byte(string); c; c = pgm_read_byte(++string)) {
// send start sequence
// usb__kb__set_key(true, KEYBOARD__LeftAlt ); usb__kb__send_report();
// usb__kb__set_key(true, KEYBOARD__Equal_Plus); usb__kb__send_report();
// usb__kb__set_key(false, KEYBOARD__Equal_Plus); usb__kb__send_report();
// --------------------------------------------------------------------
// uint8_t d = (c >> 4);
//
// _send_hex_digit( c >> 4 ); // e
// _send_hex_digit( d >> 4 ); // f
// _send_hex_digit( d & 0xF ); // e
//
// _send_hex_digit( d == 0xFE ); // 1
// _send_hex_digit( (c >> 4) == 0xFE ); // 0
// _send_hex_digit( ((c >> 4) & 0xF) == 0xE ); // 1
// --------------------------------------------------------------------
// uint8_t d = (c << 4);
// _send_hex_digit( d >> 4 ); // 2
// _send_hex_digit( d & 0xF ); // 0
// --------------------------------------------------------------------
// so, '>>' fills with 'f's ? or with junk?
// '<<' fills with '0's ?
// and '==' only works if you haven't bit shifted the variable?
//
// i think i need to use '&' for testing anyway, now that i think about
// it... it's a much more common way to go about things
// --------------------------------------------------------------------
// 'char' is probably signed; in any case, it doesn't right shift the
// way we want it to
//
// 'uint8_t' on the other hand works :)
//
// but we should probably still use masking instead of shifting; it's
// not more operations, i think; even if we use 'uint8_t' for the type
// of 'c' (which i think we should do also.. it just feels cleaner)
// --------------------------------------------------------------------
// also, the logic of how the bits get put into 'c_full' is wrong.. lol
// --------------------------------------------------------------------
// send character
uint16_t c_full = 0;
if ((c >> 7) == 0b0) {
_send_hex_digit(0xA);
// get character
if (c >> 7 == 0b0) {
// a 1-byte utf-8 character
c_full = c & 0x7F;
} else if ((c >> 5) == 0b110) {
_send_hex_digit(0xB);
c_full = (uint16_t)(c << 6) & 0x1F; c = pgm_read_byte(++string);
c_full |= (uint16_t)(c << 0) & 0x3F;
} else if ((c >> 4) == 0b1110) {
_send_hex_digit(0xC);
c_full = (uint16_t)(c << 12) & 0x0F; c = pgm_read_byte(++string);
c_full |= (uint16_t)(c << 6) & 0x3F; c = pgm_read_byte(++string);
c_full |= (uint16_t)(c << 0) & 0x3F;
} else if (c >> 5 == 0b110) {
// beginning of a 2-byte utf-8 character
// assume the string is valid
c_full = (c & 0x1F) << 6; c = pgm_read_byte(++string);
c_full |= (c & 0x3F) << 0;
} else if (c >> 4 == 0b1110) {
// beginning of a 3-byte utf-8 character
// assume the string is valid
c_full = (c & 0x0F) << 12; c = pgm_read_byte(++string);
c_full |= (c & 0x3F) << 6; c = pgm_read_byte(++string);
c_full |= (c & 0x3F) << 0;
} else if ((c >> 3) == 0b11110) {
_send_hex_digit(0xD);
// beginning of a 4-byte utf-8 character
// this character is too long, we can't send it
// skip this byte, and the next 3
string += 3;
continue;
} else {
// invalid utf-8
// ran across some invalid utf-8
// ignore it, try again next time
continue;
}
_send_hex_digit( c_full >> 12 );
// send start sequence
usb__kb__set_key(true, KEYBOARD__LeftAlt ); usb__kb__send_report();
usb__kb__set_key(true, KEYBOARD__Equal_Plus); usb__kb__send_report();
usb__kb__set_key(false, KEYBOARD__Equal_Plus); usb__kb__send_report();
// send character
_send_hex_digit( (c_full >> 12) );
_send_hex_digit( (c_full >> 8) & 0xF );
_send_hex_digit( (c_full >> 4) & 0xF );
_send_hex_digit( (c_full >> 0) & 0xF );
_send_hex_digit( (c_full ) & 0xF );
// send end sequence
// usb__kb__set_key(false, KEYBOARD__LeftAlt); usb__kb__send_report();
usb__kb__set_key(false, KEYBOARD__LeftAlt); usb__kb__send_report();
}
_set_modifier_state(state);