Note: This website is archived. For up-to-date information about D projects and development, please visit wiki.dlang.org.

Changeset 2190

Show
Ignore:
Timestamp:
11/23/10 11:42:57 (14 years ago)
Author:
Masahiro Nakagawa
Message:

issue 5247: std.utf.stride() should not return 0xFF

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/docsrc/changelog.dd

    r2189 r2190  
    3737    $(LI $(BUGZILLA 4445): roundTo!ubyte(255.0) throws) 
    3838    $(LI $(BUGZILLA 4638): Regression: new writeln does not recognize "wstring toString") 
    3939    $(LI $(BUGZILLA 5053): Better error message for cyclic dependencies.) 
    4040    $(LI $(BUGZILLA 5054): Splitter example doesn't work) 
    4141    $(LI $(BUGZILLA 5120): ICE(mtype.c) void associative arrays) 
    4242    $(LI $(BUGZILLA 5131): Segfault(expression.c) opAssign and associative arrays (AA) are broken for types != this) 
    4343    $(LI $(BUGZILLA 5133): dmd fails to build rdmd (problem with startsWith)) 
    4444    $(LI $(BUGZILLA 5154): Class Range does not work in writeln) 
    4545    $(LI $(BUGZILLA 5163): meaningless error message with front() applied to void[].) 
    4646    $(LI $(BUGZILLA 5220): Make std.conv.ConvError an Exception instead of an Error; $(RED deprecated ConvError and ConvOverflowError) with ConvException and ConvOverflowException. Note that any code depending on the fact that these exceptions were Error gets broken.) 
     47    $(LI $(BUGZILLA 5247): std.utf.stride() should not return 0xFF) 
    4748    ) 
    4849) 
    4950 
    5051<div id=version> 
    5152$(UL 
    5253    $(NEW 050) 
    5354    $(NEW 049) 
    5455    $(NEW 048) 
    5556    $(NEW 047) 
    5657    $(NEW 046) 
  • trunk/phobos/std/utf.d

    r2189 r2190  
    128128    assert(!isValidDchar(cast(dchar)0x00DC00)); 
    129129    assert(!isValidDchar(cast(dchar)0x00DFFF)); 
    130130    assert(isValidDchar(cast(dchar)0x00FFFE)); 
    131131    assert(isValidDchar(cast(dchar)0x00FFFF)); 
    132132    assert(isValidDchar(cast(dchar)0x01FFFF)); 
    133133    assert(isValidDchar(cast(dchar)0x10FFFF)); 
    134134    assert(!isValidDchar(cast(dchar)0x110000)); 
    135135} 
    136136 
    137137 
    138 @safe pure nothrow 
     138@safe pure 
    139139{ 
    140140 
    141141private immutable ubyte[256] UTF8stride = 
    142142[ 
    143143    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
    144144    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
    145145    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
    146146    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
    147147    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
    148148    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
    … …
    155155    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 
    156156    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 
    157157    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 
    158158    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF, 
    159159]; 
    160160 
    161161/** 
    162162 * stride() returns the length of a UTF-8 sequence starting at index $(D_PARAM i) 
    163163 * in string $(D_PARAM s). 
    164164 * Returns: 
    165  *  The number of bytes in the UTF-8 sequence or 
    166  *  0xFF meaning s[i] is not the start of of UTF-8 sequence. 
     165 *  The number of bytes in the UTF-8 sequence. 
     166 * Throws: 
     167 *  UtfException if s[i] is not the start of the UTF-8 sequence. 
    167168 */ 
    168169uint stride(in char[] s, size_t i) 
    169170{ 
    170     return UTF8stride[s[i]]; 
     171    immutable result = UTF8stride[s[i]]; 
     172    if (result == 0xFF) 
     173        throw new UtfException("Not the start of the UTF-8 sequence"); 
     174    return result; 
    171175} 
    172176 
    173177/** 
    174178 * stride() returns the length of a UTF-16 sequence starting at index $(D_PARAM i) 
    175179 * in string $(D_PARAM s). 
    176180 */ 
    177 uint stride(in wchar[] s, size_t i) 
     181nothrow uint stride(in wchar[] s, size_t i) 
    178182{ 
    179183    immutable uint u = s[i]; 
    180184    return 1 + (u >= 0xD800 && u <= 0xDBFF); 
    181185} 
    182186 
    183187/** 
    184188 * stride() returns the length of a UTF-32 sequence starting at index $(D_PARAM i) 
    185189 * in string $(D_PARAM s). 
    186190 * Returns: The return value will always be 1. 
    187191 */ 
    188 uint stride(in dchar[] s, size_t i) 
     192nothrow uint stride(in dchar[] s, size_t i) 
    189193{ 
    190194    return 1; 
    191195} 
    192196 
    193 }  // stride functions are @safe, pure and nothrow 
     197}  // stride functions are @safe and pure 
    194198 
    195199 
    196200@safe pure 
    197201{ 
    198202 
    199203/******************************************* 
    200204 * Given an index $(D_PARAM i) into an array of characters $(D_PARAM s[]), 
    201205 * and assuming that index $(D_PARAM i) is at the start of a UTF character, 
    202206 * determine the number of UCS characters up to that index $(D_PARAM i). 
    203207 */