Note: This website is archived. For up-to-date information about D projects and development, please visit wiki.dlang.org.

root/trunk/phobos/std/string.d

Revision 2354, 87.4 kB (checked in by andrei, 14 years ago)

Improvements to replicate(); documented splitter() for strings; renamed replace() in place to replaceInPlace(); removed replace() that takes void* in the last position; moved replace() from string to array and generalized it; attached constraint to functional.not; more cleanup of std.string; improved std.algorithm.util and count to accept ranges; improved constraint in std.algorithm.remove

  • Property svn:eol-style set to native
Line 
1 // Written in the D programming language.
2
3 /**
4 String handling functions. Objects of types $(D _string), $(D
5 wstring), and $(D dstring) are value types and cannot be mutated
6 element-by-element. For using mutation during building strings, use
7 $(D char[]), $(D wchar[]), or $(D dchar[]). The $(D *_string) types
8 are preferable because they don't exhibit undesired aliasing, thus
9 making code more robust.
10
11 Macros: WIKI = Phobos/StdString
12
13 Copyright: Copyright Digital Mars 2007-.
14
15 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
16
17 Authors: $(WEB digitalmars.com, Walter Bright), $(WEB erdani.org,
18 Andrei Alexandrescu)
19     
20 $(B $(RED IMPORTANT NOTE:)) Beginning with version 2.052, the
21 following symbols have been generalized beyond strings and moved to
22 different modules. This action was prompted by the fact that
23 generalized routines belong better in other places, although they
24 still work for strings as expected. In order to use moved symbols, you
25 will need to import the respective modules as follows:
26
27 $(BOOKTABLE ,
28
29 $(TR $(TH Symbol) $(TH Comment))
30
31 $(TR $(TD $(D cmp)) $(TD Moved to $(XREF algorithm, cmp) and
32 generalized to work for all input ranges and accept a custom
33 predicate.))
34
35 $(TR $(TD $(D count)) $(TD Moved to $(XREF algorithm, count) and
36 generalized to accept a custom predicate.))
37
38 $(TR $(TD $(D replace)) $(TD Moved to $(XREF array, replace).))
39
40 $(TR $(TD $(D ByCodeUnit)) $(TD Removed.))
41
42 $(TR $(TD $(D insert)) $(TD Use $(XREF array, insert) instead.))
43
44 $(TR $(TD $(D join)) $(TD Use $(XREF array, join) instead.))
45
46 $(TR $(TD $(D repeat)) $(TD Use $(XREF array, replicate) instead.))
47
48 $(TR $(TD $(D replace)) $(TD Use $(XREF array, replace) instead.))
49
50 $(TR $(TD $(D replaceSlice)) $(TD Use $(XREF array, replace) instead.))
51
52 $(TR $(TD $(D split)) $(TD Use $(XREF array, split) instead.))
53 )
54   
55 */
56 module std.string;
57
58 //debug=string;                 // uncomment to turn on debugging printf's
59
60 import core.exception : onRangeError;
61 import core.vararg, core.stdc.stdio, core.stdc.stdlib,
62     core.stdc.string/*, std.algorithm*/,
63     std.conv, std.ctype, std.encoding, std.exception, std.format,
64     std.functional, std.metastrings, std.range, std.regex, std.stdio,
65     std.traits, std.typetuple, std.uni, std.utf;
66 public import std.algorithm : startsWith, endsWith, cmp, count;
67 public import std.array : join, split;
68
// C runtime wide-string routines; only declared when building for Windows.
version (Windows)
{
    extern (C) size_t wcslen(in wchar *);
    extern (C) int wcscmp(in wchar *, in wchar *);
}
74
75 /* ************* Exceptions *************** */
76
77 /// Thrown on errors in string functions.
78 typedef Exception StringException;
79
80 /* ************* Constants *************** */
81
/// 0..9A..F
immutable char[16] hexdigits = "0123456789ABCDEF";
/// 0..9
immutable char[10] digits = "0123456789";
/// 0..7
immutable char[8] octdigits = "01234567";
/// a..z
immutable char[26] lowercase = "abcdefghijklmnopqrstuvwxyz";
/// A..Za..z
immutable char[52] letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz";
/// A..Z
immutable char[26] uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/// ASCII whitespace
immutable char[6] whitespace = " \t\v\r\n\f";

/// UTF line separator
enum dchar LS = '\u2028';
/// UTF paragraph separator
enum dchar PS = '\u2029';

/// Newline sequence for this system
version (Windows)
{
    immutable char[2] newline = "\r\n";
}
else version (Posix)
{
    immutable char[1] newline = "\n";
}
99
/**********************************
 * Returns true if c is whitespace: one of the ASCII characters
 * space, tab, vertical tab, carriage return, line feed or form feed,
 * or one of the Unicode separators $(D LS) and $(D PS).
 */

bool iswhite(dchar c)
{
    switch (c)
    {
    case ' ', '\t', '\v', '\r', '\n', '\f':
    case PS, LS:
        return true;

    default:
        return false;
    }
}
110
/**********************************
Compare two ranges of characters lexicographically. $(D _cmp) is case
sensitive, $(D icmp) is case insensitive. $(D cmp) is aliased from
$(XREF algorithm, _cmp). $(D icmp) works like $(D cmp) but converts
both characters to lowercase (with $(D toUniLower)) prior to applying
$(D pred). Technically $(D icmp(r1, r2)) is equivalent to
$(D cmp!"toUniLower(a) < toUniLower(b)"(r1, r2)).

The ranges are compared code point by code point, so two strings that
hold the same text in different encodings (for example a $(D string)
and a $(D wstring)) compare equal. When one range is a proper prefix
of the other, the shorter one orders first.

Params:
    pred = ordering predicate, as a string or a callable
    s1 = first range of $(D dchar)
    s2 = second range of $(D dchar)

Returns (for $(D pred = "a < b")):

$(BOOKTABLE,
$(TR $(TD $(D < 0))  $(TD $(D s1 < s2) ))
$(TR $(TD $(D == 0)) $(TD $(D s1 == s2)))
$(TR $(TD $(D > 0))  $(TD $(D s1 > s2)))
)

Only the sign of the result is meaningful.
 */

int icmp(alias pred = "a < b", S1, S2)(S1 s1, S2 s2)
if (is(Unqual!(ElementType!S1) == dchar) && is(Unqual!(ElementType!S2) == dchar))
{
    // pred is a value (or function), not a type, so its type must be
    // inspected through typeof before comparing it against the default.
    static if (is(typeof(pred) : string))
        enum isLessThan = pred == "a < b";
    else
        enum isLessThan = false;

    // Walk both ranges in lock step, one code point at a time.
    for (; !s1.empty && !s2.empty; s1.popFront(), s2.popFront())
    {
        dchar c1 = toUniLower(s1.front), c2 = toUniLower(s2.front);
        static if (isLessThan)
        {
            if (c1 != c2) return cast(int) c1 - cast(int) c2;
        }
        else
        {
            if (binaryFun!pred(c1, c2)) return -1;
            if (binaryFun!pred(c2, c1)) return 1;
        }
    }

    // One range is a prefix of the other (or both are exhausted). Decide
    // by what is left rather than by .length, which counts code units
    // and differs between encodings of the same text.
    if (!s1.empty) return 1;
    return s2.empty ? 0 : -1;
}
152
unittest
{
    // Case folding has to work beyond ASCII. (The lowercase literal had
    // been damaged by an encoding round trip; it is U+00FC.)
    assert(icmp("Ü", "ü") == 0, "Über failure");

    sizediff_t result;

    debug(string) printf("string.icmp.unittest\n");

    // same encoding on both sides
    result = icmp("abc", "abc");
    assert(result == 0);
    result = icmp("ABC", "abc");
    assert(result == 0);
    //    result = icmp(null, null);// Commented out since icmp()
    //    assert(result == 0);      // has become templated.
    result = icmp("", "");
    assert(result == 0);
    result = icmp("abc", "abcd");
    assert(result < 0);
    result = icmp("abcd", "abc");
    assert(result > 0);
    result = icmp("abc", "abd");
    assert(result < 0);
    result = icmp("bbc", "abc");
    assert(result > 0);

    // UTF-8 against UTF-16
    result = icmp("abc", "abc"w);
    assert (result == 0);
    result = icmp("ABC"w, "abc");
    assert (result == 0);
    result = icmp("", ""w);
    assert (result == 0);
    result = icmp("abc"w, "abcd");
    assert(result < 0);
    result = icmp("abcd", "abc"w);
    assert(result > 0);
    result = icmp("abc", "abd");
    assert(result < 0);
    result = icmp("bbc"w, "abc");
    assert(result > 0);

    // UTF-32 involved
    result = icmp("aaa", "aaaa"d);
    assert(result < 0);
    result = icmp("aaaa"w, "aaa"d);
    assert(result > 0);
    result = icmp("aaa"d, "aaa"w);
    assert(result == 0);
}
197
/*********************************
 * Convert array of chars $(D s[]) to a C-style 0-terminated string.
 * $(D s[]) must not contain embedded 0's. If $(D s) is $(D null) or
 * empty, a string containing only $(D '\0') is returned.
 *
 * This overload always allocates a fresh buffer of $(D s.length + 1)
 * characters.
 */

immutable(char)* toStringz(const(char)[] s)
in
{
    // No precondition is enforced: the assert below contradicts the
    // unittests!
    //assert(memchr(s.ptr, 0, s.length) == null,
    //text(s.length, ": `", s, "'"));
}
out (result)
{
    if (result)
    {
        // Trailing zeros in s are not counted, because strlen stops at
        // the first of them.
        size_t significant = s.length;
        for (; significant > 0 && s[significant - 1] == 0; --significant)
        {
        }
        assert(strlen(result) == significant);
        assert(memcmp(result, s.ptr, significant) == 0);
    }
}
body
{
    // Peeking past the end of s[] to look for an existing terminator is
    // not reliable for arbitrary character arrays, so copy and terminate.
    auto buffer = new char[s.length + 1];
    buffer[0 .. $ - 1] = s;
    buffer[$ - 1] = 0;

    return assumeUnique(buffer).ptr;
}
245
/// Ditto
immutable(char)* toStringz(string s)
{
    if (s.empty) return "".ptr;

    /* Look at the character just past the end of s[]: if it is 0 the
     * string is already terminated and no copy is needed. The compiler
     * puts a 0 past the end of static strings, and the storage
     * allocator puts a 0 past the end of newly allocated char[]'s.
     */
    immutable onePast = s.ptr + s.length;

    // Reading *onePast is only attempted when its address is not a
    // multiple of 4. An address that is a multiple of 4 is conservatively
    // assumed to possibly start a new, unreadable block of memory; any
    // other address is taken to be readable.
    immutable bool misaligned = (cast(size_t) onePast & 3) != 0;
    if (misaligned && *onePast == 0)
        return s.ptr;

    // Fall back to the copying overload.
    return toStringz(cast(const char[]) s);
}
255     immutable p = s.ptr + s.length;
256     // Is p dereferenceable? A simple test: if the p points to an
257     // address multiple of 4, then conservatively assume the pointer
258     // might be pointing to a new block of memory, which might be
259     // unreadable. Otherwise, it's definitely pointing to valid
260     // memory.
261     if ((cast(size_t) p & 3) && *p == 0)
262         return s.ptr;
263     return toStringz(cast(const char[]) s);
264 }
265
266 unittest
267 {
268     debug(string) printf("string.toStringz.unittest\n");
269
270     auto p = toStringz("foo");
271     assert(strlen(p) == 3);
272     const(char) foo[] = "abbzxyzzy";
273     p = toStringz(foo[3..5]);
274     assert(strlen(p) == 2);
275
276     string test = "";
277     p = toStringz(test);
278     assert(*p == 0);
279
280     test = "\0";
281     p = toStringz(test);
282     assert(*p == 0);
283
284     test = "foo\0";
285     p = toStringz(test);
286     assert(p[0] == 'f' && p[1] == 'o' && p[2] == 'o' && p[3] == 0);
287 }
288
/**
   Flag telling a search function whether to distinguish between upper
   and lower case.
*/
enum CaseSensitive
{
    no,
    yes
}
293
/**
   $(D indexOf): find first occurrence of c in string s.  $(D
   lastIndexOf): find last occurrence of c in string s. $(D
   CaseSensitive.yes) means the searches are case sensitive.

   Params:
       s = string to search
       c = character to look for
       cs = whether upper and lower case are distinguished

   Returns: Index in $(D s) where $(D c) is found, -1 if not found.
   The index counts code units of $(D s), not code points.
*/
sizediff_t
indexOf(Char)(in Char[] s, dchar c, CaseSensitive cs = CaseSensitive.yes)
if (isSomeChar!Char)
{
    if (cs == CaseSensitive.yes)
    {
        static if (Char.sizeof == 1)
        {
            if (c <= 0x7F)
            {                                               // Plain old ASCII
                // An empty (possibly null) string cannot contain c; do
                // not hand a null pointer to memchr.
                if (s.length == 0)
                    return -1;
                auto hit = cast(char*) memchr(s.ptr, c, s.length);
                if (hit)
                    return hit - cast(char*) s.ptr;
                return -1;
            }
        }

        // c is a universal character: decode s code point by code
        // point. The index is a size_t so it cannot be truncated on
        // 64-bit targets.
        foreach (size_t i, dchar c2; s)
        {
            if (c == c2)
                return i;
        }
    }
    else
    {
        if (c <= 0x7F)
        {                                                   // Plain old ASCII
            // An ASCII character can only match a single code unit, so
            // the string is scanned without decoding.
            auto c1 = cast(char) std.ctype.tolower(c);

            foreach (size_t i, Char c2; s)
            {
                auto c3 = cast(Char) std.ctype.tolower(c2);
                if (c1 == c3)
                    return i;
            }
        }
        else
        {                                                   // c is a universal character
            auto c1 = std.uni.toUniLower(c);

            foreach (size_t i, dchar c2; s)
            {
                auto c3 = std.uni.toUniLower(c2);
                if (c1 == c3)
                    return i;
            }
        }
    }
    return -1;
}
353
unittest
{
    debug(string) printf("string.find.unittest\n");

    // Case-sensitive single-character search, for every string width.
    foreach (S; TypeTuple!(string, wstring, dstring))
    {
        S s = null;
        assert(indexOf(s, cast(dchar)'a') == -1);

        s = "def";
        assert(indexOf(s, cast(dchar)'a') == -1);

        s = "abba";
        assert(indexOf(s, cast(dchar)'a') == 0);

        s = "def";
        assert(indexOf(s, cast(dchar)'f') == 2);
    }
}
376
377
378 /******************************************
379  * ditto
380  */
381
unittest
{
    debug(string) printf("string.indexOf.unittest\n");

    // Case-insensitive single-character search.
    foreach (S; TypeTuple!(string, wstring, dstring))
    {
        S s = null;
        assert(indexOf(s, cast(dchar)'a', CaseSensitive.no) == -1);
        assert(indexOf("def", cast(dchar)'a', CaseSensitive.no) == -1);
        assert(indexOf("Abba", cast(dchar)'a', CaseSensitive.no) == 0);
        assert(indexOf("def", cast(dchar)'F', CaseSensitive.no) == 2);

        string sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

        assert(indexOf("def", cast(char)'f', CaseSensitive.no) == 2);

        assert(indexOf(sPlts, cast(char)'P', CaseSensitive.no) == 23);
        assert(indexOf(sPlts, cast(char)'R', CaseSensitive.no) == 2);
    }
}
409
410 // @@@BUG@@@ This declaration shouldn't be needed
411 //int lastIndexOf(in char[] s, in char[] c, CaseSensitive cs = CaseSensitive.yes);
412
/******************************************
 * Find the last occurrence of character $(D c) in string $(D s).
 *
 * Params:
 *     s = string to search
 *     c = character to look for
 *     cs = whether upper and lower case are distinguished
 *
 * Returns: Index (in code units) in $(D s) at which the last occurrence
 * of $(D c) starts, -1 if not found.
 */

sizediff_t lastIndexOf(Char)(const(Char)[] s, dchar c,
        CaseSensitive cs = CaseSensitive.yes)
if (isSomeChar!Char)
{
    if (cs == CaseSensitive.yes)
    {
        if (c <= 0x7F || Char.sizeof == 4)
        {
            // Plain old ASCII or UTF32: c occupies exactly one code
            // unit, so the raw array can be scanned backwards.
            for (auto i = s.length; i-- != 0; )
            {
                if (s[i] == c)
                    return i;
            }
            return -1;
        }

        // c is a universal character: decode from the back.
        for (; !s.empty; s.popBack())
        {
            if (s.back == c)
            {
                // popBack leaves s.ptr untouched, so the position has
                // to come from the length: after dropping the matching
                // code point, the remaining length is its start index.
                s.popBack();
                return s.length;
            }
        }
    }
    else
    {
        if (c <= 0x7F)
        {                                                   // Plain old ASCII
            immutable dchar asciiLower = std.ctype.tolower(c);

            for (auto i = s.length; i-- != 0; )
            {
                // Compare the full code unit. Narrowing it to char would
                // make wide code units such as 0x0161 look like 'a'.
                immutable dchar unit = s[i];
                if (std.ctype.tolower(unit) == asciiLower)
                    return i;
            }
        }
        else
        {                                                   // c is a universal character
            immutable uniLower = std.uni.toUniLower(c);

            for (; !s.empty; s.popBack())
            {
                if (toUniLower(s.back) == uniLower)
                {
                    // See above: the start index is the length that
                    // remains once the match is removed.
                    s.popBack();
                    return s.length;
                }
            }
        }
    }
    return -1;
}
467
unittest
{
    debug(string) printf("string.rfind.unittest\n");

    // Case-sensitive search for the last occurrence of a character.
    assert(lastIndexOf(cast(string) null, cast(dchar)'a') == -1);
    assert(lastIndexOf("def", cast(dchar)'a') == -1);
    assert(lastIndexOf("abba", cast(dchar)'a') == 3);
    assert(lastIndexOf("def", cast(dchar)'f') == 2);
}
483
unittest
{
    debug(string) printf("string.irfind.unittest\n");

    // Case-insensitive search for the last occurrence of a character.
    assert(lastIndexOf(cast(string) null, cast(dchar)'a', CaseSensitive.no) == -1);
    assert(lastIndexOf("def", cast(dchar)'a', CaseSensitive.no) == -1);
    assert(lastIndexOf("AbbA", cast(dchar)'a', CaseSensitive.no) == 3);
    assert(lastIndexOf("def", cast(dchar)'F', CaseSensitive.no) == 2);

    string sPlts = "Mars: the fourth Rock (Planet) from the Sun.";

    assert(lastIndexOf("def", cast(char)'f', CaseSensitive.no) == 2);

    assert(lastIndexOf(sPlts, cast(char)'M', CaseSensitive.no) == 34);
    assert(lastIndexOf(sPlts, cast(char)'S', CaseSensitive.no) == 40);
}
509
/**
 * Reinterprets a string as an array of its code units. The result type
 * is the string's type with the character type replaced by $(D ubyte),
 * $(D ushort), or $(D uint) depending on the character width; an
 * $(D immutable), $(D const) or $(D shared) qualifier on the character
 * type is carried over.
 *
 * Example:
----
string s = "hello";
static assert(is(typeof(representation(s)) == immutable(ubyte)[]));
----
 */
/*private*/ auto representation(Char)(Char[] s) if (isSomeChar!Char)
{
    // Unsigned integer of the same width as the character type
    static if (Char.sizeof == 1) alias ubyte Unit;
    else static if (Char.sizeof == 2) alias ushort Unit;
    else static if (Char.sizeof == 4) alias uint Unit;
    else static assert(false); // can't happen due to isSomeChar!Char

    // Re-apply the character type's qualifier to the unit type
    static if (is(Char == immutable))
        return cast(immutable(Unit)[]) s;
    else static if (is(Char == const))
        return cast(const(Unit)[]) s;
    else static if (is(Char == shared))
        return cast(shared(Unit)[]) s;
    else
        return cast(Unit[]) s;
}
541
unittest
{
    // An immutable(char)[] is represented as immutable(ubyte)[].
    immutable(char)[] s = "hello";
    static assert(is(typeof(representation(s)) == immutable(ubyte)[]));
}
547
/**
$(D indexOf) find first occurrence of $(D sub[]) in string $(D s[]).
lastIndexOf find last occurrence of $(D sub[]) in string $(D s[]).

$(D CaseSensitive cs) controls whether the comparisons are case
sensitive or not.

Returns:

Index in $(D s) where $(D sub) is found, $(D -1) if not found.
 */

sizediff_t
indexOf(Char1, Char2)(const(Char1)[] s, const(Char2)[] sub,
        CaseSensitive cs = CaseSensitive.yes)
if (isSomeChar!Char1 && isSomeChar!Char2)
{
    // find() returns the part of s that starts at the match, or an
    // empty range when there is none.
    const(Char1)[] tail;
    if (cs != CaseSensitive.yes)
    {
        tail = std.algorithm.find!
            ((dchar a, dchar b){return toUniLower(a) == toUniLower(b);})
            (s, sub);
    }
    else
    {
        tail = std.algorithm.find(s, sub);
    }

    if (tail.empty)
        return -1;
    // Offset of the match, counted in code units of s
    return tail.ptr - s.ptr;
}
578
unittest
{
    debug(string) printf("string.find.unittest\n");

    // Case-sensitive substring search.
    foreach (S; TypeTuple!(string, wstring, dstring))
    {
        S s = null;
        assert(indexOf(s, "a") == -1);
        assert(indexOf("def", "a") == -1);
        assert(indexOf("abba", "a") == 0);
        assert(indexOf("def", "f") == 2);
        assert(indexOf("dfefffg", "fff") == 3);
        assert(indexOf("dfeffgfff", "fff") == 6);
    }
}
602
unittest
{
    debug(string) printf("string.ifind.unittest\n");

    // Case-insensitive substring search.
    foreach (S; TypeTuple!(string, wstring, dstring))
    {
        S s = null;
        assert(indexOf(s, "a", CaseSensitive.no) == -1);
        assert(indexOf("def", "a", CaseSensitive.no) == -1);

        // keep the value around so a failure can report it
        sizediff_t i = indexOf("abba", "a", CaseSensitive.no);
        assert(i == 0, text(i));

        assert(indexOf("def", "f", CaseSensitive.no) == 2);
        assert(indexOf("dfefffg", "fff", CaseSensitive.no) == 3);
        assert(indexOf("dfeffgfff", "fff", CaseSensitive.no) == 6);
    }

    string sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
    string sMars = "Who\'s \'My Favorite Maritian?\'";

    assert(indexOf(sMars, "MY fAVe", CaseSensitive.no) == -1);
    assert(indexOf(sMars, "mY fAVOriTe", CaseSensitive.no) == 7);
    assert(indexOf(sPlts, "mArS:", CaseSensitive.no) == 0);
    assert(indexOf(sPlts, "rOcK", CaseSensitive.no) == 17);
    assert(indexOf(sPlts, "Un.", CaseSensitive.no) == 41);
    assert(indexOf(sPlts, sPlts, CaseSensitive.no) == 0);

    assert(indexOf("\u0100", "\u0100", CaseSensitive.no) == 0);

    // Thanks to Carlos Santander B. and zwang
    assert(indexOf("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
            "page-break-before", CaseSensitive.no) == -1);
}
650
/******************************************
 * Find the last occurrence of $(D sub[]) in string $(D s[]).
 *
 * The two strings are compared code point by code point, so they may
 * use different encodings. With $(D CaseSensitive.no) both sides are
 * passed through $(D toUniLower) before being compared.
 *
 * Params:
 *     s = string to search
 *     sub = string to look for
 *     cs = whether upper and lower case are distinguished
 *
 * Returns: Index (in code units) in $(D s) where the last occurrence of
 * $(D sub) starts, $(D -1) if not found. An empty $(D sub) is found at
 * $(D s.length).
 */

sizediff_t lastIndexOf(Char1, Char2)(in Char1[] s, in Char2[] sub,
        CaseSensitive cs = CaseSensitive.yes) if (isSomeChar!Char1 && isSomeChar!Char2)
{
    if (sub.length == 0)
        return s.length;

    immutable bool foldCase = cs == CaseSensitive.no;

    // Try every start position from the back; the first hit is the last
    // occurrence.
    for (size_t pos = s.length; pos-- != 0; )
    {
        // A match can only start on a code point boundary. Skipping the
        // other positions also keeps the decoder away from the middle
        // of a multi-unit sequence.
        static if (Char1.sizeof == 1)
        {
            if ((s[pos] & 0xC0) == 0x80)        // UTF-8 continuation byte
                continue;
        }
        else static if (Char1.sizeof == 2)
        {
            if (s[pos] >= 0xDC00 && s[pos] <= 0xDFFF) // UTF-16 low surrogate
                continue;
        }

        // Does s[pos .. $] start with sub?
        const(Char1)[] hay = s[pos .. $];
        const(Char2)[] needle = sub;
        for (; !needle.empty && !hay.empty; hay.popFront(), needle.popFront())
        {
            dchar a = hay.front, b = needle.front;
            if (foldCase)
            {
                a = toUniLower(a);
                b = toUniLower(b);
            }
            if (a != b)
                break;
        }
        // The needle is used up only when every one of its code points
        // matched.
        if (needle.empty)
            return pos;
    }
    return -1;
}
709
unittest
{
    debug(string) printf("string.lastIndexOf.unittest\n");

    // Case-sensitive search for the last occurrence of a substring.
    assert(lastIndexOf("abcdefcdef", "c") == 6);
    assert(lastIndexOf("abcdefcdef", "cd") == 6);
    assert(lastIndexOf("abcdefcdef", "x") == -1);
    assert(lastIndexOf("abcdefcdef", "xy") == -1);
    // the empty string is found at the very end
    assert(lastIndexOf("abcdefcdef", "") == 10);
}
726
727
728 /******************************************
729  * ditto
730  */
731
unittest
{
    debug(string) printf("string.lastIndexOf.unittest\n");

    // Case-insensitive search for the last occurrence of a substring.
    assert(lastIndexOf("abcdefCdef", "c", CaseSensitive.no) == 6);
    assert(lastIndexOf("abcdefCdef", "cD", CaseSensitive.no) == 6);
    assert(lastIndexOf("abcdefcdef", "x", CaseSensitive.no) == -1);
    assert(lastIndexOf("abcdefcdef", "xy", CaseSensitive.no) == -1);
    assert(lastIndexOf("abcdefcdef", "", CaseSensitive.no) == 10);

    string sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
    string sMars = "Who\'s \'My Favorite Maritian?\'";

    assert(lastIndexOf("abcdefcdef", "c", CaseSensitive.no) == 6);
    assert(lastIndexOf("abcdefcdef", "cd", CaseSensitive.no) == 6);
    assert(lastIndexOf( "abcdefcdef", "def", CaseSensitive.no) == 7);

    assert(lastIndexOf(sMars, "RiTE maR", CaseSensitive.no) == 14);
    assert(lastIndexOf(sPlts, "FOuRTh", CaseSensitive.no) == 10);
    assert(lastIndexOf(sMars, "whO\'s \'MY", CaseSensitive.no) == 0);
    assert(lastIndexOf(sMars, sMars, CaseSensitive.no) == 0);
}
767
768
/************************************
 * Convert string s[] to lower case.
 *
 * Returns: $(D s) itself when it contains no upper case character,
 * otherwise a newly allocated string.
 */

S tolower(S)(S s) if (isSomeString!S)
{
    foreach (start, dchar probe; s)
    {
        if (std.uni.isUniUpper(probe))
        {
            // First upper case character found: keep everything before
            // it verbatim and convert the remainder.
            auto lowered = s[0 .. start].dup;
            foreach (dchar ch; s[start .. $])
            {
                lowered ~= std.uni.isUniUpper(ch) ? std.uni.toUniLower(ch) : ch;
            }
            return cast(S) lowered;
        }
    }
    // Nothing to convert
    return s;
}
791
/**
   Converts $(D s) to lowercase in place. ASCII letters are overwritten
   directly; when a non-ASCII character is converted, $(D s) is rebound
   to a newly built array.
 */

void tolowerInPlace(C)(ref C[] s) if (isSomeChar!C)
{
    size_t i = 0;
    while (i < s.length)
    {
        immutable unit = s[i];
        if (unit <= 0x7F)
        {
            // ASCII: only 'A'..'Z' need to change
            if ('A' <= unit && unit <= 'Z')
                s[i] = cast(C) (unit + (cast(C)'a' - 'A'));
            ++i;
            continue;
        }

        // wide character
        size_t next = i;
        dchar dc = decode(s, next);
        assert(next > i);
        if (std.uni.isUniUpper(dc))
        {
            // Splice the encoded lower case form in between the
            // untouched head and tail.
            auto toAdd = to!(C[])(std.uni.toUniLower(dc));
            s = s[0 .. i] ~ toAdd  ~ s[next .. $];
            i += toAdd.length;
        }
        else
        {
            i = next;
        }
    }
}
826
unittest
{
    debug(string) printf("string.tolower.unittest\n");

    // plain ASCII
    string s1 = "FoL";
    string s2 = tolower(s1);
    assert(cmp(s2, "fol") == 0, s2);
    assert(s2 != s1);

    char[] s3 = s1.dup;
    tolowerInPlace(s3);
    assert(s3 == s2, s3);

    // Latin Extended-A
    s1 = "A\u0100B\u0101d";
    s2 = tolower(s1);
    s3 = s1.dup;
    assert(cmp(s2, "a\u0101b\u0101d") == 0);
    assert(s2 !is s1);
    tolowerInPlace(s3);
    assert(s3 == s2, s3);

    // Cyrillic
    s1 = "A\u0460B\u0461d";
    s2 = tolower(s1);
    s3 = s1.dup;
    assert(cmp(s2, "a\u0461b\u0461d") == 0);
    assert(s2 !is s1);
    tolowerInPlace(s3);
    assert(s3 == s2, s3);

    // capital I with dot above
    s1 = "\u0130";
    s2 = tolower(s1);
    s3 = s1.dup;
    assert(s2 == "i");
    assert(s2 !is s1);
    tolowerInPlace(s3);
    assert(s3 == s2, s3);

    // Test on wchar and dchar strings.
    assert(tolower("Some String"w) == "some string"w);
    assert(tolower("Some String"d) == "some string"d);
}
870
/************************************
 * Convert string s[] to upper case.
 *
 * Works for every string type accepted by $(D isSomeString), mutable
 * ones such as $(D char[]) included.
 *
 * Returns: $(D s) itself when it contains no lower case character,
 * otherwise a newly allocated string.
 */

S toupper(S)(S s) if (isSomeString!S)
{
    foreach (start, dchar probe; s)
    {
        if (!std.uni.isUniLower(probe)) continue;

        // First lower case character found: keep everything before it
        // verbatim and convert the remainder code point by code point.
        auto result = s[0 .. start].dup;
        foreach (dchar c; s[start .. $])
        {
            if (std.uni.isUniLower(c))
            {
                c = std.uni.toUniUpper(c);
            }
            result ~= c;
        }
        // result is a fresh mutable array; cast it to the caller's
        // string type. (assumeUnique always yields an immutable array
        // and so cannot be returned when S is mutable.)
        return cast(S) result;
    }
    return s;
}
920
/**
   Converts $(D s) to uppercase in place. ASCII letters are overwritten
   directly; when a non-ASCII character is converted, $(D s) is rebound
   to a newly built array.
 */

void toupperInPlace(C)(ref C[] s) if (isSomeChar!C)
{
    size_t i = 0;
    while (i < s.length)
    {
        immutable unit = s[i];
        if (unit <= 0x7F)
        {
            // ASCII: only 'a'..'z' need to change
            if ('a' <= unit && unit <= 'z')
                s[i] = cast(C) (unit - (cast(C)'a' - 'A'));
            ++i;
            continue;
        }

        // wide character
        size_t next = i;
        dchar dc = decode(s, next);
        assert(next > i);
        if (std.uni.isUniLower(dc))
        {
            // Splice the encoded upper case form in between the
            // untouched head and tail.
            auto toAdd = to!(C[])(std.uni.toUniUpper(dc));
            s = s[0 .. i] ~ toAdd  ~ s[next .. $];
            i += toAdd.length;
        }
        else
        {
            i = next;
        }
    }
}
955
unittest
{
    debug(string) printf("string.toupper.unittest\n");

    // plain ASCII
    string s1 = "FoL";
    string s2 = toupper(s1);
    char[] s3 = s1.dup;
    toupperInPlace(s3);
    assert(s3 == s2, s3);
    assert(cmp(s2, "FOL") == 0);
    assert(s2 !is s1);

    // Latin Extended-A
    s1 = "a\u0100B\u0101d";
    s2 = toupper(s1);
    s3 = s1.dup;
    toupperInPlace(s3);
    assert(s3 == s2);
    assert(cmp(s2, "A\u0100B\u0100D") == 0);
    assert(s2 !is s1);

    // Cyrillic
    s1 = "a\u0460B\u0461d";
    s2 = toupper(s1);
    s3 = s1.dup;
    toupperInPlace(s3);
    assert(s3 == s2);
    assert(cmp(s2, "A\u0460B\u0460D") == 0);
    assert(s2 !is s1);
}
984
/********************************************
 * Capitalize first character of string s[], convert rest of string s[]
 * to lower case.
 *
 * Returns: $(D s) itself when it already has that form, otherwise a
 * newly allocated string.
 */

S capitalize(S)(S s) if (isSomeString!S)
{
    Unqual!(typeof(s[0]))[] buffer;
    bool copying = false;

    foreach (size_t pos, dchar ch; s)
    {
        // The first character goes to upper case, all others to lower.
        dchar wanted;
        if (pos == 0)
            wanted = std.uni.toUniUpper(ch);
        else
            wanted = std.uni.toUniLower(ch);

        if (!copying && wanted != ch)
        {
            // First difference: start a copy that holds the unchanged
            // prefix (empty when the very first character differs).
            copying = true;
            buffer = s[0 .. pos].dup;
        }
        if (copying)
            std.utf.encode(buffer, wanted);
    }

    if (copying)
        return cast(S) buffer;
    return s;
}
1024
unittest
{
    debug(string) printf("string.toupper.capitalize\n");

    foreach (S; TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[]))
    {
        // a later character has to change: a new string is produced
        S s1 = to!S("FoL");
        S s2 = capitalize(s1);
        assert(cmp(s2, "Fol") == 0);
        assert(s2 !is s1);

        // nothing to change: the input is handed back
        s2 = capitalize(s1[0 .. 2]);
        assert(cmp(s2, "Fo") == 0);
        assert(s2.ptr == s1.ptr);

        // the first character has to change as well
        s1 = to!S("fOl");
        s2 = capitalize(s1);
        assert(cmp(s2, "Fol") == 0);
        assert(s2 !is s1);
    }
}
1048
1049
/********************************************
 * Capitalize every word of s[] with capitalize().
 * Words are runs of characters separated by ' ', '\t', '\f', '\r', '\n'
 * or '\v'. Leading and trailing whitespace is dropped and each inner run
 * of whitespace becomes a single space.
 */

S capwords(S)(S s) if (isSomeString!S)
{
    Unqual!(typeof(s[0]))[] output;
    bool insideWord = false;
    size_t wordStart = 0;

    foreach (pos; 0 .. s.length)
    {
        immutable unit = s[pos];
        immutable bool blank = unit == ' ' || unit == '\t' || unit == '\f'
            || unit == '\r' || unit == '\n' || unit == '\v';

        if (blank)
        {
            // A blank ends the word in progress, if any.
            if (insideWord)
            {
                output ~= capitalize(s[wordStart .. pos]);
                insideWord = false;
            }
        }
        else if (!insideWord)
        {
            // A new word begins; separate it from the previous one.
            if (output.length)
                output ~= ' ';
            wordStart = pos;
            insideWord = true;
        }
    }

    // Flush a word that runs up to the end of the input.
    if (insideWord)
        output ~= capitalize(s[wordStart .. s.length]);

    return cast(S) output;
}
1098
1099
unittest
{
    debug(string) printf("string.capwords.unittest\n");

    foreach (S; TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[]))
    {
        // Outer blanks vanish, inner runs collapse to one space and only
        // the first letter of each blank-delimited word is upper-cased.
        auto input = to!S("\tfoo abc(aD)*  \t  (q PTT  ");
        S actual = capwords(input);
        assert(cmp(actual, "Foo Abc(ad)* (q Ptt") == 0);
    }
}
1114
/********************************************
 * Repeat $(D s) for $(D n) times. This function is scheduled for
 * deprecation - use $(XREF array, replicate) instead.
 *
 * The call is forwarded unchanged to $(XREF array, replicate), so the
 * result for any $(D s) and $(D n) is whatever that function returns.
 */
S repeat(S)(S s, size_t n)
{
    return std.array.replicate(s, n);
}
1123
unittest
{
    debug(string) printf("string.repeat.unittest\n");

    foreach (S; TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[]))
    {
        // Zero repetitions and a null input both give null.
        assert(repeat(to!S("1234"), 0) is null);
        assert(cmp(repeat(to!S("1234"), 1), "1234") == 0);
        assert(cmp(repeat(to!S("1234"), 2), "12341234") == 0);
        assert(cmp(repeat(to!S("1"), 4), "1111") == 0);
        assert(repeat(cast(S) null, 4) is null);
    }
}
1144
/**************************************
 * Split s[] into an array of lines,
 * using CR, LF, or CR-LF as the delimiter.
 * The delimiter is not included in the line.
 *
 * Each returned line is a slice of s[]. A delimiter at the very end of
 * s[] does not produce an additional empty line.
 */

S[] splitlines(S)(S s)
{
    size_t istart = 0;
    auto result = appender!(S[])();

    // A plain for loop is used on purpose: skipping the LF of a CR-LF pair
    // must advance the real loop counter. Incrementing the variable of a
    // foreach range loop is not a dependable way to skip an element.
    for (size_t i = 0; i < s.length; ++i)
    {
        immutable c = s[i];
        if (c != '\r' && c != '\n')
            continue;

        result.put(s[istart .. i]);

        // Treat CR immediately followed by LF as one delimiter.
        if (c == '\r' && i + 1 < s.length && s[i + 1] == '\n')
            ++i;
        istart = i + 1;
    }

    // Emit the last line when the input does not end with a delimiter.
    if (istart != s.length)
    {
        result.put(s[istart .. $]);
    }

    return result.data;
}
1177
unittest
{
    debug(string) printf("string.splitlines\n");

    foreach (S; TypeTuple!(string, wstring, dstring))
    {
        S text = "\rpeter\n\rpaul\r\njerry\n";

        // Lone CR, lone LF and CR-LF each terminate exactly one line.
        S[] lines = splitlines(text);
        assert(lines.length == 5);
        assert(lines[0].length == 0);
        assert(cmp(lines[1], "peter") == 0);
        assert(lines[2].length == 0);
        assert(cmp(lines[3], "paul") == 0);
        assert(cmp(lines[4], "jerry") == 0);

        // With the trailing LF lopped off the last line is still reported.
        text = text[0 .. text.length - 1];
        lines = splitlines(text);
        assert(lines.length == 5);
        assert(cmp(lines[4], "jerry") == 0);
    }
}
1209
/*****************************************
 * Strips leading or trailing whitespace, or both.
 *
 * $(D stripl) removes leading whitespace, $(D stripr) removes trailing
 * whitespace and $(D strip) removes both. Whitespace is whatever
 * $(D std.ctype.isspace) accepts. The result is a slice of s[].
 */

String stripl(String)(String s)
{
    // size_t rather than uint so that the index covers the full range of
    // s.length on 64-bit targets.
    size_t i = 0;
    while (i < s.length && std.ctype.isspace(s[i]))
        ++i;
    return s[i .. s.length];
}
1224
String stripr(String)(String s) /// ditto
{
    // Walk backwards until a non-whitespace unit is found.
    auto end = s.length;
    while (end > 0)
    {
        if (!std.ctype.isspace(s[end - 1]))
            return s[0 .. end];
        --end;
    }
    // Empty or all-whitespace input yields null.
    return null;
}
1235
String strip(String)(String s) /// ditto
{
    // Trim the front first, then the back of what is left.
    auto frontTrimmed = stripl(s);
    return stripr(frontTrimmed);
}
1240
unittest
{
    // Whitespace on both ends is removed; a string without any surrounding
    // whitespace compares equal to its input.
    assert(strip("  foo\t ") == "foo");
    assert(strip("1") == "1");
}
1246
// Too slow for release mode
// Exercises startsWith over every pairing of the string types with a few
// non-string array types. The whole body is currently compiled out via
// version(none), so this test checks nothing at present.
debug unittest
{
    // fails to compile with: Error: array equality comparison type
    // mismatch, immutable(char)[] vs ubyte[]
    version(none)
    {
        alias TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[])
            StringTypes;
        alias TypeTuple!(ubyte[], int[], double[]) OtherTypes;
        foreach (T1 ; StringTypes)
        {
            foreach (T2 ; StringTypes)
            {
                foreach (T3 ; OtherTypes)
                {
                    // a is a proper prefix of b; c is empty; d is a
                    // non-string array unrelated to a and b.
                    auto a = to!(T1)("abcde"), b = to!(T2)("abcdefgh"),
                        c = to!(T2)("");
                    auto d = to!(T3)([2, 3]);
                    assert(startsWith(b, a));
                    assert(!startsWith(a, b));
                    assert(startsWith(b, c));
                    assert(startsWith(a, c));
                    assert(!startsWith(c, b));
                    assert(!startsWith(c, a));
                    assert(!startsWith(a, d));
                    assert(!startsWith(d, a));
                    assert(!startsWith(b, d));
                    assert(!startsWith(d, b));
                    assert(!startsWith(c, d));
                    assert(startsWith(d, c));
                }
            }
        }
    }
}
1283
// Too slow for release mode
// Exercises endsWith over every pairing of the string types with a few
// non-string array types. Only the aliases are compiled; the checks
// themselves sit inside version(none) and are currently disabled.
debug unittest
{
    alias TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[])
        TestTypes;
    alias TypeTuple!(ubyte[], int[], double[]) OtherTypes;
     // fails to compile with: Error: array equality comparison type
     // mismatch, immutable(char)[] vs ubyte[]
    version(none)
    {
        foreach (T1 ; TestTypes)
        {
            foreach (T2 ; TestTypes)
            {
                foreach (T3 ; OtherTypes)
                {
                    // a is a proper suffix of b; c is empty; d is a
                    // non-string array unrelated to a and b.
                    auto a = to!(T1)("efgh"), b = to!(T2)("abcdefgh"),
                        c = to!(T2)(""), d = to!(T3)([1, 2]);
                    assert(endsWith(a, a));
                    assert(endsWith(b, b));
                    // writeln(T2.stringof);
                    // writeln(T1.stringof);
                    assert(endsWith(b, a));
                    assert(!endsWith(a, b));
                    assert(endsWith(b, c));
                    assert(endsWith(a, c));
                    assert(!endsWith(c, b));
                    assert(!endsWith(c, a));
                    assert(!endsWith(a, d));
                    assert(!endsWith(d, a));
                    assert(!endsWith(b, d));
                    assert(!endsWith(d, b));
                    assert(!endsWith(c, d));
                    assert(endsWith(d, c));
                }
            }
        }
        // Non-string arrays against each other; both checks are
        // commented out, so this loop only constructs the operands.
        foreach (T1; OtherTypes)
        {
            foreach (T2; OtherTypes)
            {
                auto a = to!(T1)([1, 2]);
                auto b = to!(T2)([0, 1, 2]);
                //assert(!std.string.endsWith(a, b));
                // assert(endsWith(b, a));
            }
        }
    }
}
1333
/*******************************************
 * Returns s[] sans trailing delimiter[], if any.
 * If no delimiter[] is given, removes one trailing CR, LF, or CR-LF,
 * if any. The result is always a slice of s[].
 */

C[] chomp(C)(C[] s)
{
    if (s.length == 0)
        return s;

    switch (s[$ - 1])
    {
    case '\r':
        // A lone trailing CR.
        return s[0 .. $ - 1];

    case '\n':
        // LF, possibly preceded by the CR of a CR-LF pair.
        if (s.length >= 2 && s[$ - 2] == '\r')
            return s[0 .. $ - 2];
        return s[0 .. $ - 1];

    default:
        // No line terminator at the end: nothing to remove.
        return s;
    }
}
1362
// Removes delimiter[] from the end of s[] when s[] ends with it, otherwise
// returns s[] unchanged. An empty or null delimiter removes nothing.
/// Ditto
C[] chomp(C, C1)(C[] s, in C1[] delimiter)
{
    if (!endsWith(s, delimiter))
        return s;

    static if (is(Unqual!C == Unqual!C1))
    {
        // Same element type: the delimiter occupies the same number of
        // elements in s[] as it does in delimiter[].
        return s[0 .. $ - delimiter.length];
    }
    else
    {
        // Different element types (for example a UTF-8 string and a UTF-16
        // delimiter): the two lengths are measured in different units, so
        // drop one element from the back of s[] per element of the
        // delimiter instead of subtracting delimiter.length.
        const(C1)[] rest = delimiter;
        for (; rest.length; rest.popBack())
            s.popBack();
        return s;
    }
}
1372
unittest
{
    debug(string) printf("string.chomp.unittest\n");

    // Single-argument form: at most one line terminator is removed.
    // (chomp(null) is left untested, as before.)
    assert(chomp("hello") == "hello");
    assert(chomp("hello\n") == "hello");
    assert(chomp("hello\r") == "hello");
    assert(chomp("hello\r\n") == "hello");
    assert(chomp("hello\n\r") == "hello\n");
    assert(chomp("hello\n\n") == "hello\n");
    assert(chomp("hello\r\r") == "hello\r");
    assert(chomp("hello\nxxx\n") == "hello\nxxx");

    // Explicit delimiter form.
    // (chomp(null, null) is left untested, as before.)
    assert(chomp("hello", "o") == "hell");
    assert(chomp("hello", "p") == "hello");
    // @@@ BUG IN COMPILER, MUST INSERT CAST
    assert(chomp("hello", cast(string) null) == "hello");
    assert(chomp("hello", "llo") == "he");
}
1409
/**
 * If $(D_PARAM longer.startsWith(shorter)), returns $(D_PARAM longer)
 * with that prefix removed. Otherwise, returns $(D_PARAM longer).
 *
 * When both arguments have the same character type the result is
 * $(D_PARAM longer[shorter.length .. $]).
 */

C1[] chompPrefix(C1, C2)(C1[] longer, C2[] shorter)
{
    if (!startsWith(longer, shorter))
        return longer;

    static if (is(Unqual!C1 == Unqual!C2))
    {
        return longer[shorter.length .. $];
    }
    else
    {
        // The two lengths are measured in different units when the element
        // types differ, so drop one element from the front of longer[] per
        // element of shorter[] instead of slicing by shorter.length.
        for (; shorter.length; shorter.popFront())
            longer.popFront();
        return longer;
    }
}
1420
unittest
{
    // A matching prefix is removed; a non-matching one leaves the input as is.
    string prefix = "abcde";
    string whole = "abcdefgh";
    assert(chompPrefix(whole, prefix) == "fgh");
    assert(chompPrefix(prefix, whole) == "abcde");
}
1427
/***********************************************
 * Returns s[] without its last character, if it has one.
 * A trailing CR-LF pair is removed as a whole.
 * An empty s[] is returned as is.
 */
S chop(S)(S s) if (isSomeString!S)
{
    immutable n = s.length;
    if (n == 0)
        return s;

    immutable bool endsInCrLf =
        n >= 2 && s[n - 2] == '\r' && s[n - 1] == '\n';
    if (endsInCrLf)
        return s[0 .. n - 2];

    // Otherwise drop a single element from the back.
    s.popBack();
    return s;
}
1441
unittest
{
    debug(string) printf("string.chop.unittest\n");

    // Null stays null; otherwise one character or one CR-LF pair goes.
    assert(chop(cast(string) null) is null);
    assert(chop("hello") == "hell");
    assert(chop("hello\r\n") == "hello");
    assert(chop("hello\n\r") == "hello\n");
}
1456
/*******************************************
 * Left justify, right justify, or center string s[]
 * in field width chars wide.
 *
 * The width is compared against s.walkLength(). A string that is
 * already at least that wide is returned unchanged; otherwise a new
 * string padded with spaces is returned.
 */

S ljustify(S)(S s, size_t width) if (isSomeString!S)
{
    immutable chars = s.walkLength();
    if (chars >= width)
        return s;

    // The buffer holds all of s[] plus one space per missing character.
    immutable units = s.length;
    auto padded = new Unqual!(typeof(s[0]))[units + (width - chars)];
    padded[0 .. units] = s;
    padded[units .. $] = ' ';
    return cast(S) padded;
}
1472
/// ditto
S rjustify(S)(S s, size_t width) if (isSomeString!S)
{
    immutable chars = s.walkLength();
    if (chars >= width)
        return s;

    // Spaces go in front, the text occupies the tail of the buffer.
    immutable fill = width - chars;
    auto padded = new Unqual!(typeof(s[0]))[fill + s.length];
    padded[0 .. fill] = ' ';
    padded[fill .. $] = s;
    return cast(S) padded;
}
1484
/// ditto
S center(S)(S s, size_t width) if (isSomeString!S)
{
    immutable chars = s.walkLength();
    if (chars >= width)
        return s;

    // When the padding is odd the extra space ends up on the right.
    immutable fill = width - chars;
    immutable before = fill / 2;
    immutable textEnd = before + s.length;
    auto padded = new Unqual!(typeof(s[0]))[fill + s.length];
    padded[0 .. before] = ' ';
    padded[before .. textEnd] = s;
    padded[textEnd .. $] = ' ';
    return cast(S) padded;
}
1498
unittest
{
    debug(string) printf("string.justify.unittest\n");

    // All four helpers pad the same five-character word to width 8.
    string word = "hello";

    assert(cmp(ljustify(word, 8), "hello   ") == 0);
    assert(cmp(rjustify(word, 8), "   hello") == 0);
    assert(cmp(center(word, 8), " hello  ") == 0);
    assert(cmp(zfill(word, 8), "000hello") == 0);
}
1523
1524
/*****************************************
 * Same as rjustify(), but fill with '0's.
 *
 * A string that is already at least width characters wide, and any
 * call with a width of zero or less, returns s[] unchanged.
 */

S zfill(S)(S s, int width) if (isSomeString!S)
{
    // width is a signed int. Without this guard a negative value would be
    // converted to a huge unsigned number in the comparison and in the
    // allocation size below.
    if (width <= 0)
        return s;

    immutable target = cast(size_t) width;
    immutable len = s.walkLength();
    if (len >= target)
        return s;

    // Zeros go in front, the text occupies the tail of the buffer.
    auto r = new Unqual!(typeof(s[0]))[target + s.length - len];
    r[0 .. $ - s.length] = '0';
    r[$ - s.length .. $] = s;
    return cast(S) r;
}
1540
/**********************************************
 * Returns s[] with sub[] inserted at position index, which must not
 * exceed s.length. Scheduled for deprecation
 * - use $(XREF array, _insert) instead.
 */

S insert(S)(S s, size_t index, S sub)
in
{
    // index is unsigned, so only the upper bound can be violated.
    assert(index <= s.length);
}
body
{
    // The insertion is delegated to std.array.insert, which is handed the
    // local copy of the slice; that copy is then returned.
    std.array.insert(s, index, sub);
    return s;
}
1556
unittest
{
    debug(string) printf("string.insert.unittest\n");

    // Insertion at the front, at the end and in the middle.
    assert(cmp(insert("abcd", 0, "e"), "eabcd") == 0);
    assert(cmp(insert("abcd", 4, "e"), "abcde") == 0);
    assert(cmp(insert("abcd", 2, "ef"), "abefcd") == 0);

    // A null target or a null insertion behaves like an empty string.
    assert(cmp(insert(cast(string) null, 0, "e"), "e") == 0);
    assert(cmp(insert("abcd", 0, cast(string) null), "abcd") == 0);
}
1584
/************************************************
 * Replace tabs with the appropriate number of spaces.
 * tabsize is the distance between tab stops.
 *
 * If str contains no tab, str itself is returned; otherwise a new
 * string is built.
 *
 * NOTE(review): a tabsize of 0 is not guarded against; it reaches
 * $(D column % tabsize) at the first tab.
 */

S expandtabs(S)(S str, size_t tabsize = 8) if (isSomeString!S)
{
    bool changes = false;               // becomes true at the first tab
    Unqual!(typeof(str[0]))[] result;   // only used once changes is true
    int column;                         // characters seen since the last line break
    size_t nspaces;                     // spaces that replace the current tab

    foreach (size_t i, dchar c; str)
    {
        switch (c)
        {
        case '\t':
            // Distance from the current column to the next tab stop.
            nspaces = tabsize - (column % tabsize);
            if (!changes)
            {
                // First tab: copy everything before it, then the spaces.
                changes = true;
                result = null;
                // The length is first set to the input size plus the
                // expansion and then cut back to what is filled so far;
                // presumably this pre-reserves capacity - TODO confirm.
                result.length = str.length + nspaces - 1;
                result.length = i + nspaces;
                result[0 .. i] = str[0 .. i];
                result[i .. i + nspaces] = ' ';
            }
            else
            {
                // Later tabs: grow the result by nspaces spaces.
                sizediff_t j = result.length;
                result.length = j + nspaces;
                result[j .. j + nspaces] = ' ';
            }
            column += nspaces;
            break;

        // Line breaks restart the column count and are copied like any
        // other character. PS and LS are declared elsewhere in this
        // module; presumably the Unicode paragraph and line separators.
        case '\r':
        case '\n':
        case PS:
        case LS:
            column = 0;
            goto L1;

        default:
            column++;
        L1:
            // Characters are only copied once a tab has forced a new result.
            if (changes)
            {
                if (c <= 0x7F)
                    result ~= cast(char)c;
                else
                    std.utf.encode(result, c);
            }
            break;
        }
    }

    return changes ? cast(S) result : str;
}
1644
unittest
{
    debug(string) printf("string.expandtabs.unittest\n");

    // Several tabs on one line, each expanded to the next stop of 8.
    string line = "This \tis\t a fofof\tof list";
    assert(cmp(expandtabs(line, 8),
               "This    is       a fofof        of list") == 0);

    // Inputs without tabs come back unchanged.
    assert(expandtabs(cast(string) null) == null);
    assert(expandtabs("").length == 0);
    assert(expandtabs("a") == "a");

    // A tab at column 0 and a tab in mid-line.
    assert(expandtabs("\t") == "        ");
    assert(expandtabs(  "  ab\tasdf ") == "  ab    asdf ");
    // TODO: need UTF test case
}
1670
1671
/*******************************************
 * Replace spaces in string s with the optimal number of tabs.
 * Trailing spaces or tabs in a line are removed.
 * Params:
 *  s = String to convert.
 *  tabsize = Tab columns are tabsize spaces apart. tabsize defaults to 8.
 * Returns:
 *  s itself (possibly shortened by a slice) when no character had to be
 *  rewritten, otherwise a newly built string.
 */

S entab(S)(S s, size_t tabsize = 8)
{
    bool changes = false;               // true once result holds a copy
    Unqual!(typeof(s[0]))[] result;

    int nspaces = 0;    // spaces in the blank run directly before the cursor
    int nwhite = 0;     // spaces and tabs at the end of the current line so far
    size_t column = 0;         // column number

    foreach (size_t i, dchar c; s)
    {

        // Switches from "return a slice of s" to "build a new string":
        // copies everything before the current character into result.
        void change()
        {
            changes = true;
            result = null;
            result.length = s.length;
            result.length = i;
            result[0 .. i] = s[0 .. i];
        }

        switch (c)
        {
        case '\t':
            nwhite++;
            if (nspaces)
            {
                if (!changes)
                    change();

                // Replace the spaces in front of this tab by the tabs
                // needed to cover the tab stops they crossed.
                sizediff_t j = result.length - nspaces;
                auto ntabs = (((column - nspaces) % tabsize) + nspaces) / tabsize;
                result.length = j + ntabs;
                result[j .. j + ntabs] = '\t';
                nwhite += ntabs - nspaces;
                nspaces = 0;
            }
            column = (column + tabsize) / tabsize * tabsize;
            break;

        case '\r':
        case '\n':
        case PS:
        case LS:
            // Truncate any trailing spaces or tabs
            if (nwhite)
            {
                if (!changes)
                    change();
                result = result[0 .. result.length - nwhite];
            }
            // A new line starts here. Without this reset the whitespace
            // just removed would be counted again at the next line break
            // or at the end of the string (cutting off the line break
            // itself), and tab stops on the following line would be
            // measured from the previous line's column.
            nwhite = 0;
            nspaces = 0;
            column = 0;
            break;

        default:
            if (nspaces >= 2 && (column % tabsize) == 0)
            {
                if (!changes)
                    change();

                // The run of spaces ends exactly on a tab stop: replace
                // it by tabs.
                auto j = result.length - nspaces;
                auto ntabs = (nspaces + tabsize - 1) / tabsize;
                result.length = j + ntabs;
                result[j .. j + ntabs] = '\t';
                nwhite += ntabs - nspaces;
                nspaces = 0;
            }
            if (c == ' ')
            {   nwhite++;
                nspaces++;
            }
            else
            {   nwhite = 0;
                nspaces = 0;
            }
            column++;
            break;
        }
        // The current character itself is always copied once a new
        // string is being built.
        if (changes)
        {
            if (c <= 0x7F)
                result ~= cast(char)c;
            else
                std.utf.encode(result, c);
        }
    }

    // Truncate any trailing spaces or tabs
    if (nwhite)
    {
        if (changes)
            result = result[0 .. result.length - nwhite];
        else
            s = s[0 .. s.length - nwhite];
    }
    // cast(S) instead of assumeUnique: identical for immutable strings and
    // also valid when S is a mutable or const character array.
    return changes ? cast(S) result : s;
}
1776
unittest
{
    debug(string) printf("string.entab.unittest\n");

    // Nothing to convert.
    assert(entab(cast(string) null) == null);
    assert(entab("").length == 0);
    assert(entab("a") == "a");

    // Whole tab widths of spaces, and trailing blanks.
    assert(entab("        ") == "");
    assert(entab("        x") == "\tx");
    assert(entab("  ab    asdf ") == "  ab\tasdf");
    assert(entab("  ab     asdf ") == "  ab\t asdf");
    assert(entab("  ab \t   asdf ") == "  ab\t   asdf");

    // One to eight spaces before a tab collapse into a single tab;
    // a ninth space crosses one more tab stop.
    assert(entab("1234567 \ta") == "1234567\t\ta");
    assert(entab("1234567  \ta") == "1234567\t\ta");
    assert(entab("1234567   \ta") == "1234567\t\ta");
    assert(entab("1234567    \ta") == "1234567\t\ta");
    assert(entab("1234567     \ta") == "1234567\t\ta");
    assert(entab("1234567      \ta") == "1234567\t\ta");
    assert(entab("1234567       \ta") == "1234567\t\ta");
    assert(entab("1234567        \ta") == "1234567\t\ta");
    assert(entab("1234567         \ta") == "1234567\t\t\ta");
    // TODO: need UTF test case
}
1819
1820
1821
/************************************
 * Construct translation table for translate(): a 256-entry table that
 * maps every character to itself, except that from[i] maps to to[i].
 * BUG: only works with ASCII
 */

string maketrans(in char[] from, in char[] to)
in
{
    assert(from.length == to.length);
    assert(from.length <= 128);
    // Both arguments must be pure ASCII.
    foreach (char c; from)
        assert(c <= 0x7F);
    foreach (char c; to)
        assert(c <= 0x7F);
}
body
{
    auto table = new char[256];

    // Start from the identity mapping ...
    foreach (code, ref slot; table)
        slot = cast(char) code;
    // ... then overwrite the requested entries.
    foreach (k, char source; from)
        table[source] = to[k];

    return assumeUnique(table);
}
1852
/******************************************
 * Translate characters in s[] using table created by maketrans().
 * Delete chars in delchars[].
 * Deletion is decided on the original character, before translation.
 * BUG: only works with ASCII
 */

string translate(in char[] s, in char[] transtab, in char[] delchars)
in
{
    assert(transtab.length == 256);
}
body
{
    // dropped[c] is true when c must be removed (all false by default).
    bool[256] dropped;
    foreach (char c; delchars)
        dropped[c] = true;

    // First pass: size of the result.
    size_t kept = 0;
    foreach (char c; s)
    {
        if (!dropped[c])
            ++kept;
    }

    // Second pass: translate the surviving characters.
    auto output = new char[kept];
    size_t next = 0;
    foreach (char c; s)
    {
        if (dropped[c])
            continue;
        output[next] = transtab[c];
        ++next;
    }

    return assumeUnique(output);
}
1895
unittest
{
    debug(string) printf("string.translate.unittest\n");

    // Upper-case a..f and delete 'k' and 'g'.
    string table = maketrans("abcdef", "ABCDEF");
    string actual = translate("The quick dog fox", table, "kg");
    assert(cmp(actual, "ThE quiC Do Fox") == 0);
}
1913
/**
Convert to string. WARNING! This function has been deprecated. Instead
 of $(D toString(x)), you may want to import $(D std.conv) and use $(D
 to!string(x)) instead.

 The template parameters $(D f) and $(D line) default to $(D __FILE__)
 and $(D __LINE__) and are only used to build the compile-time message
 below; the conversion itself is $(D to!string(obj)).
 */
deprecated auto toString(T, string f = __FILE__, uint line = __LINE__)(T obj)
    if (is(typeof(to!string(T.init))))
{
    // Emitted at compile time for every instantiation.
    pragma(msg, "toString("~T.stringof~") called from "~f~"("~ToString!(line)
            ~") is deprecated."
            " Instead you may want to"
            " import std.conv and use to!string(x) instead of toString(x).");
    return to!string(obj);
}
1928
/**
Convert string to integer. WARNING. This function has been
 deprecated. Instead of $(D atoi(s)), you may want to import $(D
 std.conv) and use $(D to!int(s)) instead.

 The template parameters $(D f) and $(D line) default to $(D __FILE__)
 and $(D __LINE__) and are only used to build the compile-time message
 below; the conversion itself is $(D to!int(obj)).
 */
deprecated auto atoi(T, string f = __FILE__, uint line = __LINE__)(T obj)
    if (isSomeString!T)
{
    // Emitted at compile time for every instantiation.
    pragma(msg, "atoi("~T.stringof~") called from "~f~"("~ToString!(line)
            ~") is deprecated."
            " Instead you may want to"
            " import std.conv and use to!int(x) instead of atoi(x).");
    return to!int(obj);
}
1943
unittest
{
    // Converting each character on its own and concatenating the pieces
    // must give back the original text.
    string original = "foo";
    string rebuilt;
    foreach (char c; original)
        rebuilt ~= to!string(c);
    assert(rebuilt == "foo");
}
1955
unittest
{
    debug(string) printf("string.to!string(uint).unittest\n");

    // Zero, one digit, several digits.
    assert(cmp(to!string(0u), "0") == 0);
    assert(cmp(to!string(9u), "9") == 0);
    assert(cmp(to!string(123u), "123") == 0);
}
1975
unittest
{
    debug(string) printf("string.to!string(ulong).unittest\n");

    // Zero, one digit, several digits.
    assert(cmp(to!string(0uL), "0") == 0);
    assert(cmp(to!string(9uL), "9") == 0);
    assert(cmp(to!string(123uL), "123") == 0);
}
1995
unittest
{
    debug(string) printf("string.to!string(int).unittest\n");

    // Non-negative values.
    assert(cmp(to!string(0), "0") == 0);
    assert(cmp(to!string(9), "9") == 0);
    assert(cmp(to!string(123), "123") == 0);

    // Negated values; -0 prints without a sign.
    assert(cmp(to!string(-0), "0") == 0);
    assert(cmp(to!string(-9), "-9") == 0);
    assert(cmp(to!string(-123), "-123") == 0);
}
2027
unittest
{
    debug(string) printf("string.to!string(long).unittest\n");

    // Non-negative values.
    assert(cmp(to!string(0L), "0") == 0);
    assert(cmp(to!string(9L), "9") == 0);
    assert(cmp(to!string(123L), "123") == 0);

    // Negated values; -0L prints without a sign.
    assert(cmp(to!string(-0L), "0") == 0);
    assert(cmp(to!string(-9L), "-9") == 0);
    assert(cmp(to!string(-123L), "-123") == 0);
}
2059
unittest
{
    debug(string) printf("string.to!string(char*).unittest\n");

    // A null pointer converts to the empty string.
    assert(cmp(to!string(cast(char*) null), "") == 0);

    // A zero-terminated buffer converts up to, not including, the zero.
    assert(to!string("foo\0".ptr) == "foo");
}
2076
2077 private:
2078
// @@@BUG@@@ workaround for bugzilla 2479
// Formats the given variadic argument pack into a freshly built string.
string bug2479format(TypeInfo[] arguments, va_list argptr)
{
    char[] buffer;

    // Receives the formatted output one character at a time.
    void sink(dchar c)
    {
        std.utf.encode(buffer, c);
    }

    std.format.doFormat(&sink, arguments, argptr);
    return assumeUnique(buffer);
}
2091
// @@@BUG@@@ workaround for bugzilla 2479
// Formats the given variadic argument pack into the caller's buffer s and
// returns the part of s that was written. Calls onRangeError when s is
// too small.
char[] bug2479sformat(char[] s, TypeInfo[] arguments, va_list argptr)
{
    size_t used = 0;

    // Receives the formatted output one character at a time.
    void sink(dchar c)
    {
        if (c > 0x7F)
        {
            // Non-ASCII: encode to UTF-8 first, then copy if it fits.
            char[4] scratch;
            auto encoded = std.utf.toUTF8(scratch, c);
            if (used + encoded.length > s.length)
                onRangeError("std.string.sformat", 0);
            s[used .. used + encoded.length] = encoded[];
            used += encoded.length;
            return;
        }

        // ASCII: a single code unit.
        if (used >= s.length)
            onRangeError("std.string.sformat", 0);
        s[used] = cast(char) c;
        ++used;
    }

    std.format.doFormat(&sink, arguments, argptr);
    return s[0 .. used];
}
2118 public:
2119
2120
/*****************************************************
 * Format arguments into a string.
 *
 * The variadic arguments are passed on to bug2479format(), which hands
 * them to std.format.doFormat and returns the collected characters.
 */

string format(...)
{
/+ // @@@BUG@@@ Fails due to regression bug 2479.
    char[] s;

    void putc(dchar c)
    {
        std.utf.encode(s, c);
    }

    std.format.doFormat(&putc, _arguments, _argptr);
    return assumeUnique(s);
    +/
    // Workaround: the same logic lives in a non-variadic helper.
    return bug2479format(_arguments, _argptr);
}
2140
2141
/*****************************************************
 * Format arguments into string <i>s</i> which must be large
 * enough to hold the result. Throws RangeError if it is not.
 * Returns: the slice of s that holds the formatted text
 */
char[] sformat(char[] s, ...)
{
/+ // @@@BUG@@@ Fails due to regression bug 2479.

  size_t i;

    void putc(dchar c)
    {
    if (c <= 0x7F)
    {
        if (i >= s.length)
            onRangeError("std.string.sformat", 0);
        s[i] = cast(char)c;
        ++i;
    }
    else
    {   char[4] buf;
        auto b = std.utf.toUTF8(buf, c);
        if (i + b.length > s.length)
            onRangeError("std.string.sformat", 0);
        s[i..i+b.length] = b[];
        i += b.length;
    }
    }

    std.format.doFormat(&putc, _arguments, _argptr);
    return s[0 .. i];
    +/
    // Workaround: the same logic lives in a non-variadic helper.
    return bug2479sformat(s, _arguments, _argptr);
}
2177
unittest
{
    debug(string) printf("std.string.format.unittest\n");

/+
    // Disabled: formatting a lone null.
    r = format(null);
    i = cmp(r, "");
    assert(i == 0);
+/
    // Plain text and the escaped percent sign.
    assert(cmp(format("foo"), "foo") == 0);
    assert(cmp(format("foo%%"), "foo%") == 0);

    // %s with a character and with one or two strings.
    assert(cmp(format("foo%s", 'C'), "fooC") == 0);
    assert(cmp(format("%s foo", "bar"), "bar foo") == 0);
    assert(cmp(format("%s foo %s", "bar", "abc"), "bar foo abc") == 0);

    // %d with a negative and a positive integer.
    assert(cmp(format("foo %d", -123), "foo -123") == 0);
    assert(cmp(format("foo %d", 123), "foo 123") == 0);
}
2217
2218
/***********************************************
 * See if character c is in the pattern.
 * Patterns:
 *
 *  A <i>pattern</i> is a string of characters that works much like a
 *  <i>character class</i> in regular expressions. Characters may be
 *  listed one by one, as in "abcde", or as a range written with '-':
 *  "a-e" is the same pattern as "abcde", and "a-fA-F0-9" represents
 *  all the hex characters.
 *  If the first character of a pattern is '^', the pattern is
 *  negated, i.e. "^0-9" means any character except a digit.
 *  The functions inPattern, <b>countchars</b>, <b>removechars</b>,
 *  and <b>squeeze</b> use patterns.
 *
 * Note: In the future, the pattern syntax may be improved
 *  to be more like regular expression character classes.
 */

bool inPattern(S)(dchar c, in S pattern) if (isSomeString!S)
{
    bool negated = false;       // set once a leading '^' has been seen
    bool pendingRange = false;  // the previous element was a range '-'
    dchar rangeStart;           // element seen just before the '-'

    foreach (size_t idx, dchar elem; pattern)
    {
        if (idx == 0 && elem == '^')
        {
            negated = true;
            // A pattern consisting only of "^" matches the caret itself.
            if (pattern.length == idx + 1)
                return c == elem;    // or should this be an error?
        }
        else if (pendingRange)
        {
            // elem closes the range rangeStart-elem; the upper bound
            // always matches, even when the range is written backwards.
            pendingRange = false;
            if (c == elem || (rangeStart <= c && c <= elem))
                return !negated;
        }
        else if (elem == '-' && idx > negated && idx + 1 < pattern.length)
        {
            // A '-' is a range operator only when it has an element on
            // both sides; rangeStart must stay untouched here.
            pendingRange = true;
            continue;
        }
        else if (elem == c)
            return !negated;

        rangeStart = elem;
    }
    return negated;
}
2268
2269
unittest
{
    debug(string) printf("std.string.inPattern.unittest\n");

    // Literal characters, including the empty pattern.
    assert(inPattern('x', "x"));
    assert(!inPattern('x', "y"));
    assert(!inPattern('x', cast(string)null));
    assert(inPattern('x', "^y"));
    assert(inPattern('x', "yxxy"));
    assert(!inPattern('x', "^yxxy"));
    assert(inPattern('x', "^abcd"));

    // The caret is special only in first position.
    assert(!inPattern('^', "^^"));
    assert(inPattern('^', "^"));
    assert(inPattern('^', "a^"));

    // Ranges, plain and negated.
    assert(inPattern('x', "a-z"));
    assert(!inPattern('x', "A-Z"));
    assert(!inPattern('x', "^a-z"));
    assert(inPattern('x', "^A-Z"));

    // A trailing '-' is an ordinary character.
    assert(inPattern('-', "a-"));
    assert(!inPattern('-', "^A-"));

    // A reversed range matches only its two end points.
    assert(inPattern('a', "z-a"));
    assert(inPattern('z', "z-a"));
    assert(!inPattern('x', "z-a"));
}
2315
2316
/***********************************************
 * See if character c is in the intersection of the patterns,
 * i.e. whether it is matched by every pattern in the array.
 * Returns true when patterns is empty.
 */

bool inPattern(S)(dchar c, S[] patterns) if (isSomeString!S)
{
    // The element type is inferred so that wstring, dstring and
    // mutable-character patterns instantiate as well as string.
    foreach (pattern; patterns)
    {
        if (!inPattern(c, pattern))
        {
            return false;
        }
    }
    return true;
}
2332
2333
/********************************************
 * Returns the number of characters in s that match pattern
 * (see inPattern for the pattern syntax).
 */

size_t countchars(S, S1)(S s, in S1 pattern) if (isSomeString!S && isSomeString!S1)
{
    size_t matches = 0;
    foreach (dchar ch; s)
    {
        if (inPattern(ch, pattern))
            ++matches;
    }
    return matches;
}
2347
unittest
{
    debug(string) printf("std.string.count.unittest\n");

    // every character falls inside the range
    assert(countchars("abc", "a-c") == 3);
    // two 'o' and one 'r'
    assert(countchars("hello world", "or") == 3);
}
2359
2360
/********************************************
 * Returns s with every character that matches pattern removed.
 * When nothing matches, s itself is returned.
 */

S removechars(S)(S s, in S pattern) if (isSomeString!S)
{
    Unqual!(typeof(s[0]))[] kept;
    bool copying = false;   // true once the first match forced a copy

    foreach (size_t pos, dchar ch; s)
    {
        if (!inPattern(ch, pattern))
        {
            // Surviving characters need re-encoding only after a copy
            // has been started.
            if (copying)
                std.utf.encode(kept, ch);
            continue;
        }

        // First removed character: start the copy from the untouched prefix.
        if (!copying)
        {
            copying = true;
            kept = s[0 .. pos].dup;
        }
    }

    if (copying)
        return cast(S) kept;
    return s;
}
2387
unittest
{
    debug(string) printf("std.string.removechars.unittest\n");

    // everything removed
    assert(removechars("abc", "a-c").length == 0);
    // removals in the middle, at the end, and at both ends
    assert(removechars("hello world", "or") == "hell wld");
    assert(removechars("hello world", "d") == "hello worl");
    assert(removechars("hah", "h") == "a");
}
2403
2404
/***************************************************
 * Returns s with each run of a repeated character collapsed to a
 * single instance of that character. Only characters that match
 * pattern[] are collapsed; if pattern is null, all characters are.
 */

S squeeze(S)(S s, in S pattern = null)
{
    Unqual!(typeof(s[0]))[] buf;    // allocated lazily, see below
    dchar prev;                     // last squeezable character seen
    size_t keepLen;                 // length of the prefix of s kept so far
    bool inRun;                     // previous character was squeezable
    bool changed;                   // at least one character was dropped

    foreach (size_t idx, dchar ch; s)
    {
        // Repetition of the current run: drop it.
        if (inRun && ch == prev)
        {
            changed = true;
            continue;
        }

        const bool squeezable = pattern is null || inPattern(ch, pattern);
        inRun = squeezable;

        if (changed)
        {
            // Something was dropped earlier, so the output no longer is a
            // prefix of s: copy the kept prefix once, then append.
            if (buf is null)
                buf = s[0 .. keepLen].dup;
            std.utf.encode(buf, ch);
        }
        else if (squeezable)
        {
            // Still a prefix of s; remember where it ends.
            keepLen = idx + std.utf.stride(s, idx);
        }

        if (squeezable)
            prev = ch;
    }

    if (!changed)
        return s;
    // Only trailing repetitions were dropped: a slice of s is enough.
    if (buf is null)
        return s[0 .. keepLen];
    return cast(S) buf;
}
2449
unittest
{
    debug(string) printf("std.string.squeeze.unittest\n");

    // default pattern: every character is squeezable
    assert(squeeze("hello") == "helo");

    // nothing to squeeze: the very same string comes back
    string untouched = "abcd";
    assert(squeeze(untouched) is untouched);

    // only the tail is squeezed: should just be a slice
    string tailRun = "xyzz";
    string sliced = squeeze(tailRun);
    assert(sliced.ptr == tailRun.ptr);

    // explicit pattern: only 'o' and 'e' runs collapse
    assert(squeeze("hello goodbyee", "oe") == "hello godbye");
}
2467
2468 /***************************************************************
2469  Finds the position $(D_PARAM pos) of the first character in $(D_PARAM
2470  s) that does not match $(D_PARAM pattern) (in the terminology used by
2471  $(LINK2 std_string.html,inPattern)). Updates $(D_PARAM s =
2472  s[pos..$]). Returns the slice from the beginning of the original
2473  (before update) string up to, and excluding, $(D_PARAM pos).
2474
2475  Example:
2476  ---
2477 string s = "123abc";
2478 string t = munch(s, "0123456789");
2479 assert(t == "123" && s == "abc");
2480 t = munch(s, "0123456789");
2481 assert(t == "" && s == "abc");
2482  ---
2483
2484 The $(D_PARAM munch) function is mostly convenient for skipping
2485 certain category of characters (e.g. whitespace) when parsing
2486 strings. (In such cases, the return value is not used.)
2487  */
2488
S1 munch(S1, S2)(ref S1 s, S2 pattern)
{
    // Index of the first character that does not match pattern; stays at
    // s.length when every character matches.
    size_t j = s.length;

    // Iterate by decoded dchar: each character is tested as a whole code
    // point, and i is always an index at which a code point starts, so the
    // split below cannot fall inside a multi-unit sequence.
    foreach (i, dchar c; s)
    {
        if (!inPattern(c, pattern))
        {
            j = i;
            break;
        }
    }

    // Hand back the matched prefix and advance s past it.
    auto head = s[0 .. j];
    s = s[j .. $];
    return head;
}
2503
unittest
{
    enum digits = "0123456789";
    string input = "123abc";

    // the leading digits are consumed and returned
    string eaten = munch(input, digits);
    assert(eaten == "123");
    assert(input == "abc");

    // no leading digit is left: nothing is consumed
    eaten = munch(input, digits);
    assert(eaten == "");
    assert(input == "abc");
}
2512
2513
/**********************************************
 * Return string that is the 'successor' to s[].
 * If the rightmost character is a-zA-Z0-9, it is incremented within
 * its case or digits. If it generates a carry, the process is
 * repeated with the one to its immediate left.
 *
 * A carry out of the leftmost character lengthens the result by one
 * character ('1' for a digit, 'a' or 'A' for a letter). If s is empty
 * or does not end in a-zA-Z0-9, s itself is returned.
 */

S succ(S)(S s) if (isSomeString!S)
{
    if (s.length && isalnum(s[$ - 1]))
    {
        auto r = s.dup;
        alias typeof(r[0]) Char;    // mutable element type of the copy
        size_t i = r.length - 1;

        while (1)
        {
            dchar c = s[i];
            dchar carry;

            switch (c)
            {
            case '9':
                c = '0';
                carry = '1';
                goto Lcarry;
            case 'z':
            case 'Z':
                // wrap to 'a' or 'A' respectively
                c -= 'Z' - 'A';
                carry = c;
            Lcarry:
                r[i] = cast(Char) c;
                if (i == 0)
                {
                    // Carry out of the leftmost position: prepend it.
                    auto t = new Char[r.length + 1];
                    t[0] = cast(Char) carry;
                    t[1 .. $] = r[];
                    // cast(S), as in the no-carry exit below, so that
                    // mutable string types instantiate too.
                    return cast(S) t;
                }
                i--;
                break;

            default:
                // No carry: bump the character if it is alphanumeric and stop.
                if (std.ctype.isalnum(c))
                    r[i]++;
                return cast(S) r;
            }
        }
    }
    return s;
}
2564
unittest
{
    debug(string) printf("std.string.succ.unittest\n");

    // inputs that are returned unchanged
    assert(succ(cast(string) null) is null);
    assert(succ("!@#$%") == "!@#$%");

    // plain increment
    assert(succ("1") == "2");

    // carries, including one that lengthens the string
    assert(succ("9") == "10");
    assert(succ("999") == "1000");
    assert(succ("zz99") == "aaa00");
}
2584
2585
2586 /***********************************************
2587  * Replaces characters in str[] that are in from[]
2588  * with corresponding characters in to[] and returns the resulting
2589  * string.
2590  * Params:
2591  *  modifiers = a string of modifier characters
2592  * Modifiers:
2593         <table border=1 cellspacing=0 cellpadding=5>
2594         <tr> <th>Modifier <th>Description
2595         <tr> <td><b>c</b> <td>Complement the list of characters in from[]
2596         <tr> <td><b>d</b> <td>Removes matching characters with no corresponding replacement in to[]
2597         <tr> <td><b>s</b> <td>Removes adjacent duplicates in the replaced characters
2598         </table>
2599
2600     If modifier <b>d</b> is present, then the number of characters
2601     in to[] may be only 0 or 1.
2602
2603     If modifier <b>d</b> is not present and to[] is null,
2604     then to[] is taken _to be the same as from[].
2605
2606     If modifier <b>d</b> is not present and to[] is shorter
2607     than from[], then to[] is extended by replicating the
2608     last character in to[].
2609
2610     Both from[] and to[] may contain ranges using the <b>-</b>
2611     character, for example <b>a-d</b> is synonymous with <b>abcd</b>.
2612     Neither accept a leading <b>^</b> as meaning the complement of
2613     the string (use the <b>c</b> modifier for that).
2614  */
2615
string tr(const(char)[] str, const(char)[] from, const(char)[] to, const(char)[] modifiers = null)
{
    bool complement = false;        // modifier 'c'
    bool deleteUnmapped = false;    // modifier 'd'
    bool squeezeRuns = false;       // modifier 's'

    foreach (char flag; modifiers)
    {
        switch (flag)
        {
        case 'c':   complement = true;      break;
        case 'd':   deleteUnmapped = true;  break;
        case 's':   squeezeRuns = true;     break;
        default:    assert(0);
        }
    }

    // Without 'd', a null to[] means "same as from[]".
    if (!deleteUnmapped && to is null)
        to = from;

    char[] result = new char[str.length];
    result.length = 0;
    bool prevWasMapped = false;     // the last emitted character was a replacement
    dchar prevOut;                  // the last emitted character

    foreach (dchar ch; str)
    {
        dchar prevFrom;     // last plain element read from from[]
        dchar prevTo;       // last plain element read from to[]
        dchar mapped;       // replacement chosen for ch
        int pos = 0;        // position of ch within the expanded from[]

        // Step 1: look for ch in from[], expanding x-y ranges on the fly.
        for (size_t fi = 0; fi < from.length; )
        {
            dchar fc = std.utf.decode(from, fi);
            if (fc == '-' && prevFrom != dchar.init && fi < from.length)
            {
                dchar hi = std.utf.decode(from, fi);
                if (prevFrom <= ch && ch <= hi)
                {
                    pos += ch - prevFrom - 1;
                    if (complement)
                        goto Lnotfound;
                    else
                        goto Lfound;
                }
                pos += hi - prevFrom;
                prevFrom = prevFrom.init;
                continue;
            }

            if (ch == fc)
            {
                if (complement)
                    goto Lnotfound;
                else
                    goto Lfound;
            }
            prevFrom = fc;
            pos++;
        }
        if (!complement)
            goto Lnotfound;
        pos = 0;            // consider it 'found' at position 0

      Lfound:

        // Step 2: find the pos-th character of to[], again expanding ranges.
        dchar lastTo;
        for (size_t ti = 0; ti < to.length; )
        {
            dchar tc = std.utf.decode(to, ti);
            if (tc == '-' && prevTo != dchar.init && ti < to.length)
            {
                lastTo = std.utf.decode(to, ti);
                pos -= lastTo - prevTo;
                if (pos < 0)
                {
                    mapped = lastTo + pos + 1;
                    goto Lnewc;
                }
                prevTo = dchar.init;
                continue;
            }
            if (pos == 0)
            {
                mapped = tc;
                goto Lnewc;
            }
            prevTo = tc;
            lastTo = tc;
            pos--;
        }
        // to[] is exhausted: with 'd' the character is dropped, otherwise
        // the last character of to[] is used.
        if (deleteUnmapped)
            continue;
        mapped = lastTo;

      Lnewc:
        // Step 3: emit the replacement, unless 's' suppresses a duplicate.
        if (squeezeRuns && prevWasMapped && mapped == prevOut)
            continue;
        std.utf.encode(result, mapped);
        prevWasMapped = true;
        prevOut = mapped;
        continue;

      Lnotfound:
        // ch is not translated: copy it through unchanged.
        std.utf.encode(result, ch);
        prevOut = ch;
        prevWasMapped = false;
    }
    return assumeUnique(result);
}
2727
unittest
{
    debug(string) printf("std.string.tr.unittest\n");

    // one-to-one replacement
    assert(tr("abcdef", "cd", "CD") == "abCDef");

    // ranges in from[] and to[], written in several equivalent ways
    assert(tr("abcdef", "b-d", "B-D") == "aBCDef");
    assert(tr("abcdefgh", "b-dh", "B-Dx") == "aBCDefgx");
    assert(tr("abcdefgh", "b-dh", "B-CDx") == "aBCDefgx");
    assert(tr("abcdefgh", "b-dh", "B-BCDx") == "aBCDefgx");

    // 'c': complement of from[]
    assert(tr("abcdef", "ef", "*", "c") == "****ef");

    // 'd': delete characters without a replacement
    assert(tr("abcdef", "ef", "", "d") == "abcd");

    // 's': squeeze repeated replacements
    assert(tr("hello goodbye", "lo", null, "s") == "helo godbye");
    assert(tr("hello goodbye", "lo", "x", "s") == "hex gxdbye");

    // combined modifiers
    assert(tr("14-Jul-87", "a-zA-Z", " ", "cs") == " Jul ");

    // duplicates in from[]: the first occurrence wins
    assert(tr("Abc", "AAA", "XYZ") == "Xbc");
}
2768
2769
2770 /* ************************************************
2771  * Version       : v0.3
2772  * Author        : David L. 'SpottedTiger' Davis
2773  * Date Created  : 31.May.05 Compiled and Tested with dmd v0.125
2774  * Date Modified : 01.Jun.05 Modified the function to handle the
2775  *               :           imaginary and complex float-point
2776  *               :           datatypes.
2777  *               :
2778  * Licence       : Public Domain / Contributed to Digital Mars
2779  */
2780
2781 /**
2782  * [in] string s can be formatted in the following ways:
2783  *
2784  * Integer Whole Number:
2785  * (for byte, ubyte, short, ushort, int, uint, long, and ulong)
2786  * ['+'|'-']digit(s)[U|L|UL]
2787  *
2788  * examples: 123, 123UL, 123L, +123U, -123L
2789  *
2790  * Floating-Point Number:
2791  * (for float, double, real, ifloat, idouble, and ireal)
2792  * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
2793  *      or [nan|nani|inf|-inf]
2794  *
2795  * examples: +123., -123.01, 123.3e-10f, 123.3e-10fi, 123.3e-10L
2796  *
2797  * (for cfloat, cdouble, and creal)
2798  * ['+'|'-']digit(s)[.][digit(s)][[e-|e+]digit(s)][+]
2799  *         [digit(s)[.][digit(s)][[e-|e+]digit(s)][i|f|L|Li|fi]]
2800  *      or [nan|nani|nan+nani|inf|-inf]
2801  *
2802  * examples: nan, -123e-1+456.9e-10Li, +123e+10+456i, 123+456
2803  *
2804  * [in] bool bAllowSep
2805  * False by default, but when set to true it will accept the
2806  * separator characters "," and "_" within the string, but these
2807  * characters should be stripped from the string before using any
2808  * of the conversion functions like toInt(), toFloat(), and etc
2809  * else an error will occur.
2810  *
2811  * Also please note, that no spaces are allowed within the string
2812  * anywhere whether it's a leading, trailing, or embedded space(s),
2813  * thus they too must be stripped from the string before using this
2814  * function, or any of the conversion functions.
2815  */
2816
bool isNumeric(const(char)[] s, in bool bAllowSep = false)
{
    sizediff_t iLen = s.length;
    bool   bDecimalPoint = false;
    bool   bExponent = false;
    bool   bComplex = false;
    auto sx = std.string.tolower(s);    // all checks below are case-insensitive
    int    j  = 0;
    char   c;

    // Empty string, return false
    if (iLen == 0)
        return false;

    // Check for NaN (Not a Number)
    if (sx == "nan" || sx == "nani" || sx == "nan+nani")
        return true;

    // Check for Infinity
    if (sx == "inf" || sx == "-inf")
        return true;

    // A sign is allowed only in the 1st character
    if (sx[0] == '-' || sx[0] == '+')
    {
        // ...and a sign on its own is not a number
        if (iLen == 1)
            return false;
        j++;
    }

    for (int i = j; i < iLen; i++)
    {
        c = sx[i];

        // Digits are good, continue checking with the next character
        if (c >= '0' && c <= '9')
            continue;

        // A '+' past the first character starts the second part of a
        // complex number: reset the flags for checking the 2nd number.
        if (c == '+')
        {
            if (i > 0)
            {
                bDecimalPoint = false;
                bExponent = false;
                bComplex = true;
                continue;
            }
            return false;
        }

        // Allow only one exponent per number
        if (c == 'e')
        {
            // A 2nd exponent found, return not a number
            if (bExponent)
                return false;

            // Ending in "E", return not a number
            if (i + 1 >= iLen)
                return false;

            // The exponent must be followed by an explicit sign
            if (sx[i + 1] != '-' && sx[i + 1] != '+')
                return false;

            bExponent = true;
            i++;            // skip the sign as well
            continue;
        }

        // Allow only one decimal point per number to be used
        if (c == '.')
        {
            // A 2nd decimal point found, return not a number
            if (bDecimalPoint)
                return false;

            bDecimalPoint = true;
            continue;
        }

        // Check for ending literal characters: "f,u,l,i,ul,fi,li",
        // and whether they're being used with the correct datatype.
        if (i == iLen - 2)
        {
            auto suffix = sx[i .. iLen];

            // "ul" is valid for an integer whole number only
            if (suffix == "ul")
                return !(bDecimalPoint || bExponent || bComplex);

            // "fi" and "li" are accepted after any number
            return suffix == "fi" || suffix == "li";
        }

        if (i == iLen - 1)
        {
            // Integer whole number
            if ((c == 'u' || c == 'l') &&
                !bDecimalPoint && !bExponent && !bComplex)
                return true;

            // A complex number must end with the 'i' character
            if (bComplex)
                return c == 'i';

            // Could be an integer or a float, all these suffixes
            // are valid for both
            return c == 'l' || c == 'f' || c == 'i';
        }

        // Check if separators are allowed to be in the numeric string
        if (bAllowSep && (c == '_' || c == ','))
            continue;

        return false;
    }

    return true;
}
2958
/// Accepts an argument of any type; the run-time type information and the
/// argument pointer are handed to the TypeInfo[]-based overload.
bool isNumeric(...)
{
    const bool verdict = isNumeric(_arguments, _argptr);
    return verdict;
}
2964
/// Check only the first parameter, all others will be ignored.
/// Returns false when there are no arguments or the first argument has a
/// type that is not handled below.
bool isNumeric(TypeInfo[] _arguments, va_list _argptr)
{
    if (_arguments.length == 0)
        return false;

    auto ti = _arguments[0];

    // Character arrays are checked by content, via the string overload.
    if (ti == typeid(char[]))
        return isNumeric(va_arg!(char[])(_argptr));
    if (ti == typeid(wchar[]))
        return isNumeric(std.utf.toUTF8(va_arg!(wchar[])(_argptr)));
    if (ti == typeid(dchar[]))
        return isNumeric(std.utf.toUTF8(va_arg!(dstring)(_argptr)));

    // Values of these built-in types are numeric whatever they hold.
    if (ti == typeid(real)   || ti == typeid(double)  || ti == typeid(float)  ||
        ti == typeid(ulong)  || ti == typeid(long)    ||
        ti == typeid(uint)   || ti == typeid(int)     ||
        ti == typeid(ushort) || ti == typeid(short)   ||
        ti == typeid(ireal)  || ti == typeid(idouble) || ti == typeid(ifloat) ||
        ti == typeid(creal)  || ti == typeid(cdouble) || ti == typeid(cfloat))
        return true;
    //cent and ucent are not handled:
    //    ti == typeid(cent) || ti == typeid(ucent)

    // Single bytes and characters are treated as one-character strings.
    if (ti == typeid(ubyte))
    {
        char[1] t;
        t[0]= va_arg!(ubyte)(_argptr);
        return isNumeric(cast(string)t);
    }
    if (ti == typeid(byte))
    {
        char[1] t;
        t[0] = va_arg!(char)(_argptr);
        return isNumeric(cast(string)t);
    }
    if (ti == typeid(char))
    {
        char[1] t;
        t[0] = va_arg!(char)(_argptr);
        return isNumeric(cast(string)t);
    }
    if (ti == typeid(wchar))
    {
        wchar[1] t;
        t[0] = va_arg!(wchar)(_argptr);
        return isNumeric(std.utf.toUTF8(t));
    }
    if (ti == typeid(dchar))
    {
        dchar[1] t;
        t[0] = va_arg!(dchar)(_argptr);
        dchar[] t1 = t;
        return isNumeric(std.utf.toUTF8(cast(dstring) t1));
    }

    return false;
}
3050
unittest
{
    debug (string) printf("isNumeric(in string, bool = false).unittest\n");

    // Test the isNumeric(in string) function
    assert(isNumeric("1"));
    assert(isNumeric("1.0"));
    assert(isNumeric("1e-1"));
    assert(!isNumeric("12345xxxx890"));
    assert(isNumeric("567L"));
    assert(isNumeric("23UL"));
    assert(!isNumeric("-123..56f"));
    assert(!isNumeric("12.3.5.6"));
    assert(!isNumeric(" 12.356"));
    assert(!isNumeric("123 5.6"));
    assert(isNumeric("1233E-1+1.0e-1i"));

    // complex numbers and their suffixes
    assert(isNumeric("123.00E-5+1234.45E-12Li"));
    assert(!isNumeric("123.00e-5+1234.45E-12iL"));
    assert(!isNumeric("123.00e-5+1234.45e-12uL"));
    assert(!isNumeric("123.00E-5+1234.45e-12lu"));

    // suffixes on plain numbers
    assert(isNumeric("123fi"));
    assert(isNumeric("123li"));
    assert(!isNumeric("--123L"));
    assert(!isNumeric("+123.5UL"));
    assert(isNumeric("123f"));
    assert(!isNumeric("123.u"));

    // text produced by to!string
    assert(isNumeric(to!string(real.nan)));
    assert(isNumeric(to!string(-real.infinity)));
    assert(isNumeric(to!string(123e+2+1234.78Li)));

    // slices of a larger string
    string money = "$250.99-";
    assert(isNumeric(money[1 .. money.length - 2]));
    assert(!isNumeric(money));
    assert(!isNumeric(money[0 .. money.length - 1]));

    // These test calling the isNumeric(...) function
    assert(isNumeric(1,123UL));
    assert(isNumeric('2'));
    assert(!isNumeric('x'));
    assert(!isNumeric(cast(byte)0x57)); // 'W'
    assert(isNumeric(cast(byte)0x37));  // '7'
    assert(isNumeric(cast(wchar[])"145.67"));
    assert(!isNumeric(cast(dchar[])"145.67U"));
    assert(isNumeric(123_000.23fi));
    assert(isNumeric(123.00E-5+1234.45E-12Li));
    assert(isNumeric(real.nan));
    assert(isNumeric(-real.infinity));
}
3103
3104
3105 /*****************************
3106  * Soundex algorithm.
3107  *
3108  * The Soundex algorithm converts a word into 4 characters
3109  * based on how the word sounds phonetically. The idea is that
3110  * two spellings that sound alike will have the same Soundex
3111  * value, which means that Soundex can be used for fuzzy matching
3112  * of names.
3113  *
3114  * Params:
3115  *  string = String to convert to Soundex representation.
3116  *  buffer = Optional 4 char array to put the resulting Soundex
3117  *      characters into. If null, the return value
3118  *      buffer will be allocated on the heap.
3119  * Returns:
3120  *  The four character array with the Soundex result in it.
3121  *  Returns null if there is no Soundex representation for the string.
3122  *
3123  * See_Also:
3124  *  $(LINK2 http://en.wikipedia.org/wiki/Soundex, Wikipedia),
3125  *  $(LINK2 http://www.archives.gov/publications/general-info-leaflets/55.html, The Soundex Indexing System)
3126  *
3127  * Bugs:
3128  *  Only works well with English names.
3129  *  There are other arguably better Soundex algorithms,
3130  *  but this one is the standard one.
3131  */
3132
char[] soundex(const(char)[] string, char[] buffer = null)
in
{
    // A caller-supplied buffer needs room for the four result characters.
    assert(!buffer || buffer.length >= 4);
}
out (result)
{
    // A non-null result is one upper-case letter followed by three digits.
    if (result)
    {
        assert(result.length == 4);
        assert(result[0] >= 'A' && result[0] <= 'Z');
        foreach (char c; result[1 .. 4])
            assert(c >= '0' && c <= '6');
    }
}
body
{
    // Soundex digit for each letter; '0' marks letters that are not coded.
    static immutable dex =
        // ABCDEFGHIJKLMNOPQRSTUVWXYZ
        "01230120022455012623010202";

    int b = 0;          // number of result characters produced so far
    char lastc;         // code of the previous letter, for duplicate suppression
    foreach (char cs; string)
    {   auto c = cs;        // necessary because cs is final

        if (c >= 'a' && c <= 'z')
            c -= 'a' - 'A';         // fold to upper case
        else if (c >= 'A' && c <= 'Z')
        {
            ;
        }
        else
        {   // Not a letter: skip it, and forget the previous code.
            lastc = lastc.init;
            continue;
        }
        if (b == 0)
        {
            // The first letter is stored as is; the buffer is allocated
            // only when the caller did not supply one.
            if (!buffer)
                buffer = new char[4];
            buffer[0] = c;
            b++;
            lastc = dex[c - 'A'];
        }
        else
        {
            // H and W are skipped without resetting the previous code...
            if (c == 'H' || c == 'W')
                continue;
            // ...whereas a vowel resets it, so that equal codes separated
            // by a vowel are both kept.
            if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
                lastc = lastc.init;
            c = dex[c - 'A'];
            if (c != '0' && c != lastc)
            {
                buffer[b] = c;
                b++;
                lastc = c;
            }
        }
        if (b == 4)
            break;
    }

    // No letter at all: there is no Soundex representation.
    if (b == 0)
        return null;

    // Pad with zeros, and return exactly the four result characters even
    // when the caller's buffer is longer than that.
    buffer[b .. 4] = '0';
    return buffer[0 .. 4];
}
3201
unittest
{
    char[4] buffer;

    // Asserts that name encodes to code, optionally through a caller buffer.
    void check(string name, string code, char[] buf = null)
    {
        assert(soundex(name, buf) == code, "soundex mismatch for " ~ name);
    }

    // Inputs without any letters have no Soundex code.
    assert(soundex(null) == null);
    assert(soundex("") == null);
    assert(soundex("0123^&^^**&^") == null);

    // Pairs of names that share a code, plus a few singletons.
    check("Euler", "E460");
    check(" Ellery ", "E460");
    check("Gauss", "G200");
    check("Ghosh", "G200");
    check("Hilbert", "H416");
    check("Heilbronn", "H416");
    check("Knuth", "K530");
    check("Kant", "K530", buffer);
    check("Lloyd", "L300");
    check("Ladd", "L300");
    check("Lukasiewicz", "L222", buffer);
    check("Lissajous", "L222");
    check("Robert", "R163");
    check("Rupert", "R163");
    check("Rubin", "R150");
    check("Washington", "W252");
    check("Lee", "L000");
    check("Gutierrez", "G362");
    check("Pfister", "P236");
    check("Jackson", "J250");
    check("Tymczak", "T522");
    check("Ashcraft", "A261");

    check("Woo", "W000");
    check("Pilgrim", "P426");
    check("Flingjingwaller", "F452");
    check("PEARSE", "P620");
    check("PIERCE", "P620");
    check("Price", "P620");
    check("CATHY", "C300");
    check("KATHY", "K300");
    check("Jones", "J520");
    check("johnsons", "J525");
    check("Hardin", "H635");
    check("Martinez", "M635");
}
3244
3245
/***************************************************
 * Build an associative array of the unambiguous abbreviations of the
 * strings in values.
 *
 * Every string in values is mapped to itself. In addition, each proper
 * prefix of a string that is not also a prefix of a different string in
 * values is mapped to the string it abbreviates. Duplicate entries in
 * values are treated as a single string.
 *
 * This is useful when the user is expected to type one of a known set
 * of strings and the program should complete the input as soon as
 * enough characters have been entered to identify it.
 *
 * Note: when values contains only one distinct, non-empty string, the
 * empty string is among the keys, since it is a prefix of nothing else.
 * Example:
 * ---
 * import std.stdio;
 * import std.string;
 *
 * void main()
 * {
 *    static string[] list = [ "food", "foxy" ];
 *
 *    auto abbrevs = std.string.abbrev(list);
 *
 *    foreach (key, value; abbrevs)
 *    {
 *       writefln("%s => %s", key, value);
 *    }
 * }
 * ---
 * produces the output:
 * <pre>
 * fox =&gt; foxy
 * food =&gt; food
 * foxy =&gt; foxy
 * foo =&gt; food
 * </pre>
 */

string[string] abbrev(string[] values)
{
    string[string] table;

    // Sort a private copy so the caller's array is left untouched.
    values = values.dup.sort;

    const size_t count = values.length;

    // True if other is at least as long as prefix and begins with it.
    bool beginsWith(string other, string prefix)
    {
        return prefix.length <= other.length
            && other[0 .. prefix.length] == prefix;
    }

    bool hasPrev = false;   // becomes true once one distinct string is done
    string prev;            // distinct string handled before the current one
    string next;            // last neighbour examined while skipping dups
    size_t upcoming;        // index of the next distinct string

    for (size_t cur = 0; cur < count; cur = upcoming)
    {
        string word = values[cur];

        // Step over repeated copies of word.
        upcoming = cur + 1;
        while (upcoming < count)
        {
            next = values[upcoming];
            if (word != next)
                break;
            upcoming++;
        }
        const bool hasNext = upcoming != count;

        // In sorted order it is enough to compare each prefix against the
        // two neighbouring distinct strings.  Prefixes are cut at UTF
        // sequence boundaries.
        for (size_t len = 0; len < word.length;
             len += std.utf.stride(word, len))
        {
            string prefix = word[0 .. len];

            if (!(hasNext && beginsWith(next, prefix)) &&
                !(hasPrev && beginsWith(prev, prefix)))
            {
                table[prefix] = word;
            }
        }

        // The complete string always stands for itself.
        table[word] = word;
        hasPrev = true;
        prev = word;
    }

    return table;
}
3318
unittest
{
    debug(string) printf("string.abbrev.unittest\n");

    // "hello" is listed twice to exercise duplicate skipping.
    string[] input = ["hello", "hello", "he"];

    string[string] table = abbrev(input);

    auto sortedKeys = table.keys.dup;
    sortedKeys.sort;

    // Expected keys in sorted order, and the string each one expands to.
    string[] wantKeys = ["he", "hel", "hell", "hello"];
    string[] wantVals = ["he", "hello", "hello", "hello"];

    assert(sortedKeys.length == 4);
    foreach (i, key; sortedKeys)
        assert(key == wantKeys[i]);
    foreach (i, key; sortedKeys)
        assert(table[key] == wantVals[i]);
}
3345
3346
/******************************************
 * Compute the column number reached after str, assuming str starts in
 * the leftmost column. Columns are numbered starting from 0.
 *
 * A tab advances to the next multiple of tabsize; '\r', '\n', PS and LS
 * return to column 0; every other character advances by one column.
 */

size_t column(S)(S str, size_t tabsize = 8) if (isSomeString!S)
{
    size_t pos = 0;

    foreach (dchar ch; str)
    {
        if (ch == '\t')
        {
            // Round up to the next tab stop.
            pos = (pos + tabsize) / tabsize * tabsize;
        }
        else if (ch == '\r' || ch == '\n' || ch == PS || ch == LS)
        {
            // Any line terminator starts over at the left margin.
            pos = 0;
        }
        else
        {
            pos++;
        }
    }
    return pos;
}
3378
unittest
{
    debug(string) printf("string.column.unittest\n");

    // Null and empty strings end in column 0.
    string none = null;
    assert(column(none) == 0);
    assert(column("") == 0);

    // With the default tabsize of 8, a tab moves to the next multiple of 8.
    string[] tabbed = ["\t", "abc\t", "12345678\t"];
    auto stops = [8, 8, 16];
    foreach (i, text; tabbed)
        assert(column(text) == stops[i]);
}
3389
/******************************************
 * Wrap text into a paragraph.
 *
 * The words of s (runs of non-whitespace characters) are joined by
 * single spaces and broken into lines, delineated by \n. A word that
 * would take the current line past columns is moved to a new line;
 * the first word always stays on the first line. Leading, trailing
 * and repeated whitespace in s is discarded.
 * The last line is terminated with a \n.
 *
 * The final word is placed by the same rule as every other word, so
 * the result does not depend on whether s ends in whitespace.
 * Word widths are counted in code units of s.
 * Params:
 *  s = text string to be wrapped
 *  columns = maximum number of _columns in the paragraph
 *  firstindent = string used to _indent first line of the paragraph
 *  indent = string to use to _indent following lines of the paragraph
 *  tabsize = column spacing of tabs
 * Returns:
 *  The resulting paragraph.
 */

S wrap(S)(S s, size_t columns = 80, S firstindent = null,
        S indent = null, size_t tabsize = 8) if (isSomeString!S)
{
    typeof(s.dup) result;
    bool inword;            // currently inside a word
    bool first = true;      // no word has been emitted yet
    size_t wordstart;       // index in s where the current word began

    // Size the buffer for indent plus text, then cut it back to the indent
    // (presumably to pre-allocate for the appends below).
    result.length = firstindent.length + s.length;
    result.length = firstindent.length;
    result[] = firstindent[];
    auto col = column(firstindent, tabsize);

    // Appends the word s[wordstart .. end], preceded by nothing (first
    // word), a line break plus indent (word would pass columns), or a
    // single space.
    void emitWord(size_t end)
    {
        size_t wordlen = end - wordstart;

        if (first)
        {
            // The first word follows firstindent directly.
        }
        else if (col + 1 + wordlen > columns)
        {
            result ~= '\n';
            result ~= indent;
            col = column(indent, tabsize);
        }
        else
        {
            result ~= ' ';
            col += 1;
        }
        result ~= s[wordstart .. end];
        col += wordlen;
        inword = false;
        first = false;
    }

    foreach (size_t i, dchar c; s)
    {
        if (!iswhite(c))
        {
            if (!inword)
            {
                wordstart = i;
                inword = true;
            }
        }
        else if (inword)
        {
            // Whitespace ends the current word.
            emitWord(i);
        }
    }

    // A word running up to the end of s has not been emitted yet.
    if (inword)
        emitWord(s.length);
    result ~= '\n';

    return assumeUnique(result);
}
3472
unittest
{
    debug(string) printf("string.wrap.unittest\n");

    // A null string still produces the terminating newline.
    string blank = null;
    assert(wrap(blank) == "\n");

    // Default width: everything fits on one line.
    string[] wide = [" a b   df ", "x", "u u"];
    string[] wideWant = ["a b df\n", "x\n", "u u\n"];
    foreach (i, text; wide)
        assert(wrap(text) == wideWant[i]);

    // Three columns: lines are broken, and an overlong word stays whole.
    string[] narrow = [" a b   df ", " a bc   df ", " abcd   df "];
    string[] narrowWant = ["a b\ndf\n", "a\nbc\ndf\n", "abcd\ndf\n"];
    foreach (i, text; narrow)
        assert(wrap(text, 3) == narrowWant[i]);
}
3487
3488 // For backwards compatibility
3489
// Deprecated name: forwards to indexOf for character c with CaseSensitive.yes.
deprecated size_t find(in char[] s, dchar c)
{
    enum sensitivity = CaseSensitive.yes;
    return indexOf(s, c, sensitivity);
}
3494
// Deprecated name: forwards to indexOf for substring sub with
// CaseSensitive.yes.
deprecated size_t find(in char[] str, in char[] sub)
{
    enum sensitivity = CaseSensitive.yes;
    return indexOf(str, sub, sensitivity);
}
3499
deprecated unittest
{
    // The substring overload reports where the needle starts.
    string haystack = "abc";
    string needle = "bc";
    auto where = find(haystack, needle);
    assert(where == 1);
}
3506
// Deprecated name: forwards to indexOf for character c with CaseSensitive.no.
deprecated size_t ifind(in char[] s, dchar c)
{
    enum sensitivity = CaseSensitive.no;
    return indexOf(s, c, sensitivity);
}
3511
// Deprecated name: forwards to lastIndexOf for character c with
// CaseSensitive.yes.
deprecated size_t rfind(in char[] s, dchar c)
{
    enum sensitivity = CaseSensitive.yes;
    return lastIndexOf(s, c, sensitivity);
}
3516
// Deprecated name: forwards to lastIndexOf for character c with
// CaseSensitive.no.
deprecated size_t irfind(in char[] s, dchar c)
{
    enum sensitivity = CaseSensitive.no;
    return lastIndexOf(s, c, sensitivity);
}
3521
// Deprecated name: forwards to indexOf for substring c with CaseSensitive.no.
deprecated size_t ifind(in char[] s, in char[] c)
{
    enum sensitivity = CaseSensitive.no;
    return indexOf(s, c, sensitivity);
}
3526
// Deprecated name: forwards to lastIndexOf for substring c with
// CaseSensitive.yes.
deprecated size_t rfind(in char[] s, in char[] c)
{
    enum sensitivity = CaseSensitive.yes;
    return lastIndexOf(s, c, sensitivity);
}
3531
// Deprecated name: forwards to lastIndexOf for substring c with
// CaseSensitive.no.
deprecated size_t irfind(in char[] s, in char[] c)
{
    enum sensitivity = CaseSensitive.no;
    return lastIndexOf(s, c, sensitivity);
}
Note: See TracBrowser for help on using the browser.