/*******************************************************************************
Copyright: Copyright (C) 2008 Kris Bell. All rights reserved.
License: BSD style: $(LICENSE)
version: Aug 2008: Initial release
Authors: Kris
*******************************************************************************/
module tango.text.xml.DocEntity;
private import Util = tango.text.Util;
/******************************************************************************
Convert XML entity patterns to normal characters
---
&amp; => &
&quot; => "
etc
---
******************************************************************************/
T[] fromEntity (T) (T[] src, T[] dst = null)
{
    // Decodes the five entity patterns handled below (amp, apos, gt, lt,
    // quot) into their literal characters.
    //
    // src: the text to decode
    // dst: optional workspace; it is grown to src.length when shorter
    //
    // Returns src itself when it contains no '&' at all, otherwise a
    // slice of dst holding the decoded text. An '&' that does not start
    // one of the recognized patterns is copied through literally.
    auto s = src.ptr;
    auto len = src.length;

    // take a peek first to see if there's anything to translate
    auto delta = Util.indexOf (s, '&', len);
    if (delta < len)
    {
        // make some room if not enough provided; decoded output is
        // never longer than the input
        if (dst.length < src.length)
            dst.length = src.length;
        auto d = dst.ptr;

        // copy segments over, a chunk at a time
        do {
            // plain text preceding the '&'
            d [0 .. delta] = s [0 .. delta];
            len -= delta;
            s += delta;
            d += delta;

            // translate entity: s[0] is the '&' here, and token is the
            // number of source elements consumed by a recognized pattern
            auto token = 0;

            // only inspect s[1] when it lies inside the slice; a lone
            // trailing '&' (len == 1) has nothing following it
            if (len > 1)
                switch (s[1])
                {
                    case 'a':
                        if (len > 4 && s[1..5] == "amp;")
                        {
                            *d++ = '&';
                            token = 5;
                        }
                        else
                            if (len > 5 && s[1..6] == "apos;")
                            {
                                *d++ = '\'';
                                token = 6;
                            }
                        break;

                    case 'g':
                        if (len > 3 && s[1..4] == "gt;")
                        {
                            *d++ = '>';
                            token = 4;
                        }
                        break;

                    case 'l':
                        if (len > 3 && s[1..4] == "lt;")
                        {
                            *d++ = '<';
                            token = 4;
                        }
                        break;

                    case 'q':
                        if (len > 5 && s[1..6] == "quot;")
                        {
                            *d++ = '"';
                            token = 6;
                        }
                        break;

                    default:
                        break;
                }

            // not a recognized pattern: keep the '&' as-is and move on
            if (token is 0)
            {
                *d++ = '&';
                token = 1;
            }

            s += token;
            len -= token;
            delta = Util.indexOf (s, '&', len);
        } while (delta < len);

        // copy tail too
        d [0 .. len] = s [0 .. len];
        return dst [0 .. (d + len) - dst.ptr];
    }
    return src;
}
/******************************************************************************
Convert XML entity patterns to normal characters
---
&amp; => &
&quot; => "
etc
---
This variant does not require an interim workspace, and instead
emits directly via the provided delegate
******************************************************************************/
void fromEntity (T) (T[] src, void delegate(T[]) emit)
{
    // Decodes the five entity patterns handled below (amp, apos, gt, lt,
    // quot) and hands the result to emit piece by piece, in order.
    //
    // src:  the text to decode
    // emit: receives each plain-text segment and each decoded character;
    //       when src has no '&' at all it is called once with src itself
    //
    // An '&' that does not start a recognized pattern is emitted
    // literally.
    auto s = src.ptr;
    auto len = src.length;

    // take a peek first to see if there's anything to translate
    auto delta = Util.indexOf (s, '&', len);
    if (delta < len)
    {
        // emit segments, a chunk at a time
        do {
            // plain text preceding the '&'
            emit (s [0 .. delta]);
            len -= delta;
            s += delta;

            // translate entity: s[0] is the '&' here, and token is the
            // number of source elements consumed by a recognized pattern
            auto token = 0;

            // only inspect s[1] when it lies inside the slice; a lone
            // trailing '&' (len == 1) has nothing following it
            if (len > 1)
                switch (s[1])
                {
                    case 'a':
                        if (len > 4 && s[1..5] == "amp;")
                        {
                            emit ("&");
                            token = 5;
                        }
                        else
                            if (len > 5 && s[1..6] == "apos;")
                            {
                                emit ("'");
                                token = 6;
                            }
                        break;

                    case 'g':
                        if (len > 3 && s[1..4] == "gt;")
                        {
                            emit (">");
                            token = 4;
                        }
                        break;

                    case 'l':
                        if (len > 3 && s[1..4] == "lt;")
                        {
                            emit ("<");
                            token = 4;
                        }
                        break;

                    case 'q':
                        if (len > 5 && s[1..6] == "quot;")
                        {
                            emit ("\"");
                            token = 6;
                        }
                        break;

                    default:
                        break;
                }

            // not a recognized pattern: keep the '&' as-is and move on
            if (token is 0)
            {
                emit ("&");
                token = 1;
            }

            s += token;
            len -= token;
            delta = Util.indexOf (s, '&', len);
        } while (delta < len);

        // emit tail too
        emit (s [0 .. len]);
    }
    else
        emit (src);
}
/******************************************************************************
Convert reserved chars to entities. For example: " => &quot;
Either a slice of the provided output buffer is returned, or the
original content, depending on whether there were reserved chars
present or not. The output buffer should be sufficiently large to
accommodate the converted output, or it will be allocated from the
heap instead
******************************************************************************/
T[] toEntity(T) (T[] src, T[] dst = null)
{
    // Replaces each of the reserved characters " > < & ' with its entity
    // (&quot; &gt; &lt; &amp; &apos;).
    //
    // src: the text to encode
    // dst: optional workspace; it is grown as needed while encoding
    //
    // Returns src itself when it holds no reserved characters, otherwise
    // a slice of dst holding the encoded text.
    T[] entity;
    auto s = src.ptr;           // scan position
    auto t = s;                 // start of the not-yet-copied plain run
    auto e = s + src.length;    // one past the end of src
    size_t index = 0;           // number of elements written to dst

    while (s < e)
        switch (*s)
        {
            case '"':
                entity = "&quot;";
                goto common;

            case '>':
                entity = "&gt;";
                goto common;

            case '<':
                entity = "&lt;";
                goto common;

            case '&':
                entity = "&amp;";
                goto common;

            case '\'':
                entity = "&apos;";
                goto common;

            common:
                // flush the plain run t .. s, then the entity itself
                auto len = s - t;
                if (dst.length <= index + len + entity.length)
                    // grow by what is needed plus half the current size
                    dst.length = (dst.length + len + entity.length) + dst.length / 2;

                dst [index .. index + len] = t [0 .. len];
                index += len;

                dst [index .. index + entity.length] = entity;
                index += entity.length;

                // next plain run starts after the reserved char
                t = ++s;
                break;

            default:
                ++s;
                break;
        }

    // did we change anything? index is nonzero only if an entity was written
    if (index)
    {
        // copy tail too
        auto len = e - t;
        if (dst.length <= index + len)
            dst.length = index + len;

        dst [index .. index + len] = t [0 .. len];
        return dst [0 .. index + len];
    }
    return src;
}
/******************************************************************************
Convert reserved chars to entities. For example: " => &quot;
This variant does not require an interim workspace, and instead
emits directly via the provided delegate
******************************************************************************/
void toEntity(T) (T[] src, void delegate(T[]) emit)
{
    // Replaces each of the reserved characters " > < & ' with its entity
    // (&quot; &gt; &lt; &amp; &apos;) and hands the result to emit piece
    // by piece, in order.
    //
    // src:  the text to encode
    // emit: receives each non-empty plain run, each entity, and finally
    //       the tail; when src holds no reserved characters it is called
    //       once with src itself
    T[] entity;
    auto s = src.ptr;           // scan position
    auto t = s;                 // start of the not-yet-emitted plain run
    auto e = s + src.length;    // one past the end of src

    while (s < e)
        switch (*s)
        {
            case '"':
                entity = "&quot;";
                goto common;

            case '>':
                entity = "&gt;";
                goto common;

            case '<':
                entity = "&lt;";
                goto common;

            case '&':
                entity = "&amp;";
                goto common;

            case '\'':
                entity = "&apos;";
                goto common;

            common:
                // flush the plain run t .. s (if any), then the entity
                if (s - t > 0)
                    emit (t [0 .. s - t]);
                emit (entity);

                // next plain run starts after the reserved char
                t = ++s;
                break;

            default:
                ++s;
                break;
        }

    // did we change anything? entity is only set once a reserved char
    // has been seen. Emit the tail also
    if (entity.length)
        emit (t [0 .. e - t]);
    else
        emit (src);
}
/*******************************************************************************
*******************************************************************************/
// Self-test, compiled only when the DocEntity debug identifier is set.
// Checks fromEntity and toEntity (buffer variants) on each of the five
// entities, on mixed input, and on input with unrecognized '&' sequences.
debug (DocEntity)
{
    import tango.io.Console;

    void main()
    {
        // each entity decodes to its literal character
        auto s = fromEntity ("&amp;");
        assert (s == "&");
        s = fromEntity ("&quot;");
        assert (s == "\"");
        s = fromEntity ("&apos;");
        assert (s == "'");
        s = fromEntity ("&gt;");
        assert (s == ">");
        s = fromEntity ("&lt;");
        assert (s == "<");

        // consecutive entities, with and without surrounding text
        s = fromEntity ("&lt;&amp;&apos;");
        assert (s == "<&'");
        s = fromEntity ("*&lt;&amp;&apos;*");
        assert (s == "*<&'*");

        // no entities, a lone trailing '&', and entities after/before text
        assert (fromEntity ("abc") == "abc");
        assert (fromEntity ("abc&") == "abc&");
        assert (fromEntity ("abc&lt;") == "abc<");
        assert (fromEntity ("abc&gt;goo") == "abc>goo");
        assert (fromEntity ("&amp;") == "&");
        assert (fromEntity ("&quot;&apos;") == "\"'");

        // unrecognized '&' sequences pass through untouched
        assert (fromEntity ("&q&s") == "&q&s");

        // each reserved character encodes to its entity
        auto d = toEntity (">");
        assert (d == "&gt;");
        d = toEntity ("<");
        assert (d == "&lt;");
        d = toEntity ("&");
        assert (d == "&amp;");
        d = toEntity ("'");
        assert (d == "&apos;");
        d = toEntity ("\"");
        assert (d == "&quot;");

        // reserved characters mixed with plain text
        d = toEntity ("^^>*>*");
        assert (d == "^^&gt;*&gt;*");
    }
}