| 1 |
// Demangles a mangled D identifier |
|---|
| 2 |
// by James Dunne |
|---|
| 3 |
// Jan. 5 2005 |
|---|
| 4 |
|
|---|
| 5 |
module demangle; |
|---|
| 6 |
|
|---|
| 7 |
import std.ctype; |
|---|
| 8 |
import std.string; |
|---|
| 9 |
|
|---|
| 10 |
// extracts a series of dot-separated identifiers: |
|---|
| 11 |
// Extracts a run of length-prefixed identifiers starting at id[i] and joins
// them with dots, e.g. "3std6stream4File" yields "std.stream.File".
//
// Params:
//   id = the mangled symbol being parsed
//   i  = on entry, the index of the first length digit; on exit, the index
//        just past the last identifier consumed (left unchanged when id[i]
//        is not a digit)
//
// Returns: the dotted name, or an empty (null) array when no identifier
// starts at position i.
//
// A length prefix that claims more characters than remain in id (truncated
// or corrupt symbol) is clamped to the remaining characters instead of
// slicing past the end of the array.
char[] extractidentifiers(char[] id, inout int i) {
    char[] s;
    bool first = true;

    for (;;) {
        int num = i;

        // Parse numeric length:
        while (i < id.length && isdigit(id[i]))
            ++i;

        // No digits at this position: the identifier list is finished.
        if (num == i) break;

        // Get the length as an integer:
        int len = atoi(id[num .. i]);

        // Never read past the end of the symbol; a negative value can only
        // come from an overflowing digit string and is treated the same way.
        int avail = cast(int)id.length - i;
        if (len < 0 || len > avail) len = avail;

        // Extract the identifier, dot-separating it from the previous one:
        if (!first) s ~= ".";
        s ~= id[i .. i + len];
        i += len;

        first = false;
    }

    // Return the identifier string:
    return s;
}
|---|
| 39 |
|
|---|
| 40 |
// extracts a single definition (like a parameter) |
|---|
| 41 |
// Decodes one mangled type starting at id[i] and returns it as readable text.
//
// Params:
//   id = the mangled symbol being parsed
//   i  = on entry, the index of the type code; on exit, the index just past
//        everything that was consumed for this type
//
// Returns: the decoded type, null when i is already at or past the end of
// id, or "unknown" for a type code that is not in the table below.
//
// Every branch advances i by at least one character, so a caller looping
// over a parameter list always makes progress.
char[] extracttypeinfo(char[] id, inout int i) {
    if (i >= id.length) return null;

    // Extract the type info:
    switch (id[i]) {
        // dynamic array: 'A' <element type>
        case 'A': ++i; return extracttypeinfo(id, i) ~ "[]";

        // static array: 'G' <dimension digits> <element type>
        case 'G': {
            ++i;
            int start = i;
            while (i < id.length && isdigit(id[i]))
                ++i;
            // dim is empty when no dimension is present, giving plain "[]".
            char[] dim = id[start .. i];
            return extracttypeinfo(id, i) ~ "[" ~ dim ~ "]";
        }

        // associative array: 'H' <key type> <value type>
        case 'H': {
            ++i;
            char[] key = extracttypeinfo(id, i);
            char[] value = extracttypeinfo(id, i);
            return value ~ "[" ~ key ~ "]";
        }

        // pointer:
        case 'P': ++i; return extracttypeinfo(id, i) ~ "*";
        // reference:
        case 'R': ++i; return extracttypeinfo(id, i) ~ "&";
        // return value (the 'Z' that terminates a parameter list):
        case 'Z': ++i; return extracttypeinfo(id, i);
        // out:
        case 'J': ++i; return "out " ~ extracttypeinfo(id, i);
        // inout:
        case 'K': ++i; return "inout " ~ extracttypeinfo(id, i);

        // enum:
        case 'E': ++i; return extractidentifiers(id, i);
        // typedef:
        case 'T': ++i; return extractidentifiers(id, i);
        // delegate:
        // NOTE(review): a delegate is presumably followed by a function
        // signature rather than a list of identifiers - confirm against the
        // ABI before relying on this branch.
        case 'D': ++i; return extractidentifiers(id, i);
        // class:
        case 'C': ++i; return extractidentifiers(id, i);
        // struct:
        case 'S': ++i; return extractidentifiers(id, i);
        // identifier:
        case 'I': ++i; return extractidentifiers(id, i);

        // basic types:
        case 'n': ++i; return "none";   // ever used?
        case 'v': ++i; return "void";
        case 'g': ++i; return "byte";
        case 'h': ++i; return "ubyte";
        case 's': ++i; return "short";
        case 't': ++i; return "ushort";
        case 'i': ++i; return "int";
        case 'k': ++i; return "uint";
        case 'l': ++i; return "long";
        case 'm': ++i; return "ulong";
        case 'f': ++i; return "float";
        case 'd': ++i; return "double";
        case 'e': ++i; return "real";

        // imaginary and complex:
        case 'o': ++i; return "ifloat";
        case 'p': ++i; return "idouble";
        case 'j': ++i; return "ireal";
        case 'q': ++i; return "cfloat";
        case 'r': ++i; return "cdouble";
        case 'c': ++i; return "creal";

        // other types:
        case 'b': ++i; return "bit";
        case 'a': ++i; return "char";
        case 'u': ++i; return "wchar";
        case 'w': ++i; return "dchar";

        // typeinfo, error, instance:
        // NOTE(review): the original author was unsure whether identifiers
        // really follow '@' - confirm.
        case '@': ++i; return extractidentifiers(id, i);

        // Unrecognised code: skip it. Without the increment a caller that
        // loops until 'Z' would see the same character forever.
        default: ++i; return "unknown";
    }
}
|---|
| 106 |
|
|---|
| 107 |
// Returns true if it's an identifier (should be modified to return the string) |
|---|
| 108 |
// True when id begins with the characters of prefix; never slices past the
// end of id.
private bool demangle_startswith(char[] id, char[] prefix) {
    return id.length >= prefix.length && id[0 .. prefix.length] == prefix;
}

// Demangles one symbol and prints the readable form with printf.
//
// Recognised forms:
//   _Dmain                      the program entry point
//   _D<identifiers>F<params>Z<return type>   a D-linkage function
//   _Class_ / _init_ / _vtbl_ / _modctor_ / _moddtor_ / _ModuleInfo_
//                               followed by a list of identifiers
//
// Params:
//   id = the mangled symbol; may be empty or truncated
//
// Returns: true when the symbol was recognised and printed, false otherwise
// (a "_D" symbol that is not a function prints "wtf?!" and returns false).
bool demangle(char[] id) {
    char[] name;
    int i;

    // Everything this function understands starts with '_' plus one more
    // character; shorter input cannot be a known symbol.
    if (id.length < 2 || id[0] != '_') return false;

    // D linkage function:
    if (id[1] == 'D') {
        // is it main?
        if (id == "_Dmain") {
            // aww that's easy:
            printf("int main(char[][])\n");
            return true;
        }

        // Parse the collection of identifiers:
        i = 2;
        name = extractidentifiers(id, i);

        // function:
        if (i < id.length && id[i] == 'F') {
            char[] params;
            bool first = true;

            // extract all the parameters, up to the terminating 'Z':
            ++i;
            while (i < id.length && id[i] != 'Z') {
                if (!first) params ~= ", ";
                int before = i;
                params ~= extracttypeinfo(id, i);
                // Guarantee forward progress even if the type decoder left
                // the index where it was.
                if (i == before) ++i;
                first = false;
            }

            // extract the return type:
            char[] rettype = extracttypeinfo(id, i);

            // print the function definition:
            printf("%.*s %.*s(%.*s)\n\n", rettype, name, params);
            return true;
        }

        // should always be a function with _D linkage?!?!
        printf("wtf?!\n\n");
        return false;
    }

    // Check the type. For each form, i is set to the index just past the
    // underscore that follows the tag, where the identifier list starts.
    char[] kind;
    if (demangle_startswith(id, "_Class")) {
        i = 7;
        kind = "class";
    } else if (demangle_startswith(id, "_init")) {
        i = 6;
        kind = "init";
    } else if (demangle_startswith(id, "_vtbl")) {
        i = 6;
        kind = "vtbl";
    } else if (demangle_startswith(id, "_modctor")) {
        i = 9;
        kind = "ctor";
    } else if (demangle_startswith(id, "_moddtor")) {
        i = 9;
        kind = "dtor";
    } else if (demangle_startswith(id, "_ModuleInfo")) {
        i = 12;
        kind = "module";
    } else {
        return false;
    }

    char[] target = extractidentifiers(id, i);
    printf("%.*s %.*s\n", kind, target);
    printf("\n");
    return true;
}
|---|
| 178 |
|
|---|
| 179 |
// should be a unittest: |
|---|
| 180 |
// Smoke-test driver: feeds a fixed list of mangled symbols to demangle(),
// which prints each decoded form. Should eventually become a unittest.
int main(char[][] args) {
    static char[][] samples = [
        // these were taken from phobos.lib:
        "_D3std6stream4File5_ctorFT3std1c7windows7windows6HANDLEE8FileModeZC3std6stream4File",
        "_Class_3std6socket9UdpSocket",
        "_D3std6stream4File6handleFZT3std1c7windows7windows6HANDLE",
        "_D3std6stream12BufferedFile6createFAaE8FileModeZv",
        "_D3std5math24polyFeAeZe",
        "_D3std3uri15decodeComponentFAaZAa",
        "_D3std7windows8registry17Reg_CreateKeyExA_FT3std7windows8registry4HKEYAakE6REGSAMPvJT3std7windows8registry4HKEYJkZi",
        "_D3std6string5ifindFAawZi",
        "_D3std6stream12EndianStream4readFJqZv",
        // these were taken from the compiled EXE of this module:
        "_D8demangle18extractidentifiersFAaKiZAa",
        "_D8demangle15extracttypeinfoFAaKiZAa"
    ];

    // The result of each call is deliberately ignored; only the printed
    // output matters here.
    foreach (char[] symbol; samples)
        demangle(symbol);

    return 0;
}
|---|
| 196 |
|
|---|
| 197 |
// |
|---|
| 198 |
// Demangling algorithm: |
|---|
| 199 |
// |
|---|
| 200 |
// 1. check the prefix of the mangled name. |
|---|
| 201 |
// a. if _D then D-linkage function |
|---|
| 202 |
// b. if __Class_ then class definition |
|---|
| 203 |
// c. if __init_ then init-table for class |
|---|
| 204 |
// d. if __vtbl_ then inherited class/interface for class |
|---|
| 205 |
// e. if __modctor_ then constructor for class |
|---|
| 206 |
// f. if __moddtor_ then destructor for class |
|---|
| 207 |
// g. if __ModuleInfo_ then module definition |
|---|
| 208 |
// 2. To parse an identifier: |
|---|
| 209 |
// 1. read digits until non-digit character |
|---|
| 210 |
// 2. convert that collection of digits to integer |
|---|
| 211 |
// 3. use integer as length to extract identifier |
|---|
| 212 |
// 4. separate the identifier from the next one with '.' |
|---|
| 213 |
// 5. repeat steps 1-4 until no initial digit character found. |
|---|
| 214 |
// 3. To parse the parameter list of a function definition: |
|---|
| 215 |
// 1. make sure function starts with '_D', has an identifier, and then an 'F' |
|---|
| 216 |
// 2. look up each type code in the table in "extracttypeinfo" to choose the appropriate action. |
|---|
| 217 |
// 3. all lower-case letters are basic types |
|---|
| 218 |
// 4. all upper-case letters are complex types (like arrays, structs, classes, etc) |
|---|
| 219 |
// 5. upper-case letters are either followed by lower-case letters (basic types) or |
|---|
| 220 |
// identifiers. |
|---|
| 221 |
// |
|---|