module dython;

import std.algorithm;
import std.array;
import std.range;
import std.uni;

pure @safe:

pragma(inline, true) {
    /**
     * Translates indentation-structured source text into text in which the
     * block structure is spelled out with `{`, `}` and `;`.
     *
     * Params:
     *     S      = string type of the result (defaults to `string`).
     *     source = the text to translate.
     *
     * Returns: the translated text. Indentation problems are not thrown; they
     * are reported by embedding `static assert(false, ...)` in the result.
     */
    S dythonize(S: const(char)[] = string)(const(char)[] source) {
        return parser(source).parse();
    }

    /**
     * Translates a file that is read at compile time with a string import
     * (`import(fileName)`). The result starts with a `#line 1 "fileName"`
     * directive followed by a newline.
     *
     * Params:
     *     fileName = name of the file to import and translate.
     */
    string dythonizeFile(string fileName)() {
        return dythonizeFile!(string, fileName);
    }

    /// ditto
    S dythonizeFile(S: const(char)[], string fileName)() {
        auto p = parser(import(fileName));
        p.sink ~= "#line 1 \"" ~ fileName ~ "\"\n";
        return p.parse();
    }
}

private:

/// Returns `true` for characters that may appear in an identifier:
/// an underscore or anything `isAlphaNum` accepts.
pragma(inline, true)
bool isIdent(dchar c) nothrow @nogc {
    return c == '_' || isAlphaNum(c);
}

enum {
    //Emit static assertions (instead of, for example, throwing exceptions while parsing) so that
    //a compiler reports an error in the user code and not in the library.
    indentationError = q{static assert(false, "Indentation error");},
    mixedIndentationError =
        q{static assert(false, "Indentation error: tabs and spaces can't be mixed together");},
}

/// What the parser is allowed to insert when the next line begins.
enum Emit: ubyte {
    closingBraceOnly, /// Neither `;` nor `{` is inserted; dedenting still closes braces.
    braceOnly,        /// `{` is inserted if the indentation grows; no `;` is inserted.
    everything,       /// `;` is inserted, and `{` if the indentation grows.
}

/// Indentation bookkeeping for one explicitly braced region of the source
/// (the whole file counts as the outermost region).
struct BracedBlock {
nothrow pure @safe:

    int[] levels;            /// Stack of indentation widths; `levels[0 .. lTop + 1]` is in use.
    int lTop;                /// Index of the current (innermost) entry of `levels`.
    int parenCount;          //Brackets (but not braces) are included too.
    int curLineIndentation;  /// Indentation width measured for the current line.

    /// The current (innermost) indentation level; negative until the first
    /// line of the block has been seen.
    pragma(inline, true)
    @property ref int level() @nogc {
        return levels[lTop];
    }

    /// Pushes a deeper indentation level; `value` must exceed the current one.
    void pushLevel(int value)
    in {
        assert(value > level, "Indentation does not increase");
    }
    body {
        if (++lTop == levels.length)
            levels ~= value;
        else
            level = value;
    }
}

/// Single-pass scanner that copies the source into `sink`, inserting `;`,
/// `{` and `}` where the indentation structure calls for them.
struct Parser {
pure @safe:

    //`s` is the input that is still unread; `checkpoint` is the tail of the
    //source starting at the first character that has not been copied to `sink` yet.
    const(char)[] s, checkpoint;
    Appender!(char[]) sink;           //The output being built.
    Appender!(char[]) stringMarker;   //for q"EOF ... EOF".
    char indentChar = '\0';           //' ' or '\t'.
    bool bol = true;                  //No significant token has been seen on the current line yet.
    //`now` describes the line being read; at a newline it is moved into
    //`delayed`, which is consumed when the next line's first token shows up.
    Emit now, delayed;
    int qBraceCount;                  //Brace nesting depth inside a q{...} token string.
    int bTop = -1;                    //Index of the current entry of `blocks`.
    BracedBlock[] blocks;

    //Utility functions:

    /// The innermost braced block.
    pragma(inline, true)
    @property ref BracedBlock block() nothrow @nogc {
        return blocks[bTop];
    }

    /// Skips one (possibly multi-byte) character, if any is left.
    void skip1() @nogc {
        if (!s.empty)
            s.popFront();
    }

    /// Skips exactly one code unit, if any is left.
    void skipAscii1() nothrow @nogc {
        if (!s.empty)
            s = s[1 .. $];
    }

    /// Enters a new braced block, reusing a previously allocated slot when possible.
    void pushBlock() nothrow {
        if (++bTop == blocks.length)
            blocks ~= BracedBlock(uninitializedArray!(int[])(16));
        else
            block.lTop = block.parenCount = block.curLineIndentation = 0;
        //A negative level means "not known yet"; `processFirstWord` fills it in.
        block.level = -1;
    }

    /// Advances `s` to the next line terminator (or to the end of input).
    void skipTillEol() {
        s = s.find!(c => c.among!('\n', '\r', '\u2028', '\u2029'));
    }

    //Grammar functions:

    /// Handles a line terminator that has just been consumed: flushes the
    /// finished line to `sink` and remembers `now` in `delayed`.
    /// Blank lines and newlines inside parentheses or token strings are ignored.
    void processNewline() nothrow {
        if (bol || block.parenCount || qBraceCount)
            return;
        //`- 1` excludes the terminator itself, which stays in `checkpoint`.
        sink ~= checkpoint[0 .. $ - s.length - 1];
        checkpoint = checkpoint[$ - s.length - 1 .. $];
        delayed = now;
        now = Emit.closingBraceOnly;
        bol = true;
    }

    /// Measures the indentation at the start of a line and stores it in
    /// `block.curLineIndentation`. Appends `mixedIndentationError` to the
    /// output when tabs and spaces are mixed.
    void processSignificantWhitespace() {
        if (block.parenCount || qBraceCount)
            return;
        else if (s.empty) {
            block.curLineIndentation = 0;
            return;
        }
        dchar c = s.front;
        if (!c.among!(' ', '\t'))
            block.curLineIndentation = 0;
        else {
            if (indentChar) {
                if (c != indentChar)
                    sink ~= mixedIndentationError;
            } else
                indentChar = cast(char)c;//Remember the first seen whitespace character in the file.
            const temp = s;
            s = s[1 .. $].find!`a != b`(indentChar);
            //The run of `indentChar` is directly followed by the other kind of whitespace.
            if (!s.empty && s.front.among!(' ', '\t'))
                sink ~= mixedIndentationError;
            block.curLineIndentation = cast(int)(temp.length - s.length);
        }
    }

    /// Called for the first significant token of a line: compares the line's
    /// indentation with the current level and emits the `;`, `{` or `}`
    /// that `delayed` allows. Does nothing for later tokens of the line or
    /// inside parentheses / token strings.
    void processFirstWord() nothrow {
        if (!bol || block.parenCount || qBraceCount)
            return;
        if (block.level < 0) {
            //Initialize the level with that of the first line in the block.
            block.level = block.curLineIndentation;
            assert(delayed == Emit.closingBraceOnly);
        }
        if (block.curLineIndentation == block.level) {
            if (delayed == Emit.everything)
                sink ~= ';';//The most common case.
        } else if (block.curLineIndentation > block.level) {
            if (delayed != Emit.closingBraceOnly) {
                sink ~= '{';
                block.pushLevel(block.curLineIndentation);
            }
        } else {
            if (delayed == Emit.everything)
                sink ~= ';';
            //Search the enclosing levels, innermost first, for one that is
            //not deeper than the current line.
            auto found = (
                block.levels[0 .. block.lTop]
                .retro()
                .find!(level => level <= block.curLineIndentation)
            );
            if (found.empty) {
                //Dedented past the zeroth level, silently allow.
                sink ~= repeat('}', block.lTop);
                block.lTop = 0;
            } else {
                //The line sits between two known levels.
                if (found.front < block.curLineIndentation)
                    sink ~= indentationError;
                sink ~= repeat('}', block.lTop - (found.length - 1));
                block.lTop = cast(int)found.length - 1;
            }
        }
        delayed = Emit.closingBraceOnly;
    }

    /// Handles `,`, `=` and `>`: a line ending in one of them gets neither `;` nor `{`.
    void processSeparator() nothrow {
        processFirstWord();
        now = Emit.closingBraceOnly;
        bol = false;
    }

    /// Handles `:` and `;`: a line ending in one of them may open a block but gets no `;`.
    void processTerminator() nothrow {
        processFirstWord();
        now = Emit.braceOnly;
        bol = false;
    }

    /// Handles a backslash outside of token strings: everything before it is
    /// flushed, the backslash itself is dropped from the output, and the
    /// state is set so that a newline that directly follows emits nothing.
    void processBackslash() nothrow {
        if (qBraceCount)
            return;
        sink ~= checkpoint[0 .. $ - s.length - 1];
        checkpoint = s;
        now = Emit.closingBraceOnly;
        bol = false;
    }

    /// Handles `(` and `[`.
    void processParen() nothrow {
        if (qBraceCount)
            return;
        processFirstWord();
        block.parenCount++;
    }

    /// Handles `)` and `]`; unbalanced closers are tolerated.
    void processCloseParen() nothrow @nogc {
        if (qBraceCount)
            return;
        if (block.parenCount)
            block.parenCount--;
        now = Emit.everything;
        bol = false;
    }

    /// Handles `{`: either one more nesting level of a token string or the
    /// start of a new explicitly braced block.
    void processBrace() nothrow {
        if (qBraceCount)
            qBraceCount++;
        else {
            processFirstWord();
            pushBlock();
            now = Emit.closingBraceOnly;
            bol = true;
        }
    }

    /// Handles `}`: either one nesting level less of a token string or the
    /// end of an explicitly braced block, in which case the pending `;` and
    /// the closing braces of all indentation levels opened in it are emitted.
    void processCloseBrace() nothrow {
        if (qBraceCount)
            qBraceCount--;
        else {
            if (delayed == Emit.everything)
                sink ~= ';';
            //`delayed` has been consumed. Without this reset the same `;`
            //would be emitted again by the next `}` or at the end of input,
            //and the assertion in `processFirstWord` would fail for a block
            //opened later on this line (e.g. `} else {`).
            delayed = Emit.closingBraceOnly;
            sink ~= checkpoint[0 .. $ - s.length - 1];
            checkpoint = checkpoint[$ - s.length - 1 .. $];
            if (now == Emit.everything)
                sink ~= ';';
            //The outermost block (index 0) is never popped.
            if (bTop) {
                sink ~= repeat('}', block.lTop);//Close everything in a block.
                bTop--;
            }
            now = Emit.braceOnly;
            bol = false;
        }
    }

    /// Handles a string-like literal whose opening delimiter has been
    /// consumed; `handler` skips the rest of the literal.
    void processSomeString(alias handler)() {
        processFirstWord();
        handler();
        now = Emit.everything;
        bol = false;
    }

    /// Skips up to and including the closing `delimiter`, honouring backslash escapes.
    void skipString(char delimiter)() {
        while (!s.empty) {
            const dchar c = s.front;
            s.popFront();
            if (c == delimiter)
                return;
            if (c == '\\')
                skip1();
        }
    }

    //r"\d+:\d+"
    /// Skips up to and including the closing `delimiter`; no escapes are recognised.
    void skipRawString(char delimiter)() {
        s = s.find(delimiter);
        skipAscii1();
    }

    /// Skips the remainder of a `q"..."` delimited string; `s` must point
    /// just past the opening quote.
    void skipDelimitedString() {
        if (s.empty)
            return;
        const dchar delim = s.front;
        dchar closeDelim;
        s.popFront();
        switch (delim) {
            case '(': closeDelim = ')'; break;
            case '[': closeDelim = ']'; break;
            case '{': closeDelim = '}'; break;
            case '<': closeDelim = '>'; break;
            default:
                if (isIdent(delim)) {
                    //q"EOF ... EOF"
                    const temp = s;
                    s = s.find!(c => !isIdent(c));

                    //Build the terminator: newline, the rest of the identifier, closing quote.
                    stringMarker.clear();
                    stringMarker.reserve(temp.length - s.length + 2);
                    stringMarker ~= '\n';
                    stringMarker ~= temp[0 .. $ - s.length];
                    stringMarker ~= '"';

                    s = s.find(stringMarker.data);
                    if (!s.empty)
                        s = s[stringMarker.data.length .. $];
                } else {
                    //q"/just a "test" string/"
                    s = s.find(delim);
                    if (!s.empty) {
                        s.popFront();
                        skip1();//'"'
                    }
                }
                return;
        }

        //q"(ab(cd)ef)"
        int depth = 1;
        while (!s.empty) {
            const dchar c = s.front;
            s.popFront();
            if (c == delim)
                depth++;
            else if (c == closeDelim && !--depth) {
                skip1();//'"'
                return;
            }
        }
    }

    //q{a > b}
    /// Enters a token string; its content is scanned by the main loop with
    /// all indentation processing switched off while `qBraceCount` is non-zero.
    void processTokenString() nothrow {
        processFirstWord();
        qBraceCount++;
    }

    /// Handles `/`: a line comment, a block comment, a nesting block comment
    /// or an ordinary operator.
    void processSlash() {
        if (s.empty) {
            now = Emit.everything;
            bol = false;
            return;
        }
        dchar c = s.front;
        if (c == '/') {
            //A line comment ends the line as far as the indentation logic is concerned.
            processNewline();
            skipTillEol();
            skip1();
            processSignificantWhitespace();
        } else if (c == '*') {
            s = s[1 .. $].find(`*/`);
            if (!s.empty)
                s = s[2 .. $];
        } else if (c == '+') {
            int depth = 1;
            s = s[1 .. $];
            while (!s.empty) {
                c = s.front;
                s.popFront();
                if (c == '+') {
                    if (!s.empty && s.front == '/') {
                        s = s[1 .. $];
                        if (!--depth)
                            return;
                    }
                } else if (c == '/') {
                    if (!s.empty && s.front == '+') {
                        s = s[1 .. $];
                        depth++;
                    }
                }
            }
        } else {
            processFirstWord();
            now = Emit.everything;
            bol = false;
        }
    }

    //#line 123 "main.dy"
    /// Skips a `#` line including one line-terminator character.
    void processHash() {
        skipTillEol();
        skip1();
        //Does not affect the parser state at all.
    }

    /// Runs the scanner over the whole input.
    ///
    /// Returns: the contents of `sink`, i.e. the source with `;`, `{` and `}` inserted.
    char[] parse() {
        processSignificantWhitespace();

    parseLoop:
        while (!s.empty) {
            const dchar c = s.front;
            s.popFront();
            if (isIdent(c)) {
                if (c == 'r') {
                    if (!s.empty && s.front == '"') {
                        s.popFront();
                        processSomeString!(skipRawString!'"');
                        continue;
                    }
                } else if (c == 'q') {
                    if (!s.empty) {
                        const dchar c2 = s.front;
                        if (c2 == '"') {
                            s.popFront();
                            processSomeString!skipDelimitedString();
                            continue;
                        } else if (c2 == '{') {
                            s.popFront();
                            processTokenString();
                            continue;
                        }
                    }
                } else if (c == '_' && s.skipOver(`_EOF__`) && (s.empty || !isIdent(s.front))) {
                    checkpoint.length -= s.length;//Trim the source.
                    break parseLoop;
                }
                //An ordinary identifier, keyword or number: skip the rest of it.
                processFirstWord();
                s = s.find!(c => !isIdent(c));
                now = Emit.everything;
                bol = false;
            } else
                switch (c) {
                    case ' ': case '\t': case '\v': case '\f':
                        break;

                    case '\n': case '\r': case '\u2028': case '\u2029':
                        processNewline();
                        processSignificantWhitespace();
                        break;

                    case ',': case '=': case '>':
                        processSeparator();
                        break;

                    case ':': case ';':
                        processTerminator();
                        break;

                    case '\\':
                        processBackslash();
                        break;

                    case '(': case '[':
                        processParen();
                        break;

                    case ')': case ']':
                        processCloseParen();
                        break;

                    case '/':
                        processSlash();
                        break;

                    case '"':
                        processSomeString!(skipString!'"');
                        break;

                    case '\'':
                        processSomeString!(skipString!'\'');
                        break;

                    case '`':
                        processSomeString!(skipRawString!'`');
                        break;

                    case '{':
                        processBrace();
                        break;

                    case '}':
                        processCloseBrace();
                        break;

                    case '#':
                        processHash();
                        break;

                    case '\0': case '\x1A'://Treated as EOF.
                        checkpoint.length -= s.length;//Trim the source.
                        break parseLoop;

                    default:
                        processFirstWord();
                        now = Emit.everything;
                        bol = false;
                }
        }
        //Flush whatever is still pending for the previous and the current line.
        if (delayed == Emit.everything)
            sink ~= ';';
        sink ~= checkpoint;
        if (now == Emit.everything)
            sink ~= ';';
        //Close every braced block that is still open (`bTop` of them) plus
        //every indentation level opened inside those blocks.
        sink ~= repeat('}', bTop + sum(blocks[0 .. bTop + 1].map!`a.lTop`));
        return sink.data;
    }
}

/// Creates a `Parser` for `source` with pre-allocated buffers and the
/// outermost block already pushed.
auto parser(const(char)[] source) nothrow {
    Parser p = { source, };
    with (p) {
        sink.reserve(s.length + (s.length >> 4));//Reserve 1/16 of source for syntactic garbage.
        blocks = minimallyInitializedArray!(BracedBlock[])(4);
        foreach (ref b; blocks)
            b.levels = uninitializedArray!(int[])(16);
        pushBlock();
        checkpoint = s;
    }
    return p;
}