1 module dython;
2 
3 import std.algorithm;
4 import std.array;
5 import std.range;
6 import std.uni;
7 
8 pure @safe:
9 
10 pragma(inline, true) {
11     ///
12     S dythonize(S: const(char)[ ] = string)(const(char)[ ] source) {
13         return parser(source).parse();
14     }
15 
16     ///
17     string dythonizeFile(string fileName)() {
18         return dythonizeFile!(string, fileName);
19     }
20 
21     /// ditto
22     S dythonizeFile(S: const(char)[ ], string fileName)() {
23         auto p = parser(import(fileName));
24         p.sink ~= "#line 1 \"" ~ fileName ~ "\"\n";
25         return p.parse();
26     }
27 }
28 
29 private:
30 
//Tells whether `c` may appear inside an identifier: an underscore or anything that
//`std.uni.isAlphaNum` accepts.
pragma(inline, true)
bool isIdent(dchar c) nothrow @nogc {
    if (c == '_')
        return true;
    return c.isAlphaNum;
}
35 
//These snippets are appended to the generated code rather than raised as exceptions while
//parsing, so that the compiler reports the error in the user code and not in the library.
enum string indentationError = q{static assert(false, "Indentation error");};
enum string mixedIndentationError =
    q{static assert(false, "Indentation error: tabs and spaces can't be mixed together");};
43 
//Describes which punctuation the end of a line allows the parser to insert.
enum Emit: ubyte {
    closingBraceOnly = 0,//Only `}` (on dedent); neither `;` nor `{`.
    braceOnly = 1,//`{` on indent and `}` on dedent, but no `;`.
    everything = 2,//`;` as well as both braces.
}
49 
//Indentation bookkeeping for a single brace-delimited region of the source.
struct BracedBlock {
    nothrow pure @safe:

    int[ ] levels;//Stack of indentation widths; `lTop` indexes the innermost one.
    int lTop;//Position of the innermost level within `levels`.
    int parenCount;//Open parentheses and brackets; braces are never counted here.
    int curLineIndentation;//Indentation width measured for the line being scanned.

    //Gives read/write access to the innermost indentation level.
    pragma(inline, true)
    @property ref int level() @nogc {
        return levels[lTop];
    }

    //Makes `value` the new innermost level, appending to `levels` when every slot is taken.
    void pushLevel(int value)
    in {
        assert(value > level, "Indentation does not increase");
    }
    body {
        lTop++;
        if (lTop != levels.length)
            levels[lTop] = value;//Reuse a slot left over from an earlier, deeper nesting.
        else
            levels ~= value;
    }
}
74 
//Single-pass scanner that copies the source to `sink`, inserting `;`, `{` and `}` wherever
//line breaks and indentation changes imply them.
//
//An instance must be prepared before `parse` is called: at least one block has to be pushed
//and `checkpoint` has to equal `s` (the `parser` factory function does both).
struct Parser {
    pure @safe:

    const(char)[ ] s;//Not yet scanned remainder of the source.
    const(char)[ ] checkpoint;//Scanned but not yet copied text; `s` is always its tail.
    Appender!(char[ ]) sink;//Translated output.
    Appender!(char[ ]) stringMarker;//for q"EOF ... EOF".
    char indentChar = '\0';//' ' or '\t'.
    bool bol = true;//True until the first token of the current line has been processed.
    Emit now;//What the line being scanned allows to be inserted after it.
    Emit delayed;//`now` of the previous line; consumed by the first token of the next one.
    int qBraceCount;//Brace depth inside q{...}; zero outside of token strings.
    int bTop = -1;//Index of the innermost block in `blocks`.
    BracedBlock[ ] blocks;//Stack of braced blocks; slots above `bTop` are kept for reuse.

    //Utility functions:

    //The innermost braced block.
    pragma(inline, true)
    @property ref BracedBlock block() nothrow @nogc {
        return blocks[bTop];
    }

    //Skips one code point, if there is any.
    void skip1() @nogc {
        if (!s.empty)
            s.popFront();
    }

    //Skips one code unit, if there is any; only for characters known to be ASCII.
    void skipAscii1() nothrow @nogc {
        if (!s.empty)
            s = s[1 .. $];
    }

    //Opens a new braced block, reusing a previously allocated slot when one is available.
    void pushBlock() nothrow {
        if (++bTop == blocks.length)
            blocks ~= BracedBlock(uninitializedArray!(int[ ])(16));
        else
            block.lTop = block.parenCount = block.curLineIndentation = 0;
        block.level = -1;//Unknown until the first line of the block is seen.
    }

    //Advances `s` to the next line break (or to the end of input) without consuming it.
    void skipTillEol() {
        s = s.find!(ch => ch.among!('\n', '\r', '\u2028', '\u2029'));
    }

    //Width, in code units, of the character that ends the scanned part of `checkpoint`.
    //Walks back over UTF-8 continuation bytes (10xxxxxx) until a lead byte is reached.
    size_t consumedCharWidth() const nothrow @nogc {
        const consumed = checkpoint[0 .. $ - s.length];
        size_t width = 1;
        while (width < consumed.length && (consumed[$ - width] & 0xC0) == 0x80)
            width++;
        return width;
    }

    //Grammar functions:

    //Ends the current logical line: flushes everything before the character that has just
    //been consumed (a line break, or the first '/' of a `//` comment) and remembers in
    //`delayed` what may be inserted once the next line's indentation is known.
    //Does nothing on empty lines, inside parentheses/brackets and inside q{...}.
    void processNewline() nothrow {
        if (bol || block.parenCount || qBraceCount)
            return;
        //U+2028 and U+2029 occupy three code units, so the width cannot be assumed to be 1;
        //otherwise inserted punctuation would split the multi-byte sequence.
        const split = checkpoint.length - s.length - consumedCharWidth();
        sink ~= checkpoint[0 .. split];
        checkpoint = checkpoint[split .. $];
        delayed = now;
        now = Emit.closingBraceOnly;
        bol = true;
    }

    //Measures the indentation at the start of a line and stores it in the current block.
    //Appends `mixedIndentationError` to the output when the line uses a whitespace character
    //other than the first one seen in the file.
    void processSignificantWhitespace() {
        if (block.parenCount || qBraceCount)
            return;
        else if (s.empty) {
            block.curLineIndentation = 0;
            return;
        }
        dchar c = s.front;
        if (!c.among!(' ', '\t'))
            block.curLineIndentation = 0;
        else {
            if (indentChar) {
                if (c != indentChar)
                    sink ~= mixedIndentationError;
            } else
                indentChar = cast(char)c;//Remember the first seen whitespace character in the file.
            const temp = s;
            s = s[1 .. $].find!`a != b`(indentChar);
            if (!s.empty && s.front.among!(' ', '\t'))
                sink ~= mixedIndentationError;
            block.curLineIndentation = cast(int)(temp.length - s.length);
        }
    }

    //Called for the first token of a line: compares the line's indentation with the current
    //level and appends the `;`, `{` or `}` that `delayed` permits.
    void processFirstWord() nothrow {
        if (!bol || block.parenCount || qBraceCount)
            return;
        if (block.level < 0) {
            //Initialize the level with that of the first line in the block.
            block.level = block.curLineIndentation;
            assert(delayed == Emit.closingBraceOnly);
        }
        if (block.curLineIndentation == block.level) {
            if (delayed == Emit.everything)
                sink ~= ';';//The most common case.
        } else if (block.curLineIndentation > block.level) {
            if (delayed != Emit.closingBraceOnly) {
                sink ~= '{';
                block.pushLevel(block.curLineIndentation);
            }
        } else {
            if (delayed == Emit.everything)
                sink ~= ';';
            //Search outer levels, innermost first, for one not deeper than this line.
            auto found = (
                block.levels[0 .. block.lTop]
                .retro()
                .find!(level => level <= block.curLineIndentation)
            );
            if (found.empty) {
                //Dedented past the zeroth level, silently allow.
                sink ~= repeat('}', block.lTop);
                block.lTop = 0;
            } else {
                //The line must return exactly to an enclosing level.
                if (found.front < block.curLineIndentation)
                    sink ~= indentationError;
                sink ~= repeat('}', block.lTop - (found.length - 1));
                block.lTop = cast(int)found.length - 1;
            }
        }
        delayed = Emit.closingBraceOnly;
    }

    //',', '=', '>': a line ending with one of these continues on the next line.
    void processSeparator() nothrow {
        processFirstWord();
        now = Emit.closingBraceOnly;
        bol = false;
    }

    //':', ';': a line ending with one of these may open an indented block but needs no ';'.
    void processTerminator() nothrow {
        processFirstWord();
        now = Emit.braceOnly;
        bol = false;
    }

    //'\\': flushes the text before the backslash and drops the backslash itself from the
    //output; the line then counts as unfinished.
    void processBackslash() nothrow {
        if (qBraceCount)
            return;
        sink ~= checkpoint[0 .. $ - s.length - 1];
        checkpoint = s;
        now = Emit.closingBraceOnly;
        bol = false;
    }

    //'(', '['
    void processParen() nothrow {
        if (qBraceCount)
            return;
        processFirstWord();
        block.parenCount++;
    }

    //')', ']'
    void processCloseParen() nothrow @nogc {
        if (qBraceCount)
            return;
        if (block.parenCount)
            block.parenCount--;
        now = Emit.everything;
        bol = false;
    }

    //'{': opens a new braced block, or just deepens the nesting inside q{...}.
    void processBrace() nothrow {
        if (qBraceCount)
            qBraceCount++;
        else {
            processFirstWord();
            pushBlock();
            now = Emit.closingBraceOnly;
            bol = true;
        }
    }

    //'}': terminates the pending statement, closes every indentation level that is still
    //open in the innermost block and pops that block. Inside q{...} only the depth changes.
    void processCloseBrace() nothrow {
        if (qBraceCount)
            qBraceCount--;
        else {
            if (delayed == Emit.everything)
                sink ~= ';';
            //`delayed` has been consumed. Without the reset, a '{' or '}' later on the same
            //line (e.g. `} else {`) or the end of input would act on the stale value and
            //emit a second ';' or trip the assertion in `processFirstWord`.
            delayed = Emit.closingBraceOnly;
            sink ~= checkpoint[0 .. $ - s.length - 1];
            checkpoint = checkpoint[$ - s.length - 1 .. $];
            if (now == Emit.everything)
                sink ~= ';';
            if (bTop) {
                sink ~= repeat('}', block.lTop);//Close everything in a block.
                bTop--;
            }
            now = Emit.braceOnly;
            bol = false;
        }
    }

    //Common wrapper for string-like literals; `handler` skips the literal's body.
    void processSomeString(alias handler)() {
        processFirstWord();
        handler();
        now = Emit.everything;
        bol = false;
    }

    //Skips past the closing `delimiter`, honouring backslash escapes.
    void skipString(char delimiter)() {
        while (!s.empty) {
            const dchar c = s.front;
            s.popFront();
            if (c == delimiter)
                return;
            if (c == '\\')
                skip1();
        }
    }

    //r"\d+:\d+"
    void skipRawString(char delimiter)() {
        s = s.find(delimiter);
        skipAscii1();
    }

    //Skips the body of a delimited string; `s` must point right after the opening `q"`.
    void skipDelimitedString() {
        if (s.empty)
            return;
        const opening = s;//Still includes the first character of the delimiter.
        const dchar delim = s.front;
        dchar closeDelim;
        s.popFront();
        switch (delim) {
            case '(': closeDelim = ')'; break;
            case '[': closeDelim = ']'; break;
            case '{': closeDelim = '}'; break;
            case '<': closeDelim = '>'; break;
            default:
                if (isIdent(delim)) {
                    //q"EOF ... EOF"
                    s = s.find!(ch => !isIdent(ch));

                    //The terminator is a line break, the whole identifier and a quote.
                    //The identifier is sliced from `opening` so that its first character,
                    //already popped from `s`, is part of the marker.
                    stringMarker.clear();
                    stringMarker.reserve(opening.length - s.length + 2);
                    stringMarker ~= '\n';
                    stringMarker ~= opening[0 .. $ - s.length];
                    stringMarker ~= '"';

                    s = s.find(stringMarker.data);
                    if (!s.empty)
                        s = s[stringMarker.data.length .. $];
                } else {
                    //q"/just a "test" string/"
                    s = s.find(delim);
                    if (!s.empty) {
                        s.popFront();
                        skip1();//'"'
                    }
                }
                return;
        }

        //q"(ab(cd)ef)"
        int depth = 1;
        while (!s.empty) {
            const dchar c = s.front;
            s.popFront();
            if (c == delim)
                depth++;
            else if (c == closeDelim && !--depth) {
                skip1();//'"'
                return;
            }
        }
    }

    //q{a > b}
    void processTokenString() nothrow {
        processFirstWord();
        qBraceCount++;
    }

    //'/': a line comment, a block comment, a nesting block comment or a plain operator.
    void processSlash() {
        if (s.empty) {
            now = Emit.everything;
            bol = false;
            return;
        }
        dchar c = s.front;
        if (c == '/') {
            //A line comment ends the logical line right before itself.
            processNewline();
            skipTillEol();
            skip1();
            processSignificantWhitespace();
        } else if (c == '*') {
            s = s[1 .. $].find(`*/`);
            if (!s.empty)
                s = s[2 .. $];
        } else if (c == '+') {
            int depth = 1;
            s = s[1 .. $];
            while (!s.empty) {
                c = s.front;
                s.popFront();
                if (c == '+') {
                    if (!s.empty && s.front == '/') {
                        s = s[1 .. $];
                        if (!--depth)
                            return;
                    }
                } else if (c == '/') {
                    if (!s.empty && s.front == '+') {
                        s = s[1 .. $];
                        depth++;
                    }
                }
            }
        } else {
            processFirstWord();
            now = Emit.everything;
            bol = false;
        }
    }

    //#line 123 "main.dy"
    void processHash() {
        skipTillEol();
        skip1();
        //The directive itself leaves `now`, `delayed` and `bol` untouched, but its line break
        //has been consumed here, so the indentation of the following line has to be measured
        //(the same is done after a `//` comment).
        processSignificantWhitespace();
    }

    //Translates the whole source and returns the accumulated output.
    char[ ] parse() {
        processSignificantWhitespace();

        parseLoop:
        while (!s.empty) {
            const dchar c = s.front;
            s.popFront();
            if (isIdent(c)) {
                if (c == 'r') {
                    if (!s.empty && s.front == '"') {
                        s.popFront();
                        processSomeString!(skipRawString!'"');
                        continue;
                    }
                } else if (c == 'q') {
                    if (!s.empty) {
                        const dchar c2 = s.front;
                        if (c2 == '"') {
                            s.popFront();
                            processSomeString!skipDelimitedString();
                            continue;
                        } else if (c2 == '{') {
                            s.popFront();
                            processTokenString();
                            continue;
                        }
                    }
                } else if (c == '_' && s.skipOver(`_EOF__`) && (s.empty || !isIdent(s.front))) {
                    checkpoint.length -= s.length;//Trim the source.
                    break parseLoop;
                }
                //An ordinary word: identifier, keyword or number.
                processFirstWord();
                s = s.find!(ch => !isIdent(ch));
                now = Emit.everything;
                bol = false;
            } else
                switch (c) {
                    case ' ': case '\t': case '\v': case '\f':
                        break;

                    case '\n': case '\r': case '\u2028': case '\u2029':
                        processNewline();
                        processSignificantWhitespace();
                        break;

                    case ',': case '=': case '>':
                        processSeparator();
                        break;

                    case ':': case ';':
                        processTerminator();
                        break;

                    case '\\':
                        processBackslash();
                        break;

                    case '(': case '[':
                        processParen();
                        break;

                    case ')': case ']':
                        processCloseParen();
                        break;

                    case '/':
                        processSlash();
                        break;

                    case '"':
                        processSomeString!(skipString!'"');
                        break;

                    case '\'':
                        processSomeString!(skipString!'\'');
                        break;

                    case '`':
                        processSomeString!(skipRawString!'`');
                        break;

                    case '{':
                        processBrace();
                        break;

                    case '}':
                        processCloseBrace();
                        break;

                    case '#':
                        processHash();
                        break;

                    case '\0': case '\x1A'://Treated as EOF.
                        checkpoint.length -= s.length;//Trim the source.
                        break parseLoop;

                    default:
                        processFirstWord();
                        now = Emit.everything;
                        bol = false;
                }
        }
        //Finish the last statement and close whatever is still open.
        if (delayed == Emit.everything)
            sink ~= ';';
        sink ~= checkpoint;
        if (now == Emit.everything)
            sink ~= ';';
        //One '}' per unclosed braced block plus one per open indentation level.
        sink ~= repeat('}', bTop + sum(blocks[0 .. bTop + 1].map!`a.lTop`));
        return sink.data;
    }
}
497 
//Builds a `Parser` over `source` that is ready for `parse()`: buffers are allocated, the
//outermost block is pushed and the checkpoint is placed at the beginning of the input.
auto parser(const(char)[ ] source) nothrow {
    Parser result = { source, };

    //Reserve 1/16 of source for syntactic garbage.
    result.sink.reserve(source.length + (source.length >> 4));

    //Preallocate a few blocks, each with room for 16 indentation levels.
    result.blocks = minimallyInitializedArray!(BracedBlock[ ])(4);
    foreach (ref blk; result.blocks)
        blk.levels = uninitializedArray!(int[ ])(16);

    result.pushBlock();
    result.checkpoint = result.s;
    return result;
}