1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.mal_lang.lib;
18
19 import java.io.File;
20 import java.io.IOException;
21 import java.nio.file.Path;
22 import java.util.ArrayList;
23 import java.util.HashSet;
24 import java.util.List;
25 import java.util.Locale;
26 import java.util.Optional;
27 import java.util.Set;
28
29 public class Parser {
30 private MalLogger LOGGER;
31 private Lexer lex;
32 private Token tok;
33 private Set<File> included;
34 private File currentFile;
35 private Path originPath;
36
37 private Parser(File file, boolean verbose, boolean debug) throws IOException {
38 Locale.setDefault(Locale.ROOT);
39 LOGGER = new MalLogger("PARSER", verbose, debug);
40 var canonicalFile = file.getCanonicalFile();
41 this.lex = new Lexer(canonicalFile);
42 this.included = new HashSet<File>();
43 this.included.add(canonicalFile);
44 this.currentFile = canonicalFile;
45 this.originPath = Path.of(canonicalFile.getParent());
46 }
47
48 private Parser(File file, Path originPath, Set<File> included, boolean verbose, boolean debug)
49 throws IOException {
50 Locale.setDefault(Locale.ROOT);
51 LOGGER = new MalLogger("PARSER", verbose, debug);
52 this.lex = new Lexer(file, originPath.relativize(Path.of(file.getPath())).toString());
53 this.included = included;
54 this.included.add(file);
55 this.currentFile = file;
56 this.originPath = originPath;
57 }
58
59 public static AST parse(File file) throws IOException, CompilerException {
60 return parse(file, false, false);
61 }
62
63 public static AST parse(File file, boolean verbose, boolean debug)
64 throws IOException, CompilerException {
65 return new Parser(file, verbose, debug).parseLog();
66 }
67
68 private static AST parse(
69 File file, Path originPath, Set<File> included, boolean verbose, boolean debug)
70 throws IOException, CompilerException {
71 return new Parser(file, originPath, included, verbose, debug).parseLog();
72 }
73
74 private AST parseLog() throws CompilerException {
75 try {
76 var ast = _parse();
77 LOGGER.print();
78 return ast;
79 } catch (CompilerException e) {
80 LOGGER.print();
81 throw e;
82 }
83 }
84
85
86 private static TokenType[] malFirst = {
87 TokenType.CATEGORY, TokenType.ASSOCIATIONS, TokenType.INCLUDE, TokenType.HASH
88 };
89
90
91 private static TokenType[] assetFirst = {TokenType.ABSTRACT, TokenType.ASSET};
92
93
94 private static TokenType[] attackStepFirst = {
95 TokenType.ALL, TokenType.ANY, TokenType.HASH, TokenType.EXIST, TokenType.NOTEXIST
96 };
97
98 private void _next() throws CompilerException {
99 tok = lex.next();
100 }
101
102 private void _expect(TokenType type) throws CompilerException {
103 if (tok.type != type) {
104 throw exception(type);
105 }
106 _next();
107 }
108
109
110 private AST _parse() throws CompilerException {
111 var ast = new AST();
112 _next();
113
114 while (true) {
115 switch (tok.type) {
116 case CATEGORY:
117 var category = _parseCategory();
118 ast.addCategory(category);
119 break;
120 case ASSOCIATIONS:
121 var associations = _parseAssociations();
122 ast.addAssociations(associations);
123 break;
124 case INCLUDE:
125 var include = _parseInclude();
126 ast.include(include);
127 break;
128 case HASH:
129 var define = _parseDefine();
130 ast.addDefine(define);
131 break;
132 case EOF:
133 return ast;
134 default:
135 throw exception(malFirst);
136 }
137 }
138 }
139
140
141 private AST.ID _parseID() throws CompilerException {
142 switch (tok.type) {
143 case ID:
144 var id = new AST.ID(tok, tok.stringValue);
145 _next();
146 return id;
147 default:
148 throw exception(TokenType.ID);
149 }
150 }
151
152
153 private String _parseString() throws CompilerException {
154 switch (tok.type) {
155 case STRING:
156 var str = tok.stringValue;
157 _next();
158 return str;
159 default:
160 throw exception(TokenType.STRING);
161 }
162 }
163
164
165 private AST.Define _parseDefine() throws CompilerException {
166 var firstToken = tok;
167
168 _expect(TokenType.HASH);
169 var key = _parseID();
170 _expect(TokenType.COLON);
171 var value = _parseString();
172 return new AST.Define(firstToken, key, value);
173 }
174
175
176 private AST.Meta _parseMeta1() throws CompilerException {
177 var type = _parseID();
178 return _parseMeta2(type);
179 }
180
181
182 private AST.Meta _parseMeta2(AST.ID type) throws CompilerException {
183 _expect(TokenType.INFO);
184 _expect(TokenType.COLON);
185 var value = _parseString();
186 return new AST.Meta(type, type, value);
187 }
188
189
190 private List<AST.Meta> _parseMeta1List() throws CompilerException {
191 var meta = new ArrayList<AST.Meta>();
192 while (tok.type == TokenType.ID) {
193 meta.add(_parseMeta1());
194 }
195 return meta;
196 }
197
198
199 private AST _parseInclude() throws CompilerException {
200 _expect(TokenType.INCLUDE);
201 var firstTok = tok;
202 var filename = _parseString();
203 var file = new File(filename);
204
205 if (!file.isAbsolute()) {
206 var currentDir = currentFile.getParent();
207 file = new File(String.format("%s/%s", currentDir, filename));
208 }
209
210 try {
211 file = file.getCanonicalFile();
212 } catch (IOException e) {
213 throw exception(firstTok, e.getMessage());
214 }
215
216 if (included.contains(file)) {
217 return new AST();
218 } else {
219 try {
220 return Parser.parse(file, originPath, included, LOGGER.isVerbose(), LOGGER.isDebug());
221 } catch (IOException e) {
222 throw exception(firstTok, e.getMessage());
223 }
224 }
225 }
226
227
228 private double _parseNumber() throws CompilerException {
229 double val = 0.0;
230 switch (tok.type) {
231 case INT:
232 val = tok.intValue;
233 _next();
234 return val;
235 case FLOAT:
236 val = tok.doubleValue;
237 _next();
238 return val;
239 default:
240 throw exception(TokenType.INT, TokenType.FLOAT);
241 }
242 }
243
244
245 private AST.Category _parseCategory() throws CompilerException {
246 var firstToken = tok;
247
248 _expect(TokenType.CATEGORY);
249 var name = _parseID();
250 var meta = _parseMeta1List();
251 if (tok.type == TokenType.LCURLY) {
252 _next();
253 } else {
254 throw exception(TokenType.ID, TokenType.LCURLY);
255 }
256 var assets = _parseAssetList();
257 if (tok.type == TokenType.RCURLY) {
258 _next();
259 } else {
260 throw exception(assetFirst, TokenType.RCURLY);
261 }
262 return new AST.Category(firstToken, name, meta, assets);
263 }
264
265
266
267 private AST.Asset _parseAsset() throws CompilerException {
268 var firstToken = tok;
269
270 var isAbstract = false;
271 if (tok.type == TokenType.ABSTRACT) {
272 isAbstract = true;
273 _next();
274 }
275 _expect(TokenType.ASSET);
276 var name = _parseID();
277 Optional<AST.ID> parent = Optional.empty();
278 if (tok.type == TokenType.EXTENDS) {
279 _next();
280 parent = Optional.of(_parseID());
281 }
282 var meta = _parseMeta1List();
283 if (tok.type == TokenType.LCURLY) {
284 _next();
285 } else {
286 throw exception(TokenType.ID, TokenType.LCURLY);
287 }
288 var attackSteps = new ArrayList<AST.AttackStep>();
289 var variables = new ArrayList<AST.Variable>();
290 loop:
291 while (true) {
292 switch (tok.type) {
293 case LET:
294 variables.add(_parseVariable());
295 break;
296 case ALL:
297 case ANY:
298 case HASH:
299 case EXIST:
300 case NOTEXIST:
301 attackSteps.add(_parseAttackStep());
302 break;
303 case RCURLY:
304 _next();
305 break loop;
306 default:
307 throw exception(attackStepFirst, TokenType.LET, TokenType.RCURLY);
308 }
309 }
310 return new AST.Asset(firstToken, isAbstract, name, parent, meta, attackSteps, variables);
311 }
312
313
314 private List<AST.Asset> _parseAssetList() throws CompilerException {
315 var assets = new ArrayList<AST.Asset>();
316 while (true) {
317 switch (tok.type) {
318 case ABSTRACT:
319 case ASSET:
320 assets.add(_parseAsset());
321 break;
322 default:
323 return assets;
324 }
325 }
326 }
327
328
329 private AST.AttackStep _parseAttackStep() throws CompilerException {
330 var firstToken = tok;
331
332 var asType = _parseAttackStepType();
333 var name = _parseID();
334 List<AST.ID> tags = new ArrayList<>();
335 while (tok.type == TokenType.AT) {
336 tags.add(_parseTag());
337 }
338 Optional<List<AST.CIA>> cia = Optional.empty();
339 if (tok.type == TokenType.LCURLY) {
340 cia = Optional.of(_parseCIA());
341 }
342 Optional<AST.TTCExpr> ttc = Optional.empty();
343 if (tok.type == TokenType.LBRACKET) {
344 ttc = _parseTTC();
345 }
346 var meta = _parseMeta1List();
347 Optional<AST.Requires> requires = Optional.empty();
348 if (tok.type == TokenType.REQUIRE) {
349 requires = Optional.of(_parseExistence());
350 }
351 Optional<AST.Reaches> reaches = Optional.empty();
352 if (tok.type == TokenType.INHERIT || tok.type == TokenType.OVERRIDE) {
353 reaches = Optional.of(_parseReaches());
354 }
355 return new AST.AttackStep(firstToken, asType, name, tags, cia, ttc, meta, requires, reaches);
356 }
357
358
359 private AST.AttackStepType _parseAttackStepType() throws CompilerException {
360 switch (tok.type) {
361 case ALL:
362 _next();
363 return AST.AttackStepType.ALL;
364 case ANY:
365 _next();
366 return AST.AttackStepType.ANY;
367 case HASH:
368 _next();
369 return AST.AttackStepType.DEFENSE;
370 case EXIST:
371 _next();
372 return AST.AttackStepType.EXIST;
373 case NOTEXIST:
374 _next();
375 return AST.AttackStepType.NOTEXIST;
376 default:
377 throw exception(attackStepFirst);
378 }
379 }
380
381
382 private AST.ID _parseTag() throws CompilerException {
383 _expect(TokenType.AT);
384 return _parseID();
385 }
386
387
388 private List<AST.CIA> _parseCIA() throws CompilerException {
389 _expect(TokenType.LCURLY);
390 List<AST.CIA> cia = new ArrayList<AST.CIA>();
391 if (tok.type != TokenType.RCURLY) {
392 _parseCIAList(cia);
393 }
394 _expect(TokenType.RCURLY);
395 return cia;
396 }
397
398
399 private void _parseCIAList(List<AST.CIA> cia) throws CompilerException {
400 cia.add(_parseCIAClass());
401 while (tok.type == TokenType.COMMA) {
402 _next();
403 cia.add(_parseCIAClass());
404 }
405 }
406
407
408 private AST.CIA _parseCIAClass() throws CompilerException {
409 switch (tok.type) {
410 case C:
411 _next();
412 return AST.CIA.C;
413 case I:
414 _next();
415 return AST.CIA.I;
416 case A:
417 _next();
418 return AST.CIA.A;
419 default:
420 throw exception(TokenType.C, TokenType.I, TokenType.A, TokenType.RCURLY);
421 }
422 }
423
424
425 private Optional<AST.TTCExpr> _parseTTC() throws CompilerException {
426 _expect(TokenType.LBRACKET);
427 Optional<AST.TTCExpr> expr = Optional.empty();
428 if (tok.type != TokenType.RBRACKET) {
429 expr = Optional.of(_parseTTCExpr());
430 } else {
431
432 expr = Optional.of(new AST.TTCFuncExpr(tok, new AST.ID(tok, "Zero"), new ArrayList<>()));
433 }
434 _expect(TokenType.RBRACKET);
435 return expr;
436 }
437
438
439 private AST.TTCExpr _parseTTCExpr() throws CompilerException {
440 var firstToken = tok;
441
442 var lhs = _parseTTCTerm();
443 while (tok.type == TokenType.PLUS || tok.type == TokenType.MINUS) {
444 var addType = tok.type;
445 _next();
446 var rhs = _parseTTCTerm();
447 if (addType == TokenType.PLUS) {
448 lhs = new AST.TTCAddExpr(firstToken, lhs, rhs);
449 } else {
450 lhs = new AST.TTCSubExpr(firstToken, lhs, rhs);
451 }
452 }
453 return lhs;
454 }
455
456
457 private AST.TTCExpr _parseTTCTerm() throws CompilerException {
458 var firstToken = tok;
459
460 var lhs = _parseTTCFact();
461 while (tok.type == TokenType.STAR || tok.type == TokenType.DIVIDE) {
462 var mulType = tok.type;
463 _next();
464 var rhs = _parseTTCFact();
465 if (mulType == TokenType.STAR) {
466 lhs = new AST.TTCMulExpr(firstToken, lhs, rhs);
467 } else {
468 lhs = new AST.TTCDivExpr(firstToken, lhs, rhs);
469 }
470 }
471 return lhs;
472 }
473
474
475 private AST.TTCExpr _parseTTCFact() throws CompilerException {
476 var firstToken = tok;
477
478 var e = _parseTTCPrim();
479 if (tok.type == TokenType.POWER) {
480 _next();
481 e = new AST.TTCPowExpr(firstToken, e, _parseTTCFact());
482 }
483 return e;
484 }
485
486
487
488 private AST.TTCExpr _parseTTCPrim() throws CompilerException {
489 var firstToken = tok;
490 if (tok.type == TokenType.ID) {
491 var function = _parseID();
492 var params = new ArrayList<Double>();
493 if (tok.type == TokenType.LPAREN) {
494 _next();
495 if (tok.type == TokenType.INT || tok.type == TokenType.FLOAT) {
496 params.add(_parseNumber());
497 while (tok.type == TokenType.COMMA) {
498 _next();
499 params.add(_parseNumber());
500 }
501 }
502 _expect(TokenType.RPAREN);
503 }
504 return new AST.TTCFuncExpr(firstToken, function, params);
505 } else if (tok.type == TokenType.LPAREN) {
506 _next();
507 var e = _parseTTCExpr();
508 _expect(TokenType.RPAREN);
509 return e;
510 } else if (tok.type == TokenType.INT || tok.type == TokenType.FLOAT) {
511 double num = _parseNumber();
512 return new AST.TTCNumExpr(firstToken, num);
513 } else {
514 throw exception(TokenType.ID, TokenType.LPAREN, TokenType.INT, TokenType.FLOAT);
515 }
516 }
517
518
519 private AST.Requires _parseExistence() throws CompilerException {
520 var firstToken = tok;
521
522 _expect(TokenType.REQUIRE);
523 var requires = new ArrayList<AST.Expr>();
524 requires.add(_parseExpr());
525 while (tok.type == TokenType.COMMA) {
526 _next();
527 requires.add(_parseExpr());
528 }
529 return new AST.Requires(firstToken, requires);
530 }
531
532
533 private AST.Reaches _parseReaches() throws CompilerException {
534 var firstToken = tok;
535
536 var inherits = false;
537 if (tok.type == TokenType.INHERIT) {
538 inherits = true;
539 } else if (tok.type == TokenType.OVERRIDE) {
540 inherits = false;
541 } else {
542 throw exception(TokenType.INHERIT, TokenType.OVERRIDE);
543 }
544 _next();
545 var reaches = new ArrayList<AST.Expr>();
546 reaches.add(_parseExpr());
547 while (tok.type == TokenType.COMMA) {
548 _next();
549 reaches.add(_parseExpr());
550 }
551 return new AST.Reaches(firstToken, inherits, reaches);
552 }
553
554
555 private AST.Variable _parseVariable() throws CompilerException {
556 var firstToken = tok;
557
558 _expect(TokenType.LET);
559 var id = _parseID();
560 _expect(TokenType.ASSIGN);
561 var e = _parseExpr();
562 return new AST.Variable(firstToken, id, e);
563 }
564
565
566 private AST.Expr _parseExpr() throws CompilerException {
567 var firstToken = tok;
568
569 var lhs = _parseSteps();
570 while (tok.type == TokenType.UNION
571 || tok.type == TokenType.INTERSECT
572 || tok.type == TokenType.MINUS) {
573 var setType = tok.type;
574 _next();
575 var rhs = _parseSteps();
576 if (setType == TokenType.UNION) {
577 lhs = new AST.UnionExpr(firstToken, lhs, rhs);
578 } else if (setType == TokenType.INTERSECT) {
579 lhs = new AST.IntersectionExpr(firstToken, lhs, rhs);
580 } else {
581 lhs = new AST.DifferenceExpr(firstToken, lhs, rhs);
582 }
583 }
584 return lhs;
585 }
586
587
588 private AST.Expr _parseSteps() throws CompilerException {
589 var firstToken = tok;
590
591 var lhs = _parseStep();
592 while (tok.type == TokenType.DOT) {
593 _next();
594 var rhs = _parseStep();
595 lhs = new AST.StepExpr(firstToken, lhs, rhs);
596 }
597 return lhs;
598 }
599
600
601 private AST.Expr _parseStep() throws CompilerException {
602 var firstToken = tok;
603
604 AST.Expr e = null;
605 if (tok.type == TokenType.LPAREN) {
606 _next();
607 e = _parseExpr();
608 _expect(TokenType.RPAREN);
609 } else if (tok.type == TokenType.ID) {
610 var id = _parseID();
611 e = new AST.IDExpr(firstToken, id);
612 if (tok.type == TokenType.LPAREN) {
613 _next();
614 _expect(TokenType.RPAREN);
615 e = new AST.CallExpr(firstToken, id);
616 }
617 } else {
618 throw exception(TokenType.LPAREN, TokenType.ID);
619 }
620 while (tok.type == TokenType.STAR || tok.type == TokenType.LBRACKET) {
621 if (tok.type == TokenType.STAR) {
622 _next();
623 e = new AST.TransitiveExpr(firstToken, e);
624 } else if (tok.type == TokenType.LBRACKET) {
625 e = new AST.SubTypeExpr(firstToken, e, _parseType());
626 }
627 }
628 return e;
629 }
630
631
632 private List<AST.Association> _parseAssociations() throws CompilerException {
633 _expect(TokenType.ASSOCIATIONS);
634 _expect(TokenType.LCURLY);
635 List<AST.Association> assocs = new ArrayList<>();
636 if (tok.type == TokenType.ID) {
637 assocs = _parseAssociations1();
638 }
639 _expect(TokenType.RCURLY);
640 return assocs;
641 }
642
643
644 private List<AST.Association> _parseAssociations1() throws CompilerException {
645 var assocs = new ArrayList<AST.Association>();
646 var leftAsset = _parseID();
647 var assoc = _parseAssociation(leftAsset);
648 while (tok.type == TokenType.ID) {
649 var id = _parseID();
650 if (tok.type == TokenType.INFO) {
651 assoc.meta.add(_parseMeta2(id));
652 } else if (tok.type == TokenType.LBRACKET) {
653 assocs.add(assoc);
654 assoc = _parseAssociation(id);
655 } else {
656 throw exception(TokenType.INFO, TokenType.LBRACKET);
657 }
658 }
659 assocs.add(assoc);
660 return assocs;
661 }
662
663
664 private AST.Association _parseAssociation(AST.ID leftAsset) throws CompilerException {
665 var leftField = _parseType();
666 var leftMult = _parseMultiplicity();
667 _expect(TokenType.LARROW);
668 var linkName = _parseID();
669 _expect(TokenType.RARROW);
670 var rightMult = _parseMultiplicity();
671 var rightField = _parseType();
672 var rightAsset = _parseID();
673 return new AST.Association(
674 leftAsset,
675 leftAsset,
676 leftField,
677 leftMult,
678 linkName,
679 rightMult,
680 rightField,
681 rightAsset,
682 new ArrayList<>());
683 }
684
685
686 private AST.Multiplicity _parseMultiplicity() throws CompilerException {
687 var firstTok = tok;
688
689 var min = _parseMultiplicityUnit();
690 if (tok.type == TokenType.RANGE) {
691 _next();
692 var max = _parseMultiplicityUnit();
693 if (min == 0 && max == 1) {
694 return AST.Multiplicity.ZERO_OR_ONE;
695 } else if (min == 0 && max == 2) {
696 return AST.Multiplicity.ZERO_OR_MORE;
697 } else if (min == 1 && max == 1) {
698 return AST.Multiplicity.ONE;
699 } else if (min == 1 && max == 2) {
700 return AST.Multiplicity.ONE_OR_MORE;
701 } else {
702 throw exception(
703 firstTok,
704 String.format("Invalid multiplicity '%c..%c'", intToMult(min), intToMult(max)));
705 }
706 } else {
707 if (min == 0) {
708 throw exception(firstTok, "Invalid multiplicity '0'");
709 } else if (min == 1) {
710 return AST.Multiplicity.ONE;
711 } else {
712 return AST.Multiplicity.ZERO_OR_MORE;
713 }
714 }
715 }
716
717 private static char intToMult(int n) {
718 switch (n) {
719 case 0:
720 return '0';
721 case 1:
722 return '1';
723 default:
724 return '*';
725 }
726 }
727
728
729
730 private int _parseMultiplicityUnit() throws CompilerException {
731 if (tok.type == TokenType.INT) {
732 var n = tok.intValue;
733 if (n == 0 || n == 1) {
734 _next();
735 return n;
736 }
737 } else if (tok.type == TokenType.STAR) {
738 _next();
739 return 2;
740 }
741 throw expectedException("'0', '1', or '*'");
742 }
743
744
745 private AST.ID _parseType() throws CompilerException {
746 _expect(TokenType.LBRACKET);
747 var id = _parseID();
748 _expect(TokenType.RBRACKET);
749 return id;
750 }
751
752
753
754
755
756 private CompilerException expectedException(String expected) {
757 return exception(String.format("expected %s, found %s", expected, tok.type.toString()));
758 }
759
760 private CompilerException exception(String msg) {
761 return exception(tok, msg);
762 }
763
764 private CompilerException exception(Position pos, String msg) {
765 LOGGER.error(pos, msg);
766 return new CompilerException("There were syntax errors");
767 }
768
769 private CompilerException exception(TokenType... types) {
770 return exception(new TokenType[0], types);
771 }
772
773 private CompilerException exception(TokenType[] firstTypes, TokenType... followingTypes) {
774 if (firstTypes.length == 0 && followingTypes.length == 0) {
775 return expectedException("(null)");
776 } else {
777 var sb = new StringBuilder();
778 var totalLength = firstTypes.length + followingTypes.length;
779 for (int i = 0; i < totalLength; ++i) {
780 TokenType type = null;
781 if (i < firstTypes.length) {
782 type = firstTypes[i];
783 } else {
784 type = followingTypes[i - firstTypes.length];
785 }
786 if (i == 0) {
787 sb.append(type.toString());
788 } else if (i == totalLength - 1) {
789 if (totalLength == 2) {
790 sb.append(String.format(" or %s", type.toString()));
791 } else {
792 sb.append(String.format(", or %s", type.toString()));
793 }
794 } else {
795 sb.append(String.format(", %s", type.toString()));
796 }
797 }
798 return expectedException(sb.toString());
799 }
800 }
801 }