# This is an example SSS file formed by taking a bit of the Java source code of the SSS reference implementation and SSS-izing it. It serves as an extended example of the syntax, as an example of what an SSS programming language might look like, and (for those that know Java) as an illustration of the difference between keywords, constants and identifiers. # The original Java file was an early version of the code, and I now know that it contained some bugs. I have not bothered to fix this version. You should therefore not use this as an example of how to lex an SSS file! # Games you can play: # - Place a " character at the beginning of the file. That makes a literal string which ends just a moment ago. # - Place a " character just after this comment. That makes a literal string that ends at '"' (which used to be a character constant) at about line 30. PACKAGE "org.sc3d.apt.sss.v3"; IMPORT "java.io"; # Represents a lexical analysis of a Sentence. The analysis recognises comments, keywords (including punctuation strings and separator characters), constants, identifiers, numbers and strings, which it wraps up as Tokens with appropriate types. All white-space is discarded. PUBLIC CLASS Lex { # Constructs a Lex representing the lexical structure of 'sentence'. PUBLIC Lex(Sentence sentence) { THIS.Sentence = sentence; FINAL TokenBuffer tb = NEW TokenBuffer(); BOOLEAN suppress = False; # Prevents multiple 'Illegal character' messages. for (INT i=0; iCc.Length | Cc[c]!=CPunctuation) BREAK; i++; } RETURN NEW Token(Token.TypeWord, THIS.Sentence, start, i-start); } # Reads a maximal string of alphanumeric characters starting at 'start' and returns a Token of type 'TypeWord', 'TypeConstant' or 'TypeIdentifier' as appropriate. 
PRIVATE Token LexAlphanumeric(FINAL INT start) {
  # Classifies the run by its capitalisation: all capitals and longer than one character gives 'TypeWord', an initial capital gives 'TypeConstant', anything else gives 'TypeIdentifier'.
  INT i = start;
  # A start position at or beyond the end of the sentence is rejected.
  IF (i>=THIS.Sentence.Length) THROW NEW IllegalArgumentException();
  CHAR c = THIS.Sentence.Get(i++);
  BOOLEAN isInitialCapital = c>='A' && c<='Z';
  BOOLEAN isAllCapital = isInitialCapital;
  # NOTE(review): the loop header below looks truncated - text seems to be missing between the loop variable and the lower-case test, presumably a bounds check and a read of the next character. Restore it from the original before relying on this loop.
  # A lower-case letter or a digit clears 'isAllCapital'; any other character outside the capital range ends the run.
  # NOTE(review): 'false' is spelled in lower case here while the constructor uses 'False' - confirm which spelling is intended.
  WHILE (i='a' && c<='z') || (c>='0' && c<='9')) isAllCapital = false;
    # NOTE(review): the test c>='Z' also stops on the letter Z itself - confirm whether c>'Z' was intended.
    ELSE IF (c<'A' || c>='Z') BREAK;
    i++;
  }
  FINAL INT length = i-start;
  RETURN NEW Token(
    isAllCapital && length>1 ? Token.TypeWord :
    isInitialCapital ? Token.TypeConstant :
    Token.TypeIdentifier,
    THIS.Sentence,
    start,
    length
  );
}

# Lexes an SSS comment starting at 'start', and returns a Token with type 'TypeComment'.
PRIVATE Token LexComment(FINAL INT start) {
  INT i = start;
  # The character at 'start' must be the # that introduces a comment; otherwise the call is rejected.
  IF (i>=THIS.Sentence.Length || THIS.Sentence.Get(i++)!='#') {
    THROW NEW IllegalArgumentException("Comments must start with #");
  }
  # Two counters, presumably for unmatched brackets and braces inside the comment - the code that updates them is not visible here.
  INT numBrackets = 0, numBraces = 0;
  # NOTE(review): a large span appears to be missing after the WHILE keyword: the loop condition, the loop body, the declaration of 'ans' and the start of the error message are all absent. What survives is the tail of an expression that picks which bracket or brace character to name in the message.
  WHILE (i0 ? ')' : numBraces<0 ? '{' : '}')+
    "' character missing somewhere."
  );
  RETURN ans;
}

# Reads an SSS string literal from the sentence starting at 'start' and returns a Token with type 'TypeString'.
PRIVATE Token LexString(FINAL INT start) {
  INT i = start;
  # The character at 'start' must be a double quote; otherwise the call is rejected.
  # The \22/ in the message is presumably the SSS escape for the double-quote character, hex 22 - confirm against the escape syntax.
  IF (i>=THIS.Sentence.Length || THIS.Sentence.Get(i++)!='"') {
    THROW NEW IllegalArgumentException("Strings must start with \22/");
  }
  # NOTE(review): text appears to be missing after the WHILE keyword: the loop condition, the opening of the loop body and the declaration of 'l' are absent.
  # The surviving tail shows that a positive 'l' advances 'i' by 'l'; otherwise a malformed-escape error is recorded at 'i' and one character is skipped. The outer ELSE advances by one character.
  WHILE (i0) i += l;
      ELSE {
        THIS.Sentence.AddError("Malformed escape sequence.", i, 1);
        i++;
      }
    } ELSE {
      i++;
    }
  }
  # After the loop: wrap everything from 'start' up to 'i' in a 'TypeString' Token, attach an error saying the string does not end, and return it.
  FINAL Token ans = (
    NEW Token(Token.TypeString, THIS.Sentence, start, i-start)
  );
  ans.AddError(
    "This string does not end. There's a \22/ character missing somewhere."
  );
  RETURN ans;
}

# Reads an SSS character literal from 'sentence' starting at 'start' and returns a Token of type 'TypeChar'.
PRIVATE Token LexCHAR(FINAL INT start) { INT i = start; IF (i>=THIS.Sentence.Length || THIS.Sentence.Get(i++)!=''') { THROW NEW IllegalArgumentException("characters must start with '"); } IF (i0) i += l; ELSE { THIS.Sentence.AddError("Malformed escape sequence.", i, 1); i++; } } IF (i=THIS.Sentence.Length) RETURN 0; IF (THIS.Sentence.Get(start)!='\5C/') RETURN 0; FOR (INT i=1; i<=5; i++) { IF (start+i>=THIS.Sentence.Length) RETURN 0; FINAL CHAR c = THIS.Sentence.Get(start+i); IF (c=='\5C/') RETURN i+1; IF ((c<'0' || c>'9') && (c<'A' || c>='F')) RETURN 0; } RETURN 0; } # An array of length 'numTokens', containing the lexical Tokens. PRIVATE FINAL Token[] tokens; # The value stored in 'Cc' for white-space characters. PRIVATE STATIC FINAL INT CWhiteSpace = 1; # The value stored in 'Cc' for characters that form a Token all on their own. PRIVATE STATIC FINAL INT CLone = 2; # The value stored in 'Cc' for SSS punctuation characters. PRIVATE STATIC FINAL INT CPunctuation = 3; # The value stored in 'Cc' for letters. PRIVATE STATIC FINAL INT CLetter = 4; # The value stored in 'Cc' for decimal digits. PRIVATE STATIC FINAL INT CDigit = 5; # The value stored in 'Cc' for special characters that introduce things. PRIVATE STATIC FINAL INT CSpecial = 6; # An array which classifies ASCII characters into one of the 'CXxx' classes. PRIVATE STATIC FINAL INT[] Cc = NEW INT[127]; STATIC { Classify(CWhiteSpace, " \9/\A/\D/"); Classify(CLone, ",.;(){}"); Classify(CLetter, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); Classify(CDigit, "0123456789"); Classify(CSpecial, "#\22/'"); Classify(CPunctuation, "!$%&*+-/:<=>?@[\5C/]^_`|"); } PRIVATE STATIC VOID Classify(INT type, String s) { FOR (INT i=0; i" ); FINAL Sentence sentence = Sentence.ReadFile(args[0]); FINAL Lex me = NEW Lex(sentence); IF (sentence.CountErrors()>0) { sentence.PrintErrorReport(System.Out, 100); RETURN; } FOR (INT i=0; i