using System;
using System.Collections;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.IO;
using System.Runtime.InteropServices;
using System.Runtime.Serialization;
using System.Text;

namespace Assignment_1
{
    /// <summary>
    /// Console interpreter for a small statement language. A program is an opening brace followed by
    /// semicolon-terminated statements (set, append, print, ...), read either from redirected standard
    /// input or interactively, one line at a time.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Attributes attached to an entry of the symbol table.
        /// </summary>
        [Flags]
        enum VariableFlags
        {
            Empty = 0,
            Reserved = 1,
            NoPrint = 2,
            Static = 4,
            Undef = 8
        }

        /// <summary>
        /// The symbol table: maps a variable name to its string value and flags.
        /// It is pre-populated with reserved whitespace constants that are hidden from the list output.
        /// </summary>
        static readonly Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>
        {
            { "SPACE", new Tuple<string, VariableFlags>(" ", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "TAB", new Tuple<string, VariableFlags>("\t", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "NEWLINE", new Tuple<string, VariableFlags>("\n", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "CARRIAGE_RETURN", new Tuple<string, VariableFlags>("\r", VariableFlags.Reserved | VariableFlags.NoPrint) }
        };

        /// <summary>
        /// Characters listed as forbidden. Not currently referenced by the parser code in this file.
        /// </summary>
        static readonly List<char> ForbiddenChars = new List<char> { '$', '\\', '\"', '\'' };

        /// <summary>
        /// Prints the banner, loads the source into a memory stream and runs the parser over it.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("┌──────────────────────────────────────────┐");
            Console.WriteLine("│ 159.341 2021 Semester 1, Assignment 1 │");
            Console.WriteLine("│ Submitted by Brychan Dempsey, 14299890 │");
            Console.WriteLine("└──────────────────────────────────────────┘");

            // The memory stream retains the source while it is being interpreted.
            using MemoryStream sourceStream = new MemoryStream(1024);
            Parser parser = new Parser();
            bool dynamicInput = false;

            // From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
            // Reading from pipes is equivalent to reading user input, though the input is redirected.
            if (Console.IsInputRedirected)
            {
                sourceStream.Write(Encoding.UTF8.GetBytes(Console.In.ReadToEnd()));
            }
            else
            {
                // Interactive mode: seed the stream with the opening brace the parser expects.
                sourceStream.Write(Encoding.UTF8.GetBytes("{ \r\n"));
                dynamicInput = true;
            }

            sourceStream.Position = 0;
            parser.StartParsing(sourceStream, dynamicInput);
            Console.ReadLine();
        }

        /// <summary>
        /// Parses and executes statements read from a UTF-8 encoded stream.
        /// </summary>
        /// <remarks>
        /// For this program, a word is any run of non-whitespace characters bounded by whitespace,
        /// a semicolon or the end of the stream.
        /// </remarks>
        public class Parser
        {
            /// <summary>
            /// The commands understood by the parser. Member names are matched (case-sensitively)
            /// against the first word of each statement.
            /// </summary>
            public enum statements
            {
                exit,
                append,
                list,
                print,
                printlength,
                printwords,
                printwordcount,
                set,
                reverse,
                h,
                writeout
            }

            /// <summary>
            /// Runs the interpreter loop over <paramref name="source"/>.
            /// </summary>
            /// <param name="source">
            /// A seekable, writable stream positioned at the opening brace of the program.
            /// </param>
            /// <param name="dynamicInput">
            /// When true, a new line is requested from the console and appended to the stream whenever
            /// the stream is exhausted. When false, parsing stops at the end of the stream.
            /// </param>
            /// <remarks>
            /// Parsing ends at a closing brace, at the end of non-interactive input, or when the console
            /// input is closed. After a failed statement the stream is truncated back to the start of
            /// that statement, which discards the rest of the input that was already buffered.
            /// </remarks>
            public void StartParsing(Stream source, bool dynamicInput = false)
            {
                SkipWhitespace(source);
                if ((byte)source.ReadByte() != '{')
                {
                    Console.WriteLine("Expected '{' at the start of the input");
                    return;
                }

                while (true)
                {
                    SkipWhitespace(source);
                    if (source.Position >= source.Length)
                    {
                        // Nothing left to parse: stop, or ask the user for the next line.
                        if (!dynamicInput || !ReadMoreInput(source))
                        {
                            return;
                        }
                        continue;
                    }

                    long initPos = source.Position;
                    long wordEnd = FindNextWord(source, out string word);
                    if (word == "}")
                    {
                        // End of the statement block.
                        return;
                    }

                    try
                    {
                        // IsDefined on the name rejects numeric text, which TryParse alone would accept
                        // (e.g. "0" would otherwise be treated as the exit command).
                        if (Enum.IsDefined(typeof(statements), word) && Enum.TryParse(word, out statements statement))
                        {
                            source.Position = wordEnd;

                            // By turning the result of the command into an action, processing of the final
                            // result is deferred until the statement is known to be correctly terminated.
                            Action result = BuildAction(statement, source);
                            bool terminated = TryConsumeEoS(source);
                            if (!terminated && RequiresTerminator(statement))
                            {
                                throw new ParserException("expected a semi-colon", 0, source.Position);
                            }
                            result();
                        }
                        else
                        {
                            // Statement parse failed; trim the stream back to the start of the statement.
                            Console.WriteLine("Failed parsing statement");
                            source.Position = initPos;
                            source.SetLength(initPos);
                        }
                    }
                    // Throwing a ParserException returns control to this point immediately. The failed
                    // statement is removed from the stream and the error printed, so the individual
                    // parse functions do not need to restore the stream position themselves.
                    catch (ParserException e)
                    {
                        if (e.Importance >= 3)
                        {
                            throw new ApplicationException("A critical error occurred.", e);
                        }

                        if (e.LinePosition >= 0)
                        {
                            WriteDebugLine(initPos, e.LinePosition, e.Message, source);
                        }
                        else
                        {
                            Console.WriteLine(e.LinePosition + ": " + e.Message);
                        }

                        source.Position = initPos;
                        source.SetLength(initPos);
                    }
                }
            }

            /// <summary>
            /// Prompts for one line of console input and appends it to the stream, leaving the stream
            /// positioned at the start of the new text.
            /// </summary>
            /// <returns>false when the console input has been closed, otherwise true.</returns>
            private static bool ReadMoreInput(Stream source)
            {
                Console.WriteLine("Enter a command: ");
                string line = Console.ReadLine();
                if (line == null)
                {
                    return false;
                }

                long resumeAt = source.Position;
                // The newline keeps consecutive inputs separated and bounds the line shown in error output.
                source.Write(Encoding.UTF8.GetBytes(line + "\n"));
                source.Position = resumeAt;
                return true;
            }

            /// <summary>
            /// Parses the arguments of <paramref name="statement"/> from the stream and returns the
            /// action that carries the statement out.
            /// </summary>
            private Action BuildAction(statements statement, Stream source)
            {
                switch (statement)
                {
                    case statements.exit:
                        return Exit();
                    case statements.append:
                        return AppendSet(source);
                    case statements.list:
                        return List();
                    case statements.print:
                        return Print(source, 0);
                    case statements.printlength:
                        return Print(source, 1);
                    case statements.printwordcount:
                        return Print(source, 2);
                    case statements.printwords:
                        return Print(source, 3);
                    case statements.set:
                        return AppendSet(source, false);
                    case statements.reverse:
                        return Reverse(source);
                    case statements.h:
                        return Help();
                    case statements.writeout:
                        // TODO: writing the command history is not implemented; no target is supplied,
                        // so the placeholder in the message is printed as-is.
                        return () => Console.WriteLine("Writing input commands to {0}...");
                    default:
                        return () => { };
                }
            }

            /// <summary>
            /// The informational commands (h, writeout) may be entered without a terminating semicolon.
            /// </summary>
            private static bool RequiresTerminator(statements statement)
            {
                return statement != statements.h && statement != statements.writeout;
            }

            /// <summary>
            /// Consumes the end-of-statement character if it is the next non-whitespace character.
            /// </summary>
            /// <returns>true when a semicolon was consumed; otherwise the stream is left untouched.</returns>
            private static bool TryConsumeEoS(Stream source)
            {
                if (!IsNextEoS(source))
                {
                    return false;
                }

                SkipWhitespace(source);
                ReadChar(source);
                return true;
            }

            #region Function Handling

            /// <summary>
            /// Reads the next word as a variable name and advances the stream past it.
            /// </summary>
            /// <param name="source">The stream to read from.</param>
            /// <param name="checkExist">When true, the name must already be in the symbol table.</param>
            /// <returns>The variable name.</returns>
            /// <exception cref="ParserException">
            /// No name could be read, the name is unknown while <paramref name="checkExist"/> is set,
            /// or the name refers to a reserved constant.
            /// </exception>
            private string ValidateKey(Stream source, bool checkExist)
            {
                long keyEndPos = FindIdentifier(source, out string key);
                if (keyEndPos < 0 || key.Length == 0)
                {
                    throw new ParserException("Could not identify object", 0, source.Position);
                }

                bool exists = Symbols.TryGetValue(key, out Tuple<string, VariableFlags> symbol);
                if (checkExist && !exists)
                {
                    throw new ParserException("Key not found", 0, source.Position);
                }

                if (exists && symbol.Item2.HasFlag(VariableFlags.Reserved))
                {
                    throw new ParserException("Cannot assign a value to a reserved constant", 0, keyEndPos - (key.Length + 1));
                }

                source.Position = keyEndPos;
                return key;
            }

            /// <summary>
            /// Evaluates the expression at the current position and advances the stream past it.
            /// </summary>
            /// <returns>The evaluated string value of the expression.</returns>
            /// <exception cref="ParserException">The expression could not be evaluated.</exception>
            private string ValidateValue(Stream source)
            {
                long valuePos = FindExpression(source, out string value);
                if (valuePos < 0)
                {
                    throw new ParserException("Could not evaluate expression", 0, source.Position);
                }

                source.Position = valuePos;
                return value;
            }

            /// <summary>
            /// Handles the 'append x y [ + z];' case and the 'set x y [ + z];' case.
            /// </summary>
            /// <param name="source">The stream positioned after the command word.</param>
            /// <param name="appendMode">
            /// true to append to an existing variable; false to create or overwrite the variable.
            /// </param>
            /// <returns>An action that stores the new value in the symbol table.</returns>
            Action AppendSet(Stream source, bool appendMode = true)
            {
                string key = ValidateKey(source, appendMode);
                string value = ValidateValue(source);

                if (appendMode)
                {
                    return () => Symbols[key] = new Tuple<string, VariableFlags>(Symbols[key].Item1 + value, Symbols[key].Item2);
                }

                // The indexer is used rather than Add so that setting an existing variable replaces it
                // instead of throwing.
                return () => Symbols[key] = new Tuple<string, VariableFlags>(value, VariableFlags.Empty);
            }

            /// <summary>
            /// Builds a table of all defined variables.
            /// </summary>
            /// <param name="printUnprint">true to also list symbols flagged as NoPrint.</param>
            /// <returns>An action that prints the table to the console.</returns>
            Action List(bool printUnprint = false)
            {
                int keyWidth = 10;
                int valueWidth = 50;
                int flagWidth = 9;

                string Border(char left, char middle, char right)
                {
                    return left + new string('─', keyWidth) + middle + new string('─', valueWidth) + middle + new string('─', flagWidth) + right + "\n";
                }

                StringBuilder consoleOutput = new StringBuilder();
                consoleOutput.Append(Border('┌', '┬', '┐'));
                consoleOutput.Append(string.Format("│{0}│{1}│{2}│\n", CenterString("Symbol", keyWidth), CenterString("Value", valueWidth), CenterString("Flags", flagWidth)));

                // Figure out which symbols are eligible for printing.
                List<string> eligibleKeys = new List<string>(Symbols.Count);
                foreach (KeyValuePair<string, Tuple<string, VariableFlags>> entry in Symbols)
                {
                    if (printUnprint || !entry.Value.Item2.HasFlag(VariableFlags.NoPrint))
                    {
                        eligibleKeys.Add(entry.Key);
                    }
                }

                string entryFormat = "│{0," + -1 * keyWidth + "}│{1," + -1 * valueWidth + "}│{2," + -1 * flagWidth + "}│\n";
                for (int i = 0; i < eligibleKeys.Count; i++)
                {
                    // A separator precedes every row, including the first (it divides header and body).
                    consoleOutput.Append(Border('├', '┼', '┤'));

                    Tuple<string, VariableFlags> symbol = Symbols[eligibleKeys[i]];
                    // Control characters are shown escaped so that they do not break the table layout.
                    string shownValue = symbol.Item1.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\t", "\\t");
                    string shownFlags = Convert.ToString((byte)symbol.Item2, 2).PadLeft(8, '0');
                    consoleOutput.Append(string.Format(entryFormat, eligibleKeys[i], shownValue, shownFlags));
                }

                consoleOutput.Append(Border('└', '┴', '┘'));
                return () => Console.WriteLine(consoleOutput.ToString());
            }

            /// <summary>
            /// Creates the action for the exit command.
            /// </summary>
            /// <returns>An action that terminates the process with exit code 0.</returns>
            Action Exit()
            {
                // Should do some save command here
                return () => Environment.Exit(0);
            }

            /// <summary>
            /// Creates the action for the h command.
            /// </summary>
            /// <returns>An action that prints the name of every command.</returns>
            Action Help()
            {
                return () =>
                {
                    Console.WriteLine("Commands are: ");
                    foreach (var item in Enum.GetValues(typeof(statements)))
                    {
                        Console.WriteLine("\t{0}", ((statements)item).ToString());
                    }
                };
            }

            /// <summary>
            /// Evaluates the expression that follows a print command and prepares the output.
            /// </summary>
            /// <param name="source">The stream positioned after the command word.</param>
            /// <param name="mode">
            /// 0 prints the expression, 1 its length, 2 its word count (printwordcount) and
            /// 3 each of its words on a separate line (printwords).
            /// </param>
            /// <returns>An action that writes the prepared output to the console.</returns>
            Action Print(Stream source, int mode = 0)
            {
                StringBuilder outputString = new StringBuilder();
                string expression = ValidateValue(source);

                if (mode == 0)
                {
                    outputString.Append(expression + Environment.NewLine);
                }
                else if (mode == 1)
                {
                    outputString.Append("Length of the expression is: ");
                    outputString.Append(expression.Length + Environment.NewLine);
                }
                else if (mode >= 2)
                {
                    // A null separator splits on any whitespace; empty entries are not words.
                    string[] words = expression.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                    if (mode == 2)
                    {
                        outputString.Append("Wordcount is: ");
                        outputString.Append(words.Length + Environment.NewLine);
                    }
                    else
                    {
                        // Part of the deferred output, so nothing is printed for a statement that
                        // turns out to be unterminated.
                        outputString.Append("Words are:" + Environment.NewLine);
                        foreach (string word in words)
                        {
                            outputString.Append(word + Environment.NewLine);
                        }
                    }
                }

                return () => Console.WriteLine(outputString.ToString());
            }

            /// <summary>
            /// Reverses the order of the space-separated words of an existing variable.
            /// </summary>
            /// <param name="source">The stream positioned after the command word.</param>
            /// <returns>An action that stores the reversed value back into the variable.</returns>
            Action Reverse(Stream source)
            {
                string key = ValidateKey(source, true);
                string[] words = Symbols[key].Item1.Split(' ');
                Array.Reverse(words);

                // Joining (rather than appending a space after every word) avoids a trailing space.
                string reversed = string.Join(" ", words);
                return () => Symbols[key] = new Tuple<string, VariableFlags>(reversed, Symbols[key].Item2);
            }

            /// <summary>
            /// Writes the debug info to the screen in the form:
            /// the line read from the stream (from lineStart to the line end), followed by
            /// whitespace up to caratPos, a caret and the error message.
            /// The stream is then truncated back to <paramref name="lineStart"/>.
            /// </summary>
            /// <param name="lineStart">Stream position at which the failed statement starts.</param>
            /// <param name="caratPos">Stream position the caret should point at.</param>
            /// <param name="errorMessage">The message to print after the caret.</param>
            /// <param name="source">The stream being parsed.</param>
            void WriteDebugLine(long lineStart, long caratPos, string errorMessage, Stream source)
            {
                source.Position = lineStart;
                string fullLine = GetNextLine(source);

                int indent = (int)Math.Max(0, caratPos - lineStart);
                string errorMSG = new string(' ', indent) + "^ " + errorMessage;

                Console.WriteLine(fullLine);
                Console.WriteLine(errorMSG);

                source.Position = lineStart;
                source.SetLength(lineStart);
            }

            /// <summary>
            /// Computes a position <c>word.Length</c> bytes before the current one, stepping back one
            /// further byte when the byte preceding the current position is a space.
            /// Not currently called by the code in this file.
            /// </summary>
            /// <param name="source">The stream; its position may be moved back by one byte.</param>
            /// <param name="word">The word whose length is subtracted.</param>
            /// <returns>The computed position.</returns>
            long GetLineStart(Stream source, string word)
            {
                // Step back and check that the previous byte is 32 (space); reading it returns the
                // stream to the current position.
                // No bounds guard - it is implied that the position is within bounds because the
                // command is at least 3 bytes long.
                source.Position--;
                if (source.ReadByte() == 32)
                {
                    source.Position--;
                }
                return source.Position - word.Length;
            }

            #endregion

            #region Data Handling

            /// <summary>
            /// Parses and evaluates the expression at the current position, moving the stream to the
            /// end of the last value.
            /// </summary>
            /// <remarks>
            /// An expression is one or more variable names or literals, normally joined by the append
            /// operator '+'. Variables are evaluated immediately. Evaluation stops at the end of the
            /// stream or just before the end-of-statement character.
            /// </remarks>
            /// <param name="s">The stream to read from.</param>
            /// <param name="expression">Receives the concatenated value of the expression.</param>
            /// <returns>The stream position at the end of the expression.</returns>
            /// <exception cref="ParserException">A value could not be parsed.</exception>
            long FindExpression(Stream s, out string expression)
            {
                StringBuilder result = new StringBuilder();

                while (s.Position < s.Length && !IsNextEoS(s))
                {
                    if (IsNextEoS(s, '+'))
                    {
                        // Consume only the operator itself, so "a +b" still evaluates b.
                        SkipWhitespace(s);
                        ReadChar(s);
                    }
                    else
                    {
                        long valueEnd = FindValue(s, out string value);
                        if (valueEnd < 0)
                        {
                            throw new ParserException("Could not parse value", 0, s.Position);
                        }

                        s.Position = valueEnd;
                        result.Append(value);
                    }
                }

                expression = result.ToString();
                return s.Position;
            }

            /// <summary>
            /// Checks ahead to see if the next non-whitespace character is the EoS indicator (';' by default).
            /// The stream position is left unchanged.
            /// </summary>
            /// <param name="s">The stream to look ahead in.</param>
            /// <param name="EoSChar">The character to look for.</param>
            /// <returns>true if the next non-whitespace char is <paramref name="EoSChar"/>, else false.</returns>
            static bool IsNextEoS(Stream s, char EoSChar = ';')
            {
                long pos = s.Position;
                SkipWhitespace(s);
                char next = PeekChar(s);
                s.Position = pos;
                return next == EoSChar;
            }

            /// <summary>
            /// Finds the next value in the stream: a quoted literal, or the value of a named variable.
            /// Leading whitespace is skipped (and stays skipped).
            /// </summary>
            /// <param name="s">The stream to read from.</param>
            /// <param name="returnedValue">Receives the literal text or the variable's value.</param>
            /// <returns>The stream position at the end of the value.</returns>
            /// <exception cref="ParserException">The variable is unknown or the literal is malformed.</exception>
            long FindValue(Stream s, out string returnedValue)
            {
                SkipWhitespace(s);
                if (PeekChar(s) == '\"')
                {
                    // The first char is a ", i.e. the start of a literal.
                    return FindLiteral(s, out returnedValue);
                }

                long identifierEnd = FindExistingIdentifier(s, out string keyValue);
                if (!Symbols.TryGetValue(keyValue, out Tuple<string, VariableFlags> symbol))
                {
                    throw new ParserException("Could not find key: " + keyValue, 0, s.Position);
                }

                returnedValue = symbol.Item1;
                return identifierEnd;
            }

            /// <summary>
            /// Reads the next word as an identifier without moving the stream.
            /// </summary>
            /// <returns>The stream position at the end of the identifier.</returns>
            long FindIdentifier(Stream s, out string returnedKey)
            {
                return FindNextWord(s, out returnedKey);
            }

            /// <summary>
            /// Reads the next word as the name of a variable to look up, without moving the stream.
            /// </summary>
            /// <returns>The stream position at the end of the identifier.</returns>
            long FindExistingIdentifier(Stream s, out string returnedKey)
            {
                // FindNextWord already stops in front of a ';', so no trailing semicolon needs trimming.
                return FindNextWord(s, out returnedKey);
            }

            /// <summary>
            /// Finds the end of the complete literal definition, returning the stream to the original position.
            /// </summary>
            /// <remarks>
            /// A quote preceded by a backslash does not end the literal; the pair is returned as a
            /// plain quote. TODO: handle the \\ escape (a literal that ends in a backslash).
            /// </remarks>
            /// <param name="s">The stream positioned at the opening quote.</param>
            /// <param name="returnedLiteral">Receives the text between the quotes.</param>
            /// <returns>The stream position just after the closing quote.</returns>
            /// <exception cref="ParserException">The closing quote is missing.</exception>
            long FindLiteral(Stream s, out string returnedLiteral)
            {
                long pos = s.Position;

                // Remove the opening quote.
                if (PeekChar(s) == '\"')
                {
                    ReadChar(s);
                }

                char previous = '\0';
                long resultPosition = FindNextOccurance(s, (current, stream) =>
                {
                    bool closesLiteral = current == '\"' && previous != '\\';
                    previous = current;
                    return closesLiteral;
                }, out string resultLiteral);

                s.Position = pos;
                if (resultPosition < 0)
                {
                    throw new ParserException("Could not parse the literal", 0, pos);
                }

                returnedLiteral = resultLiteral.Replace("\\\"", "\"");
                return resultPosition;
            }

            #endregion
        }

        #region HelperFunctions

        /// <summary>
        /// Reads the stream as a UTF-8 encoded string up to the next '\n' or the end of the stream.
        /// The line terminator is excluded and the stream position is left unchanged.
        /// </summary>
        /// <param name="s">The stream to read from.</param>
        /// <returns>The text of the line.</returns>
        static string GetNextLine(Stream s)
        {
            FindNextOccurance(s, '\n', out string nextLine);
            return nextLine.TrimEnd('\r');
        }

        /// <summary>
        /// Finds the end-boundary of the next word in the stream, and returns the stream to the original position.
        /// </summary>
        /// <remarks>
        /// Leading whitespace is skipped. The word ends before the next whitespace character or ';'.
        /// A ';' in first position is itself treated as part of the word.
        /// </remarks>
        /// <param name="s">The stream to read from.</param>
        /// <param name="nextWord">Receives the word; empty when only whitespace remains.</param>
        /// <returns>The stream position at the end of the word.</returns>
        static long FindNextWord(Stream s, out string nextWord)
        {
            StringBuilder newWord = new StringBuilder();
            long start = s.Position;

            // Advance over leading whitespace.
            char currentChar = ReadChar(s);
            while (s.Position < s.Length && char.IsWhiteSpace(currentChar))
            {
                currentChar = ReadChar(s);
            }

            // The last char read starts the word, unless the stream ran out first.
            if (currentChar != 0 && !char.IsWhiteSpace(currentChar))
            {
                newWord.Append(currentChar);
            }

            while (s.Position < s.Length)
            {
                long beforeRead = s.Position;
                currentChar = ReadChar(s);
                if (char.IsWhiteSpace(currentChar) || currentChar == ';')
                {
                    // Un-read the delimiter, however many bytes it occupies.
                    s.Position = beforeRead;
                    break;
                }
                newWord.Append(currentChar);
            }

            nextWord = newWord.ToString();
            long endPos = s.Position;
            s.Position = start;
            return endPos;
        }

        /// <summary>
        /// Finds the position just after the next character for which the predicate returns true,
        /// and returns the stream to the original position.
        /// </summary>
        /// <param name="s">The stream to search.</param>
        /// <param name="p">A predicate-like Func that receives each character and the stream.</param>
        /// <param name="result">
        /// Receives the text read before the match, or everything read when there was no match.
        /// </param>
        /// <returns>The stream position just after the matching character, or -1 if none was found.</returns>
        static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
        {
            long start = s.Position;
            StringBuilder sb = new StringBuilder();
            bool charFound = false;

            while (s.Position < s.Length && !charFound)
            {
                char nextChar = ReadChar(s);
                if (nextChar == 0)
                {
                    // A NUL ends the search without counting as a match.
                    break;
                }

                if (p(nextChar, s))
                {
                    charFound = true;
                }
                else
                {
                    sb.Append(nextChar);
                }
            }

            result = sb.ToString();
            long newPosition = s.Position;
            s.Position = start;
            return charFound ? newPosition : -1;
        }

        /// <summary>
        /// Finds the position just after the next occurrence of a character, and returns the stream to
        /// the original position.
        /// </summary>
        /// <param name="s">The stream to search.</param>
        /// <param name="c">The character to search for.</param>
        /// <param name="result">Captures the string read in searching for the character.</param>
        /// <returns>The stream position just after the character, or -1 if it was not found.</returns>
        static long FindNextOccurance(Stream s, char c, out string result)
        {
            return FindNextOccurance(s, (streamChar, stream) => streamChar == c, out result);
        }

        /// <summary>
        /// Reads the next UTF-8 encoded character in the stream, advancing the stream by the number of
        /// bytes that encode it.
        /// </summary>
        /// <remarks>
        /// The leading bits of the first byte give the length of the sequence. For a 4-byte sequence
        /// only the first UTF-16 code unit (the high surrogate) is returned.
        /// </remarks>
        /// <param name="s">The stream to read from.</param>
        /// <returns>The character read, or (char)0 at the end of the stream.</returns>
        /// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
        static char ReadChar(Stream s)
        {
            int firstByte = s.ReadByte();
            if (firstByte == -1)
            {
                return (char)0;
            }

            int continuationCount = 0;
            if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
            {
                continuationCount = 3;
            }
            else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
            {
                continuationCount = 2;
            }
            else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
            {
                continuationCount = 1;
            }

            byte[] charBytes = new byte[continuationCount + 1];
            charBytes[0] = (byte)firstByte;

            // Reading the continuation bytes is what advances the stream; no further seek is needed.
            for (int i = 1; i <= continuationCount; i++)
            {
                int nextByte = s.ReadByte();
                if (nextByte >> 6 != 2)
                {
                    throw new InvalidDataException("Character is not a valid UTF-8 code point!");
                }
                charBytes[i] = (byte)nextByte;
            }

            return Encoding.UTF8.GetString(charBytes)[0];
        }

        /// <summary>
        /// Reads the next character in the stream, and returns the stream to the original position.
        /// </summary>
        /// <param name="s">The stream to read from.</param>
        /// <returns>The next character, or (char)0 at the end of the stream.</returns>
        static char PeekChar(Stream s)
        {
            long curr = s.Position;
            char c = ReadChar(s);
            s.Position = curr;
            return c;
        }

        /// <summary>
        /// Reads the character that ends just before the current position. The stream position is
        /// left unchanged.
        /// </summary>
        /// <param name="s">The stream to read from.</param>
        /// <returns>The previous character, or (char)0 when the stream is at its start.</returns>
        static char PreviousChar(Stream s)
        {
            long original = s.Position;
            Stack<byte> charBytes = new Stack<byte>(4);

            for (int i = 0; i < 4 && s.Position > 0; i++)
            {
                s.Position--;
                byte read = (byte)s.ReadByte();
                // Undo the advance made by ReadByte so that the next iteration steps further back.
                s.Position--;
                charBytes.Push(read);

                // Not a UTF-8 continuation byte, so this is the first byte of the character.
                if (read >> 6 != 2)
                {
                    break;
                }
            }

            s.Position = original;
            if (charBytes.Count == 0)
            {
                return (char)0;
            }

            // Stack.ToArray yields the bytes in pop order, i.e. first byte of the character first.
            return Encoding.UTF8.GetString(charBytes.ToArray())[0];
        }

        /// <summary>
        /// Advances the stream past any whitespace characters at the current position.
        /// </summary>
        /// <param name="s">The stream to advance.</param>
        static void SkipWhitespace(Stream s)
        {
            char c = PeekChar(s);
            while (s.Position < s.Length && char.IsWhiteSpace(c))
            {
                ReadChar(s); // move by the size of that character
                c = PeekChar(s);
            }
        }

        /// <summary>
        /// Pads a string on both sides to a total width, roughly centring it.
        /// </summary>
        /// <param name="source">The text to pad; returned unchanged when it is already wide enough.</param>
        /// <param name="totalPadding">The total width of the result.</param>
        /// <param name="paddingChar">Unused; padding is always done with spaces.</param>
        /// <returns>The padded string.</returns>
        static string CenterString(string source, int totalPadding, char paddingChar = ' ')
        {
            if (source.Length >= totalPadding)
            {
                return source;
            }

            // The first half of the text is right-aligned in the left half of the field and the
            // remainder left-aligned in the right half.
            int splitAt = (int)Math.Ceiling(source.Length / 2.0);
            int leftWidth = (int)Math.Floor(totalPadding / 2.0);
            int rightWidth = (int)Math.Ceiling(totalPadding / 2.0);

            string format = "{0," + leftWidth + "}{1," + -1 * rightWidth + "}";
            return string.Format(format, source[..splitAt], source[splitAt..]);
        }

        #endregion

        /// <summary>
        /// Raised by the parse functions to abandon the current statement.
        /// </summary>
        public class ParserException : Exception
        {
            /// <summary>
            /// Importance is used to signify how the parser should respond to the error.
            /// A code of 3 or greater is a critical error; the application will throw the error further up the call and exit.
            /// 0 implies the line may be retried.
            /// 1 should imply the current block is not valid and should be retried.
            /// </summary>
            public int Importance { get; set; } = 0;

            /// <summary>
            /// Stream position the error refers to, or -1 when no position is known.
            /// </summary>
            public long LinePosition { get; set; } = -1;

            /// <summary>Creates an exception with a message, an importance and a stream position.</summary>
            public ParserException(string message, int importance, long linePos) : base(message)
            {
                Importance = importance;
                LinePosition = linePos;
            }

            /// <summary>Creates an exception with a message and an importance, without a position.</summary>
            public ParserException(string message, int importance) : this(message, importance, -1)
            {
            }

            /// <summary>Creates a retryable exception (importance 0) without a position.</summary>
            public ParserException(string message) : this(message, 0, -1)
            {
            }
        }
    }
}