/* Dempsey-Jensen, Brychan, 14299890, Assignment 1, 159.341 */
/* Parses and interprets a simple programming language line-by-line */
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading;

namespace Assignment_1
{
    class Program
    {
        /// <summary>
        /// Flags to set symbol properties
        /// </summary>
        [Flags]
        enum VariableFlags
        {
            Empty = 0,
            Reserved = 1,
            NoPrint = 2,
            Static = 4
        }

        static readonly int ConsoleWidthLimit = 80;

        /// <summary>
        /// Prints the banner, then repeatedly loads a program (from redirected input, a chosen
        /// source file, or interactively from the console) and hands it to a fresh <see cref="Parser"/>.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        static void Main(string[] args)
        {
            // The box is built from one width so the border and the text rows always line up.
            const int BannerInnerWidth = 42;
            string[] bannerText =
            {
                "159.341 2021 Semester 1, Assignment 1",
                "Submitted by Brychan Dempsey, 14299890"
            };
            Console.WriteLine(CenterString("┌" + new string('─', BannerInnerWidth) + "┐", ConsoleWidthLimit));
            foreach (string text in bannerText)
            {
                Console.WriteLine(CenterString("│" + CenterString(text, BannerInnerWidth) + "│", ConsoleWidthLimit));
            }
            Console.WriteLine(CenterString("└" + new string('─', BannerInnerWidth) + "┘", ConsoleWidthLimit));

            bool loadedFromFile = false;
            bool exit = false;
            while (!exit)
            {
                using MemoryStream sourceStream = new MemoryStream(1024);
                Parser parser = new Parser();
                bool dynamicInput = false;

                // From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
                // Reading from pipes is equivalent to reading user input, but the input is redirected
                if (Console.IsInputRedirected || loadedFromFile)
                {
                    // To simplify reading, the whole piped/file input is copied into the stream up front.
                    byte[] program = Encoding.UTF8.GetBytes(Console.In.ReadToEnd());
                    sourceStream.Write(program, 0, program.Length);

                    // Release the consumed reader and point the console back at standard input.
                    Console.In.Dispose();
                    Console.SetIn(new StreamReader(Console.OpenStandardInput()));
                    sourceStream.Position = 0;
                }
                else
                {
                    sourceStream.Position = 0;
                    dynamicInput = true;
                }

                parser.StartParsing(sourceStream, dynamicInput);
                Console.WriteLine(Environment.NewLine + new string('─', 40));
                Console.WriteLine("\nProgram Parsed Successfully!");

                if (Console.IsInputRedirected)
                {
                    // No keyboard is available to answer the prompts below.
                    Thread.Sleep(3000);
                    Environment.Exit(0);
                }

                if (!AskYesNo("\nWould you like to parse another program? Y/n:"))
                {
                    exit = true;
                    continue;
                }

                // Always start from "console input"; only a successfully opened file switches it on.
                // (Previously a failed selection left the flag set and the next pass blocked on stdin.)
                loadedFromFile = false;
                if (AskYesNo("\nWould you like to pipe data from a source file? Y/n:"))
                {
                    Console.WriteLine("Enter the source path:");
                    string sourcePath = Console.ReadLine();
                    if (File.Exists(sourcePath))
                    {
                        try
                        {
                            Console.SetIn(File.OpenText(sourcePath));
                            loadedFromFile = true;
                        }
                        catch (Exception e)
                        {
                            Console.WriteLine("Encountered an error opening the source file: " + e.Message);
                        }
                    }
                    else
                    {
                        Console.WriteLine("Source file not found; commands will be read from the console.");
                    }
                }
            }
        }

        /// <summary>
        /// Repeats <paramref name="prompt"/> until the user presses Y or N.
        /// </summary>
        /// <param name="prompt">The question to print before each key press.</param>
        /// <returns>true for Y, false for N</returns>
        static bool AskYesNo(string prompt)
        {
            ConsoleKey key;
            do
            {
                Console.WriteLine(prompt);
                key = Console.ReadKey(true).Key;
            }
            while (key != ConsoleKey.Y && key != ConsoleKey.N);
            return key == ConsoleKey.Y;
        }

        /// <summary>
        /// Parses and executes statements read from a stream, keeping the symbol table between statements.
        /// </summary>
        public class Parser
        {
            // Symbol table: name -> (value, flags). Seeded with the reserved whitespace constants.
            readonly Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>()
            {
                { "SPACE", new Tuple<string, VariableFlags>(" ", VariableFlags.Reserved | VariableFlags.NoPrint) },
                { "TAB", new Tuple<string, VariableFlags>("\t", VariableFlags.Reserved | VariableFlags.NoPrint) },
                { "NEWLINE", new Tuple<string, VariableFlags>("\n", VariableFlags.Reserved | VariableFlags.NoPrint) },
                { "CARRIAGE_RETURN", new Tuple<string, VariableFlags>("\r", VariableFlags.Reserved | VariableFlags.NoPrint) }
            };

            /// <summary>
            /// Statement keywords. The member names are matched (case-sensitively) against the source text.
            /// </summary>
            public enum Statements
            {
                exit,
                append,
                list,
                print,
                printlength,
                printwords,
                printwordcount,
                set,
                reverse,
                h,
                writeout
            }

            /// <summary>
            /// Parses and executes statements from <paramref name="source"/> until an <c>exit</c> statement
            /// runs, the input ends, or (for non-interactive input) a parse error terminates the process.
            /// </summary>
            /// <param name="source">Stream holding the UTF-8 program text; interactive lines are appended to it.</param>
            /// <param name="dynamicInput">true to read statements interactively from the console.</param>
            public void StartParsing(Stream source, bool dynamicInput = false)
            {
                long initSourceLength = source.Length;
                long initPos = 0;   // start of the statement currently being parsed
                bool cont = false;  // true while a statement is being continued on further lines

                while (true)
                {
                    // Only ask for more text when the buffered text is used up (or a statement is unfinished),
                    // so several statements entered on one line are each parsed in turn.
                    if (dynamicInput && (cont || OnlyWhitespaceRemains(source)))
                    {
                        if (!cont)
                        {
                            Console.WriteLine("Enter a command: ");
                        }
                        string line = Console.ReadLine();
                        if (line == null)
                        {
                            // Console input was closed; there is nothing more to parse.
                            return;
                        }
                        long resumePos = source.Position;
                        byte[] lineBytes = Encoding.UTF8.GetBytes(line);
                        source.Position = source.Length;
                        source.Write(lineBytes, 0, lineBytes.Length);
                        source.Position = resumePos;
                    }

                    if (cont)
                    {
                        // Re-parse the whole unfinished statement now that more text is available.
                        source.Position = initPos;
                    }
                    else
                    {
                        SkipWhitespace(source);
                        initPos = source.Position;
                        if (source.Position >= source.Length)
                        {
                            if (dynamicInput)
                            {
                                continue; // blank line: prompt again
                            }
                            return; // clean end of a piped/file program
                        }
                    }

                    // parse the statement or list of statements;
                    // This is done by reading the next word
                    long position = FindNextWord(source, out string word);
                    try
                    {
                        // IsDefined restricts the match to the keyword names; TryParse alone would also
                        // accept numeric text such as "3".
                        if (!Enum.IsDefined(typeof(Statements), word)
                            || !Enum.TryParse(word, out Statements statement))
                        {
                            throw new ParserException("Failed parsing statement", 0, source.Position);
                        }

                        // By turning the result of the command into an action,
                        // we can defer processing the final result until the end of this control flow
                        Action result = () => { };
                        source.Position = position;
                        switch (statement)
                        {
                            case Statements.exit:
                                result = Exit(source, initSourceLength, dynamicInput);
                                break;
                            case Statements.append:
                                result = AppendSet(source);
                                break;
                            case Statements.list:
                                long afterWord = FindNextWord(source, out string nextWord);
                                if (nextWord == "all")
                                {
                                    source.Position = afterWord;
                                    result = List(true);
                                }
                                else
                                {
                                    result = List();
                                }
                                break;
                            case Statements.print:
                                result = Print(source, 0);
                                break;
                            case Statements.printlength:
                                result = Print(source, 1);
                                break;
                            case Statements.printwords:
                                result = Print(source, 2);
                                break;
                            case Statements.printwordcount:
                                result = Print(source, 3);
                                break;
                            case Statements.set:
                                result = AppendSet(source, false);
                                break;
                            case Statements.reverse:
                                result = Reverse(source);
                                break;
                            case Statements.h:
                                if (dynamicInput)
                                {
                                    // Interactive help is not a real command: show it and drop the
                                    // entered text from the recorded stream.
                                    PrintHelp();
                                    source.Position = initPos;
                                    source.SetLength(initPos);
                                    cont = false;
                                    continue;
                                }
                                result = PrintHelp;
                                break;
                            // Statements.writeout has no handler here; it parses as a no-op.
                        }

                        // Do a check semicolons etc
                        if (IsNextEoS(source))
                        {
                            cont = false;
                            // Move past the semi-colon itself, including any whitespace in front of it.
                            SkipWhitespace(source);
                            ReadChar(source);
                            if (dynamicInput && OnlyWhitespaceRemains(source))
                            {
                                // Nicely format the recorded stream, so we may print/save it cleanly
                                source.Position = source.Length;
                                source.WriteByte((byte)'\n');
                            }
                            result();
                            if (statement == Statements.exit)
                            {
                                return;
                            }
                        }
                        else if (dynamicInput && OnlyWhitespaceRemains(source))
                        {
                            // Everything entered so far parsed, but the statement is not terminated yet:
                            // keep accumulating lines (joined by a space) until it is.
                            cont = true;
                            source.Position = source.Length;
                            source.WriteByte((byte)' ');
                            Console.Write(">");
                        }
                        else
                        {
                            throw new ParserException("expected a semi-colon", 0, source.Position);
                        }
                    }
                    // Throwing a ParserException will return us to this point immediately. From here, the line is
                    // automatically restored, and the exception printed to the console window.
                    // This means that each function does not need to keep track of our current position in the stream
                    catch (ParserException e)
                    {
                        if (e.Importance >= 3)
                        {
                            throw new ApplicationException("A critical error occurred.", e);
                        }
                        long caretPos = e.LinePosition > 0 ? e.LinePosition : initPos;
                        WriteDebugLine(initPos, caretPos, e.Message, source);
                        cont = false; // the failed statement was discarded; start a fresh one
                        if (!dynamicInput)
                        {
                            Environment.Exit(-1);
                        }
                    }
                }
            }

            /// <summary>
            /// Prints the list of statement keywords.
            /// </summary>
            static void PrintHelp()
            {
                Console.WriteLine("Commands are: ");
                foreach (string name in Enum.GetNames(typeof(Statements)))
                {
                    Console.WriteLine("\t{0}", name);
                }
            }

            #region Function Handling
            /// <summary>
            /// Checks if the next expression in the source meets the requirements of being a key,
            /// and optionally verifies that key exists.
            /// Also enforces that the key is not a reserved constant.
            /// </summary>
            /// <param name="source">Stream positioned before the key; moved past the key on success.</param>
            /// <param name="checkExist">true to require the key to already be in the symbol table.</param>
            /// <returns>The validated key</returns>
            /// <exception cref="ParserException">The key is missing, unknown, reserved, or contains an invalid character.</exception>
            private string ValidateKey(Stream source, bool checkExist)
            {
                long keyEndPos = FindIdentifier(source, out string key);
                if (keyEndPos < 0 || key.Length == 0)
                {
                    throw new ParserException("Could not identify object", 0, source.Position);
                }

                bool exists = Symbols.TryGetValue(key, out Tuple<string, VariableFlags> symbol);
                if (checkExist && !exists)
                {
                    throw new ParserException("Key not found", 0, source.Position);
                }
                if (exists && symbol.Item2.HasFlag(VariableFlags.Reserved))
                {
                    throw new ParserException("Cannot assign a value to a reserved constant", 0, keyEndPos - (key.Length + 1));
                }

                int invalidIndex = Array.FindIndex(key.ToCharArray(), c => !IsIdentifierChar(c));
                if (invalidIndex > -1)
                {
                    throw new ParserException(
                        string.Format("Character \'{0}\' is not valid for an identifier", key[invalidIndex]),
                        0,
                        keyEndPos - key.Length + invalidIndex);
                }

                source.Position = keyEndPos;
                return key;
            }

            /// <summary>
            /// Identifiers may contain ASCII letters, ASCII digits and underscores only.
            /// </summary>
            static bool IsIdentifierChar(char c)
            {
                return (c >= '0' && c <= '9')
                    || (c >= 'A' && c <= 'Z')
                    || (c >= 'a' && c <= 'z')
                    || c == '_';
            }

            /// <summary>
            /// Checks if the next expression meets the requirements of being a value
            /// </summary>
            /// <param name="source">Stream positioned before the expression; moved past it on success.</param>
            /// <returns>The evaluated expression</returns>
            /// <exception cref="ParserException">The expression could not be evaluated.</exception>
            private string ValidateValue(Stream source)
            {
                long valuePos = FindExpression(source, out string value);
                if (valuePos < 0)
                {
                    throw new ParserException("Could not evaluate expression", 0, source.Position);
                }
                source.Position = valuePos;
                return value;
            }

            /// <summary>
            /// Handles the 'append x y [ + z];' case and the 'set x y [ + z];' case
            /// </summary>
            /// <param name="source">Stream positioned after the statement keyword.</param>
            /// <param name="appendMode">true for append (the key must exist), false for set.</param>
            /// <returns>An Action that will store the new value in the symbol table</returns>
            Action AppendSet(Stream source, bool appendMode = true)
            {
                string key = ValidateKey(source, appendMode);
                string value = ValidateValue(source);

                if (appendMode)
                {
                    return () => Symbols[key] = new Tuple<string, VariableFlags>(Symbols[key].Item1 + value, Symbols[key].Item2);
                }

                // set: overwrite the value but keep the flags of an existing symbol.
                return () =>
                {
                    VariableFlags flags = Symbols.TryGetValue(key, out Tuple<string, VariableFlags> existing)
                        ? existing.Item2
                        : VariableFlags.Empty;
                    Symbols[key] = new Tuple<string, VariableFlags>(value, flags);
                };
            }

            /// <summary>
            /// Creates a nicely formatted table of all values
            /// </summary>
            /// <param name="printUnprint">List values normally excluded from printing</param>
            /// <returns>An Action that prints the table</returns>
            Action List(bool printUnprint = false)
            {
                int flagWidth = Math.Max(Enum.GetNames(typeof(VariableFlags)).Length, "Flags".Length);
                int keyWidth = (int)((ConsoleWidthLimit - flagWidth) * 0.2); // 20% - flag width
                int valueWidth = (int)((ConsoleWidthLimit - flagWidth) * 0.8); // 80% - flag width

                // One horizontal rule of the table, using the given corner/junction characters.
                string Border(char left, char mid, char right)
                {
                    return left + new string('─', keyWidth) + mid + new string('─', valueWidth) + mid
                        + new string('─', flagWidth) + right + "\n";
                }

                StringBuilder consoleOutput = new StringBuilder();
                consoleOutput.Append(Border('┌', '┬', '┐'));
                consoleOutput.AppendFormat("│{0}│{1}│{2}│\n",
                    CenterString("Symbol", keyWidth), CenterString("Value", valueWidth), CenterString("Flags", flagWidth));

                List<string> eligibleKeys = new List<string>(Symbols.Count);
                foreach (string item in Symbols.Keys)
                {
                    if (printUnprint || !Symbols[item].Item2.HasFlag(VariableFlags.NoPrint))
                    {
                        eligibleKeys.Add(item);
                    }
                }

                if (eligibleKeys.Count > 0)
                {
                    consoleOutput.Append(Border('├', '┼', '┤'));
                    // Negative alignment left-justifies each cell to its column width.
                    string entryFormat = "│{0," + -keyWidth + "}│{1," + -valueWidth + "}│{2," + -flagWidth + "}│\n";
                    for (int i = 0; i < eligibleKeys.Count; i++)
                    {
                        string key = eligibleKeys[i];
                        Tuple<string, VariableFlags> symbol = Symbols[key];

                        // Long keys/values wrap over several rows; control characters are shown escaped.
                        List<string> keyLines = GetStringLines(key, keyWidth);
                        List<string> valueLines = GetStringLines(
                            symbol.Item1.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\t", "\\t"), valueWidth);
                        string flagBits = Convert.ToString((byte)symbol.Item2, 2).PadLeft(flagWidth, '0');

                        int rowCount = Math.Max(keyLines.Count, valueLines.Count);
                        for (int j = 0; j < rowCount; j++)
                        {
                            consoleOutput.AppendFormat(entryFormat,
                                j < keyLines.Count ? keyLines[j] : "",
                                j < valueLines.Count ? valueLines[j] : "",
                                j == 0 ? flagBits : "");
                        }
                        if (i + 1 < eligibleKeys.Count)
                        {
                            consoleOutput.Append(Border('├', '┼', '┤'));
                        }
                    }
                }
                consoleOutput.Append(Border('└', '┴', '┘'));
                return () => Console.WriteLine(consoleOutput.ToString());
            }

            /// <summary>
            /// Handles the 'exit;' statement. For interactive sessions whose command stream changed,
            /// the returned action offers to save the entered commands to a file.
            /// </summary>
            /// <param name="source">The stream holding the commands entered so far.</param>
            /// <param name="initialStreamLength">Length of <paramref name="source"/> when parsing started.</param>
            /// <param name="isDynamicInput">true when the commands were entered interactively.</param>
            /// <returns>An Action performing the (optional) save</returns>
            Action Exit(Stream source, long initialStreamLength, bool isDynamicInput = false)
            {
                void exitAction()
                {
                    if (source.Length == initialStreamLength || !isDynamicInput)
                    {
                        return;
                    }

                    Console.WriteLine("Commands list has been modified; would you like to save it to a file?");
                    string commandState = "";
                    while (!string.Equals(commandState, "y", StringComparison.OrdinalIgnoreCase)
                        && !string.Equals(commandState, "n", StringComparison.OrdinalIgnoreCase))
                    {
                        Console.Write("Y/n: ");
                        commandState = Console.ReadLine();
                        if (commandState == null)
                        {
                            return; // console input closed: nothing can be answered
                        }
                    }
                    if (!string.Equals(commandState, "y", StringComparison.OrdinalIgnoreCase))
                    {
                        return;
                    }

                    Console.WriteLine("Enter an output file (default {0}):", Environment.CurrentDirectory);
                    string path = Console.ReadLine();
                    if (string.IsNullOrEmpty(path))
                    {
                        return;
                    }

                    path = Path.Combine(Environment.CurrentDirectory, path);
                    source.Position = 0;
                    try
                    {
                        // File.Create truncates an existing file; File.OpenWrite would leave old trailing bytes.
                        using (FileStream fs = File.Create(path))
                        {
                            source.CopyTo(fs);
                        }
                    }
                    catch (Exception e) when (e is IOException || e is UnauthorizedAccessException
                        || e is ArgumentException || e is NotSupportedException)
                    {
                        Console.WriteLine("Could not save the commands list: " + e.Message);
                        return;
                    }
                    source.Close();
                }
                return exitAction;
            }

            /// <summary>
            /// Handles the print family of statements.
            /// </summary>
            /// <param name="source">Stream positioned after the statement keyword.</param>
            /// <param name="mode">0 = print, 1 = printlength, 2 = printwords, 3 = printwordcount.</param>
            /// <returns>An Action that writes the prepared output to the console</returns>
            Action Print(Stream source, int mode = 0)
            {
                StringBuilder outputString = new StringBuilder();
                string expression = ValidateValue(source);

                if (mode == 0)
                {
                    outputString.Append(expression + Environment.NewLine);
                }
                else if (mode == 1)
                {
                    outputString.Append("Length of the expression is: ");
                    outputString.Append(expression.Length + Environment.NewLine);
                }
                else if (mode >= 2)
                {
                    // Words are separated by any run of whitespace; empty entries are not words.
                    string[] words = expression.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                    if (mode == 3)
                    {
                        outputString.Append("Wordcount is: ");
                        outputString.Append(words.Length + Environment.NewLine);
                    }
                    else
                    {
                        // Deferred with the rest of the output, so nothing is printed for a statement
                        // that later turns out to be incomplete or invalid.
                        outputString.Append("Words are:" + Environment.NewLine);
                        foreach (string word in words)
                        {
                            outputString.Append(word + Environment.NewLine);
                        }
                    }
                }
                return () => Console.WriteLine(outputString.ToString());
            }

            /// <summary>
            /// Handles the 'reverse x;' statement: reverses the order of the space-separated words of x.
            /// </summary>
            /// <param name="source">Stream positioned after the statement keyword.</param>
            /// <returns>An Action that stores the reversed value</returns>
            Action Reverse(Stream source)
            {
                string key = ValidateKey(source, true);
                string[] words = Symbols[key].Item1.Split(' ');
                Array.Reverse(words);
                // Join (rather than appending "word " repeatedly) so no trailing space is added.
                string reversed = string.Join(" ", words);
                return () => Symbols[key] = new Tuple<string, VariableFlags>(reversed, Symbols[key].Item2);
            }

            /// <summary>
            /// Writes the debug info to the screen in the form:
            /// <code>
            /// line read from stream (lineStart) to line end
            /// &lt;whitespace@caratPos&gt; ^ &lt;errorMessage&gt;
            /// </code>
            /// and then discards the stream content from <paramref name="lineStart"/> onwards.
            /// </summary>
            /// <param name="lineStart">Stream position where the failed statement starts.</param>
            /// <param name="caratPos">Stream position the caret should point at.</param>
            /// <param name="errorMessage">Message printed after the caret.</param>
            /// <param name="source">The stream being parsed.</param>
            static void WriteDebugLine(long lineStart, long caratPos, string errorMessage, Stream source)
            {
                source.Position = lineStart;
                string fullLine = GetNextLine(source);
                int indent = (int)Math.Max(0L, caratPos - lineStart);
                Console.WriteLine(fullLine);
                Console.WriteLine(new string(' ', indent) + "^ " + errorMessage);
                source.Position = lineStart;
                source.SetLength(lineStart);
            }
            #endregion

            #region Data Handling
            /// <summary>
            /// Parses and evaluates the expression from the stream, moving the stream to the end of the last value
            /// </summary>
            /// <param name="s">Stream positioned before the expression.</param>
            /// <param name="expression">The concatenation of all values in the expression.</param>
            /// <returns>The stream position after the expression</returns>
            /// <exception cref="ParserException">A value is unknown, or two values are not joined by '+'.</exception>
            long FindExpression(Stream s, out string expression)
            {
                StringBuilder result = new StringBuilder();
                bool expectValue = true; // a value is allowed at the start and after each '+'

                // iterate through values until we reach either the end of the stream or the end-of-statement
                while (s.Position < s.Length && !IsNextEoS(s))
                {
                    if (OnlyWhitespaceRemains(s))
                    {
                        break; // trailing whitespace is not another value
                    }

                    if (IsNextEoS(s, '+'))
                    {
                        // Consume exactly the operator, so a value written directly after it ("+x") is kept.
                        SkipWhitespace(s);
                        ReadChar(s);
                        expectValue = true;
                    }
                    else
                    {
                        long valueEnd = FindValue(s, out string value);
                        if (valueEnd < 0)
                        {
                            throw new ParserException("Could not parse value", 0, s.Position);
                        }
                        if (!expectValue)
                        {
                            throw new ParserException("Append operator not set", 0, s.Position);
                        }
                        s.Position = valueEnd;
                        result.Append(value);
                        expectValue = false;
                    }
                }
                expression = result.ToString();
                return s.Position;
            }

            /// <summary>
            /// Checks ahead to see if the next non-whitespace character is the EoS indicator (';')
            /// </summary>
            /// <param name="s">Stream to look ahead in; its position is left unchanged.</param>
            /// <param name="EoSChar">The character to look for.</param>
            /// <returns>true if the next char is <paramref name="EoSChar"/>, else false</returns>
            static bool IsNextEoS(Stream s, char EoSChar = ';')
            {
                long pos = s.Position;
                SkipWhitespace(s);
                char next = PeekChar(s);
                s.Position = pos;
                return next == EoSChar;
            }

            /// <summary>
            /// Finds the next value in the stream: either a quoted literal or the value of an existing symbol
            /// </summary>
            /// <param name="s">Stream positioned before the value; leading whitespace is skipped.</param>
            /// <param name="returnedValue">The literal text or the symbol's value.</param>
            /// <returns>The stream position after the value</returns>
            /// <exception cref="ParserException">The identifier is not in the symbol table.</exception>
            long FindValue(Stream s, out string returnedValue)
            {
                SkipWhitespace(s);
                if (PeekChar(s) == '\"')
                {
                    // The first char is a ", i.e. the start of a literal - search as if it were a literal.
                    return FindLiteral(s, out returnedValue);
                }

                long identifierEnd = FindExistingIdentifier(s, out string keyValue);
                if (!Symbols.TryGetValue(keyValue, out Tuple<string, VariableFlags> symbol))
                {
                    throw new ParserException("Could not find key: " + keyValue, 0, s.Position);
                }
                returnedValue = symbol.Item1;
                return identifierEnd;
            }

            /// <summary>
            /// Reads the next word as a candidate identifier; the stream position is left unchanged.
            /// </summary>
            /// <returns>The stream position after the word</returns>
            static long FindIdentifier(Stream s, out string returnedKey)
            {
                return FindNextWord(s, out returnedKey);
            }

            /// <summary>
            /// Reads the next word as an identifier used inside an expression; the stream position is left
            /// unchanged. A '+' ends the identifier, so "a+b" is read as "a" followed by the operator.
            /// </summary>
            /// <returns>The stream position after the identifier</returns>
            static long FindExistingIdentifier(Stream s, out string returnedKey)
            {
                return FindNextWord(s, out returnedKey, "+");
            }

            /// <summary>
            /// Finds the end of the complete literal definition, returning the stream to the original position.
            /// Inside a literal, \" stands for a quote and \\ for a backslash.
            /// If the stream ends before the closing quote, everything read so far is returned; the
            /// interactive parser relies on this to continue a literal on the next input line.
            /// </summary>
            /// <param name="s">Stream positioned on the opening quote.</param>
            /// <param name="returnedLiteral">The literal's text with escapes resolved.</param>
            /// <returns>The stream position after the closing quote (or the end of the stream)</returns>
            static long FindLiteral(Stream s, out string returnedLiteral)
            {
                long start = s.Position;

                // Remove the first char, if it is a literal definition.
                if (PeekChar(s) == '\"')
                {
                    ReadChar(s);
                }

                // The predicate sees every character in order, so it can track whether the previous
                // character was an unescaped backslash.
                bool escaped = false;
                long resultPosition = FindNextOccurance(s, (c, stream) =>
                {
                    if (escaped)
                    {
                        escaped = false;
                        return false;
                    }
                    if (c == '\\')
                    {
                        escaped = true;
                        return false;
                    }
                    return c == '\"';
                }, out string rawLiteral);

                if (resultPosition < 0)
                {
                    throw new ParserException("Could not parse the literal", 0, s.Position);
                }
                returnedLiteral = UnescapeLiteral(rawLiteral);
                s.Position = start;
                return resultPosition;
            }

            /// <summary>
            /// Replaces \" with " and \\ with \; any other backslash is kept as written.
            /// </summary>
            static string UnescapeLiteral(string raw)
            {
                if (raw.IndexOf('\\') < 0)
                {
                    return raw;
                }
                StringBuilder unescaped = new StringBuilder(raw.Length);
                for (int i = 0; i < raw.Length; i++)
                {
                    bool isEscape = raw[i] == '\\' && i + 1 < raw.Length && (raw[i + 1] == '\"' || raw[i + 1] == '\\');
                    if (isEscape)
                    {
                        i++; // drop the backslash, keep the escaped character
                    }
                    unescaped.Append(raw[i]);
                }
                return unescaped.ToString();
            }
            #endregion
        }

        #region HelperFunctions
        /// <summary>
        /// Reads the stream as a UTF-8 encoded string until the next occurance of '\n' or '\r\n'
        /// (the line ending is excluded). The stream position is left unchanged.
        /// </summary>
        /// <param name="s">Stream positioned at the start of the line.</param>
        /// <returns>The text of the line</returns>
        static string GetNextLine(Stream s)
        {
            FindNextOccurance(s, '\n', out string nextLine);
            return nextLine.TrimEnd('\r');
        }

        /// <summary>
        /// Finds the end-boundary of the next word in the stream, and returns the stream to the original position.
        /// A word ends at whitespace, at ';', or at any character in <paramref name="extraTerminators"/>;
        /// the first non-whitespace character is always taken as part of the word.
        /// </summary>
        /// <param name="s">Stream to read from.</param>
        /// <param name="nextWord">The word found, or an empty string if only whitespace remains.</param>
        /// <param name="extraTerminators">Additional characters that end a word.</param>
        /// <returns>The stream position after the word</returns>
        static long FindNextWord(Stream s, out string nextWord, string extraTerminators = "")
        {
            StringBuilder newWord = new StringBuilder();
            // Record our current position
            long start = s.Position;

            // NB: Whitespace includes carriage returns and line feeds,
            // so 'set\r\n
            //     var
            //     "expression";
            // should be valid
            string current = ReadCodePoint(s);
            while (current.Length > 0 && char.IsWhiteSpace(current[0]))
            {
                current = ReadCodePoint(s);
            }

            // An empty read means the stream ended before any word started.
            if (current.Length > 0)
            {
                newWord.Append(current);
                while (s.Position < s.Length)
                {
                    long beforeRead = s.Position;
                    current = ReadCodePoint(s);
                    char c = current[0];
                    if (char.IsWhiteSpace(c) || c == ';' || extraTerminators.IndexOf(c) >= 0)
                    {
                        // Un-read the delimiter (which may be more than one byte long).
                        s.Position = beforeRead;
                        break;
                    }
                    newWord.Append(current);
                }
            }

            nextWord = newWord.ToString();
            long endPos = s.Position;
            s.Position = start;
            return endPos;
        }

        /// <summary>
        /// Finds and returns the position of the next occurance of the Func returning true.
        /// The stream is returned to its original position.
        /// </summary>
        /// <param name="s">Stream to search.</param>
        /// <param name="p">A 'predicate'-like Func, called with each character and the stream</param>
        /// <param name="result">Returns the string captured while searching for the next char</param>
        /// <returns>The position after the matching character, or the end of the stream if there was no match</returns>
        static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
        {
            long start = s.Position;
            StringBuilder sb = new StringBuilder();
            while (s.Position < s.Length)
            {
                string text = ReadCodePoint(s);
                // A NUL character ends the search just like a match does.
                if (text.Length == 0 || text[0] == 0 || p(text[0], s))
                {
                    break;
                }
                sb.Append(text);
            }
            result = sb.ToString();
            long newPosition = s.Position;
            s.Position = start;
            return newPosition;
        }

        /// <summary>
        /// Finds the next position of the character
        /// </summary>
        /// <param name="s">Stream to search.</param>
        /// <param name="c">The character to find.</param>
        /// <param name="result">Captures the string read in searching for the character</param>
        /// <returns>The position after the character, or the end of the stream if it was not found</returns>
        static long FindNextOccurance(Stream s, char c, out string result)
        {
            return FindNextOccurance(s, (streamChar, stream) => streamChar == c, out result);
        }

        /// <summary>
        /// Reads the next UTF-8 encoded code point in the stream and advances the stream past all of its bytes.
        /// </summary>
        /// <param name="s">Stream to read from.</param>
        /// <returns>
        /// The decoded text (two UTF-16 chars for code points outside the BMP), or an empty string at the
        /// end of the stream
        /// </returns>
        /// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
        static string ReadCodePoint(Stream s)
        {
            // As UTF-8 allows codepoints to span multiple bytes, reading a single byte as a character
            // will not always give the expected value.
            // The leading bits of the first byte determine how many continuation bytes follow.
            int firstByte = s.ReadByte();
            if (firstByte == -1)
            {
                return string.Empty;
            }

            int continuationCount = 0;
            if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
            {
                continuationCount = 3;
            }
            else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
            {
                continuationCount = 2;
            }
            else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
            {
                continuationCount = 1;
            }

            byte[] charBytes = new byte[continuationCount + 1];
            charBytes[0] = (byte)firstByte;
            // ReadByte already advances the stream, so no further position adjustment is needed.
            for (int i = 1; i <= continuationCount; i++)
            {
                int nextByte = s.ReadByte();
                if (nextByte >> 6 != 2) // continuation bytes are 10xxxxxx; -1 (end of stream) fails too
                {
                    throw new InvalidDataException("Character is not a valid UTF-8 code point!");
                }
                charBytes[i] = (byte)nextByte;
            }
            return Encoding.UTF8.GetString(charBytes);
        }

        /// <summary>
        /// Reads the next UTF-8 encoded character in the stream, and advances the stream by the bytes read.
        /// For a code point outside the BMP only the first UTF-16 char of the pair is returned.
        /// </summary>
        /// <param name="s">Stream to read from.</param>
        /// <returns>The character read, or (char)0 at the end of the stream</returns>
        static char ReadChar(Stream s)
        {
            string text = ReadCodePoint(s);
            return text.Length == 0 ? (char)0 : text[0];
        }

        /// <summary>
        /// Reads the next character in the stream, and returns the position to the original position
        /// </summary>
        /// <param name="s">Stream to read from.</param>
        /// <returns>The next character, or (char)0 at the end of the stream</returns>
        static char PeekChar(Stream s)
        {
            long curr = s.Position;
            char c = ReadChar(s);
            s.Position = curr;
            return c;
        }

        /// <summary>
        /// Reads the character that ends just before the current position; the position is left unchanged.
        /// </summary>
        /// <param name="s">Stream to read from.</param>
        /// <returns>The previous character, or (char)0 at the start of the stream</returns>
        static char PreviousChar(Stream s)
        {
            long original = s.Position;
            Stack<byte> charBytes = new Stack<byte>(4);
            for (int i = 0; i < 4 && s.Position > 0; i++)
            {
                s.Position--;
                byte read = (byte)s.ReadByte();
                s.Position--; // ReadByte moved forward again; step back over the byte just read
                charBytes.Push(read);
                // No longer an UTF-8 extension byte, so this is the first byte of the character
                if (read >> 6 != 2)
                {
                    break;
                }
            }
            s.Position = original;

            if (charBytes.Count == 0)
            {
                return (char)0;
            }
            // Stack.ToArray returns the most recently pushed byte first, i.e. stream order.
            return Encoding.UTF8.GetString(charBytes.ToArray())[0];
        }

        /// <summary>
        /// Skips whitespace characters
        /// </summary>
        /// <param name="s">Stream to advance to the next non-whitespace character (or its end).</param>
        static void SkipWhitespace(Stream s)
        {
            char c = PeekChar(s);
            while (s.Position < s.Length && char.IsWhiteSpace(c))
            {
                ReadChar(s); // move by the size of that character
                c = PeekChar(s);
            }
        }

        /// <summary>
        /// Checks whether nothing but whitespace is left between the current position and the end of
        /// the stream. The stream position is left unchanged.
        /// </summary>
        static bool OnlyWhitespaceRemains(Stream s)
        {
            long pos = s.Position;
            SkipWhitespace(s);
            bool atEnd = s.Position >= s.Length;
            s.Position = pos;
            return atEnd;
        }

        /// <summary>
        /// Centers <paramref name="source"/> in a field of <paramref name="totalPadding"/> characters.
        /// </summary>
        /// <param name="source">The text to center.</param>
        /// <param name="totalPadding">The total width of the result.</param>
        /// <param name="paddingChar">The character used to fill both sides.</param>
        /// <returns>The padded text, or <paramref name="source"/> itself if it is already wide enough</returns>
        static string CenterString(string source, int totalPadding, char paddingChar = ' ')
        {
            if (source.Length >= totalPadding)
            {
                return source;
            }

            // The text is split in the middle; each half is padded out to half of the field.
            int split = (int)Math.Ceiling(source.Length / 2.0);
            int leftHalfPad = (int)Math.Floor(totalPadding / 2.0);
            int rightHalfPad = (int)Math.Ceiling(totalPadding / 2.0);
            return source[..split].PadLeft(leftHalfPad, paddingChar)
                + source[split..].PadRight(rightHalfPad, paddingChar);
        }

        /// <summary>
        /// Splits <paramref name="source"/> into consecutive chunks of at most <paramref name="maxWidth"/> characters.
        /// </summary>
        /// <param name="source">The text to split.</param>
        /// <param name="maxWidth">Maximum chunk length; must be positive.</param>
        /// <returns>The chunks in order (empty for an empty string)</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxWidth"/> is not positive.</exception>
        static List<string> GetStringLines(string source, int maxWidth)
        {
            if (maxWidth <= 0)
            {
                // A non-positive width would never advance through the text.
                throw new ArgumentOutOfRangeException(nameof(maxWidth));
            }

            List<string> lines = new List<string>();
            for (int start = 0; start < source.Length; start += maxWidth)
            {
                int end = Math.Min(start + maxWidth, source.Length);
                lines.Add(source[start..end]);
            }
            return lines;
        }
        #endregion

        /// <summary>
        /// Raised when a statement cannot be parsed; carries the stream position of the problem.
        /// </summary>
        public class ParserException : Exception
        {
            /// <summary>
            /// Importance is used to signify how the parser should respond to the error.
            /// A code of 3 or greater is a critical error; the application will throw the error further up the call and exit.
            /// 0 implies the line may be retried.
            /// 1 should imply the current block is not valid and should be retried.
            /// </summary>
            public int Importance = 0;

            /// <summary>
            /// Stream position the error refers to (-1 when unknown).
            /// </summary>
            public long LinePosition = -1;

            public ParserException(string message, int importance, long linePos) : base(message)
            {
                Importance = importance;
                LinePosition = linePos;
            }
        }
    }
}