using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace Assignment_1
{
    /// <summary>
    /// Console interpreter for a small string-manipulation language.
    /// A program is an opening '{' followed by ';'-terminated statements (and optionally a closing '}').
    /// For this program, a word is any run of non-whitespace characters bounded by whitespace or the
    /// end of the stream.
    /// </summary>
    class Program
    {
        /// <summary>Attributes attached to every entry of the symbol table.</summary>
        [Flags]
        enum VariableFlags
        {
            Empty = 0,
            Reserved = 1,
            NoPrint = 2
        }

        /// <summary>
        /// Symbol table: maps a symbol name to its string value and flags.
        /// It is pre-populated with the reserved whitespace constants, which cannot be reassigned.
        /// </summary>
        static readonly Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>
        {
            { "SPACE", new Tuple<string, VariableFlags>(" ", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "TAB", new Tuple<string, VariableFlags>("\t", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "NEWLINE", new Tuple<string, VariableFlags>("\n", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "CARRIAGE_RETURN", new Tuple<string, VariableFlags>("\r", VariableFlags.Reserved | VariableFlags.NoPrint) }
        };

        /// <summary>
        /// Entry point. Prints the banner, loads the program text into a memory stream and runs the parser.
        /// When standard input is redirected the whole input is read up front; otherwise statements are
        /// requested interactively.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("┌──────────────────────────────────────────┐");
            Console.WriteLine("│ 159.341 2021 Semester 1, Assignment 1 │");
            Console.WriteLine("│ Submitted by Brychan Dempsey, 14299890 │");
            Console.WriteLine("└──────────────────────────────────────────┘");

            // The memory stream retains the source while it is being interpreted.
            MemoryStream sourceStream = new MemoryStream(1024);
            Parser parser = new Parser();
            bool dynamicInput = false;

            // From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
            // Reading from pipes is equivalent to reading user input, though the input is redirected.
            byte[] initialSource;
            if (Console.IsInputRedirected)
            {
                initialSource = Encoding.UTF8.GetBytes(Console.In.ReadToEnd());
            }
            else
            {
                // Interactive sessions start with an implicit program opening.
                initialSource = Encoding.UTF8.GetBytes("{ \r\n");
                dynamicInput = true;
            }
            sourceStream.Write(initialSource, 0, initialSource.Length);
            sourceStream.Position = 0;

            parser.StartParsing(sourceStream, dynamicInput);
            Console.ReadLine();
        }

        /// <summary>Parses and executes statements read from a seekable stream of UTF-8 text.</summary>
        public class Parser
        {
            /// <summary>The statement keywords understood by the interpreter.</summary>
            public enum statements { exit, append, list, print, printlength, printwords, printwordcount, set, reverse }

            /// <summary>
            /// Runs the program held in <paramref name="source"/>.
            /// The stream must begin with '{'. Parsing stops at a '}' word, at the end of the stream
            /// (non-interactive mode), or when the console input is closed (interactive mode).
            /// A statement that fails to parse discards the rest of the buffered input.
            /// </summary>
            /// <param name="source">Seekable, writable, resizable stream holding the program text.</param>
            /// <param name="dynamicInput">
            /// When true, a new line is requested from the console every time the buffered input is exhausted.
            /// </param>
            public void StartParsing(Stream source, bool dynamicInput = false)
            {
                if (source.ReadByte() != '{')
                {
                    Console.WriteLine("Failed parsing program: expected '{'");
                    return;
                }

                while (true)
                {
                    SkipWhitespace(source);
                    if (source.Position >= source.Length)
                    {
                        // Nothing left to interpret: either fetch more input or finish.
                        if (!dynamicInput || !ReadInteractiveLine(source))
                        {
                            return;
                        }
                        continue;
                    }

                    long statementStart = source.Position;
                    // The keyword ends at whitespace or ';' so that "list;" is recognised as "list".
                    long keywordEnd = FindExistingIdentifier(source, out string word);
                    if (word == "}")
                    {
                        return;
                    }

                    if (!TryParseStatement(word, out statements statementType))
                    {
                        Console.WriteLine("Failed parsing statement");
                        // Drop the unreadable input; leaving it in place would make the loop spin forever.
                        DiscardFrom(source, statementStart);
                        continue;
                    }

                    source.Position = keywordEnd;
                    bool succeeded;
                    switch (statementType)
                    {
                        case statements.exit:
                            succeeded = ConsumeTerminator(source, statementStart);
                            if (succeeded)
                            {
                                Exit();
                            }
                            break;
                        case statements.append:
                            succeeded = Append(source, statementStart);
                            break;
                        case statements.list:
                            succeeded = ConsumeTerminator(source, statementStart);
                            if (succeeded)
                            {
                                List();
                            }
                            break;
                        case statements.print:
                            succeeded = Print(source, 0, statementStart);
                            break;
                        case statements.printlength:
                            succeeded = Print(source, 1, statementStart);
                            break;
                        case statements.printwordcount:
                            succeeded = Print(source, 2, statementStart);
                            break;
                        case statements.printwords:
                            succeeded = Print(source, 3, statementStart);
                            break;
                        case statements.set:
                            succeeded = Set(source, statementStart);
                            break;
                        case statements.reverse:
                            succeeded = Reverse(source, statementStart);
                            break;
                        default:
                            succeeded = false;
                            break;
                    }

                    if (!succeeded)
                    {
                        DiscardFrom(source, statementStart);
                    }
                }
            }

            /// <summary>
            /// Matches <paramref name="word"/> against the statement keywords, exactly and case-sensitively.
            /// Numeric or comma-separated text, which Enum.TryParse would accept, is rejected.
            /// </summary>
            static bool TryParseStatement(string word, out statements statement)
            {
                return Enum.TryParse(word, out statement) && statement.ToString() == word;
            }

            /// <summary>
            /// Prompts for one line of console input and appends it (plus a newline) to the end of the stream,
            /// leaving the read position where it was.
            /// </summary>
            /// <returns>False when the console input has been closed.</returns>
            static bool ReadInteractiveLine(Stream source)
            {
                Console.WriteLine("Enter a command: ");
                string line = Console.ReadLine();
                if (line == null)
                {
                    return false;
                }

                long resumeAt = source.Position;
                byte[] bytes = Encoding.UTF8.GetBytes(line + "\n");
                source.Position = source.Length;
                source.Write(bytes, 0, bytes.Length);
                source.Position = resumeAt;
                return true;
            }

            /// <summary>Truncates the stream at <paramref name="position"/> and moves the read position there.</summary>
            static void DiscardFrom(Stream source, long position)
            {
                long cut = Math.Max(0, Math.Min(position, source.Length));
                source.SetLength(cut);
                source.Position = cut;
            }

            /// <summary>
            /// Skips whitespace and consumes the ';' that ends a statement.
            /// Reports "expected a semicolon" through <see cref="WriteDebugLine"/> when it is missing.
            /// </summary>
            /// <returns>True when the terminator was consumed.</returns>
            bool ConsumeTerminator(Stream source, long lineStart)
            {
                long expectedAt = source.Position;
                SkipWhitespace(source);
                if (source.ReadByte() == ';')
                {
                    return true;
                }

                WriteDebugLine(lineStart, expectedAt, "expected a semicolon", source);
                return false;
            }

            #region Function Handling

            /// <summary>
            /// Handles <c>append id expression ;</c>: appends the expression's value to an existing symbol.
            /// Nothing is modified unless the whole statement is valid.
            /// </summary>
            /// <param name="source">Stream positioned just after the <c>append</c> keyword.</param>
            /// <param name="lineStart">
            /// Position where the statement starts; when -1 it is assumed to be "append " before the current position.
            /// </param>
            /// <returns>True when the statement was executed; false after an error has been reported.</returns>
            bool Append(Stream source, long lineStart = -1)
            {
                if (lineStart == -1)
                {
                    lineStart = Math.Max(0, source.Position - "append ".Length);
                }

                SkipWhitespace(source);
                long keyStart = source.Position;
                long keyEndPos = FindIdentifier(source, out string key);
                if (keyEndPos < 0 || !Symbols.TryGetValue(key, out Tuple<string, VariableFlags> current))
                {
                    WriteDebugLine(lineStart, keyStart, "could not identify object", source);
                    return false;
                }
                source.Position = keyEndPos;

                long valuePos = FindExpression(source, out string value);
                if (valuePos < 0)
                {
                    WriteDebugLine(lineStart, keyEndPos, "could not evaluate expression", source);
                    return false;
                }
                source.Position = valuePos;

                if (!ConsumeTerminator(source, lineStart))
                {
                    return false;
                }

                if (current.Item2.HasFlag(VariableFlags.Reserved))
                {
                    WriteDebugLine(lineStart, keyStart, "cannot assign a value to a reserved constant", source);
                    return false;
                }

                Symbols[key] = new Tuple<string, VariableFlags>(current.Item1 + value, current.Item2);
                return true;
            }

            /// <summary>
            /// Prints the symbol table as a boxed three-column table (symbol, value, flags as 8 binary digits).
            /// Carriage returns, newlines and tabs inside values are shown as escape sequences.
            /// </summary>
            void List()
            {
                string Rule(string left, string middle, string right)
                {
                    return left + new string('─', 15) + middle + new string('─', 25) + middle + new string('─', 9) + right;
                }

                Console.WriteLine(Rule("┌", "┬", "┐"));
                Console.WriteLine("│{0,-15}│{1,-25}│{2,9}│", "Symbol", "Value", "Flags");
                foreach (KeyValuePair<string, Tuple<string, VariableFlags>> item in Symbols)
                {
                    // Every row is preceded by a separator, so the box is closed correctly even when empty.
                    Console.WriteLine(Rule("├", "┼", "┤"));
                    string shownValue = item.Value.Item1.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\t", "\\t");
                    string shownFlags = Convert.ToString((byte)item.Value.Item2, 2).PadLeft(8, '0');
                    Console.WriteLine("│{0,-15}│{1,-25}│{2,9}│", item.Key, shownValue, shownFlags);
                }
                Console.WriteLine(Rule("└", "┴", "┘"));
            }

            /// <summary>Terminates the process with exit code 0.</summary>
            void Exit()
            {
                Environment.Exit(0);
            }

            /// <summary>
            /// Handles the print family: evaluates <c>expression ;</c> and writes a view of it to the console.
            /// </summary>
            /// <param name="source">Stream positioned just after the keyword.</param>
            /// <param name="mode">
            /// 0 prints the value, 1 its length, 2 its word count, 3 each word on its own line.
            /// Words are separated by any whitespace.
            /// </param>
            /// <param name="lineStart">Position where the statement starts; when -1 the current position is used.</param>
            /// <returns>True when the statement was executed; false after an error has been reported.</returns>
            bool Print(Stream source, int mode = 0, long lineStart = -1)
            {
                if (lineStart == -1)
                {
                    lineStart = source.Position;
                }

                long expressionStart = source.Position;
                long expressionEnd = FindExpression(source, out string expression);
                if (expressionEnd < 0)
                {
                    WriteDebugLine(lineStart, expressionStart, "could not evaluate expression", source);
                    return false;
                }
                source.Position = expressionEnd;

                if (!ConsumeTerminator(source, lineStart))
                {
                    return false;
                }

                if (mode == 0)
                {
                    Console.WriteLine(expression);
                }
                else if (mode == 1)
                {
                    Console.Write("Length of the expression is: ");
                    Console.WriteLine(expression.Length);
                }
                else
                {
                    // A null separator list splits on every whitespace character.
                    string[] words = expression.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                    if (mode == 2)
                    {
                        Console.Write("Wordcount is: ");
                        Console.WriteLine(words.Length);
                    }
                    else
                    {
                        Console.WriteLine("Words are:");
                        foreach (string word in words)
                        {
                            Console.WriteLine(word);
                        }
                    }
                }

                return true;
            }

            /// <summary>
            /// Handles <c>set id expression ;</c>: creates the symbol or replaces the value of an existing one.
            /// Reserved constants cannot be assigned.
            /// </summary>
            /// <param name="source">Stream positioned just after the <c>set</c> keyword.</param>
            /// <param name="lineStart">
            /// Position where the statement starts; when -1 it is assumed to be "set " before the current position.
            /// </param>
            /// <returns>True when the statement was executed; false after an error has been reported.</returns>
            bool Set(Stream source, long lineStart = -1)
            {
                if (lineStart == -1)
                {
                    lineStart = Math.Max(0, source.Position - "set ".Length);
                }

                SkipWhitespace(source);
                long identifierStart = source.Position;
                long identifierEndPos = FindIdentifier(source, out string identifier);
                if (identifierEndPos < 0)
                {
                    WriteDebugLine(lineStart, identifierStart, "could not identify object", source);
                    return false;
                }
                source.Position = identifierEndPos;

                long expressionEndPos = FindExpression(source, out string expression);
                if (expressionEndPos < 0)
                {
                    WriteDebugLine(lineStart, identifierEndPos, "could not evaluate expression", source);
                    return false;
                }
                source.Position = expressionEndPos;

                if (!ConsumeTerminator(source, lineStart))
                {
                    return false;
                }

                if (Symbols.TryGetValue(identifier, out Tuple<string, VariableFlags> existing))
                {
                    if (existing.Item2.HasFlag(VariableFlags.Reserved))
                    {
                        Console.WriteLine("Error: Cannot assign to {0} as it is a reserved constant.", identifier);
                        // Wipe the remainder of the stream, so that it doesn't get read.
                        DiscardFrom(source, lineStart);
                        return false;
                    }
                    Symbols[identifier] = new Tuple<string, VariableFlags>(expression, existing.Item2);
                }
                else
                {
                    Symbols.Add(identifier, new Tuple<string, VariableFlags>(expression, VariableFlags.Empty));
                }

                return true;
            }

            /// <summary>
            /// Handles <c>reverse id ;</c>: reverses the order of the space-separated words of an existing symbol.
            /// An unknown symbol or a reserved constant is reported as an error.
            /// </summary>
            /// <param name="source">Stream positioned just after the <c>reverse</c> keyword.</param>
            /// <param name="lineStart">
            /// Position where the statement starts; when -1 it is assumed to be "reverse " before the current position.
            /// </param>
            /// <returns>True when the statement was executed; false after an error has been reported.</returns>
            bool Reverse(Stream source, long lineStart = -1)
            {
                if (lineStart == -1)
                {
                    lineStart = Math.Max(0, source.Position - "reverse ".Length);
                }

                SkipWhitespace(source);
                long identifierStart = source.Position;
                long resultPos = FindIdentifier(source, out string identifier);
                if (resultPos < 0 || !Symbols.TryGetValue(identifier, out Tuple<string, VariableFlags> current))
                {
                    WriteDebugLine(lineStart, identifierStart, "could not identify object", source);
                    return false;
                }
                source.Position = resultPos;

                if (!ConsumeTerminator(source, lineStart))
                {
                    return false;
                }

                if (current.Item2.HasFlag(VariableFlags.Reserved))
                {
                    WriteDebugLine(lineStart, identifierStart, "cannot assign a value to a reserved constant", source);
                    return false;
                }

                // Splitting and joining on the same single space keeps the original spacing intact.
                string[] words = current.Item1.Split(' ');
                Array.Reverse(words);
                Symbols[identifier] = new Tuple<string, VariableFlags>(string.Join(" ", words), current.Item2);
                return true;
            }

            /// <summary>
            /// Writes an error report to the console in the form:
            /// <code>
            /// text of the stream from lineStart to the end of that line
            /// (caratPos - lineStart spaces)^ errorMessage
            /// </code>
            /// and then truncates the stream at <paramref name="lineStart"/> so the faulty input is not read again.
            /// </summary>
            /// <param name="lineStart">Stream position where the reported text starts.</param>
            /// <param name="caratPos">Stream position the caret should point at.</param>
            /// <param name="errorMessage">Message printed after the caret.</param>
            /// <param name="source">Stream holding the program text.</param>
            void WriteDebugLine(long lineStart, long caratPos, string errorMessage, Stream source)
            {
                lineStart = Math.Max(0, Math.Min(lineStart, source.Length));
                source.Position = lineStart;
                string fullLine = GetNextLine(source);

                // A caret position before the line start would otherwise request a negative-length string.
                int indent = (int)Math.Max(0, caratPos - lineStart);
                Console.WriteLine(fullLine);
                Console.WriteLine(new string(' ', indent) + "^ " + errorMessage);

                source.SetLength(lineStart);
                source.Position = lineStart;
            }

            #endregion

            #region Data Handling

            /// <summary>
            /// Parses <c>value { + value }</c> starting at the current position and concatenates the values.
            /// The stream position is restored before returning.
            /// </summary>
            /// <param name="s">Stream holding the program text.</param>
            /// <param name="expression">The concatenated value, or an empty string on failure.</param>
            /// <returns>The position just after the last value, or -1 when a value could not be evaluated.</returns>
            long FindExpression(Stream s, out string expression)
            {
                long start = s.Position;
                StringBuilder result = new StringBuilder();

                // Must contain at least one value.
                long end = FindValue(s, out string value);
                while (end >= 0)
                {
                    result.Append(value);

                    s.Position = end;
                    long operatorEnd = FindNextWord(s, out string nextWord);
                    if (nextWord != "+")
                    {
                        break;
                    }

                    s.Position = operatorEnd;
                    end = FindValue(s, out value);
                }

                s.Position = start;
                expression = end < 0 ? "" : result.ToString();
                return end;
            }

            /// <summary>
            /// Finds the next value in the stream: a double-quoted literal, or the name of an existing symbol
            /// (which evaluates to the symbol's value). The stream position is restored before returning.
            /// </summary>
            /// <param name="s">Stream holding the program text.</param>
            /// <param name="returnedValue">The value found, or an empty string on failure.</param>
            /// <returns>The position just after the value, or -1 when no value could be read.</returns>
            long FindValue(Stream s, out string returnedValue)
            {
                long start = s.Position;
                SkipWhitespace(s);
                long valueStart = s.Position;

                long end;
                if (s.ReadByte() == '\"')
                {
                    end = FindLiteral(s, out returnedValue);
                }
                else
                {
                    // Rewind explicitly: ReadByte does not advance at the end of the stream.
                    s.Position = valueStart;
                    end = FindExistingIdentifier(s, out string keyValue);
                    if (Symbols.TryGetValue(keyValue, out Tuple<string, VariableFlags> symbol))
                    {
                        returnedValue = symbol.Item1;
                    }
                    else
                    {
                        returnedValue = "";
                        end = -1;
                    }
                }

                s.Position = start;
                return end;
            }

            /// <summary>
            /// Reads the next identifier (a word ending at whitespace, ';' or the end of the stream).
            /// The stream position is not moved.
            /// </summary>
            /// <returns>The position just after the identifier, or -1 when there is no identifier.</returns>
            long FindIdentifier(Stream s, out string returnedKey)
            {
                long wordEnd = FindExistingIdentifier(s, out returnedKey);
                return returnedKey.Length == 0 ? -1 : wordEnd;
            }

            /// <summary>
            /// Reads the next word, stopping at whitespace, ';' or the end of the stream, after skipping leading
            /// whitespace. The word may be empty. The stream position is not moved and the symbol table is not
            /// consulted; callers look the key up themselves.
            /// </summary>
            /// <returns>The position just after the word (the position of the delimiter, if any).</returns>
            long FindExistingIdentifier(Stream s, out string returnedKey)
            {
                long start = s.Position;
                SkipWhitespace(s);
                long wordEnd = FindNextOccurance(s, (c, _) => c == ';' || char.IsWhiteSpace(c), out returnedKey);
                s.Position = start;
                return wordEnd;
            }

            /// <summary>
            /// Reads a literal whose opening quote has already been consumed, up to the closing quote.
            /// A backslash escapes the following character, so <c>\"</c> does not end the literal and
            /// <c>\\</c> does not escape a following quote. Escape characters are kept in the returned text.
            /// The stream position is not moved.
            /// </summary>
            /// <param name="s">Stream positioned just after the opening quote.</param>
            /// <param name="returnedLiteral">The text between the quotes, or an empty string on failure.</param>
            /// <returns>The position just after the closing quote, or -1 when the literal is not terminated.</returns>
            long FindLiteral(Stream s, out string returnedLiteral)
            {
                bool escaped = false;
                long closingQuote = FindNextOccurance(s, (c, _) =>
                {
                    if (escaped)
                    {
                        escaped = false;
                        return false;
                    }
                    if (c == '\\')
                    {
                        escaped = true;
                        return false;
                    }
                    return c == '\"';
                }, out string resultLiteral, requireMatch: true);

                if (closingQuote < 0)
                {
                    returnedLiteral = "";
                    return -1;
                }

                returnedLiteral = resultLiteral;
                return closingQuote + 1; // the quote itself is a single byte
            }

            #endregion
        }

        /// <summary>
        /// Reads the stream as UTF-8 text from the current position up to (and excluding) the next '\n' or
        /// "\r\n", or to the end of the stream. The stream position is not moved.
        /// </summary>
        /// <param name="s">Stream holding the program text.</param>
        /// <returns>The text of the line without its line ending.</returns>
        static string GetNextLine(Stream s)
        {
            FindNextOccurance(s, '\n', out string nextLine);
            return nextLine.EndsWith("\r", StringComparison.Ordinal)
                ? nextLine.Substring(0, nextLine.Length - 1)
                : nextLine;
        }

        /// <summary>
        /// Finds the next word in the string, skipping leading whitespace.
        /// </summary>
        /// <param name="s">Text to search.</param>
        /// <param name="nextWord">The word found; empty when the text holds no word.</param>
        /// <returns>A value &lt;0 if <paramref name="s"/> is null, else the index just after the end of the word.</returns>
        static long FindNextWord(string s, out string nextWord)
        {
            if (s == null)
            {
                nextWord = "";
                return -1;
            }

            int wordStart = 0;
            while (wordStart < s.Length && char.IsWhiteSpace(s[wordStart]))
            {
                wordStart++;
            }

            int wordEnd = wordStart;
            while (wordEnd < s.Length && !char.IsWhiteSpace(s[wordEnd]))
            {
                wordEnd++;
            }

            nextWord = s.Substring(wordStart, wordEnd - wordStart);
            return wordEnd;
        }

        /// <summary>
        /// Finds the end-boundary of the next word in the stream, skipping leading whitespace.
        /// The stream position is not moved.
        /// </summary>
        /// <param name="s">Stream holding the program text.</param>
        /// <param name="nextWord">The word found; empty when only whitespace remains.</param>
        /// <returns>The position just after the word.</returns>
        static long FindNextWord(Stream s, out string nextWord)
        {
            long start = s.Position;
            SkipWhitespace(s);
            long wordEnd = FindNextOccurance(s, (c, _) => char.IsWhiteSpace(c), out nextWord);
            s.Position = start;
            return wordEnd;
        }

        /// <summary>
        /// Scans forward from the current position until <paramref name="p"/> returns true for a character,
        /// capturing the characters read before it. The stream position is restored before returning.
        /// </summary>
        /// <param name="s">Seekable stream of UTF-8 text.</param>
        /// <param name="p">Predicate receiving each decoded character and the stream.</param>
        /// <param name="result">The characters read before the match (or before the end of the stream).</param>
        /// <param name="requireMatch">When true, reaching the end of the stream without a match is an error.</param>
        /// <returns>
        /// The position of the matching character; when nothing matched, the end of the stream, or -1 if
        /// <paramref name="requireMatch"/> is set.
        /// </returns>
        static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result, bool requireMatch = false)
        {
            long start = s.Position;
            StringBuilder sb = new StringBuilder();
            long matchPosition = -1;

            while (s.Position < s.Length)
            {
                long charStart = s.Position;
                char nextChar = GetChar(s);
                if (p(nextChar, s))
                {
                    matchPosition = charStart;
                    break;
                }
                sb.Append(nextChar);
            }

            long scanEnd = s.Position;
            result = sb.ToString();
            s.Position = start;

            if (matchPosition >= 0)
            {
                return matchPosition;
            }
            return requireMatch ? -1 : scanEnd;
        }

        /// <summary>
        /// Finds the next position of the character <paramref name="c"/>.
        /// </summary>
        /// <param name="s">Seekable stream of UTF-8 text.</param>
        /// <param name="c">Character to look for.</param>
        /// <param name="result">Captures the string read in searching for the character.</param>
        /// <returns>The position of the character, or the end of the stream when it does not occur.</returns>
        static long FindNextOccurance(Stream s, char c, out string result)
        {
            return FindNextOccurance(s, (streamChar, _) => streamChar == c, out result);
        }

        /// <summary>
        /// Reads one UTF-8 encoded character from the stream and advances past all of its bytes.
        /// </summary>
        /// <param name="s">Stream of UTF-8 text.</param>
        /// <returns>
        /// The decoded character, or (char)0 at the end of the stream. Characters outside the Basic
        /// Multilingual Plane (4-byte sequences) yield only their first UTF-16 code unit.
        /// </returns>
        /// <exception cref="InvalidDataException">A multi-byte sequence is truncated or malformed.</exception>
        static char GetChar(Stream s)
        {
            // As UTF-8 allows code points to span multiple bytes, reading a single byte as a character will
            // not always give the expected value. The leading bits of the first byte give the sequence length.
            int firstByte = s.ReadByte();
            if (firstByte == -1)
            {
                return (char)0;
            }

            int continuationBytes = 0;
            if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
            {
                continuationBytes = 3;
            }
            else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
            {
                continuationBytes = 2;
            }
            else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
            {
                continuationBytes = 1;
            }

            byte[] charBytes = new byte[continuationBytes + 1];
            charBytes[0] = (byte)firstByte;
            for (int i = 1; i <= continuationBytes; i++)
            {
                int nextByte = s.ReadByte();
                if (nextByte == -1 || (nextByte >> 6) != 2) // continuation bytes are 10xxxxxx
                {
                    throw new InvalidDataException("Character is not a valid UTF-8 code point!");
                }
                charBytes[i] = (byte)nextByte;
            }

            return Encoding.UTF8.GetString(charBytes)[0];
        }

        /// <summary>
        /// Advances the stream to the next non-whitespace character, or to the end of the stream.
        /// </summary>
        /// <param name="s">Seekable stream of UTF-8 text.</param>
        static void SkipWhitespace(Stream s)
        {
            while (s.Position < s.Length)
            {
                long charStart = s.Position;
                if (!char.IsWhiteSpace(GetChar(s)))
                {
                    // Step back onto the character that ended the run of whitespace.
                    s.Position = charStart;
                    return;
                }
            }
        }
    }
}