using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace Assignment_1
{
    /// <summary>
    /// Interpreter for a small string-processing language. A program is a '{', a sequence of
    /// statements and an optional closing '}'. The source is held in a stream; every Find* helper
    /// reports the stream position at which the item it parsed ends and leaves the stream where it was.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Attributes attached to an entry of the symbol table.
        /// </summary>
        [Flags]
        enum VariableFlags
        {
            Empty = 0,
            Reserved = 1,
            NoPrint = 2
        }

        /// <summary>
        /// The symbol table: maps an identifier to its current value and flags.
        /// It is pre-populated with the reserved whitespace constants.
        ///
        /// For this program, a word is considered to be any non-whitespace value bounded by
        /// whitespace or the array boundary.
        /// </summary>
        static Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>
        {
            { "SPACE", new Tuple<string, VariableFlags>(" ", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "TAB", new Tuple<string, VariableFlags>("\t", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "NEWLINE", new Tuple<string, VariableFlags>("\n", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "CARRIAGE_RETURN", new Tuple<string, VariableFlags>("\r", VariableFlags.Reserved | VariableFlags.NoPrint) }
        };

        /// <summary>
        /// Characters that may not appear in an identifier created by <c>set</c>.
        /// </summary>
        static List<char> ForbiddenChars = new List<char> { '$', '\\', '\"', '\'' };

        /// <summary>
        /// Prints the banner, loads the program (from redirected standard input, or interactively)
        /// into a memory stream and hands it to the parser.
        /// </summary>
        static void Main(string[] args)
        {
            Console.WriteLine("┌──────────────────────────────────────────┐");
            Console.WriteLine("│ 159.341 2021 Semester 1, Assignment 1 │");
            Console.WriteLine("│ Submitted by Brychan Dempsey, 14299890 │");
            Console.WriteLine("└──────────────────────────────────────────┘");

            // Memory stream that retains the source while it is being interpreted.
            MemoryStream sourceStream = new MemoryStream(1024);
            Parser parser = new Parser();
            bool dynamicInput = false;

            // From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
            // Reading from pipes is equivalent to reading user input, though the input is redirected.
            if (Console.IsInputRedirected)
            {
                byte[] program = Encoding.UTF8.GetBytes(Console.In.ReadToEnd());
                sourceStream.Write(program, 0, program.Length);
            }
            else
            {
                // Interactive mode: seed the stream with the opening brace, statements are appended as typed.
                byte[] opening = Encoding.UTF8.GetBytes("{ \r\n");
                sourceStream.Write(opening, 0, opening.Length);
                dynamicInput = true;
            }
            sourceStream.Position = 0;

            parser.StartParsing(sourceStream, dynamicInput);
            Console.ReadLine();
        }

        /// <summary>
        /// Parses and executes the statements held in a source stream.
        /// </summary>
        public class Parser
        {
            /// <summary>
            /// The statement keywords understood by the parser (matched case-sensitively).
            /// </summary>
            public enum statements
            {
                exit,
                append,
                list,
                print,
                printlength,
                printwords,
                printwordcount,
                set,
                reverse,
                h,
                writeout
            }

            /// <summary>
            /// Executes the program in <paramref name="source"/>. The first non-whitespace character
            /// must be '{'. Parsing stops at a '}', at the end of the stream (non-interactive mode),
            /// or when standard input is closed (interactive mode).
            /// Statements that take arguments must end with ';'; it is optional after the
            /// argument-less statements (list, h, exit, writeout).
            /// </summary>
            /// <param name="source">Seekable stream holding the UTF-8 encoded program. In interactive mode it must also be writable and expandable.</param>
            /// <param name="dynamicInput">When true, further statements are read from the console whenever the stream is exhausted.</param>
            /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
            public void StartParsing(Stream source, bool dynamicInput = false)
            {
                if (source == null)
                {
                    throw new ArgumentNullException(nameof(source));
                }

                SkipWhitespace(source);
                if (source.ReadByte() != '{')
                {
                    Console.WriteLine("Expected '{' at the start of the program");
                    return;
                }

                while (true)
                {
                    SkipWhitespace(source);

                    if (source.Position >= source.Length)
                    {
                        if (!dynamicInput)
                        {
                            // Nothing left to interpret.
                            return;
                        }

                        // Only prompt once everything typed so far has been consumed, so that
                        // several statements may be entered on a single line.
                        Console.WriteLine("Enter a command: ");
                        string line = Console.ReadLine();
                        if (line == null)
                        {
                            // Standard input was closed.
                            return;
                        }
                        long resumeAt = source.Position;
                        byte[] lineBytes = Encoding.UTF8.GetBytes(line + "\n");
                        source.Write(lineBytes, 0, lineBytes.Length);
                        source.Position = resumeAt;
                        continue;
                    }

                    if (PeekChar(source) == '}')
                    {
                        // End of the program block.
                        source.Position++;
                        return;
                    }

                    // Parse the statement by reading the next word. A ';' may be glued to the
                    // keyword (e.g. "list;"), so the keyword ends at the first ';'.
                    long statementStart = source.Position;
                    string word;
                    FindNextWord(source, out word);
                    int terminatorIndex = word.IndexOf(';');
                    string keyword = terminatorIndex >= 0 ? word.Substring(0, terminatorIndex) : word;

                    // Enum.TryParse also accepts numbers and comma separated lists; comparing the
                    // name back against the keyword rejects those.
                    statements statement;
                    bool known = Enum.TryParse(keyword, out statement)
                        && Enum.GetName(typeof(statements), statement) == keyword;
                    if (!known)
                    {
                        Console.WriteLine("Failed parsing statement");
                        SkipStatement(source, statementStart);
                        continue;
                    }

                    // Keywords are ASCII, so the byte count equals the character count.
                    source.Position = statementStart + Encoding.UTF8.GetByteCount(keyword);

                    bool succeeded = true;
                    switch (statement)
                    {
                        case statements.exit:
                            Exit();
                            break;
                        case statements.append:
                            succeeded = Append(source, statementStart);
                            break;
                        case statements.list:
                            List();
                            break;
                        case statements.print:
                            succeeded = Print(source, 0, statementStart);
                            break;
                        case statements.printlength:
                            succeeded = Print(source, 1, statementStart);
                            break;
                        case statements.printwords:
                            succeeded = Print(source, 2, statementStart);
                            break;
                        case statements.printwordcount:
                            succeeded = Print(source, 3, statementStart);
                            break;
                        case statements.set:
                            succeeded = Set(source, statementStart);
                            break;
                        case statements.reverse:
                            succeeded = Reverse(source, statementStart);
                            break;
                        case statements.h:
                            Console.WriteLine("Commands are: ");
                            foreach (string name in Enum.GetNames(typeof(statements)))
                            {
                                Console.WriteLine("\t{0}", name);
                            }
                            break;
                        case statements.writeout:
                            // Writing the command history is not implemented.
                            Console.WriteLine("Writing input commands is not supported yet");
                            break;
                    }

                    if (succeeded)
                    {
                        ConsumeTerminator(source);
                    }
                    else
                    {
                        SkipStatement(source, statementStart);
                    }
                }
            }

            /// <summary>
            /// Error recovery: moves the stream just past the next ';' that is not inside a string
            /// literal, searching from <paramref name="statementStart"/>, or to the end of the stream.
            /// </summary>
            static void SkipStatement(Stream source, long statementStart)
            {
                source.Position = statementStart;
                bool insideLiteral = false;
                bool afterBackslash = false;
                while (source.Position < source.Length)
                {
                    char c = ReadChar(source);
                    if (c == '\"' && !afterBackslash)
                    {
                        insideLiteral = !insideLiteral;
                    }
                    else if (c == ';' && !insideLiteral)
                    {
                        return;
                    }
                    afterBackslash = c == '\\';
                }
            }

            /// <summary>
            /// Consumes the ';' (and any whitespace before it) if it is the next token.
            /// </summary>
            static void ConsumeTerminator(Stream source)
            {
                if (IsNextEoS(source))
                {
                    SkipWhitespace(source);
                    source.Position++; // ';' is a single byte
                }
            }

            #region Function Handling

            /// <summary>
            /// Handles <c>append id expression;</c>: appends the value of the expression to an
            /// existing, non-reserved variable. On success the stream is left in front of the ';'.
            /// </summary>
            /// <param name="source">Stream positioned directly after the keyword.</param>
            /// <param name="lineStart">Position of the first character of the statement, or -1 to derive it.</param>
            /// <returns>true if the variable was updated; false if an error was reported.</returns>
            bool Append(Stream source, long lineStart = -1)
            {
                if (lineStart == -1)
                {
                    lineStart = GetLineStart(source, "append");
                }

                string key;
                long keyEndPos = FindIdentifier(source, out key);
                Tuple<string, VariableFlags> current;
                if (keyEndPos < 0 || !Symbols.TryGetValue(key, out current))
                {
                    WriteDebugLine(lineStart, lineStart + "append ".Length, "could not identify object", source);
                    return false;
                }
                source.Position = keyEndPos;

                string value;
                long valuePos = FindExpression(source, out value);
                if (valuePos < 0)
                {
                    WriteDebugLine(lineStart, keyEndPos, "could not evaluate expression", source);
                    return false;
                }
                source.Position = valuePos;

                if (!IsNextEoS(source))
                {
                    WriteDebugLine(lineStart, valuePos, "expected a semicolon", source);
                    return false;
                }

                if (current.Item2.HasFlag(VariableFlags.Reserved))
                {
                    WriteDebugLine(lineStart, keyEndPos - Encoding.UTF8.GetByteCount(key), "cannot assign a value to a reserved constant", source);
                    return false;
                }

                Symbols[key] = new Tuple<string, VariableFlags>(current.Item1 + value, current.Item2);
                source.Position = valuePos;
                return true;
            }

            /// <summary>
            /// Prints a table of all defined variables, with whitespace escapes made visible and
            /// the flags shown as an 8-digit binary number.
            /// </summary>
            void List()
            {
                string top = "┌" + new string('─', 15) + "┬" + new string('─', 25) + "┬" + new string('─', 9) + "┐";
                string middle = "├" + new string('─', 15) + "┼" + new string('─', 25) + "┼" + new string('─', 9) + "┤";
                string bottom = "└" + new string('─', 15) + "┴" + new string('─', 25) + "┴" + new string('─', 9) + "┘";

                Console.WriteLine(top);
                Console.WriteLine("│{0}│{1}│{2}│", CenterString("Symbol", 15), CenterString("Value", 25), CenterString("Flags", 9));
                Console.WriteLine(middle);

                int row = 0;
                foreach (var item in Symbols)
                {
                    string shownValue = item.Value.Item1.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\t", "\\t");
                    string shownFlags = Convert.ToString((byte)item.Value.Item2, 2).PadLeft(8, '0');
                    Console.WriteLine("│{0,-15}│{1,-25}│{2,9}│", item.Key, shownValue, shownFlags);

                    // The last row closes the table, every other row is followed by a divider.
                    Console.WriteLine(row == Symbols.Count - 1 ? bottom : middle);
                    row++;
                }
            }

            /// <summary>
            /// Terminates the process with exit code 0.
            /// </summary>
            void Exit()
            {
                Environment.Exit(0);
            }

            /// <summary>
            /// Handles the print family of statements. On success the stream is left in front of the ';'.
            /// </summary>
            /// <param name="source">Stream positioned directly after the keyword.</param>
            /// <param name="mode">0 prints the expression, 1 its length, 2 its words (one per line), 3 its word count.</param>
            /// <param name="lineStart">Position of the first character of the statement, or -1 to use the current position.</param>
            /// <returns>true if something was printed (or the mode was unknown); false if an error was reported.</returns>
            bool Print(Stream source, int mode = 0, long lineStart = -1)
            {
                if (lineStart == -1)
                {
                    lineStart = source.Position;
                }

                SkipWhitespace(source);
                long expressionStart = source.Position;

                string expression;
                long result = FindExpression(source, out expression);
                if (result < 0)
                {
                    WriteDebugLine(lineStart, expressionStart, "could not evaluate expression", source);
                    return false;
                }
                source.Position = result;

                if (!IsNextEoS(source))
                {
                    WriteDebugLine(lineStart, result, "expected a semicolon", source);
                    return false;
                }

                if (mode == 0)
                {
                    Console.WriteLine(expression);
                }
                else if (mode == 1)
                {
                    Console.Write("Length of the expression is: ");
                    Console.WriteLine(expression.Length);
                }
                else if (mode >= 2)
                {
                    // A null separator splits on any whitespace; empty entries are dropped so that
                    // runs of whitespace (or an empty expression) do not count as words.
                    string[] words = expression.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                    if (mode == 3)
                    {
                        Console.Write("Wordcount is: ");
                        Console.WriteLine(words.Length);
                    }
                    else
                    {
                        Console.WriteLine("Words are:");
                        foreach (string word in words)
                        {
                            Console.WriteLine(word);
                        }
                    }
                }
                return true;
            }

            /// <summary>
            /// Handles <c>set id expression;</c>: creates the variable or replaces its value.
            /// On success the stream is left in front of the ';'.
            /// </summary>
            /// <param name="source">Stream positioned directly after the keyword.</param>
            /// <param name="lineStart">Position of the first character of the statement, or -1 to derive it.</param>
            /// <returns>true if the variable was assigned; false if an error was reported.</returns>
            bool Set(Stream source, long lineStart = -1)
            {
                if (lineStart == -1)
                {
                    lineStart = GetLineStart(source, "set");
                }
                long identifierCaret = lineStart + "set ".Length;

                string identifier;
                long identifierEndPos = FindIdentifier(source, out identifier);
                if (identifierEndPos < 0 || identifier.Length == 0)
                {
                    WriteDebugLine(lineStart, identifierCaret, "expected an identifier", source);
                    return false;
                }

                int forbiddenIndex = ForbiddenChars.FindIndex(c => identifier.IndexOf(c) >= 0);
                if (forbiddenIndex >= 0)
                {
                    char fbChar = ForbiddenChars[forbiddenIndex];
                    WriteDebugLine(lineStart, identifierCaret, string.Format("character {0} is not valid for an identifier", fbChar), source);
                    return false;
                }
                source.Position = identifierEndPos;

                string expression;
                long expressionEndPos = FindExpression(source, out expression);
                if (expressionEndPos < 0)
                {
                    WriteDebugLine(lineStart, identifierEndPos, "failed parsing expression", source);
                    return false;
                }
                source.Position = expressionEndPos;

                if (!IsNextEoS(source))
                {
                    WriteDebugLine(lineStart, expressionEndPos, "expected a semicolon", source);
                    return false;
                }

                VariableFlags flags = VariableFlags.Empty;
                Tuple<string, VariableFlags> existing;
                if (Symbols.TryGetValue(identifier, out existing))
                {
                    if (existing.Item2.HasFlag(VariableFlags.Reserved))
                    {
                        WriteDebugLine(lineStart, identifierEndPos - Encoding.UTF8.GetByteCount(identifier), "cannot assign to a reserved constant", source);
                        return false;
                    }
                    flags = existing.Item2;
                }

                Symbols[identifier] = new Tuple<string, VariableFlags>(expression, flags);
                source.Position = expressionEndPos;
                return true;
            }

            /// <summary>
            /// Handles <c>reverse id;</c>: reverses the order of the space-separated words stored
            /// in an existing, non-reserved variable. On success the stream is left in front of the ';'.
            /// </summary>
            /// <param name="source">Stream positioned directly after the keyword.</param>
            /// <param name="lineStart">Position of the first character of the statement, or -1 to derive it.</param>
            /// <returns>true if the variable was updated; false if an error was reported.</returns>
            bool Reverse(Stream source, long lineStart = -1)
            {
                if (lineStart == -1)
                {
                    lineStart = GetLineStart(source, "reverse");
                }

                string identifier;
                long resultPos = FindIdentifier(source, out identifier);
                Tuple<string, VariableFlags> current;
                if (resultPos < 0 || !Symbols.TryGetValue(identifier, out current))
                {
                    WriteDebugLine(lineStart, lineStart + "reverse ".Length, "could not identify object", source);
                    return false;
                }
                source.Position = resultPos;

                if (!IsNextEoS(source))
                {
                    WriteDebugLine(lineStart, resultPos, "expected a semicolon", source);
                    return false;
                }

                if (current.Item2.HasFlag(VariableFlags.Reserved))
                {
                    WriteDebugLine(lineStart, resultPos - Encoding.UTF8.GetByteCount(identifier), "cannot assign to a reserved constant", source);
                    return false;
                }

                // Splitting on the single space and joining with it again preserves the spacing.
                string[] words = current.Item1.Split(' ');
                Array.Reverse(words);
                Symbols[identifier] = new Tuple<string, VariableFlags>(string.Join(" ", words), current.Item2);
                source.Position = resultPos;
                return true;
            }

            /// <summary>
            /// Writes the debug info to the screen in the form:
            ///   line read from stream (lineStart) to line end
            ///   [whitespace up to caratPos] ^ [errorMessage]
            /// The stream is left positioned at <paramref name="lineStart"/>; its content is not modified.
            /// </summary>
            /// <param name="lineStart">Position of the first character of the offending statement.</param>
            /// <param name="caratPos">Absolute stream position the caret should point at (measured in bytes).</param>
            /// <param name="errorMessage">Text printed after the caret.</param>
            /// <param name="source">The source stream.</param>
            void WriteDebugLine(long lineStart, long caratPos, string errorMessage, Stream source)
            {
                source.Position = lineStart;
                string fullLine = GetNextLine(source);
                int indent = (int)Math.Max(0L, caratPos - lineStart);
                Console.WriteLine(fullLine);
                Console.WriteLine(new string(' ', indent) + "^ " + errorMessage);
            }

            /// <summary>
            /// Gets the position at which the statement keyword <paramref name="word"/> starts,
            /// assuming the stream is positioned after the keyword (optionally followed by whitespace).
            /// The stream position is not changed.
            /// </summary>
            long GetLineStart(Stream source, string word)
            {
                long original = source.Position;
                long keywordEnd = original;

                // Step back over any whitespace that separates the keyword from the current position.
                while (keywordEnd > 0)
                {
                    source.Position = keywordEnd - 1;
                    int previous = source.ReadByte();
                    if (previous != ' ' && previous != '\t' && previous != '\r' && previous != '\n')
                    {
                        break;
                    }
                    keywordEnd--;
                }

                source.Position = original;
                return Math.Max(0L, keywordEnd - Encoding.UTF8.GetByteCount(word));
            }

            #endregion

            #region Data Handling

            /// <summary>
            /// Parses and evaluates an expression: one or more values (literals or variables)
            /// joined by the concatenation operator '+'. Variables are evaluated immediately.
            /// The stream position is not changed.
            /// </summary>
            /// <param name="s">Stream positioned in front of the expression.</param>
            /// <param name="expression">The evaluated string, or "" on failure.</param>
            /// <returns>The position directly after the last value, or -1 if a value could not be evaluated.</returns>
            long FindExpression(Stream s, out string expression)
            {
                expression = "";
                long start = s.Position;
                StringBuilder result = new StringBuilder();

                // Must contain at least one value.
                string value;
                long end = FindValue(s, out value);
                if (end < 0)
                {
                    s.Position = start;
                    return -1;
                }
                result.Append(value);
                s.Position = end;

                // Every further value must be introduced by a '+'.
                while (IsNextEoS(s, '+'))
                {
                    SkipWhitespace(s);
                    s.Position++; // consume the '+', a single byte

                    end = FindValue(s, out value);
                    if (end < 0)
                    {
                        s.Position = start;
                        return -1;
                    }
                    result.Append(value);
                    s.Position = end;
                }

                s.Position = start;
                expression = result.ToString();
                return end;
            }

            /// <summary>
            /// Checks ahead to see if the next non-whitespace character is the EoS indicator (';' by default).
            /// The stream position is not changed.
            /// </summary>
            /// <returns>true if the next non-whitespace character is <paramref name="EoSChar"/>, else false.</returns>
            static bool IsNextEoS(Stream s, char EoSChar = ';')
            {
                long start = s.Position;
                SkipWhitespace(s);
                char next = PeekChar(s);
                s.Position = start;
                return next == EoSChar;
            }

            /// <summary>
            /// Finds the next value in the stream. The most atomic unit is a 'value': either a
            /// string literal or the name of an existing variable. The stream position is not changed.
            /// </summary>
            /// <param name="s">Stream positioned in front of the value (leading whitespace is allowed).</param>
            /// <param name="returnedValue">The literal text or the variable's value, or "" on failure.</param>
            /// <returns>The position directly after the value, or -1 if there is no valid value.</returns>
            long FindValue(Stream s, out string returnedValue)
            {
                returnedValue = "";
                long start = s.Position;
                SkipWhitespace(s);

                long end;
                if (PeekChar(s) == '\"')
                {
                    s.Position++; // step over the opening quote
                    end = FindLiteral(s, out returnedValue);
                }
                else
                {
                    string key;
                    end = FindExistingIdentifier(s, out key);
                    Tuple<string, VariableFlags> symbol;
                    if (Symbols.TryGetValue(key, out symbol))
                    {
                        returnedValue = symbol.Item1;
                    }
                    else
                    {
                        end = -1;
                    }
                }

                s.Position = start;
                return end;
            }

            /// <summary>
            /// Reads the next word as an identifier; a ';' ends the identifier.
            /// The stream position is not changed.
            /// </summary>
            /// <returns>The position directly after the identifier.</returns>
            long FindIdentifier(Stream s, out string returnedKey)
            {
                return FindExistingIdentifier(s, out returnedKey);
            }

            /// <summary>
            /// Reads the next word as an identifier, excluding a ';' (and anything glued after it)
            /// so that "name;" yields "name". The stream position is not changed.
            /// </summary>
            /// <returns>The position directly after the identifier, i.e. at the ';' if one was glued to it.</returns>
            long FindExistingIdentifier(Stream s, out string returnedKey)
            {
                string word;
                long wordEnd = FindNextWord(s, out word);

                int terminatorIndex = word.IndexOf(';');
                if (terminatorIndex >= 0)
                {
                    // Backtrack over the ';' and whatever follows it within the same word.
                    wordEnd -= Encoding.UTF8.GetByteCount(word.Substring(terminatorIndex));
                    word = word.Substring(0, terminatorIndex);
                }

                returnedKey = word;
                return wordEnd;
            }

            /// <summary>
            /// Parses a string literal. The stream must be positioned directly after the opening
            /// quote. A quote preceded by a backslash is part of the literal and is unescaped.
            /// The stream position is not changed.
            /// </summary>
            /// <param name="s">Stream positioned directly after the opening quote.</param>
            /// <param name="returnedLiteral">The literal's text, or "" if it is not terminated.</param>
            /// <returns>The position directly after the closing quote, or -1 if the literal is not terminated.</returns>
            long FindLiteral(Stream s, out string returnedLiteral)
            {
                long start = s.Position;

                // TODO: handle the \\ escape
                bool previousWasBackslash = false;
                string raw;
                long closingQuote = FindNextOccurance(s, (c, stream) =>
                {
                    if (c == '\"' && !previousWasBackslash)
                    {
                        return true;
                    }
                    previousWasBackslash = c == '\\';
                    return false;
                }, out raw);

                // The search also stops at the end of the stream, so confirm that a quote was found.
                s.Position = closingQuote;
                bool terminated = s.ReadByte() == '\"';
                s.Position = start;

                if (!terminated)
                {
                    returnedLiteral = "";
                    return -1;
                }

                returnedLiteral = raw.Replace("\\\"", "\"");
                return closingQuote + 1;
            }

            #endregion
        }

        #region HelperFunctions

        /// <summary>
        /// Reads the stream as a UTF-8 encoded string up to the next '\n' or '\r\n' (excluded).
        /// The stream position is not changed.
        /// </summary>
        static string GetNextLine(Stream s)
        {
            string nextLine;
            FindNextOccurance(s, '\n', out nextLine);
            return nextLine.TrimEnd('\r');
        }

        /// <summary>
        /// Finds the end-boundary of the next word in the stream, and returns the stream to the
        /// original position. Leading whitespace is skipped.
        /// </summary>
        /// <param name="s">The source stream.</param>
        /// <param name="nextWord">The word, or "" if only whitespace remains.</param>
        /// <returns>The position directly after the last character of the word.</returns>
        static long FindNextWord(Stream s, out string nextWord)
        {
            long originalPos = s.Position;
            SkipWhitespace(s);
            long wordEnd = FindNextOccurance(s, (c, stream) => char.IsWhiteSpace(c), out nextWord);
            s.Position = originalPos;
            return wordEnd;
        }

        /// <summary>
        /// Searches forward for the first character for which <paramref name="p"/> returns true.
        /// A NUL character also ends the search. The stream position is not changed.
        /// </summary>
        /// <param name="s">The source stream.</param>
        /// <param name="p">A 'predicate'-like Func, given the character and the stream.</param>
        /// <param name="result">The text read before the matching character.</param>
        /// <returns>The position of the matching character, or the end of the stream if there is none.</returns>
        static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
        {
            long start = s.Position;
            long end = start;
            bool found = false;
            StringBuilder captured = new StringBuilder();

            while (!found && s.Position < s.Length)
            {
                long before = s.Position;
                string character = ReadCharacter(s);
                if (character == null || character[0] == '\0' || p(character[0], s))
                {
                    found = true;
                    end = before;
                }
                else
                {
                    captured.Append(character);
                }
            }

            if (!found)
            {
                end = s.Position;
            }

            result = captured.ToString();
            s.Position = start;
            return end;
        }

        /// <summary>
        /// Finds the next position of the character <paramref name="c"/>.
        /// </summary>
        /// <param name="s">The source stream.</param>
        /// <param name="c">The character to search for.</param>
        /// <param name="result">Captures the string read in searching for the character.</param>
        /// <returns>The position of the character, or the end of the stream if it does not occur.</returns>
        static long FindNextOccurance(Stream s, char c, out string result)
        {
            return FindNextOccurance(s, (streamChar, stream) => streamChar == c, out result);
        }

        /// <summary>
        /// Reads the next UTF-8 encoded code point and advances the stream past all of its bytes.
        /// </summary>
        /// <returns>The decoded text (two UTF-16 units for 4-byte sequences), or null at the end of the stream.</returns>
        /// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
        static string ReadCharacter(Stream s)
        {
            // As UTF-8 allows code points to span multiple bytes, reading a single byte as a
            // character will not always give the expected value. The high bits of the first byte
            // determine how many continuation bytes follow.
            int firstByte = s.ReadByte();
            if (firstByte == -1)
            {
                return null;
            }

            int continuationCount = 0;
            if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
            {
                continuationCount = 3;
            }
            else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
            {
                continuationCount = 2;
            }
            else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
            {
                continuationCount = 1;
            }

            byte[] charBytes = new byte[continuationCount + 1];
            charBytes[0] = (byte)firstByte;
            for (int i = 1; i <= continuationCount; i++)
            {
                int nextByte = s.ReadByte();
                if (nextByte == -1 || (nextByte >> 6) != 2) // continuation bytes are 10xxxxxx
                {
                    throw new InvalidDataException("Character is not a valid UTF-8 code point!");
                }
                charBytes[i] = (byte)nextByte;
            }

            return Encoding.UTF8.GetString(charBytes);
        }

        /// <summary>
        /// Reads the next UTF-8 encoded character in the stream, and advances the stream by the
        /// number of bytes read.
        /// </summary>
        /// <returns>The character (the first UTF-16 unit for 4-byte sequences), or (char)0 at the end of the stream.</returns>
        static char ReadChar(Stream s)
        {
            string character = ReadCharacter(s);
            return character == null ? (char)0 : character[0];
        }

        /// <summary>
        /// Reads the next character in the stream, and returns the stream to the original position.
        /// </summary>
        static char PeekChar(Stream s)
        {
            long curr = s.Position;
            char c = ReadChar(s);
            s.Position = curr;
            return c;
        }

        /// <summary>
        /// Skips whitespace characters, leaving the stream at the first non-whitespace character
        /// or at the end of the stream.
        /// </summary>
        static void SkipWhitespace(Stream s)
        {
            while (s.Position < s.Length)
            {
                long before = s.Position;
                if (!char.IsWhiteSpace(ReadChar(s)))
                {
                    s.Position = before;
                    return;
                }
            }
        }

        /// <summary>
        /// Centres <paramref name="source"/> in a field of <paramref name="totalPadding"/> characters.
        /// When the padding cannot be split evenly the extra character goes on the right.
        /// Text that is already as wide as the field is returned unchanged.
        /// </summary>
        static string CenterString(string source, int totalPadding, char paddingChar = ' ')
        {
            if (source.Length >= totalPadding)
            {
                return source;
            }

            int leftPad = (totalPadding - source.Length) / 2;
            return source.PadLeft(source.Length + leftPad, paddingChar).PadRight(totalPadding, paddingChar);
        }

        #endregion
    }
}