/* Dempsey-Jensen, Brychan, 14299890, Assignment 1, 159.341 */
/* Parses and interprets a simple programming language line-by-line */
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading;
namespace Assignment_1
{
class Program
{
/// <summary>
/// Bit flags describing the properties of a symbol-table entry.
/// </summary>
[Flags]
enum VariableFlags
{
    /// <summary>No flags set.</summary>
    Empty = 0,
    /// <summary>The symbol is a built-in constant; ValidateKey refuses to assign to it.</summary>
    Reserved = 1 << 0,
    /// <summary>The symbol is left out of the 'list' table unless 'list all' is used.</summary>
    NoPrint = 1 << 1,
    /// <summary>Declared but not read anywhere in this file.</summary>
    Static = 1 << 2
}
// Width, in characters, used to centre the start-up banner and to size the columns of the 'list' table.
static readonly int ConsoleWidthLimit = 80;
/// <summary>
/// Entry point. Prints the banner, then repeatedly parses a program until the user declines to run another.
/// </summary>
/// <remarks>
/// A program is read in one of two ways:
/// when standard input is redirected (or a source file was selected on the previous pass) the whole
/// input is copied into a <see cref="MemoryStream"/> up front; otherwise the parser reads commands
/// interactively, one console line at a time.
/// With redirected input the process exits after the first program, since there is no keyboard to prompt.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.WriteLine(CenterString("┌──────────────────────────────────────────┐", ConsoleWidthLimit));
    Console.WriteLine(CenterString("│ 159.341 2021 Semester 1, Assignment 1 │", ConsoleWidthLimit));
    Console.WriteLine(CenterString("│ Submitted by Brychan Dempsey, 14299890 │", ConsoleWidthLimit));
    Console.WriteLine(CenterString("└──────────────────────────────────────────┘", ConsoleWidthLimit));

    bool loadedFromFile = false;
    bool exit = false;
    while (!exit)
    {
        MemoryStream sourceStream = new MemoryStream(1024);
        Parser parser = new Parser();
        // From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
        // Reading from pipes is equivalent to reading user input, but the input is redirected
        bool dynamicInput = !(Console.IsInputRedirected || loadedFromFile);
        if (!dynamicInput)
        {
            // To simplify reading, all input is copied from the redirected reader into the stream first.
            sourceStream.Write(Encoding.UTF8.GetBytes(Console.In.ReadToEnd()));
            Console.In.Dispose();
            // Hand the console back to real standard input for the prompts below.
            Console.SetIn(new StreamReader(Console.OpenStandardInput()));
            sourceStream.Position = 0;
            // The file has been consumed; the next pass is interactive unless another file is chosen.
            // Without this reset a failed file selection would leave the flag set and the next pass
            // would block reading standard input to its end.
            loadedFromFile = false;
        }

        parser.StartParsing(sourceStream, dynamicInput);
        Console.WriteLine(Environment.NewLine + new string('─', 40));
        Console.WriteLine("\nProgram Parsed Successfully!");

        if (Console.IsInputRedirected)
        {
            // No interactive user: give the output a moment to be seen, then quit.
            Thread.Sleep(3000);
            Environment.Exit(0);
        }

        if (!AskYesNo("\nWould you like to parse another program? Y/n:"))
        {
            exit = true;
            continue;
        }

        if (!AskYesNo("\nWould you like to pipe data from a source file? Y/n:"))
        {
            // Console.In already refers to standard input; just make sure we parse interactively.
            loadedFromFile = false;
            continue;
        }

        Console.WriteLine("Enter the source path:");
        string sourcePath = Console.ReadLine();
        if (!File.Exists(sourcePath))
        {
            Console.WriteLine("Source file not found: " + sourcePath);
            continue;
        }
        try
        {
            Console.SetIn(File.OpenText(sourcePath));
            loadedFromFile = true;
        }
        catch (Exception e)
        {
            Console.WriteLine("Encountered an error opening the source file: " + e.Message);
        }
    }

    // Repeats the prompt until Y or N is pressed; returns true for Y.
    bool AskYesNo(string prompt)
    {
        ConsoleKeyInfo key = new ConsoleKeyInfo();
        while (key.Key != ConsoleKey.Y && key.Key != ConsoleKey.N)
        {
            Console.WriteLine(prompt);
            key = Console.ReadKey(true);
        }
        return key.Key == ConsoleKey.Y;
    }
}
public class Parser
{
// Symbol table: maps an identifier to its string value and its flags.
// It is pre-populated with the reserved whitespace constants, which are hidden from 'list'
// (NoPrint) and cannot be assigned to (Reserved).
Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>()
{
    { "SPACE", new Tuple<string, VariableFlags>(" ", VariableFlags.Reserved | VariableFlags.NoPrint) },
    { "TAB", new Tuple<string, VariableFlags>("\t", VariableFlags.Reserved | VariableFlags.NoPrint) },
    { "NEWLINE", new Tuple<string, VariableFlags>("\n", VariableFlags.Reserved | VariableFlags.NoPrint) },
    { "CARRIAGE_RETURN", new Tuple<string, VariableFlags>("\r", VariableFlags.Reserved | VariableFlags.NoPrint) }
};
/// <summary>
/// The statement keywords understood by the parser. Member names are matched against the
/// first word of each statement, so they are deliberately lower-case.
/// </summary>
public enum Statements
{
    /// <summary>Ends parsing.</summary>
    exit = 0,
    /// <summary>Appends an expression to an existing symbol.</summary>
    append = 1,
    /// <summary>Prints the symbol table ('list all' includes hidden symbols).</summary>
    list = 2,
    /// <summary>Prints an expression.</summary>
    print = 3,
    /// <summary>Prints the length of an expression.</summary>
    printlength = 4,
    /// <summary>Prints the words of an expression.</summary>
    printwords = 5,
    /// <summary>Prints the number of words in an expression.</summary>
    printwordcount = 6,
    /// <summary>Creates or overwrites a symbol.</summary>
    set = 7,
    /// <summary>Reverses the word order of a symbol's value.</summary>
    reverse = 8,
    /// <summary>Prints the list of commands.</summary>
    h = 9,
    /// <summary>Declared, but StartParsing has no case for it.</summary>
    writeout = 10
}
/// <summary>
/// Parses and executes statements from <paramref name="source"/> until an 'exit' statement
/// completes (or, in interactive mode, console input ends).
/// </summary>
/// <remarks>
/// Each statement handler returns an <see cref="Action"/>; the action only runs once the
/// terminating semi-colon has been seen, so a half-entered statement has no effect.
/// In interactive mode every console line is appended to <paramref name="source"/>; a statement
/// that is not yet terminated is re-parsed from its start once the next line arrives.
/// A <see cref="ParserException"/> discards the current statement from the stream; in
/// non-interactive mode it also terminates the process with exit code -1.
/// </remarks>
/// <param name="source">Seekable, writable stream holding (or accumulating) the program text as UTF-8.</param>
/// <param name="dynamicInput">True to read commands interactively from the console.</param>
/// <exception cref="ApplicationException">A ParserException with Importance greater than 3 was raised.</exception>
public void StartParsing(Stream source, bool dynamicInput = false)
{
    long initSourceLength = source.Length;
    long lastLinePos = 0;   // where the most recent console line was written
    long initPos = 0;       // start of the statement currently being parsed
    bool cont = false;      // true while waiting for the rest of an unterminated statement
    while (true)
    {
        if (dynamicInput)
        {
            lastLinePos = source.Position;
            if (!cont)
            {
                Console.WriteLine("Enter a command: ");
            }
            string s = Console.ReadLine();
            if (s == null)
            {
                // Console input was closed; there is nothing further to parse.
                return;
            }
            // Append the line, then rewind so that parsing starts at its first character.
            long writeStart = source.Position;
            source.Write(Encoding.UTF8.GetBytes(s));
            source.Position = writeStart;
        }
        // parse the statement or list of statements;
        // This is done by reading the next word
        if (!cont)
        {
            initPos = source.Position;
        }
        else
        {
            source.Position = initPos;
        }
        SkipWhitespace(source);
        long position = FindNextWord(source, out string word);
        try
        {
            // IsDefined with a string matches member names only, so numeric text ("3") or
            // comma-joined names, which Enum.TryParse would accept, are rejected here.
            if (!Enum.IsDefined(typeof(Statements), word))
            {
                throw new ParserException("Failed parsing statement", 0, source.Position);
            }
            Statements statement = (Statements)Enum.Parse(typeof(Statements), word);

            // By turning the result of the command into an action,
            // we can defer processing the final result until the end of this control flow
            Action result = () => { };
            source.Position = position;
            switch (statement)
            {
                case Statements.exit:
                    result = Exit(source, initSourceLength, dynamicInput);
                    break;
                case Statements.append:
                    result = AppendSet(source);
                    break;
                case Statements.list:
                    long afterOption = FindNextWord(source, out string nextWord);
                    if (nextWord == "all")
                    {
                        source.Position = afterOption;
                        result = List(true);
                    }
                    else
                    {
                        result = List();
                    }
                    break;
                case Statements.print:
                    result = Print(source, 0);
                    break;
                case Statements.printlength:
                    result = Print(source, 1);
                    break;
                case Statements.printwords:
                    result = Print(source, 2);
                    break;
                case Statements.printwordcount:
                    result = Print(source, 3);
                    break;
                case Statements.set:
                    result = AppendSet(source, false);
                    break;
                case Statements.reverse:
                    result = Reverse(source);
                    break;
                // Helper command: its input is excluded from the stream and it needs no semi-colon.
                case Statements.h:
                    Console.WriteLine("Commands are: ");
                    foreach (var item in Enum.GetValues(typeof(Statements)))
                    {
                        Console.WriteLine("\t{0}", ((Statements)item).ToString());
                    }
                    source.Position = initPos;
                    source.SetLength(initPos);
                    cont = false;
                    // Skip the end-of-statement check; the text was just removed from the stream.
                    continue;
            }
            // Do a check semicolons etc
            if (IsNextEoS(source))
            {
                cont = false;
                // IsNextEoS looks past whitespace without consuming it, so consume it here
                // before stepping over the semi-colon itself.
                SkipWhitespace(source);
                source.Position++;
                if (dynamicInput)
                {
                    // Nicely format the output stream, so we may print it cleanly
                    source.WriteByte((byte)'\n');
                }
                result();
                if (statement == Statements.exit)
                {
                    return;
                }
            }
            else if (source.Position != lastLinePos)
            {
                // More input is expected: remember where we are and keep accumulating
                // lines until the statement is terminated.
                lastLinePos = source.Position;
                cont = true;
                source.WriteByte((byte)' ');
                Console.Write(">");
            }
            else
            {
                throw new ParserException("expected a semi-colon", 0, source.Position);
            }
        }
        // Throwing a ParserException returns control here immediately. The failed statement is
        // removed from the stream and the error is printed, so individual handlers do not need to
        // restore the stream position themselves.
        catch (ParserException e)
        {
            if (e.Importance > 3)
            {
                throw new ApplicationException("A critical error occurred.");
            }
            // The failed statement is discarded, so any pending continuation is over.
            cont = false;
            if (e.LinePosition > 0)
            {
                WriteDebugLine(initPos, e.LinePosition, e.Message, source);
            }
            else
            {
                Console.WriteLine(e.LinePosition + ": " + e.Message);
                source.Position = initPos;
                source.SetLength(initPos);
            }
            if (!dynamicInput)
            {
                Environment.Exit(-1);
            }
        }
    }
}
#region Function Handling
/// <summary>
/// Reads the next word from the stream and checks that it is usable as an assignable key.
/// On success the stream is advanced past the key.
/// </summary>
/// <param name="source">Stream positioned before the key.</param>
/// <param name="checkExist">When true, the key must already be present in the symbol table.</param>
/// <returns>The validated key.</returns>
/// <exception cref="ParserException">
/// No key was found, a required key is missing, the key is reserved, or it contains a character
/// other than A-Z, a-z, 0-9 or '_'.
/// </exception>
private string ValidateKey(Stream source, bool checkExist)
{
    long endOfKey = FindIdentifier(source, out string key);
    if (endOfKey < 0 || key.Length == 0)
    {
        throw new ParserException("Could not identify object", 0, source.Position);
    }

    bool isKnown = Symbols.TryGetValue(key, out var entry);
    if (checkExist && !isKnown)
    {
        throw new ParserException("Key not found", 0, source.Position);
    }
    if (isKnown && entry.Item2.HasFlag(VariableFlags.Reserved))
    {
        throw new ParserException("Cannot assign a value to a reserved constant", 0, endOfKey - (key.Length + 1));
    }

    // Report the first character that is not a letter, digit or underscore.
    for (int i = 0; i < key.Length; i++)
    {
        char c = key[i];
        bool isAllowed = (c >= '0' && c <= '9')
            || (c >= 'A' && c <= 'Z')
            || (c >= 'a' && c <= 'z')
            || c == '_';
        if (!isAllowed)
        {
            throw new ParserException(string.Format("Character \'{0}\' is not valid for an identifier", c), 0, endOfKey - key.Length + i);
        }
    }

    source.Position = endOfKey;
    return key;
}
/// <summary>
/// Evaluates the expression that follows in the stream and advances the stream past it.
/// </summary>
/// <param name="source">Stream positioned before the expression.</param>
/// <returns>The evaluated expression text.</returns>
/// <exception cref="ParserException">FindExpression reported a negative end position.</exception>
private string ValidateValue(Stream source)
{
    long endOfValue = FindExpression(source, out string value);
    if (endOfValue < 0)
    {
        throw new ParserException("Could not evaluate expression", 0, source.Position);
    }
    source.Position = endOfValue;
    return value;
}
/// <summary>
/// Handles the 'append x y [ + z];' and 'set x y [ + z];' statements.
/// The key and expression are validated immediately; the symbol table is only
/// modified when the returned action is invoked.
/// </summary>
/// <param name="source">Stream positioned after the statement keyword.</param>
/// <param name="appendMode">True for 'append' (the key must already exist), false for 'set'.</param>
/// <returns>An action that applies the change to the symbol table.</returns>
Action AppendSet(Stream source, bool appendMode = true)
{
    string key = ValidateKey(source, appendMode);
    string value = ValidateValue(source);
    if (appendMode)
    {
        return () => Symbols[key] = new Tuple<string, VariableFlags>(Symbols[key].Item1 + value, Symbols[key].Item2);
    }
    return () =>
    {
        // Keep the flags of an existing symbol; a new symbol starts with none.
        VariableFlags flags = Symbols.TryGetValue(key, out var existing) ? existing.Item2 : VariableFlags.Empty;
        Symbols[key] = new Tuple<string, VariableFlags>(value, flags);
    };
}
/// <summary>
/// Builds a box-drawn table of the symbol table (symbol, value, flag bits) and returns an
/// action that prints it. The table is a snapshot taken when this method is called.
/// </summary>
/// <param name="printUnprint">True to include symbols flagged NoPrint.</param>
/// <returns>An action that writes the table to the console.</returns>
Action List(bool printUnprint = false)
{
    int flagWidth = Math.Max(Enum.GetNames(typeof(VariableFlags)).Length, "Flags".Length);
    int keyWidth = (int)((ConsoleWidthLimit - flagWidth) * 0.2); // 20% - flag width
    int valueWidth = (int)((ConsoleWidthLimit - flagWidth) * 0.8); // 80% - flag width

    // One horizontal rule of the table, with the given corner/junction characters.
    string Rule(string left, string junction, string right)
    {
        return left + new string('─', keyWidth) + junction + new string('─', valueWidth) + junction + new string('─', flagWidth) + right + "\n";
    }

    // Negative alignment left-justifies each cell within its column width.
    string entryFormat = "│{0," + -1 * keyWidth + "}│{1," + -1 * valueWidth + "}│{2," + -1 * flagWidth + "}│\n";

    List<string> eligibleKeys = new List<string>(Symbols.Count);
    foreach (var entry in Symbols)
    {
        if (printUnprint || !entry.Value.Item2.HasFlag(VariableFlags.NoPrint))
        {
            eligibleKeys.Add(entry.Key);
        }
    }

    StringBuilder consoleOutput = new StringBuilder();
    consoleOutput.Append(Rule("┌", "┬", "┐"));
    consoleOutput.Append(string.Format("│{0}│{1}│{2}│\n", CenterString("Symbol", keyWidth), CenterString("Value", valueWidth), CenterString("Flags", flagWidth)));
    foreach (string key in eligibleKeys)
    {
        // A rule separates the header from the first entry and each entry from the next.
        consoleOutput.Append(Rule("├", "┼", "┤"));

        var symbol = Symbols[key];
        // Long keys and values wrap onto additional rows; control characters are shown escaped.
        List<string> keyLines = GetStringLines(key, keyWidth);
        List<string> valueLines = GetStringLines(symbol.Item1.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\t", "\\t"), valueWidth);
        string flagBits = Convert.ToString((byte)symbol.Item2, 2).PadLeft(flagWidth, '0');

        int rowCount = Math.Max(keyLines.Count, valueLines.Count);
        for (int row = 0; row < rowCount; row++)
        {
            consoleOutput.Append(string.Format(
                entryFormat,
                row < keyLines.Count ? keyLines[row] : "",
                row < valueLines.Count ? valueLines[row] : "",
                row == 0 ? flagBits : ""));
        }
    }
    consoleOutput.Append(Rule("└", "┴", "┘"));
    return () => Console.WriteLine(consoleOutput.ToString());
}
/// <summary>
/// Handles the 'exit' statement. The returned action offers to save the accumulated commands
/// to a file when they were entered interactively and the stream has grown since parsing began.
/// </summary>
/// <param name="source">The stream holding the commands entered so far.</param>
/// <param name="initialStreamLength">Length of <paramref name="source"/> when parsing started.</param>
/// <param name="isDynamicInput">True when commands were entered interactively.</param>
/// <returns>An action that performs the save prompt.</returns>
Action Exit(Stream source, long initialStreamLength, bool isDynamicInput=false)
{
    void exitAction()
    {
        if (source.Length == initialStreamLength || !isDynamicInput)
        {
            return;
        }

        Console.WriteLine("Commands list has been modified; would you like to save it to a file?");
        string commandState = "";
        while (!string.Equals(commandState, "y", StringComparison.OrdinalIgnoreCase)
            && !string.Equals(commandState, "n", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write("Y/n: ");
            commandState = Console.ReadLine();
            if (commandState == null)
            {
                // Console input was closed; nobody is there to answer.
                return;
            }
        }
        if (!string.Equals(commandState, "y", StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        Console.WriteLine("Enter an output file (default {0}):", Environment.CurrentDirectory);
        string path = Console.ReadLine();
        if (string.IsNullOrEmpty(path))
        {
            Console.WriteLine("No file name entered; commands were not saved.");
            return;
        }

        try
        {
            path = Path.Combine(Environment.CurrentDirectory, path);
            source.Position = 0;
            // File.Create truncates an existing file; File.OpenWrite would leave any old
            // content beyond the newly written bytes in place.
            using (FileStream fs = File.Create(path))
            {
                source.CopyTo(fs);
            }
            source.Close();
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException || e is ArgumentException || e is NotSupportedException)
        {
            Console.WriteLine("Encountered an error saving the commands: " + e.Message);
        }
    }
    return exitAction;
}
/// <summary>
/// Handles the print family of statements. The expression is evaluated immediately; all
/// output is deferred to the returned action.
/// </summary>
/// <param name="source">Stream positioned after the statement keyword.</param>
/// <param name="mode">
/// 0 prints the expression, 1 its length, 3 its word count; 2 (and any other value of 2 or more)
/// prints its words one per line. Negative values produce empty output.
/// </param>
/// <returns>An action that writes the prepared output to the console.</returns>
Action Print(Stream source, int mode = 0)
{
    StringBuilder outputString = new StringBuilder();
    string expression = ValidateValue(source);
    if (mode == 0)
    {
        outputString.Append(expression + Environment.NewLine);
    }
    else if (mode == 1)
    {
        outputString.Append("Length of the expression is: ");
        outputString.Append(expression.Length + Environment.NewLine);
    }
    else if (mode >= 2)
    {
        // Split on any whitespace and drop empty entries, so that consecutive separators
        // (or an empty expression) do not count as words.
        string[] words = expression.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        if (mode == 3)
        {
            outputString.Append("Wordcount is: ");
            outputString.Append(words.Length + Environment.NewLine);
        }
        else
        {
            // Part of the deferred output: nothing is printed until the statement is
            // known to be complete.
            outputString.Append("Words are:" + Environment.NewLine);
            foreach (string word in words)
            {
                outputString.Append(word + Environment.NewLine);
            }
        }
    }
    return () => Console.WriteLine(outputString.ToString());
}
/// <summary>
/// Handles 'reverse x;': reverses the order of the space-separated words in an existing symbol.
/// The reversed text is computed immediately from the symbol's current value; the symbol is
/// only updated when the returned action is invoked.
/// </summary>
/// <param name="source">Stream positioned after the statement keyword.</param>
/// <returns>An action that stores the reversed value, keeping the symbol's flags.</returns>
Action Reverse(Stream source)
{
    string key = ValidateKey(source, true);
    string[] words = Symbols[key].Item1.Split(' ');
    Array.Reverse(words);
    // Joining (rather than appending a space after every word) adds no trailing space,
    // so reversing twice restores the original value.
    string reversed = string.Join(" ", words);
    return () => Symbols[key] = new Tuple<string, VariableFlags>(reversed, Symbols[key].Item2);
}
/// <summary>
/// Prints the offending line followed by a caret marker and the error message:
/// <code>
/// text read from lineStart up to the next line feed
/// (padding)^ errorMessage
/// </code>
/// Afterwards the stream is truncated at <paramref name="lineStart"/> and positioned there.
/// </summary>
/// <param name="lineStart">Stream position where the failed statement begins.</param>
/// <param name="caratPos">Stream position the caret should point at.</param>
/// <param name="errorMessage">Message printed after the caret.</param>
/// <param name="source">The stream being parsed.</param>
static void WriteDebugLine(long lineStart, long caratPos, string errorMessage, Stream source)
{
    source.Position = lineStart;
    string offendingLine = GetNextLine(source);

    // A caret position before the line start is clamped to column zero.
    long offset = caratPos - lineStart;
    int indent = offset >= 0 ? (int)offset : 0;
    string marker = new string(' ', indent) + "^ " + errorMessage;

    Console.WriteLine(offendingLine);
    Console.WriteLine(marker);

    // Drop the failed statement from the stream.
    source.Position = lineStart;
    source.SetLength(lineStart);
}
#endregion
#region Data Handling
/// <summary>
/// Evaluates an expression of values joined by '+' and leaves the stream positioned after
/// the last value consumed. Evaluation stops at the end of the stream or at the
/// end-of-statement character.
/// </summary>
/// <param name="s">Stream positioned before the expression.</param>
/// <param name="expression">The concatenation of all values in the expression.</param>
/// <returns>The stream position after the expression.</returns>
/// <exception cref="ParserException">Two values appear without a '+' between them.</exception>
long FindExpression(Stream s, out string expression)
{
    StringBuilder combined = new StringBuilder();
    // A value is accepted at the start and after each '+'.
    bool operandExpected = true;
    while (s.Position < s.Length && !IsNextEoS(s))
    {
        if (IsNextEoS(s, '+'))
        {
            // Step over the operator.
            s.Position = FindNextWord(s, out _);
            operandExpected = true;
            continue;
        }

        long valueEnd = FindValue(s, out string value);
        if (valueEnd == -1)
        {
            Console.WriteLine("Could not parse value");
        }
        if (!operandExpected)
        {
            throw new ParserException("Append operator not set", 0, s.Position);
        }
        s.Position = valueEnd;
        combined.Append(value);
        operandExpected = false;
    }
    expression = combined.ToString();
    return s.Position;
}
/// <summary>
/// Looks ahead to see whether the next non-whitespace character is the given marker
/// (the end-of-statement ';' by default). The stream position is left unchanged.
/// </summary>
/// <param name="s">The stream to inspect.</param>
/// <param name="EoSChar">The character to look for.</param>
/// <returns>true if the next non-whitespace character is <paramref name="EoSChar"/>, else false.</returns>
static bool IsNextEoS(Stream s, char EoSChar = ';')
{
    long origin = s.Position;
    char candidate;
    // The first character is peeked, so the first ReadChar re-reads it; after that the
    // loop advances until a non-whitespace character or the end of the stream (0).
    for (candidate = PeekChar(s); candidate != 0 && char.IsWhiteSpace(candidate); candidate = ReadChar(s))
    {
    }
    s.Position = origin;
    return candidate == EoSChar;
}
/// <summary>
/// Resolves the next value in the stream: a quoted literal, or the value of an existing symbol.
/// Leading whitespace is consumed.
/// </summary>
/// <param name="s">Stream positioned before the value.</param>
/// <param name="returnedValue">The literal text or the symbol's value.</param>
/// <returns>The stream position at the end of the value.</returns>
/// <exception cref="ParserException">The word is not a literal and is not a known symbol.</exception>
long FindValue(Stream s, out string returnedValue)
{
    SkipWhitespace(s);
    // A leading double quote marks the start of a literal.
    if (PeekChar(s) == '\"')
    {
        return FindLiteral(s, out returnedValue);
    }

    long identifierEnd = FindExistingIdentifier(s, out string name);
    if (!Symbols.TryGetValue(name, out var entry))
    {
        throw new ParserException("Could not find key: " + name, 0, s.Position);
    }
    returnedValue = entry.Item1;
    return identifierEnd;
}
/// <summary>
/// Reads the next word as a candidate identifier without moving the stream.
/// </summary>
/// <param name="s">Stream positioned before the identifier.</param>
/// <param name="returnedKey">The word that was read.</param>
/// <returns>The stream position at the end of the word.</returns>
static long FindIdentifier(Stream s, out string returnedKey) => FindNextWord(s, out returnedKey);
/// <summary>
/// Reads the next word as the name of a symbol to look up. A trailing ';' on a word longer
/// than one character is stripped, and both the returned end position and the stream position
/// are moved back by one.
/// </summary>
/// <param name="s">Stream positioned before the identifier.</param>
/// <param name="returnedKey">The identifier that was read.</param>
/// <returns>The stream position at the end of the identifier.</returns>
static long FindExistingIdentifier(Stream s, out string returnedKey)
{
    long end = FindNextWord(s, out returnedKey);
    bool hasTrailingSemicolon = returnedKey.Length > 1 && returnedKey.EndsWith(';');
    if (hasTrailingSemicolon)
    {
        // Exclude the semi-colon from the identifier and from the reported end position.
        returnedKey = returnedKey.TrimEnd(';');
        end--;
        s.Position--;
    }
    return end;
}
/// <summary>
/// Reads a double-quoted literal and returns the stream to its original position.
/// A quote preceded by a backslash (\") does not end the literal and is returned as a plain quote.
/// </summary>
/// <remarks>
/// An escaped backslash (\\) directly before the closing quote is not recognised: the quote
/// is then treated as escaped.
/// </remarks>
/// <param name="s">Stream positioned at the opening quote.</param>
/// <param name="returnedLiteral">The literal's content, without the surrounding quotes.</param>
/// <returns>The stream position after the closing quote (or the end of the stream if there is none).</returns>
/// <exception cref="ParserException">The search reported a negative position.</exception>
static long FindLiteral(Stream s, out string returnedLiteral)
{
    long pos = s.Position;
    // Step over the opening quote, if present.
    if (PeekChar(s) == '\"') ReadChar(s);
    long literalStart = s.Position;

    long resultPosition = FindNextOccurance(s, (c, stream) =>
    {
        if (c != '\"')
        {
            return false;
        }
        // The stream is now just past the quote, so the byte before the quote sits two
        // positions back. A backslash is a single byte in UTF-8 and never occurs inside a
        // multi-byte sequence, so a raw byte comparison is sufficient.
        long afterQuote = stream.Position;
        bool escaped = false;
        if (afterQuote - 2 >= literalStart)
        {
            stream.Position = afterQuote - 2;
            escaped = stream.ReadByte() == '\\';
            stream.Position = afterQuote;
        }
        return !escaped;
    }, out string resultLiteral);

    if (resultPosition > -1)
    {
        // Replace each escaped quote with the quote it stands for.
        returnedLiteral = resultLiteral.Replace("\\\"", "\"");
    }
    else
    {
        throw new ParserException("Could not parse the literal", 0, s.Position);
    }
    s.Position = pos;
    return resultPosition;
}
#endregion
}
#region HelperFunctions
/// <summary>
/// Returns the UTF-8 text from the current position up to (not including) the next '\n',
/// or to the end of the stream. The stream position is left where it was.
/// </summary>
/// <param name="s">The stream to read from.</param>
/// <returns>The text of the line.</returns>
static string GetNextLine(Stream s)
{
    FindNextOccurance(s, (ch, stream) => ch == '\n', out string line);
    return line;
}
/// <summary>
/// Finds the next word in the stream and returns the stream to its original position.
/// Leading whitespace (including line breaks) is skipped; the word ends at the next whitespace
/// character or ';'. A ';' in the first position is treated as part of the word.
/// </summary>
/// <param name="s">The stream to read from.</param>
/// <param name="nextWord">The word found, or an empty string if only whitespace (or nothing) remains.</param>
/// <returns>The stream position at the end of the word.</returns>
static long FindNextWord(Stream s, out string nextWord)
{
    StringBuilder newWord = new StringBuilder();
    // Record our current position
    long start = s.Position;
    // Skip leading whitespace. Whitespace includes carriage returns and line feeds, so a
    // statement may be spread across several lines.
    char currentChar = ReadChar(s);
    while (s.Position < s.Length && char.IsWhiteSpace(currentChar))
    {
        currentChar = ReadChar(s);
    }
    // Only start the word with a real character: at the end of the stream ReadChar yields
    // 0 (or the last character read may still be whitespace), and neither belongs in the word.
    if (currentChar != 0 && !char.IsWhiteSpace(currentChar))
    {
        newWord.Append(currentChar);
    }
    // Collect characters until whitespace or the end-of-statement character
    while (s.Position < s.Length)
    {
        currentChar = ReadChar(s);
        if (char.IsWhiteSpace(currentChar) || currentChar == ';')
        {
            // Leave the terminator unread
            s.Position--;
            break;
        }
        newWord.Append(currentChar);
    }
    nextWord = newWord.ToString();
    long endPos = s.Position;
    s.Position = start;
    return endPos;
}
/// <summary>
/// Reads characters until the predicate returns true for one of them, a NUL character is read,
/// or the stream ends, then returns the stream to its original position.
/// </summary>
/// <param name="s">The stream to search.</param>
/// <param name="p">
/// Predicate called with each character and the stream (positioned just after that character).
/// </param>
/// <param name="result">The characters read before the match; the matching character is excluded.</param>
/// <returns>
/// The stream position just after the matching character, or the end of the stream if nothing matched.
/// </returns>
static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
{
    long start = s.Position;
    StringBuilder sb = new StringBuilder();
    while (s.Position < s.Length)
    {
        char nextChar = ReadChar(s);
        if (nextChar == 0 || p(nextChar, s))
        {
            break;
        }
        sb.Append(nextChar);
    }
    result = sb.ToString();
    long newPosition = s.Position;
    s.Position = start;
    return newPosition;
}
/// <summary>
/// Finds the next occurrence of a specific character, leaving the stream position unchanged.
/// </summary>
/// <param name="s">The stream to search.</param>
/// <param name="c">The character to look for.</param>
/// <param name="result">The characters read before <paramref name="c"/> was found.</param>
/// <returns>The position reported by the predicate-based overload.</returns>
static long FindNextOccurance(Stream s, char c, out string result)
{
    bool IsTarget(char candidate, Stream stream) => candidate == c;
    return FindNextOccurance(s, IsTarget, out result);
}
/// <summary>
/// Reads the next UTF-8 encoded character from the stream and advances the stream by the
/// number of bytes that character occupies.
/// </summary>
/// <remarks>
/// A 4-byte sequence decodes to a surrogate pair; only its first (high surrogate) char is returned.
/// </remarks>
/// <param name="s">The stream to read from.</param>
/// <returns>The decoded character, or (char)0 at the end of the stream.</returns>
/// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
static char ReadChar(Stream s)
{
    int firstByte = s.ReadByte();
    if (firstByte == -1)
    {
        return (char)0;
    }

    // UTF-8 encodes the sequence length in the high bits of the lead byte;
    // plain ASCII (0xxxxxxx) has no continuation bytes.
    int continuationCount = 0;
    if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
    {
        continuationCount = 3;
    }
    else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
    {
        continuationCount = 2;
    }
    else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
    {
        continuationCount = 1;
    }

    byte[] charBytes = new byte[continuationCount + 1];
    charBytes[0] = (byte)firstByte;
    // Read every continuation byte; ReadByte itself advances the stream, so no further
    // position adjustment is needed afterwards.
    for (int i = 1; i <= continuationCount; i++)
    {
        int next = s.ReadByte();
        if (next == -1 || next >> 6 != 2) // continuation bytes are 10xxxxxx
        {
            throw new InvalidDataException("Character is not a valid UTF-8 code point!");
        }
        charBytes[i] = (byte)next;
    }
    return Encoding.UTF8.GetString(charBytes)[0];
}
/// <summary>
/// Returns the next character in the stream without consuming it.
/// </summary>
/// <param name="s">The stream to inspect.</param>
/// <returns>The character that ReadChar would return at the current position.</returns>
static char PeekChar(Stream s)
{
    long mark = s.Position;
    char peeked = ReadChar(s);
    // Undo the advance made by ReadChar.
    s.Position = mark;
    return peeked;
}
/// <summary>
/// Decodes the UTF-8 character that ends immediately before the current stream position.
/// The stream position is left unchanged.
/// </summary>
/// <param name="s">The stream to read from.</param>
/// <returns>The previous character, or (char)0 when the stream is at its start.</returns>
static char PreviousChar(Stream s)
{
    long origin = s.Position;
    Stack<byte> charBytes = new Stack<byte>(4);
    // Walk backwards over at most four bytes (the longest UTF-8 sequence).
    for (int i = 0; i < 4 && s.Position > 0; i++)
    {
        s.Position--;
        byte read = (byte)s.ReadByte();
        // ReadByte moved forward again; step back so the next pass reads the byte before this one.
        s.Position--;
        charBytes.Push(read);
        // Anything other than a continuation byte (10xxxxxx) is the start of the character.
        if (read >> 6 != 2) break;
    }
    s.Position = origin;
    if (charBytes.Count == 0)
    {
        return (char)0;
    }
    // Stack.ToArray yields the most recently pushed byte first, i.e. the lead byte,
    // which restores the original byte order.
    return Encoding.UTF8.GetString(charBytes.ToArray())[0];
}
/// <summary>
/// Advances the stream past any whitespace, stopping at the first non-whitespace
/// character or at the end of the stream.
/// </summary>
/// <param name="s">The stream to advance.</param>
static void SkipWhitespace(Stream s)
{
    while (s.Position < s.Length && char.IsWhiteSpace(PeekChar(s)))
    {
        // Consume the character; ReadChar advances by its full encoded size.
        ReadChar(s);
    }
}
/// <summary>
/// Centres <paramref name="source"/> within a field of <paramref name="totalPadding"/> characters.
/// When the padding cannot be split evenly, the extra character goes on the right.
/// </summary>
/// <param name="source">The text to centre.</param>
/// <param name="totalPadding">The total width of the result.</param>
/// <param name="paddingChar">The character used to fill the space either side of the text.</param>
/// <returns>
/// The padded string, or <paramref name="source"/> unchanged when it is already at least
/// <paramref name="totalPadding"/> characters long.
/// </returns>
static string CenterString(string source, int totalPadding, char paddingChar = ' ')
{
    if (source.Length >= totalPadding) return source;
    // Split the text in the middle; the first half is right-aligned in the left half of
    // the field and the second half is left-aligned in the right half.
    int splitIndex = (int)Math.Ceiling(source.Length / 2.0);
    int leftWidth = (int)Math.Floor(totalPadding / 2.0);
    int rightWidth = (int)Math.Ceiling(totalPadding / 2.0);
    return source[..splitIndex].PadLeft(leftWidth, paddingChar)
        + source[splitIndex..].PadRight(rightWidth, paddingChar);
}
/// <summary>
/// Splits a string into consecutive chunks of at most <paramref name="maxWidth"/> characters.
/// </summary>
/// <param name="source">The text to split.</param>
/// <param name="maxWidth">Maximum length of each chunk; must be positive.</param>
/// <returns>The chunks in order; an empty list when <paramref name="source"/> is empty.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxWidth"/> is zero or negative.</exception>
static List<string> GetStringLines(string source, int maxWidth)
{
    if (maxWidth <= 0)
    {
        // A non-positive width would never advance through the string.
        throw new ArgumentOutOfRangeException(nameof(maxWidth), "Width must be positive.");
    }
    List<string> lines = new List<string>();
    for (int start = 0; start < source.Length; start += maxWidth)
    {
        int end = Math.Min(start + maxWidth, source.Length);
        lines.Add(source[start..end]);
    }
    return lines;
}
#endregion
/// <summary>
/// Raised by the parser when a statement cannot be parsed or evaluated.
/// </summary>
public class ParserException : Exception
{
    /// <summary>
    /// Severity of the error. StartParsing rethrows values greater than 3 as an
    /// ApplicationException; lower values discard the current statement and continue.
    /// </summary>
    public int Importance;

    /// <summary>
    /// Stream position associated with the error, used to place the caret in the error report.
    /// </summary>
    public long LinePosition;

    /// <summary>Creates a parser error.</summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="importance">Severity of the error.</param>
    /// <param name="linePos">Stream position the error relates to.</param>
    public ParserException(string message, int importance, long linePos)
        : base(message)
    {
        LinePosition = linePos;
        Importance = importance;
    }
}
}
}