829 lines
35 KiB
C#

using System;
using System.Collections.Generic;
using System.Collections;
using System.IO;
using System.Text;
using System.Collections.ObjectModel;
using System.Runtime.Serialization;
using System.Runtime.InteropServices;
namespace Assignment_1
{
class Program
{
/// <summary>
/// Bit flags attached to every entry of the symbol table.
/// </summary>
[Flags]
enum VariableFlags
{
    /// <summary>No flags set; used for ordinary user variables created by <c>set</c>.</summary>
    Empty = 0,
    /// <summary>The symbol is a built-in constant; ValidateKey refuses to target it.</summary>
    Reserved = 1 << 0,
    /// <summary>The symbol is left out of the <c>list</c> table unless explicitly requested.</summary>
    NoPrint = 1 << 1,
    /// <summary>Not referenced anywhere in this file.</summary>
    Static = 1 << 2,
    /// <summary>Not referenced anywhere in this file.</summary>
    Undef = 1 << 3
}
/// <summary>
/// The interpreter's symbol table: variable name mapped to (value, flags).
/// It starts out holding the built-in whitespace constants, all of which are
/// reserved and hidden from the default listing.
/// </summary>
static Dictionary<string, Tuple<string, VariableFlags>> Symbols = new()
{
    ["SPACE"] = Tuple.Create(" ", VariableFlags.Reserved | VariableFlags.NoPrint),
    ["TAB"] = Tuple.Create("\t", VariableFlags.Reserved | VariableFlags.NoPrint),
    ["NEWLINE"] = Tuple.Create("\n", VariableFlags.Reserved | VariableFlags.NoPrint),
    ["CARRIAGE_RETURN"] = Tuple.Create("\r", VariableFlags.Reserved | VariableFlags.NoPrint)
};
/// <summary>
/// Characters that cannot appear in a normal string.
/// This list is not consulted by any code visible in this file.
/// </summary>
static readonly List<char> ForbiddenChars = new() { '$', '\\', '\"', '\'' };
/// <summary>
/// Program entry point. Prints the banner, loads the program text into a
/// memory stream and hands it to the parser.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    string[] banner =
    {
        "┌──────────────────────────────────────────┐",
        "│ 159.341 2021 Semester 1, Assignment 1 │",
        "│ Submitted by Brychan Dempsey, 14299890 │",
        "└──────────────────────────────────────────┘"
    };
    foreach (string bannerLine in banner)
    {
        Console.WriteLine(bannerLine);
    }

    // The source is always parsed from this in-memory stream.
    MemoryStream sourceStream = new MemoryStream(1024);
    Parser parser = new Parser();

    // From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
    // Piped input is read in one go; otherwise the stream is seeded with the opening
    // brace and the parser asks the user for statements as it goes.
    bool dynamicInput = !Console.IsInputRedirected;
    string initialText = dynamicInput ? "{ \r\n" : Console.In.ReadToEnd();
    sourceStream.Write(Encoding.UTF8.GetBytes(initialText));
    sourceStream.Position = 0;

    parser.StartParsing(sourceStream, dynamicInput);
    Console.ReadLine();
}
public class Parser
{
/// <summary>
/// The statement keywords understood by the parser. The member names are matched
/// against the first word of each statement, so they are kept in lower case.
/// </summary>
public enum statements
{
    /// <summary>Dispatches to Exit.</summary>
    exit = 0,
    /// <summary>Dispatches to AppendSet in append mode.</summary>
    append = 1,
    /// <summary>Dispatches to List.</summary>
    list = 2,
    /// <summary>Dispatches to Print with mode 0.</summary>
    print = 3,
    /// <summary>Dispatches to Print with mode 1.</summary>
    printlength = 4,
    /// <summary>Dispatches to Print with mode 3.</summary>
    printwords = 5,
    /// <summary>Dispatches to Print with mode 2.</summary>
    printwordcount = 6,
    /// <summary>Dispatches to AppendSet in set mode.</summary>
    set = 7,
    /// <summary>Dispatches to Reverse.</summary>
    reverse = 8,
    /// <summary>Helper: prints the list of commands.</summary>
    h = 9,
    /// <summary>Helper: announces writing the command history.</summary>
    writeout = 10
}
/// <summary>
/// Runs the interpreter loop over <paramref name="source"/>. The stream must start
/// with '{'; otherwise nothing is parsed. Each statement is parsed into a deferred
/// action which is only executed once its terminating semi-colon has been seen.
/// </summary>
/// <param name="source">Seekable, writable stream holding the UTF-8 program text.</param>
/// <param name="dynamicInput">
/// When true, a line is read from the console before every pass and appended to the
/// stream; statements may then span several lines. When false, parsing stops at the
/// end of the stream or at the closing '}'.
/// </param>
/// <exception cref="ApplicationException">A ParserException with Importance above 3 was raised.</exception>
public void StartParsing(Stream source, bool dynamicInput = false)
{
    long initSourceLength = source.Length;
    // The program text must open with '{'; anything else is not parsed at all.
    if ((byte)source.ReadByte() != '{')
    {
        return;
    }

    long lastLinePos = 0;
    long initPos = 0;
    bool cont = false;

    // Rewinds to the start of the current statement and drops everything from there on.
    void DiscardStatement()
    {
        source.Position = initPos;
        source.SetLength(initPos);
    }

    while (true)
    {
        if (dynamicInput)
        {
            lastLinePos = source.Position;
            if (!cont)
            {
                Console.WriteLine("Enter a command: ");
            }
            string s = Console.ReadLine();
            if (s is null)
            {
                // Standard input was closed: there is nothing left to interpret.
                return;
            }
            // Append the line but keep reading from where the new text begins.
            long pos = source.Position;
            source.Write(Encoding.UTF8.GetBytes(s));
            source.Position = pos;
        }

        // A continued statement is re-parsed from its beginning on every pass.
        if (!cont)
        {
            initPos = source.Position;
        }
        else
        {
            source.Position = initPos;
        }

        SkipWhitespace(source);
        if (!dynamicInput && (source.Position >= source.Length || PeekChar(source) == '}'))
        {
            // Fixed input is exhausted (or the block was closed); without this the
            // loop would spin forever on the end of the stream.
            return;
        }

        long position = FindNextWord(source, out string word);
        try
        {
            // Match by name only. Enum.TryParse would also accept numeric text such as "3".
            if (!Enum.IsDefined(typeof(statements), word))
            {
                Console.WriteLine("Failed parsing statement");
                DiscardStatement();
                cont = false;
                continue;
            }
            statements statementType = (statements)Enum.Parse(typeof(statements), word);

            // The handler returns an Action so that the effect of the statement can be
            // deferred until we know where the statement ends.
            Action result = () => { };
            source.Position = position;
            switch (statementType)
            {
                case statements.exit:
                    result = Exit(source, initSourceLength);
                    break;
                case statements.append:
                    result = AppendSet(source);
                    break;
                case statements.list:
                    result = List();
                    break;
                case statements.print:
                    result = Print(source, 0);
                    break;
                case statements.printlength:
                    result = Print(source, 1);
                    break;
                case statements.printwordcount:
                    result = Print(source, 2);
                    break;
                case statements.printwords:
                    result = Print(source, 3);
                    break;
                case statements.set:
                    result = AppendSet(source, false);
                    break;
                case statements.reverse:
                    result = Reverse(source);
                    break;
                // Helper commands. In interactive mode their text is removed from the
                // stream again and no semi-colon is required.
                case statements.h:
                    Console.WriteLine("Commands are: ");
                    foreach (var item in Enum.GetValues(typeof(statements)))
                    {
                        Console.WriteLine("\t{0}", ((statements)item).ToString());
                    }
                    if (dynamicInput)
                    {
                        DiscardStatement();
                        cont = false;
                        continue;
                    }
                    break;
                case statements.writeout:
                    // NOTE(review): no target path is available here, so "{0}" is printed
                    // literally and nothing is written - the feature looks unfinished.
                    Console.WriteLine("Writing input commands to {0}...");
                    if (dynamicInput)
                    {
                        DiscardStatement();
                        cont = false;
                        continue;
                    }
                    break;
            }

            if (IsNextEoS(source))
            {
                cont = false;
                // IsNextEoS looks past whitespace, so step over it before consuming ';'.
                SkipWhitespace(source);
                source.Position++;
                if (dynamicInput)
                {
                    // Keep the recorded history at one statement per line.
                    source.WriteByte((byte)'\n');
                }
                result();
            }
            else if (dynamicInput && source.Position != lastLinePos)
            {
                // The statement is incomplete: keep accumulating input lines and
                // re-parse from initPos once more text has arrived.
                cont = true;
                source.WriteByte((byte)' ');
                Console.Write(">");
            }
            else
            {
                throw new ParserException("expected a semi-colon", 0, source.Position);
            }
        }
        // A ParserException thrown anywhere below lands here; the offending statement
        // is reported and removed, so handlers need not restore the stream themselves.
        catch (ParserException e)
        {
            cont = false;
            // NOTE(review): ParserException documents "3 or greater" as critical,
            // while this check uses "> 3" - confirm which is intended.
            if (e.Importance > 3)
            {
                throw new ApplicationException("A critical error occurred.");
            }
            if (e.LinePosition > 0)
            {
                WriteDebugLine(initPos, e.LinePosition, e.Message, source);
            }
            else
            {
                Console.WriteLine(e.LinePosition + ": " + e.Message);
                DiscardStatement();
            }
        }
    }
}
#region Function Handling
/// <summary>
/// Reads the next word as a variable name and checks that it may be written to.
/// On success the stream is advanced past the name. The symbol table is not modified.
/// </summary>
/// <param name="source">Stream positioned before the variable name.</param>
/// <param name="checkExist">When true the name must already be in the symbol table.</param>
/// <returns>The variable name.</returns>
/// <exception cref="ParserException">
/// No name could be read, the name is unknown while <paramref name="checkExist"/> is
/// true, or the name belongs to a reserved constant.
/// </exception>
private string ValidateKey(Stream source, bool checkExist)
{
    long keyEndPos = FindIdentifier(source, out string key);
    if (keyEndPos < 0 || key.Length == 0)
    {
        throw new ParserException("Could not identify object", 0, source.Position);
    }

    bool exists = Symbols.TryGetValue(key, out Tuple<string, VariableFlags> entry);
    if (checkExist && !exists)
    {
        throw new ParserException("Key not found", 0, source.Position);
    }
    if (exists && entry.Item2.HasFlag(VariableFlags.Reserved))
    {
        throw new ParserException("Cannot assign a value to a reserved constant", 0, keyEndPos - (key.Length + 1));
    }

    // An existing, writable symbol is deliberately left in place: removing it here
    // would lose it whenever the rest of the statement fails to parse, and would
    // make "set x x + ..." unable to read the old value of x.
    source.Position = keyEndPos;
    return key;
}
/// <summary>
/// Evaluates the expression at the current position and advances the stream past it.
/// </summary>
/// <param name="source">Stream positioned before the expression.</param>
/// <returns>The evaluated expression text.</returns>
/// <exception cref="ParserException">FindExpression reported a negative end position.</exception>
private string ValidateValue(Stream source)
{
    long valuePos = FindExpression(source, out string value);
    if (valuePos < 0)
    {
        throw new ParserException("Could not evaluate expression", 0, source.Position);
    }
    source.Position = valuePos;
    return value;
}
/// <summary>
/// Handles the 'append x y [ + z];' case
/// and the 'set x y [ + z];' case.
/// </summary>
/// <param name="source">Stream positioned just after the statement keyword.</param>
/// <param name="appendMode">True for append (the variable must exist), false for set.</param>
/// <returns>
/// An Action that, when run, appends to the existing value (keeping its flags) or
/// stores the value under the key with empty flags, replacing any previous entry.
/// </returns>
Action AppendSet(Stream source, bool appendMode=true)
{
    string key = ValidateKey(source, appendMode);
    string value = ValidateValue(source);
    if (appendMode)
    {
        return () =>
        {
            Tuple<string, VariableFlags> current = Symbols[key];
            Symbols[key] = new Tuple<string, VariableFlags>(current.Item1 + value, current.Item2);
        };
    }
    // Indexer assignment overwrites an existing entry instead of throwing like Add would.
    return () => Symbols[key] = new Tuple<string, VariableFlags>(value, VariableFlags.Empty);
}
/// <summary>
/// Builds a box-drawn table of the symbol table (name, value, flag bits).
/// The table text is built when this method is called; the returned action only prints it.
/// </summary>
/// <param name="printUnprint">Also include symbols flagged NoPrint.</param>
/// <returns>An Action that writes the prepared table to the console.</returns>
Action List(bool printUnprint = false)
{
    int keyWidth = 10;
    int valueWidth = 50;
    int flagWidth = 9;

    // One horizontal rule of the table, built from the given corner/junction pieces.
    string Rule(string left, string joint, string right)
    {
        return left + new string('─', keyWidth) + joint + new string('─', valueWidth) + joint + new string('─', flagWidth) + right + "\n";
    }

    StringBuilder table = new StringBuilder();
    table.Append(Rule("┌", "┬", "┐"));
    table.Append(string.Format("│{0}│{1}│{2}│\n", CenterString("Symbol", keyWidth), CenterString("Value", valueWidth), CenterString("Flags", flagWidth)));

    // Left-aligned, fixed-width cells; the negative widths request left alignment.
    string entryFormat = "│{0," + -1*keyWidth + "}│{1," + -1*valueWidth + "}│{2," + -1*flagWidth + "}│\n";
    foreach (string name in Symbols.Keys)
    {
        Tuple<string, VariableFlags> entry = Symbols[name];
        bool hidden = entry.Item2.HasFlag(VariableFlags.NoPrint) && !printUnprint;
        if (hidden)
        {
            continue;
        }
        // Every row is preceded by a rule, which separates it from the header or the previous row.
        table.Append(Rule("├", "┼", "┤"));
        string shownValue = entry.Item1.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\t", "\\t");
        string flagBits = Convert.ToString((byte)entry.Item2, 2).PadLeft(8, '0');
        table.Append(string.Format(entryFormat, name, shownValue, flagBits));
    }
    table.Append(Rule("└", "┴", "┘"));

    string rendered = table.ToString();
    return () => Console.WriteLine(rendered);
}
/// <summary>
/// Builds the action for the 'exit' statement. When run, it offers to save the
/// command stream to a file if the stream length differs from
/// <paramref name="initialStreamLength"/>, then terminates the process.
/// </summary>
/// <param name="source">The command stream.</param>
/// <param name="initialStreamLength">Length of the stream when parsing started.</param>
/// <returns>An Action that ends the process with exit code 0.</returns>
Action Exit(Stream source, long initialStreamLength)
{
    return () =>
    {
        if (source.Length != initialStreamLength)
        {
            Console.WriteLine("Commands list has been modified; would you like to save it to a file?");
            string commandState = "";
            while (!string.Equals(commandState, "y", StringComparison.OrdinalIgnoreCase)
                && !string.Equals(commandState, "n", StringComparison.OrdinalIgnoreCase))
            {
                Console.Write("Y/n: ");
                // ReadLine returns null once stdin is closed; treat that as "no".
                commandState = Console.ReadLine() ?? "n";
            }
            if (string.Equals(commandState, "y", StringComparison.OrdinalIgnoreCase))
            {
                Console.WriteLine("Enter an output file (default {0}):", Environment.CurrentDirectory);
                string path = Console.ReadLine();
                if (string.IsNullOrEmpty(path))
                {
                    // No file name given: leave without saving.
                    Environment.Exit(0);
                }
                path = Path.Combine(Environment.CurrentDirectory, path);
                // insert the final closing bracket
                source.WriteByte((byte)'}');
                source.Position = 0;
                // File.Create truncates an existing file; File.OpenWrite would leave the
                // tail of a longer, older file behind the newly written commands.
                using (FileStream fs = File.Create(path))
                {
                    source.CopyTo(fs);
                }
                source.Close();
            }
        }
        Environment.Exit(0);
    };
}
/// <summary>
/// Evaluates the expression that follows a print-style statement and prepares its output.
/// </summary>
/// <param name="source">Stream positioned just after the statement keyword.</param>
/// <param name="mode">
/// 0 prints the expression, 1 its length, 2 its word count (printwordcount),
/// 3 or above the words one per line (printwords). Negative values print an empty line.
/// </param>
/// <returns>An Action that writes the prepared text to the console.</returns>
Action Print(Stream source, int mode=0)
{
    string expression = ValidateValue(source);
    StringBuilder outputString = new StringBuilder();
    if (mode == 0)
    {
        outputString.Append(expression + Environment.NewLine);
    }
    else if (mode == 1)
    {
        outputString.Append("Length of the expression is: ");
        outputString.Append(expression.Length + Environment.NewLine);
    }
    else if (mode >= 2)
    {
        // Split on any whitespace and ignore empty entries, so repeated spaces or an
        // empty expression do not show up as words.
        string[] words = expression.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        if (mode == 2)
        {
            outputString.Append("Wordcount is: ");
            outputString.Append(words.Length + Environment.NewLine);
        }
        else
        {
            // The heading is part of the deferred output, so nothing is printed until
            // the statement is known to be complete.
            outputString.Append("Words are:" + Environment.NewLine);
            foreach (string word in words)
            {
                outputString.Append(word + Environment.NewLine);
            }
        }
    }
    return () => Console.WriteLine(outputString.ToString());
}
/// <summary>
/// Handles 'reverse x;': reverses the order of the space-separated words of an
/// existing variable. The reversed text is computed when this method is called.
/// </summary>
/// <param name="source">Stream positioned just after the statement keyword.</param>
/// <returns>An Action that stores the reversed text back under the same key, keeping its flags.</returns>
Action Reverse(Stream source)
{
    string key = ValidateKey(source, true);
    string[] words = Symbols[key].Item1.Split(' ');
    Array.Reverse(words);
    // Join puts separators only between words, so no trailing space is added and
    // reversing twice gives back the original text.
    string reversed = string.Join(" ", words);
    return () => Symbols[key] = new Tuple<string, VariableFlags>(reversed, Symbols[key].Item2);
}
/// <summary>
/// Prints the offending line followed by a second line that carries a caret under
/// the error column and the error message, then truncates the stream.
/// </summary>
/// <param name="lineStart">Stream position at which the line begins.</param>
/// <param name="caratPos">Stream position the caret should point at.</param>
/// <param name="errorMessage">Text shown after the caret.</param>
/// <param name="source">The command stream.</param>
static void WriteDebugLine(long lineStart, long caratPos, string errorMessage, Stream source)
{
    source.Position = lineStart;
    string fullLine = GetNextLine(source);

    // A caret position before the start of the line is clamped to column 0.
    long offset = caratPos - lineStart;
    int indent = offset >= 0 ? (int)offset : 0;

    Console.WriteLine(fullLine);
    Console.WriteLine(new string(' ', indent) + "^ " + errorMessage);

    // Cut the stream at its current position (the line read above does not move it).
    source.SetLength(source.Position);
}
#endregion
#region Data Handling
// Data Handling
/// <summary>
/// Parses and evaluates the expression at the current position: a sequence of values
/// (literals or variable names) optionally separated by '+', concatenated in order.
/// Stops at the end of the stream or just before the end-of-statement character.
/// </summary>
/// <param name="s">Stream positioned before the expression.</param>
/// <param name="expression">Receives the concatenated value.</param>
/// <returns>The stream position at which parsing stopped (the stream is left there).</returns>
/// <exception cref="ParserException">A value could not be parsed.</exception>
long FindExpression(Stream s, out string expression)
{
    StringBuilder result = new StringBuilder();
    // iterate through values until we reach either the end of the stream or the end-of-statement
    while (s.Position < s.Length && !IsNextEoS(s))
    {
        if (IsNextEoS(s, '+'))
        {
            // Consume exactly the '+'. Reading a whole "word" here would also swallow
            // an operand written directly after it, as in: "a" +"b"
            SkipWhitespace(s);
            ReadChar(s);
        }
        else
        {
            long val = FindValue(s, out string value);
            if (val < 0)
            {
                throw new ParserException("Could not parse value");
            }
            s.Position = val;
            result.Append(value);
        }
    }
    expression = result.ToString();
    return s.Position;
}
/// <summary>
/// Looks ahead, past any whitespace, and reports whether the first other character
/// is <paramref name="EoSChar"/> (';' by default). The stream position is not changed.
/// </summary>
/// <param name="s">Stream to look ahead in.</param>
/// <param name="EoSChar">Character to test for.</param>
/// <returns>true if the next non-whitespace char is <paramref name="EoSChar"/>, else false</returns>
static bool IsNextEoS(Stream s, char EoSChar = ';')
{
    long origin = s.Position;
    // The first ReadChar re-reads the peeked character; from then on the stream advances.
    // A value of 0 marks the end of the stream.
    for (char candidate = PeekChar(s); ; candidate = ReadChar(s))
    {
        bool keepSkipping = candidate != 0 && char.IsWhiteSpace(candidate);
        if (!keepSkipping)
        {
            s.Position = origin;
            return candidate == EoSChar;
        }
    }
}
/// <summary>
/// Reads the next value of an expression: either a double-quoted literal or the
/// name of a variable, whose stored value is returned.
/// </summary>
/// <param name="s">Stream positioned before the value; leading whitespace is skipped.</param>
/// <param name="returnedValue">Receives the literal text or the variable's value.</param>
/// <returns>The stream position at which the value ends.</returns>
/// <exception cref="ParserException">The word is not a key of the symbol table.</exception>
long FindValue(Stream s, out string returnedValue)
{
    SkipWhitespace(s);
    // A leading double quote marks the start of a literal.
    if (PeekChar(s) == '\"')
    {
        return FindLiteral(s, out returnedValue);
    }

    long identifierEnd = FindExistingIdentifier(s, out string keyValue);
    if (!Symbols.TryGetValue(keyValue, out Tuple<string, VariableFlags> entry))
    {
        throw new ParserException("Could not find key: " + keyValue, 0);
    }
    returnedValue = entry.Item1;
    return identifierEnd;
}
/// <summary>
/// Reads the next word as an identifier; a direct pass-through to FindNextWord.
/// </summary>
/// <param name="s">Stream positioned before the identifier.</param>
/// <param name="returnedKey">Receives the word that was read.</param>
/// <returns>The end position reported by FindNextWord.</returns>
long FindIdentifier(Stream s, out string returnedKey) => FindNextWord(s, out returnedKey);
/// <summary>
/// Reads the next word as an identifier and strips a trailing semi-colon from it.
/// No symbol-table lookup happens here; that is left to the caller.
/// </summary>
/// <param name="s">Stream positioned before the identifier.</param>
/// <param name="returnedKey">Receives the identifier.</param>
/// <returns>The end position of the identifier.</returns>
long FindExistingIdentifier(Stream s, out string returnedKey)
{
    long wordEnd = FindNextWord(s, out returnedKey);
    bool endsWithSemicolon = returnedKey.Length > 1 && returnedKey.EndsWith(';');
    if (endsWithSemicolon)
    {
        // Drop the semi-colon from the word and move the end position and the
        // stream back by one so that it is not treated as part of the name.
        returnedKey = returnedKey.TrimEnd(';');
        wordEnd -= 1;
        s.Position -= 1;
    }
    return wordEnd;
}
/// <summary>
/// Reads a double-quoted literal and returns the stream to its original position.
/// A quote preceded by an odd number of backslashes is treated as escaped: it does
/// not end the literal and is returned without its escaping backslash. All other
/// backslashes are kept as written.
/// </summary>
/// <param name="s">Stream positioned at the opening quote.</param>
/// <param name="returnedLiteral">Receives the text between the quotes.</param>
/// <returns>
/// The position just past the closing quote, or the position at which the search
/// stopped when no closing quote was found.
/// </returns>
/// <exception cref="ParserException">The search reported a negative position.</exception>
long FindLiteral(Stream s, out string returnedLiteral)
{
    long pos = s.Position;
    // Step over the opening quote, if present.
    if (PeekChar(s) == '\"') ReadChar(s);
    long contentStart = s.Position;
    long resultPosition = FindNextOccurance(s, (c, stream) =>
    {
        if (c != '\"')
        {
            return false;
        }
        // The stream now sits just past the quote. Count the backslashes directly in
        // front of it; working on raw bytes is safe because 0x5C never occurs inside
        // a multi-byte UTF-8 sequence.
        long afterQuote = stream.Position;
        int backslashes = 0;
        for (long probe = afterQuote - 2; probe >= contentStart; probe--)
        {
            stream.Position = probe;
            if (stream.ReadByte() != '\\')
            {
                break;
            }
            backslashes++;
        }
        stream.Position = afterQuote;
        // An even count means the backslashes pair up and the quote is a real terminator.
        return backslashes % 2 == 0;
    }, out string resultLiteral);
    if (resultPosition > -1)
    {
        // Every \" left in the captured text is an escaped quote; drop the backslash.
        returnedLiteral = resultLiteral.Replace("\\\"", "\"");
    }
    else
    {
        throw new ParserException("Could not parse the literal");
    }
    s.Position = pos;
    return resultPosition;
}
#endregion
}
#region HelperFunctions
/// <summary>
/// Returns the text from the current position up to, but not including, the next
/// '\n' (or up to the end of the stream). A preceding '\r' is not removed, and the
/// stream position is left where it was.
/// </summary>
/// <param name="s">Stream to read from.</param>
/// <returns>The captured text.</returns>
static string GetNextLine(Stream s)
{
    FindNextOccurance(s, '\n', out string captured);
    return captured;
}
/// <summary>
/// Reads the next word: leading whitespace is skipped, then characters are collected
/// until whitespace, ';' or the end of the stream. The stream is returned to its
/// original position afterwards.
/// </summary>
/// <param name="s">Stream to read from.</param>
/// <param name="nextWord">Receives the word. The first character read after the whitespace run is always included.</param>
/// <returns>The stream position at which the word ends.</returns>
static long FindNextWord(Stream s, out string nextWord)
{
    long origin = s.Position;
    StringBuilder word = new();

    // Whitespace includes carriage returns and line feeds, so a statement such as
    //   set
    //   var
    //   "expression";
    // may be spread over several lines.
    char current;
    do
    {
        current = ReadChar(s);
    }
    while (s.Position < s.Length && char.IsWhiteSpace(current));

    // Whatever was read last starts the word.
    word.Append(current);

    while (s.Position < s.Length)
    {
        current = ReadChar(s);
        bool endsWord = char.IsWhiteSpace(current) || current == ';';
        if (endsWord)
        {
            // Leave the terminator unread.
            s.Position--;
            break;
        }
        word.Append(current);
    }

    nextWord = word.ToString();
    long wordEnd = s.Position;
    s.Position = origin;
    return wordEnd;
}
/// <summary>
/// Reads characters until <paramref name="p"/> returns true, a 0 character is read,
/// or the stream ends. The stream is returned to its original position afterwards.
/// </summary>
/// <param name="s">Stream to read from.</param>
/// <param name="p">A 'predicate'-like Func, called with each character and the stream (already advanced past that character).</param>
/// <param name="result">Receives the characters read before the search stopped; the stopping character is not included.</param>
/// <returns>The stream position at which the search stopped (just past the stopping character, if one was read).</returns>
static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
{
    long origin = s.Position;
    StringBuilder captured = new StringBuilder();
    while (s.Position < s.Length)
    {
        char candidate = ReadChar(s);
        // The predicate is not consulted for the 0 character.
        if (candidate == 0 || p(candidate, s))
        {
            break;
        }
        captured.Append(candidate);
    }
    result = captured.ToString();
    long stoppedAt = s.Position;
    s.Position = origin;
    return stoppedAt;
}
/// <summary>
/// Searches for the next occurrence of a specific character; a convenience wrapper
/// around the predicate-based overload.
/// </summary>
/// <param name="s">Stream to read from.</param>
/// <param name="c">Character to search for.</param>
/// <param name="result">Captures the string read in searching for the character</param>
/// <returns>The position reported by the predicate-based overload.</returns>
static long FindNextOccurance(Stream s, char c, out string result)
{
    Func<char, Stream, bool> matchesTarget = (candidate, _) => candidate == c;
    return FindNextOccurance(s, matchesTarget, out result);
}
/// <summary>
/// Reads the next UTF-8 encoded character from the stream and advances the stream
/// by exactly the number of bytes that character occupies.
/// </summary>
/// <param name="s">Stream to read from.</param>
/// <returns>
/// The decoded character, or (char)0 at the end of the stream. For a 4-byte sequence
/// only the first UTF-16 code unit of the decoded text is returned.
/// </returns>
/// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
static char ReadChar(Stream s)
{
    int firstByte = s.ReadByte();
    if (firstByte == -1)
    {
        return (char)0;
    }

    // The high bits of the lead byte give the length of the sequence; plain ASCII
    // (and anything unrecognised) is treated as a single byte.
    int continuationCount = 0;
    if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
    {
        continuationCount = 3;
    }
    else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
    {
        continuationCount = 2;
    }
    else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
    {
        continuationCount = 1;
    }

    byte[] charBytes = new byte[continuationCount + 1];
    charBytes[0] = (byte)firstByte;
    // Read every continuation byte; ReadByte itself advances the stream, so no
    // further position adjustment is needed afterwards.
    for (int i = 1; i <= continuationCount; i++)
    {
        int nextByte = s.ReadByte();
        // Continuation bytes look like 10xxxxxx; -1 (end of stream) fails this test too.
        if (nextByte >> 6 != 2) throw new InvalidDataException("Character is not a valid UTF-8 code point!");
        charBytes[i] = (byte)nextByte;
    }
    return Encoding.UTF8.GetString(charBytes)[0];
}
/// <summary>
/// Returns the next character without consuming it: the character is read and the
/// stream is then moved back to where it was.
/// </summary>
/// <param name="s">Stream to look ahead in.</param>
/// <returns>The character ReadChar returns at the current position.</returns>
static char PeekChar(Stream s)
{
    long origin = s.Position;
    char upcoming = ReadChar(s);
    s.Position = origin;
    return upcoming;
}
/// <summary>
/// Reads the UTF-8 character that ends immediately before the current position.
/// The stream position is left unchanged.
/// </summary>
/// <param name="s">Seekable stream.</param>
/// <returns>The previous character, or (char)0 when the stream is at position 0.</returns>
static char PreviousChar(Stream s)
{
    long origin = s.Position;
    Stack<byte> charBytes = new Stack<byte>(4);
    // Walk backwards one byte at a time (at most 4 bytes, the longest UTF-8 sequence).
    for (long cursor = origin - 1; cursor >= 0 && charBytes.Count < 4; cursor--)
    {
        s.Position = cursor;
        byte read = (byte)s.ReadByte();
        charBytes.Push(read);
        // Anything that is not a continuation byte (10xxxxxx) starts the character.
        if (read >> 6 != 2) break;
    }
    s.Position = origin;
    if (charBytes.Count == 0)
    {
        return (char)0;
    }
    // Stack.ToArray yields the most recently pushed byte first, which puts the bytes
    // back into stream order.
    return Encoding.UTF8.GetString(charBytes.ToArray())[0];
}
/// <summary>
/// Advances the stream past any run of whitespace characters at the current
/// position; stops at the first other character or at the end of the stream.
/// </summary>
/// <param name="s">Stream to advance.</param>
static void SkipWhitespace(Stream s)
{
    while (s.Position < s.Length && char.IsWhiteSpace(PeekChar(s)))
    {
        // Consume the character that was just peeked, however many bytes it takes.
        ReadChar(s);
    }
}
/// <summary>
/// Centres <paramref name="source"/> in a field of <paramref name="totalPadding"/>
/// characters. When the free space cannot be split evenly, the extra character goes
/// on the right.
/// </summary>
/// <param name="source">Text to centre.</param>
/// <param name="totalPadding">Total width of the resulting string.</param>
/// <param name="paddingChar">Character used to fill the free space.</param>
/// <returns>The padded text, or <paramref name="source"/> unchanged if it is already at least that wide.</returns>
static string CenterString(string source, int totalPadding, char paddingChar=' ')
{
    if (source.Length >= totalPadding) return source;
    int leftPad = (totalPadding - source.Length) / 2;
    return source.PadLeft(source.Length + leftPad, paddingChar).PadRight(totalPadding, paddingChar);
}
#endregion
/// <summary>
/// Raised by the parser when a statement cannot be processed.
/// </summary>
public class ParserException : Exception
{
    /// <summary>
    /// Importance is used to signify how the parser should respond to the error.
    /// A code of 3 or greater is a critical error; the application will throw the error further up the call and exit.
    /// 0 implies the line may be retried.
    /// 1 should imply the current block is not valid and should be retried.
    /// </summary>
    public int Importance = 0;
    /// <summary>
    /// Stream position the error refers to; -1 when no position was supplied.
    /// </summary>
    public long LinePosition = -1;
    /// <summary>Creates an exception carrying a message, an importance and a position.</summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="importance">Stored in <see cref="Importance"/>.</param>
    /// <param name="linePos">Stored in <see cref="LinePosition"/>.</param>
    public ParserException(string message, int importance, long linePos) : base(message)
    {
        Importance = importance;
        LinePosition = linePos;
    }
    /// <summary>Creates an exception carrying a message and an importance; the position stays -1.</summary>
    /// <param name="message">Description of the error.</param>
    /// <param name="importance">Stored in <see cref="Importance"/>.</param>
    public ParserException(string message, int importance) : base(message)
    {
        Importance = importance;
    }
    /// <summary>Creates an exception with importance 0 and no position.</summary>
    /// <param name="message">Description of the error.</param>
    public ParserException(string message) : base(message)
    {
    }
}
}
}