2021-03-11 12:56:25 +13:00
using System ;
2021-03-11 16:18:05 +13:00
using System.Collections.Generic ;
2021-03-11 12:56:25 +13:00
using System.IO ;
using System.Text ;
namespace Assignment_1
{
class Program
{
2021-03-12 11:00:29 +13:00
/// <summary>
/// Bit flags stored next to every value in the symbol table.
/// </summary>
[Flags]
enum VariableFlags
{
    /// <summary>No flags; used for symbols created by the set statement.</summary>
    Empty = 0,

    /// <summary>The symbol is a built-in constant; Append and Set refuse to modify it.</summary>
    Reserved = 1 << 0,

    /// <summary>Set on the built-in whitespace constants. Not read anywhere in the visible code.</summary>
    NoPrint = 1 << 1
}
/// <summary>
/// The interpreter's symbol table: maps a symbol name to its current string value and its flags.
/// It starts out holding four whitespace constants, each marked Reserved and NoPrint.
/// </summary>
static Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>
{
    ["SPACE"] = Tuple.Create(" ", VariableFlags.Reserved | VariableFlags.NoPrint),
    ["TAB"] = Tuple.Create("\t", VariableFlags.Reserved | VariableFlags.NoPrint),
    ["NEWLINE"] = Tuple.Create("\n", VariableFlags.Reserved | VariableFlags.NoPrint),
    ["CARRIAGE_RETURN"] = Tuple.Create("\r", VariableFlags.Reserved | VariableFlags.NoPrint)
};
2021-03-11 12:56:25 +13:00
/// <summary>
/// Program entry point. Prints the banner, loads the program text into an in-memory stream and
/// hands it to a <see cref="Parser"/>.
/// When standard input is redirected the whole input is read up front; otherwise the stream is
/// seeded with an opening brace and the parser is told to prompt for statements interactively.
/// </summary>
/// <param name="args">Command line arguments (unused).</param>
static void Main(string[] args)
{
    string[] banner =
    {
        "┌──────────────────────────────────────────┐",
        "│ 159.341 2021 Semester 1, Assignment 1 │",
        "│ Submitted by Brychan Dempsey, 14299890 │",
        "└──────────────────────────────────────────┘"
    };
    foreach (string bannerLine in banner)
    {
        Console.WriteLine(bannerLine);
    }

    // The source is kept in memory so the parser can seek back and forth while interpreting.
    var buffer = new MemoryStream(1024);

    // Redirected (piped) input is treated as a complete program; a real console is interactive.
    // See https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
    bool interactive = !Console.IsInputRedirected;
    string seedText = interactive ? "{ \r\n" : Console.In.ReadToEnd();

    byte[] seedBytes = Encoding.UTF8.GetBytes(seedText);
    buffer.Write(seedBytes, 0, seedBytes.Length);
    buffer.Position = 0;

    new Parser().StartParsing(buffer, interactive);

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
public class Parser
{
2021-03-11 16:18:05 +13:00
/// <summary>
/// The statement keywords the parser recognises. The member names are matched against the
/// first word of each statement, so they are spelled exactly as they appear in source text.
/// </summary>
public enum statements
{
    exit = 0,
    append = 1,
    list = 2,
    print = 3,
    printlength = 4,
    printwords = 5,
    printwordcount = 6,
    set = 7,
    reverse = 8
}
2021-03-11 16:18:05 +13:00
/// <summary>
/// Interprets the program held in <paramref name="source"/>.
/// The stream must start with '{'; if it does not, nothing is parsed. Statements are then read
/// one at a time: the first word selects the handler, which parses the rest of the statement.
/// </summary>
/// <param name="source">Seekable stream holding UTF-8 program text, positioned at its start.</param>
/// <param name="dynamicInput">
/// When true, a line is read from the console before every statement and written into the stream
/// at the current position (anything still unread there is discarded). When false, parsing stops
/// once the stream is exhausted.
/// </param>
public void StartParsing(Stream source, bool dynamicInput = false)
{
    if (source.ReadByte() != '{')
    {
        return;
    }

    while (true)
    {
        if (dynamicInput)
        {
            Console.WriteLine("Enter a command: ");
            string line = Console.ReadLine();
            if (line == null)
            {
                // Console input was closed; there is nothing more to interpret.
                return;
            }

            long pos = source.Position;
            // Drop whatever was left unread so a shorter new line cannot leave stale bytes behind.
            source.SetLength(pos);
            byte[] lineBytes = Encoding.UTF8.GetBytes(line);
            source.Write(lineBytes, 0, lineBytes.Length);
            source.Position = pos;
        }

        // Only skip when there is something to read, so the skip cannot step back past the end.
        if (source.Position < source.Length)
        {
            SkipWhitespace(source);
        }

        long position = FindNextWord(source, out string word);
        if (word.Length == 0)
        {
            if (dynamicInput)
            {
                continue;   // blank line: just prompt again
            }
            return;         // piped program fully consumed
        }

        if (word == "}")
        {
            // Closing brace matches the opening '{' checked above: end of program.
            return;
        }

        // IsDefined matches names only, so numeric text such as "0" is not mistaken for a statement.
        if (!Enum.IsDefined(typeof(statements), word))
        {
            Console.WriteLine("Failed parsing statement");
            if (!dynamicInput)
            {
                // Step over the offending word so piped input always makes progress.
                source.Position = position;
            }
            continue;
        }

        source.Position = position;
        switch ((statements)Enum.Parse(typeof(statements), word))
        {
            case statements.exit:
                Exit();
                break;
            case statements.append:
                Append(source);
                break;
            case statements.list:
                List();
                break;
            case statements.print:
                Print(source, 0);
                break;
            case statements.printlength:
                Print(source, 1);
                break;
            case statements.printwordcount:
                Print(source, 3);   // Print's mode 3 writes "Wordcount is: "
                break;
            case statements.printwords:
                Print(source, 2);   // Print's mode 2 writes "Words are:"
                break;
            case statements.set:
                Set(source);
                break;
            case statements.reverse:
                Reverse(source);
                break;
        }
    }
}
2021-03-11 16:18:05 +13:00
#region Function Handling
2021-03-12 11:10:24 +13:00
/// <summary>
/// Handles "append id value ;": appends the evaluated value to an existing, non-reserved symbol.
/// On any error the offending line is echoed with a caret message underneath, the stream is
/// truncated at the start of that line and false is returned.
/// On success the stream is left positioned after the terminating semicolon.
/// </summary>
/// <param name="source">Stream positioned just after the "append " keyword.</param>
/// <param name="lineStart">
/// Stream position where the statement starts, used for error display. When left at -1 it is
/// assumed to be the length of "append " before the current position.
/// </param>
/// <returns>true when the symbol was updated; false when the statement was rejected.</returns>
bool Append(Stream source, long lineStart = -1)
{
    if (lineStart == -1)
    {
        lineStart = source.Position - "append ".Length;
    }
    if (lineStart < 0)
    {
        lineStart = 0;
    }

    long keyEndPos = FindIdentifier(source, out string key);
    if (keyEndPos < 0 || !Symbols.TryGetValue(key, out Tuple<string, VariableFlags> current))
    {
        return ReportAppendError(source, lineStart, " ^ could not identify object");
    }
    source.Position = keyEndPos;

    long valuePos = FindValue(source, out string value);
    if (valuePos < 0)
    {
        return ReportAppendError(source, lineStart, CaretPadding(keyEndPos - lineStart) + "^ could not evaluate value");
    }
    source.Position = valuePos;

    long eolPos = FindNextWord(source, out string eol);
    if (eol.Length == 0 || eol[0] != ';')
    {
        return ReportAppendError(source, lineStart, CaretPadding(valuePos - lineStart) + "^ expected a semicolon");
    }

    if (current.Item2.HasFlag(VariableFlags.Reserved))
    {
        return ReportAppendError(source, lineStart, CaretPadding(keyEndPos - (key.Length + 1) - lineStart) + "^ cannot assign a value to a reserved constant");
    }

    Symbols[key] = new Tuple<string, VariableFlags>(current.Item1 + value, current.Item2);

    // Consume the terminator so the next statement is not parsed starting at ';'.
    source.Position = eolPos;
    return true;
}

/// <summary>Builds the run of spaces that lines the caret up; non-positive offsets fall back to the keyword length.</summary>
static string CaretPadding(long offset)
{
    return new string(' ', offset > 0 ? (int)offset : "append".Length);
}

/// <summary>Echoes the statement starting at lineStart with the message below it, truncates the stream there and returns false.</summary>
bool ReportAppendError(Stream source, long lineStart, string message)
{
    source.Position = lineStart;
    string fullLine = GetNextLine(source);   // reading the line does not move the stream position
    Console.WriteLine(fullLine);
    Console.WriteLine(message);
    // Remove the rejected text so it is not read again.
    source.SetLength(source.Position);
    return false;
}
2021-03-11 16:53:20 +13:00
/// <summary>
/// Handles "list": writes the symbol table to the console as a three-column box
/// (symbol, value with \r, \n and \t shown as escapes, flags as eight binary digits).
/// </summary>
void List()
{
    string keyBar = new string('─', 15);
    string valueBar = new string('─', 25);
    string flagBar = new string('─', 9);

    string topBorder = "┌" + keyBar + "┬" + valueBar + "┬" + flagBar + "┐";
    string rowBorder = "├" + keyBar + "┼" + valueBar + "┼" + flagBar + "┤";
    string bottomBorder = "└" + keyBar + "┴" + valueBar + "┴" + flagBar + "┘";
    const string rowFormat = "│{0,-15}│{1,-25}│{2,9}│";

    Console.WriteLine(topBorder);
    Console.WriteLine(rowFormat, "Symbol", "Value", "Flags");
    Console.WriteLine(rowBorder);

    int remaining = Symbols.Count;
    foreach (var item in Symbols)
    {
        remaining--;

        string shownValue = item.Value.Item1
            .Replace("\r", "\\r")
            .Replace("\n", "\\n")
            .Replace("\t", "\\t");
        string flagBits = Convert.ToString((byte)item.Value.Item2, 2).PadLeft(8, '0');

        Console.WriteLine(rowFormat, item.Key, shownValue, flagBits);
        // The last row closes the box; every other row is followed by a separator.
        Console.WriteLine(remaining == 0 ? bottomBorder : rowBorder);
    }
}
/// <summary>Handles "exit": terminates the process with exit code 0.</summary>
void Exit() => Environment.Exit(0);
/// <summary>
/// Evaluates the expression at the current stream position and writes it, or a property of it,
/// to the console, then moves the stream to the position reported by FindExpression.
/// </summary>
/// <param name="source">Stream positioned at the start of the expression.</param>
/// <param name="mode">
/// 0 writes the text; 1 writes its length; 3 writes the number of space-separated words;
/// 2 and every other value of 2 or more write each word on its own line; negative values write nothing.
/// </param>
/// <returns>false when the expression could not be parsed, otherwise true.</returns>
bool Print(Stream source, int mode = 0)
{
    long endPosition = FindExpression(source, out string text);
    if (endPosition < 0)
    {
        // Could not print
        return false;
    }

    switch (mode)
    {
        case 0:
            Console.WriteLine(text);
            break;

        case 1:
            Console.Write("Length of the expression is: ");
            Console.WriteLine(text.Length);
            break;

        case 3:
            Console.Write("Wordcount is: ");
            Console.WriteLine(text.Split(' ').Length);
            break;

        default:
            if (mode >= 2)
            {
                Console.WriteLine("Words are:");
                foreach (string word in text.Split(' '))
                {
                    Console.WriteLine(word);
                }
            }
            break;
    }

    source.Position = endPosition;
    return true;
}
2021-03-11 17:18:02 +13:00
/// <summary>
/// Handles "set id expression": stores the evaluated expression under the identifier,
/// creating the symbol (with no flags) when it does not exist yet and keeping the existing
/// flags when it does. Reserved symbols are rejected with an error message and the rest of the
/// stream is discarded.
/// </summary>
/// <param name="source">Stream positioned just after the "set" keyword.</param>
/// <returns>true when the symbol was stored; false when parsing failed or the symbol is reserved.</returns>
bool Set(Stream source)
{
    long cursor = FindIdentifier(source, out string identifier);
    if (cursor < 0)
    {
        // No identifier could be read.
        return false;
    }
    source.Position = cursor;

    cursor = FindExpression(source, out string expression);
    if (cursor < 0)
    {
        // The expression could not be evaluated.
        return false;
    }

    VariableFlags flags = VariableFlags.Empty;
    if (Symbols.TryGetValue(identifier, out Tuple<string, VariableFlags> existing))
    {
        if (existing.Item2.HasFlag(VariableFlags.Reserved))
        {
            Console.WriteLine("Error: Cannot assign to {0} as it is a reserved constant.", identifier);
            // Wipe the remainder of the stream, so that it doesn't get read
            source.SetLength(source.Position);
            return false;
        }
        flags = existing.Item2;
    }

    Symbols[identifier] = new Tuple<string, VariableFlags>(expression, flags);
    source.Position = cursor;
    return true;
}
2021-03-11 17:47:38 +13:00
/// <summary>
/// Handles "reverse id ;": reverses the order of the space-separated words stored in an
/// existing, non-reserved symbol. The stream is left positioned after the identifier and,
/// when one follows, its terminating semicolon.
/// </summary>
/// <param name="source">Stream positioned just after the "reverse" keyword.</param>
/// <returns>
/// true when the symbol was updated; false when no identifier was found, the symbol is not
/// defined, or it is a reserved constant (an error line is written for the last two).
/// </returns>
bool Reverse(Stream source)
{
    long resultPos = FindIdentifier(source, out string identifier);
    if (resultPos < 0)
    {
        // Couldn't match an identifier
        return false;
    }

    long statementEnd = resultPos;
    if (identifier.Length > 1 && identifier.EndsWith(";", StringComparison.Ordinal))
    {
        // "reverse x;" - the terminator was read as part of the word.
        identifier = identifier.TrimEnd(';');
    }
    else
    {
        // "reverse x ;" - consume a separate terminator when present.
        source.Position = resultPos;
        if (source.Position < source.Length)
        {
            SkipWhitespace(source);
        }
        long terminatorEnd = FindNextWord(source, out string terminator);
        if (terminator.Length > 0 && terminator[0] == ';')
        {
            statementEnd = terminatorEnd;
        }
    }
    // Move past the statement whether or not it succeeds, so it is not parsed a second time.
    source.Position = statementEnd;

    if (!Symbols.TryGetValue(identifier, out Tuple<string, VariableFlags> current))
    {
        Console.WriteLine("Error: Cannot reverse {0} as it is not defined.", identifier);
        return false;
    }
    if (current.Item2.HasFlag(VariableFlags.Reserved))
    {
        Console.WriteLine("Error: Cannot assign to {0} as it is a reserved constant.", identifier);
        return false;
    }

    string[] words = current.Item1.Split(' ');
    Array.Reverse(words);
    Symbols[identifier] = new Tuple<string, VariableFlags>(string.Join(" ", words), current.Item2);
    return true;
}
2021-03-11 16:18:05 +13:00
#endregion
#region Data Handling
// Data Handling
/// <summary>
/// Evaluates an expression: one value, optionally followed by any number of "+ value" pairs
/// (the "+" must be a whitespace-delimited word). The values are concatenated.
/// The stream position is restored to where it was on entry; callers seek to the returned position.
/// </summary>
/// <param name="s">Stream positioned at (or in the whitespace before) the expression.</param>
/// <param name="expression">The concatenated text, or "" on failure.</param>
/// <returns>
/// -1 when a value could not be evaluated. Otherwise the position after the expression:
/// past the terminating ';' word when one follows, else the end of the last value.
/// </returns>
long FindExpression(Stream s, out string expression)
{
    expression = "";
    long start = s.Position;

    // must contain at least one value
    long cursor = FindValue(s, out string value);
    if (cursor < 0)
    {
        s.Position = start;
        return -1;
    }

    StringBuilder text = new StringBuilder(value);
    while (true)
    {
        s.Position = cursor;
        if (s.Position < s.Length)
        {
            SkipWhitespace(s);
        }

        long wordEnd = FindNextWord(s, out string nextWord);
        if (nextWord == "+")
        {
            // Another operand follows: evaluate it as a value rather than copying the raw word.
            s.Position = wordEnd;
            cursor = FindValue(s, out value);
            if (cursor < 0)
            {
                s.Position = start;
                return -1;
            }
            text.Append(value);
            continue;
        }

        if (nextWord.Length > 0 && nextWord[0] == ';')
        {
            // Consume the terminator; any other word is left for the caller to parse.
            cursor = wordEnd;
        }
        break;
    }

    s.Position = start;
    expression = text.ToString();
    return cursor;
}

// Most atomic unit is 'value':
/// <summary>
/// Evaluates the next value in the stream: either a double-quoted literal or the name of a
/// symbol, whose stored text is returned. Leading whitespace is skipped.
/// The stream position is restored to where it was on entry.
/// </summary>
/// <param name="s">Stream positioned at (or in the whitespace before) the value.</param>
/// <param name="returnedValue">The literal text or the symbol's value; "" on failure.</param>
/// <returns>The position just past the value, or -1 at end of input, for an unknown symbol or a failed literal.</returns>
long FindValue(Stream s, out string returnedValue)
{
    returnedValue = "";
    long start = s.Position;

    if (s.Position < s.Length)
    {
        SkipWhitespace(s);
    }

    long end;
    int first = s.ReadByte();
    if (first == -1)
    {
        // Nothing left to read.
        end = -1;
    }
    else if (first == '\"')
    {
        end = FindLiteral(s, out returnedValue);
    }
    else
    {
        // Not a literal: un-read the byte and treat the word as a symbol name.
        s.Position--;
        end = FindExistingIdentifier(s, out string keyValue);
        if (Symbols.TryGetValue(keyValue, out Tuple<string, VariableFlags> entry))
        {
            returnedValue = entry.Item1;
        }
        else
        {
            end = -1;
        }
    }

    if (end < 0)
    {
        returnedValue = "";
    }
    s.Position = start;
    return end;
}

/// <summary>
/// Reads the next whitespace-delimited word as an identifier, skipping leading whitespace.
/// The word is not checked against the symbol table. The stream position is restored on return.
/// </summary>
/// <param name="s">Stream positioned at (or in the whitespace before) the identifier.</param>
/// <param name="returnedKey">The word that was read; "" when there was none.</param>
/// <returns>The position after the word as reported by FindNextWord, or -1 when no word was found.</returns>
long FindIdentifier(Stream s, out string returnedKey)
{
    long start = s.Position;
    if (s.Position < s.Length)
    {
        SkipWhitespace(s);
    }

    long wordEnd = FindNextWord(s, out returnedKey);
    s.Position = start;
    return returnedKey.Length == 0 ? -1 : wordEnd;
}

/// <summary>
/// Reads the next word as a symbol name, separating a semicolon attached to it ("x;").
/// Despite the name, the symbol table is not consulted here; the caller does the lookup.
/// </summary>
/// <param name="s">Stream positioned at the first character of the word.</param>
/// <param name="returnedKey">The word without any trailing semicolons.</param>
/// <returns>
/// When a semicolon was attached, the position of that semicolon so the caller can still read it
/// as the terminator; otherwise the position reported by FindNextWord.
/// </returns>
long FindExistingIdentifier(Stream s, out string returnedKey)
{
    long start = s.Position;
    long wordEnd = FindNextWord(s, out string identifier);
    if (identifier.Length > 1 && identifier.EndsWith(";", StringComparison.Ordinal))
    {
        identifier = identifier.TrimEnd(';');
        // Point at the first trailing ';' regardless of what follows the word.
        // NOTE(review): assumes the decoded name re-encodes to the same number of bytes it was read from.
        wordEnd = start + Encoding.UTF8.GetByteCount(identifier);
    }

    returnedKey = identifier;
    return wordEnd;
}
/// <summary>
/// Reads a string literal whose opening quote has already been consumed, up to the closing
/// unescaped double quote. A backslash escapes the character after it, so \" does not end the
/// literal; in the returned text \" becomes " and \\ becomes \ (other backslashes are kept).
/// The stream position is not advanced.
/// </summary>
/// <param name="s">Stream positioned just after the opening quote.</param>
/// <param name="returnedLiteral">The literal's text, or "" when no closing quote was found.</param>
/// <returns>The position just after the closing quote, or -1 when the literal is unterminated.</returns>
long FindLiteral(Stream s, out string returnedLiteral)
{
    bool escaped = false;   // the previous character was an unescaped backslash
    bool closed = false;    // the closing quote was actually seen (as opposed to end of input)

    long resultPosition = FindNextOccurance(s, (c, stream) =>
    {
        if (escaped)
        {
            escaped = false;
            return false;
        }
        if (c == '\\')
        {
            escaped = true;
            return false;
        }
        if (c == '\"')
        {
            closed = true;
            return true;
        }
        return false;
    }, out string rawLiteral);

    if (!closed)
    {
        returnedLiteral = "";
        return -1;
    }

    // Resolve the two escapes the scanner understands.
    StringBuilder unescaped = new StringBuilder(rawLiteral.Length);
    for (int i = 0; i < rawLiteral.Length; i++)
    {
        char current = rawLiteral[i];
        if (current == '\\' && i + 1 < rawLiteral.Length && (rawLiteral[i + 1] == '\"' || rawLiteral[i + 1] == '\\'))
        {
            i++;
            current = rawLiteral[i];
        }
        unescaped.Append(current);
    }

    returnedLiteral = unescaped.ToString();
    return resultPosition;
}
2021-03-11 16:18:05 +13:00
#endregion
2021-03-11 12:56:25 +13:00
}
2021-03-11 16:18:05 +13:00
/// <summary>
/// Reads the stream as UTF-8 text from the current position up to the next '\n' (or the end of
/// the stream) and returns it without the line terminator ('\n' or '\r\n').
/// The stream position is not advanced.
/// </summary>
/// <param name="s">Stream positioned at the start of the line to read.</param>
/// <returns>The text of the line, excluding its terminator.</returns>
static string GetNextLine(Stream s)
{
    FindNextOccurance(s, '\n', out string nextLine);

    // The search stops at '\n'; drop the '\r' of a Windows line ending as well.
    if (nextLine.EndsWith("\r", StringComparison.Ordinal))
    {
        nextLine = nextLine.Substring(0, nextLine.Length - 1);
    }
    return nextLine;
}
/// <summary>
/// Finds the first word in a string, where a word is a run of non-whitespace characters.
/// Leading whitespace is skipped.
/// </summary>
/// <param name="s">The text to search.</param>
/// <param name="nextWord">The word found, or "" when there is none.</param>
/// <returns>
/// -1 when <paramref name="s"/> is null or empty; otherwise the index just past the last
/// character of the word (the string's length when it contains only whitespace).
/// </returns>
static long FindNextWord(string s, out string nextWord)
{
    nextWord = "";
    if (string.IsNullOrEmpty(s))
    {
        return -1;
    }

    // remove whitespace from the start
    int wordStart = 0;
    while (wordStart < s.Length && char.IsWhiteSpace(s[wordStart]))
    {
        wordStart++;
    }

    int wordEnd = wordStart;
    while (wordEnd < s.Length && !char.IsWhiteSpace(s[wordEnd]))
    {
        wordEnd++;
    }

    // Substring takes a length, not an end index.
    nextWord = s.Substring(wordStart, wordEnd - wordStart);
    return wordEnd;
}
/// <summary>
/// Reads the next word from the stream: every character from the current position up to the
/// next whitespace character or the end of the stream.
/// </summary>
/// <param name="s">Stream to read from.</param>
/// <param name="nextWord">The characters read before the whitespace was reached.</param>
/// <returns>The position reported by FindNextOccurance for that whitespace character.</returns>
static long FindNextWord(Stream s, out string nextWord)
{
    Func<char, Stream, bool> isWordBoundary = (candidate, ignored) => char.IsWhiteSpace(candidate);
    return FindNextOccurance(s, isWordBoundary, out nextWord);
}
/// <summary>
/// Reads characters from the stream until the predicate accepts one or the input ends
/// (GetChar returning the NUL character is treated as the end).
/// The stream position is put back to where it was on entry before returning.
/// </summary>
/// <param name="s">Stream to scan from its current position.</param>
/// <param name="p">Called with each character and the stream; returning true stops the scan.</param>
/// <param name="result">The characters read before the scan stopped (the accepted character is excluded).</param>
/// <returns>The stream position at the moment the scan stopped, i.e. after the last character read.</returns>
static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
{
    long origin = s.Position;
    StringBuilder collected = new StringBuilder();

    for (; ; )
    {
        char current = GetChar(s);
        // The predicate is not consulted for the end-of-input marker.
        if (current == 0 || p(current, s))
        {
            break;
        }
        collected.Append(current);
    }

    result = collected.ToString();
    long stoppedAt = s.Position;
    s.Position = origin;
    return stoppedAt;
}
/// <summary>
/// Scans the stream for the next occurrence of a specific character.
/// </summary>
/// <param name="s">Stream to scan from its current position.</param>
/// <param name="c">The character to look for.</param>
/// <param name="result">Captures the string read in searching for the character.</param>
/// <returns>The position reported by the predicate-based overload.</returns>
static long FindNextOccurance(Stream s, char c, out string result)
{
    bool IsTarget(char streamChar, Stream ignored) => streamChar == c;
    return FindNextOccurance(s, IsTarget, out result);
}
/// <summary>
/// Reads one UTF-8 encoded character from the stream, consuming exactly the bytes it occupies.
/// </summary>
/// <param name="s">Stream positioned at the first byte of a character.</param>
/// <returns>
/// The decoded character, or the NUL character when the stream is at its end. For a 4-byte
/// sequence only the first UTF-16 code unit of the decoded text is returned.
/// </returns>
/// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
static char GetChar(Stream s)
{
    // UTF-8 code points can span several bytes, so a single byte is not always a whole character.
    // ASCII uses 7 bits; the high bits of the first byte tell how many continuation bytes follow.
    int firstByte = s.ReadByte();
    if (firstByte == -1)
    {
        return (char)0;
    }

    int continuationCount = 0;
    if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
    {
        continuationCount = 3;
    }
    else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
    {
        continuationCount = 2;
    }
    else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
    {
        continuationCount = 1;
    }

    byte[] charBytes = new byte[continuationCount + 1];
    charBytes[0] = (byte)firstByte;
    // Inclusive bound: every continuation byte is read here, which also advances the stream
    // by exactly the right amount, so no further position adjustment is needed.
    for (int i = 1; i <= continuationCount; i++)
    {
        int nextByte = s.ReadByte();
        if (nextByte >> 6 != 2)
        {
            throw new InvalidDataException("Character is not a valid UTF-8 code point!");
        }
        charBytes[i] = (byte)nextByte;
    }

    string converted = Encoding.UTF8.GetString(charBytes);
    return converted[0];
}
/// <summary>
/// Advances the stream past any whitespace bytes, leaving it positioned on the first
/// non-whitespace byte, or at the end of the stream when only whitespace (or nothing) remains.
/// </summary>
/// <param name="s">Seekable stream to advance.</param>
static void SkipWhitespace(Stream s)
{
    int readByte = s.ReadByte();
    while (readByte > -1 && char.IsWhiteSpace((char)readByte))
    {
        readByte = s.ReadByte();
    }

    // Un-read the non-whitespace byte that ended the loop. At end of stream nothing was
    // consumed by the final read, so stepping back would wrongly re-expose the previous byte.
    if (readByte > -1)
    {
        s.Position--;
    }
}
}
}