Friday, April 16, 2010

Objective-C to Lua translator

Supposing, of course, that speculation is correct and iPhone developers will be prohibited from originating their apps using any language other than C, C++, and Objective-C, it's only natural that one should translate those languages to the language of one's choice. I've spent a few hours trying to do just that. Naturally, I used Lua's excellent LPEG library, and googled for bits of Objective-C grammar.

Here's the input to the translator:

// This is a comment
#import "foobar"

int i = 1;

-(void)mySelector:(int)count
{
    int foo = 3;
    int bar;
}

The results of the translation are:

-- This is a comment
 require "foobar"
 local i = 1; function mySelector(count) local foo = 3; local bar; end 


The Lua code is:

require 're'

-- Objective-C -> Lua translator grammar, written for LPeg's `re` module.
--
-- `parser` is a compiled pattern whose start rule `S` matches a sequence of
-- external declarations (whitespace, comments, #import/#include lines,
-- variable declarations and instance-method definitions). Translated text is
-- produced through string captures (`{...} -> '...'`), so `parser:match(src)`
-- returns one value per translated fragment, or nil when `src` does not match.
--
-- Notes on non-obvious details:
--   * The grammar lives in a Lua long string, where backslash escapes are NOT
--     interpreted. Whitespace is therefore matched with the predefined class
--     `%s`; writing `\t` inside a `[...]` class would match a literal
--     backslash and the letter `t` (eating the first letter of identifiers
--     such as `total`) and would not match a tab at all.
--   * `re` string literals have no escape sequences either, so '\' is a
--     one-character string holding a backslash.
--   * `/` is ordered choice: multi-character operators must be listed before
--     their single-character prefixes ('<=' before '<'), otherwise the longer
--     alternative can never be selected.
--   * Most expression rules only recognise their input; only declarations,
--     imports, comments, selectors and the closing brace of a compound
--     statement emit output.
parser = re.compile[[
 S <- <externaldeclaration>*
 WS <- %s+
 NWS <- %s*
 IDENTIFIER <- ([%a$_] [%a$_%d]*)
 LINECOMMENT <- '//' {[^%nl]* %nl} -> '--%1'
 COMMENT <- '/*' <CLOSECOMMENT> -> '--'
 COMMENTMIDDLE <- {.} <CLOSECOMMENT> -> '%1'
 CLOSECOMMENT <- '*/' / <COMMENTMIDDLE>
 IMPORT <- ('#import' <WS> <filespecification>) / ('#include' <WS> <filespecification>)
 filespecification <- (["<] {[%a%d%s/_]+} [">]{%nl}) -> 'require "%1"%2'
 preprocessordeclaration <- <IMPORT> 
 identifier <- <IDENTIFIER>
 DECIMALLITERAL <- {%d+} <IntegerTypeSuffix>? -> '%1'
 IntegerTypeSuffix <- [uUlL]
 STRINGLITERAL <- { ('"' ( <EscapeSequence> / [^\"] )* '"') } -> '%1'
 EscapeSequence <- '\' [btnfr"'\] / <OctalEscape>
 OctalEscape <- ('\' [0-3] [0-7] [0-7]) / ('\' [0-7] [0-7]) / ('\' [0-7])
 constant <- <DECIMALLITERAL>

 externaldeclaration <-
  <WS>
  / <COMMENT> 
  / <LINECOMMENT> 
  / <preprocessordeclaration>
  / <declaration>
  / <instancemethoddefinition>

 typespecifier <- 
  ('void' / 'char' / 'short' / 'int' / 'long' / 'float' / 'double' / 'signed' / 'unsigned' 
  / <identifier>)

 declaration <- ( <declarationspecifiers> <NWS> <initdeclaratorlist>? <NWS> ';' <NWS> )
 declarationspecifiers <- <typespecifier>

 initdeclaratorlist <- <declarator>

 declarator <- <directdeclarator>

 directdeclarator <- {<identifier> <NWS> ( '=' <NWS> <primaryexpression> )? <NWS> } -> "local %1;"

 expression <- (<assignmentexpression> <NWS> (',' <NWS> <assignmentexpression> <NWS> )* )

 assignmentexpression <- <conditionalexpression> ( <NWS> <assignmentoperator> <NWS>  <assignmentexpression>)?

 assignmentoperator <- ('=' / '*=' / '/=' / '%=' / '+=' / '-=' / '<<=' / '>>=' / '&=' / '^=' / '|=') <NWS>

 conditionalexpression <- <logicalorexpression> <NWS> ('?' <NWS> <logicalorexpression> <NWS> ':' <NWS> <logicalorexpression> <NWS>)?

 constantexpression <- <conditionalexpression>

 logicalorexpression <- <logicalandexpression> <NWS> 
   ('||' <NWS> <logicalandexpression> <NWS>)*

 logicalandexpression <- <inclusiveorexpression> <NWS> 
   ('&&' <NWS>  <inclusiveorexpression> <NWS>)*

 inclusiveorexpression <- <exclusiveorexpression> <NWS> 
   ('|' <NWS> <exclusiveorexpression> <NWS>)*

 exclusiveorexpression <- <andexpression> <NWS>  ('^' <NWS>  <andexpression> <NWS>)*

 andexpression <- <equalityexpression> <NWS> ('&' <NWS> <equalityexpression> <NWS>)*

 equalityexpression <- <relationalexpression> <NWS>
   (('!=' / '==') <NWS> <relationalexpression> <NWS>)*

 relationalexpression <- <shiftexpression> <NWS>
  (('<=' / '>=' / '<' / '>') <NWS> <shiftexpression> <NWS>)*

 shiftexpression <- <additiveexpression> <NWS> (('<<' / '>>') <NWS> <additiveexpression> <NWS>)*

 additiveexpression <- <multiplicativeexpression> <NWS> 
   (('+' / '-') <NWS> <multiplicativeexpression> <NWS>)*

 multiplicativeexpression <- <castexpression> <NWS> 
   (('*' / '/' / '%') <NWS> <castexpression> <NWS>)*

 castexpression <- ('(' <NWS> <typename> <NWS> ')' <NWS> <castexpression>) / (<unaryexpression> <NWS>)

 unaryexpression <- <postfixexpression>
   / ('++' <NWS> <unaryexpression> <NWS> )
   / ('--' <NWS> <unaryexpression> <NWS> )
   / (<unaryoperator> <NWS> <castexpression> <NWS> )
   / ('sizeof' <NWS> ( ('(' <NWS> <typename> <NWS> ')') / (<unaryexpression> <NWS>) ) )

 unaryoperator <- ( ( '&' / '*' / '-' / '~' / '!' ) <NWS>)

 postfixexpression <- <primaryexpression>
   ( ('[' <NWS> <expression> <NWS> ']' <NWS>)
   / ('.' <NWS> <identifier> <NWS>)
   / ('->' <NWS> <identifier> <NWS>)
   / ('++' <NWS>)
   / ('--' <NWS>)
   )*

 primaryexpression <-
  ( <IDENTIFIER> <NWS> )
  / ( <constant> <NWS> )
  / ( <STRINGLITERAL> <NWS> )

 instancemethoddefinition <- ('-' <NWS> <methoddefinition> <NWS>)
   
 methoddefinition <- (<methodtype>)? <methodselector> <compoundstatement>
 
 methodselector <- (<keyworddeclarator>+ / <plainselector>)
 plainselector <- {<selector>} <NWS> -> 'function %1()'

 selector <- <IDENTIFIER>

 methodtype <- '(' <NWS> <typename> <NWS> ')' <NWS>

 typename <- (<typespecifier>)
 keyworddeclarator <- ( { <selector> } <NWS> ':' <NWS> <parameter> <NWS> ) -> 'function %1(%2)'

 parameter <- <methodtype> {<IDENTIFIER>}
 
 compoundstatement <- '{' <NWS> (<declaration>)* <NWS> <statementlist>? <NWS> '}' <NWS> -> 'end'

 statementlist <- (<statement>)+ 

 statement <- <WS>
  / ';'
  / (<expression> <NWS> ';')
  / <compoundstatement>
  / <selectionstatement>

 selectionstatement
  <- ('if' <NWS> '(' <NWS> <expression> <NWS> ')' <statement> <NWS> ('else' <NWS> <statement>)? <NWS> )
]]

-- Demo: run the translator over a small Objective-C sample and print every
-- value returned by the match (one per translated fragment).
local objc_sample = [[
// This is a comment
#import "foobar"

int i = 1;

-(void)mySelector:(int)count
{
    int foo = 3;
    int bar;
}
]]

print(parser:match(objc_sample))



No comments: