lexer.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. // Copyright 2017 The go-ethereum Authors
  2. // This file is part of the go-ethereum library.
  3. //
  4. // The go-ethereum library is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Lesser General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // The go-ethereum library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Lesser General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Lesser General Public License
  15. // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
  16. package asm
  17. import (
  18. "fmt"
  19. "os"
  20. "strings"
  21. "unicode"
  22. "unicode/utf8"
  23. )
  24. // stateFn is used through the lifetime of the
  25. // lexer to parse the different values at the
  26. // current state.
  27. type stateFn func(*lexer) stateFn
  28. // token is emitted when the lexer has discovered
  29. // a new parsable token. These are delivered over
  30. // the tokens channels of the lexer
  31. type token struct {
  32. typ tokenType
  33. lineno int
  34. text string
  35. }
  // tokenType identifies the kind of a token emitted by the lexer.
  type tokenType int

  // Token kinds, numbered from zero in declaration order.
  const (
  	// eof marks the end of the file.
  	eof tokenType = iota
  	// lineStart is emitted when a line starts.
  	lineStart
  	// lineEnd is emitted when a line ends.
  	lineEnd
  	// invalidStatement stands for any invalid statement.
  	invalidStatement
  	// element is any element found during element parsing.
  	element
  	// label is emitted when a label is found.
  	label
  	// labelDef is emitted when a new label definition is found.
  	labelDef
  	// number is emitted when a number is found.
  	number
  	// stringValue is emitted when a string has been found.
  	stringValue
  )

  // Character sets handed to the lexer's accept helpers.
  const (
  	// Numbers holds the characters that make up a decimal number.
  	Numbers = "1234567890"

  	// HexadecimalNumbers holds the characters that make up a hexadecimal number.
  	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"

  	// Alpha holds the lower- and upper-case ASCII letters.
  	Alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ"
  )

  // String implements fmt.Stringer. It returns the human-readable name of the
  // token type, or "invalid" for any value that has no entry in
  // stringtokenTypes.
  func (it tokenType) String() string {
  	// Both bounds are checked: a value equal to the table length or a
  	// negative value would otherwise index out of range and panic.
  	if it < 0 || int(it) >= len(stringtokenTypes) {
  		return "invalid"
  	}
  	return stringtokenTypes[it]
  }

  // stringtokenTypes maps every token type to its human-readable name,
  // indexed by the token type's numeric value.
  var stringtokenTypes = []string{
  	eof:              "EOF",
  	lineStart:        "new line",
  	lineEnd:          "end of line",
  	invalidStatement: "invalid statement",
  	element:          "element",
  	label:            "label",
  	labelDef:         "label definition",
  	number:           "number",
  	stringValue:      "string",
  }
  71. // lexer is the basic construct for parsing
  72. // source code and turning them in to tokens.
  73. // Tokens are interpreted by the compiler.
  74. type lexer struct {
  75. input string // input contains the source code of the program
  76. tokens chan token // tokens is used to deliver tokens to the listener
  77. state stateFn // the current state function
  78. lineno int // current line number in the source file
  79. start, pos, width int // positions for lexing and returning value
  80. debug bool // flag for triggering debug output
  81. }
  82. // lex lexes the program by name with the given source. It returns a
  83. // channel on which the tokens are delivered.
  84. func Lex(source []byte, debug bool) <-chan token {
  85. ch := make(chan token)
  86. l := &lexer{
  87. input: string(source),
  88. tokens: ch,
  89. state: lexLine,
  90. debug: debug,
  91. }
  92. go func() {
  93. l.emit(lineStart)
  94. for l.state != nil {
  95. l.state = l.state(l)
  96. }
  97. l.emit(eof)
  98. close(l.tokens)
  99. }()
  100. return ch
  101. }
  102. // next returns the next rune in the program's source.
  103. func (l *lexer) next() (rune rune) {
  104. if l.pos >= len(l.input) {
  105. l.width = 0
  106. return 0
  107. }
  108. rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
  109. l.pos += l.width
  110. return rune
  111. }
  112. // backup backsup the last parsed element (multi-character)
  113. func (l *lexer) backup() {
  114. l.pos -= l.width
  115. }
  116. // peek returns the next rune but does not advance the seeker
  117. func (l *lexer) peek() rune {
  118. r := l.next()
  119. l.backup()
  120. return r
  121. }
  122. // ignore advances the seeker and ignores the value
  123. func (l *lexer) ignore() {
  124. l.start = l.pos
  125. }
  126. // Accepts checks whether the given input matches the next rune
  127. func (l *lexer) accept(valid string) bool {
  128. if strings.ContainsRune(valid, l.next()) {
  129. return true
  130. }
  131. l.backup()
  132. return false
  133. }
  134. // acceptRun will continue to advance the seeker until valid
  135. // can no longer be met.
  136. func (l *lexer) acceptRun(valid string) {
  137. for strings.ContainsRune(valid, l.next()) {
  138. }
  139. l.backup()
  140. }
  141. // acceptRunUntil is the inverse of acceptRun and will continue
  142. // to advance the seeker until the rune has been found.
  143. func (l *lexer) acceptRunUntil(until rune) bool {
  144. // Continues running until a rune is found
  145. for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() {
  146. if i == 0 {
  147. return false
  148. }
  149. }
  150. return true
  151. }
  152. // blob returns the current value
  153. func (l *lexer) blob() string {
  154. return l.input[l.start:l.pos]
  155. }
  156. // Emits a new token on to token channel for processing
  157. func (l *lexer) emit(t tokenType) {
  158. token := token{t, l.lineno, l.blob()}
  159. if l.debug {
  160. fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
  161. }
  162. l.tokens <- token
  163. l.start = l.pos
  164. }
  165. // lexLine is state function for lexing lines
  166. func lexLine(l *lexer) stateFn {
  167. for {
  168. switch r := l.next(); {
  169. case r == '\n':
  170. l.emit(lineEnd)
  171. l.ignore()
  172. l.lineno++
  173. l.emit(lineStart)
  174. case r == ';' && l.peek() == ';':
  175. return lexComment
  176. case isSpace(r):
  177. l.ignore()
  178. case isLetter(r) || r == '_':
  179. return lexElement
  180. case isNumber(r):
  181. return lexNumber
  182. case r == '@':
  183. l.ignore()
  184. return lexLabel
  185. case r == '"':
  186. return lexInsideString
  187. default:
  188. return nil
  189. }
  190. }
  191. }
  192. // lexComment parses the current position until the end
  193. // of the line and discards the text.
  194. func lexComment(l *lexer) stateFn {
  195. l.acceptRunUntil('\n')
  196. l.ignore()
  197. return lexLine
  198. }
  199. // lexLabel parses the current label, emits and returns
  200. // the lex text state function to advance the parsing
  201. // process.
  202. func lexLabel(l *lexer) stateFn {
  203. l.acceptRun(Alpha + "_" + Numbers)
  204. l.emit(label)
  205. return lexLine
  206. }
  207. // lexInsideString lexes the inside of a string until
  208. // the state function finds the closing quote.
  209. // It returns the lex text state function.
  210. func lexInsideString(l *lexer) stateFn {
  211. if l.acceptRunUntil('"') {
  212. l.emit(stringValue)
  213. }
  214. return lexLine
  215. }
  216. func lexNumber(l *lexer) stateFn {
  217. acceptance := Numbers
  218. if l.accept("0") || l.accept("xX") {
  219. acceptance = HexadecimalNumbers
  220. }
  221. l.acceptRun(acceptance)
  222. l.emit(number)
  223. return lexLine
  224. }
  225. func lexElement(l *lexer) stateFn {
  226. l.acceptRun(Alpha + "_" + Numbers)
  227. if l.peek() == ':' {
  228. l.emit(labelDef)
  229. l.accept(":")
  230. l.ignore()
  231. } else {
  232. l.emit(element)
  233. }
  234. return lexLine
  235. }
  // isLetter reports whether t is a letter according to unicode.IsLetter.
  func isLetter(t rune) bool { return unicode.IsLetter(t) }
  // isSpace reports whether t is white space according to unicode.IsSpace.
  func isSpace(t rune) bool { return unicode.IsSpace(t) }
  // isNumber reports whether t is a number according to unicode.IsNumber.
  func isNumber(t rune) bool { return unicode.IsNumber(t) }