forked from ghc/hsc2hs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHSCParser.hs
320 lines (267 loc) · 9.69 KB
/
HSCParser.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
module HSCParser where
import Control.Applicative hiding ( many )
import Control.Monad ( MonadPlus(..), liftM, liftM2, ap )
import Data.Char ( isAlpha, isAlphaNum, isSpace, isDigit )
------------------------------------------------------------------------
-- A deterministic parser which remembers the text which has been parsed.
newtype Parser a = Parser (SourcePos -> String -> ParseResult a)
runParser :: Parser a -> String -> String -> ParseResult a
runParser (Parser p) file_name = p (SourcePos file_name 1)
data ParseResult a = Success !SourcePos String String a
| Failure !SourcePos String
data SourcePos = SourcePos String !Int
updatePos :: SourcePos -> Char -> SourcePos
updatePos pos@(SourcePos name line) ch = case ch of
'\n' -> SourcePos name (line + 1)
_ -> pos
instance Functor Parser where
fmap = liftM
instance Applicative Parser where
pure = return
(<*>) = ap
instance Monad Parser where
return a = Parser $ \pos s -> Success pos [] s a
Parser m >>= k =
Parser $ \pos s -> case m pos s of
Success pos' out1 s' a -> case k a of
Parser k' -> case k' pos' s' of
Success pos'' out2 imp'' b ->
Success pos'' (out1++out2) imp'' b
Failure pos'' msg -> Failure pos'' msg
Failure pos' msg -> Failure pos' msg
fail msg = Parser $ \pos _ -> Failure pos msg
instance Alternative Parser where
empty = mzero
(<|>) = mplus
instance MonadPlus Parser where
mzero = fail "mzero"
Parser m `mplus` Parser n =
Parser $ \pos s -> case m pos s of
success@(Success _ _ _ _) -> success
Failure _ _ -> n pos s
getPos :: Parser SourcePos
getPos = Parser $ \pos s -> Success pos [] s pos
setPos :: SourcePos -> Parser ()
setPos pos = Parser $ \_ s -> Success pos [] s ()
message :: Parser a -> String -> Parser a
Parser m `message` msg =
Parser $ \pos s -> case m pos s of
success@(Success _ _ _ _) -> success
Failure pos' _ -> Failure pos' msg
catchOutput_ :: Parser a -> Parser String
catchOutput_ (Parser m) =
Parser $ \pos s -> case m pos s of
Success pos' out s' _ -> Success pos' [] s' out
Failure pos' msg -> Failure pos' msg
fakeOutput :: Parser a -> String -> Parser a
Parser m `fakeOutput` out =
Parser $ \pos s -> case m pos s of
Success pos' _ s' a -> Success pos' out s' a
Failure pos' msg -> Failure pos' msg
lookAhead :: Parser String
lookAhead = Parser $ \pos s -> Success pos [] s s
satisfy :: (Char -> Bool) -> Parser Char
satisfy p =
Parser $ \pos s -> case s of
c:cs | p c -> Success (updatePos pos c) [c] cs c
_ -> Failure pos "Bad character"
satisfy_ :: (Char -> Bool) -> Parser ()
satisfy_ p = satisfy p >> return ()
char_ :: Char -> Parser ()
char_ c = do
satisfy_ (== c) `message` (show c++" expected")
anyChar_ :: Parser ()
anyChar_ = do
satisfy_ (const True) `message` "Unexpected end of file"
any2Chars_ :: Parser ()
any2Chars_ = anyChar_ >> anyChar_
many :: Parser a -> Parser [a]
many p = many1 p `mplus` return []
many1 :: Parser a -> Parser [a]
many1 p = liftM2 (:) p (many p)
many_ :: Parser a -> Parser ()
many_ p = many1_ p `mplus` return ()
many1_ :: Parser a -> Parser ()
many1_ p = p >> many_ p
manySatisfy, manySatisfy1 :: (Char -> Bool) -> Parser String
manySatisfy = many . satisfy
manySatisfy1 = many1 . satisfy
manySatisfy_, manySatisfy1_ :: (Char -> Bool) -> Parser ()
manySatisfy_ = many_ . satisfy
manySatisfy1_ = many1_ . satisfy
------------------------------------------------------------------------
-- Parser of hsc syntax.
data Token
= Text SourcePos String
| Special SourcePos String String
parser :: Parser [Token]
parser = do
pos <- getPos
t <- catchOutput_ text
s <- lookAhead
rest <- case s of
[] -> return []
_:_ -> liftM2 (:) (special `fakeOutput` []) parser
return (if null t then rest else Text pos t : rest)
text :: Parser ()
text = do
s <- lookAhead
case s of
[] -> return ()
c:_ | isAlpha c || c == '_' -> do
anyChar_
manySatisfy_ (\c' -> isAlphaNum c' || c' == '_' || c' == '\'')
text
c:_ | isHsSymbol c -> do
symb <- catchOutput_ (manySatisfy_ isHsSymbol)
case symb of
"#" -> return ()
'-':'-':symb' | all (== '-') symb' -> do
return () `fakeOutput` symb
manySatisfy_ (/= '\n')
text
_ -> do
return () `fakeOutput` unescapeHashes symb
text
'\"':_ -> do anyChar_; hsString '\"'; text
'\'':_ -> do anyChar_; hsString '\''; text
'{':'-':_ -> do any2Chars_; linePragma `mplus` hsComment; text
_:_ -> do anyChar_; text
hsString :: Char -> Parser ()
hsString quote = do
s <- lookAhead
case s of
[] -> return ()
c:_ | c == quote -> anyChar_
'\\':c:_
| isSpace c -> do
anyChar_
manySatisfy_ isSpace
char_ '\\' `mplus` return ()
hsString quote
| otherwise -> do any2Chars_; hsString quote
_:_ -> do anyChar_; hsString quote
hsComment :: Parser ()
hsComment = do
s <- lookAhead
case s of
[] -> return ()
'-':'}':_ -> any2Chars_
'{':'-':_ -> do any2Chars_; hsComment; hsComment
_:_ -> do anyChar_; hsComment
linePragma :: Parser ()
linePragma = do
char_ '#'
manySatisfy_ isSpace
satisfy_ (\c -> c == 'L' || c == 'l')
satisfy_ (\c -> c == 'I' || c == 'i')
satisfy_ (\c -> c == 'N' || c == 'n')
satisfy_ (\c -> c == 'E' || c == 'e')
manySatisfy1_ isSpace
line <- liftM read $ manySatisfy1 isDigit
manySatisfy1_ isSpace
char_ '\"'
name <- manySatisfy (/= '\"')
char_ '\"'
manySatisfy_ isSpace
char_ '#'
char_ '-'
char_ '}'
setPos (SourcePos name (line - 1))
isHsSymbol :: Char -> Bool
isHsSymbol '!' = True; isHsSymbol '#' = True; isHsSymbol '$' = True
isHsSymbol '%' = True; isHsSymbol '&' = True; isHsSymbol '*' = True
isHsSymbol '+' = True; isHsSymbol '.' = True; isHsSymbol '/' = True
isHsSymbol '<' = True; isHsSymbol '=' = True; isHsSymbol '>' = True
isHsSymbol '?' = True; isHsSymbol '@' = True; isHsSymbol '\\' = True
isHsSymbol '^' = True; isHsSymbol '|' = True; isHsSymbol '-' = True
isHsSymbol '~' = True
isHsSymbol _ = False
unescapeHashes :: String -> String
unescapeHashes [] = []
unescapeHashes ('#':'#':s) = '#' : unescapeHashes s
unescapeHashes (c:s) = c : unescapeHashes s
lookAheadC :: Parser String
lookAheadC = liftM joinLines lookAhead
where
joinLines [] = []
joinLines ('\\':'\n':s) = joinLines s
joinLines (c:s) = c : joinLines s
satisfyC :: (Char -> Bool) -> Parser Char
satisfyC p = do
s <- lookAhead
case s of
'\\':'\n':_ -> do any2Chars_ `fakeOutput` []; satisfyC p
_ -> satisfy p
satisfyC_ :: (Char -> Bool) -> Parser ()
satisfyC_ p = satisfyC p >> return ()
charC_ :: Char -> Parser ()
charC_ c = satisfyC_ (== c) `message` (show c++" expected")
anyCharC_ :: Parser ()
anyCharC_ = satisfyC_ (const True) `message` "Unexpected end of file"
any2CharsC_ :: Parser ()
any2CharsC_ = anyCharC_ >> anyCharC_
manySatisfyC :: (Char -> Bool) -> Parser String
manySatisfyC = many . satisfyC
manySatisfyC_ :: (Char -> Bool) -> Parser ()
manySatisfyC_ = many_ . satisfyC
special :: Parser Token
special = do
manySatisfyC_ (\c -> isSpace c && c /= '\n')
s <- lookAheadC
case s of
'{':_ -> do
anyCharC_
manySatisfyC_ isSpace
sp <- keyArg (== '\n')
charC_ '}'
return sp
_ -> keyArg (const False)
keyArg :: (Char -> Bool) -> Parser Token
keyArg eol = do
pos <- getPos
key <- keyword `message` "hsc keyword or '{' expected"
manySatisfyC_ (\c' -> isSpace c' && c' /= '\n' || eol c')
arg <- catchOutput_ (argument eol)
return (Special pos key arg)
keyword :: Parser String
keyword = do
c <- satisfyC (\c' -> isAlpha c' || c' == '_')
cs <- manySatisfyC (\c' -> isAlphaNum c' || c' == '_')
return (c:cs)
argument :: (Char -> Bool) -> Parser ()
argument eol = do
s <- lookAheadC
case s of
[] -> return ()
c:_ | eol c -> do anyCharC_; argument eol
'\n':_ -> return ()
'\"':_ -> do anyCharC_; cString '\"'; argument eol
'\'':_ -> do anyCharC_; cString '\''; argument eol
'(':_ -> do anyCharC_; nested ')'; argument eol
')':_ -> return ()
'/':'*':_ -> do any2CharsC_; cComment; argument eol
'/':'/':_ -> do
any2CharsC_; manySatisfyC_ (/= '\n'); argument eol
'[':_ -> do anyCharC_; nested ']'; argument eol
']':_ -> return ()
'{':_ -> do anyCharC_; nested '}'; argument eol
'}':_ -> return ()
_:_ -> do anyCharC_; argument eol
nested :: Char -> Parser ()
nested c = do argument (== '\n'); charC_ c
cComment :: Parser ()
cComment = do
s <- lookAheadC
case s of
[] -> return ()
'*':'/':_ -> do any2CharsC_
_:_ -> do anyCharC_; cComment
cString :: Char -> Parser ()
cString quote = do
s <- lookAheadC
case s of
[] -> return ()
c:_ | c == quote -> anyCharC_
'\\':_:_ -> do any2CharsC_; cString quote
_:_ -> do anyCharC_; cString quote