19 unstable releases (4 breaking)

✓ Uses Rust 2018 edition

new 0.5.1 Oct 20, 2019
0.4.2 Jul 12, 2019
0.3.1 Mar 30, 2019
0.2.4 Dec 19, 2018
0.2.1 Nov 25, 2018
Download history 452/week @ 2019-07-07 232/week @ 2019-07-14 495/week @ 2019-07-21 279/week @ 2019-07-28 312/week @ 2019-08-04 356/week @ 2019-08-11 466/week @ 2019-08-18 591/week @ 2019-08-25 373/week @ 2019-09-01 378/week @ 2019-09-08 385/week @ 2019-09-15 456/week @ 2019-09-22 220/week @ 2019-09-29 357/week @ 2019-10-06 357/week @ 2019-10-13

1,641 downloads per month
Used in 1 crate

MIT/Apache

20KB
287 lines

uwl

A crate providing a simple way of streaming over characters without having to worry about Unicode.

License

Licensed under either of

- Apache License, Version 2.0
- MIT license

at your option.

Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.


lib.rs:

A stream of chars for building things such as a lexer. It makes the step of "iteration between characters" considerably easier and provides certain utilities for making the code simpler. Respects both ASCII and Unicode.

Example: lexing identifiers, numbers and some punctuation marks:

use uwl::Stream;

/// The category assigned to a token by the example lexer.
#[derive(PartialEq, Debug)]
enum TokenKind {
    /// A run of alphabetic characters.
    Ident,
    /// A run of numeric characters.
    Number,
    /// The `?` punctuation mark.
    Question,
    /// The `!` punctuation mark.
    Exclamation,
    /// The `,` punctuation mark.
    Comma,
    /// The `.` punctuation mark.
    Point,

    /// Any character that fits none of the kinds above.
    Illegal,
}

/// The literal text carried by a token.
#[derive(PartialEq, Debug)]
enum Lit<'src> {
    /// A single character; the lexer uses this for punctuation and illegal characters.
    Short(char),
    /// A string slice; the lexer uses this for identifiers and numbers.
    Long(&'src str),
}

/// A lexed token: its category paired with the literal it was read from.
#[derive(PartialEq, Debug)]
struct Token<'src> {
    /// What sort of token this is.
    kind: TokenKind,
    /// The literal text belonging to the token.
    lit: Lit<'src>,
}

impl<'a> Token<'a> {
    fn new(kind: TokenKind, lit: Lit<'a>) -> Self {
        Token {
            kind,
            lit,
        }
    }
}

/// Lexes the next token from `stream`.
///
/// Leading whitespace is skipped. A run of alphabetic characters becomes an
/// `Ident` and a run of numeric characters a `Number`; `?`, `!`, `,` and `.`
/// each map to their own kind, and any other character is reported as
/// `Illegal`. Returns `None` when `stream.current()` has nothing left to give.
fn lex<'a>(stream: &mut Stream<'a>) -> Option<Token<'a>> {
    let mut ch = stream.current()?;

    // Ignore whitespace: consume it, then look at whatever follows.
    while ch.is_whitespace() {
        stream.take_while(|c| c.is_whitespace());
        ch = stream.current()?;
    }

    if ch.is_alphabetic() {
        let word = stream.take_while(|c| c.is_alphabetic());
        return Some(Token::new(TokenKind::Ident, Lit::Long(word)));
    }

    if ch.is_numeric() {
        let digits = stream.take_while(|c| c.is_numeric());
        return Some(Token::new(TokenKind::Number, Lit::Long(digits)));
    }

    // Everything else is a single-character token.
    let kind = match ch {
        '?' => TokenKind::Question,
        '!' => TokenKind::Exclamation,
        ',' => TokenKind::Comma,
        '.' => TokenKind::Point,
        _ => TokenKind::Illegal,
    };
    let consumed = stream.next()?;

    Some(Token::new(kind, Lit::Short(consumed)))
}

fn main() {
    let mut stream = Stream::new("Hello, world! ...world? Hello?");

    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, Lit::Long("Hello"))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Comma, Lit::Short(','))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, Lit::Long("world"))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Exclamation, Lit::Short('!'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, Lit::Short('.'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, Lit::Short('.'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Point, Lit::Short('.'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, Lit::Long("world"))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Question, Lit::Short('?'))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Ident, Lit::Long("Hello"))));
    assert_eq!(lex(&mut stream), Some(Token::new(TokenKind::Question, Lit::Short('?'))));

    // Reached the end
    assert_eq!(lex(&mut stream), None);
}

No runtime deps