#csv #column #pandas #safe #dataframe #python #dataset

nightly koala

Python's pandas implementation in Rust

6 releases

0.1.5 Jun 29, 2020
0.1.4 Jun 27, 2020

#5 in #pandas

MIT/Apache

22KB
423 lines

Koala

What is the library?

Python's pandas implemented for fast, type safe programming in Rust.

Available Functions & Attributes

  • CSV

    .read_csv(path: &str, content: &mut String) -> CSV

    returns CSV struct reading file from given path

          let mut content = String::new();
          let csv : CSV = dataframe::df::read_csv("test.csv", &mut content); // CSV { headers, values, matrix }
    

    .get_df() -> DataFrame

    returns DataFrame from a CSV struct

           let mut df = csv.get_df(); // DataFrame { columns, dataset, values }
    
  • DataFrame

    .columns -> Vec<&str>

    returns array of strings, containing column names

           df.columns; // ["name","age"]
    

    .dataset -> Vec<Vec<&str>>

    returns dataset matrix

           df.dataset; // [["bob", "30"], ["richard", "25"]]
    

    .values -> Vec<Pair(&str, Vec<&str>)>

    returns vector of pairs, containing column name, and all column values

           df.values; // [("name", ["bob", "richard"]), ("age", ["30", "25"])]
    

    .max(column: &str) -> f64

    returns max from all values inside a column

           df.max("age"); // 30 as f64
    

    .min(column: &str) -> f64

    returns min from all values inside a column

           df.min("age"); // 25 as f64
    

    .mean(column: &str) -> f64

    returns mean from all values inside a column

           df.mean("age"); // 27.5 as f64
    

    .sum(column: &str) -> f64

    returns sum of all non-N/A values from the given column

           df.sum("age"); // 55 as f64
    

    [&str] -> Vec<&str>

    string index for DataFrame, returns all values from a given column

           df["age"]; // ["30", "25"]
    

    [usize] -> Vec<&str>

    usize index for DataFrame, returns given row with all columns

           df[0]; // ["bob", "30"]
    

    .iloc(Vec<Range<usize>>) -> Vec<Vec<&str>>

    returns sliced dataset matrix from given range

           df.iloc([0..2, 0..1].to_vec()); // [["richard"], ["bob"]]
    

    .is_na_col(column: &str) -> bool

    returns whether the given column of the DataFrame has a missing value

           df.is_na_col("age"); // false
    

    .is_na() -> Vec<Vec<bool>>

    returns matrix containing missing value bool for each value

           df.is_na(); // [[false, false], [false, false]]
    

    .push(value: Vec<&str>)

    appends the given row to the end of the dataset

           df.push(["ann", "20"]);
           df.dataset; // [["richard", "30"], ["bob", "25"], ["ann", "20"]]
    

    .pop() -> Vec<&str>

    removes the last row from the dataset and returns it

          df.pop(); // ["ann", "20"]
          df.pop(); // ["bob", "25"]
          df.dataset; // [["richard", "30"]]
    

    .n_uniques(column: &str) -> usize

    returns the number of unique values in the given column

         df.n_uniques("age"); // 2 as usize
    

    .uniques(column: &str) -> Vec<&str>

    returns the unique values in the given column

        df.uniques("age"); // ["30", "25"]
    

    .apply(column: &str, function: for<'r> fn(&'r str) -> &'a str)

    applies the given function to each value in the given column

        fn in_my_twenties<'r>(age: &str) -> &'r str { "20" }   
        df.apply("age", in_my_twenties);
        df.dataset; // [["richard", "20"], ["bob", "20"]]
    

    .fillna(column: &str, value: &str)

    assigns given value to each N/A value on column

       df.fillna("age", df.mean("age")); // [["richard", "26"], ["bob", "26"]] given bob had no prior age
    

    .dtypes -> HashMap<&str, &str>

    returns type of each column

      df.dtypes // {"age": "numeric", "name": "str" }
    

Dependencies

~2–3MB
~54K SLoC