// Tutorial.java

package tech.tablesaw.docs;

import static tech.tablesaw.aggregate.AggregateFunctions.count;
import static tech.tablesaw.aggregate.AggregateFunctions.mean;
import static tech.tablesaw.aggregate.AggregateFunctions.median;

import java.io.IOException;
import tech.tablesaw.aggregate.CrossTab;
import tech.tablesaw.api.DateColumn;
import tech.tablesaw.api.IntColumn;
import tech.tablesaw.api.QuerySupport;
import tech.tablesaw.api.StringColumn;
import tech.tablesaw.api.Table;
import tech.tablesaw.docs.OutputWriter.System;

/**
 * Runnable source for the introductory tutorial on the tornadoes data set.
 *
 * <p>Code between a pair of {@code // @@ name} comment markers is a named snippet; most snippets
 * are followed by (or wrapped in) an {@code outputWriter.write(value, "name")} call that records
 * the value under the same name. NOTE(review): the markers are presumably consumed by the docs
 * tooling, so keep each pair intact and avoid adding commentary inside them unless it should
 * appear in the published snippet.
 */
public class Tutorial implements DocsSourceFile {

  /** Writer that records snippet output for this class. */
  public static final OutputWriter outputWriter = new OutputWriter(Tutorial.class);

  /**
   * Executes every tutorial snippet in order: reads the tornadoes CSV, inspects and reshapes the
   * table, filters, summarizes and cross-tabulates it, and finally writes it back out as CSV.
   *
   * @throws IOException declared for the CSV read/write calls
   */
  public void run() throws IOException {
    // @@ table_read
    Table tornadoes = Table.read().csv("../data/tornadoes_1950-2014.csv");
    // @@ table_read

    outputWriter.write(
        // @@ table_columns
        tornadoes.columnNames()
        // @@ table_columns
        ,
        "table_columns");

    outputWriter.write(
        // @@ table_shape
        tornadoes.shape()
        // @@ table_shape
        ,
        "table_shape");

    outputWriter.write(
        // @@ table_structure
        tornadoes.structure().printAll()
        // @@ table_structure
        ,
        "table_structure");

    // System here is tech.tablesaw.docs.OutputWriter.System (see imports), not java.lang.System.
    // @@ print_table
    System.out.println(tornadoes);
    // @@ print_table
    outputWriter.write(tornadoes, "print_table");

    outputWriter.write(
        // @@ filter_structure
        tornadoes
            .structure()
            .where(tornadoes.structure().stringColumn("Column Type").isEqualTo("DOUBLE"))
        // @@ filter_structure
        ,
        "filter_structure");

    outputWriter.write(
        // @@ first_n
        tornadoes.first(3)
        // @@ first_n
        ,
        "first_n");

    // @@ date_col
    StringColumn month = tornadoes.dateColumn("Date").month();
    // @@ date_col

    // @@ add_date_col
    tornadoes.addColumns(month);
    // @@ add_date_col

    // @@ remove_col
    tornadoes.removeColumns("State No");
    // @@ remove_col

    // NOTE(review): the table returned by sortOn is discarded here; if sortOn does not sort in
    // place, this statement only demonstrates the call and does not affect later output - confirm.
    // @@ sort_on
    tornadoes.sortOn("-Fatalities");
    // @@ sort_on

    outputWriter.write(
        // @@ summary
        tornadoes.column("Fatalities").summary().print()
        // @@ summary
        ,
        "summary");

    // Each selection below is applied to the same table its columns came from. A selection built
    // from the already-filtered result must not be applied to tornadoes: its row positions refer
    // to result, so it would pick unrelated rows of the full table.
    // @@ filtering
    Table result = tornadoes.where(tornadoes.intColumn("Fatalities").isGreaterThan(0));
    result = result.where(result.dateColumn("Date").isInApril());
    result =
        result.where(
            result
                .intColumn("Width")
                .isGreaterThan(300) // 300 yards
                .or(result.doubleColumn("Length").isGreaterThan(10))); // 10 miles

    result = result.selectColumns("State", "Date");

    // @@ filtering
    outputWriter.write(result.first(3), "filtering");

    // @@ totals
    Table injuriesByScale = tornadoes.summarize("Injuries", median).by("Scale").sortOn("Scale");
    injuriesByScale.setName("Median injuries by Tornado Scale");
    // @@ totals
    outputWriter.write(injuriesByScale.first(10), "totals");

    outputWriter.write(
        // @@ crosstabs
        CrossTab.counts(tornadoes, tornadoes.stringColumn("State"), tornadoes.intColumn("Scale"))
            .first(10)
        // @@ crosstabs
        ,
        "crosstabs");

    // Putting it all together.

    // @@ all_together_where
    Table summer =
        tornadoes.where(
            QuerySupport.or(
                // In June, on or after the 21st
                QuerySupport.and(
                    t -> t.dateColumn("Date").month().isEqualTo("JUNE"),
                    t -> t.dateColumn("Date").dayOfMonth().isGreaterThanOrEqualTo(21)),
                // In July or August
                t -> t.dateColumn("Date").month().isIn("JULY", "AUGUST"),
                // In September, before the 22nd
                QuerySupport.and(
                    t -> t.dateColumn("Date").month().isEqualTo("SEPTEMBER"),
                    t -> t.dateColumn("Date").dayOfMonth().isLessThan(22))));
    // @@ all_together_where

    // @@ all_together_lag
    summer = summer.sortAscendingOn("Date", "Time");
    summer.addColumns(summer.dateColumn("Date").lag(1));

    DateColumn summerDate = summer.dateColumn("Date");
    DateColumn laggedDate = summer.dateColumn("Date lag(1)");

    IntColumn delta = laggedDate.daysUntil(summerDate);
    summer.addColumns(delta);
    // @@ all_together_lag

    // @@ all_together_summarize
    Table summary = summer.summarize(delta, mean, count).by(summerDate.year());
    // @@ all_together_summarize
    outputWriter.write(summary.first(5), "all_together_summarize");

    outputWriter.write(
        // @@ all_together_single_col_summary
        summary.nCol(1).mean()
        // @@ all_together_single_col_summary
        ,
        "all_together_single_col_summary");

    // @@ write_csv
    tornadoes.write().csv("rev_tornadoes_1950-2014.csv");
    // @@ write_csv
  }
}