#!/usr/bin/ruby -w

## Frequency Analysis library.
##
## Frequency_Analysis runs basic frequency analysis routines on a provided
## String corpus. In the standard invocation the constructor is called
## with a String as its only argument, the analyses are performed, and the
## results are then available through the accessors. The reset_corpus
## method allows for finer grained operation as well as testing and
## debugging. There is a driver planned that will allow command line
## operation. Unit tests are in Test_Frequency_Analysis.
class Frequency_Analysis
  attr_reader :words_list, :characters_list, :word_frequency_table,
              :character_frequency_table, :corpus

  ## A word is a run of letters/digits, optionally joined by interior
  ## apostrophes (so "don't" is one word and a trailing "." is dropped).
  ## NOTE(review): tokenisation rules were not specified by the original
  ## stubs; adjust this pattern if a different definition is wanted.
  WORD_PATTERN = /[[:alnum:]]+(?:'[[:alnum:]]+)*/

  ## Only letters and digits are counted as characters; whitespace and
  ## punctuation are ignored.
  ## NOTE(review): same caveat as WORD_PATTERN - confirm intended scope.
  CHARACTER_PATTERN = /[[:alnum:]]/

  ## TODO etna constant?
  ## This is a class-level instance variable: it belongs to the class
  ## object itself, so the #corpus reader on instances never returns it.
  @corpus = 'Lorum ipsum dolor sic amet.'

  ## Print friendly message and exit on unknown or bad input.
  ## Writes to stdout and then terminates the process via Kernel#exit
  ## (which raises SystemExit).
  def usage
    # puts, not p: p would print the inspected form, quotes included.
    puts 'FreqAnal: a frequency analysis library (planned)... '
    exit
  end

  ## Create a new Frequency Analyzer and run all analyses on the
  ## provided String corpus. Anything that is not a String (or a
  ## subclass of String) triggers #usage, which exits. Downcases all
  ## input for consistency.
  def initialize(victim_text)
    usage unless victim_text.is_a?(String)
    @corpus = victim_text.downcase
    build_words_list
    build_characters_list
    build_word_frequency_table
    build_character_frequency_table
  end

  ## Sets a new corpus after initialization, intended primarily for
  ## testing and debugging of individual methods. Downcases all input
  ## for consistency. The analyses are NOT re-run here: call the
  ## individual build_* methods afterwards to refresh the accessors.
  ## Returns the new (downcased) corpus.
  def reset_corpus(new_victim)
    usage unless new_victim.is_a?(String)
    @corpus = new_victim.downcase
  end

  ## Builds list of distinct words in corpus sorted by their frequency
  ## of appearance descending (most common words first). Words with
  ## equal frequency are ordered alphabetically so the result is
  ## deterministic. Returns the Array it stores in #words_list.
  def build_words_list
    @words_list = rank_by_frequency(count_occurrences(WORD_PATTERN))
  end

  ## Builds list of distinct characters in corpus sorted by their
  ## frequency of appearance descending (most common characters first).
  ## Ties are ordered alphabetically. Returns the Array it stores in
  ## #characters_list.
  def build_characters_list
    @characters_list = rank_by_frequency(count_occurrences(CHARACTER_PATTERN))
  end

  ## Builds hashtable of words and their frequency in corpus. Frequencies
  ## are keyed to words. Eg:
  ##   { 'the' => 40, 'a' => 121, 'i' => 87 ... }
  ## Looking up a word that never occurred yields 0.
  def build_word_frequency_table
    @word_frequency_table = count_occurrences(WORD_PATTERN)
  end

  ## Builds hashtable of characters and their frequency in corpus.
  ## Frequencies are keyed to characters. Eg:
  ##   { 'e' => 40, 't' => 121, 'n' => 87 ... }
  ## Looking up a character that never occurred yields 0.
  def build_character_frequency_table
    @character_frequency_table = count_occurrences(CHARACTER_PATTERN)
  end

  private

  ## Counts every match of +pattern+ in the current corpus and returns a
  ## Hash of token => count whose default value is 0.
  def count_occurrences(pattern)
    @corpus.scan(pattern).each_with_object(Hash.new(0)) do |token, counts|
      counts[token] += 1
    end
  end

  ## Returns the keys of +counts+ ordered by count descending, breaking
  ## ties by the token itself ascending.
  def rank_by_frequency(counts)
    counts.sort_by { |token, count| [-count, token] }.map { |token, _count| token }
  end
end

## Driver will eventually check for args and return reports based on
## input arguments and flags, even...
if __FILE__ == $0
  ## TODO check and parse args
  fa = Frequency_Analysis.new('Lorum ipsum nil carbonadrum est.')
  ## TODO print some reports to stdout
  fa.usage
end