#!/usr/bin/env python
"""Test Measures Script.

Reads a CSV of per-test timing measures, logs the rows whose "load",
"validate" and "render" values exceed a threshold, and then logs a one-row
per-column summary table.
"""
import logging

import pandas

# Lift the row cap so DataFrame.to_string() output is never truncated with
# "..." when the report is logged.
pandas.set_option("display.max_rows", None)

logger = logging.getLogger(__name__)
# Emit bare messages (no level/name prefix) so the logged tables stay aligned.
logging.basicConfig(level=logging.INFO, format="%(message)s")
  8. def _summarize(summary_df, measures_df, column, threshold):
  9. measures_df = measures_df.sort_values(column, ascending=False)
  10. total = measures_df[column].sum()
  11. column_df = measures_df[measures_df[column] > threshold]
  12. top = column_df[column].sum()
  13. percent = 100 * (top / total)
  14. summary_df.loc[len(summary_df)] = [ column, total, top, len(column_df), percent ]
  15. return column_df.to_string(index=False) + "\n"
  16. def main():
  17. measures_df = pandas.read_csv("dist/test/measures.csv")
  18. measures_df.fillna(0, inplace=True)
  19. summary_df = pandas.DataFrame(columns=[ "Name", "Total", "Top", "Count", "Ratio" ])
  20. logger.info(_summarize(summary_df, measures_df, "load", 1))
  21. logger.info(_summarize(summary_df, measures_df, "validate", 1))
  22. logger.info(_summarize(summary_df, measures_df, "render", 1))
  23. logger.info(summary_df.to_string(index=False))
  24. if __name__ == "__main__":
  25. main()