"""Summarise a Morse code practice log into per-day, per-speed best scores.

The log is a text file that mixes date lines (e.g. ``Jan 5, 2020``) with
trial lines (e.g. ``random words - 2 mins - 15 wpm - 90%, 85%``).  Running
this file as a script reads ``Morse Code Progress.txt``, keeps the best
percent-correct for every (day number, wpm) pair and writes the result to
``word_trials.csv``.
"""
import re
from collections import namedtuple
from datetime import datetime

import pandas as pd

# Month name (lower case, short or long form) -> two-digit month number.
_MONTH_NUMBERS = {
    'jan': '01', 'january': '01',
    'feb': '02', 'february': '02',
    'mar': '03', 'march': '03',
    'apr': '04', 'april': '04',
    'may': '05',
    'jun': '06', 'june': '06',
    'jul': '07', 'july': '07',
    'aug': '08', 'august': '08',
    'sep': '09', 'september': '09',
    'oct': '10', 'october': '10',
    'nov': '11', 'november': '11',
    'dec': '12', 'december': '12',
}

# The alternation is generated from the map so that every month name the
# pattern can match is guaranteed to have a number (the two used to be
# maintained separately and drifted apart).
_DATE_PATTERN = re.compile(
    r'(' + '|'.join(sorted(_MONTH_NUMBERS, key=len, reverse=True)) + r')'
    r'\s+(\d+),\s*(\d+)'
)

_TRIAL_PATTERN = re.compile(
    r'(random|real)\s+words\s*-\s*(\d+)\s*mins?\s*-\s*(\d+)\s*wpm\s*-\s*(.+)'
)


# NOTE: the parameter is called ``str`` (shadowing the builtin) only because
# that is the established signature; it is kept for keyword callers.
def get_date(str):
    """Parse a leading ``<month> <day>, <year>`` date from a log line.

    The month may be a three-letter abbreviation or the full name, in any
    letter case.  A four-digit year is used as written; any other year is
    reduced to its last two digits and interpreted with ``%y``.

    Returns:
        A ``datetime`` at midnight of that date, or ``None`` when the line
        does not start with a date.

    Raises:
        ValueError: if the line looks like a date but the day/year digits do
            not form a valid calendar date.
    """
    match = _DATE_PATTERN.match(str.lower())
    if not match:
        return None

    month = _MONTH_NUMBERS[match.group(1)]
    day = match.group(2)
    year = match.group(3)
    if len(year) == 4:
        year_format = '%Y'
    else:
        # Short (or over-long) years: keep only the last two digits.
        year, year_format = year[-2:], '%y'
    return datetime.strptime(f'{month} {day} {year}', f'%m %d {year_format}')


def get_trials(str):
    """Extract the trial results from a ``random/real words`` log line.

    The line must start with ``random words`` or ``real words`` (lower case)
    followed by ``- <N> min(s) - <N> wpm - <scores>``, where every ``<N>%``
    found in ``<scores>`` is one trial.

    Returns:
        A list of ``(wpm, mins, percent_correct)`` integer tuples, one per
        score; empty when the line is not a trial line.
    """
    match = _TRIAL_PATTERN.match(str)
    if not match:
        return []

    mins = int(match.group(2))
    wpm = int(match.group(3))
    return [
        (wpm, mins, int(percent))
        for percent in re.findall(r'(\d+)%', match.group(4))
    ]


EntryTuple = namedtuple('EntryTuple', 'date day_num wpm mins pcorrect'.split())


def get_entries(filename='Morse Code Progress.txt'):
    """Read the practice log and return one ``EntryTuple`` per trial.

    Each trial is attributed to the most recent date line above it.
    ``day_num`` counts calendar days starting at 1 on the first date found
    in the file, so consecutive days get consecutive numbers.

    Args:
        filename: path of the log file to read.

    Returns:
        A list of ``EntryTuple(date, day_num, wpm, mins, pcorrect)`` where
        ``date`` is formatted as ``YYYY-MM-DD``.
    """
    entries = []
    first_date = None
    entry_date = None
    day_num = None

    with open(filename, 'r') as log_file:
        for line in log_file:
            parsed_date = get_date(line)
            if parsed_date is not None:
                if first_date is None:
                    first_date = parsed_date
                entry_date = parsed_date.strftime('%Y-%m-%d')
                # +1 so the first day is 1 and the following day is 2.
                day_num = (parsed_date - first_date).days + 1

            trials = get_trials(line)
            if trials and entry_date is None:
                # No date seen yet, so there is nothing to attribute to.
                print(f'Skipping trials found before any date: {line.rstrip()}')
                continue
            for wpm, mins, pcorrect in trials:
                entries.append(
                    EntryTuple(entry_date, day_num, wpm, mins, pcorrect)
                )
    return entries


def get_max_pcorrect_group_by_day_num_and_wpm(entries):
    """Return the best ``pcorrect`` for every ``(day_num, wpm)`` pair.

    Args:
        entries: DataFrame with at least ``day_num``, ``wpm`` and
            ``pcorrect`` columns.

    Returns:
        A DataFrame with columns ``day_num``, ``wpm``, ``pcorrect`` (in that
        order), one row per group, sorted by the group keys and indexed
        0..n-1.
    """
    grouped = (
        entries.groupby(['day_num', 'wpm'])['pcorrect']
        .max()
        .reset_index()
    )
    # Earlier versions labelled the positional index 'index'; keep that.
    grouped.index.name = 'index'
    return grouped[['day_num', 'wpm', 'pcorrect']]


def write_grouped_entries(entries):
    """Write grouped results to ``word_trials.csv`` and echo them to stdout.

    Args:
        entries: DataFrame whose rows are ``(day_num, wpm, pcorrect)``, as
            produced by ``get_max_pcorrect_group_by_day_num_and_wpm``.
    """
    with open('word_trials.csv', 'w') as word_trial_file:
        word_trial_file.write('Day,WPM,Percent Correct\n')
        for day_num, wpm, pcorrect in entries.itertuples(index=False):
            print(f'{day_num},{wpm},{pcorrect}')
            word_trial_file.write(f'{day_num},{wpm},{pcorrect}\n')


def main():
    """Parse the default log, write the grouped CSV and print row counts."""
    # Explicit columns keep the selection valid even when the log is empty.
    entries = pd.DataFrame(get_entries(), columns=list(EntryTuple._fields))
    entries = entries[['day_num', 'wpm', 'pcorrect']]
    grouped_entries = get_max_pcorrect_group_by_day_num_and_wpm(entries)
    write_grouped_entries(grouped_entries)

    entry_count = len(entries)
    grouped_entry_count = len(grouped_entries)
    print(f'Found {entry_count} entries')
    print(f'Found Grouped {grouped_entry_count} entries')


if __name__ == '__main__':
    main()