package main

import (
	"strconv"
	"strings"
	"time"

	log "github.com/sirupsen/logrus"
)

/*
Sample Text (line breaks are reconstructed from extracted PDF text and may not
match a real statement exactly - confirm against an actual report):

AAA Financial Trail Report
Super Finance Markets Pty Ltd
Loan Number SettDate Loan Balance Arrears DisDate IntTrail$ Comments
Facility Columbus Period Servicing: Feb 2020
400053440 02-Sep-19 $552,463 552,579.52 $32.19
400063271 19-Feb-20 $832,000 832,000.00 $0.00
Columbus Total: $32.19
Grand Total: $32.19
Super Finance Markets Pty Ltd
*/

// States of the line-by-line parser in decodeAAAPdf. The values are the exact
// strings returned by processStart, processPeriod and processRow.
const (
	aaaStateStart            = "start"
	aaaStateLookingForPeriod = "LookingForPeriod"
	aaaStateLookingForRows   = "LookingForRows"
	aaaStateSkipCurrent      = "LookingForRowsSkipCurrent"
)

// Markers and layouts used to recognise the parts of an AAA report.
const (
	aaaReportKeyword = "AAA Financial Trail Report"
	aaaPeriodMarker  = "Period Servicing:"
	aaaTotalMarker   = "Total:"

	// aaaPeriodLayout parses the text after the period marker, e.g. "Feb 2020".
	aaaPeriodLayout = "Jan 2006"
	// aaaSettlementLayout parses the settlement column, e.g. "02-Sep-19".
	// The "2" day element accepts both one- and two-digit days.
	aaaSettlementLayout = "2-Jan-06"

	// aaaMinRowColumns is the minimum number of whitespace-separated columns a
	// line must have to be considered a loan row.
	aaaMinRowColumns = 5
)

// aaaCurrencyCleaner strips spaces, dollar signs and thousands separators
// from a currency string in a single pass.
var aaaCurrencyCleaner = strings.NewReplacer(" ", "", "$", "", ",", "")

// PayInAAARow is one loan line decoded from an AAA Financial Trail Report.
type PayInAAARow struct {
	Period       time.Time // parsed from the "Period Servicing:" line
	LoanNumber   string    // first column of the row
	Settlement   time.Time // second column, parsed as a date
	LoanFacility float64   // third column, parsed as currency
	Balance      float64   // fourth column, parsed as currency
	InTrail      float64   // last column of the row, parsed as currency
}

// isAAA reports whether raw contains the AAA report keyword, as decided by
// isKeywordExist over the newline-separated lines of raw.
func (m *AiDecodeIncome) isAAA(raw string) bool {
	return m.isKeywordExist(aaaReportKeyword, strings.Split(raw, "\n"))
}

// decodeAAAPdf parses the extracted text of an AAA report and stores the
// decoded loan rows in m.AAA (which is reset on every call).
//
// The text is processed line by line with a small state machine (a DFA -
// finally put to use after years of learning about them):
//
//	start            -> LookingForPeriod  once the column header line is seen
//	LookingForPeriod -> LookingForRows    once a parseable period line is seen
//	LookingForRows   -> start             once a "Total:" line is seen
//
// The returned error is the result of the most recent attempt to parse a
// period line; it is nil when no period line was encountered or the last one
// parsed successfully.
func (m *AiDecodeIncome) decodeAAAPdf(raw string) (e error) {
	m.AAA = make([]PayInAAARow, 0, 10)

	var (
		currentRow    PayInAAARow
		currentPeriod time.Time
	)
	state := aaaStateStart

	for _, l := range strings.Split(raw, "\n") {
		switch state {
		case aaaStateStart:
			// TODO: determine column indexes here if the column layout
			// ever changes between reports.
			state = currentRow.processStart(l)

		case aaaStateLookingForPeriod:
			state = currentRow.processPeriod(l)
			if state != aaaStateLookingForRows {
				continue
			}
			var period time.Time
			period, e = currentRow.getPeriod(l)
			if e != nil {
				// Warnf rather than Warn: Warn concatenates string
				// operands without any separator.
				log.Warnf("cannot find period in line %q: %v", l, e)
				state = aaaStateLookingForPeriod
				continue
			}
			currentPeriod = period
			currentRow.Period = period

		case aaaStateLookingForRows, aaaStateSkipCurrent:
			nextState, valid := currentRow.processRow(l)
			if valid {
				m.AAA = append(m.AAA, currentRow)
			}
			state = nextState
			if state == aaaStateStart {
				// Section finished: start from a clean row that still
				// carries the last known period.
				currentRow = PayInAAARow{Period: currentPeriod}
			}
		}
	}
	return e
}

// processStart returns "LookingForPeriod" when line is the column header
// (it mentions "Loan Number", "SettDate", "Balance" and "IntTrail$"), and
// "start" otherwise.
func (m *PayInAAARow) processStart(line string) (nextState string) {
	for _, column := range []string{"Loan Number", "SettDate", "Balance", "IntTrail$"} {
		if !strings.Contains(line, column) {
			return aaaStateStart
		}
	}
	return aaaStateLookingForPeriod
}

// processPeriod returns "LookingForRows" when line contains the
// "Period Servicing:" marker, and "LookingForPeriod" otherwise.
func (m *PayInAAARow) processPeriod(line string) (nextState string) {
	if strings.Contains(line, aaaPeriodMarker) {
		return aaaStateLookingForRows
	}
	return aaaStateLookingForPeriod
}

// getPeriod extracts the servicing period from a line such as
// "Period Servicing: Feb 2020".
//
// The text following the "Period Servicing:" marker is parsed with the layout
// "Jan 2006". When the marker is absent, the text after the first colon is
// used instead. A non-nil error is returned when that text is not a valid
// period.
func (m *PayInAAARow) getPeriod(line string) (p time.Time, e error) {
	rest := line
	if idx := strings.Index(line, aaaPeriodMarker); idx >= 0 {
		// Anchor on the marker so an earlier colon in the same line
		// (e.g. in a facility label) does not shift the slice.
		rest = line[idx+len(aaaPeriodMarker):]
	} else if idx := strings.Index(line, ":"); idx >= 0 {
		rest = line[idx+1:]
	}
	return time.Parse(aaaPeriodLayout, strings.TrimSpace(rest))
}

// processRow tries to decode line as a loan row into m.
//
// A line is accepted as a row (valid == true, nextState "LookingForRows")
// when it has at least five whitespace-separated columns and its second
// column is a settlement date such as "02-Sep-19". Requiring the date keeps
// repeated page headers, footers and long total lines from being recorded as
// loans. m is only modified for accepted rows.
//
// Any other line is not a row: one containing "Total:" ends the current
// section (nextState "start"); everything else is skipped (nextState
// "LookingForRowsSkipCurrent").
func (m *PayInAAARow) processRow(line string) (nextState string, valid bool) {
	// Fields splits on any whitespace, so tabs or a trailing "\r" from
	// Windows line endings do not end up inside a column value.
	columns := strings.Fields(line)

	if len(columns) >= aaaMinRowColumns {
		if settlement, err := time.Parse(aaaSettlementLayout, columns[1]); err == nil {
			m.LoanNumber = columns[0]
			m.Settlement = settlement
			m.LoanFacility = m.currencyToFloat64(columns[2])
			m.Balance = m.currencyToFloat64(columns[3])
			m.InTrail = m.currencyToFloat64(columns[len(columns)-1]) // last column
			return aaaStateLookingForRows, true
		}
	}

	if strings.Contains(line, aaaTotalMarker) {
		return aaaStateStart, false
	}
	return aaaStateSkipCurrent, false
}

// currencyToFloat64 converts a currency string such as "$552,463" or
// "552,579.52" to a float64 after removing spaces, "$" and ",".
func (m *PayInAAARow) currencyToFloat64(cur string) (ret float64) {
	// The parse error is deliberately discarded: the report is parsed on a
	// best-effort basis and syntactically invalid input yields 0.
	ret, _ = strconv.ParseFloat(aaaCurrencyCleaner.Replace(cur), 64)
	return ret
}