package main import ( log "github.com/sirupsen/logrus" "strconv" "strings" "time" ) /* Sample Text AAA Financial Trail Report Super Finance Markets Pty Ltd Loan Number SettDate Loan Balance Arrears DisDate IntTrail$ Comments Facility Columbus Period Servicing: Feb 2020 400053440 02-Sep-19 $552,463 552,579.52 $32.19 400063271 19-Feb-20 $832,000 832,000.00 $0.00 Columbus Total: $32.19 Grand Total: $32.19 Super Finance Markets Pty Ltd */ type PayInAAARow struct { LoanNumber string Settlement time.Time LoanAmount float64 Balance float64 InTrail float64 } type PayInAAAPeriod struct { Period time.Time Rows []PayInAAARow } func (m *AiDecodeIncome) decodeAAAPdf(raw string) (e error) { m.AAA = make([]PayInAAAPeriod, 0, 10) lines := strings.Split(raw, "\n") currentDecoder := PayInAAAPeriod{} state := "start" for _, l := range lines { // DFA, wow, finally it's used. after years of learning switch state { case "start": state = currentDecoder.processStart(l) if state == "LookingForPeriod" { // determine column index, if their column is changing } break case "LookingForPeriod": state = currentDecoder.processPeriod(l) if state == "LookingForRows" { currentDecoder.Period, e = currentDecoder.getPeriod(l) currentDecoder.Rows = make([]PayInAAARow, 0, 10) if e != nil { log.Warn("cannot find period", l, e) state = "LookingForPeriod" } else { m.AAA = append(m.AAA, currentDecoder) } } break case "LookingForRows", "LookingForRowsSkipCurrent": nextState, row, valid := currentDecoder.processRow(l) if valid { currentDecoder.Rows = append(currentDecoder.Rows, row) } state = nextState if nextState == "start" { currentDecoder = PayInAAAPeriod{} //renew to a empty state } break } } return } func (m *PayInAAAPeriod) processStart(line string) (nextState string) { nextState = "start" if strings.Contains(line, "Loan Number") && strings.Contains(line, "SettDate") && strings.Contains(line, "Balance") && strings.Contains(line, "IntTrail$") { nextState = "LookingForPeriod" } return } func (m *PayInAAAPeriod) processPeriod(line string) (nextState string) { nextState = "LookingForPeriod" if strings.Contains(line, "Period Servicing:") { nextState = "LookingForRows" } return } // Period Servicing: Feb 2020 func (m *PayInAAAPeriod) getPeriod(line string) (p time.Time, e error) { idx := strings.Index(line, ":") subStr := strings.TrimSpace(line[idx+1:]) p, e = time.Parse("Jan 2006", subStr) return } func (m *PayInAAAPeriod) processRow(line string) (nextState string, row PayInAAARow, valid bool) { nextState = "LookingForRows" valid = false allParts := strings.Split(line, " ") el := make([]string, 0, 10) for _, item := range allParts { if len(item) > 0 { el = append(el, item) } } if len(el) >= 5 { row.LoanNumber = el[0] row.Settlement, _ = time.Parse("02-Jan-06", el[1]) row.LoanAmount = m.currencyToFloat64(el[2]) row.Balance = m.currencyToFloat64(el[3]) row.InTrail = m.currencyToFloat64(el[len(el)-1]) //last element valid = true } else { if strings.Contains(line, "Total:") { nextState = "start" } else { nextState = "LookingForRowsSkipCurrent" } } return } func (m *PayInAAAPeriod) currencyToFloat64(cur string) (ret float64) { cur = strings.ReplaceAll(cur, " ", "") //remove space cur = strings.ReplaceAll(cur, "$", "") //remove $ cur = strings.ReplaceAll(cur, ",", "") //remove , ret, _ = strconv.ParseFloat(cur, 64) return ret }