package main import ( "biukop.com/sfm/loan" "errors" log "github.com/sirupsen/logrus" "os/exec" "strings" ) type FunderType string const ( Funder_AAA FunderType = "AAA Financial" Funder_Pepper = "Pepper" Funder_Resimac = "Resimac" Funder_Unknown = "cannot detect funder type" ) type AiDecodeIncome struct { Input loan.Uploads ul uploadsOnDisk // internal data Mime string //mime actually detected. PayIn []loan.PayIn Funder FunderType AAA PayInAAAData } func (m *AiDecodeIncome) decodeUploadToPayIn(ulMeta loan.Uploads) (e error) { m.Input = ulMeta m.ul.Upload = ulMeta m.PayIn = make([]loan.PayIn, 0, 10) switch m.getFileType() { case "pdf": m.decodePdf() break case "excel", "opensheet": m.decodeXls() break default: e = errors.New("unknown format") m.Funder = "" // mark unknown decoding } return } func (m *AiDecodeIncome) getFileType() (ret string) { strMime, e := GetFileContentType(m.ul.filePath()) if e != nil { return } m.Mime = strMime ret, e = m.ul.GetFileType() if e != nil { ret = "" } return } func (m *AiDecodeIncome) decodePdf() (e error) { cmd := exec.Command("pdftotext", "-layout", m.ul.filePath(), "-") out, e := cmd.Output() if e != nil { log.Fatal(e) } raw := string(out) switch m.detectFunder(raw) { case Funder_AAA: m.Funder = Funder_AAA e = m.AAA.decodeAAAPdf(raw) log.Println("AAA final result", m.AAA) break case Funder_Unknown: e = errors.New(Funder_Unknown) break // not able to detect Funder } return } func (m *AiDecodeIncome) decodeXls() (e error) { return } func (m *AiDecodeIncome) detectFunder(raw string) FunderType { if m.isAAA(raw) { return Funder_AAA } return Funder_Unknown } func (m *AiDecodeIncome) isAAA(raw string) bool { keyword := "AAA Financial Trail Report" lines := strings.Split(raw, "\n") return m.checkFunderKeyword(keyword, lines, 0, 3) } func (m *AiDecodeIncome) checkFunderKeyword(keyword string, lines []string, start int, end int) bool { for idx, line := range lines { // first 10 lines has Key word if strings.Contains(line, keyword) && idx >= start && idx <= 10 { return true } } return false }