package main import ( "biukop.com/sfm/loan" "errors" log "github.com/sirupsen/logrus" "net/http" "os" "os/exec" "strings" ) type FunderType string const ( Funder_AAA FunderType = "AAA Financial" Funder_Pepper = "Pepper" Funder_Resimac = "Resimac" Funder_Unknown = "cannot detect funder type" ) type AiDecodeIncome struct { Input struct { Uploads loan.Uploads FileName string //a local file on disk InMime string //may not be correct, just some suggestion only. } Mime string //mime actually detected. PayIn []loan.PayIn Funder FunderType AAA PayInAAAData } func decodePayInMain(filename string, format string) (ai AiDecodeIncome, e error) { ai.Input.FileName = filename ai.Input.InMime = format ai.PayIn = make([]loan.PayIn, 0, 10) ai.Mime, e = GetFileContentType(filename) if e != nil { return } switch ai.Mime { case "application/pdf": ai.decodePayInPdf(filename, format) } return ai, e } // tested, not accurate with xls, xlsx, it becomes zip and octstream sometime. func GetFileContentType(filename string) (contentType string, e error) { contentType = "" input, e := os.OpenFile(filename, os.O_RDONLY, 0755) // Only the first 512 bytes are used to sniff the content type. buffer := make([]byte, 512) _, e = input.Read(buffer) if e != nil { return } // Use the net/http package's handy DectectContentType function. Always returns a valid // content-type by returning "application/octet-stream" if no others seemed to match. contentType = http.DetectContentType(buffer) return } func (m *AiDecodeIncome) decodePayInPdf(filename string, format string) (ret []loan.PayIn, e error) { cmd := exec.Command("pdftotext", "-layout", filename, "-") //log.Println(cmd.String()) out, e := cmd.Output() if e != nil { log.Fatal(e) } raw := string(out) switch m.detectFunder(raw) { case Funder_AAA: e = m.AAA.decodeAAAPdf(raw) log.Println("AAA final result", m.AAA) break case Funder_Unknown: e = errors.New(Funder_Unknown) break // not able to detect Funder } return } func (m *AiDecodeIncome) detectFunder(raw string) FunderType { if m.isAAA(raw) { return Funder_AAA } return Funder_Unknown } func (m *AiDecodeIncome) isAAA(raw string) bool { keyword := "AAA Financial Trail Report" lines := strings.Split(raw, "\n") return m.checkFunderKeyword(keyword, lines, 0, 3) } func (m *AiDecodeIncome) checkFunderKeyword(keyword string, lines []string, start int, end int) bool { for idx, line := range lines { // first 10 lines has Key word if strings.Contains(line, keyword) && idx >= start && idx <= 10 { return true } } return false }