|
- package main
-
- import (
- "biukop.com/sfm/loan"
- "errors"
- log "github.com/sirupsen/logrus"
- "net/http"
- "os"
- "os/exec"
- "strings"
- )
-
// FunderType names the lender (funder) that issued a pay-in document.
type FunderType string

// Known funder identifiers.
//
// Only Funder_AAA carries the FunderType type. The remaining names are
// untyped string constants: they convert implicitly to FunderType where one
// is required, and they can also be used wherever a plain string is expected
// (for example as an error message).
const (
	Funder_AAA     = FunderType("AAA Financial")
	Funder_Pepper  = "Pepper"
	Funder_Resimac = "Resimac"

	// Funder_Unknown doubles as the error text reported when no funder
	// matches the document.
	Funder_Unknown = "cannot detect funder type"
)
-
- type AiDecodeIncome struct {
- Input struct {
- Uploads loan.Uploads
- FileName string //a local file on disk
- InMime string //may not be correct, just some suggestion only.
- }
- Mime string //mime actually detected.
- PayIn []loan.PayIn
- Funder FunderType
- AAA PayInAAAData
- }
-
- func decodePayInMain(filename string, format string) (ai AiDecodeIncome, e error) {
- ai.Input.FileName = filename
- ai.Input.InMime = format
- ai.PayIn = make([]loan.PayIn, 0, 10)
- ai.Mime, e = GetFileContentType(filename)
- if e != nil {
- return
- }
-
- switch ai.Mime {
- case "application/pdf":
- ai.decodePayInPdf(filename, format)
- }
-
- return ai, e
- }
-
// GetFileContentType sniffs the MIME type of the file at filename from its
// leading bytes using http.DetectContentType.
//
// Tested, but not accurate for xls and xlsx files: those are sometimes
// reported as "application/zip" or "application/octet-stream".
//
// An error is returned, together with an empty content type, when the file
// cannot be opened or read. An empty file yields the read error (io.EOF).
func GetFileContentType(filename string) (contentType string, e error) {
	input, e := os.Open(filename)
	if e != nil {
		return "", e
	}
	defer input.Close()

	// Only the first 512 bytes are used to sniff the content type.
	buffer := make([]byte, 512)
	n, e := input.Read(buffer)
	if e != nil {
		return "", e
	}

	// Sniff only the bytes actually read: the unused tail of the buffer is
	// zero-filled, and NUL bytes make short text files look like binary data.
	// DetectContentType always returns a valid content type, falling back to
	// "application/octet-stream" when nothing else matches.
	return http.DetectContentType(buffer[:n]), nil
}
-
- func (m *AiDecodeIncome) decodePayInPdf(filename string, format string) (ret []loan.PayIn, e error) {
- cmd := exec.Command("pdftotext", "-layout", filename, "-")
- //log.Println(cmd.String())
- out, e := cmd.Output()
- if e != nil {
- log.Fatal(e)
- }
-
- raw := string(out)
- switch m.detectFunder(raw) {
- case Funder_AAA:
- e = m.AAA.decodeAAAPdf(raw)
- log.Println("AAA final result", m.AAA)
- break
- case Funder_Unknown:
- e = errors.New(Funder_Unknown)
- break // not able to detect Funder
- }
- return
- }
-
- func (m *AiDecodeIncome) detectFunder(raw string) FunderType {
- if m.isAAA(raw) {
- return Funder_AAA
- }
-
- return Funder_Unknown
- }
-
- func (m *AiDecodeIncome) isAAA(raw string) bool {
- keyword := "AAA Financial Trail Report"
- lines := strings.Split(raw, "\n")
- return m.checkFunderKeyword(keyword, lines, 0, 3)
- }
-
- func (m *AiDecodeIncome) checkFunderKeyword(keyword string, lines []string, start int, end int) bool {
- for idx, line := range lines {
- // first 10 lines has Key word
- if strings.Contains(line, keyword) && idx >= start && idx <= 10 {
- return true
- }
- }
- return false
- }
|