Initial commit

Tony Blyler 2017-01-09 17:18:25 -05:00
parent a015229c09
commit 4f0687cd6f
2 changed files with 222 additions and 0 deletions

README.md Normal file, 20 additions

@@ -0,0 +1,20 @@
# Knight's Daily Special Scraper
Who doesn't love [Knight's](http://www.knightsrestaurants.com/)? Their steak sandwich lunch special is delicious. So here is a simple HTML scraper that grabs Knight's current lunch and dinner specials in an easily consumable way.
## Installation
### Requirements
* Go version >= 1.5
* `GOPATH` set in your environment
* (optional, but recommended) `$GOPATH/bin` in `$PATH`
```bash
go get -u github.com/tblyler/knights-special
```
## Documentation
Run the following after installation to see all of the available flags:
```bash
knights-special --help
```
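For example, with the `knights-special` binary from `$GOPATH/bin` on your `$PATH`, the downtown lunch special can be printed as plain text or as JSON (the flag names come from `main.go` below):

```bash
# plain-text listing of the downtown lunch special
knights-special --downtown --lunch

# the same special as pretty-printed JSON
knights-special --downtown --lunch --jsonPretty
```

Exactly one meal flag (`--lunch` or `--dinner`) and exactly one location flag (`--downtown`, `--annarbor`, or `--jackson`) must be given; anything else prints the flag defaults and exits with an error.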

main.go Normal file, 202 additions

@@ -0,0 +1,202 @@
package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"strings"

	"golang.org/x/net/html"
)

const (
	// the max font size used for format-pretty spaces
	maxFontSize = 12
	// the base url for knights
	baseURL = "http://www.knightsrestaurants.com/specials/"
)

// regex to calculate format-pretty spaces per entry
var fontRegex = regexp.MustCompile("FONT-SIZE:([0-9]+)pt")
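// e.g. an entry styled with "FONT-SIZE:10pt" gets maxFontSize-10 = 2 leading spaces in the plain-text output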

func main() {
	jsonify := false
	jsonPretty := false
	lunch := false
	dinner := false
	downtown := false
	annarbor := false
	jackson := false

	flag.BoolVar(&jsonify, "json", false, "output as JSON")
	flag.BoolVar(&jsonPretty, "jsonPretty", false, "output as pretty JSON")
	flag.BoolVar(&lunch, "lunch", false, "get the lunch special")
	flag.BoolVar(&dinner, "dinner", false, "get the dinner special")
	flag.BoolVar(&downtown, "downtown", false, "get the downtown special")
	flag.BoolVar(&annarbor, "annarbor", false, "get the Ann Arbor Dexter Rd special")
	flag.BoolVar(&jackson, "jackson", false, "get the Jackson special")
	flag.Parse()

	if jsonPretty {
		jsonify = true
	}

	if (lunch && dinner) || (!lunch && !dinner) {
		fmt.Fprintln(os.Stderr, "You must specify exactly one of -lunch or -dinner")
		flag.PrintDefaults()
		os.Exit(1)
	}

	if (downtown && (annarbor || jackson)) || (annarbor && (downtown || jackson)) || (jackson && (annarbor || downtown)) {
		fmt.Fprintln(os.Stderr, "You must specify only one location")
		flag.PrintDefaults()
		os.Exit(1)
	}

	url := baseURL
	if annarbor {
		if lunch {
			url += "lunch_kbr.html"
		} else if dinner {
			url += "dinner_kbr.html"
		} else {
			fmt.Fprintln(os.Stderr, "Unable to determine the meal to pull from for Ann Arbor Dexter Rd")
			flag.PrintDefaults()
			os.Exit(1)
		}
	} else if downtown {
		if lunch {
			url += "lunch_klm.html"
		} else if dinner {
			url += "dinner_klm.html"
		} else {
			fmt.Fprintln(os.Stderr, "Unable to determine the meal to pull from for downtown")
			flag.PrintDefaults()
			os.Exit(1)
		}
	} else if jackson {
		if lunch {
			url += "lunch_krj.html"
		} else if dinner {
			url += "dinner_krj.html"
		} else {
			fmt.Fprintln(os.Stderr, "Unable to determine the meal to pull from for Jackson")
			flag.PrintDefaults()
			os.Exit(1)
		}
	} else {
		fmt.Fprintln(os.Stderr, "Unable to determine which location to pull from")
		flag.PrintDefaults()
		os.Exit(1)
	}
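
	// at this point url is the specials page for the chosen location and meal,
	// e.g. http://www.knightsrestaurants.com/specials/lunch_klm.html for the downtown lunch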

	// run as a callback for graceful error handling
	err := func(url string, jsonify bool, jsonPretty bool) error {
		// pull the raw HTML
		resp, err := http.Get(url)
		if err != nil {
			return err
		}
		// make sure the response body is always closed
		defer resp.Body.Close()

		if resp.StatusCode != http.StatusOK {
			return fmt.Errorf("Got bad status on request: %d", resp.StatusCode)
		}

		// start the HTML tokenizer to parse the HTML pulled from the website
		tokenizer := html.NewTokenizer(resp.Body)
		if tokenizer.Err() != nil {
			return tokenizer.Err()
		}

		// this will store the output information
		output := []string{}

		// walk the tokens until the closing </html> tag or the end of the document
		for {
			tokenType := tokenizer.Next()
			if tokenType == html.ErrorToken {
				// io.EOF or a parse error; either way there is nothing left to read
				break
			}

			// Token should only be read once per Next, so grab it here for this iteration
			token := tokenizer.Token()
			if tokenType == html.EndTagToken && token.Data == "html" {
				break
			}

			if tokenType != html.StartTagToken || token.Data != "td" {
				continue
			}

			// keep track of the amount of spaces to prepend to the output string
			spaces := 0

			// go to the element that has text
			for inner := tokenizer.Next(); inner != html.TextToken; inner = tokenizer.Next() {
				if tokenizer.Err() != nil {
					return tokenizer.Err()
				}

				if !jsonify {
					// get the "font size" to determine the amount of spaces to use if this has a font attribute
					for _, attr := range tokenizer.Token().Attr {
						// use the predefined regex to check this attribute
						fontSizeMatch := fontRegex.FindStringSubmatch(attr.Val)
						// not a valid match, continue
						if fontSizeMatch == nil {
							continue
						}

						// trust the regex to not have a bad match for strconv.Atoi
						fontSize, _ := strconv.Atoi(fontSizeMatch[1])

						// assume that maxFontSize is correct
						spaces = maxFontSize - fontSize
						break
					}
				}
			}

			// if this is the first entry, have no spaces regardless of the font value
			if !jsonify && len(output) == 0 {
				spaces = 0
			}

			// prepend the amount of spaces to the output text and add it to the output slice
			output = append(output, strings.Repeat(" ", spaces)+strings.TrimSpace(string(tokenizer.Text())))
		}

		if jsonify {
			jsonOutput := ""
			if jsonPretty {
				jsonByte, err := json.MarshalIndent(output, "", " ")
				if err != nil {
					return err
				}

				jsonOutput = string(jsonByte)
			} else {
				jsonByte, err := json.Marshal(output)
				if err != nil {
					return err
				}

				jsonOutput = string(jsonByte)
			}

			fmt.Println(jsonOutput)
		} else {
			for _, line := range output {
				fmt.Println(line)
			}
		}

		return nil
	}(url, jsonify, jsonPretty)

	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}