15/04/2021 - GO
In this example we are going to read a JSON file and decode it in a streaming fashion. The reason is that we don't know how big the file will be: it could be as small as one kilobyte or as large as hundreds of megabytes, or even gigabytes. If we had to read the whole file into memory at once, the application could easily run into trouble, such as excessive memory usage. That's why we are using JSON streaming here.
[
  {
    "id": 1,
    "name": "user-1"
  },
  {
    "id": 2,
    "name": "user-2"
  },
  {
    "id": 3,
    "name": "user-3"
  },
  {
    "id": 4,
    "name": "user-4"
  },
  {
    "id": 5,
    "name": "user-5"
  },
  ...,
  {
    "id": 1000,
    "name": "user-1000"
  }
]
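For contrast, a non-streaming version would read the entire users.json into memory and unmarshal it in one go, which is exactly what we want to avoid for very large files. Below is a minimal sketch of that approach; it is an illustration only, not part of the final example.

package main

import (
    "encoding/json"
    "fmt"
    "os"
)

type User struct {
    ID   int    `json:"id"`
    Name string `json:"name"`
}

func main() {
    // The whole file ends up in memory at once, which is fine for small
    // files but wasteful or impossible for very large ones.
    data, err := os.ReadFile("users.json")
    if err != nil {
        panic(err)
    }

    var users []User
    if err := json.Unmarshal(data, &users); err != nil {
        panic(err)
    }

    fmt.Println("total users:", len(users))
}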
package user

type User struct {
    ID   int    `json:"id"`
    Name string `json:"name"`
}
package user

import (
    "encoding/json"
    "fmt"
    "os"
)

// Entry represents each entry in the stream. If the stream fails, an error
// will be present.
type Entry struct {
    Error error
    User  User
}

// Stream helps transmit each stream entry within a channel.
type Stream struct {
    stream chan Entry
}

// NewJSONStream returns a new `Stream` type.
func NewJSONStream() Stream {
    return Stream{
        stream: make(chan Entry),
    }
}

// Watch watches JSON streams. Each stream entry will either have an error or a
// User object. Client code does not need to explicitly exit after catching an
// error as the `Start` method will close the channel automatically.
func (s Stream) Watch() <-chan Entry {
    return s.stream
}

// Start starts streaming the JSON file, decoding one element at a time. If an
// error occurs, the channel will be closed.
func (s Stream) Start(path string) {
    // Stop the streaming channel as soon as there is nothing left to read in
    // the file.
    defer close(s.stream)

    // Open file to read.
    file, err := os.Open(path)
    if err != nil {
        s.stream <- Entry{Error: fmt.Errorf("open file: %w", err)}
        return
    }
    defer file.Close()

    decoder := json.NewDecoder(file)

    // Read opening delimiter. `[` or `{`
    if _, err := decoder.Token(); err != nil {
        s.stream <- Entry{Error: fmt.Errorf("decode opening delimiter: %w", err)}
        return
    }

    // Read the file content as long as there is something left to decode.
    i := 1
    for decoder.More() {
        var user User
        if err := decoder.Decode(&user); err != nil {
            s.stream <- Entry{Error: fmt.Errorf("decode line %d: %w", i, err)}
            return
        }

        s.stream <- Entry{User: user}
        i++
    }

    // Read closing delimiter. `]` or `}`
    if _, err := decoder.Token(); err != nil {
        s.stream <- Entry{Error: fmt.Errorf("decode closing delimiter: %w", err)}
        return
    }
}
package main

import (
    "log"

    "github.com/you/internal/user"
)

func main() {
    stream := user.NewJSONStream()

    // Produce stream entries in a separate goroutine. The channel is closed
    // by `Start` once the whole file has been consumed or an error occurs.
    go stream.Start("users.json")

    // Consume stream entries until the channel is closed.
    for data := range stream.Watch() {
        if data.Error != nil {
            log.Println(data.Error)
            continue
        }

        log.Println(data.User.ID, ":", data.User.Name)
    }
}
As you can see below, each JSON object has been printed one by one.
$ go run -race .
2021/04/15 22:45:23 1 : user-1
2021/04/15 22:45:23 2 : user-2
2021/04/15 22:45:23 3 : user-3
2021/04/15 22:45:23 4 : user-4
2021/04/15 22:45:23 5 : user-5
...
2021/04/15 22:45:24 1000 : user-1000
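If you want to experiment with the decoder mechanics on their own, the whole technique boils down to the `Token`/`More`/`Decode` loop of the `encoding/json` package. Below is a minimal, self-contained sketch that uses an inline JSON string instead of a file; it is an illustration only, separate from the example above.

package main

import (
    "encoding/json"
    "fmt"
    "strings"
)

func main() {
    const data = `[{"id":1,"name":"user-1"},{"id":2,"name":"user-2"}]`

    decoder := json.NewDecoder(strings.NewReader(data))

    // Consume the opening `[` delimiter.
    if _, err := decoder.Token(); err != nil {
        panic(err)
    }

    // Decode one array element at a time while there is more to read.
    for decoder.More() {
        var user struct {
            ID   int    `json:"id"`
            Name string `json:"name"`
        }
        if err := decoder.Decode(&user); err != nil {
            panic(err)
        }
        fmt.Println(user.ID, user.Name)
    }

    // Consume the closing `]` delimiter.
    if _, err := decoder.Token(); err != nil {
        panic(err)
    }
}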