Share memory by copying. (#180)

This commit is contained in:
Nuno Cruces
2024-10-31 15:21:15 +00:00
committed by GitHub
parent b2e8636227
commit 17f7840a83
13 changed files with 307 additions and 34 deletions

View File

@@ -17,11 +17,13 @@ echo aix ; GOOS=aix GOARCH=ppc64 go build .
echo js ; GOOS=js GOARCH=wasm go build .
echo wasip1 ; GOOS=wasip1 GOARCH=wasm go build .
echo linux-flock ; GOOS=linux GOARCH=amd64 go build -tags sqlite3_flock .
echo linux-noshm ; GOOS=linux GOARCH=amd64 go build -tags sqlite3_noshm .
echo linux-dotlk ; GOOS=linux GOARCH=amd64 go build -tags sqlite3_dotlk .
echo linux-nosys ; GOOS=linux GOARCH=amd64 go build -tags sqlite3_nosys .
echo darwin-flock ; GOOS=darwin GOARCH=amd64 go build -tags sqlite3_flock .
echo darwin-noshm ; GOOS=darwin GOARCH=amd64 go build -tags sqlite3_noshm .
echo darwin-dotlk ; GOOS=darwin GOARCH=amd64 go build -tags sqlite3_dotlk .
echo darwin-nosys ; GOOS=darwin GOARCH=amd64 go build -tags sqlite3_nosys .
echo windows-dotlk ; GOOS=windows GOARCH=amd64 go build -tags sqlite3_dotlk .
echo windows-nosys ; GOOS=windows GOARCH=amd64 go build -tags sqlite3_nosys .
echo freebsd-dotlk ; GOOS=freebsd GOARCH=amd64 go build -tags sqlite3_dotlk .
echo freebsd-nosys ; GOOS=freebsd GOARCH=amd64 go build -tags sqlite3_nosys .
echo solaris-flock ; GOOS=solaris GOARCH=amd64 go build -tags sqlite3_flock .
echo solaris-dotlk ; GOOS=solaris GOARCH=amd64 go build -tags sqlite3_dotlk .

View File

@@ -60,10 +60,6 @@ jobs:
run: go test -v -tags sqlite3_dotlk ./...
if: matrix.os == 'macos-latest'
- name: Test no shared memory
run: go test -v -tags sqlite3_noshm ./...
if: matrix.os == 'ubuntu-latest'
- name: Test no locks
run: go test -v -tags sqlite3_nosys ./...
if: matrix.os == 'ubuntu-latest'

View File

@@ -1,4 +1,4 @@
//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys
//go:build ((linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk
package driver_test

View File

@@ -1,4 +1,4 @@
//go:build unix && !(sqlite3_noshm || sqlite3_nosys)
//go:build unix && !sqlite3_nosys
package util

View File

@@ -1,4 +1,4 @@
//go:build !unix || sqlite3_noshm || sqlite3_nosys
//go:build !unix || sqlite3_nosys
package util

View File

@@ -1,4 +1,4 @@
//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys
//go:build ((linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk
package bradfitz

View File

@@ -15,8 +15,7 @@ The main differences are [file locking](#file-locking) and [WAL mode](#write-ahe
POSIX advisory locks, which SQLite uses on Unix, are
[broken by design](https://github.com/sqlite/sqlite/blob/b74eb0/src/os_unix.c#L1073-L1161).
On Linux and macOS, this package uses
Instead, on Linux and macOS, this package uses
[OFD locks](https://www.gnu.org/software/libc/manual/html_node/Open-File-Description-Locks.html)
to synchronize access to database files.
@@ -45,7 +44,7 @@ to check if your build supports file locking.
### Write-Ahead Logging
On little-endian Unix, this package uses `mmap` to implement
On Unix, this package may use `mmap` to implement
[shared-memory for the WAL-index](https://sqlite.org/wal.html#implementation_of_shared_memory_for_the_wal_index),
like SQLite.
@@ -54,6 +53,9 @@ a WAL database can only be accessed by a single proccess.
Other processes that attempt to access a database locked with BSD locks,
will fail with the [`SQLITE_PROTOCOL`](https://sqlite.org/rescode.html#protocol) error code.
You can also opt into a cross platform, in-process, memory sharing implementation
with the `sqlite3_dotlk` build tag.
Otherwise, [WAL support is limited](https://sqlite.org/wal.html#noshm),
and `EXCLUSIVE` locking mode must be set to create, read, and write WAL databases.
To use `EXCLUSIVE` locking mode with the
@@ -66,7 +68,7 @@ to check if your build supports shared memory.
### Batch-Atomic Write
On 64-bit Linux, this package supports
On Linux, this package may support
[batch-atomic writes](https://sqlite.org/cgi/src/technote/714)
on the F2FS filesystem.
@@ -87,9 +89,7 @@ The implementation is compatible with SQLite's
The VFS can be customized with a few build tags:
- `sqlite3_flock` forces the use of BSD locks.
- `sqlite3_dotlk` forces the use of dot-file locks.
- `sqlite3_nosys` prevents importing [`x/sys`](https://pkg.go.dev/golang.org/x/sys);
disables locking _and_ shared memory on all platforms.
- `sqlite3_noshm` disables shared memory on all platforms.
- `sqlite3_nosys` prevents importing [`x/sys`](https://pkg.go.dev/golang.org/x/sys).
> [!IMPORTANT]
> The default configuration of this package is compatible with the standard

View File

@@ -1,4 +1,4 @@
//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys
//go:build ((linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk
package adiantum_test

View File

@@ -1,4 +1,4 @@
//go:build (darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_dotlk || sqlite3_noshm || sqlite3_nosys)
//go:build ((darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk
package vfs

View File

@@ -1,4 +1,4 @@
//go:build (freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_dotlk || sqlite3_noshm || sqlite3_nosys)
//go:build ((freebsd || openbsd || netbsd || dragonfly || illumos) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_dotlk || sqlite3_nosys)) || sqlite3_flock
package vfs
@@ -23,9 +23,9 @@ type vfsShmFile struct {
// +checklocks:vfsShmFilesMtx
refs int
// +checklocks:lockMtx
lock [_SHM_NLOCK]int16
lockMtx sync.Mutex
// +checklocks:Mutex
lock [_SHM_NLOCK]int16
sync.Mutex
}
var (
@@ -54,7 +54,7 @@ func (s *vfsShm) Close() error {
s.shmLock(0, _SHM_NLOCK, _SHM_UNLOCK)
// Decrease reference count.
if s.vfsShmFile.refs > 1 {
if s.vfsShmFile.refs > 0 {
s.vfsShmFile.refs--
s.vfsShmFile = nil
return nil
@@ -119,7 +119,6 @@ func (s *vfsShm) shmOpen() (rc _ErrorCode) {
s.vfsShmFile = &vfsShmFile{
File: f,
info: fi,
refs: 1,
}
f = nil // Don't close the file.
for i, g := range vfsShmFiles {
@@ -174,8 +173,8 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext
}
func (s *vfsShm) shmLock(offset, n int32, flags _ShmFlag) _ErrorCode {
s.lockMtx.Lock()
defer s.lockMtx.Unlock()
s.Lock()
defer s.Unlock()
switch {
case flags&_SHM_UNLOCK != 0:
@@ -234,8 +233,7 @@ func (s *vfsShm) shmUnmap(delete bool) {
for _, r := range s.regions {
r.Unmap()
}
clear(s.regions)
s.regions = s.regions[:0]
s.regions = nil
// Close the file.
if delete {
@@ -245,7 +243,7 @@ func (s *vfsShm) shmUnmap(delete bool) {
}
func (s *vfsShm) shmBarrier() {
s.lockMtx.Lock()
s.Lock()
//lint:ignore SA2001 memory barrier.
s.lockMtx.Unlock()
s.Unlock()
}

277
vfs/shm_copy.go Normal file
View File

@@ -0,0 +1,277 @@
//go:build sqlite3_dotlk
package vfs
import (
"context"
"sync"
"unsafe"
"github.com/ncruces/go-sqlite3/internal/util"
"github.com/tetratelabs/wazero/api"
)
const (
_SHM_NLOCK = 8
_WALINDEX_PGSZ = 32768
)
type vfsShmBuffer struct {
shared []byte // +checklocks:Mutex
refs int // +checklocks:vfsShmBuffersMtx
lock [_SHM_NLOCK]int16 // +checklocks:Mutex
sync.Mutex
}
var (
// +checklocks:vfsShmBuffersMtx
vfsShmBuffers = map[string]*vfsShmBuffer{}
vfsShmBuffersMtx sync.Mutex
)
type vfsShm struct {
*vfsShmBuffer
mod api.Module
alloc api.Function
free api.Function
path string
shadow []byte
ptrs []uint32
stack [1]uint64
lock [_SHM_NLOCK]bool
readOnly bool
}
func (s *vfsShm) Close() error {
if s.vfsShmBuffer == nil {
return nil
}
vfsShmBuffersMtx.Lock()
defer vfsShmBuffersMtx.Unlock()
// Unlock everything.
s.shmLock(0, _SHM_NLOCK, _SHM_UNLOCK)
// Decrease reference count.
if s.vfsShmBuffer.refs > 0 {
s.vfsShmBuffer.refs--
s.vfsShmBuffer = nil
return nil
}
delete(vfsShmBuffers, s.path)
return nil
}
func (s *vfsShm) shmOpen() {
if s.vfsShmBuffer != nil {
return
}
vfsShmBuffersMtx.Lock()
defer vfsShmBuffersMtx.Unlock()
// Find a shared buffer, increase the reference count.
if g, ok := vfsShmBuffers[s.path]; ok {
s.vfsShmBuffer = g
g.refs++
return
}
// Add the new shared buffer.
s.vfsShmBuffer = &vfsShmBuffer{}
vfsShmBuffers[s.path] = s.vfsShmBuffer
}
func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, extend bool) (uint32, _ErrorCode) {
if size != _WALINDEX_PGSZ {
return 0, _IOERR_SHMMAP
}
if s.mod == nil {
s.mod = mod
s.free = mod.ExportedFunction("sqlite3_free")
s.alloc = mod.ExportedFunction("sqlite3_malloc64")
}
s.shmOpen()
s.Lock()
defer s.Unlock()
defer s.shmAcquire()
n := (int(id) + 1) * int(size)
if n > len(s.shared) {
if !extend {
return 0, _OK
}
s.shared = append(s.shared, make([]byte, n-len(s.shared))...)
}
if n > len(s.shadow) {
s.shadow = append(s.shadow, make([]byte, n-len(s.shadow))...)
}
for int(id) >= len(s.ptrs) {
s.stack[0] = uint64(size)
if err := s.alloc.CallWithStack(ctx, s.stack[:]); err != nil {
panic(err)
}
if s.stack[0] == 0 {
panic(util.OOMErr)
}
clear(util.View(s.mod, uint32(s.stack[0]), _WALINDEX_PGSZ))
s.ptrs = append(s.ptrs, uint32(s.stack[0]))
}
return s.ptrs[id], _OK
}
func (s *vfsShm) shmLock(offset, n int32, flags _ShmFlag) _ErrorCode {
s.Lock()
defer s.Unlock()
if flags&_SHM_UNLOCK == 0 {
s.shmAcquire()
} else {
s.shmRelease()
}
switch {
case flags&_SHM_UNLOCK != 0:
for i := offset; i < offset+n; i++ {
if s.lock[i] {
if s.vfsShmBuffer.lock[i] == 0 {
panic(util.AssertErr())
}
if s.vfsShmBuffer.lock[i] <= 0 {
s.vfsShmBuffer.lock[i] = 0
} else {
s.vfsShmBuffer.lock[i]--
}
s.lock[i] = false
}
}
case flags&_SHM_SHARED != 0:
for i := offset; i < offset+n; i++ {
if s.lock[i] {
panic(util.AssertErr())
}
if s.vfsShmBuffer.lock[i]+1 <= 0 {
return _BUSY
}
}
for i := offset; i < offset+n; i++ {
s.vfsShmBuffer.lock[i]++
s.lock[i] = true
}
case flags&_SHM_EXCLUSIVE != 0:
for i := offset; i < offset+n; i++ {
if s.lock[i] {
panic(util.AssertErr())
}
if s.vfsShmBuffer.lock[i] != 0 {
return _BUSY
}
}
for i := offset; i < offset+n; i++ {
s.vfsShmBuffer.lock[i] = -1
s.lock[i] = true
}
default:
panic(util.AssertErr())
}
return _OK
}
func (s *vfsShm) shmUnmap(delete bool) {
if s.vfsShmBuffer == nil {
return
}
defer s.Close()
s.Lock()
s.shmRelease()
defer s.Unlock()
for _, p := range s.ptrs {
s.stack[0] = uint64(p)
if err := s.free.CallWithStack(context.Background(), s.stack[:]); err != nil {
panic(err)
}
}
s.ptrs = nil
s.shadow = nil
}
func (s *vfsShm) shmBarrier() {
s.Lock()
s.shmAcquire()
s.shmRelease()
s.Unlock()
}
// This looks like a safe, if inefficient, way of keeping memory in sync.
//
// The WAL-index file starts with a header.
// This header starts with two 48 byte, checksummed, copies of the same information,
// which are accessed independently between memory barriers.
// The checkpoint information that follows uses 4 byte aligned words.
//
// Finally, we have the WAL-index hash tables,
// which are only modified holding the exclusive WAL_WRITE_LOCK.
//
// Since all the data is either redundant+checksummed,
// 4 byte aligned, or modified under an exclusive lock,
// the copies below should correctly keep memory in sync.
//
// https://sqlite.org/walformat.html#the_wal_index_file_format
// +checklocks:s.Mutex
func (s *vfsShm) shmAcquire() {
// Copies modified words from shared to private memory.
for id, p := range s.ptrs {
i0 := id * _WALINDEX_PGSZ
i1 := i0 + _WALINDEX_PGSZ
shared := shmPage(s.shared[i0:i1])
shadow := shmPage(s.shadow[i0:i1])
privat := shmPage(util.View(s.mod, p, _WALINDEX_PGSZ))
if *shadow == *shared {
continue
}
for i, shared := range shared {
if shadow[i] != shared {
shadow[i] = shared
privat[i] = shared
}
}
}
}
// +checklocks:s.Mutex
func (s *vfsShm) shmRelease() {
// Copies modified words from private to shared memory.
for id, p := range s.ptrs {
i0 := id * _WALINDEX_PGSZ
i1 := i0 + _WALINDEX_PGSZ
shared := shmPage(s.shared[i0:i1])
shadow := shmPage(s.shadow[i0:i1])
privat := shmPage(util.View(s.mod, p, _WALINDEX_PGSZ))
if *shadow == *privat {
continue
}
for i, privat := range privat {
if shadow[i] != privat {
shadow[i] = privat
shared[i] = privat
}
}
}
}
func shmPage(s []byte) *[_WALINDEX_PGSZ / 4]uint32 {
p := (*uint32)(unsafe.Pointer(unsafe.SliceData(s)))
return (*[_WALINDEX_PGSZ / 4]uint32)(unsafe.Slice(p, _WALINDEX_PGSZ/4))
}

View File

@@ -1,4 +1,4 @@
//go:build (darwin || linux) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_flock || sqlite3_dotlk || sqlite3_noshm || sqlite3_nosys)
//go:build (darwin || linux) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !(sqlite3_flock || sqlite3_dotlk || sqlite3_nosys)
package vfs

View File

@@ -1,4 +1,4 @@
//go:build !(darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) || !(386 || arm || amd64 || arm64 || riscv64 || ppc64le) || sqlite3_dotlk || sqlite3_noshm || sqlite3_nosys
//go:build !(((darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos) && (386 || arm || amd64 || arm64 || riscv64 || ppc64le) && !sqlite3_nosys) || sqlite3_flock || sqlite3_dotlk)
package vfs