mirror of
https://github.com/SajadMRjl/find-me-internet.git
synced 2026-07-02 15:09:00 +00:00
feat: optimization
This commit is contained in:
23
.env.example
23
.env.example
@@ -1,4 +1,21 @@
|
|||||||
LOG_LEVEL=DEBUG
|
# --- Application Settings ---
|
||||||
MAX_WORKERS=20
|
LOG_LEVEL=INFO
|
||||||
|
MAX_WORKERS=50
|
||||||
|
|
||||||
|
# --- Network Timeouts ---
|
||||||
|
TCP_TIMEOUT=2s
|
||||||
|
TEST_TIMEOUT=10s
|
||||||
|
TEST_URL=http://cp.cloudflare.com
|
||||||
|
|
||||||
|
# --- Paths & Resources ---
|
||||||
|
# The path to your Sing-box binary
|
||||||
SING_BOX_PATH=./bin/sing-box
|
SING_BOX_PATH=./bin/sing-box
|
||||||
TCP_TIMEOUT=1500ms
|
|
||||||
|
# The source file containing raw proxy links (one per line)
|
||||||
|
INPUT_PATH=./data/proxies.txt
|
||||||
|
|
||||||
|
# Where to save the valid results (JSONL format)
|
||||||
|
OUTPUT_PATH=./data/valid_proxies.jsonl
|
||||||
|
|
||||||
|
# Path to the MaxMind GeoLite2 Country database
|
||||||
|
GEOIP_PATH=./data/GeoLite2-Country.mmdb
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -6,6 +6,9 @@ scanner
|
|||||||
*.dll
|
*.dll
|
||||||
*.so
|
*.so
|
||||||
*.dylib
|
*.dylib
|
||||||
|
data/
|
||||||
|
data/*
|
||||||
|
|
||||||
|
|
||||||
# Test binaries built with 'go test -c'
|
# Test binaries built with 'go test -c'
|
||||||
*.test
|
*.test
|
||||||
|
|||||||
114
cmd/main.go
114
cmd/main.go
@@ -3,70 +3,98 @@ package main
|
|||||||
import (
|
import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
|
"os/signal"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"syscall"
|
||||||
|
|
||||||
"find-me-internet/internal/config"
|
"find-me-internet/internal/config"
|
||||||
|
"find-me-internet/internal/dedup"
|
||||||
"find-me-internet/internal/filter"
|
"find-me-internet/internal/filter"
|
||||||
|
"find-me-internet/internal/geoip"
|
||||||
"find-me-internet/internal/logger"
|
"find-me-internet/internal/logger"
|
||||||
"find-me-internet/internal/parser"
|
"find-me-internet/internal/parser"
|
||||||
|
"find-me-internet/internal/sink"
|
||||||
|
"find-me-internet/internal/source"
|
||||||
"find-me-internet/internal/tester"
|
"find-me-internet/internal/tester"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// 1. Initialization
|
// 1. Init
|
||||||
cfg := config.Load()
|
cfg := config.Load()
|
||||||
logger.Setup(cfg.LogLevel)
|
logger.Setup(cfg.LogLevel)
|
||||||
|
|
||||||
if _, err := os.Stat(cfg.SingBoxPath); os.IsNotExist(err) {
|
// Graceful Shutdown Channel
|
||||||
slog.Error("singbox_binary_missing", "path", cfg.SingBoxPath)
|
sigChan := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||||
|
|
||||||
|
// 2. Services
|
||||||
|
geoDB, err := geoip.Open(cfg.GeoIPPath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("geoip_db_missing", "error", err, "msg", "Countries will be marked N/A")
|
||||||
|
} else {
|
||||||
|
defer geoDB.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
resultsWriter, err := sink.NewJSONL(cfg.OutputPath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("cannot_create_output_file", "error", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
defer resultsWriter.Close()
|
||||||
|
|
||||||
slog.Info("scanner_started", "workers", cfg.Workers, "log_level", cfg.LogLevel)
|
deduplicator := dedup.New()
|
||||||
|
|
||||||
// Mock Data
|
|
||||||
rawLinks := []string{
|
|
||||||
"vless://4525c260-df3c-4f62-b8f1-f4f5f305694b@66.81.247.155:443?encryption=none&security=tls&sni=yyzsuabw9e3qd5ud7ihi5dxm96oglnsvr83cjojnm1efncfhr9ucordq.zjde5.de5.net&fp=chrome&insecure=0&allowInsecure=0&type=ws&host=yyzsuabw9e3qd5ud7ihi5dxm96oglnsvr83cjojnm1efncfhr9ucordq.zjde5.de5.net&path=%2F%3Fed#%DA%86%D9%86%D9%84%20%D8%AA%D9%84%DA%AF%D8%B1%D8%A7%D9%85%20%3A%20%40CroSs_Guildd%F0%9F%92%8A",
|
|
||||||
"vless://efdb2890-6dd7-4e65-8984-f0b1d3ae4e01@here-we-go-again.embeddedonline.org:443?encryption=none&security=tls&sni=here-we-go-again.embeddedonline.org&fp=chrome&alpn=http%2F1.1&insecure=0&allowInsecure=0&type=ws&host=here-we-go-again.embeddedonline.org&path=%2FJ1jTS0GMxqS0Atmd5x#here-we-go-again.embeddedonline.org%20tls%20WS%20direct%20vless",
|
|
||||||
// Add more links here...
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Pipelines
|
|
||||||
netFilter := filter.NewPipeline(cfg.TcpTimeout)
|
netFilter := filter.NewPipeline(cfg.TcpTimeout)
|
||||||
boxRunner := tester.NewRunner(cfg.SingBoxPath, cfg.TestURL, cfg.TestTimeout)
|
boxRunner := tester.NewRunner(cfg.SingBoxPath, cfg.TestURL, cfg.TestTimeout)
|
||||||
|
|
||||||
// 3. Concurrency Control
|
// 3. Input Stream (Example: reading from a local file 'proxies.txt')
|
||||||
|
// In production, you might loop through a list of URLs here
|
||||||
|
linkStream, err := source.LoadFromFile(cfg.InputPath)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("input_source_failed", "error", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Worker Pool
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
semaphore := make(chan struct{}, cfg.Workers)
|
semaphore := make(chan struct{}, cfg.Workers)
|
||||||
|
|
||||||
// 4. Thread-Safe Counter
|
slog.Info("pipeline_started", "workers", cfg.Workers)
|
||||||
var (
|
|
||||||
validCount int
|
|
||||||
mu sync.Mutex // The lock protecting validCount
|
|
||||||
)
|
|
||||||
|
|
||||||
startTotal := time.Now()
|
countProcessed := 0
|
||||||
|
|
||||||
|
// Main Loop
|
||||||
|
loop:
|
||||||
|
for rawLink := range linkStream {
|
||||||
|
select {
|
||||||
|
case <-sigChan:
|
||||||
|
slog.Info("shutdown_signal_received", "msg", "finishing pending jobs...")
|
||||||
|
break loop
|
||||||
|
default:
|
||||||
|
// Continue
|
||||||
|
}
|
||||||
|
|
||||||
for _, link := range rawLinks {
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
|
|
||||||
go func(raw string) {
|
go func(raw string) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
// Step A: Parse
|
// --- STAGE 1: PARSE ---
|
||||||
proxy, err := parser.ParseLink(raw)
|
proxy, err := parser.ParseLink(raw)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step B: Filter
|
// --- STAGE 2: DEDUP ---
|
||||||
|
if deduplicator.Seen(proxy.Address, proxy.Port) {
|
||||||
|
return // Skip duplicates silently
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- STAGE 3: FILTER ---
|
||||||
if !netFilter.Check(proxy) {
|
if !netFilter.Check(proxy) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step C: Test
|
// --- STAGE 4: TEST ---
|
||||||
semaphore <- struct{}{}
|
semaphore <- struct{}{} // Rate limit expensive tests
|
||||||
err = boxRunner.Test(proxy)
|
err = boxRunner.Test(proxy)
|
||||||
<-semaphore
|
<-semaphore
|
||||||
|
|
||||||
@@ -74,22 +102,30 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
mu.Lock()
|
// --- STAGE 5: ENRICH ---
|
||||||
validCount++
|
if geoDB != nil {
|
||||||
mu.Unlock()
|
proxy.Country = geoDB.Lookup(proxy.Address)
|
||||||
|
}
|
||||||
|
|
||||||
slog.Info("proxy_verified",
|
// --- STAGE 6: SAVE ---
|
||||||
"target", proxy.Address,
|
if err := resultsWriter.Write(proxy); err != nil {
|
||||||
"latency_ms", proxy.Latency.Milliseconds(),
|
slog.Error("write_failed", "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Info("proxy_saved",
|
||||||
|
"country", proxy.Country,
|
||||||
|
"latency", proxy.Latency.Milliseconds(),
|
||||||
|
"type", proxy.Type,
|
||||||
)
|
)
|
||||||
|
|
||||||
}(link)
|
}(rawLink)
|
||||||
|
|
||||||
|
countProcessed++
|
||||||
|
if countProcessed % 1000 == 0 {
|
||||||
|
slog.Info("progress_report", "processed", countProcessed)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
slog.Info("scan_complete",
|
slog.Info("scan_finished", "total_processed", countProcessed)
|
||||||
"duration", time.Since(startTotal),
|
|
||||||
"valid_count", validCount,
|
|
||||||
"total_scanned", len(rawLinks),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
4
go.mod
4
go.mod
@@ -6,6 +6,7 @@ require (
|
|||||||
github.com/gvcgo/vpnparser v0.2.7
|
github.com/gvcgo/vpnparser v0.2.7
|
||||||
github.com/joho/godotenv v1.5.1
|
github.com/joho/godotenv v1.5.1
|
||||||
github.com/kelseyhightower/envconfig v1.4.0
|
github.com/kelseyhightower/envconfig v1.4.0
|
||||||
|
github.com/oschwald/geoip2-golang v1.13.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
@@ -28,12 +29,13 @@ require (
|
|||||||
github.com/mattn/go-runewidth v0.0.15 // indirect
|
github.com/mattn/go-runewidth v0.0.15 // indirect
|
||||||
github.com/muesli/reflow v0.3.0 // indirect
|
github.com/muesli/reflow v0.3.0 // indirect
|
||||||
github.com/muesli/termenv v0.15.2 // indirect
|
github.com/muesli/termenv v0.15.2 // indirect
|
||||||
|
github.com/oschwald/maxminddb-golang v1.13.0 // indirect
|
||||||
github.com/pterm/pterm v0.12.62 // indirect
|
github.com/pterm/pterm v0.12.62 // indirect
|
||||||
github.com/rivo/uniseg v0.4.4 // indirect
|
github.com/rivo/uniseg v0.4.4 // indirect
|
||||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||||
go.opentelemetry.io/otel v1.15.1 // indirect
|
go.opentelemetry.io/otel v1.15.1 // indirect
|
||||||
go.opentelemetry.io/otel/trace v1.15.1 // indirect
|
go.opentelemetry.io/otel/trace v1.15.1 // indirect
|
||||||
golang.org/x/sys v0.15.0 // indirect
|
golang.org/x/sys v0.20.0 // indirect
|
||||||
golang.org/x/term v0.15.0 // indirect
|
golang.org/x/term v0.15.0 // indirect
|
||||||
golang.org/x/text v0.14.0 // indirect
|
golang.org/x/text v0.14.0 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
|
|||||||
12
go.sum
12
go.sum
@@ -88,6 +88,10 @@ github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo
|
|||||||
github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=
|
github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=
|
||||||
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
||||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||||
|
github.com/oschwald/geoip2-golang v1.13.0 h1:Q44/Ldc703pasJeP5V9+aFSZFmBN7DKHbNsSFzQATJI=
|
||||||
|
github.com/oschwald/geoip2-golang v1.13.0/go.mod h1:P9zG+54KPEFOliZ29i7SeYZ/GM6tfEL+rgSn03hYuUo=
|
||||||
|
github.com/oschwald/maxminddb-golang v1.13.0 h1:R8xBorY71s84yO06NgTmQvqvTvlS/bnYZrrWX1MElnU=
|
||||||
|
github.com/oschwald/maxminddb-golang v1.13.0/go.mod h1:BU0z8BfFVhi1LQaonTwwGQlsHUEu9pWNdMfmq4ztm0o=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/pterm/pterm v0.12.27/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI=
|
github.com/pterm/pterm v0.12.27/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI=
|
||||||
@@ -109,8 +113,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
|
|||||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=
|
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=
|
||||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
||||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||||
@@ -148,8 +152,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
|
|||||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
|
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
|
||||||
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
|
|||||||
@@ -10,27 +10,29 @@ import (
|
|||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
// App Settings
|
// App Settings
|
||||||
LogLevel string `envconfig:"LOG_LEVEL" default:"INFO"` // INFO, DEBUG, ERROR
|
LogLevel string `envconfig:"LOG_LEVEL" default:"INFO"`
|
||||||
Workers int `envconfig:"MAX_WORKERS" default:"10"`
|
Workers int `envconfig:"MAX_WORKERS" default:"20"`
|
||||||
|
|
||||||
// Paths
|
// Network Logic
|
||||||
SingBoxPath string `envconfig:"SING_BOX_PATH" default:"./bin/sing-box"`
|
|
||||||
|
|
||||||
// Testing Parameters
|
|
||||||
TestURL string `envconfig:"TEST_URL" default:"http://cp.cloudflare.com"`
|
TestURL string `envconfig:"TEST_URL" default:"http://cp.cloudflare.com"`
|
||||||
TcpTimeout time.Duration `envconfig:"TCP_TIMEOUT" default:"2s"`
|
TcpTimeout time.Duration `envconfig:"TCP_TIMEOUT" default:"2s"`
|
||||||
TestTimeout time.Duration `envconfig:"TEST_TIMEOUT" default:"10s"`
|
TestTimeout time.Duration `envconfig:"TEST_TIMEOUT" default:"10s"`
|
||||||
|
|
||||||
|
// File System Paths
|
||||||
|
SingBoxPath string `envconfig:"SING_BOX_PATH" default:"./bin/sing-box"`
|
||||||
|
InputPath string `envconfig:"INPUT_PATH" default:"proxies.txt"`
|
||||||
|
OutputPath string `envconfig:"OUTPUT_PATH" default:"valid.jsonl"`
|
||||||
|
GeoIPPath string `envconfig:"GEOIP_PATH" default:"GeoLite2-Country.mmdb"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load reads .env and maps variables to Config struct
|
// Load reads .env and processes environment variables
|
||||||
func Load() *Config {
|
func Load() *Config {
|
||||||
// 1. Try loading .env file (optional, for local dev)
|
// Silently ignore if .env is missing (production might use real ENV vars)
|
||||||
_ = godotenv.Load()
|
_ = godotenv.Load()
|
||||||
|
|
||||||
var cfg Config
|
var cfg Config
|
||||||
// 2. Process environment variables
|
|
||||||
if err := envconfig.Process("", &cfg); err != nil {
|
if err := envconfig.Process("", &cfg); err != nil {
|
||||||
log.Fatalf("Failed to load config: %v", err)
|
log.Fatalf("Configuration Error: %v", err)
|
||||||
}
|
}
|
||||||
return &cfg
|
return &cfg
|
||||||
}
|
}
|
||||||
36
internal/dedup/filter.go
Normal file
36
internal/dedup/filter.go
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
package dedup
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Filter is a concurrency-safe set of "address:port" endpoints that have
// already been seen. The zero value is not usable; construct one with New.
type Filter struct {
	seen map[string]struct{}
	mu   sync.Mutex
}

// New returns an empty Filter ready for concurrent use.
func New() *Filter {
	return &Filter{
		seen: make(map[string]struct{}),
	}
}

// Seen reports whether the address/port pair was recorded by an earlier call,
// and records it if it was not. It returns false exactly once per distinct
// pair (the first sighting) and true for every later call.
//
// The lookup and the insert happen inside a single critical section. Checking
// under a read lock and inserting under a separate write lock would let two
// goroutines both observe "not present" and both return false, so the same
// endpoint would be processed twice.
func (f *Filter) Seen(address string, port int) bool {
	key := fmt.Sprintf("%s:%d", address, port)

	f.mu.Lock()
	defer f.mu.Unlock()

	if _, exists := f.seen[key]; exists {
		return true
	}
	f.seen[key] = struct{}{}
	return false
}
|
||||||
40
internal/geoip/lookup.go
Normal file
40
internal/geoip/lookup.go
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
package geoip
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net"
|
||||||
|
"github.com/oschwald/geoip2-golang"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Database struct {
|
||||||
|
reader *geoip2.Reader
|
||||||
|
}
|
||||||
|
|
||||||
|
func Open(path string) (*Database, error) {
|
||||||
|
r, err := geoip2.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &Database{reader: r}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Database) Lookup(ipStr string) string {
|
||||||
|
if d == nil || d.reader == nil {
|
||||||
|
return "N/A"
|
||||||
|
}
|
||||||
|
|
||||||
|
ip := net.ParseIP(ipStr)
|
||||||
|
if ip == nil {
|
||||||
|
return "INVALID_IP"
|
||||||
|
}
|
||||||
|
|
||||||
|
record, err := d.reader.Country(ip)
|
||||||
|
if err != nil || record.Country.IsoCode == "" {
|
||||||
|
return "UNKNOWN"
|
||||||
|
}
|
||||||
|
|
||||||
|
return record.Country.IsoCode
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Database) Close() {
|
||||||
|
d.reader.Close()
|
||||||
|
}
|
||||||
38
internal/sink/writer.go
Normal file
38
internal/sink/writer.go
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
package sink
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"find-me-internet/internal/model"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
type JSONLWriter struct {
|
||||||
|
file *os.File
|
||||||
|
mu sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewJSONL(path string) (*JSONLWriter, error) {
|
||||||
|
f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &JSONLWriter{file: f}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *JSONLWriter) Write(p *model.Proxy) error {
|
||||||
|
w.mu.Lock()
|
||||||
|
defer w.mu.Unlock()
|
||||||
|
|
||||||
|
data, err := json.Marshal(p)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = w.file.Write(append(data, '\n'))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *JSONLWriter) Close() {
|
||||||
|
w.file.Close()
|
||||||
|
}
|
||||||
61
internal/source/loader.go
Normal file
61
internal/source/loader.go
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
package source
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LoadFromFile opens the file at path and streams its lines over the returned
// channel, so the whole file is never held in memory.
//
// Each line is trimmed of surrounding whitespace; empty lines and lines that
// start with "#" are skipped. The channel is closed once the file has been
// fully read or reading fails. An error is returned only when the file cannot
// be opened.
//
// The channel is unbuffered: the reading goroutine blocks until each line is
// received, so callers should drain the channel.
func LoadFromFile(path string) (<-chan string, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	out := make(chan string)

	go func() {
		defer file.Close()
		defer close(out)

		scanner := bufio.NewScanner(file)
		// Some subscription links are very long: start with a 64 KiB buffer
		// and let it grow up to 1 MiB per line.
		scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

		for scanner.Scan() {
			line := strings.TrimSpace(scanner.Text())
			if line == "" || strings.HasPrefix(line, "#") {
				continue
			}
			out <- line
		}

		// Scan also stops on a read error or on a line longer than the limit
		// above. The channel-only return type has no error path, so report
		// the truncated input on stderr instead of dropping it silently.
		if err := scanner.Err(); err != nil {
			os.Stderr.WriteString("source: reading " + path + ": " + err.Error() + "\n")
		}
	}()

	return out, nil
}
|
||||||
|
|
||||||
|
// LoadFromURL fetches url with an HTTP GET and streams the response body line
// by line over the returned channel (e.g. a raw file hosted on GitHub).
//
// It returns an error, and no channel, when the request fails or the server
// answers with a non-2xx status, so an error page is never streamed as if it
// were a list of links. Lines are handled as in LoadFromFile: each is trimmed,
// empty lines and lines starting with "#" are skipped, and a single line may
// be up to 1 MiB long. The channel is closed when the body is exhausted or
// reading fails.
//
// The channel is unbuffered: the reading goroutine blocks until each line is
// received, so callers should drain the channel.
func LoadFromURL(url string) (<-chan string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		resp.Body.Close()
		return nil, &httpStatusError{url: url, status: resp.Status}
	}

	out := make(chan string)

	go func() {
		defer resp.Body.Close()
		defer close(out)

		scanner := bufio.NewScanner(resp.Body)
		// Same limits as LoadFromFile: the default 64 KiB token limit is too
		// small for some subscription links.
		scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

		for scanner.Scan() {
			line := strings.TrimSpace(scanner.Text())
			if line == "" || strings.HasPrefix(line, "#") {
				continue
			}
			out <- line
		}

		// Scan also stops on a network error or an oversized line. The
		// channel-only return type has no error path, so report the truncated
		// input on stderr instead of dropping it silently.
		if err := scanner.Err(); err != nil {
			os.Stderr.WriteString("source: reading " + url + ": " + err.Error() + "\n")
		}
	}()

	return out, nil
}

// httpStatusError is returned by LoadFromURL when the server responds with a
// status outside the 2xx range.
type httpStatusError struct {
	url    string
	status string
}

// Error implements the error interface.
func (e *httpStatusError) Error() string {
	return "fetching " + e.url + ": unexpected HTTP status " + e.status
}
|
||||||
Reference in New Issue
Block a user