From d48a7750cc7272b861b9b380e06a954b525a1de8 Mon Sep 17 00:00:00 2001 From: sajadMRjl Date: Wed, 28 Jan 2026 03:42:36 +0330 Subject: [PATCH] feat: optimization --- .env.example | 23 +++++++- .gitignore | 3 + cmd/main.go | 116 +++++++++++++++++++++++++------------- go.mod | 4 +- go.sum | 12 ++-- internal/config/config.go | 26 +++++---- internal/dedup/filter.go | 36 ++++++++++++ internal/geoip/lookup.go | 40 +++++++++++++ internal/sink/writer.go | 38 +++++++++++++ internal/source/loader.go | 61 ++++++++++++++++++++ 10 files changed, 299 insertions(+), 60 deletions(-) create mode 100644 internal/dedup/filter.go create mode 100644 internal/geoip/lookup.go create mode 100644 internal/sink/writer.go create mode 100644 internal/source/loader.go diff --git a/.env.example b/.env.example index 1250d10..a2107c4 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,21 @@ -LOG_LEVEL=DEBUG -MAX_WORKERS=20 +# --- Application Settings --- +LOG_LEVEL=INFO +MAX_WORKERS=50 + +# --- Network Timeouts --- +TCP_TIMEOUT=2s +TEST_TIMEOUT=10s +TEST_URL=http://cp.cloudflare.com + +# --- Paths & Resources --- +# The path to your Sing-box binary SING_BOX_PATH=./bin/sing-box -TCP_TIMEOUT=1500ms \ No newline at end of file + +# The source file containing raw proxy links (one per line) +INPUT_PATH=./data/proxies.txt + +# Where to save the valid results (JSONL format) +OUTPUT_PATH=./data/valid_proxies.jsonl + +# Path to the MaxMind GeoLite2 Country database +GEOIP_PATH=./data/GeoLite2-Country.mmdb \ No newline at end of file diff --git a/.gitignore b/.gitignore index 082fd49..6425a65 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ scanner *.dll *.so *.dylib +data/ +data/* + # Test binaries built with 'go test -c' *.test diff --git a/cmd/main.go b/cmd/main.go index bc501d2..9e2b33c 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -3,70 +3,98 @@ package main import ( "log/slog" "os" + "os/signal" "sync" - "time" + "syscall" "find-me-internet/internal/config" + 
"find-me-internet/internal/dedup" "find-me-internet/internal/filter" + "find-me-internet/internal/geoip" "find-me-internet/internal/logger" "find-me-internet/internal/parser" + "find-me-internet/internal/sink" + "find-me-internet/internal/source" "find-me-internet/internal/tester" ) func main() { - // 1. Initialization + // 1. Init cfg := config.Load() logger.Setup(cfg.LogLevel) + + // Graceful Shutdown Channel + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) - if _, err := os.Stat(cfg.SingBoxPath); os.IsNotExist(err) { - slog.Error("singbox_binary_missing", "path", cfg.SingBoxPath) + // 2. Services + geoDB, err := geoip.Open(cfg.GeoIPPath) + if err != nil { + slog.Warn("geoip_db_missing", "error", err, "msg", "Countries will be marked N/A") + } else { + defer geoDB.Close() + } + + resultsWriter, err := sink.NewJSONL(cfg.OutputPath) + if err != nil { + slog.Error("cannot_create_output_file", "error", err) os.Exit(1) } + defer resultsWriter.Close() - slog.Info("scanner_started", "workers", cfg.Workers, "log_level", cfg.LogLevel) - - // Mock Data - rawLinks := []string{ - "vless://4525c260-df3c-4f62-b8f1-f4f5f305694b@66.81.247.155:443?encryption=none&security=tls&sni=yyzsuabw9e3qd5ud7ihi5dxm96oglnsvr83cjojnm1efncfhr9ucordq.zjde5.de5.net&fp=chrome&insecure=0&allowInsecure=0&type=ws&host=yyzsuabw9e3qd5ud7ihi5dxm96oglnsvr83cjojnm1efncfhr9ucordq.zjde5.de5.net&path=%2F%3Fed#%DA%86%D9%86%D9%84%20%D8%AA%D9%84%DA%AF%D8%B1%D8%A7%D9%85%20%3A%20%40CroSs_Guildd%F0%9F%92%8A", - "vless://efdb2890-6dd7-4e65-8984-f0b1d3ae4e01@here-we-go-again.embeddedonline.org:443?encryption=none&security=tls&sni=here-we-go-again.embeddedonline.org&fp=chrome&alpn=http%2F1.1&insecure=0&allowInsecure=0&type=ws&host=here-we-go-again.embeddedonline.org&path=%2FJ1jTS0GMxqS0Atmd5x#here-we-go-again.embeddedonline.org%20tls%20WS%20direct%20vless", - // Add more links here... - } - - // 2. 
Pipelines + deduplicator := dedup.New() netFilter := filter.NewPipeline(cfg.TcpTimeout) boxRunner := tester.NewRunner(cfg.SingBoxPath, cfg.TestURL, cfg.TestTimeout) - // 3. Concurrency Control + // 3. Input Stream (Example: reading from a local file 'proxies.txt') + // In production, you might loop through a list of URLs here + linkStream, err := source.LoadFromFile(cfg.InputPath) + if err != nil { + slog.Error("input_source_failed", "error", err) + os.Exit(1) + } + + // 4. Worker Pool var wg sync.WaitGroup semaphore := make(chan struct{}, cfg.Workers) + + slog.Info("pipeline_started", "workers", cfg.Workers) - // 4. Thread-Safe Counter - var ( - validCount int - mu sync.Mutex // The lock protecting validCount - ) + countProcessed := 0 - startTotal := time.Now() + // Main Loop +loop: + for rawLink := range linkStream { + select { + case <-sigChan: + slog.Info("shutdown_signal_received", "msg", "finishing pending jobs...") + break loop + default: + // Continue + } - for _, link := range rawLinks { wg.Add(1) - go func(raw string) { defer wg.Done() - - // Step A: Parse + + // --- STAGE 1: PARSE --- proxy, err := parser.ParseLink(raw) if err != nil { return } - // Step B: Filter + // --- STAGE 2: DEDUP --- + if deduplicator.Seen(proxy.Address, proxy.Port) { + return // Skip duplicates silently + } + + // --- STAGE 3: FILTER --- if !netFilter.Check(proxy) { return } - // Step C: Test - semaphore <- struct{}{} + // --- STAGE 4: TEST --- + semaphore <- struct{}{} // Rate limit expensive tests err = boxRunner.Test(proxy) <-semaphore @@ -74,22 +102,30 @@ func main() { return } - mu.Lock() - validCount++ - mu.Unlock() + // --- STAGE 5: ENRICH --- + if geoDB != nil { + proxy.Country = geoDB.Lookup(proxy.Address) + } - slog.Info("proxy_verified", - "target", proxy.Address, - "latency_ms", proxy.Latency.Milliseconds(), + // --- STAGE 6: SAVE --- + if err := resultsWriter.Write(proxy); err != nil { + slog.Error("write_failed", "error", err) + } + + slog.Info("proxy_saved", + 
"country", proxy.Country, + "latency", proxy.Latency.Milliseconds(), + "type", proxy.Type, ) - }(link) + }(rawLink) + + countProcessed++ + if countProcessed % 1000 == 0 { + slog.Info("progress_report", "processed", countProcessed) + } } wg.Wait() - slog.Info("scan_complete", - "duration", time.Since(startTotal), - "valid_count", validCount, - "total_scanned", len(rawLinks), - ) + slog.Info("scan_finished", "total_processed", countProcessed) } \ No newline at end of file diff --git a/go.mod b/go.mod index 5a5c42e..76eee15 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/gvcgo/vpnparser v0.2.7 github.com/joho/godotenv v1.5.1 github.com/kelseyhightower/envconfig v1.4.0 + github.com/oschwald/geoip2-golang v1.13.0 ) require ( @@ -28,12 +29,13 @@ require ( github.com/mattn/go-runewidth v0.0.15 // indirect github.com/muesli/reflow v0.3.0 // indirect github.com/muesli/termenv v0.15.2 // indirect + github.com/oschwald/maxminddb-golang v1.13.0 // indirect github.com/pterm/pterm v0.12.62 // indirect github.com/rivo/uniseg v0.4.4 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect go.opentelemetry.io/otel v1.15.1 // indirect go.opentelemetry.io/otel/trace v1.15.1 // indirect - golang.org/x/sys v0.15.0 // indirect + golang.org/x/sys v0.20.0 // indirect golang.org/x/term v0.15.0 // indirect golang.org/x/text v0.14.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 3a0c73a..385d1b0 100644 --- a/go.sum +++ b/go.sum @@ -88,6 +88,10 @@ github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/oschwald/geoip2-golang v1.13.0 h1:Q44/Ldc703pasJeP5V9+aFSZFmBN7DKHbNsSFzQATJI= 
+github.com/oschwald/geoip2-golang v1.13.0/go.mod h1:P9zG+54KPEFOliZ29i7SeYZ/GM6tfEL+rgSn03hYuUo= +github.com/oschwald/maxminddb-golang v1.13.0 h1:R8xBorY71s84yO06NgTmQvqvTvlS/bnYZrrWX1MElnU= +github.com/oschwald/maxminddb-golang v1.13.0/go.mod h1:BU0z8BfFVhi1LQaonTwwGQlsHUEu9pWNdMfmq4ztm0o= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pterm/pterm v0.12.27/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI= @@ -109,8 +113,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= @@ -148,8 +152,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.15.0 
h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= -golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= diff --git a/internal/config/config.go b/internal/config/config.go index f592f8e..8f25bd4 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -10,27 +10,29 @@ import ( type Config struct { // App Settings - LogLevel string `envconfig:"LOG_LEVEL" default:"INFO"` // INFO, DEBUG, ERROR - Workers int `envconfig:"MAX_WORKERS" default:"10"` + LogLevel string `envconfig:"LOG_LEVEL" default:"INFO"` + Workers int `envconfig:"MAX_WORKERS" default:"20"` - // Paths + // Network Logic + TestURL string `envconfig:"TEST_URL" default:"http://cp.cloudflare.com"` + TcpTimeout time.Duration `envconfig:"TCP_TIMEOUT" default:"2s"` + TestTimeout time.Duration `envconfig:"TEST_TIMEOUT" default:"10s"` + + // File System Paths SingBoxPath string `envconfig:"SING_BOX_PATH" default:"./bin/sing-box"` - - // Testing Parameters - TestURL string `envconfig:"TEST_URL" default:"http://cp.cloudflare.com"` - TcpTimeout time.Duration `envconfig:"TCP_TIMEOUT" default:"2s"` - TestTimeout time.Duration `envconfig:"TEST_TIMEOUT" default:"10s"` + InputPath string `envconfig:"INPUT_PATH" default:"proxies.txt"` + OutputPath string `envconfig:"OUTPUT_PATH" default:"valid.jsonl"` + GeoIPPath string `envconfig:"GEOIP_PATH" default:"GeoLite2-Country.mmdb"` } -// Load reads .env and maps variables to Config struct +// Load reads .env and processes environment variables func Load() *Config { - 
// 1. Try loading .env file (optional, for local dev) + // Silently ignore if .env is missing (production might use real ENV vars) _ = godotenv.Load() var cfg Config - // 2. Process environment variables if err := envconfig.Process("", &cfg); err != nil { - log.Fatalf("Failed to load config: %v", err) + log.Fatalf("Configuration Error: %v", err) } return &cfg } \ No newline at end of file diff --git a/internal/dedup/filter.go b/internal/dedup/filter.go new file mode 100644 index 0000000..df79f83 --- /dev/null +++ b/internal/dedup/filter.go @@ -0,0 +1,36 @@ +package dedup + +import ( + "fmt" + "sync" +) + +type Filter struct { + seen map[string]struct{} + mu sync.RWMutex +} + +func New() *Filter { + return &Filter{ + seen: make(map[string]struct{}), + } +} + +// Seen reports whether address:port was already recorded; an unseen key is stored and false is returned (lookup and store take separate locks, so they are not one atomic step) +func (f *Filter) Seen(address string, port int) bool { + key := fmt.Sprintf("%s:%d", address, port) + + f.mu.RLock() + _, exists := f.seen[key] + f.mu.RUnlock() + + if exists { + return true + } + + f.mu.Lock() + f.seen[key] = struct{}{} + f.mu.Unlock() + + return false +} \ No newline at end of file diff --git a/internal/geoip/lookup.go b/internal/geoip/lookup.go new file mode 100644 index 0000000..d33992d --- /dev/null +++ b/internal/geoip/lookup.go @@ -0,0 +1,40 @@ +package geoip + +import ( + "net" + "github.com/oschwald/geoip2-golang" +) + +type Database struct { + reader *geoip2.Reader +} + +func Open(path string) (*Database, error) { + r, err := geoip2.Open(path) + if err != nil { + return nil, err + } + return &Database{reader: r}, nil +} + +func (d *Database) Lookup(ipStr string) string { + if d == nil || d.reader == nil { + return "N/A" + } + + ip := net.ParseIP(ipStr) + if ip == nil { + return "INVALID_IP" + } + + record, err := d.reader.Country(ip) + if err != nil || record.Country.IsoCode == "" { + return "UNKNOWN" + } + + return record.Country.IsoCode +} + +func (d *Database) Close() { + d.reader.Close() +} \ No newline at end of file
diff --git a/internal/sink/writer.go b/internal/sink/writer.go new file mode 100644 index 0000000..22c088e --- /dev/null +++ b/internal/sink/writer.go @@ -0,0 +1,38 @@ +package sink + +import ( + "encoding/json" + "find-me-internet/internal/model" + "os" + "sync" +) + +type JSONLWriter struct { + file *os.File + mu sync.Mutex +} + +func NewJSONL(path string) (*JSONLWriter, error) { + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return nil, err + } + return &JSONLWriter{file: f}, nil +} + +func (w *JSONLWriter) Write(p *model.Proxy) error { + w.mu.Lock() + defer w.mu.Unlock() + + data, err := json.Marshal(p) + if err != nil { + return err + } + + _, err = w.file.Write(append(data, '\n')) + return err +} + +func (w *JSONLWriter) Close() { + w.file.Close() +} \ No newline at end of file diff --git a/internal/source/loader.go b/internal/source/loader.go new file mode 100644 index 0000000..6bf3089 --- /dev/null +++ b/internal/source/loader.go @@ -0,0 +1,61 @@ +package source + +import ( + "bufio" + "net/http" + "os" + "strings" +) + +// LoadFromFile streams the non-blank lines of the file at path that do not start with "#" over a channel, to keep memory usage low +func LoadFromFile(path string) (<-chan string, error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + + out := make(chan string) + + go func() { + defer file.Close() + defer close(out) + + scanner := bufio.NewScanner(file) + // Increase buffer size for very long lines (some subscription links are huge) + buf := make([]byte, 0, 64*1024) + scanner.Buffer(buf, 1024*1024) + + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line != "" && !strings.HasPrefix(line, "#") { + out <- line + } + } + }() + + return out, nil +} + +// LoadFromURL streams directly from a URL (e.g., GitHub raw) +func LoadFromURL(url string) (<-chan string, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + + out := make(chan string) + go func() { + defer resp.Body.Close() + defer close(out)
+ + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line != "" { + out <- line + } + } + }() + + return out, nil +} \ No newline at end of file