diff --git a/.gitignore b/.gitignore index 5ce5cb0..9a58ef7 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ scanner *.dylib data/* !data/valid_proxies.txt +!data/alive_proxies.txt bin/ bin/* diff --git a/cmd/main.go b/cmd/main.go index 1a39eb7..f3532f9 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -19,123 +19,103 @@ import ( ) func main() { - // 1. Init + // 1. Init & Config cfg := config.Load() logger.Setup(cfg.LogLevel) + if len(os.Args) > 1 { cfg.InputPath = os.Args[1] } - // CLI Argument Override - // Usage: ./find-me-internet [OPTIONAL_INPUT_SOURCE] - if len(os.Args) > 1 { - cfg.InputPath = os.Args[1] - slog.Info("input_source_overridden", "source", cfg.InputPath) - } + // 2. Writers (Valid, Alive, Dataset) + validJson, _ := sink.NewJSONL(cfg.OutputPath) + defer validJson.Close() + validTxt, _ := sink.NewText(cfg.TxtOutputPath) + defer validTxt.Close() - sigChan := make(chan os.Signal, 1) - signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + aliveJson, _ := sink.NewJSONL(cfg.AliveOutputPath) + defer aliveJson.Close() + aliveTxt, _ := sink.NewText(cfg.AliveTxtOutputPath) + defer aliveTxt.Close() - // 2. Services - geoDB, err := geoip.Open(cfg.GeoIPPath) - if err != nil { - slog.Warn("geoip_db_missing", "error", err) - } else { - defer geoDB.Close() - } + datasetWriter, _ := sink.NewJSONL(cfg.DatasetOutputPath) + defer datasetWriter.Close() - jsonWriter, err := sink.NewJSONL(cfg.OutputPath) - if err != nil { - slog.Error("cannot_create_json_output", "error", err) - os.Exit(1) - } - defer jsonWriter.Close() - - txtWriter, err := sink.NewText(cfg.TxtOutputPath) - if err != nil { - slog.Error("cannot_create_txt_output", "error", err) - os.Exit(1) - } - defer txtWriter.Close() + // 3. Services + geoDB, _ := geoip.Open(cfg.GeoIPPath) + if geoDB != nil { defer geoDB.Close() } deduplicator := dedup.New() netFilter := filter.NewPipeline(cfg.TcpTimeout) boxRunner := tester.NewRunner(cfg.SingBoxPath, cfg.TestURL, cfg.TestTimeout) - - // 3. Input Stream (Smart Load) - // Supports both http://... and ./path/to/file.txt + + // 4. Input Stream linkStream, err := source.Load(cfg.InputPath) - if err != nil { - slog.Error("input_source_failed", "error", err, "path", cfg.InputPath) - os.Exit(1) - } + if err != nil { slog.Error("input_failed", "err", err); os.Exit(1) } - // 4. Worker Pool var wg sync.WaitGroup semaphore := make(chan struct{}, cfg.Workers) - - countProcessed := 0 - countValid := 0 - var mu sync.Mutex - + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + slog.Info("pipeline_started", "workers", cfg.Workers) - // Main Loop -loop: for rawLink := range linkStream { select { case <-sigChan: - slog.Info("shutdown_signal_received", "msg", "finishing pending jobs...") - break loop + goto cleanup default: } wg.Add(1) go func(raw string) { defer wg.Done() - - // A. Parse + + // STEP 1: PARSE proxy, err := parser.ParseLink(raw) - if err != nil { return } + if err != nil { return } // Cannot track unparseable junk - // B. Dedup - if deduplicator.Seen(proxy.Address, proxy.Port) { return } + // STEP 2: DEDUP + if deduplicator.Seen(proxy) { return } - // C. Filter - if !netFilter.Check(proxy) { return } - - // D. Test - semaphore <- struct{}{} - err = boxRunner.Test(proxy) - <-semaphore - - if err != nil { return } - - // E. Enrich + // STEP 3: ENRICH (Country) + // We do this EARLY so even "Dead" proxies in the dataset have a Country label if geoDB != nil { proxy.Country = geoDB.Lookup(proxy.Address) } - // F. Save - jsonWriter.Write(proxy) - txtWriter.Write(proxy) + // STEP 4: FILTER (Sets p.Status, p.FailureReason if fails) + if !netFilter.Check(proxy) { + // Proxy is DEAD. The Filter has already set: + // p.Status = "dead" + // p.FailureReason = "tcp_timeout" (etc) + datasetWriter.Write(proxy) + return + } - // Stats - mu.Lock() - countValid++ - mu.Unlock() + // STEP 5: TEST (Sets p.Status, p.FailureReason if fails) + semaphore <- struct{}{} + err = boxRunner.Test(proxy) + <-semaphore - slog.Info("proxy_saved", - "country", proxy.Country, - "latency", proxy.Latency.Milliseconds(), - "type", proxy.Type, - ) + if err != nil { + // Proxy is ALIVE (Semi-working). Runner has already set: + // p.Status = "alive" + // p.FailureReason = "http_error_502" (etc) + + aliveJson.Write(proxy) + aliveTxt.Write(proxy) + datasetWriter.Write(proxy) + return + } + + validJson.Write(proxy) + validTxt.Write(proxy) + datasetWriter.Write(proxy) + + slog.Info("proxy_verified", "country", proxy.Country, "latency", proxy.Latency.Milliseconds()) }(rawLink) - - countProcessed++ - if countProcessed % 1000 == 0 { - slog.Info("progress_report", "processed", countProcessed, "valid", countValid) - } } +cleanup: wg.Wait() - slog.Info("scan_finished", "total_processed", countProcessed, "total_valid", countValid) + slog.Info("scan_finished") } \ No newline at end of file diff --git a/data/alive_proxies.txt b/data/alive_proxies.txt new file mode 100644 index 0000000..b8970a0 --- /dev/null +++ b/data/alive_proxies.txt @@ -0,0 +1,9 @@ +vless://b4bd0613-ff7c-4f2f-954d-185915e6ddad@216.239.38.120:443?path=%2F%40JavidnamanIran%2FJavid-SHAH-KingRezaPahlavi%2F&security=tls&encryption=none&insecure=0&host=o-cdn.igoii.org&type=ws&allowInsecure=0&sni=o-cdn.igoii.org#%F0%9F%86%98%EF%B8%8F%20%F0%9F%87%A9%F0%9F%87%AA%20-1 +vless://33676069-bc5a-443c-bb64-14a215544f2b@deu711.deulucker.org:444?mode=auto&path=%2Fapi%2Fv1%2F&security=reality&encryption=none&pbk=BhTJ3phnq-Z-10aFKSsj1lzhA8mULR4L6leE4-0WTAs&fp=chrome&type=xhttp&sni=deu711.deulucker.org#@Vip_Security join us - 68 +ss://YWVzLTI1Ni1nY206S2l4THZLendqZWtHMDBybQ@38.91.100.134:8080#@Vip_Security join us - 328 +trojan://5a2c16f9@one.cf.cdn.hyli.xyz:443?path=/&security=tls&host=snippets.kkii.eu.org&type=ws&sni=snippets.kkii.eu.org#@Vip_Security join us - 47 +vless://7abc75eb-b58b-4e28-af59-20f41bdf7a2a@dns.ownlink.pro:443?path=%2Frestart&security=tls&alpn=h2%2Chttp%2F1.1&encryption=none&insecure=0&host=last.ownlink.pro&fp=chrome&type=ws&allowInsecure=0&sni=last.ownlink.pro#4 +vless://c4426a36-247f-4abf-bf4e-e9ea0ed01c32@ip.ali.lat:2053?path=%2F&security=tls&alpn=h2%2Chttp%2F1.1&encryption=none&insecure=0&host=temp.ali.lat&type=ws&allowInsecure=0&sni=temp.ali.lat#@Vip_Security join us - 108 +vless://dd0cfef0-fda9-47ec-8a65-49d7bc004f82@cf.narton.ir:443?path=%2Fvpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl-vpnowl%3Fed%3D2560&security=tls&alpn=h2&encryption=none&insecure=0&host=www.narton.ir&fp=firefox&type=ws&allowInsecure=0&sni=www.narton.ir#@Vip_Security join us - 112 +vless://c077a7aa-7ec8-4117-8ffc-9ade75a5efce@chatgpt.com:2096?path=%2F&security=tls&alpn=http%2F1.1&encryption=none&insecure=0&host=cdn.sheriffbus.com&fp=chrome&type=ws&allowInsecure=0&sni=cdn.sheriffbus.com#@Vip_Security join us - 106 +vless://2fb8808b-b94c-42ea-9dd2-cd77d2efcc8d@www.perplexity.ai:2096?path=%2FeyJqdW5rIjoidDZLaDRBMWhpIiwicHJvdG9jb2wiOiJ2bCIsIm1vZGUiOiJwcm94eWlwIiwicGFuZWxJUHMiOltdfQ&security=tls&alpn=http%2F1.1&encryption=none&host=digikalaa.dpdns.org&fp=chrome&type=ws&sni=DiGIkALaA.dpdns.ORG#@Vip_Security join us - 66 diff --git a/data/valid_proxies.txt b/data/valid_proxies.txt index ffeb120..bffcc5e 100644 --- a/data/valid_proxies.txt +++ b/data/valid_proxies.txt @@ -1,8 +1 @@ -vless://9e685fe3-e0f9-482d-939c-200a3f89b363@172.64.145.38:8443?path=%2F%3Fed%3D2560fp%3Drandom&security=tls&alpn=http%2F1.1&encryption=none&insecure=0&host=vyznthvt7f5fr.zjde5.de5.net&fp=random&type=ws&allowInsecure=0&sni=vyznthvt7f5fr.zjde5.de5.net#%F0%9F%87%A9%F0%9F%87%AA%20%40vmesspv -vless://bb8c74a1-abc1-4511-b100-9876e30cb65c@172.64.145.38:8443?path=%2F%3Fed%3D2560&security=tls&alpn=http%2F1.1&encryption=none&insecure=0&host=xfjd79v2tjscrm6jqo.zjde5.de5.net&fp=chrome&type=ws&allowInsecure=0&sni=xfjd79v2tjscrm6jqo.zjde5.de5.net#@Vip_Security join us - 55 -vless://f85f60b1-2b96-49e9-8bde-b656d1516df0@104.17.165.123:8443?path=%2F%3Fed%3D2560&security=tls&alpn=http%2F1.1&encryption=none&insecure=0&host=gx8rw8fz783ncefn332y7uyfsvb59o820mryrxu1cj19jiuuur.zjde5.de5.net&fp=chrome&type=ws&allowInsecure=0&sni=gx8rw8fz783ncefn332y7uyfsvb59o820mryrxu1cj19jiuuur.zjde5.de5.net#@Vip_Security join us - 67 -vless://4525c260-df3c-4f62-b8f1-f4f5f305694b@104.17.164.123:8443?path=%2F%3Fed%3D2560&security=tls&encryption=none&insecure=0&host=yyzsuabw9e3qd5ud7ihi5dxm96oglnsvr83cjojnm1efncfhr9ucordq.zjde5.de5.net&fp=chrome&type=ws&allowInsecure=0&sni=yyzsuabw9e3qd5ud7ihi5dxm96oglnsvr83cjojnm1efncfhr9ucordq.zjde5.de5.net#%F0%9F%8C%8E%20%40vmesspv -vless://9e685fe3-e0f9-482d-939c-200a3f89b363@172.64.145.38:8443?path=%2F%3Fed%3D2560fp%3Drandom&security=tls&alpn=http%2F1.1&encryption=none&insecure=0&host=vyznthvt7f5fr.zjde5.de5.net&fp=random&type=ws&allowInsecure=0&sni=vyznthvt7f5fr.zjde5.de5.net#%F0%9F%87%A9%F0%9F%87%AA%20%40vmesspv -vless://4525c260-df3c-4f62-b8f1-f4f5f305694b@66.81.247.155:443?path=%2F%3Fed%3D512&security=tls&encryption=none&insecure=0&host=yyzsuabw9e3qd5ud7ihi5dxm96oglnsvr83cjojnm1efncfhr9ucordq.zjde5.de5.net&fp=chrome&type=ws&allowInsecure=0&sni=yyzsuabw9e3qd5ud7ihi5dxm96oglnsvr83cjojnm1efncfhr9ucordq.zjde5.de5.net#%F0%9F%8C%8E%20%40vmesspv -vless://83f03646-fb28-44cc-9d2c-8853f6c09285@104.17.162.123:8443?path=%2F%3Fed%3D%23TELEGRAM-Yam%3Fed%3D512&security=tls&alpn=http%2F1.1&encryption=none&insecure=0&host=r4fnviw9jl4i4rx.zjde5.de5.net&fp=random&type=ws&allowInsecure=0&sni=r4fnviw9jl4i4rx.zjde5.de5.net#@chthxyz - 61 -vless://3a4ddfac-e7da-48c9-9648-4a366109fc3a@api.steamsale.ir:443?path=%2FX0PX5Vup1qlVVzhxp6ic50a&security=tls&alpn=http%2F1.1&encryption=none&insecure=0&host=mtn.vpsmee.ir&fp=chrome&type=ws&allowInsecure=0&sni=cdn.vpsmee.ir#@chthxyz - 28 +vless://83f03646-fb28-44cc-9d2c-8853f6c09285@104.17.162.123:8443?path=%2F%3Fed%3D%23TELEGRAM-MARAMBASHI_MARAMBASHI_MARAMBASHI_MARAMBASHI_MARAMBASHI%3Fed%3D512&security=tls&alpn=http%2F1.1&encryption=none&insecure=0&host=r4fnviw9jl4i4rx.zjde5.de5.net&fp=random&type=ws&allowInsecure=0&sni=r4fnviw9jl4i4rx.zjde5.de5.net#@Vip_Security join us - 98 diff --git a/internal/config/config.go b/internal/config/config.go index 00c86d7..8ccb442 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -24,6 +24,9 @@ type Config struct { OutputPath string `envconfig:"OUTPUT_PATH" default:"valid.jsonl"` GeoIPPath string `envconfig:"GEOIP_PATH" default:"GeoLite2-Country.mmdb"` TxtOutputPath string `envconfig:"TXT_OUTPUT_PATH" default:"valid.txt"` + AliveOutputPath string `envconfig:"ALIVE_OUTPUT_PATH" default:"alive.jsonl"` + AliveTxtOutputPath string `envconfig:"ALIVE_TXT_OUTPUT_PATH" default:"alive.txt"` + DatasetOutputPath string `envconfig:"DATASET_OUTPUT_PATH" default:"dataset.jsonl"` } // Load reads .env and processes environment variables diff --git a/internal/dedup/filter.go b/internal/dedup/filter.go index df79f83..b6df813 100644 --- a/internal/dedup/filter.go +++ b/internal/dedup/filter.go @@ -3,6 +3,7 @@ package dedup import ( "fmt" "sync" + "find-me-internet/internal/model" ) type Filter struct { @@ -16,9 +17,11 @@ func New() *Filter { } } -// Check returns true if the item is NEW (not seen before) -func (f *Filter) Seen(address string, port int) bool { - key := fmt.Sprintf("%s:%d", address, port) +// Seen checks if the proxy is new. +// Key format: "vless://1.2.3.4:443" +// This allows the same IP to be scanned again if it uses a different protocol. +func (f *Filter) Seen(p *model.Proxy) bool { + key := fmt.Sprintf("%s://%s:%d", p.Type, p.Address, p.Port) f.mu.RLock() _, exists := f.seen[key] diff --git a/internal/filter/network.go b/internal/filter/network.go index d29399b..5fd874f 100644 --- a/internal/filter/network.go +++ b/internal/filter/network.go @@ -2,7 +2,6 @@ package filter import ( "crypto/tls" - "log/slog" "net" "strconv" "time" @@ -18,53 +17,46 @@ func NewPipeline(timeout time.Duration) *Pipeline { return &Pipeline{Timeout: timeout} } +// Check performs cheap checks and updates the Proxy model with results. +// Returns true ONLY if all checks pass. func (f *Pipeline) Check(p *model.Proxy) bool { - target := net.JoinHostPort(p.Address, strconv.Itoa(p.Port)) - log := slog.With("target", target, "protocol", p.Type) - - // 1. TCP Connectivity - start := time.Now() + // 1. TCP Check if !f.checkTCP(p) { - log.Debug("tcp_connect_failed", "duration", time.Since(start)) p.IsOnline = false + p.Status = "dead" + p.FailureStage = "filter" + p.FailureReason = "tcp_timeout_or_refused" return false } p.IsOnline = true - // 2. TLS Handshake - // Only proceed if protocol supports/requires TLS + // 2. TLS Check + // Determine if TLS is required shouldCheckTLS := p.SNI != "" || p.Port == 443 || p.Type == model.TypeVLESS || p.Type == model.TypeTrojan if shouldCheckTLS { sni := p.SNI - if sni == "" { - sni = p.Address // Fallback for handshake - } + if sni == "" { sni = p.Address } - startTLS := time.Now() if !f.checkTLS(p, sni) { - log.Debug("tls_handshake_failed", - "sni", sni, - "duration", time.Since(startTLS), - ) p.IsTLSSecure = false + p.Status = "dead" + p.FailureStage = "filter" + p.FailureReason = "tls_handshake_failed" return false } p.IsTLSSecure = true - log.Debug("network_checks_passed", "duration", time.Since(start)) - } else { - log.Debug("network_checks_passed", "note", "tls_skipped_no_sni") } + // If we got here, it passed the filter stage + p.FailureStage = "none" return true } func (f *Pipeline) checkTCP(p *model.Proxy) bool { address := net.JoinHostPort(p.Address, strconv.Itoa(p.Port)) conn, err := net.DialTimeout("tcp", address, f.Timeout) - if err != nil { - return false - } + if err != nil { return false } conn.Close() return true } @@ -72,16 +64,9 @@ func (f *Pipeline) checkTCP(p *model.Proxy) bool { func (f *Pipeline) checkTLS(p *model.Proxy, sni string) bool { address := net.JoinHostPort(p.Address, strconv.Itoa(p.Port)) dialer := &net.Dialer{Timeout: f.Timeout} - - conf := &tls.Config{ - InsecureSkipVerify: true, - ServerName: sni, - } - + conf := &tls.Config{InsecureSkipVerify: true, ServerName: sni} conn, err := tls.DialWithDialer(dialer, "tcp", address, conf) - if err != nil { - return false - } + if err != nil { return false } conn.Close() return true } \ No newline at end of file diff --git a/internal/model/proxy.go b/internal/model/proxy.go index b127507..599a887 100644 --- a/internal/model/proxy.go +++ b/internal/model/proxy.go @@ -2,7 +2,6 @@ package model import "time" -// ProxyType defines the protocol (vless, vmess, etc.) type ProxyType string const ( @@ -13,25 +12,25 @@ const ( TypeUnknown ProxyType = "unknown" ) -// Proxy represents a single internet access point type Proxy struct { - // Identity + // --- Identity --- RawLink string `json:"link"` Type ProxyType `json:"type"` + Address string `json:"address"` + Port int `json:"port"` + Network string `json:"network"` + SNI string `json:"sni"` - // Connection Details - Address string `json:"address"` // IP or Domain - Port int `json:"port"` - UUID string `json:"uuid"` // Or Password - SNI string `json:"sni"` // TLS Server Name Indicator - Network string `json:"network"` // tcp, ws, grpc, h2 + // --- Enrichment --- + Country string `json:"country"` // e.g., "US", "IR", "DE" - // Filter Stage Results - IsOnline bool `json:"is_online"` // TCP Connect success - IsTLSSecure bool `json:"is_tls_secure"` // TLS Handshake success - - // Tester Stage Results + // --- Metrics --- Latency time.Duration `json:"latency_ms"` - Country string `json:"country_code"` - PacketLoss float64 `json:"packet_loss"` // 0.0 to 1.0 + IsOnline bool `json:"is_online"` // TCP Connect Status + IsTLSSecure bool `json:"is_tls_secure"` // TLS Handshake Status + + // --- Data Collection (The fields you want filled) --- + Status string `json:"status"` // "valid", "alive", "dead" + FailureStage string `json:"failure_stage"` // "filter", "tester", "none" + FailureReason string `json:"failure_reason"` // "tcp_timeout", "http_502", "tls_error", etc. } \ No newline at end of file diff --git a/internal/tester/runner.go b/internal/tester/runner.go index bed876b..f5a2627 100644 --- a/internal/tester/runner.go +++ b/internal/tester/runner.go @@ -71,17 +71,20 @@ func (r *Runner) Test(p *model.Proxy) error { } // 5. HTTP Probe - startProbe := time.Now() latency, err := r.measureLatency(port) if err != nil { - log.Debug("http_probe_failed", - "duration", time.Since(startProbe), - "error", err, - ) + // SET THE MODEL VALUES HERE + p.Status = "alive" // It passed TCP, so it's "alive" but failed the test + p.FailureStage = "tester" + p.FailureReason = err.Error() // e.g., "http_timeout" or "status_502" return err } + // Success p.Latency = latency + p.Status = "valid" + p.FailureStage = "none" + p.FailureReason = "none" return nil } @@ -95,15 +98,17 @@ func (r *Runner) measureLatency(port int) (time.Duration, error) { Timeout: r.Timeout, } - start := time.Now() + start := time.Now() resp, err := client.Get(r.TestURL) - if err != nil { - return 0, err + if err != nil { + // Return specific error string for the model + return 0, fmt.Errorf("http_timeout_or_network_error") } defer resp.Body.Close() if resp.StatusCode < 200 || resp.StatusCode > 299 { - return 0, fmt.Errorf("unexpected_status_code_%d", resp.StatusCode) + // Return specific status code error + return 0, fmt.Errorf("http_error_%d", resp.StatusCode) } return time.Since(start), nil