走过路过的运维大佬,docker swarm 熟悉的也来看看 - V2EX
V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
Distributions
Ubuntu
Fedora
CentOS
中文资源站
网易开源镜像站
zxkxhnqwe123
V2EX    Linux

走过路过的运维大佬,docker swarm 熟悉的也来看看

  •  
  •   zxkxhnqwe123 2021-10-14 17:31:29 +08:00 4445 次点击
    这是一个创建于 1459 天前的主题,其中的信息可能已经有所发展或是发生改变。

    公司的开发测试环境想部署个 docker 集群,k8s 不会,k3s 更不会。

    目前搭配的组合是以 esxi 作为基础系统,虚拟出多个 centos8 系统,在 centos8 上安装 docker swarm 集群,出现了一个问题!! docker swarm 部署服务完成后,跨主机的容器之间都能正常 ping 通,但是从宿主机去访问 docker 开放的端口时,访问三次只有一次成功。具体如下:

    公司路由器网关 10.0.0.1 1. server-01 10.0.0.21 (manage) 2. server-02 10.0.0.22 3. server-03 10.0.0.23 ### 防火墙全部关闭 只有 iptable server-01 $ docker swarm init --default-addr-pool 192.0.0.0/24 server-02 $ docker swarm join server-03 $ docker swarm join ### server-01 $ docker node ls ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION km7dmxn402qt0s473kpqb47ac * Server-01 Ready Active Leader 20.10.9 k5vq74oh1njscvv4mf9gpyogh Server-02 Ready Active 20.10.9 rxzmo276saehmh1rc118fdxxe Server-03 Ready Active 20.10.9 ### 网络状态如下 server-01 $ docker network inspect ingress [ { "Name": "ingress", "Id": "m7ia7lmmlu1zm0zchr13ohk4q", "Created": "2021-10-14T15:08:48.036907446+08:00", "Scope": "swarm", "Driver": "overlay", "EnableIPv6": false, "IPAM": { "Driver": "default", "Options": null, "Config": [ { "Subnet": "192.0.0.0/24", "Gateway": "192.0.0.1" } ] }, "Internal": false, "Attachable": false, "Ingress": true, "ConfigFrom": { "Network": "" }, "ConfigOnly": false, "Containers": { "ingress-sbox": { "Name": "ingress-endpoint", "EndpointID": "4b5146ca8e180dd88a5271b7d29b439f6d5995801a47d8c648379d9b51ab0b77", "MacAddress": "02:42:c0:00:00:02", "IPv4Address": "192.0.0.2/24", "IPv6Address": "" } }, "Options": { "com.docker.network.driver.overlay.vxlanid_list": "4096" }, "Labels": {}, "Peers": [ { "Name": "6ebb8868ac00", "IP": "10.0.0.21" }, { "Name": "7982d5a14bf2", "IP": "10.0.0.22" }, { "Name": "b25e17d118a4", "IP": "10.0.0.23" } ] } ] server-01 $ docker network inspect docker_gwbridge [ { "Name": "docker_gwbridge", "Id": "6f2d03207e884bfec1918d4e8fc1a1f5f14ec9e5bcd71fd409a26630ab73d413", "Created": "2021-10-14T15:08:48.422229208+08:00", "Scope": "local", "Driver": "bridge", "EnableIPv6": false, "IPAM": { "Driver": "default", "Options": null, "Config": [ { "Subnet": "172.18.0.0/16", "Gateway": "172.18.0.1" } ] }, "Internal": false, "Attachable": false, "Ingress": false, "ConfigFrom": { "Network": "" }, "ConfigOnly": false, "Containers": { "ingress-sbox": { "Name": 
"gateway_ingress-sbox", "EndpointID": "1c4c1b5ba462d87832710029171c3911df457c950055a369670f59cef374247b", "MacAddress": "02:42:ac:12:00:02", "IPv4Address": "172.18.0.2/16", "IPv6Address": "" } }, "Options": { "com.docker.network.bridge.enable_icc": "false", "com.docker.network.bridge.enable_ip_masquerade": "true", "com.docker.network.bridge.name": "docker_gwbridge" }, "Labels": {} } ] ### 创建 nginx 服务 server-01 $ docker service create --replicas 3 -p 80:80 --name nginx nginx server-01 $ docker service ps nginx ID NAME IMAGE NODE DESIRED STATE CURRENT STATE ERROR PORTS xsomsqqtkr62 nginx.1 nginx:latest Server-02 Running Running 2 minutes ago selbdoapjek0 nginx.2 nginx:latest Server-03 Running Running 2 minutes ago w5bigfn8xtz4 nginx.3 nginx:latest Server-01 Running Running 2 minutes ago server-01 $ docker service ls ID NAME MODE REPLICAS IMAGE PORTS ro33x7v9ceri nginx replicated 3/3 nginx:latest *:80->80/tcp server-01 $ docker ps -a CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 37de9b7759c9 nginx:latest "/docker-entrypoint.…" 5 minutes ago Up 5 minutes 80/tcp nginx.3.w5bigfn8xtz4pi10hoe62gi4b ## 重点来了!!! [root@Server-01 ~]# curl 10.0.0.21 --卡住 ^C [root@Server-01 ~]# curl 10.0.0.21 --卡住 ^C [root@Server-01 ~]# curl 10.0.0.21 --三次成功一次 <!DOCTYPE html> <html> <head> <title>Welcome to nginx!</title> <style> html { color-scheme: light dark; } body { width: 35em; margin: 0 auto; font-family: Tahoma, Verdana, Arial, sans-serif; } </style> </head> <body> <h1>Welcome to nginx!</h1> <p>If you see this page, the nginx web server is successfully installed and working. 
Further configuration is required.</p> <p>For online documentation and support please refer to <a href="http://nginx.org/">nginx.org</a>.<br/> Commercial support is available at <a href="http://nginx.com/">nginx.com</a>.</p> <p><em>Thank you for using nginx.</em></p> </body> </html> [root@Server-01 ~]# netstat -tunlp Active Internet connections (only servers) Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name tcp 0 0 192.168.122.1:53 0.0.0.0:* LISTEN 1740/dnsmasq tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 1068/sshd tcp 0 0 0.0.0.0:111 0.0.0.0:* LISTEN 1/systemd tcp6 0 0 :::22 :::* LISTEN 1068/sshd tcp6 0 0 :::2377 :::* LISTEN 1222/dockerd tcp6 0 0 :::7946 :::* LISTEN 1222/dockerd tcp6 0 0 :::111 :::* LISTEN 1/systemd tcp6 0 0 :::80 :::* LISTEN 1222/dockerd udp 0 0 192.168.122.1:53 0.0.0.0:* 1740/dnsmasq udp 0 0 0.0.0.0:67 0.0.0.0:* 1740/dnsmasq udp 0 0 0.0.0.0:111 0.0.0.0:* 1/systemd udp 0 0 0.0.0.0:4789 0.0.0.0:* - udp6 0 0 :::7946 :::* 1222/dockerd udp6 0 0 :::111 :::* 1/systemd [root@Server-01 ~]# iptables -nL --line-number Chain INPUT (policy ACCEPT) num target prot opt source destination 1 LIBVIRT_INP all -- 0.0.0.0/0 0.0.0.0/0 Chain FORWARD (policy DROP) num target prot opt source destination 1 DOCKER-USER all -- 0.0.0.0/0 0.0.0.0/0 2 DOCKER-INGRESS all -- 0.0.0.0/0 0.0.0.0/0 3 DOCKER-ISOLATION-STAGE-1 all -- 0.0.0.0/0 0.0.0.0/0 4 ACCEPT all -- 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED 5 DOCKER all -- 0.0.0.0/0 0.0.0.0/0 6 ACCEPT all -- 0.0.0.0/0 0.0.0.0/0 7 ACCEPT all -- 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED 8 DOCKER all -- 0.0.0.0/0 0.0.0.0/0 9 ACCEPT all -- 0.0.0.0/0 0.0.0.0/0 10 ACCEPT all -- 0.0.0.0/0 0.0.0.0/0 11 LIBVIRT_FWX all -- 0.0.0.0/0 0.0.0.0/0 12 LIBVIRT_FWI all -- 0.0.0.0/0 0.0.0.0/0 13 LIBVIRT_FWO all -- 0.0.0.0/0 0.0.0.0/0 14 DROP all -- 0.0.0.0/0 0.0.0.0/0 Chain OUTPUT (policy ACCEPT) num target prot opt source destination 1 LIBVIRT_OUT all -- 0.0.0.0/0 0.0.0.0/0 Chain LIBVIRT_INP (1 references) num target prot 
opt source destination 1 ACCEPT udp -- 0.0.0.0/0 0.0.0.0/0 udp dpt:53 2 ACCEPT tcp -- 0.0.0.0/0 0.0.0.0/0 tcp dpt:53 3 ACCEPT udp -- 0.0.0.0/0 0.0.0.0/0 udp dpt:67 4 ACCEPT tcp -- 0.0.0.0/0 0.0.0.0/0 tcp dpt:67 Chain LIBVIRT_OUT (1 references) num target prot opt source destination 1 ACCEPT udp -- 0.0.0.0/0 0.0.0.0/0 udp dpt:53 2 ACCEPT tcp -- 0.0.0.0/0 0.0.0.0/0 tcp dpt:53 3 ACCEPT udp -- 0.0.0.0/0 0.0.0.0/0 udp dpt:68 4 ACCEPT tcp -- 0.0.0.0/0 0.0.0.0/0 tcp dpt:68 Chain LIBVIRT_FWO (1 references) num target prot opt source destination 1 ACCEPT all -- 192.168.122.0/24 0.0.0.0/0 2 REJECT all -- 0.0.0.0/0 0.0.0.0/0 reject-with icmp-port-unreachable Chain LIBVIRT_FWI (1 references) num target prot opt source destination 1 ACCEPT all -- 0.0.0.0/0 192.168.122.0/24 ctstate RELATED,ESTABLISHED 2 REJECT all -- 0.0.0.0/0 0.0.0.0/0 reject-with icmp-port-unreachable Chain LIBVIRT_FWX (1 references) num target prot opt source destination 1 ACCEPT all -- 0.0.0.0/0 0.0.0.0/0 Chain DOCKER (2 references) num target prot opt source destination Chain DOCKER-ISOLATION-STAGE-1 (1 references) num target prot opt source destination 1 DOCKER-ISOLATION-STAGE-2 all -- 0.0.0.0/0 0.0.0.0/0 2 DOCKER-ISOLATION-STAGE-2 all -- 0.0.0.0/0 0.0.0.0/0 3 RETURN all -- 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-ISOLATION-STAGE-2 (2 references) num target prot opt source destination 1 DROP all -- 0.0.0.0/0 0.0.0.0/0 2 DROP all -- 0.0.0.0/0 0.0.0.0/0 3 RETURN all -- 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-USER (1 references) num target prot opt source destination 1 RETURN all -- 0.0.0.0/0 0.0.0.0/0 Chain DOCKER-INGRESS (1 references) num target prot opt source destination 1 ACCEPT tcp -- 0.0.0.0/0 0.0.0.0/0 tcp dpt:80 2 ACCEPT tcp -- 0.0.0.0/0 0.0.0.0/0 state RELATED,ESTABLISHED tcp spt:80 3 RETURN all -- 0.0.0.0/0 0.0.0.0/0 
    • 而我的电脑无法访问 nginx 服务 http://10.0.0.21 , 修改服务器的开放端口 都不行,试过了 centos 7 系统 , rancheros 都无法正常使用.
    • 这个问题怎么解决,请各位大佬看看 出出主意.
    24 条回复    2024-08-12 10:19:14 +08:00
    saytesnake
        1
    saytesnake  
       2021-10-14 17:36:45 +08:00
    在 esxi 网卡打开允许混合。
    defunct9
        2
    defunct9  
       2021-10-14 19:59:36 +08:00
    哦。推倒重来。你用的 swarm 过时了。直接用 docker-compose
    1BF6oSYCD9ngBHo1
        3
    1BF6oSYCD9ngBHo1  
       2021-10-14 22:36:52 +08:00
    首先对楼主的测试方法有点好奇的是:你已经在 01 节点测试服务可用性的话,为什么不 curl localhost/127.0.0.1 ?你 curl 了 10 段的话,其中的测试结果应该是包含两个东西:节点服务可用性+节点本机 10 段网络的配置。

    然后,作为两年前实践过用 swarm 来尝试搭过小集群的过来人( 3 台机子,每台约 10 个 service,每个 service 从 5~20 个 replicas 不等),只想说,这货就是个全的社区项目,这并不是说 swarm 不能用,只是想要达到企业级的稳定性 /安全性 /灵活性是不可能的。为什么?其实了解下 swarm 这项目出来的目的,便会发现这东西是很难达到“好用”级别的(但是不可否认还是挺好玩)。而要想做到前面说的这些,唯有 kubernetes,国内也有一些服务商有提供 out-of-box 的云原生基建平台,都非常不错。但是如果你要手把手地用 swarm 来搞,那只能祝君好运,并且玩得愉快。
    wellsc
        4
    wellsc  
       2021-10-14 22:46:44 +08:00
    @defunct9 哥,两个不是一种东西
    liuxu
        5
    liuxu  
       2021-10-14 23:14:01 +08:00
    你要是 debian/ubuntu 的话我可以帮你详细分析下,其他的系统我就只能大致说下怎么查

    首先你的 server-01 的 ip 似乎有一个 192.168.122.0/24,先确认下 server-0{1,2,3}和你本地机器的 ip 是不是在一个网段,互相 ping 一下
    然后 server-0{1,2,3}的 iptables 、netstat 和 ifconfig 都看看
    最后互相 curl,在双方机器上用 tcpdump 抓包看看
    ik
        6
    ik  
       2021-10-14 23:18:54 +08:00 via iPhone
    iptables 规则问题? 三个 docker 服务都重启一下呢?
    ziwen1943
        7
    ziwen1943  
       2021-10-15 08:57:32 +08:00
    看看防火墙和 iptables 是不是有奇奇怪怪的规则
    zxkxhnqwe123
        8
    zxkxhnqwe123  
    OP
       2021-10-15 09:05:15 +08:00
    @vinle 三台服务器上面都是一样的 调用 curl 127.0.0.1 都是一样的效果. 并且所有系统都是干净重装好的
    zxkxhnqwe123
        9
    zxkxhnqwe123  
    OP
       2021-10-15 09:05:44 +08:00
    @saytesnake 试过了 好像也不行 ! 叫混杂模式
    zxkxhnqwe123
        10
    zxkxhnqwe123  
    OP
       2021-10-15 09:07:59 +08:00
    @saytesnake 主要是 我是开发人员,公司也没有专业运维,现在想解决 devops 自动化运维 测试环境,所以只能从简单的折腾
    juzisang
        11
    juzisang  
       2021-10-15 09:30:24 +08:00
    看一下这几个端口有没有开放
    https://docs.docker.com/engine/swarm/swarm-tutorial/#open-protocols-and-ports-between-the-hosts

    前几个月也搭了一个 swarm 集群
    t/772731
    byzf
        12
    byzf  
       2021-10-15 10:53:59 +08:00
    以前碰到过几次请求三次只成功一次的情况,有 dns 配置的问题,有负载均衡的问题。
    defunct9
        13
    defunct9  
       2021-10-15 10:55:29 +08:00
    开 ssh,让我上去看看
    zxxufo008
        14
    zxxufo008  
       2021-10-15 11:42:52 +08:00
    @defunct9 好家伙,层主换了个头像,我还是通过这句话知道你还是你的 [滑稽]
    defunct9
        15
    defunct9  
       2021-10-15 12:00:15 +08:00
    @zxxufo008 小孩长大了,头像也跟着长大了。是我,是我,还是我。
    liuxu
        16
    liuxu  
       2021-10-15 12:14:12 +08:00
    @zxxufo008 我还以为是有人开机器人了,原来是换头像了
    mepwang
        17
    mepwang  
       2021-10-15 16:16:22 +08:00
    curl -v 看看卡到哪一步了
    jackleeforce3615
        18
    jackleeforce3615  
       2021-10-15 16:53:29 +08:00
    一直以为没多少人用 docker swarm 了
    mkdir
        19
    mkdir  
       2021-10-15 17:14:21 +08:00
    @jackleeforce3615 一直用一直爽
    zxkxhnqwe123
        20
    zxkxhnqwe123  
    OP
       2021-10-15 17:54:25 +08:00
    @mepwang

    [root@Server-01 ~]# curl 127.0.0.1 -v
    * Rebuilt URL to: 127.0.0.1/
    * Trying 127.0.0.1...
    * TCP_NODELAY set
    ^C
    [root@Server-01 ~]# curl 127.0.0.1 -v
    * Rebuilt URL to: 127.0.0.1/
    * Trying 127.0.0.1...
    * TCP_NODELAY set
    * Connected to 127.0.0.1 (127.0.0.1) port 80 (#0)
    > GET / HTTP/1.1
    > Host: 127.0.0.1
    > User-Agent: curl/7.61.1
    > Accept: */*
    >
    < HTTP/1.1 200 OK
    < Server: nginx/1.21.3
    < Date: Fri, 15 Oct 2021 09:56:24 GMT
    < Content-Type: text/html
    < Content-Length: 615
    < Last-Modified: Tue, 07 Sep 2021 15:21:03 GMT
    < Connection: keep-alive
    < ETag: "6137835f-267"
    < Accept-Ranges: bytes
    <
    <!DOCTYPE html>
    <html>
    <head>
    <title>Welcome to nginx!</title>
    <style>
    html { color-scheme: light dark; }
    body { width: 35em; margin: 0 auto;
    font-family: Tahoma, Verdana, Arial, sans-serif; }
    </style>
    </head>
    <body>
    <h1>Welcome to nginx!</h1>
    <p>If you see this page, the nginx web server is successfully installed and
    working. Further configuration is required.</p>

    <p>For online documentation and support please refer to
    <a href="http://nginx.org/">nginx.org</a>.<br/>
    Commercial support is available at
    <a href="http://nginx.com/">nginx.com</a>.</p>

    <p><em>Thank you for using nginx.</em></p>
    </body>
    </html>
    * Connection #0 to host 127.0.0.1 left intact
    [root@Server-01 ~]# ^C
    [root@Server-01 ~]# curl 127.0.0.1 -v
    * Rebuilt URL to: 127.0.0.1/
    * Trying 127.0.0.1...
    * TCP_NODELAY set
    mepwang
        21
    mepwang  
       2021-10-18 11:25:35 +08:00
    看不出来什么问题,curl 调用三次成功一次,会不会和你的副本数量有关系?
    能给的建议不多,
    你把 replica 的数目改成 4 个或 2 个,看看 curl 调用成功的几率是不是变成 4 次或者 2 次成功一次。
    感觉是你的 swarm 集群有点问题,直觉上是网络转发这块。
    你给你的应用添加一个 overlay network 试试看?
    zxkxhnqwe123
        22
    zxkxhnqwe123  
    OP
       2022-01-30 15:42:43 +08:00
    终于解决了 !!!! 放假花了两天时间解决了,也当学习了 . 这两天重装了 不下 20 次 ,3 台虚拟机 不停重启,重装.
    原理就是开启 esxi 网卡的混杂模式,网卡用 E1000e (这个其实不太确定,不想验证了),然后确认下 swarm 网关和局域网的网关是否冲突了。这些做完就圆满结束了。

    感谢以上的朋友帮忙!!!

    判断依据 https://stackoverflow.com/questions/59007780/container-running-on-docker-swarm-not-accessible-from-outside
    isnullstring
        23
    isnullstring  
       2024-08-12 10:15:23 +08:00
    @zxkxhnqwe123 #22 回来留个脚印
    我的情况跟楼主一样,先是确认 swarm 网关,默认是 10.0.0.0 ,跟现有一致的话肯定是不行的
    环境 :esxi 6.7 + ubuntu 22.04 + 10.0.0.0
    完整解决办法:
    1 、虚拟交换机 开混杂模式
    2 、必须修改虚拟机网卡类型,E1000
    3 、初始化集群时指定 IP 段,注意 stackoverflow 中的回答
    -------------------------------------------------swarm 网段---------------------通讯 IP
    docker swarm init --default-addr-pool 11.0.0.0/8 --advertise-addr 10.0.1.137
    isnullstring
        24
    isnullstring  
       2024-08-12 10:19:14 +08:00
    @isnullstring #23 还有个奇怪现象,只有 1 个管理节点和 1 个工作节点时候就没毛病,但是通过管理节点无法访问工作节点的端口,第二个节点一加进来就凉
    关于     帮助文档     自助推广系统     博客     API     FAQ     Solana     2715 人在线   最高记录 6679       Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 29ms UTC 09:55 PVG 17:55 LAX 02:55 JFK 05:55
    Do have faith in what you're doing.
    ubao snddm index pchome yahoo rakuten mypaper meadowduck bidyahoo youbao zxmzxm asda bnvcg cvbfg dfscv mmhjk xxddc yybgb zznbn ccubao uaitu acv GXCV ET GDG YH FG BCVB FJFH CBRE CBC GDG ET54 WRWR RWER WREW WRWER RWER SDG EW SF DSFSF fbbs ubao fhd dfg ewr dg df ewwr ewwr et ruyut utut dfg fgd gdfgt etg dfgt dfgd ert4 gd fgg wr 235 wer3 we vsdf sdf gdf ert xcv sdf rwer hfd dfg cvb rwf afb dfh jgh bmn lgh rty gfds cxv xcv xcs vdas fdf fgd cv sdf tert sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf shasha9178 shasha9178 shasha9178 shasha9178 shasha9178 liflif2 liflif2 liflif2 liflif2 liflif2 liblib3 liblib3 liblib3 liblib3 liblib3 zhazha444 zhazha444 zhazha444 zhazha444 zhazha444 dende5 dende denden denden2 denden21 fenfen9 fenf619 fen619 fenfe9 fe619 sdf sdf sdf sdf sdf zhazh90 zhazh0 zhaa50 zha90 zh590 zho zhoz zhozh zhozho zhozho2 lislis lls95 lili95 lils5 liss9 sdf0ty987 sdft876 sdft9876 sdf09876 sd0t9876 sdf0ty98 sdf0976 sdf0ty986 sdf0ty96 sdf0t76 sdf0876 df0ty98 sf0t876 sd0ty76 sdy76 sdf76 sdf0t76 sdf0ty9 sdf0ty98 sdf0ty987 sdf0ty98 sdf6676 sdf876 sd876 sd876 sdf6 sdf6 sdf9876 sdf0t sdf06 sdf0ty9776 sdf0ty9776 sdf0ty76 sdf8876 sdf0t sd6 sdf06 s688876 sd688 sdf86