上篇文章介绍了 kubelet 的启动流程,本篇文章主要介绍 kubelet 创建 pod 的流程。
kubernetes 版本: v1.12
// syncLoop is the kubelet's main loop. It sets up the periodic tickers and the
// pod lifecycle event channel, then repeatedly calls syncLoopIteration until
// that call returns false.
//
// updates delivers pod configuration changes; handler is the SyncHandler
// passed through to syncLoopIteration.
func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHandler) {
glog.Info("Starting kubelet main sync loop.")
// syncTicker fires once per second; its channel is handed to
// syncLoopIteration as the periodic sync trigger.
syncTicker := time.NewTicker(time.Second)
defer syncTicker.Stop()
// housekeepingTicker fires every housekeepingPeriod and triggers pod cleanup.
// NOTE(review): the original article states this is two seconds; the constant
// is declared outside this excerpt - confirm.
housekeepingTicker := time.NewTicker(housekeepingPeriod)
defer housekeepingTicker.Stop()
// plegCh carries pod lifecycle events.
plegCh := kl.pleg.Watch()
// Backoff parameters used while the runtime reports errors: start at base,
// multiply by factor after every sleep, and never exceed max.
const (
base= 100 * time.Millisecond
max= 5 * time.Second
factor = 2
)
duration := base
for {
// While the container runtime reports errors, skip the iteration, sleep for
// the current backoff and grow it (capped at max).
if rs := kl.runtimeState.runtimeErrors();
len(rs) != 0 {
time.Sleep(duration)
duration = time.Duration(math.Min(float64(max), factor*float64(duration)))
continue
}
// Code elided by the article precedes the first monitor timestamp below.
...kl.syncLoopMonitor.Store(kl.clock.Now())
// The second argument is a SyncHandler, which is an interface.
// NOTE(review): per the original article it is declared near the top of the
// same upstream source file - not visible in this excerpt.
if !kl.syncLoopIteration(updates, handler, syncTicker.C, housekeepingTicker.C, plegCh) {
break
}
// Record the time again once the iteration has finished.
kl.syncLoopMonitor.Store(kl.clock.Now())
}
}
func (kl *Kubelet) syncLoopIteration(configCh <-chan kubetypes.PodUpdate, handler SyncHandler,
syncCh <-chan time.Time, housekeepingCh <-chan time.Time, plegCh <-chan *pleg.PodLifecycleEvent) bool {
select {
case u, open := <-configCh:
if !open {
glog.Errorf("Update channel is closed. Exiting the sync loop.")
return false
}switch u.Op {
case kubetypes.ADD:
...
case kubetypes.UPDATE:
...
case kubetypes.REMOVE:
...
case kubetypes.RECONCILE:
...
case kubetypes.DELETE:
...
case kubetypes.RESTORE:
...
case kubetypes.SET:
...
}
...
case e := <-plegCh:
...
case <-syncCh:
...
case update := <-kl.livenessManager.Updates():
...
case <-housekeepingCh:
...
}
return true
}
func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
start := kl.clock.Now()
// 对所有 pod 按照日期排序,保证最先创建的 pod 优先被处理
sort.Sort(sliceutils.PodsByCreationTime(pods))
for _, pod := range pods {
if kl.dnsConfigurer != nil && kl.dnsConfigurer.ResolverConfig != "" {
kl.dnsConfigurer.CheckLimitsForResolvConf()
}
existingPods := kl.podManager.GetPods()
// 把 pod 加入到 podManager 中
kl.podManager.AddPod(pod)// 判断是否是 mirror pod(即 static pod)
if kubepod.IsMirrorPod(pod) {
kl.handleMirrorPod(pod, start)
continue
}if !kl.podIsTerminated(pod) {
activePods := kl.filterOutTerminatedPods(existingPods)
// 通过 canAdmitPod 方法校验Pod能否在该计算节点创建(如:磁盘空间)
// Check if we can admit the pod;
if not, reject it.
if ok, reason, message := kl.canAdmitPod(activePods, pod);
!ok {
kl.rejectPod(pod, reason, message)
continue
}
}mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
// 通过 dispatchWork 分发 pod 做异步处理,dispatchWork 主要工作就是把接收到的参数封装成 UpdatePodOptions,调用 UpdatePod 方法.
kl.dispatchWork(pod, kubetypes.SyncPodCreate, mirrorPod, start)
// 在 probeManager 中添加 pod,如果 pod 中定义了 readiness 和 liveness 健康检查,启动 goroutine 定期进行检测
kl.probeManager.AddPod(pod)
}
}
static pod 是由 kubelet 直接管理的,k8s apiserver 并不会感知到 static pod 的存在,当然也不会和任何一个 rs 关联上,完全是由 kubelet 进程来监管,并在它异常时负责重启。Kubelet 会通过 apiserver 为每一个 static pod 创建一个对应的 mirror pod,如此一来就可以通过 kubectl 命令查看对应的 pod,并且可以通过 kubectl logs 命令直接查看到 static pod 的日志信息。
4、下发任务(dispatchWork)
// dispatchWork hands a pod to the pod workers for asynchronous syncing.
//
// A pod that is already terminated is never dispatched; if it also carries a
// deletion timestamp, the status manager is told to terminate it. For any
// other pod the arguments are wrapped in an UpdatePodOptions and passed to
// podWorkers.UpdatePod. For SyncPodCreate the pod's container count is also
// recorded as a metric.
func (kl *Kubelet) dispatchWork(pod *v1.Pod, syncType kubetypes.SyncPodType, mirrorPod *v1.Pod, start time.Time) {
	if kl.podIsTerminated(pod) {
		if pod.DeletionTimestamp != nil {
			kl.statusManager.TerminatePod(pod)
		}
		return
	}

	// Completion callback for the pod worker.
	// NOTE(review): latency is observed only when the sync returned an error;
	// this mirrors the original excerpt - confirm that it is intended.
	onComplete := func(err error) {
		if err == nil {
			return
		}
		metrics.PodWorkerLatency.WithLabelValues(syncType.String()).Observe(metrics.SinceInMicroseconds(start))
	}

	// Queue the work on the pod workers.
	opts := UpdatePodOptions{
		Pod:            pod,
		MirrorPod:      mirrorPod,
		UpdateType:     syncType,
		OnCompleteFunc: onComplete,
	}
	kl.podWorkers.UpdatePod(&opts)

	if syncType != kubetypes.SyncPodCreate {
		return
	}
	containerCount := len(pod.Spec.Containers)
	metrics.ContainersPerPodCount.Observe(float64(containerCount))
}
func (p *podWorkers) UpdatePod(options *UpdatePodOptions) {
pod := options.Pod
uid := pod.UID
var podUpdates chan UpdatePodOptions
var exists boolp.podLock.Lock()
defer p.podLock.Unlock()// 如果当前 pod 还没有启动过 goroutine ,则启动 goroutine,并且创建 channel
if podUpdates, exists = p.podUpdates[uid];
!exists {
// 创建 channel
podUpdates = make(chan UpdatePodOptions, 1)
p.podUpdates[uid] = podUpdates// 启动 goroutine
go func() {
defer runtime.HandleCrash()
p.managePodLoop(podUpdates)
}()
}
// 下发更新事件
if !p.isWorking[pod.UID] {
p.isWorking[pod.UID] = true
podUpdates <- *options
} else {
update, found := p.lastUndeliveredWorkUpdate[pod.UID]
if !found || update.UpdateType != kubetypes.SyncPodKill {
p.lastUndeliveredWorkUpdate[pod.UID] = *options
}
}
}
// managePodLoop is the per-pod worker loop. It consumes updates from
// podUpdates until the channel is closed and, for each one, fetches a pod
// status from the cache, calls p.syncPodFn, invokes the update's completion
// callback, and finally calls p.wrapUp.
func (p *podWorkers) managePodLoop(podUpdates <-chan UpdatePodOptions) {
// Time of the previous sync; passed to the cache so that it returns a status
// newer than that sync.
var lastSyncTime time.Time
for update := range podUpdates {
// The sync runs inside a closure so that every exit path yields one error.
err := func() error {
podUID := update.Pod.UID
status, err := p.podCache.GetNewerThan(podUID, lastSyncTime)
if err != nil {
...
}
// Run the sync function with the options carried by this update.
err = p.syncPodFn(syncPodOptions{
mirrorPod:update.MirrorPod,
pod:update.Pod,
podStatus:status,
killPodOptions: update.KillPodOptions,
updateType:update.UpdateType,
})
lastSyncTime = time.Now()
return err
}()
// Report the sync result to the caller-supplied callback, if there is one.
if update.OnCompleteFunc != nil {
update.OnCompleteFunc(err)
}
if err != nil {
...
}
// Finish this round of work for the pod, passing along the sync result.
p.wrapUp(update.Pod.UID, err)
}
}
// syncPod brings a single pod towards its desired state. This is an abridged
// excerpt: "..." marks code the article elided, including the places where
// apiPodStatus and podKilled are defined.
//
// Visible steps, in order: handle a kill request, check that the pod may run
// on this node, publish the pod status, check runtime network errors, set up
// the pod's cgroups, handle static pods, create the pod data directories,
// wait for volumes, fetch image pull secrets, and call the container
// runtime's SyncPod.
//
// In the code shown it returns nil at the end; error paths are elided.
func (kl *Kubelet) syncPod(o syncPodOptions) error {
	// Pull out the required options.
	pod := o.pod
	mirrorPod := o.mirrorPod
	podStatus := o.podStatus
	updateType := o.updateType

	// A request to kill the pod is handled first (handling elided).
	if updateType == kubetypes.SyncPodKill {
		...
	}
	...
	// Check whether the pod is allowed to run on this node.
	runnable := kl.canRunPod(pod)
	if !runnable.Admit {
		...
	}

	// Publish the pod status to the status manager.
	kl.statusManager.SetPodStatus(pod, apiPodStatus)

	// Entered when the pod was not admitted, is being deleted, or has failed.
	// NOTE(review): per the original article the elided body kills the pod.
	if !runnable.Admit || pod.DeletionTimestamp != nil || apiPodStatus.Phase == v1.PodFailed {
		...
	}

	// Entered when the runtime reports network errors and the pod does not use
	// the host network (handling elided).
	if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !kubecontainer.IsHostNetworkPod(pod) {
		...
	}

	pcm := kl.containerManager.NewPodContainerManager()
	if !kl.podIsTerminated(pod) {
		...
		// Create and update the pod's cgroups, unless the pod was killed and
		// its restart policy is Never.
		if !(podKilled && pod.Spec.RestartPolicy == v1.RestartPolicyNever) {
			if !pcm.Exists(pod) {
				...
			}
		}
	}

	// For a static pod, create the corresponding mirror pod (body elided).
	if kubepod.IsStaticPod(pod) {
		...
	}

	// Create the pod's data directories.
	if err := kl.makePodDataDirs(pod); err != nil {
		...
	}

	// Wait for the pod's volumes to be attached and mounted.
	if !kl.podIsTerminated(pod) {
		if err := kl.volumeManager.WaitForAttachAndMount(pod); err != nil {
			...
		}
	}

	// Fetch the image pull secrets for the pod.
	pullSecrets := kl.getPullSecretsForPod(pod)

	// Call the container runtime's SyncPod, which creates the containers.
	result := kl.containerRuntime.SyncPod(pod, apiPodStatus, podStatus, pullSecrets, kl.backOff)
	kl.reasonCache.Update(pod.UID, result)
	if err := result.Error(); err != nil {
		...
	}

	return nil
}
// SyncPod syncs the running pod towards the desired pod. This is an abridged
// excerpt: "..." marks code the article elided, including the declarations of
// msg, err and the per-iteration container variable.
//
// Steps:
//  1. Compute what has to change for the sandbox and the containers.
//  2. Kill the whole pod if required.
//  3. Otherwise kill only the containers listed in ContainersToKill.
//  4. Create a new sandbox if required and determine the pod IP.
//  5. Start the next init container, if any.
//  6. Start the regular containers listed in ContainersToStart.
//
// Outcomes are accumulated in the named result, which the trailing bare
// return hands back to the caller.
func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
	// 1. Compute sandbox and container changes.
	podContainerChanges := m.computePodActions(pod, podStatus)
	if podContainerChanges.CreateSandbox {
		// Note: the local name ref shadows the ref package inside this block.
		ref, err := ref.GetReference(legacyscheme.Scheme, pod)
		if err != nil {
			glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
		}
		...
	}

	// 2. Kill the pod if required.
	// NOTE(review): per the original article this is the case where the
	// sandbox has changed.
	if podContainerChanges.KillPod {
		...
	} else {
		// 3. Kill only the containers that were marked for removal.
		...
		for containerID, containerInfo := range podContainerChanges.ContainersToKill {
			...
			if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
				...
			}
		}
	}

	m.pruneInitContainersBeforeStart(pod, podStatus)
	// Start from the IP recorded in the existing status, if there is one.
	podIP := ""
	if podStatus != nil {
		podIP = podStatus.IP
	}

	// 4. Create the sandbox if required.
	podSandboxID := podContainerChanges.SandboxID
	if podContainerChanges.CreateSandbox {
		podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
		if err != nil {
			...
		}
		...
		podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
		if err != nil {
			...
		}
		// For pods that do not use the host network, take the IP from the
		// status of the newly created sandbox.
		// NOTE(review): per the original article, such containers use the
		// "None" network mode and the kubelet's network plugin configures
		// networking - not visible here.
		if !kubecontainer.IsHostNetworkPod(pod) {
			podIP = m.determinePodSandboxIP(pod.Namespace, pod.Name, podSandboxStatus)
			glog.V(4).Infof("Determined the ip %q for pod %q after sandbox changed", podIP, format.Pod(pod))
		}
	}

	configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
	result.AddSyncResult(configPodSandboxResult)
	// Build the sandbox configuration.
	// NOTE(review): per the original article this covers metadata, cluster
	// DNS, container port mappings and so on.
	podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
	...

	// 5. Start the next init container.
	if container := podContainerChanges.NextInitContainerToStart; container != nil {
		...
		if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP, kubecontainer.ContainerTypeInit); err != nil {
			...
		}
	}

	// 6. Start the regular (application) containers.
	for _, idx := range podContainerChanges.ContainersToStart {
		...
		if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP, kubecontainer.ContainerTypeRegular); err != nil {
			...
		}
	}

	return
}
// startContainer starts one container in the given sandbox. This is an
// abridged excerpt: "..." marks code the article elided.
//
// Steps:
//  1. Make sure the container image is present.
//  2. Generate the container configuration.
//  3. Create the container through the runtime service.
//  4. Start the container and create the legacy log symlink.
//  5. Run the PostStart lifecycle hook, if one is defined.
//
// In the code shown it returns ("", nil) on success; error paths are elided.
func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandboxConfig *runtimeapi.PodSandboxConfig, container *v1.Container, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, podIP string, containerType kubecontainer.ContainerType) (string, error) {
	// 1. Ensure the image exists.
	// NOTE(review): per the original article, a missing image is pulled from
	// the Docker registry or a private registry.
	imageRef, msg, err := m.imagePuller.EnsureImageExists(pod, container, pullSecrets)
	if err != nil {
		...
	}

	ref, err := kubecontainer.GenerateContainerRef(pod, container)
	if err != nil {
		...
	}

	// The restart count is zero for a container with no previous status, and
	// the previous count plus one otherwise.
	restartCount := 0
	containerStatus := podStatus.FindContainerStatusByName(container.Name)
	if containerStatus != nil {
		restartCount = containerStatus.RestartCount + 1
	}

	// 2. Generate the container configuration.
	containerConfig, cleanupAction, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef, containerType)
	if cleanupAction != nil {
		defer cleanupAction()
	}
	...

	// 3. Create the container through the runtime service.
	// NOTE(review): the original article describes this as ending in a Docker
	// API call; the code shown only talks to m.runtimeService.
	containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig)
	if err != nil {
		...
	}
	err = m.internalLifecycle.PreStartContainer(pod, container, containerID)
	if err != nil {
		...
	}
	...

	// 4. Start the container.
	err = m.runtimeService.StartContainer(containerID)
	if err != nil {
		...
	}

	// Symlink the container log to its legacy location. This is skipped only
	// when Stat reports that the log file does not exist; a failure to create
	// the link is logged and otherwise ignored.
	containerMeta := containerConfig.GetMetadata()
	sandboxMeta := podSandboxConfig.GetMetadata()
	legacySymlink := legacyLogSymlink(containerID, containerMeta.Name, sandboxMeta.Name,
		sandboxMeta.Namespace)
	containerLog := filepath.Join(podSandboxConfig.LogDirectory, containerConfig.LogPath)
	if _, err := m.osInterface.Stat(containerLog); !os.IsNotExist(err) {
		if err := m.osInterface.Symlink(containerLog, legacySymlink); err != nil {
			glog.Errorf("Failed to create legacy symbolic link %q to container %q log %q: %v",
				legacySymlink, containerID, containerLog, err)
		}
	}

	// 5. Run the PostStart hook. It is invoked after StartContainer above has
	// already returned, so it does not gate the start of the workload.
	// NOTE(review): the elided error branch decides what happens to the
	// container when the hook fails.
	if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
		kubeContainerID := kubecontainer.ContainerID{
			Type: m.runtimeName,
			ID:   containerID,
		}
		msg, handlerErr := m.runner.Run(kubeContainerID, pod, container, container.Lifecycle.PostStart)
		if handlerErr != nil {
			...
		}
	}

	return "", nil
}
上篇文章介绍了 kubelet 的启动流程,本篇文章主要介绍 kubelet 创建 pod 的流程。
kubernetes 版本: v1.12
上一篇:朋友啊,朋友
下一篇:挑战21天(4)最难忘的事