start查
Array
(
    [0] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python2556_1
                    [title] => 爬取绝招-n
                    [content] => 爬取的
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 172
                    [rank] => 1
                    [ccount] => 0
                    [percent] => 100
                    [weight] => 3.7128221988678
                )

        )

    [1] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python2562
                    [title] => 爬取网页中动态加载的数据
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 2562
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 4
                    [rank] => 2
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [2] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python2560
                    [title] => 实现定时爬取网页内容
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 2560
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 6
                    [rank] => 3
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [3] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python2558
                    [title] => 爬取北、上、广租房信息
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 2558
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 8
                    [rank] => 4
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [4] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python2557
                    [title] => 使用多进程爬取在线课程MySQL版
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 2557
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 9
                    [rank] => 5
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [5] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python2556
                    [title] => 爬取在线课程MySQL版
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 2556
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 10
                    [rank] => 6
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [6] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python2555
                    [title] => 爬取在线课程Excel版
                    [content] => 11122
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 2555
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 11
                    [rank] => 7
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [7] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python4664
                    [title] => 批量爬取B站小视频
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 4664
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 111
                    [rank] => 8
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [8] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python4665
                    [title] => 爬取B站小视频之随机生成浏览器的头部信息
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 4665
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 112
                    [rank] => 9
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [9] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python4666
                    [title] => 爬取B站小视频之获取要下载视频的大小
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 4666
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 113
                    [rank] => 10
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [10] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python4667
                    [title] => 爬取B站小视频之实时打印文件下载进度
                    [lang] => python
                    [database_name] => database_name
                    [doc_type] => 1
                    [database_type] => 1
                    [doc_id] => 4667
                    [255] => 
                )

            [_terms:XSDocument:private] => 
            [_texts:XSDocument:private] => 
            [_charset:XSDocument:private] => UTF-8
            [_meta:XSDocument:private] => Array
                (
                    [docid] => 114
                    [rank] => 11
                    [ccount] => 0
                    [percent] => 78
                    [weight] => 2.9172174930573
                )

        )

    [11] => XSDocument Object
        (
            [_data:XSDocument:private] => Array
                (
                    [id] => python2565
                    [title] => 通过selenium模块实现自动切换浏览器页面
                    [content] => 


 

*   实例描述

在使用selenium框架爬取网页信息时,偶尔需要实现爬取多个页面信息,此时如果没有进行浏览器页面的切换,将无法爬取对应网页的数据内容,如图1所示。那么该如何通过selenium框架实现自动切换浏览器页面呢?本实例将通过selenium模块实现自动切换浏览器页面。

浏览器页面切换

*   代码实现

通过selenium框架实现自动切换浏览器页面时,首先需要区分每个页面的window_handle,也就是每个页面的浏览器窗口句柄,确定浏览器页面窗口以后进行页面窗口的切换,并将执行权传递给切换后的页面窗口,此时切换后的窗口才会拥有执行权(当前窗口的控制权)。示例代码如下:

from selenium import webdriver  # 导入浏览器驱动模块

import time                      # 导入时间模块

class Selenium:
    """Demonstrate switching between browser windows with Selenium WebDriver.

    Creating an instance launches Chrome, maximizes its window and loads the
    start page. ``toggle_pages`` then opens a second window and closes both
    windows one after the other.
    """

    def __init__(self,
                 executable_path='G:/Python/Python37/chromedriver',
                 start_url='https://www.taobao.com/'):
        """Start Chrome and load the start page.

        Args:
            executable_path: Location of the chromedriver binary, passed to
                ``webdriver.Chrome``. Defaults to the path used by the
                original example.
            start_url: Page loaded in the first window. Defaults to the
                Taobao home page.
        """
        # Load the Chrome browser driver.
        self.driver = webdriver.Chrome(executable_path=executable_path)
        self.driver.maximize_window()  # maximize the browser window
        self.driver.get(start_url)     # open the start page (Taobao by default)

    def toggle_pages(self, url):
        """Open ``url`` in a new window, then close the old and the new window.

        Args:
            url: Address opened in the new window through ``window.open``.

        Raises:
            RuntimeError: If no window other than the current one is found
                after the script ran. Raised before any window is closed.
        """
        time.sleep(3)  # wait 3 seconds
        # Open the new window (Tmall in the example) by executing JavaScript.
        js = 'window.open("{url}")'.format(url=url)
        self.driver.execute_script(js)

        # Handle of the current window, i.e. the start page (Taobao) window.
        original_handle = self.driver.current_window_handle

        # Of all window handles, keep those that are not the current window.
        other_handles = [handle for handle in self.driver.window_handles
                         if handle != original_handle]
        if not other_handles:
            # Fail here rather than after the original window is gone.
            raise RuntimeError(
                'no new browser window was opened for {url}'.format(url=url))
        # The last non-current handle is the new window.
        new_handle = other_handles[-1]

        time.sleep(3)
        # Switch to the original window and hand control to it.
        self.driver.switch_to.window(original_handle)
        time.sleep(3)
        self.driver.close()  # close the original (Taobao) window

        # Switch to the new window and hand control to it.
        self.driver.switch_to.window(new_handle)
        time.sleep(3)
        # Close the new (Tmall) window; quit() also ends the WebDriver
        # session so the driver process is not left running.
        self.driver.quit()

# Script entry point: run the window-switching demo.
if __name__ == '__main__':
    selenium = Selenium()   # create the Selenium object
    selenium.toggle_pages('https://www.tmall.com/')  # open and switch to the Tmall home page

运行代码,首先谷歌浏览器将自动打开“淘宝”网页,然后再打开“天猫”网页,再执行从“天猫”网页自动切换至“淘宝”网页并关闭“淘宝”网页的操作。如图2所示。

自动关闭“淘宝”网页保留“天猫”网页

说明:“淘宝”网页自动关闭后,“天猫”网页将自动关闭。

 

[lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 2565 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 1 [rank] => 12 [ccount] => 0 [percent] => 5 [weight] => 0.21397353708744 ) ) )

Warning: error_log(/opt/lampp/htdocs/www.xunsearch.com/Application/Runtime/Logs/Home/21_08_06.log): failed to open stream: Permission denied in /opt/lampp/htdocs/www.xunsearch.com/ThinkPHP/Library/Think/Log/Driver/File.class.php on line 48