731 lines
27 KiB
Text
731 lines
27 KiB
Text
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "16feb8b8-581d-4bec-983a-68b858622696",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "682c6e18-4f1f-454f-88ba-389f999b5974",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"ira1 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_1.csv')\n",
|
||
"ira2 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_2.csv')\n",
|
||
"ira3 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_3.csv')\n",
|
||
"ira4 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_4.csv')\n",
|
||
"# ira5 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_5.csv') # has mixed types\n",
|
||
"ira6 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_6.csv')\n",
|
||
"ira7 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_7.csv')\n",
|
||
"ira8 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_8.csv')\n",
|
||
"ira9 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_9.csv')\n",
|
||
"# ira10 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_10.csv') # has mixed types\n",
|
||
"ira11 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_11.csv')\n",
|
||
"# ira12 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_12.csv') # has mixed types\n",
|
||
"ira13 = pd.read_csv('data/russian-troll-tweets/IRAhandle_tweets_13.csv')\n",
|
||
"\n",
|
||
"df = pd.concat([ira13,ira11,ira9,ira8,ira7,ira6,ira4,ira3,ira2,ira1])\n",
|
||
"df.reset_index(inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "194a8799-893b-4418-b145-f24ce111f0f9",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 2232523 entries, 0 to 2232522\n",
|
||
"Data columns (total 22 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 index 2232523 non-null int64 \n",
|
||
" 1 external_author_id 2232523 non-null int64 \n",
|
||
" 2 author 2232523 non-null object\n",
|
||
" 3 content 2232522 non-null object\n",
|
||
" 4 region 2225360 non-null object\n",
|
||
" 5 language 2232523 non-null object\n",
|
||
" 6 publish_date 2232523 non-null object\n",
|
||
" 7 harvested_date 2232523 non-null object\n",
|
||
" 8 following 2232523 non-null int64 \n",
|
||
" 9 followers 2232523 non-null int64 \n",
|
||
" 10 updates 2232523 non-null int64 \n",
|
||
" 11 post_type 1020610 non-null object\n",
|
||
" 12 account_type 2232523 non-null object\n",
|
||
" 13 retweet 2232523 non-null int64 \n",
|
||
" 14 account_category 2232523 non-null object\n",
|
||
" 15 new_june_2018 2232523 non-null int64 \n",
|
||
" 16 alt_external_id 2232523 non-null int64 \n",
|
||
" 17 tweet_id 2232523 non-null int64 \n",
|
||
" 18 article_url 2232523 non-null object\n",
|
||
" 19 tco1_step1 1608632 non-null object\n",
|
||
" 20 tco2_step1 538066 non-null object\n",
|
||
" 21 tco3_step1 13690 non-null object\n",
|
||
"dtypes: int64(9), object(13)\n",
|
||
"memory usage: 374.7+ MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df.info(show_counts=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "fc58d1a7-6e84-41ec-a616-269d012b106c",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df = df[df.language == 'English']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "5873887b-f2ea-412f-aa1d-2f2eac02ae96",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"Int64Index: 1605873 entries, 11 to 2232522\n",
|
||
"Data columns (total 22 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 index 1605873 non-null int64 \n",
|
||
" 1 external_author_id 1605873 non-null int64 \n",
|
||
" 2 author 1605873 non-null object\n",
|
||
" 3 content 1605872 non-null object\n",
|
||
" 4 region 1604847 non-null object\n",
|
||
" 5 language 1605873 non-null object\n",
|
||
" 6 publish_date 1605873 non-null object\n",
|
||
" 7 harvested_date 1605873 non-null object\n",
|
||
" 8 following 1605873 non-null int64 \n",
|
||
" 9 followers 1605873 non-null int64 \n",
|
||
" 10 updates 1605873 non-null int64 \n",
|
||
" 11 post_type 682199 non-null object\n",
|
||
" 12 account_type 1605873 non-null object\n",
|
||
" 13 retweet 1605873 non-null int64 \n",
|
||
" 14 account_category 1605873 non-null object\n",
|
||
" 15 new_june_2018 1605873 non-null int64 \n",
|
||
" 16 alt_external_id 1605873 non-null int64 \n",
|
||
" 17 tweet_id 1605873 non-null int64 \n",
|
||
" 18 article_url 1605873 non-null object\n",
|
||
" 19 tco1_step1 1051365 non-null object\n",
|
||
" 20 tco2_step1 361124 non-null object\n",
|
||
" 21 tco3_step1 12955 non-null object\n",
|
||
"dtypes: int64(9), object(13)\n",
|
||
"memory usage: 281.8+ MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "9520567b-8886-4e35-a1b4-51be2a2fd26d",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array(['Russian', 'German', 'Koch', 'Left', 'Right', '?', 'local',\n",
|
||
" 'Italian', 'Hashtager', 'Arabic', 'news', 'French', 'Spanish',\n",
|
||
" 'Commercial', 'ZAPOROSHIA'], dtype=object)"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.account_type.unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "83072d1e-60d0-4a1a-9af0-d73030ed6398",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"11 9/15/2015 17:43\n",
|
||
"12 9/15/2015 17:55\n",
|
||
"32 9/16/2015 8:04\n",
|
||
"86 9/20/2015 9:11\n",
|
||
"190 9/28/2015 17:58\n",
|
||
"Name: publish_date, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.publish_date.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "b0b2e5fa-21fa-4d63-8f47-2d4cf0f79c57",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"2232518 3/8/2017 8:59\n",
|
||
"2232519 3/8/2017 8:59\n",
|
||
"2232520 3/8/2017 8:59\n",
|
||
"2232521 3/8/2017 8:59\n",
|
||
"2232522 3/8/2017 8:59\n",
|
||
"Name: publish_date, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.publish_date.tail()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "a58504b0-24c2-4da9-8773-49713fb0ece9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>index</th>\n",
|
||
" <th>external_author_id</th>\n",
|
||
" <th>author</th>\n",
|
||
" <th>content</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>language</th>\n",
|
||
" <th>publish_date</th>\n",
|
||
" <th>harvested_date</th>\n",
|
||
" <th>following</th>\n",
|
||
" <th>followers</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>account_type</th>\n",
|
||
" <th>retweet</th>\n",
|
||
" <th>account_category</th>\n",
|
||
" <th>new_june_2018</th>\n",
|
||
" <th>alt_external_id</th>\n",
|
||
" <th>tweet_id</th>\n",
|
||
" <th>article_url</th>\n",
|
||
" <th>tco1_step1</th>\n",
|
||
" <th>tco2_step1</th>\n",
|
||
" <th>tco3_step1</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>ZUBOVNIK</td>\n",
|
||
" <td>.@McFaul same to you! http://t.co/BbPvCzR0kx</td>\n",
|
||
" <td>Unknown</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>9/15/2015 17:43</td>\n",
|
||
" <td>9/15/2015 17:43</td>\n",
|
||
" <td>3559</td>\n",
|
||
" <td>22650</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Russian</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NonEnglish</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>643842467972968448</td>\n",
|
||
" <td>http://twitter.com/zubovnik/statuses/643842467...</td>\n",
|
||
" <td>https://twitter.com/zubovnik/status/6438424679...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>ZUBOVNIK</td>\n",
|
||
" <td>'@McFaul in twitter?'</td>\n",
|
||
" <td>Unknown</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>9/15/2015 17:55</td>\n",
|
||
" <td>9/15/2015 17:56</td>\n",
|
||
" <td>3559</td>\n",
|
||
" <td>22650</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Russian</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NonEnglish</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>643845691362668544</td>\n",
|
||
" <td>http://twitter.com/zubovnik/statuses/643845691...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>32</th>\n",
|
||
" <td>32</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>ZUBOVNIK</td>\n",
|
||
" <td>'@McFaul US stop bombing Zivilisten in Syrien'</td>\n",
|
||
" <td>Unknown</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>9/16/2015 8:04</td>\n",
|
||
" <td>9/16/2015 8:04</td>\n",
|
||
" <td>3559</td>\n",
|
||
" <td>22659</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Russian</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NonEnglish</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>644059347501363200</td>\n",
|
||
" <td>http://twitter.com/zubovnik/statuses/644059347...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>86</th>\n",
|
||
" <td>86</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>ZUBOVNIK</td>\n",
|
||
" <td>'@realDonaldTrump @MoeHoward86 @YouTube Good l...</td>\n",
|
||
" <td>Unknown</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>9/20/2015 9:11</td>\n",
|
||
" <td>9/20/2015 9:11</td>\n",
|
||
" <td>4089</td>\n",
|
||
" <td>22453</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Russian</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NonEnglish</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>645525713626529792</td>\n",
|
||
" <td>http://twitter.com/zubovnik/statuses/645525713...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>190</th>\n",
|
||
" <td>190</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>ZUBOVNIK</td>\n",
|
||
" <td>Soviet soldiers marching on 1943. Notice the f...</td>\n",
|
||
" <td>Unknown</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>9/28/2015 17:58</td>\n",
|
||
" <td>9/28/2015 17:58</td>\n",
|
||
" <td>3964</td>\n",
|
||
" <td>22227</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Russian</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NonEnglish</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2589513234</td>\n",
|
||
" <td>648557409682763776</td>\n",
|
||
" <td>http://twitter.com/zubovnik/statuses/648557409...</td>\n",
|
||
" <td>https://twitter.com/MatEvidence/status/6485570...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 22 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" index external_author_id author \\\n",
|
||
"11 11 2589513234 ZUBOVNIK \n",
|
||
"12 12 2589513234 ZUBOVNIK \n",
|
||
"32 32 2589513234 ZUBOVNIK \n",
|
||
"86 86 2589513234 ZUBOVNIK \n",
|
||
"190 190 2589513234 ZUBOVNIK \n",
|
||
"\n",
|
||
" content region language \\\n",
|
||
"11 .@McFaul same to you! http://t.co/BbPvCzR0kx Unknown English \n",
|
||
"12 '@McFaul in twitter?' Unknown English \n",
|
||
"32 '@McFaul US stop bombing Zivilisten in Syrien' Unknown English \n",
|
||
"86 '@realDonaldTrump @MoeHoward86 @YouTube Good l... Unknown English \n",
|
||
"190 Soviet soldiers marching on 1943. Notice the f... Unknown English \n",
|
||
"\n",
|
||
" publish_date harvested_date following followers ... \\\n",
|
||
"11 9/15/2015 17:43 9/15/2015 17:43 3559 22650 ... \n",
|
||
"12 9/15/2015 17:55 9/15/2015 17:56 3559 22650 ... \n",
|
||
"32 9/16/2015 8:04 9/16/2015 8:04 3559 22659 ... \n",
|
||
"86 9/20/2015 9:11 9/20/2015 9:11 4089 22453 ... \n",
|
||
"190 9/28/2015 17:58 9/28/2015 17:58 3964 22227 ... \n",
|
||
"\n",
|
||
" account_type retweet account_category new_june_2018 alt_external_id \\\n",
|
||
"11 Russian 0 NonEnglish 0 2589513234 \n",
|
||
"12 Russian 0 NonEnglish 0 2589513234 \n",
|
||
"32 Russian 0 NonEnglish 0 2589513234 \n",
|
||
"86 Russian 0 NonEnglish 0 2589513234 \n",
|
||
"190 Russian 1 NonEnglish 0 2589513234 \n",
|
||
"\n",
|
||
" tweet_id article_url \\\n",
|
||
"11 643842467972968448 http://twitter.com/zubovnik/statuses/643842467... \n",
|
||
"12 643845691362668544 http://twitter.com/zubovnik/statuses/643845691... \n",
|
||
"32 644059347501363200 http://twitter.com/zubovnik/statuses/644059347... \n",
|
||
"86 645525713626529792 http://twitter.com/zubovnik/statuses/645525713... \n",
|
||
"190 648557409682763776 http://twitter.com/zubovnik/statuses/648557409... \n",
|
||
"\n",
|
||
" tco1_step1 tco2_step1 tco3_step1 \n",
|
||
"11 https://twitter.com/zubovnik/status/6438424679... NaN NaN \n",
|
||
"12 NaN NaN NaN \n",
|
||
"32 NaN NaN NaN \n",
|
||
"86 NaN NaN NaN \n",
|
||
"190 https://twitter.com/MatEvidence/status/6485570... NaN NaN \n",
|
||
"\n",
|
||
"[5 rows x 22 columns]"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "667ba830-6e51-4cc9-82ff-e73db03e6f0e",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>index</th>\n",
|
||
" <th>external_author_id</th>\n",
|
||
" <th>author</th>\n",
|
||
" <th>content</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>language</th>\n",
|
||
" <th>publish_date</th>\n",
|
||
" <th>harvested_date</th>\n",
|
||
" <th>following</th>\n",
|
||
" <th>followers</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>account_type</th>\n",
|
||
" <th>retweet</th>\n",
|
||
" <th>account_category</th>\n",
|
||
" <th>new_june_2018</th>\n",
|
||
" <th>alt_external_id</th>\n",
|
||
" <th>tweet_id</th>\n",
|
||
" <th>article_url</th>\n",
|
||
" <th>tco1_step1</th>\n",
|
||
" <th>tco2_step1</th>\n",
|
||
" <th>tco3_step1</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2232518</th>\n",
|
||
" <td>243886</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>AUSTINLOVESBEER</td>\n",
|
||
" <td>BREAKING: Killer avalanche sweeps three skiers...</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Right</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>RightTroll</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>839400198002503680</td>\n",
|
||
" <td>http://twitter.com/2497991305/statuses/8394001...</td>\n",
|
||
" <td>https://twitter.com/Daily_Star/status/83938477...</td>\n",
|
||
" <td>http://bit.ly/2lWNDnt</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2232519</th>\n",
|
||
" <td>243887</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>AUSTINLOVESBEER</td>\n",
|
||
" <td>Why men should support International Women’s D...</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>3/8/2017 9:00</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Right</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>RightTroll</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>839400290168135680</td>\n",
|
||
" <td>http://twitter.com/2497991305/statuses/8394002...</td>\n",
|
||
" <td>http://trib.al/xiMs3md</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2232520</th>\n",
|
||
" <td>243888</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>AUSTINLOVESBEER</td>\n",
|
||
" <td>How we can rebuild trust in a UK divided by in...</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Right</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>RightTroll</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>839400090582179840</td>\n",
|
||
" <td>http://twitter.com/2497991305/statuses/8394000...</td>\n",
|
||
" <td>http://trib.al/l3iyCVF</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2232521</th>\n",
|
||
" <td>243889</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>AUSTINLOVESBEER</td>\n",
|
||
" <td>John Humphrys accused of patronising Angela Ra...</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Right</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>RightTroll</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>839400131325648896</td>\n",
|
||
" <td>http://twitter.com/2497991305/statuses/8394001...</td>\n",
|
||
" <td>http://bit.ly/2m0OQL7</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2232522</th>\n",
|
||
" <td>243890</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>AUSTINLOVESBEER</td>\n",
|
||
" <td>Fossilized poop found in 180-million-year-old ...</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>English</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>3/8/2017 8:59</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>Right</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>RightTroll</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2497991305</td>\n",
|
||
" <td>839400253413437440</td>\n",
|
||
" <td>http://twitter.com/2497991305/statuses/8394002...</td>\n",
|
||
" <td>http://dailym.ai/2lV5BXf</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 22 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" index external_author_id author \\\n",
|
||
"2232518 243886 2497991305 AUSTINLOVESBEER \n",
|
||
"2232519 243887 2497991305 AUSTINLOVESBEER \n",
|
||
"2232520 243888 2497991305 AUSTINLOVESBEER \n",
|
||
"2232521 243889 2497991305 AUSTINLOVESBEER \n",
|
||
"2232522 243890 2497991305 AUSTINLOVESBEER \n",
|
||
"\n",
|
||
" content region \\\n",
|
||
"2232518 BREAKING: Killer avalanche sweeps three skiers... United States \n",
|
||
"2232519 Why men should support International Women’s D... United States \n",
|
||
"2232520 How we can rebuild trust in a UK divided by in... United States \n",
|
||
"2232521 John Humphrys accused of patronising Angela Ra... United States \n",
|
||
"2232522 Fossilized poop found in 180-million-year-old ... United States \n",
|
||
"\n",
|
||
" language publish_date harvested_date following followers ... \\\n",
|
||
"2232518 English 3/8/2017 8:59 3/8/2017 8:59 41 34 ... \n",
|
||
"2232519 English 3/8/2017 8:59 3/8/2017 9:00 41 34 ... \n",
|
||
"2232520 English 3/8/2017 8:59 3/8/2017 8:59 41 34 ... \n",
|
||
"2232521 English 3/8/2017 8:59 3/8/2017 8:59 41 34 ... \n",
|
||
"2232522 English 3/8/2017 8:59 3/8/2017 8:59 41 34 ... \n",
|
||
"\n",
|
||
" account_type retweet account_category new_june_2018 alt_external_id \\\n",
|
||
"2232518 Right 1 RightTroll 0 2497991305 \n",
|
||
"2232519 Right 1 RightTroll 0 2497991305 \n",
|
||
"2232520 Right 1 RightTroll 0 2497991305 \n",
|
||
"2232521 Right 1 RightTroll 0 2497991305 \n",
|
||
"2232522 Right 1 RightTroll 0 2497991305 \n",
|
||
"\n",
|
||
" tweet_id \\\n",
|
||
"2232518 839400198002503680 \n",
|
||
"2232519 839400290168135680 \n",
|
||
"2232520 839400090582179840 \n",
|
||
"2232521 839400131325648896 \n",
|
||
"2232522 839400253413437440 \n",
|
||
"\n",
|
||
" article_url \\\n",
|
||
"2232518 http://twitter.com/2497991305/statuses/8394001... \n",
|
||
"2232519 http://twitter.com/2497991305/statuses/8394002... \n",
|
||
"2232520 http://twitter.com/2497991305/statuses/8394000... \n",
|
||
"2232521 http://twitter.com/2497991305/statuses/8394001... \n",
|
||
"2232522 http://twitter.com/2497991305/statuses/8394002... \n",
|
||
"\n",
|
||
" tco1_step1 \\\n",
|
||
"2232518 https://twitter.com/Daily_Star/status/83938477... \n",
|
||
"2232519 http://trib.al/xiMs3md \n",
|
||
"2232520 http://trib.al/l3iyCVF \n",
|
||
"2232521 http://bit.ly/2m0OQL7 \n",
|
||
"2232522 http://dailym.ai/2lV5BXf \n",
|
||
"\n",
|
||
" tco2_step1 tco3_step1 \n",
|
||
"2232518 http://bit.ly/2lWNDnt NaN \n",
|
||
"2232519 NaN NaN \n",
|
||
"2232520 NaN NaN \n",
|
||
"2232521 NaN NaN \n",
|
||
"2232522 NaN NaN \n",
|
||
"\n",
|
||
"[5 rows x 22 columns]"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.tail()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|