bots/ira_eda.ipynb
2024-01-12 09:27:09 -05:00

731 lines
27 KiB
Text
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "16feb8b8-581d-4bec-983a-68b858622696",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "682c6e18-4f1f-454f-88ba-389f999b5974",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Load the IRA troll-tweet CSV parts and stack them into a single frame.\n",
"# Parts 5, 10 and 12 are intentionally left out: they have mixed column types.\n",
"# The list order is the concatenation order, so it determines row order in df.\n",
"IRA_DIR = 'data/russian-troll-tweets'\n",
"IRA_PARTS = [13, 11, 9, 8, 7, 6, 4, 3, 2, 1]\n",
"\n",
"# Build the frames in a temporary list so no per-file frame stays alive in the\n",
"# kernel once the concatenation is done.\n",
"df = pd.concat(\n",
"    [pd.read_csv(f'{IRA_DIR}/IRAhandle_tweets_{part}.csv') for part in IRA_PARTS]\n",
")\n",
"\n",
"# reset_index() without drop=True gives a fresh 0..n-1 RangeIndex and keeps each\n",
"# row's original per-file label in an `index` column.\n",
"df = df.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "194a8799-893b-4418-b145-f24ce111f0f9",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 2232523 entries, 0 to 2232522\n",
"Data columns (total 22 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 index 2232523 non-null int64 \n",
" 1 external_author_id 2232523 non-null int64 \n",
" 2 author 2232523 non-null object\n",
" 3 content 2232522 non-null object\n",
" 4 region 2225360 non-null object\n",
" 5 language 2232523 non-null object\n",
" 6 publish_date 2232523 non-null object\n",
" 7 harvested_date 2232523 non-null object\n",
" 8 following 2232523 non-null int64 \n",
" 9 followers 2232523 non-null int64 \n",
" 10 updates 2232523 non-null int64 \n",
" 11 post_type 1020610 non-null object\n",
" 12 account_type 2232523 non-null object\n",
" 13 retweet 2232523 non-null int64 \n",
" 14 account_category 2232523 non-null object\n",
" 15 new_june_2018 2232523 non-null int64 \n",
" 16 alt_external_id 2232523 non-null int64 \n",
" 17 tweet_id 2232523 non-null int64 \n",
" 18 article_url 2232523 non-null object\n",
" 19 tco1_step1 1608632 non-null object\n",
" 20 tco2_step1 538066 non-null object\n",
" 21 tco3_step1 13690 non-null object\n",
"dtypes: int64(9), object(13)\n",
"memory usage: 374.7+ MB\n"
]
}
],
"source": [
"df.info(show_counts=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fc58d1a7-6e84-41ec-a616-269d012b106c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Keep only the rows whose language column is exactly 'English'.\n",
"is_english = df['language'] == 'English'\n",
"df = df.loc[is_english]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5873887b-f2ea-412f-aa1d-2f2eac02ae96",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 1605873 entries, 11 to 2232522\n",
"Data columns (total 22 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 index 1605873 non-null int64 \n",
" 1 external_author_id 1605873 non-null int64 \n",
" 2 author 1605873 non-null object\n",
" 3 content 1605872 non-null object\n",
" 4 region 1604847 non-null object\n",
" 5 language 1605873 non-null object\n",
" 6 publish_date 1605873 non-null object\n",
" 7 harvested_date 1605873 non-null object\n",
" 8 following 1605873 non-null int64 \n",
" 9 followers 1605873 non-null int64 \n",
" 10 updates 1605873 non-null int64 \n",
" 11 post_type 682199 non-null object\n",
" 12 account_type 1605873 non-null object\n",
" 13 retweet 1605873 non-null int64 \n",
" 14 account_category 1605873 non-null object\n",
" 15 new_june_2018 1605873 non-null int64 \n",
" 16 alt_external_id 1605873 non-null int64 \n",
" 17 tweet_id 1605873 non-null int64 \n",
" 18 article_url 1605873 non-null object\n",
" 19 tco1_step1 1051365 non-null object\n",
" 20 tco2_step1 361124 non-null object\n",
" 21 tco3_step1 12955 non-null object\n",
"dtypes: int64(9), object(13)\n",
"memory usage: 281.8+ MB\n"
]
}
],
"source": [
"# Ask for non-null counts explicitly, matching the earlier df.info call;\n",
"# by default pandas only shows them while the frame is below\n",
"# display.max_info_rows.\n",
"df.info(show_counts=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9520567b-8886-4e35-a1b4-51be2a2fd26d",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"array(['Russian', 'German', 'Koch', 'Left', 'Right', '?', 'local',\n",
" 'Italian', 'Hashtager', 'Arabic', 'news', 'French', 'Spanish',\n",
" 'Commercial', 'ZAPOROSHIA'], dtype=object)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct values present in the account_type column.\n",
"df['account_type'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "83072d1e-60d0-4a1a-9af0-d73030ed6398",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"11 9/15/2015 17:43\n",
"12 9/15/2015 17:55\n",
"32 9/16/2015 8:04\n",
"86 9/20/2015 9:11\n",
"190 9/28/2015 17:58\n",
"Name: publish_date, dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First five publish_date values (stored as strings, not datetimes).\n",
"df['publish_date'].head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b0b2e5fa-21fa-4d63-8f47-2d4cf0f79c57",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"2232518 3/8/2017 8:59\n",
"2232519 3/8/2017 8:59\n",
"2232520 3/8/2017 8:59\n",
"2232521 3/8/2017 8:59\n",
"2232522 3/8/2017 8:59\n",
"Name: publish_date, dtype: object"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Last five publish_date values (stored as strings, not datetimes).\n",
"df['publish_date'].tail()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a58504b0-24c2-4da9-8773-49713fb0ece9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>external_author_id</th>\n",
" <th>author</th>\n",
" <th>content</th>\n",
" <th>region</th>\n",
" <th>language</th>\n",
" <th>publish_date</th>\n",
" <th>harvested_date</th>\n",
" <th>following</th>\n",
" <th>followers</th>\n",
" <th>...</th>\n",
" <th>account_type</th>\n",
" <th>retweet</th>\n",
" <th>account_category</th>\n",
" <th>new_june_2018</th>\n",
" <th>alt_external_id</th>\n",
" <th>tweet_id</th>\n",
" <th>article_url</th>\n",
" <th>tco1_step1</th>\n",
" <th>tco2_step1</th>\n",
" <th>tco3_step1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>2589513234</td>\n",
" <td>ZUBOVNIK</td>\n",
" <td>.@McFaul same to you! http://t.co/BbPvCzR0kx</td>\n",
" <td>Unknown</td>\n",
" <td>English</td>\n",
" <td>9/15/2015 17:43</td>\n",
" <td>9/15/2015 17:43</td>\n",
" <td>3559</td>\n",
" <td>22650</td>\n",
" <td>...</td>\n",
" <td>Russian</td>\n",
" <td>0</td>\n",
" <td>NonEnglish</td>\n",
" <td>0</td>\n",
" <td>2589513234</td>\n",
" <td>643842467972968448</td>\n",
" <td>http://twitter.com/zubovnik/statuses/643842467...</td>\n",
" <td>https://twitter.com/zubovnik/status/6438424679...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>12</td>\n",
" <td>2589513234</td>\n",
" <td>ZUBOVNIK</td>\n",
" <td>'@McFaul in twitter?'</td>\n",
" <td>Unknown</td>\n",
" <td>English</td>\n",
" <td>9/15/2015 17:55</td>\n",
" <td>9/15/2015 17:56</td>\n",
" <td>3559</td>\n",
" <td>22650</td>\n",
" <td>...</td>\n",
" <td>Russian</td>\n",
" <td>0</td>\n",
" <td>NonEnglish</td>\n",
" <td>0</td>\n",
" <td>2589513234</td>\n",
" <td>643845691362668544</td>\n",
" <td>http://twitter.com/zubovnik/statuses/643845691...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>32</td>\n",
" <td>2589513234</td>\n",
" <td>ZUBOVNIK</td>\n",
" <td>'@McFaul US stop bombing Zivilisten in Syrien'</td>\n",
" <td>Unknown</td>\n",
" <td>English</td>\n",
" <td>9/16/2015 8:04</td>\n",
" <td>9/16/2015 8:04</td>\n",
" <td>3559</td>\n",
" <td>22659</td>\n",
" <td>...</td>\n",
" <td>Russian</td>\n",
" <td>0</td>\n",
" <td>NonEnglish</td>\n",
" <td>0</td>\n",
" <td>2589513234</td>\n",
" <td>644059347501363200</td>\n",
" <td>http://twitter.com/zubovnik/statuses/644059347...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>86</td>\n",
" <td>2589513234</td>\n",
" <td>ZUBOVNIK</td>\n",
" <td>'@realDonaldTrump @MoeHoward86 @YouTube Good l...</td>\n",
" <td>Unknown</td>\n",
" <td>English</td>\n",
" <td>9/20/2015 9:11</td>\n",
" <td>9/20/2015 9:11</td>\n",
" <td>4089</td>\n",
" <td>22453</td>\n",
" <td>...</td>\n",
" <td>Russian</td>\n",
" <td>0</td>\n",
" <td>NonEnglish</td>\n",
" <td>0</td>\n",
" <td>2589513234</td>\n",
" <td>645525713626529792</td>\n",
" <td>http://twitter.com/zubovnik/statuses/645525713...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>190</th>\n",
" <td>190</td>\n",
" <td>2589513234</td>\n",
" <td>ZUBOVNIK</td>\n",
" <td>Soviet soldiers marching on 1943. Notice the f...</td>\n",
" <td>Unknown</td>\n",
" <td>English</td>\n",
" <td>9/28/2015 17:58</td>\n",
" <td>9/28/2015 17:58</td>\n",
" <td>3964</td>\n",
" <td>22227</td>\n",
" <td>...</td>\n",
" <td>Russian</td>\n",
" <td>1</td>\n",
" <td>NonEnglish</td>\n",
" <td>0</td>\n",
" <td>2589513234</td>\n",
" <td>648557409682763776</td>\n",
" <td>http://twitter.com/zubovnik/statuses/648557409...</td>\n",
" <td>https://twitter.com/MatEvidence/status/6485570...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" index external_author_id author \\\n",
"11 11 2589513234 ZUBOVNIK \n",
"12 12 2589513234 ZUBOVNIK \n",
"32 32 2589513234 ZUBOVNIK \n",
"86 86 2589513234 ZUBOVNIK \n",
"190 190 2589513234 ZUBOVNIK \n",
"\n",
" content region language \\\n",
"11 .@McFaul same to you! http://t.co/BbPvCzR0kx Unknown English \n",
"12 '@McFaul in twitter?' Unknown English \n",
"32 '@McFaul US stop bombing Zivilisten in Syrien' Unknown English \n",
"86 '@realDonaldTrump @MoeHoward86 @YouTube Good l... Unknown English \n",
"190 Soviet soldiers marching on 1943. Notice the f... Unknown English \n",
"\n",
" publish_date harvested_date following followers ... \\\n",
"11 9/15/2015 17:43 9/15/2015 17:43 3559 22650 ... \n",
"12 9/15/2015 17:55 9/15/2015 17:56 3559 22650 ... \n",
"32 9/16/2015 8:04 9/16/2015 8:04 3559 22659 ... \n",
"86 9/20/2015 9:11 9/20/2015 9:11 4089 22453 ... \n",
"190 9/28/2015 17:58 9/28/2015 17:58 3964 22227 ... \n",
"\n",
" account_type retweet account_category new_june_2018 alt_external_id \\\n",
"11 Russian 0 NonEnglish 0 2589513234 \n",
"12 Russian 0 NonEnglish 0 2589513234 \n",
"32 Russian 0 NonEnglish 0 2589513234 \n",
"86 Russian 0 NonEnglish 0 2589513234 \n",
"190 Russian 1 NonEnglish 0 2589513234 \n",
"\n",
" tweet_id article_url \\\n",
"11 643842467972968448 http://twitter.com/zubovnik/statuses/643842467... \n",
"12 643845691362668544 http://twitter.com/zubovnik/statuses/643845691... \n",
"32 644059347501363200 http://twitter.com/zubovnik/statuses/644059347... \n",
"86 645525713626529792 http://twitter.com/zubovnik/statuses/645525713... \n",
"190 648557409682763776 http://twitter.com/zubovnik/statuses/648557409... \n",
"\n",
" tco1_step1 tco2_step1 tco3_step1 \n",
"11 https://twitter.com/zubovnik/status/6438424679... NaN NaN \n",
"12 NaN NaN NaN \n",
"32 NaN NaN NaN \n",
"86 NaN NaN NaN \n",
"190 https://twitter.com/MatEvidence/status/6485570... NaN NaN \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "667ba830-6e51-4cc9-82ff-e73db03e6f0e",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>external_author_id</th>\n",
" <th>author</th>\n",
" <th>content</th>\n",
" <th>region</th>\n",
" <th>language</th>\n",
" <th>publish_date</th>\n",
" <th>harvested_date</th>\n",
" <th>following</th>\n",
" <th>followers</th>\n",
" <th>...</th>\n",
" <th>account_type</th>\n",
" <th>retweet</th>\n",
" <th>account_category</th>\n",
" <th>new_june_2018</th>\n",
" <th>alt_external_id</th>\n",
" <th>tweet_id</th>\n",
" <th>article_url</th>\n",
" <th>tco1_step1</th>\n",
" <th>tco2_step1</th>\n",
" <th>tco3_step1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2232518</th>\n",
" <td>243886</td>\n",
" <td>2497991305</td>\n",
" <td>AUSTINLOVESBEER</td>\n",
" <td>BREAKING: Killer avalanche sweeps three skiers...</td>\n",
" <td>United States</td>\n",
" <td>English</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>41</td>\n",
" <td>34</td>\n",
" <td>...</td>\n",
" <td>Right</td>\n",
" <td>1</td>\n",
" <td>RightTroll</td>\n",
" <td>0</td>\n",
" <td>2497991305</td>\n",
" <td>839400198002503680</td>\n",
" <td>http://twitter.com/2497991305/statuses/8394001...</td>\n",
" <td>https://twitter.com/Daily_Star/status/83938477...</td>\n",
" <td>http://bit.ly/2lWNDnt</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2232519</th>\n",
" <td>243887</td>\n",
" <td>2497991305</td>\n",
" <td>AUSTINLOVESBEER</td>\n",
" <td>Why men should support International Womens D...</td>\n",
" <td>United States</td>\n",
" <td>English</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>3/8/2017 9:00</td>\n",
" <td>41</td>\n",
" <td>34</td>\n",
" <td>...</td>\n",
" <td>Right</td>\n",
" <td>1</td>\n",
" <td>RightTroll</td>\n",
" <td>0</td>\n",
" <td>2497991305</td>\n",
" <td>839400290168135680</td>\n",
" <td>http://twitter.com/2497991305/statuses/8394002...</td>\n",
" <td>http://trib.al/xiMs3md</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2232520</th>\n",
" <td>243888</td>\n",
" <td>2497991305</td>\n",
" <td>AUSTINLOVESBEER</td>\n",
" <td>How we can rebuild trust in a UK divided by in...</td>\n",
" <td>United States</td>\n",
" <td>English</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>41</td>\n",
" <td>34</td>\n",
" <td>...</td>\n",
" <td>Right</td>\n",
" <td>1</td>\n",
" <td>RightTroll</td>\n",
" <td>0</td>\n",
" <td>2497991305</td>\n",
" <td>839400090582179840</td>\n",
" <td>http://twitter.com/2497991305/statuses/8394000...</td>\n",
" <td>http://trib.al/l3iyCVF</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2232521</th>\n",
" <td>243889</td>\n",
" <td>2497991305</td>\n",
" <td>AUSTINLOVESBEER</td>\n",
" <td>John Humphrys accused of patronising Angela Ra...</td>\n",
" <td>United States</td>\n",
" <td>English</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>41</td>\n",
" <td>34</td>\n",
" <td>...</td>\n",
" <td>Right</td>\n",
" <td>1</td>\n",
" <td>RightTroll</td>\n",
" <td>0</td>\n",
" <td>2497991305</td>\n",
" <td>839400131325648896</td>\n",
" <td>http://twitter.com/2497991305/statuses/8394001...</td>\n",
" <td>http://bit.ly/2m0OQL7</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2232522</th>\n",
" <td>243890</td>\n",
" <td>2497991305</td>\n",
" <td>AUSTINLOVESBEER</td>\n",
" <td>Fossilized poop found in 180-million-year-old ...</td>\n",
" <td>United States</td>\n",
" <td>English</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>3/8/2017 8:59</td>\n",
" <td>41</td>\n",
" <td>34</td>\n",
" <td>...</td>\n",
" <td>Right</td>\n",
" <td>1</td>\n",
" <td>RightTroll</td>\n",
" <td>0</td>\n",
" <td>2497991305</td>\n",
" <td>839400253413437440</td>\n",
" <td>http://twitter.com/2497991305/statuses/8394002...</td>\n",
" <td>http://dailym.ai/2lV5BXf</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" index external_author_id author \\\n",
"2232518 243886 2497991305 AUSTINLOVESBEER \n",
"2232519 243887 2497991305 AUSTINLOVESBEER \n",
"2232520 243888 2497991305 AUSTINLOVESBEER \n",
"2232521 243889 2497991305 AUSTINLOVESBEER \n",
"2232522 243890 2497991305 AUSTINLOVESBEER \n",
"\n",
" content region \\\n",
"2232518 BREAKING: Killer avalanche sweeps three skiers... United States \n",
"2232519 Why men should support International Womens D... United States \n",
"2232520 How we can rebuild trust in a UK divided by in... United States \n",
"2232521 John Humphrys accused of patronising Angela Ra... United States \n",
"2232522 Fossilized poop found in 180-million-year-old ... United States \n",
"\n",
" language publish_date harvested_date following followers ... \\\n",
"2232518 English 3/8/2017 8:59 3/8/2017 8:59 41 34 ... \n",
"2232519 English 3/8/2017 8:59 3/8/2017 9:00 41 34 ... \n",
"2232520 English 3/8/2017 8:59 3/8/2017 8:59 41 34 ... \n",
"2232521 English 3/8/2017 8:59 3/8/2017 8:59 41 34 ... \n",
"2232522 English 3/8/2017 8:59 3/8/2017 8:59 41 34 ... \n",
"\n",
" account_type retweet account_category new_june_2018 alt_external_id \\\n",
"2232518 Right 1 RightTroll 0 2497991305 \n",
"2232519 Right 1 RightTroll 0 2497991305 \n",
"2232520 Right 1 RightTroll 0 2497991305 \n",
"2232521 Right 1 RightTroll 0 2497991305 \n",
"2232522 Right 1 RightTroll 0 2497991305 \n",
"\n",
" tweet_id \\\n",
"2232518 839400198002503680 \n",
"2232519 839400290168135680 \n",
"2232520 839400090582179840 \n",
"2232521 839400131325648896 \n",
"2232522 839400253413437440 \n",
"\n",
" article_url \\\n",
"2232518 http://twitter.com/2497991305/statuses/8394001... \n",
"2232519 http://twitter.com/2497991305/statuses/8394002... \n",
"2232520 http://twitter.com/2497991305/statuses/8394000... \n",
"2232521 http://twitter.com/2497991305/statuses/8394001... \n",
"2232522 http://twitter.com/2497991305/statuses/8394002... \n",
"\n",
" tco1_step1 \\\n",
"2232518 https://twitter.com/Daily_Star/status/83938477... \n",
"2232519 http://trib.al/xiMs3md \n",
"2232520 http://trib.al/l3iyCVF \n",
"2232521 http://bit.ly/2m0OQL7 \n",
"2232522 http://dailym.ai/2lV5BXf \n",
"\n",
" tco2_step1 tco3_step1 \n",
"2232518 http://bit.ly/2lWNDnt NaN \n",
"2232519 NaN NaN \n",
"2232520 NaN NaN \n",
"2232521 NaN NaN \n",
"2232522 NaN NaN \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}