from IPython.display import HTML
# Render a button that hides/shows the notebook's code cells via the page's
# codeToggle() JavaScript helper (defined in the exported HTML, not here).
HTML('''<button type="button" class="btn btn-outline-danger" onclick="codeToggle();">Toggle Code</button>''')
import warnings
warnings.filterwarnings('ignore')
import os
import random
import re
import string
import tabulate
from collections import Counter
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import rc
rc('animation', html='jshtml')
import seaborn as sns
import re, os, string
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk
nltk.download('punkt')
nltk.download("stopwords")
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.decomposition import PCA
import networkx as nx
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Fix all random sources for reproducibility.
SEED = 42
random.seed(SEED)
# NOTE(review): PYTHONHASHSEED only affects hashing if set before the
# interpreter starts; setting it here is a no-op for the current process.
os.environ["PYTHONHASHSEED"] = str(SEED)
np.random.seed(SEED)
import plotly
# Enable inline plotly rendering in the notebook.
plotly.offline.init_notebook_mode()
# Load the conference-papers dataset: one row per (paper, author) with
# columns Conference, Year, Title, Author, Affiliation.
papers_df = pd.read_csv('data/new_papers.csv')
papers_df.head()
| Conference | Year | Title | Author | Affiliation | |
|---|---|---|---|---|---|
| 0 | NeurIPS | 2006 | Attentional Processing on a Spike-Based VLSI N... | Yingxue Wang | Swiss Federal Institute of Technology, Zurich |
| 1 | NeurIPS | 2006 | Attentional Processing on a Spike-Based VLSI N... | Rodney J Douglas | Institute of Neuroinformatics |
| 2 | NeurIPS | 2006 | Attentional Processing on a Spike-Based VLSI N... | Shih-Chii Liu | Institute for Neuroinformatics, University of ... |
| 3 | NeurIPS | 2006 | Multi-Task Feature Learning | Andreas Argyriou | Ecole Centrale de Paris |
| 4 | NeurIPS | 2006 | Multi-Task Feature Learning | Theos Evgeniou | INSEAD |
The analysis is based on two sets of dataset.
The first dataset is papers.csv
[source]
. The Conference and Workshop on Neural Information Processing Systems (abbreviated as NeurIPS and formerly NIPS) is a yearly flagship conference, held every December, on Machine Learning (ML) and neural computation.
The dataset is a compilation of NeurIPS Papers published since 1987-2019. It contains the
year of publication
,
title
,
author details
,
abstract
, and
full text
from these years and is publicly available for various data science tasks.
The second dataset is new_papers.csv
[source]
. International Conference on Machine Learning (ICML), The Conference and Workshop on Neural Information Processing Systems (NeurIPS) and The International Conference on Learning Representations (ICLR) are the top three premiere conferences on Artificial Intelligence (AI) and Machine Learning (ML).The dataset consists of the
Conference
,
Year
,
Title
,
Author
and,
Affiliation
of the papers published in these conferences from the below years:
# Visualise a 100-row sample in 3-D: year x conference x author,
# coloured by the author's affiliation.
sample = papers_df.sample(n=100)
fig = px.scatter_3d(
    sample,
    x='Year',
    y='Conference',
    z='Author',
    color='Affiliation',
    template="plotly_dark",
)
fig.show()
A sample of 100 points is taken from the dataset for the visualisation. Each row in our dataset represents a unique paper published in a particular year. The attributes in the dataset used are: [Conference, Year, Title, Author, Affiliation] . To visualise the data in a 3D perspective, each paper is identified by the attributes: [Conference, Year, Author] . For a research paper having the same attributes, color is used to distinguish between such instances. Therefore, each point in space indicating a published paper, is presented as:
# Get the list of the top 10 universities (by total paper rows) till now.
# Fix: the original rescanned the whole Affiliation column once per unique
# affiliation (O(n^2)); value_counts() does the same count in one pass.
# value_counts() skips NaN automatically; the literal string 'None' is
# filtered out to match the original's explicit skip.
affiliation_counts = papers_df['Affiliation'].value_counts()
top_10_institutes = [insti for insti in affiliation_counts.index if insti != 'None'][:10]
# Build a dict mapping each year (2006-2021) to the list of paper counts for
# the top-10 institutes: y_paper_count_top_10[year][j] is the count for
# top_10_institutes[j] in that year.
y_paper_count_top_10 = {}
for year in range(2006, 2022):
    yearly = papers_df[papers_df.Year == year]
    y_paper_count_top_10[year] = [
        (yearly.Affiliation == insti).sum() for insti in top_10_institutes
    ]
# Now we need to plot the growth on a bar chart for the institutes.
# One bar trace per year, 2006-2020 inclusive (np.arange(2006, 2021) yields
# 15 traces); a slider toggles which single trace is visible.
import plotly.graph_objects as go
# Create figure
fig = go.Figure()
# Add traces, one for each slider step (one per year)
for step in np.arange(2006, 2021, 1):
    fig.add_trace(
        go.Bar(
            visible=False,
            x = top_10_institutes,
            y = y_paper_count_top_10[step],
            name = "Year="+str(step),
        ))
# Make only the first (2006) trace visible initially
fig.data[0].visible = True
# Create and add slider: step i shows only trace i and retitles the chart.
steps = []
for i in range(len(fig.data)):
    step = dict(
        method="update",
        args=[{"visible": [False] * len(fig.data)},
              {"title": "Slider switched to Year: " + str(2006+i)}],  # layout attribute
        label = str(2006+i*1),
    )
    step["args"][0]["visible"][i] = True  # Toggle i'th trace to "visible"
    steps.append(step)
sliders = [dict(
    active=0,
    currentvalue={"prefix": "Trend For Year: ", },
    pad={"t": 80},
    steps=steps
)]
fig.update_layout(
    sliders=sliders
)
fig.show()
The above interactive slider displays the contribution of top 10 organisations in terms of number of research papers published (centered around machine-based learning) in a certain year.
The notion of 'top organisations' in our analysis is subject to the cumulative number of papers published over the years. For our dataset available till only the year 2021, we have retrieved the top 10 organisations based on the number of papers published in the duration: 2006-2021. However, due to methodological constraints, the data corresponding to the year 2021 is not complete.
We analyse the number of research papers published to abstract the contribution of certain organisations in the mentioned field. Charting such a trend helps to analyse the behaviour of the current top 10 institutes over the years. For example, DeepMind , a current top institute in the research field of Deep Learning, had no publications in the year 2006 . However, as the demand and attention towards Deep Learning increased after 2014, the number of publications soared to 301 in the year 2019 . This paradigm shift was a response to the growing demand for faster algorithms for deep learning and related techniques after the year 2014.
Interestingly, the year 2020 witnessed the highest number of cumulative research papers across the top 10 affiliated institutes. The trend explains the boost in current demand of the technology as every manufacturing sector depends on machine-based learning to increase responsiveness.
This 'growth' trend, spoken of numerous times, is evident as the average number of publications increases tremendously from 25 in the year 2014 to about 290 in the year 2020 .
# Display the full dataset (49,317 rows x 5 columns).
papers_df
| Conference | Year | Title | Author | Affiliation | |
|---|---|---|---|---|---|
| 0 | NeurIPS | 2006 | Attentional Processing on a Spike-Based VLSI N... | Yingxue Wang | Swiss Federal Institute of Technology, Zurich |
| 1 | NeurIPS | 2006 | Attentional Processing on a Spike-Based VLSI N... | Rodney J Douglas | Institute of Neuroinformatics |
| 2 | NeurIPS | 2006 | Attentional Processing on a Spike-Based VLSI N... | Shih-Chii Liu | Institute for Neuroinformatics, University of ... |
| 3 | NeurIPS | 2006 | Multi-Task Feature Learning | Andreas Argyriou | Ecole Centrale de Paris |
| 4 | NeurIPS | 2006 | Multi-Task Feature Learning | Theos Evgeniou | INSEAD |
| ... | ... | ... | ... | ... | ... |
| 49312 | ICLR | 2021 | Self-supervised Representation Learning with R... | Martin Q Ma | School of Computer Science, Carnegie Mellon Un... |
| 49313 | ICLR | 2021 | Self-supervised Representation Learning with R... | Muqiao Yang | School of Computer Science, Carnegie Mellon Un... |
| 49314 | ICLR | 2021 | Self-supervised Representation Learning with R... | Han Zhao | University of Illinois, Urbana Champaign |
| 49315 | ICLR | 2021 | Self-supervised Representation Learning with R... | LP Morency | Carnegie Mellon University |
| 49316 | ICLR | 2021 | Self-supervised Representation Learning with R... | Ruslan Salakhutdinov | Carnegie-Mellon University |
49317 rows × 5 columns
# Trend for author count every year.
# dict_year_author[y]       -> unique authors who published in year y.
# dict_year_author_cumu[y]  -> unique authors who published in y or earlier.
# Assumption: once an author has published, they remain "active" in all
# subsequent years, so the cumulative series is non-decreasing.
year_list = sorted(set(papers_df["Year"]))
dict_year_author = {
    str(year): list(set(papers_df.loc[papers_df["Year"] == year]["Author"]))
    for year in year_list
}
dict_year_author_cumu = {str(year_list[0]): dict_year_author[str(year_list[0])]}
for prev_year, year in zip(year_list, year_list[1:]):
    merged = set(dict_year_author[str(year)]) | set(dict_year_author_cumu[str(prev_year)])
    dict_year_author_cumu[str(year)] = list(merged)
# Plotting trend for authors publishing every year (distinct authors per year).
x_author = list(dict_year_author)
y_author = [len(authors) for authors in dict_year_author.values()]
fig = px.bar(
    x=x_author,
    y=y_author,
    title="Number of Authors Publishing Each Year",
    color=y_author,
    labels={"y": "Number of Authors", "x": "Year"},
)
fig.show()
Machine learning and Artificial Intelligence are heavily research-driven topics. The surge of these research-oriented fields in an interval can be mapped to the quantification of research papers published. There are two components to this quantity analysis:
Number of authors publishing each year: We represent how many unique authors have published in a particular year. It should be pointed out that this statistic is different from publications per year because an author may publish 20 papers a year. But having 20 different authors publishing some research paper implies the relevance and rise of interest in the field in the research community.
Number of authors active each year: The term active is used here to represent authors who have published in any of the previous year(s). The assumption is based on the probability that an author might be cited in a research paper in the subsequent years. This implies that once an author gets a publication, he remains active for each subsequent year. This quantity captures how many authors are added to the research field or become interested in the research every year. It is worth noting that it is a different metric from the one pointed out above. This statistic helps us to understand how many new authors are publishing or have published each year on average. While the former represents how many authors get interested in research each year.
# Plotting trend for cumulative active authors each year.
x_author_cumu = list(dict_year_author_cumu.keys())
y_author_cumu = [len(dict_year_author_cumu[key_]) for key_ in x_author_cumu]
# Fix: corrected the misspelled chart title ("Cummulative" -> "Cumulative").
fig = px.bar(
    x=x_author_cumu,
    y=y_author_cumu,
    title="Number of Authors Active Each Year (Cumulative)",
    color=y_author_cumu,
    labels={"y": "Number of Unique Authors", "x": "Year"},
)
fig.show()
For each year, the value on the y-axis represents the number of unique authors that have published in the particular year or in any of the previous years i.e. the sixth bar gives the count of all the unique authors (set of authors) who have published papers since 2006 up till 2011. Therefore, to get the number of new authors being added each year, we just subtract the value for the previous year from the current year. For example, to get the number of new authors who publish in the year 2011 , we just subtract the value of number of active authors in 2011 and number of active authors in 2010 . That is: number of new authors added in the year 2011 are:
Doing this for every year, we get the
plot for new authors added each year
. Note that the count of new authors for 2021 is less since the dataset doesn't capture all the publications of 2021.
# New authors added each year = difference between consecutive cumulative
# counts; the first year's count is taken as-is.
new_author_year = [y_author_cumu[0]]
new_author_year += [curr - prev for prev, curr in zip(y_author_cumu, y_author_cumu[1:])]
fig = px.bar(
    x=x_author_cumu,
    y=new_author_year,
    title="Number of Authors Added In a Year",
    color=new_author_year,
    labels={"y": "Number of New Authors", "x": "Year"},
)
fig.show()
Dataset:
The dataset contains all paper titles, authors and their affiliations from the years
ICML Conference: 2017-2020
NeurIPS Conference: 2006-2020
ICLR Conference: 2018-2021 (except 2020)
This is a distribution of the top 10 authors who published the maximum number of papers since 2006 (upto 2021). Sergey Levine, Pieter Abbeel and Michael Jordan are associated with UC Berkeley, Yoshua with University of Montreal and Lawrence Carin with Duke University. According to current trends, America is leading research in AI and ML in terms of the number of papers published. From the above distribution, it can be observed that six out of ten authors are associated with a USA institute which justifies the current trends.
# Bar chart of the 10 most-published authors (one dataset row per
# (paper, author), so an author's row count is their paper count).
papers = pd.read_csv('data/new_papers.csv')
papers = papers.drop_duplicates()
# Fix: Series.iteritems() was removed in pandas 2.0; build the frame
# directly from the value_counts index/values instead.
top_authors = papers.Author.value_counts().head(10)
df = pd.DataFrame({'Authors': list(top_authors.index), 'Papers': list(top_authors.values)})
fig = px.bar(df, x="Authors", y="Papers", color='Authors')
fig.update_layout(title_text = "Authors publishing maximum number of papers",title_x=0.5)
fig.show()
Dataset:
The dataset contains all paper titles, authors and their affiliations from the years
ICML Conference: 2017-2020
NeurIPS Conference: 2006-2020
ICLR Conference: 2018-2021 (except 2020)
This is a plot of total number of papers published vs how many authors published those many papers. The y axis is plotted in logarithmic scale for better interpretation. Each colour represents a different number of papers. All the authors' performance can be considered independent over here. As observed, this doesn't follow the Central Limit Theorem and as the paper count increases, the frequency of authors decreases drastically.
# Histogram (log-scale y): for each paper count, how many authors published
# exactly that many papers.
counts_series = papers.value_counts('Author')
author_counts = counts_series.rename_axis('author_name').reset_index(name='paper_count')
fig = px.histogram(
    author_counts,
    x="paper_count",
    nbins=60,
    color='paper_count',
    log_y=True,
    labels={"y": "Frequency (log scale)", "paper_count": "Paper Count"},
)
fig.update_layout(title_text = "Plot of total papers published by authors vs frequency (Log Scale)",title_x=0.5)
fig.show()
# Now we try to answer the question: among the top authors, how many
# collaborate with each other? Restrict the dataset to the 50 most-published
# authors.
top_author_collab = pd.read_csv('data/new_papers.csv')
top_author_collab = top_author_collab.drop_duplicates()
# Fix: Series.iteritems() was removed in pandas 2.0; take the index/values
# of the value_counts head directly.
top_counts = top_author_collab.Author.value_counts().head(50)
author = list(top_counts.index)
value = list(top_counts.values)
top_author_collab = top_author_collab.loc[top_author_collab["Author"].isin(author)].copy()
top_author_collab.sample(5)
| Conference | Year | Title | Author | Affiliation | |
|---|---|---|---|---|---|
| 13815 | NeurIPS | 2017 | Stochastic Approximation for Canonical Correla... | Nati Srebro | TTI-Chicago |
| 23555 | ICLR | 2019 | L-Shapley and C-Shapley: Efficient Model Inter... | Le Song | Ant Financial & Georgia Institute of Technology |
| 40349 | NeurIPS | 2020 | Robust Optimization for Fairness with Noisy Pr... | Michael Jordan | UC Berkeley |
| 42210 | NeurIPS | 2020 | Coresets via Bilevel Optimization for Continua... | Andreas Krause | ETH Zurich |
| 7914 | NeurIPS | 2014 | Recurrent Models of Visual Attention | Nicolas Heess | Google DeepMind |
# Collect, per paper (keyed by Conference/Year/Title), the sorted list of
# its top-50 authors; each multi-author list becomes a collaboration clique.
# Fix: the original did O(n) membership tests on plain lists for both the
# seen-key check and the dedupe check (O(n^2) overall); sets make both O(1).
new_df = top_author_collab[['Conference','Year','Title', 'Author']].groupby(['Conference','Year','Title'])
Authors_collab = []
key_seen = set()
groups_seen = set()
for i in top_author_collab.index:
    curr_key = tuple(top_author_collab.loc[i, ['Conference','Year','Title']])
    if curr_key in key_seen:
        continue
    key_seen.add(curr_key)
    lst_author = sorted(new_df.get_group(curr_key).Author)
    group_key = tuple(lst_author)
    if group_key not in groups_seen:
        groups_seen.add(group_key)
        Authors_collab.append(lst_author)
# Authors_collab stores all the author groups; only groups of 2+ form edges.
node_list = list(set(top_author_collab.Author))
edge_list = [grp for grp in Authors_collab if len(grp) >= 2]
# creating the graph
import networkx as nx
import matplotlib.pyplot as plt
import itertools
from pyvis.network import Network
# creating notebook
# pyvis container for the interactive collaboration graph; notebook=False
# because the graph is written to a standalone HTML file, not rendered inline.
net = Network(notebook = False)
class GraphVisualization:
    """Accumulates co-authorship edges and renders them as a networkx graph."""

    def __init__(self):
        # Fix: the original assigned a local variable `G` instead of
        # `self.G`, so the attribute did not exist until visualize() or
        # getGraphObj() was called.
        self.G = None
        # List of [u, v] pairs; both directions of each pair are stored.
        self.visual = []

    def addEdges(self, list_edges):
        """Append every ordered pair from the given co-author list as an edge."""
        for pair in itertools.permutations(list_edges, 2):
            self.visual.append(list(pair))

    def visualize(self):
        """Build the graph from the stored edges and draw it with a random layout."""
        self.G = nx.Graph()
        self.G.add_edges_from(self.visual)
        # Fix: create the sized figure BEFORE drawing; the original called
        # plt.figure() after nx.draw(), so the figsize/dpi applied to a new,
        # empty figure rather than the drawn one.
        plt.figure(figsize=(13, 13), dpi=300)
        pos = nx.random_layout(self.G)
        nx.draw(self.G, pos, with_labels=True, width=0.4,
                node_color='lightblue', node_size=200)

    def getGraphObj(self):
        """Build (or rebuild) and return the networkx Graph from stored edges."""
        self.G = nx.Graph()
        self.G.add_edges_from(self.visual)
        return self.G
Graph = GraphVisualization()
# Register every multi-author paper's author list; addEdges expands each
# list into all ordered author pairs.
for i in edge_list:
    Graph.addEdges(i)
From our dataset, we picked up the list of top 50 authors. The authors were chosen on the basis of the number of publications given in the dataset corresponding to them. A collaboration between authors is an event which occurs when two authors have worked or contributed on the same paper. Since each paper is represented as: , our task reduces to finding the set of authors having the same value of conference and year attribute. We grouped the dataset on the basis of ['conference','year','title'] and used the corresponding list of authors.
## VISUALISING USING PYVIS
# Convert the networkx graph into an interactive pyvis page and write it
# to a standalone HTML file for hosting.
nx_graph = Graph.getGraphObj()
net.from_nx(nx_graph, show_edge_weights=False)
net.width = '1000px'
net.show("../_html/graph.html")
from IPython.display import HTML
# Button linking to the externally hosted interactive collaboration graph.
HTML(''' <button > <a style = "text-decoration: none; font-weight: 800;" href="https://ai-ml-growth-data-analysis.netlify.app/graph.html" target="_blank">SHOW COLLABORATION NETWORK </a></button>
''')
On clicking the SHOW COLLABORATION NETWORK button, a new tab opens with the graph output.
The output is an interactive, unweighted graph with nodes and edges. Each node corresponds to an author, and an edge exists between two authors if they have ever collaborated in any research paper.
Zoom-in or out and drag the nodes to leverage the interactivity :)
print("The author having maximum unqiue collaboration is: ", sorted(Graph.G.degree, key=lambda x: x[1], reverse=True)[0][0], end="")
print(" with ",sorted(Graph.G.degree, key=lambda x: x[1], reverse=True)[0][1], " collaborations")
The author having maximum unique collaboration is: Le Song with 8 collaborations
# Characters removed by clean_text(). NOTE(review): this set contains a
# space and a duplicated '*', so cleaning also deletes spaces (the cleaned
# full_text output shows words run together) — presumably intentional; confirm.
PUNCTUATION = """!"#$%&'()*+,-./:;<=>?@[\]^_ *{|}~"""
TOP_K_KEYWORDS = 10 # top k number of keywords to retrieve in a ranked document
STOPWORD_PATH = 'data/stopwords_ls.txt'  # custom stopword list file
PAPERS_PATH = 'data/papers.csv'  # full NeurIPS papers dump (1987-2019)
Note: To run the below cells, unzip papers.csv.gz present in the data folder and rename it as papers.csv
def get_sw_lst(path):
    """Read a whitespace-separated stopword file into a flat list of words.

    Equivalent to the original line-by-line strip/split loop: str.split()
    with no argument already splits on any whitespace (including newlines)
    and drops empty strings.
    """
    with open(path, 'r') as f:
        return f.read().split()
def clean_text(text):
    """Doc cleaning: lowercase, strip PUNCTUATION chars, collapse whitespace.

    Because PUNCTUATION includes a space, ordinary spaces are deleted; the
    re.sub then collapses remaining whitespace (newlines/tabs) to single
    spaces — matching the original character-filter behaviour exactly.
    """
    # Lowering text
    text = text.lower()
    # Removing punctuation in one C-level pass instead of a per-char loop.
    text = text.translate(str.maketrans('', '', PUNCTUATION))
    # Removing whitespace and newlines.
    # Fix: raw string for the regex — '\s' in a plain string is an invalid
    # escape sequence (DeprecationWarning, error in future Python).
    text = re.sub(r'\s+', ' ', text)
    return text
def sort_coo(coo_matrix):
    """Return (column, score) pairs from a COO matrix, highest score first.

    Ties on score are broken by the larger column index, matching the
    descending (score, column) ordering.
    """
    pairs = list(zip(coo_matrix.col, coo_matrix.data))
    pairs.sort(key=lambda pair: (pair[1], pair[0]), reverse=True)
    return pairs
def extract_topn_from_vector(feature_names, sorted_items, topn=10):
    """Get the feature names and tf-idf scores of the top-n items.

    Args:
        feature_names: indexable mapping from feature index to feature name.
        sorted_items: iterable of (feature_index, score) pairs, already
            sorted by descending score (see sort_coo).
        topn: number of leading items to keep.

    Returns:
        dict mapping feature name -> score rounded to 3 decimals. As in the
        original three-loop version, a repeated feature index keeps the
        last-seen score.
    """
    # One dict comprehension replaces the original's two accumulator lists
    # plus a rebuild loop — same insertion order, same overwrite semantics.
    return {
        feature_names[idx]: round(score, 3)
        for idx, score in sorted_items[:topn]
    }
def get_keywords(vectorizer, feature_names, doc):
    """Return the top TOP_K_KEYWORDS keywords of one document via TF-IDF."""
    # Score the single document against the fitted vocabulary.
    tfidf_vec = vectorizer.transform([doc])
    # Rank (column, score) pairs by descending score.
    ranked = sort_coo(tfidf_vec.tocoo())
    # Keep only the highest-scoring TOP_K_KEYWORDS features.
    top_features = extract_topn_from_vector(feature_names, ranked, TOP_K_KEYWORDS)
    return list(top_features)
# Load the full papers dataset (source_id, year, title, abstract, full_text).
data = pd.read_csv(PAPERS_PATH)
data.head()
| source_id | year | title | abstract | full_text | |
|---|---|---|---|---|---|
| 0 | 27 | 1987 | Bit-Serial Neural Networks | NaN | 573 \n\nBIT - SERIAL NEURAL NETWORKS \n\nAlan... |
| 1 | 63 | 1987 | Connectivity Versus Entropy | NaN | 1 \n\nCONNECTIVITY VERSUS ENTROPY \n\nYaser S... |
| 2 | 60 | 1987 | The Hopfield Model with Multi-Level Neurons | NaN | 278 \n\nTHE HOPFIELD MODEL WITH MUL TI-LEVEL N... |
| 3 | 59 | 1987 | How Neural Nets Work | NaN | 442 \n\nAlan Lapedes \nRobert Farber \n\nThe... |
| 4 | 69 | 1987 | Spatial Organization of Neural Networks: A Pro... | NaN | 740 \n\nSPATIAL ORGANIZATION OF NEURAL NEn... |
# Drop rows without full text, then normalise each paper body for TF-IDF.
data.dropna(subset=['full_text'], inplace=True)
data['full_text'] = data['full_text'].apply(clean_text)
data.head()
| source_id | year | title | abstract | full_text | |
|---|---|---|---|---|---|
| 0 | 27 | 1987 | Bit-Serial Neural Networks | NaN | 573 bitserialneuralnetworks alanfmurrayanthony... |
| 1 | 63 | 1987 | Connectivity Versus Entropy | NaN | 1 connectivityversusentropy yasersabumostafa c... |
| 2 | 60 | 1987 | The Hopfield Model with Multi-Level Neurons | NaN | 278 thehopfieldmodelwithmultilevelneurons mich... |
| 3 | 59 | 1987 | How Neural Nets Work | NaN | 442 alanlapedes robertfarber theoreticaldivisi... |
| 4 | 69 | 1987 | Spatial Organization of Neural Networks: A Pro... | NaN | 740 spatialorganizationofneuralnenorks aprobab... |
# Hold out 25 papers per year as a test split; fit TF-IDF on the remainder.
test = data.groupby('year').sample(n= 25, random_state=25)
train = data.drop(test.index, axis = 0)
corpora = train['full_text'].to_list()
# get the stop words list
# NOTE(review): this shadows nltk.corpus.stopwords imported at the top of
# the file; the nltk module is not used below this point in this view.
stopwords=get_sw_lst(STOPWORD_PATH)
# Initializing TF-IDF Vectorizer with the given stopword list
vectorizer = TfidfVectorizer(stop_words=stopwords, smooth_idf=True, use_idf=True)
# Creating vocabulary with our corpora
vectorizer.fit_transform(corpora)
# Storing the created vocabulary.
# Fix: get_feature_names() was removed in scikit-learn 1.2;
# get_feature_names_out() is the supported replacement and still supports
# integer indexing as used by extract_topn_from_vector.
feature_names = vectorizer.get_feature_names_out()
# Associate the top 10 keywords of each held-out paper with its year.
# Bug fix: the original stored the SAME list object (`wordsl`) both in the
# per-paper record and as word_data[year], then extended it in place with
# `+=` on later papers of that year — silently growing earlier papers'
# 'top_keywords' lists inside `result`. setdefault(...).extend(...) keeps a
# separate accumulator list per year, leaving the per-paper lists intact.
test_lst = test['full_text'].to_list()
yrs = test.year.to_list()
result = []
word_data = {}
for doc, year in zip(test_lst, yrs):
    wordsl = get_keywords(vectorizer, feature_names, doc)
    result.append({'full_text': doc, 'year': year, 'top_keywords': wordsl})
    word_data.setdefault(year, []).extend(wordsl)
final = pd.DataFrame(result)
# importing all necessary modules
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
# Render one word cloud per year from that year's accumulated keywords and
# save it to assets/wc_<year>.png.
for year, keywords in word_data.items():
    cloud = WordCloud(width = 1000, height = 500).generate(" ".join(keywords))
    plt.figure(figsize=(15,8))
    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig("assets/wc_{}.png".format(year), bbox_inches='tight')
    plt.close()
import imageio
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
# Re-render each saved word cloud on a black background with a year title,
# writing assets/NTwc_<year>.png for the GIF assembly below.
for year in range(1987, 2020):
    img = mpimg.imread('assets/wc_{}.png'.format(year))
    plt.figure(figsize=(15,8), facecolor='black')
    plt.axis("off")
    plt.title('Word Cloud for the year {}'.format(str(year)), color = 'white')
    plt.imshow(img)
    plt.savefig("assets/NTwc_{}.png".format(year), bbox_inches='tight')
    # Fix: close each figure; the original left all 33 figures open,
    # leaking memory and triggering matplotlib's too-many-open-figures warning.
    plt.close()
# Assemble the titled word clouds into an animated GIF (2 s per frame).
# Fix: the original built a {filename: year} dict only to iterate its keys
# into a second list; a single list of frames is sufficient.
frames = [
    imageio.imread('./assets/NTwc_{}.png'.format(year))
    for year in range(1987, 2020)
]
imageio.mimsave('./assets/gif/movie.gif', frames, duration = 2)
from IPython.display import Image
# Display the animated word-cloud GIF inline.
# Fix: the payload is a GIF, so declare format='gif'; the original's
# format='png' mislabeled the image type/MIME for the frontend.
with open('./assets/gif/movie.gif','rb') as f:
    display(Image(data=f.read(), format='gif'))