"""John Hughes Project, Python #2 - Visualizations.

Reads three screenplays (``ferris.txt``, ``sixteen.txt``, ``breakfast.txt``)
from the working directory and draws:

* pie charts of interior vs. exterior scene markers per film,
* a bar chart of main-character mentions in Breakfast Club,
* bar charts of verb / adjective / preposition counts per film,
* a pie chart of Breakfast Club dialogue lines per character,
* lexical dispersion plots of emotion words per film.
"""
import re

import matplotlib.pyplot as plt
import numpy as np
from nltk import pos_tag
from nltk import word_tokenize
from nltk.text import Text

# Words, optionally with one apostrophe part ("don't"); used to drop the
# stray punctuation tokens that word_tokenize leaves in the word sets.
WORD_PATTERN = re.compile(r"\b\w+(?:'\w+)?\b")

# Scene-heading markers counted in each screenplay.
INT_MARKER = 'INT.'
EXT_MARKER = 'EXT.'

LOCATION_LABELS = ["Interior Locations", "Exterior Locations"]


def read_script(path):
    """Return the full text of the screenplay stored at *path*.

    The file is opened in text mode with the platform default encoding.
    """
    with open(path, 'r') as file:
        return file.read()


def count_marker(script, marker):
    """Count occurrences of *marker* in *script* that start on a word boundary.

    A plain ``str.count`` would also match the marker inside longer words
    (``'INT.'`` inside ``'POINT.'``, ``'EXT.'`` inside ``'NEXT.'``), which
    inflates the location counts.
    """
    return len(re.findall(r"\b" + re.escape(marker), script))


def words_with_tag(tagged_words, tag_prefix):
    """Return the words whose part-of-speech tag starts with *tag_prefix*."""
    return [word for word, tag in tagged_words if tag.startswith(tag_prefix)]


def plot_location_pie(figure_number, title, int_count, ext_count, colors,
                      edgecolor):
    """Draw an interior-vs-exterior pie chart on figure *figure_number*."""
    plt.figure(figure_number)
    plt.title(title, fontweight='bold')
    plt.pie(np.array([int_count, ext_count]), labels=LOCATION_LABELS,
            colors=colors,
            wedgeprops={'edgecolor': edgecolor, 'linewidth': 1.5})


def plot_pos_bars(figure_number, title, labels, frequencies, color):
    """Draw a part-of-speech frequency bar chart on figure *figure_number*."""
    plt.figure(figure_number)
    plt.bar(labels, frequencies, color=color, edgecolor='black', linewidth=2)
    plt.xlabel('Part of Speech')
    plt.ylabel('Frequencies')
    plt.title(title, fontweight='bold')


def plot_emotion_dispersion(text, words, title):
    """Draw a lexical dispersion plot of *words* in *text* and title it.

    The title is applied to the current figure right after the plot is
    drawn instead of selecting a hard-coded figure number, so it cannot
    land on the wrong figure when charts are added or removed above.
    NOTE(review): assumes ``Text.dispersion_plot`` draws on a figure of its
    own and leaves it current - confirm against the installed NLTK version.
    """
    text.dispersion_plot(words)
    plt.title(title, fontweight='bold')


# takes in each txt file
ferris = read_script("ferris.txt")
sixteen = read_script("sixteen.txt")
breakfast = read_script("breakfast.txt")

# tokenizes and creates a text object with each script
# (only breakfast1 is read by the charts below)
ferris1 = word_tokenize(ferris)
text_ferris1 = Text(ferris1)
sixteen1 = word_tokenize(sixteen)
text_sixteen1 = Text(sixteen1)
breakfast1 = word_tokenize(breakfast)
text_breakfast1 = Text(breakfast1)

# filters out punctuation errors in word sets
ferris2 = WORD_PATTERN.findall(ferris)
breakfast2 = WORD_PATTERN.findall(breakfast)
sixteen2 = WORD_PATTERN.findall(sixteen)
text_ferris2 = Text(ferris2)
text_breakfast2 = Text(breakfast2)
text_sixteen2 = Text(sixteen2)

# tags words for parts of speech (pos_tag library)
tagged_ferris = pos_tag(ferris2)
tagged_breakfast = pos_tag(breakfast2)
tagged_sixteen = pos_tag(sixteen2)

# tags verbs (VB)
ferrisverbs = words_with_tag(tagged_ferris, 'VB')
sixteenverbs = words_with_tag(tagged_sixteen, 'VB')
breakfastverbs = words_with_tag(tagged_breakfast, 'VB')

# tags adjectives (JJ)
ferrisadj = words_with_tag(tagged_ferris, 'JJ')
sixteenadj = words_with_tag(tagged_sixteen, 'JJ')
breakfastadj = words_with_tag(tagged_breakfast, 'JJ')

# tags prepositions (IN)
ferrisprep = words_with_tag(tagged_ferris, 'IN')
sixteenprep = words_with_tag(tagged_sixteen, 'IN')
breakfastprep = words_with_tag(tagged_breakfast, 'IN')

# Lexical Dispersion for words pertaining to emotion in the three movies
# (each word listed once so the plot has no duplicate rows)
emotion_words = [
    'love', 'hate', 'anger', 'joy', 'fear', 'sadness', 'happiness', 'hope',
    'desire', 'regret', 'frustration', 'guilt', 'relief', 'loneliness',
    'excitement', 'confusion', 'ashamed', 'pride', 'envy', 'disappointment',
    'embarrassment', 'jealousy', 'grief', 'euphoria', 'sympathy', 'serenity',
    'anxiety', 'contentment',
]

# counting INT/EXT locations in each movie
ferris_int_count = count_marker(ferris, INT_MARKER)
ferris_ext_count = count_marker(ferris, EXT_MARKER)
breakfast_int_count = count_marker(breakfast, INT_MARKER)
breakfast_ext_count = count_marker(breakfast, EXT_MARKER)
sixteen_int_count = count_marker(sixteen, INT_MARKER)
sixteen_ext_count = count_marker(sixteen, EXT_MARKER)

# created color sets
colors1 = ['#FF6EC7', '#00FFFF']
colors2 = ['#FF00FF', '#98FF98']
colors3 = ['#FFFF66', '#9B30FF']
colors4 = ['#006C5B', '#FF6B00']

# pie charts comparing INT/EXT locations in each movie
plot_location_pie(1, 'Interior vs. Exterior Locations in Ferris Bueller',
                  ferris_int_count, ferris_ext_count, colors1, 'orange')
plot_location_pie(2, 'Interior vs. Exterior Locations in Breakfast Club',
                  breakfast_int_count, breakfast_ext_count, colors2, 'red')
plot_location_pie(3, 'Interior vs. Exterior Locations in Sixteen Candles',
                  sixteen_int_count, sixteen_ext_count, colors3, 'green')

# bar graph of character mentions in Breakfast Club specifically
breakfast_characters = ['Allison', 'Andrew', 'Vernon', 'Bender', 'Brian',
                        'Claire']
name_counts = {name: breakfast1.count(name) for name in breakfast_characters}
plt.figure(4)
# dict views are turned into lists so the bar positions and heights are
# plain sequences
plt.bar(list(name_counts.keys()), list(name_counts.values()), color=colors2,
        edgecolor='black', linewidth=2)
plt.xlabel('Characters')
plt.ylabel('Screenplay Mentions')
plt.title('Frequency of Main Character Mentions in Breakfast Club',
          fontweight='bold')

# using regex, found :
# lines of dialogue by Ferris
ferrisLines = 298
# Ferris lines spoken to the camera
ferris_fourthWallLines = 39

# bar graphs of parts of speech, one per movie
# (the x-axis labels are parts of speech, despite the variable's name)
movies = ['Verbs', 'Adjectives', 'Prepositions']

frequencies1 = [len(ferrisverbs), len(ferrisadj), len(ferrisprep)]
plot_pos_bars(5, 'Part of Speech Use in Ferris Bueller', movies,
              frequencies1, '#FF8C42')

frequencies2 = [len(breakfastverbs), len(breakfastadj), len(breakfastprep)]
plot_pos_bars(6, 'Part of Speech Use in Breakfast Club', movies,
              frequencies2, '#D46A6A')

frequencies3 = [len(sixteenverbs), len(sixteenadj), len(sixteenprep)]
plot_pos_bars(7, 'Part of Speech Use in Sixteen Candles', movies,
              frequencies3, '#F5DEB3')

# dialogue counts in breakfast club
brian_d = 121
andrew_d = 140
claire_d = 154
allison_d = 71
bender_d = 213
vernon_d = 87
carl_d = 21
misc_d = 8
dialogue_counts = [brian_d, andrew_d, claire_d, allison_d, bender_d,
                   vernon_d, carl_d, misc_d]

# breakfast dialogue total = 815
breakfast_totald = sum(dialogue_counts)

# Breakfast Club dialogue pie chart
plt.figure(8)
plt.title('Breakfast Club: Dialogue Breakdown', fontweight='bold')
y = np.array(dialogue_counts)
charlabels = ["Brian", "Andrew", "Claire", "Allison", "Bender", "Vernon",
              "Carl", "Misc."]
# wedge labels are built from the counts so they cannot drift out of sync
d_labels = ['{} Lines'.format(count) for count in dialogue_counts]
plt.pie(y, labels=d_labels, colors=colors1 + colors2 + colors3 + colors4,
        wedgeprops={'edgecolor': 'black', 'linewidth': 1.5})
plt.legend(charlabels, title="Characters", bbox_to_anchor=(1.2, 0.6))

# Lexical Dispersion plots for words pertaining to emotion
plot_emotion_dispersion(
    text_ferris2, emotion_words,
    'Lexical Dispersion of Emotion Words in Ferris Bueller')
plot_emotion_dispersion(
    text_breakfast2, emotion_words,
    'Lexical Dispersion of Emotion Words in Breakfast Club')
plot_emotion_dispersion(
    text_sixteen2, emotion_words,
    'Lexical Dispersion of Emotion Words in Sixteen Candles')

plt.show()