#! /usr/bin/env ruby
# frozen_string_literal: true

# This script can be used to parse and dump the information from
# the 'html/contact_info.htm' file in a Facebook user data ZIP download.
#
# It dumps all cell phone call + SMS message + MMS records, plus a summary of each.
#
# Place this script inside the extracted Facebook data download folder
# alongside the 'html' folder.
#
# This script requires Ruby and the Nokogiri library to be installed.

require 'nokogiri'
require 'time'

begin
  # Optional debugging aid only: nothing below calls into it, so a missing
  # gem must not stop the script from running.
  require 'byebug'
rescue LoadError
  nil
end

# Prints a horizontal rule (24 dashes) to stdout.
def hr
  $stdout.puts '-' * 24
end

# Prints the leading whitespace for a nesting level, without a newline:
# level 1 prints nothing, level 2 prints one space, and so on.
#
# Returns $stdout (the result of flush), which is always truthy.
def indent(level = 1)
  $stdout.print ' ' * (level - 1)
  $stdout.flush
end

# Prints a titled section: a rule, the title and a blank line, then whatever
# the given block prints, then a closing rule and a blank line. Each line
# printed here is preceded by the indentation for `level`.
#
# title - text printed as the section heading.
# level - nesting level handed to indent (default 1).
#
# Returns nil.
def section(title, level: 1)
  indent(level)
  hr
  indent(level)
  $stdout.puts title
  indent(level)
  $stdout.puts

  yield

  indent(level)
  hr
  indent(level)
  $stdout.puts
end

# Extracts metadata from a call/text/sms/mms table.
#
# metadata_table - a node responding to css('tr'), or nil when the table is absent.
#
# Returns nil if there is no metadata in this table (table missing, or no
# rows after the first one).
# Otherwise returns a 2D list of rows/columns: one array of chomped 'td'
# texts per row.
def extract_table_metadata(metadata_table)
  return nil if metadata_table.nil?

  # Skip the first row (presumably the header row); many tables hold nothing else.
  records = metadata_table.css('tr').drop(1)
  return nil if records.empty?

  records.map do |call_record|
    call_record.css('td').map { |cell| cell.text.chomp }
  end
end

# Collects the metadata of every table that is a direct child of `container`.
#
# container - the node wrapping one kind of history (calls, texts, ..), or nil.
#
# Returns a list with one 2D rows/columns list per non-empty table; tables
# without metadata are dropped, so the result never contains nil.
def dig_out_metadata(container:)
  # If a specific type of metadata is missing (calls, texts, ..), the
  # container div will simply not be present.
  return [] if container.nil?

  contact_tables = container.children.select { |child| child.name == 'table' }

  contact_tables.map do |contact_table|
    # The records live in the first table found inside each direct-child table.
    extract_table_metadata(contact_table.css('table').first)
  end.compact
end

# Prints every table in `metadata` as its own nested section, with all of
# the table's cells joined by ", " on a single line.
#
# metadata       - list of 2D rows/columns lists, as returned by dig_out_metadata.
# metadata_title - heading of the enclosing section.
#
# Returns nil.
def print_metadata(metadata, metadata_title:)
  section(metadata_title) do
    metadata.each do |record|
      section('Another Phone Number', level: 2) do
        indent(2)
        puts record.join(', ')
      end
    end
  end
end

# Prints the dates of the oldest and the most recent record, or nothing when
# no record carries a timestamp.
#
# metadata      - list of 2D rows/columns lists; the second cell of each row
#                 is parsed as the timestamp.
# metadata_name - singular noun used in the message (e.g. "SMS message").
#
# Raises ArgumentError (from Time.parse) if a non-empty cell cannot be parsed.
# Returns nil.
def print_timestamps(metadata, metadata_name:)
  # Flatten one level first: `metadata` is grouped per table, and the
  # timestamp is a column of each row, not the second row of each table.
  timestamps = metadata.flatten(1)
                       .map { |row| row[1].to_s.chomp }
                       .reject(&:empty?)
                       .map { |text| Time.parse(text) }
  return if timestamps.empty?

  puts "The oldest #{metadata_name} is from #{timestamps.min.to_date}, " \
       "the most recent at #{timestamps.max.to_date}"
end

# Prints how many records there are for each distinct value of the first
# column, or nothing when there are no records.
#
# metadata      - list of 2D rows/columns lists.
# metadata_name - plural noun used in the message (e.g. "SMS messages").
#
# Returns nil.
def print_status_breakdown(metadata, metadata_name:)
  grouped_statuses = metadata.flatten(1).group_by(&:first)
  return if grouped_statuses.empty?

  breakdown = grouped_statuses.map do |status, records|
    "#{records.size} #{status.downcase} #{metadata_name}"
  end
  puts "This includes #{breakdown.join(', ')}"
end

# Finds the first div that is a following sibling of the h2 whose text is `heading`.
#
# Returns the node, or nil when the document has no such section.
def history_container(html_doc, heading)
  html_doc.xpath("//h2[text()='#{heading}']/following-sibling::div")[0]
end

# Prints the three summary lines for one kind of record: the record count,
# the date range and the breakdown by first column.
#
# metadata - list of 2D rows/columns lists.
# singular - singular noun for the record kind (e.g. "cell phone call").
# plural   - plural noun for the record kind (e.g. "cell phone calls").
def print_record_summary(metadata, singular:, plural:)
  $stdout.puts "There are records of #{metadata.flatten(1).size} distinct #{plural}"
  indent(2)
  print_timestamps(metadata, metadata_name: singular)
  indent(2)
  print_status_breakdown(metadata, metadata_name: plural)
end

# Dumps the call, SMS and MMS history found in the parsed contact-info
# document, followed by the list of phone numbers and a brief summary.
#
# html_doc - the parsed document; must respond to xpath.
def print_call_history(html_doc)
  call_history_container = history_container(html_doc, 'Call History')

  call_metadata = dig_out_metadata(container: call_history_container)
  sms_metadata = dig_out_metadata(container: history_container(html_doc, 'SMS History'))
  mms_metadata = dig_out_metadata(container: history_container(html_doc, 'MMS History'))

  phone_numbers =
    if call_history_container
      # NOTE(review): the leading `//` makes this XPath match across the whole
      # document rather than only inside the call-history container - confirm
      # that is intended before narrowing it to `.//`.
      call_history_container
        .xpath("//b[text()='Number:']/following-sibling::text()")
        .map(&:text)
        .sort
        .uniq
    else
      []
    end

  print_metadata(call_metadata, metadata_title: 'Call History')
  print_metadata(sms_metadata, metadata_title: 'SMS History')
  print_metadata(mms_metadata, metadata_title: 'MMS History')

  section('The full list of phone numbers that have stored data') do
    # Eight numbers per output line.
    phone_numbers.each_slice(8) do |group|
      indent(2)
      puts group.join(', ')
    end
  end

  $stdout.puts 'A brief summary of phone records'
  hr
  $stdout.puts "There are phone records for #{phone_numbers.size} distinct phone numbers"

  print_record_summary(call_metadata, singular: 'cell phone call', plural: 'cell phone calls')
  print_record_summary(sms_metadata, singular: 'SMS message', plural: 'SMS messages')
  print_record_summary(mms_metadata, singular: 'MMS message', plural: 'MMS messages')
  hr
end

html_text = File.read('html/contact_info.htm')
html_doc = Nokogiri::HTML(html_text)

print_call_history(html_doc)