diff options
author | icebaker <icebaker@proton.me> | 2024-01-10 20:05:48 -0300 |
---|---|---|
committer | icebaker <icebaker@proton.me> | 2024-01-10 20:05:48 -0300 |
commit | 3dc22548895718ffc7396227267ecbb4902b62f9 (patch) | |
tree | d19984eef953a2cd1e57c6fd51a73f2bcbb77541 | |
parent | a36604d266cb019311cdf67110596a16986a0fff (diff) |
improving markdown parser
-rw-r--r-- | components/embedding.rb | 2 | ||||
-rw-r--r-- | components/providers/openai.rb | 4 | ||||
-rw-r--r-- | logic/cartridge/parser.rb | 84 | ||||
-rw-r--r-- | logic/helpers/hash.rb | 13 | ||||
-rw-r--r-- | spec/data/cartridges/block.md | 7 | ||||
-rw-r--r-- | spec/data/cartridges/tools.md | 76 | ||||
-rw-r--r-- | spec/logic/cartridge/parser_spec.rb | 106 | ||||
-rw-r--r-- | spec/logic/helpers/hash_spec.rb | 9 |
8 files changed, 270 insertions, 31 deletions
diff --git a/components/embedding.rb b/components/embedding.rb index f08ff3d..4dd2e6c 100644 --- a/components/embedding.rb +++ b/components/embedding.rb @@ -45,7 +45,7 @@ module NanoBot def self.clojure(source:, parameters:, values:, safety:) ensure_safety!(safety) - raise 'TODO: sandboxed Clojure through Babashka not implemented' if safety[:sandboxed] + raise 'Sandboxed Clojure not supported.' if safety[:sandboxed] raise 'invalid Clojure parameter name' if parameters.include?('injected-parameters') diff --git a/components/providers/openai.rb b/components/providers/openai.rb index e71f143..79b935e 100644 --- a/components/providers/openai.rb +++ b/components/providers/openai.rb @@ -140,7 +140,7 @@ module NanoBot begin @client.chat(parameters: Logic::OpenAI::Tokens.apply_policies!(cartridge, payload)) rescue StandardError => e - raise e.class, e.response[:body] if e.response && e.response[:body] + raise e.class, e.response[:body] if e.respond_to?(:response) && e.response && e.response[:body] raise e end @@ -148,7 +148,7 @@ module NanoBot begin result = @client.chat(parameters: Logic::OpenAI::Tokens.apply_policies!(cartridge, payload)) rescue StandardError => e - raise e.class, e.response[:body] if e.response && e.response[:body] + raise e.class, e.response[:body] if e.respond_to?(:response) && e.response && e.response[:body] raise e end diff --git a/logic/cartridge/parser.rb b/logic/cartridge/parser.rb index 308ca36..f82b968 100644 --- a/logic/cartridge/parser.rb +++ b/logic/cartridge/parser.rb @@ -22,7 +22,50 @@ module NanoBot end def self.markdown(raw) - yaml(Markdown.instance.render(raw)) + yaml_source = [] + + tools = [] + + blocks = Markdown.new.render(raw).blocks + + previous_block_is_tool = false + + blocks.each do |block| + if block[:language] == 'yaml' + parsed = Logic::Helpers::Hash.symbolize_keys( + YAML.safe_load(block[:source], permitted_classes: [Symbol]) + ) + + if parsed.key?(:tools) && parsed[:tools].is_a?(Array) && !parsed[:tools].empty? + previous_block_is_tool = true + + tools.concat(parsed[:tools]) + + parsed.delete(:tools) + + unless parsed.empty? + yaml_source << YAML.dump(Logic::Helpers::Hash.stringify_keys( + parsed + )).gsub(/^---/, '') # TODO: Is this safe enough? + end + else + yaml_source << block[:source] + previous_block_is_tool = false + nil + end + elsif previous_block_is_tool + tools.last[block[:language].to_sym] = block[:source] + previous_block_is_tool = false + end + end + + unless tools.empty? + yaml_source << YAML.dump(Logic::Helpers::Hash.stringify_keys( + { tools: } + )).gsub(/^---/, '') # TODO: Is this safe enough? + end + + yaml(yaml_source.join("\n")) end def self.yaml(raw) @@ -32,24 +75,51 @@ module NanoBot end class Renderer < Redcarpet::Render::Base + LANGUAGES_MAP = { + 'yml' => 'yaml', + 'yaml' => 'yaml', + 'lua' => 'lua', + 'fnl' => 'fennel', + 'fennel' => 'fennel', + 'clj' => 'clojure', + 'clojure' => 'clojure' + }.freeze + + LANGUAGES = LANGUAGES_MAP.keys.freeze + + def initialize(...) + super(...) + @_nano_bots_blocks = [] + end + + attr_reader :_nano_bots_blocks + def block_code(code, language) - return nil unless %w[yml yaml].include?(language.to_s.downcase.strip) + key = language.to_s.downcase.strip - "\n#{code}\n" + return nil unless LANGUAGES.include?(key) + + @_nano_bots_blocks << { language: LANGUAGES_MAP[key], source: code } + + nil end end class Markdown - include Singleton - attr_reader :markdown def initialize - @markdown = Redcarpet::Markdown.new(Renderer, fenced_code_blocks: true) + @renderer = Renderer.new + @markdown = Redcarpet::Markdown.new(@renderer, fenced_code_blocks: true) + end + + def blocks + @renderer._nano_bots_blocks end def render(raw) - @markdown.render(raw) + @markdown.render(raw.gsub(/```\w/, "\n\n\\0")) + self end end end diff --git a/logic/helpers/hash.rb b/logic/helpers/hash.rb index 90432b5..4cb44ac 100644 --- a/logic/helpers/hash.rb +++ b/logic/helpers/hash.rb @@ -17,6 +17,19 @@ module NanoBot end end + def self.stringify_keys(object) + case object + when ::Hash + object.each_with_object({}) do |(key, value), result| + result[key.to_s] = stringify_keys(value) + end + when Array + object.map { |e| stringify_keys(e) } + else + object + end + end + def self.fetch(object, path) node = object diff --git a/spec/data/cartridges/block.md b/spec/data/cartridges/block.md new file mode 100644 index 0000000..ef8588d --- /dev/null +++ b/spec/data/cartridges/block.md @@ -0,0 +1,7 @@ +First, we need to add some important details: +```yaml +safety: + functions: + sandboxed: false +``` +Hi! diff --git a/spec/data/cartridges/tools.md b/spec/data/cartridges/tools.md new file mode 100644 index 0000000..5d2da5a --- /dev/null +++ b/spec/data/cartridges/tools.md @@ -0,0 +1,76 @@ +A cartridge is a YAML file with human-readable data that outlines the bot's goals, expected behaviors, and settings for authentication and provider utilization. + +We begin with the meta section, which provides information about what this cartridge is designed for: + +```yaml +meta: + symbol: 🕛 + name: Date and Time + author: icebaker + version: 0.0.1 + license: CC0-1.0 + description: A helpful assistant. +``` + +It includes details like versioning and license. + +Next, we add a behavior section that will provide the bot with a directive on how it should behave: + +```yaml +behaviors: + interaction: + directive: You are a helpful assistant. +``` + +Now, we need to provide instructions on how this Nano Bot should connect with a provider, which credentials to use, and what specific configurations for the LLM are required: + +```yaml +provider: + id: openai + credentials: + access-token: ENV/OPENAI_API_KEY + settings: + user: ENV/NANO_BOTS_END_USER + model: gpt-4-1106-preview +``` + +In my API, I have set the environment variables `OPENAI_API_KEY` and `NANO_BOTS_END_USER`, which is where the values for these will come from. + +Nano Bot ready; let's start adding some extra power to it. + +## Random Numbers + +```yml +tools: +- name: random-number + description: Generates a random number within a given range. + parameters: + type: object + properties: + from: + type: integer + description: The minimum expected number for random generation. + to: + type: integer + description: The maximum expected number for random generation. + required: + - from + - to +``` + +```clj +(let [{:strs [from to]} parameters] + (+ from (rand-int (+ 1 (- to from))))) +``` + +## Date and Time + +```yaml +tools: +- name: date-and-time + description: Returns the current date and time. +``` + +```fnl +(os.date) +``` diff --git a/spec/logic/cartridge/parser_spec.rb b/spec/logic/cartridge/parser_spec.rb index f8d1302..e8bac0b 100644 --- a/spec/logic/cartridge/parser_spec.rb +++ b/spec/logic/cartridge/parser_spec.rb @@ -4,28 +4,92 @@ require_relative '../../../logic/cartridge/parser' RSpec.describe NanoBot::Logic::Cartridge::Parser do context 'markdown' do - let(:raw) { File.read('spec/data/cartridges/markdown.md') } + context 'default' do + let(:raw) { File.read('spec/data/cartridges/markdown.md') } - it 'parses markdown cartridge' do - expect(described_class.parse(raw, format: 'md')).to eq( - { meta: { - symbol: '🤖', - name: 'ChatGPT 4 Turbo', - author: 'icebaker', - version: '0.0.1', - license: 'CC0-1.0', - description: 'A helpful assistant.' - }, - behaviors: { interaction: { directive: 'You are a helpful assistant.' } }, - provider: { - id: 'openai', - credentials: { 'access-token': 'ENV/OPENAI_API_KEY' }, - settings: { - user: 'ENV/NANO_BOTS_END_USER', - model: 'gpt-4-1106-preview' - } - } } - ) + it 'parses markdown cartridge' do + expect(described_class.parse(raw, format: 'md')).to eq( + { meta: { + symbol: '🤖', + name: 'ChatGPT 4 Turbo', + author: 'icebaker', + version: '0.0.1', + license: 'CC0-1.0', + description: 'A helpful assistant.' + }, + behaviors: { interaction: { directive: 'You are a helpful assistant.' } }, + provider: { + id: 'openai', + credentials: { 'access-token': 'ENV/OPENAI_API_KEY' }, + settings: { + user: 'ENV/NANO_BOTS_END_USER', + model: 'gpt-4-1106-preview' + } + } } + ) + end + end + + context 'tools' do + let(:raw) { File.read('spec/data/cartridges/tools.md') } + + it 'parses markdown cartridge' do + expect(described_class.parse(raw, format: 'md')).to eq( + { meta: { + symbol: '🕛', + name: 'Date and Time', + author: 'icebaker', + version: '0.0.1', + license: 'CC0-1.0', + description: 'A helpful assistant.' + }, + behaviors: { + interaction: { + directive: 'You are a helpful assistant.' + } + }, + provider: { + id: 'openai', + credentials: { 'access-token': 'ENV/OPENAI_API_KEY' }, + settings: { + user: 'ENV/NANO_BOTS_END_USER', + model: 'gpt-4-1106-preview' + } + }, + tools: [ + { name: 'random-number', + description: 'Generates a random number within a given range.', + parameters: { + type: 'object', + properties: { + from: { + type: 'integer', + description: 'The minimum expected number for random generation.' + }, + to: { + type: 'integer', + description: 'The maximum expected number for random generation.' + } + }, + required: %w[from to] + }, + clojure: "(let [{:strs [from to]} parameters]\n (+ from (rand-int (+ 1 (- to from)))))\n" }, + { name: 'date-and-time', + description: 'Returns the current date and time.', + fennel: "(os.date)\n" } + ] } + ) + end + end + + context 'block' do + let(:raw) { File.read('spec/data/cartridges/block.md') } + + it 'parses markdown cartridge' do + expect(described_class.parse(raw, format: 'md')).to eq( + { safety: { functions: { sandboxed: false } } } + ) + end end end end diff --git a/spec/logic/helpers/hash_spec.rb b/spec/logic/helpers/hash_spec.rb index 09012c8..5e4ec60 100644 --- a/spec/logic/helpers/hash_spec.rb +++ b/spec/logic/helpers/hash_spec.rb @@ -7,7 +7,16 @@ RSpec.describe NanoBot::Logic::Helpers::Hash do expect(described_class.symbolize_keys({ 'a' => 'b', 'c' => { 'd' => ['e'] } })).to eq( { a: 'b', c: { d: ['e'] } } ) + end + + it 'stringify keys' do + pp described_class.stringify_keys({ a: 'b', c: { d: [:e] } }) + expect(described_class.stringify_keys({ a: 'b', c: { d: [:e] } })).to eq( + { 'a' => 'b', 'c' => { 'd' => [:e] } } + ) + end + it 'fetch a path of keys' do expect(described_class.fetch({ a: 'b', c: { d: ['e'] } }, %i[c d])).to eq( ['e'] ) |